Skip to content

Commit 06cc797

Browse files
authored
feat: support for google keep (#18) (#85)
* feat: support for google keep
* fix: indentation issues from flake8
* fix: remove updated time from key + rename timestamp
* fix: only use created timestamp for the key
1 parent 9f03de7 commit 06cc797

File tree

5 files changed

+157
-1
lines changed

5 files changed

+157
-1
lines changed

google_takeout_parser/locales/common.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
_parse_location_history,
1212
_parse_semantic_location_history,
1313
_parse_chrome_history,
14+
_parse_keep
1415
)
1516
from ..parse_csv import ( # noqa: F401
1617
_parse_youtube_comments_csv,

google_takeout_parser/locales/en.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
_parse_location_history,
1111
_parse_semantic_location_history,
1212
_parse_chrome_history,
13+
_parse_keep
1314
)
1415

1516

@@ -86,7 +87,8 @@
8687
r"Groups": None,
8788
r"Google Play Games Services/Games/.*/(Achievements|Activity|Experience|Scores).html": None,
8889
r"Hangouts": None,
89-
r"Keep": None,
90+
r"Keep/.*?.json": _parse_keep,
91+
r"Keep/": None,
9092
r"Maps (your places)": None,
9193
r"My Maps/.*.kmz": None, # custom KML maps
9294
r"Saved/.*.csv": None, # lists with saved places from Google Maps

google_takeout_parser/models.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,19 @@ class LocationInfo(NamedTuple):
4848
sourceUrl: Optional[Url]
4949

5050

51+
class KeepListContent(NamedTuple):
    """One entry of a Google Keep checklist note.

    The field names mirror the keys read from each item of the note's
    ``listContent`` array, which is why they are camelCase.
    """

    # HTML rendering of the entry, e.g. "<li>Apples</li>"
    textHtml: str
    # plain-text rendering of the same entry
    text: str
    # whether the entry is ticked off
    isChecked: bool
55+
56+
57+
class KeepAnnotation(NamedTuple):
    """An annotation attached to a Google Keep note.

    The field names mirror the keys read from each item of the note's
    ``annotations`` array.
    """

    description: str
    source: str
    title: str
    # kept as a plain string; no URL validation happens here
    url: str
62+
63+
5164
# fmt: off
5265
class BaseEvent(Protocol):
5366
@property
@@ -284,6 +297,25 @@ def key(self) -> Tuple[str, int]:
284297
return self.url, int(self.dt.timestamp())
285298

286299

300+
@dataclass
class Keep(BaseEvent):
    """A single Google Keep note.

    The camelCase attributes keep the spelling of the JSON keys they are
    read from in the Keep export.
    """

    title: str
    # last user edit (parsed from "userEditedTimestampUsec")
    updated_dt: datetime
    # creation time (parsed from "createdTimestampUsec")
    created_dt: datetime
    # checklist entries, for notes that are lists
    listContent: Optional[List[KeepListContent]]
    # plain-text body, for notes that are free text
    textContent: Optional[str]
    # HTML rendering of the body; only present in some exported files
    textContentHtml: Optional[str]
    color: str
    annotations: Optional[List[KeepAnnotation]]
    isTrashed: bool
    isPinned: bool
    isArchived: bool

    @property
    def key(self) -> int:
        """Return the creation time as whole seconds since the Unix epoch.

        Only ``created_dt`` is used, so the key does not change when the
        note is edited.
        NOTE(review): two notes created within the same second get the same
        key -- confirm that is acceptable wherever keys are used to dedupe.
        """
        return int(self.created_dt.timestamp())
317+
318+
287319
# can't compute this dynamically -- have to write it out
288320
# if you want to override, override both global variable types with new types
289321
DEFAULT_MODEL_TYPE = Union[
@@ -296,6 +328,7 @@ def key(self) -> Tuple[str, int]:
296328
CSVYoutubeComment,
297329
CSVYoutubeLiveChat,
298330
PlaceVisit,
331+
Keep
299332
]
300333

301334
CacheResults = Iterator[Res[DEFAULT_MODEL_TYPE]]

google_takeout_parser/parse_json.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@
2121
Location,
2222
PlaceVisit,
2323
CandidateLocation,
24+
Keep,
25+
KeepListContent,
26+
KeepAnnotation
2427
)
2528
from .common import Res
2629
from .time_utils import parse_json_utc_date
@@ -282,3 +285,45 @@ def _parse_chrome_history(p: Path) -> Iterator[Res[ChromeHistory]]:
282285
)
283286
except Exception as e:
284287
yield e
288+
289+
290+
def _parse_keep(p: Path) -> Iterator[Res[Keep]]:
    """Parse one Google Keep note export into a ``Keep`` event.

    Each Keep note is stored as its own JSON file, so exactly one item is
    yielded: the parsed ``Keep``, or the exception raised while building it
    (for example a ``KeyError`` when a required key is missing).

    Required keys: ``title``, ``createdTimestampUsec``,
    ``userEditedTimestampUsec``, ``color``, ``isTrashed``, ``isPinned`` and
    ``isArchived``. A missing ``listContent`` or ``annotations`` becomes an
    empty list; a missing ``textContent`` or ``textContentHtml`` becomes
    ``None``.
    """
    json_data = _read_json_data(p)

    def _from_usec(usec: int) -> datetime:
        # timestamps are stored as microseconds since the Unix epoch
        return datetime.fromtimestamp(usec / 1_000_000, tz=timezone.utc)

    # Build the note first and yield afterwards, so that only construction
    # errors are converted into yielded results -- never something raised
    # into the generator at the yield point.
    try:
        note = Keep(
            title=json_data["title"],
            created_dt=_from_usec(json_data["createdTimestampUsec"]),
            updated_dt=_from_usec(json_data["userEditedTimestampUsec"]),
            listContent=[
                KeepListContent(
                    textHtml=content["textHtml"],
                    text=content["text"],
                    isChecked=content["isChecked"],
                )
                for content in json_data.get("listContent", [])
            ],
            textContent=json_data.get("textContent"),
            textContentHtml=json_data.get("textContentHtml"),
            color=json_data["color"],
            annotations=[
                KeepAnnotation(
                    description=annotation["description"],
                    source=annotation["source"],
                    title=annotation["title"],
                    url=annotation["url"],
                )
                for annotation in json_data.get("annotations", [])
            ],
            isTrashed=json_data["isTrashed"],
            isPinned=json_data["isPinned"],
            isArchived=json_data["isArchived"],
        )
    except Exception as e:
        # same convention as the parser above: report bad data as a result
        yield e
    else:
        yield note

tests/test_json.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -427,3 +427,78 @@ def test_semantic_location_history_2024(tmp_path_f: Path) -> None:
427427
),
428428
],
429429
)
430+
431+
432+
def test_keep(tmp_path_f: Path) -> None:
    """Round-trip a representative Keep note through ``_parse_keep``.

    The note carries both checklist entries and a text body, plus one
    annotation, so every field of ``models.Keep`` is exercised.
    """
    data = {
        "title": "Grocery List",
        "userEditedTimestampUsec": 1649291142757000,
        "createdTimestampUsec": 1649291142757000,
        "listContent": [
            {
                "textHtml": "<li>Apples</li>",
                "text": "Apples",
                "isChecked": False,
            },
            {
                "textHtml": "<li>Milk</li>",
                "text": "Milk",
                "isChecked": True,
            },
        ],
        "textContent": "Don't forget to buy snacks too.",
        "textContentHtml": "<p>Don't forget to buy snacks too.</p>",
        "color": "yellow",
        "annotations": [
            {
                "description": "Weekly grocery run",
                "source": "Keep",
                "title": "Shopping Note",
                "url": "https://keep.google.com/",
            }
        ],
        "isTrashed": False,
        "isPinned": True,
        "isArchived": False,
    }

    fp = tmp_path_f / "file"
    fp.write_text(json.dumps(data))
    res = list(prj._parse_keep(fp))
    # one note per file, so exactly one result is expected
    assert len(res) == 1
    obj = res[0]
    assert not isinstance(obj, Exception)

    # 1649291142757000 microseconds since the epoch, in UTC
    expected_dt = datetime.datetime(
        2022, 4, 7, 0, 25, 42, 757000, tzinfo=datetime.timezone.utc,
    )
    assert obj == models.Keep(
        title="Grocery List",
        updated_dt=expected_dt,
        created_dt=expected_dt,
        listContent=[
            models.KeepListContent(
                textHtml="<li>Apples</li>",
                text="Apples",
                isChecked=False,
            ),
            models.KeepListContent(
                textHtml="<li>Milk</li>",
                text="Milk",
                isChecked=True,
            ),
        ],
        textContent="Don't forget to buy snacks too.",
        textContentHtml="<p>Don't forget to buy snacks too.</p>",
        color="yellow",
        annotations=[
            models.KeepAnnotation(
                description="Weekly grocery run",
                source="Keep",
                title="Shopping Note",
                url="https://keep.google.com/",
            )
        ],
        isTrashed=False,
        isPinned=True,
        isArchived=False,
    )
    # the key is the creation time truncated to whole seconds
    assert obj.key == 1649291142

0 commit comments

Comments
 (0)