@@ -68,39 +68,15 @@ def format_header(dat: Dict[str, str], header_format: str) -> str:
6868 return header_format .format (** dat )
6969
7070
71- def make_format_dict (data , calibre_library : str , book_titles_authors : Dict [int , Dict [ str , str ]]) -> Dict :
71+ def make_time_format_dict (data : Dict ) -> Dict [str , str ]:
7272 """
73+
7374 :param data: json object of a calibre highlight
74- :param calibre_library: name of the calibre library, to make a url to the highlight
75- :param book_titles_authors: dictionary mapping book ids to their titles and authors
76- :return:
75+ :return: dict containing all time-related formatting options
7776 """
7877
79- def format_blockquote (text : str ) -> str :
80- return "> " + text .replace ("\n " , "\n > " )
81-
8278 annot = data ["annotation" ]
8379
84- # format is calibre://view-book/<Library_Name>/<book_id>/<book_format>?open_at=<location>
85- # for example, calibre://view-book/Calibre_Library/39/EPUB?open_at=epubcfi(/8/2/4/84/1:184)
86- # todo: right now, opening two different links from the same book opens two different viewer windows,
87- # make it instead go to the right location in the already-open window
88- url_format = "calibre://view-book/{library}/{book_id}/{book_format}?open_at=epubcfi({location})"
89- url_args = {
90- "library" : calibre_library .replace (" " , "_" ),
91- "book_id" : data ["book_id" ],
92- "book_format" : data ["format" ],
93- # the algorithm for this, "/{2 * (spine_index + 1)}", is taken from:
94- # read_book.annotations.AnnotationsManager.cfi_for_highlight(uuid, spine_index)
95- # https://github.com/kovidgoyal/calibre/blob/master/src/pyj/read_book/annotations.pyj#L249
96- # i didn't import the algorithm from calibre because it was too inconvenient to figure out how
97- #
98- # unfortunately, this doesn't work without the spine index thing. the location is missing a number.
99- # it should be, for example /8/2/4/84/1:184, but instead, data["start_cfi"] is /2/4/84/1:184.
100- # the first number in the cfi address has to be manually calculated.
101- "location" : "/" + str ((annot ["spine_index" ] + 1 ) * 2 ) + annot ["start_cfi" ],
102- }
103-
10480 # calibre's time format example: "2022-09-10T20:32:08.820Z"
10581 # the "Z" at the end means UTC time
10682 # "%Y-%m-%dT%H:%M:%S", take [:19] of the timestamp to remove milliseconds
@@ -109,18 +85,9 @@ def format_blockquote(text: str) -> str:
10985 h_local = h_time + h_time .astimezone (datetime .datetime .now ().tzinfo ).utcoffset ()
11086 local = time .localtime ()
11187 utc = time .gmtime ()
112- title_authors = book_titles_authors .get (int (data ["book_id" ]), {}) # dict with {"title": str, "authors": Tuple[str]}
11388 utc_offset = ("" if local .tm_gmtoff < 0 else "+" ) + str (local .tm_gmtoff // 3600 ) + ":00"
11489
115- # based on https://github.com/jplattel/obsidian-clipper
116- format_options = {
117- # if you add a key to this dict, also update the format_options local variable in config.py
118- "title" : title_authors .get ("title" , "Untitled" ), # title of book
119- # todo: add "chapter" option
120- "authors" : title_authors .get ("authors" , ("Unknown" ,)), # authors of book
121- "highlight" : annot ["highlighted_text" ], # highlighted text
122- "blockquote" : format_blockquote (annot ["highlighted_text" ]), # block-quoted highlight
123- "notes" : annot ["notes" ] if "notes" in annot else "" , # user's notes on this highlight
90+ time_options = {
12491 "date" : str (h_time .date ()), # utc date highlight was made
12592 "localdate" : str (h_local .date ()),
12693 # local date highlight was made. "local" based on send time, not highlight time
@@ -142,16 +109,93 @@ def format_blockquote(text: str) -> str:
142109 "localyear" : str (h_local .year ),
143110 "utcnow" : time .strftime ("%Y-%m-%d %H:%M:%S" , utc ),
144111 "localnow" : time .strftime ("%Y-%m-%d %H:%M:%S" , local ),
112+ "timestamp" : str (h_time .timestamp ()), # Unix timestamp of highlight time. uses UTC.
113+ }
114+
115+ return time_options
116+
117+
def make_highlight_format_dict(data: Dict, calibre_library: str) -> Dict[str, str]:
    """
    Build the highlight-related formatting options for one calibre highlight.

    :param data: json object of a calibre highlight. must contain "book_id", "format" and "annotation";
        the annotation must contain "spine_index", "start_cfi", "highlighted_text" and "uuid".
        "notes" is optional and defaults to an empty string.
    :param calibre_library: name of library book is found in. used for making a url to the highlight.
    :return: dict containing all highlight-related formatting options.
    :raises KeyError: if one of the required keys listed above is missing.
    """

    def format_blockquote(text: str) -> str:
        # every line of the highlight needs its own "> " marker, otherwise a multi-line
        # highlight falls out of the blockquote after the first line break
        return "> " + text.replace("\n", "\n> ")

    annot = data["annotation"]
    highlighted_text = annot["highlighted_text"]

    # format is calibre://view-book/<Library_Name>/<book_id>/<book_format>?open_at=<location>
    # for example, calibre://view-book/Calibre_Library/39/EPUB?open_at=epubcfi(/8/2/4/84/1:184)
    # todo: right now, opening two different links from the same book opens two different viewer windows,
    #  make it instead go to the right location in the already-open window
    url_format = "calibre://view-book/{library}/{book_id}/{book_format}?open_at=epubcfi({location})"
    url_args = {
        "library": calibre_library.replace(" ", "_"),
        "book_id": data["book_id"],
        "book_format": data["format"],
        # the algorithm for this, "/{2 * (spine_index + 1)}", is taken from:
        # read_book.annotations.AnnotationsManager.cfi_for_highlight(uuid, spine_index)
        # https://github.com/kovidgoyal/calibre/blob/master/src/pyj/read_book/annotations.pyj#L249
        # i didn't import the algorithm from calibre because it was too inconvenient to figure out how
        #
        # unfortunately, this doesn't work without the spine index thing. the location is missing a number.
        # it should be, for example /8/2/4/84/1:184, but instead, data["start_cfi"] is /2/4/84/1:184.
        # the first number in the cfi address has to be manually calculated.
        "location": "/" + str((annot["spine_index"] + 1) * 2) + annot["start_cfi"],
    }

    highlight_format = {
        "highlight": highlighted_text,  # highlighted text
        "blockquote": format_blockquote(highlighted_text),  # block-quoted highlight
        "notes": annot.get("notes", ""),  # user's notes on this highlight
        "url": url_format.format(**url_args),  # calibre:// url to open ebook viewer to this highlight
        "location": url_args["location"],  # epub cfi location of this highlight
        "uuid": annot["uuid"],  # highlight's ID in calibre
    }

    return highlight_format
161+
162+
def make_book_format_dict(data: Dict, book_titles_authors: Dict[int, Dict[str, str]]) -> Dict[str, str]:
    """
    Build the book-related formatting options for one calibre highlight.

    :param data: json object of a calibre highlight. its "book_id" is used to look up the book.
    :param book_titles_authors: dictionary mapping book ids to {"title": title, "authors": authors}
    :return: dict with the keys "title", "authors" and "bookid"
    """
    book_id = data["book_id"]

    # books missing from the mapping get an empty dict, so the defaults below are used
    book_info = book_titles_authors.get(int(book_id), {})

    # if you add a key to this dict, also update the format_options local variable in config.py
    # todo: add "chapter" option
    return {
        "title": book_info.get("title", "Untitled"),  # title of book
        "authors": book_info.get("authors", ("Unknown",)),  # authors of book
        "bookid": book_id,  # id exactly as given in data, not converted to int
    }
153181
154182
def make_format_dict(data, calibre_library: str, book_titles_authors: Dict[int, Dict[str, str]]) -> Dict[str, str]:
    """
    Collect every formatting option for one calibre highlight into a single dict.

    formatting options are based on https://github.com/jplattel/obsidian-clipper

    :param data: json object of a calibre highlight
    :param calibre_library: name of the calibre library, to make a url to the highlight
    :param book_titles_authors: dictionary mapping book ids to {"title": title, "authors": authors}
    :return: the time, highlight and book options merged into one dict
    """
    # on a duplicate key the later group wins: book over highlight over time
    return {
        **make_time_format_dict(data),
        **make_highlight_format_dict(data, calibre_library),
        **make_book_format_dict(data, book_titles_authors),
    }
197+
198+
155199class HighlightSender :
156200
157201 def __init__ (self ):
@@ -275,30 +319,46 @@ def send(self, condition: Callable[[Any], bool] = lambda x: True):
275319 condition takes a highlight's json object and returns true if that highlight should be sent to obsidian.
276320 """
277321
278- highlights = filter (lambda a : a .get ("annotation" , {}).get ("type" ) == "highlight" ,
279- self .annotations_list ) # annotations["annotations"])
280- dats = [] # List[List[obsidian_data, sort_key]]
281- headers = {} # dict[note_title:str, header:str]
322+ # todo: a lot of the lists used here and in related functions could probably be replaced with tuples
323+
324+ def is_valid_highlight (_dat :Dict ):
325+ """
326+ :param _dat: a dict with one calibre annotation's data
327+ :return: True if this is a valid highlight and should be sent, else False
328+ """
329+ _annot = _dat .get ("annotation" , {})
330+ if _annot .get ("type" ) != "highlight" :
331+ return False # annotation must be a highlight, not a bookmark
332+
333+ if _annot .get ("removed" ):
334+ return False # don't try to send highlights that have been removed
282335
283- for highlight in highlights :
284- if highlight ["annotation" ].get ("removed" , False ):
285- continue # don't try to send highlights that have been removed
336+ if not condition (_dat ):
337+ return False # user-defined condition must be true for this highlight
286338
287- if not condition (highlight ):
288- continue
339+ return True
289340
290- dat = make_format_dict (highlight , self .library_name , self .book_titles_authors )
341+ def format_add_highlight (_highlight , _dats , _headers ):
342+ """
343+ makes a formatted highlight from an annotation data object, then updates _dats and _headers.
344+
345+ :param _highlight: a calibre annotation object
346+ :param _dats: list to be updated in-place. a list [format_data() output, sort_key] will be appended.
347+ :param _headers: dict to be updated in-place. if we come across a title that's not in the dict,
348+ a formatted header will be made for that title.
349+ :return: none
350+ """
351+ dat = make_format_dict (_highlight , self .library_name , self .book_titles_authors )
291352 formatted = format_data (dat , self .title_format , self .body_format , self .no_notes_format )
292353
293- if formatted [0 ] not in headers :
294- headers [formatted [0 ]] = format_header (dat , self .header_format )
354+ if formatted [0 ] not in _headers : # only make one header per title
355+ _headers [formatted [0 ]] = format_header (dat , self .header_format )
295356
296- dats .append ([formatted , self .format_sort_key (dat )])
357+ _dats .append ([formatted , self .format_sort_key (dat )])
297358
298359 def merge_highlights (data ):
299360 """
300- returns a dictionary with formatted highlights merged into a single string for each
301- unique formatted note title found in dats.
361+ merges formatted highlights into a single string for each unique note title found in dats.
302362
303363 This limits the length of merged note contents to 20000 characters. If the length exceeds this, extra
304364 highlights will use a different title, e.g. "The Book", "The Book (1)", etc
@@ -309,14 +369,18 @@ def merge_highlights(data):
309369 :return: list of obsidian_data objects, where each unique title from the input is merged into a
310370 single, sorted item in the output.
311371 """
312- # this function has too many nested index lookups, it could use some simplification
313372
314- books = {} # dict[str, list[list[obsidian_data object, sort_key]]
315- lengths = {}
316- # make list of highlights for each note title
317- for d in data :
318- format_dat = d [0 ] # list[title, body]
319- body_and_sort = [format_dat [1 ], d [1 ]] # [note body, sort key]
373+ def add_data_item (_dat , _books , _lengths ):
374+ """
375+ :param _dat: data item: [[title, body], sort_key]
376+ :param _books: dict that will be updated in-place. will have an obsidian_data object and sort key
377+ added to a note title. like _books["title"].append([obsidian_data, sort_key]). automatically handles
378+ cases where "title" is not in _books.
379+ :param _lengths: dict that may be updated in-place, used for tracking cumulative length of highlights
380+ :return: none
381+ """
382+ format_dat = _dat [0 ] # list[title, body]
383+ body_and_sort = [format_dat [1 ], _dat [1 ]] # [note body, sort key]
320384 base_title = format_dat [0 ]
321385
322386 # limit each merged highlight to 20000 chars. it could be higher, but we need room for url encoding.
@@ -325,27 +389,34 @@ def merge_highlights(data):
325389 # problem is some detail about how webbrowser.open() is implemented. on my windows 11 laptop, calling
326390 # webbrowser.open("obsidian://" + "a" * 32699) works, but "a" * 32700 will open microsoft edge instead,
327391 # and if the number reaches 32757 it gives an error.
328- note_title , l = base_title , lengths .get (base_title , False )
329- if l : # limit size of a note's content to 20 kb.
392+ note_title , l = base_title , _lengths .get (base_title , False )
393+ if l : # start using a different title every 20k characters
330394 splits = l // 20000
331395 if splits > 0 :
332396 note_title = base_title + f" ({ splits } )"
333397
334- if note_title in books :
335- books [note_title ].append (body_and_sort )
398+ if note_title in _books :
399+ _books [note_title ].append (body_and_sort )
336400 else :
337- books [note_title ] = [body_and_sort ]
401+ _books [note_title ] = [body_and_sort ]
338402
339- if base_title in lengths :
340- lengths [base_title ] += len (body_and_sort [0 ])
403+ if base_title in _lengths :
404+ _lengths [base_title ] += len (body_and_sort [0 ])
341405 else :
342- lengths [base_title ] = len (body_and_sort [0 ])
406+ _lengths [base_title ] = len (body_and_sort [0 ])
407+
408+ books = {} # dict[title:str, list[list[obsidian_data object:Dict, sort_key]]
409+ lengths = {} # dict[book title:str, int]
410+
411+ # make list of highlights for each note title
412+ for d in data :
413+ add_data_item (d , books , lengths )
343414
344- # now, books contains lists of unsorted [note body, sort key] objects
415+ # now, ` books` contains lists of unsorted [note body, sort key] objects
345416 ret = []
346417
418+ # sort each book's highlights and then merge them into a single string
347419 for key in books :
348- # sort each book's highlights and then merge them into a single string
349420 books [key ].sort (key = lambda body_sort : body_sort [1 ])
350421 # header is only included in first of a series of same-book files
351422 # (this happens when there's too much text to send to a single file at once)
@@ -354,6 +425,14 @@ def merge_highlights(data):
354425
355426 return ret
356427
428+ highlights = filter (is_valid_highlight , self .annotations_list ) # annotations["annotations"])
429+ dats = [] # List[List[obsidian_data, sort_key]]
430+ headers = {} # dict[note_title:str, header:str]
431+
432+ # make formatted titles, bodies, and headers
433+ for highlight in highlights :
434+ format_add_highlight (highlight , dats , headers )
435+
357436 # todo: sometimes, if obsidian isn't already open, not all highlights get sent
358437 merged = merge_highlights (dats )
359438 for obsidian_dat in merged :
0 commit comments