11import time
22import webbrowser
3- from typing import Dict , List , Callable , Any
3+ from typing import Dict , List , Callable , Any , Tuple
44from urllib .parse import urlencode , quote
55import datetime
6+ import re as regex
67
78# avoid importing anything from calibre or the highlights_to_obsidian plugin here
89
@@ -170,7 +171,6 @@ def make_book_format_dict(data: Dict, book_titles_authors: Dict[int, Dict[str, s
170171 title_authors = book_titles_authors .get (int (data ["book_id" ]), {}) # dict with {"title": str, "authors": Tuple[str]}
171172
172173 format_options = {
173- # if you add a key to this dict, also update the format_options local variable in config.py
174174 "title" : title_authors .get ("title" , "Untitled" ), # title of book
175175 # todo: add "chapter" option
176176 "authors" : title_authors .get ("authors" , ("Unknown" ,)), # authors of book
@@ -180,6 +180,25 @@ def make_book_format_dict(data: Dict, book_titles_authors: Dict[int, Dict[str, s
180180 return format_options
181181
182182
def make_sent_format_dict(total_sent, book_sent, highlight_sent) -> Dict[str, str]:
    """
    builds the format options that describe how many highlights are being sent.
    every input is passed through str() before it is stored.

    :param total_sent: total number of highlights being sent
    :param book_sent: total number of highlights being sent for this book
    :param highlight_sent: this highlight's position in the highlights being sent to this book, e.g. 5 if it's
    the fifth highlight.
    :return: dict with the keys "totalsent", "booksent" and "highlightsent" (in that order), one per param
    """
    option_names = ("totalsent", "booksent", "highlightsent")
    option_values = (total_sent, book_sent, highlight_sent)

    # pair each option name with the stringified value of the matching param
    return {name: str(value) for name, value in zip(option_names, option_values)}
200+
201+
def make_format_dict(data, calibre_library: str, book_titles_authors: Dict[int, Dict[str, str]]) -> Dict[str, str]:
    """
    collects every formatting option for a single highlight into one dict.

    :param data: json object of a calibre highlight
    :param calibre_library: passed through to make_highlight_format_dict() together with data
    :param book_titles_authors: passed through to make_book_format_dict() together with data
    :return: the time, highlight, book and sent-amount format options merged into a single dict. if the same
    key appears in more than one group, the group that is merged later wins.
    """

    # formatting options are based on https://github.com/jplattel/obsidian-clipper

    # if you add a format option, also update the format_options local variable in config.py and the docs in README.md
    option_groups = [
        make_time_format_dict(data),
        make_highlight_format_dict(data, calibre_library),
        make_book_format_dict(data, book_titles_authors),
        # the sent-amount options can't be calculated by the time make_format_dict is called (totalsent
        # probably could be, but it's kept with the others). each one maps to its own placeholder text, so
        # that string.format() doesn't error if it runs into one of them, and the placeholder survives.
        make_sent_format_dict("{totalsent}", "{booksent}", "{highlightsent}"),
    ]

    # fold the groups together in order; same result as chaining the dict | operator
    # (https://peps.python.org/pep-0584/)
    merged: Dict[str, str] = {}
    for group in option_groups:
        merged.update(group)
    return merged
197225
198226
199227class HighlightSender :
@@ -258,6 +286,28 @@ def set_sort_key(self, sort_key: str):
258286 # todo: verify that the sort key is valid
259287 self .sort_key = sort_key
260288
def apply_sent_formats(self) -> Tuple[bool, bool, bool]:
    """
    reports which templates mention a "how many highlights were sent" formatting option.

    those options can't be applied until after the other formatting options are applied, so they end up being
    applied to formatted strings instead of templates. depending on the content of the highlights, those strings
    can be very large. checking the templates first is a small performance boost: the second formatting pass
    only has to run for the parts whose template actually uses one of the options.

    an alternative to this is to only format titles, and then count how many highlights will be sent to each
    note before you apply formatting to the body.

    :return: Tuple (title, body, header), where each item is True if that part needs the sent-amount formatting
    to be applied. body covers both self.body_format and self.no_notes_format.
    """
    # the values passed here don't matter, only the option names are used
    placeholders = ["{" + name + "}" for name in make_sent_format_dict(0, 0, 0)]

    needs_title = any(p in self.title_format for p in placeholders)
    needs_body = any(p in self.body_format or p in self.no_notes_format for p in placeholders)
    needs_header = any(p in self.header_format for p in placeholders)
    return needs_title, needs_body, needs_header
310+
261311 def make_obsidian_data (self , note_file , note_content ):
262312 """
263313 limits length of note_file to 180 characters, allowing for an obsidian vault path of up to 80
@@ -320,16 +370,16 @@ def send(self, condition: Callable[[Any], bool] = lambda x: True):
320370 """
321371
322372 # todo: a lot of the lists used here and in related functions could probably be replaced with tuples
323-
324- def is_valid_highlight (_dat :Dict ):
373+
374+ def is_valid_highlight (_dat : Dict ):
325375 """
326376 :param _dat: a dict with one calibre annotation's data
327377 :return: True if this is a valid highlight and should be sent, else False
328378 """
329379 _annot = _dat .get ("annotation" , {})
330380 if _annot .get ("type" ) != "highlight" :
331381 return False # annotation must be a highlight, not a bookmark
332-
382+
333383 if _annot .get ("removed" ):
334384 return False # don't try to send highlights that have been removed
335385
@@ -356,7 +406,7 @@ def format_add_highlight(_highlight, _dats, _headers):
356406
357407 _dats .append ([formatted , self .format_sort_key (dat )])
358408
359- def merge_highlights (data ):
409+ def merge_highlights (data , _headers ):
360410 """
361411 merges formatted highlights into a single string for each unique note title found in dats.
362412
@@ -365,18 +415,23 @@ def merge_highlights(data):
365415
366416 for reference, format_data() output is a list of [title, body]
367417
368- :param data: List[List[format_data() output, sort_key]]
418+ :param data: list of all formatted highlights: List[List[format_data() output, sort_key]]
419+ :param _headers: formatted headers: dict[note_title:str, header:str]. sent amount formatting will be applied
420+ in-place.
369421 :return: list of obsidian_data objects, where each unique title from the input is merged into a
370422 single, sorted item in the output.
371423 """
372424
373- def add_data_item (_dat , _books , _lengths ):
425+ def add_data_item (_dat , _books , _lengths , _counts ):
374426 """
375427 :param _dat: data item: [[title, body], sort_key]
376- :param _books: dict that will be updated in-place. will have an obsidian_data object and sort key
377- added to a note title. like _books["title"].append([obsidian_data , sort_key]). automatically handles
428+ :param _books: dict that will be updated in-place. will have a format_data() output and sort key
429+ added to a note title. like _books["title"].append([formatted_body , sort_key]). automatically handles
378430 cases where "title" is not in _books.
379431 :param _lengths: dict that may be updated in-place, used for tracking cumulative length of highlights
432+ :param _counts: dict of {title, int} for how many highlights each book has. can't be done by taking
433+ length of _books[title] because _books splits large amounts of highlights for a single title into
434+ more than one title with a smaller amount of highlights each.
380435 :return: none
381436 """
382437 format_dat = _dat [0 ] # list[title, body]
@@ -402,22 +457,110 @@ def add_data_item(_dat, _books, _lengths):
402457
403458 if base_title in _lengths :
404459 _lengths [base_title ] += len (body_and_sort [0 ])
460+ _counts [base_title ] += 1
405461 else :
406462 _lengths [base_title ] = len (body_and_sort [0 ])
463+ _counts [base_title ] = 1
464+
def apply_sent_amount_format(_books: Dict[str, List], _headers: Dict[str, str],
                             total_highlights: int, book_highlights: Dict[str, int]):
    """
    fills in the {totalsent}, {booksent} and {highlightsent} placeholders in note titles, highlight bodies
    and headers. only the parts reported by self.apply_sent_formats() are touched.

    placeholders are substituted with str.replace() rather than str.format(): the text being processed has
    already been formatted once and contains highlight content, so any other "{" or "}" in it must be left
    alone instead of raising an error or being un-escaped.

    :param _books: formatted highlights being sent to each book (will be updated in-place):
    dict[title:str, list[list[formatted_body, sort_key]]
    :param _headers: formatted headers: dict[note_title:str, header:str]. sent amount formatting will be
    applied in-place.
    :param total_highlights: total number of highlights being sent
    :param book_highlights: Dict[title, int] that has the amount of highlights being sent to each book.
    if titles are re-keyed, an entry for each new title is added in-place.
    :return: none
    """
    # todo: _books and _headers being updated in-place is a source of bugs. change it
    should_apply = self.apply_sent_formats()  # (title, body, header)
    if True not in should_apply:
        return

    def fill_sent_placeholders(_text: str, _fmt: Dict[str, str]) -> str:
        """replace each "{key}" from _fmt in _text with its value; all other text is left untouched"""
        for _name, _value in _fmt.items():
            _text = _text.replace("{" + _name + "}", _value)
        return _text

    def get_base_title(_title: str, _valid_titles: List[str]) -> str:
        """if this is part of a split note, e.g. "title (1)" or "title (2)", remove the " (x)" """
        # todo: change the data format for split note titles, so that you can simplify this
        # space, parentheses, number, parentheses, end of string
        _match = regex.search(r" \((\d+)\)$", _title)
        if _match:
            _base = _title[:_match.start()]
            # only strip the suffix if the shortened title is a real note title
            if _base in _valid_titles:
                return _base
        return _title

    def count_highlights_before(_title, _base, _all_books) -> int:
        """
        if a highlight has " (x)" at the end, count the highlights being sent to previous notes

        :param _title: title of the note these highlights are being sent to
        :param _base: base title for this title
        :param _all_books: dict[title, list[highlights]]
        :return: number of highlights in notes with same base title but a lower x in their " (x)"
        """
        _b, _t = len(_base), len(_title)
        if _b == _t:
            return 0  # not a split note, nothing comes before it

        title_number = int(_title[_b + 2:-1])  # _title is base title + " (num)"
        _count = len(_all_books[_base])
        for x in range(1, title_number):
            _count += len(_all_books[_base + f" ({x})"])
        return _count

    if should_apply[0]:  # title
        # iterate over a snapshot of the keys, since _books is re-keyed inside the loop
        original_titles = list(_books.keys())
        for t in original_titles:  # t: book title (str)
            base_title = get_base_title(t, original_titles)

            fmt = make_sent_format_dict(total_highlights, book_highlights[base_title], -1)
            new_title = fill_sent_placeholders(t, fmt)

            # pop-then-assign keeps the entry even if the title did not change
            _books[new_title] = _books.pop(t)
            if t in _headers:
                _headers[new_title] = _headers.pop(t)
            if t in book_highlights:
                book_highlights[new_title] = book_highlights[t]

    if should_apply[1]:  # body
        valid_titles = list(_books.keys())
        for t, highlights in _books.items():  # t: book title (str), highlights: list of [formatted body, sort_key]
            base_title = get_base_title(t, valid_titles)
            highlights_before = count_highlights_before(t, base_title, _books)

            # positions are 1-based and continue counting across the parts of a split note
            for position, body_and_sort in enumerate(highlights, start=highlights_before + 1):
                fmt = make_sent_format_dict(total_highlights, book_highlights[base_title], position)
                body_and_sort[0] = fill_sent_placeholders(body_and_sort[0], fmt)

    if should_apply[2]:  # header
        for note_title in _headers:  # values are replaced, keys stay the same, so iterating the dict is safe
            fmt = make_sent_format_dict(total_highlights, book_highlights[note_title], -1)
            _headers[note_title] = fill_sent_placeholders(_headers[note_title], fmt)
407544
408545 books = {} # dict[title:str, list[list[obsidian_data object:Dict, sort_key]]
409- lengths = {} # dict[book title:str, int]
546+ lengths = {} # amount of characters per book. dict[book title:str, int]
547+ counts = {} # amount of highlights per book. dict[book title:str, int]
410548
411549 # make list of highlights for each note title
412550 for d in data :
413- add_data_item (d , books , lengths )
551+ add_data_item (d , books , lengths , counts )
552+
553+ # sort books here so that apply_sent_amount_format gives accurate position of highlight in note
554+ for key in books :
555+ books [key ].sort (key = lambda body_sort : body_sort [1 ])
556+
557+ apply_sent_amount_format (books , headers , len (data ), counts )
414558
415559 # now, `books` contains lists of sorted [note body, sort key] objects
416560 ret = []
417561
418562 # merge each book's already-sorted highlights into a single string
419563 for key in books :
420- books [key ].sort (key = lambda body_sort : body_sort [1 ])
421564 # header is only included in first of a series of same-book files
422565 # (this happens when there's too much text to send to a single file at once)
423566 text = headers .get (key , "" ) + "" .join ([a [0 ] for a in books [key ]])
@@ -426,15 +569,15 @@ def add_data_item(_dat, _books, _lengths):
426569 return ret
427570
428571 highlights = filter (is_valid_highlight , self .annotations_list ) # annotations["annotations"])
429- dats = [] # List[List[obsidian_data , sort_key]]
430- headers = {} # dict[note_title:str, header:str]
572+ dats = [] # formatted titles and bodies: List[List[format_data() output , sort_key]]
573+ headers = {} # formatted headers: dict[note_title:str, header:str]
431574
432575 # make formatted titles, bodies, and headers
433576 for highlight in highlights :
434577 format_add_highlight (highlight , dats , headers )
435578
436579 # todo: sometimes, if obsidian isn't already open, not all highlights get sent
437- merged = merge_highlights (dats )
580+ merged = merge_highlights (dats , headers )
438581 for obsidian_dat in merged :
439582 send_item_to_obsidian (obsidian_dat )
440583
0 commit comments