Skip to content

Commit 30e0abc

Browse files
committed
add totalsent, booksent, highlightsent
1 parent 03436d3 commit 30e0abc

File tree

3 files changed

+165
-17
lines changed

3 files changed

+165
-17
lines changed

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,9 @@ Available formatting options are as follows:
3838
- {url}: A [calibre:// url](https://manual.calibre-ebook.com/url_scheme.html) to open the ebook viewer to this highlight. Note that this may not work if your library's name contains unsafe URL characters. Numbers, letters, spaces, underscores, and hyphens are all safe.
3939
- {location}: The highlight's EPUB CFI location in the book. For example, "/2/8/6/5:192". As a sort key, this will order highlights by their position in the book.
4040
- {timestamp}: The highlight's Unix timestamp. This is the default sort key used to determine what order to send highlights in.
41+
- {totalsent}: The total number of highlights being sent.
42+
- {booksent}: The total number of highlights being sent to this Obsidian note.
43+
- {highlightsent}: This highlight's position in the highlights being sent to this note. For example, "{highlightsent} out of {booksent}" might result in "3 out of 5".
4144
- {bookid}: The book's ID in calibre.
4245
- {uuid}: The highlight's unique ID in calibre. For example, "TlNlh8_I5VGKUtqdfbOxDw".
4346

h2o/config.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,8 @@ def make_format_info_label(self):
172172
"localyear", "utcnow",
173173
"localnow", "url",
174174
"location", "timestamp",
175+
"totalsent", "booksent",
176+
"highlightsent",
175177
"bookid", "uuid",
176178
]
177179
f_opt_str = "'" + "', '".join(format_options) + "'"

h2o/highlight_sender.py

Lines changed: 160 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
import time
22
import webbrowser
3-
from typing import Dict, List, Callable, Any
3+
from typing import Dict, List, Callable, Any, Tuple
44
from urllib.parse import urlencode, quote
55
import datetime
6+
import re as regex
67

78
# avoid importing anything from calibre or the highlights_to_obsidian plugin here
89

@@ -170,7 +171,6 @@ def make_book_format_dict(data: Dict, book_titles_authors: Dict[int, Dict[str, s
170171
title_authors = book_titles_authors.get(int(data["book_id"]), {}) # dict with {"title": str, "authors": Tuple[str]}
171172

172173
format_options = {
173-
# if you add a key to this dict, also update the format_options local variable in config.py
174174
"title": title_authors.get("title", "Untitled"), # title of book
175175
# todo: add "chapter" option
176176
"authors": title_authors.get("authors", ("Unknown",)), # authors of book
@@ -180,6 +180,25 @@ def make_book_format_dict(data: Dict, book_titles_authors: Dict[int, Dict[str, s
180180
return format_options
181181

182182

183+
def make_sent_format_dict(total_sent, book_sent, highlight_sent) -> Dict[str, str]:
184+
"""
185+
inputs will be converted to strings.
186+
187+
:param total_sent: total number of highlights being sent
188+
:param book_sent: total number of highlights being sent for this book
189+
:param highlight_sent: this highlight's position in the highlights being sent to this book, e.g. 5 if it's
190+
the fifth highlight.
191+
:return: dict containing a format option for each of the params
192+
"""
193+
sent_dict = {
194+
"totalsent": str(total_sent), # total highlights sent
195+
"booksent": str(book_sent), # highlights for this book
196+
"highlightsent": str(highlight_sent), # position of this highlight
197+
}
198+
199+
return sent_dict
200+
201+
183202
def make_format_dict(data, calibre_library: str, book_titles_authors: Dict[int, Dict[str, str]]) -> Dict[str, str]:
184203
"""
185204
:param data: json object of a calibre highlight
@@ -189,11 +208,20 @@ def make_format_dict(data, calibre_library: str, book_titles_authors: Dict[int,
189208
"""
190209

191210
# formatting options are based on https://github.com/jplattel/obsidian-clipper
211+
212+
# if you add a format option, also update the format_options local variable in config.py and the docs in README.md
192213
time_options = make_time_format_dict(data)
193214
highlight_options = make_highlight_format_dict(data, calibre_library)
194215
book_options = make_book_format_dict(data, book_titles_authors)
195216

196-
return time_options | highlight_options | book_options # | merges dictionaries https://peps.python.org/pep-0584/
217+
# these formatting options can't be calculated by the time make_format_dict is called.
218+
# actually, totalsent probably could be, but let's keep it here with the others.
219+
# we need to include this so that string.format() doesn't error if it runs into one of these
220+
placeholders = make_sent_format_dict("{totalsent}", "{booksent}", "{highlightsent}")
221+
222+
# the | operator merges dictionaries https://peps.python.org/pep-0584/
223+
# could also pass a dict as a param to each make_x_dict, and have them update it in place
224+
return time_options | highlight_options | book_options | placeholders
197225

198226

199227
class HighlightSender:
@@ -258,6 +286,28 @@ def set_sort_key(self, sort_key: str):
258286
# todo: verify that the sort key is valid
259287
self.sort_key = sort_key
260288

289+
def apply_sent_formats(self) -> Tuple[bool, bool, bool]:
290+
"""
291+
since formatting options for how many highlights were sent can't be applied until after the other formatting
292+
options are applied, they'll end up being applied to formatted strings instead of templates. depending on
293+
the content of those highlights, you could end up with very large strings. this function is a small performance
294+
boost: it'll only try to apply those formatting options if said formatting options are in templates.
295+
296+
an alternative to this is to only format titles, and then count how many highlights will be sent to each
297+
note before you apply formatting to the body.
298+
299+
:return: Tuple telling you if you need to apply formatting options for how many highlights were sent. Tuple is
300+
(title, body, header), where each item is True if that part needs formatting to be applied.
301+
"""
302+
format_dict = make_sent_format_dict(0, 0, 0)
303+
formats = ("{" + k + "}" for k in format_dict.keys())
304+
title, body, header = False, False, False
305+
for f in formats:
306+
title = title or (f in self.title_format)
307+
body = body or (f in self.body_format) or (f in self.no_notes_format)
308+
header = header or (f in self.header_format)
309+
return title, body, header
310+
261311
def make_obsidian_data(self, note_file, note_content):
262312
"""
263313
limits length of note_file to 180 characters, allowing for an obsidian vault path of up to 80
@@ -320,16 +370,16 @@ def send(self, condition: Callable[[Any], bool] = lambda x: True):
320370
"""
321371

322372
# todo: a lot of the lists used here and in related functions could probably be replaced with tuples
323-
324-
def is_valid_highlight(_dat:Dict):
373+
374+
def is_valid_highlight(_dat: Dict):
325375
"""
326376
:param _dat: a dict with one calibre annotation's data
327377
:return: True if this is a valid highlight and should be sent, else False
328378
"""
329379
_annot = _dat.get("annotation", {})
330380
if _annot.get("type") != "highlight":
331381
return False # annotation must be a highlight, not a bookmark
332-
382+
333383
if _annot.get("removed"):
334384
return False # don't try to send highlights that have been removed
335385

@@ -356,7 +406,7 @@ def format_add_highlight(_highlight, _dats, _headers):
356406

357407
_dats.append([formatted, self.format_sort_key(dat)])
358408

359-
def merge_highlights(data):
409+
def merge_highlights(data, _headers):
360410
"""
361411
merges formatted highlights into a single string for each unique note title found in dats.
362412
@@ -365,18 +415,23 @@ def merge_highlights(data):
365415
366416
for reference, format_data() output is a list of [title, body]
367417
368-
:param data: List[List[format_data() output, sort_key]]
418+
:param data: list of all formatted highlights: List[List[format_data() output, sort_key]]
419+
:param _headers: formatted headers: dict[note_title:str, header:str]. sent amount formatting will be applied
420+
in-place.
369421
:return: list of obsidian_data objects, where each unique title from the input is merged into a
370422
single, sorted item in the output.
371423
"""
372424

373-
def add_data_item(_dat, _books, _lengths):
425+
def add_data_item(_dat, _books, _lengths, _counts):
374426
"""
375427
:param _dat: data item: [[title, body], sort_key]
376-
:param _books: dict that will be updated in-place. will have an obsidian_data object and sort key
377-
added to a note title. like _books["title"].append([obsidian_data, sort_key]). automatically handles
428+
:param _books: dict that will be updated in-place. will have a format_data() output and sort key
429+
added to a note title. like _books["title"].append([formatted_body, sort_key]). automatically handles
378430
cases where "title" is not in _books.
379431
:param _lengths: dict that may be updated in-place, used for tracking cumulative length of highlights
432+
:param _counts: dict of {title: int} for how many highlights each book has. can't be done by taking
433+
length of _books[title] because _books splits large amounts of highlights for a single title into
434+
more than one title with a smaller amount of highlights each.
380435
:return: none
381436
"""
382437
format_dat = _dat[0] # list[title, body]
@@ -402,22 +457,110 @@ def add_data_item(_dat, _books, _lengths):
402457

403458
if base_title in _lengths:
404459
_lengths[base_title] += len(body_and_sort[0])
460+
_counts[base_title] += 1
405461
else:
406462
_lengths[base_title] = len(body_and_sort[0])
463+
_counts[base_title] = 1
464+
465+
def apply_sent_amount_format(_books: Dict[str, List], _headers: Dict[str, str],
466+
total_highlights: int, book_highlights: Dict[str, int]):
467+
"""
468+
:param _books: formatted highlights being sent to each book (will be updated in-place):
469+
dict[title:str, list[list[formatted_body, sort_key]]]
470+
:param _headers: formatted headers: dict[note_title:str, header:str]. sent amount formatting will be
471+
applied in-place.
472+
:param total_highlights: total number of highlights being sent
473+
:param book_highlights: Dict[title, int] that has the amount of highlights being sent to each book.
474+
:return: none
475+
"""
476+
# todo: _books and _headers being updated in-place is a source of bugs. change it
477+
should_apply = self.apply_sent_formats()
478+
if True not in should_apply:
479+
return
480+
481+
def get_base_title(_title: str, _valid_titles: List[str]) -> str:
482+
"""if this is part of a split note, e.g. "title (1)" or "title (2)", remove the " (x)" """
483+
# todo: change the data format for split note titles, so that you can simplify this
484+
_ret = _title
485+
# space, parentheses, number, parentheses, end of string
486+
__match = regex.search(" \((\d+)\)$", t)
487+
if __match:
488+
base = t[:t.rfind(" ")]
489+
if base in _valid_titles:
490+
_ret = base
491+
return _ret
492+
493+
if should_apply[0]: # title
494+
# use list(_books.keys()) so that we don't get an error by changing dict keys during iteration
495+
_b = list(_books.keys())
496+
for t in _b: # t: book title (str)
497+
498+
base_title = get_base_title(t, _b)
499+
500+
fmt = make_sent_format_dict(total_highlights, book_highlights[base_title], -1)
501+
new_title = t.format(**fmt)
502+
_books[new_title] = _books[t]
503+
del _books[t]
504+
if t in _headers:
505+
_headers[new_title] = _headers[t]
506+
del _headers[t]
507+
if t in book_highlights:
508+
book_highlights[new_title] = book_highlights[t]
509+
510+
if should_apply[1]: # body
511+
valid_titles = list(_books.keys())
512+
for t in _books: # t: book title (str)
513+
def count_highlights_before(_title, _base, __books) -> int:
514+
"""
515+
if a note title has " (x)" at the end, count the highlights being sent to previous notes
516+
517+
:param _title: title of the note these highlights are being sent to
518+
:param _base: base title for this title
519+
:param __books: dict[title, list[highlights]]
520+
:return: number of highlights in notes with same base title but a lower x in their " (x)"
521+
"""
522+
_ret = 0
523+
_b, _t = len(_base), len(_title)
524+
if _b != _t:
525+
title_number = int(_title[_b + 2:-1]) # t is base title + " (num)"
526+
_ret = len(__books[_base])
527+
for x in range(1, title_number):
528+
_ret += len(__books[_base + f" ({x})"])
529+
530+
return _ret
531+
532+
base_title = get_base_title(t, valid_titles)
533+
highlights_before = count_highlights_before(t, base_title, _books)
534+
535+
for h in range(len(_books[t])): # _books[h]: [formatted body, sort_key]
536+
fmt = make_sent_format_dict(total_highlights, book_highlights[base_title],
537+
highlights_before + h + 1)
538+
_books[t][h][0] = _books[t][h][0].format(**fmt)
539+
540+
if should_apply[2]: # header
541+
for h in _headers: # h: book title (str)
542+
fmt = make_sent_format_dict(total_highlights, book_highlights[h], -1)
543+
_headers[h] = _headers[h].format(**fmt)
407544

408545
books = {} # dict[title:str, list[list[obsidian_data object:Dict, sort_key]]
409-
lengths = {} # dict[book title:str, int]
546+
lengths = {} # amount of characters per book. dict[book title:str, int]
547+
counts = {} # amount of highlights per book. dict[book title:str, int]
410548

411549
# make list of highlights for each note title
412550
for d in data:
413-
add_data_item(d, books, lengths)
551+
add_data_item(d, books, lengths, counts)
552+
553+
# sort books here so that apply_sent_amount_format gives accurate position of highlight in note
554+
for key in books:
555+
books[key].sort(key=lambda body_sort: body_sort[1])
556+
557+
apply_sent_amount_format(books, headers, len(data), counts)
414558

415559
# now, `books` contains lists of sorted [note body, sort key] objects
416560
ret = []
417561

418562
# merge each book's already-sorted highlights into a single string
419563
for key in books:
420-
books[key].sort(key=lambda body_sort: body_sort[1])
421564
# header is only included in first of a series of same-book files
422565
# (this happens when there's too much text to send to a single file at once)
423566
text = headers.get(key, "") + "".join([a[0] for a in books[key]])
@@ -426,15 +569,15 @@ def add_data_item(_dat, _books, _lengths):
426569
return ret
427570

428571
highlights = filter(is_valid_highlight, self.annotations_list) # annotations["annotations"])
429-
dats = [] # List[List[obsidian_data, sort_key]]
430-
headers = {} # dict[note_title:str, header:str]
572+
dats = [] # formatted titles and bodies: List[List[format_data() output, sort_key]]
573+
headers = {} # formatted headers: dict[note_title:str, header:str]
431574

432575
# make formatted titles, bodies, and headers
433576
for highlight in highlights:
434577
format_add_highlight(highlight, dats, headers)
435578

436579
# todo: sometimes, if obsidian isn't already open, not all highlights get sent
437-
merged = merge_highlights(dats)
580+
merged = merge_highlights(dats, headers)
438581
for obsidian_dat in merged:
439582
send_item_to_obsidian(obsidian_dat)
440583

0 commit comments

Comments
 (0)