-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathbackfill.py
More file actions
193 lines (157 loc) · 6.07 KB
/
backfill.py
File metadata and controls
193 lines (157 loc) · 6.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
from __future__ import annotations
import csv
import argparse
import collections
import re
from typing import List, Dict, Any
# ========================== from anki-connect =========================== #
# https://github.com/FooSoft/anki-connect#python
import json
import urllib.request
def request(action, **params):
    """Build the request payload for an AnkiConnect call.

    Args:
        action: name of the action to perform.
        **params: keyword arguments forwarded as the action's parameters.

    Returns:
        A dict with the keys ``action``, ``params`` and ``version`` (always 6).
    """
    payload = dict(action=action, params=params, version=6)
    return payload
def invoke(action, **params):
    """Send ``action`` to the AnkiConnect server and return its result.

    The payload produced by ``request(action, **params)`` is JSON-encoded as
    UTF-8 and sent as the body of a request to ``http://localhost:8765``.

    Args:
        action: name of the action to perform.
        **params: keyword arguments forwarded as the action's parameters.

    Returns:
        The value of the ``result`` field of the decoded reply.

    Raises:
        Exception: if the reply does not contain exactly the two fields
            ``error`` and ``result``, or if ``error`` is not ``None`` (the
            error value is passed to the exception).
    """
    requestJson = json.dumps(request(action, **params)).encode("utf-8")
    http_request = urllib.request.Request("http://localhost:8765", requestJson)

    # Use the response as a context manager so the underlying connection is
    # closed as soon as the body has been decoded, instead of being left open
    # until garbage collection.
    with urllib.request.urlopen(http_request) as http_response:
        response = json.load(http_response)

    if len(response) != 2:
        raise Exception("response has an unexpected number of fields")
    if "error" not in response:
        raise Exception("response is missing required error field")
    if "result" not in response:
        raise Exception("response is missing required result field")
    if response["error"] is not None:
        raise Exception(response["error"])
    return response["result"]
# =========================================================================== #
# Non-greedy match of anything that looks like an HTML tag: "<" ... ">".
rx_HTML = re.compile("<.*?>")


def normalize_expr(expression: str):
    """Return ``expression`` with HTML tags removed and outer whitespace stripped."""
    without_tags = rx_HTML.sub("", expression)
    return without_tags.strip()
def harmonic_average(frequencies: List[float]) -> int:
    """Calculate the harmonic average of a list of frequencies.

    Non-positive values are ignored. The mean is computed with exact rational
    arithmetic and then truncated to an integer, so inputs such as
    ``[10, 10, 10]`` yield exactly ``10``; a floating-point reciprocal sum
    gives ``9.999999999999998`` for that input, which truncates to ``9``.

    Args:
        frequencies: finite numeric values (ints or floats).

    Returns:
        ``0`` if the list is empty or contains no positive value, otherwise
        the harmonic mean of the positive values truncated to an ``int``.
    """
    # Imported locally so this function carries its own dependency.
    from fractions import Fraction

    positive = [f for f in frequencies if f > 0]
    if not positive:
        return 0

    # Fraction(f) converts ints and floats exactly, so no rounding error can
    # push the quotient just below an integer before truncation.
    reciprocal_sum = sum(Fraction(1) / Fraction(f) for f in positive)
    return int(len(positive) / reciprocal_sum)
def get_args() -> argparse.Namespace:
    """Parse the command-line arguments of the backfill script.

    Defines one positional argument (``expr_field``) and the options
    ``--default``, ``--freq-field``, ``--query`` and ``--freq-lists``.
    ``--query`` uses ``argparse.SUPPRESS`` as its default, so the attribute is
    absent from the namespace unless the option is given.

    Returns:
        The namespace produced by parsing the process arguments.
    """
    # (flags, keyword settings) in the order they are registered.
    argument_specs = (
        (
            ("expr_field",),
            {
                "type": str,
                "help": "exact field name that contains the expression",
            },
        ),
        (
            ("--default",),
            {
                "type": int,
                "help": "default value to fill for cards with no frequencies listed",
                "default": None,
            },
        ),
        (
            ("--freq-field",),
            {
                "type": str,
                "help": "exact field name to fill with the frequency information",
                "default": "Frequency",
            },
        ),
        (
            ("--query",),
            {
                "type": str,
                "help": "exact note query to send to Anki",
                "default": argparse.SUPPRESS,
            },
        ),
        (
            ("--freq-lists",),
            {
                "nargs": "+",
                "type": str,
                "help": "what lists to use to backfill",
                "default": ["JPDB.txt", "cc100.txt", "vnsfreq.txt", "vnsfreqSTARS.txt"],
            },
        ),
    )

    cli = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    for flags, settings in argument_specs:
        cli.add_argument(*flags, **settings)
    return cli.parse_args()
# freq is a string: callers pass the already-formatted value to write into the field
def create_actions(ids: List[int], freq: str, freq_field: str) -> List[Dict[str, Any]]:
    """Build one ``updateNoteFields`` action per note id.

    Args:
        ids: note ids to update.
        freq: value to store in the field.
        freq_field: name of the field to set on each note.

    Returns:
        A list of action dicts (version 6), one per id and in the same order,
        each setting ``freq_field`` to ``freq`` on that note.
    """
    return [
        {
            "action": "updateNoteFields",
            "version": 6,
            "params": {"note": {"id": note_id, "fields": {freq_field: freq}}},
        }
        for note_id in ids
    ]
def main():
    """Backfill the frequency field of Anki notes from local frequency lists.

    Workflow:
      1. Parse the command line and build the note query (the ``--query``
         value if given, otherwise all notes whose frequency field is empty).
      2. Fetch the matching notes and group their ids by normalized expression.
      3. Read every frequency list as tab-separated ``expression<TAB>frequency``
         rows and collect the integer frequencies of the known expressions.
      4. Build one update per note using the harmonic average of the collected
         frequencies, plus ``--default`` for notes without any, if provided.
      5. Ask for confirmation and send all updates in a single ``multi`` call.
    """
    args = get_args()

    if "query" in args:
        query = args.query
    else:
        # queries all notes with an empty frequency field
        query = f'"{args.expr_field}:*" "{args.freq_field}:"'

    print(f"Querying Anki with: '{query}'")
    notes = invoke("findNotes", query=query)

    if not notes:
        print("Cannot find any notes to change. Exiting...")
        return

    print(f"Query found {len(notes)} notes.")
    print("Getting note info...")
    notes_info = invoke("notesInfo", notes=notes)

    # dict[str, list[int]] - note ids grouped by normalized expression
    expr_to_nid = collections.defaultdict(list)
    for note_info in notes_info:
        expr = normalize_expr(note_info["fields"][args.expr_field]["value"])
        expr_to_nid[expr].append(note_info["noteId"])

    print("Parsing frequency lists...")

    # dict[str, list[int]] - collect all frequencies for each expression
    expr_to_frequencies = collections.defaultdict(list)
    for file_path in args.freq_lists:
        # newline="" lets the csv module handle line endings itself.
        with open(file_path, encoding="utf-8", newline="") as f:
            for row in csv.reader(f, dialect=csv.excel_tab):
                # Blank lines come back as [] and other malformed rows have a
                # different column count; skip them instead of crashing on
                # the unpacking below.
                if len(row) != 2:
                    continue
                expr, freq = row
                if expr not in expr_to_nid:
                    continue
                try:
                    freq_value = int(freq)
                except ValueError:
                    # Skip invalid frequency values
                    continue
                expr_to_frequencies[expr].append(freq_value)

    # Calculate harmonic averages and create the multi-action payload.
    # Every entry in expr_to_frequencies holds at least one value because
    # entries are only created when a frequency is appended.
    actions = []
    found_exprs = set()
    for expr, frequencies in expr_to_frequencies.items():
        harmonic_avg = harmonic_average(frequencies)
        actions.extend(
            create_actions(expr_to_nid[expr], str(harmonic_avg), args.freq_field)
        )
        found_exprs.add(expr)

    # Number of notes that received a real frequency (before defaults).
    added_freqs_n = len(actions)

    if args.default is None:
        input_msg = f"This will change {len(actions)} notes ({len(notes) - len(actions)} notes had no frequencies found). Type 'yes' to confirm, or anything else to exit.\n> "
    else:
        for expr, note_ids in expr_to_nid.items():
            if expr not in found_exprs:
                actions.extend(
                    create_actions(note_ids, str(args.default), args.freq_field)
                )
        input_msg = f"This will change {len(actions)} notes ({len(actions) - added_freqs_n} notes had no frequencies found and will be set to {args.default}). Type 'yes' to confirm, or anything else to exit.\n> "

    confirm = input(input_msg)
    if confirm != "yes":
        print("Reply was not 'yes'. Exiting...")
        return

    print("Updating notes within Anki...")
    invoke("multi", actions=actions)
    print("Done!")


if __name__ == "__main__":
    main()