Skip to content

Commit 79b144f

Browse files
committed
First working version
1 parent 404b591 commit 79b144f

File tree

16 files changed

+1809
-0
lines changed

16 files changed

+1809
-0
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
SnapStudySensei.egg-info
2+
venv

.screenshot.png

696 KB
Loading

LICENSE

Lines changed: 674 additions & 0 deletions
Large diffs are not rendered by default.

README.md

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# SnapStudySensei
2+
3+
**SnapStudySensei** is a tool to assist with capturing, extracting, translating
4+
and recording Japanese flashcards into [Anki].
5+
6+
This project is inspired by [Game2Text] but takes a different technical approach.
7+
8+
![SnapStudySensei screenshot](.screenshot.png)
9+
10+
11+
## Known limitations
12+
13+
- Currently **only supported on Linux**. Porting it to macOS and Windows should
14+
be doable by adding the ability to list windows (see `windows_list.py`, patches
15+
welcome)
16+
- **No configuration**, only designed for my own needs so far
17+
- A bit **slow to start** due to the OCR model initialization
18+
19+
20+
## Installation
21+
22+
[Anki] and its [Anki-Connect] plugin must be installed.
23+
24+
```sh
25+
python -m venv venv
26+
. venv/bin/activate
27+
pip install -e .
28+
```
29+
30+
### Important note
31+
32+
This is a non-intrusive standalone installation, but system input methods (such
33+
as fcitx) are unlikely to work due to how PySide6 isolation works inside a
34+
virtual env. Installing it system-wide is the only alternative I could find
35+
so far.
36+
37+
38+
## Running
39+
40+
[Anki] and its [Anki-Connect] plugin must be running.
41+
42+
Enter the venv (`. venv/bin/activate`) if you used that installation method, and
43+
run `sss`.
44+
45+
### Important note
46+
47+
SnapStudySensei will automatically create a model, deck and flashcard templates
48+
in Anki. The deck is called *SnapStudySensei* and is located in the *Japanese*
49+
category.
50+
51+
52+
## Thanks to
53+
54+
- [Manga OCR](https://github.com/kha-white/manga-ocr/)
55+
- [JMdict](https://www.edrdg.org/wiki/index.php/JMdict-EDICT_Dictionary_Project)
56+
57+
58+
[Anki]: https://apps.ankiweb.net
59+
[Anki-Connect]: https://foosoft.net/projects/anki-connect
60+
[Game2Text]: https://game2text.com

pyproject.toml

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
[project]
2+
name = "SnapStudySensei"
3+
version = "0.0.1"
4+
authors = [
5+
{ name="Clément Bœsch", email="u@pkh.me" },
6+
]
7+
description = "Capture, extract, translate and record Japanese flashcards into Anki"
8+
readme = "README.md"
9+
requires-python = ">=3.9"
10+
classifiers = [
11+
"Programming Language :: Python :: 3",
12+
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
13+
"Natural Language :: Japanese",
14+
]
15+
dependencies = [
16+
'PySide6',
17+
'pillow',
18+
'xdg-base-dirs',
19+
'manga-ocr',
20+
'xcffib',
21+
'gtts',
22+
]
23+
24+
[project.urls]
25+
"Homepage" = "https://github.com/ubitux/SnapStudySensei"
26+
"Bug Tracker" = "https://github.com/ubitux/SnapStudySensei/issues"
27+
28+
[project.gui-scripts]
29+
sss = "snapstudysensei:run"
30+
31+
[tool.black]
32+
line-length = 120
33+
34+
[tool.isort]
35+
profile = "black"
36+
line_length = 120
37+
38+
[tool.pyright]
39+
venv = "venv"
40+
venvPath = "."

snapstudysensei/__init__.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
def _init_ocr():
    """Print a progress line, then build and return the OCR wrapper."""
    print(":: initializing Optical Character Recognition")

    # The import is deferred to call time, after the progress line is printed.
    from snapstudysensei.ocr import OCRWrapper as _OCR

    engine = _OCR()
    return engine
6+
7+
8+
def _init_dic():
    """Print a progress line, then build and return the Japanese dictionary."""
    print(":: initializing dictionary")

    # The import is deferred to call time, after the progress line is printed.
    from snapstudysensei.dic import JDictionary as _Dictionary

    dictionary = _Dictionary()
    return dictionary
13+
14+
15+
def _init_tts():
    """Print a progress line, then build and return the Text-To-Speech wrapper."""
    print(":: initializing Text-To-Speech")

    # The import is deferred to call time, after the progress line is printed.
    from snapstudysensei.tts import TTSWrapper as _TTS

    speech = _TTS()
    return speech
20+
21+
22+
def run():
    """Entry point: initialize OCR, dictionary and TTS (in that order), then start the main program."""
    # These initializations could be slow; a dedicated loading UI or
    # splashscreen shown while they run might make sense.
    ocr_engine = _init_ocr()
    dictionary = _init_dic()
    speech = _init_tts()

    # The main module is only imported once all three services are ready.
    from snapstudysensei.main import run as start_main

    start_main(ocr_engine, dictionary, speech)

snapstudysensei/anki.py

Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
import base64
import hashlib
import json
import re
from dataclasses import asdict, dataclass
from pathlib import Path
from typing import Optional
from urllib.request import Request, urlopen
8+
9+
10+
@dataclass
class AnkiNote:
    """One flashcard, mirroring the fields of the Anki note model used by this package.

    ``Optional[Path]`` is used instead of ``Path | None``: dataclass field
    annotations are evaluated when the class is defined, and the ``|`` form
    raises ``TypeError`` on Python 3.9, which the project declares as supported.
    """

    # The word being studied
    word: str
    # Image file illustrating the context, or None when there is none
    context_picture: Optional[Path]
    # Sentence in which the word was found
    context_sentence: str
    # Reading of the word; may be empty
    word_reading: str
    # Meaning(s) of the word
    word_glossary: str
    # Audio file for the word, or None when there is none
    word_audio: Optional[Path] = None
    # Free-form additional information
    extra_info: str = ""
    # Anki note identifier; -1 is the "not assigned" default
    anki_id: int = -1

    def get_qml_record(self):
        """Return the note as a plain dict for the QML side.

        Returns:
            A dict with ``record_id`` (the Anki id as a string), ``reading``
            (the reading with ``[``/``]`` replaced by ``「``/``」``, or the bare
            word when there is no reading) and ``meaning`` (the glossary).

        Raises:
            AssertionError: if the note has no Anki id yet (``anki_id == -1``).
        """
        assert self.anki_id != -1
        reading = self.word_reading.replace("[", "「").replace("]", "」") if self.word_reading else self.word
        return dict(
            record_id=str(self.anki_id),  # QML doesn't support 64-bit integers (javascript bs)
            reading=reading,
            meaning=self.word_glossary,
        )
29+
30+
31+
class AnkiConnect:
    """Small client for the Anki-Connect HTTP API, bound to this application's deck and note model.

    Every request goes through :meth:`query`, which posts JSON to
    ``http://localhost:8765``.
    """

    PREFIX = "SnapStudySensei"
    DECK_NAME = f"Japanese::{PREFIX}"
    MODEL_NAME = f"{PREFIX} Word"

    # Patterns recovering the media filename from the markup stored in note fields
    _PICTURE_RE = re.compile(r'src="(?P<filename>[^"]+)"')
    _AUDIO_RE = re.compile(r"\[sound:(?P<filename>[^\]]+)\]")

    def __init__(self):
        """Create the deck and the note model when missing, then fetch Anki's media directory.

        Raises:
            Exception: if Anki-Connect reports an error or returns a malformed reply.
            OSError: if a template file cannot be read; network errors raised
                by ``urlopen`` also propagate unchanged.
        """
        deck_ids = self.query("deckNamesAndIds")
        if deck_ids.get(self.DECK_NAME) is None:
            self.query("createDeck", deck=self.DECK_NAME)

        model_ids = self.query("modelNamesAndIds")
        if model_ids.get(self.MODEL_NAME) is None:
            tpl_dir = Path(__file__).resolve().parent / "data"
            # read_text() closes the file and pins the encoding instead of
            # depending on the locale default.
            front = (tpl_dir / "front.html").read_text(encoding="utf-8")
            back = (tpl_dir / "back.html").read_text(encoding="utf-8")
            css = (tpl_dir / "style.css").read_text(encoding="utf-8")

            self.query(
                "createModel",
                modelName=self.MODEL_NAME,
                inOrderFields=[
                    "Word",
                    "ContextPicture",
                    "ContextSentence",
                    "WordReading",
                    "WordGlossary",
                    "WordAudio",
                    "ExtraInfo",
                ],
                css=css,
                cardTemplates=[dict(Front=front, Back=back)],
            )

        self.media_dir_path = Path(self.query("getMediaDirPath"))

    def add_note(self, note: "AnkiNote") -> "AnkiNote":
        """Add ``note`` to the deck and return a copy carrying the new Anki id.

        Args:
            note: the note to record; it is left unmodified.

        Returns:
            A new ``AnkiNote`` whose ``word_reading`` is the ruby string sent to
            Anki and whose ``anki_id`` is the result of the ``addNote`` action.

        Raises:
            OSError: if the picture or audio file cannot be read.
            Exception: if Anki-Connect reports an error.
        """
        # Craft a ruby string for Anki furigana text on the back side
        reading = note.word
        if note.word_reading and note.word_reading != note.word:
            reading += f"[{note.word_reading}]"

        params = dict(
            deckName=self.DECK_NAME,
            modelName=self.MODEL_NAME,
            fields=dict(
                Word=note.word,
                ContextSentence=note.context_sentence,
                WordReading=reading,
                WordGlossary=note.word_glossary,
                ExtraInfo=note.extra_info,
            ),
            options=dict(allowDuplicate=True),
            tags=[self.PREFIX],
        )

        # _get_file() does the reading itself; no need to open the files here.
        if note.context_picture:
            picture_filename, data_base64 = self._get_file(note.context_picture)
            params["picture"] = [dict(filename=picture_filename, data=data_base64, fields=["ContextPicture"])]

        if note.word_audio:
            audio_filename, data_base64 = self._get_file(note.word_audio)
            params["audio"] = [dict(filename=audio_filename, data=data_base64, fields=["WordAudio"])]

        patched_note = AnkiNote(**asdict(note))
        patched_note.word_reading = reading
        patched_note.anki_id = self.query("addNote", note=params)
        return patched_note

    def _get_file(self, filepath: Path) -> "tuple[str, str]":
        """Return ``(filename, base64_data)`` for the media file at ``filepath``.

        The filename is ``<PREFIX>_<sha256 of the content><original suffix>``,
        so identical content always maps to the same name.

        Raises:
            OSError: if the file cannot be read.
        """
        content = filepath.read_bytes()

        # Generate a unique filename based on the content
        content_hash = hashlib.sha256(content).hexdigest()

        # Anki might be a sandboxed app where access to the filesystem is
        # restricted (typical use case: a flatpak), so we use a base64 encode
        # instead of a file path.
        data_base64 = base64.b64encode(content).decode("utf-8")

        filename = f"{self.PREFIX}_{content_hash}{filepath.suffix}"

        return filename, data_base64

    def list_notes(self) -> "list[AnkiNote]":
        """Return every note found in the application's deck.

        Picture and audio paths are rebuilt from the filename embedded in the
        field markup, joined to Anki's media directory; they are ``None`` when
        the field holds no such markup.
        """
        note_ids = self.query("findNotes", query=f"deck:{self.DECK_NAME}")
        notes_info = self.query("notesInfo", notes=note_ids)

        notes = []
        for note_info in notes_info:
            fields = note_info["fields"]

            picture_match = self._PICTURE_RE.search(fields["ContextPicture"]["value"])
            picture_path = self.media_dir_path / picture_match["filename"] if picture_match is not None else None

            audio_match = self._AUDIO_RE.search(fields["WordAudio"]["value"])
            audio_path = self.media_dir_path / audio_match["filename"] if audio_match is not None else None

            notes.append(
                AnkiNote(
                    word=fields["Word"]["value"],
                    context_picture=picture_path,
                    context_sentence=fields["ContextSentence"]["value"],
                    word_reading=fields["WordReading"]["value"],
                    word_glossary=fields["WordGlossary"]["value"],
                    word_audio=audio_path,
                    extra_info=fields["ExtraInfo"]["value"],
                    anki_id=note_info["noteId"],
                )
            )

        return notes

    def remove_note(self, anki_id: int):
        """Delete the note identified by ``anki_id`` from Anki."""
        self.query("deleteNotes", notes=[anki_id])

    @staticmethod
    def query(action, **params):
        """Send one Anki-Connect request and return its ``result`` field.

        Args:
            action: name of the Anki-Connect action.
            **params: parameters of the action, sent as the ``params`` object.

        Raises:
            Exception: if the reply does not consist of exactly the ``error``
                and ``result`` fields, or if ``error`` is not null.
        """
        request_data = dict(action=action, params=params, version=6)
        request_json = json.dumps(request_data).encode("utf-8")
        # The context manager closes the HTTP response once the reply is parsed.
        with urlopen(Request("http://localhost:8765", request_json)) as http_response:
            response = json.load(http_response)
        if len(response) != 2:
            raise Exception("response has an unexpected number of fields")
        if "error" not in response:
            raise Exception("response is missing required error field")
        if "result" not in response:
            raise Exception("response is missing required result field")
        if response["error"] is not None:
            raise Exception(response["error"])
        return response["result"]
170+
171+
172+
# Manual check: connect to Anki and dump every note of the deck.
if __name__ == "__main__":
    print(AnkiConnect().list_notes())

snapstudysensei/data/back.html

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
<h1>{{furigana:WordReading}}</h1>
2+
3+
<hr>
4+
5+
<pre>{{WordGlossary}}</pre>
6+
<p>{{ExtraInfo}}</p>
7+
<p>{{ContextSentence}}</p>
8+
<p>{{WordAudio}}</p>
9+
<p>{{ContextPicture}}</p>

snapstudysensei/data/front.html

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
<h1>{{Word}}</h1>
2+
<p>{{hint:ContextSentence}}</p>

snapstudysensei/data/style.css

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
.card {
2+
font-family: sans-serif;
3+
font-size: 1.5em;
4+
text-align: center;
5+
color: white;
6+
background-color: #222222;
7+
margin-left: auto;
8+
margin-right: auto;
9+
}
10+
11+
pre { text-align: center; }

0 commit comments

Comments
 (0)