Skip to content

Commit 402a60f

Browse files
committed
pdk glean command
1 parent bf86406 commit 402a60f

File tree

2 files changed

+161
-10
lines changed

2 files changed

+161
-10
lines changed

.pdk/pdk/__init__.py

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -44,10 +44,9 @@ def chdir_Doc():
4444

4545

4646
def remove_nonprintables(text):
    """Return *text* without non-printable ASCII characters and leading whitespace.

    Every character in the ASCII range (code points 0-127) that is not a
    member of ``string.printable`` is deleted; characters outside that range
    are left untouched. The result is then left-stripped.
    """
    # ASCII characters that string.printable does not list.
    unwanted = "".join(c for c in map(chr, range(128)) if c not in string.printable)
    # A three-argument maketrans builds a table that deletes those characters.
    cleaned = text.translate(str.maketrans("", "", unwanted))
    return cleaned.lstrip()
5251

5352

@@ -57,10 +56,11 @@ def init(self):
5756
os.chdir(MSG_DIR / "..")
5857
if pathlib.Path("cpython").exists():
5958
shutil.rmtree("cpython")
60-
sh(f"git clone --single-branch -b {os.environ['PDK_BRANCH']} https://github.com/python/cpython")
59+
sh(
60+
f"git clone --single-branch -b {os.environ['PDK_BRANCH']} https://github.com/python/cpython"
61+
)
6162
sh(f"git checkout {os.environ['PDK_REVISION']}", chdir="cpython")
62-
LC_MESSAGES = pathlib.Path(
63-
"cpython/Doc/locales/ko/LC_MESSAGES").absolute()
63+
LC_MESSAGES = pathlib.Path("cpython/Doc/locales/ko/LC_MESSAGES").absolute()
6464
create_symlink(LC_MESSAGES, MSG_DIR)
6565

6666
def build(self):
@@ -99,10 +99,10 @@ def format(self, pofile):
9999
write_po(f, catalog)
100100
odata = f.getvalue()
101101
if idata.encode() != odata:
102-
with open(pofile, 'wb') as f:
102+
with open(pofile, "wb") as f:
103103
f.write(odata)
104104
else:
105-
print('already formatted')
105+
print("already formatted")
106106
fuzzy_count = empty_count = 0
107107
for msg in catalog:
108108
if not msg.id:
@@ -112,9 +112,9 @@ def format(self, pofile):
112112
elif not msg.string:
113113
empty_count += 1
114114
if fuzzy_count:
115-
print(f'{fuzzy_count} fuzzy messages found')
115+
print(f"{fuzzy_count} fuzzy messages found")
116116
if empty_count:
117-
print(f'{empty_count} untranslated messages found')
117+
print(f"{empty_count} untranslated messages found")
118118

119119
def find_obsoletes(self, *, delete=False):
120120
"""Find obsolete .po files."""
@@ -167,6 +167,12 @@ def coverage(self):
167167
print(f"{total:7d} Total")
168168
print(f"{translated * 100.0 / total:.2f}%")
169169

170+
def glean(self, filename, *, revision=None, verbose=False):
    """Try to resolve fuzzy entries.

    Delegates to ``glean`` in the sibling ``gleaner`` module, forwarding
    every argument unchanged.

    Args:
        filename: Path of the .po file to process.
        revision: Optional revision forwarded to the gleaner.
        verbose: Forwarded to the gleaner.
    """
    # The gleaner module is imported only when this command runs.
    # NOTE(review): presumably so its dependencies are needed only here - confirm.
    from . import gleaner

    gleaner.glean(filename, revision=revision, verbose=verbose)
175+
170176

171177
def main():
    """Command-line entry point: expose ``Command`` through python-fire."""
    fire.Fire(component=Command)

.pdk/pdk/gleaner.py

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
import difflib
2+
import io
3+
import itertools
4+
import re
5+
6+
from babel.messages.pofile import read_po, write_po
7+
import git
8+
9+
10+
# Alternatives recognised as "invariant" reST markup inside a msgid; each
# entry is annotated with an example of the text it matches.
_INVARIANT_ALTERNATIVES = (
    r"``[^`]+``",  # inline literal: ``None``
    r"\*[a-zA-Z_]+\*",  # emphasised name: *arg*
    r"\|[^|]+\|_?",  # substitution: |version|, |tzdata|_
    r":[a-z\-:]+:`[^`<]+`",  # role: :mod:`os`
    r"<[^>]+>`_{0,2}",  # link target: <...>`, <...>`_, <...>`__
    r"`[^`<:]+`__?",  # named reference: `Sphinx`_
)

# Single compiled pattern; alternatives are tried in the order listed above.
P_INVARIANT = re.compile("|".join(_INVARIANT_ALTERNATIVES))
22+
23+
24+
def find_invariant(immutables, i1, i2):
    """Return the first span in *immutables* that encloses the range ``[i1, i2)``.

    Args:
        immutables: Iterable of ``(start, end)`` spans, visited in order.
            Scanning stops at the first span that starts after ``i1``.
        i1: Start offset of the edited range.
        i2: End offset of the edited range.

    Returns:
        The ``(start, end)`` tuple of the enclosing span, or ``None`` when no
        span qualifies.
    """
    for start, end in immutables:
        if start > i1:
            # This span begins after the edit, so stop looking.
            return None
        # flowdas: without the ``end > i1`` condition, text appended directly
        # to the right of a span would be accepted.
        # Should text prepended on the left be rejected as well?
        if i1 < end and i2 <= end:
            return start, end
    return None
32+
33+
34+
def locations(msg):
    """Format ``msg.locations`` as a ``#:`` reference line.

    Each ``(filename, lineno)`` pair in ``msg.locations`` becomes
    ``filename:lineno``; the pieces are joined by single spaces after a
    leading ``#:`` marker.
    """
    refs = [f"{fn}:{ln}" for fn, ln in msg.locations]
    return " ".join(["#:", *refs])
39+
40+
41+
def patch_message(old, new, *, verbose=False):
    """Carry msgid changes that fall inside invariant markup over to ``new.string``.

    The msgids of *old* and *new* are diffed. Every edit that lies entirely
    inside one ``P_INVARIANT`` span of ``old.id`` is applied to a copy of that
    span; if the patched span is still a complete invariant, it replaces the
    old span text in ``new.string``. When every edit could be handled this
    way, the ``fuzzy`` flag is removed from *new*.

    Args:
        old: Message from the earlier revision (``id`` and ``string`` are read).
        new: Fuzzy message from the current file; its ``string`` and ``flags``
            may be modified in place.
        verbose: Print every diff opcode and the invariant span it maps to.

    Returns:
        True if ``new.string`` was modified, otherwise False.
    """
    print(locations(new))
    changed = False
    invariants = [m.span() for m in P_INVARIANT.finditer(old.id)]
    if invariants:
        s = difflib.SequenceMatcher(None, old.id, new.id, autojunk=False)
        # Maps the original span text to its patched replacement.
        blocks = {}
        # Number of edits that could NOT be mapped onto an invariant span.
        count = 0
        # Walk the opcodes back to front so that patching a span never shifts
        # the offsets of edits that are still to be applied to the same span.
        for tag, i1, i2, j1, j2 in reversed(s.get_opcodes()):
            if tag == "equal":
                continue
            count += 1
            if verbose:
                print(
                    f"{tag:7} a[{i1}:{i2}] --> b[{j1}:{j2}] {old.id[i1:i2]!r:>8} --> {new.id[j1:j2]!r}"
                )

            idx = find_invariant(invariants, i1, i2)
            if not idx:
                continue
            k1, k2 = idx
            old_block = old.id[k1:k2]
            if verbose:
                print(f"\tblock a[{k1}:{k2}] {old_block}")
            # The span text must be unambiguous in both msgid and msgstr,
            # otherwise a plain string replacement could hit the wrong place.
            if old.id.count(old_block) != 1 or old.string.count(old_block) != 1:
                continue
            template = blocks.get(old_block, old_block)
            blocks[old_block] = (
                template[: i1 - k1] + new.id[j1:j2] + template[i2 - k1 :]
            )
            count -= 1
        for old_block, new_block in blocks.items():
            # flowdas: if the block no longer has the P_INVARIANT shape after
            # the change, patching is not safe. ``fullmatch`` is required:
            # ``match`` only anchors at the start and would accept a block
            # with arbitrary text trailing a valid invariant.
            if P_INVARIANT.fullmatch(new_block):
                print(f"{old_block} --> {new_block}")
                new.string = new.string.replace(old_block, new_block)
                changed = True
            else:
                count += 1
        if count == 0:
            # flowdas: grammatical particles in the translation may need to
            # change, so dropping fuzzy is not 100% safe, but the benefit is
            # judged to outweigh the risk.
            new.flags.discard("fuzzy")
            print("clear fuzzy")
    return changed
86+
87+
88+
def print_diff(old, new):
    """Print a coloured inline diff of ``old.id`` against ``new.id``.

    Unchanged text is printed as is. Text present only in ``old.id`` is
    wrapped in the deletion escape sequence and text present only in
    ``new.id`` in the insertion escape sequence. The output ends with two
    newlines.
    """
    ins_on = "\x1b[38;5;16;48;5;2m"
    del_on = "\x1b[38;5;16;48;5;1m"
    reset = "\x1b[0m"
    matcher = difflib.SequenceMatcher(None, old.id, new.id, autojunk=False)
    for tag, a1, a2, b1, b2 in matcher.get_opcodes():
        if tag == "equal":
            print(old.id[a1:a2], end="")
            continue
        # A "replace" opcode prints both the removed and the added text.
        if tag != "insert":
            print(del_on + old.id[a1:a2] + reset, end="")
        if tag != "delete":
            print(ins_on + new.id[b1:b2] + reset, end="")
    print("\n")
101+
102+
103+
def glean(filename, *, revision=None, verbose=False):
    """Try to resolve fuzzy entries of *filename* using an earlier revision.

    The current file ("after") is compared with the same path in an earlier
    commit ("before"). Each fuzzy message in "after" is paired with the
    "before" message that has the same msgstr and handed to ``patch_message``.
    If any message was patched, the file is rewritten in place.

    Fuzzy messages whose msgstr does not occur in "before", or occurs more
    than once there, cannot be paired unambiguously and are skipped.

    Args:
        filename: Path of the .po file; also used to look the file up in the
            commit tree.
        revision: Revision to read the "before" version from. When omitted,
            the second most recent commit touching *filename* is used.
        verbose: Print extra diagnostics.

    Raises:
        IndexError: If *revision* is omitted and fewer than two commits touch
            *filename*.
    """
    # Read the .po file after the update as ``after``. The old revision below
    # is decoded as UTF-8, so read the current file the same way instead of
    # relying on the platform default encoding.
    with open(filename, encoding="utf-8") as po:
        after = read_po(po, abort_invalid=True)

    # Read the .po file before the update as ``before``.
    repo = git.Repo()
    if revision:
        commit = repo.commit(revision)
    else:
        commits = list(itertools.islice(repo.iter_commits(paths=filename), 0, 2))
        if len(commits) < 2:
            raise IndexError(
                f"{filename}: fewer than two commits touch this file; "
                "pass an explicit revision"
            )
        commit = commits[1]
    data = (commit.tree / filename).data_stream.read().decode()
    before = read_po(io.StringIO(data), abort_invalid=True)

    # Build a msgstr -> message mapping from ``before``. A msgstr shared by
    # several messages cannot identify its source, so it is dropped.
    str2msg = {}
    ambiguous = set()
    for msg in before:
        if not msg.string:
            continue
        if msg.string in str2msg:
            ambiguous.add(msg.string)
        else:
            str2msg[msg.string] = msg
    for duplicate in ambiguous:
        del str2msg[duplicate]

    # Pair every fuzzy message in ``after`` with the ``before`` message that
    # has the same msgstr.
    pairs = []
    for msg in after:
        if not (msg.id and msg.fuzzy):
            continue
        old = str2msg.get(msg.string)
        if old is None:
            if verbose:
                print(locations(msg))
                print("skip: msgstr has no unique match in the old revision")
            continue
        pairs.append((old, msg))

    # Apply the patches and print a diff for each patched message.
    changed = False
    for old, new in pairs:
        if patch_message(old, new, verbose=verbose):
            changed = True
        print()
        print_diff(old, new)

    # Save the modified ``after``. It is serialised fully before the file is
    # opened for writing.
    if changed:
        out = io.BytesIO()
        write_po(out, after)
        data = out.getvalue()
        with open(filename, "wb") as po:
            po.write(data)

0 commit comments

Comments
 (0)