|
| 1 | +import difflib |
| 2 | +import io |
| 3 | +import itertools |
| 4 | +import re |
| 5 | + |
| 6 | +from babel.messages.pofile import read_po, write_po |
| 7 | +import git |
| 8 | + |
| 9 | + |
# Alternatives tried in order by P_INVARIANT; each one matches a piece of
# reStructuredText inline markup (an example is given next to each pattern).
_INVARIANT_PATTERNS = (
    r"``[^`]+``",  # inline literal, e.g. ``None``
    r"\*[a-zA-Z_]+\*",  # starred name, e.g. *arg*
    r"\|[^|]+\|_?",  # substitution, e.g. |version| or |tzdata|_
    r":[a-z\-:]+:`[^`<]+`",  # role without explicit target, e.g. :mod:`os`
    r"<[^>]+>`_{0,2}",  # target tail: <...>`, <...>`_ or <...>`__
    r"`[^`<:]+`__?",  # reference, e.g. `Sphinx`_ or `Sphinx`__
)

# Markup spans that patch_message treats as "invariant" blocks: text that is
# searched for in both the msgid and the msgstr and patched as a unit.
P_INVARIANT = re.compile("|".join(_INVARIANT_PATTERNS))
| 22 | + |
| 23 | + |
def find_invariant(immutables, i1, i2):
    """Return the invariant span that encloses the edit ``[i1, i2)``, if any.

    ``immutables`` is an iterable of ``(start, stop)`` pairs.  Scanning stops
    at the first pair whose ``start`` lies beyond ``i1``, so the pairs are
    expected to be ordered by ``start`` (patch_message passes regex match
    spans in match order).

    Returns the first ``(start, stop)`` with ``start <= i1``, ``i2 <= stop``
    and ``i1 < stop``, or ``None`` when no pair qualifies.
    """
    for start, stop in immutables:
        if start > i1:
            # This span begins after the edit; give up on the scan.
            return None
        # ``i1 < stop`` is required so that text appended immediately to the
        # right of a span (an insertion at ``stop``) is not treated as part of
        # it.  Text prepended on the left (an insertion at ``start``) is still
        # accepted; whether that should be rejected too is an open question.
        if i1 < stop and i2 <= stop:
            return start, stop
    return None
| 32 | + |
| 33 | + |
def locations(msg):
    """Format ``msg.locations`` as a single ``#:`` reference line.

    ``msg.locations`` is iterated as ``(filename, line)`` pairs; each pair is
    rendered as ``filename:line`` and the pieces are joined with single spaces
    after a leading ``#:`` marker.
    """
    refs = [f"{fn}:{ln}" for fn, ln in msg.locations]
    return " ".join(["#:", *refs])
| 39 | + |
| 40 | + |
def patch_message(old, new, *, verbose=False):
    """Carry msgid edits made inside invariant markup over to ``new.string``.

    ``old`` and ``new`` are message objects exposing ``id`` and ``string``
    (``new`` additionally ``locations`` and ``flags``); glean passes the
    pre-update and post-update versions of one fuzzy entry.

    The differences between ``old.id`` and ``new.id`` are computed with
    difflib.  Every difference that falls inside a span of ``old.id`` matched
    by P_INVARIANT is applied to a copy of that span; the rewritten span then
    replaces the old span text in ``new.string``.  A span is only patched when
    its text occurs exactly once in both ``old.id`` and ``old.string``.

    The ``fuzzy`` flag is discarded from ``new.flags`` when every difference
    was absorbed by a patched span.  Progress is reported with ``print``;
    ``verbose`` adds one line per difference and per candidate span.

    Returns ``True`` if ``new.string`` was modified, otherwise ``False``.
    """
    print(locations(new))
    changed = False
    invariants = [m.span() for m in P_INVARIANT.finditer(old.id)]
    if not invariants:
        return changed

    matcher = difflib.SequenceMatcher(None, old.id, new.id, autojunk=False)
    blocks = {}  # old span text -> rewritten span text
    count = 0  # differences that could not be handled safely

    # Walk the opcodes right to left: each edit is spliced into the span using
    # offsets from the *old* text, which stay valid only while everything to
    # the left of the edit is still unmodified.
    for tag, i1, i2, j1, j2 in reversed(matcher.get_opcodes()):
        if tag == "equal":
            continue
        count += 1
        if verbose:
            print(
                f"{tag:7} a[{i1}:{i2}] --> b[{j1}:{j2}] {old.id[i1:i2]!r:>8} --> {new.id[j1:j2]!r}"
            )

        idx = find_invariant(invariants, i1, i2)
        if not idx:
            continue
        k1, k2 = idx
        old_block = old.id[k1:k2]
        if verbose:
            print(f"\tblock a[{k1}:{k2}] {old_block}")
        # An ambiguous span (repeated in the msgid, or missing/repeated in the
        # translation) cannot be mapped onto the msgstr reliably.
        if old.id.count(old_block) != 1 or old.string.count(old_block) != 1:
            continue
        template = blocks.get(old_block, old_block)
        blocks[old_block] = (
            template[: i1 - k1] + new.id[j1:j2] + template[i2 - k1 :]
        )
        count -= 1

    for old_block, new_block in blocks.items():
        # If the rewritten text is no longer an invariant as a whole, the
        # patch is not safe.  ``fullmatch`` is required here: ``match`` would
        # accept any text that merely *starts* with an invariant (for example
        # "``a`` or ``b``").
        if P_INVARIANT.fullmatch(new_block):
            print(f"{old_block} --> {new_block}")
            new.string = new.string.replace(old_block, new_block)
            changed = True
        else:
            count += 1

    if count == 0:
        # Dropping "fuzzy" is not 100% safe, because grammatical particles in
        # the translation may need to change with the new text; the benefit is
        # judged to outweigh that risk.
        new.flags.discard("fuzzy")
        print("clear fuzzy")
    return changed
| 86 | + |
| 87 | + |
def print_diff(old, new):
    """Print a character-level diff from ``old.id`` to ``new.id`` to stdout.

    Unchanged text is printed as is.  Removed text is wrapped in an ANSI
    sequence selecting background color 1, added text in one selecting
    background color 2 (both with foreground color 16), each followed by a
    reset.  The diff is terminated by two newlines.
    """
    removed_on = "\x1b[38;5;16;48;5;1m"
    added_on = "\x1b[38;5;16;48;5;2m"
    reset = "\x1b[0m"

    matcher = difflib.SequenceMatcher(None, old.id, new.id, autojunk=False)
    pieces = []
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        if tag == "equal":
            pieces.append(old.id[i1:i2])
            continue
        # Remaining tags are "delete", "insert" and "replace"; a replace shows
        # the removed text first and the added text right after it.
        if tag != "insert":
            pieces.append(removed_on + old.id[i1:i2] + reset)
        if tag != "delete":
            pieces.append(added_on + new.id[j1:j2] + reset)
    print("".join(pieces) + "\n")
| 101 | + |
| 102 | + |
def glean(filename, *, revision=None, verbose=False):
    """Patch fuzzy entries of the .po file ``filename`` using its git history.

    The working-tree file is read as the "after" catalog and the copy stored
    in a git commit as the "before" catalog.  Each fuzzy entry of "after" is
    paired with the "before" entry that has the same msgstr, and
    patch_message is applied to the pair.  If any entry was modified, the
    catalog is written back to ``filename``.

    Args:
        filename: Path of the .po file.  It is also used as the path inside
            the commit tree, so it presumably has to be relative to the
            repository root -- TODO confirm.
        revision: Revision whose copy of the file is the "before" catalog.
            When omitted, the second most recent commit touching ``filename``
            is used.
        verbose: Forwarded to patch_message.

    Raises:
        IndexError: ``revision`` was omitted and fewer than two commits touch
            ``filename``.
        AssertionError: two "before" entries share the same non-empty msgstr.
    """
    # Read the .po file before and after the update as ``before``/``after``.
    # The committed copy is decoded as UTF-8 below, so read the working copy
    # as UTF-8 too instead of relying on the locale's default encoding.
    with open(filename, encoding="utf-8") as f:
        after = read_po(f, abort_invalid=True)

    repo = git.Repo()
    if revision:
        commit = repo.commit(revision)
    else:
        commits = list(itertools.islice(repo.iter_commits(paths=filename), 0, 2))
        if len(commits) < 2:
            raise IndexError(
                f"{filename}: fewer than two commits touch this file; "
                "pass an explicit revision"
            )
        commit = commits[1]
    data = (commit.tree / filename).data_stream.read().decode("utf-8")
    before = read_po(io.StringIO(data), abort_invalid=True)

    # Build a msgstr -> message mapping from ``before``.
    str2msg = {}
    for msg in before:
        if msg.string:
            assert msg.string not in str2msg, f"duplicate msgstr: {msg.string!r}"
            str2msg[msg.string] = msg

    # Pair every fuzzy message of ``after`` with the ``before`` message that
    # has the same msgstr.  A fuzzy entry without such a counterpart (for
    # example one with an empty msgstr) cannot be patched; it is skipped and
    # stays fuzzy instead of aborting the whole run with a KeyError.
    pairs = []
    for msg in after:
        if msg.id and msg.fuzzy:
            old = str2msg.get(msg.string)
            if old is not None:
                pairs.append((old, msg))

    # Apply the patches and print a diff for every modified message.
    changed = False
    for old, new in pairs:
        if patch_message(old, new, verbose=verbose):
            changed = True
            print()
            print_diff(old, new)

    # Save the modified ``after`` catalog.  It is serialized to memory first
    # so that a failure in write_po cannot leave a truncated file behind.
    if changed:
        buf = io.BytesIO()
        write_po(buf, after)
        with open(filename, "wb") as f:
            f.write(buf.getvalue())
0 commit comments