Skip to content

Commit 606f283

Browse files
committed
Improve selection of translation files to stage.
1 parent 61f5cdd commit 606f283

File tree

1 file changed

+135
-89
lines changed

1 file changed

+135
-89
lines changed

gitstage.py

Lines changed: 135 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
#!/usr/bin/env python3
2-
# -*- coding: utf-8 -*-
3-
"""Utility to stage updated translation files in git, ignoring files that only have minor changes
4-
to the headers and do not contain changes to the translation strings.
2+
"""Utility to stage updated translation files in git, ignoring files that only have
3+
minor changes to the headers and do not contain changes to the translation strings.
54
"""
65

7-
# Copyright (C) 2023-2024 Bob Swift
6+
# Copyright (C) 2023-2025 Bob Swift
87

98

109
import argparse
@@ -18,15 +17,17 @@
1817

1918

2019
SCRIPT_NAME = 'Picard Docs Git File Stager'
21-
SCRIPT_VERS = '0.5'
22-
SCRIPT_INITIAL_COPYRIGHT = '2024'
20+
SCRIPT_VERS = '0.7'
21+
SCRIPT_INITIAL_COPYRIGHT = '2023-2025'
2322
SCRIPT_INITIAL_AUTHOR = 'Bob Swift'
2423

2524
DEFAULT_COMPARISON_DISPLAY_LEVEL = 'changed'
2625

2726
COMMAND_TIMEOUT = 300
2827
LOCALE_DIRS = conf.locale_dirs if 'locale_dirs' in conf.__dict__ else ['_locale']
2928

29+
FILE_TYPES = {'.pot', '.po'}
30+
3031
STATUS_FILE = 'git_status.txt'
3132
DIFF_FILE = 'git_diff.txt'
3233

@@ -103,7 +104,7 @@ class Printer():
103104
silent = False
104105

105106
@classmethod
106-
def stdout(cls, text: str = ''):
107+
def stdout(cls, text: str = '') -> None:
107108
"""Print the text to stdout if the silent flag has not been set.
108109
109110
Args:
@@ -113,7 +114,7 @@ def stdout(cls, text: str = ''):
113114
print(text)
114115

115116
@staticmethod
116-
def stderr(text: str = ''):
117+
def stderr(text: str = '') -> None:
117118
"""Print the text to stderr, regardless of the silent flag setting.
118119
119120
Args:
@@ -185,7 +186,7 @@ def parse_command_line():
185186
return arg_parser.parse_args()
186187

187188

188-
def get_stdout_from_command(command: str):
189+
def get_stdout_from_command(command: str) -> str:
189190
"""Run the specified command in a shell and return the stdout response as a string.
190191
191192
Args:
@@ -195,11 +196,12 @@ def get_stdout_from_command(command: str):
195196
str: stdout response for the command output
196197
"""
197198
Printer.stdout(f"Running command: {command}")
198-
response = subprocess.run(command, shell=True, check=True, capture_output=True, encoding='utf8', timeout=COMMAND_TIMEOUT)
199+
response = subprocess.run(command, shell=True, check=True, capture_output=True,
200+
encoding='utf8', timeout=COMMAND_TIMEOUT)
199201
return response.stdout
200202

201203

202-
def is_in_locale_dir(fullpath: str):
204+
def is_in_locale_dir(fullpath: str) -> bool:
203205
"""Checks if the specified filepath is in a locale directory.
204206
205207
Args:
@@ -214,15 +216,17 @@ def is_in_locale_dir(fullpath: str):
214216
return False
215217

216218

217-
def parse_git_status(git_stat: list, files_to_stage: dict, files_to_ignore: set, stage_rst: bool = False):
219+
def parse_git_status(git_stat: list, files_to_stage: dict, files_to_ignore: set,
220+
stage_rst: bool = False) -> None:
218221
"""Parse the git status response to add new or deleted files.
219222
220223
Args:
221224
git_stat (list): List of lines in the git status response
222225
files_to_stage (dict): Dictionary of files to add to git staging
223226
files_to_ignore (set): Set of files to not add to git staging
227+
stage_rst (bool): Whether or not RST files should be staged (default False)
224228
"""
225-
stage_types = {'.pot', '.po'}
229+
stage_types = FILE_TYPES
226230
if stage_rst:
227231
stage_types.add('.rst')
228232
for line in git_stat:
@@ -235,22 +239,46 @@ def parse_git_status(git_stat: list, files_to_stage: dict, files_to_ignore: set,
235239
_root, ext = os.path.splitext(filename)
236240
if '_video_thumbnail' in fullfilename:
237241
files_to_ignore.add(fullfilename)
238-
elif status == "??" and fullfilename not in files_to_ignore and is_in_locale_dir(fullfilename) and fullfilename.endswith('/'):
242+
elif status == "??" and fullfilename not in files_to_ignore and \
243+
is_in_locale_dir(fullfilename) and fullfilename.endswith('/'):
239244
files_to_stage[fullfilename] = 'Added'
240245
elif ext not in stage_types:
241246
files_to_ignore.add(fullfilename)
242247
elif status == "D":
243248
files_to_stage[fullfilename] = 'Deleted'
244249
elif status == "??" and fullfilename not in files_to_ignore:
245250
files_to_stage[fullfilename] = 'Added'
246-
elif status == "M" and fullfilename not in files_to_ignore and stage_rst and ext == '.rst':
251+
elif status == "M" and fullfilename not in files_to_ignore and \
252+
stage_rst and ext == '.rst':
247253
files_to_stage[fullfilename] = 'Modified'
248254

249255

250-
def parse_git_diff(git_diff: list, files_to_stage: dict, files_to_ignore: set, level: str = DEFAULT_COMPARISON_DISPLAY_LEVEL):
251-
# pylint: disable=too-many-branches
252-
# pylint: disable=too-many-statements
253-
# pylint: disable=too-many-locals
256+
def check_file(diff_plus: dict, diff_minus: dict) -> bool:
    """Check whether the added and removed translation entries differ.

    Each dictionary maps a translation key to a sequence whose first two
    elements are compared (the caller in this file stores a
    ``(file_line, file_msgstr)`` tuple under each ``file_msgid``).

    Args:
        diff_plus (dict): Dictionary of translation keys added.
        diff_minus (dict): Dictionary of translation keys removed.

    Returns:
        bool: True if there is a mismatch, otherwise False.
    """
    # Nothing recorded on either side, so there is nothing to compare.
    if not diff_plus and not diff_minus:
        return False

    # Dictionary key views compare like sets, so any key that is present on
    # only one side is detected without building intermediate sets.
    if diff_plus.keys() != diff_minus.keys():
        return True

    # The key sets are identical here, so iterating one side covers both.
    # Only the first two elements of each value take part in the comparison.
    return any(
        diff_plus[key][0] != diff_minus[key][0]
        or diff_plus[key][1] != diff_minus[key][1]
        for key in diff_plus
    )
278+
279+
280+
def parse_git_diff(git_diff: list, files_to_stage: dict, files_to_ignore: set,
281+
level: str = DEFAULT_COMPARISON_DISPLAY_LEVEL) -> None:
254282
"""Parse the git diff response. Do not add translation files that only have changed
255283
comment lines or minor changes to headers.
256284
@@ -260,128 +288,147 @@ def parse_git_diff(git_diff: list, files_to_stage: dict, files_to_ignore: set, l
260288
files_to_ignore (set): Set of files to not add to git staging
261289
level (str): Comparison display level (none|changed|all)
262290
"""
291+
# pylint: disable=too-many-locals
292+
# pylint: disable=too-many-branches
293+
# pylint: disable=too-many-statements
294+
263295
fullfilename = ''
264296
filename = ''
265-
ext = ''
297+
file_line = ''
298+
file_msgid = ''
299+
file_msgstr = ''
300+
file_sign = ''
266301
minus = ''
267302
plus = ''
268303
last = ''
269304
match_type = ''
270-
last_type = ''
271-
272-
def process_change(files_to_stage: dict, fullfilename: str, minus: str, plus: str):
273-
"""Compare the 'plus' and 'minus' strings and mark the file for staging if
274-
they are different. Print the diff if the appropriate display level is set.
275-
276-
Args:
277-
files_to_stage (dict): Dictionary of files to add to git staging
278-
fullfilename (str): Full path and name of file being examined
279-
minus (str): Combined lines being removed
280-
plus (str): Combined lines being added
281-
"""
282-
if not minus and not plus:
283-
return
284-
if level == 'all' or (minus != plus and level == 'changed'):
285-
Printer.stdout(f"\nCompare: {fullfilename}")
286-
Printer.stdout(f"--- {len(minus):,} characters\n\"{minus}\"\n+++ {len(plus):,} characters\n\"{plus}\"")
287-
if minus != plus:
288-
files_to_stage[fullfilename] = 'Modified'
305+
diff_plus = {}
306+
diff_minus = {}
307+
processing = False
308+
309+
def do_process(new_file: bool = False):
310+
nonlocal processing, file_line, file_msgid, file_msgstr, file_sign, \
311+
diff_plus, diff_minus, minus, plus, last, line_num
312+
if processing and file_line and file_msgid and file_sign:
313+
if file_sign == '+':
314+
diff_plus[file_msgid] = (file_line, file_msgstr)
315+
else:
316+
diff_minus[file_msgid] = (file_line, file_msgstr)
317+
if filename and fullfilename not in files_to_stage.keys() and fullfilename not in files_to_ignore:
318+
if minus != plus or (new_file and check_file(diff_plus=diff_plus, diff_minus=diff_minus)):
319+
files_to_stage[fullfilename] = 'Modified'
320+
processing = False
321+
diff_plus = diff_minus = {}
322+
file_line = file_msgid = file_msgstr = file_sign = plus = minus = last = ''
289323

290324
line_count = len(git_diff)
291325
line_num = 0
292326
while line_num < line_count:
293-
line = git_diff[line_num]
327+
line = str(git_diff[line_num])
294328
line_num += 1
295-
# for line in git_diff:
329+
296330
# Ignore nearby lines and unchanged ranges
297331
if line and line[0] in {' ', '@'}:
298332
continue
299333

300334
line = line.strip()
301335

302-
# Ignore selected information lines
303-
if not line or line.startswith("+++ ") or line.startswith("diff") or line.startswith("index"):
336+
# Ignore blank lines
337+
if not line:
304338
continue
305339

306340
# Start a new file filename for processing
307341
if line.startswith("--- "):
308-
process_change(files_to_stage, fullfilename, minus, plus)
309-
minus = plus = last = ''
342+
do_process(new_file=True)
343+
if filename and fullfilename not in files_to_stage.keys():
344+
files_to_ignore.add(fullfilename)
310345
fullfilename = line[6:].strip()
311346
filename = os.path.split(fullfilename)[-1]
312-
_root, ext = os.path.splitext(filename)
313347

314-
# Ignore non-translation files
315-
if ext not in {'.pot', '.po'} or not is_in_locale_dir(fullfilename):
316-
continue
348+
# Ignore non-translation files
349+
if os.path.splitext(filename)[1] not in FILE_TYPES or not is_in_locale_dir(fullfilename):
350+
fullfilename = filename = ''
351+
continue
317352

318-
# Ignore files already processed unless printing differences
319-
if (fullfilename in files_to_stage or fullfilename in files_to_ignore) and level == 'none':
320-
continue
353+
# Ignore files already processed unless printing differences
354+
if (fullfilename in files_to_stage or fullfilename in files_to_ignore) and level == 'none':
355+
fullfilename = filename = ''
356+
continue
321357

322-
# Add changed fuzzy comment lines
323-
if re.match(r'[+-]#, fuzzy', line, re.IGNORECASE):
324-
process_change(files_to_stage, fullfilename, minus, plus)
325-
minus = plus = last = ''
326-
files_to_stage[fullfilename] = 'Modified'
358+
if not filename:
327359
continue
328360

329361
# Add changed location comment lines
330362
if re.match(r'[+-]#: \.\./', line):
331-
process_change(files_to_stage, fullfilename, minus, plus)
332-
minus = plus = last = ''
333-
files_to_stage[fullfilename] = 'Modified'
363+
do_process()
364+
file_sign = line[0]
365+
file_line = line.rsplit(':', maxsplit=1)[-1].strip()
366+
processing = True
334367
continue
335368

336369
# Ignore changed comment lines
337370
if re.match(r'[+-]#', line):
338-
process_change(files_to_stage, fullfilename, minus, plus)
339-
minus = plus = last = ''
371+
do_process()
372+
continue
373+
374+
# Ignore selected information lines
375+
if not line or line.startswith("+++ ") or line.startswith("diff") or line.startswith("index"):
376+
do_process()
340377
continue
341378

342379
# Ignore changed header lines
343-
if re.match(r'[+-].*\\n"$', line) or re.match(r'[+-]"(' + HEADER_KEYS_TO_IGNORE + r')', line, re.IGNORECASE):
344-
process_change(files_to_stage, fullfilename, minus, plus)
345-
minus = plus = last = ''
380+
if re.match(r'[+-].*\\n"$', line) or re.match(r'[+-]"(' + HEADER_KEYS_TO_IGNORE + r')',
381+
line, re.IGNORECASE):
382+
do_process()
383+
junk = line_num
346384
# Keep skipping lines until header line ends with '\n"'
347385
while line_num < line_count and not re.match(r'[+-].*\\n"$', line.strip()):
348386
line = git_diff[line_num]
349387
line_num += 1
350388
continue
351389

390+
# Add changed fuzzy comment lines
391+
if re.match(r'[+-]#, fuzzy', line, re.IGNORECASE):
392+
files_to_stage[fullfilename] = 'Modified'
393+
do_process()
394+
fullfilename = filename = ''
395+
continue
396+
352397
# Add files with changed translation text lines
353-
match = re.match(r'[+-](msgid|msgstr)?\s?"', line)
398+
match = re.match(r'[+-](msgid|msgstr|)?\s?"', line)
354399
if match:
355400
action = line[0]
356401
match_type = match.group(1)
357402
text = line[len(match.group(0)):-1]
358-
359-
# All related changes in the diff show the removed lines before the added lines
360-
# so a minus following a plus should signify a new change.
361-
if (last == '+' and action == '-') or (match_type and match_type != last_type):
362-
process_change(files_to_stage, fullfilename, minus, plus)
363-
minus = plus = last = last_type = ''
364-
365-
# Combine changed translation text lines within a 'msgid' or 'msgstr' section to
366-
# accommodate lines wrapped at different lengths but the overall content is the same
367-
if action == '+':
368-
plus += text
403+
if match_type == 'msgid' and not processing:
404+
junk = str(git_diff[line_num - 2])
405+
if re.match(r'\s#: \.\./', junk):
406+
do_process()
407+
file_line = line.split(':')[-1].strip()
408+
processing = True
409+
410+
if processing:
411+
last = match_type or last
412+
if last == 'msgid':
413+
file_msgid += text
414+
if last == 'msgstr':
415+
file_msgstr += text
369416
else:
370-
minus += text
417+
if last == '+' and action == '-':
418+
do_process()
419+
last = action
420+
if last == '-':
421+
minus += text
422+
else:
423+
plus += text
371424

372-
last = action
373-
if match_type:
374-
last_type = match_type
375425
continue
376426

377-
process_change(files_to_stage, fullfilename, minus, plus)
378-
minus = plus = last = match_type = last_type = ''
379-
380427
# Handle any outstanding changes at the end of the git diff output
381-
process_change(files_to_stage, fullfilename, minus, plus)
428+
do_process(new_file=True)
382429

383430

384-
def main(): # pylint: disable=too-many-statements
431+
def main():
385432
"""Main processing method.
386433
"""
387434
args = parse_command_line()
@@ -447,9 +494,8 @@ def main(): # pylint: disable=too-many-statements
447494
).returncode:
448495
Printer.stderr(f"\nThere was a problem adding {filename if silent else 'the file'} to the commit.\n")
449496
sys.exit(1)
450-
if dry_run:
451-
Printer.stdout("\nNo files staged due to dry run option enabled.")
452-
Printer.stdout()
497+
command = "\nNo files staged due to dry run option enabled.\n" if dry_run else ''
498+
Printer.stdout(command)
453499
else:
454500
Printer.stdout("\nNo files to stage for git.\n")
455501

0 commit comments

Comments
 (0)