Skip to content

Commit 4c19667

Browse files
rmuir authored and finnroblin committed
build: fix/reformat python sources (apache#15651)
Most changes are autofixes. gradlew regenerate was run and produced unchanged java files after fixing.
1 parent 19fdab7 commit 4c19667

32 files changed

+3267
-2528
lines changed

.pre-commit-config.yml

Lines changed: 0 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -195,17 +195,6 @@ repos:
195195
UV_FROZEN: "1"
196196
types_or: [ python, pyi, jupyter ]
197197
require_serial: true
198-
exclude:
199-
glob:
200-
# TODO: fix the issues with these files separately
201-
- gradle/regenerate/jflex/htmlentity.py
202-
- gradle/regenerate/packed/gen_BulkOperation.py
203-
- gradle/regenerate/moman/createLevAutomata.py
204-
- lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/**/{gen_ForUtil.py,gen_ForDeltaUtil.py}
205-
- lucene/core/src/java/org/apache/lucene/codecs/**/gen_ForUtil.py
206-
- lucene/core/src/java/org/apache/lucene/util/automaton/UTF32ToUTF8.py
207-
- lucene/core/src/java/org/apache/lucene/util/packed/gen_BulkOperation.py
208-
- lucene/core/src/test/org/apache/lucene/util/makeEuroparlLineFile.py
209198

210199
- id: ruff-format
211200
language: system
@@ -217,17 +206,6 @@ repos:
217206
UV_FROZEN: "1"
218207
types_or: [ python, pyi, jupyter ]
219208
require_serial: true
220-
exclude:
221-
glob:
222-
# TODO: fix the formatting of these files separately
223-
- gradle/regenerate/jflex/htmlentity.py
224-
- gradle/regenerate/moman/createLevAutomata.py
225-
- gradle/regenerate/packed/{gen_BulkOperation.py,gen_Packed64SingleBlock.py}
226-
- lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/**/{gen_ForUtil.py,gen_ForDeltaUtil.py}
227-
- lucene/core/src/java/org/apache/lucene/codecs/**/gen_ForUtil.py
228-
- lucene/core/src/java/org/apache/lucene/util/packed/{gen_BulkOperation.py,gen_Packed64SingleBlock.py}
229-
- lucene/core/src/java/org/apache/lucene/util/automaton/UTF32ToUTF8.py
230-
- lucene/core/src/test/org/apache/lucene/util/makeEuroparlLineFile.py
231209

232210
- id: test-ast-grep
233211
name: Check ast-grep rules

gradle/regenerate/jflex/htmlentity.py

Lines changed: 82 additions & 73 deletions
Original file line number | Diff line number | Diff line change
@@ -19,79 +19,86 @@
1919
# A simple python script to generate an HTML entity map and a regex alternation
2020
# for inclusion in HTMLStripCharFilter.jflex.
2121

22+
2223
def main():
23-
with open(sys.argv[1], 'w') as f:
24-
sys.stdout = f
25-
26-
print(get_apache_license())
27-
codes = {}
28-
regex = re.compile(r'\s*<!ENTITY\s+(\S+)\s+"&(?:#38;)?#(\d+);"')
29-
for line in get_entity_text().split('\n'):
30-
match = regex.match(line)
31-
if match:
32-
key = match.group(1)
33-
if key == 'quot': codes[key] = r'\"'
34-
elif key == 'nbsp': codes[key] = ' ';
35-
else : codes[key] = r'\u%04X' % int(match.group(2))
36-
37-
keys = sorted(codes)
38-
39-
first_entry = True
40-
output_line = 'CharacterEntities = ( '
41-
for key in keys:
42-
new_entry = ('"%s"' if first_entry else ' | "%s"') % key
43-
first_entry = False
44-
if len(output_line) + len(new_entry) >= 80:
45-
print(output_line)
46-
output_line = ' '
47-
output_line += new_entry
48-
if key in ('quot','copy','gt','lt','reg','amp'):
49-
new_entry = ' | "%s"' % key.upper()
50-
if len(output_line) + len(new_entry) >= 80:
51-
print(output_line)
52-
output_line = ' '
53-
output_line += new_entry
54-
print(output_line, ')')
55-
56-
print('%{')
57-
print(' private static final Map<String,String> upperCaseVariantsAccepted')
58-
print(' = new HashMap<>();')
59-
print(' static {')
60-
print(' upperCaseVariantsAccepted.put("quot", "QUOT");')
61-
print(' upperCaseVariantsAccepted.put("copy", "COPY");')
62-
print(' upperCaseVariantsAccepted.put("gt", "GT");')
63-
print(' upperCaseVariantsAccepted.put("lt", "LT");')
64-
print(' upperCaseVariantsAccepted.put("reg", "REG");')
65-
print(' upperCaseVariantsAccepted.put("amp", "AMP");')
66-
print(' }')
67-
print(' private static final CharArrayMap<Character> entityValues')
68-
print(' = new CharArrayMap<>(%i, false);' % len(keys))
69-
print(' static {')
70-
print(' String[] entities = {')
71-
output_line = ' '
72-
for key in keys:
73-
new_entry = ' "%s", "%s",' % (key, codes[key])
74-
if len(output_line) + len(new_entry) >= 80:
75-
print(output_line)
76-
output_line = ' '
77-
output_line += new_entry
78-
print(output_line[:-1])
79-
print(' };')
80-
print(' for (int i = 0 ; i < entities.length ; i += 2) {')
81-
print(' Character value = entities[i + 1].charAt(0);')
82-
print(' entityValues.put(entities[i], value);')
83-
print(' String upperCaseVariant = upperCaseVariantsAccepted.get(entities[i]);')
84-
print(' if (upperCaseVariant != null) {')
85-
print(' entityValues.put(upperCaseVariant, value);')
86-
print(' }')
87-
print(' }')
88-
print(" }")
89-
print("%}")
24+
with open(sys.argv[1], "w") as f:
25+
sys.stdout = f
26+
27+
print(get_apache_license())
28+
codes = {}
29+
regex = re.compile(r'\s*<!ENTITY\s+(\S+)\s+"&(?:#38;)?#(\d+);"')
30+
for line in get_entity_text().split("\n"):
31+
match = regex.match(line)
32+
if match:
33+
key = match.group(1)
34+
if key == "quot":
35+
codes[key] = r"\""
36+
elif key == "nbsp":
37+
codes[key] = " "
38+
else:
39+
codes[key] = r"\u%04X" % int(match.group(2))
40+
41+
keys = sorted(codes)
42+
43+
first_entry = True
44+
output_line = "CharacterEntities = ( "
45+
for key in keys:
46+
new_entry = ('"%s"' if first_entry else ' | "%s"') % key
47+
first_entry = False
48+
if len(output_line) + len(new_entry) >= 80:
49+
print(output_line)
50+
output_line = " "
51+
output_line += new_entry
52+
if key in ("quot", "copy", "gt", "lt", "reg", "amp"):
53+
new_entry = ' | "%s"' % key.upper()
54+
if len(output_line) + len(new_entry) >= 80:
55+
print(output_line)
56+
output_line = " "
57+
output_line += new_entry
58+
print(output_line, ")")
59+
60+
print("%{")
61+
print(" private static final Map<String,String> upperCaseVariantsAccepted")
62+
print(" = new HashMap<>();")
63+
print(" static {")
64+
print(' upperCaseVariantsAccepted.put("quot", "QUOT");')
65+
print(' upperCaseVariantsAccepted.put("copy", "COPY");')
66+
print(' upperCaseVariantsAccepted.put("gt", "GT");')
67+
print(' upperCaseVariantsAccepted.put("lt", "LT");')
68+
print(' upperCaseVariantsAccepted.put("reg", "REG");')
69+
print(' upperCaseVariantsAccepted.put("amp", "AMP");')
70+
print(" }")
71+
print(" private static final CharArrayMap<Character> entityValues")
72+
print(" = new CharArrayMap<>(%i, false);" % len(keys))
73+
print(" static {")
74+
print(" String[] entities = {")
75+
output_line = " "
76+
for key in keys:
77+
new_entry = ' "%s", "%s",' % (key, codes[key])
78+
if len(output_line) + len(new_entry) >= 80:
79+
print(output_line)
80+
output_line = " "
81+
output_line += new_entry
82+
print(output_line[:-1])
83+
print(" };")
84+
print(" for (int i = 0 ; i < entities.length ; i += 2) {")
85+
print(" Character value = entities[i + 1].charAt(0);")
86+
print(" entityValues.put(entities[i], value);")
87+
print(
88+
" String upperCaseVariant = upperCaseVariantsAccepted.get(entities[i]);"
89+
)
90+
print(" if (upperCaseVariant != null) {")
91+
print(" entityValues.put(upperCaseVariant, value);")
92+
print(" }")
93+
print(" }")
94+
print(" }")
95+
print("%}")
96+
9097

9198
def get_entity_text():
92-
# The text below is taken verbatim from
93-
# <http://www.w3.org/TR/REC-html40/sgml/entities.html>:
94-
text = r"""
99+
# The text below is taken verbatim from
100+
# <http://www.w3.org/TR/REC-html40/sgml/entities.html>:
101+
text = r"""
95102
F.1. XHTML Character Entities
96103
97104
XHTML DTDs make available a standard collection of named character entities. Those entities are defined in this section.
@@ -517,10 +524,11 @@ def get_entity_text():
517524
518525
<!-- end of xhtml-symbol.ent -->
519526
"""
520-
return text
527+
return text
528+
521529

522530
def get_apache_license():
523-
license = r"""/*
531+
license = r"""/*
524532
* Licensed to the Apache Software Foundation (ASF) under one or more
525533
* contributor license agreements. See the NOTICE file distributed with
526534
* this work for additional information regarding copyright ownership.
@@ -538,6 +546,7 @@ def get_apache_license():
538546
*/
539547
540548
"""
541-
return license
549+
return license
550+
542551

543552
main()

0 commit comments

Comments
 (0)