finnroblin
diff --git a/.pre-commit-config.yml b/.pre-commit-config.yml
Lines changed: 0 additions & 22 deletions
diff --git a/gradle/regenerate/jflex/htmlentity.py b/gradle/regenerate/jflex/htmlentity.py
Lines changed: 82 additions & 73 deletions
@@ -195,17 +195,6 @@ repos:
           UV_FROZEN: "1"
         types_or: [ python, pyi, jupyter ]
         require_serial: true
-        exclude:
-          glob:
-            # TODO: fix the issues with these files separately
-            - gradle/regenerate/jflex/htmlentity.py
-            - gradle/regenerate/packed/gen_BulkOperation.py
-            - gradle/regenerate/moman/createLevAutomata.py
-            - lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/**/{gen_ForUtil.py,gen_ForDeltaUtil.py}
-            - lucene/core/src/java/org/apache/lucene/codecs/**/gen_ForUtil.py
-            - lucene/core/src/java/org/apache/lucene/util/automaton/UTF32ToUTF8.py
-            - lucene/core/src/java/org/apache/lucene/util/packed/gen_BulkOperation.py
-            - lucene/core/src/test/org/apache/lucene/util/makeEuroparlLineFile.py
 
       - id: ruff-format
         language: system
@@ -217,17 +206,6 @@ repos:
           UV_FROZEN: "1"
         types_or: [ python, pyi, jupyter ]
         require_serial: true
-        exclude:
-          glob:
-            # TODO: fix the formatting of these files separately
-            - gradle/regenerate/jflex/htmlentity.py
-            - gradle/regenerate/moman/createLevAutomata.py
-            - gradle/regenerate/packed/{gen_BulkOperation.py,gen_Packed64SingleBlock.py}
-            - lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/**/{gen_ForUtil.py,gen_ForDeltaUtil.py}
-            - lucene/core/src/java/org/apache/lucene/codecs/**/gen_ForUtil.py
-            - lucene/core/src/java/org/apache/lucene/util/packed/{gen_BulkOperation.py,gen_Packed64SingleBlock.py}
-            - lucene/core/src/java/org/apache/lucene/util/automaton/UTF32ToUTF8.py
-            - lucene/core/src/test/org/apache/lucene/util/makeEuroparlLineFile.py
 
       - id: test-ast-grep
         name: Check ast-grep rules
 
@@ -19,79 +19,86 @@
 # A simple python script to generate an HTML entity map and a regex alternation
 # for inclusion in HTMLStripCharFilter.jflex.
 
+
 def main():
-  with open(sys.argv[1], 'w') as f:
-      sys.stdout = f
-
-      print(get_apache_license())
-      codes = {}
-      regex = re.compile(r'\s*<!ENTITY\s+(\S+)\s+"&(?:#38;)?#(\d+);"')
-      for line in get_entity_text().split('\n'):
-        match = regex.match(line)
-        if match:
-          key = match.group(1)
-          if   key == 'quot': codes[key] = r'\"'
-          elif key == 'nbsp': codes[key] = ' ';
-          else              : codes[key] = r'\u%04X' % int(match.group(2))
-
-      keys = sorted(codes)
-
-      first_entry = True
-      output_line = 'CharacterEntities = ( '
-      for key in keys:
-        new_entry = ('"%s"' if first_entry else ' | "%s"') % key
-        first_entry = False
-        if len(output_line) + len(new_entry) >= 80:
-          print(output_line)
-          output_line = '                   '
-        output_line += new_entry
-        if key in ('quot','copy','gt','lt','reg','amp'):
-          new_entry = ' | "%s"' % key.upper()
-          if len(output_line) + len(new_entry) >= 80:
-            print(output_line)
-            output_line = '                   '
-          output_line += new_entry
-      print(output_line, ')')
-
-      print('%{')
-      print('  private static final Map<String,String> upperCaseVariantsAccepted')
-      print('      = new HashMap<>();')
-      print('  static {')
-      print('    upperCaseVariantsAccepted.put("quot", "QUOT");')
-      print('    upperCaseVariantsAccepted.put("copy", "COPY");')
-      print('    upperCaseVariantsAccepted.put("gt", "GT");')
-      print('    upperCaseVariantsAccepted.put("lt", "LT");')
-      print('    upperCaseVariantsAccepted.put("reg", "REG");')
-      print('    upperCaseVariantsAccepted.put("amp", "AMP");')
-      print('  }')
-      print('  private static final CharArrayMap<Character> entityValues')
-      print('      = new CharArrayMap<>(%i, false);' % len(keys))
-      print('  static {')
-      print('    String[] entities = {')
-      output_line = '     '
-      for key in keys:
-        new_entry = ' "%s", "%s",' % (key, codes[key])
-        if len(output_line) + len(new_entry) >= 80:
-          print(output_line)
-          output_line = '     '
-        output_line += new_entry
-      print(output_line[:-1])
-      print('    };')
-      print('    for (int i = 0 ; i < entities.length ; i += 2) {')
-      print('      Character value = entities[i + 1].charAt(0);')
-      print('      entityValues.put(entities[i], value);')
-      print('      String upperCaseVariant = upperCaseVariantsAccepted.get(entities[i]);')
-      print('      if (upperCaseVariant != null) {')
-      print('        entityValues.put(upperCaseVariant, value);')
-      print('      }')
-      print('    }')
-      print("  }")
-      print("%}")
+    with open(sys.argv[1], "w") as f:
+        sys.stdout = f
+
+        print(get_apache_license())
+        codes = {}
+        regex = re.compile(r'\s*<!ENTITY\s+(\S+)\s+"&(?:#38;)?#(\d+);"')
+        for line in get_entity_text().split("\n"):
+            match = regex.match(line)
+            if match:
+                key = match.group(1)
+                if key == "quot":
+                    codes[key] = r"\""
+                elif key == "nbsp":
+                    codes[key] = " "
+                else:
+                    codes[key] = r"\u%04X" % int(match.group(2))
+
+        keys = sorted(codes)
+
+        first_entry = True
+        output_line = "CharacterEntities = ( "
+        for key in keys:
+            new_entry = ('"%s"' if first_entry else ' | "%s"') % key
+            first_entry = False
+            if len(output_line) + len(new_entry) >= 80:
+                print(output_line)
+                output_line = "                   "
+            output_line += new_entry
+            if key in ("quot", "copy", "gt", "lt", "reg", "amp"):
+                new_entry = ' | "%s"' % key.upper()
+                if len(output_line) + len(new_entry) >= 80:
+                    print(output_line)
+                    output_line = "                   "
+                output_line += new_entry
+        print(output_line, ")")
+
+        print("%{")
+        print("  private static final Map<String,String> upperCaseVariantsAccepted")
+        print("      = new HashMap<>();")
+        print("  static {")
+        print('    upperCaseVariantsAccepted.put("quot", "QUOT");')
+        print('    upperCaseVariantsAccepted.put("copy", "COPY");')
+        print('    upperCaseVariantsAccepted.put("gt", "GT");')
+        print('    upperCaseVariantsAccepted.put("lt", "LT");')
+        print('    upperCaseVariantsAccepted.put("reg", "REG");')
+        print('    upperCaseVariantsAccepted.put("amp", "AMP");')
+        print("  }")
+        print("  private static final CharArrayMap<Character> entityValues")
+        print("      = new CharArrayMap<>(%i, false);" % len(keys))
+        print("  static {")
+        print("    String[] entities = {")
+        output_line = "     "
+        for key in keys:
+            new_entry = ' "%s", "%s",' % (key, codes[key])
+            if len(output_line) + len(new_entry) >= 80:
+                print(output_line)
+                output_line = "     "
+            output_line += new_entry
+        print(output_line[:-1])
+        print("    };")
+        print("    for (int i = 0 ; i < entities.length ; i += 2) {")
+        print("      Character value = entities[i + 1].charAt(0);")
+        print("      entityValues.put(entities[i], value);")
+        print(
+            "      String upperCaseVariant = upperCaseVariantsAccepted.get(entities[i]);"
+        )
+        print("      if (upperCaseVariant != null) {")
+        print("        entityValues.put(upperCaseVariant, value);")
+        print("      }")
+        print("    }")
+        print("  }")
+        print("%}")
+
 
 def get_entity_text():
-# The text below is taken verbatim from
-# <http://www.w3.org/TR/REC-html40/sgml/entities.html>:
-  text = r"""
+    # The text below is taken verbatim from
+    # <http://www.w3.org/TR/REC-html40/sgml/entities.html>:
+    text = r"""
 F.1. XHTML Character Entities
 
 XHTML DTDs make available a standard collection of named character entities. Those entities are defined in this section.
@@ -517,10 +524,11 @@ def get_entity_text():
 
 <!-- end of xhtml-symbol.ent -->
 """
-  return text
+    return text
+
 
 def get_apache_license():
-  license = r"""/*
+    license = r"""/*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -538,6 +546,7 @@ def get_apache_license():
  */
 
 """
-  return license
+    return license
+
 
 main()