|
1 | | -import os |
2 | 1 | import json |
| 2 | +from pathlib import Path |
3 | 3 |
|
4 | 4 | from django.core.management.base import BaseCommand, CommandError |
5 | 5 |
|
6 | | -from web.models import MetaInfo |
7 | | - |
8 | 6 |
|
class Command(BaseCommand):
    """Validate every language thesaurus JSON file under ``web/thesauruses``.

    The command walks ``<language>/<version>/*.json`` (skipping the ``_meta``
    directory and anything that is not a directory), checks each file's
    ``meta`` and ``concepts`` sections, prints one line per problem, and
    finishes with a summary. It raises ``CommandError`` if any error was
    reported.
    """

    help = "Reads all language JSON files to ensure they're constructed correctly"

    # Template placeholder values that must be replaced in a real file.
    # Kept as a tuple so membership tests also work on unhashable JSON values.
    _PLACEHOLDER_LANGUAGE_NAMES = (
        "Human-Friendly Language Name",
        "Human-Readable Language Name",
    )

    # Keys that do not trigger the "unknown line" warning. `not_implemented`,
    # `name` and `comments` are listed because they are either reported by a
    # dedicated check or deliberately tolerated.
    _KNOWN_CONCEPT_KEYS = frozenset(
        {"code", "comment", "not-implemented", "not_implemented", "name", "comments"}
    )

    def __init__(self, *args, **kwargs):
        """Initialise the counters and the root directory to scan."""
        super().__init__(*args, **kwargs)
        self.error_count = 0
        self.warning_count = 0
        self.thesauruses_path = Path("web/thesauruses")

    def handle(self, *args, **options):
        """Validate all structure files and print a summary.

        Raises:
            CommandError: if the thesaurus directory does not exist, or if at
                least one error was reported while validating.
        """
        # Start from zero so a reused instance never reports stale totals.
        self.error_count = 0
        self.warning_count = 0

        if not self.thesauruses_path.is_dir():
            raise CommandError(
                f"Thesaurus directory `{self.thesauruses_path}` was not found"
            )

        # Sorted iteration keeps the report order stable across filesystems.
        for lang_dir in sorted(self.thesauruses_path.iterdir()):
            if not lang_dir.is_dir() or lang_dir.name == "_meta":
                continue

            for version_dir in sorted(lang_dir.iterdir()):
                if not version_dir.is_dir():
                    continue

                for structure_file in sorted(version_dir.glob("*.json")):
                    self.validate_language_file(structure_file)

        if self.warning_count > 0:
            self.stdout.write(self.style.WARNING(f"{self.warning_count} warnings found."))

        if self.error_count > 0:
            self.stdout.write(self.style.ERROR(f"{self.error_count} errors found."))
            raise CommandError(f"{self.error_count} errors found.")

        if self.error_count == 0 and self.warning_count == 0:
            self.stdout.write(self.style.SUCCESS("No issues found."))

    def report_error(self, message):
        """Write ``message`` to stderr as an error and count it."""
        self.stderr.write(self.style.ERROR(f"[Error] {message}"))
        self.error_count += 1

    def report_warning(self, message):
        """Write ``message`` to stdout as a warning and count it."""
        self.stdout.write(self.style.WARNING(f"[Warning] {message}"))
        self.warning_count += 1

    def validate_language_file(self, file_path):
        """Load one structure file and run every check against it.

        Args:
            file_path: path of the JSON file; must be located under
                ``self.thesauruses_path``.

        Files that cannot be read, cannot be parsed, or whose top-level value
        is not a JSON object are reported as a single error and skipped.
        """
        relative_path = file_path.relative_to(self.thesauruses_path)

        try:
            with open(file_path, "r", encoding="utf-8") as f:
                data = json.load(f)
        except OSError as e:
            # An unreadable file should be reported, not abort the whole run.
            self.report_error(f"Failed to read `{relative_path}`: {e}")
            return
        except (json.JSONDecodeError, UnicodeDecodeError) as e:
            self.report_error(f"Failed to parse `{relative_path}`: {e}")
            return

        # The checks below use dict methods; valid JSON may still be a list,
        # string, number or null at the top level.
        if not isinstance(data, dict):
            self.report_error(f"`{relative_path}` must contain a JSON object at the top level")
            return

        self.check_meta_section(data, relative_path)
        self.check_concepts(data, relative_path)

    def check_meta_section(self, data, relative_path):
        """Check the ``meta`` attributes and the deprecated ``categories`` key.

        Args:
            data: parsed JSON object of the structure file.
            relative_path: file path relative to the thesaurus root, something
                like ``python/3/data_types.json``; its first component is the
                language directory name.
        """
        meta = data.get("meta", {})
        lang_dir = relative_path.parts[0]

        if isinstance(meta, dict):
            language = meta.get("language")
            language_version = meta.get("language_version")
            language_name = meta.get("language_name")

            if not language:
                self.report_error(
                    f"`{relative_path}` has an empty `language` attribute and needs to be updated"
                )
            elif language == "language_id":
                self.report_error(
                    f"`{relative_path}` has the default `language` attribute and needs to be updated"
                )
            elif language != lang_dir:
                self.report_error(
                    f"`{relative_path}` has a `language` attribute that should be `{lang_dir}` "
                    "and needs to be updated"
                )

            if not language_version:
                self.report_error(
                    f"`{relative_path}` has an empty `language_version` attribute and needs to be updated"
                )
            elif language_version == "version.number":
                self.report_error(
                    f"`{relative_path}` has the default `language_version` attribute and needs to be updated"
                )

            if not language_name:
                self.report_error(
                    f"`{relative_path}` has an empty `language_name` attribute and needs to be updated"
                )
            elif language_name in self._PLACEHOLDER_LANGUAGE_NAMES:
                self.report_error(
                    f"`{relative_path}` has the default `language_name` attribute and needs to be updated"
                )
        else:
            # e.g. `"meta": null` or a list: report it once instead of
            # crashing on `.get()` or emitting three misleading errors.
            self.report_error(f"`{relative_path}` has a `meta` section that is not a JSON object")

        if "categories" in data:
            self.report_error(
                f"`{relative_path}` has a `categories` section in it, which is now deprecated"
            )

    def check_concepts(self, data, relative_path):
        """Check every entry of the ``concepts`` section.

        Args:
            data: parsed JSON object of the structure file.
            relative_path: file path relative to the thesaurus root, used in
                the reported messages.

        A missing ``concepts`` section is treated as empty.
        """
        concepts = data.get("concepts", {})
        if not isinstance(concepts, dict):
            self.report_error(f"`{relative_path}` has a `concepts` section that is not a JSON object")
            return

        for concept_id, item_data in concepts.items():
            where = f"`{relative_path}`, ID: `{concept_id}`"

            if not isinstance(item_data, dict):
                self.report_error(f"{where} is not a JSON object")
                continue

            has_code = "code" in item_data
            has_not_implemented = "not-implemented" in item_data
            has_not_underscore_implemented = "not_implemented" in item_data
            has_comments_plural = "comments" in item_data

            if has_not_underscore_implemented:
                self.report_error(
                    f"{where} has not_implemented (underscore) "
                    "when it should use not-implemented (hyphen)"
                )

            if has_code and (has_not_implemented or has_not_underscore_implemented):
                self.report_error(
                    f"{where} should have `code` or `not-implemented`, not both"
                )

            if not has_code and not has_not_implemented and not has_not_underscore_implemented:
                self.report_error(
                    f"{where} is missing a needed `code` or `not-implemented` line"
                )

            # `.get(...) is True` already implies the key is present.
            if has_code and item_data.get("not-implemented") is True:
                self.report_error(
                    f"{where} is not implemented, but has a `code` line that should be removed"
                )

            if has_code and not item_data.get("code") and not has_not_implemented:
                self.report_error(
                    f"{where} is confusing: `code` is empty but there's no `not-implemented` either"
                )

            if has_comments_plural:
                self.report_error(
                    f"{where} has `comments` (plural) that should be `comment` (singular) instead"
                )

            # Anything outside the known key set is only a warning.
            for key in item_data:
                if key not in self._KNOWN_CONCEPT_KEYS:
                    self.report_warning(f"{where} has a line `{key}` that's unknown")
0 commit comments