Skip to content

Commit 782559a

Browse files
Merge pull request #788 from geekygirlsarah/main
Update meta files, optimize validators
2 parents 148c42b + 25639b2 commit 782559a

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

67 files changed

+849
-345
lines changed
Lines changed: 121 additions & 154 deletions
Original file line numberDiff line numberDiff line change
@@ -1,165 +1,132 @@
1-
import os
21
import json
2+
from pathlib import Path
33

44
from django.core.management.base import BaseCommand, CommandError
55

6-
from web.models import MetaInfo
7-
86

97
class Command(BaseCommand):
108
help = "Reads all language JSON files to ensure they're constructed correctly"
119

12-
def handle(self, *args, **options):
13-
error_count = 0
14-
warning_count = 0
10+
def __init__(self, *args, **kwargs):
    """Initialise the command with zeroed counters and the thesaurus root.

    All positional and keyword arguments are forwarded unchanged to the
    parent initialiser.
    """
    super().__init__(*args, **kwargs)
    # Directory whose children are walked when the command runs. The path is
    # relative, so it is resolved against the current working directory.
    self.thesauruses_path = Path("web/thesauruses")
    # Running totals of the problems reported so far.
    self.warning_count = 0
    self.error_count = 0
1515

16-
# Open up thesaurus directory
17-
language_dirs = os.listdir("web/thesauruses/")
18-
for lang_dir in language_dirs:
19-
if lang_dir == "_meta":
20-
continue
21-
if os.path.isfile("web/thesauruses/" + lang_dir):
16+
def handle(self, *args, **options):
    """Validate every language JSON file under the thesaurus root.

    Walks ``<root>/<language>/<version>/*.json``. At the language level,
    plain files and the ``_meta`` directory are skipped; at the version
    level, plain files are skipped. Each JSON file found is passed to
    ``validate_language_file`` and a summary is printed at the end.

    Raises:
        CommandError: if the thesaurus root is not a directory, or if at
            least one error was reported while validating.
    """
    # Start from a clean slate so a reused command instance does not carry
    # counts over from a previous run.
    self.error_count = 0
    self.warning_count = 0

    # Fail with a readable message instead of a FileNotFoundError traceback.
    if not self.thesauruses_path.is_dir():
        raise CommandError(
            f"Thesaurus directory `{self.thesauruses_path}` was not found."
        )

    # Directory listings come back in arbitrary order; sorting keeps the
    # report stable from run to run and across platforms.
    for lang_dir in sorted(self.thesauruses_path.iterdir()):
        if not lang_dir.is_dir() or lang_dir.name == "_meta":
            continue

        for version_dir in sorted(lang_dir.iterdir()):
            if not version_dir.is_dir():
                continue

            for structure_file in sorted(version_dir.glob("*.json")):
                self.validate_language_file(structure_file)

    if self.warning_count > 0:
        self.stdout.write(self.style.WARNING(f"{self.warning_count} warnings found."))

    if self.error_count > 0:
        self.stdout.write(self.style.ERROR(f"{self.error_count} errors found."))
        raise CommandError(f"{self.error_count} errors found.")

    # Reaching this point means there were no errors, so only warnings
    # decide whether the run was completely clean.
    if self.warning_count == 0:
        self.stdout.write(self.style.SUCCESS("No issues found."))
37+
38+
def report_error(self, message):
    """Write `message` to stderr as a styled ``[Error]`` line and count it."""
    styled_line = self.style.ERROR(f"[Error] {message}")
    self.stderr.write(styled_line)
    self.error_count = self.error_count + 1
41+
42+
def report_warning(self, message):
    """Write `message` to stdout as a styled ``[Warning]`` line and count it."""
    styled_line = self.style.WARNING(f"[Warning] {message}")
    self.stdout.write(styled_line)
    self.warning_count = self.warning_count + 1
45+
46+
def validate_language_file(self, file_path):
    """Parse one structure file and run the meta and concept checks on it.

    Args:
        file_path: path of a JSON file located under
            ``self.thesauruses_path``.

    A file that cannot be read or decoded, or whose top-level JSON value is
    not an object, is reported as a single error and gets no further checks.
    """
    relative_path = file_path.relative_to(self.thesauruses_path)

    try:
        with open(file_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        self.report_error(f"Failed to parse `{relative_path}`: {e}")
        return
    except OSError as e:
        # One unreadable file should be reported like any other problem
        # rather than aborting the whole validation run.
        self.report_error(f"Failed to read `{relative_path}`: {e}")
        return

    # The section checks look keys up on `data`, so anything other than a
    # JSON object (list, string, number, null) has to be rejected here.
    if not isinstance(data, dict):
        self.report_error(
            f"`{relative_path}` must contain a JSON object at the top level, "
            f"but contains a {type(data).__name__}"
        )
        return

    self.check_meta_section(data, relative_path)
    self.check_concepts(data, relative_path)
58+
59+
def check_meta_section(self, data, relative_path):
    """Check the ``meta`` section of one parsed structure file.

    Args:
        data: the parsed top-level JSON value; expected to be a dict.
        relative_path: the file's path relative to the thesaurus root, e.g.
            ``python/3/data_types.json``. Its first component is taken as
            the language directory name.

    Reports an error for each empty, placeholder, or mismatching
    ``language``, ``language_version`` and ``language_name`` attribute, and
    for a deprecated top-level ``categories`` section.
    """
    # parts[0] is the language directory name.
    lang_dir = relative_path.parts[0]

    meta = data.get("meta", {})
    if not isinstance(meta, dict):
        # e.g. `"meta": null` or a list: there are no attributes to look
        # up, so report the malformed section once instead of crashing.
        self.report_error(
            f"`{relative_path}` has a `meta` section that is not an object and needs to be updated"
        )
    else:
        language = meta.get("language")
        language_version = meta.get("language_version")
        language_name = meta.get("language_name")

        if not language:
            self.report_error(f"`{relative_path}` has an empty `language` attribute and needs to be updated")
        elif language == "language_id":
            self.report_error(f"`{relative_path}` has the default `language` attribute and needs to be updated")
        elif language != lang_dir:
            self.report_error(
                f"`{relative_path}` has a `language` attribute that should be `{lang_dir}` and needs to be updated"
            )

        if not language_version:
            self.report_error(f"`{relative_path}` has an empty `language_version` attribute and needs to be updated")
        elif language_version == "version.number":
            self.report_error(f"`{relative_path}` has the default `language_version` attribute and needs to be updated")

        if not language_name:
            self.report_error(f"`{relative_path}` has an empty `language_name` attribute and needs to be updated")
        elif language_name in ("Human-Friendly Language Name", "Human-Readable Language Name"):
            self.report_error(f"`{relative_path}` has the default `language_name` attribute and needs to be updated")

    # Independent of the meta attributes: a top-level `categories` section
    # is reported as deprecated.
    if "categories" in data:
        self.report_error(f"`{relative_path}` has a `categories` section in it, which is now deprecated")
88+
89+
def check_concepts(self, data, relative_path):
    """Check every entry of the ``concepts`` section of one parsed file.

    Args:
        data: the parsed top-level JSON value; expected to be a dict.
        relative_path: the file's path relative to the thesaurus root; used
            only in the reported messages.

    Each concept must carry either ``code`` or ``not-implemented`` (hyphen),
    never both. The misspellings ``not_implemented`` and ``comments`` are
    reported as errors, and any other unrecognised key as a warning. A
    missing ``concepts`` section is treated as empty.
    """
    concepts = data.get("concepts", {})
    if not isinstance(concepts, dict):
        # e.g. `"concepts": null` or a list: there is nothing to iterate.
        self.report_error(f"`{relative_path}` has a `concepts` section that is not an object")
        return

    # Known-bad spellings stay in this set because they already get their
    # own error below; the warning is only for keys that are truly unknown.
    allowed_keys = frozenset(
        ("code", "comment", "not-implemented", "not_implemented", "name", "comments")
    )

    for concept_id, item_data in concepts.items():
        if not isinstance(item_data, dict):
            # Without this guard a string entry would be checked with
            # substring matching and then crash on `.get`.
            self.report_error(f"`{relative_path}`, ID: `{concept_id}` is not an object")
            continue

        has_code = "code" in item_data
        has_not_implemented = "not-implemented" in item_data
        has_not_underscore_implemented = "not_implemented" in item_data
        has_comments_plural = "comments" in item_data

        if has_not_underscore_implemented:
            self.report_error(
                f"`{relative_path}`, ID: `{concept_id}` has not_implemented (underscore) "
                "when it should use not-implemented (hyphen)"
            )

        if has_code and (has_not_implemented or has_not_underscore_implemented):
            self.report_error(
                f"`{relative_path}`, ID: `{concept_id}` should have `code` or `not-implemented`, not both"
            )

        if not has_code and not has_not_implemented and not has_not_underscore_implemented:
            self.report_error(
                f"`{relative_path}`, ID: `{concept_id}` is missing a needed `code` or `not-implemented` line"
            )

        if has_not_implemented and item_data.get("not-implemented") is True and has_code:
            self.report_error(
                f"`{relative_path}`, ID: `{concept_id}` is not implemented, but has a `code` line that should be removed"
            )

        if has_code and not item_data.get("code") and not has_not_implemented:
            self.report_error(
                f"`{relative_path}`, ID: `{concept_id}` is confusing: `code` is empty but there's no `not-implemented` either"
            )

        if has_comments_plural:
            self.report_error(
                f"`{relative_path}`, ID: `{concept_id}` has `comments` (plural) that should be `comment` (singular) instead"
            )

        # Check for unknown keys
        for key in item_data:
            if key not in allowed_keys:
                self.report_warning(f"`{relative_path}`, ID: `{concept_id}` has a line `{key}` that's unknown")

0 commit comments

Comments
 (0)