Skip to content

Commit 2218244

Browse files
committed
tool: include contents of codelists in compiled specifications
1 parent f652d0f commit 2218244

File tree

3 files changed

+114
-5
lines changed

3 files changed

+114
-5
lines changed

bin/csv_helpers.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,3 +53,50 @@ def write_csv(
5353
# Write the data rows
5454
for row in data:
5555
writer.writerow(row)
56+
57+
58+
def _escape_markdown_cell(value):
    """Make a CSV value safe to place inside a single markdown table cell."""
    # An unescaped pipe would be read as a column separator, and a raw line
    # break would terminate the table row early.
    escaped = value.replace("|", "\\|")
    return escaped.replace("\r\n", "<br>").replace("\n", "<br>").replace("\r", "<br>")


def csv_to_markdown(
    filename="data/output.csv",
    fields=None,
    exclude_fields=None,
    first_field="reference",
    last_field=None,
    encoding="utf-8",
):
    """
    Convert a CSV file to a markdown table, optionally limiting the columns.

    Columns are always emitted in the order they appear in the CSV header.

    Args:
        filename: Path of the CSV file to read.
        fields: Field names to include in the output. If None or empty, all
            fields are included.
        exclude_fields: Field names to leave out of the output.
        first_field: Accepted for interface compatibility; it currently has
            no effect on column order.
        last_field: Accepted for interface compatibility; it currently has
            no effect on column order.
        encoding: Text encoding used to read the file.

    Returns:
        str: The markdown table (header row, separator row, then one row per
        CSV record, each terminated by a newline), or an empty string when
        the file has no header row.
    """
    with open(filename, newline="", encoding=encoding) as csvfile:
        reader = csv.reader(csvfile)
        headers = next(reader, None)
        if headers is None:
            # Completely empty file: there is nothing to tabulate.
            return ""

        # Select by position rather than headers.index() so that duplicate
        # header names each map to their own column.
        field_indices = [
            index
            for index, header in enumerate(headers)
            if (not fields or header in fields)
            and not (exclude_fields and header in exclude_fields)
        ]
        headers_to_use = [headers[index] for index in field_indices]

        table_lines = [
            "| " + " | ".join(_escape_markdown_cell(h) for h in headers_to_use) + " |",
            "| " + " | ".join(["---"] * len(headers_to_use)) + " |",
        ]

        for row in reader:
            if not row:
                # csv.reader yields an empty list for blank lines.
                continue
            # Pad short rows so a ragged CSV still produces a full table row.
            selected_values = [
                row[index] if index < len(row) else "" for index in field_indices
            ]
            # Clean values (strip wrapping single quotes if present)
            cleaned_values = [
                val.strip("'") if val.startswith("'") and val.endswith("'") else val
                for val in selected_values
            ]
            table_lines.append(
                "| "
                + " | ".join(_escape_markdown_cell(val) for val in cleaned_values)
                + " |"
            )

    return "\n".join(table_lines) + "\n"

bin/generate_info_model.py

Lines changed: 66 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1+
import os
2+
13
from applications import get_application_module_refs
4+
from csv_helpers import csv_to_markdown
25
from fields import (
36
format_field_display_name,
47
get_applicable_app_types,
@@ -23,7 +26,6 @@ def format_fields_table(field_entries, fields_spec, table_type="main", app_type=
2326
"| reference | name | description | requirement | notes |",
2427
"| --- | --- | --- | --- | --- |",
2528
]
26-
else:
2729
lines = [
2830
"| reference | name | description | only for application | requirement | notes |",
2931
"| --- | --- | --- | --- | --- | --- |",
@@ -141,13 +143,67 @@ def get_codelists_for_app(module_refs, fields, specification):
141143
return codelists
142144

143145

146+
def create_codelist_table(codelist_obj):
    """
    Build the markdown section for a single codelist.

    The section starts with a level-3 heading holding the codelist name. What
    follows depends on the codelist's "source" value:

    - missing/empty: a note that no source was provided
    - an http(s) URL: a sentence linking to the source
    - a file path: the file rendered as a markdown table via
      csv_to_markdown; relative paths are resolved against the current
      working directory
    - anything else: a note that the source is unsupported

    Args:
        codelist_obj: Dict-like object read with .get("name") and
            .get("source").

    Returns:
        str: The markdown for the section. Problems with a file source
        (missing file, read/parse error) are reported inside the markdown
        rather than raised.
    """
    # `or` also covers a name that is present but None/empty.
    name = codelist_obj.get("name") or "Unknown"
    source = codelist_obj.get("source", "")

    # Heading
    lines = [f"### {name}"]

    # Source information
    if not source:
        lines.append("\n_codelist source not provided_\n")
    elif isinstance(source, str) and source.startswith(("http://", "https://")):
        lines.append(f"\nThis codelist is sourced from [{source}]({source})\n")
    elif not isinstance(source, (str, os.PathLike)):
        # A non-path value would make os.path.isabs raise TypeError below.
        lines.append(f"\nUnsupported codelist source: {source!r}\n")
    else:
        path = source
        if not os.path.isabs(path):
            # Relative sources are resolved from the current working
            # directory (presumably the repository root — confirm how the
            # tool is invoked).
            path = os.path.join(os.getcwd(), path)

        if not os.path.exists(path):
            lines.append(f"\nSource file not found: {source}\n")
        else:
            try:
                md_table = csv_to_markdown(path)
            except Exception as e:
                # Best effort: a bad codelist file must not abort generation
                # of the whole specification.
                lines.append(f"\nError reading source file {source}: {e}\n")
            else:
                lines.append("")
                lines.append(md_table)

    return "\n".join(lines)
182+
183+
144184
def generate_codelist_md_str(codelists):
    """
    Generate the markdown listing the codelists required for the
    specification.

    Args:
        codelists (list): Codelist objects. Each must be dict-like: its
            "name" is read with .get() for sorting, and the object is
            passed to create_codelist_table to render its section.

    Returns:
        str: An introductory sentence followed by one section per codelist,
        ordered by codelist name, or an empty string if no codelists are
        provided.
    """
    if not codelists:
        return ""

    # `or ""` keeps the sort from comparing None with str when a codelist has
    # a "name" key whose value is None.
    sorted_codelists = sorted(codelists, key=lambda c: c.get("name") or "")

    lines = ["Below are the codelists required to support this specification:\n"]
    lines.extend(create_codelist_table(codelist) for codelist in sorted_codelists)
    return "\n".join(lines)
152208

153209

@@ -157,6 +213,8 @@ def generate_application(app_ref, specification):
157213
"""
158214
applications = specification.get("application", {})
159215
fields = specification.get("field", {})
216+
codelists = specification.get("codelist", {})
217+
160218
app = applications.get(app_ref)
161219
if not app:
162220
print(f"Application '{app_ref}' not found in specification.")
@@ -210,7 +268,10 @@ def generate_application(app_ref, specification):
210268
# 6. Required Codelists
211269
if inc_codelists:
212270
out.append("## Required codelists\n")
213-
out.append(generate_codelist_md_str(inc_codelists))
271+
inc_codelist_objs = [
272+
codelists.get(ref) for ref in inc_codelists if codelists.get(ref)
273+
]
274+
out.append(generate_codelist_md_str(inc_codelist_objs))
214275

215276
return "\n".join(out)
216277

bin/integrity_checks/applications.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
# 6. if extends then must be a valid application reference in application schemas
1515
# 7. define which fields are overridden and which are added to
1616
# 8. listing app types should list base types and their 'children' applications
17+
# 9. check the application is part of the official application or sub application dataset
1718

1819

1920
def check_application_names(applications):

0 commit comments

Comments
 (0)