12 changes: 5 additions & 7 deletions pipelines/assets/baseline.py
@@ -78,17 +78,15 @@ def get_wealth_group_dataframe(
# In the Summary columns in the Data, Data2, Data3 worksheets, the Wealth
# Group Category is in Row 4 (District) rather than Row 3 (Wealth Group Category)
# so do a second lookup to update the blank rows.
# If this doesn't find any new values, then it's because in a WB worksheet
# there are no extra Wealth Group Categories on Row 4
try:
# Note that in a WB worksheet there are no extra Wealth Group Categories on Row 4
if worksheet_name != "WB":
wealth_group_df = wealthgroupcategorylookup.do_lookup(
wealth_group_df, "district", "wealth_group_category", update=True
)
# Remove the duplicate wealth_group_category_original column created by the second do_lookup(),
# which otherwise causes problems when trying to merge dataframes, e.g. when building the wealth_group_df.
wealth_group_df = wealth_group_df.loc[:, ~wealth_group_df.columns.duplicated()]
except ValueError:
pass

# Check if there are unrecognized wealth group categories and report
wealth_group_missing_category_df = wealth_group_df[
wealth_group_df["wealth_group_category"].isnull()
@@ -266,7 +264,7 @@ def baseline_instances(
}

try:
preview = json.dumps(result, indent=4)
preview = json.dumps(result, indent=4, ensure_ascii=False)
except TypeError as e:
raise ValueError("Cannot serialize Community fixture to JSON. Failing dict is\n %s" % result) from e

Expand Down Expand Up @@ -359,7 +357,7 @@ def community_instances(context: AssetExecutionContext, config: BSSMetadataConfi
result = {"Community": community_df.to_dict(orient="records")}

try:
preview = json.dumps(result, indent=4)
preview = json.dumps(result, indent=4, ensure_ascii=False)
except TypeError as e:
raise ValueError("Cannot serialize Community fixture to JSON. Failing dict is\n %s" % result) from e

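
The change that recurs through the rest of this PR deserves one illustration. By default json.dumps escapes every non-ASCII character, so previews of records with accented place or wealth-group names become unreadable \uXXXX runs; ensure_ascii=False emits them as plain UTF-8. A quick sketch with an invented record:

import json

record = {"full_name": "Ségou Riverine"}  # hypothetical community name

print(json.dumps(record, indent=4))
#     "full_name": "S\u00e9gou Riverine"
print(json.dumps(record, indent=4, ensure_ascii=False))
#     "full_name": "Ségou Riverine"

# Both serializations decode to the same object, so nothing downstream
# that parses the JSON is affected by the switch.
assert json.loads(json.dumps(record, ensure_ascii=False)) == record
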
8 changes: 4 additions & 4 deletions pipelines/assets/fixtures.py
@@ -220,7 +220,7 @@ def validate_instances(

metadata = {f"num_{key.lower()}": len(value) for key, value in instances.items()}
metadata["total_instances"] = sum(len(value) for value in instances.values())
metadata["preview"] = MetadataValue.md(f"```json\n{json.dumps(instances, indent=4)}\n```")
metadata["preview"] = MetadataValue.md(f"```json\n{json.dumps(instances, indent=4, ensure_ascii=False)}\n```")
return instances, metadata


@@ -287,7 +287,7 @@ def get_fixture_from_instances(instance_dict: dict[str, list[dict]]) -> tuple[li
metadata[f'num_{str(model._meta).split(".")[-1]}'] += 1

metadata["total_instances"] = len(fixture)
metadata["preview"] = MetadataValue.md(f"```json\n{json.dumps(fixture, indent=4)}\n```")
metadata["preview"] = MetadataValue.md(f"```json\n{json.dumps(fixture, indent=4, ensure_ascii=False)}\n```")
return fixture, metadata


@@ -300,7 +300,7 @@ def import_fixture(fixture: list[dict]) -> dict:
# We need to use a .verbose_json file extension for Django to use the correct serializer.
with tempfile.NamedTemporaryFile(mode="w+", suffix=".verbose_json") as f:
# Write the fixture to a temporary file so that Django can access it
f.write(json.dumps(fixture))
f.write(json.dumps(fixture, indent=4, ensure_ascii=False))
f.seek(0)
call_command(verbose_load_data.Command(), f.name, verbosity=2, format="verbose_json", stdout=output_buffer)
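
A subtlety worth noting in import_fixture: writes to the NamedTemporaryFile are buffered, and loaddata reopens the file by name, so the buffered text has to reach disk first. The code appears to rely on f.seek(0) flushing the buffer as a side effect; an explicit flush() states the intent directly. A POSIX-only sketch of the round trip, with a made-up fixture and plain open() standing in for Django:

import json
import tempfile

fixture = [{"model": "baseline.community", "fields": {"name": "Exemple"}}]  # invented

with tempfile.NamedTemporaryFile(mode="w+", suffix=".verbose_json") as f:
    f.write(json.dumps(fixture, indent=4, ensure_ascii=False))
    f.flush()  # push buffered text to disk before another reader opens f.name
    with open(f.name) as reader:  # stands in for loaddata reading the fixture
        assert json.load(reader) == fixture
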

@@ -309,7 +309,7 @@ def import_fixture(fixture: list[dict]) -> dict:
for instance in fixture:
metadata[f'num_{instance["model"].split(".")[-1]}'] += 1
metadata["total_instances"] = len(fixture)
metadata["preview"] = MetadataValue.md(f"```json\n{json.dumps(fixture, indent=4)}\n```")
metadata["preview"] = MetadataValue.md(f"```json\n{json.dumps(fixture, indent=4, ensure_ascii=False)}\n```")
metadata["output"] = MetadataValue.md(f"```\n{output_buffer.getvalue()}\n```")
return metadata

6 changes: 4 additions & 2 deletions pipelines/assets/livelihood_activity.py
@@ -1061,7 +1061,7 @@ def get_instances_from_dataframe(
)
* 100
),
"preview": MetadataValue.md(f"```json\n{json.dumps(result, indent=4)}\n```"),
"preview": MetadataValue.md(f"```json\n{json.dumps(result, indent=4, ensure_ascii=False)}\n```"),
}
if not unrecognized_labels.empty:
metadata["unrecognized_labels"] = MetadataValue.md(unrecognized_labels.to_markdown(index=False))
@@ -1116,7 +1116,9 @@ def livelihood_activity_valid_instances(
valid_instances, metadata = validate_instances(context, livelihood_activity_instances, partition_key)
metadata = {f"num_{key.lower()}": len(value) for key, value in valid_instances.items()}
metadata["total_instances"] = sum(len(value) for value in valid_instances.values())
metadata["preview"] = MetadataValue.md(f"```json\n{json.dumps(valid_instances, indent=4)}\n```")
metadata["preview"] = MetadataValue.md(
f"```json\n{json.dumps(valid_instances, indent=4, ensure_ascii=False)}\n```"
)
return Output(
valid_instances,
metadata=metadata,
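
All of the *_valid_instances assets in this PR share the metadata recipe shown above: per-model counts, a grand total, and a JSON preview wrapped in a Markdown fence. A condensed sketch of the pattern with toy instances (Dagster's MetadataValue and Output, as imported in these modules):

import json
from dagster import MetadataValue, Output

valid_instances = {
    "WealthGroup": [{"community": "c1"}, {"community": "c2"}],
    "LivelihoodStrategy": [{"strategy_type": "MilkProduction"}],
}

metadata = {f"num_{key.lower()}": len(value) for key, value in valid_instances.items()}
metadata["total_instances"] = sum(len(value) for value in valid_instances.values())
# ensure_ascii=False keeps accented names readable in the rendered preview
metadata["preview"] = MetadataValue.md(
    f"```json\n{json.dumps(valid_instances, indent=4, ensure_ascii=False)}\n```"
)
output = Output(valid_instances, metadata=metadata)
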
4 changes: 3 additions & 1 deletion pipelines/assets/other_cash_income.py
@@ -171,7 +171,9 @@ def other_cash_income_valid_instances(
valid_instances, metadata = validate_instances(context, other_cash_income_instances, partition_key)
metadata = {f"num_{key.lower()}": len(value) for key, value in valid_instances.items()}
metadata["total_instances"] = sum(len(value) for value in valid_instances.values())
metadata["preview"] = MetadataValue.md(f"```json\n{json.dumps(valid_instances, indent=4)}\n```")
metadata["preview"] = MetadataValue.md(
f"```json\n{json.dumps(valid_instances, indent=4, ensure_ascii=False)}\n```"
)
return Output(
valid_instances,
metadata=metadata,
6 changes: 4 additions & 2 deletions pipelines/assets/wealth_characteristic.py
@@ -450,7 +450,7 @@ def wealth_characteristic_instances(
)
* 100
),
"preview": MetadataValue.md(f"```json\n{json.dumps(result, indent=4)}\n```"),
"preview": MetadataValue.md(f"```json\n{json.dumps(result, indent=4, ensure_ascii=False)}\n```"),
}
if not unrecognized_labels.empty:
metadata["unrecognized_labels"] = MetadataValue.md(unrecognized_labels.to_markdown(index=False))
@@ -473,7 +473,9 @@ def wealth_characteristic_valid_instances(
valid_instances, metadata = validate_instances(context, wealth_characteristic_instances, partition_key)
metadata = {f"num_{key.lower()}": len(value) for key, value in valid_instances.items()}
metadata["total_instances"] = sum(len(value) for value in valid_instances.values())
metadata["preview"] = MetadataValue.md(f"```json\n{json.dumps(valid_instances, indent=4)}\n```")
metadata["preview"] = MetadataValue.md(
f"```json\n{json.dumps(valid_instances, indent=4, ensure_ascii=False)}\n```"
)
return Output(
valid_instances,
metadata=metadata,
4 changes: 3 additions & 1 deletion pipelines/assets/wild_foods.py
@@ -181,7 +181,9 @@ def wild_foods_valid_instances(
valid_instances, metadata = validate_instances(context, wild_foods_instances, partition_key)
metadata = {f"num_{key.lower()}": len(value) for key, value in valid_instances.items()}
metadata["total_instances"] = sum(len(value) for value in valid_instances.values())
metadata["preview"] = MetadataValue.md(f"```json\n{json.dumps(valid_instances, indent=4)}\n```")
metadata["preview"] = MetadataValue.md(
f"```json\n{json.dumps(valid_instances, indent=4, ensure_ascii=False)}\n```"
)
return Output(
valid_instances,
metadata=metadata,
2 changes: 1 addition & 1 deletion pipelines/resources.py
@@ -70,7 +70,7 @@ def dump_to_path(self, context: OutputContext, obj: Any, path: "UPath"):
self.unlink(path)

with path.open("w") as file:
file.write(json.dumps(obj, indent=4))
file.write(json.dumps(obj, indent=4, ensure_ascii=False))

def load_from_path(self, context: InputContext, path: "UPath") -> Any:
with path.open("r") as file:
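
One caveat a reviewer might flag on dump_to_path: with ensure_ascii=False the serialized text can now contain non-ASCII characters, and path.open("w") without an explicit encoding falls back to the platform default, which on a non-UTF-8 locale would raise UnicodeEncodeError. Pinning the encoding sidesteps that; a sketch with an invented object and a local pathlib.Path in place of the UPath:

import json
from pathlib import Path

obj = {"livelihood_zone": "Ménaka pastoral"}  # invented non-ASCII value

# Explicit UTF-8 guarantees the unescaped characters can be written
# regardless of the platform's default encoding.
path = Path("obj.json")
with path.open("w", encoding="utf-8") as file:
    file.write(json.dumps(obj, indent=4, ensure_ascii=False))

with path.open("r", encoding="utf-8") as file:
    assert json.load(file) == obj
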