diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29b..83ae728d 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,6 @@ +- bump: patch + changes: + changed: + - Explicitly specified encoding while building county FIPS dataset + removed: + - Unneeded build of county FIPS dataset to local folder \ No newline at end of file diff --git a/policyengine_us_data/geography/county_fips.py b/policyengine_us_data/geography/county_fips.py index 644bf8a2..3e5ac518 100644 --- a/policyengine_us_data/geography/county_fips.py +++ b/policyengine_us_data/geography/county_fips.py @@ -29,6 +29,7 @@ def generate_county_fips_2020_dataset(): raise ValueError( f"Failed to download county FIPS codes: {response.status_code}" ) + response.encoding = "utf-8" county_fips_raw = StringIO(response.text) @@ -42,6 +43,7 @@ def generate_county_fips_2020_dataset(): "COUNTYFP": str, "COUNTYNAME": str, }, + encoding="utf-8", ) county_fips = county_fips.rename( @@ -66,7 +68,9 @@ def generate_county_fips_2020_dataset(): csv_buffer = BytesIO() # Save CSV into buffer object and reset pointer - county_fips.to_csv(csv_buffer, index=False, compression="gzip") + county_fips.to_csv( + csv_buffer, index=False, compression="gzip", encoding="utf-8" + ) csv_buffer.seek(0) # Upload to Hugging Face @@ -76,8 +80,6 @@ def generate_county_fips_2020_dataset(): repo_file_path="county_fips_2020.csv.gz", ) - county_fips.to_csv(LOCAL_FOLDER / "county_fips.csv.gz", compression="gzip") - if __name__ == "__main__": generate_county_fips_2020_dataset() diff --git a/policyengine_us_data/tests/test_datasets/test_county_fips.py b/policyengine_us_data/tests/test_datasets/test_county_fips.py index 72748a2b..ad1f10c5 100644 --- a/policyengine_us_data/tests/test_datasets/test_county_fips.py +++ b/policyengine_us_data/tests/test_datasets/test_county_fips.py @@ -93,10 +93,6 @@ def test_successful_download_and_processing( # Check that upload_to_hf was called mock_upload_to_hf.assert_called_once() - # Check that to_csv was called with the right path - local_csv_call = mock_to_csv.call_args_list[-1] - assert str(LOCAL_FOLDER / "county_fips.csv.gz") in str(local_csv_call) - def test_download_failure(): """Test handling of download failure"""