Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
799f7d5
explore gdal cli in shell script
jackrosacker Jan 29, 2026
b199cb0
restructure utils for gdb metadata module
jackrosacker Jan 30, 2026
5ea952a
rough out planned fns
jackrosacker Jan 30, 2026
652c4c5
type hints
jackrosacker Jan 30, 2026
78b381d
draft of read + write metadata fns
jackrosacker Feb 2, 2026
2463aa2
imports plus dummy test
jackrosacker Feb 2, 2026
28fff7d
path
jackrosacker Feb 2, 2026
cc9c4c0
rename resource: "shapefile" --> "esri"
jackrosacker Feb 3, 2026
6336e6c
add gdb resource
jackrosacker Feb 3, 2026
ed4134c
WIP: gdb md utils and tests
jackrosacker Feb 3, 2026
73381dd
fix test - wrong date assertion
jackrosacker Feb 3, 2026
3b888fa
fix test
jackrosacker Feb 3, 2026
5b1f0c2
WIP: add fgdb metadata utils
jackrosacker Feb 4, 2026
aa855ff
handle None type metadata
jackrosacker Feb 4, 2026
937a879
fix import, fixes tests
jackrosacker Feb 4, 2026
b00a6f0
rename files
jackrosacker Feb 5, 2026
8af5263
intermediate layer - unique id
jackrosacker Feb 5, 2026
ac32abb
WIP
jackrosacker Mar 9, 2026
4039988
remove single quotes in LCO arg
jackrosacker Mar 10, 2026
072a95b
add get_layers() fn and tests
jackrosacker Mar 10, 2026
98f1c8c
test other layers are kept after md edit
jackrosacker Mar 10, 2026
5b928ed
add pluto table to resources gdb
jackrosacker Mar 10, 2026
fa7da2c
parametrize: test zipped + nonzipped gdbs
jackrosacker Mar 10, 2026
88fc714
compacted for size. 75 MB to 36 KB
jackrosacker Mar 11, 2026
1ec1192
add zipped gdb handling
jackrosacker Mar 11, 2026
23f3ee8
f
jackrosacker Mar 11, 2026
6719fc4
delete "remove_metadata()" call when overwriting
jackrosacker Mar 11, 2026
e1c4bc1
sort imports
jackrosacker Mar 11, 2026
c6afc94
rename
jackrosacker Mar 26, 2026
c0f2db1
wip
jackrosacker Mar 27, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions dcpy/test/lifecycle/package/test_shapefiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,9 @@ def test_write_shapefile_xml_metadata(
)
metadata = shp.read_metadata()

if metadata is None:
pytest.fail("Expected metadata to exist")

# Test default values
assert metadata.md_stan_name == fields["md_stan_name"].default
assert metadata.md_stan_ver == fields["md_stan_ver"].default
Expand Down
2 changes: 1 addition & 1 deletion dcpy/test/utils/code_gen/test_pydantic_from_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from dcpy.utils.code_gen import pydantic_from_xml

PLUTO_METADATA_XML = "shapefile_metadata_pluto_export.xml"
PLUTO_METADATA_XML = "esri_metadata_pluto_export.xml"

# Test a minimal XML example
XML_TEMPLATE = """<?xml version="1.0"?>
Expand Down
42 changes: 42 additions & 0 deletions dcpy/test/utils/geospatial/test_esri_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
from datetime import datetime

from pytest import fixture

from dcpy.utils.geospatial.esri_metadata import generate_metadata


@fixture
def today_datestamp() -> str:
    """Return the current local date formatted as ``YYYYMMDD``."""
    # datetime.__format__ delegates to strftime for a non-empty format spec.
    return f"{datetime.now():%Y%m%d}"


def test_generate_metadata(today_datestamp):
    """Check the default values and value types of a generated Metadata object."""
    md = generate_metadata()

    # The fixture's date is captured before generate_metadata() reads the clock.
    # If midnight passes in between, the generated CreaDate is the following day.
    # Sampling the date again after the call brackets it, so either value is valid.
    acceptable_dates = {today_datestamp, datetime.now().strftime("%Y%m%d")}

    assert hasattr(md, "esri")
    esri = md.esri
    assert isinstance(esri.crea_date, str)

    # CreaTime has leading zeros and must be preserved as string
    assert isinstance(esri.crea_time, str)
    assert esri.crea_date in acceptable_dates

    # ArcGISFormat
    assert isinstance(esri.arc_gis_format, float)
    assert esri.arc_gis_format == 1.0

    # SyncOnce should be string
    assert isinstance(esri.sync_once, str)
    assert esri.sync_once == "TRUE"

    # mdHrLv.ScopeCd @value preserves leading zeros as string
    assert hasattr(md, "md_hr_lv")
    assert md.md_hr_lv.scope_cd.value == "005"
    assert isinstance(md.md_hr_lv.scope_cd.value, str)

    # mdDateSt should capture its text as an int and attribute Sync should be present
    assert hasattr(md, "md_date_st")
    assert isinstance(md.md_date_st.value, int)
    assert md.md_date_st.sync == "TRUE"
135 changes: 135 additions & 0 deletions dcpy/test/utils/geospatial/test_fgdb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
import shutil
import zipfile

import pytest
from pytest import fixture

from dcpy.models.data.shapefile_metadata import Metadata
from dcpy.utils.geospatial import fgdb

GDB_ZIP = "geodatabase.gdb.zip"
FEATURE_CLASS = "mappluto_one_row"
TABLE = "pluto_one_row"
METADATA_XML = "esri_metadata.xml"


@fixture
def temp_gdb_zip_path(utils_resources_path, tmp_path):
    """Copy the zipped geodatabase resource into ``tmp_path`` and return the copy's path."""
    zipped_copy = tmp_path / GDB_ZIP
    shutil.copy2(src=utils_resources_path / GDB_ZIP, dst=zipped_copy)

    # Fail early if the copied resource is not a readable zip archive.
    is_valid_zip = zipfile.is_zipfile(zipped_copy)
    assert is_valid_zip, f"'{GDB_ZIP}' should be a valid zip file"
    return zipped_copy


@fixture
def temp_gdb_nonzipped_path(temp_gdb_zip_path, tmp_path):
    """Unpack the zipped geodatabase into ``tmp_path`` and return the extracted directory."""
    shutil.unpack_archive(filename=temp_gdb_zip_path, extract_dir=tmp_path)

    # The expected directory name is the zip's file name minus its final suffix.
    unpacked_dir = tmp_path / temp_gdb_zip_path.stem
    assert unpacked_dir.is_dir(), "Expected a gdb directory, but found none"
    return unpacked_dir


@fixture
def temp_metadata_object(utils_resources_path):
    """Parse the metadata XML resource into a ``Metadata`` object."""
    xml_file = utils_resources_path / METADATA_XML
    # Decode explicitly as UTF-8 (the XML default when no encoding is declared)
    # rather than relying on the platform's locale encoding.
    xml_content = xml_file.read_text(encoding="utf-8")
    assert xml_content != "", (
        f"Non-empty string expected, got: '{xml_content}' instead."
    )
    md_object = Metadata.from_xml(xml_content)
    return md_object


@fixture
def path_fixture(request):
    """Return the value of the fixture whose name is supplied as the parametrized value."""
    fixture_name = request.param
    return request.getfixturevalue(fixture_name)


# Reusable marker: runs a test once per listed fixture name. The names are
# routed through `path_fixture` (indirect=True) instead of being passed as
# literal strings.
gdb_paths = pytest.mark.parametrize(
    "path_fixture",
    [
        pytest.param("temp_gdb_zip_path", id="run_tests_on_zipped_gdb"),
        pytest.param("temp_gdb_nonzipped_path", id="run_tests_on_nonzipped_gdb"),
    ],
    indirect=True,
)


@gdb_paths
def test_get_layers(path_fixture):
    """get_layers() returns the feature class followed by the table."""
    expected_layers = [FEATURE_CLASS, TABLE]
    assert fgdb.get_layers(path_fixture) == expected_layers


@gdb_paths
def test_read_metadata(path_fixture):
    """Metadata read from the feature class exposes the expected Esri creation stamps."""
    metadata = fgdb.read_metadata(gdb=path_fixture, layer=FEATURE_CLASS)

    element = "esri"
    assert hasattr(metadata, element), f"Expected element '{element}', but found none"

    esri = metadata.esri
    assert esri.crea_date == "20260203"
    assert esri.crea_time == "10392600"


@gdb_paths
def test_write_metadata(path_fixture, temp_metadata_object):
    """Overwriting a layer's metadata stores the new values and keeps every layer."""
    layers_before = fgdb.get_layers(path_fixture)
    fgdb.write_metadata(
        gdb=path_fixture,
        layer=FEATURE_CLASS,
        metadata=temp_metadata_object,
        overwrite=True,
    )
    layers_after = fgdb.get_layers(path_fixture)

    written = fgdb.read_metadata(path_fixture, FEATURE_CLASS)
    element = "esri"
    assert hasattr(written, element), f"Expected element '{element}', but found none"

    esri = written.esri
    assert esri.crea_date == "19611215"
    assert esri.crea_time == "00000000"

    # No gdb layers may be lost during the metadata write.
    assert sorted(layers_before) == sorted(layers_after)


@gdb_paths
def test_metadata_exists(path_fixture):
    """metadata_exists() reports True before removal and False afterwards."""
    # Check the precondition before mutating the geodatabase, so that a fixture
    # without metadata fails here with a clear message instead of surfacing
    # only after (or being masked by an error from) remove_metadata().
    originally_md_exists = fgdb.metadata_exists(gdb=path_fixture, layer=FEATURE_CLASS)
    assert originally_md_exists is True, "Expected layer metadata but found none"

    # remove metadata
    fgdb.remove_metadata(
        gdb=path_fixture,
        layer=FEATURE_CLASS,
    )

    md_exists_after_removal = fgdb.metadata_exists(
        gdb=path_fixture, layer=FEATURE_CLASS
    )
    assert md_exists_after_removal is False, (
        "Expected no layer metadata, but found some"
    )


@gdb_paths
def test_remove_metadata(path_fixture):
    """Removing a layer's metadata leaves read_metadata() returning None and keeps every layer."""
    layers_before = fgdb.get_layers(path_fixture)
    fgdb.remove_metadata(gdb=path_fixture, layer=FEATURE_CLASS)
    layers_after = fgdb.get_layers(path_fixture)

    remaining_metadata = fgdb.read_metadata(path_fixture, FEATURE_CLASS)
    assert remaining_metadata is None

    # No gdb layers may be lost during the metadata removal.
    assert sorted(layers_before) == sorted(layers_after)
41 changes: 1 addition & 40 deletions dcpy/test/utils/geospatial/test_shapefile.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,16 @@
import shutil
import zipfile
from datetime import datetime
from pathlib import Path

import pytest
from pytest import fixture

from dcpy.models.data.shapefile_metadata import Metadata
from dcpy.utils.geospatial import shapefile
from dcpy.utils.geospatial.shapefile import generate_metadata

SHP_ZIP_NO_MD = "shapefile_single_pluto_feature_no_metadata.shp.zip"
SHP_ZIP_WITH_MD = "shapefile_single_pluto_feature_with_metadata.shp.zip"
METADATA_XML = "shapefile_metadata.xml"
METADATA_XML = "esri_metadata.xml"


@fixture
Expand Down Expand Up @@ -68,11 +66,6 @@ def temp_metadata_object(utils_resources_path):
return md_object


@fixture
def today_datestamp() -> str:
return datetime.now().strftime("%Y%m%d")


def _get_info_from_file_fixture(
request: pytest.FixtureRequest, fixture: str, file_type: str
) -> dict:
Expand Down Expand Up @@ -260,35 +253,3 @@ def test_read_metadata(request, path_fixture, file_type, subdir):

assert md.esri.scale_range.min_scale == "150000000"
assert md.esri.scale_range.max_scale == "5000"


def test_generate_metadata(today_datestamp):
md = generate_metadata()

expected_date = today_datestamp

assert hasattr(md, "esri")
esri = md.esri
assert isinstance(esri.crea_date, str)

# CreaTime has leading zeros and must be preserved as string
assert isinstance(esri.crea_time, str)
assert esri.crea_date == expected_date

# ArcGISFormat
assert isinstance(esri.arc_gis_format, float)
assert esri.arc_gis_format == 1.0

# SyncOnce should be string
assert isinstance(esri.sync_once, str)
assert esri.sync_once == "TRUE"

# mdHrLv.ScopeCd @value preserves leading zeros as string
assert hasattr(md, "md_hr_lv")
assert md.md_hr_lv.scope_cd.value == "005"
assert isinstance(md.md_hr_lv.scope_cd.value, str)

# mdDateSt should capture its text as an int and attribute Sync should be present
assert hasattr(md, "md_date_st")
assert isinstance(md.md_date_st.value, int)
assert md.md_date_st.sync == "TRUE"
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
<?xml version="1.0"?>
<metadata xml:lang="en">
<Esri>
<CreaDate>20250421</CreaDate>
<CreaTime>15471800</CreaTime>
<CreaDate>19611215</CreaDate>
<CreaTime>00000000</CreaTime>
<ArcGISFormat>1.0</ArcGISFormat>
<SyncOnce>TRUE</SyncOnce>
<DataProperties>
Expand All @@ -24,5 +24,5 @@
<mdHrLv>
<ScopeCd value="005"></ScopeCd>
</mdHrLv>
<mdDateSt Sync="TRUE">20250421</mdDateSt>
<mdDateSt Sync="TRUE">19611215</mdDateSt>
</metadata>
Binary file added dcpy/test/utils/resources/geodatabase.gdb.zip
Binary file not shown.
55 changes: 55 additions & 0 deletions dcpy/utils/geospatial/esri_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
from datetime import datetime

from dcpy.models.data.shapefile_metadata import (
Esri,
Mddatest,
Metadata,
Scalerange,
)


def generate_metadata() -> Metadata:
    """
    Build a default Esri metadata object stamped with the current date and time.

    No existing spatial dataset is required; the object is created on its own.
    """
    date_stamp, time_stamp = _get_esri_timestamp()

    # The same date stamp feeds both mdDateSt and Esri.CreaDate.
    date_element = Mddatest(value=date_stamp)
    esri_element = Esri(
        crea_date=date_stamp,
        crea_time=time_stamp,
        scale_range=Scalerange(),
    )
    return Metadata(esri=esri_element, md_date_st=date_element)


def _get_esri_timestamp(dt_obj=None):
"""
Generate Esri-style CreaDate and CreaTime values.

Args:
dt_obj: datetime object (uses current time if None)

Returns:
tuple: (CreaDate, CreaTime) as strings
"""
if dt_obj is None:
dt_obj = datetime.now()

# CreaDate: YYYYMMDD
crea_date = dt_obj.strftime("%Y%m%d")

# CreaTime: HHMMSSFF (hours, minutes, seconds, hundredths)
hundredths = 0 # Esri appears to ignore the hundredths in practice
crea_time = (
f"{dt_obj.hour:02d}{dt_obj.minute:02d}{dt_obj.second:02d}{hundredths:02d}"
)

return crea_date, crea_time
Loading
Loading