Skip to content

Commit 36feefa

Browse files
authored
Optionally include constraint property metadata in observation DataFrames (#274)
This PR adds support for including property constraint metadata for statistical variables in observation DataFrames. To support this, it adds new models and endpoint methods to fetch and represent these constraints, along with tests for all new functionality.

In short:

* Added new pydantic models `StatVarConstraint` and `StatVarConstraints` to represent constraints associated with statistical variables.
* Implemented `NodeEndpoint.fetch_statvar_constraints` to fetch and structure constraint property/value pairs for a set of statvars.
* Updated `Client.observations_dataframe` to support an `include_constraints_metadata` argument; when enabled, the returned DataFrame includes constraint id/name columns for each variable.
* Added utility function `add_property_constraints_to_observations_dataframe` to insert constraint columns into observation DataFrames.
1 parent a250449 commit 36feefa

File tree

8 files changed

+548
-2
lines changed

8 files changed

+548
-2
lines changed

datacommons_client/client.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from datacommons_client.endpoints.resolve import ResolveEndpoint
77
from datacommons_client.models.observation import ObservationDate
88
from datacommons_client.utils.dataframes import add_entity_names_to_observations_dataframe
9+
from datacommons_client.utils.dataframes import add_property_constraints_to_observations_dataframe
910
from datacommons_client.utils.decorators import requires_pandas
1011
from datacommons_client.utils.error_handling import NoDataForPropertyError
1112

@@ -92,7 +93,8 @@ def _find_filter_facet_ids(
9293
date=date,
9394
entity_dcids=entity_dcids,
9495
variable_dcids=variable_dcids,
95-
select=["variable", "entity", "facet"])
96+
select=["variable", "entity", "facet"],
97+
)
9698
else:
9799
observations = self.observation.fetch_observations_by_entity_type(
98100
date=date,
@@ -120,6 +122,7 @@ def observations_dataframe(
120122
entity_type: Optional[str] = None,
121123
parent_entity: Optional[str] = None,
122124
property_filters: Optional[dict[str, str | list[str]]] = None,
125+
include_constraints_metadata: bool = False,
123126
):
124127
"""
125128
Fetches statistical observations and returns them as a Pandas DataFrame.
@@ -139,6 +142,9 @@ def observations_dataframe(
139142
Required if `entity_dcids="all"`. Defaults to None.
140143
property_filters (Optional[dict[str, str | list[str]]): An optional dictionary used to filter
141144
the data by using observation properties like `measurementMethod`, `unit`, or `observationPeriod`.
145+
include_constraints_metadata (bool): If True, includes the dcid and name of any constraint
146+
properties associated with the variable DCIDs (based on the `constraintProperties` property)
147+
in the returned DataFrame. Defaults to False.
142148
143149
Returns:
144150
pd.DataFrame: A DataFrame containing the requested observations.
@@ -181,7 +187,8 @@ def observations_dataframe(
181187
date=date,
182188
entity_dcids=entity_dcids,
183189
variable_dcids=variable_dcids,
184-
filter_facet_ids=facets)
190+
filter_facet_ids=facets,
191+
)
185192

186193
# Convert the observations to a DataFrame
187194
df = pd.DataFrame(observations.to_observation_records().model_dump())
@@ -193,4 +200,10 @@ def observations_dataframe(
193200
entity_columns=["entity", "variable"],
194201
)
195202

203+
if include_constraints_metadata:
204+
df = add_property_constraints_to_observations_dataframe(
205+
endpoint=self.node,
206+
observations_df=df,
207+
)
208+
196209
return df

datacommons_client/endpoints/node.py

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
from datacommons_client.endpoints.response import NodeResponse
1212
from datacommons_client.models.node import Name
1313
from datacommons_client.models.node import Node
14+
from datacommons_client.models.node import StatVarConstraint
15+
from datacommons_client.models.node import StatVarConstraints
1416
from datacommons_client.utils.graph import build_graph_map
1517
from datacommons_client.utils.graph import build_relationship_tree
1618
from datacommons_client.utils.graph import fetch_relationship_lru
@@ -23,6 +25,8 @@
2325

2426
PLACES_MAX_WORKERS = 10
2527

28+
CONSTRAINT_PROPERTY: str = "constraintProperties"
29+
2630
_DEPRECATED_METHODS: dict[str, dict[str, str | dict[str, str]]] = {
2731
"fetch_entity_parents": {
2832
"new_name": "fetch_place_parents",
@@ -534,3 +538,113 @@ def fetch_place_descendants(
534538
relationship="children",
535539
max_concurrent_requests=max_concurrent_requests,
536540
)
541+
542+
def _fetch_property_id_names(self, node_dcids: str | list[str],
543+
properties: str | list[str]):
544+
"""Fetch target nodes for given properties and return only (dcid, name).
545+
546+
For each input node and each requested property, returns the list of target
547+
nodes as dictionaries with ``dcid`` and ``name``.
548+
549+
Args:
550+
node_dcids: A single DCID or a list of DCIDs to query.
551+
properties: A property string or list of property strings.
552+
553+
Returns:
554+
A mapping:
555+
`{ node_dcid: { property: [ {dcid, name}, ... ], ... }, ... }`.
556+
"""
557+
data = self.fetch_property_values(node_dcids=node_dcids,
558+
properties=properties).get_properties()
559+
560+
result: dict[str, dict[str, list[dict]]] = {}
561+
562+
for node, props in data.items():
563+
result.setdefault(node, {})
564+
for prop, metadata in props.items():
565+
dest = result[node].setdefault(prop, [])
566+
for n in metadata:
567+
# Prefer 'dcid', but if property is terminal, fall back to 'value'.
568+
dcid = n.dcid or n.value
569+
name = n.name or n.value
570+
dest.append({"dcid": dcid, "name": name})
571+
return result
572+
573+
def fetch_statvar_constraints(
    self, variable_dcids: str | list[str]) -> StatVarConstraints:
  """Fetch constraint property/value pairs for statistical variables, using
  the `constraintProperties` property.

  The lookup happens in two steps. First the `constraintProperties` of every
  variable are fetched. Then, in a single follow-up request, the value each
  variable holds for those properties is fetched. The follow-up request is
  skipped when none of the variables declares any constraint property.

  Args:
    variable_dcids: One or more StatisticalVariable DCIDs. A single string is
      treated as a one-element list; repeated DCIDs are only processed once.

  Returns:
    StatVarConstraints:
      ``{
          <sv_dcid>: [
              {
                  "constraintId": <constraint_property_dcid>,
                  "constraintName": <constraint_property_name>,
                  "valueId": <value_node_dcid>,
                  "valueName": <value_node_name>,
              },
              ...
          ],
          ...
      }``
      Every requested variable has an entry; variables without constraints
      map to an empty list. When a constraint property has several values,
      only the first one returned is reported.
  """
  # Ensure variable_dcids is a list
  if isinstance(variable_dcids, str):
    variable_dcids = [variable_dcids]

  # Drop repeated DCIDs (keeping first-seen order) so a variable listed twice
  # does not end up with every constraint duplicated in its result list.
  variable_dcids = list(dict.fromkeys(variable_dcids))

  # Step 1: which constraint properties does each variable declare?
  constraints_mapping = self._fetch_property_id_names(
      node_dcids=variable_dcids, properties=[CONSTRAINT_PROPERTY])

  # Per-statvar mapping of constraint property dcid -> name.
  per_sv_constraint_names: dict[str, dict] = {}
  # Union of all constraint property dcids across the requested variables.
  all_constraint_prop_ids: set = set()

  for sv in variable_dcids:
    prop_entries = constraints_mapping.get(sv, {}).get(CONSTRAINT_PROPERTY, [])
    id_to_name = {entry["dcid"]: entry.get("name") for entry in prop_entries}
    per_sv_constraint_names[sv] = id_to_name
    all_constraint_prop_ids.update(id_to_name)

  # Structured response. Variables with no constraints keep an empty list.
  result: dict[str, list] = {sv: [] for sv in variable_dcids}

  # Nothing to look up: avoid sending a request with an empty property list.
  if not all_constraint_prop_ids:
    return StatVarConstraints.model_validate(result)

  # Step 2: in a single request, fetch the values of all constraint
  # properties for all variables. Sorting keeps the request deterministic.
  values_map = self._fetch_property_id_names(
      node_dcids=variable_dcids,
      properties=sorted(all_constraint_prop_ids),
  )

  for sv, constraint_names in per_sv_constraint_names.items():
    sv_values = values_map.get(sv, {})

    for constraint_id, constraint_name in constraint_names.items():
      values = sv_values.get(constraint_id, [])
      # Skip constraint properties the variable lists but has no value for.
      if not values:
        continue

      first_value = values[0]
      result[sv].append(
          StatVarConstraint(
              constraintId=constraint_id,
              constraintName=constraint_name,
              valueId=first_value["dcid"],
              valueName=first_value.get("name"),
          ))

  return StatVarConstraints.model_validate(result)

datacommons_client/models/node.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,3 +90,20 @@ class NodeList(BaseDCModel, ListLikeRootModel[list[Node]]):
9090

9191
class NodeDCIDList(BaseDCModel, ListLikeRootModel[list[NodeDCID]]):
9292
"""A root model whose value is a list of NodeDCID strings."""
93+
94+
95+
class StatVarConstraint(BaseDCModel):
  """One constraint (a property/value pair) attached to a statistical variable."""

  # The constraint property: its DCID and, when known, its display name.
  constraintId: NodeDCID
  constraintName: Optional[str] = None

  # The value held for that property: its DCID and, when known, its name.
  valueId: NodeDCID
  valueName: Optional[str] = None
102+
103+
104+
class StatVarConstraints(
    BaseDCModel,
    DictLikeRootModel[dict[NodeDCID, list[StatVarConstraint]]],
):
  """A root model mapping statvar ids to lists of StatVarConstraint objects.

  Each key is the id of a statistical variable and each value is the list of
  constraints associated with that variable.
  """

datacommons_client/tests/endpoints/test_node_endpoint.py

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from datacommons_client.models.node import Name
99
from datacommons_client.models.node import Node
1010
from datacommons_client.models.node import NodeGroup
11+
from datacommons_client.models.node import StatVarConstraints
1112
from datacommons_client.utils.names import DEFAULT_NAME_PROPERTY
1213
from datacommons_client.utils.names import NAME_WITH_LANGUAGE_PROPERTY
1314

@@ -395,3 +396,171 @@ def test_fetch_entity_ancestry_tree(mock_build_map, mock_build_tree):
395396
mock_build_tree.assert_called_once_with(root="Y",
396397
graph=mock_build_map.return_value[1],
397398
relationship_key="parents")
399+
400+
401+
def test__fetch_property_id_names_flattens_to_dcid_and_name():
  """The helper reduces every target node to a plain ``{dcid, name}`` dict."""
  node_endpoint = NodeEndpoint(api=MagicMock(spec=API))

  # Canned fetch_property_values response: one statvar whose
  # constraintProperties arc points at two named nodes.
  constraint_nodes = NodeGroup(nodes=[
      Node(dcid="p1", name="Prop One"),
      Node(dcid="p2", name="Prop Two"),
  ])
  canned_response = NodeResponse(
      data={"sv/1": Arcs(arcs={"constraintProperties": constraint_nodes})})
  node_endpoint.fetch_property_values = MagicMock(
      return_value=canned_response)

  flattened = node_endpoint._fetch_property_id_names("sv/1",
                                                     "constraintProperties")

  expected_targets = [
      {
          "dcid": "p1",
          "name": "Prop One"
      },
      {
          "dcid": "p2",
          "name": "Prop Two"
      },
  ]
  assert flattened == {"sv/1": {"constraintProperties": expected_targets}}
  # The arguments must be forwarded to fetch_property_values unchanged.
  node_endpoint.fetch_property_values.assert_called_once_with(
      node_dcids="sv/1", properties="constraintProperties")
438+
439+
440+
def test_fetch_statvar_constraints_builds_constraints_and_values():
  """Constraint properties and their values are combined per statvar."""
  node_endpoint = NodeEndpoint(api=MagicMock())

  # Canned helper responses, in call order.
  # 1) The constraint properties (ids and names) declared by sv/1.
  declared_properties = {
      "sv/1": {
          "constraintProperties": [
              {
                  "dcid": "p1",
                  "name": "Prop One"
              },
              {
                  "dcid": "p2",
                  "name": "Prop Two"
              },
          ]
      }
  }
  # 2) The value sv/1 holds for each of those properties.
  property_values = {
      "sv/1": {
          "p1": [{
              "dcid": "v1",
              "name": "Val One"
          }],
          "p2": [{
              "dcid": "v2",
              "name": "Val Two"
          }],
      }
  }

  with patch.object(
      node_endpoint,
      "_fetch_property_id_names",
      side_effect=[declared_properties, property_values],
  ) as helper:
    constraints = node_endpoint.fetch_statvar_constraints(["sv/1"])

  # One helper call for constraintProperties, one for the values.
  assert helper.call_count == 2
  assert isinstance(constraints, StatVarConstraints)
  assert "sv/1" in constraints

  # Both constraints come back, each paired with its own value.
  sv_constraints = constraints["sv/1"]
  assert len(sv_constraints) == 2
  pairs = {(item.constraintId, item.valueId) for item in sv_constraints}
  assert pairs == {("p1", "v1"), ("p2", "v2")}
487+
488+
489+
def test_fetch_statvar_constraints_handles_string_input_and_no_constraints():
  """A bare string DCID with no constraint properties maps to an empty list."""
  node_endpoint = NodeEndpoint(api=MagicMock())

  # Canned helper responses, in call order: the constraintProperties lookup
  # (nothing declared for sv/empty), followed by an empty map in case a
  # values lookup is made as well.
  helper_responses = [
      {
          "sv/empty": {
              "constraintProperties": []
          }
      },
      {
          "sv/empty": {}
      },
  ]

  with patch.object(node_endpoint,
                    "_fetch_property_id_names",
                    side_effect=helper_responses):
    # Pass the DCID as a plain string rather than a list.
    constraints = node_endpoint.fetch_statvar_constraints("sv/empty")

  assert isinstance(constraints, StatVarConstraints)
  assert constraints["sv/empty"] == []
506+
507+
508+
def test__fetch_property_id_names_handles_literal_values():
  """Literal targets (no dcid) are reported using their value for both keys."""
  node_endpoint = NodeEndpoint(api=MagicMock(spec=API))

  # Canned response in which the only target of p1 is a literal string, i.e.
  # a node carrying a value but no dcid.
  literal_targets = NodeGroup(nodes=[Node(value="LiteralValue")])
  canned_response = NodeResponse(
      data={"sv/1": Arcs(arcs={"p1": literal_targets})})
  node_endpoint.fetch_property_values = MagicMock(
      return_value=canned_response)

  flattened = node_endpoint._fetch_property_id_names("sv/1", "p1")

  expected_entry = {"dcid": "LiteralValue", "name": "LiteralValue"}
  assert flattened == {"sv/1": {"p1": [expected_entry]}}
  node_endpoint.fetch_property_values.assert_called_once_with(
      node_dcids="sv/1", properties="p1")
532+
533+
534+
def test_fetch_statvar_constraints_skips_missing_constraint_values():
  """A constraint property with no value for the SV is dropped, not an error."""
  node_endpoint = NodeEndpoint(api=MagicMock())

  # sv/1 declares two constraint properties...
  declared_properties = {
      "sv/1": {
          "constraintProperties": [
              {
                  "dcid": "p1",
                  "name": "Prop One"
              },
              {
                  "dcid": "p2",
                  "name": "Prop Two"
              },
          ]
      }
  }
  # ...but only p1 actually has a value; p2 comes back empty.
  property_values = {
      "sv/1": {
          "p1": [{
              "dcid": "v1",
              "name": "Val One"
          }],
          "p2": [],
      }
  }

  with patch.object(
      node_endpoint,
      "_fetch_property_id_names",
      side_effect=[declared_properties, property_values],
  ):
    constraints = node_endpoint.fetch_statvar_constraints(["sv/1"])

  assert isinstance(constraints, StatVarConstraints)
  assert "sv/1" in constraints

  # Only the well-formed constraint (p1 -> v1) survives.
  sv_constraints = constraints["sv/1"]
  assert len(sv_constraints) == 1
  only_constraint = sv_constraints[0]
  assert only_constraint.constraintId == "p1"
  assert only_constraint.valueId == "v1"

0 commit comments

Comments
 (0)