Skip to content

Commit 74b606c

Browse files
committed
Merge remote-tracking branch 'upstream/master'
2 parents 2cfb4f7 + 36feefa commit 74b606c

File tree

8 files changed

+548
-2
lines changed

8 files changed

+548
-2
lines changed

datacommons_client/client.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from datacommons_client.endpoints.resolve import ResolveEndpoint
77
from datacommons_client.models.observation import ObservationDate
88
from datacommons_client.utils.dataframes import add_entity_names_to_observations_dataframe
9+
from datacommons_client.utils.dataframes import add_property_constraints_to_observations_dataframe
910
from datacommons_client.utils.decorators import requires_pandas
1011
from datacommons_client.utils.error_handling import NoDataForPropertyError
1112

@@ -92,7 +93,8 @@ def _find_filter_facet_ids(
9293
date=date,
9394
entity_dcids=entity_dcids,
9495
variable_dcids=variable_dcids,
95-
select=["variable", "entity", "facet"])
96+
select=["variable", "entity", "facet"],
97+
)
9698
else:
9799
observations = self.observation.fetch_observations_by_entity_type(
98100
date=date,
@@ -120,6 +122,7 @@ def observations_dataframe(
120122
entity_type: Optional[str] = None,
121123
parent_entity: Optional[str] = None,
122124
property_filters: Optional[dict[str, str | list[str]]] = None,
125+
include_constraints_metadata: bool = False,
123126
):
124127
"""
125128
Fetches statistical observations and returns them as a Pandas DataFrame.
@@ -139,6 +142,9 @@ def observations_dataframe(
139142
Required if `entity_dcids="all"`. Defaults to None.
140143
property_filters (Optional[dict[str, str | list[str]]): An optional dictionary used to filter
141144
the data by using observation properties like `measurementMethod`, `unit`, or `observationPeriod`.
145+
include_constraints_metadata (bool): If True, includes the dcid and name of any constraint
146+
properties associated with the variable DCIDs (based on the `constraintProperties` property)
147+
in the returned DataFrame. Defaults to False.
142148
143149
Returns:
144150
pd.DataFrame: A DataFrame containing the requested observations.
@@ -181,7 +187,8 @@ def observations_dataframe(
181187
date=date,
182188
entity_dcids=entity_dcids,
183189
variable_dcids=variable_dcids,
184-
filter_facet_ids=facets)
190+
filter_facet_ids=facets,
191+
)
185192

186193
# Convert the observations to a DataFrame
187194
df = pd.DataFrame(observations.to_observation_records().model_dump())
@@ -193,4 +200,10 @@ def observations_dataframe(
193200
entity_columns=["entity", "variable"],
194201
)
195202

203+
if include_constraints_metadata:
204+
df = add_property_constraints_to_observations_dataframe(
205+
endpoint=self.node,
206+
observations_df=df,
207+
)
208+
196209
return df

datacommons_client/endpoints/node.py

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
from datacommons_client.endpoints.response import NodeResponse
1212
from datacommons_client.models.node import Name
1313
from datacommons_client.models.node import Node
14+
from datacommons_client.models.node import StatVarConstraint
15+
from datacommons_client.models.node import StatVarConstraints
1416
from datacommons_client.utils.graph import build_graph_map
1517
from datacommons_client.utils.graph import build_relationship_tree
1618
from datacommons_client.utils.graph import fetch_relationship_lru
@@ -23,6 +25,8 @@
2325

2426
PLACES_MAX_WORKERS = 10
2527

28+
CONSTRAINT_PROPERTY: str = "constraintProperties"
29+
2630
_DEPRECATED_METHODS: dict[str, dict[str, str | dict[str, str]]] = {
2731
"fetch_entity_parents": {
2832
"new_name": "fetch_place_parents",
@@ -534,3 +538,113 @@ def fetch_place_descendants(
534538
relationship="children",
535539
max_concurrent_requests=max_concurrent_requests,
536540
)
541+
542+
def _fetch_property_id_names(self, node_dcids: str | list[str],
                             properties: str | list[str]):
    """Return the targets of the given properties reduced to ``dcid`` and ``name``.

    The lookup is delegated to ``fetch_property_values``; every target node in
    its response is flattened to a two-key dictionary.

    Args:
      node_dcids: A single DCID or a list of DCIDs to query.
      properties: A property string or list of property strings.

    Returns:
      A nested mapping of the form
      `{ node_dcid: { property: [ {dcid, name}, ... ], ... }, ... }`.
    """
    response = self.fetch_property_values(node_dcids=node_dcids,
                                          properties=properties)
    properties_by_node = response.get_properties()

    # Targets that carry no dcid (terminal / literal values) are identified by
    # their 'value' instead; the same fallback is applied to the name.
    return {
        node_dcid: {
            prop: [{
                "dcid": target.dcid or target.value,
                "name": target.name or target.value,
            } for target in targets
                  ] for prop, targets in node_props.items()
        } for node_dcid, node_props in properties_by_node.items()
    }
572+
573+
def fetch_statvar_constraints(
        self, variable_dcids: str | list[str]) -> StatVarConstraints:
    """Fetch constraint property/value pairs for statistical variables, using
    the `constraintProperties` property.

    This returns, for each StatisticalVariable, the constraints that define it.
    The constraint properties are looked up first; their values are then read
    from the variables themselves in one additional request. That second
    request is skipped when none of the variables declares a constraint
    property.

    Args:
      variable_dcids: One or more StatisticalVariable DCIDs. Repeated DCIDs are
        collapsed, so every variable is reported exactly once.

    Returns:
      StatVarConstraints:
      ``{
          <sv_dcid>: [
              {
                  "constraint_id": <constraint_property_dcid>,
                  "constraint_name": <constraint_property_name>,
                  "value_id": <value_node_dcid>,
                  "value_name": <value_node_name>,
              },
              ...
          ],
          ...
      }``
      Variables without constraints map to an empty list. A constraint
      property for which the variable defines no value is omitted. When a
      constraint property has several values, only the first one is reported.
    """
    # Ensure variable_dcids is a list
    if isinstance(variable_dcids, str):
        variable_dcids = [variable_dcids]

    # Drop repeated DCIDs (keeping first-seen order). Without this, the build
    # loop below would visit the same key twice and append that variable's
    # constraints twice.
    variable_dcids = list(dict.fromkeys(variable_dcids))

    # Get constraints for the given variable DCIDs.
    constraints_mapping = self._fetch_property_id_names(
        node_dcids=variable_dcids, properties=[CONSTRAINT_PROPERTY])

    # Per statvar mapping of constraint property dcid -> name
    per_sv_constraint_names = {}
    # Global set of all constraint property IDs
    all_constraint_prop_ids = set()

    for sv in variable_dcids:
        # Constraint properties declared by this statvar (may be absent).
        prop_entries = constraints_mapping.get(sv,
                                               {}).get(CONSTRAINT_PROPERTY, [])
        id_to_name = {entry["dcid"]: entry.get("name") for entry in prop_entries}
        per_sv_constraint_names[sv] = id_to_name
        all_constraint_prop_ids.update(id_to_name)

    # In a single request, fetch all values for all the constraints, for all
    # statvars. Skip the request entirely when there is nothing to look up, so
    # that no call is made with an empty property list.
    values_map = {}
    if all_constraint_prop_ids:
        values_map = self._fetch_property_id_names(
            node_dcids=variable_dcids,
            properties=sorted(all_constraint_prop_ids),
        )

    # Build structured response. This will include vars with no constraints
    # (empty lists).
    result = {sv: [] for sv in variable_dcids}

    for sv, constraint_names in per_sv_constraint_names.items():
        sv_values = values_map.get(sv, {})

        for constraint_id, constraint_name in constraint_names.items():
            values = sv_values.get(constraint_id, [])
            # Skip constraint properties the stat var declares but does not
            # actually define a value for.
            if not values:
                continue

            first_value = values[0]
            result[sv].append(
                StatVarConstraint(
                    constraintId=constraint_id,
                    constraintName=constraint_name,
                    valueId=first_value["dcid"],
                    valueName=first_value.get("name"),
                ))

    return StatVarConstraints.model_validate(result)

datacommons_client/models/node.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,3 +90,20 @@ class NodeList(BaseDCModel, ListLikeRootModel[list[Node]]):
9090

9191
class NodeDCIDList(BaseDCModel, ListLikeRootModel[list[NodeDCID]]):
9292
"""A root model whose value is a list of NodeDCID strings."""
93+
94+
95+
class StatVarConstraint(BaseDCModel):
    """A single constraint (property and value) attached to a statistical variable."""

    # DCID of the constraint property.
    constraintId: NodeDCID
    # Name of the constraint property, when one is available.
    constraintName: Optional[str] = None
    # DCID of the value the variable holds for that property.
    valueId: NodeDCID
    # Name of that value, when one is available.
    valueName: Optional[str] = None
102+
103+
104+
class StatVarConstraints(
        BaseDCModel,
        DictLikeRootModel[dict[NodeDCID, list[StatVarConstraint]]],
):
    """A root model whose value maps statvar ids to lists of StatVarConstraint objects.

    This model is used to represent constraints associated with statistical variables.
    """

datacommons_client/tests/endpoints/test_node_endpoint.py

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from datacommons_client.models.node import Name
99
from datacommons_client.models.node import Node
1010
from datacommons_client.models.node import NodeGroup
11+
from datacommons_client.models.node import StatVarConstraints
1112
from datacommons_client.utils.names import DEFAULT_NAME_PROPERTY
1213
from datacommons_client.utils.names import NAME_WITH_LANGUAGE_PROPERTY
1314

@@ -395,3 +396,171 @@ def test_fetch_entity_ancestry_tree(mock_build_map, mock_build_tree):
395396
mock_build_tree.assert_called_once_with(root="Y",
396397
graph=mock_build_map.return_value[1],
397398
relationship_key="parents")
399+
400+
401+
def test__fetch_property_id_names_flattens_to_dcid_and_name():
    """The private helper keeps only dcid and name for every target node."""
    endpoint = NodeEndpoint(api=MagicMock(spec=API))

    # Stub fetch_property_values with a response made of Arcs / NodeGroup / Node.
    constraint_nodes = NodeGroup(nodes=[
        Node(dcid="p1", name="Prop One"),
        Node(dcid="p2", name="Prop Two"),
    ])
    stub_response = NodeResponse(
        data={"sv/1": Arcs(arcs={"constraintProperties": constraint_nodes})})
    endpoint.fetch_property_values = MagicMock(return_value=stub_response)

    result = endpoint._fetch_property_id_names("sv/1", "constraintProperties")

    expected_targets = [
        {"dcid": "p1", "name": "Prop One"},
        {"dcid": "p2", "name": "Prop Two"},
    ]
    assert result == {"sv/1": {"constraintProperties": expected_targets}}
    endpoint.fetch_property_values.assert_called_once_with(
        node_dcids="sv/1", properties="constraintProperties")
438+
439+
440+
def test_fetch_statvar_constraints_builds_constraints_and_values():
    """Constraint properties and their values are merged into one result."""
    endpoint = NodeEndpoint(api=MagicMock())

    # Reply to the first helper call: constraint property ids and names.
    constraints_map = {
        "sv/1": {
            "constraintProperties": [
                {"dcid": "p1", "name": "Prop One"},
                {"dcid": "p2", "name": "Prop Two"},
            ]
        }
    }
    # Reply to the second helper call: the values of those properties.
    values_map = {
        "sv/1": {
            "p1": [{"dcid": "v1", "name": "Val One"}],
            "p2": [{"dcid": "v2", "name": "Val Two"}],
        }
    }

    helper_patch = patch.object(endpoint,
                                "_fetch_property_id_names",
                                side_effect=[constraints_map, values_map])
    with helper_patch as mock_helper:
        result = endpoint.fetch_statvar_constraints(["sv/1"])

    # One helper call for constraintProperties, one for the values.
    assert mock_helper.call_count == 2
    assert isinstance(result, StatVarConstraints)
    assert "sv/1" in result

    sv_constraints = result["sv/1"]
    assert len(sv_constraints) == 2
    pairs = {(c.constraintId, c.valueId) for c in sv_constraints}
    assert pairs == {("p1", "v1"), ("p2", "v2")}
487+
488+
489+
def test_fetch_statvar_constraints_handles_string_input_and_no_constraints():
    """A lone string DCID with no constraints maps to an empty list."""
    endpoint = NodeEndpoint(api=MagicMock())

    # sv/empty declares no constraintProperties; an empty values map is queued
    # as the reply to any follow-up helper call.
    helper_replies = [
        {"sv/empty": {"constraintProperties": []}},
        {"sv/empty": {}},
    ]

    with patch.object(endpoint,
                      "_fetch_property_id_names",
                      side_effect=helper_replies):
        # The DCID is passed as a plain string rather than a list.
        result = endpoint.fetch_statvar_constraints("sv/empty")

    assert isinstance(result, StatVarConstraints)
    assert result["sv/empty"] == []
506+
507+
508+
def test__fetch_property_id_names_handles_literal_values():
    """A literal target (value only, no dcid) is reported under both keys."""
    endpoint = NodeEndpoint(api=MagicMock(spec=API))

    # The only target of p1 is a literal string without a dcid.
    literal_group = NodeGroup(nodes=[Node(value="LiteralValue")])
    stub_response = NodeResponse(data={"sv/1": Arcs(arcs={"p1": literal_group})})
    endpoint.fetch_property_values = MagicMock(return_value=stub_response)

    result = endpoint._fetch_property_id_names("sv/1", "p1")

    expected_entry = {"dcid": "LiteralValue", "name": "LiteralValue"}
    assert result == {"sv/1": {"p1": [expected_entry]}}
    endpoint.fetch_property_values.assert_called_once_with(node_dcids="sv/1",
                                                           properties="p1")
532+
533+
534+
def test_fetch_statvar_constraints_skips_missing_constraint_values():
    """A constraintProperty without a value for the SV is dropped silently."""
    endpoint = NodeEndpoint(api=MagicMock())

    constraints_map = {
        "sv/1": {
            "constraintProperties": [
                {"dcid": "p1", "name": "Prop One"},
                {"dcid": "p2", "name": "Prop Two"},
            ]
        }
    }
    # Only p1 resolves to a value; p2 comes back empty.
    values_map = {
        "sv/1": {
            "p1": [{"dcid": "v1", "name": "Val One"}],
            "p2": [],
        }
    }

    with patch.object(endpoint,
                      "_fetch_property_id_names",
                      side_effect=[constraints_map, values_map]):
        result = endpoint.fetch_statvar_constraints(["sv/1"])

    assert isinstance(result, StatVarConstraints)
    assert "sv/1" in result

    # p1 is the single well-formed constraint that survives.
    sv_constraints = result["sv/1"]
    assert len(sv_constraints) == 1
    only_constraint = sv_constraints[0]
    assert only_constraint.constraintId == "p1"
    assert only_constraint.valueId == "v1"

0 commit comments

Comments
 (0)