Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ RUN apt-get -q update && \

# Install python
RUN python setup.py -q install
RUN pip3 install --upgrade requests

# Run the tests
RUN ./build.sh
3 changes: 0 additions & 3 deletions datacommons/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,6 @@ py_library(
name = "datacommons",
srcs = glob(["*.py"]),
deps = [
requirement("google-api-python-client"),
requirement("httplib2"),
requirement("oauth2client"),
requirement("pandas"),
]
)
8 changes: 4 additions & 4 deletions datacommons/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,13 @@
# limitations under the License.
"""Data Commons module."""

from __future__ import absolute_import
from __future__ import division
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no matter how I refresh, I still see this as deleted. will come back to it but can you check that this was put back?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added back

from __future__ import print_function
# Data Commons SPARQL query support
from datacommons.query import Query

from datacommons.datacommons import Query
# Data Commons Python Client API
from datacommons.core import get_property_labels, get_property_values, get_triples
from datacommons.places import get_places_in
from datacommons.populations import get_populations, get_observations

# Other utilities
from .utils import clean_frame, flatten_frame
41 changes: 25 additions & 16 deletions datacommons/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@


def get_property_labels(dcids, out=True):
""" Returns a map from given dcids to a list of defined properties defined.
""" Returns the labels of properties defined for the given dcids.

The return value is a dictionary mapping dcids to lists of property labels.

Args:
dcids: A list of nodes identified by their dcids.
Expand All @@ -47,9 +49,9 @@ def get_property_labels(dcids, out=True):
results = {}
for dcid in dcids:
if out:
results[dcid] = payload[dcid]['outArcs']
results[dcid] = payload[dcid]['outLabels']
else:
results[dcid] = payload[dcid]['inArcs']
results[dcid] = payload[dcid]['inLabels']
return results


Expand Down Expand Up @@ -95,33 +97,36 @@ def get_property_values(dcids,
payload = utils._format_response(res)

# Create the result format for when dcids is provided as a list.
result = defaultdict(list)
results = defaultdict(list)
for dcid in dcids:
# Make sure each dcid is mapped to an empty list.
results[dcid]

# Add elements to this list as necessary.
if dcid in payload and prop in payload[dcid]:
for node in payload[dcid][prop]:
if 'dcid' in node:
result[dcid].append(node['dcid'])
results[dcid].append(node['dcid'])
elif 'value' in node:
result[dcid].append(node['value'])
else:
result[dcid] = []
results[dcid].append(node['value'])

# Format the result as a Series if a Pandas Series is provided.
# Format the results as a Series if a Pandas Series is provided.
if isinstance(dcids, pd.Series):
return pd.Series([result[dcid] for dcid in dcids])
return dict(result)
return pd.Series([results[dcid] for dcid in dcids])
return dict(results)


def get_triples(dcids, limit=utils._MAX_LIMIT):
""" Returns a list of triples where the dcid is either a subject or object.
""" Returns all triples associated with the given dcids.

The return value is a list of tuples (s, p, o) where s denotes the subject
entity, p the property, and o the object.
The return value is a dictionary mapping given dcids to lists of triples. The
triples are represented as 3-tuples (s, p, o) where "s" denotes the subject, "p"
the property, and "o" the object.

Args:
dcid: A list of dcids to get triples for.
dcids: A list of dcids to get triples for.
limit: The maximum number of triples to get for each combination of property
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can say property label to be more clear

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will clarify with a section on what a triple is.

and type of the neighboring node.
and type of property value.
"""
# Generate the GetTriple query and send the request.
url = utils._API_ROOT + utils._API_ENDPOINTS['get_triples']
Expand All @@ -131,6 +136,10 @@ def get_triples(dcids, limit=utils._MAX_LIMIT):
# Create a map from dcid to list of triples.
results = defaultdict(list)
for dcid in dcids:
# Make sure each dcid is mapped to an empty list.
results[dcid]

# Add triples as appropriate
for t in payload[dcid]:
if 'objectId' in t:
results[dcid].append(
Expand Down
31 changes: 29 additions & 2 deletions datacommons/examples/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -1,8 +1,35 @@
load("@requirements//:requirements.bzl", "requirement")

py_binary(
name="query_basic",
srcs=["query_basic.py"],
name="core",
srcs=["core.py"],
deps=[
"//datacommons:datacommons",
requirement("pandas"),
]
)

py_binary(
name="places",
srcs=["places.py"],
deps=[
"//datacommons:datacommons",
requirement("pandas"),
]
)

py_binary(
name="populations",
srcs=["populations.py"],
deps=[
"//datacommons:datacommons",
requirement("pandas"),
]
)

py_binary(
name="query",
srcs=["query.py"],
deps=[
"//datacommons:datacommons",
requirement("pandas"),
Expand Down
4 changes: 4 additions & 0 deletions datacommons/examples/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@

"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import datacommons as dc
import pandas as pd

Expand Down
7 changes: 6 additions & 1 deletion datacommons/examples/places.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,15 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Basic demo for get_places_in
""" Data Commons Python Client API examples.

Basic demo for get_places_in
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import datacommons as dc
import pandas as pd

Expand Down
7 changes: 6 additions & 1 deletion datacommons/examples/populations.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,15 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Basic demo for get_places_in
""" Data Commons Python Client API examples.

Basic demo for get_populations and get_observations.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import datacommons as dc
import pandas as pd

Expand Down
7 changes: 6 additions & 1 deletion datacommons/examples/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,15 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Example client for DataCommons Python API.
""" Data Commons Python Client API examples.

Example on how to use the Client API SPARQL query wrapper.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import datacommons as dc
import pandas as pd

Expand Down
4 changes: 3 additions & 1 deletion datacommons/places.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Data Commons base Python Client API.

"""Data Commons Places wrapper functions."""
Places wrapper functions.
"""

from __future__ import absolute_import
from __future__ import division
Expand Down
29 changes: 22 additions & 7 deletions datacommons/populations.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Data Commons base Python Client API.

"""Data Commons Populations wrapper functions."""
StatisticalPopulation and Observation wrapper functions.
"""

from __future__ import absolute_import
from __future__ import division
Expand All @@ -28,7 +30,8 @@ def get_populations(dcids, population_type, constraining_properties={}):
""" Returns StatisticalPopulation dcids located at the given dcids.

When the dcids are given as a list, the returned property values are formatted
as a map from given dcid to associated StatatisticalPopulation dcid.
as a map from given dcid to associated StatisticalPopulation dcid. The dcid
will *not* be a member of the dict if a population is not located there.

When the dcids are given as a Pandas Series, returned StatisticalPopulations
are formatted as a Pandas Series where the i-th entry corresponds to the
Expand Down Expand Up @@ -58,10 +61,12 @@ def get_populations(dcids, population_type, constraining_properties={}):
# Create the results and format it appropriately
result = utils._format_expand_payload(
payload, 'population', must_exist=dcids)
flattened = utils._flatten_results(result)
if isinstance(dcids, pd.Series):
flattened = utils._flatten_results(result, default_value="")
return pd.Series([flattened[dcid] for dcid in dcids])
return flattened

# Drop empty results while flattening
return utils._flatten_results(result)


def get_observations(dcids,
Expand All @@ -73,7 +78,8 @@ def get_observations(dcids,
""" Returns Observations made of the given dcids.

When the dcids are given as a list, the returned Observations are formatted
as a map from given dcid to Observation dcid.
as a map from given dcid to Observation dcid. The dcid will *not* be a member
of the dict if there is no available observation for it.

If the dcids field is a Pandas Series, then the return value is a Series where
the i-th cell is the list of values associated with the given property for the
Expand Down Expand Up @@ -116,8 +122,17 @@ def get_observations(dcids,
# Create the results and format it appropriately
result = utils._format_expand_payload(
payload, 'observation', must_exist=dcids)
flattened = utils._flatten_results(result)
if isinstance(dcids, pd.Series):
flattened = utils._flatten_results(result, default_value="")
series = pd.Series([flattened[dcid] for dcid in dcids])
return series.apply(pd.to_numeric, errors='coerce')
return flattened

# Drop empty results by calling _flatten_results without default_value, then
# coerce the type to float if possible.
typed_results = {}
for k, v in utils._flatten_results(result).items():
try:
typed_results[k] = float(v)
except ValueError:
typed_results[k] = v
return typed_results
2 changes: 1 addition & 1 deletion datacommons/datacommons.py → datacommons/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# limitations under the License.
""" Data Commons base Python Client API.

Contains Query which performs graph queries on the Data Commons kg.
Query object for wrapping SPARQL support in Data Commons
"""

from __future__ import absolute_import
Expand Down
38 changes: 36 additions & 2 deletions datacommons/test/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -1,8 +1,42 @@
load("@requirements//:requirements.bzl", "requirement")


py_test(
name = "core_test",
srcs = ["core_test.py"],
deps = [
"//datacommons:datacommons",
requirement("mock"),
requirement("pandas"),
],
python_version = "PY3"
)

py_test(
name = "places_test",
srcs = ["places_test.py"],
deps = [
"//datacommons:datacommons",
requirement("mock"),
requirement("pandas"),
],
python_version = "PY3"
)

py_test(
name = "populations_test",
srcs = ["populations_test.py"],
deps = [
"//datacommons:datacommons",
requirement("mock"),
requirement("pandas"),
],
python_version = "PY3"
)

py_test(
name = "datacommons_test",
srcs = ["datacommons_test.py"],
name = "query_test",
srcs = ["query_test.py"],
deps = [
"//datacommons:datacommons",
requirement("mock"),
Expand Down
Loading