Skip to content

Commit d2848c5

Browse files
committed
Updated iter method to use iter_datalinks, added documentation and tests
1 parent 3bac206 commit d2848c5

File tree

3 files changed

+170
-14
lines changed

3 files changed

+170
-14
lines changed

docs/dal/index.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -869,6 +869,15 @@ DatalinkResults using
869869
>>> next(datalink.bysemantics("#this")).content_type
870870
'application/fits'
871871

872+
As data becomes available from different cloud providers, some services are
873+
including cloud access information in the associated Datalink in JSON format.
874+
The ``get_cloud_params`` and ``iter_get_cloud_params`` functions for
875+
Records and Results, respectively, return an ``astropy.table.Table`` with the
876+
parameters needed to access the data through the specified cloud service provider.
877+
878+
More generic functions, ``parse_json_params`` and ``iter_parse_json_params``,
879+
act directly on the Record or Results object and can parse JSON columns with
880+
a given column name, key, and optionally parameters to match.
872881

873882
Server-side processing
874883
----------------------

pyvo/dal/adhoc.py

Lines changed: 88 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -403,7 +403,28 @@ def iter_parse_json_params(
403403
verbose: bool=False,
404404
**match_params
405405
):
406+
"""
407+
Iterate over all Records in a DalResult and return parsed json parameters.
408+
409+
Parameters
410+
----------
411+
colname : str, optional
412+
The column containing JSON to be parsed, by default "cloud_access"
413+
key : str, optional
414+
The key to filter JSON results by, by default "aws"
415+
verbose : bool, optional
416+
Whether to print progress and errors, by default False
417+
**match_params : str, optional
418+
Any further parameters to match on.
406419
420+
Returns
421+
-------
422+
astropy.Table
423+
A table containing the JSON parameters separated into columns, each
424+
row corresponding to a matching JSON entry for each DataLinkRecord
425+
for each row of the original DalResult.
426+
427+
"""
407428
for irow, record in enumerate(self):
408429
access_points = record.parse_json_params(
409430
colname=colname,
@@ -427,9 +448,55 @@ def iter_get_cloud_params(
427448
verbose: bool=False,
428449
**match_params
429450
):
430-
for irow, record in enumerate(self):
451+
"""
452+
Iterate over all Records in a DalResult and return parsed cloud parameters.
453+
454+
Parameters
455+
----------
456+
colname : str, optional
457+
The column containing JSON to be parsed, by default "cloud_access"
458+
provider : str, optional
459+
The key to filter JSON results by, by default "aws"
460+
verbose : bool, optional
461+
Whether to print progress and errors, by default False
462+
**match_params : str, optional
463+
Any further parameters to match on.
464+
465+
Returns
466+
-------
467+
astropy.Table
468+
A table containing the JSON parameters separated into columns, each
469+
row corresponding to matching JSON entries from each Record.
470+
471+
"""
472+
for irow, dl_results in enumerate(self.iter_datalinks()):
473+
474+
products = dl_results.bysemantics("#this")
475+
476+
for jrow, row in enumerate(products):
477+
# if no colname column, there is nothing to do
478+
try:
479+
access_points = row.parse_json_params(
480+
colname=colname,
481+
key=provider,
482+
verbose=verbose,
483+
**match_params
484+
)
485+
access_points.add_column([jrow]*len(access_points), name="datalink_row", index=0)
486+
if jrow == 0:
487+
new_dl_table = access_points
488+
else:
489+
for row in access_points.iterrows():
490+
new_dl_table.add_row(row)
491+
except KeyError:
492+
# no json column, continue
493+
if verbose:
494+
print(f'No column {colname} found for row {irow}, datalink {jrow}')
495+
new_dl_table = TableElement(VOTableFile()).to_table()
496+
continue
497+
431498
# do the json parsing
432-
cloud_params = record.get_cloud_params(colname, provider, verbose, **match_params)
499+
cloud_params = access_points
433500
cloud_params.add_column([irow]*len(cloud_params), name="record_row", index=0)
434501
if irow == 0:
435502
new_table = cloud_params
@@ -498,16 +565,20 @@ def parse_json_params(
498565
499566
Parameters
500567
----------
501-
colname: str
502-
name of column to search in
503-
provider: str, optional
504-
name of data provider: only 'aws' is presently supported.
505-
verbose: bool
506-
If True, print progress and debug text.
507-
508-
Return
509-
------
510-
A dict or a list of dict of parameters for every row in products
568+
colname : str, optional
569+
The column containing JSON to be parsed, by default "cloud_access"
570+
key : str, optional
571+
The key to filter JSON results by, by default "aws"
572+
verbose : bool, optional
573+
Whether to print progress and errors, by default False
574+
**match_params : str, optional
575+
Any further parameters to match on.
576+
577+
Returns
578+
-------
579+
astropy.Table
580+
A table containing the JSON parameters separated into columns, each
581+
row representing a matching JSON entry.
511582
512583
"""
513584
import json
@@ -548,7 +619,7 @@ def parse_json_params(
548619
return new_table
549620

550621
def get_cloud_params(self, colname="cloud_access", provider="aws", verbose=False, **match_params):
551-
"""Parse information stored as JSON by key
622+
"""Parse cloud information stored as JSON by provider
552623
553624
Parameters
554625
----------
@@ -558,10 +629,13 @@ def get_cloud_params(self, colname="cloud_access", provider="aws", verbose=False
558629
name of data provider: only 'aws' is presently supported.
559630
verbose: bool
560631
If True, print progress and debug text.
632+
**match_params
Any further parameters to match on.
561633

Returns
563635
-------
564-
An astropy Table with parameters for every row in the datalinks
636+
astropy.Table
637+
A table containing the JSON parameters separated into columns,
638+
each row being a unique JSON entry and/or from a different DatalinkRecord.
565639
566640
"""
567641
dl_results = self.getdatalink()

pyvo/dal/tests/test_datalink.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,12 @@
44
Tests for pyvo.dal.datalink
55
"""
66
from functools import partial
7+
from io import BytesIO
78
import re
89

910
import pytest
1011

12+
from astropy.io.votable import parse, writeto, from_table
1113
import pyvo as vo
1214
from pyvo.dal.adhoc import DatalinkResults, DALServiceError
1315
from pyvo.dal.sia2 import SIA2Results
@@ -74,6 +76,21 @@ def callback(request, context):
7476
) as matcher:
7577
yield matcher
7678

79+
@pytest.fixture()
def datalink_cloud(mocker):
    """Serve a datalink document that carries a ``cloud_access`` JSON column.

    Registers ``http://example.com/datalink-cloud.xml`` on the mocker. Each
    GET returns the base ``datalink.xml`` test document with one extra
    ``cloud_access`` column whose value is the same AWS JSON string on every
    row.

    Yields
    ------
    The matcher returned by ``mocker.register_uri``.
    """
    def callback(request, context):
        # Local import: only this fixture needs it.
        import os

        # Resolve the data file relative to this test module so the fixture
        # does not depend on the directory pytest was launched from.
        dl_path = os.path.join(
            os.path.dirname(os.path.abspath(__file__)),
            'data', 'datalink', 'datalink.xml'
        )
        dl_base = parse(dl_path)
        dl_base_table = dl_base.get_first_table().to_table()

        cloud_access_str = '{"aws": {"bucket_name": "test", "key":"path/to/cloudfile.fits", "region": "us-west-2"}}'
        # One JSON entry per row; size the column from the table itself
        # instead of hard-coding the row count of the data file.
        dl_base_table.add_column(
            [cloud_access_str] * len(dl_base_table), name='cloud_access'
        )

        # Serialize the augmented table back to VOTable bytes for the response.
        out = BytesIO()
        writeto(from_table(dl_base_table), out)
        return out.getvalue()

    with mocker.register_uri(
        'GET', 'http://example.com/datalink-cloud.xml', content=callback
    ) as matcher:
        yield matcher
7794

7895
@pytest.fixture()
7996
def obscore_datalink(mocker):
@@ -336,3 +353,59 @@ def test_no_datalink():
336353
result = results[0]
337354
with pytest.raises(DALServiceError, match="No datalink found for record."):
338355
result.getdatalink()
356+
357+
@pytest.mark.filterwarnings("ignore::astropy.io.votable.exceptions.E02")
@pytest.mark.usefixtures('datalink_cloud')
class TestJsonColumns:
    """Tests for parsing JSON ``cloud_access`` columns.

    Covers parsing on a single record, on the whole result set, and on the
    datalink documents the records point to.
    """

    # Two result rows, both pointing at the mocked datalink document; each
    # row carries its own JSON string with a distinct "key".
    res = testing.create_dalresults(
        [
            {"name": "access_url", "datatype": "char", "arraysize": "*",
             "ucd": "meta.ref.url"},
            {"name": "access_format", "datatype": "char", "arraysize": "*",
             "utype": "meta.code.mime"},
            {"name": "cloud_access", "datatype": "char", "arraysize": "*"},
        ],
        [
            (
                "http://example.com/datalink-cloud.xml",
                "application/x-votable+xml;content=datalink",
                '{"aws": {"bucket_name": "test", "key":"path/to/file1.fits", "region": "us-west-2"}}',
            ),
            (
                "http://example.com/datalink-cloud.xml",
                "application/x-votable+xml;content=datalink",
                '{"aws": {"bucket_name": "test", "key":"path/to/file2.fits", "region": "us-west-2"}}',
            ),
        ],
        resultsClass=SIA2Results
    )

    def test_record_w_json(self):
        """Parse the JSON column of the first record only."""
        table = self.res[0].parse_json_params("cloud_access", "aws")

        assert table[0]["bucket_name"] == "test"
        assert table[0]["key"] == "path/to/file1.fits"
        assert table[0]["region"] == "us-west-2"

    def test_iter_json(self):
        """Parse the JSON column of every record in the results."""
        table = self.res.iter_parse_json_params("cloud_access", "aws")

        assert table[0]["record_row"] == 0
        assert table[0]["bucket_name"] == "test"
        assert table[0]["key"] == "path/to/file1.fits"
        assert table[0]["region"] == "us-west-2"

        assert table[1]["record_row"] == 1
        assert table[1]["key"] == "path/to/file2.fits"

    def test_datalink_json(self):
        """Get cloud parameters for the first record.

        The expected key is the one the ``datalink_cloud`` fixture injects,
        not the one stored in the record's own column.
        """
        table = self.res[0].get_cloud_params("cloud_access", "aws")

        assert table[0]["bucket_name"] == "test"
        assert table[0]["key"] == "path/to/cloudfile.fits"
        assert table[0]["region"] == "us-west-2"

    def test_iter_datalink_json(self):
        """Get cloud parameters for every record in the results."""
        table = self.res.iter_get_cloud_params("cloud_access", "aws")

        assert table[0]["record_row"] == 0
        assert table[0]["datalink_row"] == 0
        assert table[0]["bucket_name"] == "test"
        assert table[0]["key"] == "path/to/cloudfile.fits"
        assert table[0]["region"] == "us-west-2"

        assert table[1]["record_row"] == 1
        assert table[1]["datalink_row"] == 0
        assert table[1]["key"] == "path/to/cloudfile.fits"

0 commit comments

Comments
 (0)