|
1 | | -from intake.catalog import Catalog |
2 | | -from intake.catalog.local import LocalCatalogEntry |
3 | | - |
4 | | - |
class ThreddsCatalog(Catalog):
    """Intake catalog interface to a thredds catalog.

    Parameters
    ----------
    url : str
        Location of thredds catalog. A ``simplecache::`` marker in the URL
        enables caching and is only accepted together with
        ``driver='netcdf'``.
    driver : str
        Select driver to access data. Choose from 'netcdf' and 'opendap'.
    intake_xarray_kwargs : dict
        Keyword arguments to pass to intake_xarray DataSource.
        Defaults to ``{'chunks': {}}`` when omitted or empty.
    **kwargs :
        Additional keyword arguments are passed through to the
        :py:class:`~intake.catalog.Catalog` base class.

    Examples
    --------
    >>> import intake
    >>> cat_url = 'https://psl.noaa.gov/thredds/catalog/Datasets/noaa.ersst/catalog.xml'
    >>> cat = intake.open_thredds_cat(cat_url)
    """

    name = 'thredds_cat'

    def __init__(self, url: str, driver: str = 'opendap', intake_xarray_kwargs=None, **kwargs):
        self.url = url
        self.driver = driver
        self.intake_xarray_kwargs = intake_xarray_kwargs or {'chunks': {}}
        super().__init__(**kwargs)

    def _load(self):
        """Populate ``self._entries`` from the remote THREDDS catalog.

        Sub-catalog references become nested ``thredds_cat`` entries and
        datasets become entries that use ``self.driver``.

        Raises
        ------
        ValueError
            If the URL requests ``simplecache::`` with a driver other than
            'netcdf', or if a dataset entry has to be built for a driver
            other than 'opendap' or 'netcdf'.
        """
        from siphon.catalog import TDSCatalog

        if 'simplecache::' in self.url:
            if self.driver != 'netcdf':
                raise ValueError(
                    f'simplecache requires driver="netcdf", found driver="{self.driver}".'
                )
            self.cache = True
            self.url_no_simplecache = self.url.replace('simplecache::', '')
            # Recorded in the metadata so dataset URLs built below (and the
            # sub-catalog entries, which receive this metadata) get the prefix.
            self.metadata.update({'fsspec_pre_url': 'simplecache::'})
        else:
            self.cache = False
            self.url_no_simplecache = self.url

        self.cat = TDSCatalog(self.url_no_simplecache)
        if self.name is None:
            self.name = self.cat.catalog_name
        self.metadata.update(self.cat.metadata)

        # sub-cats
        self._entries = {
            r.title: LocalCatalogEntry(
                r.title,
                'THREDDS cat',
                'thredds_cat',
                True,
                {'url': r.href},
                [],
                [],
                self.metadata,
                None,
                catalog=self,
            )
            for r in self.cat.catalog_refs.values()
        }

        # Key into a dataset's ``access_urls`` for each supported driver.
        access_services = {'opendap': 'OPENDAP', 'netcdf': 'HTTPServer'}

        def _dataset_url(ds):
            """Return the access URL of ``ds`` for the configured driver."""
            try:
                service = access_services[self.driver]
            except KeyError:
                # Previously this path failed with an UnboundLocalError.
                raise ValueError(
                    f'driver must be "opendap" or "netcdf", found driver="{self.driver}".'
                ) from None
            url = ds.access_urls[service]
            if 'fsspec_pre_url' in self.metadata:
                url = f'{self.metadata["fsspec_pre_url"]}{url}'
            return url

        def _update_args(ds):
            """Return a copy of the xarray kwargs with ``urlpath`` set for ``ds``."""
            args = self.intake_xarray_kwargs.copy()
            args.update({'urlpath': _dataset_url(ds)})
            return args

        self._entries.update(
            {
                ds.name: LocalCatalogEntry(
                    ds.name,
                    'THREDDS data',
                    self.driver,
                    True,
                    _update_args(ds),
                    [],
                    [],
                    {},
                    None,
                    catalog=self,
                )
                for ds in self.cat.datasets.values()
            }
        )
| 1 | +from intake.readers import Service |
| 2 | +from intake.readers.catalogs import THREDDSCatalogReader |
| 3 | + |
| 4 | + |
class ThreddsCatalog:
    """Intake catalog interface to a thredds catalog."""

    def __new__(cls, url: str, driver: str = 'opendap', intake_xarray_kwargs=None, metadata=None):
        """Read a THREDDS catalog and return the catalog produced by the reader.

        The returned object is the result of ``THREDDSCatalogReader.read()``,
        not an instance of ``ThreddsCatalog``.

        Parameters
        ----------
        url : str
            Location of thredds catalog. A leading ``simplecache::`` is
            stripped before the catalog is read and re-applied to the ``url``
            of every data entry; it requires ``driver='netcdf'``.
        driver : str
            Select driver to access data. Choose from 'netcdf' and 'opendap'.
        intake_xarray_kwargs : dict, optional
            Keyword arguments merged into the kwargs of every catalog entry.
            A nested ``'xarray_kwargs'`` dict is flattened into the top
            level. The caller's dict is left unmodified.
        metadata : dict, optional
            Extra metadata merged into the catalog's metadata.

        Raises
        ------
        ValueError
            If ``url`` starts with ``simplecache::`` and ``driver`` is not
            'netcdf'.

        Examples
        --------
        >>> import intake
        >>> cat_url = 'https://psl.noaa.gov/thredds/catalog/Datasets/noaa.ersst/catalog.xml'
        >>> cat = intake.open_thredds_cat(cat_url)
        """
        # Use one prefix for detection, stripping and re-application so the
        # three steps cannot disagree.
        prefix = 'simplecache::'
        simplecache = url.startswith(prefix)
        if simplecache and driver != 'netcdf':
            raise ValueError(f'simplecache requires driver="netcdf", found driver="{driver}".')
        url = url.removeprefix(prefix)

        data = Service(url)
        # The last three letters of the driver ('dap' / 'cdf') are passed as
        # the reader's ``make`` option.
        # NOTE(review): confirm against THREDDSCatalogReader that these are
        # the accepted values.
        reader = THREDDSCatalogReader(data, make=driver[-3:])
        cat = reader.read()

        if metadata:
            cat.metadata.update(metadata)
        if simplecache:
            for d in cat.data.values():
                d.kwargs['url'] = prefix + d.kwargs['url']
        if intake_xarray_kwargs:
            # Work on a copy: flattening 'xarray_kwargs' must not mutate the
            # dict the caller passed in.
            extra_kwargs = dict(intake_xarray_kwargs)
            extra_kwargs.update(extra_kwargs.pop('xarray_kwargs', {}))
            for d in cat.entries.values():
                d.kwargs.update(extra_kwargs)
        return cat
0 commit comments