1- __all__ = ["download" ]
1+ __all__ = ["download" , "list" ]
22
33
44# standard library
55import tarfile
66from pathlib import Path
7+ from typing import Any , Literal , overload
78
89
910# dependencies
11+ import pandas as pd
1012from fire import Fire
1113from requests import get
1214from tqdm import tqdm
1517
1618# constants
# Size passed to ``response.iter_content`` when streaming a download.
CHUNK_SIZE = 1024

# Base URL of the repository that hosts the raw data archives.
DATA_REPO_URL = "https://github.com/deshima-dev/rawdata"

# Git reference used by default; derived from the package version.
DEFAULT_DATA_REF = f"v{__version__}"

# Output format used by the ``list`` command when none is given.
DEFAULT_LIST_FORMAT = "markdown"

# Table of available datasets, loaded from the ``data.csv`` file that sits
# next to this module. The first column becomes the index and is read as
# strings (presumably observation IDs -- confirm against data.csv).
DATA_LIST = pd.read_csv(
    Path(__file__).parent / "data.csv",
    index_col=0,
    dtype={0: str},
)
2028
2129
2230def download (
@@ -26,7 +34,7 @@ def download(
2634 dir : Path = Path (),
2735 extract : bool = False ,
2836 progress : bool = False ,
29- tag : str = DEFAULT_TAG ,
37+ ref : str = DEFAULT_DATA_REF ,
3038) -> Path :
3139 """Download DESHIMA raw data for given observation ID.
3240
@@ -35,13 +43,15 @@ def download(
3543 dir: Directory where the raw data is saved.
3644 extract: Whether to extract the raw data.
3745 progress: Whether to show a progress bar.
38- tag: Git tag (or branch) of the raw data.
46+ ref: Reference of the branch or tag for the raw data.
47+ Note that this is for development use only.
3948
4049 Returns:
4150 Path of the downloaded raw data.
4251
4352 """
44- url = f"{ GITHUB_URL } /{ tag } /data/{ obsid } .tar.gz"
53+ file_name = DATA_LIST ["File name" ][str (obsid )] # type: ignore
54+ url = f"{ DATA_REPO_URL } /raw/{ ref } /data/{ file_name } "
4555
4656 if not (response := get (url , stream = True )).ok :
4757 response .raise_for_status ()
@@ -52,7 +62,7 @@ def download(
5262 "unit" : "B" ,
5363 "unit_scale" : True ,
5464 }
55- data_path = Path (dir ) / response . url . split ( "/" )[ - 1 ]
65+ data_path = Path (dir ) / file_name
5666
5767 with tqdm (** bar_options ) as bar , open (data_path , "wb" ) as f :
5868 for data in response .iter_content (CHUNK_SIZE ):
@@ -67,9 +77,32 @@ def download(
6777 dir_name = tar .getnames ()[0 ]
6878
6979 data_path .unlink (True )
70- return data_path .parent / dir_name
80+ return data_path .with_name (dir_name )
81+
82+
@overload
def list(format: Literal["csv", "json", "markdown"] = ...) -> str:
    ...


@overload
def list(format: Literal["dict"]) -> dict[str, Any]:
    ...


@overload
def list(format: str) -> Any:
    ...


def list(format: str = DEFAULT_LIST_FORMAT) -> Any:
    """List DESHIMA raw datasets available in the package.

    NOTE: this function deliberately shadows the builtin ``list`` so that
    it can be exposed under that name; do not rely on the builtin ``list``
    after this definition within this module.

    Args:
        format: Format of the list that can be output by pandas.
            The value is mapped to the ``DataFrame.to_<format>`` method
            (e.g. ``"csv"``, ``"json"``, ``"markdown"``, ``"dict"``).

    Returns:
        The list of DESHIMA raw datasets with given format. This is
        whatever the corresponding ``to_<format>`` method returns when
        called without arguments (a string for the text formats, a nested
        mapping for ``"dict"``).

    Raises:
        AttributeError: If pandas has no ``to_<format>`` method.

    """
    # Resolve the exporter by name so that any pandas output format works.
    exporter = getattr(DATA_LIST, f"to_{format}", None)

    if exporter is None:
        # Same exception type as a plain getattr, but with a clearer message.
        raise AttributeError(f"Unsupported list format: {format!r}")

    return exporter()
71104
72105
def main() -> None:
    """Run the deshima-rawdata command-line interface.

    The ``download`` and ``list`` functions are handed to Fire as the
    commands of the same names.

    """
    commands = {"download": download, "list": list}
    Fire(commands)
0 commit comments