Skip to content

Commit d7c1ece

Browse files
authored
Merge pull request #178 from SPAAM-community/dates_convert
Enable the access to the dates table
2 parents f53816e + ae3e4a7 commit d7c1ece

File tree

5 files changed

+76
-8
lines changed

5 files changed

+76
-8
lines changed

amdirt/assets/tables.json

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,5 +22,11 @@
2222
"ancientmetagenome-hostassociated": "https://raw.githubusercontent.com/SPAAM-community/AncientMetagenomeDir/master/ancientmetagenome-hostassociated/libraries/ancientmetagenome-hostassociated_libraries_schema.json",
2323
"ancientsinglegenome-hostassociated": "https://raw.githubusercontent.com/SPAAM-community/AncientMetagenomeDir/master/ancientsinglegenome-hostassociated/libraries/ancientsinglegenome-hostassociated_libraries_schema.json",
2424
"test": "https://raw.githubusercontent.com/SPAAM-community/amdirt/dev/tests/data/libraries_schema.json"
25+
},
26+
"dates": {
27+
"ancientsinglegenome-hostassociated": "https://raw.githubusercontent.com/SPAAM-community/AncientMetagenomeDir/master/ancientsinglegenome-hostassociated/dates/ancientsinglegenome-hostassociated_dates.tsv"
28+
},
29+
"dates_schema": {
30+
"ancientsinglegenome-hostassociated": "https://raw.githubusercontent.com/SPAAM-community/AncientMetagenomeDir/master/ancientsinglegenome-hostassociated/dates/ancientsinglegenome-hostassociated_dates_schema.json"
2531
}
26-
}
32+
}

amdirt/cli.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,11 @@ def viewer(ctx, no_args_is_help=True, **kwargs):
169169
is_flag=True,
170170
help="Generate BibTeX file of all publications in input table",
171171
)
172+
@click.option(
173+
"--dates",
174+
is_flag=True,
175+
help="Generate AncientMetagenomeDir dates table of all samples in input table"
176+
)
172177
@click.option(
173178
"--curl",
174179
is_flag=True,
@@ -320,7 +325,7 @@ def merge(ctx, no_args_is_help=True, **kwargs):
320325
"-y",
321326
"--table_type",
322327
help="Type of table to download",
323-
type=click.Choice(["samples", "libraries"]),
328+
type=click.Choice(["samples", "libraries", "dates"]),
324329
default="samples",
325330
show_default=True,
326331
)

amdirt/convert/__init__.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
prepare_aMeta_table,
99
is_merge_size_zero,
1010
prepare_taxprofiler_table,
11+
get_dates,
1112
get_libraries,
1213
get_remote_resources,
1314
get_json_path,
@@ -27,6 +28,7 @@ def run_convert(
2728
tables=None,
2829
output=".",
2930
bibliography=False,
31+
dates=False,
3032
librarymetadata=False,
3133
curl=False,
3234
aspera=False,
@@ -138,6 +140,22 @@ def run_convert(
138140
else:
139141
col_drop = ["archive_accession", "sample_host"]
140142

143+
if dates == True:
144+
if table_name not in remote_resources["dates"]:
145+
logger.error(f"No dates for {table_name} available in AncientMetagenomeDir at the moment.")
146+
else:
147+
tbl_file = f"{output}/AncientMetagenomeDir_filtered_dates.tsv"
148+
dates_tbl = pd.read_csv(remote_resources['dates'][table_name],
149+
sep="\t")
150+
logger.info(f"Writing filtered dates table to {tbl_file}")
151+
datesmetadata = get_dates(table_name, samples, dates_tbl)
152+
datesmetadata.to_csv(
153+
tbl_file,
154+
sep="\t",
155+
index=False,
156+
)
157+
158+
141159
if librarymetadata == True:
142160
tbl_file = f"{output}/AncientMetagenomeDir_filtered_libraries.tsv"
143161
logger.info(f"Writing filtered libraries table to {tbl_file}")

amdirt/core/__init__.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,40 @@ def doi2bib(doi: str) -> str:
141141
return r.text
142142

143143

144+
def get_dates(
    table_name: str,
    samples: pd.DataFrame,
    dates: pd.DataFrame
) -> pd.DataFrame:
    """Filter the dates table down to the samples present in ``samples``.

    The samples table names its accession column ``archive_accession``
    while the dates table uses ``archive_sample_accession``; the samples
    column is renamed on a copy so both tables can be joined on identical
    column names. The caller's ``samples`` frame is not modified.

    Args:
        table_name (str): Name of the AncientMetagenomeDir table to convert
        samples (pd.DataFrame): Sample table (already filtered by the user)
        dates (pd.DataFrame): Full dates table for ``table_name``

    Returns:
        pd.DataFrame: rows of ``dates`` whose key columns match a row of
            ``samples``; only the columns of ``dates`` are returned

    Raises:
        KeyError: if one of the key columns is missing from ``samples``
            or ``dates``
    """
    renamed_samples = samples.rename(
        columns={"archive_accession": "archive_sample_accession"}
    )

    if table_name in [
        "ancientmetagenome-environmental",
    ]:
        # The accession column has been renamed above, so the key must use
        # the new name (selecting "archive_accession" here always raised
        # KeyError).
        # NOTE(review): assumes a future environmental dates table also
        # keys on archive_sample_accession - confirm once it is published.
        sel_col = ["archive_sample_accession"]
    else:
        sel_col = [
            "project_name",
            "publication_year",
            "sample_name",
            "singlegenome_species",
            "archive_project",
            "archive_sample_accession",
        ]

    # The merge is used purely as a filter: de-duplicate the sample keys so
    # that repeated sample rows do not multiply the matching dates rows.
    sample_keys = renamed_samples.loc[:, sel_col].drop_duplicates()

    selected_dates = dates.merge(
        sample_keys,
        on=sel_col,
        how="inner",
    )

    return selected_dates
176+
177+
144178
@st.cache_data
145179
def get_libraries(
146180
table_name: str,

amdirt/download/__init__.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ def download(table: str, table_type: str, release: str, output: str = ".") -> st
1616
table : str
1717
The AncientMetagenomeDir table to download.
1818
table_type : str
19-
The type of table to download. Allowed values are ['samples', 'libraries'].
19+
The type of table to download. Allowed values are ['samples', 'libraries', 'dates'].
2020
release : str
2121
The release of the table to download. Must be a valid release tag.
2222
output: str
@@ -47,16 +47,21 @@ def download(table: str, table_type: str, release: str, output: str = ".") -> st
4747
if check_allowed_values(tables, table) is False:
4848
raise ValueError(f"Invalid table: {table}. Allowed values are {tables}")
4949

50-
if check_allowed_values(["samples", "libraries"], table_type) is False:
50+
if check_allowed_values(["samples", "libraries", "dates"], table_type) is False:
5151
raise ValueError(
52-
f"Invalid table type: {table_type}. Allowed values are ['samples', 'libraries']"
52+
f"Invalid table type: {table_type}. Allowed values are ['samples', 'libraries', 'dates']"
5353
)
5454
table_filename = f"{table}_{table_type}_{release}.tsv"
5555
logger.info(
5656
f"Downloading {table} {table_type} table from {release} release, saving to {output}/{table_filename}"
5757
)
58-
t = requests.get(resources[table_type][table].replace("master", release))
59-
with open(table_filename, "w") as fh:
60-
fh.write(t.text)
58+
try:
59+
t = requests.get(resources[table_type][table].replace("master", release))
60+
with open(f"{output}/{table_filename}", "w") as fh:
61+
fh.write(t.text)
62+
except KeyError:
63+
logger.warning(
64+
f"Invalid table: {table}. {table} currently does not have any {table_type} information."
65+
)
6166

6267
return table_filename

0 commit comments

Comments
 (0)