Skip to content

Commit e340a19

Browse files
committed
add centers
1 parent 6709923 commit e340a19

File tree

3 files changed

+134
-37
lines changed

3 files changed

+134
-37
lines changed

script/ingestion/centers.py

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
import pandas as pd
2+
import yaml
3+
from glob import glob
4+
from uuid import uuid5, NAMESPACE_URL
5+
from uuid import uuid5, NAMESPACE_URL
6+
from s3_update import backup_file
7+
from ingest_common import connection
8+
import io
9+
import csv
10+
# Load the current DCC table from S3; the first column becomes the index.
dccs = pd.read_csv(
    'https://cfde-drc.s3.amazonaws.com/database/files/current_dccs.tsv',
    sep="\t",
    index_col=0,
    header=0,
)
# Map each DCC short label to the index value (id) of its row.
dcc_mapper = {
    short_label: dcc_id
    for dcc_id, short_label in dccs['short_label'].items()
}
15+
# Parse the YAML front matter of every center page into two collections:
#   data               -- center uuid -> column values for the centers table
#   center_publication -- one {"center_id", "publication_id"} record per link
data = {}
center_publication = []
# glob() returns paths in filesystem order; sorting keeps the generated
# tables (and their backups) in the same order on every run.
for filename in sorted(glob('../../src/pages/centers/*.md')):
    # Read as UTF-8 explicitly instead of relying on the platform default.
    with open(filename, encoding="utf-8") as o:
        markdown = o.read()
    # The front matter is the text between the first two "---" delimiters.
    parts = markdown.split("---")
    if len(parts) < 2:
        print("skipping %s: no front matter found" % filename)
        continue
    row = yaml.safe_load(parts[1])
    # Empty or non-mapping front matter cannot describe a center; pages
    # without a label are skipped, as before.
    if not isinstance(row, dict) or "label" not in row:
        continue
    label = row['label']
    # uuid5 is deterministic: the same label always produces the same id.
    uid = str(uuid5(NAMESPACE_URL, label))
    data[uid] = {
        "label": row["label"],
        "short_label": row.get("short_label"),
        "short_description": row.get("short_description"),
        "description": row.get("description"),
        "homepage": row.get("homepage"),
        "icon": row.get("icon"),
        "grant_num": row.get("grant_num"),
        "active": row.get("active"),
    }
    # dict.fromkeys() drops duplicates like set() did, but keeps the order
    # in which the publications are listed in the page.
    for pub in dict.fromkeys(row.get("publications") or []):
        center_publication.append({"center_id": uid, "publication_id": pub})
38+
39+
# Centers table: the uuid keys of `data` become the index, named "id";
# missing values are replaced by empty strings.
center_df = (
    pd.DataFrame.from_dict(data, orient="index")
    .fillna('')
    .rename_axis("id")
)
# Link table between centers and publications, with a fixed column order.
center_publication_df = pd.DataFrame.from_records(
    center_publication,
    columns=['center_id', 'publication_id'],
)

## Update S3
# NOTE(review): backup_file is imported from s3_update; the third positional
# argument of the second call is presumably `quoting` -- confirm.
backup_file(center_df, "centers", quoting=False)
backup_file(center_publication_df, "center_publication", False)
46+
47+
## ingest
48+
49+
print("ingesting...")

# Use the cursor as a context manager so it is closed once the centers have
# been staged and upserted, even if one of the statements raises.
with connection.cursor() as cur:
    # Empty the link table before the centers table.
    # NOTE(review): presumably center_publications references centers -- confirm.
    cur.execute('''
        DELETE FROM center_publications;
    ''')

    cur.execute('''
        DELETE FROM centers;
    ''')

    # Staging table with the same columns as centers, but no rows.
    cur.execute('''
        create table centers_tmp
        as table centers
        with no data;
    ''')

    # Serialize the frame as TSV in memory and bulk-load it into the staging
    # table.
    # NOTE(review): with QUOTE_NONE and no escapechar, to_csv is expected to
    # raise if a value contains a tab or newline -- confirm the front matter
    # values are single-line.
    p_buf = io.StringIO()
    center_df.to_csv(p_buf, header=True, quoting=csv.QUOTE_NONE, sep="\t")
    p_buf.seek(0)
    # Consume the header line: it names the target columns (index "id" first)
    # and must not be loaded as data.
    columns = next(p_buf).strip().split('\t')
    cur.copy_from(
        p_buf,
        'centers_tmp',
        columns=columns,
        null='',  # empty fields are loaded as NULL
        sep='\t',
    )

    # Upsert from the staging table: on an id conflict every column is
    # overwritten with the staged value.
    column_string = ", ".join(columns)
    set_string = ",\n".join(["%s = excluded.%s" % (i, i) for i in columns])
    cur.execute('''
        insert into centers (%s)
        select %s
        from centers_tmp
        on conflict (id)
        do update
        set %s
        ;
    ''' % (column_string, column_string, set_string))
    cur.execute('drop table centers_tmp;')
88+
89+
90+
# Use a cursor that is closed when the block exits, even on error.
with connection.cursor() as cur:
    # Staging table with the same columns as center_publications, but no rows.
    cur.execute('''
        create table center_publications_tmp
        as table center_publications
        with no data;
    ''')

    # Serialize the link records as TSV (no index column) and bulk-load them.
    cp_buf = io.StringIO()
    center_publication_df.to_csv(cp_buf, header=True, sep="\t", index=None)
    cp_buf.seek(0)
    # Consume the header line: it names the target columns and must not be
    # loaded as data.
    columns = next(cp_buf).strip().split('\t')
    cur.copy_from(
        cp_buf,
        'center_publications_tmp',
        columns=columns,
        null='',  # empty fields are loaded as NULL
        sep='\t',
    )

    column_string = ", ".join(columns)

    # Copy the staged rows into the real table; rows that conflict with an
    # existing row are skipped.
    cur.execute('''
        insert into center_publications (%s)
        select %s
        from center_publications_tmp
        on conflict
        do nothing
        ;
    ''' % (column_string, column_string))
    cur.execute('drop table center_publications_tmp;')

# Commit the pending transaction on the shared connection.
connection.commit()

print("Ingested centers")
122+

script/process_markdown.ipynb

Lines changed: 5 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1938,39 +1938,7 @@
19381938
},
19391939
{
19401940
"cell_type": "code",
1941-
"execution_count": 62,
1942-
"metadata": {},
1943-
"outputs": [
1944-
{
1945-
"data": {
1946-
"text/plain": [
1947-
"{'layout': '../../layouts/Webinar.astro',\n",
1948-
" 'start': '2024-11-22T13:00:00-05:00',\n",
1949-
" 'end': '2024-11-22T14:00:00-05:00',\n",
1950-
" 'presentations': {'Introduction to Communication and Outreach to Maximize Product Adoption (COMPA)': {'label': 'Introduction to Communication and Outreach to Maximize Product Adoption (COMPA)',\n",
1951-
" 'presenter': 'Bernard de Bono, PhD',\n",
1952-
" 'affiliation': 'PI, COMPA',\n",
1953-
" 'summary': 'In this lecture, Dr. Bernard de Bono presented information on the CFDE Communication and Outreach to Maximize Product Adoption (COMPA) partnership. He explained COMPA’s strategic approaches for user profiling, outreach activities, and gap analysis aimed to promote CFDE-developed products.',\n",
1954-
" 'video_link': 'https://www.youtube.com/watch?v=pQmmj3VJywU'},\n",
1955-
" 'Data-Driven Strategies for Developing and Marketing Scientific Information Resources': {'label': 'Data-Driven Strategies for Developing and Marketing Scientific Information Resources',\n",
1956-
" 'presenter': 'Stephen Larson, PhD',\n",
1957-
" 'affiliation': 'CEO, MetaCell',\n",
1958-
" 'summary': 'In this lecture, Dr Stephen Larson, the CEO of Metacell presented strategies for driving user engagement to scientific tools. Metacell works with the COMPA CFDE partnership. He explained the challenges of user adoption, and effective user engagement strategies, and shared their experiences on various methods of outreach activities. He also shared the survey about user behaviors and tendencies toward scientific products.',\n",
1959-
" 'video_link': 'https://www.youtube.com/watch?v=gDXHfCpszMk'}}}"
1960-
]
1961-
},
1962-
"execution_count": 62,
1963-
"metadata": {},
1964-
"output_type": "execute_result"
1965-
}
1966-
],
1967-
"source": [
1968-
"yml"
1969-
]
1970-
},
1971-
{
1972-
"cell_type": "code",
1973-
"execution_count": 78,
1941+
"execution_count": 40,
19741942
"metadata": {},
19751943
"outputs": [
19761944
{
@@ -2169,7 +2137,7 @@
21692137
"8370ed93-862f-5a49-bd14-bdf9e581f8a4 OT2OD036440 True "
21702138
]
21712139
},
2172-
"execution_count": 78,
2140+
"execution_count": 40,
21732141
"metadata": {},
21742142
"output_type": "execute_result"
21752143
}
@@ -2182,12 +2150,12 @@
21822150
},
21832151
{
21842152
"cell_type": "code",
2185-
"execution_count": 80,
2153+
"execution_count": 42,
21862154
"metadata": {},
21872155
"outputs": [],
21882156
"source": [
21892157
"for i, row in centers.iterrows():\n",
2190-
"\tyml = {\"layout\": \"../../layouts/Centers.astro\"}\n",
2158+
"\tyml = {\"layout\": \"@/layouts/Center.astro\"}\n",
21912159
"\tif row['active']:\n",
21922160
"\t\tdescription = ''\n",
21932161
"\t\tfor k,v in row.items():\n",
@@ -2199,7 +2167,7 @@
21992167
"\t\t\t\t\tyml[k] = v\n",
22002168
"\t\t\telse:\n",
22012169
"\t\t\t\tdescription = v\n",
2202-
"\t\twith open('out/centers/%s.md'%row['short_label'], 'w') as o:\n",
2170+
"\t\twith open('../src/pages/centers/%s.md'%row['short_label'], 'w') as o:\n",
22032171
"\t\t\to.write('---\\n')\n",
22042172
"\t\t\to.write(yaml.dump(yml))\n",
22052173
"\t\t\to.write('---\\n')\n",

src/pages/centers/centers.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
---
2+
active: true
3+
icon: https://cfde-drc.s3.us-east-2.amazonaws.com/assets/img/cfde-centers.png
4+
label: centers
5+
layout: '@/layouts/Center.astro'
6+
short_label: centers
7+
---

0 commit comments

Comments
 (0)