Skip to content

Commit c1b0941

Browse files
committed
Update all_formula_basic_metadata.py
1 parent f05e6bc commit c1b0941

File tree

1 file changed

+27
-14
lines changed

1 file changed

+27
-14
lines changed

all_formula_basic_metadata.py

Lines changed: 27 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
from itertools import chain
12
import requests
23
import json
34
import numpy as np
@@ -64,23 +65,20 @@ def get_data(page_start_calc_id, page_size=default_page_size):
6465
result, next_page_calc_id, n_iter = post_first_request(
6566
page_start_calc_id, page_size=page_size
6667
)
67-
data = result["data"]
68-
formulas = [
69-
datum["formula"] if "formula" in datum.keys() else None for datum in data
70-
]
71-
calc_ids = [datum["calc_id"] for datum in data]
68+
# initialize
69+
data = []
70+
d = result["data"]
71+
data.append(d)
7272

7373
for _ in trange(n_iter):
74-
result, next_page_calc_id = post_request(next_page_calc_id)
75-
data = result["data"]
76-
formula = [
77-
datum["formula"] if "formula" in datum.keys() else "" for datum in data
78-
]
79-
calc_id = [datum["calc_id"] for datum in data]
80-
formulas = formulas + formula
81-
calc_ids = calc_ids + calc_id
74+
result, next_page_calc_id = post_request(next_page_calc_id, page_size=page_size)
75+
d = result["data"]
76+
data.append(d)
77+
78+
print(f"merging {n_iter + 1} lists")
79+
data = list(chain(*data))
8280

83-
df = pd.DataFrame({"formula": formulas, "calc_id": calc_ids}).set_index("calc_id")
81+
df = pd.DataFrame(data).set_index("calc_id")
8482

8583
return df
8684

@@ -132,3 +130,18 @@ def get_data(page_start_calc_id, page_size=default_page_size):
132130
# df = pd.DataFrame(data, index=[0])
133131

134132
# return df
133+
134+
# formulas = [
135+
# datum["formula"] if "formula" in datum.keys() else None for datum in data
136+
# ]
137+
# calc_ids = [datum["calc_id"] for datum in data]
138+
139+
# formula = [
140+
# datum["formula"] if "formula" in datum.keys() else "" for datum in data
141+
# ]
142+
# calc_id = [datum["calc_id"] for datum in data]
143+
# formulas = formulas + formula
144+
# calc_ids = calc_ids + calc_id
145+
146+
# df = pd.DataFrame({"formula": formulas, "calc_id": calc_ids}).set_index("calc_id")
147+

0 commit comments

Comments
 (0)