|
| 1 | +from itertools import chain |
1 | 2 | import requests |
2 | 3 | import json |
3 | 4 | import numpy as np |
@@ -64,23 +65,20 @@ def get_data(page_start_calc_id, page_size=default_page_size): |
64 | 65 | result, next_page_calc_id, n_iter = post_first_request( |
65 | 66 | page_start_calc_id, page_size=page_size |
66 | 67 | ) |
67 | | - data = result["data"] |
68 | | - formulas = [ |
69 | | - datum["formula"] if "formula" in datum.keys() else None for datum in data |
70 | | - ] |
71 | | - calc_ids = [datum["calc_id"] for datum in data] |
| 68 | + # initialize |
| 69 | + data = [] |
| 70 | + d = result["data"] |
| 71 | + data.append(d) |
72 | 72 |
|
73 | 73 | for _ in trange(n_iter): |
74 | | - result, next_page_calc_id = post_request(next_page_calc_id) |
75 | | - data = result["data"] |
76 | | - formula = [ |
77 | | - datum["formula"] if "formula" in datum.keys() else "" for datum in data |
78 | | - ] |
79 | | - calc_id = [datum["calc_id"] for datum in data] |
80 | | - formulas = formulas + formula |
81 | | - calc_ids = calc_ids + calc_id |
| 74 | + result, next_page_calc_id = post_request(next_page_calc_id, page_size=page_size) |
| 75 | + d = result["data"] |
| 76 | + data.append(d) |
| 77 | + |
| 78 | + print(f"merging {n_iter + 1} lists") |
| 79 | + data = list(chain(*data)) |
82 | 80 |
|
83 | | - df = pd.DataFrame({"formula": formulas, "calc_id": calc_ids}).set_index("calc_id") |
| 81 | + df = pd.DataFrame(data).set_index("calc_id") |
84 | 82 |
|
85 | 83 | return df |
86 | 84 |
|
@@ -132,3 +130,18 @@ def get_data(page_start_calc_id, page_size=default_page_size): |
132 | 130 | # df = pd.DataFrame(data, index=[0]) |
133 | 131 |
|
134 | 132 | # return df |
| 133 | + |
| 134 | +# formulas = [ |
| 135 | +# datum["formula"] if "formula" in datum.keys() else None for datum in data |
| 136 | +# ] |
| 137 | +# calc_ids = [datum["calc_id"] for datum in data] |
| 138 | + |
| 139 | +# formula = [ |
| 140 | +# datum["formula"] if "formula" in datum.keys() else "" for datum in data |
| 141 | +# ] |
| 142 | +# calc_id = [datum["calc_id"] for datum in data] |
| 143 | +# formulas = formulas + formula |
| 144 | +# calc_ids = calc_ids + calc_id |
| 145 | + |
| 146 | +# df = pd.DataFrame({"formula": formulas, "calc_id": calc_ids}).set_index("calc_id") |
| 147 | + |
0 commit comments