Skip to content

Commit d756719

Browse files
committed
unique formula tracking and counts
1 parent c1b0941 commit d756719

File tree

1 file changed

+15
-0
lines changed

1 file changed

+15
-0
lines changed

all_formula_basic_metadata.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,21 @@ def get_data(page_start_calc_id, page_size=default_page_size):
8989

9090
df.to_csv("all-formula.csv")
9191

92+
# group calc_ids by formula so repeated formulas can be identified, and record how many calc_ids each formula has
93+
uniq_df = (
94+
df.reset_index()
95+
.groupby(by="formula")
96+
.agg({"calc_id": lambda x: tuple(x)})
97+
.reset_index()
98+
)
99+
uniq_df["count"] = uniq_df["calc_id"].apply(len)
100+
101+
# drop rows whose formula is "unavailable" and use each formula's tuple of calc_ids as the index
102+
uniq_df = uniq_df[uniq_df["formula"] != "unavailable"]
103+
uniq_df = uniq_df.set_index("calc_id")
104+
105+
uniq_df.to_csv("unique-formula.csv")
106+
92107
1 + 1
93108

94109
# %% Code Graveyard

0 commit comments

Comments
 (0)