verily-src
diff --git a/‎src/aou-common/extension-builder/snippets/example/hello.py‎
Lines changed: 0 additions & 1 deletion b/‎src/aou-common/extension-builder/snippets/example/hello.py‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎src/aou-common/extension-builder/snippets/python/dataset_builder/All_of_Us_Dataset_Builder_Python_snippets/1_Setup.py‎
Lines changed: 23 additions & 0 deletions b/‎src/aou-common/extension-builder/snippets/python/dataset_builder/All_of_Us_Dataset_Builder_Python_snippets/1_Setup.py‎
Lines changed: 23 additions & 0 deletions
diff --git a/‎src/aou-common/extension-builder/snippets/python/dataset_builder/All_of_Us_Dataset_Builder_Python_snippets/2_Basic_operations/add_age_to_demographics.py.py‎
Lines changed: 9 additions & 0 deletions b/‎src/aou-common/extension-builder/snippets/python/dataset_builder/All_of_Us_Dataset_Builder_Python_snippets/2_Basic_operations/add_age_to_demographics.py.py‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎src/aou-common/extension-builder/snippets/python/dataset_builder/All_of_Us_Dataset_Builder_Python_snippets/2_Basic_operations/join_dataframes.py.py‎
Lines changed: 13 additions & 0 deletions b/‎src/aou-common/extension-builder/snippets/python/dataset_builder/All_of_Us_Dataset_Builder_Python_snippets/2_Basic_operations/join_dataframes.py.py‎
Lines changed: 13 additions & 0 deletions
diff --git a/‎src/aou-common/extension-builder/snippets/python/dataset_builder/All_of_Us_Dataset_Builder_Python_snippets/2_Basic_operations/summarize_a_dataframe.py.py‎
Lines changed: 8 additions & 0 deletions b/‎src/aou-common/extension-builder/snippets/python/dataset_builder/All_of_Us_Dataset_Builder_Python_snippets/2_Basic_operations/summarize_a_dataframe.py.py‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎src/aou-common/extension-builder/snippets/python/dataset_builder/All_of_Us_Dataset_Builder_Python_snippets/3_Plot_measurements/measurement_by_age_and_sex_at_birth.plotnine.plotnine‎
Lines changed: 35 additions & 0 deletions b/‎src/aou-common/extension-builder/snippets/python/dataset_builder/All_of_Us_Dataset_Builder_Python_snippets/3_Plot_measurements/measurement_by_age_and_sex_at_birth.plotnine.plotnine‎
Lines changed: 35 additions & 0 deletions
diff --git a/‎src/aou-common/extension-builder/snippets/python/dataset_builder/All_of_Us_Dataset_Builder_Python_snippets/3_Plot_measurements/measurement_by_sex_at_birth.plotnine.plotnine‎
Lines changed: 24 additions & 0 deletions b/‎src/aou-common/extension-builder/snippets/python/dataset_builder/All_of_Us_Dataset_Builder_Python_snippets/3_Plot_measurements/measurement_by_sex_at_birth.plotnine.plotnine‎
Lines changed: 24 additions & 0 deletions
diff --git a/‎src/aou-common/extension-builder/snippets/python/dataset_builder/All_of_Us_Dataset_Builder_Python_snippets/4_Summarize_survey_data/summarize_a_survey_by_question_concept_id.py.py‎
Lines changed: 38 additions & 0 deletions b/‎src/aou-common/extension-builder/snippets/python/dataset_builder/All_of_Us_Dataset_Builder_Python_snippets/4_Summarize_survey_data/summarize_a_survey_by_question_concept_id.py.py‎
Lines changed: 38 additions & 0 deletions
diff --git a/‎src/aou-common/extension-builder/snippets/python/dataset_builder/All_of_Us_Dataset_Builder_Python_snippets/4_Summarize_survey_data/summarize_a_survey_module.py.py‎
Lines changed: 24 additions & 0 deletions b/‎src/aou-common/extension-builder/snippets/python/dataset_builder/All_of_Us_Dataset_Builder_Python_snippets/4_Summarize_survey_data/summarize_a_survey_module.py.py‎
Lines changed: 24 additions & 0 deletions
diff --git a/‎src/aou-common/extension-builder/snippets/python/dataset_builder/All_of_Us_Dataset_Builder_Python_snippets/Documentation.md‎
Lines changed: 3 additions & 0 deletions b/‎src/aou-common/extension-builder/snippets/python/dataset_builder/All_of_Us_Dataset_Builder_Python_snippets/Documentation.md‎
Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,23 @@
+import os
+import numpy as np
+import pandas as pd
+import pandas_profiling
+import plotnine
+from plotnine import *  # Provides a ggplot-like interface to matplotlib.
+from IPython.display import display
+
+## Plot setup.
+# Register the black-and-white theme (base size 11) as the theme used by later plots.
+theme_set(theme_bw(base_size = 11)) # Default theme for plots.
+
+def get_boxplot_fun_data(df):
+  """Returns a one-row data frame with a y position and a label, for use annotating ggplot boxplots.
+
+  Args:
+    df: The values to summarize. The body only calls max() and len() on it, so
+      pass a sequence of y values (such as a pandas Series) rather than a whole
+      data frame; iterating a data frame yields its column labels.
+  Returns:
+    A data frame with a single row (index 0): column y holds the maximum value
+    and column label holds the text 'N = <number of values>'.
+  """
+  summary = {'y': max(df), 'label': f'N = {len(df)}'}
+  return pd.DataFrame(data=summary, index=[0])
+
+# NOTE: if you get any errors from this cell, restart your kernel and run it again.
@@ -0,0 +1,9 @@
+# Use snippet 'add_age_to_demographics' to calculate the age of people in your demographics.
+# It assumes the 'Setup' snippet has been executed.
+# It also assumes that you got your demographics dataframe from Dataset Builder
+# and that its 'date_of_birth' column holds datetime values (the code uses the .dt accessor).
+
+# Note: This snippet calculates current age and does not take into account whether the person is already dead
+# Note: Age is computed as (current calendar year - year of birth). It ignores month and day, so it can be
+# one year higher than the exact age for people whose birthday has not yet occurred this year.
+
+
+## -----[ CHANGE THE DATAFRAME NAME(S) `YOUR_DATASET_NAME_person_df` TO MATCH YOURS FROM DATASET BUILDER] -----
+# The result is stored in a new (or overwritten) 'age' column on the same dataframe.
+YOUR_DATASET_NAME_person_df['age'] = pd.to_datetime('today').year - YOUR_DATASET_NAME_person_df['date_of_birth'].dt.year
@@ -0,0 +1,13 @@
+# Use snippet 'join_dataframes' to combine two dataframes into one.
+# It assumes the 'Setup' snippet has been executed.
+#
+# The example below performs an inner join of Demographics '_person_df' (left) with Measurements
+# '_measurement_df' (right). No join key is given, so pandas joins on every column the two
+# dataframes have in common, which in this case should only be 'person_id'.
+#
+# See also https://pandas.pydata.org/pandas-docs/version/0.25.1/reference/api/pandas.merge.html
+
+
+## -----[ CHANGE THE DATAFRAME NAME(S) TO MATCH YOURS FROM DATASET BUILDER] -----
+measurement_df = YOUR_DATASET_NAME_person_df.merge(YOUR_DATASET_NAME_measurement_df, how='inner')
+
+# Show (rows, columns) of the joined dataframe.
+measurement_df.shape
@@ -0,0 +1,8 @@
+# Use snippet 'summarize_a_dataframe' to display summary statistics for a dataframe.
+# It assumes snippet 'Setup' has been executed.
+# See also https://towardsdatascience.com/exploring-your-data-with-just-1-line-of-python-4b35ce21a82d
+
+
+## -----[ CHANGE THE DATAFRAME NAME(S) TO MATCH YOURS FROM DATASET BUILDER] -----
+# Examine up to the first 10,000 rows. Rows are selected by position (.iloc), so the cap is exactly
+# 10,000 rows whatever the dataframe's index labels are. Larger dataframes can be profiled, but it
+# takes more time.
+YOUR_DATASET_NAME_person_df.iloc[:10000, :].profile_report()
@@ -0,0 +1,35 @@
+# Use snippet 'measurement_by_age_and_sex_at_birth' to plot joined demographics and measurements dataframes.
+# This plot assumes 'measurement_df' was created using snippet 'Basic operations -> join_dataframes' to
+# join together demographics and measurements dataframes.
+# It also uses np, pd, the plotnine functions and get_boxplot_fun_data, so run the 'Setup' snippet first.
+# Note: this snippet adds the columns 'age_at_measurement' and 'age_group' to 'measurement_df'.
+# See also https://plotnine.readthedocs.io/en/stable/
+
+
+# There could be many different measurements in the dataframe. By default, plot the first one.
+measurement_to_plot = measurement_df.standard_concept_name.unique()[0]
+
+# Create a derived variable for age group.
+# Age is the whole number of 365.24-day years between birth and measurement. Any timezone
+# information is dropped from both timestamps before they are subtracted.
+measurement_df['age_at_measurement'] = ((measurement_df['measurement_datetime'].dt.tz_localize(None)
+                                     - measurement_df['date_of_birth'].dt.tz_localize(None)).dt.days)//365.24
+# Bin the ages into four labelled groups; the bin edges sit at half-years, between whole-number ages.
+measurement_df['age_group'] = pd.cut(measurement_df['age_at_measurement'],
+                                                 [-np.inf, 34.5, 49.5, 64.5, np.inf],
+                                                 labels=["<35", "35-49", "50-64", "65+"])
+
+# meas_filter is a column of True and False. It is True for rows of the chosen measurement that have
+# a unit, a sex_at_birth other than 'No matching concept', a value below 9999999 and a non-null age.
+meas_filter = ((measurement_df.standard_concept_name == measurement_to_plot)
+  & (measurement_df.unit_concept_name != 'No matching concept')
+  & (measurement_df.unit_concept_name.notna())
+  & (measurement_df.sex_at_birth != 'No matching concept')
+  & (measurement_df.value_as_number < 9999999)  # Get rid of nonsensical outliers.
+  & (measurement_df['age_at_measurement'].notnull())  # Keep only rows where an age could be computed.
+)
+
+# Boxplot of the values per age group, with one panel per measurement/unit and sex_at_birth.
+# stat_summary adds the text returned by get_boxplot_fun_data (the 'N = ...' label) to each box.
+(ggplot(measurement_df[meas_filter], aes(x = 'age_group', y = 'value_as_number')) +
+    geom_boxplot() +
+    stat_summary(fun_data = get_boxplot_fun_data, geom = 'text', size = 10,
+                 position = position_dodge(width = 0.9), va = 'top') +
+#    scale_y_log10() +  # Uncomment if the data looks skewed.
+    coord_flip() +
+    facet_wrap(['standard_concept_name + ": " + unit_concept_name', 'sex_at_birth'], ncol = 2, scales = 'free') +
+    xlab('age group') +
+    ggtitle('Numeric values of measurements by age and sex_at_birth\nSource: All Of Us Data') +
+    theme(figure_size = (12, 12), panel_spacing = .5))
@@ -0,0 +1,24 @@
+# Use snippet 'measurement_by_sex_at_birth' to plot joined demographics and measurements dataframes.
+# This plot assumes 'measurement_df' was created using snippet 'Basic operations -> join_dataframes' to
+# join together demographics and measurements dataframes.
+# It also uses the plotnine functions and get_boxplot_fun_data, so run the 'Setup' snippet first.
+# See also https://plotnine.readthedocs.io/en/stable/
+
+
+# There could be many different measurements in the dataframe. By default, plot the first one.
+measurement_to_plot = measurement_df.standard_concept_name.unique()[0]
+
+# meas_filter is a column of True and False. It is True for rows of the chosen measurement that
+# have a unit (not null and not 'No matching concept') and a value below 9999999.
+meas_filter = ((measurement_df.standard_concept_name == measurement_to_plot)
+  & (measurement_df.unit_concept_name != 'No matching concept')
+  & (measurement_df.unit_concept_name.notna())
+  & (measurement_df.value_as_number < 9999999)  # Get rid of nonsensical outliers.
+)
+
+# Boxplot of the values per sex_at_birth, with one panel per measurement name and unit.
+# stat_summary adds the text returned by get_boxplot_fun_data (the 'N = ...' label) to each box.
+(ggplot(measurement_df[meas_filter], aes(x = 'sex_at_birth', y = 'value_as_number')) +
+    geom_boxplot() +
+    stat_summary(fun_data = get_boxplot_fun_data, geom = 'text', size = 10,
+                 position = position_dodge(width = 0.9), va = 'top') +
+#    scale_y_log10() +  # Uncomment if the data looks skewed.
+    facet_wrap(('standard_concept_name', 'unit_concept_name'), ncol = 2, scales = 'free') +
+    ggtitle(f'Numeric values of measurements, by sex_at_birth\nSource: All Of Us Data') +
+    theme(figure_size=(12, 6), panel_spacing = .5, axis_text_x = element_text(angle=25, hjust=1)))
@@ -0,0 +1,38 @@
+# Use snippet 'summarize_a_survey_by_question_concept_id' to output a table and a graph of
+# participant counts by response for one question_concept_id
+# The snippet assumes that a dataframe containing survey questions and answers already exists
+# The snippet also assumes that setup has been run
+
+# Update the next 3 lines
+# survey_df: the survey dataframe to summarize. The function below reads its columns
+#   'question_concept_id', 'question', 'answer_concept_id', 'answer' and 'person_id'.
+# question_concept_id: the concept id of the single question to summarize.
+# denominator: leave as None to show counts only; set a number to also get a 'response_rate'
+#   column computed as 100 * n_participant / denominator.
+survey_df = YOUR_DATASET_NAME_survey_df
+question_concept_id = 1585940
+denominator = None # e.g: 200000
+
+####################################################################################
+#                           DON'T CHANGE FROM HERE
+####################################################################################
+def summarize_a_question_concept_id(df, question_concept_id, denominator=None):
+    """Print and display participant counts per answer for one survey question.
+
+    Args:
+        df: Survey dataframe with columns 'question_concept_id', 'question',
+            'answer_concept_id', 'answer' and 'person_id'.
+        question_concept_id: Concept id of the question to summarize.
+        denominator: Optional number to divide the counts by. When truthy, a
+            'response_rate' percentage column (rounded to 2 decimals) is added.
+
+    Returns:
+        None. A table and a bar chart are shown with display(); if no row
+        matches the question, an error message is printed instead.
+    """
+    matching = df.loc[df['question_concept_id'] == question_concept_id].copy()
+
+    # Count the distinct participants behind each answer.
+    per_answer = matching.groupby(['answer_concept_id', 'answer'])['person_id'].nunique()
+    counts = per_answer.reset_index().rename(columns={'person_id': 'n_participant'})
+    counts['answer_concept_id'] = np.int32(counts.answer_concept_id)
+    if denominator:
+        counts['response_rate'] = round(100 * counts['n_participant'] / denominator, 2)
+
+    # No row matched the requested question: report it and stop.
+    if question_concept_id not in matching['question_concept_id'].unique():
+        print("There is an error with your question_concept_id")
+        return
+
+    question_text = matching['question'].unique()[0]
+    print(f"Distribution of response to {question_text}")
+    # show table
+    display(counts)
+    # show graph
+    bar_chart = (ggplot(data=counts)
+                 + geom_bar(aes(x='answer', y='n_participant'), stat='identity')
+                 + coord_flip()
+                 + labs(y="Participant count", x="")
+                 + theme_bw())
+    display(bar_chart)
+
+# Run the summary using the three settings chosen at the top of this snippet.
+summarize_a_question_concept_id(survey_df, question_concept_id, denominator)
+
+
@@ -0,0 +1,24 @@
+# Use snippet 'summarize_a_survey_module' to print a table of participant counts by question in a module
+# The snippet assumes that a dataframe containing survey questions and answers already exists
+
+# Update the next 3 lines
+# survey_df: the survey dataframe to summarize. The function below reads its columns
+#   'survey', 'question_concept_id', 'question' and 'person_id'.
+# module_name: compared case-insensitively with the 'survey' column.
+# denominator: leave as None to show counts only; set a number to also get a 'response_rate'
+#   column computed as 100 * n_participant / denominator.
+survey_df = YOUR_DATASET_NAME_survey_df
+module_name = 'The Basics' # e.g: 'The Basics', 'Lifestyle', 'Overall Health', etc.
+denominator = None # e.g: 200000
+
+####################################################################################
+#                           DON'T CHANGE FROM HERE
+####################################################################################
+
+def summarize_a_module(df, module=None, denominator=None):
+    """Count distinct participants per survey question.
+
+    Args:
+        df: Survey dataframe with columns 'survey', 'question_concept_id',
+            'question' and 'person_id'.
+        module: Optional survey (module) name. When truthy, only rows whose
+            'survey' value matches it, ignoring case, are kept.
+        denominator: Optional number to divide the counts by. When truthy, a
+            'response_rate' percentage column (rounded to 2 decimals) is added.
+
+    Returns:
+        A dataframe with one row per (survey, question_concept_id, question)
+        and the distinct participant count in 'n_participant'.
+    """
+    if module:
+        in_module = df['survey'].str.lower() == module.lower()
+        df = df[in_module].copy()
+
+    per_question = df.groupby(['survey', 'question_concept_id', 'question'])['person_id'].nunique()
+    data = per_question.reset_index().rename(columns={'person_id': 'n_participant'})
+
+    if denominator:
+        data['response_rate'] = round(100 * data['n_participant'] / denominator, 2)
+    return data
+
+# Run the summary using the three settings chosen at the top of this snippet.
+summarize_a_module(df=survey_df, module=module_name, denominator=denominator)
+
@@ -0,0 +1,3 @@
+# Documentation
+
+[Documentation](https://aousupporthelp.zendesk.com/hc/en-us/articles/360039856791-How-to-use-code-snippets-in-Jupyter-Notebooks)
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+# Documentation`
	`2`	`+`
	`3`	`+[Documentation](https://aousupporthelp.zendesk.com/hc/en-us/articles/360039856791-How-to-use-code-snippets-in-Jupyter-Notebooks)`