UW-Madison-DSI
diff --git a/‎docs/_build/.doctrees/campus.doctree‎
-263 Bytes b/‎docs/_build/.doctrees/campus.doctree‎
-263 Bytes
diff --git a/‎docs/_build/.doctrees/environment.pickle‎
1 Byte b/‎docs/_build/.doctrees/environment.pickle‎
1 Byte
diff --git a/‎docs/_build/.doctrees/sample.doctree‎
0 Bytes b/‎docs/_build/.doctrees/sample.doctree‎
0 Bytes
diff --git a/‎docs/_build/.doctrees/sentiments.doctree‎
0 Bytes b/‎docs/_build/.doctrees/sentiments.doctree‎
0 Bytes
diff --git a/‎docs/_build/.doctrees/usage.doctree‎
554 Bytes b/‎docs/_build/.doctrees/usage.doctree‎
554 Bytes
diff --git a/‎docs/_build/html/_sources/campus.md‎
Lines changed: 10 additions & 10 deletions b/‎docs/_build/html/_sources/campus.md‎
Lines changed: 10 additions & 10 deletions
diff --git a/‎docs/_build/html/_sources/sample.md‎
Lines changed: 3 additions & 3 deletions b/‎docs/_build/html/_sources/sample.md‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎docs/_build/html/_sources/usage.md‎
Lines changed: 52 additions & 34 deletions b/‎docs/_build/html/_sources/usage.md‎
Lines changed: 52 additions & 34 deletions
@@ -29,7 +29,7 @@ df = survey_results
 
 # Compute & glue (no visible output)
 valuable_pct = int(100 * df['QID24'].eq("Very valuable").mean())
-_ = glue("valuable_pct", valuable_pct, display=False)  # prevents display
+glue("valuable_pct", valuable_pct, display=False)  # prevents display
 
 # Figure
 qid24_order = ["Very valuable","Some value","Neutral","No value"]
@@ -74,8 +74,8 @@ agree_options = {"Strongly agree", "Somewhat agree"}
 # ----- Figure 2: "There is a vibrant culture at UNI" by Respondent Type -----
 
 
-agree_vibrant = round(100 * df['QID23'].isin(["Strongly agree","Somewhat agree"]).mean(),2)
-_ = glue("agree_vibrant", agree_vibrant, display=False)  # prevents display
+agree_vibrant = float(round(100 * df['QID23'].isin(["Strongly agree","Somewhat agree"]).mean(),2))
+glue("agree_vibrant", agree_vibrant, display=False)  # prevents display
 
 qid23_order = [
     "Strongly agree",
@@ -92,13 +92,13 @@ df_c2 = (
     .rename(columns={"QID4": "Respondent Type"})
 )
 
-pct_makes_sense = round(
+pct_makes_sense = float(round(
     100 * df['QID23'].isin([
         "Strongly agree",
         "Somewhat agree",
         "Neither agree nor disagree"
-    ]).mean(), 2)
-_ = glue("pct_makes_sense", pct_makes_sense, display=False)  # prevents display
+    ]).mean(), 2))
+glue("pct_makes_sense", pct_makes_sense, display=False)  # prevents display
 
 c2_df = (
     df_c2.groupby(["QID23", "Respondent Type"], observed=True, dropna=False)
@@ -127,7 +127,7 @@ In comparison, only **{glue:}`agree_vibrant`%** agreed that there is a vibrant o
 ```{raw} html
 :file: _static/fig2.html
 ```
-**{glue:}`pct_makes_sense`%** of respondents agreed that **“it makes sense for the university to contribute to open source software that is vital to its educational and research enterprise”**.
+**{glue:}`pct_makes_sense`%** of respondents agreed that **"it makes sense for the university to contribute to open source software that is vital to its educational and research enterprise"**.
 
 ## Open Source Training On Campus
 
@@ -139,7 +139,7 @@ In comparison, only **{glue:}`agree_vibrant`%** agreed that there is a vibrant o
 training_yes_pct = prop(df["QID25"], lambda s: s == "Yes")
 
 received_training = int(100*df['QID25'].eq("Yes").astype(int).mean())
-_ = glue("received_training", received_training, display=False)  # prevents display
+glue("received_training", received_training, display=False)  # prevents display
 
 
 df_c3 = (
@@ -227,8 +227,8 @@ fig4.write_html('_static/fig4.html', full_html=False, include_plotlyjs='cdn')
 # Interest in more training (QID28) and OSPO workshops (QID29)
 more_training_pct = prop(df["QID28"], lambda s: s == "Yes")
 ospo_workshops_pct = prop(df["QID29"], lambda s: s == "Yes")
-_1 = glue("more_training_pct", more_training_pct, display=False)  # prevents display
-_2 = glue("ospo_workshops_pct", ospo_workshops_pct, display=False)  # prevents display
+glue("more_training_pct", more_training_pct, display=False)  # prevents display
+glue("ospo_workshops_pct", ospo_workshops_pct, display=False)  # prevents display
 
 ```
 
 
@@ -229,7 +229,7 @@ df_ten = survey_results.copy()
 df_ten = df_ten[df_ten["QID4"].isin(["Faculty", "Staff"])].copy()
 
 count_contributed_staff_fac = len(df_ten)
-pct_contributed_staff_fac = round(100*count_contributed_staff_fac/len(df),2)
+pct_contributed_staff_fac = float(round(100*count_contributed_staff_fac/len(df),2))
 
 
 glue("pct_contributed_staff_fac", pct_contributed_staff_fac, display=False)
@@ -313,7 +313,7 @@ df_maj["QID4"] = df_maj["QID4"].map(
 )
 
 count_contributed_students = len(df_maj)
-pct_contributed_students = round(100*(count_contributed_students/len(df)),2)
+pct_contributed_students = float(round(100*(count_contributed_students/len(df)),2))
 glue("count_contributed_students", count_contributed_students, display=False)
 glue("pct_contributed_students", pct_contributed_students, display=False)
 
@@ -397,7 +397,7 @@ fig.write_html("_static/students_demog.html", full_html=False, include_plotlyjs=
 ```
 
 
-**{glue:}`count_contributed_students`** respondents identified at students (**{glue:}`pct_contributed_students`$** of respondents). Students came from degree programs in the following subjects:
+**{glue:}`count_contributed_students`** respondents identified as students (**{glue:}`pct_contributed_students`%** of respondents). Students came from degree programs in the following subjects:
 
 ```{raw} html
 :file: _static/students_demog.html
 
@@ -172,7 +172,7 @@ fig3.write_html('_static/familiarity_educational.html', full_html=False, include
 ```{code-cell} ipython3
 :tags: [remove-input]
 
-respondents_pct = prop(df["QID13"], lambda s: s == "Yes")
+respondents_pct = df["QID13"].eq("Yes").mean()
 glue("respondents_pct", respondents_pct, display=False)
  
 # Text processing for word frequency analysis
@@ -181,7 +181,8 @@ rm_terms = {
     'programming', 'language', 'languages', 'tools', 'code', 'etc', 'package',
     'packages', 'list', 'everything', 'including', 'libraries', 'like',
     'various', 'research', 'statistical', 'ecosystem', 'opensource', 'web',
-    'google', 'system', 'compilers', 'academy', 'numerous', 'systems'
+    'google', 'system', 'compilers', 'academy', 'numerous', 'systems', 'the', 'for',
+    'are', 'with', 'that', 'such','most', 'learn','all'
 }
 
 def clean_text(text):
@@ -191,7 +192,7 @@ def clean_text(text):
     text = re.sub(r'[^\w\s]', ' ', text.lower())
     # Split into words and filter
     words = [word for word in text.split() 
-             if word not in rm_terms and len(word) > 2]
+             if word not in rm_terms and len(word) >= 3]
     return ' '.join(words)
 
 # Process text responses
@@ -203,47 +204,56 @@ word_freq = Counter(all_words)
 tools_highlight = ['python', 'r', 'julia', 'git', 'latex']
 top_25_words = dict(word_freq.most_common(25))
 
-tools_df = pd.DataFrame([
-    {'word': word, 'freq': freq, 'pct': freq / len(df)}
-    for word, freq in top_25_words.items()
-    if word != '•'
-]).sort_values('freq')
+# Denominator: only respondents who answered QID15 (usually what you want)
+denom = df['QID15'].notna().sum() or 1
 
-# Create lollipop plot
-fig4 = go.Figure()
+tools_df = (pd.DataFrame(
+    [{'word': w, 'freq': f, 'pct': f/denom} for w, f in Counter(
+        ' '.join(df['QID15'].dropna().apply(clean_text)).split()
+    ).most_common(25) if w != '•']
+).sort_values('freq'))
 
-colors = ['#1f77b4' if word in tools_highlight else '#7f7f7f' 
-          for word in tools_df['word']]
+y_pos = np.arange(len(tools_df))  # numeric y
+colors = ['#1f77b4' if w in tools_highlight else '#7f7f7f' for w in tools_df['word']]
 
+fig4 = go.Figure()
+
+# Scatter using numeric y, but show labels via ticktext
 fig4.add_trace(go.Scatter(
     x=tools_df['pct'],
-    y=tools_df['word'],
+    y=y_pos,
+    text=tools_df['word'],
     mode='markers+lines',
     marker=dict(size=8, color=colors),
     line=dict(color='lightgray', width=1),
-    orientation='h',
-    hovertemplate='Tool: %{y}<br>Percent: %{x:.2%}<extra></extra>'
+    hovertemplate='Tool: %{text}<br>Percent: %{x:.2%}<extra></extra>'
 ))
 
-# Add line segments
-for i, row in tools_df.iterrows():
+# Lollipop stems
+for i, row in enumerate(tools_df.itertuples(index=False)):
     fig4.add_shape(
         type="line",
-        x0=0, x1=row['pct'],
-        y0=row['word'], y1=row['word'],
+        x0=0, x1=row.pct,
+        y0=i, y1=i,
         line=dict(color=colors[i], width=2)
     )
 
+fig4.update_yaxes(
+    tickmode='array',
+    tickvals=y_pos,
+    ticktext=tools_df['word'],
+    automargin=True,
+    tickfont=dict(size=11)
+)
 fig4.update_layout(
-    xaxis=dict(
-        title="Responses Identifying Open Source Tool Use",
-        tickformat='.0%'
-    ),
-    yaxis_title="",
-    showlegend=False,
+    xaxis=dict(title="Responses Identifying Open Source Tool Use", tickformat='.0%'),
+    margin=dict(l=120),
+    height=500,
     plot_bgcolor="white",
-    paper_bgcolor="white"
+    paper_bgcolor="white",
+    showlegend=False
 )
+
 fig4.show()
 fig4.write_html('_static/tools_lollipop.html', full_html=False, include_plotlyjs='cdn')
 ```
@@ -278,8 +288,7 @@ licensed_df = pd.DataFrame([
 ]).sort_values('freq')
 
 # Handle None/NaN values
-if not licensed_df.empty:
-    licensed_df = licensed_df[licensed_df['tool'] != 'nan']
+licensed_df = licensed_df[licensed_df['tool'] != 'nan']
 
 # Create lollipop plot for licensed tools
 fig5 = go.Figure()
@@ -289,23 +298,30 @@ colors_licensed = ['#1f77b4' if tool in licensed_highlight else '#7f7f7f'
 
 fig5.add_trace(go.Scatter(
     x=licensed_df['pct'],
-    y=licensed_df['tool'],
+    y=list(range(len(licensed_df))),   # numeric positions
+    text=licensed_df['tool'],
     mode='markers+lines',
     marker=dict(size=8, color=colors_licensed),
     line=dict(color='lightgray', width=1),
-    orientation='h',
-    hovertemplate='Tool: %{y}<br>Percent: %{x:.2%}<extra></extra>'
+    hovertemplate='Tool: %{text}<br>Percent: %{x:.2%}<extra></extra>'
 ))
+fig5.update_yaxes(
+    tickmode='array',
+    tickvals=list(range(len(licensed_df))),
+    ticktext=licensed_df['tool']
+)
+
 
 # Add line segments
-for i, row in licensed_df.iterrows():
+for i, row in licensed_df.reset_index().iterrows():
     fig5.add_shape(
         type="line",
         x0=0, x1=row['pct'],
-        y0=row['tool'], y1=row['tool'],
+        y0=i, y1=i,   # numeric index
         line=dict(color=colors_licensed[i], width=2)
     )
 
+
 fig5.update_layout(
     xaxis=dict(
         title="Responses Identifying Licensed Tool Use",
@@ -317,7 +333,9 @@ fig5.update_layout(
     paper_bgcolor="white"
 )
 fig5.show()
-fig5.write_html('_static/licensed_tools.html', full_html=False, include_plotlyjs='cdn')
+fig5.write_html('_static/licensed_tools.html', 
+        full_html=False, 
+        include_plotlyjs='cdn')
 ```
 
 ```{raw} html