Skip to content

Commit 23e21de

Browse files
authored
Merge pull request #20 from UW-Madison-DSI/features
Fix issue #19
2 parents 4e34867 + 2b9d6bc commit 23e21de

57 files changed

Lines changed: 549 additions & 395 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.
-263 Bytes
Binary file not shown.
1 Byte
Binary file not shown.
0 Bytes
Binary file not shown.
0 Bytes
Binary file not shown.
554 Bytes
Binary file not shown.

docs/_build/html/_sources/campus.md

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ df = survey_results
2929
3030
# Compute & glue (no visible output)
3131
valuable_pct = int(100 * df['QID24'].eq("Very valuable").mean())
32-
_ = glue("valuable_pct", valuable_pct, display=False) # prevents display
32+
glue("valuable_pct", valuable_pct, display=False) # prevents display
3333
3434
# Figure
3535
qid24_order = ["Very valuable","Some value","Neutral","No value"]
@@ -74,8 +74,8 @@ agree_options = {"Strongly agree", "Somewhat agree"}
7474
# ----- Figure 2: "There is a vibrant culture at UNI" by Respondent Type -----
7575
7676
77-
agree_vibrant = round(100 * df['QID23'].isin(["Strongly agree","Somewhat agree"]).mean(),2)
78-
_ = glue("agree_vibrant", agree_vibrant, display=False) # prevents display
77+
agree_vibrant = float(round(100 * df['QID23'].isin(["Strongly agree","Somewhat agree"]).mean(),2))
78+
glue("agree_vibrant", agree_vibrant, display=False) # prevents display
7979
8080
qid23_order = [
8181
"Strongly agree",
@@ -92,13 +92,13 @@ df_c2 = (
9292
.rename(columns={"QID4": "Respondent Type"})
9393
)
9494
95-
pct_makes_sense = round(
95+
pct_makes_sense = float(round(
9696
100 * df['QID23'].isin([
9797
"Strongly agree",
9898
"Somewhat agree",
9999
"Neither agree nor disagree"
100-
]).mean(), 2)
101-
_ = glue("pct_makes_sense", pct_makes_sense, display=False) # prevents display
100+
]).mean(), 2))
101+
glue("pct_makes_sense", pct_makes_sense, display=False) # prevents display
102102
103103
c2_df = (
104104
df_c2.groupby(["QID23", "Respondent Type"], observed=True, dropna=False)
@@ -127,7 +127,7 @@ In comparison, only **{glue:}`agree_vibrant`%** agreed that there is a vibrant o
127127
```{raw} html
128128
:file: _static/fig2.html
129129
```
130-
**{glue:}`pct_makes_sense`%** of respondents agreed that **it makes sense for the university to contribute to open source software that is vital to its educational and research enterprise**.
130+
**{glue:}`pct_makes_sense`%** of respondents agreed that **"it makes sense for the university to contribute to open source software that is vital to its educational and research enterprise"**.
131131

132132
## Open Source Training On Campus
133133

@@ -139,7 +139,7 @@ In comparison, only **{glue:}`agree_vibrant`%** agreed that there is a vibrant o
139139
training_yes_pct = prop(df["QID25"], lambda s: s == "Yes")
140140
141141
received_training = int(100*df['QID25'].eq("Yes").astype(int).mean())
142-
_ = glue("received_training", received_training, display=False) # prevents display
142+
glue("received_training", received_training, display=False) # prevents display
143143
144144
145145
df_c3 = (
@@ -227,8 +227,8 @@ fig4.write_html('_static/fig4.html', full_html=False, include_plotlyjs='cdn')
227227
# Interest in more training (QID28) and OSPO workshops (QID29)
228228
more_training_pct = prop(df["QID28"], lambda s: s == "Yes")
229229
ospo_workshops_pct = prop(df["QID29"], lambda s: s == "Yes")
230-
_1 = glue("more_training_pct", more_training_pct, display=False) # prevents display
231-
_2 = glue("ospo_workshops_pct", ospo_workshops_pct, display=False) # prevents display
230+
glue("more_training_pct", more_training_pct, display=False) # prevents display
231+
glue("ospo_workshops_pct", ospo_workshops_pct, display=False) # prevents display
232232
233233
```
234234

docs/_build/html/_sources/sample.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,7 @@ df_ten = survey_results.copy()
229229
df_ten = df_ten[df_ten["QID4"].isin(["Faculty", "Staff"])].copy()
230230
231231
count_contributed_staff_fac = len(df_ten)
232-
pct_contributed_staff_fac = round(100*count_contributed_staff_fac/len(df),2)
232+
pct_contributed_staff_fac = float(round(100*count_contributed_staff_fac/len(df),2))
233233
234234
235235
glue("pct_contributed_staff_fac", pct_contributed_staff_fac, display=False)
@@ -313,7 +313,7 @@ df_maj["QID4"] = df_maj["QID4"].map(
313313
)
314314
315315
count_contributed_students = len(df_maj)
316-
pct_contributed_students = round(100*(count_contributed_students/len(df)),2)
316+
pct_contributed_students = float(round(100*(count_contributed_students/len(df)),2))
317317
glue("count_contributed_students", count_contributed_students, display=False)
318318
glue("pct_contributed_students", pct_contributed_students, display=False)
319319
@@ -397,7 +397,7 @@ fig.write_html("_static/students_demog.html", full_html=False, include_plotlyjs=
397397
```
398398

399399

400-
**{glue:}`count_contributed_students`** respondents identified at students (**{glue:}`pct_contributed_students`$** of respondents). Students came from degree programs in the following subjects:
400+
**{glue:}`count_contributed_students`** respondents identified as students (**{glue:}`pct_contributed_students`%** of respondents). Students came from degree programs in the following subjects:
401401

402402
```{raw} html
403403
:file: _static/students_demog.html

docs/_build/html/_sources/usage.md

Lines changed: 52 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ fig3.write_html('_static/familiarity_educational.html', full_html=False, include
172172
```{code-cell} ipython3
173173
:tags: [remove-input]
174174
175-
respondents_pct = prop(df["QID13"], lambda s: s == "Yes")
175+
respondents_pct = df["QID13"].eq("Yes").mean()
176176
glue("respondents_pct", respondents_pct, display=False)
177177
178178
# Text processing for word frequency analysis
@@ -181,7 +181,8 @@ rm_terms = {
181181
'programming', 'language', 'languages', 'tools', 'code', 'etc', 'package',
182182
'packages', 'list', 'everything', 'including', 'libraries', 'like',
183183
'various', 'research', 'statistical', 'ecosystem', 'opensource', 'web',
184-
'google', 'system', 'compilers', 'academy', 'numerous', 'systems'
184+
'google', 'system', 'compilers', 'academy', 'numerous', 'systems', 'the', 'for',
185+
'are', 'with', 'that', 'such','most', 'learn','all'
185186
}
186187
187188
def clean_text(text):
@@ -191,7 +192,7 @@ def clean_text(text):
191192
text = re.sub(r'[^\w\s]', ' ', text.lower())
192193
# Split into words and filter
193194
words = [word for word in text.split()
194-
if word not in rm_terms and len(word) > 2]
195+
if word not in rm_terms and len(word) >= 3]
195196
return ' '.join(words)
196197
197198
# Process text responses
@@ -203,47 +204,56 @@ word_freq = Counter(all_words)
203204
tools_highlight = ['python', 'r', 'julia', 'git', 'latex']
204205
top_25_words = dict(word_freq.most_common(25))
205206
206-
tools_df = pd.DataFrame([
207-
{'word': word, 'freq': freq, 'pct': freq / len(df)}
208-
for word, freq in top_25_words.items()
209-
if word != '•'
210-
]).sort_values('freq')
207+
# Denominator: only respondents who answered QID15 (usually what you want)
208+
denom = df['QID15'].notna().sum() or 1
211209
212-
# Create lollipop plot
213-
fig4 = go.Figure()
210+
tools_df = (pd.DataFrame(
211+
[{'word': w, 'freq': f, 'pct': f/denom} for w, f in Counter(
212+
' '.join(df['QID15'].dropna().apply(clean_text)).split()
213+
).most_common(25) if w != '•']
214+
).sort_values('freq'))
214215
215-
colors = ['#1f77b4' if word in tools_highlight else '#7f7f7f'
216-
for word in tools_df['word']]
216+
y_pos = np.arange(len(tools_df)) # numeric y
217+
colors = ['#1f77b4' if w in tools_highlight else '#7f7f7f' for w in tools_df['word']]
217218
219+
fig4 = go.Figure()
220+
221+
# Scatter using numeric y, but show labels via ticktext
218222
fig4.add_trace(go.Scatter(
219223
x=tools_df['pct'],
220-
y=tools_df['word'],
224+
y=y_pos,
225+
text=tools_df['word'],
221226
mode='markers+lines',
222227
marker=dict(size=8, color=colors),
223228
line=dict(color='lightgray', width=1),
224-
orientation='h',
225-
hovertemplate='Tool: %{y}<br>Percent: %{x:.2%}<extra></extra>'
229+
hovertemplate='Tool: %{text}<br>Percent: %{x:.2%}<extra></extra>'
226230
))
227231
228-
# Add line segments
229-
for i, row in tools_df.iterrows():
232+
# Lollipop stems
233+
for i, row in enumerate(tools_df.itertuples(index=False)):
230234
fig4.add_shape(
231235
type="line",
232-
x0=0, x1=row['pct'],
233-
y0=row['word'], y1=row['word'],
236+
x0=0, x1=row.pct,
237+
y0=i, y1=i,
234238
line=dict(color=colors[i], width=2)
235239
)
236240
241+
fig4.update_yaxes(
242+
tickmode='array',
243+
tickvals=y_pos,
244+
ticktext=tools_df['word'],
245+
automargin=True,
246+
tickfont=dict(size=11)
247+
)
237248
fig4.update_layout(
238-
xaxis=dict(
239-
title="Responses Identifying Open Source Tool Use",
240-
tickformat='.0%'
241-
),
242-
yaxis_title="",
243-
showlegend=False,
249+
xaxis=dict(title="Responses Identifying Open Source Tool Use", tickformat='.0%'),
250+
margin=dict(l=120),
251+
height=500,
244252
plot_bgcolor="white",
245-
paper_bgcolor="white"
253+
paper_bgcolor="white",
254+
showlegend=False
246255
)
256+
247257
fig4.show()
248258
fig4.write_html('_static/tools_lollipop.html', full_html=False, include_plotlyjs='cdn')
249259
```
@@ -278,8 +288,7 @@ licensed_df = pd.DataFrame([
278288
]).sort_values('freq')
279289
280290
# Handle None/NaN values
281-
if not licensed_df.empty:
282-
licensed_df = licensed_df[licensed_df['tool'] != 'nan']
291+
licensed_df = licensed_df[licensed_df['tool'] != 'nan']
283292
284293
# Create lollipop plot for licensed tools
285294
fig5 = go.Figure()
@@ -289,23 +298,30 @@ colors_licensed = ['#1f77b4' if tool in licensed_highlight else '#7f7f7f'
289298
290299
fig5.add_trace(go.Scatter(
291300
x=licensed_df['pct'],
292-
y=licensed_df['tool'],
301+
y=list(range(len(licensed_df))), # numeric positions
302+
text=licensed_df['tool'],
293303
mode='markers+lines',
294304
marker=dict(size=8, color=colors_licensed),
295305
line=dict(color='lightgray', width=1),
296-
orientation='h',
297-
hovertemplate='Tool: %{y}<br>Percent: %{x:.2%}<extra></extra>'
306+
hovertemplate='Tool: %{text}<br>Percent: %{x:.2%}<extra></extra>'
298307
))
308+
fig5.update_yaxes(
309+
tickmode='array',
310+
tickvals=list(range(len(licensed_df))),
311+
ticktext=licensed_df['tool']
312+
)
313+
299314
300315
# Add line segments
301-
for i, row in licensed_df.iterrows():
316+
for i, row in licensed_df.reset_index().iterrows():
302317
fig5.add_shape(
303318
type="line",
304319
x0=0, x1=row['pct'],
305-
y0=row['tool'], y1=row['tool'],
320+
y0=i, y1=i, # numeric index
306321
line=dict(color=colors_licensed[i], width=2)
307322
)
308323
324+
309325
fig5.update_layout(
310326
xaxis=dict(
311327
title="Responses Identifying Licensed Tool Use",
@@ -317,7 +333,9 @@ fig5.update_layout(
317333
paper_bgcolor="white"
318334
)
319335
fig5.show()
320-
fig5.write_html('_static/licensed_tools.html', full_html=False, include_plotlyjs='cdn')
336+
fig5.write_html('_static/licensed_tools.html',
337+
full_html=False,
338+
include_plotlyjs='cdn')
321339
```
322340

323341
```{raw} html

0 commit comments

Comments
 (0)