Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion annual-survey.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ it should nevertheless be helpful as a guide.
1. Analyze the results and publish a blog post
- There is a dedicated section about this [below](#analysing-survey-results)
2. Add a link to the previous survey announcement blog post pointing to the blog post with survey results
3. Update [forge.rust-lang.org](https://forge.rust-lang.org/community/survey-faq.html?highlight=survey#where-can-i-see-the-previous-survey-reports) with a link to the results blog post.
3. Update [forge.rust-lang.org](https://forge.rust-lang.org/community/survey-faq.html#where-can-i-see-the-previous-survey-reports) with a link to the results blog post.
4. Update [FAQ](documents/Community-Survey-FAQ.md) with a link to the results blog post.

### Analysing survey results
Expand Down
18 changes: 16 additions & 2 deletions report/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,30 @@ This directory contains scripts that automate the generation of charts, reports
surveys created in SurveyHero.

Note that these scripts are intended to be used as a library, so you will need to write your own script to leverage them.
It is best to take a look at their usage from previous surveys, and start with that.
It is best to take a look at their usage from previous surveys, and start with that. The scripts will change over time; they are only kept compatible with the latest version of the annual survey. If you need to re-render the reports from an older survey, you should use the older version of these scripts from the corresponding year.

# Build and install

First, install the system dependencies: the development packages for `libxml2`, `libxslt1`, `zlib1g`, `libjpeg` and Python 3. For example, on Debian:
``` bash
sudo apt install libxml2-dev libxslt1-dev zlib1g-dev libjpeg-dev libpython3-dev
```

Ensure you have Python 3.8 installed (the final 3.8 patch release is 3.8.20). Specifically, an old version of the `lxml` library is used, and due to [this bug](https://bugs.launchpad.net/lxml/+bug/1973155) a correspondingly old version of Python is required. If your distribution no longer ships Python 3.8.x, you will have to compile it yourself ([instructions](https://stackoverflow.com/a/62831268)).

To use the scripts, you should install their dependencies first:
```bash
$ python3 -m venv venv
$ source venv/bin/activate
(venv) $ pip install -U setuptools wheel pip
(venv) $ pip install -U setuptools wheel pip
(venv) $ pip install -r requirements.txt
```

Also make sure to install the latest version of the Pillow library (this step fixes a `ValueError` "WordCloud Only Supported for TrueType fonts"):
```bash
(venv) $ pip install -U pillow
```

Then add this directory to the `PYTHONPATH` of your main Python script, and use e.g. `from surveyhero.parser import parse_surveyhero_report`.

## Useful functions
Expand Down
4 changes: 2 additions & 2 deletions report/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
plotly==5.18.0
kaleido==0.2.1
pandas==2.1.4
matplotlib==3.8.2
pandas==2.0.3
matplotlib==3.7.5
wordcloud==1.9.3
elsie[cairo]==3.4
beautifulsoup4==4.12.3
Expand Down
88 changes: 71 additions & 17 deletions report/surveyhero/chart.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,11 @@ def format_title(question: Question, include_kind: bool = False) -> str:
return f'<b>{wrap_text(question.question, max_width=75)}</b><br /><span style="font-size: 0.8em;">(total responses = {question.total_responses}{kind})</span>'


def wrap_text(text: str, max_width: int) -> str:
text = textwrap.wrap(text, width=max_width, break_long_words=False)
text = "<br />".join(text)
def wrap_text(text: str, max_width: int, override_line_size: Optional[str] = None) -> str:
lines = textwrap.wrap(text, width=max_width, break_long_words=False)
if override_line_size is not None:
lines = [f"<span style='font-size: {override_line_size}'>{line}</span>" for line in lines]
text = "<br />".join(lines)
return text


Expand All @@ -35,21 +37,44 @@ def make_bar_chart(
xaxis_tickangle=0,
max_tick_width=30,
legend_order: Optional[List[str]] = None,
layout_args: Optional[Dict[str, Any]] = None
layout_args: Optional[Dict[str, Any]] = None,
legend_params: Optional[Dict[str, Any]] = None,
) -> Figure:
assert len(questions) > 0
assert len(set(question.year for question in questions)) == len(questions)

# Sort questions by year to have a left-to-right reading order
questions = sorted(questions, key=lambda q: q.year)

# Plotly hardcodes the line height to be 1.3em, which is quite large, and it makes it
# hard to visually parse different lines vs different X axis ticks.
# Therefore, we use a hack - we set the xaxis font size to be 9 instead of 12
# (the default font size), to reduce the line height proportionally (9 * 1.3 instead of
# 12 * 1.3).
# And then we inflate the font size of the individual lines by 12/9 to make the actual
# font size be the same as before applying the hack.
xaxis_font_size = 9
override_line_size = f"{12 / xaxis_font_size:.1f}em"

if legend_order is not None:
legend_order = [wrap_text(l, max_width=max_tick_width) for l in legend_order]
# We need to apply the size hack also to the legend, otherwise the answers won't match
legend_order = [wrap_text(
l,
max_width=max_tick_width,
override_line_size=override_line_size
) for l in legend_order]

data = defaultdict(list)
totals = {}

for question in questions:
assert question.is_simple()
for answer in question.kind.answers:
text = wrap_text(answer.answer, max_width=max_tick_width)
text = wrap_text(
answer.answer,
max_width=max_tick_width,
override_line_size=override_line_size
)

data["year"].append(str(question.year))
data["answer"].append(text)
Expand Down Expand Up @@ -80,7 +105,7 @@ def make_bar_chart(
counts = data.loc[data["Year"] == year, "count"].astype(np.float32)
data.loc[data["Year"] == year, "percent"] = (counts / total_count) * 100.0

main_year = str(questions[0].year)
main_year = str(questions[-1].year)

def sort_key(answer: str) -> int:
if legend_order is not None:
Expand All @@ -100,11 +125,18 @@ def generate_text(row) -> str:

data["text"] = data.apply(generate_text, axis=1)

palette = px.colors.qualitative.Plotly
# Make sure that we have a canonical assignment of colors to individual years
# If there is only a single year, we should assign it palette[0]
# If there are two years, the largest one should have palette[0], the other one palette[1] etc.
palette = palette[:len(questions)][::-1]

fig = px.bar(
data,
x="answer",
y="percent",
color="Year",
color_discrete_sequence=palette,
barmode="group",
text="text",
custom_data=["Year", "count"],
Expand All @@ -114,9 +146,13 @@ def generate_text(row) -> str:
fig.update_traces(
textposition="outside",
hovertemplate="Year: %{customdata[0]}<br />Count: %{customdata[1]}<br />Percent: %{text}<extra></extra>",
textangle=-90 if bar_label_vertical else 0,
textangle=90 if bar_label_vertical else 0,
)

legend = {}
if legend_params is not None:
legend.update(legend_params)

layout_args = layout_args or {}
fig.update_layout(
meta="bar-chart",
Expand All @@ -129,6 +165,8 @@ def generate_text(row) -> str:
xaxis_title=None,
# xaxis_tickwidth=40,
xaxis_tickangle=xaxis_tickangle,
# See usage of `override_line_size` above
xaxis_tickfont=dict(size=xaxis_font_size),
yaxis_title="Percent out of all responses (%)",
yaxis_range=[0, 119],
yaxis_ticksuffix="%",
Expand All @@ -144,6 +182,7 @@ def generate_text(row) -> str:
pad=10,
b=10
),
legend=legend,
dragmode="pan",
**layout_args
)
Expand Down Expand Up @@ -239,10 +278,12 @@ def make_matrix_chart(
question: Question,
categories: List[str],
category_label: str,
height=600,
option_label: Optional[str] = None,
height: Optional[int] = None,
horizontal: bool = False,
max_label_width=20,
legend_params: Optional[Dict[str, Any]] = None
legend_params: Optional[Dict[str, Any]] = None,
textposition = "outside"
) -> Figure:
"""
Create a matrix chart with different categories.
Expand Down Expand Up @@ -278,6 +319,12 @@ def make_matrix_chart(
if not horizontal:
keys = dict(y="Count", x="Category")

if height is None:
if horizontal:
height = 600
else:
height = 1000

fig = px.bar(
df,
**keys,
Expand All @@ -287,12 +334,12 @@ def make_matrix_chart(
Category=group_keys
),
title=format_title(question),
height=1000 if not horizontal else height,
height=height,
hover_data=[category_label]
)
fig.update_traces(
orientation="h" if horizontal else "v",
textposition="outside",
textposition=textposition,
hovertemplate=f"Category: %{{y}}<br />{category_label}: %{{customdata[0]}}<br />Percent: %{{text}}<extra></extra>",
)

Expand All @@ -302,7 +349,18 @@ def make_matrix_chart(

layout_args = {}
if horizontal:
layout_args["xaxis_range"] = [0, 110]
if textposition != "inside":
layout_args["xaxis_range"] = [0, 110]
else:
layout_args["xaxis_range"] = [0, 100]
layout_args["xaxis_title"] = None
layout_args["xaxis_ticksuffix"] = "%"
layout_args["yaxis_ticksuffix"] = ""
layout_args["yaxis_title"] = option_label
else:
layout_args["yaxis_title"] = None
layout_args["xaxis_title"] = option_label
layout_args["yaxis_ticksuffix"] = "%"

fig.update_layout(
meta="matrix-chart",
Expand All @@ -312,13 +370,9 @@ def make_matrix_chart(
font_family="Rockwell",
),
# hovermode="y unified",
yaxis_title=None,
yaxis_tickangle=0,
# https://stackoverflow.com/a/52397461/1107768
yaxis_ticksuffix=" ",
yaxis_fixedrange=True,
xaxis_title="Percent out of the category (%)",
xaxis_ticksuffix="%",
xaxis_fixedrange=True,
legend=legend,
dragmode="pan",
Expand Down
6 changes: 2 additions & 4 deletions report/surveyhero/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,8 @@ class ChartReport:
def __init__(self):
self.charts: Dict[str, ChartRenderer] = {}

def add_bar_chart(self, name: str, question: Question, baseline: Optional[Question] = None, **kwargs):
questions = [question]
if baseline is not None:
questions.append(baseline)
def add_bar_chart(self, name: str, question: Question, *baselines: Question, **kwargs):
questions = [question] + list(baselines)

def render_fn(**args):
return make_bar_chart(questions=questions, **join(kwargs, args))
Expand Down
7 changes: 5 additions & 2 deletions report/surveyhero/survey.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ def rename_answers(self, diff: Dict[str, Optional[str]]) -> "SimpleQuestion":
continue
answer = dataclasses.replace(answer, answer=updated)
answers.append(answer)
assert len(diff) == 0
if len(diff) != 0:
raise Exception(f"Some diffs were not applied: {diff}\nAnswers: {self.answers}")
return dataclasses.replace(self, answers=answers)


Expand All @@ -42,7 +43,7 @@ def rename_answers(self, diff: Dict[str, str]) -> "MatrixQuestion":
group = diff.pop(group)
answer_groups[group] = items
if len(diff) > 0:
raise Exception(f"Rename answers diff not empty: {diff}")
raise Exception(f"Rename answers diff not empty: {diff}. Answers: {self.answer_groups}")
return dataclasses.replace(self, answer_groups=answer_groups)


Expand Down Expand Up @@ -77,6 +78,8 @@ def combine_answers(self, diff: Dict[str, List[str]]) -> "Question":
for (target, old_answers) in diff.items():
count = 0
for answer in old_answers:
if answer not in answers_orig:
raise Exception(f"Answer {answer} not in {answers_orig}")
count += answers_orig[answer].count
answers_orig.pop(answer)
assert count > 0
Expand Down
Loading