Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions report/.python-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3.10
26 changes: 5 additions & 21 deletions report/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,27 +7,11 @@ It is best to take a look at their usage from previous surveys, and start with t

# Build and install

First install the system dependencies: the development packages for `libxml2`, `libxslt1`, `zlib1g`, `libjpeg` and python3. For example, on Debian:
``` bash
sudo apt install libxml2-dev libxslt1-dev zlib1g-dev libjpeg-dev libpython3-dev
```

Ensure you have Python 3.8 installed (the last minor release is 3.8.20). Specifically, an old version of the `lxml` library is used, and due to [this bug](https://bugs.launchpad.net/lxml/+bug/1973155) a correspondingly old version of Python is required. If your distribution no longer ships Python 3.8.x, you'll have to compile it yourself ([instructions](https://stackoverflow.com/a/62831268)).

To use the scripts, you should install their dependencies first:
```bash
$ python3 -m venv venv
$ source venv/bin/activate
(venv) $ pip install -U setuptools wheel pip
(venv) $ pip install -r requirements.txt
```

Also make sure to upgrade the Pillow library (this step fixes a ValueError "WordCloud Only Supported for TrueType fonts"):
```bash
(venv) $ pip install -U pillow
```

and then add this directory to the `PYTHONPATH` of your main Python script, and then use e.g. `from surveyhero.parser import parse_surveyhero_report`.
Install [uv](https://docs.astral.sh/uv/getting-started/installation/).

Then run `uv sync` to initialize a virtual environment, add this directory to the `PYTHONPATH` of your main Python script, and import what you need, e.g. `from surveyhero.parser import parse_surveyhero_report`.

You can then execute your analysis scripts using `uv run <script>`.

## Useful functions
First, you will probably want to export data from SurveyHero into two CSV files - one containing the aggregated data from
Expand Down
19 changes: 19 additions & 0 deletions report/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Packaging metadata and runtime dependencies for the survey report helper scripts.
[project]
name = "report"
version = "0.1.0"
description = "Helper scripts for survey reports"
readme = "README.md"
# Lower bound on the Python interpreter version accepted for this project.
requires-python = ">=3.10"
# Runtime dependencies: `==` entries are pinned to one exact release,
# `>=` entries only set a lower bound.
dependencies = [
"beautifulsoup4>=4.13.4",
# NOTE(review): kaleido is pinned to 0.2.1 while plotly below is open-ended (>=6);
# confirm that the plotly release that gets resolved still supports this kaleido
# version for static image export.
"kaleido==0.2.1",
"matplotlib==3.7.5",
"multiprocess>=0.70.18",
"nelsie==0.16",
"pandas==2.3.1",
"pillow>=11.3.0",
"plotly>=6",
"pyarrow==21.0.0",
"tqdm==4.66.1",
"wordcloud==1.9.3",
]
11 changes: 0 additions & 11 deletions report/requirements.txt

This file was deleted.

30 changes: 25 additions & 5 deletions report/surveyhero/chart.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,12 @@ def format_title(question: Question, include_kind: bool = False) -> str:
kind = "single answer" if question.is_single_answer() else "multiple answers"
kind = f", {kind}"

return f'<b>{wrap_text(question.question, max_width=75)}</b><br /><span style="font-size: 0.8em;">(total responses = {question.total_responses}{kind})</span>'
title = question.question
if "\n" in title:
title = title.replace("\n", "<br />")
else:
title = wrap_text(title, max_width=75)
return f'<b>{title}</b><br /><span style="font-size: 0.8em;">(total responses = {question.total_responses}{kind})</span>'


def wrap_text(text: str, max_width: int, override_line_size: Optional[str] = None) -> str:
Expand All @@ -39,7 +44,14 @@ def make_bar_chart(
legend_order: Optional[List[str]] = None,
layout_args: Optional[Dict[str, Any]] = None,
legend_params: Optional[Dict[str, Any]] = None,
sort_by_pct=True,
range=(0, 119)
) -> Figure:
"""
By default, the X axis is sorted in decreasing order by percentage counts.
If `sort_by_pct` is `False`, the original order of answers in the first
question is kept.
"""
assert len(questions) > 0
assert len(set(question.year for question in questions)) == len(questions)

Expand All @@ -56,6 +68,9 @@ def make_bar_chart(
xaxis_font_size = 9
override_line_size = f"{12 / xaxis_font_size:.1f}em"

if legend_order is None and not sort_by_pct:
legend_order = [a.answer for a in questions[0].kind.answers]

if legend_order is not None:
# We need to apply the size hack also to the legend, otherwise the answers won't match
legend_order = [wrap_text(
Expand Down Expand Up @@ -168,7 +183,7 @@ def generate_text(row) -> str:
# See usage of `override_line_size` above
xaxis_tickfont=dict(size=xaxis_font_size),
yaxis_title="Percent out of all responses (%)",
yaxis_range=[0, 119],
yaxis_range=range,
yaxis_ticksuffix="%",
yaxis_fixedrange=True,
title_text=format_title(questions[0], include_kind=True),
Expand Down Expand Up @@ -276,21 +291,26 @@ def make_pie_chart(

def make_matrix_chart(
question: Question,
categories: List[str],
category_label: str,
categories: Optional[List[str]] = None,
category_label: Optional[str] = None,
option_label: Optional[str] = None,
height: Optional[int] = None,
horizontal: bool = False,
max_label_width=20,
legend_params: Optional[Dict[str, Any]] = None,
textposition = "outside"
textposition="outside"
) -> Figure:
"""
Create a matrix chart with different categories.
`categories`: List of categories, sorted from most to least important
"""
assert isinstance(question.kind, MatrixQuestion)

if categories is None:
categories = [a.answer for a in next(iter(question.kind.answer_groups.values()))]
if category_label is None:
category_label = "Response"

mapping = dict(zip(categories[::-1], range(1, len(categories) + 1)))

items = question.kind.answer_groups.items()
Expand Down
19 changes: 16 additions & 3 deletions report/surveyhero/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
from collections import defaultdict
from pathlib import Path

from .survey import SurveyFullAnswers, Question, MatrixQuestion, Answer, SimpleQuestion, SurveyReport
from .survey import SurveyFullAnswers, Question, MatrixQuestion, Answer, SimpleQuestion, \
SurveyReport, RatingQuestion, RatingAnswer


def parse_surveyhero_answers(path: Path, year: int) -> SurveyFullAnswers:
Expand Down Expand Up @@ -46,13 +47,15 @@ def parse_surveyhero_report(path: Path, year: int) -> SurveyReport:
questions = []

for row in csv.reader(f):
if row and "Answer" in row:
if row and ("Answer" in row or "Rating" in row):
if active_question is not None:
questions.append(active_question)
question = row[0]
kind = SimpleQuestion(answers=[])
if row[1] == "Row":
kind = MatrixQuestion(answer_groups=defaultdict(list))
elif row[2] == "Rating":
kind = RatingQuestion(answers=[])

count = int(COUNT_REGEX.search(question).group(1))
question = question[:question.rindex("(")].strip()
Expand All @@ -69,7 +72,7 @@ def parse_surveyhero_report(path: Path, year: int) -> SurveyReport:
if all(r == "" for r in row):
# Empty row
continue
elif "Average" in row or "Standard Deviation" in row:
elif "Average" in row or "Standard Deviation" in row or "Net Promoter Score" in row:
# Statistics
continue
else:
Expand All @@ -88,6 +91,16 @@ def parse_surveyhero_report(path: Path, year: int) -> SurveyReport:
answer=normalize_answer(answer),
count=count,
))
elif isinstance(active_question.kind, RatingQuestion):
label = row[1]
rating = int(row[2])
count = int(row[3])
active_question.kind.answers.append(RatingAnswer(
answer=Answer(
answer=normalize_answer(label),
count=count),
rating=rating
))
else:
print(row)
assert False
Expand Down
Loading