Skip to content

Commit 916394d

Browse files
authored
Merge pull request #344 from Kobzol/compiler-performance-survey-analysis
Compiler performance survey analysis
2 parents c2a521a + 4e04ed0 commit 916394d

File tree

15 files changed

+1817
-94
lines changed

15 files changed

+1817
-94
lines changed

report/.python-version

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
3.10

report/README.md

Lines changed: 5 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -7,27 +7,11 @@ It is best to take a look at their usage from previous surveys, and start with t
77

88
# Build and install
99

10-
First install the system dependencies: the development packages for `libxml2`, `libxslt1`, `zlib1g`, `libjpeg` and Python 3. For example, on Debian:
11-
``` bash
12-
sudo apt install libxml2-dev libxslt1-dev zlib1g-dev libjpeg-dev libpython3-dev
13-
```
14-
15-
Ensure you have Python 3.8 installed (the final release of the 3.8 series is 3.8.20). An old version of the `lxml` library is used, and due to [this bug](https://bugs.launchpad.net/lxml/+bug/1973155) a correspondingly old version of Python is required. If your distribution no longer ships Python 3.8.x, you will have to compile it yourself ([instructions](https://stackoverflow.com/a/62831268)).
16-
17-
To use the scripts, you should install their dependencies first:
18-
```bash
19-
$ python3 -m venv venv
20-
$ source venv/venv/bin/activate
21-
(venv) $ pip install -U setuptools wheel pip
22-
(venv) $ pip install -r requirements.txt
23-
```
24-
25-
Also make sure to install the Pillow library (this step fixes a ValueError "WordCloud Only Supported for TrueType fonts"):
26-
```bash
27-
(venv) $ pip install -U pillow
28-
```
29-
30-
Then add this directory to the `PYTHONPATH` of your main Python script and import what you need, e.g. `from surveyhero.parser import parse_surveyhero_report`.
10+
Install [uv](https://docs.astral.sh/uv/getting-started/installation/).
11+
12+
Then run `uv sync` to initialize a virtual environment. Add this directory to the `PYTHONPATH` of your main Python script, and then import what you need, e.g. `from surveyhero.parser import parse_surveyhero_report`.
13+
14+
You can then execute your analysis scripts using `uv run <script>`.
3115

3216
## Useful functions
3317
First, you will probably want to export data from SurveyHero into two CSV files - one containing the aggregated data from

report/pyproject.toml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
[project]
2+
name = "report"
3+
version = "0.1.0"
4+
description = "Helper scripts for survey reports"
5+
readme = "README.md"
6+
requires-python = ">=3.10"
7+
dependencies = [
8+
"beautifulsoup4>=4.13.4",
9+
"kaleido==0.2.1",
10+
"matplotlib==3.7.5",
11+
"multiprocess>=0.70.18",
12+
"nelsie==0.16",
13+
"pandas==2.3.1",
14+
"pillow>=11.3.0",
15+
"plotly>=6",
16+
"pyarrow==21.0.0",
17+
"tqdm==4.66.1",
18+
"wordcloud==1.9.3",
19+
]

report/requirements.txt

Lines changed: 0 additions & 11 deletions
This file was deleted.

report/surveyhero/chart.py

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,12 @@ def format_title(question: Question, include_kind: bool = False) -> str:
1919
kind = "single answer" if question.is_single_answer() else "multiple answers"
2020
kind = f", {kind}"
2121

22-
return f'<b>{wrap_text(question.question, max_width=75)}</b><br /><span style="font-size: 0.8em;">(total responses = {question.total_responses}{kind})</span>'
22+
title = question.question
23+
if "\n" in title:
24+
title = title.replace("\n", "<br />")
25+
else:
26+
title = wrap_text(title, max_width=75)
27+
return f'<b>{title}</b><br /><span style="font-size: 0.8em;">(total responses = {question.total_responses}{kind})</span>'
2328

2429

2530
def wrap_text(text: str, max_width: int, override_line_size: Optional[str] = None) -> str:
@@ -39,7 +44,14 @@ def make_bar_chart(
3944
legend_order: Optional[List[str]] = None,
4045
layout_args: Optional[Dict[str, Any]] = None,
4146
legend_params: Optional[Dict[str, Any]] = None,
47+
sort_by_pct=True,
48+
range=(0, 119)
4249
) -> Figure:
50+
"""
51+
By default, the X axis is sorted in decreasing order by percentage counts.
52+
If `sort_by_pct` is `False`, the original order of answers in the first
53+
question is kept.
54+
"""
4355
assert len(questions) > 0
4456
assert len(set(question.year for question in questions)) == len(questions)
4557

@@ -56,6 +68,9 @@ def make_bar_chart(
5668
xaxis_font_size = 9
5769
override_line_size = f"{12 / xaxis_font_size:.1f}em"
5870

71+
if legend_order is None and not sort_by_pct:
72+
legend_order = [a.answer for a in questions[0].kind.answers]
73+
5974
if legend_order is not None:
6075
# We need to apply the size hack also to the legend, otherwise the answers won't match
6176
legend_order = [wrap_text(
@@ -168,7 +183,7 @@ def generate_text(row) -> str:
168183
# See usage of `override_line_size` above
169184
xaxis_tickfont=dict(size=xaxis_font_size),
170185
yaxis_title="Percent out of all responses (%)",
171-
yaxis_range=[0, 119],
186+
yaxis_range=range,
172187
yaxis_ticksuffix="%",
173188
yaxis_fixedrange=True,
174189
title_text=format_title(questions[0], include_kind=True),
@@ -276,21 +291,26 @@ def make_pie_chart(
276291

277292
def make_matrix_chart(
278293
question: Question,
279-
categories: List[str],
280-
category_label: str,
294+
categories: Optional[List[str]] = None,
295+
category_label: Optional[str] = None,
281296
option_label: Optional[str] = None,
282297
height: Optional[int] = None,
283298
horizontal: bool = False,
284299
max_label_width=20,
285300
legend_params: Optional[Dict[str, Any]] = None,
286-
textposition = "outside"
301+
textposition="outside"
287302
) -> Figure:
288303
"""
289304
Create a matrix chart with different categories.
290305
`categories`: List of categories, sorted from most to least important
291306
"""
292307
assert isinstance(question.kind, MatrixQuestion)
293308

309+
if categories is None:
310+
categories = [a.answer for a in next(iter(question.kind.answer_groups.values()))]
311+
if category_label is None:
312+
category_label = "Response"
313+
294314
mapping = dict(zip(categories[::-1], range(1, len(categories) + 1)))
295315

296316
items = question.kind.answer_groups.items()

report/surveyhero/parser.py

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33
from collections import defaultdict
44
from pathlib import Path
55

6-
from .survey import SurveyFullAnswers, Question, MatrixQuestion, Answer, SimpleQuestion, SurveyReport
6+
from .survey import SurveyFullAnswers, Question, MatrixQuestion, Answer, SimpleQuestion, \
7+
SurveyReport, RatingQuestion, RatingAnswer
78

89

910
def parse_surveyhero_answers(path: Path, year: int) -> SurveyFullAnswers:
@@ -46,13 +47,15 @@ def parse_surveyhero_report(path: Path, year: int) -> SurveyReport:
4647
questions = []
4748

4849
for row in csv.reader(f):
49-
if row and "Answer" in row:
50+
if row and ("Answer" in row or "Rating" in row):
5051
if active_question is not None:
5152
questions.append(active_question)
5253
question = row[0]
5354
kind = SimpleQuestion(answers=[])
5455
if row[1] == "Row":
5556
kind = MatrixQuestion(answer_groups=defaultdict(list))
57+
elif row[2] == "Rating":
58+
kind = RatingQuestion(answers=[])
5659

5760
count = int(COUNT_REGEX.search(question).group(1))
5861
question = question[:question.rindex("(")].strip()
@@ -69,7 +72,7 @@ def parse_surveyhero_report(path: Path, year: int) -> SurveyReport:
6972
if all(r == "" for r in row):
7073
# Empty row
7174
continue
72-
elif "Average" in row or "Standard Deviation" in row:
75+
elif "Average" in row or "Standard Deviation" in row or "Net Promoter Score" in row:
7376
# Statistics
7477
continue
7578
else:
@@ -88,6 +91,16 @@ def parse_surveyhero_report(path: Path, year: int) -> SurveyReport:
8891
answer=normalize_answer(answer),
8992
count=count,
9093
))
94+
elif isinstance(active_question.kind, RatingQuestion):
95+
label = row[1]
96+
rating = int(row[2])
97+
count = int(row[3])
98+
active_question.kind.answers.append(RatingAnswer(
99+
answer=Answer(
100+
answer=normalize_answer(label),
101+
count=count),
102+
rating=rating
103+
))
91104
else:
92105
print(row)
93106
assert False

0 commit comments

Comments
 (0)