rust-lang · Kobzol · Sep 9, 2025 · Jul 23, 2025 · Jul 23, 2025 · Jul 23, 2025
diff --git a/report/.python-version b/report/.python-version
@@ -0,0 +1 @@
+3.10
diff --git a/report/README.md b/report/README.md
@@ -7,27 +7,11 @@ It is best to take a look at their usage from previous surveys, and start with t
 
 # Build and install
 
-First install system dependencies, the development packages for `libxml2`, `libxslt1`, `zlib1g`, `libjpeg` and python3. For Debian is for example:
-``` bash
-sudo apt install libxml2-dev libxslt1-dev zlib1g-dev libjpeg-dev libpython3-dev
-```
-
-Ensure you have Python 3.8 installed (last minor release is 3.8.20). Specifically an old version of the `lxml` library is used and due to [this bug](https://bugs.launchpad.net/lxml/+bug/1973155) an accordingly old version of Python is required. If your distribution does not ship anymore with Python 3.8.x you'll have to compile it yourself ([instructions](https://stackoverflow.com/a/62831268)).
-
-To use the scripts, you should install their dependencies first:
-```bash
-$ python3 -m venv venv
-$ source venv/venv/bin/activate
-(venv) $ pip install -U setuptools wheel pip
-(venv) $ pip install -r requirements.txt
-```
-
-Also ensure to install the Pillow library (this step fixes a ValueError "WordCloud Only Supported for TrueType fonts")
-```bash
-(venv) $ pip install -U pillow
-```
-
-and then add this directory to the `PYTHONPATH` of your main Python script, and then use e.g. `from surveyhero.parser import parse_surveyhero_report`.
+Install [uv](https://docs.astral.sh/uv/getting-started/installation/).
+
+Then run `uv sync` to initialize a virtual environment. To use the helpers, add this directory to the `PYTHONPATH` of your main Python script and import what you need, e.g. `from surveyhero.parser import parse_surveyhero_report`.
+
+You can then execute your analysis scripts using `uv run <script>`.
 
 ## Useful functions
 First, you will probably want to export data from SurveyHero into two CSV files - one containing the aggregated data from

diff --git a/report/pyproject.toml b/report/pyproject.toml
@@ -0,0 +1,19 @@
+[project]
+name = "report"
+version = "0.1.0"
+description = "Helper scripts for survey reports"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+    "beautifulsoup4>=4.13.4",
+    "kaleido==0.2.1",
+    "matplotlib==3.7.5",
+    "multiprocess>=0.70.18",
+    "nelsie==0.16",
+    "pandas==2.3.1",
+    "pillow>=11.3.0",
+    "plotly>=6",
+    "pyarrow==21.0.0",
+    "tqdm==4.66.1",
+    "wordcloud==1.9.3",
+]
diff --git a/report/requirements.txt b/report/requirements.txt
diff --git a/report/surveyhero/chart.py b/report/surveyhero/chart.py
@@ -19,7 +19,12 @@ def format_title(question: Question, include_kind: bool = False) -> str:
         kind = "single answer" if question.is_single_answer() else "multiple answers"
         kind = f", {kind}"
 
-    return f'<b>{wrap_text(question.question, max_width=75)}</b><br /><span style="font-size: 0.8em;">(total responses = {question.total_responses}{kind})</span>'
+    title = question.question
+    if "\n" in title:
+        title = title.replace("\n", "<br />")
+    else:
+        title = wrap_text(title, max_width=75)
+    return f'<b>{title}</b><br /><span style="font-size: 0.8em;">(total responses = {question.total_responses}{kind})</span>'
 
 
 def wrap_text(text: str, max_width: int, override_line_size: Optional[str] = None) -> str:
@@ -39,7 +44,14 @@ def make_bar_chart(
         legend_order: Optional[List[str]] = None,
         layout_args: Optional[Dict[str, Any]] = None,
         legend_params: Optional[Dict[str, Any]] = None,
+        sort_by_pct=True,
+        range=(0, 119)
 ) -> Figure:
+    """
+    By default, the X axis is sorted in decreasing order by percentage counts.
+    If `sort_by_pct` is `False`, the original order of answers in the first
+    question is kept.
+    """
     assert len(questions) > 0
     assert len(set(question.year for question in questions)) == len(questions)
 
@@ -56,6 +68,9 @@ def make_bar_chart(
     xaxis_font_size = 9
     override_line_size = f"{12 / xaxis_font_size:.1f}em"
 
+    if legend_order is None and not sort_by_pct:
+        legend_order = [a.answer for a in questions[0].kind.answers]
+
     if legend_order is not None:
         # We need to apply the size hack also to the legend, otherwise the answers won't match
         legend_order = [wrap_text(
@@ -168,7 +183,7 @@ def generate_text(row) -> str:
         # See usage of `override_line_size` above
         xaxis_tickfont=dict(size=xaxis_font_size),
         yaxis_title="Percent out of all responses (%)",
-        yaxis_range=[0, 119],
+        yaxis_range=range,
         yaxis_ticksuffix="%",
         yaxis_fixedrange=True,
         title_text=format_title(questions[0], include_kind=True),
@@ -276,21 +291,26 @@ def make_pie_chart(
 
 def make_matrix_chart(
         question: Question,
-        categories: List[str],
-        category_label: str,
+        categories: Optional[List[str]] = None,
+        category_label: Optional[str] = None,
         option_label: Optional[str] = None,
         height: Optional[int] = None,
         horizontal: bool = False,
         max_label_width=20,
         legend_params: Optional[Dict[str, Any]] = None,
-        textposition = "outside"
+        textposition="outside"
 ) -> Figure:
     """
     Create a matrix chart with different categories.
     `categories`: List of categories, sorted from most to least important
     """
     assert isinstance(question.kind, MatrixQuestion)
 
+    if categories is None:
+        categories = [a.answer for a in next(iter(question.kind.answer_groups.values()))]
+    if category_label is None:
+        category_label = "Response"
+
     mapping = dict(zip(categories[::-1], range(1, len(categories) + 1)))
 
     items = question.kind.answer_groups.items()

diff --git a/report/surveyhero/parser.py b/report/surveyhero/parser.py
@@ -3,7 +3,8 @@
 from collections import defaultdict
 from pathlib import Path
 
-from .survey import SurveyFullAnswers, Question, MatrixQuestion, Answer, SimpleQuestion, SurveyReport
+from .survey import SurveyFullAnswers, Question, MatrixQuestion, Answer, SimpleQuestion, \
+    SurveyReport, RatingQuestion, RatingAnswer
 
 
 def parse_surveyhero_answers(path: Path, year: int) -> SurveyFullAnswers:
@@ -46,13 +47,15 @@ def parse_surveyhero_report(path: Path, year: int) -> SurveyReport:
         questions = []
 
         for row in csv.reader(f):
-            if row and "Answer" in row:
+            if row and ("Answer" in row or "Rating" in row):
                 if active_question is not None:
                     questions.append(active_question)
                 question = row[0]
                 kind = SimpleQuestion(answers=[])
                 if row[1] == "Row":
                     kind = MatrixQuestion(answer_groups=defaultdict(list))
+                elif row[2] == "Rating":
+                    kind = RatingQuestion(answers=[])
 
                 count = int(COUNT_REGEX.search(question).group(1))
                 question = question[:question.rindex("(")].strip()
@@ -69,7 +72,7 @@ def parse_surveyhero_report(path: Path, year: int) -> SurveyReport:
                 if all(r == "" for r in row):
                     # Empty row
                     continue
-                elif "Average" in row or "Standard Deviation" in row:
+                elif "Average" in row or "Standard Deviation" in row or "Net Promoter Score" in row:
                     # Statistics
                     continue
                 else:
@@ -88,6 +91,16 @@ def parse_surveyhero_report(path: Path, year: int) -> SurveyReport:
                             answer=normalize_answer(answer),
                             count=count,
                         ))
+                    elif isinstance(active_question.kind, RatingQuestion):
+                        label = row[1]
+                        rating = int(row[2])
+                        count = int(row[3])
+                        active_question.kind.answers.append(RatingAnswer(
+                            answer=Answer(
+                                answer=normalize_answer(label),
+                                count=count),
+                            rating=rating
+                        ))
                     else:
                         print(row)
                         assert False