rust-lang · apiraino · Feb 10, 2025 · Jan 7, 2025 · Jan 7, 2025 · Jan 7, 2025
diff --git a/annual-survey.md b/annual-survey.md
@@ -41,7 +41,7 @@ it should nevertheless be helpful as a guide.
 1. Analyze the results and publish a blog post
    - There is a dedicated section about this [below](#analysing-survey-results)
 2. Add a link to the previous survey announcement blog post pointing to the blog post with survey results
-3. Update [forge.rust-lang.org](https://forge.rust-lang.org/community/survey-faq.html?highlight=survey#where-can-i-see-the-previous-survey-reports) with a link to the results blog post.
+3. Update [forge.rust-lang.org](https://forge.rust-lang.org/community/survey-faq.html#where-can-i-see-the-previous-survey-reports) with a link to the results blog post.
 4. Update [FAQ](documents/Community-Survey-FAQ.md) with a link to the results blog post.
 
 ### Analysing survey results

diff --git a/report/README.md b/report/README.md
@@ -3,16 +3,30 @@ This directory contains scripts that automate the generation of charts, reports
 surveys created in SurveyHero.
 
 Note that these scripts are intended to be used as a library, so you will need to write your own script to leverage them.
-It is best to take a look at their usage from previous surveys, and start with that.
+It is best to take a look at their usage from previous surveys, and start with that. The scripts change over time and are only kept compatible with the latest version of the annual survey. If you need to re-render the reports from an older survey, you should use an older version of these scripts from the corresponding year.
+
+# Build and install
+
+First, install the system dependencies: the development packages for `libxml2`, `libxslt1`, `zlib1g`, `libjpeg` and Python 3. For example, on Debian:
+``` bash
+sudo apt install libxml2-dev libxslt1-dev zlib1g-dev libjpeg-dev libpython3-dev
+```
+
+Ensure you have Python 3.8 installed (the last patch release is 3.8.20). An old version of the `lxml` library is used, and due to [this bug](https://bugs.launchpad.net/lxml/+bug/1973155) a correspondingly old version of Python is required. If your distribution no longer ships Python 3.8.x, you'll have to compile it yourself ([instructions](https://stackoverflow.com/a/62831268)).
 
 To use the scripts, you should install their dependencies first:
 ```bash
 $ python3 -m venv venv
 $ source venv/venv/bin/activate
-(venv) $ pip install -U setuptools wheel pip  
+(venv) $ pip install -U setuptools wheel pip
 (venv) $ pip install -r requirements.txt
 ```
 
+Also make sure to install the Pillow library (this step fixes a ValueError "WordCloud Only Supported for TrueType fonts"):
+```bash
+(venv) $ pip install -U pillow
+```
+
 and then add this directory to the `PYTHONPATH` of your main Python script, and then use e.g. `from surveyhero.parser import parse_surveyhero_report`.
 
 ## Useful functions

diff --git a/report/requirements.txt b/report/requirements.txt
@@ -1,7 +1,7 @@
 plotly==5.18.0
 kaleido==0.2.1
-pandas==2.1.4
-matplotlib==3.8.2
+pandas==2.0.3
+matplotlib==3.7.5
 wordcloud==1.9.3
 elsie[cairo]==3.4
 beautifulsoup4==4.12.3

diff --git a/report/surveyhero/chart.py b/report/surveyhero/chart.py
@@ -22,9 +22,11 @@ def format_title(question: Question, include_kind: bool = False) -> str:
     return f'<b>{wrap_text(question.question, max_width=75)}</b><br /><span style="font-size: 0.8em;">(total responses = {question.total_responses}{kind})</span>'
 
 
-def wrap_text(text: str, max_width: int) -> str:
-    text = textwrap.wrap(text, width=max_width, break_long_words=False)
-    text = "<br />".join(text)
+def wrap_text(text: str, max_width: int, override_line_size: Optional[str] = None) -> str:
+    lines = textwrap.wrap(text, width=max_width, break_long_words=False)
+    if override_line_size is not None:
+        lines = [f"<span style='font-size: {override_line_size}'>{line}</span>" for line in lines]
+    text = "<br />".join(lines)
     return text
 
 
@@ -35,21 +37,44 @@ def make_bar_chart(
         xaxis_tickangle=0,
         max_tick_width=30,
         legend_order: Optional[List[str]] = None,
-        layout_args: Optional[Dict[str, Any]] = None
+        layout_args: Optional[Dict[str, Any]] = None,
+        legend_params: Optional[Dict[str, Any]] = None,
 ) -> Figure:
     assert len(questions) > 0
     assert len(set(question.year for question in questions)) == len(questions)
 
+    # Sort questions by year to have a left-to-right reading order
+    questions = sorted(questions, key=lambda q: q.year)
+
+    # Plotly hardcodes the line height to be 1.3em, which is quite large, and it makes it
+    # hard to visually parse different lines vs different X axis ticks.
+    # Therefore, we use a hack - we set the xaxis font size to be 9 instead of 12
+    # (the default font size), to reduce the line height proportionally (9 * 1.3 instead of
+    # 12 * 1.3).
+    # And then we inflate the font size of the individual lines by 12/9 to make the actual
+    # font size be the same as before applying the hack.
+    xaxis_font_size = 9
+    override_line_size = f"{12 / xaxis_font_size:.1f}em"
+
     if legend_order is not None:
-        legend_order = [wrap_text(l, max_width=max_tick_width) for l in legend_order]
+        # We need to apply the size hack also to the legend, otherwise the answers won't match
+        legend_order = [wrap_text(
+            l,
+            max_width=max_tick_width,
+            override_line_size=override_line_size
+        ) for l in legend_order]
 
     data = defaultdict(list)
     totals = {}
 
     for question in questions:
         assert question.is_simple()
         for answer in question.kind.answers:
-            text = wrap_text(answer.answer, max_width=max_tick_width)
+            text = wrap_text(
+                answer.answer,
+                max_width=max_tick_width,
+                override_line_size=override_line_size
+            )
 
             data["year"].append(str(question.year))
             data["answer"].append(text)
@@ -80,7 +105,7 @@ def make_bar_chart(
         counts = data.loc[data["Year"] == year, "count"].astype(np.float32)
         data.loc[data["Year"] == year, "percent"] = (counts / total_count) * 100.0
 
-    main_year = str(questions[0].year)
+    main_year = str(questions[-1].year)
 
     def sort_key(answer: str) -> int:
         if legend_order is not None:
@@ -100,11 +125,18 @@ def generate_text(row) -> str:
 
     data["text"] = data.apply(generate_text, axis=1)
 
+    palette = px.colors.qualitative.Plotly
+    # Make sure that we have a canonical assignment of colors to individual years
+    # If there is only a single year, we should assign it palette[0]
+    # If there are two years, the largest one should have palette[0], the other one palette[1] etc.
+    palette = palette[:len(questions)][::-1]
+
     fig = px.bar(
         data,
         x="answer",
         y="percent",
         color="Year",
+        color_discrete_sequence=palette,
         barmode="group",
         text="text",
         custom_data=["Year", "count"],
@@ -114,9 +146,13 @@ def generate_text(row) -> str:
     fig.update_traces(
         textposition="outside",
         hovertemplate="Year: %{customdata[0]}<br />Count: %{customdata[1]}<br />Percent: %{text}<extra></extra>",
-        textangle=-90 if bar_label_vertical else 0,
+        textangle=90 if bar_label_vertical else 0,
     )
 
+    legend = {}
+    if legend_params is not None:
+        legend.update(legend_params)
+
     layout_args = layout_args or {}
     fig.update_layout(
         meta="bar-chart",
@@ -129,6 +165,8 @@ def generate_text(row) -> str:
         xaxis_title=None,
         # xaxis_tickwidth=40,
         xaxis_tickangle=xaxis_tickangle,
+        # See usage of `override_line_size` above
+        xaxis_tickfont=dict(size=xaxis_font_size),
         yaxis_title="Percent out of all responses (%)",
         yaxis_range=[0, 119],
         yaxis_ticksuffix="%",
@@ -144,6 +182,7 @@ def generate_text(row) -> str:
             pad=10,
             b=10
         ),
+        legend=legend,
         dragmode="pan",
         **layout_args
     )
@@ -239,10 +278,12 @@ def make_matrix_chart(
         question: Question,
         categories: List[str],
         category_label: str,
-        height=600,
+        option_label: Optional[str] = None,
+        height: Optional[int] = None,
         horizontal: bool = False,
         max_label_width=20,
-        legend_params: Optional[Dict[str, Any]] = None
+        legend_params: Optional[Dict[str, Any]] = None,
+        textposition = "outside"
 ) -> Figure:
     """
     Create a matrix chart with different categories.
@@ -278,6 +319,12 @@ def make_matrix_chart(
     if not horizontal:
         keys = dict(y="Count", x="Category")
 
+    if height is None:
+        if horizontal:
+            height = 600
+        else:
+            height = 1000
+
     fig = px.bar(
         df,
         **keys,
@@ -287,12 +334,12 @@ def make_matrix_chart(
             Category=group_keys
         ),
         title=format_title(question),
-        height=1000 if not horizontal else height,
+        height=height,
         hover_data=[category_label]
     )
     fig.update_traces(
         orientation="h" if horizontal else "v",
-        textposition="outside",
+        textposition=textposition,
         hovertemplate=f"Category: %{{y}}<br />{category_label}: %{{customdata[0]}}<br />Percent: %{{text}}<extra></extra>",
     )
 
@@ -302,7 +349,18 @@ def make_matrix_chart(
 
     layout_args = {}
     if horizontal:
-        layout_args["xaxis_range"] = [0, 110]
+        if textposition != "inside":
+            layout_args["xaxis_range"] = [0, 110]
+        else:
+            layout_args["xaxis_range"] = [0, 100]
+        layout_args["xaxis_title"] = None
+        layout_args["xaxis_ticksuffix"] = "%"
+        layout_args["yaxis_ticksuffix"] = ""
+        layout_args["yaxis_title"] = option_label
+    else:
+        layout_args["yaxis_title"] = None
+        layout_args["xaxis_title"] = option_label
+        layout_args["yaxis_ticksuffix"] = "%"
 
     fig.update_layout(
         meta="matrix-chart",
@@ -312,13 +370,9 @@ def make_matrix_chart(
             font_family="Rockwell",
         ),
         # hovermode="y unified",
-        yaxis_title=None,
         yaxis_tickangle=0,
         # https://stackoverflow.com/a/52397461/1107768
-        yaxis_ticksuffix="   ",
         yaxis_fixedrange=True,
-        xaxis_title="Percent out of the category (%)",
-        xaxis_ticksuffix="%",
         xaxis_fixedrange=True,
         legend=legend,
         dragmode="pan",

diff --git a/report/surveyhero/report.py b/report/surveyhero/report.py
@@ -48,10 +48,8 @@ class ChartReport:
     def __init__(self):
         self.charts: Dict[str, ChartRenderer] = {}
 
-    def add_bar_chart(self, name: str, question: Question, baseline: Optional[Question] = None, **kwargs):
-        questions = [question]
-        if baseline is not None:
-            questions.append(baseline)
+    def add_bar_chart(self, name: str, question: Question, *baselines: Question, **kwargs):
+        questions = [question] + list(baselines)
 
         def render_fn(**args):
             return make_bar_chart(questions=questions, **join(kwargs, args))

diff --git a/report/surveyhero/survey.py b/report/surveyhero/survey.py
@@ -25,7 +25,8 @@ def rename_answers(self, diff: Dict[str, Optional[str]]) -> "SimpleQuestion":
                     continue
                 answer = dataclasses.replace(answer, answer=updated)
             answers.append(answer)
-        assert len(diff) == 0
+        if len(diff) != 0:
+            raise Exception(f"Some diffs were not applied: {diff}\nAnswers: {self.answers}")
         return dataclasses.replace(self, answers=answers)
 
 
@@ -42,7 +43,7 @@ def rename_answers(self, diff: Dict[str, str]) -> "MatrixQuestion":
                 group = diff.pop(group)
             answer_groups[group] = items
         if len(diff) > 0:
-            raise Exception(f"Rename answers diff not empty: {diff}")
+            raise Exception(f"Rename answers diff not empty: {diff}. Answers: {self.answer_groups}")
         return dataclasses.replace(self, answer_groups=answer_groups)
 
 
@@ -77,6 +78,8 @@ def combine_answers(self, diff: Dict[str, List[str]]) -> "Question":
         for (target, old_answers) in diff.items():
             count = 0
             for answer in old_answers:
+                if answer not in answers_orig:
+                    raise Exception(f"Answer {answer} not in {answers_orig}")
                 count += answers_orig[answer].count
                 answers_orig.pop(answer)
             assert count > 0