feat: allow month and day units

arcstur · arcstur · commit 1ac995a58899 · 2025-08-25T18:10:48.000-03:00
fixes #20
diff --git a/graphs/bar_chart_race.py b/graphs/bar_chart_race.py
@@ -1,6 +1,13 @@
+import re
+import logging
+from datetime import datetime
+from datetime import timedelta
+from concurrent.futures import ThreadPoolExecutor
+
 import pandas as pd
 from pandas.errors import ParserError
-import re
+
+logger = logging.getLogger("django")
 
 
 class BaseDf:
@@ -13,6 +20,7 @@ def prepare(self) -> "BaseDf":
         self.prepare_value_column()
         self.prepare_identifier_columns()
         self.drop_other_columns()
+        self.drop_duplicates()
         return self
 
     def verify_column_count(self):
@@ -22,10 +30,9 @@ def verify_column_count(self):
     def prepare_date_column(self):
         original_name = self.df.columns[-1]
         try:
-            # TODO: allow other units of time
             self.df[original_name] = pd.to_datetime(
                 self.df[original_name], format="ISO8601"
-            ).dt.year
+            )
         except (ValueError, ParserError):
             raise BaseDfException("last column must be a date column")
         self.df.rename(columns={original_name: "date"}, inplace=True)
@@ -58,6 +65,9 @@ def drop_other_columns(self):
         drop = [col for col in self.df.columns if col not in keep]
         self.df.drop(columns=drop, inplace=True)
 
+    def drop_duplicates(self):
+        self.df.drop_duplicates(["name", "date"], inplace=True)
+
 
 class BaseDfException(Exception):
     def __init__(self, message):
@@ -66,20 +76,40 @@ def __init__(self, message):
 
 
 class DfProcessor:
-    def __init__(self, bdf: BaseDf):
-        self.df = bdf.df
+    def __init__(self, bdf: BaseDf, time_unit: str = "year"):
+        self.df = bdf.df.copy()
+        self.time_unit = time_unit
+        if self.time_unit == "year":
+            self.df["date"] = self.df["date"].dt.strftime("%Y-01-01")
+        elif self.time_unit == "month":
+            self.df["date"] = self.df["date"].dt.strftime("%Y-%m-01")
+        else:
+            self.df["date"] = self.df["date"].dt.strftime("%Y-%m-%d")
 
     def elements(self):
+        """Return one record dict per unique name.
+
+        Each record holds "name" plus, for whichever of the "url" and
+        "category" columns exist, that name's "first" aggregate value.
+        """
         identifiers = [col for col in ["url", "category"] if col in self.df.columns]
         if not identifiers:
-            return {name: {} for name in self.df["name"].unique()}
+            return [{"name": name} for name in self.df["name"].unique()]
         agg = {col: "first" for col in identifiers}
-        return self.df[["name", *identifiers]].groupby("name").agg(agg).reset_index().to_dict("records")
+        grouped = self.df[["name", *identifiers]].groupby("name").agg(agg)
+        return grouped.reset_index().to_dict("records")
+
+    def year_count(self):
+        min_year = int(self.df["date"].min()[:4])
+        max_year = int(self.df["date"].max()[:4])
+        return max_year - min_year + 1
 
     def interpolated_df(self):
         df = self.df
+        names = df["name"].unique()
+        time_units = self.all_time_units()
         mux = pd.MultiIndex.from_product(
-            [df["name"].unique(), range(df["date"].min(), df["date"].max() + 1)],
+            [names, time_units],
             names=["name", "date"],
         )
         df = (
@@ -96,18 +126,58 @@ def interpolated_df(self):
         df["rank"] = df.groupby("date")["value"].rank(method="dense", ascending=False)
         return df
 
+    def all_time_units(self):
+        """Return every "%Y-%m-%d" label from the min to the max date, in order."""
+        min_year = int(self.df["date"].min()[:4])
+        max_year = int(self.df["date"].max()[:4])
+        year_range = range(min_year, max_year + 1)
+        if self.time_unit == "year":
+            return [f"{y}-01-01" for y in year_range]
+        elif self.time_unit == "month":
+            start_month = int(self.df["date"].min()[5:7])
+            end_month = int(self.df["date"].max()[5:7])
+            if min_year == max_year:
+                # one calendar year: stop at the last month, not December
+                months = range(start_month, end_month + 1)
+                return [f"{min_year}-{m:02d}-01" for m in months]
+            start = [f"{min_year}-{m:02d}-01" for m in range(start_month, 13)]
+            between = [
+                f"{y}-{m}"
+                for y in year_range[1:-1]  # year-major keeps labels chronological
+                for m in [
+                    "01-01",
+                    "02-01",
+                    "03-01",
+                    "04-01",
+                    "05-01",
+                    "06-01",
+                    "07-01",
+                    "08-01",
+                    "09-01",
+                    "10-01",
+                    "11-01",
+                    "12-01",
+                ]
+            ]
+            end = [f"{max_year}-{m:02d}-01" for m in range(1, end_month + 1)]
+            return [*start, *between, *end]
+        else:
+            # any other time_unit falls through to daily labels
+            start_date = datetime.strptime(self.df["date"].min(), "%Y-%m-%d").date()
+            end_date = datetime.strptime(self.df["date"].max(), "%Y-%m-%d").date()
+            days = (end_date - start_date).days + 1
+            days_index = []
+            for n in range(days):
+                date = start_date + timedelta(days=n)
+                days_index.append(date.strftime("%Y-%m-%d"))
+            return days_index
+
     def values_by_date(self):
         ip = self.interpolated_df()
         vl = []
-        for date in list(sorted(ip["date"].unique())):
-            date = int(date)
-            values = (
-                ip.loc[ip["date"] == date]
-                .drop(columns="date")
-                .sort_values("rank")
-                .to_dict(orient="records")
-            )
-            vl.append({"date": f"{date}-01-01", "values": values})
+        for date, grouped in ip.set_index("date").groupby(level=0):
+            values = grouped.sort_values("rank").to_dict(orient="records")
+            vl.append({"date": date, "values": values})
         return vl
 
 
@@ -119,5 +189,17 @@ def process_bar_chart_race(df):
         return {"failed": e.message}
     proc = DfProcessor(bdf)
     elements = proc.elements()
-    values_by_date = proc.values_by_date()
-    return {"elements": elements, "values_by_date": values_by_date}
+    data = {"elements": elements}
+    run_daily = proc.year_count() <= 25
+    proc_monthly = DfProcessor(bdf, time_unit="month")
+    proc_daily = DfProcessor(bdf, time_unit="day")
+    with ThreadPoolExecutor() as executor:
+        t = executor.submit(proc.values_by_date)
+        t_monthly = executor.submit(proc_monthly.values_by_date)
+        if run_daily:
+            t_daily = executor.submit(proc_daily.values_by_date)
+        data["values_by_date"] = t.result()
+        data["values_by_date_monthly"] = t_monthly.result()
+        if run_daily:
+            data["values_by_date_daily"] = t_daily.result()
+    return data
diff --git a/graphs/tests.py b/graphs/tests.py
@@ -1,4 +1,5 @@
 import pandas as pd
+from datetime import datetime
 from numpy import nan
 from django.test import TestCase
 from django.utils.timezone import now
@@ -158,3 +159,27 @@ def test_df_processor(self):
                 2428708.0,
             ],
         )
+
+    def test_df_processor_month(self):
+        df = TestHelper.mock_df_bcr()
+        bdf = BaseDf(df).prepare()
+        proc = DfProcessor(bdf, time_unit="month")
+        ip = proc.interpolated_df()
+        self.assertEqual(ip["value"].count(), 57)
+        months_between = 6 + 12
+        self.assertEqual(len(ip["date"].unique()), 19)
+        self.assertEqual(len(ip["date"].unique()), months_between + 1)
+        vls = proc.values_by_date()
+        self.assertEqual(len(vls), 19)
+
+    def test_df_processor_day(self):
+        df = TestHelper.mock_df_bcr()
+        bdf = BaseDf(df).prepare()
+        proc = DfProcessor(bdf, time_unit="day")
+        ip = proc.interpolated_df()
+        self.assertEqual(ip["value"].count(), 1650)
+        days_between = (datetime(2022, 1, 1) - datetime(2020, 7, 1)).days
+        self.assertEqual(len(ip["date"].unique()), 550)
+        self.assertEqual(len(ip["date"].unique()), days_between + 1)
+        vls = proc.values_by_date()
+        self.assertEqual(len(vls), 550)
diff --git a/src/Components/Infographics/BarChartRace/barChartRace.jsx b/src/Components/Infographics/BarChartRace/barChartRace.jsx
@@ -16,7 +16,7 @@ import api from '../../../api/axios';
  * @param {string} props.colorPalette - List of colors for the chart
  * @param {Array} props.barRaceData - Data for the bar chart race
  */
-const BarChartRace = ({ title, speed, colorPalette, barRaceData, isDownloadingVideo, setIsDownloadingVideo }) => {
+const BarChartRace = ({ title, speed, colorPalette, timeUnit, barRaceData, isDownloadingVideo, setIsDownloadingVideo }) => {
   const DEFAULT_TRANSITION_DELAY = 250;
   const DOWNLOAD_WAIT_MULTIPLIER = 4;
   const svgRef = useRef(null); // Reference to the SVG element
@@ -35,7 +35,13 @@ const BarChartRace = ({ title, speed, colorPalette, barRaceData, isDownloadingVi
   useEffect(() => {
     const fetchDataAsync = () => {
       if (barRaceData) {
-        var keyframes = barRaceData.values_by_date.map(d => [new Date(d.date), d.values]);
+        var data_to_use = barRaceData.values_by_date;
+        if (timeUnit === "day") {
+          data_to_use = barRaceData.values_by_date_daily;
+        } else if (timeUnit === "month") {
+          data_to_use = barRaceData.values_by_date_monthly;
+        };
+        var keyframes = data_to_use.map(d => [new Date(d.date), d.values]);
 
         const dataset = {
           "elements": barRaceData.elements,
@@ -46,7 +52,7 @@ const BarChartRace = ({ title, speed, colorPalette, barRaceData, isDownloadingVi
       }
     };
     fetchDataAsync();
-  }, [barRaceData]);
+  }, [barRaceData, timeUnit]);
 
   useEffect(() => {
     if (dataset) {
@@ -64,7 +70,7 @@ const BarChartRace = ({ title, speed, colorPalette, barRaceData, isDownloadingVi
       };
 
       const width = container.clientWidth;
-      const keyframes = initializeChart(svgRef, dataset, width, title, colorPaletteArray);
+      const keyframes = initializeChart(svgRef, dataset, width, title, colorPaletteArray, timeUnit);
       keyframesRef.current = keyframes;
 
       // Initialize chart with the first keyframe.
@@ -82,7 +88,7 @@ const BarChartRace = ({ title, speed, colorPalette, barRaceData, isDownloadingVi
       }
     };
 
-  }, [dataset, title, speed, colorPalette]);
+  }, [dataset, timeUnit, title, colorPalette]);
 
   const animationDelay = () => {
     return 1000 / speed;
diff --git a/src/Components/Infographics/BarChartRace/barChartRaceUtils.js b/src/Components/Infographics/BarChartRace/barChartRaceUtils.js
@@ -12,9 +12,10 @@ export let color;
 
 // Variables
 let updateBars, updateAxis, updateLabels, updateTicker, x;
+let dateFormatter;
 
 // Function to initialize the chart
-export const initializeChart = (svgRef, dataset, width, title, colorPaletteArray) => {
+export const initializeChart = (svgRef, dataset, width, title, colorPaletteArray, timeUnit) => {
   const chartMargin = 30; // Adjust this value to increase the space
 
    // Create SVG element
@@ -61,6 +62,18 @@ export const initializeChart = (svgRef, dataset, width, title, colorPaletteArray
     color = (x) => scale(x.name);
   }
 
+  // define date format
+  let dateFormat = { year: "numeric" };
+
+  if (timeUnit === "day") {
+    dateFormat = { year: "numeric", month: "numeric", day: "numeric" };
+  } else if (timeUnit === "month") {
+    dateFormat = { year: "numeric", month: "long" };
+  }
+
+  // undefined = browser locale; UTC because date-only strings parse as UTC
+  dateFormatter = new Intl.DateTimeFormat(undefined, { ...dateFormat, timeZone: "UTC" });
+
   // Initialize update functions
   updateBars = bars(svgRef.current, x, y, prev, next);
   updateAxis = axis(svgRef.current, x, y, width);
@@ -86,8 +99,6 @@ export const updateChart = (keyframe, transition) => {
 
 // Ticker function
 function ticker(svgRef, width, keyframes) {
-  const formatDate = d3.utcFormat("%Y");
-
   const now = svgRef
     .append("text")
     .style("font", `bold ${barSize}px var(--sans-serif)`)
@@ -96,10 +107,10 @@ function ticker(svgRef, width, keyframes) {
     .attr("x", width - 6)
     .attr("y", margin.top + barSize * (n - 0.45))
     .attr("dy", "0.32em")
-    .text(formatDate(keyframes[0][0]));
+    .text(dateFormatter.format(keyframes[0][0]));
 
   return ([date], transition) => {
-    transition.end().then(() => now.text(formatDate(date)));
+    transition.end().then(() => now.text(dateFormatter.format(date)));
   };  
 }
 
diff --git a/src/Components/Modal/modal.jsx b/src/Components/Modal/modal.jsx
@@ -4,11 +4,12 @@ import { Button, Modal, TextInput, Label, Select, HelperText } from "flowbite-re
 import { useEffect, useState } from "react";
 
 
-export function InfoModal({ currState, onCloseModal, handleChartDisplay, handleChartTitle, handleChartSpeed, handleChartColorPalette }) {
+export function InfoModal({ barRaceData, currState, onCloseModal, handleChartDisplay, handleChartTitle, handleChartSpeed, handleChartColorPalette, handleChartTimeUnit }) {
   const [openModal, setOpenModal] = useState(false);
   const [chartTitle, setChartTitle] = useState("");
   const [chartSpeed, setChartSpeed] = useState(5);
   const [chartColorPalette, setChartColorPalette] = useState("");
+  const [chartTimeUnit, setChartTimeUnit] = useState("year");
 
   useEffect(() => {
     setOpenModal(currState);
@@ -31,6 +32,12 @@ export function InfoModal({ currState, onCloseModal, handleChartDisplay, handleC
     handleChartColorPalette(value);
   }
 
+  const handleTimeUnitChange = (event) => {
+    var value = event.target.value;
+    setChartTimeUnit(value);
+    handleChartTimeUnit(value);
+  }
+
   const handleChartType = () => {
     handleChartDisplay("Bar chart race");
   }
@@ -53,16 +60,18 @@ export function InfoModal({ currState, onCloseModal, handleChartDisplay, handleC
               <div className="mb-2 block">
                 <Label htmlFor="chartUnit">Speed unit</Label>
               </div>
-              <Select id="chartUnit" required>
-                <option>Years</option>
+              <Select id="chartUnit" value={chartTimeUnit} onChange={handleTimeUnitChange} required>
+                <option value="year">Years</option>
+                {barRaceData?.hasOwnProperty("values_by_date_monthly") && <option value="month">Months</option>}
+                {barRaceData?.hasOwnProperty("values_by_date_daily") && <option value="day">Days</option>}
               </Select>
             </div>
 
             <div className="max-w-md">
               <div className="mb-2 block">
                 <Label htmlFor="chartSpeed">Speed in units per second</Label>
               </div>
-              <TextInput id="chartSpeed" type="number" min="1" max="10" placeholder="Speed in units per second" value={chartSpeed} onChange={handleSpeedChange} required />
+              <TextInput id="chartSpeed" type="number" min="1" max="50" placeholder="Speed in units per second" value={chartSpeed} onChange={handleSpeedChange} required />
             </div>
 
             <div className="max-w-md">
diff --git a/src/Pages/Infographics.jsx b/src/Pages/Infographics.jsx