Merge remote-tracking branch 'origin/feature/fixes-11-jun-ubdt' into JFP_edits

jfparie · jfparie · commit d5a0f3b57968 · 2025-06-11T11:01:55.000+02:00
diff --git a/src/assets/bias-detection-python-code.tsx b/src/assets/bias-detection-python-code.tsx
@@ -332,7 +332,8 @@ def run():
             'iterations': localIterations,
             'minClusterSize': localClusterSize,
             'performanceMetric': bias_score,
-            'dataType': dataTypeText
+            'dataType': dataTypeText,
+            'higherIsBetter': 'biasAnalysis.higherIsBetter' if higherIsBetter else 'biasAnalysis.lowerIsBetter'
         }
     }))
     setResult(json.dumps({
@@ -375,7 +376,12 @@ def run():
         'defaultIndex': 0,
         'labelKey': 'biasAnalysis.clusterinResults.label',
         'valueKey' : 'biasAnalysis.clusterinResults.valueText',
-        'values': biasInClusters
+        'values': biasInClusters,
+        'params': {
+            'numZeroes': int(numZeros),
+            'totalRecords': int(totalRecords),
+            'clusterCount': clusterCount
+        }
     }))
     setResult(json.dumps({
         'type': 'text',
@@ -402,8 +408,6 @@ def run():
     
     
     decoded_X_test["cluster_label"] = y_test
-
-    # ----
     
     if localDataType == 'numeric':
         test_df["cluster_label"] = y_test
@@ -443,10 +447,6 @@ def run():
         print("The most biased cluster has a significantly higher average bias metric than the rest of the dataset.")
     else:
         print("No significant difference in average bias metric between the most biased cluster and the rest of the dataset.")
-        # setResult(json.dumps({
-        #        'type': 'heading',
-        #        'headingKey': 'biasAnalysis.nodifference.heading',                            
-        #    }))    
 
     setResult(json.dumps({
         'type': 'heading',
@@ -482,12 +482,7 @@ def run():
         charts = []
 
         for i, var in enumerate(variables):
-            
-            #setResult(json.dumps({
-            #    'type': 'heading',
-            #    'headingKey': 'biasAnalysis.distribution.heading',            
-            #    'params': {'variable': var}
-            #}))
+                        
             print(f"means: {var}")
             print(overall_means[var])
             print(means[var])
diff --git a/src/components/BiasSettings.tsx b/src/components/BiasSettings.tsx
@@ -76,7 +76,7 @@ export default function BiasSettings({
         resolver: zodResolver(FormSchema),
         defaultValues: {
             dataType: 'numeric',
-            whichPerformanceMetricValueIsBetter: 'higher',
+            whichPerformanceMetricValueIsBetter: 'lower',
         },
     });
     const [iter, setIter] = useState([10]);
diff --git a/src/components/componentMapper.tsx b/src/components/componentMapper.tsx
@@ -267,6 +267,7 @@ export default function ComponentMapper({
                                         defaultIndex: resultItem.defaultIndex,
                                         labelKey: resultItem.labelKey,
                                         valueKey: resultItem.valueKey,
+                                        params: resultItem.params || {},
                                     }}
                                 />
                             </ErrorBoundary>
diff --git a/src/components/composed-components/ClusterCategoriesDistributionAccordeonContent.tsx b/src/components/composed-components/ClusterCategoriesDistributionAccordeonContent.tsx
@@ -80,7 +80,7 @@ const ClusterCategoriesDistributionAccordeonContent = ({
                                     chart.selectFilterGroup ===
                                         defaultCategory) ||
                                 !chart.selectFilterGroup ? (
-                                    <div className="hideonprint">
+                                    <div className="hideonprint w-full">
                                         <h5
                                             key={index}
                                             className="text-gray-800 font-semibold"
@@ -107,7 +107,7 @@ const ClusterCategoriesDistributionAccordeonContent = ({
                                         />
                                     </div>
                                 ) : null}
-                                <div className="hidden showonprint">
+                                <div className="hidden showonprint overflow-x-hidden">
                                     <h5
                                         key={index}
                                         className="text-gray-800 font-semibold"
diff --git a/src/components/composed-components/ClusterNumericalVariableDistributionAccordeonContent.tsx b/src/components/composed-components/ClusterNumericalVariableDistributionAccordeonContent.tsx
@@ -71,7 +71,7 @@ const ClusterNumericalVariableDistributionAccordeonContent = ({
                                     chart.selectFilterGroup ===
                                         defaultCategory) ||
                                 !chart.selectFilterGroup ? (
-                                    <div className="hideonprint">
+                                    <div className="hideonprint w-full">
                                         <h5
                                             key={chartIndex}
                                             className="text-gray-800 font-semibold"
@@ -91,7 +91,7 @@ const ClusterNumericalVariableDistributionAccordeonContent = ({
                                         />
                                     </div>
                                 ) : null}
-                                <div className="hidden showonprint">
+                                <div className="hidden showonprint overflow-x-hidden">
                                     <h5
                                         key={`SingleBarChart-print-${chartIndex}`}
                                         className="text-gray-800 font-semibold"
diff --git a/src/components/composed-components/TextValueSelect.tsx b/src/components/composed-components/TextValueSelect.tsx
@@ -11,6 +11,7 @@ const TextValueSelect = ({
         defaultIndex: number;
         labelKey: string;
         valueKey: string;
+        params: Record<string, string>;
     };
 }) => {
     const [selectedIndex, setSelectedIndex] = useState<number | null>();
@@ -39,6 +40,7 @@ const TextValueSelect = ({
                     <div>
                         <label className="text-sm font-semibold">
                             {t(data.valueKey, {
+                                ...data.params,
                                 index: selectedIndex ?? data.defaultIndex,
                                 value,
                             })}
diff --git a/src/locales/en.ts b/src/locales/en.ts
@@ -181,7 +181,8 @@ export const en = {
         gaussianCopulaModelTitle: '3. Method: Gaussian Copula model',
         cartModelDescription:
             'The CART (Classification and Regression Trees) method generates synthetic data by learning patterns from real data through a decision tree that splits data into homogeneous groups based on feature values. It predicts averages for numerical data and assigns the most common category for categorical data, using these predictions to create new synthetic points.\n \n {{samples}} synthetic data points are generated.',
-        evaluationOfGeneratedDataTitle: '4. Evaluation of generated synthetic data',
+        evaluationOfGeneratedDataTitle:
+            '4. Evaluation of generated synthetic data',
         distributionsTitle: '4.1 Distributions',
         diagnosticsReportTitle: '4.2. Diagnostic report',
         diagnosticsTitle: 'Diagnostic Results',
@@ -303,6 +304,8 @@ A two-sided t-test is performed to accept or reject <i class="font-serif">H</i><
 In this example, we analyze which group is most adversely affected by the risk prediction algorithm. We do this by applying the clustering algorithm on the dataset previewed below. The column "is_recid" indicates whether a defendant reoffended or not (1: yes, 0: no). The "score_text" column indicates whether a defendant was predicted to reoffend (1: yes, 0: no). The column "false_positive" (FP) represents cases where a defendant was predicted to reoffended by the algorithm, but didn't do so (1: FP, 0: no FP). A preview of the data can be found below. The column "false_positive" is used as the bias variable.
 `,
         },
+        higherIsBetter: 'Higher value of bias variable is better',
+        lowerIsBetter: 'Lower value of bias variable is better',
         parameters: {
             heading: '2. Hyperparameters selected for clustering',
             iterations: 'Number of iterations: {{value}}',
@@ -313,6 +316,7 @@ In this example, we analyze which group is most adversely affected by the risk p
 - Minimal cluster size: {{minClusterSize}}
 - Bias variable: {{performanceMetric}}
 - Data type: {{dataType}}
+- Bias variable interpretation: $t({{higherIsBetter}})
 `,
         },
         distributionOfFeaturesAcrossClustersAccordeonTitle:
@@ -366,7 +370,8 @@ In this example, we analyze which group is most adversely affected by the risk p
 - Number of clusters detected: {{clusterCount}}
             `,
             label: 'Choose cluster to show number of datapoints for',
-            valueText: 'Number of datapoints in cluster {{index}}: {{value}}',
+            valueText:
+                'Number of datapoints in cluster {{index}}: {{value}} / {{totalRecords}}',
         },
         higherAverage: `The most deviating cluster has statistically significant different bias variable than the rest of the dataset.`,
         noSignificance: `No statistically significant difference in bias variable between the most biased cluster and the rest of the dataset.`,
diff --git a/src/locales/nl.ts b/src/locales/nl.ts
@@ -304,6 +304,8 @@ Er wordt een tweezijdige t-toets uitgevoerd om <i class="font-serif">H</i><sub>0
 {tooltip:biasAnalysis.p_valueTooltip}p-waarde{/tooltip} : {{p_val}}
         `,
         p_valueTooltip: `p-waarde tooltip`,
+        higherIsBetter: 'Hogere waarde van bias variabele is beter',
+        lowerIsBetter: 'Lagere waarde van bias variabele is beter',
         parameters: {
             heading: '2. Geselecteerde hyperparameters',
             iterations: 'Aantal iteraties: {{value}}',
@@ -316,6 +318,7 @@ Er wordt een tweezijdige t-toets uitgevoerd om <i class="font-serif">H</i><sub>0
 - Minimale clustergrootte: {{minClusterSize}}
 - Bias variabele: {{performanceMetric}}
 - Gegevenstype: {{dataType}}
+- Interpretatie van bias variabele: $t({{higherIsBetter}})
 `,
         },
         distribution: {
@@ -365,7 +368,8 @@ Er wordt een tweezijdige t-toets uitgevoerd om <i class="font-serif">H</i><sub>0
 - Aantal gevonden clusters: {{clusterCount}}
             `,
             label: 'Kies cluster om het aantal datapunten voor weer te geven',
-            valueText: 'Aantal datapunten in cluster {{index}}: {{value}}',
+            valueText:
+                'Aantal datapunten in cluster {{index}}: {{value}} / {{totalRecords}}',
         },
         higherAverage: `Het meest afwijkende cluster heeft statistisch significant andere bias variabele dan de rest van de dataset.`,
         noSignificance: `Het meest afwijkende cluster heeft statistisch significant geen andere bias variabele dan de rest van de dataset.`,