Reordering context columns in PARSynthesizer (#2726)

sarahmish · web-flow · commit a7eb1f87a91d · 2025-10-20T11:41:28.000-04:00
diff --git a/sdv/sequential/par.py b/sdv/sequential/par.py
@@ -378,6 +378,10 @@ def _get_id_context_columns(self):
             if self._get_table_metadata().columns[col]['sdtype'] not in MODELABLE_SDTYPES
         ]
 
+    def _reorder_context_columns(self, context_columns, timeseries_data):
+        order = {column: i for i, column in enumerate(timeseries_data.columns)}
+        return sorted(context_columns, key=lambda x: order.get(x, float('inf')))
+
     def _preprocess(self, data):
         """Transform the raw data to numerical space.
 
@@ -539,6 +543,8 @@ def _fit(self, processed_data):
                 pandas.DataFrame containing both the sequences,
                 the entity columns and the context columns.
         """
+        self.context_columns = self._reorder_context_columns(self.context_columns, processed_data)
+
         if self._sequence_key:
             self._fit_context_model(processed_data)
 
diff --git a/tests/integration/sequential/test_par.py b/tests/integration/sequential/test_par.py
@@ -1002,3 +1002,49 @@ def test_add_constraints_with_context_columns():
     synthesizer.fit(data)
     samples = synthesizer.sample(5)
     synthesizer.validate(samples)
+
+
+def test_par_context_columns_invariance():
+    """Test PAR is invariant to the order of context columns."""
+    # Setup
+    data = pd.DataFrame(
+        data={
+            'sequence': ['id-0'] * 3 + ['id-1'] * 4 + ['id-2'] * 3,
+            'context1': ['M'] * 3 + ['F'] * 4 + ['M'] * 3,
+            'context2': [12.0] * 3 + [np.nan] * 4 + [34.0] * 3,
+            'seq1': [12, 34, 12, 78, 12, 56, 34, 78, 12, 67],
+            'seq2': ['Yes', 'Yes', 'No', 'No', 'No', 'No', 'Yes', 'Yes', 'No', 'No'],
+        }
+    )
+
+    metadata = Metadata.load_from_dict({
+        'tables': {
+            'table': {
+                'columns': {
+                    'sequence': {'sdtype': 'id'},
+                    'context1': {'sdtype': 'categorical'},
+                    'context2': {'sdtype': 'numerical'},
+                    'seq1': {'sdtype': 'numerical'},
+                    'seq2': {'sdtype': 'categorical'},
+                },
+                'sequence_key': 'sequence',
+            }
+        }
+    })
+
+    synthesizer1 = PARSynthesizer(metadata, epochs=1, context_columns=['context1', 'context2'])
+
+    synthesizer2 = PARSynthesizer(metadata, epochs=1, context_columns=['context2', 'context1'])
+
+    # Run
+    synthesizer1.fit(data)
+    samples1 = synthesizer1.sample(num_sequences=3, sequence_length=2)
+
+    synthesizer2.fit(data)
+    samples2 = synthesizer2.sample(num_sequences=3, sequence_length=2)
+
+    # Assert
+    assert samples1.shape == samples2.shape
+    assert samples1.columns.equals(samples2.columns)
+    synthesizer1.validate(samples2)
+    synthesizer2.validate(samples1)
diff --git a/tests/unit/sequential/test_par.py b/tests/unit/sequential/test_par.py
@@ -451,6 +451,38 @@ def test_update_transformers_context_column(self):
         with pytest.raises(SynthesizerInputError, match=err_msg):
             instance.update_transformers({'time': FloatFormatter()})
 
+    def test__fit_reorder_context_columns_incorrect_order(self):
+        """Test that the context columns are reordered according to data."""
+        # Setup
+        metadata = self.get_metadata()
+        data = self.get_data()
+
+        data.insert(1, 'height', [160, 170, 180])
+        metadata.add_column('height', 'table', sdtype='numerical')
+        instance = PARSynthesizer(metadata, context_columns=['gender', 'height'])
+
+        # Run
+        instance.fit(data)
+
+        # Assert
+        assert instance.context_columns == ['height', 'gender']
+
+    def test__fit_reorder_context_columns_correct_order(self):
+        """Test that the context columns are still in the same order."""
+        # Setup
+        metadata = self.get_metadata()
+        data = self.get_data()
+
+        data.insert(2, 'height', [160, 170, 180])
+        metadata.add_column('height', 'table', sdtype='numerical')
+        instance = PARSynthesizer(metadata, context_columns=['gender', 'height'])
+
+        # Run
+        instance.fit(data)
+
+        # Assert
+        assert instance.context_columns == ['gender', 'height']
+
     @patch('sdv.sequential.par.GaussianCopulaSynthesizer')
     def test__fit_context_model_with_context_columns(self, gaussian_copula_mock):
         """Test that the method fits a synthesizer to the context columns.