add 1D array support for independent data generator

dmey · dmey · commit bae6ae7d69d9 · 2020-09-21T22:09:32.000+02:00
diff --git a/synthia/generators/copula.py b/synthia/generators/copula.py
@@ -72,7 +72,7 @@ def _log(self, msg: str) -> None:
             print(msg, flush=True)
 
     def fit(self, data: Union[np.ndarray, xr.DataArray, xr.Dataset],
-            copula: Copula, qrng=False,
+            copula: Copula,
             parameterize_by: Optional[Union[Parameterizer, Dict[int, Parameterizer], Dict[str, Parameterizer]]]=None):
         """tbd
 
diff --git a/synthia/generators/independent.py b/synthia/generators/independent.py
@@ -16,8 +16,8 @@ def fit(self, data: Union[np.ndarray, xr.DataArray, xr.Dataset],
 
         Args:
             data (ndarray or DataArray or Dataset): The input data, either a
-                2D array of shape (sample, feature) or a dataset where all
-                variables have the shape (sample[, ...]).
+                1D array, a 2D array of shape (sample, feature)
+                or a dataset where all variables have the shape (sample[, ...]).
 
             parameterize_by (Parameterizer or mapping, optional): The
                 following forms are valid:
@@ -30,7 +30,7 @@ def fit(self, data: Union[np.ndarray, xr.DataArray, xr.Dataset],
             None
         """
 
-        data, self.data_info = to_feature_array(data)
+        data, self.data_info = to_feature_array(data, allow_1d=True)
         
         self.dtype = data.dtype
         self.n_features = data.shape[1]
diff --git a/synthia/util.py b/synthia/util.py
@@ -95,7 +95,7 @@ def to_unstacked_dataset(arr: np.ndarray, stack_info: StackInfo) -> xr.Dataset:
     ds = xr.Dataset(unstacked)
     return ds
 
-def to_feature_array(data: Union[np.ndarray, xr.DataArray, xr.Dataset]) -> Tuple[xr.DataArray, dict]:
+def to_feature_array(data: Union[np.ndarray, xr.DataArray, xr.Dataset], allow_1d=False) -> Tuple[xr.DataArray, dict]:
     # TODO what about dtype?
     data_info = {}
     if isinstance(data, xr.Dataset):
@@ -111,14 +111,21 @@ def to_feature_array(data: Union[np.ndarray, xr.DataArray, xr.Dataset]) -> Tuple
                 attrs=data.attrs
             )
         data = xr.DataArray(data)
-    assert data.ndim == 2, f'Input array must be 2D, given: {data.ndim}'
+        if allow_1d and data.ndim == 1:
+            data_info['is_1d'] = True
+            data = data.expand_dims(dim='__feature', axis=1)
+    assert data.ndim == 2, f'Input array must be {"1D/" if allow_1d else ""}2D, given: {data.ndim}D'
     data_info['n_features'] = data.shape[1]
     return data, data_info
 
 def from_feature_array(data: np.ndarray, data_info: dict) -> Union[np.ndarray, xr.DataArray, xr.Dataset]:
     stack_info = data_info.get('stack_info')
     if stack_info:
         return to_unstacked_dataset(data, stack_info)
+    is_1d = data_info.get('is_1d')
+    if is_1d:
+        assert data.shape[1] == 1
+        data = data[:,0]
     da_info = data_info.get('da_info')
     if da_info:
         return xr.DataArray(data, **da_info)
diff --git a/tests/test_generators.py b/tests/test_generators.py
@@ -35,6 +35,19 @@ def test_independent_dataset_generation():
     assert synthetic_data['a'].shape == (n_synthetic_samples, n_features[0])
     assert synthetic_data['b'].shape == (n_synthetic_samples, n_features[1])
 
+def test_independent_1d_feature_generation():
+    n_samples = 200
+    input_data = np.random.normal(size=n_samples)  # 1D array of shape (n_samples,)
+
+    generator = syn.IndependentDataGenerator()
+
+    generator.fit(input_data)  # fit() is given a 1D array, not a (sample, feature) matrix
+
+    n_synthetic_samples = 50
+    synthetic_data = generator.generate(n_samples=n_synthetic_samples)
+
+    assert synthetic_data.shape == (n_synthetic_samples,)  # generated output is 1D as well
+
 def test_independent_feature_generation_with_distribution():
     n_samples = 20
     n_features = 2