Commit 03b0e19

Ken Lippold authored and committed
Changed transformer output DataFrame format to better support aggregation operations.
1 parent a3b5d0a commit 03b0e19

19 files changed (+906, -629 lines)

src/hydroserverpy/etl/README.md

Lines changed: 34 additions & 29 deletions
@@ -55,7 +55,7 @@ extractor = LocalFileExtractor(

### Transformers

-Transformers parse the extracted payload into a DataFrame where the first column is `timestamp` and the remaining columns are named by their target datastream ID.
+Transformers parse the extracted payload into a DataFrame with columns `timestamp`, `value`, and `target_id`.

**CSV:**
```python
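The reshaped output described in this hunk is the standard pandas "long" format. As a minimal sketch (with made-up datastream IDs, not hydroserverpy code), this is why the new shape supports aggregation more naturally than one column per datastream:

```python
import pandas as pd

# Hypothetical long-format transformer output: one row per observation,
# with the target datastream ID in its own column.
df = pd.DataFrame({
    "timestamp": pd.to_datetime([
        "2024-01-15T00:00:00Z", "2024-01-15T06:00:00Z",
        "2024-01-15T00:00:00Z", "2024-01-15T06:00:00Z",
    ]),
    "value": [1.0, 3.0, 10.0, 20.0],
    "target_id": ["ds-a", "ds-a", "ds-b", "ds-b"],
})

# Long format makes per-target aggregation a plain groupby, with no need
# to keep every series aligned on a shared timestamp column.
daily_means = (
    df.groupby(["target_id", pd.Grouper(key="timestamp", freq="1D")])["value"]
    .mean()
    .reset_index()
)
```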
@@ -71,6 +71,8 @@ transformer = CSVTransformer(

Use `identifier_type="index"` to reference columns by 1-based position instead of name:
```python
+from hydroserverpy.etl.transformers import CSVTransformer
+
transformer = CSVTransformer(
    timestamp_key="1",
    identifier_type="index",
@@ -102,6 +104,8 @@ Every transformer requires a `timestamp_key` identifying the source column that
| `"custom"` | Parses timestamps using a `strftime`-compatible format string provided via `timestamp_format`. Required when the source timestamps are not in a standard ISO 8601 format. |

```python
+from hydroserverpy.etl.transformers import CSVTransformer
+
# Custom format example — timestamps like "01/15/2024 08:30:00"
transformer = CSVTransformer(
    timestamp_key="datetime",
@@ -121,15 +125,17 @@ transformer = CSVTransformer(
| `"iana"` | Treats timestamps as naive and applies a named IANA timezone. Strips any embedded offset if present. | `timezone` as a valid IANA name |

```python
+from hydroserverpy.etl.transformers import CSVTransformer
+
# Fixed UTC offset — timestamps are in US Mountain Standard Time
-transformer = CSVTransformer(
+utc_transformer = CSVTransformer(
    timestamp_key="datetime",
    timezone_type="offset",
    timezone="-0700",
)

# IANA timezone — handles daylight saving time automatically
-transformer = CSVTransformer(
+iana_transformer = CSVTransformer(
    timestamp_key="datetime",
    timezone_type="iana",
    timezone="America/Denver",
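The difference between the two modes in this hunk can be sketched with plain pandas (an illustration of the semantics, not hydroserverpy's implementation):

```python
from datetime import timedelta, timezone

import pandas as pd

naive = pd.to_datetime(["2024-07-01 00:00:00", "2024-12-01 00:00:00"])

# Fixed offset: every timestamp shifts by exactly 7 hours, year-round.
fixed = naive.tz_localize(timezone(timedelta(hours=-7))).tz_convert("UTC")

# IANA zone: July is MDT (UTC-6), December is MST (UTC-7),
# so the UTC conversion differs across the DST boundary.
iana = naive.tz_localize("America/Denver").tz_convert("UTC")
```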
@@ -148,7 +154,7 @@ All timestamps are normalized to UTC before loading regardless of the source tim

### Data Mappings

-Data mappings connect source columns (by name or index) to HydroServer datastream IDs. Each mapping can fan out to multiple target datastreams, and optional data operations can be applied along the way.
+Data mappings connect source columns (by name or index) to HydroServer datastream IDs. Each mapping can fan out to multiple target datastreams, and optional data operations can be applied per target path.

```python
from hydroserverpy.etl.transformers import ETLDataMapping, ETLTargetPath
@@ -171,18 +177,18 @@ data_mappings = [

#### Data Operations

-Target paths can include a sequence of data operations applied to the source values before loading. Supported operations are arithmetic expressions and rating curves.
+Target paths can include a sequence of data operations applied to the source values before loading. Operations are applied in order — the output of each becomes the input of the next. Supported operations are arithmetic expressions, rating curves, and temporal aggregation.

**Arithmetic expression** — applies a Python arithmetic expression where `x` represents the source value. Only `+`, `-`, `*`, `/`, numeric literals, and the variable `x` are permitted.

```python
+from hydroserverpy.etl.transformers import ETLTargetPath
from hydroserverpy.etl.operations import ArithmeticExpressionOperation

ETLTargetPath(
    target_identifier="<datastream-uuid>",
    data_operations=[
        ArithmeticExpressionOperation(
-            type="arithmetic_expression",
            expression="(x - 32) / 1.8", # Fahrenheit to Celsius
            target_identifier="<datastream-uuid>",
        )
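One common way to evaluate a restricted expression like this safely is an AST whitelist. The sketch below is a hypothetical stand-in, not the library's actual implementation, showing how only `+`, `-`, `*`, `/`, numeric literals, and `x` can be permitted:

```python
import ast
import operator

# Allowed AST operator nodes mapped to their Python implementations.
_OPS = {ast.Add: operator.add, ast.Sub: operator.sub,
        ast.Mult: operator.mul, ast.Div: operator.truediv,
        ast.USub: operator.neg, ast.UAdd: operator.pos}

def eval_expression(expression: str, x: float) -> float:
    """Evaluate an arithmetic expression in `x`, rejecting anything else."""
    def walk(node):
        if isinstance(node, ast.Expression):
            return walk(node.body)
        if isinstance(node, ast.BinOp) and type(node.op) in _OPS:
            return _OPS[type(node.op)](walk(node.left), walk(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in _OPS:
            return _OPS[type(node.op)](walk(node.operand))
        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
            return node.value
        if isinstance(node, ast.Name) and node.id == "x":
            return x
        raise ValueError(f"Disallowed expression element: {ast.dump(node)}")
    return walk(ast.parse(expression, mode="eval"))
```

Because only whitelisted node types are walked, attempts to call functions or access attributes raise `ValueError` instead of executing.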
@@ -193,44 +199,40 @@ ETLTargetPath(
**Rating curve** — maps input values to output values using linear interpolation against a two-column CSV lookup table (input, output), retrieved from a URL.

```python
+from hydroserverpy.etl.transformers import ETLTargetPath
from hydroserverpy.etl.operations import RatingCurveDataOperation

ETLTargetPath(
    target_identifier="<datastream-uuid>",
    data_operations=[
        RatingCurveDataOperation(
-            type="rating_curve",
            rating_curve_url="https://example.com/curves/stage-discharge.csv",
            target_identifier="<datastream-uuid>",
        )
    ],
)
```

-Operations are applied in order. The output of each operation becomes the input of the next.
-
-### Temporal Aggregation
-
-Temporal aggregation is an optional step that reduces the per-observation DataFrame produced by the transformer into period-level summaries before loading. When configured, the same aggregation is applied uniformly to every target series in the pipeline.
+**Temporal aggregation** — reduces per-observation values into period-level summaries. When included, it should be the last operation in the sequence, as it changes the shape of the data from one row per observation to one row per aggregation window.

```python
-from hydroserverpy.etl.models import TemporalAggregation
+from hydroserverpy.etl.transformers import ETLTargetPath
+from hydroserverpy.etl.operations import TemporalAggregationOperation

-aggregation = TemporalAggregation(
-    aggregation_statistic="simple_mean",
-    aggregation_interval=1,
-    aggregation_interval_unit="day",
+ETLTargetPath(
+    target_identifier="<datastream-uuid>",
+    data_operations=[
+        TemporalAggregationOperation(
+            aggregation_statistic="simple_mean",
+            aggregation_interval=1,
+            aggregation_interval_unit="day",
+            target_identifier="<datastream-uuid>",
+        )
+    ],
)
```

-Pass it to the transformer at construction time:
-
-```python
-transformer = CSVTransformer(
-    timestamp_key="datetime",
-    temporal_aggregation=aggregation,
-)
-```
+Because temporal aggregation is a per-target operation, different targets fed by the same source can use different statistics, intervals, or timezone alignments independently.

#### Aggregation statistic

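The rating-curve interpolation described in the hunk above can be sketched with `numpy.interp` (hypothetical table values standing in for the CSV fetched from `rating_curve_url`; this is an illustration, not hydroserverpy code):

```python
import numpy as np

# Hypothetical two-column lookup table: stage (input) -> discharge (output).
stage = np.array([0.0, 1.0, 2.0, 3.0])
discharge = np.array([0.0, 5.0, 20.0, 45.0])

def apply_rating_curve(values):
    # Linear interpolation between table rows, as the operation describes.
    return np.interp(values, stage, discharge)

result = apply_rating_curve(np.array([0.5, 1.5, 2.5]))
```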
@@ -256,37 +258,40 @@ Window boundaries are aligned to local midnight in the configured timezone. The
| `"iana"` | Local midnight in a named timezone, handling DST automatically | `timezone` as a valid IANA name |

```python
+from hydroserverpy.etl.operations import TemporalAggregationOperation
+
# Daily windows aligned to US Mountain Time (UTC-7, DST-aware)
-aggregation = TemporalAggregation(
+TemporalAggregationOperation(
    aggregation_statistic="simple_mean",
    aggregation_interval=1,
    aggregation_interval_unit="day",
    timezone_type="iana",
    timezone="America/Denver",
+    target_identifier="<datastream-uuid>",
)

# Daily windows at a fixed offset (no DST adjustment)
-aggregation = TemporalAggregation(
+TemporalAggregationOperation(
    aggregation_statistic="time_weighted_mean",
    aggregation_interval=1,
    aggregation_interval_unit="day",
    timezone_type="offset",
    timezone="-0700",
+    target_identifier="<datastream-uuid>",
)
```

**Window boundary semantics:** Windows run from the local midnight that contains the first observation to the local midnight that contains the last observation. The last observation defines the exclusive upper boundary — observations on that final local day are not aggregated. Ensure your source data extends at least one day past the last period you want included, or that the last observation falls on the day following the final window.

Days with no observations are omitted from the output rather than filled with null values.

-
### Loader

```python
from hydroserverpy import HydroServer
from hydroserverpy.etl.loaders import HydroServerLoader

-hs = HydroServer(host="https://my-hydroserver.com", username="user", password="pass")
+hs = HydroServer(host="https://playground.hydroserver.org", email="user@example.com", password="pass")

loader = HydroServerLoader(
    client=hs,
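The window boundary and empty-day semantics described in this hunk can be sketched with pandas (an illustration of the rule, not hydroserverpy's implementation):

```python
import pandas as pd

# Observations on Jan 1, Jan 3, and one early on Jan 4. The local day
# containing the last observation (Jan 4) is the exclusive upper boundary.
obs = pd.DataFrame({
    "timestamp": pd.to_datetime([
        "2024-01-01 06:00", "2024-01-01 18:00",
        "2024-01-03 06:00", "2024-01-04 00:30",
    ]),
    "value": [2.0, 4.0, 10.0, 99.0],
})

last_day = obs["timestamp"].max().normalize()
window = obs[obs["timestamp"] < last_day]  # drop the final (partial) local day

# Jan 2 has no observations, so it drops out instead of becoming a null row.
daily = window.resample("1D", on="timestamp")["value"].mean().dropna()
```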

src/hydroserverpy/etl/hydroserver.py

Lines changed: 28 additions & 63 deletions
@@ -4,8 +4,9 @@
from hydroserverpy.api.models import Task, DataConnection
from hydroserverpy.etl import extractors, transformers, loaders, ETLPipeline
from hydroserverpy.etl.transformers import ETLDataMapping, ETLTargetPath
-from hydroserverpy.etl.operations import DataOperation, RatingCurveDataOperation, ArithmeticExpressionOperation
-from hydroserverpy.etl.models import Timestamp, TemporalAggregation
+from hydroserverpy.etl.operations import (DataOperation, RatingCurveDataOperation, ArithmeticExpressionOperation,
+                                          TemporalAggregationOperation)
+from hydroserverpy.etl.models import Timestamp


def normalize_timestamp_kwargs(**kwargs) -> dict:
@@ -82,62 +83,23 @@ def resolve_data_operations(raw_etl_target_path: dict) -> list[DataOperation]:
                target_identifier=raw_etl_target_path["targetIdentifier"],
                rating_curve_url=data_operation["ratingCurveUrl"],
            ))
+        elif data_operation["type"] == "aggregation":
+            timezone_kwargs = normalize_timestamp_kwargs(format="iso", **data_operation)
+            aggregation_mapping = {
+                "simple_mean": "simple_mean",
+                "time_weighted_daily_mean": "time_weighted_mean",
+                "last_value_of_day": "last_value_of_period",
+            }
+            resolved_data_operations.append(TemporalAggregationOperation(
+                target_identifier=raw_etl_target_path["targetIdentifier"],
+                aggregation_statistic=aggregation_mapping[data_operation["aggregationStatistic"]],
+                timezone_type=timezone_kwargs.get("timezone_type"),
+                timezone=timezone_kwargs.get("timezone"),
+            ))

    return resolved_data_operations


-def resolve_temporal_aggregation(
-    data_mappings: list[dict]
-) -> Optional[TemporalAggregation]:
-    """
-    Extract and return a TemporalAggregation configuration from the data mappings,
-    or None if no aggregation transformation is configured.
-
-    Aggregation is applied uniformly across all series at the transformer level,
-    so only one aggregation configuration is permitted across all mappings. Raises
-    ValueError if conflicting aggregation configurations are found.
-    """
-
-    # TODO: Aggregation settings are currently stored in HydroServer under data transformations.
-    # In the ETL module, aggregation is configured at the transformer level and applied uniformly
-    # to all datastreams after per-column data operations. This ensures the DataFrame passed to
-    # the loader remains aligned in time — a shared timestamp column is only valid if all series are
-    # aggregated identically. We may want to update how aggregation settings are stored in HydroServer
-    # to match this. In the meantime, if multiple conflicting aggregation settings somehow get passed
-    # to the same task, this method will throw an exception rather than try to guess which
-    # aggregation configuration to use. Alternatively, if we want to allow varying aggregation on a per
-    # column basis, we'll need to retool how the pipeline passes data to the loader. Each datastream
-    # will either need a dedicated timestamp column, or multiple dataframes will need to be passed to
-    # the loader.
-
-    temporal_aggregation = None
-
-    for mapping in data_mappings:
-        for path in mapping["paths"]:
-            for transformation in path.get("dataTransformations", []):
-                if transformation["type"] == "aggregation":
-                    if temporal_aggregation is not None:
-                        raise ValueError(
-                            "Received multiple aggregation configurations from HydroServer for the transformer. "
-                            "Only one aggregation configuration per transformer is currently supported."
-                        )
-
-                    timezone_kwargs = normalize_timestamp_kwargs(**transformation)
-                    aggregation_mapping = {
-                        "simple_mean": "simple_mean",
-                        "time_weighted_daily_mean": "time_weighted_mean",
-                        "last_value_of_day": "last_value_of_period",
-                    }
-
-                    temporal_aggregation = TemporalAggregation(
-                        aggregation_statistic=aggregation_mapping[transformation["aggregationStatistic"]],
-                        timezone_type=timezone_kwargs.get("timezone_type"),
-                        timezone=timezone_kwargs.get("timezone"),
-                    )
-
-    return temporal_aggregation
-
-
def build_hydroserver_pipeline(
    task: Task,
    data_connection: DataConnection,
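The dispatch added in this hunk keys on the raw operation's `type` field and renames HydroServer's stored statistic names to the ETL module's names. A stand-alone sketch of that mapping logic, with plain dicts standing in for the operation classes and the payload key `dataOperations` assumed (the real payload schema isn't shown in this diff):

```python
def resolve_data_operations_sketch(raw_path: dict) -> list[dict]:
    # Maps HydroServer's stored statistic names to the ETL module's names.
    aggregation_mapping = {
        "simple_mean": "simple_mean",
        "time_weighted_daily_mean": "time_weighted_mean",
        "last_value_of_day": "last_value_of_period",
    }
    resolved = []
    for op in raw_path.get("dataOperations", []):
        if op["type"] == "aggregation":
            resolved.append({
                "kind": "temporal_aggregation",
                "target_identifier": raw_path["targetIdentifier"],
                "aggregation_statistic": aggregation_mapping[op["aggregationStatistic"]],
            })
    return resolved

ops = resolve_data_operations_sketch({
    "targetIdentifier": "ds-1",
    "dataOperations": [{"type": "aggregation",
                        "aggregationStatistic": "last_value_of_day"}],
})
```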
@@ -178,9 +140,9 @@ def build_hydroserver_pipeline(
    if loader_cls is None:
        loader_cls = getattr(loaders, f"{data_connection.loader_type}Loader")

-    extractor_settings = dict(data_connection.extractor_settings)
-    transformer_settings = dict(data_connection.transformer_settings)
-    loader_settings = dict(data_connection.loader_settings)
+    extractor_settings = dict(data_connection.extractor_settings) if data_connection else {}
+    transformer_settings = dict(data_connection.transformer_settings) if data_connection else {}
+    loader_settings = dict(data_connection.loader_settings) if data_connection else {}

    extractor_placeholders = extractor_settings.pop("placeholderVariables", [])
    extractor_variables = getattr(task, "extractor_settings", None) or getattr(task, "extractor_variables", {})
@@ -195,19 +157,22 @@ def build_hydroserver_pipeline(
        **{to_snake(k): v for k, v in extractor_settings.items()}
    )

-    timestamp_settings = transformer_settings.pop("timestamp", {})
+    if task.task_type == "Aggregation":
+        timestamp_settings = {
+            "key": "phenomenon_time",
+            "format": "iso",
+            "timezoneMode": "utc"
+        }
+    else:
+        timestamp_settings = transformer_settings.pop("timestamp", {})
+
    transformer_settings = {
        ("jmespath" if k == "JMESPath" else to_snake(k)): v
        for k, v in transformer_settings.items()
    }

-    temporal_aggregation = resolve_temporal_aggregation(
-        data_mappings=data_mappings
-    )
-
    transformer: transformers.Transformer = transformer_cls(
        timestamp_key=timestamp_settings["key"],
-        temporal_aggregation=temporal_aggregation,
        **transformer_settings,
        **normalize_timestamp_kwargs(**timestamp_settings)
    )
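The settings normalization above converts camelCase keys to snake_case, with `JMESPath` special-cased. A hypothetical sketch of that step (the real `to_snake` helper lives elsewhere in hydroserverpy and may differ):

```python
import re

def to_snake(name: str) -> str:
    # Hypothetical stand-in for the to_snake helper referenced above:
    # insert "_" before each interior uppercase letter, then lowercase.
    return re.sub(r"(?<!^)(?=[A-Z])", "_", name).lower()

transformer_settings = {"JMESPath": "rows[*]", "identifierType": "name"}
normalized = {
    ("jmespath" if k == "JMESPath" else to_snake(k)): v
    for k, v in transformer_settings.items()
}
```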
