Skip to content

Commit 3e3ac92

Browse files
authored
Reports are created on grouped inference rows (#151)
* Reports are created on grouped inference rows * Changes: - Added granularity as a model property - Added checks in API for granularity validity - Updated docs - Fixed some typings and syntax * Added changes in model granularity to streamlit * Additions: - Tests - Changed start_time into the start of the day if not set by timestamp
1 parent 4161613 commit 3e3ac92

File tree

27 files changed

+546
-88
lines changed

27 files changed

+546
-88
lines changed

.coverage

0 Bytes
Binary file not shown.

docs/mkdocs/docs/sdk-docs.md

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,18 @@ This is the documentation for Whitebox's SDK. For an interactive experience, you
44

55
## Models
66

7-
**_create_model_**_(name, type, target_column, labels=None, description="")_
7+
**_create_model_**_(name, type, target_column, granularity, labels=None, description="")_
88

99
Creates a model in the database. This model works as a placeholder for all the actual model's metadata.
1010

11-
| Parameter | Type | Description |
12-
| --------------- | ---------------- | ------------------------------------------------------------------------- |
13-
| **name** | `str` | The name of the model. |
14-
| **type** | `str` | The model's type. Possible values: `binary`, `multi_class`, `regression`. |
15-
| **target_column** | `str` | The name of the target column (y). |
16-
| **labels** | `Dict[str, int]` | The model's labels. Defaults to `None`. |
17-
| **description** | `str` | The model's description. Defaults to an empty string `""`. |
11+
| Parameter | Type | Description |
12+
| ----------------- | ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
13+
| **name** | `str` | The name of the model. |
14+
| **type** | `str` | The model's type. Possible values: `binary`, `multi_class`, `regression`. |
15+
| **target_column** | `str` | The name of the target column (y). |
16+
| **granularity** | `str` | The granularity depending on which the inference rows will be grouped by to create the reports. Must be a `str` containing the amount (`int`) and the type (e.g. "1D"). Possible values for granularity type: `T (minutes)`, `H (hours)`, `D (days)`, `W (weeks)`. |
17+
| **labels** | `Dict[str, int]` | The model's labels. Defaults to `None`. |
18+
| **description** | `str` | The model's description. Defaults to an empty string `""`. |
1819

1920
!!! info
2021

docs/mkdocs/docs/tutorial/sdk.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,8 @@ wb.create_model(
7373
'additionalProp1': 0,
7474
'additionalProp2': 1
7575
},
76-
target_column="target"
76+
target_column="target",
77+
granularity="1D"
7778
)
7879
```
7980

whitebox/api/v1/inference_rows.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
from typing import Dict, List
22
from whitebox.middleware.auth import authenticate_user
3-
from whitebox.schemas.inferenceRow import InferenceRow, InferenceRowCreateDto
3+
from whitebox.schemas.inferenceRow import (
4+
InferenceRow,
5+
InferenceRowCreateDto,
6+
InferenceRowPreDb,
7+
)
48
from whitebox.analytics.xai_models.pipelines import (
59
create_xai_pipeline_per_inference_row,
610
)
@@ -31,7 +35,9 @@ async def create_row(
3135
) -> InferenceRow:
3236
"""Inserts an inference row into the database."""
3337

34-
new_inference_row = crud.inference_rows.create(db=db, obj_in=body)
38+
updated_body = InferenceRowPreDb(**dict(body), is_used=False)
39+
40+
new_inference_row = crud.inference_rows.create(db=db, obj_in=updated_body)
3541
return new_inference_row
3642

3743

@@ -58,7 +64,10 @@ async def create_many_inference_rows(
5864
f'Column "{model.target_column}" was not found in some or any of the rows in provided inference dataset. Please try again!'
5965
)
6066

61-
new_inference_rows = crud.inference_rows.create_many(db=db, obj_list=body)
67+
updated_body = [InferenceRowPreDb(**dict(x), is_used=False) for x in body]
68+
new_inference_rows = crud.inference_rows.create_many(
69+
db=db, obj_list=updated_body
70+
)
6271
return new_inference_rows
6372
else:
6473
return errors.not_found(f"Model with id: {dict(body[0])['model_id']} not found")

whitebox/api/v1/models.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,24 @@ async def create_model(
2828
) -> Model:
2929
"""Inserts a model into the database"""
3030

31+
granularity = body.granularity
32+
33+
try:
34+
granularity_amount = float(granularity[:-1])
35+
except ValueError:
36+
return errors.bad_request("Granularity amount that was given is not a number!")
37+
38+
if not granularity_amount.is_integer():
39+
return errors.bad_request(
40+
"Granularity amount should be an integer and not a float (e.g. 1D)!"
41+
)
42+
43+
granularity_type = granularity[-1]
44+
if granularity_type not in ["T", "H", "D", "W"]:
45+
return errors.bad_request(
46+
"Wrong granularity type. Accepted values: T (minutes), H (hours), D (days), W (weeks)"
47+
)
48+
3149
new_model = crud.models.create(db=db, obj_in=body)
3250
return new_model
3351

whitebox/api/v1/performance_metrics.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,15 +38,15 @@ async def get_all_models_performance_metrics(
3838

3939
model = crud.models.get(db, model_id)
4040
if model:
41-
if vars(model)["type"] == ModelType.binary:
41+
if model.type == ModelType.binary:
4242
return crud.binary_classification_metrics.get_performance_metrics_by_model(
4343
db=db, model_id=model_id
4444
)
45-
elif vars(model)["type"] == ModelType.multi_class:
45+
elif model.type == ModelType.multi_class:
4646
return crud.multi_classification_metrics.get_performance_metrics_by_model(
4747
db=db, model_id=model_id
4848
)
49-
elif vars(model)["type"] == ModelType.regression:
49+
elif model.type == ModelType.regression:
5050
return crud.regression_metrics.get_performance_metrics_by_model(
5151
db=db, model_id=model_id
5252
)

whitebox/core/settings.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ class Settings(BaseSettings):
1010
VERSION: str = ""
1111
MODEL_PATH: str = ""
1212
SECRET_KEY: str = ""
13+
GRANULARITY: str = ""
1314

1415
class Config:
1516
env_file = f".env.{os.getenv('ENV')}" or ".env.dev"

whitebox/cron_tasks/monitoring_metrics.py

Lines changed: 132 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import time
44
from sqlalchemy import create_engine
55
from sqlalchemy.orm import sessionmaker, Session
6-
6+
from fastapi.encoders import jsonable_encoder
77
from whitebox import crud, entities
88
from whitebox.analytics.drift.pipelines import (
99
run_data_drift_pipeline,
@@ -19,7 +19,13 @@
1919
from whitebox.cron_tasks.shared import (
2020
get_all_models,
2121
get_model_dataset_rows_df,
22-
get_model_inference_rows_df,
22+
get_unused_model_inference_rows,
23+
group_inference_rows_by_timestamp,
24+
seperate_inference_rows,
25+
set_inference_rows_to_used,
26+
get_latest_drift_metrics_report,
27+
round_timestamp,
28+
get_used_inference_for_reusage,
2329
)
2430
from whitebox.schemas.model import Model, ModelType
2531
from whitebox.schemas.modelIntegrityMetric import ModelIntegrityMetricCreate
@@ -34,7 +40,7 @@
3440

3541

3642
async def run_calculate_drifting_metrics_pipeline(
37-
model: Model, inference_processed_df: pd.DataFrame
43+
model: Model, inference_processed_df: pd.DataFrame, timestamp: datetime
3844
):
3945
"""
4046
Run the pipeline to calculate the drifting metrics
@@ -67,18 +73,29 @@ async def run_calculate_drifting_metrics_pipeline(
6773
)
6874

6975
new_drifting_metric = entities.DriftingMetric(
70-
timestamp=str(datetime.utcnow()),
76+
timestamp=str(timestamp),
7177
model_id=model.id,
7278
concept_drift_summary=concept_drift_report,
7379
data_drift_summary=data_drift_report,
7480
)
7581

76-
crud.drifting_metrics.create(db, obj_in=new_drifting_metric)
82+
existing_report = crud.drifting_metrics.get_first_by_filter(
83+
db=db, model_id=model.id, timestamp=timestamp
84+
)
85+
if existing_report:
86+
crud.drifting_metrics.update(
87+
db=db, db_obj=existing_report, obj_in=jsonable_encoder(new_drifting_metric)
88+
)
89+
else:
90+
crud.drifting_metrics.create(db, obj_in=new_drifting_metric)
7791
logger.info("Drifting metrics calculated!")
7892

7993

8094
async def run_calculate_performance_metrics_pipeline(
81-
model: Model, inference_processed_df: pd.DataFrame, actual_df: pd.DataFrame
95+
model: Model,
96+
inference_processed_df: pd.DataFrame,
97+
actual_df: pd.DataFrame,
98+
timestamp: datetime,
8299
):
83100
"""
84101
Run the pipeline to calculate the performance metrics
@@ -121,11 +138,21 @@ async def run_calculate_performance_metrics_pipeline(
121138

122139
new_performance_metric = entities.BinaryClassificationMetrics(
123140
model_id=model.id,
124-
timestamp=str(datetime.utcnow()),
141+
timestamp=str(timestamp),
125142
**dict(binary_classification_metrics_report),
126143
)
127144

128-
crud.binary_classification_metrics.create(db, obj_in=new_performance_metric)
145+
existing_report = crud.binary_classification_metrics.get_first_by_filter(
146+
db=db, model_id=model.id, timestamp=timestamp
147+
)
148+
if existing_report:
149+
crud.binary_classification_metrics.update(
150+
db=db,
151+
db_obj=existing_report,
152+
obj_in=jsonable_encoder(new_performance_metric),
153+
)
154+
else:
155+
crud.binary_classification_metrics.create(db, obj_in=new_performance_metric)
129156

130157
elif model.type == ModelType.multi_class:
131158
multiclass_classification_metrics_report = (
@@ -136,11 +163,21 @@ async def run_calculate_performance_metrics_pipeline(
136163

137164
new_performance_metric = entities.MultiClassificationMetrics(
138165
model_id=model.id,
139-
timestamp=str(datetime.utcnow()),
166+
timestamp=str(timestamp),
140167
**dict(multiclass_classification_metrics_report),
141168
)
142169

143-
crud.multi_classification_metrics.create(db, obj_in=new_performance_metric)
170+
existing_report = crud.multi_classification_metrics.get_first_by_filter(
171+
db=db, model_id=model.id, timestamp=timestamp
172+
)
173+
if existing_report:
174+
crud.multi_classification_metrics.update(
175+
db=db,
176+
db_obj=existing_report,
177+
obj_in=jsonable_encoder(new_performance_metric),
178+
)
179+
else:
180+
crud.multi_classification_metrics.create(db, obj_in=new_performance_metric)
144181

145182
elif model.type == ModelType.regression:
146183
regression_metrics_report = create_regression_evaluation_metrics_pipeline(
@@ -149,17 +186,27 @@ async def run_calculate_performance_metrics_pipeline(
149186

150187
new_performance_metric = entities.RegressionMetrics(
151188
model_id=model.id,
152-
timestamp=str(datetime.utcnow()),
189+
timestamp=str(timestamp),
153190
**dict(regression_metrics_report),
154191
)
155192

156-
crud.regression_metrics.create(db, obj_in=new_performance_metric)
193+
existing_report = crud.regression_metrics.get_first_by_filter(
194+
db=db, model_id=model.id, timestamp=timestamp
195+
)
196+
if existing_report:
197+
crud.regression_metrics.update(
198+
db=db,
199+
db_obj=existing_report,
200+
obj_in=jsonable_encoder(new_performance_metric),
201+
)
202+
else:
203+
crud.regression_metrics.create(db, obj_in=new_performance_metric)
157204

158205
logger.info("Performance metrics calculated!")
159206

160207

161208
async def run_calculate_feature_metrics_pipeline(
162-
model: Model, inference_processed_df: pd.DataFrame
209+
model: Model, inference_processed_df: pd.DataFrame, timestamp: datetime
163210
):
164211
"""
165212
Run the pipeline to calculate the feature metrics
@@ -172,11 +219,22 @@ async def run_calculate_feature_metrics_pipeline(
172219
if feature_metrics_report:
173220
new_feature_metric = ModelIntegrityMetricCreate(
174221
model_id=model.id,
175-
timestamp=str(datetime.utcnow()),
222+
timestamp=str(timestamp),
176223
feature_metrics=feature_metrics_report,
177224
)
178225

179-
crud.model_integrity_metrics.create(db, obj_in=new_feature_metric)
226+
existing_report = crud.model_integrity_metrics.get_first_by_filter(
227+
db=db, model_id=model.id, timestamp=timestamp
228+
)
229+
if existing_report:
230+
crud.model_integrity_metrics.update(
231+
db=db,
232+
db_obj=existing_report,
233+
obj_in=jsonable_encoder(new_feature_metric),
234+
)
235+
else:
236+
crud.model_integrity_metrics.create(db, obj_in=new_feature_metric)
237+
180238
logger.info("Feature metrics calculated!")
181239

182240

@@ -190,24 +248,72 @@ async def run_calculate_metrics_pipeline():
190248
logger.info("No models found! Skipping pipeline")
191249
else:
192250
for model in models:
193-
(
194-
inference_processed_df,
195-
inference_nonprocessed_df,
196-
actual_df,
197-
) = await get_model_inference_rows_df(db, model_id=model.id)
198-
if inference_processed_df.empty:
251+
granularity = model.granularity
252+
granularity_amount = int(granularity[:-1])
253+
granularity_type = granularity[-1]
254+
255+
last_report = await get_latest_drift_metrics_report(db, model)
256+
257+
# We need to get the last report's timestamp as a base of grouping unless there's no report produced.
258+
# In this case, the base timestamp is considered the "now" rounded to the day so the intervals start from midnight
259+
# e.g. 12:00, 12:15, 12:30, 12:45 and so on if granularity is 15T.
260+
last_report_time = (
261+
last_report.timestamp
262+
if last_report
263+
else round_timestamp(datetime.utcnow(), "1D")
264+
)
265+
266+
unused_inference_rows_in_db = await get_unused_model_inference_rows(
267+
db, model_id=model.id
268+
)
269+
270+
if len(unused_inference_rows_in_db) == 0:
199271
logger.info(
200-
f"No inferences found for model {model.id}! Continuing with next model..."
272+
f"No new inferences found for model {model.id}! Continuing with next model..."
201273
)
202274
continue
203275
logger.info(f"Executing Metrics pipeline for model {model.id}...")
204-
await run_calculate_drifting_metrics_pipeline(model, inference_processed_df)
205276

206-
await run_calculate_performance_metrics_pipeline(
207-
model, inference_processed_df, actual_df
277+
used_inferences = get_used_inference_for_reusage(
278+
db,
279+
model.id,
280+
unused_inference_rows_in_db,
281+
last_report_time,
282+
granularity_amount,
283+
granularity_type,
284+
)
285+
286+
all_inferences = unused_inference_rows_in_db + used_inferences
287+
288+
grouped_inference_rows = await group_inference_rows_by_timestamp(
289+
all_inferences,
290+
last_report_time,
291+
granularity_amount,
292+
granularity_type,
208293
)
209294

210-
await run_calculate_feature_metrics_pipeline(model, inference_processed_df)
295+
for group in grouped_inference_rows:
296+
for timestamp, inference_group in group.items():
297+
inference_rows_ids = [x.id for x in inference_group]
298+
(
299+
inference_processed_df,
300+
inference_nonprocessed_df,
301+
actual_df,
302+
) = await seperate_inference_rows(inference_group)
303+
304+
await run_calculate_drifting_metrics_pipeline(
305+
model, inference_processed_df, timestamp
306+
)
307+
308+
await run_calculate_performance_metrics_pipeline(
309+
model, inference_processed_df, actual_df, timestamp
310+
)
311+
312+
await run_calculate_feature_metrics_pipeline(
313+
model, inference_processed_df, timestamp
314+
)
315+
316+
await set_inference_rows_to_used(db, inference_rows_ids)
211317

212318
logger.info(f"Ended Metrics pipeline for model {model.id}...")
213319

0 commit comments

Comments
 (0)