From 77c00575374770b0eeb4aceaeb03cbf78d61797d Mon Sep 17 00:00:00 2001 From: Jeet Mukherjee Date: Tue, 11 Nov 2025 18:00:48 +0530 Subject: [PATCH 1/2] added question on model versioning --- .../187_mlops-etl-pipeline/description.md | 14 -- questions/187_mlops-etl-pipeline/example.json | 5 - questions/187_mlops-etl-pipeline/learn.md | 24 ---- questions/187_mlops-etl-pipeline/meta.json | 12 -- questions/187_mlops-etl-pipeline/solution.py | 43 ------ .../187_mlops-etl-pipeline/starter_code.py | 9 -- questions/187_mlops-etl-pipeline/tests.json | 14 -- .../description.md | 63 +++++++++ .../188_model-versioning-system/example.json | 5 + .../188_model-versioning-system/learn.md | 126 ++++++++++++++++++ .../188_model-versioning-system/meta.json | 17 +++ .../starter_code.py | 82 ++++++++++++ .../188_model-versioning-system/tests.json | 42 ++++++ 13 files changed, 335 insertions(+), 121 deletions(-) delete mode 100644 questions/187_mlops-etl-pipeline/description.md delete mode 100644 questions/187_mlops-etl-pipeline/example.json delete mode 100644 questions/187_mlops-etl-pipeline/learn.md delete mode 100644 questions/187_mlops-etl-pipeline/meta.json delete mode 100644 questions/187_mlops-etl-pipeline/solution.py delete mode 100644 questions/187_mlops-etl-pipeline/starter_code.py delete mode 100644 questions/187_mlops-etl-pipeline/tests.json create mode 100644 questions/188_model-versioning-system/description.md create mode 100644 questions/188_model-versioning-system/example.json create mode 100644 questions/188_model-versioning-system/learn.md create mode 100644 questions/188_model-versioning-system/meta.json create mode 100644 questions/188_model-versioning-system/starter_code.py create mode 100644 questions/188_model-versioning-system/tests.json diff --git a/questions/187_mlops-etl-pipeline/description.md b/questions/187_mlops-etl-pipeline/description.md deleted file mode 100644 index 5adfa2df..00000000 --- a/questions/187_mlops-etl-pipeline/description.md +++ /dev/null 
@@ -1,14 +0,0 @@ -## Problem - -Implement a simple ETL (Extract-Transform-Load) pipeline for model-ready data preparation. - -Given a CSV-like string containing user events with columns: `user_id,event_type,value` (header included), write a function `run_etl(csv_text)` that: - -1. Extracts rows from the raw CSV text. -2. Transforms data by: - - Filtering only rows where `event_type == "purchase"`. - - Converting `value` to float and dropping invalid rows. - - Aggregating total purchase `value` per `user_id`. -3. Loads the transformed results by returning a list of `(user_id, total_value)` tuples sorted by `user_id` ascending. - -Assume small inputs (no external libs), handle extra whitespace, and ignore blank lines. diff --git a/questions/187_mlops-etl-pipeline/example.json b/questions/187_mlops-etl-pipeline/example.json deleted file mode 100644 index 84952417..00000000 --- a/questions/187_mlops-etl-pipeline/example.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "input": "run_etl(\"user_id,event_type,value\\n u1, purchase, 10.0\\n u2, view, 1.0\\n u1, purchase, 5\\n u3, purchase, not_a_number\\n u2, purchase, 3.5 \\n\\n\")", - "output": "[('u1', 15.0), ('u2', 3.5)]", - "reasoning": "Keep only purchases; convert values; drop invalid; aggregate per user; sort by user_id." -} diff --git a/questions/187_mlops-etl-pipeline/learn.md b/questions/187_mlops-etl-pipeline/learn.md deleted file mode 100644 index d523e6a1..00000000 --- a/questions/187_mlops-etl-pipeline/learn.md +++ /dev/null @@ -1,24 +0,0 @@ -## Solution Explanation - -This task mirrors a minimal MLOps ETL flow that prepares data for downstream modeling. - -### ETL breakdown -- Extract: parse raw CSV text, ignore blanks, and split into header and rows. -- Transform: - - Filter only relevant records (event_type == "purchase"). - - Cast `value` to float; discard invalid rows to maintain data quality. - - Aggregate total purchase value per user to create compact features. 
-- Load: return a deterministic, sorted list of `(user_id, total_value)`. - -### Why this design? -- Input sanitation prevents runtime errors and poor-quality features. -- Aggregation compresses event-level logs into user-level features commonly used in models. -- Sorting produces stable, testable outputs. - -### Complexity -- For N rows, parsing and aggregation run in O(N); sorting unique users U costs O(U log U). - -### Extensions -- Add schema validation and logging. -- Write outputs to files or databases. -- Schedule ETL runs and add monitoring for drift and freshness. diff --git a/questions/187_mlops-etl-pipeline/meta.json b/questions/187_mlops-etl-pipeline/meta.json deleted file mode 100644 index d31f85bb..00000000 --- a/questions/187_mlops-etl-pipeline/meta.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "id": "187", - "title": "Build a Simple ETL Pipeline (MLOps)", - "difficulty": "medium", - "category": "MLOps", - "video": "", - "likes": "0", - "dislikes": "0", - "contributor": [ - { "profile_link": "https://github.com/Jeet009", "name": "Jeet Mukherjee" } - ] -} diff --git a/questions/187_mlops-etl-pipeline/solution.py b/questions/187_mlops-etl-pipeline/solution.py deleted file mode 100644 index 19b9a275..00000000 --- a/questions/187_mlops-etl-pipeline/solution.py +++ /dev/null @@ -1,43 +0,0 @@ -from typing import List, Tuple - - -def run_etl(csv_text: str) -> List[Tuple[str, float]]: - """Reference ETL implementation. 
- - - Extract: parse CSV text, skip header, strip whitespace, ignore blanks - - Transform: keep event_type == "purchase"; parse value as float; aggregate per user - - Load: return sorted list of (user_id, total_value) by user_id asc - """ - lines = [line.strip() for line in csv_text.splitlines() if line.strip()] - if not lines: - return [] - # header - header = lines[0] - rows = lines[1:] - - # indices from header (allow varying order and case) - headers = [h.strip().lower() for h in header.split(",")] - try: - idx_user = headers.index("user_id") - idx_event = headers.index("event_type") - idx_value = headers.index("value") - except ValueError: - # header missing required columns - return [] - - aggregates: dict[str, float] = {} - for row in rows: - parts = [c.strip() for c in row.split(",")] - if len(parts) <= max(idx_user, idx_event, idx_value): - continue - user_id = parts[idx_user] - event_type = parts[idx_event].lower() - if event_type != "purchase": - continue - try: - value = float(parts[idx_value]) - except ValueError: - continue - aggregates[user_id] = aggregates.get(user_id, 0.0) + value - - return sorted(aggregates.items(), key=lambda kv: kv[0]) diff --git a/questions/187_mlops-etl-pipeline/starter_code.py b/questions/187_mlops-etl-pipeline/starter_code.py deleted file mode 100644 index 65002026..00000000 --- a/questions/187_mlops-etl-pipeline/starter_code.py +++ /dev/null @@ -1,9 +0,0 @@ -# Implement your function below. - -def run_etl(csv_text: str) -> list[tuple[str, float]]: - """Run a simple ETL pipeline over CSV text with header user_id,event_type,value. - - Returns a sorted list of (user_id, total_value) for event_type == "purchase". 
- """ - # TODO: implement extract, transform, and load steps - raise NotImplementedError diff --git a/questions/187_mlops-etl-pipeline/tests.json b/questions/187_mlops-etl-pipeline/tests.json deleted file mode 100644 index 781c1b28..00000000 --- a/questions/187_mlops-etl-pipeline/tests.json +++ /dev/null @@ -1,14 +0,0 @@ -[ - { - "test": "print(run_etl('user_id,event_type,value\\n u1, purchase, 10.0\\n u2, view, 1.0\\n u1, purchase, 5\\n u3, purchase, not_a_number\\n u2, purchase, 3.5 \\n'))", - "expected_output": "[('u1', 15.0), ('u2', 3.5)]" - }, - { - "test": "print(run_etl('user_id,event_type,value'))", - "expected_output": "[]" - }, - { - "test": "print(run_etl('value,event_type,user_id\\n 1.0, purchase, u1\\n 2.0, purchase, u1\\n'))", - "expected_output": "[('u1', 3.0)]" - } -] diff --git a/questions/188_model-versioning-system/description.md b/questions/188_model-versioning-system/description.md new file mode 100644 index 00000000..92ef1a8b --- /dev/null +++ b/questions/188_model-versioning-system/description.md @@ -0,0 +1,63 @@ +# Model Versioning System + +## Problem Statement + +Implement a simple model versioning system that tracks model versions, performance metrics, and deployment stages. The system should support basic versioning, model comparison, and simple promotion workflows. + +## System Requirements + +### Core Components: + +1. **Model Registry** - Store and manage model versions +2. **Version Management** - Simple versioning (1.0, 1.1, 1.2, etc.) +3. **Performance Tracking** - Store accuracy and basic metrics +4. **Stage Management** - Track deployment stages (dev, staging, production) + +### Key Features: + +- **Simple Versioning**: Increment version numbers (1.0, 1.1, 1.2) +- **Performance Tracking**: Store accuracy and basic metrics +- **Stage Management**: Track which stage each version is in +- **Model Comparison**: Compare performance between versions + +## Implementation Requirements + +### ModelRegistry Class Methods: + +1. 
`register_model(model_name, accuracy, stage="dev")` - Register new model version +2. `get_model(model_name, version=None)` - Get model by version (latest if None) +3. `promote_model(model_name, version, new_stage)` - Move model to new stage +4. `compare_models(model_name, version1, version2)` - Compare two versions +5. `list_models()` - List all registered models + +### Model Data Structure: + +```python +{ + "model_name": str, + "version": str, # e.g., "1.0", "1.1", "1.2" + "accuracy": float, + "stage": str, # "dev", "staging", "production" + "created_at": str # ISO timestamp +} +``` + +### Validation Rules: + +- Accuracy must be between 0.0 and 1.0 +- Stage must be one of: "dev", "staging", "production" +- Version format: number.number (e.g., "1.0", "1.1") + +## Expected Behavior + +- First model registration starts at version "1.0" +- Subsequent registrations increment version ("1.1", "1.2", etc.) +- Models can be promoted between stages +- Performance comparison shows accuracy differences +- System handles basic error cases + +## Constraints + +- Use only standard Python libraries +- Keep it simple and focused on core functionality +- Handle basic edge cases (invalid accuracy, unknown versions) diff --git a/questions/188_model-versioning-system/example.json b/questions/188_model-versioning-system/example.json new file mode 100644 index 00000000..d22efc13 --- /dev/null +++ b/questions/188_model-versioning-system/example.json @@ -0,0 +1,5 @@ +{ + "input": "registry = ModelRegistry()\nresult = registry.register_model('sentiment_classifier', 0.95, 'dev')\nprint(result['version'])", + "output": "1.0", + "reasoning": "First registration of a model creates version 1.0 automatically. The system assigns the initial version and stores the accuracy and stage information." 
+} diff --git a/questions/188_model-versioning-system/learn.md b/questions/188_model-versioning-system/learn.md new file mode 100644 index 00000000..426fc5e4 --- /dev/null +++ b/questions/188_model-versioning-system/learn.md @@ -0,0 +1,126 @@ +# Model Versioning System - Learning Guide + +## Overview + +This problem introduces you to MLOps fundamentals through a simplified model versioning system. You'll learn core concepts of model lifecycle management, version tracking, and deployment coordination without overwhelming complexity. + +## Key Concepts + +### 1. Model Versioning +- **Simple Versioning**: Use incrementing version numbers (1.0, 1.1, 1.2) +- **Automatic Increment**: Each new registration increments the version +- **Version Tracking**: Keep history of all model versions + +### 2. Model Registry Design +- **Centralized Storage**: Store all model information in one place +- **Basic Metadata**: Track essential information (name, version, accuracy, stage) +- **Simple Queries**: Easy retrieval and comparison of models + +### 3. Deployment Stages +``` +Development → Staging → Production + ↓ ↓ ↓ + Testing Validation Live Use +``` + +### 4. Core Data Structure +```python +{ + "model_name": "sentiment_classifier", + "version": "1.0", + "accuracy": 0.95, + "stage": "dev", + "created_at": "2024-01-15T10:30:00Z" +} +``` + +## Implementation Strategies + +### 1. Data Storage +```python +# Simple approach using nested dictionaries +self.models = { + "model_name": { + "1.0": {"accuracy": 0.95, "stage": "dev", "created_at": "..."}, + "1.1": {"accuracy": 0.97, "stage": "dev", "created_at": "..."} + } +} +``` + +### 2. Version Management +- Start with version "1.0" for first registration +- Increment minor version for subsequent registrations +- Parse version strings to determine next version + +### 3. Stage Promotion +- Validate that model exists before promotion +- Update stage information for specific version +- Return success/failure status + +### 4. 
Model Comparison +- Retrieve both model versions +- Calculate accuracy difference +- Return comparison results + +## Common Implementation Patterns + +### 1. Version Increment Logic +```python +def _get_next_version(self, model_name): + if model_name not in self.models: + return "1.0" + + versions = list(self.models[model_name].keys()) + latest_version = max(versions, key=lambda v: float(v)) + major, minor = map(int, latest_version.split('.')) + return f"{major}.{minor + 1}" +``` + +### 2. Validation +```python +def _validate_inputs(self, model_name, accuracy, stage): + if not model_name or not isinstance(model_name, str): + return False, "Invalid model name" + if not 0.0 <= accuracy <= 1.0: + return False, "Accuracy must be between 0.0 and 1.0" + if stage not in ["dev", "staging", "production"]: + return False, "Invalid stage" + return True, "" +``` + +## Key Learning Points + +### 1. System Design Basics +- **Separation of Concerns**: Each method has a single responsibility +- **Data Integrity**: Validate inputs and handle edge cases +- **Error Handling**: Return meaningful results for success/failure + +### 2. MLOps Concepts +- **Model Lifecycle**: Track models from development to production +- **Version Control**: Maintain history of model improvements +- **Stage Management**: Control deployment progression +- **Performance Tracking**: Monitor model quality over time + +### 3. Real-world Relevance +This simplified system mirrors concepts used in: +- **MLflow Model Registry**: Industry-standard model management +- **AWS SageMaker**: Cloud-based ML model deployment +- **Azure ML**: Microsoft's ML platform +- **Kubeflow**: Kubernetes-based ML workflows + +## Common Pitfalls to Avoid + +1. **Version Parsing**: Handle version string format correctly +2. **Edge Cases**: Check for non-existent models/versions +3. **Data Validation**: Ensure accuracy is within valid range +4. **Stage Validation**: Only allow valid stage transitions +5. 
**Return Types**: Match expected return formats + +## Testing Approach + +- **Happy Path**: Test normal operations +- **Edge Cases**: Test with invalid inputs +- **Error Handling**: Verify proper error responses +- **State Management**: Ensure data consistency + +This problem provides a solid foundation for understanding MLOps system design while remaining approachable for learning the core concepts. diff --git a/questions/188_model-versioning-system/meta.json b/questions/188_model-versioning-system/meta.json new file mode 100644 index 00000000..b6a4fa8d --- /dev/null +++ b/questions/188_model-versioning-system/meta.json @@ -0,0 +1,17 @@ +{ + "id": "188", + "title": "Model Versioning System with Registry and Metadata Management", + "difficulty": "hard", + "category": "MLOps", + "video": "", + "likes": "0", + "dislikes": "0", + "contributor": [ + { + "profile_link": "https://github.com/Jeet009", + "name": "Jeet Mukherjee" + } + ], + "tinygrad_difficulty": "hard", + "pytorch_difficulty": "hard" +} diff --git a/questions/188_model-versioning-system/starter_code.py b/questions/188_model-versioning-system/starter_code.py new file mode 100644 index 00000000..7662cfa9 --- /dev/null +++ b/questions/188_model-versioning-system/starter_code.py @@ -0,0 +1,82 @@ +from typing import Dict, List, Optional +from datetime import datetime + +class ModelRegistry: + """ + A simple model versioning system that tracks model versions, + performance metrics, and deployment stages. + """ + + def __init__(self): + # Initialize registry storage + # Your implementation here + pass + + def register_model(self, model_name: str, accuracy: float, stage: str = "dev") -> Dict[str, str]: + """ + Register a new model version. 
+ + Args: + model_name: Name of the model + accuracy: Model accuracy (0.0 to 1.0) + stage: Deployment stage ("dev", "staging", "production") + + Returns: + Dictionary with model information including version + """ + # Your implementation here + pass + + def get_model(self, model_name: str, version: Optional[str] = None) -> Optional[Dict[str, str]]: + """ + Get model information by name and version. + + Args: + model_name: Name of the model + version: Specific version (None for latest) + + Returns: + Model information or None if not found + """ + # Your implementation here + pass + + def promote_model(self, model_name: str, version: str, new_stage: str) -> bool: + """ + Promote a model version to a new stage. + + Args: + model_name: Name of the model + version: Version to promote + new_stage: Target stage ("dev", "staging", "production") + + Returns: + True if successful, False otherwise + """ + # Your implementation here + pass + + def compare_models(self, model_name: str, version1: str, version2: str) -> Optional[Dict[str, float]]: + """ + Compare two model versions. + + Args: + model_name: Name of the model + version1: First version to compare + version2: Second version to compare + + Returns: + Dictionary with comparison results or None if models not found + """ + # Your implementation here + pass + + def list_models(self) -> List[Dict[str, str]]: + """ + List all registered models. 
+ + Returns: + List of all model information + """ + # Your implementation here + pass diff --git a/questions/188_model-versioning-system/tests.json b/questions/188_model-versioning-system/tests.json new file mode 100644 index 00000000..5728465e --- /dev/null +++ b/questions/188_model-versioning-system/tests.json @@ -0,0 +1,42 @@ +[ + { + "test": "registry = ModelRegistry()\nresult = registry.register_model('sentiment_classifier', 0.95, 'dev')\nprint(result['version'])", + "expected_output": "1.0" + }, + { + "test": "registry = ModelRegistry()\n# Register first version\nregistry.register_model('nlp_model', 0.95, 'dev')\n# Register second version\nresult = registry.register_model('nlp_model', 0.97, 'dev')\nprint(result['version'])", + "expected_output": "1.1" + }, + { + "test": "registry = ModelRegistry()\nregistry.register_model('image_classifier', 0.92, 'dev')\n# Promote to staging\nsuccess = registry.promote_model('image_classifier', '1.0', 'staging')\nprint(success)", + "expected_output": "True" + }, + { + "test": "registry = ModelRegistry()\n# Register multiple versions\nregistry.register_model('recommendation_model', 0.90, 'dev')\nregistry.register_model('recommendation_model', 0.93, 'dev')\nregistry.register_model('recommendation_model', 0.89, 'dev')\n# Compare versions\nresult = registry.compare_models('recommendation_model', '1.0', '1.1')\nprint(result['accuracy_delta'])", + "expected_output": "0.03" + }, + { + "test": "registry = ModelRegistry()\nregistry.register_model('fraud_detector', 0.95, 'dev')\nregistry.register_model('churn_predictor', 0.88, 'staging')\nmodels = registry.list_models()\nprint(len(models))", + "expected_output": "2" + }, + { + "test": "registry = ModelRegistry()\nregistry.register_model('test_model', 0.85, 'dev')\n# Get latest version\nmodel = registry.get_model('test_model')\nprint(model['version'])", + "expected_output": "1.0" + }, + { + "test": "registry = ModelRegistry()\nregistry.register_model('test_model', 0.85, 'dev')\n# Get 
specific version\nmodel = registry.get_model('test_model', '1.0')\nprint(model['accuracy'])", + "expected_output": "0.85" + }, + { + "test": "registry = ModelRegistry()\nregistry.register_model('test_model', 0.85, 'dev')\n# Promote to production\nsuccess = registry.promote_model('test_model', '1.0', 'production')\nmodel = registry.get_model('test_model', '1.0')\nprint(model['stage'])", + "expected_output": "production" + }, + { + "test": "registry = ModelRegistry()\nregistry.register_model('test_model', 0.85, 'dev')\n# Try to promote non-existent version\nsuccess = registry.promote_model('test_model', '2.0', 'staging')\nprint(success)", + "expected_output": "False" + }, + { + "test": "registry = ModelRegistry()\nregistry.register_model('test_model', 0.85, 'dev')\n# Compare non-existent versions\nresult = registry.compare_models('test_model', '1.0', '2.0')\nprint(result)", + "expected_output": "None" + } +] From 2479187b7dc2293f53c8fc8a85b67adf6a6c09d5 Mon Sep 17 00:00:00 2001 From: Jeet Mukherjee Date: Tue, 11 Nov 2025 18:11:20 +0530 Subject: [PATCH 2/2] Added One sample Z-test question --- .../description.md | 18 +++ .../example.json | 6 + .../learn.md | 20 +++ .../meta.json | 13 ++ .../solution.py | 40 ++++++ .../starter_code.py | 33 +++++ .../tests.json | 27 ++++ .../description.md | 63 --------- .../188_model-versioning-system/example.json | 5 - .../188_model-versioning-system/learn.md | 126 ------------------ .../188_model-versioning-system/meta.json | 17 --- .../starter_code.py | 82 ------------ .../188_model-versioning-system/tests.json | 42 ------ 13 files changed, 157 insertions(+), 335 deletions(-) create mode 100644 questions/186_one-sample-z-test-hypothesis-testing/description.md create mode 100644 questions/186_one-sample-z-test-hypothesis-testing/example.json create mode 100644 questions/186_one-sample-z-test-hypothesis-testing/learn.md create mode 100644 questions/186_one-sample-z-test-hypothesis-testing/meta.json create mode 100644 
questions/186_one-sample-z-test-hypothesis-testing/solution.py create mode 100644 questions/186_one-sample-z-test-hypothesis-testing/starter_code.py create mode 100644 questions/186_one-sample-z-test-hypothesis-testing/tests.json delete mode 100644 questions/188_model-versioning-system/description.md delete mode 100644 questions/188_model-versioning-system/example.json delete mode 100644 questions/188_model-versioning-system/learn.md delete mode 100644 questions/188_model-versioning-system/meta.json delete mode 100644 questions/188_model-versioning-system/starter_code.py delete mode 100644 questions/188_model-versioning-system/tests.json diff --git a/questions/186_one-sample-z-test-hypothesis-testing/description.md b/questions/186_one-sample-z-test-hypothesis-testing/description.md new file mode 100644 index 00000000..754111c7 --- /dev/null +++ b/questions/186_one-sample-z-test-hypothesis-testing/description.md @@ -0,0 +1,18 @@ +Implement a function to perform a one-sample Z-test for a population mean when the population standard deviation is known. Your function must support both one-tailed and two-tailed alternatives. + +Implement a function with the signature: +- one_sample_z_test(sample_mean, population_mean, population_std, n, alternative="two-sided") + +Where: +- sample_mean: The observed sample mean (float) +- population_mean: The hypothesized population mean under H0 (float) +- population_std: The known population standard deviation (float > 0) +- n: Sample size (int > 0) +- alternative: One of {"two-sided", "greater", "less"} + +Return a dictionary with: +- "z": the computed Z statistic rounded to 4 decimals +- "p_value": the corresponding p-value rounded to 4 decimals + +Use the standard normal distribution for the p-value. Handle invalid inputs minimally by assuming valid types and values. 
+ diff --git a/questions/186_one-sample-z-test-hypothesis-testing/example.json b/questions/186_one-sample-z-test-hypothesis-testing/example.json new file mode 100644 index 00000000..d6f0d1a2 --- /dev/null +++ b/questions/186_one-sample-z-test-hypothesis-testing/example.json @@ -0,0 +1,6 @@ +{ + "input": "sample_mean=103.0, population_mean=100.0, population_std=15.0, n=36, alternative='greater'", + "output": "{'z': 1.2, 'p_value': 0.1151}", + "reasoning": "Standard error = 15/sqrt(36)=2.5. Z=(103-100)/2.5=1.2. For a 'greater' test, p=1-CDF(1.2)=0.1151." +} + diff --git a/questions/186_one-sample-z-test-hypothesis-testing/learn.md b/questions/186_one-sample-z-test-hypothesis-testing/learn.md new file mode 100644 index 00000000..d2dfddc8 --- /dev/null +++ b/questions/186_one-sample-z-test-hypothesis-testing/learn.md @@ -0,0 +1,20 @@ +A one-sample Z-test assesses whether the mean of a population differs from a hypothesized value when the population standard deviation is known. It is appropriate for large samples (by CLT) or when normality is assumed and the population standard deviation is known. + +Test statistic: +- z = (x̄ − μ0) / (σ / √n) + - x̄: sample mean + - μ0: hypothesized mean under H0 + - σ: known population standard deviation + - n: sample size + +P-value computation uses the standard normal distribution: +- Two-sided (H1: μ ≠ μ0): p = 2 · min(Φ(z), 1 − Φ(z)) +- Right-tailed (H1: μ > μ0): p = 1 − Φ(z) +- Left-tailed (H1: μ < μ0): p = Φ(z) + +Decision at level α: +- Reject H0 if p ≤ α; otherwise, fail to reject H0. + +Notes: +- If σ is unknown, use a one-sample t-test with the sample standard deviation instead. 
+ diff --git a/questions/186_one-sample-z-test-hypothesis-testing/meta.json b/questions/186_one-sample-z-test-hypothesis-testing/meta.json new file mode 100644 index 00000000..1ea1e7f5 --- /dev/null +++ b/questions/186_one-sample-z-test-hypothesis-testing/meta.json @@ -0,0 +1,13 @@ +{ + "id": "186", + "title": "One-Sample Z-Test for Mean (One and Two-Tailed)", + "difficulty": "easy", + "category": "Statistics", + "video": "", + "likes": "0", + "dislikes": "0", + "contributor": [ + { "profile_link": "https://github.com/Jeet009", "name": "Jeet Mukherjee" } + ] +} + diff --git a/questions/186_one-sample-z-test-hypothesis-testing/solution.py b/questions/186_one-sample-z-test-hypothesis-testing/solution.py new file mode 100644 index 00000000..7019e23e --- /dev/null +++ b/questions/186_one-sample-z-test-hypothesis-testing/solution.py @@ -0,0 +1,40 @@ +from math import erf, sqrt + +def _standard_normal_cdf(x): + return 0.5 * (1.0 + erf(x / sqrt(2.0))) + +def one_sample_z_test(sample_mean, population_mean, population_std, n, alternative="two-sided"): + """ + Perform a one-sample Z-test for a population mean with known population std. 
+ + Parameters + ---------- + sample_mean : float + population_mean : float + population_std : float + n : int + alternative : str + One of {"two-sided", "greater", "less"} + + Returns + ------- + dict with keys: + - "z": Z-statistic rounded to 4 decimals + - "p_value": p-value rounded to 4 decimals + """ + standard_error = population_std / sqrt(n) + z = (sample_mean - population_mean) / standard_error + cdf = _standard_normal_cdf(z) + + if alternative == "two-sided": + p = 2.0 * min(cdf, 1.0 - cdf) + elif alternative == "greater": + p = 1.0 - cdf + elif alternative == "less": + p = cdf + else: + # Fallback to two-sided if unexpected input + p = 2.0 * min(cdf, 1.0 - cdf) + + return {"z": round(z, 4), "p_value": round(p, 4)} + diff --git a/questions/186_one-sample-z-test-hypothesis-testing/starter_code.py b/questions/186_one-sample-z-test-hypothesis-testing/starter_code.py new file mode 100644 index 00000000..66dd39e1 --- /dev/null +++ b/questions/186_one-sample-z-test-hypothesis-testing/starter_code.py @@ -0,0 +1,33 @@ +from math import erf, sqrt + +def _standard_normal_cdf(x): + return 0.5 * (1.0 + erf(x / sqrt(2.0))) + +def one_sample_z_test(sample_mean, population_mean, population_std, n, alternative="two-sided"): + """ + Perform a one-sample Z-test for a population mean with known population std. + + Parameters + ---------- + sample_mean : float + population_mean : float + population_std : float + n : int + alternative : str + One of {"two-sided", "greater", "less"} + + Returns + ------- + dict with keys: + - "z": Z-statistic rounded to 4 decimals + - "p_value": p-value rounded to 4 decimals + """ + # TODO: Implement the Z statistic and p-value computation + # z = (sample_mean - population_mean) / (population_std / sqrt(n)) + # Use _standard_normal_cdf for CDF of standard normal. 
+ # For alternative: + # - "two-sided": p = 2 * min(P(Z<=z), P(Z>=z)) = 2 * min(cdf(z), 1-cdf(z)) + # - "greater": p = 1 - cdf(z) + # - "less": p = cdf(z) + return {"z": 0.0, "p_value": 1.0} + diff --git a/questions/186_one-sample-z-test-hypothesis-testing/tests.json b/questions/186_one-sample-z-test-hypothesis-testing/tests.json new file mode 100644 index 00000000..b1eac6b4 --- /dev/null +++ b/questions/186_one-sample-z-test-hypothesis-testing/tests.json @@ -0,0 +1,27 @@ +[ + { + "test": "one_sample_z_test(103.0, 100.0, 15.0, 36, alternative='two-sided')", + "expected_output": "{'z': 1.2, 'p_value': 0.2301}" + }, + { + "test": "one_sample_z_test(103.0, 100.0, 15.0, 36, alternative='greater')", + "expected_output": "{'z': 1.2, 'p_value': 0.1151}" + }, + { + "test": "one_sample_z_test(103.0, 100.0, 15.0, 36, alternative='less')", + "expected_output": "{'z': 1.2, 'p_value': 0.8849}" + }, + { + "test": "one_sample_z_test(97.0, 100.0, 10.0, 25, alternative='two-sided')", + "expected_output": "{'z': -1.5, 'p_value': 0.1336}" + }, + { + "test": "one_sample_z_test(97.0, 100.0, 10.0, 25, alternative='less')", + "expected_output": "{'z': -1.5, 'p_value': 0.0668}" + }, + { + "test": "one_sample_z_test(97.0, 100.0, 10.0, 25, alternative='greater')", + "expected_output": "{'z': -1.5, 'p_value': 0.9332}" + } +] + diff --git a/questions/188_model-versioning-system/description.md b/questions/188_model-versioning-system/description.md deleted file mode 100644 index 92ef1a8b..00000000 --- a/questions/188_model-versioning-system/description.md +++ /dev/null @@ -1,63 +0,0 @@ -# Model Versioning System - -## Problem Statement - -Implement a simple model versioning system that tracks model versions, performance metrics, and deployment stages. The system should support basic versioning, model comparison, and simple promotion workflows. - -## System Requirements - -### Core Components: - -1. **Model Registry** - Store and manage model versions -2. 
**Version Management** - Simple versioning (1.0, 1.1, 1.2, etc.) -3. **Performance Tracking** - Store accuracy and basic metrics -4. **Stage Management** - Track deployment stages (dev, staging, production) - -### Key Features: - -- **Simple Versioning**: Increment version numbers (1.0, 1.1, 1.2) -- **Performance Tracking**: Store accuracy and basic metrics -- **Stage Management**: Track which stage each version is in -- **Model Comparison**: Compare performance between versions - -## Implementation Requirements - -### ModelRegistry Class Methods: - -1. `register_model(model_name, accuracy, stage="dev")` - Register new model version -2. `get_model(model_name, version=None)` - Get model by version (latest if None) -3. `promote_model(model_name, version, new_stage)` - Move model to new stage -4. `compare_models(model_name, version1, version2)` - Compare two versions -5. `list_models()` - List all registered models - -### Model Data Structure: - -```python -{ - "model_name": str, - "version": str, # e.g., "1.0", "1.1", "1.2" - "accuracy": float, - "stage": str, # "dev", "staging", "production" - "created_at": str # ISO timestamp -} -``` - -### Validation Rules: - -- Accuracy must be between 0.0 and 1.0 -- Stage must be one of: "dev", "staging", "production" -- Version format: number.number (e.g., "1.0", "1.1") - -## Expected Behavior - -- First model registration starts at version "1.0" -- Subsequent registrations increment version ("1.1", "1.2", etc.) 
-- Models can be promoted between stages -- Performance comparison shows accuracy differences -- System handles basic error cases - -## Constraints - -- Use only standard Python libraries -- Keep it simple and focused on core functionality -- Handle basic edge cases (invalid accuracy, unknown versions) diff --git a/questions/188_model-versioning-system/example.json b/questions/188_model-versioning-system/example.json deleted file mode 100644 index d22efc13..00000000 --- a/questions/188_model-versioning-system/example.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "input": "registry = ModelRegistry()\nresult = registry.register_model('sentiment_classifier', 0.95, 'dev')\nprint(result['version'])", - "output": "1.0", - "reasoning": "First registration of a model creates version 1.0 automatically. The system assigns the initial version and stores the accuracy and stage information." -} diff --git a/questions/188_model-versioning-system/learn.md b/questions/188_model-versioning-system/learn.md deleted file mode 100644 index 426fc5e4..00000000 --- a/questions/188_model-versioning-system/learn.md +++ /dev/null @@ -1,126 +0,0 @@ -# Model Versioning System - Learning Guide - -## Overview - -This problem introduces you to MLOps fundamentals through a simplified model versioning system. You'll learn core concepts of model lifecycle management, version tracking, and deployment coordination without overwhelming complexity. - -## Key Concepts - -### 1. Model Versioning -- **Simple Versioning**: Use incrementing version numbers (1.0, 1.1, 1.2) -- **Automatic Increment**: Each new registration increments the version -- **Version Tracking**: Keep history of all model versions - -### 2. Model Registry Design -- **Centralized Storage**: Store all model information in one place -- **Basic Metadata**: Track essential information (name, version, accuracy, stage) -- **Simple Queries**: Easy retrieval and comparison of models - -### 3. 
Deployment Stages
-```
-Development → Staging → Production
-     ↓           ↓          ↓
-  Testing    Validation   Live Use
-```
-
-### 4. Core Data Structure
-```python
-{
-    "model_name": "sentiment_classifier",
-    "version": "1.0",
-    "accuracy": 0.95,
-    "stage": "dev",
-    "created_at": "2024-01-15T10:30:00Z"
-}
-```
-
-## Implementation Strategies
-
-### 1. Data Storage
-```python
-# Simple approach using nested dictionaries
-self.models = {
-    "model_name": {
-        "1.0": {"accuracy": 0.95, "stage": "dev", "created_at": "..."},
-        "1.1": {"accuracy": 0.97, "stage": "dev", "created_at": "..."}
-    }
-}
-```
-
-### 2. Version Management
-- Start with version "1.0" for first registration
-- Increment minor version for subsequent registrations
-- Parse version strings to determine next version
-
-### 3. Stage Promotion
-- Validate that model exists before promotion
-- Update stage information for specific version
-- Return success/failure status
-
-### 4. Model Comparison
-- Retrieve both model versions
-- Calculate accuracy difference (round to 2 decimal places to avoid float artifacts)
-- Return comparison results
-
-## Common Implementation Patterns
-
-### 1. Version Increment Logic
-```python
-def _get_next_version(self, model_name):
-    if model_name not in self.models:
-        return "1.0"
-
-    versions = list(self.models[model_name].keys())
-    latest_version = max(versions, key=lambda v: tuple(map(int, v.split('.'))))
-    major, minor = map(int, latest_version.split('.'))
-    return f"{major}.{minor + 1}"
-```
-
-### 2. Validation
-```python
-def _validate_inputs(self, model_name, accuracy, stage):
-    if not model_name or not isinstance(model_name, str):
-        return False, "Invalid model name"
-    if not 0.0 <= accuracy <= 1.0:
-        return False, "Accuracy must be between 0.0 and 1.0"
-    if stage not in ["dev", "staging", "production"]:
-        return False, "Invalid stage"
-    return True, ""
-```
-
-## Key Learning Points
-
-### 1. 
System Design Basics -- **Separation of Concerns**: Each method has a single responsibility -- **Data Integrity**: Validate inputs and handle edge cases -- **Error Handling**: Return meaningful results for success/failure - -### 2. MLOps Concepts -- **Model Lifecycle**: Track models from development to production -- **Version Control**: Maintain history of model improvements -- **Stage Management**: Control deployment progression -- **Performance Tracking**: Monitor model quality over time - -### 3. Real-world Relevance -This simplified system mirrors concepts used in: -- **MLflow Model Registry**: Industry-standard model management -- **AWS SageMaker**: Cloud-based ML model deployment -- **Azure ML**: Microsoft's ML platform -- **Kubeflow**: Kubernetes-based ML workflows - -## Common Pitfalls to Avoid - -1. **Version Parsing**: Handle version string format correctly -2. **Edge Cases**: Check for non-existent models/versions -3. **Data Validation**: Ensure accuracy is within valid range -4. **Stage Validation**: Only allow valid stage transitions -5. **Return Types**: Match expected return formats - -## Testing Approach - -- **Happy Path**: Test normal operations -- **Edge Cases**: Test with invalid inputs -- **Error Handling**: Verify proper error responses -- **State Management**: Ensure data consistency - -This problem provides a solid foundation for understanding MLOps system design while remaining approachable for learning the core concepts. 
diff --git a/questions/188_model-versioning-system/meta.json b/questions/188_model-versioning-system/meta.json deleted file mode 100644 index b6a4fa8d..00000000 --- a/questions/188_model-versioning-system/meta.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "id": "188", - "title": "Model Versioning System with Registry and Metadata Management", - "difficulty": "hard", - "category": "MLOps", - "video": "", - "likes": "0", - "dislikes": "0", - "contributor": [ - { - "profile_link": "https://github.com/Jeet009", - "name": "Jeet Mukherjee" - } - ], - "tinygrad_difficulty": "hard", - "pytorch_difficulty": "hard" -} diff --git a/questions/188_model-versioning-system/starter_code.py b/questions/188_model-versioning-system/starter_code.py deleted file mode 100644 index 7662cfa9..00000000 --- a/questions/188_model-versioning-system/starter_code.py +++ /dev/null @@ -1,82 +0,0 @@ -from typing import Dict, List, Optional -from datetime import datetime - -class ModelRegistry: - """ - A simple model versioning system that tracks model versions, - performance metrics, and deployment stages. - """ - - def __init__(self): - # Initialize registry storage - # Your implementation here - pass - - def register_model(self, model_name: str, accuracy: float, stage: str = "dev") -> Dict[str, str]: - """ - Register a new model version. - - Args: - model_name: Name of the model - accuracy: Model accuracy (0.0 to 1.0) - stage: Deployment stage ("dev", "staging", "production") - - Returns: - Dictionary with model information including version - """ - # Your implementation here - pass - - def get_model(self, model_name: str, version: Optional[str] = None) -> Optional[Dict[str, str]]: - """ - Get model information by name and version. 
- - Args: - model_name: Name of the model - version: Specific version (None for latest) - - Returns: - Model information or None if not found - """ - # Your implementation here - pass - - def promote_model(self, model_name: str, version: str, new_stage: str) -> bool: - """ - Promote a model version to a new stage. - - Args: - model_name: Name of the model - version: Version to promote - new_stage: Target stage ("dev", "staging", "production") - - Returns: - True if successful, False otherwise - """ - # Your implementation here - pass - - def compare_models(self, model_name: str, version1: str, version2: str) -> Optional[Dict[str, float]]: - """ - Compare two model versions. - - Args: - model_name: Name of the model - version1: First version to compare - version2: Second version to compare - - Returns: - Dictionary with comparison results or None if models not found - """ - # Your implementation here - pass - - def list_models(self) -> List[Dict[str, str]]: - """ - List all registered models. 
-
-        Returns:
-            List of all model information
-        """
-        # Your implementation here
-        pass
diff --git a/questions/188_model-versioning-system/tests.json b/questions/188_model-versioning-system/tests.json
deleted file mode 100644
index 5728465e..00000000
--- a/questions/188_model-versioning-system/tests.json
+++ /dev/null
@@ -1,42 +0,0 @@
-[
-    {
-        "test": "registry = ModelRegistry()\nresult = registry.register_model('sentiment_classifier', 0.95, 'dev')\nprint(result['version'])",
-        "expected_output": "1.0"
-    },
-    {
-        "test": "registry = ModelRegistry()\n# Register first version\nregistry.register_model('nlp_model', 0.95, 'dev')\n# Register second version\nresult = registry.register_model('nlp_model', 0.97, 'dev')\nprint(result['version'])",
-        "expected_output": "1.1"
-    },
-    {
-        "test": "registry = ModelRegistry()\nregistry.register_model('image_classifier', 0.92, 'dev')\n# Promote to staging\nsuccess = registry.promote_model('image_classifier', '1.0', 'staging')\nprint(success)",
-        "expected_output": "True"
-    },
-    {
-        "test": "registry = ModelRegistry()\n# Register multiple versions\nregistry.register_model('recommendation_model', 0.90, 'dev')\nregistry.register_model('recommendation_model', 0.93, 'dev')\nregistry.register_model('recommendation_model', 0.89, 'dev')\n# Compare versions\nresult = registry.compare_models('recommendation_model', '1.0', '1.1')\nprint(round(result['accuracy_delta'], 2))",
-        "expected_output": "0.03"
-    },
-    {
-        "test": "registry = ModelRegistry()\nregistry.register_model('fraud_detector', 0.95, 'dev')\nregistry.register_model('churn_predictor', 0.88, 'staging')\nmodels = registry.list_models()\nprint(len(models))",
-        "expected_output": "2"
-    },
-    {
-        "test": "registry = ModelRegistry()\nregistry.register_model('test_model', 0.85, 'dev')\n# Get latest version\nmodel = registry.get_model('test_model')\nprint(model['version'])",
-        "expected_output": "1.0"
-    },
-    {
-        "test": "registry = ModelRegistry()\nregistry.register_model('test_model', 0.85, 'dev')\n# 
Get specific version\nmodel = registry.get_model('test_model', '1.0')\nprint(model['accuracy'])", - "expected_output": "0.85" - }, - { - "test": "registry = ModelRegistry()\nregistry.register_model('test_model', 0.85, 'dev')\n# Promote to production\nsuccess = registry.promote_model('test_model', '1.0', 'production')\nmodel = registry.get_model('test_model', '1.0')\nprint(model['stage'])", - "expected_output": "production" - }, - { - "test": "registry = ModelRegistry()\nregistry.register_model('test_model', 0.85, 'dev')\n# Try to promote non-existent version\nsuccess = registry.promote_model('test_model', '2.0', 'staging')\nprint(success)", - "expected_output": "False" - }, - { - "test": "registry = ModelRegistry()\nregistry.register_model('test_model', 0.85, 'dev')\n# Compare non-existent versions\nresult = registry.compare_models('test_model', '1.0', '2.0')\nprint(result)", - "expected_output": "None" - } -]