Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions .flake8
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# flake8 configuration for the project (read automatically by `python -m flake8`).
[flake8]
# Allow lines up to 100 characters instead of flake8's default 79.
max-line-length = 100
# E203 (whitespace before ':') and W503 (line break before binary operator)
# conflict with Black-style formatting, so they are ignored.
extend-ignore = E203, W503
# Paths that should never be linted: VCS metadata, caches, virtualenvs,
# build output, notebooks checkpoints, and non-code artifact directories.
exclude =
    .git,
    __pycache__,
    .pytest_cache,
    .mypy_cache,
    .venv,
    venv,
    env,
    build,
    dist,
    .ipynb_checkpoints,
    screenshots,
    data,
    model
# Test files may contain long fixture/payload lines; don't enforce E501 there.
per-file-ignores =
    tests/*:E501
31 changes: 0 additions & 31 deletions .github/workflows/manual.yml

This file was deleted.

36 changes: 36 additions & 0 deletions .github/workflows/python-ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
name: CI

# Run on every push and on pull requests.
on:
  push:
  pull_request:

# Without this, a PR update triggers both a `push` and a `pull_request` run
# and superseded runs keep executing; cancel stale runs of the same ref.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: '3.10'
          cache: 'pip'
      - name: Install
        run: |
          python -m pip install --upgrade pip
          # First install pinned runtime/dev dependencies to avoid building
          # heavy wheels during the editable install step.
          pip install -r requirements.txt
          # Now install the project in editable mode so repository packages
          # (like `ml`) are importable in CI.
          pip install -e .

      - name: Debug install
        run: |
          echo "Python:" $(python --version)
          echo "pip:" $(pip --version)
          pip list --format=columns
          python -c "import ml, sys; print('ml import ok =>', ml.__file__)"
      # Lint before training: lint failures are cheap and should fail fast,
      # so don't spend time training a model first.
      - name: Lint
        run: python -m flake8 .
      - name: Train tiny model
        run: python scripts/train_tiny_model.py
      - name: Tests
        run: pytest -q
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -177,3 +177,4 @@ pyrightconfig.json
fastapi/

# End of https://www.toptal.com/developers/gitignore/api/python
model/*.pkl
19 changes: 19 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,25 @@ Working in a command line environment is recommended for ease of use with git an
# Environment Set up (pip or conda)
* Option 1: use the supplied file `environment.yml` to create a new environment with conda
* Option 2: use the supplied file `requirements.txt` to create a new environment with pip

Quick start (venv + editable install)
-----------------------------------
If you prefer a lightweight virtualenv workflow, create and activate a venv, install the project in editable mode, and run the tiny trainer used in CI:

```bash
# create & activate venv (macOS / Linux)
python -m venv .venv
source .venv/bin/activate

# install the project and dependencies in editable mode
pip install --upgrade pip
pip install -e .

# run the small training helper (writes artifacts to ./model)
python scripts/train_tiny_model.py
```

This mirrors how the CI installs the repository and makes the local `ml` package importable without modifying PYTHONPATH.

## Repositories
* Create a directory for the project and initialize git.
Expand Down
58 changes: 41 additions & 17 deletions local_api.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,49 @@
import json

import requests

# TODO: send a GET using the URL http://127.0.0.1:8000
r = None # Your code here
URL = "http://127.0.0.1:8000"


# TODO: print the status code
# print()
# TODO: print the welcome message
# print()
def safe_request(method, url, **kwargs):
    """Send an HTTP request, print the outcome, and return the response.

    Parameters
    ----------
    method : str
        HTTP verb, e.g. "GET" or "POST".
    url : str
        Target URL.
    **kwargs
        Extra keyword arguments forwarded to ``requests.request``
        (e.g. ``json=payload``).

    Returns
    -------
    requests.Response or None
        The response object on success so callers can inspect it, or
        ``None`` when the request failed (connection error, timeout, or
        any other requests-level failure).
    """
    try:
        # 5-second timeout so a hung server doesn't block the script forever.
        r = requests.request(method, url, timeout=5, **kwargs)
    except requests.exceptions.ConnectionError:
        print(f"❌ Could not connect to {url}. Is the server running?")
        return None
    except requests.exceptions.Timeout:
        print(f"⏱️ Request to {url} timed out.")
        return None
    except requests.exceptions.RequestException as e:
        # Narrowed from a bare `except Exception`: only requests-level
        # failures are expected here; programming errors should propagate.
        print(f"⚠️ Unexpected error calling {url}: {e}")
        return None

    try:
        # Try to parse as JSON
        print(f"{method} {url}:", r.status_code, r.json())
    except ValueError:
        # Fallback to raw text if not JSON (e.g. error pages, 500s)
        print(f"{method} {url}:", r.status_code, r.text)
    return r


# GET request
# Exercise the root endpoint; prints the status code and welcome message.
safe_request("GET", URL)

# First POST payload
# Example census record sent to the /data/ inference endpoint; keys use the
# dataset's hyphenated column names (matched via field aliases server-side).
payload1 = {
    "age": 52,
    "workclass": "Private",
    "fnlgt": 209642,
    "education": "Masters",
    "education-num": 14,
    "marital-status": "Married-civ-spouse",
    "occupation": "Exec-managerial",
    "relationship": "Husband",
    "race": "White",
    "sex": "Male",
    "capital-gain": 0,
    "capital-loss": 0,
    "hours-per-week": 45,
    "native-country": "United-States",
}
safe_request("POST", f"{URL}/data/", json=payload1)

data = {
# Second POST payload
payload2 = {
"age": 37,
"workclass": "Private",
"fnlgt": 178356,
Expand All @@ -28,11 +59,4 @@
"hours-per-week": 40,
"native-country": "United-States",
}

# TODO: send a POST using the data above
r = None # Your code here

# TODO: print the status code
# print()
# TODO: print the result
# print()
safe_request("POST", f"{URL}/data/", json=payload2)
143 changes: 88 additions & 55 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,74 +1,107 @@
import os

import pandas as pd
from fastapi import FastAPI
from pydantic import BaseModel, Field
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, ConfigDict, Field

from ml.data import apply_label, process_data
from ml.model import inference, load_model

# DO NOT MODIFY

# ---------- Request schema (Pydantic v2 style) ----------
class Data(BaseModel):
    """Single census record accepted by the /data/ inference endpoint.

    The census dataset uses hyphenated column names (e.g. "education-num"),
    which are not valid Python identifiers; field aliases let the JSON
    payload use the hyphenated names directly.
    """

    # allow aliases (hyphenated names) and show a full example in /docs
    model_config = ConfigDict(
        populate_by_name=True,
        json_schema_extra={
            "example": {
                "age": 37,
                "workclass": "Private",
                "fnlgt": 178356,
                "education": "HS-grad",
                "education-num": 10,
                "marital-status": "Married-civ-spouse",
                "occupation": "Prof-specialty",
                "relationship": "Husband",
                "race": "White",
                "sex": "Male",
                "capital-gain": 0,
                "capital-loss": 0,
                "hours-per-week": 40,
                "native-country": "United-States",
            }
        },
    )

    age: int
    workclass: str
    fnlgt: int
    education: str
    education_num: int = Field(alias="education-num")
    marital_status: str = Field(alias="marital-status")
    occupation: str
    relationship: str
    race: str
    sex: str
    capital_gain: int = Field(alias="capital-gain")
    capital_loss: int = Field(alias="capital-loss")
    hours_per_week: int = Field(alias="hours-per-week")
    native_country: str = Field(alias="native-country")


# ---------- Load artifacts ----------
# Resolve artifact paths relative to this file so the app works no matter
# what the current working directory is when uvicorn starts.
_PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))
ENCODER_PATH = os.path.join(_PROJECT_ROOT, "model", "encoder.pkl")
MODEL_PATH = os.path.join(_PROJECT_ROOT, "model", "model.pkl")

# Fail fast at import time with an actionable message if artifacts are missing.
if not os.path.exists(ENCODER_PATH):
    raise RuntimeError(
        f"Encoder not found at {ENCODER_PATH}. Did you run train_model.py?"
    )
if not os.path.exists(MODEL_PATH):
    raise RuntimeError(f"Model not found at {MODEL_PATH}. Did you run train_model.py?")

encoder = load_model(ENCODER_PATH)
model = load_model(MODEL_PATH)

# ---------- FastAPI app ----------
app = FastAPI(title="Census Income Inference API")

@app.get("/")
async def get_root():
    """Return the welcome message for the API root."""
    return {"message": "Welcome to the Census Income Inference API"}


@app.post("/data/")
async def post_inference(data: Data):
    """Run model inference on a single census record.

    Accepts a `Data` payload (hyphenated aliases allowed), one-hot encodes
    the categorical features with the fitted encoder, and returns the
    predicted income label as ``{"result": "<=50K" | ">50K"}``.

    Raises
    ------
    HTTPException
        500 with a descriptive detail string if any step of the pipeline
        fails.
    """
    try:
        # Use aliases and normalize keys to hyphenated for the pipeline
        data_dict = data.model_dump(by_alias=True)
        df = pd.DataFrame([{k.replace("_", "-"): v for k, v in data_dict.items()}])

        cat_features = [
            "workclass",
            "education",
            "marital-status",
            "occupation",
            "relationship",
            "race",
            "sex",
            "native-country",
        ]

        X, _, _, _ = process_data(
            df,
            categorical_features=cat_features,
            label=None,
            training=False,
            encoder=encoder,
            lb=None,
        )
        preds = inference(model, X)
        return {"result": apply_label(preds)}
    except Exception as e:
        # Keep stacktrace in server logs but surface a clear client error;
        # `from e` preserves the original cause for the traceback.
        raise HTTPException(status_code=500, detail=f"Inference failed: {e}") from e
2 changes: 1 addition & 1 deletion ml/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@

"""ML package."""
5 changes: 3 additions & 2 deletions ml/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
def process_data(
X, categorical_features=[], label=None, training=True, encoder=None, lb=None
):
""" Process the data used in the machine learning pipeline.
"""Process the data used in the machine learning pipeline.

Processes the data using one hot encoding for the categorical features and a
label binarizer for the labels. This can be used in either training or
Expand Down Expand Up @@ -69,8 +69,9 @@ def process_data(
X = np.concatenate([X_continuous, X_categorical], axis=1)
return X, y, encoder, lb


def apply_label(inference):
""" Convert the binary label in a single inference sample into string output."""
"""Convert the binary label in a single inference sample into string output."""
if inference[0] == 1:
return ">50K"
elif inference[0] == 0:
Expand Down
Loading