22 changes: 22 additions & 0 deletions .coveragerc
@@ -0,0 +1,22 @@
[run]
# Only measure coverage for the project source code (packages live under src/)
source = src

[report]
# Omit tests, docs, markdown, config, and common non-code files from the coverage report
omit =
    *.md
    *.txt
    *.rst
    *.yml
    *.yaml
    *.ini
    setup.py
    LICENSE
    README*
    CONTRIBUTING*
    .gitignore
    .coveragerc
    __init__.py
    pytest.ini
    requirements.txt
Empty file added .github/workflows/ci.yaml
Empty file.
36 changes: 36 additions & 0 deletions .github/workflows/test-coverage.yaml
@@ -0,0 +1,36 @@
name: Run Tests and Upload Coverage

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

jobs:
  test:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          pip install pytest pytest-cov

      - name: Run tests with coverage
        run: |
          pytest --cov=src --cov-report=xml

      - name: Upload coverage reports to Codecov
        uses: codecov/codecov-action@v5
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          slug: leftkats/DataPytheon
6 changes: 6 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,6 @@
repos:
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.4.4  # or your preferred version
    hooks:
      - id: ruff
        args: ["--fix", "--line-length=120"]
43 changes: 43 additions & 0 deletions CODE_OF_CONDUCT.md
@@ -0,0 +1,43 @@
# Code of Conduct

## Our Pledge

In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to make participation in **DataPytheon** a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation.

## Our Standards

Examples of behavior that contributes to creating a positive environment include:

- Using welcoming and inclusive language
- Being respectful of differing viewpoints and experiences
- Gracefully accepting constructive criticism
- Focusing on what is best for the community
- Showing empathy towards other community members

Examples of unacceptable behavior by participants include:

- Harassment, intimidation, or discrimination in any form
- Trolling, insulting or derogatory comments, and personal or political attacks
- Public or private harassment
- Publishing others' private information, such as physical or electronic addresses, without explicit permission
- Other conduct which could reasonably be considered inappropriate in a professional setting

## Enforcement Responsibilities

Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior.

## Reporting Guidelines

If you are subject to or witness unacceptable behavior, please report it by contacting the project maintainers. All reports will be reviewed and investigated promptly and fairly.

## Scope

This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community.

## Enforcement

Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders, who will decide on the best course of action. This may include a warning or temporary or permanent ban from the project.

## Attribution

This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org), version 2.1, available at https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
50 changes: 50 additions & 0 deletions CONTRIBUTING.md
@@ -0,0 +1,50 @@
# Contributing to DataPytheon

Thank you for your interest in contributing! :sparkles: Whether you're a first-timer or an experienced contributor, we welcome your help in making DataPytheon better.

---

## How You Can Help

- Add new **data recipes** in the `recipes/` folder
- Add new **API syncers** in the `syncers/` folder
- Fix bugs or improve existing scripts
- Improve documentation or add examples
- Write or improve tests in the `tests/` folder

---

## Getting Started

1. **Fork the repository**
Click the "Fork" button at the top right of the repo page.

2. **Clone your fork locally**
```bash
git clone https://github.com/your-username/DataPytheon.git
cd DataPytheon
```
3. Create a new branch for your work
```bash
git checkout -b feature/your-feature-name
```
4. Install the project dependencies
```bash
pip install -r requirements.txt
```
5. Make your changes
Add your script, improve docs, or fix bugs.
6. Test your changes
Run existing tests and/or add new ones.
```bash
pytest
```
7. Commit and push
```bash
git add .
git commit -m "Add feature: description"
git push origin feature/your-feature-name
```
8. Open a Pull Request

Go to your fork on GitHub and click "Compare & pull request". Describe your changes clearly.
49 changes: 47 additions & 2 deletions README.md
@@ -1,2 +1,47 @@
# DataPytheon
A mythical collection of ready-to-use Python scripts for fetching, cleaning, and syncing data from public datasets and APIs. Cook data fast with recipes and syncers!
# :snake: DataPytheon

[![Python 3.10](https://img.shields.io/badge/python-3.10-blue.svg)](https://www.python.org/downloads/release/python-3100/)
[![codecov](https://codecov.io/gh/leftkats/DataPytheon/graph/badge.svg?token=C69BFSAR0S)](https://codecov.io/gh/leftkats/DataPytheon)


Welcome to **DataPytheon** – a mythical library of **easy-to-use Python scripts** that help you **access, clean, and explore datasets** from both public repositories and live APIs.

Whether you're a **beginner learning data science**, a **developer prototyping fast**, or an **open-source contributor**, this project gives you plug-and-play tools to handle real-world data with ease.

---

## :bookmark_tabs: What Is This?

**DataPytheon** is a hybrid repository that offers:

- **`recipes/`** β€” Pre-cleaned **static datasets** (like Titanic, Iris, Netflix, etc.)
- **`syncers/`** β€” Scripts to **fetch real-time data** from public APIs (like exchange rates, crypto prices, weather, etc.)

All scripts return **ready-to-use Pandas DataFrames**, ideal for quick analysis, learning, or feeding into models.

Think of it as your **data prep toolbox** β€” one line of code away from clean, structured data.

---

## :hammer_and_wrench: Who Is It For?

- :student: **Beginners** in Python, data science, or machine learning
- :computer: **Developers** who want quick dataset access without boilerplate
- :sparkles: **Contributors** looking for a simple and valuable open-source project
- :books: **Educators** who need ready datasets for teaching or assignments

---

## :file_folder: Project Structure

```
DataPytheon/
│
├── recipes/              # Static datasets
│   └── titanic.py        # Example recipe
│
├── syncers/              # Live/API data scripts
│   └── exchange_rates.py # Example syncer
│
├── tests/                # Basic unit tests for scripts
│   └── test_titanic.py
```
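
The "one line of code away" pattern described above can be sketched with a toy recipe; the function name and data here are hypothetical, not a module shipped in this repo:

```python
import pandas as pd


def load_toy_recipe() -> pd.DataFrame:
    """Toy stand-in for a DataPytheon recipe: raw data in, tidy DataFrame out."""
    raw = {
        "name": ["Ada", "Grace", None],
        "score": [95.0, None, 88.0],
    }
    df = pd.DataFrame(raw)

    # Recipes typically drop rows missing critical values...
    df = df.dropna(subset=["name"])

    # ...fill remaining gaps with a sensible default...
    df["score"] = df["score"].fillna(df["score"].mean())

    # ...and return a DataFrame that is ready for analysis.
    return df.reset_index(drop=True)


df = load_toy_recipe()
print(df)
```

Real recipes follow the same contract: call one function, get back a clean `pandas.DataFrame`.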

18 changes: 18 additions & 0 deletions codecov.yml
@@ -0,0 +1,18 @@

codecov:
  require_ci_to_pass: yes

coverage:
  status:
    project:
      default:
        target: 85%
        threshold: 2%  # Fail the status check if project coverage drops more than 2%
    patch:
      default:
        threshold: 2%

comment:
  layout: "reach, diff, flags, files"
  behavior: default
  require_changes: false
4 changes: 4 additions & 0 deletions pyproject.toml
@@ -0,0 +1,4 @@
[tool.ruff]
line-length = 120
target-version = "py310"

[tool.ruff.lint]
select = ["E", "F", "I"]  # pycodestyle errors, Pyflakes, and import sorting (isort)
6 changes: 6 additions & 0 deletions pytest.ini
@@ -0,0 +1,6 @@
[pytest]
pythonpath = src
addopts = --cov=src --cov-report=term --cov-fail-under=80
testpaths = tests
python_files = test_*.py
norecursedirs = .git .tox .venv .idea .vscode
17 changes: 17 additions & 0 deletions requirements.txt
@@ -0,0 +1,17 @@
# Core data stack
pandas>=1.5
numpy>=1.22
seaborn==0.13.2

# API requests
requests>=2.31

# Testing
pytest>=7.4
pytest-cov>=4.1

# Linting & formatting
ruff>=0.4.4

# Pre-commit hooks
pre-commit>=3.6
Empty file added src/__init__.py
Empty file.
Empty file added src/recipes/__init__.py
Empty file.
42 changes: 42 additions & 0 deletions src/recipes/titanic.py
@@ -0,0 +1,42 @@
"""
Titanic Dataset Recipe
----------------------
Loads and preprocesses the Titanic dataset.
Source: https://www.kaggle.com/c/titanic/data (via seaborn)

Steps:
- Loads from seaborn (no API key needed)
- Basic cleaning and transformation
- Returns a ready-to-use pandas DataFrame
"""

import seaborn as sns


def load_titanic_data():
    """Loads and cleans the Titanic dataset."""

    # Load dataset from seaborn
    df = sns.load_dataset("titanic")

    # Drop columns with too many missing values or redundant info
    df = df.drop(columns=["deck", "embark_town", "alive"])

    # Fill missing values in 'embarked' with the most common port
    # (done before dropping rows, so the fill actually applies)
    df["embarked"] = df["embarked"].fillna("S")

    # Drop rows still missing critical values
    df = df.dropna(subset=["age"])

    # Convert categorical columns to category dtype
    cat_cols = ["sex", "class", "embarked", "who", "adult_male", "alone"]
    for col in cat_cols:
        df[col] = df[col].astype("category")

    return df


# Example usage
if __name__ == "__main__":
    df = load_titanic_data()
    print(df.head())
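
One subtlety in cleaning code like this: the relative order of `dropna` and `fillna` changes the result. A minimal, self-contained illustration with synthetic data:

```python
import pandas as pd

# Tiny frame mimicking the two critical Titanic columns.
df = pd.DataFrame({
    "embarked": ["S", None, "C"],
    "age": [22.0, 30.0, None],
})

# Drop-then-fill: the row with a missing port is already gone,
# so the subsequent fill has nothing left to do.
drop_first = df.dropna(subset=["embarked", "age"]).copy()
drop_first["embarked"] = drop_first["embarked"].fillna("S")

# Fill-then-drop: the missing port is repaired first,
# so only the row missing 'age' is lost.
fill_first = df.copy()
fill_first["embarked"] = fill_first["embarked"].fillna("S")
fill_first = fill_first.dropna(subset=["age"])

print(len(drop_first), len(fill_first))  # 1 2
```

Filling before dropping preserves one more row here; on the full Titanic data the difference is every passenger whose port of embarkation was blank.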
Empty file added src/syncers/__init__.py
Empty file.
48 changes: 48 additions & 0 deletions src/syncers/exchange_rates.py
@@ -0,0 +1,48 @@
"""
Exchange Rates Syncer
----------------------
Fetches real-time exchange rates using the Frankfurter API.
Source: https://www.frankfurter.app/

Steps:
- Calls the public API (no API key required)
- Retrieves exchange rates for a given base currency
- Converts the result into a pandas DataFrame
"""

import pandas as pd
import requests


def fetch_exchange_rates(base_currency="USD"):
    """
    Fetches latest exchange rates for the given base currency.

    Parameters:
        base_currency (str): ISO 4217 code (e.g., 'USD', 'EUR', 'GBP')

    Returns:
        pd.DataFrame: Tidy DataFrame with rates and metadata
    """
    url = f"https://api.frankfurter.app/latest?from={base_currency}"
    response = requests.get(url, timeout=10)

    if response.status_code != 200:
        raise RuntimeError(
            f"API request failed with status {response.status_code}: {response.text}"
        )

    data = response.json()

    # Flatten into DataFrame: one row per currency, plus metadata columns
    df = pd.DataFrame(list(data["rates"].items()), columns=["currency", "rate"])
    df["base"] = data["base"]
    df["date"] = data["date"]

    return df


# Example usage
if __name__ == "__main__":
    df = fetch_exchange_rates("USD")
    print(df.head())
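
The flattening step can be exercised without any network call. A sketch using a hand-built payload shaped like the Frankfurter `/latest` response (the rates below are illustrative, not live data):

```python
import pandas as pd

# A payload shaped like the Frankfurter /latest response (illustrative values).
data = {
    "base": "USD",
    "date": "2024-01-02",
    "rates": {"EUR": 0.91, "GBP": 0.79, "JPY": 141.5},
}

# Same flattening as the syncer: one row per currency, plus metadata columns.
df = pd.DataFrame(list(data["rates"].items()), columns=["currency", "rate"])
df["base"] = data["base"]
df["date"] = data["date"]

print(df)
```

Keeping the flatten logic this simple is what makes the syncer easy to test offline.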
Empty file added tests/__init__.py
Empty file.
Empty file added tests/recipes/__init__.py
Empty file.
31 changes: 31 additions & 0 deletions tests/recipes/test_titanic.py
@@ -0,0 +1,31 @@
import pandas as pd

from src.recipes.titanic import load_titanic_data


def test_load_titanic_data_returns_dataframe():
    df = load_titanic_data()

    # Check that output is a DataFrame
    assert isinstance(df, pd.DataFrame), "Expected a pandas DataFrame"

    # Check for expected columns (a subset)
    expected_cols = {
        "survived",
        "pclass",
        "sex",
        "age",
        "sibsp",
        "parch",
        "fare",
        "embarked",
    }
    missing_cols = expected_cols - set(df.columns)
    assert not missing_cols, f"Missing expected columns: {missing_cols}"

    # Check no critical nulls remain in 'age' or 'embarked'
    assert df["age"].isnull().sum() == 0, "'age' column should have no nulls"
    assert df["embarked"].isnull().sum() == 0, "'embarked' column should have no nulls"

    # Check that some rows exist
    assert len(df) > 0, "DataFrame should not be empty"
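
Tests for syncers can avoid live HTTP entirely by stubbing `requests.get`. A sketch with a minimal stand-in fetcher (it mirrors, rather than imports, the repo's syncer):

```python
from unittest.mock import MagicMock, patch

import pandas as pd
import requests


def fetch_rates(base="USD"):
    """Minimal stand-in for an exchange-rates syncer."""
    resp = requests.get(f"https://api.frankfurter.app/latest?from={base}", timeout=10)
    resp.raise_for_status()
    data = resp.json()
    df = pd.DataFrame(list(data["rates"].items()), columns=["currency", "rate"])
    df["base"] = data["base"]
    return df


def test_fetch_rates_without_network():
    # Build a fake response object with just the attributes the fetcher touches.
    fake = MagicMock()
    fake.status_code = 200
    fake.json.return_value = {"base": "USD", "rates": {"EUR": 0.9}}

    # Patch requests.get so no real HTTP request is made.
    with patch("requests.get", return_value=fake) as mocked:
        df = fetch_rates("USD")

    mocked.assert_called_once()
    assert list(df["currency"]) == ["EUR"]
    assert (df["base"] == "USD").all()


test_fetch_rates_without_network()
```

The same pattern applies to the real `src/syncers/exchange_rates.py`: patch the HTTP call, feed a canned payload, and assert on the DataFrame shape.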
Empty file added tests/syncers/__init__.py
Empty file.