Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
69 commits
Select commit Hold shift + click to select a range
5bec0f4
add uv
antonacio Apr 6, 2025
0d05a1c
setup dev tools
antonacio Apr 6, 2025
14c3d5f
add kedro project
antonacio Apr 6, 2025
f2ca97a
add kedro pipelines config
antonacio Apr 6, 2025
5a742b5
add package flag to mypy command
antonacio Apr 6, 2025
4286baa
add data setup instructions + scraping notebook
antonacio Apr 6, 2025
13feea3
update README
antonacio Apr 6, 2025
c5419e1
mcdonalds locations processing node + development notebook
antonacio Apr 6, 2025
ca6d37a
subway locations processing node
antonacio Apr 6, 2025
7737af1
update README
antonacio Apr 6, 2025
5811675
fix typo
antonacio Apr 6, 2025
0ea2bd0
add kedro-viz layer metadata
antonacio Apr 6, 2025
db92300
add kedro-viz
antonacio Apr 7, 2025
e5a3803
add ibge population data processing nodes
antonacio Apr 8, 2025
a88a12f
add ibge cities GDP data processing node
antonacio Apr 8, 2025
5d3c678
add just command to run pre-commit hooks
antonacio Apr 8, 2025
4d9c802
add geopandas dependency
antonacio Apr 8, 2025
0a55359
add shapefiles processing nodes
antonacio Apr 8, 2025
b15a326
update README
antonacio Apr 8, 2025
e7ed801
update README
antonacio Apr 8, 2025
c4be3bb
rename data_processing pipeline to process_data
antonacio Apr 9, 2025
de18f03
rename data_processing pipeline to process_data
antonacio Apr 9, 2025
0536724
move shapes into separate folder
antonacio Apr 9, 2025
7fdcc21
add nbstripout to just format command
antonacio Apr 9, 2025
092580b
merge data pipeline with merge cities data node
antonacio Apr 9, 2025
49cb031
add nbstripout to just format command
antonacio Apr 9, 2025
541bd87
compute reference coordinates in shapefiles
antonacio Apr 9, 2025
820e219
update merge cities data node
antonacio Apr 9, 2025
90620d9
fix intermediate and immediate regions' column names for the cities t…
antonacio Apr 9, 2025
da5939b
register pipelines
antonacio Apr 9, 2025
4e34c4b
parametrize geographic columns
antonacio Apr 9, 2025
4e2e7cc
remove city_id from geographic cols
antonacio Apr 10, 2025
d5f136a
add primary tables for shapefiles
antonacio Apr 10, 2025
21edb4e
change to 2021 shapefiles for better compatibility with population an…
antonacio Apr 10, 2025
0b8694c
merge microregions, mesoregions, and states nodes
antonacio Apr 10, 2025
d5b474c
add docstrings for the merge data nodes
antonacio Apr 10, 2025
b2d4953
merge restaurats data node
antonacio Apr 11, 2025
0c3c8ee
add seaborn dependency
antonacio Apr 11, 2025
45dab42
build_report pipeline structure
antonacio Apr 11, 2025
f8dc802
haversine distance funciton
antonacio Apr 18, 2025
b5dd1fa
first geolocation analyses (maps and tables) for the reporting pipeline
antonacio Apr 18, 2025
a0d79b0
target cities for expansion analysis
antonacio Apr 18, 2025
56a6000
add scikit-learn dependency
antonacio Apr 18, 2025
7ce54b0
adding mesoregions to geolocation analyses
antonacio Apr 18, 2025
59b684d
add PuLP dependency
antonacio Apr 19, 2025
7b1647b
remove scikit-learn dependency
antonacio Apr 19, 2025
564c740
adjust build report nodes
antonacio Apr 19, 2025
162f2e8
optimal warehouse locations optimization nodes
antonacio Apr 19, 2025
fd66a4c
group report analyses parameters
antonacio Apr 19, 2025
6b11c31
update README
antonacio Apr 19, 2025
deb9176
adjust plots
antonacio Apr 19, 2025
12aa12d
final report nodes
antonacio Apr 19, 2025
b82f9b4
review report text
antonacio Apr 19, 2025
87807c3
dev notebook with linear programming solution
antonacio Apr 19, 2025
781c71a
add final report to remote repo
antonacio Apr 19, 2025
8174cd6
adjust final report
antonacio Apr 19, 2025
53a0acf
adjust final report
antonacio Apr 19, 2025
4302db3
remove math identation in final report
antonacio Apr 19, 2025
579b5e2
update README
antonacio Apr 19, 2025
efa5de5
fix lint
antonacio Apr 19, 2025
a668f22
remove mypy
antonacio Apr 19, 2025
6da9509
update README
antonacio Apr 19, 2025
7fd24ae
remove unused variable
antonacio Apr 19, 2025
f8792ea
add unit tests
antonacio Apr 19, 2025
32b6e58
update README
antonacio Apr 19, 2025
6afe438
update README
antonacio Apr 19, 2025
06ea2cc
update CI pipeline to include develop branch
antonacio Apr 19, 2025
f11d525
update README
antonacio Apr 19, 2025
5a7c589
add docker
antonacio Apr 19, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions .github/workflows/format-lint-test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Continuous integration: checks out the repository, installs the toolchain
# (uv, Python, just) and runs the `just validate` recipe on pushes and pull
# requests that target the branches listed below.
name: "CI: format, lint, and test"
on:
  push:
    branches:
      - "main"
      - "develop"
  pull_request:
    branches:
      - "main"
      - "develop"

# No step below writes to the repository or calls the GitHub API, so restrict
# the automatically provided GITHUB_TOKEN to read-only repository contents.
permissions:
  contents: read

jobs:
  format-lint-test:
    runs-on: ubuntu-latest

    steps:
      - name: 🛎️ Checkout
        uses: actions/checkout@v4

      - name: 📦 Install uv
        uses: astral-sh/setup-uv@v5

      # The interpreter version is taken from the repository's .python-version
      # file rather than being hard-coded in the workflow.
      - name: 🐍 Set up Python
        uses: actions/setup-python@v5
        with:
          python-version-file: ".python-version"

      - name: 🚀 Install just
        uses: extractions/setup-just@v2

      - name: 🦾 Install the project
        run: just dev-sync

      - name: 💅 🧪 Format, Lint and test
        run: just validate
168 changes: 168 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
##########################
# KEDRO PROJECT

# ignore all local configuration
conf/local/**
!conf/local/.gitkeep
.telemetry

# ignore potentially sensitive credentials files
conf/**/*credentials*

# ignore everything in the following folders
data/**

# except their sub-folders
!data/**/
# except the final report
!data/08_reporting/images/*
!data/08_reporting/final_report.md

# also keep all .gitkeep files
!.gitkeep

# ignore kedro-viz metadata
.viz

# ignore file based logs
*.log

##########################
# Common files

# IntelliJ
.idea/
*.iml
out/
.idea_modules/

### macOS
*.DS_Store
.AppleDouble
.LSOverride
.Trashes

# Vim
*~
.*.swo
.*.swp

# emacs
*~
\#*\#
/.emacs.desktop
/.emacs.desktop.lock
*.elc

# VS Code
.vscode/

# JIRA plugin
atlassian-ide-plugin.xml

# C extensions
*.so

### Python template
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
.static_storage/
.media/
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
.ipython/profile_default/history.sqlite
.ipython/profile_default/startup/README

# pyenv / uv
# .python-version is tracked on purpose: the CI workflow reads the interpreter
# version from it (`python-version-file: ".python-version"`), so it must not be
# ignored. The explicit negation also overrides any broader ignore rule.
!.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.envrc
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# mkdocs documentation
/site

# mypy
.mypy_cache/

# mlflow local runs
mlruns/*
15 changes: 15 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# pre-commit configuration: one project-local formatting hook plus the
# nbstripout hook pinned to a released revision.
default_language_version:
  python: python3.12
repos:
  # Local hook: delegates to the `just format` recipe using the tools already
  # installed on the system (no hook-managed environment).
  - repo: local
    hooks:
      - id: format
        name: format
        language: system
        entry: just format
        # The entry command is run without the staged file names appended.
        pass_filenames: false
        require_serial: true
  # nbstripout hook from its upstream repository, pinned to rev 0.8.1.
  - repo: https://github.com/kynan/nbstripout
    rev: 0.8.1
    hooks:
      - id: nbstripout
1 change: 1 addition & 0 deletions .python-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3.12
59 changes: 59 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# An example using multi-stage image builds to create a final image without uv.
# Source: https://github.com/astral-sh/uv-docker-example/blob/main/multistage.Dockerfile

# First, build the application in the `/app` directory.
# This stage uses the uv image to install the locked dependencies and the
# project; the final stage copies only `/app` out of it.
FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim AS builder
ENV UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy

# Install GDAL build dependencies (for fiona Python package)
USER root
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        gdal-bin \
        libgdal-dev \
        python3-dev \
    && rm -rf /var/lib/apt/lists/*

# Disable Python downloads, because we want to use the system interpreter
# across both images. If using a managed Python version, it needs to be
# copied from the build image into the final image; see `standalone.Dockerfile`
# in the uv-docker-example repository for an example.
ENV UV_PYTHON_DOWNLOADS=0

WORKDIR /app
# Install only the locked third-party dependencies first (`--no-install-project`),
# with uv.lock and pyproject.toml bind-mounted instead of copied, so this layer
# does not depend on the rest of the source tree.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,source=uv.lock,target=uv.lock \
    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
    uv sync --frozen --no-install-project --no-dev
# Use COPY rather than ADD: this is a plain copy of the build context, and
# ADD's extra behaviours (remote URLs, archive extraction) are not wanted here.
COPY . /app
# Second sync installs the project itself on top of the dependency layer.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv sync --frozen --no-dev


# Then, use a final image without uv
FROM python:3.12-slim-bookworm
# It is important to use the image that matches the builder, as the path to the
# Python executable must be the same, e.g., using `python:3.11-slim-bookworm`
# will fail.

# Install GDAL runtime library (for fiona Python package)
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        libgdal-dev \
    && rm -rf /var/lib/apt/lists/*

# Create the `app` user and group named by `--chown` below. Docker resolves
# those names against this image's /etc/passwd and /etc/group, and nothing
# earlier in this stage creates them. IDs are left to the system to avoid
# clashing with accounts created by the packages installed above.
RUN groupadd --system app \
    && useradd --system --gid app --create-home app

# Copy the application from the builder
COPY --from=builder --chown=app:app /app /app

# Place executables in the environment at the front of the path
ENV PATH="/app/.venv/bin:$PATH"

# disable Kedro telemetry
ENV KEDRO_DISABLE_TELEMETRY=1

# NOTE: no USER instruction is set, so the runtime user is unchanged by the
# account creation above; only the ownership of /app is affected.
WORKDIR /app
# Run Kedro pipelines
CMD ["kedro", "run", "--async"]

32 changes: 32 additions & 0 deletions Justfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# https://github.com/casey/just

# Sync the environment with all extras, using a project-local uv cache.
dev-sync:
    uv sync --all-extras --cache-dir .uv_cache

# Same as dev-sync but with `--no-dev`.
prod-sync:
    uv sync --all-extras --no-dev --cache-dir .uv_cache

# Install the pre-commit git hooks.
install-hooks:
    uv run pre-commit install

# Format the code base with ruff.
format:
    uv run ruff format

# Lint with ruff, applying automatic fixes.
lint:
    uv run ruff check --fix

# Install the project in editable mode, then run the test suite in tests/.
test:
    uv pip install -e .
    uv run pytest --verbose --color=yes tests

# Run format, lint and test, in that order.
validate: format lint test

# Build the Docker image, tagged geolocation-pipeline.
docker-build:
    docker build -t geolocation-pipeline .

# Run the geolocation-pipeline image with the host data/ folder mounted.
docker-run:
    # Bind-mount the host data/ folder into the container so that the pipelines'
    # outputs are saved in the host's local data/ folder
    docker run --rm \
        -v "$(pwd)/data:/app/data" \
        geolocation-pipeline
Loading