Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions docs/CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -177,20 +177,21 @@ Pre-commit hooks enforce standards by running checks (like `ruff`) before commit
Loaders pull data (e.g., CSV, Shapefiles) into a `GeoDataFrame`{ title="A GeoDataFrame is a pandas DataFrame with geospatial capabilities." }.

1. **Subclass `LoaderBase`** (`urban_mapper/modules/loader/abc_loader.py`):
- Implement `load_data_from_file`. Refer to the `base` class for details.
- Implement `_load`. Refer to the `base` class for details.
Note: If your loader reads its data from a file, consider subclassing `FileLoaderBase` (`urban_mapper/modules/loader/file_loader.py`) instead.
2. **Register It**:
- Add to `FILE_LOADER_FACTORY` in `urban_mapper/modules/loader/loader_factory.py`.
- Add to `LOADER_FACTORY` in `urban_mapper/modules/loader/loader_factory.py`.

**Example** (`csv_loader.py`):
```python
from urban_mapper.modules.loader.abc_loader import LoaderBase
from urban_mapper.modules.loader.file_loader import FileLoaderBase
import geopandas as gpd
import pandas as pd
from beartype import beartype

@beartype
class CSVLoader(LoaderBase):
def load_data_from_file(self) -> gpd.GeoDataFrame:
class CSVLoader(FileLoaderBase):
def _load(self) -> gpd.GeoDataFrame:
df = pd.read_csv(self.file_path) #(1)
# Convert to GeoDataFrame...
return gdf
Expand Down
32 changes: 27 additions & 5 deletions docs/api/loaders.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,31 +23,53 @@
options:
heading: "LoaderBase"
members:
- load_data_from_file
- _load_data_from_file
- load
- _load
- preview

## ::: urban_mapper.modules.loader.FileLoaderBase
options:
heading: "FileLoaderBase"
members:
- load
- _load
- preview

## ::: urban_mapper.modules.loader.CSVLoader
options:
heading: "CSVLoader"
members:
- _load_data_from_file
- _load
- preview

## ::: urban_mapper.modules.loader.ParquetLoader
options:
heading: "ParquetLoader"
members:
- _load_data_from_file
- _load
- preview

## ::: urban_mapper.modules.loader.ShapefileLoader
options:
heading: "ShapefileLoader"
members:
- _load_data_from_file
- _load
- preview

## ::: urban_mapper.modules.loader.DataFrameLoader
options:
heading: "DataFrameLoader"
members:
- _load
- preview

## ::: urban_mapper.modules.loader.HuggingFaceLoader
options:
heading: "HuggingFaceLoader"
members:
- _load
- preview

## ::: urban_mapper.modules.loader.LoaderFactory
options:
heading: "LoaderFactory"
Expand Down
60 changes: 6 additions & 54 deletions docs/copy_of_examples/1-Per-Module/7-urban_pipeline.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -26,26 +26,6 @@
"mapper = um.UrbanMapper()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Note: For the documentation interactive mode, we only query 100000 records from the dataset. Feel free to remove for a more realistic analysis.\n",
"data = (\n",
" um.UrbanMapper()\n",
" .loader\n",
" .from_huggingface(\"oscur/pluto\", number_of_rows=100000, streaming=True)\n",
" .with_columns(\"longitude\", \"latitude\")\n",
"# .with_columns(geometry_column=<geometry_column_name>\") # Replace <geometry_column_name> with the actual name of your geometry column instead of latitude and longitude columns. \n",
" .load()\n",
")\n",
"data['longitude'] = data['longitude'].astype(float)\n",
"data['latitude'] = data['latitude'].astype(float)\n",
"data.to_csv(\"./pluto.csv\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down Expand Up @@ -91,11 +71,11 @@
" )\n",
" .build()\n",
")\n",
"\n",
"# Note: For the documentation interactive mode, we only query 100000 records from the dataset. Feel free to remove for a more realistic analysis.\n",
"loader = (\n",
" mapper\n",
" .loader\n",
" .from_file(\"./pluto.csv\")\n",
" .from_huggingface(\"oscur/pluto\", number_of_rows=100000, streaming=True)\n",
" .with_columns(\"longitude\", \"latitude\")\n",
"# .with_columns(geometry_column=<geometry_column_name>\") # Replace <geometry_column_name> with the actual name of your geometry column instead of latitude and longitude columns. \n",
" .build()\n",
Expand All @@ -110,7 +90,7 @@
")\n",
"filter_step = mapper.filter.with_type(\"BoundingBoxFilter\").build()\n",
"enricher = mapper.enricher.with_data(group_by=\"nearest_intersection\", values_from=\"numfloors\").aggregate_by(method=\"mean\", output_column=\"avg_floors\").build()\n",
"visualiser = mapper.visual.with_type(\"Interactive\").with_style({\"tiles\": \"CartoDB dark_matter\", \"colorbar_text_color\": \"white\"}).build()\n",
"visualiser = mapper.visual.with_type(\"Interactive\").with_style({\"tiles\": \"CartoDB Positron\", \"colorbar_text_color\": \"gray\"}).build()\n",
"\n",
"# Assemble the pipeline\n",
"# Note that a pipeline's step is a tuple with a name and the step itself.\n",
Expand Down Expand Up @@ -260,46 +240,18 @@
" .build()\n",
")\n",
"\n",
"## It is not possible to use from_huggingface directly in the pipeline, because the online method that supports `.build()` is from_file\n",
"## This feature should be changed in the next versions\n",
"data = (\n",
" mapper\n",
" .loader\n",
" .from_huggingface(\"oscur/pluto\", number_of_rows=1000, streaming=True)\n",
" .with_columns(\"longitude\", \"latitude\")\n",
"# .with_columns(geometry_column=<geometry_column_name>\") # Replace <geometry_column_name> with the actual name of your geometry column instead of latitude and longitude columns. \n",
" .load()\n",
")\n",
"data['longitude'] = data['longitude'].astype(float)\n",
"data['latitude'] = data['latitude'].astype(float)\n",
"data.to_csv(\"./pluto.csv\")\n",
"\n",
"## It is not possible to use from_huggingface directly in the pipeline, because the online method that supports `.build()` is from_file\n",
"## This feature should be changed in the next versions\n",
"data = (\n",
" mapper\n",
" .loader\n",
" .from_huggingface(\"oscur/taxisvis1M\", number_of_rows=1000, streaming=True)\n",
" .with_columns(\"pickup_longitude\", \"pickup_latitude\")\n",
"# .with_columns(geometry_column=<geometry_column_name>\") # Replace <geometry_column_name> with the actual name of your geometry column instead of latitude and longitude columns. \n",
" .load()\n",
")\n",
"data['pickup_longitude'] = data['pickup_longitude'].astype(float)\n",
"data['pickup_latitude'] = data['pickup_latitude'].astype(float)\n",
"data.to_csv(\"./taxisvis1M.csv\")\n",
"\n",
"loader1 = (\n",
" mapper\n",
" .loader\n",
" .from_file(\"pluto.csv\")\n",
" .from_huggingface(\"oscur/pluto\", number_of_rows=1000, streaming=True)\n",
" .with_columns(\"longitude\", \"latitude\")\n",
"# .with_columns(geometry_column=<geometry_column_name>\") # Replace <geometry_column_name> with the actual name of your geometry column instead of latitude and longitude columns. \n",
" .build()\n",
")\n",
"loader2 = (\n",
" mapper\n",
" .loader\n",
" .from_file(\"taxisvis1M.csv\")\n",
" .from_huggingface(\"oscur/taxisvis1M\", number_of_rows=1000, streaming=True)\n",
" .with_columns(\"pickup_longitude\", \"pickup_latitude\")\n",
" .with_map({\"pickup_longitude\": \"longitude\", \"pickup_latitude\": \"latitude\"})\n",
"# .with_columns(geometry_column=<geometry_column_name>\") # Replace <geometry_column_name> with the actual name of your geometry column instead of latitude and longitude columns. \n",
Expand All @@ -322,7 +274,7 @@
"enricher1 = mapper.enricher.with_data(group_by=\"nearest_intersection\", values_from=\"numfloors\", data_id=\"pluto_data\").aggregate_by(method=\"mean\", output_column=\"avg_floors\").build()\n",
"enricher2 = mapper.enricher.with_data(group_by=\"pickup_segment\", data_id=\"taxi_data\").count_by(output_column=\"pickup_count\").build()\n",
"\n",
"visualiser = mapper.visual.with_type(\"Interactive\").with_style({\"tiles\": \"CartoDB dark_matter\"}).build()\n",
"visualiser = mapper.visual.with_type(\"Interactive\").with_style({\"tiles\": \"CartoDB Positron\", \"colorbar_text_color\": \"gray\"}).build()\n",
"\n",
"# Assemble the pipeline\n",
"# Note that a pipeline's step is a tuple with a name and the step itself.\n",
Expand Down
30 changes: 5 additions & 25 deletions docs/copy_of_examples/2-End-to-End/2-pipeline_way.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -29,28 +29,6 @@
"um = UrbanMapper()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Note: For the documentation interactive mode, we only query 5000 records from the dataset. Feel free to remove for a more realistic analysis.\n",
"data = (\n",
" UrbanMapper()\n",
" .loader\n",
" .from_huggingface(\"oscur/pluto\", number_of_rows=5000, streaming=True)\n",
" .with_columns(longitude_column=\"longitude\", latitude_column=\"latitude\")\n",
"# .with_columns(geometry_column=<geometry_column_name>\") # Replace <geometry_column_name> with the actual name of your geometry column instead of latitude and longitude columns.\n",
" .load()\n",
")\n",
"\n",
"data['longitude'] = data['longitude'].astype(float)\n",
"data['latitude'] = data['latitude'].astype(float)\n",
"\n",
"data.to_csv(\"pluto.csv\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down Expand Up @@ -90,9 +68,11 @@
" ) # Recall that with mapping is to tell `map_nearest_layer` how it should map the urban data with the urban layer.\n",
" .build()\n",
")\n",
"\n",
"# Note: For the documentation interactive mode, we only query 5000 records from the dataset. Feel free to remove for a more realistic analysis.\n",
"loader = (\n",
" um.loader.from_file(\"./pluto.csv\")\n",
" um\n",
" .loader\n",
" .from_huggingface(\"oscur/pluto\", number_of_rows=5000, streaming=True)\n",
" .with_columns(longitude_column=\"longitude\", latitude_column=\"latitude\")\n",
"# .with_columns(geometry_column=<geometry_column_name>\") # Replace <geometry_column_name> with the actual name of your geometry column instead of latitude and longitude columns.\n",
" .build()\n",
Expand All @@ -115,7 +95,7 @@
"\n",
"visualiser = (\n",
" um.visual.with_type(\"Interactive\")\n",
" .with_style({\"tiles\": \"CartoDB dark_matter\", \"colorbar_text_color\": \"white\"})\n",
" .with_style({\"tiles\": \"CartoDB Positron\", \"colorbar_text_color\": \"gray\"})\n",
" .build()\n",
")\n",
"\n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,21 +20,7 @@
"outputs": [],
"source": [
"import urban_mapper as um\n",
"from urban_mapper.pipeline import UrbanPipeline\n",
"\n",
"# Note: For the documentation interactive mode, we only query 5000 records from the dataset. Feel free to remove for a more realistic analysis.\n",
"data = (\n",
" um.UrbanMapper()\n",
" .loader\n",
" .from_huggingface(\"oscur/NYC_vehicle_collisions\", number_of_rows=5000, streaming=True)\n",
" .with_columns(longitude_column=\"LONGITUDE\", latitude_column=\"LATITUDE\")\n",
" .load()\n",
")\n",
"\n",
"data['LONGITUDE'] = data['LONGITUDE'].astype(float)\n",
"data['LATITUDE'] = data['LATITUDE'].astype(float)\n",
"\n",
"data.to_csv(\"./NYC_Motor_Vehicle_Collisions_Mar_12_2025.csv\")"
"from urban_mapper.pipeline import UrbanPipeline"
]
},
{
Expand All @@ -59,10 +45,11 @@
" )\n",
" .build()\n",
" )),\n",
" # Note: For the documentation interactive mode, we only query 5000 records from the dataset. Feel free to remove for a more realistic analysis.\n",
" (\"loader\", (\n",
" mapper\n",
" .loader\n",
" .from_file(\"./NYC_Motor_Vehicle_Collisions_Mar_12_2025.csv\")\n",
" .from_huggingface(\"oscur/NYC_vehicle_collisions\", number_of_rows=5000, streaming=True)\n",
" .with_columns(longitude_column=\"LONGITUDE\", latitude_column=\"LATITUDE\")\n",
" .build()\n",
" )),\n",
Expand All @@ -85,7 +72,7 @@
" mapper\n",
" .visual\n",
" .with_type(\"Interactive\")\n",
" .with_style({\"tiles\": \"CartoDB dark_matter\", \"colorbar_text_color\": \"white\"})\n",
" .with_style({\"tiles\": \"CartoDB Positron\", \"colorbar_text_color\": \"gray\"}) \n",
" .build()\n",
" ))\n",
"])"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,29 +13,6 @@
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import urban_mapper as um\n",
"from urban_mapper.pipeline import UrbanPipeline\n",
"\n",
"data = (\n",
" um.UrbanMapper()\n",
" .loader\n",
" .from_huggingface(\"oscur/NYC_vehicle_collisions\")\n",
" .with_columns(longitude_column=\"LONGITUDE\", latitude_column=\"LATITUDE\")\n",
" .load()\n",
")\n",
"\n",
"data['LONGITUDE'] = data['LONGITUDE'].astype(float)\n",
"data['LATITUDE'] = data['LATITUDE'].astype(float)\n",
"\n",
"data.to_csv(\"./NYC_Motor_Vehicle_Collisions_Mar_12_2025.csv\")"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -60,7 +37,7 @@
" )),\n",
" (\"loader\", (\n",
" um.UrbanMapper().loader\n",
" .from_file(\"./NYC_Motor_Vehicle_Collisions_Mar_12_2025.csv\")\n",
" .from_huggingface(\"oscur/NYC_vehicle_collisions\")\n",
" .with_columns(longitude_column=\"LONGITUDE\", latitude_column=\"LATITUDE\")\n",
" .build()\n",
" )),\n",
Expand All @@ -86,7 +63,7 @@
" (\"visualiser\", (\n",
" um.UrbanMapper().visual\n",
" .with_type(\"Interactive\")\n",
" .with_style({\"tiles\": \"CartoDB dark_matter\", \"colorbar_text_color\": \"white\"})\n",
" .with_style({\"tiles\": \"CartoDB Positron\", \"colorbar_text_color\": \"gray\"})\n",
" .build()\n",
" ))\n",
"])"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,29 +24,6 @@
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import urban_mapper as um\n",
"from urban_mapper.pipeline import UrbanPipeline\n",
"\n",
"data = (\n",
" um.UrbanMapper()\n",
" .loader\n",
" .from_huggingface(\"oscur/NYC_vehicle_collisions\")\n",
" .with_columns(longitude_column=\"LONGITUDE\", latitude_column=\"LATITUDE\")\n",
" .load()\n",
")\n",
"\n",
"data['LONGITUDE'] = data['LONGITUDE'].astype(float)\n",
"data['LATITUDE'] = data['LATITUDE'].astype(float)\n",
"\n",
"data.to_csv(\"./NYC_Motor_Vehicle_Collisions_Mar_12_2025.csv\")"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down Expand Up @@ -156,7 +133,7 @@
" )),\n",
" (\"loader\", (\n",
" um.UrbanMapper().loader\n",
" .from_file(\"./NYC_Motor_Vehicle_Collisions_Mar_12_2025.csv\")\n",
" .from_huggingface(\"oscur/NYC_vehicle_collisions\")\n",
" .with_columns(longitude_column=\"LONGITUDE\", latitude_column=\"LATITUDE\")\n",
" .build()\n",
" )),\n",
Expand Down Expand Up @@ -236,7 +213,7 @@
" (\"visualiser\", (\n",
" um.UrbanMapper().visual\n",
" .with_type(\"Interactive\")\n",
" .with_style({\"tiles\": \"CartoDB dark_matter\", \"colorbar_text_color\": \"white\"})\n",
" .with_style({\"tiles\": \"CartoDB Positron\", \"colorbar_text_color\": \"gray\"})\n",
" .build()\n",
" ))\n",
"])"
Expand Down
Loading