Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion data/src/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from contextlib import contextmanager
from pathlib import Path

FORCE_RELOAD = True
FORCE_RELOAD = False
""" During the data load, whether to query the various GIS API services for the data to load. If True, will query the
API services and report on data differences. If False, will read the cached data."""

Expand Down
4 changes: 3 additions & 1 deletion data/src/data_utils/access_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@
import geopandas as gpd
import pandas as pd

from src.metadata.metadata_utils import current_metadata, provide_metadata
from src.validation.access_process import AccessProcessOutputValidator
from src.validation.base import ValidationResult, validate_output


@validate_output(AccessProcessOutputValidator)
@provide_metadata(current_metadata=current_metadata)
def access_process(
dataset: gpd.GeoDataFrame,
) -> Tuple[gpd.GeoDataFrame, ValidationResult]:
Expand All @@ -30,7 +32,7 @@ def access_process(
access_process (str): The access process for each property based on city ownership and market value.
Will be NA for non-vacant properties.

Primary Feature Layer Columns Referenced:
Columns referenced:
city_owner_agency, market_value, vacant

Side Effects:
Expand Down
12 changes: 7 additions & 5 deletions data/src/data_utils/city_owned_properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@

import geopandas as gpd

from src.metadata.metadata_utils import current_metadata, provide_metadata
from src.validation.base import ValidationResult, validate_output
from src.validation.city_owned_properties import (
CityOwnedPropertiesOutputValidator,
CityOwnedPropertiesInputValidator,
CityOwnedPropertiesOutputValidator,
)

from ..classes.loaders import EsriLoader
Expand All @@ -17,27 +18,28 @@


@validate_output(CityOwnedPropertiesOutputValidator)
@provide_metadata(current_metadata=current_metadata)
def city_owned_properties(
input_gdf: gpd.GeoDataFrame,
) -> Tuple[gpd.GeoDataFrame, ValidationResult]:
"""
Processes city-owned property data by joining it with the primary feature layer,
Processes city-owned property data by joining it with the input dataframe,
renaming columns, and updating access information for properties based on ownership.
All instances where the "city_owner_agency" is "PLB" are changed to "Land Bank (PHDC)".

Args:
primary_featurelayer (FeatureLayer): The primary feature layer to which city-owned
input_gdf (GeoDataFrame): The GeoDataFrame to which city-owned
property data will be joined.

Returns:
FeatureLayer: The updated primary feature layer with processed city ownership
GeoDataFrame: The updated GeoDataFrame with processed city ownership
information.

Columns added:
city_owner_agency (str): The agency that owns the city property.
side_yard_eligible (bool): Indicates if the property is eligible for the side yard program.

Primary Feature Layer Columns Referenced:
Columns referenced:
opa_id, owner_1, owner2

Tagline:
Expand Down
10 changes: 6 additions & 4 deletions data/src/data_utils/community_gardens.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import geopandas as gpd

from src.constants.city_limits import PHL_GEOMETRY
from src.metadata.metadata_utils import current_metadata, provide_metadata
from src.validation.base import ValidationResult, validate_output
from src.validation.community_gardens import (
CommunityGardensInputValidator,
Expand All @@ -15,19 +16,20 @@


@validate_output(CommunityGardensOutputValidator)
@provide_metadata(current_metadata=current_metadata)
def community_gardens(
input_gdf: gpd.GeoDataFrame,
) -> Tuple[gpd.GeoDataFrame, ValidationResult]:
"""
Updates the 'vacant' column in the primary feature layer to ensure community gardens
Updates the 'vacant' column in the input dataframe to ensure community gardens
are marked as not vacant. This protects known community gardens from being categorized
as vacant, preventing potential predatory development.

Args:
primary_featurelayer (FeatureLayer): The feature layer containing property data.
input_gdf (GeoDataFrame): The input GeoDataFrame containing property data.

Returns:
FeatureLayer: The input feature layer with the 'vacant' column updated to False
GeoDataFrame: The input GeoDataFrame with the 'vacant' column updated to False
for parcels containing community gardens.

Tagline:
Expand All @@ -36,7 +38,7 @@ def community_gardens(
Columns updated:
vacant: Updated to False for parcels containing community gardens.

Primary Feature Layer Columns Referenced:
Columns referenced:
opa_id, vacant

Source:
Expand Down
10 changes: 6 additions & 4 deletions data/src/data_utils/conservatorship.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pytz
from dateutil.parser import parse

from src.metadata.metadata_utils import current_metadata, provide_metadata
from src.validation.base import ValidationResult, validate_output
from src.validation.conservatorship import ConservatorshipOutputValidator

Expand All @@ -15,26 +16,27 @@


@validate_output(ConservatorshipOutputValidator)
@provide_metadata(current_metadata=current_metadata)
def conservatorship(
input_gdf: gpd.GeoDataFrame,
) -> Tuple[gpd.GeoDataFrame, ValidationResult]:
"""
Determines conservatorship eligibility for properties in a feature layer.
Determines conservatorship eligibility for properties in a GeoDataFrame.

Args:
primary_featurelayer (FeatureLayer): A feature layer containing property data in a GeoDataFrame (`gdf`).
input_gdf (GeoDataFrame): A GeoDataFrame containing property data.

Columns Added:
conservatorship (bool): Indicates whether each property qualifies for conservatorship (True or False).

Primary Feature Layer Columns Referenced:
Columns referenced:
city_owner_agency, sheriff_sale, market_value, all_violations_past_year, sale_date

Tagline:
Identify conservatorship-eligible properties

Returns:
FeatureLayer: The input feature layer with an added "conservatorship" column indicating
GeoDataFrame: The input GeoDataFrame with an added "conservatorship" column indicating
whether each property qualifies for conservatorship (True or False).
"""
conservatorships = []
Expand Down
12 changes: 7 additions & 5 deletions data/src/data_utils/contig_neighbors.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,24 +6,26 @@
import numpy as np
from libpysal.weights import Queen

from src.metadata.metadata_utils import current_metadata, provide_metadata
from src.validation.base import ValidationResult, validate_output
from src.validation.contig_neighbors import ContigNeighborsOutputValidator

from ..utilities import opa_join


@validate_output(ContigNeighborsOutputValidator)
@provide_metadata(current_metadata=current_metadata)
def contig_neighbors(
input_gdf: gpd.GeoDataFrame,
) -> Tuple[gpd.GeoDataFrame, ValidationResult]:
"""
Calculates the number of contiguous vacant neighbors for each property in a feature layer.
Calculates the number of contiguous vacant neighbors for each property in a GeoDataFrame.

Args:
primary_featurelayer (FeatureLayer): A feature layer containing property data in a GeoDataFrame (`gdf`).
input_gdf (GeoDataFrame): An input GeoDataFrame containing property data.

Returns:
FeatureLayer: The input feature layer with an added "n_contiguous" column indicating
GeoDataFrame: The input GeoDataFrame with an added "n_contiguous" column indicating
the number of contiguous vacant neighbors for each property.

Tagline:
Expand All @@ -32,7 +34,7 @@ def contig_neighbors(
Columns Added:
n_contiguous (int): The number of contiguous vacant neighbors for each property.

Primary Feature Layer Columns Referenced:
Columns referenced:
opa_id, vacant
"""
print(f"[DEBUG] contig_neighbors: Starting with {len(input_gdf)} properties")
Expand Down Expand Up @@ -192,7 +194,7 @@ def contig_neighbors(
f"[DEBUG] contig_neighbors: vacant_parcels opa_ids in input_gdf: {len(matching_opa_ids)} / {len(vacant_opa_ids)}"
)

# Merge the results back to the primary feature layer
# Merge the results back to the input GeoDataFrame
input_gdf = opa_join(input_gdf, vacant_parcels[["opa_id", "n_contiguous"]])

# Debug: Check what's in input_gdf after join
Expand Down
12 changes: 7 additions & 5 deletions data/src/data_utils/council_dists.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import geopandas as gpd
import pandas as pd

from src.metadata.metadata_utils import current_metadata, provide_metadata
from src.validation.base import ValidationResult, validate_output
from src.validation.council_dists import (
CouncilDistrictsInputValidator,
Expand All @@ -17,18 +18,19 @@


@validate_output(CouncilDistrictsOutputValidator)
@provide_metadata(current_metadata=current_metadata)
def council_dists(
input_gdf: gpd.GeoDataFrame,
) -> Tuple[gpd.GeoDataFrame, ValidationResult]:
"""
Associates properties in the primary feature layer with council districts
Associates properties in the input GeoDataFrame with council districts
using a spatial join.

Args:
primary_featurelayer (FeatureLayer): The feature layer containing property data.
input_gdf (GeoDataFrame): The GeoDataFrame containing property data.

Returns:
FeatureLayer: The input feature layer with properties spatially joined
GeoDataFrame: The input GeoDataFrame with properties spatially joined
to council districts, ensuring no duplicate entries.

Tagline:
Expand All @@ -37,7 +39,7 @@ def council_dists(
Columns added:
district (str): The council district associated with the property.

Primary Feature Layer Columns Referenced:
Columns referenced:
opa_id, geometry
"""

Expand Down Expand Up @@ -74,7 +76,7 @@ def council_dists(

merged_gdf = spatial_join(input_gdf, council_dists, predicate="within")

# Drop duplicates in the primary feature layer
# Drop duplicates in the input GeoDataFrame
merged_gdf.drop_duplicates(inplace=True)

# Debug: Check for duplicate OPA IDs and show what's causing them
Expand Down
10 changes: 6 additions & 4 deletions data/src/data_utils/delinquencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import geopandas as gpd
import pandas as pd

from src.metadata.metadata_utils import current_metadata, provide_metadata
from src.validation.base import ValidationResult, validate_output
from src.validation.delinquencies import DelinquenciesOutputValidator

Expand All @@ -12,18 +13,19 @@


@validate_output(DelinquenciesOutputValidator)
@provide_metadata(current_metadata=current_metadata)
def delinquencies(
input_gdf: gpd.GeoDataFrame,
) -> Tuple[gpd.GeoDataFrame, ValidationResult]:
"""
Adds property tax delinquency information to the primary feature layer by
Adds property tax delinquency information to the input GeoDataFrame by
joining with a tax delinquencies dataset.

Args:
primary_featurelayer (FeatureLayer): The feature layer containing property data.
input_gdf (GeoDataFrame): The GeoDataFrame containing property data.

Returns:
FeatureLayer: The input feature layer with added columns for tax delinquency
GeoDataFrame: The input GeoDataFrame with added columns for tax delinquency
information, including total due, actionable status, payment agreements, and more.

Tagline:
Expand All @@ -41,7 +43,7 @@ def delinquencies(
sheriff_sale (bool): Indicates if the property is at risk of sheriff sale.
total_assessment (float): Total property assessment.

Primary Feature Layer Columns Referenced:
Columns referenced:
opa_id
"""

Expand Down
10 changes: 6 additions & 4 deletions data/src/data_utils/dev_probability.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import requests

from src.config.config import USE_CRS
from src.metadata.metadata_utils import current_metadata, provide_metadata
from src.validation.base import ValidationResult, validate_output
from src.validation.dev_probability import DevProbabilityOutputValidator

Expand All @@ -15,19 +16,20 @@


@validate_output(DevProbabilityOutputValidator)
@provide_metadata(current_metadata=current_metadata)
def dev_probability(
input_gdf: gpd.GeoDataFrame,
) -> Tuple[gpd.GeoDataFrame, ValidationResult]:
"""
Calculates development probability based on permit counts and assigns
development ranks to census block groups. The results are joined to the
primary feature layer.
input GeoDataFrame.

Args:
primary_featurelayer (FeatureLayer): The feature layer containing property data.
input_gdf (GeoDataFrame): The GeoDataFrame containing property data.

Returns:
FeatureLayer: The input feature layer with added spatial join data for
GeoDataFrame: The input GeoDataFrame with added spatial join data for
development probability and ranks.

Tagline:
Expand All @@ -37,7 +39,7 @@ def dev_probability(
permit_count (int): The number of permits issued in the census block group.
dev_rank (str): The development rank of the census block group.

Primary Feature Layer Columns Referenced:
Columns referenced:
opa_id, geometry

Source:
Expand Down
10 changes: 6 additions & 4 deletions data/src/data_utils/drug_crimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,26 @@
import geopandas as gpd

from src.data_utils.kde import apply_kde_to_input
from src.metadata.metadata_utils import current_metadata, provide_metadata
from src.validation.base import ValidationResult, validate_output
from src.validation.drug_crimes import DrugCrimesOutputValidator

from ..constants.services import DRUGCRIME_SQL_QUERY


@validate_output(DrugCrimesOutputValidator)
@provide_metadata(current_metadata=current_metadata)
def drug_crimes(
input_gdf: gpd.GeoDataFrame,
) -> Tuple[gpd.GeoDataFrame, ValidationResult]:
"""
Applies kernel density estimation (KDE) analysis for drug crimes to the primary feature layer.
Applies kernel density estimation (KDE) analysis for drug crimes to the input GeoDataFrame.

Args:
primary_featurelayer (FeatureLayer): The feature layer containing property data.
input_gdf (GeoDataFrame): The GeoDataFrame containing property data.

Returns:
FeatureLayer: The input feature layer with KDE analysis results for drug crimes.
GeoDataFrame: The input GeoDataFrame with KDE analysis results for drug crimes.

Tagline:
Density analysis for drug crimes
Expand All @@ -31,7 +33,7 @@ def drug_crimes(
drug_crimes_density_label (str): Categorized density level.
drug_crimes_density_percentile (float): Percentile rank of density.

Primary Feature Layer Columns Referenced:
Columns referenced:
geometry

Source:
Expand Down
Loading
Loading