Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
151 changes: 137 additions & 14 deletions blocksnet/analysis/network/origin_destination/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,49 @@
DEFAULT_ACCESSIBILITY = 10


def _round_probabilistic_row_to_int(prob_row: pd.Series, trips: int) -> pd.Series:

# Convert one probability row into integer trips preserving row sum exactly.

raw = prob_row.to_numpy(dtype=float) * float(trips)
floored = np.floor(raw).astype("int64")
remainder = int(trips - floored.sum())

if remainder > 0:
fractional = raw - floored
order = np.argsort(-fractional, kind="mergesort")
floored[order[:remainder]] += 1

return pd.Series(floored, index=prob_row.index, dtype="int64")


def _integerize_origin_constrained_od(od_prob_mx: pd.DataFrame, demand: pd.Series) -> pd.DataFrame:

# Integerize origin-constrained OD probabilities preserving integer row demand.

od_int = pd.DataFrame(0, index=od_prob_mx.index, columns=od_prob_mx.columns, dtype="int64")
demand_int = demand.round().clip(lower=0).astype("int64")

for origin, trips in demand_int.items():
trips = int(trips)
if trips <= 0:
continue

prob_row = od_prob_mx.loc[origin].astype(float)
row_sum = float(prob_row.sum())

if row_sum <= 0.0:
prob_row[:] = 0.0
if origin in prob_row.index:
prob_row.loc[origin] = 1.0
else:
prob_row = prob_row / row_sum

od_int.loc[origin] = _round_probabilistic_row_to_int(prob_row, trips)

return od_int


def _calculate_nodes_weights(blocks_df: gpd.GeoDataFrame, acc_mx: pd.DataFrame, accessibility: float) -> pd.DataFrame:

logger.info("Identifying nearest nodes to blocks")
Expand All @@ -45,9 +88,9 @@ def _calculate_nodes_weights(blocks_df: gpd.GeoDataFrame, acc_mx: pd.DataFrame,
return nodes_df


def _calculate_diversity(blocks_df: pd.DataFrame, services_dfs: list[pd.DataFrame]) -> pd.DataFrame:
def _calculate_diversity(blocks_df: pd.DataFrame, services_count_dfs: list[pd.DataFrame]) -> pd.DataFrame:
logger.info("Calculating diversity and density")
diversity_df = shannon_diversity(services_dfs)
diversity_df = shannon_diversity(services_count_dfs)
blocks_df = blocks_df.join(diversity_df)
blocks_df[DENSITY_COLUMN] = blocks_df[COUNT_COLUMN] / blocks_df.site_area
return blocks_df
Expand All @@ -69,11 +112,20 @@ def _calculate_attractiveness(blocks_df: pd.DataFrame, lu_consts: dict[LandUse,
def _calculate_od_mx(nodes_df: pd.DataFrame, acc_mx: pd.DataFrame) -> pd.DataFrame:
logger.info("Calculating origin destination matrix")
acc_mx = acc_mx.replace(0, np.nan)
return pd.DataFrame(
np.outer(nodes_df[POPULATION_COLUMN], nodes_df[ATTRACTIVENESS_COLUMN]) / acc_mx,
index=acc_mx.index,
columns=acc_mx.columns,
).fillna(0.0)

# Origin-constrained gravity model:
# row sums match origin population and total OD matches total population.
gravity_weights = (1.0 / acc_mx).mul(nodes_df[ATTRACTIVENESS_COLUMN], axis=1).fillna(0.0)

# Preserve demand for isolated origins as intrazonal flow.
empty_rows = gravity_weights.sum(axis=1).eq(0.0)
for node_id in gravity_weights.index[empty_rows]:
gravity_weights.loc[node_id, node_id] = 1.0

row_sums = gravity_weights.sum(axis=1).replace(0.0, np.nan)
od_prob_mx = gravity_weights.div(row_sums, axis=0).fillna(0.0)

return _integerize_origin_constrained_od(od_prob_mx, nodes_df[POPULATION_COLUMN])


def _validate_input(blocks_df: pd.DataFrame, blocks_to_nodes_mx: pd.DataFrame, nodes_to_nodes_mx: pd.DataFrame):
Expand All @@ -90,22 +142,93 @@ def origin_destination_matrix(
blocks_df: pd.DataFrame,
blocks_to_nodes_mx: pd.DataFrame,
nodes_to_nodes_mx: pd.DataFrame,
services_dfs: list[pd.DataFrame],
services_count_dfs: list[pd.DataFrame],
accessibility: float = DEFAULT_ACCESSIBILITY,
lu_consts: dict[LandUse, float] = LU_CONSTS,
) -> pd.DataFrame:

"""
Build an origin-constrained OD matrix from block attributes and accessibility matrices.

The function estimates an integer-valued origin-destination (OD) matrix between network nodes
using an origin-constrained gravity model. Trip productions are derived from block population
(after distributing blocks to nearby nodes), while trip attractions are proportional to a
composite attractiveness score based on service density, service diversity (Shannon index),
and a land-use constant.

Parameters
----------
blocks_df : pandas.DataFrame
Table of blocks (zones). Index must uniquely identify blocks and must match
``blocks_to_nodes_mx.index``. Required columns are validated/coerced by ``BlocksSchema``.
Expected fields include at least:
- ``population`` (number of people living in block, a field mapped to :data:`POPULATION_COLUMN`)
- ``land_use`` (type of land use for urban block, values of :class:`blocksnet.enums.LandUse`)
- ``site_area`` (used for service density)
blocks_to_nodes_mx : pandas.DataFrame
Block-to-node generalized cost matrix (e.g., walk time), with blocks on rows and network
nodes on columns. Used to distribute each block's population/attractiveness across nearby
nodes. Its index must match ``blocks_df.index``.
nodes_to_nodes_mx : pandas.DataFrame
Node-to-node generalized cost matrix (e.g., travel time). Must be square with identical
index/columns, and its index must match ``blocks_to_nodes_mx.columns``.
Zeros are treated as missing costs for the gravity model.
services_count_dfs : list[pandas.DataFrame]
List of service-count tables (typically one per service category/type) used to compute
Shannon diversity and total service counts per block. Each table must be compatible with
:func:`blocksnet.analysis.diversity.shannon.core.shannon_diversity`.
accessibility : float, default=DEFAULT_ACCESSIBILITY
Threshold on ``blocks_to_nodes_mx`` costs defining which nodes are considered reachable
from a block for distribution. For each block, nodes with cost ``<= accessibility`` are
considered; additionally, the nearest node is always included to avoid empty supports.
lu_consts : dict[LandUse, float], default=LU_CONSTS
Mapping from land-use type to an additive attractiveness constant. If a block land-use is
missing in the mapping, :data:`DEFAULT_LU_CONST` is used.

Returns
-------
pandas.DataFrame
Integer OD matrix between network nodes. Index and columns correspond to
``nodes_to_nodes_mx.index``. Row sums are equal to the corresponding origin node population
(rounded to integers and clipped at zero). Total trips equal the total distributed
population (after rounding).

Notes
-----
- This is an *origin-constrained* model: productions (row sums) are fixed by origin population,
while attractions (column sums) are not explicitly constrained.
- Blocks are not OD zones directly: block attributes are first distributed to network nodes
via inverse-cost weights derived from ``blocks_to_nodes_mx``. The OD matrix is then computed
between nodes.
- If an origin node has no valid outgoing gravity weights (e.g., all costs missing), its demand
is assigned as intrazonal flow (diagonal element).
- Integerization preserves each origin row sum exactly using largest-remainder rounding.

Raises
------
ValueError
If indices/columns of the input matrices are inconsistent (do not align), or if the block
table does not satisfy the expected schema after validation.

Examples
--------
>>> od_nodes = origin_destination_matrix(
... blocks_df=blocks,
... blocks_to_nodes_mx=blocks_to_nodes,
... nodes_to_nodes_mx=nodes_to_nodes,
... services_count_dfs=[shops_counts, schools_counts],
... accessibility=10.0,
... )
>>> od_nodes.shape
(len(nodes_to_nodes.index), len(nodes_to_nodes.index))
"""

blocks_df = BlocksSchema(blocks_df)
_validate_input(blocks_df, blocks_to_nodes_mx, nodes_to_nodes_mx)

blocks_df = _calculate_diversity(blocks_df, services_dfs)
blocks_df = _calculate_diversity(blocks_df, services_count_dfs)
blocks_df = _calculate_attractiveness(blocks_df, lu_consts)

scaler = MinMaxScaler()
blocks_df[[POPULATION_COLUMN, ATTRACTIVENESS_COLUMN]] = scaler.fit_transform(
blocks_df[[POPULATION_COLUMN, ATTRACTIVENESS_COLUMN]]
)

nodes_gdf = _calculate_nodes_weights(blocks_df, blocks_to_nodes_mx, accessibility)

return _calculate_od_mx(nodes_gdf, nodes_to_nodes_mx)
Loading