Skip to content

Commit 1323feb

Browse files
Merge pull request #1 from rustprooflabs/improve-logic
Start tucking logic away behind the scenes
2 parents 06cd468 + 20c0461 commit 1323feb

File tree

13 files changed

+348
-241
lines changed

13 files changed

+348
-241
lines changed

.github/workflows/deploy-book.yml

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# Based on https://github.com/rust-lang/mdBook/wiki/Automated-Deployment%3A-GitHub-Actions
2+
name: Deploy mdbook
3+
on:
4+
push:
5+
branches:
6+
- main
7+
8+
jobs:
9+
deploy:
10+
runs-on: ubuntu-latest
11+
steps:
12+
- uses: actions/checkout@v3
13+
with:
14+
fetch-depth: 0
15+
- name: Install mdbook
16+
run: |
17+
mkdir mdbook
18+
curl -sSL https://github.com/rust-lang/mdBook/releases/download/v0.4.14/mdbook-v0.4.14-x86_64-unknown-linux-gnu.tar.gz | tar -xz --directory=./mdbook
19+
echo `pwd`/mdbook >> $GITHUB_PATH
20+
- name: Deploy GitHub Pages
21+
run: |
22+
# Just add a `cd` here if you need to change to another directory.
23+
cd docs
24+
mdbook build
25+
git worktree add gh-pages
26+
git config user.name "Deploy from CI"
27+
git config user.email ""
28+
cd gh-pages
29+
# Delete the ref to avoid keeping history.
30+
git update-ref -d refs/heads/gh-pages
31+
rm -rf *
32+
mv ../book/* .
33+
mv ../CNAME .
34+
git add .
35+
git commit -m "Deploy $GITHUB_SHA to gh-pages"
36+
git push --force --set-upstream origin gh-pages

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
docs/book/**

Dockerfile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
FROM rustprooflabs/pgosm-flex
2+
3+
COPY ./db /app/faker/db
4+

README.md

Lines changed: 0 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -5,49 +5,4 @@ components based on OpenStreetMap. The use of OpenStreetMap data
55
as a starting point provides a sense of realism. The use of `random()`
66
to generate fake data avoids privacy concerns.
77

8-
## Load OpenStreetMap Data
9-
10-
Load the region/subregion you want using the PgOSM Flex Docker image.
11-
The [Quick Start section](https://github.com/rustprooflabs/pgosm-flex#quick-start)
12-
loads the data into a PostGIS enabled database in the Docker container,
13-
available on port 5433.
14-
15-
> Update instructions to use custom layerset. Only need place, road, and land use.
16-
17-
18-
19-
```bash
20-
mkdir ~/pgosm-data
21-
export POSTGRES_USER=postgres
22-
export POSTGRES_PASSWORD=mysecretpassword
23-
24-
docker run --name pgosm -d --rm \
25-
-v ~/pgosm-data:/app/output \
26-
-v ~/git/pgosm-flex-faker/:/custom-layerset \
27-
-v /etc/localtime:/etc/localtime:ro \
28-
-e POSTGRES_PASSWORD=$POSTGRES_PASSWORD \
29-
-p 5433:5432 -d rustprooflabs/pgosm-flex
30-
31-
docker exec -it \
32-
pgosm python3 docker/pgosm_flex.py \
33-
--ram=8 \
34-
--region=north-america/us \
35-
--subregion=ohio \
36-
--layerset=faker_layerset \
37-
--layerset-path=/custom-layerset/
38-
```
39-
40-
41-
After loading, connect and run the `osm-faker.sql`.
42-
Each time running will generate slightly different results.
43-
44-
45-
Version 1
46-
47-
![](osm-faker-stores-in-ohio-1.png)
48-
49-
Version 2
50-
51-
![](osm-faker-stores-in-ohio-2.png)
52-
538

db/deploy/001.sql

Lines changed: 216 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,226 @@ BEGIN;
55
CREATE SCHEMA pgosm_flex_faker;
66

77

8-
CREATE FUNCTION pgosm_flex_faker.location_in_place_landuse()
9-
RETURNS BOOLEAN
10-
LANGUAGE sql
11-
SECURITY DEFINER
12-
SET search_path TO 'pgosm_flex_faker, pg_temp'
8+
/*
 * pgosm_flex_faker.location_in_place_landuse()
 *
 * Generates fake store locations along roads inside randomly selected
 * OpenStreetMap places.  Each run produces different results because
 * random() is used both to pick places and to position points.
 *
 * Reads:   osm.place_polygon_nested, osm.landuse_polygon, osm.road_line,
 *          pgosm.road
 * Creates (temp tables in the calling session, replaced on every call):
 *          places_for_shops_1, places_for_shops, place_scores, selected,
 *          faker_place_polygon, road_osm_type_rank, selected_roads,
 *          faker_store_location  (final result)
 *
 * Customization: create a temp table `place_osm_types (osm_type)` before
 * calling to control which landuse types are considered.  When it does not
 * exist, it defaults to 'retail' and 'commercial'.
 */
CREATE PROCEDURE pgosm_flex_faker.location_in_place_landuse()
LANGUAGE plpgsql
AS $$
BEGIN

    -- Define a custom `place_osm_types` table before executing to customize areas
    CREATE TEMP TABLE IF NOT EXISTS place_osm_types AS
    SELECT 'retail' AS osm_type
    UNION ALL
    SELECT 'commercial' AS osm_type
    ;


    -- Basic selection, provide attributes used to rank locations
    DROP TABLE IF EXISTS places_for_shops_1;
    CREATE TEMP TABLE places_for_shops_1 AS
    WITH base AS (
        SELECT osm_id, name, osm_type, admin_level, nest_level,
                -- Rounding is assuming SRID 3857, or another unit in Meters or Feet.
                ROUND(public.ST_Area(geom)::NUMERIC, 0) AS geom_area, geom
            FROM osm.place_polygon_nested
            -- Using innermost places to reduce likelihood of overlap
            WHERE innermost
                AND name <> ''
                AND admin_level < 99
    ), with_space AS (
        -- Within each Place, find how many landuse polygons are "near"
        -- (intersect) or are contained by the place, limited to the types
        -- (commercial, retail, residential, etc) defined in place_osm_types.
        --
        -- A single join is used on purpose: joining landuse twice (once for
        -- intersects, once for contains) multiplies the rows per place and
        -- inflates every COUNT / SUM.  Contained polygons are a subset of the
        -- intersecting ones, so they are aggregated with FILTER instead.
        SELECT b.osm_id,
                COUNT(lp.osm_id) AS near_areas,
                COALESCE(SUM(public.ST_Area(lp.geom)), 0) AS near_space,
                COUNT(lp.osm_id)
                    FILTER (WHERE public.ST_Contains(b.geom, lp.geom))
                    AS contained_areas,
                COALESCE(
                    SUM(public.ST_Area(lp.geom))
                        FILTER (WHERE public.ST_Contains(b.geom, lp.geom)),
                    0) AS contained_space
            FROM base b
            LEFT JOIN osm.landuse_polygon lp
                ON public.ST_Intersects(b.geom, lp.geom)
                    AND lp.osm_type IN (SELECT osm_type FROM place_osm_types)
            GROUP BY b.osm_id
    )
    SELECT b.osm_id, b.name, b.osm_type, b.admin_level, b.nest_level,
            b.geom_area, b.geom,
            ws.contained_areas, ws.contained_space,
            ws.near_areas, ws.near_space
        FROM base b
        INNER JOIN with_space ws ON b.osm_id = ws.osm_id
    ;


    DROP TABLE IF EXISTS places_for_shops;
    CREATE TEMP TABLE places_for_shops AS
    SELECT osm_id, name, osm_type, admin_level, contained_areas, contained_space,
            near_areas, near_space, geom_area,
            -- geom_area is rounded to whole units, so tiny places can be 0.
            -- NULLIF yields a NULL ratio instead of a division-by-zero error.
            contained_space / NULLIF(geom_area, 0)
                AS space_contained_ratio_higher_is_better,
            near_space / NULLIF(geom_area, 0)
                AS space_near_ratio_higher_is_better,
            geom
        FROM places_for_shops_1
        ORDER BY space_contained_ratio_higher_is_better DESC,
            space_near_ratio_higher_is_better DESC
    ;


    /*
     * The following scoring logic creates scores for each place depending
     * on how its contained and nearby landuse data compare to the area's
     * percentile values.
     */
    DROP TABLE IF EXISTS place_scores;
    CREATE TEMP TABLE place_scores AS
    WITH breakpoints AS (
        -- Calculate percentiles of space available across all available place inputs
        -- This should let each region adjust for the input data
        SELECT percentile_cont(0.25)
                    WITHIN GROUP (ORDER BY contained_space ASC)
                    AS contained_space_25_perc,
                percentile_cont(0.50)
                    WITHIN GROUP (ORDER BY contained_space ASC)
                    AS contained_space_50_perc,
                percentile_cont(0.90)
                    WITHIN GROUP (ORDER BY near_space ASC)
                    AS near_space_90_perc
            FROM places_for_shops
            WHERE near_areas > 0
    )
    SELECT p.osm_id,
            -- The exact score values are arbitrary.  The key is that the next
            -- step keeps a place only when its total score is greater than a
            -- random value between 0.0 and 1.0, so increasing the max score
            -- here results in some areas almost always being picked.
            CASE WHEN b.contained_space_50_perc < p.contained_space
                    THEN .55
                WHEN b.contained_space_25_perc < p.contained_space
                    THEN .35
                ELSE .01
            END AS contained_space_score,
            CASE WHEN b.near_space_90_perc < p.near_space
                    THEN .1
                ELSE .01
            END AS near_space_score
        FROM places_for_shops p
        -- breakpoints is always exactly one row (aggregate without GROUP BY)
        CROSS JOIN breakpoints b
        -- Excludes places that aren't even nearby (intersects) an appropriate
        -- place type
        WHERE p.near_areas > 0
    ;


    -- Randomly keep places; higher total_score means higher chance of selection.
    DROP TABLE IF EXISTS selected;
    CREATE TEMP TABLE selected AS
    WITH a AS (
        SELECT p.osm_id,
                s.contained_space_score + s.near_space_score
                    AS total_score,
                random() AS rnd
            FROM places_for_shops p
            INNER JOIN place_scores s
                ON p.osm_id = s.osm_id
    )
    SELECT a.osm_id
        FROM a
        WHERE a.total_score > a.rnd
    ;


    -- Selected areas to put points into.
    DROP TABLE IF EXISTS faker_place_polygon;
    CREATE TEMP TABLE faker_place_polygon AS
    SELECT p.osm_id, p.name, p.osm_type, p.admin_level,
            p.contained_areas, p.contained_space,
            p.near_areas, p.near_space, p.geom_area,
            p.space_contained_ratio_higher_is_better,
            p.space_near_ratio_higher_is_better,
            p.geom
        FROM selected s
        INNER JOIN places_for_shops p ON s.osm_id = p.osm_id
        ORDER BY p.name
    ;

    CREATE INDEX gix_faker_place_polygon
        ON faker_place_polygon USING GIST (geom)
    ;


    /*
        Ranking roads by osm_type with goal of scoring roads with lower speed
        limits higher.  Uses helper table loaded by PgOSM Flex.

        Uses window function for rank steps, then normalize to 0-1 range.
        Finally, squishes range into 0.05 - 0.90 to prevent guarantees of
        never or always included.
    */
    DROP TABLE IF EXISTS road_osm_type_rank;
    CREATE TEMP TABLE road_osm_type_rank AS
    WITH rank_lower_speed_better AS (
        -- Highest speed gets rank 1, so slower road types get larger ranks
        SELECT osm_type, maxspeed_mph,
                RANK() OVER (ORDER BY maxspeed_mph DESC) AS rnk_raw
            FROM pgosm.road
            WHERE route_motor
                AND osm_type NOT LIKE '%link'
    ), aggs_for_normalization AS (
        SELECT MIN(rnk_raw) AS min_rnk, MAX(rnk_raw) AS max_rnk
            FROM rank_lower_speed_better
    ), normal_rnk AS (
        SELECT r.osm_type, r.maxspeed_mph,
                -- When every road type shares a single rank the range is 0.
                -- NULLIF avoids the division-by-zero error; the rank then
                -- falls back to 0 and is raised to the 0.05 floor below.
                COALESCE(
                    (r.rnk_raw * 1.0 - a.min_rnk)
                        / NULLIF(a.max_rnk - a.min_rnk, 0),
                    0) AS normalized_rnk
            FROM rank_lower_speed_better r
            CROSS JOIN aggs_for_normalization a
    )
    SELECT osm_type, maxspeed_mph,
            CASE WHEN normalized_rnk < 0.05 THEN 0.05
                WHEN normalized_rnk > 0.9 THEN .9
                ELSE normalized_rnk
            END AS normalized_rnk
        FROM normal_rnk
    ;


    -----------------------------------------
    -- Identify roads where a building could be
    -- Not using actual buildings / addresses because:
    ---- a) privacy
    ---- b) coverage
    DROP TABLE IF EXISTS selected_roads;
    CREATE TEMP TABLE selected_roads AS
    WITH road_ranks AS (
        SELECT p.osm_id AS place_osm_id, p.name AS place_name,
                rr.normalized_rnk AS road_type_score,
                r.osm_id AS road_osm_id
            FROM faker_place_polygon p
            INNER JOIN osm.landuse_polygon c
                ON public.ST_Contains(p.geom, c.geom)
                    AND c.osm_type IN (SELECT osm_type FROM place_osm_types)
            INNER JOIN osm.road_line r
                -- && is a bounding-box overlap test only, so a matched road
                -- may pass near the landuse polygon without touching it.
                ON c.geom && r.geom
                    AND r.route_motor
                    AND r.osm_type <> 'service'
                    AND r.osm_type NOT LIKE '%link'
            INNER JOIN road_osm_type_rank rr
                ON r.osm_type = rr.osm_type
    ), ranked AS (
        -- One road per place: best road type score, ties broken randomly
        SELECT place_osm_id, place_name, road_type_score, road_osm_id,
                ROW_NUMBER() OVER (
                    PARTITION BY place_osm_id
                    ORDER BY road_type_score DESC, random()) AS rnk
            FROM road_ranks
    )
    SELECT place_osm_id, place_name, road_type_score, road_osm_id, rnk
        FROM ranked
        WHERE rnk = 1
    ;


    -- Final result: one point at a random position along each selected road.
    DROP TABLE IF EXISTS faker_store_location;
    CREATE TEMP TABLE faker_store_location AS
    SELECT a.place_osm_id, a.place_name, a.road_osm_id,
            r.osm_type, r.name, r.ref,
            -- NOTE(review): ST_LineInterpolatePoint needs a LINESTRING; this
            -- assumes ST_LineMerge returns one for every road -- confirm.
            public.ST_LineInterpolatePoint(public.ST_LineMerge(r.geom), random()) AS geom
        FROM selected_roads a
        INNER JOIN osm.road_line r ON a.road_osm_id = r.osm_id
    ;


END;
$$
;
21230

docs/CNAME

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
faker.pgosm-flex.com

docs/book.toml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
[book]
2+
authors = ['Ryan Lambert', 'RustProof Labs']
3+
language = "en"
4+
multilingual = false
5+
src = "src"
6+
title = "PgOSM Flex Faker"

docs/src/SUMMARY.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Summary
2+
3+
- [What is PgOSM Flex Faker?](pgosm-flex-faker.md)
4+
- [Quick Start](quick-start.md)
File renamed without changes.
File renamed without changes.

0 commit comments

Comments
 (0)