Skip to content

Commit 8ee0260

Browse files
committed
Rename project to Geo Faker. Add initial version of customer stored procedure. Improve docs
1 parent b7657da commit 8ee0260

File tree

8 files changed

+185
-43
lines changed

8 files changed

+185
-43
lines changed

db/deploy/001.sql

Lines changed: 93 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@
22

33
BEGIN;
44

5-
CREATE SCHEMA pgosm_flex_faker;
5+
CREATE SCHEMA geo_faker;
66

77

8-
CREATE PROCEDURE pgosm_flex_faker.point_in_place_landuse()
8+
CREATE PROCEDURE geo_faker.point_in_place_landuse()
99
LANGUAGE plpgsql
1010
AS $$
1111
BEGIN
@@ -115,7 +115,6 @@ BEGIN
115115
WHERE p.near_areas > 0
116116
;
117117

118-
119118
DROP TABLE IF EXISTS selected;
120119
CREATE TEMP TABLE selected AS
121120
WITH a AS (
@@ -133,7 +132,6 @@ BEGIN
133132
WHERE a.total_score > a.rnd
134133
;
135134

136-
137135
-- Selected areas to put points into.
138136
DROP TABLE IF EXISTS faker_place_polygon;
139137
CREATE TEMP TABLE faker_place_polygon AS
@@ -147,7 +145,6 @@ BEGIN
147145
ON faker_place_polygon USING GIST (geom)
148146
;
149147

150-
151148
/*
152149
Ranking roads by osm_type with goal of scoring roads with lower speed
153150
limits higher. Uses helper table loaded by PgOSM Flex.
@@ -182,7 +179,6 @@ BEGIN
182179
FROM normal_rnk
183180
;
184181

185-
186182
/*
187183
Identify roads where a building could be
188184
Not using actual buildings / addresses because:
@@ -239,12 +235,13 @@ END
239235
$$
240236
;
241237

242-
238+
COMMENT ON PROCEDURE geo_faker.point_in_place_landuse IS 'Uses osm.landuse_polygon and osm.road_line to simulate probable locations for commercial store locations. Can be customized for custom landuse types by manually defining landuse_osm_types temp table.'
239+
;
243240

244241

245242
-- From: https://trac.osgeo.org/postgis/wiki/UserWikiRandomPoint
246243

247-
CREATE FUNCTION pgosm_flex_faker.n_points_in_polygon(geom geometry, num_points integer)
244+
CREATE FUNCTION geo_faker.n_points_in_polygon(geom geometry, num_points integer)
248245
RETURNS SETOF geometry
249246
LANGUAGE plpgsql VOLATILE
250247
COST 100
@@ -291,8 +288,96 @@ END
291288
$$
292289
;
293290

291+
COMMENT ON FUNCTION geo_faker.n_points_in_polygon(GEOMETRY, INT) IS 'Creates N points randomly within the given polygon.';
292+
293+
294+
295+
-- Call the procedure to ensure the required temp table exists, avoids deploy failure
296+
CALL geo_faker.point_in_place_landuse();
297+
298+
299+
CREATE PROCEDURE geo_faker.points_around_point()
LANGUAGE plpgsql
AS $$
/*
 * Generates fake customer locations around every store found in the
 * faker_store_location temp table. Results are saved to the
 * faker_customer_location temp table, which is dropped and recreated on
 * every call.
 *
 * For each store:
 *   1. Buffer the store point by 5000 (units of the geometry's SRID; the
 *      output column is declared as SRID 3857).
 *   2. Generate candidate customer points inside that buffer using
 *      geo_faker.n_points_in_polygon(geom_buffer, 1000).
 *   3. Keep candidates that have a road flagged route_motor within 300
 *      and move each one onto the closest point of its nearest road.
 *
 * Notes:
 *   - Candidates with no matching road within 300 are silently dropped,
 *     so a store can end up with fewer rows than candidates generated.
 *   - customer_id is generated per store (the identity column is re-added
 *     to a fresh temp table each iteration), so it is only unique in
 *     combination with store_id.
 *   - The procedure issues COMMIT inside its loop, therefore it cannot be
 *     CALLed from within an explicit transaction block.
 *   - Leaves the place_buffer and store_potential_customers temp tables
 *     from the last processed store in the session.
 */
DECLARE
    stores_to_process BIGINT;
    -- RECORD instead of faker_store_location%ROWTYPE so creating this
    -- procedure does not require the temp table to exist at deploy time.
    store_row RECORD;
BEGIN

    SELECT COUNT(*) INTO stores_to_process
        FROM faker_store_location
    ;
    RAISE NOTICE 'Stores to process: %', stores_to_process;

    DROP TABLE IF EXISTS faker_customer_location;
    CREATE TEMP TABLE faker_customer_location
    (
        id BIGINT NOT NULL GENERATED BY DEFAULT AS IDENTITY PRIMARY KEY,
        store_id BIGINT NOT NULL,
        customer_id BIGINT NOT NULL,
        geom GEOMETRY(POINT, 3857) NOT NULL
    );

    -- Only the store id is needed to drive the loop.
    FOR store_row IN SELECT id FROM faker_store_location LOOP
        RAISE NOTICE 'Store ID: %', store_row.id;

        -- Area around the current store where customers may be placed.
        DROP TABLE IF EXISTS place_buffer;
        CREATE TEMP TABLE place_buffer AS
        SELECT id AS store_id, geom, ST_Buffer(geom, 5000) AS geom_buffer
            FROM faker_store_location
            WHERE id = store_row.id
        ;

        -- Random candidate customer points inside the store's buffer.
        DROP TABLE IF EXISTS store_potential_customers;
        CREATE TEMP TABLE store_potential_customers AS
        SELECT store_id,
                geo_faker.n_points_in_polygon(geom_buffer, 1000)
                    AS geom
            FROM place_buffer
        ;
        ALTER TABLE store_potential_customers
            ADD customer_id BIGINT NOT NULL GENERATED BY DEFAULT AS IDENTITY;

        /*
         * The road join is done inside a CTE against the small
         * store_potential_customers temp table. Per the original author's
         * note, a simple join (which looks innocent) took 45+ seconds to
         * return 141 rows while the CTE version takes < 60 ms.
         * NOTE(review): that note mentioned ST_Envelope, which this query
         * does not use -- confirm whether a bbox pre-filter was intended.
         */
        WITH possible_roads AS (
            SELECT p.store_id,
                    p.customer_id,
                    p.geom AS geom_customer,
                    r.geom AS geom_road,
                    ST_Distance(p.geom, r.geom) AS distance
                FROM osm.road_line r
                INNER JOIN store_potential_customers p
                    ON ST_DWithin(r.geom, p.geom, 300)
                WHERE r.route_motor
        ), ranked AS (
            -- rnk = 1 is the nearest road for each candidate customer.
            SELECT store_id,
                    customer_id,
                    geom_customer,
                    geom_road,
                    ROW_NUMBER() OVER (
                        PARTITION BY store_id, customer_id ORDER BY distance
                    ) AS rnk
                FROM possible_roads
        )
        INSERT INTO faker_customer_location (store_id, customer_id, geom)
        -- ST_ClosestPoint always returns a location on the road itself.
        -- ST_Snap only snaps to the road's vertices and leaves the point
        -- unchanged (off the road) when no vertex is within tolerance.
        SELECT store_id,
                customer_id,
                ST_ClosestPoint(geom_road, geom_customer) AS geom_snapped
            FROM ranked
            WHERE rnk = 1
        ;

        -- NOTE(review): presumably commits per store so each iteration's
        -- temp table churn ends up in its own transaction -- confirm.
        COMMIT;

    END LOOP;

END;
$$;
375+
376+
377+
COMMENT ON PROCEDURE geo_faker.points_around_point IS 'Creates fake customer locations around a store. Locations are snapped to roads. Locations not scoped to landuse at this time. Requires faker_store_location temp table with fake store data.';
297378

298379
COMMIT;
380+
381+
382+
383+

db/revert/001.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,6 @@
22

33
BEGIN;
44

5-
DROP SCHEMA pgosm_flex_faker CASCADE;
5+
DROP SCHEMA geo_faker CASCADE;
66

77
COMMIT;

docs/src/SUMMARY.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Summary
22

3-
- [What is PgOSM Flex Faker?](pgosm-flex-faker.md)
3+
- [What is Geo Faker?](geo-faker.md)
44
- [Quick Start](quick-start.md)
55
- [Customize](customize.md)
66
- [Docker image](docker-image.md)

docs/src/customize.md

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
This section builds on the [Quick Start](quick-start.md) section.
44

5+
> Warning: This project is in early development! Things will be changing over the first few releases (e.g. before 0.5.0).
6+
57

68
## External Postgres connections
79

@@ -11,13 +13,22 @@ This approach does load a lot of data to the target database which may not be
1113
desired. Consider using `pg_dump` to load only the target data to your
1214
database of choice.
1315

16+
The Sqitch deployment step should use additional parameters not set in the quick start
17+
instructions.
18+
19+
```bash
20+
source ~/.pgosm-faker-local
21+
cd geo-faker/db
22+
sqitch db:pg://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST:$POSTGRES_PORT/$POSTGRES_DB deploy
23+
```
24+
1425

1526
## Each time is new data
1627

1728
Rerun, save second set.
1829

1930
```sql
20-
CALL pgosm_flex_faker.point_in_place_landuse();
31+
CALL geo_faker.point_in_place_landuse();
2132
CREATE TABLE my_fake_stores_v2 AS
2233
SELECT *
2334
FROM faker_store_location
@@ -26,7 +37,7 @@ SELECT *
2637

2738
## Custom Places for Shops
2839

29-
The procedure `pgosm_flex_faker.point_in_place_landuse()` allows overriding
40+
The procedure `geo_faker.point_in_place_landuse()` allows overriding
3041
the inclusion of `retail` and `commercial` landuse.
3142
This is done by creating a custom `landuse_osm_types` table before
3243
running the stored procedure.

docs/src/docker-image.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,17 @@
11
# Docker image
22

3+
> Warning: This project is in early development! Things will be changing over the first few releases (e.g. before 0.5.0).
4+
35
## Building the image
46

57
Build latest. Occasionally run with `--no-cache` to force some software updates.
68

79
```bash
810
docker pull rustprooflabs/pgosm-flex:latest
9-
docker build -t rustprooflabs/pgosm-flex-faker:latest .
11+
docker build -t rustprooflabs/geo-faker:latest .
1012
```
1113

14+
15+
```bash
16+
docker push rustprooflabs/geo-faker:latest
17+
```

docs/src/geo-faker.md

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# What is Geo Faker?
2+
3+
The Geo Faker project creates fake store and customer data with geospatial
4+
components based on OpenStreetMap. The use of OpenStreetMap data
5+
as a starting point provides a sense of realism. The use of `random()`
6+
to generate fake data avoids privacy concerns.
7+
8+
9+
> Warning: This project is in early development! Things will be changing over the first few releases (e.g. before 0.5.0).
10+
11+
12+
Geo Faker builds on [PgOSM Flex](https://pgosm-flex.com/), using its inherent ability
13+
to be customized as a launching point to generate realistic looking, but absolutely fake
14+
geospatial data.
15+
16+
17+
18+
Version 1
19+
20+
![alt](osm-faker-stores-in-ohio-1.png)
21+
22+
Version 2
23+
24+
![alt2](osm-faker-stores-in-ohio-2.png)
25+
26+
27+

docs/src/pgosm-flex-faker.md

Lines changed: 0 additions & 17 deletions
This file was deleted.

docs/src/quick-start.md

Lines changed: 43 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,23 @@
1-
# Quick Start to PgOSM Flex Faker
1+
# Quick Start to Geo Faker
22

3-
This section covers how to get started with the Faker version of PgOSM Flex.
3+
This section covers how to get started with the Faker version of PgOSM Flex,
4+
also known as Geo Faker.
45

56

7+
> Warning: This project is in early development! Things will be changing over the first few releases (e.g. before 0.5.0).
8+
9+
The basic steps for using Geo Faker are:
10+
11+
* Run PgOSM Flex with custom layerset
12+
* Load PgOSM Flex Faker objects
13+
* Run the Geo Faker stored procedures to generate fake stores and customers
14+
615
## Load OpenStreetMap Data
716

817
Load the region/subregion you want using the PgOSM Flex Docker image.
9-
These instructions are modified from [PgOSM Flex's Quick Start](https://pgosm-flex.com/quick-start.html) section. The following
10-
loads the data into a PostGIS enabled database in a `pgosm-flex-faker`
18+
These instructions are modified from
19+
[PgOSM Flex's Quick Start](https://pgosm-flex.com/quick-start.html)
20+
section. The following loads the data into a PostGIS enabled database in a `geo-faker`
1121
Docker container available on port 5433.
1222

1323

@@ -16,14 +26,14 @@ mkdir ~/pgosm-data
1626
export POSTGRES_USER=postgres
1727
export POSTGRES_PASSWORD=mysecretpassword
1828

19-
docker pull rustprooflabs/pgosm-flex-faker:latest
29+
docker pull rustprooflabs/geo-faker:latest
2030

2131
docker run --name pgosm-faker -d --rm \
2232
-v ~/pgosm-data:/app/output \
23-
-v ~/git/pgosm-flex-faker/:/custom-layerset \
33+
-v ~/git/geo-faker/:/custom-layerset \
2434
-v /etc/localtime:/etc/localtime:ro \
2535
-e POSTGRES_PASSWORD=$POSTGRES_PASSWORD \
26-
-p 5433:5432 -d rustprooflabs/pgosm-flex-faker
36+
-p 5433:5432 -d rustprooflabs/geo-faker
2737

2838
docker exec -it \
2939
pgosm-faker python3 docker/pgosm_flex.py \
@@ -37,12 +47,13 @@ docker exec -it \
3747

3848
## Load Faker Objects
3949

40-
After the data completes processing, load the PgOSM Flex Faker database structures.
50+
After the data completes processing, load the PgOSM Flex Faker database structures
51+
in the `geo_faker` schema.
4152
This is done using Sqitch.
4253

4354

4455
```bash
45-
cd pgosm-flex-faker/db
56+
cd geo-faker/db
4657
sqitch db:pg://$POSTGRES_USER:$POSTGRES_PASSWORD@localhost:5433/pgosm deploy
4758
```
4859

@@ -52,28 +63,47 @@ random results.
5263

5364
## Run Faker generation
5465

55-
The stored procedure `pgosm_flex_faker.point_in_place_landuse()` places points
66+
There are two stored procedures in the `geo_faker` schema that
67+
generate the fake stores and customers.
68+
69+
70+
The stored procedure `geo_faker.point_in_place_landuse()` places points
5671
along roads that are within (or nearby) specific `landuse` areas. The generated
5772
data is available after calling the stored procedure in a temporary table
5873
named `faker_store_location`.
5974
The generated data is scoped to named places currently, though that will
6075
likely become adjustable in the future.
6176

6277

78+
The `geo_faker.points_around_point()` stored procedure requires
79+
the `faker_store_location` temp table created by the first stored procedure.
80+
81+
82+
6383
```sql
64-
CALL pgosm_flex_faker.point_in_place_landuse();
65-
SELECT COUNT(*) FROM faker_store_location;
84+
CALL geo_faker.point_in_place_landuse();
85+
CALL geo_faker.points_around_point();
6686
```
6787

68-
6988
The following query saves the data in a new, non-temporary table named
7089
`my_fake_stores`.
7190

7291

7392

93+
94+
95+
7496
```sql
7597
CREATE TABLE my_fake_stores AS
7698
SELECT *
7799
FROM faker_store_location
78100
;
101+
102+
CREATE TABLE my_fake_customers AS
103+
SELECT *
104+
FROM faker_customer_location
105+
;
79106
```
107+
108+
109+

0 commit comments

Comments
 (0)