Skip to content

Commit 289c10c

Browse files
Merge pull request #12 from rustprooflabs/add-pgfaker
Improve faked data - pgfaker
2 parents 280b3b4 + 6cf94b2 commit 289c10c

22 files changed

+246
-275
lines changed

Dockerfile

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,13 @@
11
FROM rustprooflabs/pgosm-flex
22

3+
# Install pgfaker extension
4+
RUN wget https://github.com/rustprooflabs/pgfaker/releases/download/0.0.1/pgfaker_0.0.1_debian-11_pg15_amd64.deb \
5+
-O /tmp/pgfaker.deb \
6+
&& dpkg -i --force-overwrite /tmp/pgfaker.deb
7+
8+
39
COPY ./db /app/faker/db
410
COPY ./faker.ini /app/flex-config/layerset/
5-
COPY ./run_faker.sh /app/
6-
COPY ./run_faker.sql /app/
11+
12+
COPY ./app/* /app/
13+
File renamed without changes.

run_faker.sql renamed to app/run_faker.sql

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,8 @@ CALL geofaker.point_in_place_landuse();
22
/*
33
Sets scale of distance for customers, and density of customer points
44
*/
5-
CALL geofaker.points_around_point(_distance_scale:=0.5,
6-
_density_scale:=4);
5+
CALL geofaker.points_around_point(_distance_scale:=1.5,
6+
_density_scale:=1);
77

88

99
DROP TABLE IF EXISTS geofaker.store;

db/deploy/basics.sql

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,8 @@
1+
-- Deploy pgosm-flex-faker:basics to pg
-- Creates the prerequisites the other deploy scripts depend on
-- (they declare "requires: basics").
2+
3+
BEGIN;
4+
5+
-- Schema that holds the geofaker functions and procedures.
CREATE SCHEMA IF NOT EXISTS geofaker;
6+
-- Extension installed by the Dockerfile; presumably the source of the
-- pgfaker.* functions (company, slogan, phone, person_full_name, email)
-- called by the geofaker procedures -- confirm against the extension.
CREATE EXTENSION IF NOT EXISTS pgfaker;
7+
8+
COMMIT;

db/deploy/n-points-in-polygon.sql

Lines changed: 58 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,58 @@
-- Deploy pgosm-flex-faker:n-points-in-polygon to pg
-- requires: basics

BEGIN;


CREATE FUNCTION geofaker.n_points_in_polygon(geom geometry, num_points integer)
    RETURNS SETOF geometry
    LANGUAGE plpgsql VOLATILE
    COST 100
    ROWS 1000
AS $$
/*
    Returns num_points random points located inside geom.

    Uses rejection sampling: candidate points are drawn uniformly from the
    bounding box of geom and kept only when ST_Contains(geom, point) is true.

    Parameters:
        geom        Polygon to place points in.  Returned points carry the
                    SRID of geom.
        num_points  Number of points to return.

    Returns:
        One row per generated point.  No rows when geom is NULL or when
        num_points is NULL or not positive.

    Raises:
        An exception when geom covers less than 0.01% of its bounding box
        (including geometries whose bounding box has zero area), because the
        sampling loop could not finish in a reasonable amount of time.
*/
DECLARE
    target_proportion numeric;
    n_ret integer := 0;
    loops integer := 0;
    x_min float8;
    y_min float8;
    x_max float8;
    y_max float8;
    srid integer;
    rpoint geometry;
BEGIN
    -- Nothing to generate.  Returning here also protects the code below:
    -- a NULL geom makes ST_Contains() yield NULL on every iteration (the
    -- loop would never end), and zero iterations would make the efficiency
    -- calculation at the end divide by zero.
    IF geom IS NULL OR num_points IS NULL OR num_points <= 0 THEN
        RETURN;
    END IF;

    -- Get envelope and SRID of source polygon
    x_min := ST_XMin(geom);
    y_min := ST_YMin(geom);
    x_max := ST_XMax(geom);
    y_max := ST_YMax(geom);
    srid := ST_SRID(geom);

    -- Get the area proportion of envelope size to determine if a
    -- result can be returned in a reasonable amount of time.
    -- NULLIF/COALESCE turn a zero-area envelope (point, axis-aligned line,
    -- empty geometry) into a proportion of 0 so the descriptive exception
    -- below is raised instead of a bare division-by-zero error.
    target_proportion := COALESCE(
        ST_Area(geom) / NULLIF(ST_Area(ST_Envelope(geom)), 0),
        0
    );

    RAISE DEBUG 'geom: SRID %, NumGeometries %, NPoints %, area proportion within envelope %',
        srid, ST_NumGeometries(geom), ST_NPoints(geom),
        round(100.0*target_proportion, 2) || '%';

    IF target_proportion < 0.0001 THEN
        RAISE EXCEPTION 'Target area proportion of geometry is too low (%)',
            100.0*target_proportion || '%';
    END IF;

    RAISE DEBUG 'bounds: % % % %', x_min, y_min, x_max, y_max;

    WHILE n_ret < num_points LOOP
        loops := loops + 1;

        -- Candidate point drawn uniformly from the bounding box
        rpoint := ST_SetSRID(
            ST_MakePoint(random()*(x_max - x_min) + x_min,
                         random()*(y_max - y_min) + y_min),
            srid
        );

        -- Keep the candidate only when it falls inside the polygon itself
        IF ST_Contains(geom, rpoint) THEN
            n_ret := n_ret + 1;
            RETURN NEXT rpoint;
        END IF;
    END LOOP;

    -- loops >= 1 here because num_points >= 1 past the guard above
    RAISE DEBUG 'determined in % loops (% efficiency)', loops, round(100.0*num_points/loops, 2) || '%';
END
$$
;

COMMENT ON FUNCTION geofaker.n_points_in_polygon(GEOMETRY, INT) IS 'Creates N points randomly within the given polygon. From: https://trac.osgeo.org/postgis/wiki/UserWikiRandomPoint';


COMMIT;
Lines changed: 5 additions & 150 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,8 @@
1-
-- Deploy pgosm-flex-faker:001 to pg
1+
-- Deploy pgosm-flex-faker:point-in-place-landuse to pg
2+
-- requires: basics
23

34
BEGIN;
45

5-
CREATE SCHEMA geofaker;
6-
76

87
CREATE PROCEDURE geofaker.point_in_place_landuse()
98
LANGUAGE plpgsql
@@ -223,8 +222,9 @@ BEGIN
223222

224223
DROP TABLE IF EXISTS faker_store_location;
225224
CREATE TEMP TABLE faker_store_location AS
226-
SELECT ROW_NUMBER() OVER () AS store_id, a.place_osm_id, a.place_osm_type, a.place_name, a.road_osm_id,
227-
r.osm_type AS road_osm_type, r.name AS road_name, r.ref AS road_ref,
225+
SELECT ROW_NUMBER() OVER () AS store_id, a.place_name AS city,
226+
r.name AS street_name, r.ref AS road_ref,
227+
pgfaker.company(), pgfaker.slogan(), pgfaker.phone(),
228228
public.ST_LineInterpolatePoint(public.ST_LineMerge(r.geom), random()) AS geom
229229
FROM selected_roads a
230230
INNER JOIN osm.road_line r ON a.road_osm_id = r.osm_id
@@ -239,149 +239,4 @@ COMMENT ON PROCEDURE geofaker.point_in_place_landuse IS 'Uses osm.landuse_polygo
239239
;
240240

241241

242-
CREATE FUNCTION geofaker.n_points_in_polygon(geom geometry, num_points integer)
243-
RETURNS SETOF geometry
244-
LANGUAGE plpgsql VOLATILE
245-
COST 100
246-
ROWS 1000
247-
AS $$
248-
DECLARE
249-
target_proportion numeric;
250-
n_ret integer := 0;
251-
loops integer := 0;
252-
x_min float8;
253-
y_min float8;
254-
x_max float8;
255-
y_max float8;
256-
srid integer;
257-
rpoint geometry;
258-
BEGIN
259-
-- Get envelope and SRID of source polygon
260-
SELECT ST_XMin(geom), ST_YMin(geom), ST_XMax(geom), ST_YMax(geom), ST_SRID(geom)
261-
INTO x_min, y_min, x_max, y_max, srid;
262-
-- Get the area proportion of envelope size to determine if a
263-
-- result can be returned in a reasonable amount of time
264-
SELECT ST_Area(geom)/ST_Area(ST_Envelope(geom)) INTO target_proportion;
265-
RAISE DEBUG 'geom: SRID %, NumGeometries %, NPoints %, area proportion within envelope %',
266-
srid, ST_NumGeometries(geom), ST_NPoints(geom),
267-
round(100.0*target_proportion, 2) || '%';
268-
IF target_proportion < 0.0001 THEN
269-
RAISE EXCEPTION 'Target area proportion of geometry is too low (%)',
270-
100.0*target_proportion || '%';
271-
END IF;
272-
RAISE DEBUG 'bounds: % % % %', x_min, y_min, x_max, y_max;
273-
274-
WHILE n_ret < num_points LOOP
275-
loops := loops + 1;
276-
SELECT ST_SetSRID(ST_MakePoint(random()*(x_max - x_min) + x_min,
277-
random()*(y_max - y_min) + y_min),
278-
srid) INTO rpoint;
279-
IF ST_Contains(geom, rpoint) THEN
280-
n_ret := n_ret + 1;
281-
RETURN NEXT rpoint;
282-
END IF;
283-
END LOOP;
284-
RAISE DEBUG 'determined in % loops (% efficiency)', loops, round(100.0*num_points/loops, 2) || '%';
285-
END
286-
$$
287-
;
288-
289-
COMMENT ON FUNCTION geofaker.n_points_in_polygon(GEOMETRY, INT) IS 'Creates N points randomly within the given polygon. From: https://trac.osgeo.org/postgis/wiki/UserWikiRandomPoint';
290-
291-
292-
293-
-- Ensure the required temp table exists, avoids deploy failure creating next sproc
294-
CREATE TEMP TABLE IF NOT EXISTS faker_store_location
295-
(
296-
store_id BIGINT, place_osm_id BIGINT, place_osm_type TEXT, place_name TEXT,
297-
road_osm_id BIGINT, geom GEOMETRY
298-
);
299-
300-
301-
CREATE PROCEDURE geofaker.points_around_point()
302-
LANGUAGE plpgsql
303-
AS $$
304-
DECLARE
305-
stores_to_process BIGINT;
306-
t_row faker_store_location%rowtype;
307-
BEGIN
308-
309-
SELECT COUNT(*) INTO stores_to_process
310-
FROM faker_store_location
311-
;
312-
RAISE NOTICE 'Generating customers for % stores...', stores_to_process;
313-
314-
DROP TABLE IF EXISTS faker_customer_location;
315-
CREATE TEMP TABLE faker_customer_location
316-
(
317-
id BIGINT NOT NULL GENERATED BY DEFAULT AS IDENTITY PRIMARY KEY,
318-
store_id BIGINT NOT NULL,
319-
customer_id BIGINT NOT NULL,
320-
geom GEOMETRY(POINT, 3857) NOT NULL
321-
);
322-
323-
324-
FOR t_row IN SELECT * FROM faker_store_location LOOP
325-
IF t_row.store_id % 10 = 0 THEN
326-
RAISE NOTICE 'Store ID: %', t_row.store_id;
327-
END IF;
328-
329-
DROP TABLE IF EXISTS place_buffer;
330-
CREATE TEMP TABLE place_buffer AS
331-
SELECT store_id, geom, ST_Buffer(geom, 5000) AS geom_buffer
332-
FROM faker_store_location
333-
WHERE store_id = t_row.store_id
334-
;
335-
336-
DROP TABLE IF EXISTS store_potential_customers;
337-
CREATE TEMP TABLE store_potential_customers AS
338-
SELECT store_id,
339-
geofaker.n_points_in_polygon(geom_buffer, 1000)
340-
AS geom
341-
FROM place_buffer
342-
;
343-
ALTER TABLE store_potential_customers
344-
ADD customer_id BIGINT NOT NULL GENERATED BY DEFAULT AS IDENTITY;
345-
346-
--SELECT * FROM store_potential_customers;
347-
/*
348-
* Using a CTE here with ST_Envelope to bbox join roads.
349-
* A simple join (which looks innocent) took 45+ seconds to return 141 rows
350-
* while the CTE version takes < 60 ms.
351-
*/
352-
--EXPLAIN (ANALYZE, BUFFERS, VERBOSE, SETTINGS)
353-
WITH possible_roads AS (
354-
SELECT p.store_id, p.customer_id, p.geom AS geom_customer,
355-
r.geom AS geom_road,
356-
ST_Distance(p.geom, r.geom) AS distance
357-
FROM osm.road_line r
358-
INNER JOIN store_potential_customers p
359-
ON ST_DWithin(r.geom, p.geom, 300)
360-
WHERE r.route_motor
361-
), ranked AS (
362-
SELECT *, ROW_NUMBER() OVER (
363-
PARTITION BY store_id, customer_id ORDER BY distance
364-
) AS rnk
365-
FROM possible_roads
366-
)
367-
INSERT INTO faker_customer_location (store_id, customer_id, geom)
368-
SELECT store_id, customer_id,
369-
ST_Snap(geom_customer, geom_road, 300) AS geom_snapped
370-
FROM ranked
371-
WHERE rnk = 1
372-
;
373-
COMMIT;
374-
375-
END LOOP;
376-
377-
END;
378-
$$;
379-
380-
381-
COMMENT ON PROCEDURE geofaker.points_around_point IS 'Creates fake customer locations around a store. Locations are snapped to roads. Locations not scoped to landuse at this time. Requires faker_store_location temp table with fake store data.';
382-
383242
COMMIT;
384-
385-
386-
387-
Lines changed: 9 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
1-
-- Deploy pgosm-flex-faker:002 to pg
2-
-- requires: 001
1+
-- Deploy pgosm-flex-faker:points-around-point to pg
2+
-- requires: n-points-in-polygon
33

44
BEGIN;
55

@@ -12,9 +12,6 @@ CREATE TEMP TABLE IF NOT EXISTS faker_store_location
1212
);
1313

1414

15-
16-
17-
DROP PROCEDURE geofaker.points_around_point();
1815
CREATE PROCEDURE geofaker.points_around_point(
1916
_distance_scale NUMERIC = 1.0,
2017
_density_scale NUMERIC = 1.0
@@ -37,6 +34,9 @@ BEGIN
3734
id BIGINT NOT NULL GENERATED BY DEFAULT AS IDENTITY PRIMARY KEY,
3835
store_id BIGINT NOT NULL,
3936
customer_id BIGINT NOT NULL,
37+
full_name TEXT NOT NULL,
38+
email TEXT NOT NULL,
39+
phone TEXT NULL,
4040
geom GEOMETRY(POINT, 3857) NOT NULL
4141
);
4242

@@ -100,8 +100,11 @@ BEGIN
100100
) AS rnk
101101
FROM possible_roads
102102
)
103-
INSERT INTO faker_customer_location (store_id, customer_id, geom)
103+
INSERT INTO faker_customer_location (store_id, customer_id,
104+
full_name, email, phone, geom)
104105
SELECT store_id, customer_id,
106+
pgfaker.person_full_name(), pgfaker.email(),
107+
pgfaker.phone(),
105108
ST_Snap(geom_customer, geom_road, 300) AS geom_snapped
106109
FROM ranked
107110
WHERE rnk = 1
@@ -116,8 +119,4 @@ END;
116119
$$;
117120

118121

119-
120-
121-
122-
123122
COMMIT;

db/revert/001.sql

Lines changed: 0 additions & 7 deletions
This file was deleted.

0 commit comments

Comments
 (0)