diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..eb9f1fc --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +benchmark/*.data filter=lfs diff=lfs merge=lfs -text diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..aa3a3ca --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,71 @@ +name: build +on: + workflow_dispatch: + push: + branches: + - master + tags: + - "!*" # Do not execute on tags + paths: + - "**/*.c" + - "**/*.h" + - "**/*.sql" + - data/** + - expected/** + - .clangd-format + - .github/workflows/** + - Makefile + - "*.control" + pull_request: + paths: + - "**/*.c" + - "**/*.h" + - "**/*.sql" + - data/** + - expected/** + - .clangd-format + - .github/workflows/** + - Makefile + - "*.control" + branches: + - "**" +env: + PGPORT: 5432 + PGUSER: postgres + PGDATABASE: postgres + PGPASSWORD: postgres + PGHOST: localhost + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Update repositories + run: sudo apt -y install wget ca-certificates && + wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo apt-key add - && + sudo sh -c 'echo "deb http://apt.postgresql.org/pub/repos/apt/ `lsb_release -cs`-pgdg main" >> /etc/apt/sources.list.d/pgdg.list' && + sudo apt-get --purge remove postgresql && + sudo apt update + + - name: Install postgres + run: sudo apt install -y postgresql-17 libpq-dev clang-format postgresql-server-dev-17 + + - name: Start Postgres + run: sudo systemctl start postgresql && sleep 5 + + - name: Set password + run: sudo -u postgres psql -c "ALTER USER postgres WITH PASSWORD 'postgres';" + + - name: Read Postgres version + run: sudo -u postgres psql -c "SELECT version();" + + - name: Install + run: sudo make install + + - name: Format check + run: find . 
-iname '*.h' -o -iname '*.c' | xargs clang-format --dry-run --Werror + + - name: Run tests + run: make installcheck || (cat regression.diffs && exit -1) diff --git a/Makefile b/Makefile index 9b66b03..30003af 100644 --- a/Makefile +++ b/Makefile @@ -1,14 +1,17 @@ MODULE_big = pg_set OBJS = \ - hash_set.o \ - pg_set_io.o \ - pg_set_op.o + hash_set.o \ + pg_set_io.o \ + pg_set_op.o \ + pg_set_gin.o \ + pg_set_gist.o \ + pg_set_analyze.o \ EXTENSION = pg_set DATA = pg_set--1.0.sql -PG_CPPFLAGS = -std=c11 -Wextra -Wpedantic -O0 +PG_CPPFLAGS = -std=c11 -Wextra -Wpedantic REGRESS = pg_set_test diff --git a/README.md b/README.md new file mode 100644 index 0000000..15bb32f --- /dev/null +++ b/README.md @@ -0,0 +1,242 @@ +# `pg_set` + +This extension adds integer sets to PostgreSQL. It provides a new data type +`pg_set` that can store a set of integers (`int4`) efficiently, along with +various functions and operators to manipulate these sets. + +## Features + +- Written in C +- Efficient storage of integer sets using a mask (it's smaller than `int4[]` for + small enough sets) +- Array-compatible text representation and efficient casting to and from array +- Mathematical set properties and operations: union, intersection, difference, + containment, etc. +- Index support for GIN, GiST and Hash +- Statistic collection and array-like selectivity support for all indexable + operators +- Null elements are not supported. They are annoying in arrays and don't really make + sense for sets. + +## Motivation + +Originally I wrote this extension to learn more about the low-level details of +Postgres types, but I ended up adding more and more features so I decided to +publish it. The case that motivated me to write this extension was one where we +stored sets of IDs that referenced an external table (see the `quasi_monotonic` +distribution in the benchmark). We felt that creating a table was overkill, so we needed to +store sets of values. 
We also had a requirement that values could not be +duplicated and the order didn't matter, so what we needed was a set, not an +array. Traditionally, PostgreSQL has the following options: + +- Use a plain `int4[]` array and ensure uniqueness at the application level or + with functions (e.g. + [`intarray`](https://www.postgresql.org/docs/current/intarray.html)). +- Use the [`hstore`](https://www.postgresql.org/docs/current/hstore.html) extension + to store sets of integers as keys +- Use the [`jsonb`](https://www.postgresql.org/docs/current/datatype-json.html) + type to store sets of integers as keys + +Back then we used `intarray` with constraints as it's pretty straightforward to +use, but I thought that it'd be nice to have a _set_ type to make it natural. +Since I didn't find one, I decided to write this extension to learn a little +bit more about PostgreSQL internals. + +## Installation + +Clone the repository and run: + +```bash +make +sudo make install +``` + +Then, in your database: + +```sql +CREATE EXTENSION pg_set; +``` + +## Usage + +### Basic usage + +```sql +-- Creating sets +-- Array-compatible representation +SELECT '{1,2,3}'::pg_set; + +-- No duplication +SELECT '{1,1,1,1,1}'::pg_set; + +-- With int4 args +SELECT pg_set_create(1,2,3); + +CREATE TABLE reference ( + id int4 GENERATED BY DEFAULT AS IDENTITY PRIMARY KEY, + external_ids pg_set NOT NULL +); + +INSERT INTO reference (external_ids) VALUES + ('{1,2,3}'), + ('{3,4,5,6}'), + ('{7,8,9}'), + ('{1,3,5,7,9}'); + +-- contains 1 and 3 +SELECT * FROM reference WHERE external_ids @> '{1,3}'; + +-- is contained by the set +SELECT * FROM reference WHERE external_ids <@ '{1,2,3,4,7,8,9}'; + +-- contains 4 +SELECT * FROM reference WHERE external_ids @> 4; + +-- Same as above +SELECT * FROM reference WHERE 4 <@ external_ids; + +-- contains 1 or 3 +SELECT * FROM reference WHERE external_ids && '{1,3}'; + +-- Equality +SELECT * FROM reference WHERE external_ids = '{1,2,3}'; + +-- Inequality +SELECT * FROM 
reference WHERE external_ids <> '{1,2,3}'; + +-- Count of elements in the set +SELECT * FROM reference WHERE pg_set_count(external_ids) > 3; +``` + +### Set operations + +```sql +-- Union +UPDATE + reference +SET + external_ids = external_ids + '{4}' +RETURNING *; + +-- Add element +UPDATE + reference +SET + external_ids = external_ids + 5 +RETURNING *; + +-- Works on both sides +UPDATE + reference +SET + external_ids = 5 + external_ids +RETURNING *; + +-- Can also remove an element +UPDATE + reference +SET + external_ids = external_ids - 5 +RETURNING *; + +-- Intersection +UPDATE + reference +SET + external_ids = external_ids * '{3,4,5}' +WHERE + external_ids && '{3,4,5}' +RETURNING *; + +-- Difference +UPDATE + reference +SET + external_ids = external_ids - '{3,4,5}' +WHERE + external_ids && '{3,4,5}' +RETURNING *; +``` + +## `ANALYZE` support + +Supports almost the same statistics collection as arrays (default stats + +`most_common_elems`, `most_common_elem_freqs` and `element_count_histogram`). +It does not support `correlation` and `histogram_bounds`, as sets don't support +a less-than operation. Selectivity functions work for the `@>` (both set and +integer cases) and `&&` operators, and for the `=` operator (the Postgres default works well +here), which are all the indexable operators so far. + +## Index support + +### GiST + +The GiST index supports the `@>`, `&&` and `=` operators through the +`gist_pg_set_ops` operator class. The implementation uses an RD-tree data structure with +built-in lossy compression. It approximates sets as a bit mask and also +contains the minimum and maximum set elements to speed up overlap queries at +the expense of index size. It has an optional `masklen` parameter which is the +mask length in bits. It goes from 16 bits (2 bytes) to 16064 bits (2016 bytes). +The default is 16 bytes. A higher `masklen` will increase precision at the +expense of index size. So `masklen` must be balanced with index size for +optimal performance. 
+ +```sql +CREATE INDEX ON reference USING gist (external_ids); +CREATE INDEX ON reference USING gist (external_ids gist_pg_set_ops (masklen=2048)); +``` + +#### Exclusion constraints + +You can also create exclusion constraints using GiST. This makes it great to use in +conjunction with `btree_gist`: + +```sql +CREATE EXTENSION btree_gist; + +CREATE TABLE room_booking ( + id int4 GENERATED BY DEFAULT AS IDENTITY PRIMARY KEY, + room_id int NOT NULL, + booked_slots pg_set NOT NULL, + EXCLUDE USING gist (room_id WITH =, booked_slots WITH &&) +); + +INSERT INTO room_booking (room_id, booked_slots) VALUES + (1, '{1,2,3}'), + (1, '{5,6,7}'), + (2, '{1,2,3}'); + +INSERT INTO room_booking (room_id, booked_slots) VALUES + (1, '{3,9}'); -- Fails, overlaps with first entry +``` + +### GIN + +Supports the same operators as GiST. Works similarly to an `int4[]` +GIN index, as GIN is a tree of elements. It's implemented by the `gin_pg_set_ops` +operator class. + +```sql +CREATE INDEX ON reference USING gin (external_ids); +``` + +### Hash + +Supports only `=` as usual. It's implemented by the `hash_pg_set_ops` operator +class. + +```sql +CREATE INDEX ON reference USING hash (external_ids); +``` + +## Benchmark + +See [`pg_set_benchmark`](https://github.com/carlosganzerla/pg_set_benchmark). + +## Future work + +Currently, this extension supports only `int4`. It would be nice to add support +for `int2` and `int8`, but that requires a lot of work, as we need to redefine +basically all functions at the SQL level and find a way to reuse the internals +without compromising performance. The same applies to `float4` and `float8`, +and to pretty much any fixed-length, sortable type. 
diff --git a/benchmark/README.md b/benchmark/README.md new file mode 100644 index 0000000..d5f7820 --- /dev/null +++ b/benchmark/README.md @@ -0,0 +1,206 @@ +# Benchmark Results + +## System Information + +| System | Linux | +|----------------|----------------------------------------| +| Release | 6.16.6-arch1-1 | +| Machine | x86_64 | +| Processor | AMD Ryzen 7 5700U with Radeon Graphics | +| Physical cores | 8 | +| Total cores | 16 | +| Max Frequency | 4373.86Mhz | +| Total memory | 30.73GB | +| SWAP | 6.00GB | +| Disk | SM2P32A8-512GC 476.9G | + +## Postgres Parameters + +| Parameter | Value | +|----------------------------|-----------------------------------------------------------------------------------------| +| version | "PostgreSQL 17.5 on x86_64-pc-linux-gnu, compiled by gcc (GCC) 15.2.1 20250813, 64-bit" | +| application_name | psql | +| autovacuum | off | +| config_file | /var/lib/postgres/data/postgresql.conf | +| data_directory | /var/lib/postgres/data | +| DateStyle | "ISO, MDY" | +| default_text_search_config | pg_catalog.english | +| dynamic_shared_memory_type | posix | +| hba_file | /var/lib/postgres/data/pg_hba.conf | +| ident_file | /var/lib/postgres/data/pg_ident.conf | +| lc_messages | en_US.UTF-8 | +| lc_monetary | en_US.UTF-8 | +| lc_numeric | en_US.UTF-8 | +| lc_time | en_US.UTF-8 | +| log_timezone | America/Sao_Paulo | +| max_connections | 100 | +| max_wal_size | 1GB | +| min_wal_size | 80MB | +| shared_buffers | 128MB | +| TimeZone | America/Sao_Paulo | +| transaction_deferrable | off | +| transaction_isolation | read committed | +| transaction_read_only | off | + +## Distribution `quasi_monotonic` + +| Operation | pg_set | hstore | intarray | jsonb | +|--------------------------|-----------|-----------|-----------|-----------| +| Insert | 477.07 ms | 797.58 ms | 382.25 ms | 901.64 ms | +| Table Sizes | 30 MB | 46 MB | 36 MB | 46 MB | +| Seq. Scan Contains | 24.20 ms | 26.17 ms | 38.22 ms | 29.32 ms | +| Seq. 
Scan Overlap | 23.07 ms | 42.25 ms | 35.89 ms | 36.22 ms | +| Seq. Scan Subset | 24.37 ms | 38.62 ms | 34.55 ms | 32.08 ms | +| Seq. Scan Equality | 21.72 ms | 24.99 ms | 25.22 ms | 36.02 ms | +| Seq. Scan Non Equality | 72.25 ms | 82.64 ms | 82.57 ms | 112.59 ms | +| Seq. Scan Add Element | 117.26 ms | 111.07 ms | 186.94 ms | 491.69 ms | +| Seq. Scan Remove Element | 77.93 ms | 99.10 ms | 86.37 ms | 211.85 ms | +| Seq. Scan Union | 131.90 ms | 116.95 ms | 140.36 ms | 451.77 ms | +| Seq. Scan Intersection | 105.20 ms | N/A | 127.88 ms | N/A | +| Seq. Scan Difference | 121.83 ms | 105.50 ms | 134.61 ms | 200.20 ms | +| Gin Sizes | 80 MB | 99 MB | 80 MB | 99 MB | +| GIN Contains | 0.01 ms | 0.01 ms | 0.01 ms | 0.01 ms | +| GIN Overlap | 0.01 ms | 0.01 ms | 0.01 ms | 0.01 ms | +| GIN Subset | 0.01 ms | 0.01 ms | 0.01 ms | 0.01 ms | +| GIN Equality | 0.01 ms | N/A | 0.01 ms | N/A | +| Gist Sizes | 120 MB | 102 MB | 102 MB | N/A | +| GiST Contains | 0.93 ms | 7.20 ms | 5.75 ms | N/A | +| GiST Overlap | 1.13 ms | 14.88 ms | 9.38 ms | N/A | +| GiST Subset | 0.42 ms | 2.39 ms | 1.54 ms | N/A | +| GiST Equality | 0.41 ms | N/A | 2.23 ms | N/A | + +## Distribution `random_gigantic` + +| Operation | pg_set | hstore | intarray | jsonb | +|--------------------------|------------|-------------|------------|-------------| +| Insert | 4006.25 ms | 14861.67 ms | 1430.14 ms | 17642.02 ms | +| Table Sizes | 113 MB | 372 MB | 103 MB | 133 MB | +| Seq. Scan Contains | 47.78 ms | 182.82 ms | 2134.92 ms | 489.37 ms | +| Seq. Scan Overlap | 58.18 ms | 271.35 ms | 1909.31 ms | 646.84 ms | +| Seq. Scan Subset | 52.24 ms | 214.75 ms | 1904.41 ms | 483.21 ms | +| Seq. Scan Equality | 35.36 ms | 142.05 ms | 33.93 ms | 477.70 ms | +| Seq. Scan Non Equality | 35.69 ms | 144.83 ms | 35.97 ms | 479.57 ms | +| Seq. Scan Add Element | 170.58 ms | 341.74 ms | 181.23 ms | 4570.54 ms | +| Seq. Scan Remove Element | 39.50 ms | 296.77 ms | 45.25 ms | 1536.69 ms | +| Seq. 
Scan Union | 211.92 ms | 414.02 ms | 1967.17 ms | 7474.66 ms | +| Seq. Scan Intersection | 85.05 ms | N/A | 1945.58 ms | N/A | +| Seq. Scan Difference | 179.48 ms | 387.89 ms | 1853.60 ms | 1462.06 ms | +| Gin Sizes | 489 MB | 558 MB | 489 MB | 559 MB | +| GIN Contains | 0.02 ms | 0.02 ms | 0.02 ms | 0.51 ms | +| GIN Overlap | 0.90 ms | 1.29 ms | 0.77 ms | 49.54 ms | +| GIN Subset | 0.31 ms | 0.99 ms | 0.45 ms | 0.35 ms | +| GIN Equality | 0.31 ms | N/A | 0.45 ms | N/A | +| Gist Sizes | 1296 kB | 16 MB | 26 MB | N/A | +| GiST Contains | 42.95 ms | 152.97 ms | 1881.60 ms | N/A | +| GiST Overlap | 65.01 ms | 311.68 ms | 1966.65 ms | N/A | +| GiST Subset | 38.44 ms | 140.61 ms | 1343.61 ms | N/A | +| GiST Equality | 0.26 ms | N/A | 3.97 ms | N/A | + +## Distribution `random_large` + +| Operation | pg_set | hstore | intarray | jsonb | +|--------------------------|------------|------------|------------|-------------| +| Insert | 2536.03 ms | 7922.07 ms | 753.56 ms | 10031.72 ms | +| Table Sizes | 85 MB | 286 MB | 79 MB | 126 MB | +| Seq. Scan Contains | 56.00 ms | 164.54 ms | 1061.64 ms | 353.55 ms | +| Seq. Scan Overlap | 190.08 ms | 7215.04 ms | 2901.02 ms | 5129.05 ms | +| Seq. Scan Subset | 49.02 ms | 5731.71 ms | 2753.03 ms | 419.11 ms | +| Seq. Scan Equality | 51.05 ms | 167.24 ms | 49.02 ms | 389.91 ms | +| Seq. Scan Non Equality | 56.73 ms | 177.97 ms | 50.08 ms | 348.36 ms | +| Seq. Scan Add Element | 156.79 ms | 307.14 ms | 168.86 ms | 2675.09 ms | +| Seq. Scan Remove Element | 53.60 ms | 281.70 ms | 59.90 ms | 1109.07 ms | +| Seq. Scan Union | 454.92 ms | 894.53 ms | 2867.54 ms | 15046.06 ms | +| Seq. Scan Intersection | 246.75 ms | N/A | 2849.81 ms | N/A | +| Seq. 
Scan Difference | 324.51 ms | 677.74 ms | 2712.52 ms | 1043.01 ms | +| Gin Sizes | 392 MB | 512 MB | 392 MB | 511 MB | +| GIN Contains | 0.02 ms | 0.02 ms | 0.02 ms | 0.07 ms | +| GIN Overlap | 11.20 ms | 13.50 ms | 11.67 ms | 146.39 ms | +| GIN Subset | 7.44 ms | 11.40 ms | 8.95 ms | 2.34 ms | +| GIN Equality | 7.26 ms | N/A | 9.09 ms | N/A | +| Gist Sizes | 9672 kB | 8488 kB | 8440 kB | N/A | +| GiST Contains | 40.28 ms | 106.31 ms | 511.03 ms | N/A | +| GiST Overlap | 218.39 ms | 6895.56 ms | 2834.22 ms | N/A | +| GiST Subset | 2.30 ms | 10.95 ms | 1.03 ms | N/A | +| GiST Equality | 2.14 ms | N/A | 9.80 ms | N/A | + +## Distribution `random_medium` + +| Operation | pg_set | hstore | intarray | jsonb | +|--------------------------|-----------|------------|-----------|------------| +| Insert | 682.86 ms | 1256.08 ms | 123.77 ms | 1775.40 ms | +| Table Sizes | 25 MB | 77 MB | 25 MB | 77 MB | +| Seq. Scan Contains | 11.26 ms | 20.30 ms | 204.31 ms | 25.97 ms | +| Seq. Scan Overlap | 29.86 ms | 283.89 ms | 257.51 ms | 278.72 ms | +| Seq. Scan Subset | 13.06 ms | 200.89 ms | 236.36 ms | 35.63 ms | +| Seq. Scan Equality | 8.60 ms | 17.04 ms | 9.53 ms | 22.82 ms | +| Seq. Scan Non Equality | 13.53 ms | 22.23 ms | 14.82 ms | 27.80 ms | +| Seq. Scan Add Element | 55.39 ms | 70.54 ms | 67.78 ms | 608.95 ms | +| Seq. Scan Remove Element | 16.67 ms | 57.84 ms | 19.98 ms | 276.29 ms | +| Seq. Scan Union | 86.72 ms | 126.66 ms | 274.66 ms | 1542.69 ms | +| Seq. Scan Intersection | 46.03 ms | N/A | 271.15 ms | N/A | +| Seq. 
Scan Difference | 70.55 ms | 108.40 ms | 264.00 ms | 262.90 ms | +| Gin Sizes | 144 MB | 212 MB | 144 MB | 212 MB | +| GIN Contains | 0.01 ms | 0.01 ms | 0.01 ms | 0.01 ms | +| GIN Overlap | 0.09 ms | 0.16 ms | 0.09 ms | 0.10 ms | +| GIN Subset | 0.03 ms | 0.10 ms | 0.04 ms | 0.04 ms | +| GIN Equality | 0.04 ms | N/A | 0.03 ms | N/A | +| Gist Sizes | 33 MB | 18 MB | 18 MB | N/A | +| GiST Contains | 9.16 ms | 9.53 ms | 21.50 ms | N/A | +| GiST Overlap | 51.10 ms | 241.39 ms | 219.85 ms | N/A | +| GiST Subset | 7.54 ms | 0.83 ms | 0.31 ms | N/A | +| GiST Equality | 5.10 ms | N/A | 0.69 ms | N/A | + +## Distribution `random_small` + +| Operation | pg_set | hstore | intarray | jsonb | +|--------------------------|-----------|------------|-----------|------------| +| Insert | 515.23 ms | 1098.06 ms | 342.50 ms | 1211.01 ms | +| Table Sizes | 31 MB | 57 MB | 36 MB | 57 MB | +| Seq. Scan Contains | 21.70 ms | 24.28 ms | 43.88 ms | 25.06 ms | +| Seq. Scan Overlap | 27.26 ms | 178.23 ms | 74.86 ms | 79.40 ms | +| Seq. Scan Subset | 20.05 ms | 149.39 ms | 66.23 ms | 33.39 ms | +| Seq. Scan Equality | 19.57 ms | 23.55 ms | 21.44 ms | 30.77 ms | +| Seq. Scan Non Equality | 67.37 ms | 76.41 ms | 70.97 ms | 94.27 ms | +| Seq. Scan Add Element | 116.84 ms | 110.47 ms | 172.07 ms | 614.00 ms | +| Seq. Scan Remove Element | 73.47 ms | 99.78 ms | 76.33 ms | 254.19 ms | +| Seq. Scan Union | 163.12 ms | 183.58 ms | 245.62 ms | 1316.88 ms | +| Seq. Scan Intersection | 113.76 ms | N/A | 228.90 ms | N/A | +| Seq. 
Scan Difference | 135.81 ms | 149.07 ms | 241.17 ms | 243.40 ms | +| Gin Sizes | 93 MB | 135 MB | 93 MB | 135 MB | +| GIN Contains | 0.01 ms | 0.01 ms | 0.01 ms | 0.01 ms | +| GIN Overlap | 0.03 ms | 0.05 ms | 0.03 ms | 0.03 ms | +| GIN Subset | 0.02 ms | 0.04 ms | 0.02 ms | 0.02 ms | +| GIN Equality | 0.02 ms | N/A | 0.02 ms | N/A | +| Gist Sizes | 94 MB | 88 MB | 87 MB | N/A | +| GiST Contains | 9.40 ms | 11.13 ms | 10.70 ms | N/A | +| GiST Overlap | 67.98 ms | 140.67 ms | 66.69 ms | N/A | +| GiST Subset | 0.76 ms | 1.12 ms | 0.51 ms | N/A | +| GiST Equality | 0.63 ms | N/A | 0.50 ms | N/A | + +## Distribution `random_tiny` + +| Operation | pg_set | hstore | intarray | jsonb | +|--------------------------|-----------|------------|-----------|------------| +| Insert | 791.81 ms | 1300.54 ms | 697.03 ms | 1405.83 ms | +| Table Sizes | 50 MB | 72 MB | 61 MB | 72 MB | +| Seq. Scan Contains | 33.98 ms | 40.77 ms | 54.94 ms | 41.88 ms | +| Seq. Scan Overlap | 35.59 ms | 77.32 ms | 62.19 ms | 59.96 ms | +| Seq. Scan Subset | 32.47 ms | 73.12 ms | 57.06 ms | 51.70 ms | +| Seq. Scan Equality | 32.36 ms | 39.22 ms | 38.35 ms | 55.87 ms | +| Seq. Scan Non Equality | 122.26 ms | 141.25 ms | 138.92 ms | 180.59 ms | +| Seq. Scan Add Element | 197.00 ms | 181.90 ms | 308.40 ms | 759.94 ms | +| Seq. Scan Remove Element | 129.99 ms | 162.28 ms | 139.61 ms | 325.55 ms | +| Seq. Scan Union | 217.87 ms | 196.14 ms | 245.38 ms | 720.08 ms | +| Seq. Scan Intersection | 187.70 ms | N/A | 225.61 ms | N/A | +| Seq. 
Scan Difference | 210.84 ms | 181.59 ms | 234.10 ms | 313.88 ms | +| Gin Sizes | 75 MB | 128 MB | 75 MB | 128 MB | +| GIN Contains | 0.02 ms | 0.02 ms | 0.01 ms | 0.01 ms | +| GIN Overlap | 0.01 ms | 0.02 ms | 0.02 ms | 0.01 ms | +| GIN Subset | 0.01 ms | 0.01 ms | 0.01 ms | 0.01 ms | +| GIN Equality | 0.01 ms | N/A | 0.01 ms | N/A | +| Gist Sizes | 189 MB | 173 MB | 173 MB | N/A | +| GiST Contains | 10.25 ms | 11.01 ms | 10.33 ms | N/A | +| GiST Overlap | 15.77 ms | 27.91 ms | 19.31 ms | N/A | +| GiST Subset | 2.46 ms | 2.60 ms | 1.41 ms | N/A | +| GiST Equality | 2.00 ms | N/A | 1.82 ms | N/A | + diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py new file mode 100644 index 0000000..40c1ca2 --- /dev/null +++ b/benchmark/benchmark.py @@ -0,0 +1,567 @@ +import csv +import io +import pathlib +import platform +import subprocess +from abc import abstractmethod +from contextlib import contextmanager +from typing import Callable + +import cpuinfo +import psutil +from table2md import MarkdownTable + +TYPES = ["pg_set", "hstore", "intarray", "jsonb"] +RUNS_PER_SELECT = 5 +ACCESS_METHODS = ["seq", "gin", "gist"] +DISTRIBUTIONS = [ + "quasi_monotonic", + "random_gigantic", + "random_large", + "random_medium", + "random_small", + "random_tiny", +] + +WRITE_QUERIES = [] +SIZE_QUERIES = {} +READ_QUERIES = {} + +MEASURES_DICT = {} + +DISTRIBUTION_PATH = pathlib.Path(__file__).parent + + +def dict_items_skip_none(d: dict): + for k, v in d.items(): + if v is not None: + yield k, v + + +class Measure: + def __init__(self, file: str) -> None: + self.files = [file] + + @abstractmethod + def merge(self, file: str) -> None: ... + + @abstractmethod + def get_result(self) -> str: ... 
+ + +class TimeAverageMeasure(Measure): + def merge(self, file: str) -> None: + self.files.append(file) + + @abstractmethod + def get_result(self) -> str: + values = [] + for file in self.files: + values.append(get_exec_time(file)) + + return "{:.2f} ms".format(sum(values) / len(values)) + + +class SizeMeasure(Measure): + def merge(self, file: str) -> None: + self.files = [file] + + def get_result(self): + return get_size(self.files[0]) + + +def write_query(function: Callable): + def wrapper(stream: io.StringIO): + append_time_measurement_query(stream, function, 1) + + WRITE_QUERIES.append(wrapper) + return wrapper + + +def read_query(*access_methods: str): + def decorator(function: Callable): + def wrapper(method: str, stream: io.StringIO): + append_time_measurement_query(stream, function, RUNS_PER_SELECT, method) + + for method in access_methods: + if method not in READ_QUERIES: + READ_QUERIES[method] = [] + READ_QUERIES[method].append(wrapper) + + return decorator + + +def size_query(method: str): + def decorator(function: Callable): + def wrapper(stream: io.StringIO): + append_size_measurement_query(stream, function) + + SIZE_QUERIES[method] = wrapper + return wrapper + + return decorator + + +prepare = """ +\\c postgres +DROP DATABASE IF EXISTS pg_set_bench; +CREATE DATABASE pg_set_bench; + +\\c pg_set_bench + +CREATE EXTENSION pg_set; +CREATE EXTENSION hstore; +CREATE EXTENSION intarray; + +ALTER SYSTEM SET autovacuum = off; +SELECT pg_reload_conf(); + +CREATE TABLE seed ( + id int4 PRIMARY KEY GENERATED ALWAYS AS IDENTITY, + values int4[] +); +TRUNCATE TABLE seed; +\\copy seed (values) FROM {dist_path} + +VACUUM seed; + +SELECT id base_id FROM seed ORDER BY random() LIMIT 1; \\gset + +CREATE TABLE pg_set_table ( + values pg_set +); + +CREATE TABLE hstore_table ( + values hstore +); + +CREATE TABLE intarray_table ( + values int4[] +); + +CREATE TABLE jsonb_table ( + values jsonb +); +""" + + +vacuum_and_values = """ +VACUUM FULL pg_set_table, hstore_table, 
intarray_table, jsonb_table; + +SELECT values base_values, values[1] base_value FROM seed WHERE id = :base_id LIMIT 1; \\gset +SELECT values hstore_base_values FROM hstore_table WHERE values ?& :'base_values' LIMIT 1; \\gset +SELECT values jsonb_base_values FROM jsonb_table WHERE values ?& :'base_values' LIMIT 1; \\gset +""" + +seq_prepare = """ +SET enable_seqscan = on; +SET enable_bitmapscan = off; +SET enable_indexscan = off; +""" + +gin_prepare = """ +CREATE INDEX pg_set_gin_idx ON pg_set_table USING gin (values); +CREATE INDEX hstore_gin_idx ON hstore_table USING gin (values); +CREATE INDEX intarray_gin_idx ON intarray_table USING gin (values gin__int_ops); +CREATE INDEX jsonb_gin_idx ON jsonb_table USING gin (values); + +SET enable_seqscan = off; +SET enable_bitmapscan = on; +SET enable_indexscan = off; +""" + +gist_prepare = """ +DROP INDEX pg_set_gin_idx; +DROP INDEX hstore_gin_idx; +DROP INDEX intarray_gin_idx; +DROP INDEX jsonb_gin_idx; + +CREATE INDEX pg_set_gist_idx ON pg_set_table USING gist (values gist_pg_set_ops (masklen = 1024)); +CREATE INDEX hstore_gist_idx ON hstore_table USING gist (values gist_hstore_ops (siglen = 128)); +CREATE INDEX intarray_gist_idx ON intarray_table USING gist (values gist__intbig_ops (siglen = 128)); + +SET enable_seqscan = off; +SET enable_bitmapscan = off; +SET enable_indexscan = on; +""" + +METHOD_PREPARE = {"gin": gin_prepare, "seq": seq_prepare, "gist": gist_prepare} + + +def file_wrap(query: str, file: str): + return f""" + \\o {file} + {query} + \\o + """ + + +def explain_wrap(query: str, file: str): + return file_wrap(f"EXPLAIN (ANALYZE, FORMAT JSON) {query}", file) + + +def get_size(file: str): + with open(file, "r") as f: + lines = f.readlines() + return lines[2].strip() + + +def get_exec_time(file: str): + with open(file, "r") as f: + lines = f.readlines() + + total_time = 0.0 + for line in lines: + if "Execution Time" in line: + parts = line.strip().split(":") + if len(parts) > 1: + time_str = 
parts[1].strip().split()[0] + total_time += float(time_str) + return total_time + + +def routine_name(function: Callable, type: str, access_method: str | None = None): + prefix = f"{access_method}_" if access_method else "" + return f"{prefix}{function.__name__}_{type}" + + +def add_to_measures_dict( + function: Callable, + cls: type[Measure], + type: str, + iteration: int, + access_method: str | None = None, +): + file = f"/tmp/{routine_name(function, type, access_method)}_{iteration}.json" + am_translations = { + "gist": "GiST", + "gin": "GIN", + "seq": "Seq. Scan", + } + name = " ".join( + [ + am_translations.get(access_method or "", ""), + function.__name__.replace("_", " ").title(), + ] + ).strip() + if name not in MEASURES_DICT: + MEASURES_DICT[name] = {type: cls(file)} + elif name in MEASURES_DICT and type not in MEASURES_DICT[name]: + MEASURES_DICT[name][type] = cls(file) + else: + MEASURES_DICT[name][type].merge(file) + return file + + +def append_time_measurement_query( + stream: io.StringIO, + function: Callable, + iteration_count: int, + access_method: str | None = None, +): + for type, query in dict_items_skip_none(function(access_method)): + for iteration in range(iteration_count): + file = add_to_measures_dict( + function, TimeAverageMeasure, type, iteration, access_method + ) + stream.write(explain_wrap(query, file)) + + +def append_size_measurement_query(stream: io.StringIO, function: Callable): + for type, query in dict_items_skip_none(function()): + file = add_to_measures_dict(function, SizeMeasure, type, 0, None) + stream.write(file_wrap(query, file)) + + +@write_query +def insert(_): + pg_set = """ + INSERT INTO pg_set_table (values) + SELECT values::pg_set FROM seed; + """ + + hstore = """ + INSERT INTO hstore_table (values) + SELECT hstore( + values::text[], + array_fill(null::text, array[cardinality(values)]) + ) FROM seed; + """ + + intarray = """ + INSERT INTO intarray_table (values) + SELECT uniq(values) FROM seed; + """ + + seed = """ + 
INSERT INTO jsonb_table (values) + SELECT jsonb_object( + values::text[], + array_fill(null::text, array[cardinality(values)]) + ) FROM seed; + """ + return { + "pg_set": pg_set, + "hstore": hstore, + "intarray": intarray, + "jsonb": seed, + } + + +@size_query("seq") +def table_sizes(): + return { + "pg_set": "SELECT pg_size_pretty(pg_total_relation_size('pg_set_table'));", + "hstore": "SELECT pg_size_pretty(pg_total_relation_size('hstore_table'));", + "intarray": "SELECT pg_size_pretty(pg_total_relation_size('intarray_table'));", + "jsonb": "SELECT pg_size_pretty(pg_total_relation_size('jsonb_table'));", + } + + +@size_query("gin") +def gin_sizes(): + return { + "pg_set": "SELECT pg_size_pretty(pg_total_relation_size('pg_set_gin_idx'));", + "hstore": "SELECT pg_size_pretty(pg_total_relation_size('hstore_gin_idx'));", + "intarray": "SELECT pg_size_pretty(pg_total_relation_size('intarray_gin_idx'));", + "jsonb": "SELECT pg_size_pretty(pg_total_relation_size('jsonb_gin_idx'));", + } + + +@size_query("gist") +def gist_sizes(): + return { + "pg_set": "SELECT pg_size_pretty(pg_total_relation_size('pg_set_gist_idx'));", + "hstore": "SELECT pg_size_pretty(pg_total_relation_size('hstore_gist_idx'));", + "intarray": "SELECT pg_size_pretty(pg_total_relation_size('intarray_gist_idx'));", + "jsonb": None, + } + + +@read_query("seq", "gin", "gist") +def contains(method: str): + return { + "pg_set": "SELECT * FROM pg_set_table WHERE values @> :base_value;", + "hstore": "SELECT * FROM hstore_table WHERE values ? (:base_value)::text;", + "intarray": "SELECT * FROM intarray_table WHERE values @> array[:base_value];", + "jsonb": ( + "SELECT * FROM jsonb_table WHERE values ? 
(:base_value)::text;" + if method != "gist" + else None + ), + } + + +@read_query("seq", "gin", "gist") +def overlap(method: str): + return { + "pg_set": "SELECT * FROM pg_set_table WHERE values && :'base_values';", + "hstore": "SELECT * FROM hstore_table WHERE values ?| :'base_values';", + "intarray": "SELECT * FROM intarray_table WHERE values && :'base_values';", + "jsonb": ( + "SELECT * FROM jsonb_table WHERE values ?| :'base_values';" + if method != "gist" + else None + ), + } + + +@read_query("seq", "gin", "gist") +def subset(method: str): + return { + "pg_set": "SELECT * FROM pg_set_table WHERE values @> :'base_values';", + "hstore": "SELECT * FROM hstore_table WHERE values ?& :'base_values';", + "intarray": "SELECT * FROM intarray_table WHERE values @> :'base_values';", + "jsonb": ( + "SELECT * FROM jsonb_table WHERE values ?& :'base_values';" + if method != "gist" + else None + ), + } + + +@read_query("seq", "gin", "gist") +def equality(method: str): + return { + "pg_set": "SELECT * FROM pg_set_table WHERE values = :'base_values';", + "hstore": ( + "SELECT * FROM hstore_table WHERE values = :'hstore_base_values';" + if method == "seq" + else None + ), + "intarray": "SELECT * FROM intarray_table WHERE values = :'base_values';", + "jsonb": ( + "SELECT * FROM jsonb_table WHERE values = :'jsonb_base_values';" + if method == "seq" + else None + ), + } + + +@read_query("seq") +def non_equality(_): + return { + "pg_set": "SELECT * FROM pg_set_table WHERE values <> :'base_values';", + "hstore": "SELECT * FROM hstore_table WHERE values <> :'hstore_base_values';", + "intarray": "SELECT * FROM intarray_table WHERE values <> :'base_values';", + "jsonb": "SELECT * FROM jsonb_table WHERE values <> :'jsonb_base_values';", + } + + +@read_query("seq") +def add_element(_): + return { + "pg_set": "SELECT values + :base_value FROM pg_set_table;", + "hstore": "SELECT values || hstore(array[:base_value::text], array_fill(null::text, array[1])) FROM hstore_table;", + "intarray": 
"SELECT uniq(values || :base_value) FROM intarray_table;", + "jsonb": "SELECT values || jsonb_build_object(:base_value::text, null) FROM jsonb_table;", + } + +@read_query("seq") +def remove_element(_): + return { + "pg_set": "SELECT values - :base_value FROM pg_set_table;", + "hstore": "SELECT values - :base_value::text FROM hstore_table;", + "intarray": "SELECT values - :base_value FROM intarray_table;", + "jsonb": "SELECT values - :base_value::text FROM jsonb_table;", + } + + +@read_query("seq") +def union(_): + return { + "pg_set": "SELECT values + :'base_values' FROM pg_set_table;", + "hstore": "SELECT values || :'hstore_base_values' FROM hstore_table;", + "intarray": "SELECT values | :'base_values' FROM intarray_table;", + "jsonb": "SELECT values || :'jsonb_base_values' FROM jsonb_table;", + } + + +@read_query("seq") +def intersection(_): + return { + "pg_set": "SELECT values * :'base_values' FROM pg_set_table;", + "hstore": None, + "intarray": "SELECT values & :'base_values' FROM intarray_table;", + "jsonb": None, + } + + +@read_query("seq") +def difference(_): + return { + "pg_set": "SELECT values - :'base_values' FROM pg_set_table;", + "hstore": "SELECT values - :'hstore_base_values' FROM hstore_table;", + "intarray": "SELECT values - :'base_values' FROM intarray_table;", + "jsonb": "SELECT values - :'base_values' FROM jsonb_table;", + } + + +def get_system_information(file: io.TextIOWrapper): + def get_size(bytes, suffix="B"): + """ + Scale bytes to its proper format + e.g: + 1253656 => '1.20MB' + 1253656678 => '1.17GB' + """ + factor = 1024 + for unit in ["", "K", "M", "G", "T", "P"]: + if bytes < factor: + return f"{bytes:.2f}{unit}{suffix}" + bytes /= factor + + uname = platform.uname() + cpufreq = psutil.cpu_freq() + svmem = psutil.virtual_memory() + swap = psutil.swap_memory() + lsblk = ( + subprocess.run( + ["lsblk", "-o", "MODEL,SIZE", "-d", "-n"], stdout=subprocess.PIPE + ) + .stdout.decode("utf-8") + .strip() + ) + + 
MarkdownTable.from_2d_iterable( + [ + ["System", uname.system], + ["Release", uname.release], + ["Machine", uname.machine], + ["Processor", cpuinfo.get_cpu_info()["brand_raw"]], + ["Physical cores", psutil.cpu_count(logical=False)], + ["Total cores", psutil.cpu_count(logical=True)], + ["Max Frequency", f"{cpufreq.max:.2f}Mhz"], + ["Total memory", get_size(svmem.total)], + ["SWAP", get_size(swap.total)], + ["Disk", lsblk], + ] + ).print(end="\n", file=file) + + +def get_postgres_params(results: io.TextIOWrapper): + with open("/tmp/params.sql", "w+") as f: + f.write( + """ + \\pset format csv + \\pset tuples_only + \\o /tmp/params.csv + SELECT 'version', version(); + SELECT name, current_setting(name) from pg_settings where source <> 'default'; + """ + ) + subprocess.run(["psql", "-f", "/tmp/params.sql"], stdout=subprocess.DEVNULL) + with open("/tmp/params.csv", "r") as f: + output = f.readlines() + table = [l.strip().split(",", 1) for l in output if l] + + MarkdownTable.from_2d_iterable([["Parameter", "Value"], *table]).print( + end="\n", file=results + ) + + +if __name__ == "__main__": + with open(pathlib.Path(DISTRIBUTION_PATH).joinpath("README.md"), "w+") as results: + results.write("# Benchmark Results\n\n") + results.write("## System Information\n\n") + get_system_information(results) + results.write("## Postgres Parameters\n\n") + get_postgres_params(results) + for distribution in DISTRIBUTIONS: + results.write(f"## Distribution `{distribution}`\n\n") + sql_stream = io.StringIO() + + sql_stream.write( + prepare.format( + dist_path=pathlib.Path(DISTRIBUTION_PATH).joinpath( + f"{distribution}.data" + ) + ) + ) + for function in WRITE_QUERIES: + function(sql_stream) + + sql_stream.write(vacuum_and_values) + + for method in ACCESS_METHODS: + sql_stream.write(METHOD_PREPARE[method]) + SIZE_QUERIES[method](sql_stream) + for function in READ_QUERIES[method]: + function(method, sql_stream) + + with open("/tmp/benchmark.sql", "w+") as f: + 
f.write(sql_stream.getvalue()) + + subprocess.run( + ["psql", "-f", "/tmp/benchmark.sql"], stdout=subprocess.DEVNULL + ) + table = [["Operation", *TYPES]] + for name, measure in MEASURES_DICT.items(): + row = [name] + for type in TYPES: + if type not in measure: + row.append("N/A") + else: + row.append(measure[type].get_result()) + table.append(row) + MarkdownTable.from_2d_iterable(table).print(end="\n", file=results) diff --git a/benchmark/poetry.lock b/benchmark/poetry.lock new file mode 100644 index 0000000..07df55a --- /dev/null +++ b/benchmark/poetry.lock @@ -0,0 +1,54 @@ +# This file is automatically @generated by Poetry 2.1.2 and should not be changed by hand. + +[[package]] +name = "psutil" +version = "7.0.0" +description = "Cross-platform lib for process and system monitoring in Python. NOTE: the syntax of this script MUST be kept compatible with Python 2.7." +optional = false +python-versions = ">=3.6" +groups = ["main"] +files = [ + {file = "psutil-7.0.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:101d71dc322e3cffd7cea0650b09b3d08b8e7c4109dd6809fe452dfd00e58b25"}, + {file = "psutil-7.0.0-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:39db632f6bb862eeccf56660871433e111b6ea58f2caea825571951d4b6aa3da"}, + {file = "psutil-7.0.0-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1fcee592b4c6f146991ca55919ea3d1f8926497a713ed7faaf8225e174581e91"}, + {file = "psutil-7.0.0-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b1388a4f6875d7e2aff5c4ca1cc16c545ed41dd8bb596cefea80111db353a34"}, + {file = "psutil-7.0.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5f098451abc2828f7dc6b58d44b532b22f2088f4999a937557b603ce72b1993"}, + {file = "psutil-7.0.0-cp36-cp36m-win32.whl", hash = "sha256:84df4eb63e16849689f76b1ffcb36db7b8de703d1bc1fe41773db487621b6c17"}, + {file = "psutil-7.0.0-cp36-cp36m-win_amd64.whl", 
hash = "sha256:1e744154a6580bc968a0195fd25e80432d3afec619daf145b9e5ba16cc1d688e"}, + {file = "psutil-7.0.0-cp37-abi3-win32.whl", hash = "sha256:ba3fcef7523064a6c9da440fc4d6bd07da93ac726b5733c29027d7dc95b39d99"}, + {file = "psutil-7.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:4cf3d4eb1aa9b348dec30105c55cd9b7d4629285735a102beb4441e38db90553"}, + {file = "psutil-7.0.0.tar.gz", hash = "sha256:7be9c3eba38beccb6495ea33afd982a44074b78f28c434a1f51cc07fd315c456"}, +] + +[package.extras] +dev = ["abi3audit", "black (==24.10.0)", "check-manifest", "coverage", "packaging", "pylint", "pyperf", "pypinfo", "pytest", "pytest-cov", "pytest-xdist", "requests", "rstcheck", "ruff", "setuptools", "sphinx", "sphinx_rtd_theme", "toml-sort", "twine", "virtualenv", "vulture", "wheel"] +test = ["pytest", "pytest-xdist", "setuptools"] + +[[package]] +name = "py-cpuinfo" +version = "9.0.0" +description = "Get CPU info with pure Python" +optional = false +python-versions = "*" +groups = ["main"] +files = [ + {file = "py-cpuinfo-9.0.0.tar.gz", hash = "sha256:3cdbbf3fac90dc6f118bfd64384f309edeadd902d7c8fb17f02ffa1fc3f49690"}, + {file = "py_cpuinfo-9.0.0-py3-none-any.whl", hash = "sha256:859625bc251f64e21f077d099d4162689c762b5d6a4c3c97553d56241c9674d5"}, +] + +[[package]] +name = "table2md" +version = "1.1.0" +description = "Print tabular data in markdown format" +optional = false +python-versions = ">=3.6" +groups = ["main"] +files = [ + {file = "table2md-1.1.0-py3-none-any.whl", hash = "sha256:e88cf5b171bf5e5ae049a8a13082be49390b52e16ddf4ca21f2b0fe87f40c155"}, + {file = "table2md-1.1.0.tar.gz", hash = "sha256:8d288c460220b2e03c9145313ad145bb30e29b26407fae6c511077716b9846d8"}, +] + +[metadata] +lock-version = "2.1" +python-versions = ">=3.10" +content-hash = "07fd8f2d81090089064caf67ecef874754cc5cc430c11bb4da74d4a4cc785b0f" diff --git a/benchmark/pyproject.toml b/benchmark/pyproject.toml new file mode 100644 index 0000000..c14bd1c --- /dev/null +++ b/benchmark/pyproject.toml @@ -0,0 +1,19 @@ 
+[project] +name = "benchmark" +version = "0.1.0" +description = "" +authors = [ + {name = "carlosganzerla",email = "carlosganzrl@hotmail.com"} +] +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "table2md (>=1.1.0,<2.0.0)", + "psutil (>=7.0.0,<8.0.0)", + "py-cpuinfo (>=9.0.0,<10.0.0)" +] + + +[build-system] +requires = ["poetry-core>=2.0.0,<3.0.0"] +build-backend = "poetry.core.masonry.api" diff --git a/benchmark/quasi_monotonic.data b/benchmark/quasi_monotonic.data new file mode 100644 index 0000000..9deb7bd --- /dev/null +++ b/benchmark/quasi_monotonic.data @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2e17e176bdabe665b7c2548e4b7b20fd2620aa4cb6eabf08256bfcec62eb15c +size 15471075 diff --git a/benchmark/random_gigantic.data b/benchmark/random_gigantic.data new file mode 100644 index 0000000..33e980f --- /dev/null +++ b/benchmark/random_gigantic.data @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b284ceb22a3b7eb517e72b56652a8f6a6264f224d7c8418c1831f44574bb04fa +size 196578458 diff --git a/benchmark/random_large.data b/benchmark/random_large.data new file mode 100644 index 0000000..b4ee4d5 --- /dev/null +++ b/benchmark/random_large.data @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0336fdeb91b35f00f5cafd316c08146b02ebcaacca58eebb3be89a6c69236771 +size 138579668 diff --git a/benchmark/random_medium.data b/benchmark/random_medium.data new file mode 100644 index 0000000..c2fefa1 --- /dev/null +++ b/benchmark/random_medium.data @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:230ed72903f1860bd1603664e20a2f25a0431fe7f727d05621afbe455506b6e6 +size 39875342 diff --git a/benchmark/random_small.data b/benchmark/random_small.data new file mode 100644 index 0000000..e2a640f --- /dev/null +++ b/benchmark/random_small.data @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fadad0faad658b8f56936dcb5a4e9d8dcb872f3d4067049a686ba20c15500740 
+size 22708253 diff --git a/benchmark/random_tiny.data b/benchmark/random_tiny.data new file mode 100644 index 0000000..e9b395c --- /dev/null +++ b/benchmark/random_tiny.data @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2106ef4e693dd309915e85dc3b58b4630a004098ae6a18befbdc7b670899ab89 +size 21741244 diff --git a/data/pg_set.data b/data/pg_set.data new file mode 100644 index 0000000..90655e8 --- /dev/null +++ b/data/pg_set.data @@ -0,0 +1,1000 @@ +{217419,456870,462437,797909} +{242156,247172,388336,522876,702300} +{184153,223401,397005,488051,505962,562105,634229,800864,851753,898821} +{15444,23221,583565,805669,812429,919238,960115} +{602002,714087,762955,915951} +{258273} +{348022,432022} +{242258,334475,380805,438406,530037,544443,574767,881943,928043} +{97231,161451,258528,407964,597108,718357,833605,960246} +{33370,249697,292953,294525,492493,506097,577153,723181,738339} +{135330,178719,211217,547472,589245,601659,728866,772126,946813,988182} +{215776,237013,245295,318350,742971,753460,940854} +{127378,133239,147934,376914,428383,502458,539877,643225} +{77963,139910,195477,234111,547993,911649} +{126046,276506,446712,629718,980205} +{17938,68471,417939,437603,481084,710872,718478,730594,759771,939377} +{44352,226085,348121,358859,584831,693544,724443,732497,740265} +{410156,447209,469909,804613,830409} +{127898,202734,271142,386425,850683,914844} +{324845,548174,555176} +{126357,142289,342867,421822,745023,933557,969299,970803,994692} +{102496,263502,415468,476760,647144,754524,846225} +{474314,619603} +{32612,90512,131469,195561,736366,757806,861566,898318} +{211344,269795,562912,829113} +{31773,273384,391419,470327,514195,523741,697588,758195,786640,805745} +{26459,192700,221631,748682} +{203055,972907} +{138848,295937,304088,380451,532927,648199,991567} +{120736,265026,435243,521051,931920} +{25342,109613,544056,555620,855813,898928,950296} +{509369,722369} +{16781,17346,629338,702571,753342,780495,815091,962192,983610} 
+{104704,251477,683298,705833,861887,889416} +{280341,579155,665902,909792} +{155039,460214,977059} +{543235,832584,914922} +{100701} +{15289,81317,269756,686569,703978,766437,972183} +{307899,910768} +{48962,152171,311526,370283,413082,440877,510559,814756,954397} +{61227,314588,347258,415312,496444,922426,994441} +{88348,98673,158867,394794,579076,728564} +{237843,368459} +{317368,417374,767898,877105} +{9036,817227} +{111608,137123,320256,357815,560803,730492} +{216123,323512,357343,409450} +{1648,100677,163564,170022,270070,819484,939417,983508} +{37604,631879} +{432470,657637} +{39627,73739,225447,272891,683928,701664,706871,729986,945658,953547} +{355442,631952,897946,913840} +{108568,318986,679389,721518,745313,991789} +{184379,603262,609370,856746} +{3544,19953,110647,184717,280819,325686,573643,708237} +{65115,112718,194145,320181,329846,339839,645691,700909,848985,893827} +{141370,398630} +{389496,417213,596747,602652,719534,755509} +{1786,275380,870957} +{201601,434311,585718,766079} +{128248,358465,455443,510029,667782,774875,922589} +{139144,171751,520191,692837,693057,723662,912193} +{150229,470353,480538,514611,518132,598838,885847,984115,997337} +{79985,195776,289050,448681,588827,593993,859200,871307} +{292757,493734,788148,849060,890172} +{56904,242820,259180,273477,376461,398757,862297,872332} +{285060,416380,455396,509674,928773} +{17398,471877,577118,912979} +{187472,243490,412823,483662,491124,828046,929444} +{37435,103117,387388,612331,641729,715059,726262,876085,933397} +{2890,383458,501457} +{91087} +{38309,507494,567415,880159,954918} +{29364,254462,283984,386307,594302,753944,789140,964924} +{245166,537089,610355,805472,848775,939459} +{67991,94601,118275,361976,803706,842212} +{44790,47647,296801,453455,745615} +{227255,268474,521222,621169,708428,933405} +{39098,412576,659765,669072,678070,786360,978507} +{35557,433335,582841,600688,663244,844725,860830,982023} +{254386,700512} 
+{12486,210370,411379,429231,507925,547603,559737,738568,758861} +{53463,93390,514451} +{24204,310601,349374,600769} +{203823,614354,971949} +{284008,355473,683144,943133} +{103949,401016,515776,935813,966035} +{12264,144715,269253,296901,392577,592538,625508,679810,910580,928820} +{301185,301564,479070,821341} +{753640,783275} +{347776,536104,559636,760747} +{524592} +{9871,122118,513809,636041,701186,710800} +{716270,772366,791993} +{219877,309496,523963,561811,591299,770261,886278,904706} +{57154,283663,318076,373080,462082,727663,958465} +{277466,462233,491808} +{433751,789390,896426} +{74002,391444,493660,625483,687051} +{242329,432296,519887,741659,777210,853075} +{85983,478635,663184} +{9513,196279,885102} +{56129,166101,166914,300060,324990,357781,396309,573389} +{654017,678851,980730} +{259294,505678,577959,833207,958345} +{234644,345437,522585,652516,747393,752350,884152,936689} +{433161,499884,756533,770149} +{107110,222715,288334,589027,641540,660866,897028} +{32984,39435,52327,160293,424600,584860,883408,939980} +{159602,171434,288164,315650,334475,389301,616467,646915,679782,964648} +{215516,233769,245025,277463,294307,380096,552424,632970,732357,930938} +{134781,481573,481763,724922} +{17808,118331,427801,517892,646646,699065,752733,765039,906111,953421} +{37075,133597,315531,470327,663302,786112,808553,978059} +{93935,349601,776978,810557} +{470558} +{197379,313555,452954,533820,572327,609533,633122,703336,865045,951652} +{134508,158103} +{57275,99746,527416,692595} +{230611,552591} +{182902,654517,696296,777167} +{357815,425609,456144,555263,938611} +{386806} +{205193,298987} +{736903,742738,755898} +{212452,226283,594665,619918,969374} +{314637} +{19935,539549,687658} +{63296,148542,392299,452144,793022,835193,876172} +{596971,597783,734816,747662,809978} +{22766,106034,151719,262491,312614,363711} +{416070} +{79667,95047,325524,467764,566503,700414,818023,857222} +{59963,87353,233228,372662,443533,459658,679279,838098} +{755026,963471} +{780536} 
+{101789,124978,221578,241572,306940,433436,493219,728798,845628} +{810798,889225} +{306210,559590,602799,700148} +{13786,173062,400413,627456,662038,820057} +{44149,460111,917639} +{65070,328495,406563,497756,938051,965873} +{245125,409951,754029,952468} +{6349,93803,127769,178062,504503,504703,610450,877027,899852} +{846230,945709} +{91559,207638,345352,950733} +{748489,795528} +{60059,263647,495293,706850,903482} +{15962,242111,378345,425186,447359,548658,584324,644568,835138} +{98443,174418,358204,448350,490840,553402,712898,788330,867745,877784} +{105825,175847,282862,367046,879400} +{86373,190426,300405,584725,706686,773794,783796,916418} +{11803,67432,70727,74611,226265,286278,369078,593454,909081,995111} +{141448,355460,387112,467610,623787,839456} +{171268,380081,470750,627265,869189,878361} +{49679,94560,268460,698528,872517} +{11668,72976,142553,506655,707474,786225,832545,989047} +{336765,353541,495537,528445,545789,622011,638060} +{376708} +{404396,540704,550456,577551,646940,786934,882157,924683,978701} +{53367} +{6023,98240,103065,226962,509027,561618,945040} +{20448,86729,218145,284035,407008,577594,780746,952875} +{91936,955610} +{61450} +{69154,246684,297861,370859,553581,585827,927582} +{67482,84419,248461,290119,462894,583916,623482,907174,933607} +{101523,183523,202606,249431,368600,407811,412620,840087} +{14076,258493,333892,348639,398297,670031,797349,930269} +{523790,529615,547554,576890,655990,669617,672702,749285,802792,822287} +{46110,103500,147314,365801,433275,471767,478237,819790,907584,931469} +{861843} +{263913,631585,730409} +{69474,91784} +{101084,241673,578731,621213,722291,881808} +{46902,633617,726987,786586,954088} +{508553,581965,923341,997657} +{43183,118777,347152,391301,395290,594620,891288} +{103088,114562,486028,680496,747532} +{134395,610406,870849} +{53396,688215,695338,855977} +{167055,268908,278019,558180,941771} +{13717,89335,109189,251077,338717,350132,407927,703943,947948,953362} 
+{76465,118523,137878,482087,649035,745785,922312,945643} +{745548} +{248595} +{241233,354348,896283,950476} +{43226,58759,85298,398789,526633,592919,656928,723762,974087,985389} +{94328,122932,162212,537533,683308,709580,901473} +{52550,345908,415752,735301,825786,861131,923062,952435,975022} +{18101,248061,307157,373327,869158} +{8103,208459,247432,416691,740222,815373,884875,919792} +{70261,154923,336234,439941,444373,567219,886780} +{132759} +{208896,309638,369537,685326,693698,699085,782755,826009,910025} +{327655,412896,433449,682303,715500,815233} +{15513,146254,204842,206147,438510,700951} +{111503,378584} +{87601,274107,342412,346529,355683,485163,574402,714393} +{323671,414155,709929,719907} +{634148} +{380834,557621,658343,757033,997414} +{11075,100809,367717,737216,741744,829071,957450} +{53729,584408,949223} +{44472,133639,242569,397272,601872} +{17670,34978,166065,239493,275114,531017,591724,676167} +{85768,159588,223872,364214,586856,741895,751418,874017,876745,929703} +{13714,72449,128444,156458,174498,204605,232150,428683,649956,817619} +{15979,211596,262993,344701,451821,592904,779493,864768,998369} +{27352,41897,59648,82178,143097,509767,511156,693818,791084,980879} +{91555,321241,452582,514235,659950,682149,684647,732728,849224,869610} +{199353,218597,241140} +{48718,218795,444693,503076,655712,842503,876381} +{146551,363976,433678,568020,603182,864941,893343,936040} +{50739,197315,496442} +{68515,136480,232719,234711,297726,346972,375405,651322} +{191062,976424} +{15057,18798,91414,342641,380698,421483,448595,454330,873371,977946} +{653881,856463} +{337796} +{26259,127124,254984,424349,443695,545748,576302,627981} +{201457,384799,667845,725586,885035,966805,978981} +{333926} +{571734,596718,705947,809215} +{106338,159100,323811,591741,822897,890980,903986,965604,978982} +{276941,320887} +{66628,179194,210375,279522,296240,318568,715431,937838,987777} +{467359,641956} +{252977,424292,477533,546979,556122,580686,637313,710230,866134} 
+{301928,421768,474528,564805,714904,720042,836454,948966,982629} +{469289,539642} +{663818} +{458642,852382} +{66745,81444,148501,218548,601957,609651,617152} +{352064,555155} +{141580,330733,355674,364505,479755,606313,610373,854844} +{664978,670252,701650,713505,713833,871075} +{419336,457469,559953,945959} +{161231,358542,983712} +{192792,378874,726271,896864} +{258259,458541,538410,589856,694236} +{911264,998757} +{27858,263240,585657,687146,888723,997765} +{129246,315001,485098,535423,815761,845240,889497} +{353826,356472,367933,793532} +{193492,257814,482576,503373,656524,715137,846143,887302,933842,964005} +{62649,123954,127290,260005,296153,343641,533492,906578,912805} +{74213,261779,280227,300411,311434,443794} +{101339,164978,317086,502896,503336,873206,924264} +{109210,565867,577580,583561,756794,783452,970513} +{478904,501921,541832} +{128984,716022,764809} +{603242,808824} +{984,16884,138814,180301,594998,651887,818538,985608} +{98527,602636,603562,622401} +{24212,345739,430931,445819,708795,711942,752524,881438,914168,948242} +{422513,424981,481070,672208} +{39384,146306,204073,437983,492624} +{278091,317915,412300,973422} +{12480,216602,474783,923176} +{33213,49359,101606,221137,371556,535793,592161} +{110511,197132,251013,294810,333008,558378,599516,861433,885554} +{374157,570577,668766} +{130147} +{3273,256883,306593,483887,551077,776939,824473,875660,915590} +{272157,331623,397048,579002,763386,780011,853243,904682} +{66170,192303} +{253178,315913,362152,391759,449858,801446,932926,958027} +{171177} +{560916,624477,657156,877558,888015} +{798455} +{11870,124110,330286,373177,543183,615812,639342,774603} +{345258,450272,603561,818106,857037,889205} +{128819,160996,267765,313047,670460,687446,867282,916616} +{401256,538450,652027,741046,797776,848928} +{555706,669311} +{284687,376356,416214} +{75556,79002,138103,147402} +{477300,587488,821209,884645} +{27638,103596,157483,349257} +{333928,411385,452862,558735,565843,623315,923151} 
+{362400,443179,444901} +{82163,281417,302857,444083,672378,693990,735502,896264} +{38622,171193,733362} +{436339,740411} +{53191,187767,223438,338418,576470,969246} +{34800,195396,597333,658690,709106,728879,818069,889871} +{32547,379822,487510} +{93894,265830,272309,426557,526268,670917,792248,824285,868721,873752} +{16828,46071,461874,478934,819892,897387} +{88725,188816,261048,319495,483661,579019,838182} +{112462,218702,263396,347550,589024,694725,800773,838801,888544,957464} +{23442,216922,233692,393366,545519,811374} +{164744,348023,436368,456628,542876,601636,740094,837457,862274} +{65146,174868,239180,357867,451395,591596,817877,892983,932848} +{209132,299995,351243,351391} +{130326,220491,302429,472723,495055,660234,819345} +{19973,136959,228904,288321,322245,385044,633556,686609,762286,820786} +{319655,407257,420128,680453,911027} +{94562,110961,130210,254481,269081,305999,537101,557127,698934,901528} +{332214,422348,644106,686540,698215} +{67562,69483,310972,331692,434841,835200,966402} +{200290,216660,292643,537404,543757,636269,747756,781112,853678,873271} +{4399,50248,94485,123956,297875,469255,534161,679217,737591,759046} +{614413,630193} +{16650} +{156377,225268,572113,726468,772490,963726} +{294431,505926,698738,762891,810965,956333,964143,975905,983156} +{83642,261325,282551,514572,607230,663710} +{53962,55030,281535,535058,610431,662617} +{75039,833454,955932} +{51237,151917,774563,782007} +{836500,854475} +{155829,214068,300312,318725,395415,429293,435547,438251} +{151698,446799,476576,485279,617670,736513} +{60162,251807,451645,797793,866144,957445} +{151913,197091,275748,290662,477371,561848,655225,978614,991216} +{120417,180873,245190,255558,323219,520671,605627,683018} +{134401} +{714459,940047} +{534184,597639,710454,721674,725219,831769} +{4950,101414,181974,260822,306706,617157,683346,723799,912888,983388} +{37476,217230,360741,396426,665689,676149,688199,750485,790293,798406} +{529883} 
+{68150,273078,333684,391133,396345,545510,591031,646061,840211,954534} +{5387,13839,157910,312676,356602,501182,604389,619734,905291,967497} +{819761} +{168259,214635,285429,422224,672284,716760,780714,846510,928359,934998} +{266907,354459,444510,489485,616465,621514,832289,931873} +{272576,389187,536438,557394,633633,639810,825704,945441,977118} +{17702,79236,96795,287320,327682,777787,807033} +{43508,905582} +{786121} +{97633,257368,308236,986024} +{6132,496157,644121,851191,851359} +{59417,266667,488616,694013,845742,934729} +{2269,129506,280455,300139,532658,554375,558058,685508,800523} +{78660,97053,237995,295766,349715,478106,704631} +{518063} +{301790,625122} +{151578,184139,198227,209954,252688,466277,717351,750332,767430,874267} +{428843,513227,928201,981901} +{33115,282659,508237,523144,780154,944846,947072,971718,978304} +{39079,250953,293641,392506,395236,409112,513431,813477,860510,985750} +{26125,62967,244987,395008,502902,905698,967606} +{610062} +{27779,188179,318362,331984,361149,572525,624603,698915,722985,865156} +{161881,373904,383561,426705,433352,751626,818022,917432,941475} +{474957,497446,503691} +{27865,264747,294254,435156,974221} +{129185,158613,526181,643292,737962,858945,958203,985928} +{34889,377415,472564,487199,578460,594952,738533,842214} +{4871,10043,107758,141251,247668,261359,415243,505343,552788,909738} +{62085,76483,312992,442199,565219,569189,702659,846446} +{59717,155606,531993} +{241811,312744,334237,610566} +{244265,312596,320996,473237,567802} +{227376,366859,583203,636740,734841,786046} +{82454,320813,332730,357094,410798,823074,883280} +{580135} +{847776} +{42427,95114,351910,918679} +{159522,310787,447645,702900,710592,783921} +{247578,874381,915361,919491,972496} +{337161,799499,854004} +{24732,54378,685974,809359,829848} +{295718} +{3425,38006,119663,741670,899283} +{839623} +{36956,117561,183395,185855,613176} +{493965,659877,756380,826025,874788} +{75319,126191,837056,914499} 
+{57658,63934,183395,496665,531949,695896,811435,905041,934633,983612} +{391895,531072,703718} +{527571} +{42657,85350,340901,348622,478311,536361,599520,787127,796294} +{16938,23055,70210,88856,103507,223363,402576,645689,696418,916985} +{874465} +{63899,398327,517553} +{18529} +{37483,82364,147297,254822,576087} +{82208,90489,422913,447177,643588,784858,871987} +{98768,512010,531270,535996,545106,693495,867789} +{71798,231194,356301,416986,563101,931168} +{31557,465288,524874,541079,569735,573850,731859,985028} +{317906} +{142045,166822,232183,305852,378803,684214,713151} +{69071,124945,128815,432088,521010,526950,665730,674233,729129} +{2559,623248} +{38422,103814,518739,541135,560742,764788} +{183187,650779} +{21451,80715,119097,245369,280068,438701,642377,876556,942321} +{439966,495993,533868,544468,815087,980352} +{301978,489662,509452,585700,764824,929664} +{122992,144369,557306,654622,774760,903795} +{24989,113411,209659,395177,416196,601889,910663} +{617171} +{600726} +{7359,139923,198611,443540,511671,580592,677134,688599} +{448652,499421,524859,664094,719576} +{222882,243921,424360,646137,868636,970867} +{226176,265459,486763,550783,560305,780194,944021,979702} +{19608,42428,251142,278195,399957,574405,694751,966934} +{78290,136403,333264,547603,725171,864092,973480} +{145094,191247,280305,363849,479517,699793,742081,880550,958406} +{343148,496702,728436,971816} +{132556,553517} +{230143,294168,294522,499563,535044,589016,825679,949170} +{125738,312823,323274,360325,503403,634102,649615,652790} +{89729,120026,169538,233094,454773,537369,657223,692524,847050} +{70333,102741,160064,192628,287045,454963,589927,648839,820681} +{40068,130851,389156,796228,927416,987117} +{34991,77053,96591,150594,243053,421429,440247,602397} +{208993,245690,266049,726723} +{159450,352706,504454,526139,617899,755304,907347} +{14193,438019,664081,698943,739293,891241,909338} +{608008,889708,984686} +{353506,399972,419219,678136,683336,779146} 
+{58110,268924,418874,612120,739976,743888,777552,822875,958923,987884} +{49361,171706,251862,367613,400076,479198,759744,825742,856583} +{224113,232339,257173,404640,606070,611871,702848,951576,956931} +{224241,423152,452876,624548,644068,680814,769699,847900,877311} +{52057,74936} +{6352,202965,232582,780239,906022} +{94155,258354,281210,848969} +{212489,325262,373071} +{128275,288667,330139,532050,658086,768275,857744,947626} +{43915,235209,370000,398371,549335,774317,872514} +{119756,187998,222336,747304,784920,996185} +{24373,120971,261610,596232,721722,752901} +{24065,271437,303807,329409,513588,628743,656090,667110,821176,936244} +{41481,160768,166852,532951,726750} +{367511,724583,784368,796535} +{7662,9449,63600,340515,386099,403717,591022} +{183800,462952} +{595919} +{356935,456436,478691,781621,891869} +{195410,584567,697480,768452} +{19304,67377,233284,305035,373291,690413,716973,774320,999040} +{43835,112134,456691,480889,525401,730707,852877,918827,925544} +{243624,408665,602489} +{192007,328947,453388,561173,727152,803222,924034,944116} +{78743,88502,131695,195288,281427,335371,585423,718005,767035,825404} +{199578,238020,244780,484968,748179,761313,959275} +{99265,158159,187157,391900,449801,508351,558939,653535,845582,930459} +{66333,185716,287089,313257,789527} +{46630,349124,940473,989557} +{106965,167764,213480,345933,392642,502419,513681,807358} +{168510,612168,785386,886365} +{758131,874878} +{330576} +{212688,385490,441622,634616} +{137826,199970,968672} +{628816,778346,974440} +{17355,471868,565213} +{66600,151376,486448,667116,773635,840219} +{153991,161647,425303,483443,565317,578344,680714,724400,930670} +{242482,535338,766658,820532,828147,969494,989239} +{32905,377951,574618,582237,967257} +{197698,255789,392648,473500,537126,558815,793240} +{381573,452258,685124} +{197863,332904,478630,776374,796705,849926,920412} +{121645,150186,433296,566993} +{14503,296705,305660,781582} +{137112,632943,795823,962220,987147} 
+{125762,153003,154031,217890,615013,688127,710075,846353} +{96558,341212,409612,487232,493647,842260,953223} +{131502,131872,232127,479027,487885,721511,762482,861573,876515,914980} +{486920,586068} +{470864,536707,896178} +{7714,12381,63845,338246,417325,529769,737633,822639,894255} +{160049,320897,562009,722461} +{289170,360405,458385,477990,502403} +{974521} +{19568,162036,260848,319475,561654,688740,689216,723330,727259} +{64462,169844,424046,661361,840918,876153,926166,992131} +{244774,287566,329782,475753} +{132569,133639,630750} +{103263,161756,209039,404083,472868,681835,759451,886332,998784} +{170796,421095,480053,701237,806827,977959} +{232478,718711,882010} +{44043,274814,574041,576101,588842,872029,895876} +{26989,82384,137272,141100,314225,351117,455040,862384} +{26153,242362,754914,780171,869912} +{112874,324542,646427,802919,811471,966040,988310} +{347842,348216,806315} +{81411,139204,235320,250385} +{234619,234827,297203,377265,516328,587740,707717,724451,935607} +{662192} +{193222,346173,383407,485356,659488,712107,767992,877406,944343} +{37168,96259,117169,464381,561673,582906,649619,709187,905655,990642} +{151211,390875,488760,561282,610686,907622} +{41399,155600,370659,735481,796451,871496,897022,943691} +{17420,123023,170532,304578,321990,470918,506054,576749,762695} +{109503} +{137963,161145,639897,988075} +{33347,60070,396359,613532,941537} +{147887,569125} +{244090,391153,592733,656694,699707,701306,947464} +{101050,582412} +{230661,237603,764998,820036} +{98008,117182,263605,608730,623959,696786,896347,919401} +{611747,657843,724948} +{255980,350519,412459,636070,675968,801651,827483} +{221043,386977,474141,712365,765642,816472} +{951369} +{280837,807515} +{21378,74601,171493,225544,549474,598409,771137,841433} +{174206,313830,866897,888498} +{286931,629219,741064,752004,757952,968236,970408} +{806005,891457} +{70545,266758,278065,682501,715669,861738} +{854185} +{106061,176466,227407,304544,468557,490842,578760,659160,794333,860751} 
+{180278,199095,313686,573238,585427,831842,854565,874059,958268,958332} +{262694,606701,696133,759073,979121} +{358015,921584} +{82937,117062,268559,526023,545517,732144,763194,960442} +{546212,620133,737404} +{221989,822360,876809,954050} +{23066,89591,124365,297552,377564,417340,620111,757851,774382,847089} +{52334,136590,277152,616562,898564} +{117170,138244,280731,342064,473368,711328,767278,781272,818248,997366} +{62341,142089,176412,506615,617138,732410,745866,815151,862887,919970} +{14812,131550,844323} +{146102,376992,379364,489142,558904,661723,661869,768254,902078,946757} +{95142,393233,426553,535711,618456,668929} +{35636,501111,573664,730700,780948,815926,828697,935584} +{56265,422689,589134,812318,914054,916090,993560} +{47571,444555,487455} +{150295,329324,923464} +{7293,508934,834084,881656} +{229022,367472,522312,573857} +{127102,208766,238719,457356,483265,533324,598236,835752,941170,974114} +{6741,59313,172442,203727,279871,340685,532058,905332} +{151646,395905,509869,734253} +{138330,317989,760119,879256} +{38692,303548,502508,655090,655498,751666,822439,867656,876857,906632} +{390531,433225,875445} +{31693,138844,366555,599825,666450} +{88489,270688,520768,561337,859522,957635} +{201959,397742,487297,642789,701420,748521,788842,957159} +{417728} +{678789} +{424631,522291,867174} +{34220,56918,300660,368449,563783,648141,802530} +{217844,554610,601774,689593,892705} +{66147,108711,172973,506269,545155,594787,696349,735647,814009} +{73447,341047,374455,520867,799503,827378,848364,870732} +{926215} +{52621,149601,343118,518736,632521,798624} +{371415} +{200791,378879,541121,563789,798540,816568} +{575952,974896} +{61566,89957,602840} +{86513,210296,315162,346231,641877,781607,820736,844894} +{45427,448813,747260} +{256300} +{207150,446918,527273,765321,866003,875489,998077} +{211022,264376,504655,584678,637370,657018,761606,767744,796701,864215} +{188636,311257,833471} +{188124,206613,366416,473334,582393,607439,688505,800849,907360} 
+{73499,73527,109507,429178,467607,480599,503610,817410,896901,965287} +{175634,386821,629326,642017,721442,829043,981634} +{73358,309249,420430,440566,860781} +{289288,386522,473727,681171,794382,845748,883908,923676} +{261201,524877} +{28029,128957,515502,815503,903261,993421} +{405201,620713,634198,855336,948977} +{86238,123306,205155,472381,505662,604888,736399,957407} +{81821,292344,528363,633471,677682,725521,845337,943819,977748} +{532202} +{559344} +{982139} +{617664,796878,817952} +{29995,691479,737189,738967,763282,838762,864669,959202} +{9477,144567,615189,669671,676028,720812,778671,792175} +{68695,70864,370997,579052,583447,651700,706241,731639,768324,841531} +{100358,509101,585650,643940,888621} +{9880,51464,142927,283008,442090,545228,631124,632599,752423} +{109296,918893,949652} +{68351,228701,712031,728734} +{918829} +{188621,477290,488991,572570,578043,616354,729898,833717} +{150069,822699,904252} +{82584,208843,313167,464261,720700,813601,828824} +{143099,268275,324932,473922,479561,520925,603099,864374,866406} +{559754,648935} +{122850,204731,350908,498831,555125,771645,861421,878700} +{82746,533618,611139,875711} +{81382,94053,122011,349291,526801,602047,898227,925515} +{36653,302935,336093,762833,836495} +{72495,256232,310298,537899,638273,651032,804500,872341} +{96660,120109,247619,321306,393552,393739,528639,957901} +{525627,618674,693523,963603} +{895169} +{758291,831999} +{24561,151958,219618,525990,664090,714512,900392,984749,985634} +{268316,526560} +{95101,277135,389795,506342,715295} +{79106,266198,527998,702489,787264} +{57172,59082,177060,285863,542250,603532,631792,680865,685694,779931} +{67650,90422,159657,413227,433318,450696,508338,768033,832610,927235} +{90209,313715,710424,813820} +{385649} +{171633,347304} +{100091,126023,391539,487383,700169,734332,910581} +{7309,24196,169677,259734,272601,409138,534552,681866,860231} +{73484,100249,109514,250244,525979,567292,681724,764715} +{538251,770053,820356} +{169402} 
+{352219,766670,810870,863734,882883} +{16615,77943,386112,574098} +{71742,231617,243293,432601,456449,842490,853037,919011} +{17704,41710,73342,131998,660183,745606,881426} +{23299,148787,440274,629380,739016,801347,915628,938108,987755} +{22831,177783,549109,597495,727650,808718,809589} +{201666,317894,463996,512506,565564,620306,641634,661138} +{500666,737384,826138} +{120568,171833,177113,257617,522362,526256,812381,959828} +{230369,246149,290653,313004,489052,751706} +{98591,114036,179579,627139,903477} +{511323,767036,912964} +{248183,353146,404849,502038,524974,603485,639135,691823,831057} +{633968,746386,981195,982735} +{807353} +{322323,767076,840774} +{8606,60571,204404,279572,742750} +{319445} +{210268,222350,596621,665030,666812} +{679821,714116} +{62899,154086,562654,576288,647245,761771,764318,904930,947740,995180} +{14516,43619,68687,169241,194449,213752,341056,452749,466721,746023} +{129516,206388,578644,671343,762665,842874} +{249344,663426} +{10809,48993,59810,255196,303178,705824,763628,932835,984378,992489} +{12756,81063,212093,249658,254675,302330,478595,511163,591357} +{158799,373638,554100,599216,734738,901185} +{232821,335804,378238} +{400702,497068} +{158075,176324,501203,662871,952021} +{302493} +{256026,257109,819393,835218} +{34035,56981,318431,393858,397710,402182,421759,837522,921008} +{181316} +{454235,485655,868210} +{189706,332375,557756,915178,923103} +{147194,394234,423979,708200,708245,804978,847388,880528} +{112329,206701,415411,513163,791266,999733} +{66088,420921,426520,518774,782761,951910} +{79309,93513,578109,753789,943246} +{251353,253408,474959,567436,592630,603848,635028,730438,747860,788651} +{68809,454829,636601,695032} +{4801,6631,219480,222159,773774,780996,794150,943893} +{95244,435421,479735,872710} +{47035,236594,290338,597062,791993} +{953989} +{163755,209588,220739,254518,333453,581073,693761,827028,838919,983986} +{631340,756361,911813} +{233083,441234,552463} +{653732} 
+{23571,156214,323882,681769,798014,883875,891228} +{306039} +{184378,392198,460676,550782,598238,781494,859335,877907} +{279045} +{52775,98300,205966,355632,389773,488689,495442,533880,681893,739428} +{84915,495881,648078,650777} +{47613,502782,523393,729244} +{494529,875856} +{34526,42410,286148,611477,864161} +{169655,345053,426844,690908,753984,770298,854673} +{243521,418846,615351,773490,872628} +{109884,320983,327446,498116,513132,571479,686150,921055} +{69096,77420,90052,210523,383734,505197,511526,540570,692961,844395} +{282387,321018,402426,612032,833590} +{37989,147865,347491,564007,600927,683013,719631,749245,749628,995974} +{35331,50679,306176,377048,386887,389037,500136,577807,602853,860516} +{8189,649211,687506,691895,944095} +{126295,344560,476259,502451,616322} +{70178,259890,422797,676745,691145,950398,987549} +{488254,715054,930460} +{72393,242391} +{137059,207523,372389,537828,579845,634948,643395,662312,776864} +{212775,230810,284049,386927,515442,619658,767990,819377} +{368427,443486,662506,696080,765157} +{129735,177823,252243,816811,846755,935270} +{690533} +{216108,446885,639851,715184,904418,962354,973073} +{140950,181496,428233,468751,490197,518104,560883,586173,746521} +{110466,183883,223329,276074,478943,930710} +{249736,535620,553811,571375,882695} +{58028,75081,262879,376089,438376,471101,673706,693961,935950} +{62819,218149,361016,678779,872859,909540,933734} +{7560,14091,359415,879141,897872,977618} +{205897,254081,617190,658150,729903} +{89460,278010,889989,963544} +{86719,154844,468995,706150,746547,787179,885325,920893} +{48465,249473,561242,619779,726529,773719,830807} +{154704,554909,713892,774896} +{107661,211204,632399,684343} +{565563,632603,740557,900643} +{73900,203090,515386,586656,675248,751858,844165} +{582323} +{349122} +{19435,26879,89439,106789,121268,299779,466097,823370,898918} +{22430,158208,276254,405346,482392,643069,689332,852027} +{161613,216505,333410,440777,526392,792072,826356} +{155756} +{766982} 
+{180813,216580,396011,620370,677061,678664,828907,895553} +{77252,516341,690546,814561,943344} +{81895,244521,410539,583849,668403,848942} +{310726,537851} +{110561,220033,243166,952224} +{62684,90530,109680,386424,809789,877346,890973,899326} +{411168,412985,490366} +{649089,830768,974287} +{277795,574868,740833} +{94739,150320,259619,437291,601240,872405} +{90422,145786,179192,397859,562781,857729,923287,960040} +{52190,265011,455736,461405,656403,936498} +{81798,242036,679082} +{819953} +{39653,441930,461027,486823,502107,675384,677785,762679,931585,993168} +{28578,141231,281940,862592} +{39140,65652,275775,326595,395458,593570,925170,993929} +{255682,406286,687809} +{240182,477721} +{130093,198039,212834,278199,436595,453033,488661,496611,856462,957216} +{249775,321776,511375,580305,689698,855237} +{10892,28119,69051,247529,395539,511811,647508} +{122214,143339,482259,529201,625389,682010,785578,886499,923253} +{156615,484643,487605,941564} +{213987} +{37919,236288,344261,400449,771849,989701} +{174575,306299,401363,899690} +{186161,411896,546769,658006,732046} +{287985,803235} +{222683,445361,587034} +{71185,112132,568790,694952,770208,801190,862000,884362,901903} +{708493,734835,786632,877697} +{156351,195563,262583,314676,394396,504295,561248,604412,710612,897766} +{960668} +{94388,302375,312313,339656,522646,528291,664700,775479,991884} +{816562,929326} +{413405,922229} +{81504,228752,667371,693557,858935} +{261431} +{194738,253563,329085,424104,667347,978148} +{210823,328927,688527,750672,857434} +{107063,303591,309814,321632,516507,545016,560224,690501,916384,967843} +{146649,536657,556531,729291} +{240096,277120,378805,546756} +{98547,109699,168717,354608,475595,579004,587480,595276,697785,874949} +{166309,184242,260380,383160,667857,741963,796469,898600} +{309988,328727,566036,589244,667550,709734,898880,928895} +{5294,372751,383959,528083} +{80330,85874,123214,158298,163279,338796,495448,622919,729663,925224} 
+{15915,56471,339708,362158,503280,588272,665258,775922,972509,980667} +{160785,185178,218306,228794,544953,601459,695796,896141} +{87698,126722,263463,285827,337923,365413,582807,765345} +{89562,208523,253702,517044,562216,575802,593043,681858,913315,980716} +{537816,650117,655357,867884} +{156814,241038,450280,500486,632749,899898} +{17570,61117,309994,478117,488407,862660,878352} +{52243,335667,421070,491097,505553,610370,773872,830742,847916} +{51420} +{83849,248379,288521,331977,430470,475083,598982,808782,848972} +{644792,667640,944139} +{247246,844796,873638} +{681957,826970,878025} +{41901,956413} +{34639,70729,145377,147629,256011,642650,987765} +{64545,312830,343185,491692,538864,557366,683436} +{146798,259692,397990,444589,660998,846112} +{106759,172130,462006,666001,699829,754823} +{660368,935001} +{66405,196602,227850,254675,420799,688007,974717,979094} +{308071,372520,444124,479271,607388,692520} +{145437,288899,320954,536421} +{168104,452556,615930,730046,833058} +{176116,209173,233203,266543,349418,430825,442596,590454,966655,986654} +{255791,342602} +{392583,402479,497945,501529,649343,709934,710496,721208,909777,962338} +{87436,247728,337943,401618,450104,542978,618572,758274,824321,856185} +{390318,413792,747254,904133} +{37283,182473,546326,814620} +{128153,235167,376964,496967,757894,792473,801206} +{912,542870,562813,580542,821384,901056,915770} +{283397,853174} +{252381,394176,439599,501954,623661,657038,763532} +{142504,907744} +{981089} +{580060,584113,701758} +{219825,297010,378850,426889,453564,639490,831580,911598} +{41156,69751,174665,412892,981650} +{157552,361470,411424,504300,613936,678388,791640,860421} +{931632} +{550299} +{55808,126542,173632,197648,233108,306505,360709,635009,909862} +{154756,361968,637563,727579,889638} +{18585,50345,133343,155947,183016,236961,470988,613204,796990,896731} +{635700} +{91036,125889,241554,278489,293722,373827,459049,553129,886759,888363} 
+{174774,395889,439103,603616,667126,667259,671929,778920,829327,948216} +{218279,289298,388508,607033,642204,669812,693089,825967} +{126481,561889,606963,777346,785725,790423,813756,845817} +{90807,165674,210694,212597,265087,304281,582526,594062,983923} +{529000,612859} +{249215,512415,560450,839163,842432} +{552746,596508,829751} +{355441,538992,919417,985882} +{1689,516666} +{291197,584003} +{41748,453612,594203,599554,646076,699946,777094,782600,808528} +{14859,130333,234267,245842,497405,634974,839129,895608,905086,963792} +{408633,411034,668324,717879,718867,819201} +{165992,260697,279527,509295,554935,791847} +{124507,443400,730009,763487,783327,969761} +{2376,55221,56207,528203,532814,796728,812610,840253} +{21480,276707,751254,834641,930552} +{342653,356407,390941,539728,664266,749420} +{955,32511,46493,346112} +{375356,416042,514315,568196,571892,643391,688046,704611} +{76345,112650,498822,851652,936272,940898,995071} +{18298,106702,295092,312803,785981,973004} +{114094,591184,626784,706997,799302} +{209280,311515} +{85814,170593,201840,268812,292620,379527,512620,834776} +{75448,88097,272937,300817,387680,396926,510797,520566,776309,843643} +{148509,557682} +{73081,114086,127958,156453,234232,245265,401496,790624} +{37199,97621,565090,612651,847276} +{13105,472293,540929,542281,553496,583111} +{153211,356165,401902,498374,650192,675938,765601,766904,830575,882394} +{161061,248524,354443,355121,421502,452021,587615} +{109065,122332,153386,757580,828996,889313,895703,899008,924366} +{72113,201287,444024,831619,877848,891699,964610,985257} +{275113,292961,405765,822014,892239} +{176361,474995,542380,725411,833445,850713,899547,906008,912957} +{30625,167695,213191,276317,452884,690180,706831,733756} +{35407,164503,257600,414540,424850,702760,783737,860599} +{41655,198328,229130,364166,930855,997030} +{74205,95589,205104,207443,350602,400513,514019,599880} +{74858,139669,358869,723630} +{53748,147765,167346,803680,871991,876773,893750} 
+{382069,406883,508433,642035,644244,678621,750270,844350} +{784711,847742} +{78528,118971,276338,379300,461129,693101,962057,987398} +{3042,612808} +{236259,892320} +{59585,167664,168721,378145,398476,560438,624907,725714,731421} +{184733,482448,562692} +{127585,576450} +{33924,63115,79389,82979,262911,518667,733345,847675,892704,926276} +{139231,467169,798379,990708} +{92359,227155,368992,402790,439290,499174,583592,624910,706697,851664} +{487360,594021,765451} +{11427,177269,210081,215055,240324,329959,352233,372007,655544,971495} +{85593,260329,358138,505201,669015,729288,993644} +{322951,340259,623166,867330} +{55773,271694,498176,686723,703171} +{48381,107221,164707,167640,214470,423430,521060,851009} +{207791} +{618590} +{156855,198990,251154,918847} +{93172,99033,228403,641678} +{5061,434007,497515,794774,962885} +{365592,428895,473966,498690,563624,636747} +{166406,252051,346190,427544,603536,633770,663210,764015,790304} +{370224,830891} +{27410,153904,451150,811282,871142} +{160839,696278,734381,863856} +{38183,124075,191945,359434,475537,498402,742289,774577,899450,978776} +{232827,384214,516134,532376,582999,636413,833463} +{18328,124941,220413,289275,395838,413397,653161,735599,864606} +{378087,413202,524807,553342,585932,602775,811346,874898,954357,989022} +{60579,194078,260147,432195,484943,600137} +{55599,287374} +{159925,498450,727290} +{29666,71935,103011,129028,667380,776397} +{72669,570255,735144,759164,823600,853940} +{5992,181541,444830,977867} +{71039,104979,308891,614732,766241,815799,828471,838443,848939,867043} +{19925,101928,193867,203159,303411,459650,663206,786907,821393,988322} +{297173,323994,335130,398191,522903,679915} +{337934,889331} +{154287,302740,561152,613533,670748,829857,996827} +{151365,199054,328290,356005,415739,695852} +{320377} +{723034,760372,836932} +{39691,177180,386885,700007,946761} +{3762,94319,123107,301036,357269,387185,802847,874889,890690} +{884702,940290} 
+{67569,78250,598647,625951,755836,768783,858239,898139} +{169609,566093,751452,855204,920019} +{100921,407886,520750,659640,754883,826233} +{115643,267353,295783,536749,678255,906082,956550,959079} +{315712,572115} +{89282,299887,835824} +{275714,670431} +{243514,439643,732537,943615} +{75807,272117,698180} +{1007,138543,345664,751582,972886,990712} +{272703,377973,440738,474885,606581,619665,626824,756366,818353,855076} +{115602,399577,437771,613744,886138} +{121759,286151,311608,364796,588378,611043,710993,752584,753972,757868} +{529262} +{88637,95101,210598,259933,657648,673777,854495,955219} +{707307} +{295229,315175} +{61791,254292,370654,389170,389341,443780,612317,903135,978615} +{147658,233721,374704,405708,526761,776264,813520,832724,939482} +{154662,224556,362232,395850,520429,616127,714826,715811,963556,992482} +{214111,283104,510734,544505,546067,606497,646995,757961,780889,938028} +{1083,44380,115711,473201,488053,669403,736798,748426,784069,997854} +{284136} +{98614,328068,727034,986085} +{429717,574942,598224,711317,884385,908852,965954} +{745426} +{247012,428806,576034,585557,613294,657712,737351,750465,899911} +{359118,682537,797367} +{116705,354932,569900,771324,889688,917282,923454,965443,972520} +{3444,192341,213235,223594,588018,890331} +{395102,911836,940793,963300} +{142463,175517,328065,637490,645421,828575,912062} +{366494} +{11548,72094,528796,766382,781391,846999} +{4813,413993,744502} +{75231,274759,304520,580265,844095} +{621718} +{193212,203262,227296,875693} +{58711,144656,484335,497013,603361,911335} +{537714,793698,869218} +{78068,790498,887576,970193} +{182788,212042,291031,338707,403963,428414,532724,577304,978652} +{85022,244260,279039,556563,563258,604398,712965,930143} +{167901,242904,279228,565573,694691,914444} +{41270,159746,416764,612783,775702,873501} +{432328,796857} +{19757,182345,264252,314332,644867,723611} +{77478,305252,379998,800568,807086} +{258774,431488,531903,733976,737390,961362} +{810690} +{216193,509062} 
+{12888,98136,323652} +{23601,23884,85297,272386,404877,627793,836913,920771,971149} +{15662,308671,539446,688978} +{7019,347413,361177,558519,654304,887042} +{74784,95310,488279,526615,659872,949818} +{243675,319115} +{234157,272014,559652,682264,800780,964329} +{200080,249811,498104,620138,637619,658717,735287,900633,921910,987806} +{110583,171443,226684,283296,794155,920694,933691,951510,982882,997599} +{84796,347065} +{112494,171357,401737,848087} +{822712,972222} +{554053,670881} +{60898,248289,550168,649275,746481,875170} +{43795,395714,453610,712137} +{937529,968153} +{197934,417678,548348,619904,780780,795874,940667,987601} +{215354,353347,523395,823086} +{174374,270205,341741,508628,579660,811684} +{98507,311140,477303,585516} +{5894,112191} +{41440,375370,439001,508201,540800,930860} +{468391,775928} +{93863,104885,369909,486031,487671} +{97821,760316} +{6600,111393,157389,187139,249578,318613,354456,430196,935440,960617} +{45305,52638,823453} +{6884,54249,88111,99820,162425,213397,555014,661211,761533,872966} +{215442,609765} +{110757,378776,514050,573680} +{38491,57050,74391,210409,252541,496689,654856,712001,920628} +{84960,353497,418715,799629,921438} +{855150} +{212199,332505,376525,635317,709889,859876} +{31730} +{21798,62692,271522,287884,323310,560052,696886,801923,989670} +{143296,172344,402691,565778,677859,720969,859190,954361} +{29524,395719,423307,479174,679949,869510} +{70407,208157,573898} +{145671,181494,255816,346094,355114,397958,508500,573786,865758} +{348673,720665} +{2904,64410,125717,540582,857377,870122,877024} +{36788,60016,136597,401675,421390,478126,482859,535908,884430,938780} +{28318,116125,327829} +{126990,190926,490381} +{914056,922102} +{58952,200224,667325,792435,898271,979792} +{120216,281697,469344,930732} +{597166} +{23411,42865,62981,276949,328778,554090,758001,796429,802055,808037} +{557437,640503,724629} +{41829,174191,267429,457857,512913,566042,668901,807887,871614} 
+{245386,356362,391924,393813,496337,568473,925448,927263,978625} +{13160,59140,132918,220053,575312,750690,994178} diff --git a/expected/pg_set_test.out b/expected/pg_set_test.out index 29ef169..59cfa39 100644 --- a/expected/pg_set_test.out +++ b/expected/pg_set_test.out @@ -446,6 +446,90 @@ SELECT pg_set_info(pg_set_add('{2,4,6,8,10,12,14,16}', 9)) = pg_set_info('{2,4,6 t (1 row) +SELECT pg_set_remove('{}', 1); + pg_set_remove +--------------- + {} +(1 row) + +SELECT pg_set_remove('{2}', 2); + pg_set_remove +--------------- + {} +(1 row) + +SELECT pg_set_remove('{1,2,3,4,5}', 1); + pg_set_remove +--------------- + {2,3,4,5} +(1 row) + +SELECT pg_set_remove('{1,2,3,4,6}', 7); + pg_set_remove +--------------- + {1,2,3,4,6} +(1 row) + +SELECT pg_set_remove('{2,4,6,8,10,12}', 10); + pg_set_remove +--------------- + {2,4,6,8,12} +(1 row) + +SELECT pg_set_remove('{2,4,6,8,10,12,14}', 13); + pg_set_remove +-------------------- + {2,4,6,8,10,12,14} +(1 row) + +SELECT pg_set_remove('{2,4,6,8,10,9,12,14,16}', 9); + pg_set_remove +----------------------- + {2,4,6,8,10,12,14,16} +(1 row) + +SELECT pg_set_info(pg_set_remove('{}', 1)) = pg_set_info('{}'); + ?column? +---------- + t +(1 row) + +SELECT pg_set_info(pg_set_remove('{2}', 2)) = pg_set_info('{}'); + ?column? +---------- + t +(1 row) + +SELECT pg_set_info(pg_set_remove('{1,2,3,4,5}', 1)) = pg_set_info('{2,3,4,5}'); + ?column? +---------- + t +(1 row) + +SELECT pg_set_info(pg_set_remove('{1,2,3,4,6}', 7)) = pg_set_info('{1,2,3,4,6}'); + ?column? +---------- + t +(1 row) + +SELECT pg_set_info(pg_set_remove('{2,4,6,8,10,12}', 10)) = pg_set_info('{2,4,6,8,12}'); + ?column? +---------- + t +(1 row) + +SELECT pg_set_info(pg_set_remove('{2,4,6,8,10,12,14}', 13)) = pg_set_info('{2,4,6,8,10,12,14}'); + ?column? +---------- + t +(1 row) + +SELECT pg_set_info(pg_set_remove('{2,4,6,8,10,9,12,14,16}', 9)) = pg_set_info('{2,4,6,8,10,12,14,16}'); + ?column? 
+---------- + t +(1 row) + SELECT pg_set_smallest('{}'); pg_set_smallest ----------------- @@ -1106,18 +1190,24 @@ SELECT '{0,1,2}'::pg_set <@ '{1,2,3}'; f (1 row) -SELECT '{1,2,3,4,5}'::pg_set || 4; +SELECT '{1,2,3,4,5}'::pg_set + 4; ?column? ------------- {1,2,3,4,5} (1 row) -SELECT 4 || '{1,2,3}'; +SELECT 4 + '{1,2,3}'::pg_set; ?column? ----------- {1,2,3,4} (1 row) +SELECT '{1,2,3,4,5}'::pg_set - 4; + ?column? +----------- + {1,2,3,5} +(1 row) + SELECT '{1,2,3,4,5}'::pg_set && '{6,7,8}'; ?column? ---------- @@ -1178,3 +1268,354 @@ SELECT '{1,2,3,4,5}'::int4[]::pg_set; {1,2,3,4,5} (1 row) +-- Large set test +CREATE TABLE int4array_table ( + id int PRIMARY KEY, + values int4[] +); +CREATE TABLE pg_set_table ( + id int PRIMARY KEY, + values pg_set +); +CREATE FUNCTION array_sort(a int4[]) + RETURNS int4[] IMMUTABLE + LANGUAGE sql +AS $$ + SELECT array_agg(a ORDER BY a) FROM unnest(a) a; +$$; +CREATE FUNCTION array_union(a int4[], b int4[]) + RETURNS int4[] IMMUTABLE + LANGUAGE sql +AS $$ + SELECT COALESCE(array_agg(c.a ORDER BY c.a), '{}') FROM ( + SELECT DISTINCT a FROM unnest(a) a + UNION + SELECT DISTINCT b FROM unnest(b) b + ORDER BY a + ) c; +$$; +CREATE FUNCTION array_intersect(a int4[], b int4[]) + RETURNS int4[] IMMUTABLE + LANGUAGE sql +AS $$ + SELECT COALESCE(array_agg(c.a ORDER BY c.a), '{}') FROM ( + SELECT DISTINCT a FROM unnest(a) a + INTERSECT + SELECT DISTINCT b FROM unnest(b) b + ORDER BY a + ) c; +$$; +CREATE FUNCTION array_diff(a int4[], b int4[]) + RETURNS int4[] IMMUTABLE + LANGUAGE sql +AS $$ + SELECT COALESCE(array_agg(c.a ORDER BY c.a), '{}') FROM ( + SELECT DISTINCT a FROM unnest(a) a + EXCEPT + SELECT DISTINCT b FROM unnest(b) b + ORDER BY a + ) c; +$$; +INSERT INTO int4array_table (id, values) +SELECT + x, + vals.* +FROM + generate_series(0, 16) x +CROSS JOIN LATERAL ( + SELECT + array_sort(array_agg(DISTINCT trunc(random()* 100000000)::int)) + FROM + generate_series(1, (2^x)::int) +) vals; +INSERT INTO pg_set_table (id, values) +SELECT 
+ id, + values::pg_set +FROM + int4array_table; +WITH sample AS ( + SELECT + id, + values[cardinality(values) / 2 + 1] mid_val, + values array_vals, + values::pg_set set_vals + FROM + int4array_table + WHERE + id = 7 +) +SELECT + 1 +FROM + pg_set_table +JOIN + int4array_table +ON + pg_set_table.id = int4array_table.id +CROSS JOIN + sample +WHERE + pg_set_table.values::text != int4array_table.values::text OR + (pg_set_table.values + sample.set_vals)::text != array_union(int4array_table.values, sample.array_vals)::text OR + (pg_set_table.values - sample.set_vals)::text != array_diff(int4array_table.values, sample.array_vals)::text OR + (pg_set_table.values * sample.set_vals)::text != array_intersect(int4array_table.values, sample.array_vals)::text OR + (pg_set_table.values + sample.mid_val)::text != array_union(int4array_table.values, array[sample.mid_val])::text OR + (pg_set_table.values && sample.set_vals) != (int4array_table.values && sample.array_vals) OR + (pg_set_table.values @> sample.set_vals) != (int4array_table.values @> sample.array_vals) OR + (pg_set_table.values <@ sample.set_vals) != (int4array_table.values <@ sample.array_vals) OR + (pg_set_table.values @> sample.mid_val) != (int4array_table.values @> array[sample.mid_val]) OR + (pg_set_table.values = sample.set_vals) != (int4array_table.values = sample.array_vals) OR + (pg_set_table.values <> sample.set_vals) != (int4array_table.values <> sample.array_vals) +LIMIT 1; + ?column? 
+---------- +(0 rows) + +DROP TABLE int4array_table, pg_set_table; +CREATE TABLE pg_set_table (values pg_set); +\copy pg_set_table FROM 'data/pg_set.data' +SELECT COUNT(*) FROM pg_set_table WHERE values = '{217419,456870,462437,797909}'; + count +------- + 1 +(1 row) + +SELECT COUNT(*) FROM pg_set_table WHERE values <> '{217419,456870,462437,797909}'; + count +------- + 999 +(1 row) + +SELECT COUNT(*) FROM pg_set_table WHERE values @> 456870; + count +------- + 1 +(1 row) + +SELECT COUNT(*) FROM pg_set_table WHERE values @> '{}'; + count +------- + 1000 +(1 row) + +SELECT COUNT(*) FROM pg_set_table WHERE values @> '{217419,797909}'; + count +------- + 1 +(1 row) + +SELECT COUNT(*) FROM pg_set_table WHERE values && '{217419,723181,738339,104704}'; + count +------- + 3 +(1 row) + +CREATE INDEX pg_set_table_values_idx ON pg_set_table USING GIN (values); +SET enable_seqscan = off; +SELECT COUNT(*) FROM pg_set_table WHERE values = '{217419,456870,462437,797909}'; + count +------- + 1 +(1 row) + +SELECT COUNT(*) FROM pg_set_table WHERE values @> 456870; + count +------- + 1 +(1 row) + +SELECT COUNT(*) FROM pg_set_table WHERE values @> '{}'; + count +------- + 1000 +(1 row) + +SELECT COUNT(*) FROM pg_set_table WHERE values @> '{217419,797909}'; + count +------- + 1 +(1 row) + +SELECT COUNT(*) FROM pg_set_table WHERE values && '{217419,723181,738339,104704}'; + count +------- + 3 +(1 row) + +DROP INDEX pg_set_table_values_idx; +CREATE INDEX pg_set_table_values_idx ON pg_set_table USING gist (values gist_pg_set_ops (masklen = 0)); +ERROR: value 0 out of bounds for option "masklen" +DETAIL: Valid values are between "16" and "16064". +CREATE INDEX pg_set_table_values_idx ON pg_set_table USING gist (values gist_pg_set_ops (masklen = 15)); +ERROR: value 15 out of bounds for option "masklen" +DETAIL: Valid values are between "16" and "16064". 
+CREATE INDEX pg_set_table_values_idx ON pg_set_table USING gist (values gist_pg_set_ops (masklen = 16065)); +ERROR: value 16065 out of bounds for option "masklen" +DETAIL: Valid values are between "16" and "16064". +CREATE INDEX pg_set_table_values_idx ON pg_set_table USING gist (values gist_pg_set_ops (masklen = 16064)); +SET enable_seqscan = off; +SET enable_bitmapscan = off; +SELECT COUNT(*) FROM pg_set_table WHERE values = '{217419,456870,462437,797909}'; + count +------- + 1 +(1 row) + +SELECT COUNT(*) FROM pg_set_table WHERE values @> 456870; + count +------- + 1 +(1 row) + +SELECT COUNT(*) FROM pg_set_table WHERE values @> '{}'; + count +------- + 1000 +(1 row) + +SELECT COUNT(*) FROM pg_set_table WHERE values @> '{217419,797909}'; + count +------- + 1 +(1 row) + +SELECT COUNT(*) FROM pg_set_table WHERE values && '{217419,723181,738339,104704}'; + count +------- + 3 +(1 row) + +DROP INDEX pg_set_table_values_idx; +CREATE INDEX pg_set_table_values_idx ON pg_set_table USING hash (values); +SELECT COUNT(*) FROM pg_set_table WHERE values = '{217419,456870,462437,797909}'; + count +------- + 1 +(1 row) + +DROP INDEX pg_set_table_values_idx; +CREATE TABLE point_pg_set_table ( + p point, + values pg_set +); +CREATE INDEX point_pg_set_table_idx ON point_pg_set_table + USING gist (p, values gist_pg_set_ops (masklen = 16064)); +INSERT INTO point_pg_set_table +SELECT + '(0,0)'::point, + values +FROM + pg_set_table; +SET enable_seqscan = off; +SET enable_bitmapscan = off; +SELECT COUNT(*) FROM point_pg_set_table WHERE p ~= '(0,0)'::point AND values = '{217419,456870,462437,797909}'; + count +------- + 1 +(1 row) + +SELECT COUNT(*) FROM point_pg_set_table WHERE p ~= '(0,0)'::point AND values @> 456870; + count +------- + 1 +(1 row) + +SELECT COUNT(*) FROM point_pg_set_table WHERE p ~= '(0,0)'::point AND values @> '{}'; + count +------- + 1000 +(1 row) + +SELECT COUNT(*) FROM point_pg_set_table WHERE p ~= '(0,0)'::point AND values @> '{217419,797909}'; + count +------- + 
1 +(1 row) + +SELECT COUNT(*) FROM point_pg_set_table WHERE p ~= '(0,0)'::point AND values && '{217419,723181,738339,104704}'; + count +------- + 3 +(1 row) + +TRUNCATE TABLE pg_set_table; +\copy pg_set_table FROM 'data/pg_set.data' +\copy pg_set_table FROM 'data/pg_set.data' +\copy pg_set_table FROM 'data/pg_set.data' +\copy pg_set_table FROM 'data/pg_set.data' +ANALYZE pg_set_table; +SELECT num_nulls( + null_frac, + avg_width, + n_distinct, + most_common_vals, + most_common_elems, + most_common_elem_freqs, + elem_count_histogram +) FROM pg_stats WHERE tablename = 'pg_set_table'; + num_nulls +----------- + 0 +(1 row) + +EXPLAIN SELECT * FROM pg_set_table WHERE values && '{864768,866406,867656,867884}'; + QUERY PLAN +---------------------------------------------------------------------------------- + Seq Scan on pg_set_table (cost=10000000000.00..10000000082.00 rows=16 width=33) + Filter: ("values" && '{864768,866406,867656,867884}'::pg_set) + JIT: + Functions: 2 + Options: Inlining true, Optimization true, Expressions true, Deforming true +(5 rows) + +EXPLAIN SELECT * FROM pg_set_table WHERE values @> '{864768}'; + QUERY PLAN +--------------------------------------------------------------------------------- + Seq Scan on pg_set_table (cost=10000000000.00..10000000082.00 rows=4 width=33) + Filter: ("values" @> '{864768}'::pg_set) + JIT: + Functions: 2 + Options: Inlining true, Optimization true, Expressions true, Deforming true +(5 rows) + +EXPLAIN SELECT * FROM pg_set_table WHERE values @> 864768; + QUERY PLAN +--------------------------------------------------------------------------------- + Seq Scan on pg_set_table (cost=10000000000.00..10000000082.00 rows=4 width=33) + Filter: ("values" @> 864768) + JIT: + Functions: 2 + Options: Inlining true, Optimization true, Expressions true, Deforming true +(5 rows) + +EXPLAIN SELECT * FROM pg_set_table WHERE values = '{1007,138543,345664,751582,972886,990712}'; + QUERY PLAN 
+--------------------------------------------------------------------------------- + Seq Scan on pg_set_table (cost=10000000000.00..10000000082.00 rows=1 width=33) + Filter: ("values" = '{1007,138543,345664,751582,972886,990712}'::pg_set) + JIT: + Functions: 2 + Options: Inlining true, Optimization true, Expressions true, Deforming true +(5 rows) + +EXPLAIN SELECT * FROM pg_set_table WHERE values <> '{1007,138543,345664,751582,972886,990712}'; + QUERY PLAN +------------------------------------------------------------------------------------ + Seq Scan on pg_set_table (cost=10000000000.00..10000000082.00 rows=3999 width=33) + Filter: ("values" <> '{1007,138543,345664,751582,972886,990712}'::pg_set) + JIT: + Functions: 2 + Options: Inlining true, Optimization true, Expressions true, Deforming true +(5 rows) + +EXPLAIN SELECT * FROM pg_set_table WHERE values && '{864768,866406,867656,867884}'; + QUERY PLAN +---------------------------------------------------------------------------------- + Seq Scan on pg_set_table (cost=10000000000.00..10000000082.00 rows=16 width=33) + Filter: ("values" && '{864768,866406,867656,867884}'::pg_set) + JIT: + Functions: 2 + Options: Inlining true, Optimization true, Expressions true, Deforming true +(5 rows) + diff --git a/hash_set.c b/hash_set.c index c4eae01..2de1db9 100644 --- a/hash_set.c +++ b/hash_set.c @@ -2,6 +2,7 @@ #include "common/int.h" #include "pg_set.h" #include "varatt.h" +#define EXPANDED_CAPACITY(capacity_) (capacity_ * 2) HashSet * create_set(void) diff --git a/pg_set--1.0.sql b/pg_set--1.0.sql index d21872b..f9d2745 100644 --- a/pg_set--1.0.sql +++ b/pg_set--1.0.sql @@ -21,11 +21,17 @@ RETURNS bytea AS '$libdir/pg_set' LANGUAGE C STABLE STRICT PARALLEL SAFE; +CREATE FUNCTION pg_set_typanalyze(internal) +RETURNS boolean +AS '$libdir/pg_set' +LANGUAGE C VOLATILE STRICT PARALLEL SAFE; + CREATE TYPE pg_set ( INPUT = pg_set_in, OUTPUT = pg_set_out, RECEIVE = pg_set_recv, SEND = pg_set_send, + ANALYZE = pg_set_typanalyze, 
STORAGE = extended,
 	INTERNALLENGTH = -1
 );
@@ -43,113 +49,143 @@ LANGUAGE C STABLE STRICT PARALLEL SAFE;
 CREATE FUNCTION pg_set_equals(pg_set, pg_set)
 RETURNS bool
 AS '$libdir/pg_set'
-LANGUAGE C STABLE STRICT PARALLEL SAFE;
+LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
 
 CREATE FUNCTION pg_set_not_equals(pg_set, pg_set)
 RETURNS bool
 AS '$libdir/pg_set'
-LANGUAGE C STABLE STRICT PARALLEL SAFE;
+LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
 
 CREATE FUNCTION pg_set_count(pg_set)
 RETURNS int4
 AS '$libdir/pg_set'
-LANGUAGE C STABLE STRICT PARALLEL SAFE;
+LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
 
 CREATE FUNCTION pg_set_create(VARIADIC int4[])
 RETURNS pg_set
 AS '$libdir/pg_set'
-LANGUAGE C STABLE STRICT PARALLEL SAFE;
+LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
 
 CREATE FUNCTION pg_set_contains(pg_set, int4)
 RETURNS bool
 AS '$libdir/pg_set'
-LANGUAGE C STABLE STRICT PARALLEL SAFE;
+LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
 
 CREATE FUNCTION pg_set_contained(int4, pg_set)
 RETURNS bool
 AS '$libdir/pg_set'
-LANGUAGE C STABLE STRICT PARALLEL SAFE;
+LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
 
 CREATE FUNCTION pg_set_subset(pg_set, pg_set)
 RETURNS bool
 AS '$libdir/pg_set'
-LANGUAGE C STABLE STRICT PARALLEL SAFE;
+LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
 
 CREATE FUNCTION pg_set_superset(pg_set, pg_set)
 RETURNS bool
 AS '$libdir/pg_set'
-LANGUAGE C STABLE STRICT PARALLEL SAFE;
+LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
 
 CREATE FUNCTION pg_set_overlaps(pg_set, pg_set)
 RETURNS bool
 AS '$libdir/pg_set'
-LANGUAGE C STABLE STRICT PARALLEL SAFE;
+LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
 
 CREATE FUNCTION pg_set_smallest(pg_set)
 RETURNS int4
 AS '$libdir/pg_set'
-LANGUAGE C STABLE STRICT PARALLEL SAFE;
+LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
 
 CREATE FUNCTION pg_set_largest(pg_set)
 RETURNS int4
 AS '$libdir/pg_set'
-LANGUAGE C STABLE STRICT PARALLEL SAFE;
+LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
 
 CREATE FUNCTION pg_set_to_array(pg_set)
 RETURNS int4[]
 AS '$libdir/pg_set'
-LANGUAGE C STABLE STRICT PARALLEL SAFE;
+LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
 
 CREATE FUNCTION array_to_pg_set(int4[])
 RETURNS pg_set
 AS '$libdir/pg_set'
-LANGUAGE C STABLE STRICT PARALLEL SAFE;
+LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
 
 CREATE FUNCTION pg_set_radd(int4, pg_set)
 RETURNS pg_set
 AS '$libdir/pg_set'
-LANGUAGE C STABLE STRICT PARALLEL SAFE;
+LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
 
 CREATE FUNCTION pg_set_add(pg_set, int4)
 RETURNS pg_set
 AS '$libdir/pg_set'
-LANGUAGE C STABLE STRICT PARALLEL SAFE;
+LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
 
 CREATE FUNCTION pg_set_union(pg_set, pg_set)
 RETURNS pg_set
 AS '$libdir/pg_set'
-LANGUAGE C STABLE STRICT PARALLEL SAFE;
+LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
 
 CREATE FUNCTION pg_set_intersect(pg_set, pg_set)
 RETURNS pg_set
 AS '$libdir/pg_set'
-LANGUAGE C STABLE STRICT PARALLEL SAFE;
+LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
 
 CREATE FUNCTION pg_set_diff(pg_set, pg_set)
 RETURNS pg_set
 AS '$libdir/pg_set'
-LANGUAGE C STABLE STRICT PARALLEL SAFE;
+LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+CREATE FUNCTION pg_set_remove(pg_set, int4)
+RETURNS pg_set
+AS '$libdir/pg_set'
+LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+CREATE FUNCTION pg_set_subset_sel(internal, oid, internal, int4)
+RETURNS float8
+AS '$libdir/pg_set'
+LANGUAGE C STRICT STABLE PARALLEL SAFE;
+
+CREATE FUNCTION pg_set_overlap_sel(internal, oid, internal, int4)
+RETURNS float8
+AS '$libdir/pg_set'
+LANGUAGE C STRICT STABLE PARALLEL SAFE;
+
+CREATE FUNCTION pg_set_contains_sel(internal, oid, internal, int4)
+RETURNS float8
+AS '$libdir/pg_set'
+LANGUAGE C STRICT STABLE PARALLEL SAFE;
+
+CREATE FUNCTION pg_set_join_subset_sel(internal, oid, internal, int2, internal)
+RETURNS float8
+AS '$libdir/pg_set'
+LANGUAGE C STRICT STABLE PARALLEL SAFE;
+
+CREATE FUNCTION pg_set_join_overlap_sel(internal, oid, internal, int2, internal)
+RETURNS float8
+AS '$libdir/pg_set'
+LANGUAGE C STRICT STABLE PARALLEL SAFE;
 
 CREATE OPERATOR = (
-	LEFTARG = pg_set,
-	RIGHTARG = pg_set,
-	PROCEDURE = pg_set_equals,
-	COMMUTATOR = =,
-	NEGATOR = <>,
-	RESTRICT = eqsel,
-	JOIN = eqjoinsel,
-	MERGES,
-	HASHES
+    LEFTARG = pg_set,
+    RIGHTARG = pg_set,
+    PROCEDURE = pg_set_equals,
+    COMMUTATOR = =,
+    NEGATOR = <>,
+    RESTRICT = eqsel,
+    JOIN = eqjoinsel,
+    MERGES,
+    HASHES
 );
 
 CREATE OPERATOR <> (
-	LEFTARG = pg_set,
-	RIGHTARG = pg_set,
-	PROCEDURE = pg_set_not_equals,
-	COMMUTATOR = <>,
-	NEGATOR = =,
-	RESTRICT = neqsel,
-	JOIN = neqjoinsel
+    LEFTARG = pg_set,
+    RIGHTARG = pg_set,
+    PROCEDURE = pg_set_not_equals,
+    COMMUTATOR = <>,
+    NEGATOR = =,
+    RESTRICT = neqsel,
+    JOIN = neqjoinsel
 );
 
 CREATE OPERATOR <@ (
@@ -166,8 +202,8 @@ CREATE OPERATOR @> (
 	RIGHTARG = pg_set,
 	FUNCTION = pg_set_superset,
 	COMMUTATOR = '<@',
-	RESTRICT = contsel,
-	JOIN = contjoinsel
+	RESTRICT = pg_set_subset_sel,
+	JOIN = pg_set_join_subset_sel
 );
 
 CREATE OPERATOR @> (
@@ -175,8 +211,8 @@ CREATE OPERATOR @> (
 	RIGHTARG = int4,
 	FUNCTION = pg_set_contains,
 	COMMUTATOR = '<@',
-	RESTRICT = contsel,
-	JOIN = contjoinsel
+	RESTRICT = pg_set_contains_sel,
+	JOIN = pg_set_join_overlap_sel
 );
 
 CREATE OPERATOR <@ (
@@ -193,8 +229,8 @@ CREATE OPERATOR && (
 	RIGHTARG = pg_set,
 	FUNCTION = pg_set_overlaps,
 	COMMUTATOR = '&&',
-	RESTRICT = contsel,
-	JOIN = contjoinsel
+	RESTRICT = pg_set_overlap_sel,
+	JOIN = pg_set_join_overlap_sel
 );
 
 CREATE OPERATOR + (
@@ -217,18 +253,24 @@ CREATE OPERATOR - (
 	FUNCTION = pg_set_diff
 );
 
-CREATE OPERATOR || (
+CREATE OPERATOR + (
 	LEFTARG = pg_set,
 	RIGHTARG = int4,
 	FUNCTION = pg_set_add,
-	COMMUTATOR = '||'
+	COMMUTATOR = '+'
 );
 
-CREATE OPERATOR || (
+CREATE OPERATOR + (
 	LEFTARG = int4,
 	RIGHTARG = pg_set,
 	FUNCTION = pg_set_radd,
-	COMMUTATOR = '||'
+	COMMUTATOR = '+'
+);
+
+CREATE OPERATOR - (
+	LEFTARG = pg_set,
+	RIGHTARG = int4,
+	FUNCTION = pg_set_remove
 );
 
 CREATE CAST (int4[] AS pg_set)
@@ -236,3 +278,118 @@ CREATE CAST (int4[] AS pg_set)
 
 CREATE CAST (pg_set AS int4[])
 	WITH FUNCTION pg_set_to_array;
+
+CREATE FUNCTION gin_extract_pg_set_value(pg_set, internal)
+RETURNS internal
+AS '$libdir/pg_set'
+LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+-- GIN extractQuery takes seven arguments; the C code reads searchMode (argument 6).
+CREATE FUNCTION gin_extract_pg_set_query(pg_set, internal, int2, internal, internal, internal, internal)
+RETURNS internal
+AS '$libdir/pg_set'
+LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+-- GIN triConsistent returns a GinTernaryValue, which is declared as "char" in SQL.
+CREATE FUNCTION gin_triconsistent_pg_set(internal, int2, pg_set, int4, internal, internal, internal)
+RETURNS "char"
+AS '$libdir/pg_set'
+LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+CREATE OPERATOR CLASS gin_pg_set_ops
+DEFAULT FOR TYPE pg_set USING gin
+AS
+    OPERATOR 3 &&,
+    OPERATOR 6 =,
+    OPERATOR 7 @> (pg_set, pg_set),
+    OPERATOR 13 @> (pg_set, int4),
+    FUNCTION 1 btint4cmp(int4,int4),
+    FUNCTION 2 gin_extract_pg_set_value(pg_set, internal),
+    FUNCTION 3 gin_extract_pg_set_query(pg_set, internal, int2, internal, internal, internal, internal),
+    FUNCTION 6 gin_triconsistent_pg_set(internal, int2, pg_set, int4, internal, internal, internal),
+    STORAGE int4;
+
+-- I/O functions of the GiST key type; they exist only to satisfy CREATE TYPE.
+CREATE FUNCTION gist_pg_set_in(cstring)
+RETURNS gist_pg_set
+AS '$libdir/pg_set'
+LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;
+
+CREATE FUNCTION gist_pg_set_out(gist_pg_set)
+RETURNS cstring
+AS '$libdir/pg_set'
+LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;
+
+CREATE TYPE gist_pg_set (
+    INTERNALLENGTH = -1,
+    INPUT = gist_pg_set_in,
+    OUTPUT = gist_pg_set_out
+);
+
+CREATE FUNCTION gist_pg_set_compress(internal)
+RETURNS internal
+AS '$libdir/pg_set'
+LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+CREATE FUNCTION gist_pg_set_decompress(internal)
+RETURNS internal
+AS '$libdir/pg_set'
+LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+CREATE FUNCTION gist_pg_set_penalty(internal,internal,internal)
+RETURNS internal
+AS '$libdir/pg_set'
+LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+CREATE FUNCTION gist_pg_set_picksplit(internal, internal)
+RETURNS internal
+AS '$libdir/pg_set'
+LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+CREATE FUNCTION gist_pg_set_union(internal, internal)
+RETURNS gist_pg_set
+AS '$libdir/pg_set'
+LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+CREATE FUNCTION gist_pg_set_same(gist_pg_set, gist_pg_set, internal)
+RETURNS internal
+AS '$libdir/pg_set'
+LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+CREATE FUNCTION gist_pg_set_consistent(internal,pg_set,int2,oid,internal)
+RETURNS bool
+AS '$libdir/pg_set'
+LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+CREATE OR REPLACE FUNCTION gist_pg_set_options(internal)
+RETURNS void
+AS '$libdir/pg_set'
+LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+CREATE OPERATOR CLASS gist_pg_set_ops
+DEFAULT FOR TYPE pg_set USING gist
+AS
+    OPERATOR 3 &&,
+    OPERATOR 6 =,
+    OPERATOR 7 @> (pg_set, pg_set),
+    OPERATOR 13 @> (pg_set, int4),
+    FUNCTION 1 gist_pg_set_consistent (internal, pg_set, smallint, oid, internal),
+    FUNCTION 2 gist_pg_set_union (internal, internal),
+    FUNCTION 3 gist_pg_set_compress (internal),
+    FUNCTION 4 gist_pg_set_decompress (internal),
+    FUNCTION 5 gist_pg_set_penalty (internal, internal, internal),
+    FUNCTION 6 gist_pg_set_picksplit (internal, internal),
+    FUNCTION 7 gist_pg_set_same (gist_pg_set, gist_pg_set, internal),
+    FUNCTION 10 gist_pg_set_options (internal),
+    STORAGE gist_pg_set;
+
+CREATE FUNCTION pg_set_hash(pg_set)
+RETURNS int4
+AS '$libdir/pg_set'
+LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;
+
+CREATE OPERATOR CLASS hash_pg_set_ops
+DEFAULT FOR TYPE pg_set USING hash
+AS
+    OPERATOR 1 = ,
+    FUNCTION 1 pg_set_hash(pg_set);
diff --git a/pg_set.h b/pg_set.h
index ec3a9ef..8121996 100644
--- a/pg_set.h
+++ b/pg_set.h
@@ -3,6 +3,7 @@
 #include "postgres.h"
 #include "c.h"
 #include "common/hashfn.h"
+#include "utils/array.h"
 
 typedef int32 PgSetElement;
 
@@ -34,10 +35,12 @@ typedef struct
 HashSet *create_set(void);
 bool set_add(HashSet *set, PgSetElement value);
 PgSet *shrink(HashSet *set);
+ArrayType *cast_to_array(PgSet *args);
 
 #define PG_SET_MAX_SIZE INT_MAX
-#define BITS_PER_MASK_POS (8 * sizeof(uint16))
-#define INITIAL_CAPACITY 16
+#define BITS_PER_BYTE 8
+#define BITS_PER_MASK_POS (BITS_PER_BYTE * sizeof(uint16))
+#define INITIAL_CAPACITY BITS_PER_MASK_POS
 
 #define MAX(a, b) ((a) > (b) ? (a) : (b))
 #define MIN(a, b) ((a) < (b) ? (a) : (b))
@@ -52,14 +55,13 @@ PgSet *shrink(HashSet *set);
 #define ELEMENT_MAX INT_MAX
 #define ELEMENT_MIN INT_MIN
 
-#define PG_GETARG_PG_SET_P(x) (PgSet *) (PG_DETOAST_DATUM(PG_GETARG_DATUM(x)))
+#define DatumGetPgSetP(X) ((PgSet *) PG_DETOAST_DATUM(X))
+#define PG_GETARG_PG_SET_P(x) (DatumGetPgSetP(PG_GETARG_DATUM(x)))
 #define MAX_ELEMENTS ((uint32) INT_MAX)
 
 #define MASK_INDEX(index_) (index_ / BITS_PER_MASK_POS)
 #define MASK_BIT(index_) (1 << ((index_) % BITS_PER_MASK_POS))
 
-#define EXPANDED_CAPACITY(capacity_) (capacity_ * 2)
-
 #define CHECK_COUNT(set_) \
 	if ((set_)->count > MAX_ELEMENTS) \
 		ereport(ERROR, \
@@ -72,3 +74,8 @@ PgSet *shrink(HashSet *set);
 		uint32 hash = HASHVAL(&(element_), (capacity_)); \
 		(mask_)[MASK_INDEX(hash)] |= MASK_BIT(hash); \
 	} while (0)
+
+#define PgSetOverlapStrategyNumber 3
+#define PgSetEqualsStrategyNumber 6
+#define PgSetSubsetStrategyNumber 7
+#define PgSetContainsStrategyNumber 13
diff --git a/pg_set_analyze.c b/pg_set_analyze.c
new file mode 100644
index 0000000..401169b
--- /dev/null
+++ b/pg_set_analyze.c
@@ -0,0 +1,447 @@
+#include "postgres.h"
+#include "access/detoast.h"
+#include "access/htup_details.h"
+#include "commands/vacuum.h"
+#include "fmgr.h"
+#include "pg_set.h"
+#include "utils/fmgrprotos.h"
+#include "utils/lsyscache.h"
+#include "utils/selfuncs.h"
+
+#define SET_WIDTH_THRESHOLD 0x10000
+#define DEFAULT_OVERLAP_SEL 0.01
+#define DEFAULT_SUBSET_SEL 0.005
+#define DEFAULT_SEL(operator) \
+    ((operator) == OVERLAP ? DEFAULT_OVERLAP_SEL : DEFAULT_SUBSET_SEL)
+
+typedef enum
+{
+    OVERLAP,
+    SUBSET
+} Operator;
+
+typedef PgSetElement *(*ExtractElementsFunc)(Datum datum, uint32 *count,
+                                             void **freeable);
+
+const AttStatsSlot default_slot = {
+    .nnumbers = 0,
+    .nvalues = 0,
+    .numbers = NULL,
+    .values = NULL,
+    .numbers_arr = NULL,
+    .stacoll = InvalidOid,
+    .staop = InvalidOid,
+    .valuetype = InvalidOid,
+    .values_arr = NULL,
+};
+
+typedef struct
+{
+    // For extra_data compatibility
+    Oid eqopr;  /* '=' operator for datatype, if any */
+    Oid eqfunc; /* and associated function */
+    Oid ltopr;  /* '<' operator for datatype, if any */
+    // We need both so we can also calculate standard stats for sets
+    AnalyzeAttrComputeStatsFunc std_compute_stats;
+    void *std_extra_data;
+    // This saves the stats calculated by array_typanalyze
+    VacAttrStats *array_stats;
+} PgSetAnalyzeExtraData;
+
+static void compute_set_stats(VacAttrStats *stats,
+                              AnalyzeAttrFetchFunc fetchfunc, int samplerows,
+                              double totalrows);
+static PgSetElement *extract_set_elements(Datum datum, uint32 *count,
+                                          void **freeable);
+static PgSetElement *element_to_array(Datum datum, uint32 *count,
+                                      void **freeable);
+static Selectivity calc_opsel(PlannerInfo *root, List *args, int var_relid,
+                              VariableStatData *vardata, Operator operator,
+                              ExtractElementsFunc extract_elements);
+
+static Selectivity mcelem_set_sel(PgSetElement *elements, uint32 count,
+                                  AttStatsSlot slot, Operator operator);
+
+/*
+ * typanalyze hook for pg_set.
+ *
+ * Runs std_typanalyze on the column itself and array_typanalyze on a copy of
+ * the stats struct whose type is overridden to int4[], then installs
+ * compute_set_stats as the compute callback. Returns false when either of
+ * the underlying analyze functions reports failure.
+ */
+PG_FUNCTION_INFO_V1(pg_set_typanalyze);
+Datum
+pg_set_typanalyze(PG_FUNCTION_ARGS)
+{
+    PgSetAnalyzeExtraData *extra_data;
+    VacAttrStats *stats = (VacAttrStats *) PG_GETARG_POINTER(0);
+    VacAttrStats *array_stats = palloc(sizeof(VacAttrStats));
+
+    memcpy(array_stats, stats, sizeof(VacAttrStats));
+    array_stats->attrtypid = INT4ARRAYOID;
+    extra_data = palloc0(sizeof(PgSetAnalyzeExtraData));
+
+    if (!std_typanalyze(stats))
+        PG_RETURN_BOOL(false);
+    // Copy the operation OIDs into the extra_data
+    memcpy(extra_data, stats->extra_data, 3 * sizeof(Oid));
+    // Keep the standard extra data reachable instead of leaving the field
+    // uninitialized
+    extra_data->std_extra_data = stats->extra_data;
+
+    // We'll basically reuse array_typanalyze to calculate some stats that
+    // have the exact same logic for sets
+    if (!DatumGetBool(DirectFunctionCall1(array_typanalyze,
+                                          PointerGetDatum(array_stats))))
+        PG_RETURN_BOOL(false);
+
+    extra_data->array_stats = array_stats;
+    extra_data->std_compute_stats = stats->compute_stats;
+
+    stats->extra_data = extra_data;
+    stats->compute_stats = compute_set_stats;
+    stats->minrows = array_stats->minrows;
+    PG_RETURN_BOOL(true);
+}
+
+/*
+ * Fetch callback handed to the array compute_stats function: reads the set
+ * from the sampled heap row and returns it converted to an int4 array.
+ * NULLs and values wider than SET_WIDTH_THRESHOLD are returned untouched.
+ */
+static Datum
+set_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
+{
+    PgSet *set;
+    ArrayType *array;
+    int attnum = stats->tupattnum;
+    HeapTuple tuple = stats->rows[rownum];
+    TupleDesc tupDesc = stats->tupDesc;
+    Datum value = heap_getattr(tuple, attnum, tupDesc, isNull);
+
+    // A NULL attribute has no varlena behind it: measuring or detoasting it
+    // would dereference an invalid pointer
+    if (*isNull)
+        return value;
+
+    // If the value is too large, skip it.
+    if (toast_raw_datum_size(value) > SET_WIDTH_THRESHOLD)
+    {
+        // Do nothing. array_typanalyze should skip it!
+        return value;
+    }
+    set = DatumGetPgSetP(value);
+    // This might need to be pfree'd later, but for now I won't bother
+    array = cast_to_array(set);
+    // Return it as an array to trick compute_array_stats
+    return PointerGetDatum(array);
+}
+
+/*
+ * compute_stats callback for pg_set.
+ *
+ * Computes the array-style statistics through set_fetch_func, the standard
+ * statistics through the original fetchfunc, and then appends the MCELEM and
+ * DECHIST slots of the array statistics to the free slots of the result.
+ */
+static void
+compute_set_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
+                  int samplerows, double totalrows)
+{
+    PgSetAnalyzeExtraData *extra_data =
+        (PgSetAnalyzeExtraData *) stats->extra_data;
+    VacAttrStats *array_stats = extra_data->array_stats;
+    int dst = 0;
+    int src;
+
+    // Copy the necessary data so we can get the MCELEM and DECHIST stats
+    // as if sets were arrays
+    array_stats->tupattnum = stats->tupattnum;
+    array_stats->rows = stats->rows;
+    array_stats->tupDesc = stats->tupDesc;
+    array_stats->exprnulls = stats->exprnulls;
+    array_stats->exprvals = stats->exprvals;
+    array_stats->rowstride = stats->rowstride;
+    // Use array to calc non scalar stats with set_fetch_func that will trick
+    // compute_array_stats into thinking it's an array
+    array_stats->compute_stats(array_stats,
+                               set_fetch_func,
+                               samplerows,
+                               totalrows);
+    // Recalculate scalar stats with the original fetchfunc. This is because
+    // array and sets have different representations, so we need the original
+    // width, nulls, etc.
+    extra_data->std_compute_stats(stats, fetchfunc, samplerows, totalrows);
+
+    // First slot the standard statistics left unused
+    while (dst < STATISTIC_NUM_SLOTS && stats->stakind[dst] != 0)
+        dst++;
+
+    // Then just copy the stats we need: MCELEM and DECHIST. The source slots
+    // are scanned from the beginning with their own index, otherwise every
+    // array slot below the first free standard slot would be skipped.
+    for (src = 0; src < STATISTIC_NUM_SLOTS && dst < STATISTIC_NUM_SLOTS;
+         src++)
+    {
+        int16 stakind = array_stats->stakind[src];
+        if (stakind != STATISTIC_KIND_MCELEM &&
+            stakind != STATISTIC_KIND_DECHIST)
+            // Skip correlation and histogram as they depend on less than
+            // operator, which does not exist for sets
+            continue;
+
+        stats->stakind[dst] = array_stats->stakind[src];
+        stats->staop[dst] = array_stats->staop[src];
+        stats->stacoll[dst] = array_stats->stacoll[src];
+
+        stats->statypid[dst] = array_stats->statypid[src];
+        stats->statyplen[dst] = array_stats->statyplen[src];
+        stats->statypbyval[dst] = array_stats->statypbyval[src];
+        stats->statypalign[dst] = array_stats->statypalign[src];
+
+        stats->numnumbers[dst] = array_stats->numnumbers[src];
+        stats->stanumbers[dst] = array_stats->stanumbers[src];
+
+        stats->numvalues[dst] = array_stats->numvalues[src];
+        stats->stavalues[dst] = array_stats->stavalues[src];
+        dst++;
+    }
+}
+
+/* Restriction selectivity estimator for "pg_set @> pg_set". */
+PG_FUNCTION_INFO_V1(pg_set_subset_sel);
+Datum
+pg_set_subset_sel(PG_FUNCTION_ARGS)
+{
+    PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+    List *args = (List *) PG_GETARG_POINTER(2);
+    int var_relid = PG_GETARG_INT32(3);
+    VariableStatData vardata;
+    Selectivity result = calc_opsel(root,
+                                    args,
+                                    var_relid,
+                                    &vardata,
+                                    SUBSET,
+                                    extract_set_elements);
+    ReleaseVariableStats(vardata);
+    PG_RETURN_FLOAT8(result);
+}
+
+PG_FUNCTION_INFO_V1(pg_set_contains_sel);
+Datum
+pg_set_contains_sel(PG_FUNCTION_ARGS)
+{
+    /* Restriction selectivity for "pg_set @> int4": estimated as an overlap
+     * with a one-element set. */
+    PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+    List *args = (List *) PG_GETARG_POINTER(2);
+    int var_relid = PG_GETARG_INT32(3);
+    VariableStatData vardata;
+    Selectivity result = calc_opsel(root,
+                                    args,
+                                    var_relid,
+                                    &vardata,
+                                    OVERLAP,
+                                    element_to_array);
+    ReleaseVariableStats(vardata);
+    PG_RETURN_FLOAT8(result);
+}
+
+/* Restriction selectivity estimator for "pg_set && pg_set". */
+PG_FUNCTION_INFO_V1(pg_set_overlap_sel);
+Datum
+pg_set_overlap_sel(PG_FUNCTION_ARGS)
+{
+    PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+    List *args = (List *) PG_GETARG_POINTER(2);
+    int var_relid = PG_GETARG_INT32(3);
+    VariableStatData vardata;
+    Selectivity result = calc_opsel(root,
+                                    args,
+                                    var_relid,
+                                    &vardata,
+                                    OVERLAP,
+                                    extract_set_elements);
+    ReleaseVariableStats(vardata);
+    PG_RETURN_FLOAT8(result);
+}
+
+/* Join selectivity for overlap-like operators: always the default estimate. */
+PG_FUNCTION_INFO_V1(pg_set_join_overlap_sel);
+Datum
+pg_set_join_overlap_sel(PG_FUNCTION_ARGS)
+{
+    PG_RETURN_FLOAT8(DEFAULT_SEL(OVERLAP));
+}
+
+/* Join selectivity for subset-like operators: always the default estimate. */
+PG_FUNCTION_INFO_V1(pg_set_join_subset_sel);
+Datum
+pg_set_join_subset_sel(PG_FUNCTION_ARGS)
+{
+    PG_RETURN_FLOAT8(DEFAULT_SEL(SUBSET));
+}
+
+/*
+ * Return the element array of the set stored in datum and its length in
+ * *count. When detoasting produced a new copy, that copy is stored in
+ * *freeable so the caller can pfree it.
+ */
+static PgSetElement *
+extract_set_elements(Datum datum, uint32 *count, void **freeable)
+{
+    PgSet *set = DatumGetPgSetP(datum);
+    *count = set->count;
+
+    // Detoasted. Need to be freed later!
+    if (PointerGetDatum(set) != datum)
+        *freeable = set;
+
+    return ELEMENT_ARRAY(set);
+}
+
+/*
+ * Wrap a single int4 datum in a freshly allocated one-element array; the
+ * array is handed back through *freeable as well so the caller releases it.
+ */
+static PgSetElement *
+element_to_array(Datum datum, uint32 *count, void **freeable)
+{
+    PgSetElement *element = palloc(sizeof(PgSetElement));
+    *count = 1;
+    element[0] = DatumGetInt32(datum);
+    // Free the allocated array later
+    *freeable = element;
+    return element;
+}
+
+/*
+ * Shared body of the restriction estimators.
+ *
+ * Fills *vardata through get_restriction_variable; the caller is expected to
+ * call ReleaseVariableStats on it afterwards. When the clause cannot be
+ * examined, *vardata is zeroed so that release is a no-op.
+ */
+static Selectivity
+calc_opsel(PlannerInfo *root, List *args, int var_relid,
+           VariableStatData *vardata, Operator operator,
+           ExtractElementsFunc extract_elements)
+{
+    Node *operand;
+    bool var_on_left;
+    Selectivity result;
+    Datum constval;
+    uint32 count;
+    PgSetElement *elements;
+    void *freeable = NULL;
+
+    /*
+     * If expression is not (variable op something) or (something op
+     * variable), then punt and return a default estimate.
+     */
+    if (!get_restriction_variable(root,
+                                  args,
+                                  var_relid,
+                                  vardata,
+                                  &operand,
+                                  &var_on_left))
+    {
+        /*
+         * On failure *vardata is either untouched (still uninitialized) or
+         * holds stats that were already released. Zero it so the caller's
+         * ReleaseVariableStats() neither reads garbage nor releases twice.
+         */
+        memset(vardata, 0, sizeof(VariableStatData));
+        return DEFAULT_SEL(operator);
+    }
+
+    /*
+     * Can't do anything useful if the something is not a constant, either.
+     */
+    if (!IsA(operand, Const))
+    {
+        return DEFAULT_SEL(operator);
+    }
+
+    /*
+     * The "&&", "@>" and "<@" operators are strict, so we can cope with a
+     * NULL constant right away.
+     */
+    if (((Const *) operand)->constisnull)
+    {
+        return 0.0;
+    }
+
+    constval = ((Const *) operand)->constvalue;
+    elements = extract_elements(constval, &count, &freeable);
+
+    if (HeapTupleIsValid(vardata->statsTuple))
+    {
+        Form_pg_statistic stats;
+        AttStatsSlot mcelem_slot;
+
+        stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
+
+        if (get_attstatsslot(&mcelem_slot,
+                             vardata->statsTuple,
+                             STATISTIC_KIND_MCELEM,
+                             InvalidOid,
+                             ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
+        {
+            /* Use the most-common-elements slot for the array Var. */
+            result = mcelem_set_sel(elements, count, mcelem_slot, operator);
+            free_attstatsslot(&mcelem_slot);
+        }
+        else
+        {
+            /* No most-common-elements info, so do without */
+            result = mcelem_set_sel(elements, count, default_slot, operator);
+        }
+
+        /*
+         * MCE stats count only non-null rows, so adjust for null rows.
+         */
+        result *= (1.0 - stats->stanullfrac);
+    }
+    else
+    {
+        /* No stats at all, so do without */
+        /* we assume no nulls here, so no stanullfrac correction */
+        result = mcelem_set_sel(elements, count, default_slot, operator);
+    }
+
+    if (freeable)
+        pfree(freeable);
+
+    return result;
+}
+
+/*
+ * Estimate the selectivity of "col @> elements" (SUBSET) or
+ * "col && elements" (OVERLAP) from a most-common-elements slot.
+ *
+ * elements and slot.values are walked together with two cursors, so both
+ * are expected to be sorted in ascending order. slot is passed by value and
+ * may be the empty default_slot.
+ */
+static Selectivity
+mcelem_set_sel(PgSetElement *elements, uint32 count, AttStatsSlot slot,
+               Operator operator)
+{
+    Selectivity result, elem_sel;
+    uint32 k = 0, j = 0;
+    float4 minfreq;
+
+    /*
+     * There should be three more Numbers than Values, because the last three
+     * cells should hold minimal and maximal frequency among the non-null
+     * elements, and then the frequency of null elements. Ignore the Numbers
+     * if not right.
+     */
+    if (slot.nnumbers != slot.nvalues + 3)
+    {
+        slot.numbers = NULL;
+        slot.nnumbers = 0;
+    }
+
+    if (slot.numbers)
+    {
+        /* Grab the lowest observed frequency */
+        minfreq = slot.numbers[slot.nvalues];
+    }
+    else
+    {
+        /* Without statistics make some default assumptions */
+        minfreq = 2 * (float4) DEFAULT_SEL(operator);
+    }
+
+    if (operator== SUBSET)
+    {
+        /*
+         * Initial selectivity for "col_set @> const_set" query is 1.0, and
+         * it will be decreased with each element of constant set.
+         */
+        result = 1.0;
+    }
+    else
+    {
+        /*
+         * Initial selectivity for "col_set && const_set" query is 0.0, and
+         * it will be increased with each element of constant array.
+         */
+        result = 0.0;
+    }
+
+    /* The set case is much simpler than native array. The elements can be
+     * assumed to be sorted and there are no nulls. So we can just iterate
+     * elements and mcelem with two pointers */
+    for (; j < count && k < (uint32) slot.nvalues;)
+    {
+        PgSetElement mcelem = DatumGetInt32(slot.values[k]);
+        if (elements[j] > mcelem)
+        {
+            // Move to next mcelem
+            k++;
+            continue;
+        }
+        else if (elements[j] < mcelem)
+        {
+            // No match
+            j++;
+            elem_sel = Min(DEFAULT_SEL(operator), minfreq / 2);
+        }
+        else
+        {
+            // Match! numbers[k] is the frequency of values[k], so it has to
+            // be read before the cursor advances. Without a usable numbers
+            // array fall back to the same guess as for a mismatch.
+            if (slot.numbers)
+                elem_sel = slot.numbers[k];
+            else
+                elem_sel = Min(DEFAULT_SEL(operator), minfreq / 2);
+            j++;
+            k++;
+        }
+
+        if (operator== SUBSET)
+            result *= elem_sel;
+        else
+            /* The multiplication exists because a set may have overlap with
+            many elements of the query, so the events are not disjoint and we
+            need to subtract the probability of an element that matches the
+            histogram is contained a set that was selected previously */
+            result = result + elem_sel - result * elem_sel;
+    }
+
+    for (; j < count; j++)
+    {
+        // Deal with the rest of mismatches
+        elem_sel = MIN(DEFAULT_SEL(operator), minfreq / 2);
+        if (operator== SUBSET)
+            result *= elem_sel;
+        else
+            result = result + elem_sel - result * elem_sel;
+    }
+
+    CLAMP_PROBABILITY(result);
+    return result;
+}
diff --git a/pg_set_gin.c b/pg_set_gin.c
new file mode 100644
index 0000000..785e399
--- /dev/null
+++ b/pg_set_gin.c
@@ -0,0 +1,104 @@
+#include "postgres.h"
+#include "access/gin.h"
+#include "access/stratnum.h"
+#include "fmgr.h"
+#include "pg_set.h"
+
+/*
+ * Convert the elements of a set into an array of int4 Datums. *nentries
+ * receives the element count; the result is NULL for an empty set.
+ */
+static Datum *
+pg_set_to_keys(PgSet *set, int32 *nentries)
+{
+    uint32 i;
+    PgSetElement *elements = ELEMENT_ARRAY(set);
+    Datum *out = NULL;
+
+    *nentries = set->count;
+    if (set->count)
+        out = (Datum *) palloc(sizeof(Datum) * set->count);
+
+    for (i = 0; i < set->count; i++)
+    {
+        out[i] = Int32GetDatum(elements[i]);
+    }
+
+    return out;
+}
+
+/* GIN extractValue: one key per element of the indexed set. */
+PG_FUNCTION_INFO_V1(gin_extract_pg_set_value);
+Datum
+gin_extract_pg_set_value(PG_FUNCTION_ARGS)
+{
+    PgSet *in = PG_GETARG_PG_SET_P(0);
+    int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
+
+    PG_RETURN_POINTER(pg_set_to_keys(in, nentries));
+}
+
+PG_FUNCTION_INFO_V1(gin_extract_pg_set_query);
+Datum
+gin_extract_pg_set_query(PG_FUNCTION_ARGS)
+{
+    /*
+     * GIN extractQuery: turn the query into keys. A set query yields one
+     * key per element, an int4 query (contains strategy) a single key.
+     * An empty query set needs a non-default search mode, because a default
+     * mode search with zero keys matches nothing.
+     */
+    int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
+    StrategyNumber strategy = PG_GETARG_UINT16(2);
+    int32 *searchMode = (int32 *) PG_GETARG_POINTER(6);
+    Datum *entries = NULL;
+
+    if (strategy == PgSetSubsetStrategyNumber ||
+        strategy == PgSetEqualsStrategyNumber ||
+        strategy == PgSetOverlapStrategyNumber)
+    {
+        PgSet *query = PG_GETARG_PG_SET_P(0);
+        entries = pg_set_to_keys(query, nentries);
+        if (entries == NULL)
+        {
+            // The empty set is subset of all sets.
+            if (strategy == PgSetSubsetStrategyNumber)
+                *searchMode = GIN_SEARCH_MODE_ALL;
+            // Only empty sets can equal the empty set, so scan the items
+            // that were indexed without any key.
+            else if (strategy == PgSetEqualsStrategyNumber)
+                *searchMode = GIN_SEARCH_MODE_INCLUDE_EMPTY;
+        }
+    }
+    else if (strategy == PgSetContainsStrategyNumber)
+    {
+        PgSetElement query = PG_GETARG_INT32(0);
+        entries = (Datum *) palloc(sizeof(Datum));
+        *nentries = 1;
+        entries[0] = Int32GetDatum(query);
+    }
+    else
+    {
+        elog(ERROR, "unrecognized strategy number: %d", strategy);
+    }
+
+    PG_RETURN_POINTER(entries);
+}
+
+/*
+ * GIN triConsistent: combine the ternary presence flags of the query keys.
+ *
+ * Subset:   GIN_FALSE if any key is absent, GIN_MAYBE if any is uncertain,
+ *           GIN_TRUE otherwise.
+ * Equals:   GIN_FALSE if any key is absent, GIN_MAYBE otherwise (the index
+ *           cannot rule out extra elements, so a recheck is needed).
+ * Overlap / contains: GIN_TRUE if any key is present, GIN_MAYBE if none is
+ *           present but one is uncertain, GIN_FALSE otherwise.
+ */
+PG_FUNCTION_INFO_V1(gin_triconsistent_pg_set);
+Datum
+gin_triconsistent_pg_set(PG_FUNCTION_ARGS)
+{
+    int i;
+    GinTernaryValue result = GIN_MAYBE;
+    GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
+    StrategyNumber strategy = PG_GETARG_UINT16(1);
+    int32 nkeys = PG_GETARG_INT32(3);
+    if (strategy == PgSetSubsetStrategyNumber ||
+        strategy == PgSetEqualsStrategyNumber)
+    {
+        /* Subset is true if all keys are present in the set. */
+        result = strategy == PgSetSubsetStrategyNumber ? GIN_TRUE : GIN_MAYBE;
+        for (i = 0; i < nkeys; i++)
+        {
+            if (check[i] == GIN_FALSE)
+            {
+                // A missing key settles it, whatever the other keys say
+                result = GIN_FALSE;
+                break;
+            }
+            if (check[i] == GIN_MAYBE)
+            {
+                // If the key maybe is on the set, we cannot say for sure
+                // it's a subset or not
+                result = GIN_MAYBE;
+            }
+        }
+    }
+    else if (strategy == PgSetOverlapStrategyNumber ||
+             strategy == PgSetContainsStrategyNumber)
+    {
+        /* At least one key has to be present */
+        result = GIN_FALSE;
+        for (i = 0; i < nkeys; i++)
+        {
+            if (check[i] == GIN_TRUE)
+            {
+                result = GIN_TRUE;
+                break;
+            }
+            if (check[i] == GIN_MAYBE)
+                result = GIN_MAYBE;
+        }
+    }
+    else
+        elog(ERROR, "unrecognized strategy number: %d", strategy);
+    PG_RETURN_GIN_TERNARY_VALUE(result);
+}
diff --git a/pg_set_gist.c b/pg_set_gist.c
new file mode 100644
index 0000000..94de372
--- /dev/null
+++ b/pg_set_gist.c
@@ -0,0 +1,687 @@
+#include "postgres.h"
+#include "access/gist.h"
+#include "access/reloptions.h"
+#include "common/int.h"
+#include "fmgr.h"
+#include "pg_set.h"
+#include "port/pg_bitutils.h"
+#include "varatt.h"
+
+typedef struct
+{
+    int32 vl_len_;
+    uint32 masklen; /* mask length in bits */
+} GistPgSetOptions;
+
+/*
+ * GiST key: element bounds plus a hashed bit mask. setbits counts the bits
+ * set in the mask; keys with no bit set or with every bit set are stored
+ * without the mask array.
+ */
+typedef struct
+{
+    char vl_len_[4];
+    PgSetElement min;
+    PgSetElement max;
+    uint16 setbits;
+    uint16 mask[FLEXIBLE_ARRAY_MEMBER];
+} GistPgSet;
+
+/* Cost here is basically the potential cost of splitting an entry to the
+ * wrong side. The lower the cost, the more the entry fits well on both sides */
+typedef struct
+{
+    OffsetNumber pos;
+    int32 cost;
+} GistSplitCost;
+
+/* Balance term for picksplit: minus the cubed size difference of the two
+ * sides, scaled by the weight f. */
+#define WISH_F(l, r, f) \
+    (double) (-(double) (((l) - (r)) * ((l) - (r)) * ((l) - (r))) * (f))
+// Gist entries are padded in 8 bytes. 32 seems a fine default
+#define MASKLEN_DEFAULT ((32 - sizeof(GistPgSet)) * BITS_PER_BYTE)
+#define MASKLEN_MAX \
+    ((GISTMaxIndexKeySize - sizeof(GistPgSet)) * BITS_PER_BYTE)
+#define GET_MASKLEN() \
+    (PG_HAS_OPCLASS_OPTIONS() ? \
+         ((GistPgSetOptions *) PG_GET_OPCLASS_OPTIONS())->masklen : \
+         MASKLEN_DEFAULT)
+#define HAS_BIT(key_, index, bit) ((key_->mask)[index] & bit)
+#define IS_FULL(key_, masklen_) (key_->setbits == masklen_)
+#define IS_EMPTY(key_) (key_->setbits == 0)
+#define MASK_INDEX_AND_BIT(element_, masklen_, index_, bit_) \
+    do \
+    { \
+        uint32 hash = HASHVAL(&(element_), (masklen_)); \
+        index_ = MASK_INDEX(hash), bit_ = MASK_BIT(hash); \
+    } while (0)
+
+#define GIST_PG_SET(datum_) ((GistPgSet *) DatumGetPointer(datum_))
+#define GET_GIST_ENTRY(vec, pos) GIST_PG_SET((vec)->vector[(pos)].key)
+#define COPY_DATUM(out_, in_) \
+    do \
+    { \
+        out_ = palloc(VARSIZE(in_)); \
+        memcpy(out_, in_, VARSIZE(in_)); \
+    } while (0)
+
+static inline void realloc_key(GistPgSet **in, uint32 masklen);
+static void union_key(GistPgSet **out, GistPgSet *key, uint32 masklen);
+static inline uint16 count_set_bits(uint16 n);
+static int32 calc_penalty(GistPgSet *left, GistPgSet *right, uint32 masklen);
+static uint32 hammdist(uint16 *mask_a, uint16 *mask_b, uint32 masklen);
+static bool gist_pg_set_subset(GistPgSet *key, PgSetElement *elements,
+                               uint32 count, uint32 masklen, bool *recheck);
+static bool gist_pg_set_overlaps(GistPgSet *key, PgSetElement *elements,
+                                 uint32 count, uint32 masklen, bool *recheck);
+static bool gist_pg_set_equals(GistPgSet *key, PgSetElement *elements,
+                               uint32 count, uint32 masklen, bool *recheck);
+static int comparecost(const void *a, const void *b);
+
+/* Input function of the key type: always raises an error. */
+PG_FUNCTION_INFO_V1(gist_pg_set_in);
+Datum
+gist_pg_set_in(PG_FUNCTION_ARGS)
+{
+    ereport(ERROR,
+            (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+             errmsg("cannot accept a value of type %s", "gist_pg_set")));
+
+    PG_RETURN_VOID();
+}
+
+/* Output function of the key type: always raises an error. */
+PG_FUNCTION_INFO_V1(gist_pg_set_out);
+Datum
+gist_pg_set_out(PG_FUNCTION_ARGS)
+{
+    ereport(ERROR,
+            (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+             errmsg("cannot display a value of type %s", "gist_pg_set")));
+
+    PG_RETURN_VOID();
+}
+
+PG_FUNCTION_INFO_V1(gist_pg_set_compress);
+Datum
+gist_pg_set_compress(PG_FUNCTION_ARGS)
+{
+    /*
+     * GiST compress: a leaf value (a pg_set) is turned into a GistPgSet key
+     * holding the hashed bit mask and the first/last element as bounds.
+     * Keys with no bit set or with every bit set are shrunk to the bare
+     * header. Non-leaf entries are returned unchanged.
+     */
+    GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+    uint32 masklen = GET_MASKLEN();
+    GISTENTRY *retval = entry;
+    if (entry->leafkey)
+    {
+        uint32 i;
+        PgSet *in = (PgSet *) PG_DETOAST_DATUM(entry->key);
+        PgSetElement *elements = ELEMENT_ARRAY(in);
+        GistPgSet *key = NULL;
+        realloc_key(&key, masklen);
+        for (i = 0; i < in->count; i++)
+        {
+            uint32 index, bit;
+            MASK_INDEX_AND_BIT(elements[i], masklen, index, bit);
+            if (!HAS_BIT(key, index, bit))
+            {
+                key->mask[index] |= bit;
+                key->setbits++;
+            }
+
+            if (IS_FULL(key, masklen))
+            {
+                realloc_key(&key, 0);
+                break;
+            }
+        }
+
+        if (IS_EMPTY(key))
+        {
+            realloc_key(&key, 0);
+        }
+        else
+        {
+            key->min = elements[0];
+            key->max = elements[in->count - 1];
+        }
+
+        retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+        gistentryinit(*retval,
+                      PointerGetDatum(key),
+                      entry->rel,
+                      entry->page,
+                      entry->offset,
+                      false);
+    }
+    PG_RETURN_POINTER(retval);
+}
+
+/* GiST decompress: keys are used as stored, so the entry is passed through. */
+PG_FUNCTION_INFO_V1(gist_pg_set_decompress);
+Datum
+gist_pg_set_decompress(PG_FUNCTION_ARGS)
+{
+    PG_RETURN_POINTER(PG_GETARG_POINTER(0));
+}
+
+/*
+ * GiST same: writes into *result whether two keys are identical, i.e. have
+ * the same bit count, the same bounds and, when a mask is stored, the same
+ * mask words.
+ */
+PG_FUNCTION_INFO_V1(gist_pg_set_same);
+Datum
+gist_pg_set_same(PG_FUNCTION_ARGS)
+{
+    uint32 i;
+    GistPgSet *key_a = (GistPgSet *) PG_GETARG_POINTER(0);
+    GistPgSet *key_b = (GistPgSet *) PG_GETARG_POINTER(1);
+    bool *result = (bool *) PG_GETARG_POINTER(2);
+    uint32 masklen = GET_MASKLEN();
+
+    if (key_a->setbits != key_b->setbits || key_a->min != key_b->min ||
+        key_a->max != key_b->max)
+    {
+        *result = false;
+    }
+    // Both keys are guaranteed to have no mask here
+    else if (key_a->setbits == 0 || key_a->setbits == masklen)
+    {
+        *result = true;
+    }
+    else
+    {
+        // Equal until a differing mask word is found; without this the
+        // output would be left unwritten for identical masks
+        *result = true;
+        for (i = 0; i < masklen / BITS_PER_MASK_POS; i++)
+        {
+            if (key_a->mask[i] != key_b->mask[i])
+            {
+                *result = false;
+                break;
+            }
+        }
+    }
+
+    PG_RETURN_POINTER(result);
+}
+
+/*
+ * GiST union: fold every entry of the vector into one key with union_key.
+ * A result with no bit set is shrunk to the bare header. The size of the
+ * result is stored in *out_size.
+ */
+PG_FUNCTION_INFO_V1(gist_pg_set_union);
+Datum
+gist_pg_set_union(PG_FUNCTION_ARGS)
+{
+    int32 i;
+    GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
+    int32 *out_size = (int32 *) PG_GETARG_POINTER(1);
+    int32 len = entryvec->n;
+    uint32 masklen = GET_MASKLEN();
+    GistPgSet *out = NULL;
+    realloc_key(&out, masklen);
+
+    for (i = 0; i < len; i++)
+    {
+        union_key(&out, GET_GIST_ENTRY(entryvec, i), masklen);
+    }
+
+    if (IS_EMPTY(out))
+    {
+        realloc_key(&out, 0);
+    }
+    *out_size = VARSIZE(out);
+    PG_RETURN_POINTER(out);
+}
+
+/* GiST penalty: calc_penalty between the existing key and the new key. */
+PG_FUNCTION_INFO_V1(gist_pg_set_penalty);
+Datum
+gist_pg_set_penalty(PG_FUNCTION_ARGS)
+{
+    GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0);
+    GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
+    float *penalty = (float *) PG_GETARG_POINTER(2);
+    GistPgSet *in = (GistPgSet *) DatumGetPointer(origentry->key);
+    GistPgSet *out = (GistPgSet *) DatumGetPointer(newentry->key);
+    *penalty = calc_penalty(in, out, GET_MASKLEN());
+    PG_RETURN_POINTER(penalty);
+}
+
+/*
+ * GiST picksplit.
+ *
+ * 1. Pick as seeds the pair of entries with the highest mutual penalty.
+ * 2. Sort the entries by |penalty to left seed - penalty to right seed|.
+ * 3. Assign each entry to the side with the lower penalty, nudged by
+ *    WISH_F so the two sides stay balanced, growing that side's union key.
+ */
+PG_FUNCTION_INFO_V1(gist_pg_set_picksplit);
+Datum
+gist_pg_set_picksplit(PG_FUNCTION_ARGS)
+{
+    OffsetNumber j, k;
+    OffsetNumber *leftoff, *rightoff;
+    GistSplitCost *costvector;
+    GistPgSet *j_entry, *k_entry;
+
+    int32 penalty_l, penalty_r;
+    GistPgSet *datum_l, *datum_r;
+    OffsetNumber seed_l = 0, seed_r = 0;
+
+    GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
+    GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
+
+    uint32 masklen = GET_MASKLEN();
+
+    OffsetNumber maxoff = entryvec->n - 1;
+    int32 penalty, max_penalty = -1;
+    uint32 nbytes = (maxoff + 1) * sizeof(OffsetNumber);
+
+    v->spl_left = (OffsetNumber *) palloc(nbytes);
+    v->spl_right = (OffsetNumber *) palloc(nbytes);
+    leftoff = v->spl_left;
+    rightoff = v->spl_right;
+    v->spl_nleft = 0;
+    v->spl_nright = 0;
+
+    for (j = FirstOffsetNumber; j < maxoff; j = OffsetNumberNext(j))
+    {
+        j_entry = GET_GIST_ENTRY(entryvec, j);
+        for (k = OffsetNumberNext(j); k <= maxoff; k = OffsetNumberNext(k))
+        {
+            k_entry = GET_GIST_ENTRY(entryvec, k);
+            penalty = calc_penalty(j_entry, k_entry, masklen);
+            if (penalty > max_penalty)
+            {
+                max_penalty = penalty;
+                seed_l = j;
+                seed_r = k;
+            }
+        }
+    }
+
+    // No pair stood out: fall back to the first two entries
+    if (seed_l == 0 || seed_r == 0)
+    {
+        seed_l = FirstOffsetNumber;
+        seed_r = OffsetNumberNext(seed_l);
+    }
+
+    if (v->spl_ldatum_exists)
+    {
+        COPY_DATUM(datum_l, GIST_PG_SET(v->spl_ldatum));
+        union_key(&datum_l, GET_GIST_ENTRY(entryvec, seed_l), masklen);
+        v->spl_ldatum_exists = false;
+    }
+    else
+    {
+        COPY_DATUM(datum_l, GET_GIST_ENTRY(entryvec, seed_l));
+    }
+
+    if (v->spl_rdatum_exists)
+    {
+        COPY_DATUM(datum_r, GIST_PG_SET(v->spl_rdatum));
+        union_key(&datum_r, GET_GIST_ENTRY(entryvec, seed_r), masklen);
+        v->spl_rdatum_exists = false;
+    }
+    else
+    {
+        COPY_DATUM(datum_r, GET_GIST_ENTRY(entryvec, seed_r));
+    }
+
+    costvector = (GistSplitCost *) palloc(sizeof(GistSplitCost) * maxoff);
+
+    for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
+    {
+        costvector[j - 1].pos = j;
+        j_entry = GET_GIST_ENTRY(entryvec, j);
+        penalty_l = calc_penalty(datum_l, j_entry, masklen);
+        penalty_r = calc_penalty(datum_r, j_entry, masklen);
+        costvector[j - 1].cost = abs(penalty_l - penalty_r);
+    }
+    qsort(costvector, maxoff, sizeof(GistSplitCost), comparecost);
+
+    for (k = 0; k < maxoff; k++)
+    {
+        j = costvector[k].pos;
+        if (j == seed_l)
+        {
+            *leftoff++ = j;
+            v->spl_nleft++;
+            continue;
+        }
+        else if (j == seed_r)
+        {
+            *rightoff++ = j;
+            v->spl_nright++;
+            continue;
+        }
+
+        j_entry = GET_GIST_ENTRY(entryvec, j);
+        penalty_l = calc_penalty(datum_l, j_entry, masklen);
+        penalty_r = calc_penalty(datum_r, j_entry, masklen);
+
+        if (penalty_l <
+            penalty_r + WISH_F(v->spl_nleft, v->spl_nright, 0.00001))
+        {
+            union_key(&datum_l, j_entry, masklen);
+            *leftoff++ = j;
+            v->spl_nleft++;
+        }
+        else
+        {
+            union_key(&datum_r, j_entry, masklen);
+            *rightoff++ = j;
+            v->spl_nright++;
+        }
+    }
+
+    *rightoff = *leftoff = FirstOffsetNumber;
+    pfree(costvector);
+
+    v->spl_ldatum = PointerGetDatum(datum_l);
+    v->spl_rdatum = PointerGetDatum(datum_r);
+
+    PG_RETURN_POINTER(v);
+}
+ +/* + * GiST consistent support function. Reads the index entry (argument 0), the + * query (argument 1), the strategy number (argument 2) and the recheck flag + * pointer (argument 4) and returns whether the entry may satisfy the query. + * *recheck starts out true and is then set by the helper that handles the + * strategy; an unknown strategy raises an ERROR. + */ +PG_FUNCTION_INFO_V1(gist_pg_set_consistent); +Datum +gist_pg_set_consistent(PG_FUNCTION_ARGS) +{ + GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); + StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2); + bool *recheck = (bool *) PG_GETARG_POINTER(4); + uint32 masklen = GET_MASKLEN(); + GistPgSet *key = (GistPgSet *) DatumGetPointer(entry->key); + bool retval = true; + *recheck = true; + + /* For this strategy the query is a bare int32, handled as a one-element + * query */ + if (strategy == PgSetContainsStrategyNumber) + { + PgSetElement query = PG_GETARG_INT32(1); + PgSetElement elements[] = {query}; + retval = gist_pg_set_subset(key, elements, 1, masklen, recheck); + } + else + { + /* Every other strategy receives a whole PgSet as the query */ + PgSet *query = PG_GETARG_PG_SET_P(1); + PgSetElement *elements = ELEMENT_ARRAY(query); + switch (strategy) + { + case PgSetSubsetStrategyNumber: + retval = gist_pg_set_subset(key, + elements, + query->count, + masklen, + recheck); + break; + + case PgSetOverlapStrategyNumber: + retval = gist_pg_set_overlaps(key, + elements, + query->count, + masklen, + recheck); + break; + + case PgSetEqualsStrategyNumber: + /* The equality helper runs on leaf entries only; any other + * entry is tested with the subset helper (presumably because + * non-leaf keys describe several sets at once -- confirm) */ + if (GIST_LEAF(entry)) + { + retval = gist_pg_set_equals(key, + elements, + query->count, + masklen, + recheck); + } + else + { + retval = gist_pg_set_subset(key, + elements, + query->count, + masklen, + recheck); + } + break; + + default: + elog(ERROR, "unrecognized strategy number: %d", strategy); + } + } + PG_RETURN_BOOL(retval); +} + +/* + * GiST options support function. Declares the integer option "masklen" + * ("mask length in bits") on the local_relopts passed as argument 0, giving + * MASKLEN_DEFAULT, BITS_PER_MASK_POS and MASKLEN_MAX as its default, minimum + * and maximum, and storing it in GistPgSetOptions.masklen. + */ +PG_FUNCTION_INFO_V1(gist_pg_set_options); +Datum +gist_pg_set_options(PG_FUNCTION_ARGS) +{ + local_relopts *relopts = (local_relopts *) PG_GETARG_POINTER(0); + + init_local_reloptions(relopts, sizeof(GistPgSetOptions)); + add_local_int_reloption(relopts, + "masklen", + "mask length in bits", + MASKLEN_DEFAULT, + BITS_PER_MASK_POS, + MASKLEN_MAX, + offsetof(GistPgSetOptions, masklen)); + + PG_RETURN_VOID(); +} + +/* + * Hamming distance between two masks: XOR each pair of 16-bit words over the + * first masklen / BITS_PER_MASK_POS words and add up the set bits. + */ +static uint32 +hammdist(uint16 *mask_a, uint16 *mask_b, uint32 masklen) +{ + uint32 i, dist = 0; + for (i = 0; i < masklen / BITS_PER_MASK_POS; i++) + { + dist += count_set_bits(mask_a[i] ^ mask_b[i]); + } 
+ return dist; +} + +/* + * Returns whether `key` may contain every one of the `count` query elements, + * and stores in *recheck whether a true answer still has to be rechecked. + * elements[0] and elements[count - 1] are used as the query's minimum and + * maximum (assumes the array is sorted ascending -- TODO confirm). + */ +static bool +gist_pg_set_subset(GistPgSet *key, PgSetElement *elements, uint32 count, + uint32 masklen, bool *recheck) +{ + uint32 i; + PgSetElement min, max; + /* An empty query is reported as contained, with no recheck */ + if (count == 0) + { + *recheck = false; + return true; + } + + if (IS_EMPTY(key)) + { + *recheck = false; + return false; + } + + min = elements[0]; + max = elements[count - 1]; + /* The query reaches outside the key's [min, max] bounds */ + if (key->max < max || key->min > min) + { + *recheck = false; + return false; + } + + /* A full key is answered without looking at individual bits */ + if (IS_FULL(key, masklen)) + { + *recheck = true; + return true; + } + /* One query element whose bit is missing from the key rules it out */ + for (i = 0; i < count; i++) + { + uint32 index, bit; + MASK_INDEX_AND_BIT(elements[i], masklen, index, bit); + if (!HAS_BIT(key, index, bit)) + { + *recheck = false; + return false; + } + } + *recheck = true; + return true; +} + +/* + * Returns whether `key` may share at least one element with the `count` query + * elements, and stores in *recheck whether a true answer still has to be + * rechecked. elements[0] and elements[count - 1] are used as the query's + * minimum and maximum (assumes the array is sorted ascending -- TODO confirm). + */ +static bool +gist_pg_set_overlaps(GistPgSet *key, PgSetElement *elements, uint32 count, + uint32 masklen, bool *recheck) +{ + uint32 i; + PgSetElement min, max; + /* Nothing can overlap when either side has no elements / no bits set */ + if (count == 0 || key->setbits == 0) + { + *recheck = false; + return false; + } + + min = elements[0]; + max = elements[count - 1]; + /* The two [min, max] ranges are disjoint */ + if (key->max < min || key->min > max) + { + *recheck = false; + return false; + } + + /* A bound of the query equal to a bound of the key is reported as an + * overlap with no recheck */ + if (key->max == max || key->min == min || key->min == max || + key->max == min) + { + *recheck = false; + return true; + } + + /* Every bit is set, so the per-element test below would always match */ + if (key->setbits == masklen) + { + *recheck = true; + return true; + } + + /* The first query element whose bit is set in the key is enough */ + for (i = 0; i < count; i++) + { + uint32 index, bit; + MASK_INDEX_AND_BIT(elements[i], masklen, index, bit); + if (HAS_BIT(key, index, bit)) + { + *recheck = true; + return true; + } + } + *recheck = false; + return false; +} + +/* + * Returns whether `key` may be equal to the set formed by the `count` query + * elements, and stores the same answer in *recheck when the element loop runs + * to completion. elements[0] and elements[count - 1] are used as the query's + * minimum and maximum (assumes the array is sorted ascending -- TODO confirm). + */ +static bool +gist_pg_set_equals(GistPgSet *key, PgSetElement *elements, uint32 count, + uint32 masklen, bool *recheck) +{ + uint32 i; + PgSetElement min, max; + GistPgSet *copy; + /* Both sides empty */ + if (count == 0 && key->setbits == 0) + { + *recheck = false; + return true; + } + + /* Exactly one side empty */ + if (count == 0 || key->setbits == 0) + { + *recheck = false; + return false; + } + + min = elements[0]; + max = elements[count - 1]; + if (key->max != max || key->min != 
min) + { + *recheck = false; + return false; + } + + COPY_DATUM(copy, key); + for (i = 0; i < count; i++) + { + uint32 index, bit; + MASK_INDEX_AND_BIT(elements[i], masklen, index, bit); + if (!HAS_BIT(key, index, bit)) + { + *recheck = false; + return false; + } + if (HAS_BIT(copy, index, bit)) + { + copy->setbits--; + copy->mask[index] &= ~bit; + } + } + /* If all bits are the same, recheck, otherwise the set is possibly just a + * subset, but certainly not equal */ + *recheck = copy->setbits == 0; + return *recheck; +} + +/* Number of set bits in a 16-bit word, looked up one byte at a time in + * pg_number_of_ones */ +static inline uint16 +count_set_bits(uint16 n) +{ + return pg_number_of_ones[n & 0xFF] + pg_number_of_ones[(n >> 8) & 0xFF]; +} + +/* + * Merge `key` into `*out` in place so that `*out` covers both keys: the bounds + * become the smaller min and the larger max, and the masks are OR-ed together + * with setbits kept equal to the number of bits set in the resulting mask. + * Keys with setbits == 0 or setbits == masklen are handled by the early return + * or by copying `key` wholesale. `*out` may be reallocated. + */ +static void +union_key(GistPgSet **out, GistPgSet *key, uint32 masklen) +{ + uint32 i; + PgSetElement merged_min = MIN((*out)->min, key->min); + PgSetElement merged_max = MAX((*out)->max, key->max); + + (*out)->min = merged_min; + (*out)->max = merged_max; + + if (key->setbits == 0 || (*out)->setbits == masklen) + { + return; + } + + // Key is "1" or out is "0", so we can just copy + if ((*out)->setbits == 0 || key->setbits == masklen) + { + if (key->setbits < masklen) + { + realloc_key(out, masklen); + } + memcpy((*out), key, VARSIZE(key)); + /* The copy also brought over key's own bounds; put the merged ones + * back so the range out already covered is not lost */ + (*out)->min = merged_min; + (*out)->max = merged_max; + return; + } + + for (i = 0; i < masklen / BITS_PER_MASK_POS; i++) + { + /* Count only the bits out does not have yet; bits present in both + * masks would otherwise be counted twice */ + (*out)->setbits += + count_set_bits((uint16) (key->mask[i] & ~(*out)->mask[i])); + (*out)->mask[i] |= key->mask[i]; + + if (IS_FULL((*out), masklen)) + { + realloc_key(out, 0); + return; + } + } +} + +/* + * Replace `*in` with a zero-initialised key sized for a mask of `masklen` bits, + * carrying over as much of the old key as fits in the new one, and free the + * old key. The new key's varlena length is set to the new size. + */ +static inline void +realloc_key(GistPgSet **in, uint32 masklen) +{ + uint32 size = sizeof(GistPgSet) + SET_MASK_SIZE(masklen); + GistPgSet *out = palloc0(size); + if (*in != NULL) + { + /* Copy no more than the old key actually holds: when growing, the old + * allocation is shorter than `size`. Relies on the old key carrying a + * valid varlena length, as the VARSIZE(key) copy in union_key does */ + memcpy(out, *in, MIN(size, VARSIZE(*in))); + pfree(*in); + } + SET_VARSIZE(out, size); + *in = out; +} + +/* + * Cost of putting `left` and `right` together: a Hamming-style distance + * between the two keys, plus masklen when both are non-empty and their + * [min, max] ranges do not overlap. + */ +static int32 +calc_penalty(GistPgSet *left, GistPgSet *right, uint32 masklen) +{ + // Base penalty + int32 penalty = 0; + bool check_boundaries = true; + + /* This if chain is basically a way to calculate the Hamming distance more + * optimally for edge cases. 
We set this distance as the "base" penalty */ + if (IS_EMPTY(left) || IS_EMPTY(right)) + { + penalty += MAX(left->setbits, right->setbits); + check_boundaries = false; + } + else if (IS_FULL(left, masklen) || IS_FULL(right, masklen)) + { + penalty += masklen - MIN(left->setbits, right->setbits); + } + else + { + penalty = hammdist(left->mask, right->mask, masklen); + } + + /* Definitely no overlap. Add masklen as penalty so non overlapping entries + * are far apart */ + if (check_boundaries && (right->max < left->min || right->min > left->max)) + { + penalty += masklen; + } + return penalty; +} + +/* Three-way comparison of two GistSplitCost entries by their cost field */ +static int +comparecost(const void *a, const void *b) +{ + return pg_cmp_s32(((const GistSplitCost *) a)->cost, + ((const GistSplitCost *) b)->cost); +} diff --git a/pg_set_op.c b/pg_set_op.c index 4b9f561..97ed312 100644 --- a/pg_set_op.c +++ b/pg_set_op.c @@ -158,6 +158,12 @@ pg_set_superset(PG_FUNCTION_ARGS) PG_RETURN_BOOL(is_subset(set_A, set_B)); } +/* + * No compression here, because the sets need only one element in common, + * so compression may be most of the time a bad deal and thus we'll have to act + * pessimistically here. This is a fairly straightforward two pointer approach, + * O(m+n) time + */ PG_FUNCTION_INFO_V1(pg_set_overlaps); Datum pg_set_overlaps(PG_FUNCTION_ARGS) @@ -205,25 +211,16 @@ PG_FUNCTION_INFO_V1(pg_set_to_array); Datum pg_set_to_array(PG_FUNCTION_ARGS) { - uint32 i; - Datum *values; - ArrayType *out; PgSet *in = PG_GETARG_PG_SET_P(0); - PgSetElement *elements = ELEMENT_ARRAY(in); - if (in->count == 0) - { - out = construct_empty_array(INT4OID); - PG_RETURN_ARRAYTYPE_P(out); - } - values = palloc(in->count * sizeof(Datum)); - for (i = 0; i < in->count; i++) - { - values[i] = Int32GetDatum(elements[i]); - } - out = construct_array_builtin(values, in->count, INT4OID); - PG_RETURN_ARRAYTYPE_P(out); + ArrayType *out = cast_to_array(in); + PG_RETURN_POINTER(out); } +/* +* If element is contained, just return the input. 
Otherwise allocate as needed +* in case the set needs expansion in capacity. Copy the set elements and place +* the new element at the correct position. Can be as expensive as O(n) +*/ PG_FUNCTION_INFO_V1(pg_set_add); Datum pg_set_add(PG_FUNCTION_ARGS) @@ -233,6 +230,7 @@ pg_set_add(PG_FUNCTION_ARGS) PG_RETURN_POINTER(add_element(in, element)); } +/* Commutator to pg_set_add */ PG_FUNCTION_INFO_V1(pg_set_radd); Datum pg_set_radd(PG_FUNCTION_ARGS) @@ -242,6 +240,12 @@ pg_set_radd(PG_FUNCTION_ARGS) PG_RETURN_POINTER(add_element(in, element)); } +/* + * Straight two pointers. Pre-allocate an array of `|A| + |B|` elements and a + * mask corresponding to a capacity of `2*max(Cap(A), Cap(B))`. + * Make a two pointer union then shrink the mask as needed and copy the array + * to the destination with the proper length + */ PG_FUNCTION_INFO_V1(pg_set_union); Datum pg_set_union(PG_FUNCTION_ARGS) @@ -252,7 +256,8 @@ pg_set_union(PG_FUNCTION_ARGS) PgSet *set_B = PG_GETARG_PG_SET_P(1); PgSetElement *elements_A = ELEMENT_ARRAY(set_A); PgSetElement *elements_B = ELEMENT_ARRAY(set_B); - uint16 estimated_capacity = 2 * MAX(set_A->capacity, set_B->capacity); + // To avoid overflow when both sets are at max capacity. NOTE(review): the product is still evaluated in the operands' type before being widened -- confirm it cannot wrap there + uint64 estimated_capacity = 2 * MAX(set_A->capacity, set_B->capacity); PgSetElement *out_elements = palloc((set_A->count + set_B->count) * sizeof(PgSetElement)); uint16 *estimated_mask = palloc0(SET_MASK_SIZE(estimated_capacity)); @@ -293,6 +298,10 @@ pg_set_union(PG_FUNCTION_ARGS) PG_RETURN_POINTER(out); } +/* + * Similar to union, but pre-allocate `max(|A|, |B|)` elements and + * `max(Cap(A), Cap(B))` mask. 
+ */ PG_FUNCTION_INFO_V1(pg_set_intersect); Datum pg_set_intersect(PG_FUNCTION_ARGS) @@ -303,7 +312,7 @@ pg_set_intersect(PG_FUNCTION_ARGS) PgSet *set_B = PG_GETARG_PG_SET_P(1); PgSetElement *elements_A = ELEMENT_ARRAY(set_A); PgSetElement *elements_B = ELEMENT_ARRAY(set_B); - uint16 estimated_capacity = MAX(set_A->capacity, set_B->capacity); + uint32 estimated_capacity = MAX(set_A->capacity, set_B->capacity); PgSetElement *out_elements = palloc(MAX(set_A->count, set_B->count) * sizeof(PgSetElement)); uint16 *estimated_mask = palloc0(SET_MASK_SIZE(estimated_capacity)); @@ -330,6 +339,9 @@ pg_set_intersect(PG_FUNCTION_ARGS) PG_RETURN_POINTER(out); } +/* Similar to union, but pre-allocate `|A|` elements and a `Cap(A)` mask + * for the difference operation + */ PG_FUNCTION_INFO_V1(pg_set_diff); Datum pg_set_diff(PG_FUNCTION_ARGS) @@ -340,7 +352,7 @@ pg_set_diff(PG_FUNCTION_ARGS) PgSet *set_B = PG_GETARG_PG_SET_P(1); PgSetElement *elements_A = ELEMENT_ARRAY(set_A); PgSetElement *elements_B = ELEMENT_ARRAY(set_B); - uint16 estimated_capacity = set_A->capacity; + uint32 estimated_capacity = set_A->capacity; PgSetElement *out_elements = palloc(set_A->count * sizeof(PgSetElement)); uint16 *estimated_mask = palloc0(SET_MASK_SIZE(estimated_capacity)); for (i = 0, j = 0; i < set_A->count && j < set_B->count;) @@ -372,6 +384,65 @@ pg_set_diff(PG_FUNCTION_ARGS) PG_RETURN_POINTER(out); } +/* If the element is not contained, just return the input. Otherwise allocate as needed +* in case the set needs shrinking in capacity. Copy the set elements skipping +* the old element. 
Can be as expensive as O(n) +*/ +PG_FUNCTION_INFO_V1(pg_set_remove); +Datum +pg_set_remove(PG_FUNCTION_ARGS) +{ + uint32 i; + PgSet *out; + PgSetElement *out_elements; + PgSetElement *in_elements; + size_t new_size; + uint32 new_capacity; + bool old_position_skipped = false; + PgSet *in = PG_GETARG_PG_SET_P(0); + PgSetElement element = PG_GETARG_INT32(1); + if (!contains_element(in, element)) + { + PG_RETURN_POINTER(in); + } + in_elements = ELEMENT_ARRAY(in); + new_capacity = get_capacity(in->count - 1); + new_size = VARSIZE(in) - SET_MASK_SIZE(in->capacity) + + SET_MASK_SIZE(new_capacity) - sizeof(PgSetElement); + out = palloc0(new_size); + out->count = in->count - 1; + out->capacity = new_capacity; + out_elements = ELEMENT_ARRAY(out); + for (i = 0; i < in->count; i++) + { + if (in_elements[i] != element) + { + out_elements[i - old_position_skipped] = in_elements[i]; + SET_MASK_BIT(out->mask, in_elements[i], out->capacity); + } + else + { + old_position_skipped = true; + } + } + SET_VARSIZE(out, new_size); + PG_RETURN_POINTER(out); +} + +PG_FUNCTION_INFO_V1(pg_set_hash); +Datum +pg_set_hash(PG_FUNCTION_ARGS) +{ + PgSet *in = PG_GETARG_PG_SET_P(0); + uint32 hashval = + hash_bytes((unsigned char *) VARDATA(in), VARSIZE(in) - VARHDRSZ); + PG_RETURN_INT32(hashval); +} + +/* + * Hash the element and see if the mask has a match. 50% probability of answering in + * O(1) time if the element is absent. Otherwise do a binary search + */ static bool contains_element(PgSet *set, PgSetElement element) { @@ -442,6 +513,12 @@ get_capacity(uint32 count) return MAX(capacity, INITIAL_CAPACITY); } +/* + * Check counts first, if A count is smaller, compare masks + * (compress B mask to A capacity), if A has something B doesn't, return false. + * Otherwise make two pointer search. 
It should be O(m+n) in most cases, but it + * might take as little as 8 times fewer iterations if the mask check fails + */ static bool is_subset(PgSet *set_A, PgSet *set_B) { @@ -505,11 +582,11 @@ is_subset(PgSet *set_A, PgSet *set_B) return true; } +/* If A and B have the same count and A is a subset of B, +* then they are equal. */ static bool is_equal(PgSet *set_A, PgSet *set_B) { - /* If A and B have the same count and A is a subset of B, - * then they are equal. */ return set_A->count == set_B->count && is_subset(set_A, set_B); } @@ -534,8 +611,7 @@ add_element(PgSet *set, PgSetElement element) SET_MASK_SIZE(new_capacity) + sizeof(PgSetElement); out = palloc0(new_size); in_elements = ELEMENT_ARRAY(set); - out->count = set->count; - out->count++; + out->count = set->count + 1; CHECK_COUNT(out); out->capacity = new_capacity; out_elements = ELEMENT_ARRAY(out); @@ -556,7 +632,27 @@ add_element(PgSet *set, PgSetElement element) } out_elements[new_position] = element; SET_MASK_BIT(out->mask, element, out->capacity); - SET_VARSIZE(out, new_size); return out; } + +ArrayType * +cast_to_array(PgSet *args) +{ + uint32 i; + Datum *values; + ArrayType *out; + PgSetElement *elements = ELEMENT_ARRAY(args); + if (args->count == 0) + { + out = construct_empty_array(INT4OID); + return out; + } + values = palloc(args->count * sizeof(Datum)); + for (i = 0; i < args->count; i++) + { + values[i] = Int32GetDatum(elements[i]); + } + out = construct_array_builtin(values, args->count, INT4OID); + return out; +} diff --git a/sql/pg_set_test.sql b/sql/pg_set_test.sql index 43bf64c..3de4a59 100644 --- a/sql/pg_set_test.sql +++ b/sql/pg_set_test.sql @@ -96,6 +96,22 @@ SELECT pg_set_info(pg_set_add('{2,4,6,8,10,12,14}', 13)) = pg_set_info('{2,4,6,8 SELECT pg_set_info(pg_set_add('{2,4,6,8,10,12,14,16}', 0)) = pg_set_info('{0,2,4,6,8,10,12,14,16}'); SELECT pg_set_info(pg_set_add('{2,4,6,8,10,12,14,16}', 9)) = pg_set_info('{2,4,6,8,9,10,12,14,16}'); +SELECT pg_set_remove('{}', 1); +SELECT 
pg_set_remove('{2}', 2); +SELECT pg_set_remove('{1,2,3,4,5}', 1); +SELECT pg_set_remove('{1,2,3,4,6}', 7); +SELECT pg_set_remove('{2,4,6,8,10,12}', 10); +SELECT pg_set_remove('{2,4,6,8,10,12,14}', 13); +SELECT pg_set_remove('{2,4,6,8,10,9,12,14,16}', 9); + +SELECT pg_set_info(pg_set_remove('{}', 1)) = pg_set_info('{}'); +SELECT pg_set_info(pg_set_remove('{2}', 2)) = pg_set_info('{}'); +SELECT pg_set_info(pg_set_remove('{1,2,3,4,5}', 1)) = pg_set_info('{2,3,4,5}'); +SELECT pg_set_info(pg_set_remove('{1,2,3,4,6}', 7)) = pg_set_info('{1,2,3,4,6}'); +SELECT pg_set_info(pg_set_remove('{2,4,6,8,10,12}', 10)) = pg_set_info('{2,4,6,8,12}'); +SELECT pg_set_info(pg_set_remove('{2,4,6,8,10,12,14}', 13)) = pg_set_info('{2,4,6,8,10,12,14}'); +SELECT pg_set_info(pg_set_remove('{2,4,6,8,10,9,12,14,16}', 9)) = pg_set_info('{2,4,6,8,10,12,14,16}'); + SELECT pg_set_smallest('{}'); SELECT pg_set_smallest('{5,8,4,1,4,7,9,0,9}'); SELECT pg_set_smallest('{1,1,1,1,1,1}'); @@ -222,8 +238,10 @@ SELECT 4 <@ '{1,2,3}'; SELECT '{1,2,3,4,5}'::pg_set @> '{1,2,3}'; SELECT '{0,1,2}'::pg_set <@ '{1,2,3}'; -SELECT '{1,2,3,4,5}'::pg_set || 4; -SELECT 4 || '{1,2,3}'; +SELECT '{1,2,3,4,5}'::pg_set + 4; +SELECT 4 + '{1,2,3}'::pg_set; + +SELECT '{1,2,3,4,5}'::pg_set - 4; SELECT '{1,2,3,4,5}'::pg_set && '{6,7,8}'; SELECT '{1,2,3,4,5}'::pg_set && '{-5,-3,0,1}'; @@ -239,3 +257,207 @@ SELECT '{1,2,3,4,5}'::pg_set - '{6,7,8}'; SELECT '{0,3,5}'::pg_set::int4[]; SELECT '{1,2,3,4,5}'::int4[]::pg_set; + +-- Large set test +CREATE TABLE int4array_table ( + id int PRIMARY KEY, + values int4[] +); + +CREATE TABLE pg_set_table ( + id int PRIMARY KEY, + values pg_set +); + +CREATE FUNCTION array_sort(a int4[]) + RETURNS int4[] IMMUTABLE + LANGUAGE sql +AS $$ + SELECT array_agg(a ORDER BY a) FROM unnest(a) a; +$$; + +CREATE FUNCTION array_union(a int4[], b int4[]) + RETURNS int4[] IMMUTABLE + LANGUAGE sql +AS $$ + SELECT COALESCE(array_agg(c.a ORDER BY c.a), '{}') FROM ( + SELECT DISTINCT a FROM unnest(a) a + UNION + 
SELECT DISTINCT b FROM unnest(b) b + ORDER BY a + ) c; +$$; + +CREATE FUNCTION array_intersect(a int4[], b int4[]) + RETURNS int4[] IMMUTABLE + LANGUAGE sql +AS $$ + SELECT COALESCE(array_agg(c.a ORDER BY c.a), '{}') FROM ( + SELECT DISTINCT a FROM unnest(a) a + INTERSECT + SELECT DISTINCT b FROM unnest(b) b + ORDER BY a + ) c; +$$; + +CREATE FUNCTION array_diff(a int4[], b int4[]) + RETURNS int4[] IMMUTABLE + LANGUAGE sql +AS $$ + SELECT COALESCE(array_agg(c.a ORDER BY c.a), '{}') FROM ( + SELECT DISTINCT a FROM unnest(a) a + EXCEPT + SELECT DISTINCT b FROM unnest(b) b + ORDER BY a + ) c; +$$; + +INSERT INTO int4array_table (id, values) +SELECT + x, + vals.* +FROM + generate_series(0, 16) x +CROSS JOIN LATERAL ( + SELECT + array_sort(array_agg(DISTINCT trunc(random()* 100000000)::int)) + FROM + generate_series(1, (2^x)::int) +) vals; + +INSERT INTO pg_set_table (id, values) +SELECT + id, + values::pg_set +FROM + int4array_table; + +WITH sample AS ( + SELECT + id, + values[cardinality(values) / 2 + 1] mid_val, + values array_vals, + values::pg_set set_vals + FROM + int4array_table + WHERE + id = 7 +) +SELECT + 1 +FROM + pg_set_table +JOIN + int4array_table +ON + pg_set_table.id = int4array_table.id +CROSS JOIN + sample +WHERE + pg_set_table.values::text != int4array_table.values::text OR + (pg_set_table.values + sample.set_vals)::text != array_union(int4array_table.values, sample.array_vals)::text OR + (pg_set_table.values - sample.set_vals)::text != array_diff(int4array_table.values, sample.array_vals)::text OR + (pg_set_table.values * sample.set_vals)::text != array_intersect(int4array_table.values, sample.array_vals)::text OR + (pg_set_table.values + sample.mid_val)::text != array_union(int4array_table.values, array[sample.mid_val])::text OR + (pg_set_table.values && sample.set_vals) != (int4array_table.values && sample.array_vals) OR + (pg_set_table.values @> sample.set_vals) != (int4array_table.values @> sample.array_vals) OR + (pg_set_table.values <@ 
sample.set_vals) != (int4array_table.values <@ sample.array_vals) OR + (pg_set_table.values @> sample.mid_val) != (int4array_table.values @> array[sample.mid_val]) OR + (pg_set_table.values = sample.set_vals) != (int4array_table.values = sample.array_vals) OR + (pg_set_table.values <> sample.set_vals) != (int4array_table.values <> sample.array_vals) +LIMIT 1; + +DROP TABLE int4array_table, pg_set_table; +CREATE TABLE pg_set_table (values pg_set); + +\copy pg_set_table FROM 'data/pg_set.data' + +SELECT COUNT(*) FROM pg_set_table WHERE values = '{217419,456870,462437,797909}'; +SELECT COUNT(*) FROM pg_set_table WHERE values <> '{217419,456870,462437,797909}'; +SELECT COUNT(*) FROM pg_set_table WHERE values @> 456870; +SELECT COUNT(*) FROM pg_set_table WHERE values @> '{}'; +SELECT COUNT(*) FROM pg_set_table WHERE values @> '{217419,797909}'; +SELECT COUNT(*) FROM pg_set_table WHERE values && '{217419,723181,738339,104704}'; + +CREATE INDEX pg_set_table_values_idx ON pg_set_table USING GIN (values); + +SET enable_seqscan = off; +SELECT COUNT(*) FROM pg_set_table WHERE values = '{217419,456870,462437,797909}'; +SELECT COUNT(*) FROM pg_set_table WHERE values @> 456870; +SELECT COUNT(*) FROM pg_set_table WHERE values @> '{}'; +SELECT COUNT(*) FROM pg_set_table WHERE values @> '{217419,797909}'; +SELECT COUNT(*) FROM pg_set_table WHERE values && '{217419,723181,738339,104704}'; +DROP INDEX pg_set_table_values_idx; + +CREATE INDEX pg_set_table_values_idx ON pg_set_table USING gist (values gist_pg_set_ops (masklen = 0)); +CREATE INDEX pg_set_table_values_idx ON pg_set_table USING gist (values gist_pg_set_ops (masklen = 15)); +CREATE INDEX pg_set_table_values_idx ON pg_set_table USING gist (values gist_pg_set_ops (masklen = 16065)); +CREATE INDEX pg_set_table_values_idx ON pg_set_table USING gist (values gist_pg_set_ops (masklen = 16064)); + +SET enable_seqscan = off; +SET enable_bitmapscan = off; +SELECT COUNT(*) FROM pg_set_table WHERE values = 
'{217419,456870,462437,797909}'; +SELECT COUNT(*) FROM pg_set_table WHERE values @> 456870; +SELECT COUNT(*) FROM pg_set_table WHERE values @> '{}'; +SELECT COUNT(*) FROM pg_set_table WHERE values @> '{217419,797909}'; +SELECT COUNT(*) FROM pg_set_table WHERE values && '{217419,723181,738339,104704}'; +DROP INDEX pg_set_table_values_idx; + +CREATE INDEX pg_set_table_values_idx ON pg_set_table USING hash (values); +SELECT COUNT(*) FROM pg_set_table WHERE values = '{217419,456870,462437,797909}'; +DROP INDEX pg_set_table_values_idx; + +CREATE TABLE point_pg_set_table ( + p point, + values pg_set +); + +CREATE INDEX point_pg_set_table_idx ON point_pg_set_table + USING gist (p, values gist_pg_set_ops (masklen = 16064)); + +INSERT INTO point_pg_set_table +SELECT + '(0,0)'::point, + values +FROM + pg_set_table; + +SET enable_seqscan = off; +SET enable_bitmapscan = off; +SELECT COUNT(*) FROM point_pg_set_table WHERE p ~= '(0,0)'::point AND values = '{217419,456870,462437,797909}'; +SELECT COUNT(*) FROM point_pg_set_table WHERE p ~= '(0,0)'::point AND values @> 456870; +SELECT COUNT(*) FROM point_pg_set_table WHERE p ~= '(0,0)'::point AND values @> '{}'; +SELECT COUNT(*) FROM point_pg_set_table WHERE p ~= '(0,0)'::point AND values @> '{217419,797909}'; +SELECT COUNT(*) FROM point_pg_set_table WHERE p ~= '(0,0)'::point AND values && '{217419,723181,738339,104704}'; + + +TRUNCATE TABLE pg_set_table; + +\copy pg_set_table FROM 'data/pg_set.data' +\copy pg_set_table FROM 'data/pg_set.data' +\copy pg_set_table FROM 'data/pg_set.data' +\copy pg_set_table FROM 'data/pg_set.data' + +ANALYZE pg_set_table; + +SELECT num_nulls( + null_frac, + avg_width, + n_distinct, + most_common_vals, + most_common_elems, + most_common_elem_freqs, + elem_count_histogram +) FROM pg_stats WHERE tablename = 'pg_set_table'; + +EXPLAIN SELECT * FROM pg_set_table WHERE values && '{864768,866406,867656,867884}'; + +EXPLAIN SELECT * FROM pg_set_table WHERE values @> '{864768}'; + +EXPLAIN SELECT * FROM 
pg_set_table WHERE values @> 864768; + +EXPLAIN SELECT * FROM pg_set_table WHERE values = '{1007,138543,345664,751582,972886,990712}'; + +EXPLAIN SELECT * FROM pg_set_table WHERE values <> '{1007,138543,345664,751582,972886,990712}'; + +EXPLAIN SELECT * FROM pg_set_table WHERE values && '{864768,866406,867656,867884}';