Skip to content
Merged
Show file tree
Hide file tree
Changes from 87 commits
Commits
Show all changes
90 commits
Select commit Hold shift + click to select a range
9595228
checkpoint
bovlb Jul 28, 2025
2200fed
checkpoint
bovlb Jul 28, 2025
4f7a5bb
Merge remote-tracking branch 'origin/main' into postgresql-server
bovlb Jul 28, 2025
298ac93
checkpoint
bovlb Jul 29, 2025
a335b35
checkpoint
bovlb Jul 29, 2025
08ff6e2
seems to be working
bovlb Jul 29, 2025
16de7a8
tidy
bovlb Jul 29, 2025
f09b4c9
checkpoint
bovlb Jul 29, 2025
c5ac65a
Basic import of tables
bovlb Jul 29, 2025
61dfc97
remove files no longer needed
bovlb Jul 29, 2025
68e4717
Basic execute working
bovlb Jul 29, 2025
872be6c
docs and CI
bovlb Jul 29, 2025
ac9c3f9
Fix JSON encoding
bovlb Jul 29, 2025
c1cb830
doc
bovlb Jul 29, 2025
40d183f
Smaller batch size is safer, maybe slower
bovlb Jul 30, 2025
895cf16
rename to sql-server
bovlb Jul 30, 2025
7f4bdae
Use WF_AUTH_TOKEN
bovlb Jul 30, 2025
e849e61
complete thought
bovlb Jul 30, 2025
f57e255
complete thought
bovlb Jul 30, 2025
6c66b68
Merge branch 'postgresql-server' of github.com:aperture-data/workflow…
bovlb Jul 30, 2025
469018c
revert unintended change
bovlb Jul 30, 2025
7b30b9c
trying to add blobs
bovlb Jul 30, 2025
5fb9253
fix _blob
bovlb Jul 30, 2025
0d4fa7f
make _image and _as_format work
bovlb Jul 30, 2025
1f684f2
refactor for simpler code, no pre-batch query
bovlb Jul 30, 2025
b4efd33
Add operations
bovlb Jul 31, 2025
c5c8c85
Merge remote-tracking branch 'origin/postgresql-server' into sql-server2
bovlb Jul 31, 2025
7504935
Update apps/sql-server/fdw/fdw/__init__.py
bovlb Jul 31, 2025
7f030ee
PR feedback
bovlb Jul 31, 2025
f3293b1
use dotenv
bovlb Jul 31, 2025
b5df5cf
Merge remote-tracking branch 'origin/postgresql-server' into sql-server2
bovlb Jul 31, 2025
0ab4496
Merge remote-tracking branch 'origin/main' into sql-server3
bovlb Jul 31, 2025
d0f53ac
checkpoint
bovlb Jul 31, 2025
566ef7d
Merge remote-tracking branch 'origin/main' into sql-server2
bovlb Jul 31, 2025
236b791
Merge remote-tracking branch 'origin/main' into sql-server2
bovlb Jul 31, 2025
235b0c8
Fix merge issues
bovlb Jul 31, 2025
224db8e
more cleanup
bovlb Jul 31, 2025
e707fde
Merge remote-tracking branch 'origin/main' into sql-server3
bovlb Jul 31, 2025
c4e4648
checkpoint
bovlb Jul 31, 2025
845db84
checkpoint
bovlb Jul 31, 2025
f391998
operations checking
bovlb Jul 31, 2025
9a0191d
Add some enums for videos
bovlb Jul 31, 2025
1d85315
Add access control; fix some issue with descriptor tables
bovlb Aug 1, 2025
200259a
PR feedback
bovlb Aug 1, 2025
d9f218f
Cleaner pool and schema handling.
bovlb Aug 1, 2025
1afd651
Fix suggested by Drew for envar issues
bovlb Aug 1, 2025
19549d4
pinpoint fix
bovlb Aug 1, 2025
6fc6227
Merge remote-tracking branch 'origin/main' into sql-server3
bovlb Aug 1, 2025
da8a69a
Merge remote-tracking branch 'origin/sql-server2' into sql-server3
bovlb Aug 1, 2025
622ef42
Update apps/sql-server/app/app.sh
bovlb Aug 1, 2025
a440d82
Merge remote-tracking branch 'origin/sql-server-connection-fix' into …
bovlb Aug 1, 2025
1fb2094
Merge remote-tracking branch 'origin/main' into sql-server4
bovlb Aug 1, 2025
d5080a5
Add HTTP server; fix FDW log level
bovlb Aug 1, 2025
efecd85
Fix swagger
bovlb Aug 1, 2025
4c01553
response to PR feedback
bovlb Aug 1, 2025
bb8f3fd
Add check_properties
bovlb Aug 1, 2025
69cd1f5
fix merge problem
bovlb Aug 1, 2025
6ba93e1
Add FIND_SIMILAR
bovlb Aug 1, 2025
f0ac097
checkpoint
bovlb Aug 1, 2025
1ff40e4
Be consistent about embeddings requirements
bovlb Aug 1, 2025
5c29a6b
descriptor tables have specialized column sets; detecting embeddability
bovlb Aug 2, 2025
006a459
checkpoint
bovlb Aug 2, 2025
1f14b8a
find similar working
bovlb Aug 2, 2025
7f1a4e6
checkpoint
bovlb Aug 2, 2025
fd4357d
Merge remote-tracking branch 'origin/main' into sql-server5
bovlb Aug 2, 2025
0539bc6
Finish general rewrite; use personal branch of multicorn2
bovlb Aug 4, 2025
71c020e
fix comment
bovlb Aug 4, 2025
9accb65
Hard-code distances
bovlb Aug 4, 2025
252275c
Better boolean handling
bovlb Aug 4, 2025
7f03d75
trivial change
bovlb Aug 4, 2025
2bb214f
More performance reporting
bovlb Aug 4, 2025
81a81e5
fix type hint
bovlb Aug 4, 2025
8e17eb7
remove extraneous parameter
bovlb Aug 4, 2025
0985308
fix type hint
bovlb Aug 4, 2025
b14e457
fix type hints
bovlb Aug 4, 2025
b980842
fix type hint
bovlb Aug 4, 2025
a28db0e
minor fixes
bovlb Aug 4, 2025
6c4ef42
Remove temporary logging
bovlb Aug 4, 2025
ec7f58f
Update apps/sql-server/fdw/fdw/descriptor.py
bovlb Aug 4, 2025
1910804
Update apps/sql-server/fdw/fdw/common.py
bovlb Aug 4, 2025
3b78db5
Update apps/sql-server/fdw/fdw/descriptor.py
bovlb Aug 4, 2025
3ab1976
fix copy-paste error
bovlb Aug 4, 2025
320fdb0
Update apps/sql-server/fdw/fdw/common.py
bovlb Aug 4, 2025
01df854
more timing
bovlb Aug 4, 2025
8ce5863
Add explain
bovlb Aug 4, 2025
e672335
tweak EXPLAIN
bovlb Aug 4, 2025
09ea0c4
PR has been merged upstream
bovlb Aug 4, 2025
36018d2
Add docstring on Curry
bovlb Aug 4, 2025
4743b8d
Fix typo in field name
bovlb Aug 5, 2025
cab94a9
Enable extra field rejection for TableOptions and ColumnOptions and f…
bovlb Aug 5, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions apps/crawl-to-rag/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@ FROM aperturedata/workflows-base

ENV APP_NAME=workflows-crawl-to-rag

# Needed for text-embeddings
RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/*
# Install dependencies for embeddings
RUN pip install --no-cache-dir -r /app/embeddings/requirements_cpu.txt
RUN pip install --no-cache-dir -r /app/embeddings/requirements.txt

# copy in the app directories
COPY --from=crawl-website /app /workflows/crawl-website
Expand All @@ -28,6 +29,7 @@ RUN pip install --no-cache-dir -r /requirements.txt
COPY --from=rag /requirements.txt /requirements.txt
RUN pip install --no-cache-dir -r /requirements.txt


EXPOSE 8000

COPY app.sh /app/
4 changes: 4 additions & 0 deletions apps/rag/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/*
COPY requirements.txt /
RUN pip install --no-cache-dir -r /requirements.txt

# Install dependencies for embeddings
RUN pip install --no-cache-dir -r /app/embeddings/requirements_cpu.txt
RUN pip install --no-cache-dir -r /app/embeddings/requirements.txt

# We prefer to cache models in the docker image rather than load them
# at run time.
COPY app/llm.py /app/llm.py
Expand Down
25 changes: 18 additions & 7 deletions apps/sql-server/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ ENV APP_NAME=workflows-sql-server
ENV POSTGRES_VERSION=17
ARG MULTICORN_VERSION=3.0

# Add PGDG repository and install PostgreSQL 17
# Add PGDG repository and install PostgreSQL
RUN apt-get update && apt-get install -y wget gnupg lsb-release \
&& echo "deb http://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" \
> /etc/apt/sources.list.d/pgdg.list \
Expand All @@ -23,28 +23,39 @@ RUN echo "listen_addresses = '*'" >> /etc/postgresql/${POSTGRES_VERSION}/main/po
RUN echo "host all all 0.0.0.0/0 md5" >> /etc/postgresql/17/main/pg_hba.conf

# Postgres/Multicorn insists on using the system Python, so we need to disable the virtual environment
# Store current VIRTUAL_ENV and PATH values
ENV OLD_VIRTUAL_ENV="${VIRTUAL_ENV}"
ENV OLD_PATH="${PATH}"
ENV OLD_PYTHONPATH="${PYTHONPATH}"

# Disable virtual environment
ENV VIRTUAL_ENV=
ENV PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
ENV PYTHONPATH=""

# Install multicorn2 Python module into system Python
RUN /usr/bin/python3 -m pip install --no-cache-dir "git+https://github.com/pgsql-io/multicorn2.git"

# Build and install multicorn2 Postgres extension
RUN git clone https://github.com/pgsql-io/multicorn2.git /multicorn2 \
# Build and install multicorn2 Postgres extension
RUN git clone --single-branch --branch main https://github.com/pgsql-io/multicorn2.git /multicorn2 \
&& cd /multicorn2 \
&& make PYTHON=/usr/bin/python3 \
&& make install

RUN pip install --no-cache-dir aperturedb dotenv
RUN pip install --no-cache-dir aperturedb dotenv numpy pydantic

# Install dependencies for embeddings
RUN pip install --no-cache-dir -r /app/embeddings/requirements_cpu.txt
RUN pip install --no-cache-dir -r /app/embeddings/requirements.txt

# Copy and install our FDW into system Python
COPY fdw /fdw
RUN cd /fdw && /usr/bin/python3 -m pip install .

# Restore virtual environment
ENV VIRTUAL_ENV=/opt/venv
ENV PATH="/opt/venv/bin:/opt/venv/lib/python3.10/site-packages:$PATH"
ENV PYTHONPATH="/app:/opt/venv/lib/python3.10/site-packages"
ENV VIRTUAL_ENV=${OLD_VIRTUAL_ENV}
ENV PATH="${OLD_PATH}"
ENV PYTHONPATH="/app:${OLD_PYTHONPATH}"

# Install application requirements
COPY requirements.txt /requirements.txt
Expand Down
41 changes: 40 additions & 1 deletion apps/sql-server/app/sql/functions.sql
Original file line number Diff line number Diff line change
Expand Up @@ -102,4 +102,43 @@ $$ LANGUAGE SQL IMMUTABLE;
-- OPERATIONS: collect a variadic list of JSONB values into one JSONB array.
--   ops     - one or more JSONB values (supplied as separate arguments).
--   returns - the jsonb_agg of every element of ops.
CREATE OR REPLACE FUNCTION OPERATIONS(VARIADIC ops jsonb[])
RETURNS jsonb AS $$
    SELECT jsonb_agg(operation)
    FROM unnest(ops) AS operation
$$ LANGUAGE SQL IMMUTABLE;
$$ LANGUAGE SQL IMMUTABLE;


-- Find similar

-- FIND_SIMILAR: validate the arguments of a similarity search and package
-- them as a JSONB descriptor object.
--
-- Parameters (all have defaults, so callers normally pass them by name):
--   text      - query text; one of the three mutually exclusive inputs
--   image     - query image bytes; one of the three mutually exclusive inputs
--   vector    - query vector as JSONB; one of the three mutually exclusive inputs
--   k         - number of neighbours requested; must be > 0 (default 10)
--   knn_first - copied unchanged into the result (default TRUE)
--
-- Returns: a JSONB object with keys "type" (always 'find_similar'), "text",
--          "image", "vector", "k_neighbors" and "knn_first".
-- Raises:  an exception unless exactly one of text/image/vector is non-NULL,
--          and an exception when k is NULL or not positive.
--
-- NOTE(review): image is handed to jsonb_build_object as BYTEA, so it is
-- stored in its textual form -- presumably the consumer decodes it; confirm.
CREATE OR REPLACE FUNCTION FIND_SIMILAR(
    text TEXT DEFAULT NULL,
    image BYTEA DEFAULT NULL,
    vector JSONB DEFAULT NULL,
    k INT DEFAULT 10,
    knn_first BOOLEAN DEFAULT TRUE
) RETURNS JSONB AS $$
DECLARE
    -- How many of the three alternative query inputs were actually supplied.
    supplied_inputs INT := num_nonnulls(text, image, vector);
BEGIN
    IF supplied_inputs <> 1 THEN
        RAISE EXCEPTION 'FIND_SIMILAR requires exactly one of text, image, or vector';
    END IF;

    -- A NULL k is treated like a non-positive one and rejected.
    IF COALESCE(k, 0) < 1 THEN
        RAISE EXCEPTION 'k must be a positive integer';
    END IF;

    RETURN jsonb_build_object(
        'type', 'find_similar',
        'text', text,
        'image', image,
        'vector', vector,
        'k_neighbors', k,
        'knn_first', knn_first
    );
END;
$$ LANGUAGE plpgsql IMMUTABLE;

COMMENT ON FUNCTION FIND_SIMILAR IS 'Find similar items based on one of text, image, or vector.';
Loading
Loading