Skip to content

Commit 1976d61

Browse files
authored
Merge pull request #815 from holicc/main
Add Support for Postgres Inet and Vector Types
2 parents c5b8922 + 98a7f59 commit 1976d61

File tree

19 files changed

+706
-340
lines changed

19 files changed

+706
-340
lines changed

.devcontainer/Dockerfile

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# Development image for the ConnectorX dev container, built on the
# devcontainers Rust base image.
FROM mcr.microsoft.com/devcontainers/rust:1

# Make a RUN step fail when any command of a pipeline fails, so a failed
# download in `curl ... | python3 -` is not hidden by python3's exit status.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Install system dependencies (database clients plus native build helpers).
# The apt lists are removed in the same layer so they never reach the image.
RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
    && apt-get -y install --no-install-recommends \
        default-mysql-client \
        libclang-dev \
        pkg-config \
        postgresql-client \
        sqlite3 \
    && apt-get clean -y \
    && rm -rf /var/lib/apt/lists/*

# Install Poetry (uses the python3 already present in the image).
# POETRY_HOME and POETRY_VERSION are read by the official installer script.
ENV POETRY_HOME=/opt/poetry
ENV POETRY_VERSION=2.1.3
ENV PATH="/opt/poetry/bin:$PATH"
# -f makes curl exit non-zero on an HTTP error instead of piping an error page.
# NOTE(review): `poetry config` stores the setting for the user running this
# step; confirm that this is also the user the dev container runs as.
RUN curl -fsSL https://install.python-poetry.org | python3 - \
    && poetry config virtualenvs.create false

# Install Rust components and cargo-based tools.
# --locked builds each tool with the dependency versions from its own lockfile.
RUN rustup component add rustfmt clippy \
    && cargo install --locked cargo-watch \
    && cargo install --locked just

# Set environment variables
ENV PATH="/home/vscode/.local/bin:${PATH}"
# Append any pre-existing PYTHONPATH only when it is set: an unconditional
# ":${PYTHONPATH}" leaves a trailing ":" (an empty entry) when it is unset.
ENV PYTHONPATH="/workspaces/connector-x${PYTHONPATH:+:${PYTHONPATH}}"

.devcontainer/devcontainer.json

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
{
    "name": "ConnectorX Development",
    "dockerComposeFile": [
        "docker-compose.yml"
    ],
    "service": "connectorx",
    "workspaceFolder": "/workspaces/${localWorkspaceFolderBasename}",
    "customizations": {
        "vscode": {
            "extensions": [
                "rust-lang.rust-analyzer"
            ],
            "settings": {
                "rust-analyzer.check.command": "clippy"
            }
        }
    },
    "features": {
        "ghcr.io/devcontainers/features/rust:1": "latest"
    }
}

.devcontainer/docker-compose.yml

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# Dev-container stack: the ConnectorX workspace container plus the PostgreSQL
# (pgvector) and MySQL services it talks to over a shared bridge network.
services:
  connectorx:
    build:
      context: ..
      dockerfile: .devcontainer/Dockerfile
    volumes:
      # Mount the repository at /workspaces/connector-x: this is the path the
      # Dockerfile puts on PYTHONPATH, and the path devcontainer.json resolves
      # for workspaceFolder when the checkout directory is named "connector-x".
      - ..:/workspaces/connector-x:cached
    # Keep the container alive so the editor can attach to it.
    command: sleep infinity
    depends_on:
      - postgres
      - mysql
    networks:
      - connectorx-network

  postgres:
    image: pgvector/pgvector:pg17
    environment:
      POSTGRES_USER: postgres
      POSTGRES_PASSWORD: postgres
      POSTGRES_DB: connectorx
    ports:
      # Published on host port 5433; other services on the network use postgres:5432.
      - "5433:5432"
    volumes:
      - postgres_data:/var/lib/postgresql/data
    networks:
      - connectorx-network

  mysql:
    image: ghcr.io/wangxiaoying/mysql:latest
    environment:
      MYSQL_DATABASE: mysql
      MYSQL_ROOT_PASSWORD: mysql
      LANG: C.UTF-8
    ports:
      - "3306:3306"
    volumes:
      - mysql_data:/var/lib/mysql
    networks:
      - connectorx-network

networks:
  connectorx-network:
    driver: bridge

# Named volumes so database contents survive container re-creation.
volumes:
  postgres_data:
  mysql_data:
48+

.github/workflows/ci.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ jobs:
1515
container: ubuntu:24.04
1616
services:
1717
postgres:
18-
image: postgres
18+
image: pgvector/pgvector:pg17
1919
env:
2020
POSTGRES_PASSWORD: postgres
2121
# Set health checks to wait until postgres has started
@@ -117,7 +117,7 @@ jobs:
117117
# Label used to access the service container
118118
postgres:
119119
# Docker Hub image
120-
image: postgres
120+
image: pgvector/pgvector:pg17
121121
env:
122122
POSTGRES_PASSWORD: postgres
123123
# Set health checks to wait until postgres has started

CONTRIBUTING.md

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,38 @@ This doc describes how you can get started at developing ConnectorX.
99
Please check out [here](https://sfu-db.github.io/connector-x/install.html#build-from-source-code)
1010

1111

12+
### Run in a VS Code Dev Container
13+
14+
1. Install required tools:
15+
- [Docker](https://docs.docker.com/get-docker/)
16+
- [VSCode](https://code.visualstudio.com/)
17+
- [VS Code Dev Containers extension (formerly "Remote - Containers")](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers)
18+
19+
2. Clone the repository and open it in VSCode:
20+
```bash
21+
git clone https://github.com/sfu-db/connector-x.git
22+
code connector-x
23+
```
24+
25+
3. When prompted, click "Reopen in Container", or open the command palette (F1) and select "Dev Containers: Reopen in Container" (named "Remote-Containers: Reopen in Container" in older versions of the extension)
26+
27+
4. The dev container includes:
28+
- Rust development environment with rust-analyzer
29+
- PostgreSQL (pgvector), published on host port 5433 (from inside the dev container it is reachable as `postgres:5432`)
30+
- MySQL running on port 3306
31+
- All necessary build tools and dependencies
32+
33+
5. The container will automatically:
34+
- Mount your local repository into the container
35+
- Install Rust toolchain and dependencies
36+
- Configure rust-analyzer with clippy for code analysis
37+
- Set up the development workspace
38+
39+
6. You can now start developing with:
40+
- Full Rust development support
41+
- Integrated database services
42+
- All development tools pre-configured
43+
1244
### Run tests
1345

1446
* Set up environment variables by creating a `.env` file under project directory. Here is an example:

Cargo.lock

Lines changed: 12 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

connectorx-python/Cargo.lock

Lines changed: 20 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

connectorx-python/connectorx/tests/test_postgres.py

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
import pytest
55
from pandas.testing import assert_frame_equal
66
import datetime
7+
import numpy as np
8+
import ast
79

810
from .. import read_sql
911

@@ -1058,3 +1060,84 @@ def test_postgres_partitioned_pre_execution_queries(postgres_url: str) -> None:
10581060
},
10591061
).sort_values(by=['name']).reset_index(drop=True)
10601062
assert_frame_equal(df, expected, check_names=True)
1063+
1064+
def test_postgres_inet_type(postgres_url: str) -> None:
    """Check that the ``test_inet`` column of ``test_types`` is read as expected.

    The query result is compared against a single object-dtype column holding
    two IPv4 values, two IPv6 values and a trailing ``None``.
    """
    df = read_sql(postgres_url, "SELECT test_inet FROM test_types")

    inet_values = ["192.168.1.1", "10.0.0.0/24", "2001:db8::1", "2001:db8::/32", None]
    expected = pd.DataFrame(
        {"test_inet": pd.Series(inet_values, dtype="object")},
    )

    assert_frame_equal(df, expected, check_names=True)
1076+
1077+
def test_postgres_vector_types(postgres_url: str) -> None:
    """Read the four columns of ``vector_types`` and check their first two rows.

    ``dense_vector`` and ``half_vector`` are parsed into float arrays and
    compared numerically; ``binary_vector`` and ``sparse_vector`` are turned
    into strings and compared as a two-column frame. In every column the
    second row is expected to be ``None``.
    """
    df = read_sql(
        postgres_url,
        "SELECT dense_vector, half_vector, binary_vector, sparse_vector FROM vector_types",
    )

    def parse_vector(vec_str):
        """Convert a bracketed comma-separated string or a list to a float array."""
        if vec_str is None:
            return None
        if isinstance(vec_str, str):
            # Drop the surrounding brackets, then split into components.
            components = vec_str.strip('[]').split(',')
        elif isinstance(vec_str, list):
            components = vec_str
        else:
            raise TypeError(f"Unexpected type for vector: {type(vec_str)}")
        return np.array([float(item) for item in components])

    def binary_to_string(binary):
        """Render the values as 8-bit groups and keep the first 10 bits."""
        if binary is None:
            return None
        all_bits = ''.join(f"{byte:08b}" for byte in binary)
        return all_bits[:10]

    def sparse_to_string(sparse_vec):
        """Format non-zero entries as ``{1-based index: int value}/length``."""
        if sparse_vec is None:
            return None
        non_zero = {
            position: int(value)
            for position, value in enumerate(sparse_vec, start=1)
            if value != 0
        }
        return f"{non_zero}/{len(sparse_vec)}"

    # Float-valued vector columns and the values expected in their first row.
    float_expectations = {
        'dense_vector': np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]),
        'half_vector': np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]),
    }

    # Parse both columns first, then verify them one after the other.
    for column in float_expectations:
        df[column] = df[column].apply(parse_vector)

    for column, first_row in float_expectations.items():
        assert df[column].iloc[0] is not None
        assert np.allclose(df[column].iloc[0], first_row, rtol=1e-5)
        assert df[column].iloc[1] is None

    # String-compared columns: binary vector as a bit string, sparse vector
    # as an index/value mapping followed by its length.
    df['binary_vector'] = df['binary_vector'].apply(binary_to_string)
    df['sparse_vector'] = df['sparse_vector'].apply(sparse_to_string)

    string_columns = ['binary_vector', 'sparse_vector']
    expected = pd.DataFrame(
        {
            "binary_vector": pd.Series(["1010101010", None], dtype="object"),
            "sparse_vector": pd.Series(["{1: 1, 3: 2, 5: 3}/5", None], dtype="object"),
        },
    )
    assert_frame_equal(df[string_columns], expected[string_columns], check_names=True)

0 commit comments

Comments
 (0)