Skip to content

Commit 9259112

Browse files
author
longshan.lu
committed
feat(tests): add tests for Postgres Inet and vector types
- Implemented tests for the Postgres inet type, verifying correct data retrieval and format.
- Added tests for the dense_vector, half_vector, binary_vector, and sparse_vector types, including parsing and validation of numpy arrays.
- Enhanced the vector type tests to ensure proper conversion and comparison of binary and sparse vector representations.
1 parent 7c20a8e commit 9259112

File tree

1 file changed

+83
-0
lines changed

1 file changed

+83
-0
lines changed

connectorx-python/connectorx/tests/test_postgres.py

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
import pytest
55
from pandas.testing import assert_frame_equal
66
import datetime
7+
import numpy as np
8+
import ast
79

810
from .. import read_sql
911

@@ -1058,3 +1060,84 @@ def test_postgres_partitioned_pre_execution_queries(postgres_url: str) -> None:
10581060
},
10591061
).sort_values(by=['name']).reset_index(drop=True)
10601062
assert_frame_equal(df, expected, check_names=True)
1063+
1064+
def test_postgres_inet_type(postgres_url: str) -> None:
    """Check the values read from the ``test_inet`` column of ``test_types``.

    The result of the query must equal a single object-dtype column holding
    two IPv4 entries, two IPv6 entries and a trailing ``None``.
    """
    inet_values = [
        "192.168.1.1",
        "10.0.0.0/24",
        "2001:db8::1",
        "2001:db8::/32",
        None,
    ]
    expected = pd.DataFrame(
        data={"test_inet": pd.Series(inet_values, dtype="object")},
    )

    df = read_sql(postgres_url, "SELECT test_inet FROM test_types")

    assert_frame_equal(df, expected, check_names=True)
1076+
1077+
def test_postgres_vector_types(postgres_url: str) -> None:
    """Check the four vector columns read from the ``vector_types`` table.

    ``dense_vector`` and ``half_vector`` are parsed into float numpy arrays;
    the first row is compared numerically against the expected values and the
    second row must be ``None``.  ``binary_vector`` and ``sparse_vector`` are
    rendered as strings and compared as a DataFrame.
    """

    def to_float_array(raw):
        """Build a float numpy array from a ``"[a,b,...]"`` string or a list."""
        if raw is None:
            return None
        if isinstance(raw, str):
            # Drop the surrounding brackets, then split on commas.
            components = raw.strip('[]').split(',')
        elif isinstance(raw, list):
            components = raw
        else:
            raise TypeError(f"Unexpected type for vector: {type(raw)}")
        return np.array([float(x) for x in components])

    def to_bit_string(raw):
        """Render an iterable of byte values as 0/1 characters (first 10 bits)."""
        if raw is None:
            return None
        all_bits = ''.join(f"{byte:08b}" for byte in raw)
        return all_bits[:10]  # Take first 10 bits

    def to_sparse_string(values):
        """Render the non-zero entries as ``{1-based index: int value}/length``."""
        if values is None:
            return None
        non_zero = {
            position: int(value)
            for position, value in enumerate(values, start=1)
            if value != 0
        }
        return f"{non_zero}/{len(values)}"

    df = read_sql(
        postgres_url,
        "SELECT dense_vector, half_vector, binary_vector, sparse_vector FROM vector_types",
    )

    # Convert dense_vector and half_vector to numpy arrays.
    for column in ('dense_vector', 'half_vector'):
        df[column] = df[column].apply(to_float_array)

    # Row 0 carries the reference values; row 1 is expected to be None.
    numeric_checks = (
        ('dense_vector', np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0])),
        ('half_vector', np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0])),
    )
    for column, reference in numeric_checks:
        assert df[column].iloc[0] is not None
        assert np.allclose(df[column].iloc[0], reference, rtol=1e-5)
        assert df[column].iloc[1] is None

    # binary_vector and sparse_vector are compared via string representations.
    df['binary_vector'] = df['binary_vector'].apply(to_bit_string)
    df['sparse_vector'] = df['sparse_vector'].apply(to_sparse_string)

    string_columns = ['binary_vector', 'sparse_vector']
    expected = pd.DataFrame(
        data={
            "binary_vector": pd.Series(["1010101010", None], dtype="object"),
            "sparse_vector": pd.Series(["{1: 1, 3: 2, 5: 3}/5", None], dtype="object"),
        },
    )
    assert_frame_equal(df[string_columns], expected[string_columns], check_names=True)

0 commit comments

Comments
 (0)