|
4 | 4 | import pytest |
5 | 5 | from pandas.testing import assert_frame_equal |
6 | 6 | import datetime |
| 7 | +import numpy as np |
| 8 | +import ast |
7 | 9 |
|
8 | 10 | from .. import read_sql |
9 | 11 |
|
@@ -1058,3 +1060,84 @@ def test_postgres_partitioned_pre_execution_queries(postgres_url: str) -> None: |
1058 | 1060 | }, |
1059 | 1061 | ).sort_values(by=['name']).reset_index(drop=True) |
1060 | 1062 | assert_frame_equal(df, expected, check_names=True) |
| 1063 | + |
def test_postgres_inet_type(postgres_url: str) -> None:
    """Check that the ``test_inet`` column of ``test_types`` loads as strings.

    The expected column is object-dtype and holds IPv4 and IPv6 addresses,
    two of them with a ``/prefix`` suffix, followed by a ``None`` entry.
    """
    inet_values = ["192.168.1.1", "10.0.0.0/24", "2001:db8::1", "2001:db8::/32", None]
    expected = pd.DataFrame({"test_inet": pd.Series(inet_values, dtype="object")})

    df = read_sql(postgres_url, "SELECT test_inet FROM test_types")

    assert_frame_equal(df, expected, check_names=True)
| 1076 | + |
def test_postgres_vector_types(postgres_url: str) -> None:
    """Read the four columns of ``vector_types`` and verify the first two rows.

    ``dense_vector`` and ``half_vector`` are parsed into NumPy float arrays
    and compared with ``np.allclose``; row 0 must match the expected values
    and row 1 must be ``None``.  ``binary_vector`` is rendered as its first
    ten bits and ``sparse_vector`` as ``"{index: value}/length"`` (1-based
    indices) before both are compared against an expected frame.
    """
    df = read_sql(
        postgres_url,
        "SELECT dense_vector, half_vector, binary_vector, sparse_vector FROM vector_types",
    )

    def parse_vector(raw):
        """Turn a bracketed string or a list into a float array; keep None."""
        if raw is None:
            return None
        if isinstance(raw, str):
            # "[a,b,c]" -> ["a", "b", "c"]
            items = raw.strip('[]').split(',')
        elif isinstance(raw, list):
            items = raw
        else:
            raise TypeError(f"Unexpected type for vector: {type(raw)}")
        return np.array([float(item) for item in items])

    # Expected first-row contents of the two float-valued vector columns.
    expected_floats = {
        'dense_vector': np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]),
        'half_vector': np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]),
    }

    # Parse both columns up front, then run the per-column assertions.
    for column in expected_floats:
        df[column] = df[column].apply(parse_vector)

    for column, wanted in expected_floats.items():
        parsed = df[column]
        assert parsed.iloc[0] is not None
        assert np.allclose(parsed.iloc[0], wanted, rtol=1e-5)
        assert parsed.iloc[1] is None

    def binary_to_string(raw_bytes):
        """Render an iterable of byte values as a bit string, cut to 10 bits."""
        if raw_bytes is None:
            return None
        bits = ''.join(f'{byte:08b}' for byte in raw_bytes)
        return bits[:10]

    df['binary_vector'] = df['binary_vector'].apply(binary_to_string)

    def sparse_to_string(values):
        """Render a sequence as ``"{index: int_value}/length"``, skipping zeros."""
        if values is None:
            return None
        non_zero = {}
        for position, value in enumerate(values, start=1):
            if value != 0:
                non_zero[position] = int(value)
        return f"{non_zero}/{len(values)}"

    df['sparse_vector'] = df['sparse_vector'].apply(sparse_to_string)

    string_columns = ['binary_vector', 'sparse_vector']
    expected = pd.DataFrame(
        {
            "binary_vector": pd.Series(["1010101010", None], dtype="object"),
            "sparse_vector": pd.Series(["{1: 1, 3: 2, 5: 3}/5", None], dtype="object"),
        }
    )
    assert_frame_equal(df[string_columns], expected[string_columns], check_names=True)
0 commit comments