@@ -159,15 +159,13 @@ def with_id_spec(
159159 self ,
160160 column_name : str = "id" ,
161161 python_type : Type = str ,
162- extract_fn : Optional [Callable [[Chunk ], Any ]] = lambda chunk : chunk .id ,
163- convert_fn : Optional [Callable [[Any ], Any ]] = None
162+ convert_fn : Optional [Callable [[str ], Any ]] = None
164163 ) -> 'SpannerColumnSpecsBuilder' :
165164 """Add ID column specification.
166165
167166 Args:
168167 column_name: Column name (default: "id")
169168 python_type: Python type (default: str)
170- extract_fn: Value extractor (default: lambda chunk: chunk.id)
171169 convert_fn: Optional converter (e.g., to cast to int)
172170
173171 Returns:
@@ -189,20 +187,18 @@ def with_id_spec(
189187 column_name = column_name ,
190188 python_type = python_type ,
191189 value_fn = functools .partial (
192- _extract_and_convert , extract_fn , convert_fn )))
190+ _extract_and_convert , lambda chunk : chunk . id , convert_fn )))
193191 return self
194192
195193 def with_embedding_spec (
196194 self ,
197195 column_name : str = "embedding" ,
198- extract_fn : Optional [Callable [[Chunk ], List [float ]]] = None ,
199196 convert_fn : Optional [Callable [[List [float ]], List [float ]]] = None
200197 ) -> 'SpannerColumnSpecsBuilder' :
201198 """Add embedding array column (ARRAY<FLOAT32> or ARRAY<FLOAT64>).
202199
203200 Args:
204201 column_name: Column name (default: "embedding")
205- extract_fn: Value extractor (default: chunk.embedding.dense_embedding)
206202 convert_fn: Optional converter (e.g., normalize, quantize)
207203
208204 Returns:
@@ -223,13 +219,11 @@ def with_embedding_spec(
223219 ... convert_fn=lambda vec: [round(x, 4) for x in vec]
224220 ... )
225221 """
226- def default_fn (chunk : Chunk ) -> List [float ]:
222+ def extract_fn (chunk : Chunk ) -> List [float ]:
227223 if chunk .embedding is None or chunk .embedding .dense_embedding is None :
228224 raise ValueError (f'Chunk must contain embedding: { chunk } ' )
229225 return chunk .embedding .dense_embedding
230226
231- extract_fn = extract_fn or default_fn
232-
233227 self ._specs .append (
234228 SpannerColumnSpec (
235229 column_name = column_name ,
@@ -242,15 +236,13 @@ def with_content_spec(
242236 self ,
243237 column_name : str = "content" ,
244238 python_type : Type = str ,
245- extract_fn : Optional [Callable [[Chunk ], Any ]] = None ,
246- convert_fn : Optional [Callable [[Any ], Any ]] = None
239+ convert_fn : Optional [Callable [[str ], Any ]] = None
247240 ) -> 'SpannerColumnSpecsBuilder' :
248241 """Add content column.
249242
250243 Args:
251244 column_name: Column name (default: "content")
252245 python_type: Python type (default: str)
253- extract_fn: Value extractor (default: chunk.content.text)
254246 convert_fn: Optional converter
255247
256248 Returns:
@@ -272,7 +264,7 @@ def with_content_spec(
272264 ... convert_fn=lambda text: text[:1000]
273265 ... )
274266 """
275- def default_fn (chunk : Chunk ) -> str :
267+ def extract_fn (chunk : Chunk ) -> str :
276268 if chunk .content .text is None :
277269 raise ValueError (f'Chunk must contain content: { chunk } ' )
278270 return chunk .content .text
@@ -288,25 +280,21 @@ def default_fn(chunk: Chunk) -> str:
288280 return self
289281
290282 def with_metadata_spec (
291- self ,
292- column_name : str = "metadata" ,
293- value_fn : Optional [Callable [[Chunk ], Any ]] = None
294- ) -> 'SpannerColumnSpecsBuilder' :
283+ self , column_name : str = "metadata" ) -> 'SpannerColumnSpecsBuilder' :
295284 """Add metadata JSON column.
296285
297286 Stores the full metadata dictionary as a JSON string in Spanner.
298287
299288 Args:
300289 column_name: Column name (default: "metadata")
301- value_fn: Value extractor (default: lambda chunk: chunk.metadata)
302290
303291 Returns:
304292 Self for method chaining
305293
306294 Note:
307295 Metadata is automatically converted to JSON string using json.dumps()
308296 """
309- value_fn = value_fn or ( lambda chunk : json .dumps (chunk .metadata ) )
297+ value_fn = lambda chunk : json .dumps (chunk .metadata )
310298 self ._specs .append (
311299 SpannerColumnSpec (
312300 column_name = column_name , python_type = str , value_fn = value_fn ))
0 commit comments