Skip to content

Commit 9538a4a

Browse files
committed
Remove extract_fn and value_fn args.
1 parent f5e4fd7 commit 9538a4a

File tree

1 file changed

+7
-19
lines changed
  • sdks/python/apache_beam/ml/rag/ingestion

1 file changed

+7
-19
lines changed

sdks/python/apache_beam/ml/rag/ingestion/spanner.py

Lines changed: 7 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -159,15 +159,13 @@ def with_id_spec(
159159
self,
160160
column_name: str = "id",
161161
python_type: Type = str,
162-
extract_fn: Optional[Callable[[Chunk], Any]] = lambda chunk: chunk.id,
163-
convert_fn: Optional[Callable[[Any], Any]] = None
162+
convert_fn: Optional[Callable[[str], Any]] = None
164163
) -> 'SpannerColumnSpecsBuilder':
165164
"""Add ID column specification.
166165
167166
Args:
168167
column_name: Column name (default: "id")
169168
python_type: Python type (default: str)
170-
extract_fn: Value extractor (default: lambda chunk: chunk.id)
171169
convert_fn: Optional converter (e.g., to cast to int)
172170
173171
Returns:
@@ -189,20 +187,18 @@ def with_id_spec(
189187
column_name=column_name,
190188
python_type=python_type,
191189
value_fn=functools.partial(
192-
_extract_and_convert, extract_fn, convert_fn)))
190+
_extract_and_convert, lambda chunk: chunk.id, convert_fn)))
193191
return self
194192

195193
def with_embedding_spec(
196194
self,
197195
column_name: str = "embedding",
198-
extract_fn: Optional[Callable[[Chunk], List[float]]] = None,
199196
convert_fn: Optional[Callable[[List[float]], List[float]]] = None
200197
) -> 'SpannerColumnSpecsBuilder':
201198
"""Add embedding array column (ARRAY<FLOAT32> or ARRAY<FLOAT64>).
202199
203200
Args:
204201
column_name: Column name (default: "embedding")
205-
extract_fn: Value extractor (default: chunk.embedding.dense_embedding)
206202
convert_fn: Optional converter (e.g., normalize, quantize)
207203
208204
Returns:
@@ -223,13 +219,11 @@ def with_embedding_spec(
223219
... convert_fn=lambda vec: [round(x, 4) for x in vec]
224220
... )
225221
"""
226-
def default_fn(chunk: Chunk) -> List[float]:
222+
def extract_fn(chunk: Chunk) -> List[float]:
227223
if chunk.embedding is None or chunk.embedding.dense_embedding is None:
228224
raise ValueError(f'Chunk must contain embedding: {chunk}')
229225
return chunk.embedding.dense_embedding
230226

231-
extract_fn = extract_fn or default_fn
232-
233227
self._specs.append(
234228
SpannerColumnSpec(
235229
column_name=column_name,
@@ -242,15 +236,13 @@ def with_content_spec(
242236
self,
243237
column_name: str = "content",
244238
python_type: Type = str,
245-
extract_fn: Optional[Callable[[Chunk], Any]] = None,
246-
convert_fn: Optional[Callable[[Any], Any]] = None
239+
convert_fn: Optional[Callable[[str], Any]] = None
247240
) -> 'SpannerColumnSpecsBuilder':
248241
"""Add content column.
249242
250243
Args:
251244
column_name: Column name (default: "content")
252245
python_type: Python type (default: str)
253-
extract_fn: Value extractor (default: chunk.content.text)
254246
convert_fn: Optional converter
255247
256248
Returns:
@@ -272,7 +264,7 @@ def with_content_spec(
272264
... convert_fn=lambda text: text[:1000]
273265
... )
274266
"""
275-
def default_fn(chunk: Chunk) -> str:
267+
def extract_fn(chunk: Chunk) -> str:
276268
if chunk.content.text is None:
277269
raise ValueError(f'Chunk must contain content: {chunk}')
278270
return chunk.content.text
@@ -288,25 +280,21 @@ def default_fn(chunk: Chunk) -> str:
288280
return self
289281

290282
def with_metadata_spec(
291-
self,
292-
column_name: str = "metadata",
293-
value_fn: Optional[Callable[[Chunk], Any]] = None
294-
) -> 'SpannerColumnSpecsBuilder':
283+
self, column_name: str = "metadata") -> 'SpannerColumnSpecsBuilder':
295284
"""Add metadata JSON column.
296285
297286
Stores the full metadata dictionary as a JSON string in Spanner.
298287
299288
Args:
300289
column_name: Column name (default: "metadata")
301-
value_fn: Value extractor (default: lambda chunk: chunk.metadata)
302290
303291
Returns:
304292
Self for method chaining
305293
306294
Note:
307295
Metadata is automatically converted to JSON string using json.dumps()
308296
"""
309-
value_fn = value_fn or (lambda chunk: json.dumps(chunk.metadata))
297+
value_fn = lambda chunk: json.dumps(chunk.metadata)
310298
self._specs.append(
311299
SpannerColumnSpec(
312300
column_name=column_name, python_type=str, value_fn=value_fn))

0 commit comments

Comments
 (0)