Skip to content

Commit 85ac0d7

Browse files
Bug: ValueError when using opensearch.index_df with documents with an array field (#1444)
* cast df values to numpy array to evaluate notna(v).all() * notna 'all' -> 'any' * adding test * formatting Co-authored-by: kukushking <[email protected]>
1 parent 3b5e2cc commit 85ac0d7

File tree

2 files changed

+14
-1
lines changed

2 files changed

+14
-1
lines changed

awswrangler/opensearch/_write.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from typing import Any, Dict, Generator, Iterable, List, Mapping, Optional, Tuple, Union
88

99
import boto3
10+
import numpy as np
1011
import pandas as pd
1112
import progressbar
1213
from jsonpath_ng import parse
@@ -79,7 +80,7 @@ def _deserialize(v: Any) -> Any:
7980

8081
df_iter = df.iterrows()
8182
for _, document in df_iter:
82-
yield {k: _deserialize(v) for k, v in document.items() if notna(v)}
83+
yield {k: _deserialize(v) for k, v in document.items() if np.array(notna(v)).any()}
8384

8485

8586
def _file_line_generator(path: str, is_json: bool = False) -> Generator[Any, None, None]:

tests/test_opensearch.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,18 @@ def test_index_df(client):
215215
assert response.get("success", 0) == 3
216216

217217

218+
def test_index_df_with_array(client):
219+
response = wr.opensearch.index_df(
220+
client,
221+
df=pd.DataFrame(
222+
[{"_id": "1", "name": "John", "tags": ["foo", "bar"]}, {"_id": "2", "name": "George", "tags": ["foo"]}]
223+
),
224+
index="test_index_df1",
225+
)
226+
print(response)
227+
assert response.get("success", 0) == 2
228+
229+
218230
def test_index_documents(client):
219231
response = wr.opensearch.index_documents(
220232
client,

0 commit comments

Comments
 (0)