Skip to content

Commit 303de9f

Browse files
Ancy AugustinAncy Augustin
authored andcommitted
fix: review comments
1 parent 8ca6389 commit 303de9f

File tree

8 files changed

+148
-99
lines changed

8 files changed

+148
-99
lines changed

docs/getting_started.rst

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -103,15 +103,15 @@ With a :class:`.DataFrameClient` object, you can:
103103
Pandas Utility
104104
~~~~~~~~~~~~~~
105105

106-
Utility functions for managing Pandas DataFrames and interacting with the DataFrame API include:
106+
Utility functions for interacting with :class:`.DataFrameClient` using `pandas.DataFrame <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html>`__ include:
107107

108-
* Create a table from a pandas dataframe.
108+
* Create a table from a ``pandas.DataFrame``.
109109

110-
* Append pandas dataframe to an existing table.
110+
* Append a ``pandas.DataFrame`` to an existing table.
111111

112-
* Query decimated data from a table as pandas dataframe.
112+
* Query decimated data from a table as a ``pandas.DataFrame``.
113113

114-
* Query data from a table as pandas dataframe.
114+
* Query data from a table as a ``pandas.DataFrame``.
115115

116116
Examples
117117
~~~~~~~~
@@ -134,7 +134,7 @@ Export data from a table
134134
:language: python
135135
:linenos:
136136

137-
Table operations using pandas dataframe
137+
Table operations using `pandas.DataFrame <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html>`__
138138

139139
.. literalinclude:: ../examples/dataframe/pandas_dataframe_operations.py
140140
:language: python

examples/dataframe/pandas_dataframe_operations.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import pandas as pd
22
from nisystemlink.clients.dataframe import DataFrameClient
3-
from nisystemlink.clients.core import HttpConfiguration
43
from nisystemlink.clients.dataframe.models import (
54
DecimationMethod,
65
DecimationOptions,
@@ -21,7 +20,10 @@
2120
data=[[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["a", "b", "c"]
2221
)
2322
df.set_index("a", inplace=True)
23+
print(df)
2424

25+
# List the existing tables once; a second bare call would only repeat the request.
print(client.list_tables())
2527
try:
2628
table_id = create_table_from_pandas_df(
2729
client, df, "Example Table", nullable_columns=False
@@ -55,4 +57,4 @@
5557
print("Queried table data as pandas dataframe:")
5658
print(queried_df)
5759

58-
client.delete_table(table_id)
60+
client.delete_table(table_id)

nisystemlink/clients/dataframe/models/_data_frame.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ class DataFrame(JsonModel):
5353
columns: Optional[List[str]] = None
5454
"""The names and order of the columns included in the data frame."""
5555

56-
data: List[List[Optional[str]]] = None
56+
data: Optional[List[List[Optional[str]]]] = None
5757
"""The data for each row with the order specified in the columns property.
5858
Must contain a value for each column in the columns property."""
5959

nisystemlink/clients/dataframe/utilities/_pandas_dataframe_operations.py

Lines changed: 48 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from typing import Optional
2+
13
import pandas as pd
24
from nisystemlink.clients.dataframe import DataFrameClient
35
from nisystemlink.clients.dataframe.models import (
@@ -24,7 +26,7 @@ def create_table_from_pandas_df(
2426
client (DataFrameClient): Instance of DataFrameClient.
2527
df (pd.DataFrame): Pandas dataframe.
2628
table_name (str): Name of the table.
27-
nullable_columns (bool): Make the columns nullable.
29+
nullable_columns (bool): Make the columns nullable. Nullable columns can contain `null` values.
2830
2931
Returns:
3032
str: ID of the table.
@@ -42,7 +44,10 @@ def create_table_from_pandas_df(
4244

4345

4446
def append_pandas_df_to_table(
45-
client: DataFrameClient, table_id: str, df: pd.DataFrame
47+
client: DataFrameClient,
48+
table_id: str,
49+
df: pd.DataFrame,
50+
end_of_data: Optional[bool] = None,
4651
) -> None:
4752
"""Append `df` to table.
4853
@@ -54,12 +59,48 @@ def append_pandas_df_to_table(
5459
Returns:
5560
None
5661
"""
57-
frame = DataFrame()
62+
frame: DataFrame = DataFrame()
5863
frame.from_pandas(df)
5964
client.append_table_data(
60-
id=table_id, data=AppendTableDataRequest(frame=frame, end_of_data=False)
65+
id=table_id, data=AppendTableDataRequest(frame=frame, end_of_data=end_of_data)
66+
)
67+
68+
69+
def create_table_with_data_from_pandas_df(
    client: DataFrameClient,
    df: pd.DataFrame,
    table_name: str,
    nullable_columns: bool,
    batch_size: int = 1000,
    end_of_data: Optional[bool] = None,
) -> str:
    """Create a table and upload data from a pandas DataFrame.

    The table is created from `df` and the rows are then appended in
    consecutive batches of at most `batch_size` rows.

    Args:
        client (DataFrameClient): Instance of DataFrameClient.
        df (pd.DataFrame): Pandas DataFrame with data to upload.
        table_name (str): Name of the table to create.
        nullable_columns (bool): Make the columns nullable. Nullable columns can contain `null` values.
        batch_size (int): Maximum number of rows per append request. Must be positive.
            Default is 1000.
        end_of_data (Optional[bool]): End-of-data flag for the upload. A truthy value is
            sent only with the final batch; earlier batches are sent with `False`.
            `None` and `False` are forwarded unchanged with every batch.

    Returns:
        str: ID of the created table.

    Raises:
        ValueError: If `batch_size` is not a positive integer.
    """
    # Validate before creating the table so a bad batch size cannot leave
    # an empty table behind.
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer.")

    table_id = create_table_from_pandas_df(
        client=client, df=df, table_name=table_name, nullable_columns=nullable_columns
    )

    num_rows = len(df)
    # NOTE(review): an empty `df` produces no append request, so `end_of_data`
    # is never sent in that case -- confirm whether that is acceptable.
    for start_row in range(0, num_rows, batch_size):
        end_row = min(start_row + batch_size, num_rows)
        is_last_batch = end_row >= num_rows
        # A truthy end-of-data flag must accompany only the last batch;
        # presumably the service refuses further appends once it is set
        # (confirm against the DataFrame service documentation).
        if is_last_batch or not end_of_data:
            batch_end_of_data = end_of_data
        else:
            batch_end_of_data = False
        append_pandas_df_to_table(
            client, table_id, df.iloc[start_row:end_row], batch_end_of_data
        )

    return table_id
103+
63104

64105
def query_decimated_table_data_as_pandas_df(
65106
client: DataFrameClient,
@@ -78,10 +119,10 @@ def query_decimated_table_data_as_pandas_df(
78119
Returns:
79120
pd.DataFrame: Table data in pandas dataframe format.
80121
"""
81-
index_name: str = None
122+
index_name = None
82123
if index:
83124
index_name = _get_table_index_name(client=client, table_id=table_id)
84-
if query.columns:
125+
if query.columns and index_name:
85126
if index_name not in query.columns:
86127
query.columns.append(index_name)
87128
response = client.query_decimated_data(table_id, query)
@@ -107,11 +148,10 @@ def query_table_data_as_pandas_df(
107148
"""
108149
continuation_token = None
109150
all_rows = []
110-
index_name: str = None
111151

112152
if index:
113153
index_name = _get_table_index_name(client=client, table_id=table_id)
114-
if query.columns:
154+
if query.columns and index_name:
115155
if index_name not in query.columns:
116156
query.columns.append(index_name)
117157

nisystemlink/clients/dataframe/utilities/_pandas_exception.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
SUPPORTED_INDEX_DATA_TYPE = ["INT32", "INT64", "TIMESTAMP"]
2+
3+
14
class DataFrameError(Exception):
25
"""Base class for Dataframe errors."""
36

@@ -11,9 +14,8 @@ def __init__(self, index_name: str = None) -> None:
1114
self.index_name = index_name
1215
self.message = "Data frame must contain one index."
1316
if index_name:
14-
self.message = (
15-
f"Column '{self.index_name}' must be of type INT32, INT64, or TIMESTAMP to be an index column."
16-
)
17+
# Parenthesize so both literals are joined into one message; without the
# parentheses the second literal is a separate statement with no effect.
self.message = (
    f"Column '{self.index_name}' must be of type {SUPPORTED_INDEX_DATA_TYPE}"
    " to be an index column."
)
1719
super().__init__(self.message)
1820

1921

@@ -23,5 +25,7 @@ class InvalidColumnTypeError(DataFrameError):
2325
def __init__(self, column_name: str, column_type: str) -> None:
2426
self.column_name = column_name
2527
self.column_type = column_type
26-
self.message = f"Column '{column_name}' has an unsupported datatype: {column_type}"
28+
self.message = (
29+
f"Column '{column_name}' has an unsupported datatype: {column_type}"
30+
)
2731
super().__init__(self.message)

nisystemlink/clients/dataframe/utilities/_pandas_utils.py

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,12 @@
66

77
from ._pandas_exception import InvalidColumnTypeError, InvalidIndexError
88

9-
UNSUPPORTED_PANDAS_INT_TYPES = ["int8", "int16"]
10-
"""List of unsupported pandas integer types for conversion to `DataType`."""
11-
12-
UNSUPPORTED_PANDAS_FLOAT_TYPES = ["float16"]
13-
"""List of unsupported pandas float types for conversion to `DataType`."""
9+
UNSUPPORTED_PANDAS_DATA_TYPE_CONVERSION = {
10+
"int8": "int32",
11+
"int16": "int32",
12+
"float16": "float32",
13+
}
14+
"""Mapping of unsupported pandas types to supported data types for `DataType`."""
1415

1516
SUPPORTED_INDEX_DATA_TYPE = [DataType.Int32, DataType.Int64, DataType.Timestamp]
1617
"""List of supported index data types for table creation.
@@ -62,11 +63,8 @@ def _type_cast_column_datatype(
6263
data = pd.to_numeric(data, downcast="integer")
6364
pd_dtype = data.dtype
6465

65-
if pd_dtype in UNSUPPORTED_PANDAS_INT_TYPES:
66-
data = data.astype("int32")
67-
68-
elif pd_dtype in UNSUPPORTED_PANDAS_FLOAT_TYPES:
69-
data = data.astype("float32")
66+
# Look up by the dtype's string name: dict membership is hash-based, and a
# dtype object does not hash like the string it compares equal to, so
# `pd_dtype in mapping` would never match.
target_dtype = UNSUPPORTED_PANDAS_DATA_TYPE_CONVERSION.get(str(pd_dtype))
if target_dtype is not None:
    data = data.astype(target_dtype)
7068

7169
return data
7270

@@ -81,7 +79,7 @@ def _infer_index_column(df: pd.DataFrame) -> Column:
8179
InvalidIndexError: If multiple index present or index is of unsupported type.
8280
8381
Returns:
84-
Column: Valid `Column` to the table.
82+
Column: Valid Index `Column` for the table.
8583
"""
8684
index = df.index.name
8785

@@ -140,7 +138,7 @@ def _infer_dataframe_columns(df: pd.DataFrame, nullable_columns: bool) -> List[C
140138
return columns
141139

142140

143-
def _get_table_index_name(client: DataFrameClient, table_id: str) -> str:
141+
def _get_table_index_name(client: DataFrameClient, table_id: str) -> Optional[str]:
144142
"""Get the index name from the table columns.
145143
146144
Args:

0 commit comments

Comments
 (0)