-
-
Notifications
You must be signed in to change notification settings - Fork 19.1k
Closed
Closed
Copy link
Labels
EnhancementNeeds TriageIssue that has not been reviewed by a pandas team memberIssue that has not been reviewed by a pandas team member
Description
Feature Type
-
Adding new functionality to pandas
-
Changing existing functionality in pandas
-
Removing existing functionality in pandas
Problem Description
I receive real-time financial data into a DataFrame. While pandas 1.x allows appending rows via df.loc[new_index] = ..., pandas 2 does not support this operation. Therefore, I implemented a function that enables in-place row appending, and this function is highly efficient.
Feature Description
Append a single row to a DataFrame in place.
Alternative Solutions
def df_append_row(df: 'pd.DataFrame', row_index: 'pd.Timestamp|pd.NaT|int|None' = None, extend_size=1_000, *, ffill: bool = False) -> None:
    """Append one row to ``df`` in place, growing its storage in chunks.

    Each block's data array is backed by an over-allocated buffer stored on
    the block as ``mx_cache_d``; the block's ``values`` is re-pointed at a
    slice of that buffer that is one row longer. A new buffer (with
    ``extend_size`` spare rows) is only allocated when the current one is
    missing or full, so most calls do no copying.

    NOTE: this relies on private pandas attributes (``_mgr``, ``blocks``,
    ``_data._ndarray``, ``_range``, ``_reset_cache``, ``_clear_item_cache``)
    and on block values being NumPy arrays whose last axis is the row axis.
    None of that is public API -- verify against the pandas version in use.

    Parameters
    ----------
    df
        Frame to extend. Its index must be a ``DatetimeIndex`` or a
        ``RangeIndex``.
    row_index
        Label of the new row when the index is a ``DatetimeIndex``:
        a ``pd.Timestamp``, an integer written as-is into the
        ``datetime64[ns]`` buffer, or ``None``/``pd.NaT`` for NaT.
        Ignored for a ``RangeIndex``, which is simply extended by one step.
    extend_size
        Number of spare rows to allocate whenever a buffer has to grow.
        Must be at least 1.
    ffill
        If true, copy the previous row's values into the new row. If false,
        the new row is NaN for float blocks in a freshly allocated buffer
        and otherwise whatever the buffer happens to contain.

    Raises
    ------
    ValueError
        If ``extend_size`` is smaller than 1.
    NotImplementedError
        If the index is neither a ``DatetimeIndex`` nor a ``RangeIndex``.
        Raised before anything is modified.
    """
    if extend_size < 1:
        # A non-positive growth step would yield a buffer with no room for
        # the new row, so the "append" would silently do nothing.
        raise ValueError(f"df_append_row: extend_size must be >= 1, got {extend_size!r}")

    mgr = df._mgr
    index = mgr.axes[1]
    # Reject unsupported index types up front: failing after the blocks were
    # resized would leave the frame with data longer than its index.
    if not isinstance(index, (pd.DatetimeIndex, pd.RangeIndex)):
        raise NotImplementedError(f"df_append_row:The type of index ({type(index)}) is not supported. [DatetimeIndex/RangeIndex]")

    df_len = len(df)

    for blk in mgr.blocks:
        arr: 'np.ndarray' = blk.values
        n_rows = arr.shape[-1]  # the last axis is treated as the row axis
        buffer: 'np.ndarray|None' = getattr(blk, "mx_cache_d", None)
        if buffer is None or buffer.shape[-1] <= n_rows:
            # No buffer yet, or it is full: allocate a larger one and copy.
            new_shape = list(arr.shape)
            new_shape[-1] += extend_size
            buffer = np.empty(new_shape, arr.dtype)
            buffer[..., :n_rows] = arr
            if arr.dtype.kind == 'f':
                # Float blocks get NaN in the not-yet-used tail.
                buffer[..., n_rows:] = np.nan
            setattr(blk, "mx_cache_d", buffer)
        blk.values = buffer[..., :n_rows + 1]
        if ffill and n_rows > 0:
            # Copy along the row (last) axis; indexing the first axis would
            # copy a whole column of a 2-D block instead of the last row.
            blk.values[..., -1] = blk.values[..., -2]

    if isinstance(index, pd.DatetimeIndex):
        cached_index: 'pd.DatetimeIndex|None' = getattr(mgr, "mx_cache_i", None)
        if cached_index is None or len(cached_index) <= df_len:
            # Same over-allocation scheme for the index, kept on the manager.
            # The buffer is a naive datetime64[ns] array.
            stamps = np.zeros(df_len + extend_size, dtype='datetime64[ns]')
            stamps[:df_len] = index._data._ndarray
            cached_index = pd.DatetimeIndex(data=stamps, copy=False, name=index.name)
            setattr(mgr, "mx_cache_i", cached_index)
        new_index = cached_index[:df_len + 1]
        if row_index is None or row_index is pd.NaT:
            stamp = np.datetime64("NaT", "ns")
        elif isinstance(row_index, pd.Timestamp):
            # Assign the datetime64 itself so NumPy converts its unit to ns.
            stamp = row_index.to_datetime64()
        else:
            stamp = row_index
        new_index._data._ndarray[-1] = stamp
        mgr.axes[1] = new_index
    else:
        # RangeIndex: extend the underlying range by one step, in place, and
        # drop the cached materialised data so it is rebuilt on next access.
        # NOTE(review): any other object holding this same index instance
        # will see the extra element too -- confirm that is acceptable.
        r: 'range' = index._range
        index._range = range(r.start, r.stop + r.step, r.step)
        getattr(index, "_cache", {}).pop("_data", None)

    df._reset_cache()
    df._clear_item_cache()
Additional Context
No response
Metadata
Metadata
Assignees
Labels
EnhancementNeeds TriageIssue that has not been reviewed by a pandas team memberIssue that has not been reviewed by a pandas team member