Skip to content

Commit 5259589

Browse files
AsafMah, Copilot, github-actions[bot]
authored
Fix pandas date method and ingest test race conditions (#615)
* Import changes from other branch * Added tests * Is this test really failing? * Fixed test * Fixed test * skip * Remove * back * test * test * test * fixed warning * fixed warning * rename * rename * Increase timeout * Increase timeout * Same group * Separate table per test * Close clients * Close clients * Close clients * Removed shared state * Separate managed streaming * Fixed test * w * w * w * Update azure-kusto-data/azure/kusto/data/helpers.py Co-authored-by: Copilot <[email protected]> * Update azure-kusto-ingest/tests/test_e2e_ingest.py Co-authored-by: Copilot <[email protected]> * Fix * Fix * Add retry * Update azure-kusto-ingest/tests/test_e2e_ingest.py Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> * Update azure-kusto-ingest/tests/test_e2e_ingest.py Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> * Add sleep * Try a different way --------- Co-authored-by: Copilot <[email protected]> Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
1 parent 9c10946 commit 5259589

File tree

4 files changed

+377
-271
lines changed

4 files changed

+377
-271
lines changed

CHANGELOG.md

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,10 +5,11 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8-
## [Unreleased]
8+
## [6.0.1] - 2025-12-25
99

1010
### Fixed
1111
- Changed extra's name back to `aio`
12+
- Fixed handling of datetime columns in old pandas versions.
1213
- Fixed encoding error in `ingest_from_dataframe` when using csv data format.
1314

1415
## [6.0.0] - 2025-11-26

azure-kusto-data/azure/kusto/data/helpers.py

Lines changed: 10 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,17 +1,17 @@
11
import json
22
from functools import lru_cache
33
from pathlib import Path
4-
from typing import TYPE_CHECKING, Union, Callable, Dict, Optional
4+
from typing import TYPE_CHECKING, Any, Union, Callable, Optional
55

66
if TYPE_CHECKING:
77
import pandas as pd
88
from azure.kusto.data._models import KustoResultTable, KustoStreamingResultTable
99

1010
# Alias for dataframe_from_result_table converter type
11-
Converter = Dict[str, Union[str, Callable[[str, "pd.DataFrame"], "pd.Series"]]]
11+
Converter = dict[str, Union[str, Callable[[str, "pd.DataFrame"], "pd.Series"]]]
1212

1313

14-
def load_bundled_json(file_name: str) -> Dict:
14+
def load_bundled_json(file_name: str) -> dict[Any, Any]:
1515
filename = Path(__file__).absolute().parent.joinpath(file_name)
1616
with filename.open("r", encoding="utf-8") as data:
1717
return json.load(data)
@@ -113,23 +113,24 @@ def parse_float(frame, col):
113113
import numpy as np
114114
import pandas as pd
115115

116-
frame[col] = frame[col].replace("NaN", np.nan).replace("Infinity", np.inf).replace("-Infinity", -np.inf)
116+
frame[col] = frame[col].infer_objects(copy=False).replace({"NaN": np.nan, "Infinity": np.inf, "-Infinity": -np.inf})
117117
frame[col] = pd.to_numeric(frame[col], errors="coerce").astype(pd.Float64Dtype()) # pyright: ignore[reportCallIssue,reportArgumentType]
118+
118119
return frame[col]
119120

120121

121-
def parse_datetime(frame, col):
122+
def parse_datetime(frame, col, force_version: Optional[str] = None) -> "pd.Series":
122123
# Pandas before version 2 doesn't support the "ISO8601" value for the "format" arg
123124
import pandas as pd
124125

125126
args = {}
126-
if pd.__version__.startswith("2."):
127+
if (force_version or pd.__version__).startswith("2."):
127128
args = {"format": "ISO8601", "utc": True}
128129
else:
129130
# if a value does not contain "." (no fractional seconds), replace "Z" with ".000Z"
130-
# == False is not a mistake - that's the pandas way to do it
131-
contains_dot = frame[col].str.contains(".")
132-
frame.loc[not contains_dot, col] = frame.loc[not contains_dot, col].str.replace("Z", ".000Z")
131+
# Using bitwise NOT (~) on the boolean Series is the idiomatic pandas way to negate the mask
132+
contains_dot = frame[col].str.contains("\\.")
133+
frame.loc[~contains_dot, col] = frame.loc[~contains_dot, col].str.replace("Z", ".000Z")
133134
frame[col] = pd.to_datetime(frame[col], errors="coerce", **args)
134135
return frame[col]
135136

azure-kusto-data/tests/test_helpers.py

Lines changed: 29 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
import pytest
88

99
from azure.kusto.data._models import KustoResultTable
10-
from azure.kusto.data.helpers import dataframe_from_result_table
10+
from azure.kusto.data.helpers import dataframe_from_result_table, parse_datetime
1111
from azure.kusto.data.response import KustoResponseDataSetV2
1212
import pandas
1313
import numpy
@@ -128,3 +128,31 @@ def test_pandas_mixed_date():
128128

129129
assert df["Date"][0] == pandas.Timestamp(year=2023, month=12, day=12, hour=1, minute=59, second=59, microsecond=352000, tzinfo=datetime.timezone.utc)
130130
assert df["Date"][1] == pandas.Timestamp(year=2023, month=12, day=12, hour=1, minute=54, second=44, tzinfo=datetime.timezone.utc)
131+
132+
133+
def test_datetime_parsing():
134+
"""Test parse_datetime function with different pandas versions and datetime formats"""
135+
136+
# Test with pandas v2 behavior (force version 2)
137+
df_v2 = pandas.DataFrame(
138+
{
139+
"mixed": ["2023-12-12T01:59:59.352Z", "2023-12-12T01:54:44Z"],
140+
}
141+
)
142+
143+
# Force pandas v2 behavior
144+
result_v2 = parse_datetime(df_v2, "mixed", force_version="2.0.0")
145+
assert str(result_v2[0]) == "2023-12-12 01:59:59.352000+00:00"
146+
assert str(result_v2[1]) == "2023-12-12 01:54:44+00:00"
147+
# Test with pandas v1 behavior (force version 1)
148+
149+
df_v1 = pandas.DataFrame(
150+
{
151+
"mixed": ["2023-12-12T01:59:59.352Z", "2023-12-12T01:54:44Z"],
152+
}
153+
)
154+
155+
# Force pandas v1 behavior - it should add .000 to dates without milliseconds
156+
result_v1 = parse_datetime(df_v1, "mixed", force_version="1.5.3")
157+
assert str(result_v1[0]) == "2023-12-12 01:59:59.352000+00:00"
158+
assert str(result_v1[1]) == "2023-12-12 01:54:44+00:00"

0 commit comments

Comments
 (0)