Skip to content

Commit fbab5cd

Browse files
Copilotmykaul
andcommitted
Changes before error encountered
Co-authored-by: mykaul <[email protected]>
1 parent e803c16 commit fbab5cd

File tree

6 files changed

+186
-10
lines changed

6 files changed

+186
-10
lines changed

cassandra/cqltypes.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -642,25 +642,26 @@ def interpret_datestring(val):
642642

643643
@staticmethod
def deserialize(byts, protocol_version):
    """
    Decode a serialized timestamp into a datetime.

    The bytes are unpacked with ``int64_unpack`` and the resulting integer
    millisecond value is handed unchanged to
    ``util.datetime_from_timestamp_ms``; no division into float seconds
    takes place here.

    :param byts: the packed 64-bit value to decode
    :param protocol_version: accepted for interface compatibility; not used
    :return: whatever ``util.datetime_from_timestamp_ms`` returns
    """
    return util.datetime_from_timestamp_ms(int64_unpack(byts))
647647

648648
@staticmethod
def serialize(v, protocol_version):
    """
    Encode a datetime, date, or numeric timestamp as a packed 64-bit value.

    Three input shapes are tried in order:

    1. objects exposing ``utctimetuple()`` (datetimes): whole seconds come
       from ``calendar.timegm`` and the ``microsecond`` attribute is
       truncated to whole milliseconds;
    2. objects exposing only ``timetuple()`` (dates): whole seconds only;
    3. values whose exact type is listed in ``_number_types``: used as the
       millisecond value directly.

    :param v: the value to serialize
    :param protocol_version: accepted for interface compatibility; not used
    :return: ``int64_pack`` of the integer millisecond value
    :raises TypeError: if ``v`` matches none of the three shapes above
    """
    try:
        # Datetime path: stay in integer arithmetic so no precision is lost.
        epoch_seconds = calendar.timegm(v.utctimetuple())
        sub_second_ms = getattr(v, 'microsecond', 0) // 1000
        millis = epoch_seconds * 1000 + sub_second_ms
    except AttributeError:
        try:
            # Date path: no sub-second component is available.
            millis = 1000 * calendar.timegm(v.timetuple())
        except AttributeError:
            # Plain numbers are accepted as ready-made timestamps.
            if type(v) in _number_types:
                millis = v
            else:
                raise TypeError('DateType arguments must be a datetime, date, or timestamp')

    return int64_pack(int(millis))
664665

665666
@classmethod
666667
def serial_size(cls):

cassandra/cython_utils.pxd

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
from libc.stdint cimport int64_t
22
cdef datetime_from_timestamp(double timestamp)
3+
cdef datetime_from_timestamp_ms(int64_t timestamp_ms)

cassandra/cython_utils.pyx

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,3 +60,26 @@ cdef datetime_from_timestamp(double timestamp):
6060
microseconds += <int>tmp
6161

6262
return DATETIME_EPOC + timedelta_new(days, seconds, microseconds)
63+
64+
65+
cdef datetime_from_timestamp_ms(int64_t timestamp_ms):
    """
    Creates a timezone-agnostic datetime from a millisecond timestamp.

    The value is split into days, seconds and microseconds using 64-bit
    integer arithmetic only, so it never passes through a double.

    :param timestamp_ms: a unix timestamp, in milliseconds
    :return: DATETIME_EPOC offset by the computed timedelta
    """
    cdef int64_t whole_seconds, leftover_ms, day_span_seconds
    cdef int days, seconds, microseconds

    whole_seconds = timestamp_ms // 1000
    leftover_ms = timestamp_ms % 1000
    # Only taken when the modulo above produced a negative remainder:
    # borrow one second so the remainder lands back in [0, 1000).
    if leftover_ms < 0:
        leftover_ms += 1000
        whole_seconds -= 1

    days = <int> (whole_seconds // DAY_IN_SECONDS)
    # Widen before multiplying so the product is computed in 64 bits.
    day_span_seconds = (<int64_t> days) * DAY_IN_SECONDS
    seconds = <int> (whole_seconds - day_span_seconds)
    microseconds = <int> (leftover_ms * 1000)

    return DATETIME_EPOC + timedelta_new(days, seconds, microseconds)

cassandra/deserializers.pyx

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,11 @@
1313
# limitations under the License.
1414

1515

16-
from libc.stdint cimport int32_t, uint16_t
16+
from libc.stdint cimport int32_t, uint16_t, int64_t
1717

1818
include 'cython_marshal.pyx'
1919
from cassandra.buffer cimport Buffer, to_bytes, slice_buffer
20-
from cassandra.cython_utils cimport datetime_from_timestamp
20+
from cassandra.cython_utils cimport datetime_from_timestamp, datetime_from_timestamp_ms
2121

2222
from cython.view cimport array as cython_array
2323
from cassandra.tuple cimport tuple_new, tuple_set
@@ -135,8 +135,8 @@ cdef class DesCounterColumnType(DesLongType):
135135

136136
cdef class DesDateType(Deserializer):
    # Deserializer that turns a packed 64-bit millisecond timestamp into a
    # datetime via datetime_from_timestamp_ms.

    cdef deserialize(self, Buffer *buf, int protocol_version):
        # The int64 read from the buffer is passed on as-is; it is never
        # converted to a double on the way. protocol_version is not used.
        return datetime_from_timestamp_ms(unpack_num[int64_t](buf))
140140

141141

142142
cdef class TimestampType(DesDateType):

cassandra/util.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,29 @@ def datetime_from_timestamp(timestamp):
6262
return dt
6363

6464

65+
def datetime_from_timestamp_ms(timestamp_ms):
    """
    Creates a timezone-agnostic datetime from timestamp in milliseconds.

    The timestamp is split into whole seconds and leftover milliseconds
    instead of being divided into a float number of seconds, so millisecond
    precision is kept for timestamps far from the epoch.

    :param timestamp_ms: a unix timestamp, in milliseconds (as integer)
    :return: ``DATETIME_EPOC`` shifted by ``timestamp_ms`` milliseconds
    :raises OverflowError: if the result falls outside the range that
        ``datetime`` can represent
    """
    # divmod() floors toward negative infinity, so for integer input the
    # remainder is always in [0, 1000); negative timestamps therefore need
    # no separate correction step.
    timestamp_seconds, remainder_ms = divmod(timestamp_ms, 1000)
    return DATETIME_EPOC + datetime.timedelta(
        seconds=timestamp_seconds, microseconds=remainder_ms * 1000)
86+
87+
6588
def utc_datetime_from_ms_timestamp(timestamp):
6689
"""
6790
Creates a UTC datetime from a timestamp in milliseconds. See
Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
# Copyright DataStax, Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import unittest
16+
import datetime
17+
from cassandra.cqltypes import DateType
18+
from cassandra.marshal import int64_pack
19+
20+
21+
class TimestampPrecisionTests(unittest.TestCase):
    """
    Tests for timestamp precision with large values (far from epoch).
    See: https://github.com/scylladb/python-driver/issues/XXX
    """

    @staticmethod
    def _roundtrip_ms(original_ms):
        """
        Push a millisecond timestamp through deserialize -> serialize.

        The value is packed as an int64 (simulating database storage),
        decoded with ``DateType.deserialize``, encoded again with
        ``DateType.serialize`` and finally unpacked back to an integer.

        :param original_ms: the millisecond timestamp to round-trip
        :return: the millisecond value recovered after the round trip
        """
        # Imported here so the helper does not depend on a module-level import.
        from cassandra.marshal import int64_unpack

        dt = DateType.deserialize(int64_pack(original_ms), 0)
        return int64_unpack(DateType.serialize(dt, 0))

    def _assert_roundtrip_exact(self, original_ms):
        """Fail unless ``original_ms`` survives the round trip unchanged."""
        result_ms = self._roundtrip_ms(original_ms)
        # unittest assertions are used instead of bare ``assert`` so the
        # checks still run when Python is started with -O.
        self.assertEqual(
            result_ms, original_ms,
            f"Expected {original_ms}, got {result_ms}, difference: {result_ms - original_ms}")

    def test_large_timestamp_roundtrip(self):
        """
        Test that timestamps far from epoch (> 300 years) maintain precision
        through serialize/deserialize cycle.
        """
        # Timestamp for "2300-01-01 00:00:00.001" in milliseconds.
        # This is far enough from epoch that float precision is lost.
        self._assert_roundtrip_exact(10413792000001)

    def test_year_2300_timestamp_precision(self):
        """
        Test the specific case from the issue report:
        timestamp "2300-01-01 00:00:00.001" should maintain precision.
        """
        # 1000 microseconds = 1 millisecond
        dt = datetime.datetime(2300, 1, 1, 0, 0, 0, 1000)

        packed = DateType.serialize(dt, 0)
        dt_restored = DateType.deserialize(packed, 0)
        repacked = DateType.serialize(dt_restored, 0)

        # Serializing the restored value must give the same bytes again.
        self.assertEqual(
            packed, repacked,
            f"Serialization not stable: {packed.hex()} != {repacked.hex()}")

        # The microseconds should be preserved
        self.assertEqual(
            dt_restored.microsecond, 1000,
            f"Expected 1000 microseconds, got {dt_restored.microsecond}")

        # The whole datetime, not only its sub-second part, must survive.
        self.assertEqual(dt_restored, dt)

    def test_various_large_timestamps(self):
        """
        Test multiple timestamps far from epoch to ensure precision is maintained.
        """
        # Various timestamps > 300 years from epoch (in milliseconds)
        test_timestamps_ms = [
            10413792000001,   # 2300-01-01 00:00:00.001
            10413792000999,   # 2300-01-01 00:00:00.999
            15768000000000,   # 2469-09-01 00:00:00.000
            20000000000001,   # ~2603 with millisecond precision
            -10413792000001,  # 1639-12-31 23:59:59.999 (before the epoch)
        ]

        for original_ms in test_timestamps_ms:
            with self.subTest(timestamp_ms=original_ms):
                self._assert_roundtrip_exact(original_ms)

    def test_small_timestamp_still_works(self):
        """
        Ensure that timestamps close to epoch still work correctly.
        """
        # Timestamp close to epoch (well within float precision):
        # 2001-09-09 01:46:40.000
        original_ms = 1000000000000

        self.assertEqual(self._roundtrip_ms(original_ms), original_ms)
125+
126+
127+
if __name__ == '__main__':
128+
unittest.main()

0 commit comments

Comments
 (0)