Skip to content

Commit d2e236f

Browse files
serhii73 and Copilot authored
Fix explicit +/- signs in relative date offsets (scrapinghub#1303)
* Add tests for relative date time offset parsing
* Fix explicit +/- signs in relative date offsets
* Update tests/test_date_parser.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent f0f9102 commit d2e236f

File tree

2 files changed: 142 additions and 9 deletions

dateparser/freshness_date_parser.py

Lines changed: 35 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010
from .timezone_parser import pop_tz_offset_from_string
1111

1212
_UNITS = r"decade|year|month|week|day|hour|minute|second"
13-
PATTERN = re.compile(r"(\d+[.,]?\d*)\s*(%s)\b" % _UNITS, re.I | re.S | re.U)
13+
PATTERN = re.compile(r"([+-]?\d+[.,]?\d*)\s*(%s)\b" % _UNITS, re.I | re.S | re.U)
1414

1515

1616
class FreshnessDateDataParser:
@@ -112,7 +112,13 @@ def _parse_date(self, date_string, now, prefer_dates_from):
112112
if not self._are_all_words_units(date_string):
113113
return None, None
114114

115-
kwargs = self.get_kwargs(date_string)
115+
result = self.get_kwargs(date_string)
116+
if isinstance(result, tuple):
117+
kwargs, explicit_signs = result
118+
else:
119+
kwargs = result
120+
explicit_signs = {}
121+
116122
if not kwargs:
117123
return None, None
118124
period = "day"
@@ -121,16 +127,27 @@ def _parse_date(self, date_string, now, prefer_dates_from):
121127
if k in kwargs:
122128
period = k[:-1]
123129
break
124-
td = relativedelta(**kwargs)
125130

126-
if (
131+
going_forward = (
127132
re.search(r"\bin\b", date_string)
128133
or re.search(r"\bfuture\b", prefer_dates_from)
129134
and not re.search(r"\bago\b", date_string)
130-
):
131-
date = now + td
132-
else:
133-
date = now - td
135+
)
136+
137+
adjusted_kwargs = {}
138+
for key, value in kwargs.items():
139+
if explicit_signs.get(key, False):
140+
adjusted_kwargs[key] = value
141+
else:
142+
if going_forward:
143+
adjusted_kwargs[key] = value
144+
else:
145+
adjusted_kwargs[key] = -value
146+
147+
td = relativedelta(**adjusted_kwargs)
148+
149+
date = now + td
150+
134151
return date, period
135152

136153
def get_kwargs(self, date_string):
@@ -139,12 +156,21 @@ def get_kwargs(self, date_string):
139156
return {}
140157

141158
kwargs = {}
159+
explicit_signs = {}
160+
142161
for num, unit in m:
162+
has_explicit_sign = num.startswith("+") or num.startswith("-")
163+
explicit_signs[unit + "s"] = has_explicit_sign
143164
kwargs[unit + "s"] = float(num.replace(",", "."))
165+
144166
if "decades" in kwargs:
145167
kwargs["years"] = 10 * kwargs["decades"] + kwargs.get("years", 0)
168+
if "decades" in explicit_signs:
169+
explicit_signs["years"] = explicit_signs["decades"]
146170
del kwargs["decades"]
147-
return kwargs
171+
explicit_signs.pop("decades", None)
172+
173+
return kwargs, explicit_signs
148174

149175
def get_date_data(self, date_string, settings=None):
150176
from dateparser.date import DateData

tests/test_date_parser.py

Lines changed: 107 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
from parameterized import param, parameterized
77

88
import dateparser.timezone_parser
9+
from dateparser import parse
910
from dateparser.date import DateDataParser, date_parser
1011
from dateparser.date_parser import DateParser
1112
from dateparser.parser import _parse_absolute
@@ -1345,6 +1346,82 @@ def test_dates_with_no_day_or_month(
13451346
self.then_date_was_parsed_by_date_parser()
13461347
self.then_date_obj_exactly_is(expected)
13471348

1349+
@parameterized.expand(
1350+
[
1351+
param(
1352+
"yesterday +1h",
1353+
lambda base: base - timedelta(days=1) + timedelta(hours=1),
1354+
"Yesterday plus 1 hour",
1355+
),
1356+
param(
1357+
"yesterday +2h",
1358+
lambda base: base - timedelta(days=1) + timedelta(hours=2),
1359+
"Yesterday plus 2 hours",
1360+
),
1361+
param(
1362+
"yesterday +30m",
1363+
lambda base: base - timedelta(days=1) + timedelta(minutes=30),
1364+
"Yesterday plus 30 minutes",
1365+
),
1366+
param(
1367+
"yesterday -1h",
1368+
lambda base: base - timedelta(days=1) - timedelta(hours=1),
1369+
"Yesterday minus 1 hour",
1370+
),
1371+
param(
1372+
"yesterday -2h",
1373+
lambda base: base - timedelta(days=1) - timedelta(hours=2),
1374+
"Yesterday minus 2 hours",
1375+
),
1376+
param(
1377+
"yesterday -30m",
1378+
lambda base: base - timedelta(days=1) - timedelta(minutes=30),
1379+
"Yesterday minus 30 minutes",
1380+
),
1381+
param(
1382+
"tomorrow +1h",
1383+
lambda base: base + timedelta(days=1) + timedelta(hours=1),
1384+
"Tomorrow plus 1 hour",
1385+
),
1386+
param(
1387+
"tomorrow +3h",
1388+
lambda base: base + timedelta(days=1) + timedelta(hours=3),
1389+
"Tomorrow plus 3 hours",
1390+
),
1391+
param(
1392+
"tomorrow -1h",
1393+
lambda base: base + timedelta(days=1) - timedelta(hours=1),
1394+
"Tomorrow minus 1 hour",
1395+
),
1396+
param(
1397+
"tomorrow -2h",
1398+
lambda base: base + timedelta(days=1) - timedelta(hours=2),
1399+
"Tomorrow minus 2 hours",
1400+
),
1401+
]
1402+
)
1403+
def test_relative_date_with_time_offset(
1404+
self, date_string, offset_calculator, description
1405+
):
1406+
"""Ensure +/- signs in time offsets are parsed correctly."""
1407+
base_date = datetime(2026, 1, 19, 12, 0, 0)
1408+
expected = offset_calculator(base_date)
1409+
1410+
result = parse(
1411+
date_string,
1412+
settings={
1413+
"RELATIVE_BASE": base_date,
1414+
"RETURN_AS_TIMEZONE_AWARE": False,
1415+
},
1416+
)
1417+
1418+
self.assertIsNotNone(result, f"Failed to parse: {description}")
1419+
self.assertEqual(
1420+
expected,
1421+
result,
1422+
f"{description}: Expected {expected}, got {result}",
1423+
)
1424+
13481425
def given_local_tz_offset(self, offset):
13491426
self.add_patch(
13501427
patch.object(
@@ -1354,6 +1431,36 @@ def given_local_tz_offset(self, offset):
13541431
)
13551432
)
13561433

1434+
def test_yesterday_plus_and_minus_expected_values(self):
1435+
"""Verify correct time offset calculations for yesterday."""
1436+
# Base: 2026-01-08 21:38:10
1437+
base_date = datetime(2026, 1, 8, 21, 38, 10)
1438+
1439+
plus_result = parse(
1440+
"yesterday +1h",
1441+
settings={"RELATIVE_BASE": base_date, "RETURN_AS_TIMEZONE_AWARE": False},
1442+
)
1443+
minus_result = parse(
1444+
"yesterday -1h",
1445+
settings={"RELATIVE_BASE": base_date, "RETURN_AS_TIMEZONE_AWARE": False},
1446+
)
1447+
1448+
# Expected: 2026-01-07 22:38:10 (yesterday at same time, plus 1 hour)
1449+
expected_plus = datetime(2026, 1, 7, 22, 38, 10)
1450+
# Expected: 2026-01-07 20:38:10 (yesterday at same time, minus 1 hour)
1451+
expected_minus = datetime(2026, 1, 7, 20, 38, 10)
1452+
1453+
self.assertEqual(
1454+
expected_plus,
1455+
plus_result,
1456+
f"'yesterday +1h' should be {expected_plus}, got {plus_result}",
1457+
)
1458+
self.assertEqual(
1459+
expected_minus,
1460+
minus_result,
1461+
f"'yesterday -1h' should be {expected_minus}, got {minus_result}",
1462+
)
1463+
13571464
def given_parser(self, *args, **kwds):
13581465
def collecting_get_date_data(parse):
13591466
@wraps(parse)

0 commit comments

Comments
 (0)