perf(date_parser): bound regex quantifiers for deterministic parsing performance (#36983)

This commit is contained in:
Jean Massucatto
2026-01-12 06:52:19 -03:00
committed by GitHub
parent d914b35cc0
commit 459b4cb23d
2 changed files with 52 additions and 5 deletions

View File

@@ -611,3 +611,50 @@ def test_date_range_migration() -> None:
field = "10 years ago"
assert not re.search(DateRangeMigration.x_dateunit, field)
# Tests for bounded whitespace regex patterns in time_range_lookup
@pytest.mark.parametrize(
    "time_range",
    [
        # Baseline: single-space separators.
        "last 7 days : ",
        "this week : ",
        "start of next month : ",
        "prior quarter : ",
        # Runs of spaces are spelled as `" " * n` so the intended count is
        # explicit and cannot be lost to whitespace normalization.
        # NOTE(review): the exact counts below (2-5) are reconstructed from the
        # "5 spaces" / "\s{0,5}" comments in this test - confirm against the
        # bounds used in superset.utils.date_parser.
        "last" + " " * 2 + "7 days : ",
        "last" + " " * 3 + "7 days : ",
        "last" + " " * 4 + "7 days : ",
        "last" + " " * 5 + "7 days : ",
        "start of" + " " * 5 + "next" + " " * 5 + "month : ",  # 5 spaces - valid
        "last" + " " * 2 + "week : ",
        "last" + " " * 3 + "week : ",
        "last" + " " * 5 + "week : ",
        "next" + " " * 2 + "12" + " " * 2 + "months : ",
        "next" + " " * 3 + "12" + " " * 3 + "months : ",
        "next" + " " * 5 + "12" + " " * 5 + "months : ",
        "last 7days : ",  # \s{0,5} allows 0 spaces after number - valid
    ],
)
@patch("superset.utils.date_parser.parse_human_datetime", mock_parse_human_datetime)
def test_time_range_bounded_whitespace_regex_valid(time_range: str) -> None:
    """Match expressions whose whitespace runs stay within the regex bounds.

    Covers 1-5 spaces between tokens, plus the one spot where 0 spaces are
    accepted (between a number and its unit, e.g. ``7days``). Each case is
    passed to ``get_since_until`` and must yield a non-``None`` first element.
    """
    result = get_since_until(time_range)
    assert result[0] is not None, f"Expected '{time_range}' to parse successfully"
@pytest.mark.parametrize(
    "time_range",
    [
        # One space more than the 5-space bound. Written as `" " * 6` so the
        # count is explicit; with a single space this case would be identical
        # to the first *valid* case and the two tests would contradict.
        # NOTE(review): count reconstructed from the "6+ spaces" docstring -
        # confirm against the bound in superset.utils.date_parser.
        "last" + " " * 6 + "7 days : ",
        # No whitespace at all between keyword and the rest.
        "last7days : ",
        "lastweek : ",
        # Incomplete expressions: keyword without a unit.
        "last : ",
        "start of : ",
        # Trailing token after an otherwise valid expression.
        "last 7 days extra : ",
    ],
)
@patch("superset.utils.date_parser.parse_human_datetime", mock_parse_human_datetime)
def test_time_range_bounded_whitespace_regex_invalid(time_range: str) -> None:
    """Reject expressions with 0 or 6+ spaces (fall back to DATETIME wrapping).

    Also covers incomplete expressions and ones with a trailing extra token.
    Each case is passed to ``get_since_until`` and must yield ``None`` as the
    first element of the result.
    """
    result = get_since_until(time_range)
    assert result[0] is None, f"Expected '{time_range}' to NOT match bounded regex"