mirror of
https://github.com/apache/superset.git
synced 2026-04-18 15:44:57 +00:00
perf(date_parser): bound regex quantifiers for deterministic parsing performance (#36983)
This commit is contained in:
@@ -418,9 +418,9 @@ def get_since_until( # pylint: disable=too-many-arguments,too-many-locals,too-m
|
||||
if time_range and separator in time_range:
|
||||
time_range_lookup = [
|
||||
(
|
||||
r"^(start of|beginning of|end of)\s+"
|
||||
r"(this|last|next|prior)\s+"
|
||||
r"([0-9]+)?\s*"
|
||||
r"^(start of|beginning of|end of)\s{1,5}"
|
||||
r"(this|last|next|prior)\s{1,5}"
|
||||
r"([0-9]+)?\s{0,5}"
|
||||
r"(day|week|month|quarter|year)s?$", # Matches phrases like "start of next month" # noqa: E501
|
||||
lambda modifier, scope, delta, unit: handle_modifier_and_unit(
|
||||
modifier,
|
||||
@@ -431,8 +431,8 @@ def get_since_until( # pylint: disable=too-many-arguments,too-many-locals,too-m
|
||||
),
|
||||
),
|
||||
(
|
||||
r"^(this|last|next|prior)\s+"
|
||||
r"([0-9]+)?\s*"
|
||||
r"^(this|last|next|prior)\s{1,5}"
|
||||
r"([0-9]+)?\s{0,5}"
|
||||
r"(second|minute|day|week|month|quarter|year)s?$", # Matches "next 5 days" or "last 2 weeks" # noqa: E501
|
||||
lambda scope, delta, unit: handle_scope_and_unit(
|
||||
scope, delta, unit, get_relative_base(unit, relative_start)
|
||||
|
||||
@@ -611,3 +611,50 @@ def test_date_range_migration() -> None:
|
||||
|
||||
field = "10 years ago"
|
||||
assert not re.search(DateRangeMigration.x_dateunit, field)
|
||||
|
||||
|
||||
# Tests for bounded whitespace regex patterns in time_range_lookup
|
||||
@pytest.mark.parametrize(
|
||||
"time_range",
|
||||
[
|
||||
"last 7 days : ",
|
||||
"this week : ",
|
||||
"start of next month : ",
|
||||
"prior quarter : ",
|
||||
"last 7 days : ",
|
||||
"last 7 days : ",
|
||||
"last 7 days : ",
|
||||
"last 7 days : ",
|
||||
"start of next month : ", # 5 spaces - valid
|
||||
"last week : ",
|
||||
"last week : ",
|
||||
"last week : ",
|
||||
"next 12 months : ",
|
||||
"next 12 months : ",
|
||||
"next 12 months : ",
|
||||
"last 7days : ", # \s{0,5} allows 0 spaces after number - valid
|
||||
],
|
||||
)
|
||||
@patch("superset.utils.date_parser.parse_human_datetime", mock_parse_human_datetime)
|
||||
def test_time_range_bounded_whitespace_regex_valid(time_range: str) -> None:
|
||||
"""Match expressions with 1-5 spaces between tokens (0-5 after the number)."""
|
||||
result = get_since_until(time_range)
|
||||
assert result[0] is not None, f"Expected '{time_range}' to parse successfully"
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"time_range",
|
||||
[
|
||||
"last 7 days : ",
|
||||
"last7days : ",
|
||||
"lastweek : ",
|
||||
"last : ",
|
||||
"start of : ",
|
||||
"last 7 days extra : ",
|
||||
],
|
||||
)
|
||||
@patch("superset.utils.date_parser.parse_human_datetime", mock_parse_human_datetime)
|
||||
def test_time_range_bounded_whitespace_regex_invalid(time_range: str) -> None:
|
||||
"""Reject 0 or 6+ spaces and missing/extra tokens (fall back to DATETIME wrapping)."""
|
||||
result = get_since_until(time_range)
|
||||
assert result[0] is None, f"Expected '{time_range}' to NOT match bounded regex"
|
||||
|
||||
Reference in New Issue
Block a user