fix: time filter db migration optimization (#13015)

This commit is contained in:
Jesse Yang
2021-02-09 08:49:50 -10:00
committed by GitHub
parent 4ba98bf8c8
commit bbcb41149e
4 changed files with 60 additions and 29 deletions

View File

@@ -14,7 +14,7 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from flask_babel import lazy_gettext as _
from flask_babel import _
from marshmallow.validate import ValidationError
from superset.commands.exceptions import (
@@ -28,13 +28,41 @@ from superset.commands.exceptions import (
)
class TimeRangeUnclearError(ValidationError):
    """
    Validation error for an ambiguous time string.

    The message asks the user to restate the value as either
    "<value> ago" or "<value> later"; the error is attached to the
    ``time_range`` field.
    """

    def __init__(self, human_readable: str) -> None:
        # Build the translated message first, then hand it to the base class.
        message = _(
            "Time string is unclear."
            " Please specify [%(human_readable)s ago]"
            " or [%(human_readable)s later].",
            human_readable=human_readable,
        )
        super().__init__(message, field_name="time_range")
class TimeRangeParseFailError(ValidationError):
    """
    Validation error for a time string that cannot be parsed.

    The offending string is echoed back in the message and the error is
    attached to the ``time_range`` field.
    """

    def __init__(self, human_readable: str) -> None:
        # Build the translated message first, then hand it to the base class.
        message = _(
            "Cannot parse time string [%(human_readable)s]",
            human_readable=human_readable,
        )
        super().__init__(message, field_name="time_range")
class DatabaseNotFoundValidationError(ValidationError):
    """
    Marshmallow validation error raised when the database does not exist.
    """

    def __init__(self) -> None:
        # Initialise the base class exactly once, attaching the error to the
        # ``database`` field through the ``field_name`` keyword -- the same
        # keyword the time-range validation errors in this module use.
        super().__init__(_("Database does not exist"), field_name="database")
class DashboardsNotFoundValidationError(ValidationError):
@@ -43,7 +71,7 @@ class DashboardsNotFoundValidationError(ValidationError):
"""
def __init__(self) -> None:
super().__init__(_("Dashboards do not exist"), field_names=["dashboards"])
super().__init__(_("Dashboards do not exist"), field_name="dashboards")
class DatasourceTypeUpdateRequiredValidationError(ValidationError):

View File

@@ -79,11 +79,14 @@ def upgrade():
)
try:
slices = session.query(Slice).filter(where_clause).all()
slices = session.query(Slice).filter(where_clause)
total = slices.count()
sep = " : "
pattern = DateRangeMigration.x_dateunit
for idx, slc in enumerate(slices):
print(f"Upgrading ({idx + 1}/{len(slices)}): {slc.slice_name}#{slc.id}")
idx = 0
for slc in slices.yield_per(100):
idx += 1
print(f"Upgrading ({idx}/{total}): {slc.slice_name}#{slc.id}")
params = json.loads(slc.params)
time_range = params["time_range"]
if sep in time_range:
@@ -93,7 +96,6 @@ def upgrade():
if re.match(pattern, end):
end = f"{end.strip()} later"
params["time_range"] = f"{start}{sep}{end}"
slc.params = json.dumps(params, sort_keys=True, indent=4)
session.commit()
except OperationalError:

View File

@@ -39,7 +39,11 @@ from pyparsing import (
Suppress,
)
from .core import memoized
from superset.charts.commands.exceptions import (
TimeRangeParseFailError,
TimeRangeUnclearError,
)
from superset.utils.core import memoized
ParserElement.enablePackrat()
@@ -73,15 +77,7 @@ def parse_human_datetime(human_readable: str) -> datetime:
"""
x_periods = r"^\s*([0-9]+)\s+(second|minute|hour|day|week|month|quarter|year)s?\s*$"
if re.search(x_periods, human_readable, re.IGNORECASE):
raise ValueError(
_(
"Date string is unclear."
" Please specify [%(human_readable)s ago]"
" or [%(human_readable)s later]",
human_readable=human_readable,
)
)
raise TimeRangeUnclearError(human_readable)
try:
dttm = parse(human_readable)
except (ValueError, OverflowError) as ex:
@@ -90,12 +86,7 @@ def parse_human_datetime(human_readable: str) -> datetime:
# 0 == not parsed at all
if parsed_flags == 0:
logger.exception(ex)
raise ValueError(
_(
"Couldn't parse date string [%(human_readable)s]",
human_readable=human_readable,
)
)
raise TimeRangeParseFailError(human_readable)
# when time is not extracted, we 'reset to midnight'
if parsed_flags & 2 == 0:
parsed_dttm = parsed_dttm.replace(hour=0, minute=0, second=0)
@@ -492,9 +483,9 @@ def datetime_eval(datetime_expression: Optional[str] = None) -> Optional[datetim
class DateRangeMigration:  # pylint: disable=too-few-public-methods
    """
    Regular expressions for spotting bare "<number> <unit>" time ranges.

    A bare value such as ``8 days`` carries no direction ("ago"/"later").
    The patterns accept any amount of whitespace around the number and the
    unit, and only the units day, week, month, quarter and year (with an
    optional plural ``s``).
    """

    # Serialized params whose "time_range" start (left of " : ") is a bare
    # "<number> <unit>".
    x_dateunit_in_since = (
        r'"time_range":\s*"\s*[0-9]+\s+(day|week|month|quarter|year)s?\s*\s:\s'
    )
    # Serialized params whose "time_range" end (right of " : ") is a bare
    # "<number> <unit>".
    x_dateunit_in_until = (
        r'"time_range":\s*".*\s:\s*[0-9]+\s+(day|week|month|quarter|year)s?\s*"'
    )
    # A single endpoint that is *only* "<number> <unit>"; anchored at both
    # ends so that values like "10 years ago" are not matched.
    x_dateunit = r"^\s*[0-9]+\s+(day|week|month|quarter|year)s?\s*$"

View File

@@ -17,6 +17,10 @@
from datetime import datetime, timedelta
from unittest.mock import patch
from superset.charts.commands.exceptions import (
TimeRangeParseFailError,
TimeRangeUnclearError,
)
from superset.utils.date_parser import (
DateRangeMigration,
datetime_eval,
@@ -265,13 +269,13 @@ class TestDateParser(SupersetTestCase):
self.assertEqual(parse_past_timedelta("1 month"), timedelta(31))
def test_parse_human_datetime(self):
    """Check that bad time strings raise the specific time-range errors."""
    # A bare "<number> <unit>" has no direction, so it is reported as unclear
    # (surrounding whitespace and a singular unit make no difference).
    with self.assertRaises(TimeRangeUnclearError):
        parse_human_datetime(" 2 days ")

    with self.assertRaises(TimeRangeUnclearError):
        parse_human_datetime("2 day")

    # Text that cannot be parsed at all is reported as a parse failure.
    with self.assertRaises(TimeRangeParseFailError):
        parse_human_datetime("xxxxxxx")
def test_DateRangeMigration(self):
@@ -291,3 +295,9 @@ class TestDateParser(SupersetTestCase):
field = " 8 days "
self.assertRegex(field, DateRangeMigration.x_dateunit)
field = "last week"
self.assertNotRegex(field, DateRangeMigration.x_dateunit)
field = "10 years ago"
self.assertNotRegex(field, DateRangeMigration.x_dateunit)