mirror of
https://github.com/apache/superset.git
synced 2026-04-21 00:54:44 +00:00
Celery task for warming up cache (#7148)
* Sparkline dates aren't formatting in Time Series Table (#6976) * Exclude venv for python linter to ignore * Fix NaN error * Fix the white background shown in SQL editor on drag (#7021) This PR sets the background-color css property on `.ace_scroller` instead of `.ace_content` to prevent the white background shown during resizing of the SQL editor before drag ends. * Show tooltip with time frame (#6979) * Fix time filter control (#6978) * Enhancement of query context and object. (#6962) * added more functionalities for query context and object. * fixed cache logic * added default value for groupby * updated comments and removed print (cherry picked from commitd5b9795f87) * [fix] /superset/slice/id url is too long (#6989) (cherry picked from commit6a4d507ab6) * [WIP] fix user specified JSON metadata not updating dashboard on refresh (#7027) (cherry picked from commitcc58f0e661) * feat: add ability to change font size in big number (#7003) * Add ability to change font sizes in Big Number * rename big number to header * Add comment to clarify font size values * Allow LIMIT to be specified in parameters (#7052) * [fix] Cursor jumping when editing chart and dashboard titles (#7038) (cherry picked from commitfc1770f7b7) * Changing time table viz to pass formatTime a date (#7020) (cherry picked from commit7f3c145b1f) * [db-engine-spec] Aligning Hive/Presto partition logic (#7007) (cherry picked from commit05be866117) * [fix] explore chart from dashboard missed slice title (#7046) (cherry picked from commita6d48d4052) * fix inaccurate data calculation with adata rolling and contribution (#7035) (cherry picked from commit0782e831cd) * Adding warning message for sqllab save query (#7028) (cherry picked from commitead3d48133) * [datasource] Ensuring consistent behavior of datasource editing/saving. 
(#7037) * Update datasource.py * Update datasource.py (cherry picked from commitc771625f10) * [csv-upload] Fixing message encoding (#6971) (cherry picked from commit48431ab5b9) * [sql-parse] Fixing LIMIT exceptions (#6963) (cherry picked from commit3e076cb60b) * Adding custom control overrides (#6956) * Adding extraOverrides to line chart * Updating extraOverrides to fit with more cases * Moving extraOverrides to index.js * Removing webpack-merge in package.json * Fixing metrics control clearing metric (cherry picked from commite6194051f4) * [sqlparse] Fixing table name extraction for ill-defined query (#7029) (cherry picked from commit07c340cf82) * [missing values] Removing replacing missing values (#4905) (cherry picked from commit61add606ca) * [SQL Lab] Improved query and results tabs rendering reliability (#7082) closes #7080 (cherry picked from commit9b58e9f492) * Fix filter_box migration PR #6523 (#7066) * Fix filter_box migration PR #6523 * Fix druid-related bug (cherry picked from commitb210742ad2) * SQL editor layout makeover (#7102) This PR includes the following layout and css tweaks: - Using flex to layout the north and south sub panes of query pane so resizing works properly in both Chrome and Firefox - Removal of necessary wrapper divs and tweaking of css in sql lab so we can scroll to the bottom of both the table list and the results pane - Make sql lab's content not overflow vertically and layout the query result area to eliminate double scroll bars - css tweaks on the basic.html page so the loading animation appears in the center of the page across the board (cherry picked from commit71f1bbd2ec) * [forms] Fix handling of NULLs (cherry picked from commite83a07d3df) * handle null column_name in sqla and druid models (cherry picked from commit2ff721ae07) * Use metric name instead of metric in filter box (#7106) (cherry picked from commit003364e74e) * Bump python lib croniter to an existing version (#7132) Package maintainers should really never delete 
packages, but it appears this happened with croniter and resulted in breaking our builds. This PR bumps to a more recent existing version of the library (cherry picked from commit215ed392a1) * Revert PR #6933 (#7162) * Celery worker for warming up cache * Remove testing changes * Add documentation * Fix lint * WIP dashboard filters * Use new cache so it works with dashboards * Add more unit tests, fix old ones * Fix flake8 and docs * Sparkline dates aren't formatting in Time Series Table (#6976) * Exclude venv for python linter to ignore * Fix NaN error * Changing time table viz to pass formatTime a date (#7020) (cherry picked from commit7f3c145b1f) * SQL editor layout makeover (#7102) This PR includes the following layout and css tweaks: - Using flex to layout the north and south sub panes of query pane so resizing works properly in both Chrome and Firefox - Removal of necessary wrapper divs and tweaking of css in sql lab so we can scroll to the bottom of both the table list and the results pane - Make sql lab's content not overflow vertically and layout the query result area to eliminate double scroll bars - css tweaks on the basic.html page so the loading animation appears in the center of the page across the board (cherry picked from commit71f1bbd2ec) * Celery worker for warming up cache * Remove testing changes * Add documentation * Fix lint * WIP dashboard filters * Use new cache so it works with dashboards * Add more unit tests, fix old ones * Fix flake8 and docs * Fix bad merge and pylint
This commit is contained in:
committed by
Christine Chambers
parent
1132c3cbde
commit
4ab89dbcf7
316
superset/tasks/cache.py
Normal file
316
superset/tasks/cache.py
Normal file
@@ -0,0 +1,316 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
# pylint: disable=too-few-public-methods
|
||||
|
||||
import json
|
||||
import logging
|
||||
import urllib.parse
|
||||
|
||||
from celery.utils.log import get_task_logger
|
||||
from flask import url_for
|
||||
import requests
|
||||
from requests.exceptions import RequestException
|
||||
from sqlalchemy import and_, func
|
||||
|
||||
from superset import app, db
|
||||
from superset.models.core import Dashboard, Log, Slice
|
||||
from superset.models.tags import Tag, TaggedObject
|
||||
from superset.tasks.celery_app import app as celery_app
|
||||
from superset.utils.core import parse_human_datetime
|
||||
|
||||
|
||||
# Module-level logger obtained from Celery's task logging helper.
logger = get_task_logger(__name__)
|
||||
# Emit INFO and above so the per-URL "Fetching ..." messages are logged.
logger.setLevel(logging.INFO)
|
||||
|
||||
|
||||
def get_form_data(chart_id, dashboard=None):
    """
    Build `form_data` for chart GET request from dashboard's `default_filters`.

    When a dashboard has `default_filters` they need to be added as extra
    filters in the GET request for charts.

    :param chart_id: id of the chart; always returned under `slice_id`.
    :param dashboard: optional object exposing a `json_metadata` attribute
        holding a JSON document (or a falsy value when there is none).
    :returns: a dict with `slice_id` and, when at least one default filter
        applies to the chart, an `extra_filters` list of
        `{'col': ..., 'op': 'in', 'val': ...}` entries.
    :raises ValueError: if `json_metadata` (or a string-encoded
        `default_filters`) is not valid JSON.
    """
    form_data = {'slice_id': chart_id}

    if dashboard is None or not dashboard.json_metadata:
        return form_data

    json_metadata = json.loads(dashboard.json_metadata)
    # metadata such as `null` or a list carries no filter configuration
    if not isinstance(json_metadata, dict):
        return form_data

    # do not apply filters if chart is immune to them
    # (`or []` guards against the key being present but set to null)
    if chart_id in (json_metadata.get('filter_immune_slices') or []):
        return form_data

    # `default_filters` is normally a JSON-encoded string, but the key may
    # also be null or hold an already-decoded object; only decode strings so
    # that neither case raises a TypeError.
    default_filters = json_metadata.get('default_filters')
    if isinstance(default_filters, str):
        default_filters = json.loads(default_filters)
    if not default_filters or not isinstance(default_filters, dict):
        return form_data

    # are some of the fields in the chart immune to filters?
    # (keys of `filter_immune_slice_fields` are chart ids as strings)
    filter_immune_slice_fields = json_metadata.get('filter_immune_slice_fields') or {}
    immune_fields = filter_immune_slice_fields.get(str(chart_id)) or []

    extra_filters = [
        {'col': col, 'op': 'in', 'val': val}
        for filters in default_filters.values()
        for col, val in filters.items()
        if col not in immune_fields
    ]
    if extra_filters:
        form_data['extra_filters'] = extra_filters

    return form_data
|
||||
|
||||
|
||||
def get_url(params):
    """
    Return the absolute URL used to warm up one chart's cache.

    The host and port come from the `SUPERSET_WEBSERVER_ADDRESS` and
    `SUPERSET_WEBSERVER_PORT` entries of the app config; the path and query
    string are produced by `url_for('Superset.explore_json', **params)`.
    """
    root = 'http://{SUPERSET_WEBSERVER_ADDRESS}:{SUPERSET_WEBSERVER_PORT}/'.format(
        **app.config)

    # `url_for` is resolved inside a test request context because this code
    # is not called from within a real HTTP request.
    with app.test_request_context():
        endpoint_path = url_for('Superset.explore_json', **params)

    return urllib.parse.urljoin(root, endpoint_path)
|
||||
|
||||
|
||||
class Strategy:
    """
    Base class for cache warm up strategies.

    A concrete strategy overrides `get_urls` to return the list of URLs that
    should be requested in order to warm up the cache.

    Strategies can be configured in `superset/config.py`:

        CELERYBEAT_SCHEDULE = {
            'cache-warmup-hourly': {
                'task': 'cache-warmup',
                'schedule': crontab(minute=1, hour='*'),  # @hourly
                'kwargs': {
                    'strategy_name': 'top_n_dashboards',
                    'top_n': 10,
                    'since': '7 days ago',
                },
            },
        }

    """

    def __init__(self):
        """Take no arguments; the base strategy keeps no state."""

    def get_urls(self):
        """Return the URLs to fetch; must be overridden by subclasses."""
        message = 'Subclasses must implement get_urls!'
        raise NotImplementedError(message)
|
||||
|
||||
|
||||
class DummyStrategy(Strategy):
    """
    Warm up every chart.

    This strategy queries all `Slice` rows and builds one URL per chart,
    without applying any dashboard filters. Can be configured by:

        CELERYBEAT_SCHEDULE = {
            'cache-warmup-hourly': {
                'task': 'cache-warmup',
                'schedule': crontab(minute=1, hour='*'),  # @hourly
                'kwargs': {'strategy_name': 'dummy'},
            },
        }

    """

    name = 'dummy'

    def get_urls(self):
        """Return one warm up URL for each chart in the database."""
        session = db.create_scoped_session()

        urls = []
        for chart in session.query(Slice).all():
            form_data = get_form_data(chart.id)
            urls.append(get_url({'form_data': form_data}))
        return urls
|
||||
|
||||
|
||||
class TopNDashboardsStrategy(Strategy):
    """
    Warm up the charts of the most frequently logged dashboards.

    Dashboards are ranked by the number of `Log` rows that reference them
    since the `since` cut-off, and only the first `top_n` are kept.

        CELERYBEAT_SCHEDULE = {
            'cache-warmup-hourly': {
                'task': 'cache-warmup',
                'schedule': crontab(minute=1, hour='*'),  # @hourly
                'kwargs': {
                    'strategy_name': 'top_n_dashboards',
                    'top_n': 5,
                    'since': '7 days ago',
                },
            },
        }

    """

    name = 'top_n_dashboards'

    def __init__(self, top_n=5, since='7 days ago'):
        """Store `top_n` and the cut-off parsed by `parse_human_datetime`."""
        super().__init__()
        self.top_n = top_n
        self.since = parse_human_datetime(since)

    def get_urls(self):
        """Return warm up URLs for every chart of the top-n dashboards."""
        session = db.create_scoped_session()

        # rank dashboards by how many log entries mention them since the cut-off
        hit_count = func.count(Log.dashboard_id)
        recent_dashboard_hits = and_(
            Log.dashboard_id.isnot(None),
            Log.dttm >= self.since,
        )
        ranking = (
            session
            .query(Log.dashboard_id, hit_count)
            .filter(recent_dashboard_hits)
            .group_by(Log.dashboard_id)
            .order_by(hit_count.desc())
            .limit(self.top_n)
            .all()
        )
        top_ids = [row.dashboard_id for row in ranking]

        top_dashboards = (
            session
            .query(Dashboard)
            .filter(Dashboard.id.in_(top_ids))
            .all()
        )

        # each chart gets the default filters of the dashboard it sits in
        return [
            get_url({'form_data': get_form_data(chart.id, dashboard)})
            for dashboard in top_dashboards
            for chart in dashboard.slices
        ]
|
||||
|
||||
|
||||
class DashboardTagsStrategy(Strategy):
    """
    Warm up charts that carry one of the given tags, directly or through a
    tagged dashboard.

        CELERYBEAT_SCHEDULE = {
            'cache-warmup-hourly': {
                'task': 'cache-warmup',
                'schedule': crontab(minute=1, hour='*'),  # @hourly
                'kwargs': {
                    'strategy_name': 'dashboard_tags',
                    'tags': ['core', 'warmup'],
                },
            },
        }
    """

    name = 'dashboard_tags'

    def __init__(self, tags=None):
        """Store the tag names to look for; `None` means no tags."""
        super().__init__()
        self.tags = tags or []

    def get_urls(self):
        """Return warm up URLs for tagged dashboards' charts and tagged charts."""
        session = db.create_scoped_session()

        matching_tags = (
            session
            .query(Tag)
            .filter(Tag.name.in_(self.tags))
            .all()
        )
        tag_ids = [tag.id for tag in matching_tags]

        def tagged_ids(object_type):
            # ids of the objects of `object_type` carrying any matching tag
            rows = (
                session
                .query(TaggedObject)
                .filter(and_(
                    TaggedObject.object_type == object_type,
                    TaggedObject.tag_id.in_(tag_ids),
                ))
                .all()
            )
            return [row.object_id for row in rows]

        urls = []

        # charts of tagged dashboards, with that dashboard's default filters
        dash_ids = tagged_ids('dashboard')
        tagged_dashboards = session.query(Dashboard).filter(Dashboard.id.in_(dash_ids))
        for dashboard in tagged_dashboards:
            for chart in dashboard.slices:
                form_data = get_form_data(chart.id, dashboard)
                urls.append(get_url({'form_data': form_data}))

        # charts that are tagged themselves, without dashboard filters
        chart_ids = tagged_ids('chart')
        tagged_charts = session.query(Slice).filter(Slice.id.in_(chart_ids))
        for chart in tagged_charts:
            urls.append(get_url({'form_data': get_form_data(chart.id)}))

        return urls
|
||||
|
||||
|
||||
# Strategy classes selectable by the `cache-warmup` task, which matches its
# `strategy_name` argument against each class's `name` attribute.
strategies = [DummyStrategy, TopNDashboardsStrategy, DashboardTagsStrategy]
|
||||
|
||||
|
||||
@celery_app.task(name='cache-warmup')
def cache_warmup(strategy_name, *args, **kwargs):
    """
    Warm up cache.

    This task periodically hits charts to warm up the cache.

    :param strategy_name: value compared against the `name` attribute of the
        classes listed in `strategies` to pick the strategy to run.
    :param args: positional arguments forwarded to the strategy constructor.
    :param kwargs: keyword arguments forwarded to the strategy constructor.
    :returns: an error message string when the strategy is unknown or cannot
        be instantiated; otherwise a dict with the URLs that were fetched
        successfully under `success` and the ones that failed under `errors`.
    """
    logger.info('Loading strategy')
    class_ = next(
        (candidate for candidate in strategies if candidate.name == strategy_name),
        None,
    )
    if class_ is None:
        message = f'No strategy {strategy_name} found!'
        logger.error(message)
        return message

    logger.info(f'Loading {class_.__name__}')
    try:
        # a TypeError here means the configured args/kwargs do not match the
        # strategy's constructor signature
        strategy = class_(*args, **kwargs)
        logger.info('Success!')
    except TypeError:
        message = 'Error loading strategy!'
        logger.exception(message)
        return message

    results = {'success': [], 'errors': []}
    for url in strategy.get_urls():
        try:
            logger.info(f'Fetching {url}')
            # NOTE(review): no timeout is passed, so an unresponsive server
            # blocks this task indefinitely - consider a configurable timeout.
            response = requests.get(url)
            # a 4xx/5xx answer means the chart was not computed, so report it
            # as an error rather than as a successful warm up
            response.raise_for_status()
        except RequestException:
            logger.exception('Error warming up cache!')
            results['errors'].append(url)
        else:
            results['success'].append(url)

    return results
|
||||
Reference in New Issue
Block a user