mirror of
https://github.com/apache/superset.git
synced 2026-04-20 16:44:46 +00:00
feat(SIP-39): Async query support for charts (#11499)
* Generate JWT in Flask app * Refactor chart data API query logic, add JWT validation and async worker * Add redis stream implementation, refactoring * Add chart data cache endpoint, refactor QueryContext caching * Typing, linting, refactoring * pytest fixes and openapi schema update * Enforce caching be configured for async query init * Async query processing for explore_json endpoint * Add /api/v1/async_event endpoint * Async frontend for dashboards [WIP] * Chart async error message support, refactoring * Abstract asyncEvent middleware * Async chart loading for Explore * Pylint fixes * asyncEvent middleware -> TypeScript, JS linting * Chart data API: enforce forced_cache, add tests * Add tests for explore_json endpoints * Add test for chart data cache endpoint (no login) * Consolidate set_and_log_cache and add STORE_CACHE_KEYS_IN_METADATA_DB flag * Add tests for tasks/async_queries and address PR comments * Bypass non-JSON result formats for async queries * Add tests for redux middleware * Remove debug statement Co-authored-by: Ville Brofeldt <33317356+villebro@users.noreply.github.com> * Skip force_cached if no queryObj * SunburstViz: don't modify self.form_data * Fix failing annotation test * Resolve merge/lint issues * Reduce polling delay * Fix new getClientErrorObject reference * Fix flakey unit tests * /api/v1/async_event: increment redis stream ID, add tests * PR feedback: refactoring, configuration * Fixup: remove debugging * Fix typescript errors due to redux upgrade * Update UPDATING.md * Fix failing py tests * asyncEvent_spec.js -> asyncEvent_spec.ts * Refactor flakey Python 3.7 mock assertions * Fix another shared state issue in Py tests * Use 'sub' claim in JWT for user_id * Refactor async middleware config * Fixup: restore FeatureFlag boolean type Co-authored-by: Ville Brofeldt <33317356+villebro@users.noreply.github.com>
This commit is contained in:
@@ -17,7 +17,7 @@
|
||||
import copy
|
||||
import logging
|
||||
import math
|
||||
from datetime import datetime, timedelta
|
||||
from datetime import timedelta
|
||||
from typing import Any, cast, ClassVar, Dict, List, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
@@ -30,13 +30,17 @@ from superset.charts.dao import ChartDAO
|
||||
from superset.common.query_object import QueryObject
|
||||
from superset.connectors.base.models import BaseDatasource
|
||||
from superset.connectors.connector_registry import ConnectorRegistry
|
||||
from superset.exceptions import QueryObjectValidationError, SupersetException
|
||||
from superset.exceptions import (
|
||||
CacheLoadError,
|
||||
QueryObjectValidationError,
|
||||
SupersetException,
|
||||
)
|
||||
from superset.extensions import cache_manager, security_manager
|
||||
from superset.stats_logger import BaseStatsLogger
|
||||
from superset.utils import core as utils
|
||||
from superset.utils.cache import generate_cache_key, set_and_log_cache
|
||||
from superset.utils.core import DTTM_ALIAS
|
||||
from superset.views.utils import get_viz
|
||||
from superset.viz import set_and_log_cache
|
||||
|
||||
config = app.config
|
||||
stats_logger: BaseStatsLogger = config["STATS_LOGGER"]
|
||||
@@ -78,6 +82,13 @@ class QueryContext:
|
||||
self.custom_cache_timeout = custom_cache_timeout
|
||||
self.result_type = result_type or utils.ChartDataResultType.FULL
|
||||
self.result_format = result_format or utils.ChartDataResultFormat.JSON
|
||||
self.cache_values = {
|
||||
"datasource": datasource,
|
||||
"queries": queries,
|
||||
"force": force,
|
||||
"result_type": result_type,
|
||||
"result_format": result_format,
|
||||
}
|
||||
|
||||
def get_query_result(self, query_object: QueryObject) -> Dict[str, Any]:
|
||||
"""Returns a pandas dataframe based on the query object"""
|
||||
@@ -142,8 +153,11 @@ class QueryContext:
|
||||
|
||||
return df.to_dict(orient="records")
|
||||
|
||||
def get_single_payload(self, query_obj: QueryObject) -> Dict[str, Any]:
|
||||
def get_single_payload(
|
||||
self, query_obj: QueryObject, **kwargs: Any
|
||||
) -> Dict[str, Any]:
|
||||
"""Returns a payload of metadata and data"""
|
||||
force_cached = kwargs.get("force_cached", False)
|
||||
if self.result_type == utils.ChartDataResultType.QUERY:
|
||||
return {
|
||||
"query": self.datasource.get_query_str(query_obj.to_dict()),
|
||||
@@ -159,8 +173,7 @@ class QueryContext:
|
||||
query_obj.row_limit = min(row_limit, config["SAMPLES_ROW_LIMIT"])
|
||||
query_obj.row_offset = 0
|
||||
query_obj.columns = [o.column_name for o in self.datasource.columns]
|
||||
payload = self.get_df_payload(query_obj)
|
||||
|
||||
payload = self.get_df_payload(query_obj, force_cached=force_cached)
|
||||
df = payload["df"]
|
||||
status = payload["status"]
|
||||
if status != utils.QueryStatus.FAILED:
|
||||
@@ -186,9 +199,28 @@ class QueryContext:
|
||||
return {"data": payload["data"]}
|
||||
return payload
|
||||
|
||||
def get_payload(self) -> List[Dict[str, Any]]:
|
||||
"""Get all the payloads from the QueryObjects"""
|
||||
return [self.get_single_payload(query_object) for query_object in self.queries]
|
||||
def get_payload(self, **kwargs: Any) -> Dict[str, Any]:
    """
    Collect the payloads of every QueryObject in ``self.queries``.

    Recognised keyword arguments:

    - ``force_cached`` (default ``False``): forwarded unchanged to
      ``get_single_payload`` for each query object.
    - ``cache_query_context`` (default ``False``): when truthy, the values in
      ``self.cache_values`` are stored in ``cache_manager.cache`` under
      ``self.cache_key()`` and that key is included in the result.

    :returns: a dict with a ``"queries"`` list (one payload per query object,
        in order) and, only when ``cache_query_context`` is truthy, a
        ``"cache_key"`` entry.
    """
    cache_query_context = kwargs.get("cache_query_context", False)
    force_cached = kwargs.get("force_cached", False)

    # One payload per QueryObject, in the order they appear in self.queries.
    payload: Dict[str, Any] = {
        "queries": [
            self.get_single_payload(query, force_cached=force_cached)
            for query in self.queries
        ]
    }
    if not cache_query_context:
        return payload

    # Persist the values this context was built from and hand the key back
    # to the caller alongside the query results.
    context_key = self.cache_key()
    set_and_log_cache(
        cache_manager.cache,
        context_key,
        {"data": self.cache_values},
        self.cache_timeout,
    )
    payload["cache_key"] = context_key
    return payload
|
||||
|
||||
@property
|
||||
def cache_timeout(self) -> int:
|
||||
@@ -203,7 +235,22 @@ class QueryContext:
|
||||
return self.datasource.database.cache_timeout
|
||||
return config["CACHE_DEFAULT_TIMEOUT"]
|
||||
|
||||
def cache_key(self, query_obj: QueryObject, **kwargs: Any) -> Optional[str]:
|
||||
def cache_key(self, **extra: Any) -> str:
    """
    Build the cache key for this QueryContext.

    The key is generated from the key/values in ``self.cache_values`` merged
    with any key/values passed in ``extra``; on a name clash the value from
    ``extra`` is used. It is intended to cover only the data required to
    rehydrate a QueryContext object.

    :param extra: additional key/values to fold into the key
    :returns: the key produced by ``generate_cache_key`` with the ``"qc-"``
        prefix
    """
    # Merge into a new dict so self.cache_values itself is never mutated.
    return generate_cache_key({**self.cache_values, **extra}, "qc-")
|
||||
|
||||
def query_cache_key(self, query_obj: QueryObject, **kwargs: Any) -> Optional[str]:
|
||||
"""
|
||||
Returns a QueryObject cache key for objects in self.queries
|
||||
"""
|
||||
extra_cache_keys = self.datasource.get_extra_cache_keys(query_obj.to_dict())
|
||||
|
||||
cache_key = (
|
||||
@@ -215,7 +262,7 @@ class QueryContext:
|
||||
and self.datasource.is_rls_supported
|
||||
else [],
|
||||
changed_on=self.datasource.changed_on,
|
||||
**kwargs
|
||||
**kwargs,
|
||||
)
|
||||
if query_obj
|
||||
else None
|
||||
@@ -298,12 +345,12 @@ class QueryContext:
|
||||
self, query_obj: QueryObject, **kwargs: Any
|
||||
) -> Dict[str, Any]:
|
||||
"""Handles caching around the df payload retrieval"""
|
||||
cache_key = self.cache_key(query_obj, **kwargs)
|
||||
force_cached = kwargs.get("force_cached", False)
|
||||
cache_key = self.query_cache_key(query_obj)
|
||||
logger.info("Cache key: %s", cache_key)
|
||||
is_loaded = False
|
||||
stacktrace = None
|
||||
df = pd.DataFrame()
|
||||
cached_dttm = datetime.utcnow().isoformat().split(".")[0]
|
||||
cache_value = None
|
||||
status = None
|
||||
query = ""
|
||||
@@ -327,6 +374,12 @@ class QueryContext:
|
||||
)
|
||||
logger.info("Serving from cache")
|
||||
|
||||
if force_cached and not is_loaded:
|
||||
logger.warning(
|
||||
"force_cached (QueryContext): value not found for key %s", cache_key
|
||||
)
|
||||
raise CacheLoadError("Error loading data from cache")
|
||||
|
||||
if query_obj and not is_loaded:
|
||||
try:
|
||||
invalid_columns = [
|
||||
@@ -367,13 +420,11 @@ class QueryContext:
|
||||
|
||||
if is_loaded and cache_key and status != utils.QueryStatus.FAILED:
|
||||
set_and_log_cache(
|
||||
cache_key=cache_key,
|
||||
df=df,
|
||||
query=query,
|
||||
annotation_data=annotation_data,
|
||||
cached_dttm=cached_dttm,
|
||||
cache_timeout=self.cache_timeout,
|
||||
datasource_uid=self.datasource.uid,
|
||||
cache_manager.data_cache,
|
||||
cache_key,
|
||||
{"df": df, "query": query, "annotation_data": annotation_data},
|
||||
self.cache_timeout,
|
||||
self.datasource.uid,
|
||||
)
|
||||
return {
|
||||
"cache_key": cache_key,
|
||||
|
||||
Reference in New Issue
Block a user