mirror of
https://github.com/apache/superset.git
synced 2026-04-09 19:35:21 +00:00
305 lines
10 KiB
Python
305 lines
10 KiB
Python
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
import copy
|
|
import decimal
|
|
import logging
|
|
import uuid
|
|
from datetime import date, datetime, time, timedelta
|
|
from typing import Any, Callable, Dict, Optional, Union
|
|
|
|
import numpy as np
|
|
import pandas as pd
|
|
import simplejson
|
|
from flask_babel.speaklater import LazyString
|
|
from jsonpath_ng import parse
|
|
from simplejson import JSONDecodeError
|
|
|
|
from superset.constants import PASSWORD_MASK
|
|
from superset.utils.dates import datetime_to_epoch, EPOCH
|
|
|
|
# Set the threshold of the logger named "MARKDOWN" to INFO.
# NOTE(review): presumably this mutes DEBUG output emitted by the markdown
# library — confirm.
logging.getLogger("MARKDOWN").setLevel(logging.INFO)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class DashboardEncoder(simplejson.JSONEncoder):
    """
    JSON encoder that always sorts keys and tags encoded objects by class name.

    Objects the base encoder cannot handle are converted as follows:

    * ``uuid.UUID`` -> its string form;
    * anything exposing ``__dict__`` -> ``{"__<ClassName>__": {attrs}}`` with the
      ``_sa_instance_state`` attribute left out;
    * if reading the attributes fails and the object is a ``datetime`` ->
      ``{"__datetime__": <ISO string, microseconds zeroed>}``;
    * otherwise the decision is delegated to a plain ``simplejson.JSONEncoder``.
    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        super().__init__(*args, **kwargs)
        # Assigned after the parent constructor so it overrides whatever
        # ``sort_keys`` value the caller supplied.
        self.sort_keys = True

    def default(self, o: Any) -> Union[dict[Any, Any], str]:  # type: ignore
        if isinstance(o, uuid.UUID):
            return str(o)

        try:
            attributes = {}
            for attr_name, attr_value in o.__dict__.items():
                if attr_name != "_sa_instance_state":
                    attributes[attr_name] = attr_value
            return {f"__{o.__class__.__name__}__": attributes}
        except Exception:  # pylint: disable=broad-except
            if not isinstance(o, datetime):
                return simplejson.JSONEncoder(sort_keys=True).default(o)
            return {"__datetime__": o.replace(microsecond=0).isoformat()}
|
|
|
|
|
|
def format_timedelta(time_delta: timedelta) -> str:
    """
    Render a time delta so that negative values are easy to read.

    ``str()`` shows a negative delta as a negative day count plus a positive
    clock time; here the absolute value is printed behind a minus sign instead.

    >>> td = timedelta(0) - timedelta(days=1, hours=5,minutes=6)
    >>> str(td)
    '-2 days, 18:54:00'
    >>> format_timedelta(td)
    '-1 day, 5:06:00'

    :param time_delta: The delta to format
    :returns: The human-readable representation
    """
    is_negative = time_delta < timedelta(0)
    # Non-negative deltas keep the default ``str()`` rendering.
    return f"-{abs(time_delta)!s}" if is_negative else str(time_delta)
|
|
|
|
|
|
def base_json_conv(obj: Any) -> Any:  # noqa: C901
    """
    Tries to convert additional types to JSON compatible forms.

    Handles NumPy scalars (every integer and floating width, plus booleans),
    NumPy arrays, sets, decimals, UUIDs, times, lazy strings, time deltas,
    pandas date offsets and raw bytes.

    :param obj: The serializable object
    :returns: The JSON compatible form
    :raises TypeError: If the object cannot be serialized
    :see: https://docs.python.org/3/library/json.html#encoders-and-decoders
    """

    if isinstance(obj, memoryview):
        # No return here: the resulting bytes are handled by the bytes branch
        # further down.
        obj = obj.tobytes()
    # ``np.integer`` / ``np.floating`` cover every width (int8..uint64,
    # float16..longdouble), not just ``np.int64``.
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.bool_):
        return bool(obj)
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, set):
        return list(obj)
    if isinstance(obj, decimal.Decimal):
        return float(obj)
    if isinstance(obj, (uuid.UUID, time, LazyString)):
        return str(obj)
    if isinstance(obj, timedelta):
        return format_timedelta(obj)
    if isinstance(obj, pd.DateOffset):
        offset_attrs = ", ".join(f"{k}={v}" for k, v in obj.kwds.items())
        return f"DateOffset({offset_attrs})"
    if isinstance(obj, bytes):
        # Try UTF-8 first, then UTF-16; fall back to a placeholder when the
        # payload is not text in either encoding.
        for codec in ("utf-8", "utf-16"):
            try:
                return obj.decode(codec)
            except UnicodeDecodeError:
                continue
        return "[bytes]"

    raise TypeError(f"Unserializable object {obj} of type {type(obj)}")
|
|
|
|
|
|
def json_iso_dttm_ser(obj: Any, pessimistic: bool = False) -> Any:
    """
    JSON ``default`` hook that renders dates and datetimes as ISO 8601 strings.

    >>> json.dumps({'dttm': datetime(1970, 1, 1)}, default=json_iso_dttm_ser)
    '{"dttm": "1970-01-01T00:00:00"}'

    Anything that is not a date-like value is handed to ``base_json_conv``.

    :param obj: The serializable object
    :param pessimistic: When True, an unserializable object is logged and
        replaced by a placeholder string instead of raising
    :returns: The JSON compatible form
    :raises TypeError: If the object cannot be serialized and ``pessimistic``
        is False
    """
    date_like = (datetime, date, pd.Timestamp)
    if isinstance(obj, date_like):
        return obj.isoformat()

    try:
        converted = base_json_conv(obj)
    except TypeError:
        if not pessimistic:
            raise
        # Pessimistic mode: record the failure and emit a placeholder.
        logger.error("Failed to serialize %s", obj)
        return f"Unserializable [{type(obj)}]"
    return converted
|
|
|
|
|
|
def pessimistic_json_iso_dttm_ser(obj: Any) -> Any:
    """
    Call ``json_iso_dttm_ser`` with ``pessimistic`` switched on.

    An object that cannot be serialized to JSON yields a placeholder string
    rather than an exception, so serialization still succeeds.

    :param obj: The serializable object
    :returns: The JSON compatible form, or a placeholder string
    """
    return json_iso_dttm_ser(obj, True)
|
|
|
|
|
|
def json_int_dttm_ser(obj: Any) -> Any:
    """
    JSON ``default`` hook that renders dates and datetimes relative to EPOCH.

    >>> json.dumps({'dttm': datetime(1970, 1, 1)}, default=json_int_dttm_ser)
    '{"dttm": 0.0}'

    Datetimes and pandas timestamps go through ``datetime_to_epoch``; plain
    dates are converted to the number of milliseconds since ``EPOCH``'s date.
    Every other type is handed to ``base_json_conv``.

    :param obj: The serializable object
    :returns: The JSON compatible form
    :raises TypeError: If the object cannot be serialized
    """
    # ``datetime`` must be tested before ``date`` because it is a subclass.
    if isinstance(obj, (datetime, pd.Timestamp)):
        converted = datetime_to_epoch(obj)
    elif isinstance(obj, date):
        since_epoch = obj - EPOCH.date()
        converted = since_epoch.total_seconds() * 1000
    else:
        converted = base_json_conv(obj)
    return converted
|
|
|
|
|
|
def json_dumps_w_dates(payload: dict[Any, Any], sort_keys: bool = False) -> str:
    """
    Serialize ``payload`` to JSON, converting date-like values via
    ``json_int_dttm_ser``.

    :param payload: The dictionary to serialize
    :param sort_keys: when set to True keys will be sorted
    :returns: The JSON string
    """
    return dumps(
        payload,
        sort_keys=sort_keys,
        default=json_int_dttm_ser,
    )
|
|
|
|
|
|
def validate_json(obj: Union[bytes, bytearray, str]) -> None:
    """
    Check that a bytes, bytearray or string value holds valid JSON.

    Falsy input (empty string/bytes, ``None``) is accepted without parsing.

    :param obj: an object that should be parseable to JSON
    :raises JSONDecodeError: if obj cannot be parsed as JSON; the error is
        logged and then re-raised unchanged
    """
    if not obj:
        return

    try:
        loads(obj)
    except JSONDecodeError as ex:
        logger.error("JSON is not valid %s", str(ex), exc_info=True)
        raise
|
|
|
|
|
|
def dumps(  # pylint: disable=too-many-arguments
    obj: Any,
    default: Optional[Callable[[Any], Any]] = json_iso_dttm_ser,
    allow_nan: bool = False,
    ignore_nan: bool = True,
    sort_keys: bool = False,
    indent: Union[str, int, None] = None,
    separators: Union[tuple[str, str], None] = None,
    cls: Union[type[simplejson.JSONEncoder], None] = None,
    encoding: Optional[str] = "utf-8",
) -> str:
    """
    Serialize an object to a JSON string through ``simplejson``.

    :param obj: The serializable object
    :param default: function that should return a serializable version of obj
    :param allow_nan: when set to True NaN values will be serialized
    :param ignore_nan: when set to True nan values will be ignored
    :param sort_keys: when set to True keys will be sorted
    :param indent: when set elements and object members will be pretty-printed
    :param separators: when specified dumps will use (item_separator, key_separator)
    :param cls: custom `JSONEncoder` subclass
    :param encoding: encoding forwarded to ``simplejson.dumps``; if that call
        raises ``UnicodeDecodeError`` it is retried once with ``encoding=None``
    :returns: String object in the JSON compatible form
    """
    dumps_kwargs: Dict[str, Any] = dict(
        default=default,
        allow_nan=allow_nan,
        ignore_nan=ignore_nan,
        sort_keys=sort_keys,
        indent=indent,
        separators=separators,
        cls=cls,
        encoding=encoding,
    )
    try:
        return simplejson.dumps(obj, **dumps_kwargs)
    except UnicodeDecodeError:
        # Second attempt with no encoding.
        # NOTE(review): presumably this makes simplejson hand bytes to
        # ``default`` instead of decoding them itself — confirm.
        dumps_kwargs["encoding"] = None
        return simplejson.dumps(obj, **dumps_kwargs)
|
|
|
|
|
|
def loads(
    obj: Union[bytes, bytearray, str],
    encoding: Union[str, None] = None,
    allow_nan: bool = False,
    object_hook: Union[Callable[[dict[Any, Any]], Any], None] = None,
) -> Any:
    """
    Deserialize a JSON document into a Python object through ``simplejson``.

    :param obj: The deserializable object
    :param encoding: determines the encoding used to interpret the obj
    :param allow_nan: if True it will allow the parser to accept nan values
    :param object_hook: function that will be called to decode objects values
    :returns: A Python object deserialized from string
    """
    loads_kwargs = {
        "encoding": encoding,
        "allow_nan": allow_nan,
        "object_hook": object_hook,
    }
    return simplejson.loads(obj, **loads_kwargs)
|
|
|
|
|
|
def redact_sensitive(
    payload: dict[str, Any],
    sensitive_fields: set[str],
) -> dict[str, Any]:
    """
    Return a copy of a payload with its sensitive fields masked.

    The input is deep-copied first, so the caller's payload is never modified.
    Every value matched by one of the JSONPath expressions is overwritten with
    ``PASSWORD_MASK``.

    :param payload: The payload to redact
    :param sensitive_fields: The set of fields to redact, as JSONPath expressions
    :returns: The redacted payload
    """
    redacted_payload = copy.deepcopy(payload)

    for path_expression in sensitive_fields:
        compiled = parse(path_expression)
        for hit in compiled.find(redacted_payload):
            # Write through the parent container of the matched value.
            parent = hit.context.value
            field_name = hit.path.fields[0]
            parent[field_name] = PASSWORD_MASK

    return redacted_payload
|
|
|
|
|
|
def reveal_sensitive(
    old_payload: dict[str, Any],
    new_payload: dict[str, Any],
    sensitive_fields: set[str],
) -> dict[str, Any]:
    """
    Reveals sensitive fields from a payload when not modified.

    This allows users to perform deep edits on a payload without having to provide
    sensitive information. The old payload is sent to the user with any sensitive fields
    masked, and when the user sends back a modified payload, any fields that were masked
    are replaced with the original values from the old payload.

    For now this is only used to edit `encrypted_extra` fields in the database.

    A masked field that has no counterpart at the same path in the old payload
    is left exactly as submitted, since there is no original value to restore.

    :param old_payload: The old payload to reveal
    :param new_payload: The new payload to reveal
    :param sensitive_fields: The set of fields to reveal, as JSONPath expressions
    :returns: The revealed payload
    """
    # Work on a copy so the caller's new payload is not mutated.
    revealed_payload = copy.deepcopy(new_payload)

    for json_path in sensitive_fields:
        jsonpath_expr = parse(json_path)
        for match in jsonpath_expr.find(revealed_payload):
            if match.value != PASSWORD_MASK:
                # The user supplied a real value; keep it.
                continue

            old_matches = match.full_path.find(old_payload)
            if not old_matches:
                # The path does not exist in the old payload; indexing the
                # empty result would raise IndexError, so skip the field.
                continue

            match.context.value[match.path.fields[0]] = old_matches[0].value

    return revealed_payload
|