Files
superset2/superset/utils/json.py
2025-03-07 16:15:06 -08:00

305 lines
10 KiB
Python

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import copy
import decimal
import logging
import uuid
from datetime import date, datetime, time, timedelta
from typing import Any, Callable, Dict, Optional, Union
import numpy as np
import pandas as pd
import simplejson
from flask_babel.speaklater import LazyString
from jsonpath_ng import parse
from simplejson import JSONDecodeError
from superset.constants import PASSWORD_MASK
from superset.utils.dates import datetime_to_epoch, EPOCH
# Set the "MARKDOWN" logger's level to INFO so records below INFO are dropped
# (presumably to quiet the markdown library's DEBUG output -- TODO confirm).
logging.getLogger("MARKDOWN").setLevel(logging.INFO)
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
class DashboardEncoder(simplejson.JSONEncoder):
    """
    JSON encoder that always sorts keys and knows how to encode a few
    extra object kinds:

    - ``uuid.UUID`` values are encoded as their string form
    - objects exposing ``__dict__`` are encoded as
      ``{"__<ClassName>__": {<attributes>}}``, leaving out the
      ``_sa_instance_state`` attribute
    - ``datetime`` values are encoded as
      ``{"__datetime__": <ISO 8601 string without microseconds>}``

    Anything else is delegated to a plain ``simplejson.JSONEncoder``.
    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        super().__init__(*args, **kwargs)
        # Keys are always sorted, whatever the caller passed in
        self.sort_keys = True

    def default(self, o: Any) -> Union[dict[Any, Any], str]:  # type: ignore
        """
        Return a JSON compatible stand-in for ``o``.

        :param o: The object the encoder could not serialize natively
        :returns: A string or a single-key dict describing the object
        """
        if isinstance(o, uuid.UUID):
            return str(o)
        try:
            attributes = {
                name: value
                for name, value in o.__dict__.items()
                if name != "_sa_instance_state"
            }
            wrapper_key = f"__{o.__class__.__name__}__"
            return {wrapper_key: attributes}
        except Exception:  # pylint: disable=broad-except
            # Reached e.g. when the object has no ``__dict__``
            if not isinstance(o, datetime):
                return simplejson.JSONEncoder(sort_keys=True).default(o)
            return {"__datetime__": o.replace(microsecond=0).isoformat()}
def format_timedelta(time_delta: timedelta) -> str:
    """
    Render a timedelta so that negative durations are easy to read.

    ``str()`` on a negative timedelta shows a negative day count plus a
    positive remainder; this helper instead prefixes the absolute value
    with a minus sign.

    >>> td = timedelta(0) - timedelta(days=1, hours=5,minutes=6)
    >>> str(td)
    '-2 days, 18:54:00'
    >>> format_timedelta(td)
    '-1 day, 5:06:00'

    :param time_delta: The duration to format
    :returns: The formatted duration
    """
    is_negative = time_delta < timedelta(0)
    if not is_negative:
        # Zero and positive durations keep Python's default rendering;
        # adjust here if a different positive format is ever wanted
        return str(time_delta)
    return "-" + str(abs(time_delta))
def base_json_conv(obj: Any) -> Any:  # noqa: C901
    """
    Tries to convert additional types to JSON compatible forms.

    Handled types: ``memoryview``/``bytes`` (decoded as UTF-8, then UTF-16,
    else the placeholder ``"[bytes]"``), NumPy integer and floating scalars
    of any width, ``np.bool_``, ``np.ndarray``, ``set``, ``Decimal``,
    ``UUID``, ``time``, ``LazyString``, ``timedelta`` and ``pd.DateOffset``.

    :param obj: The serializable object
    :returns: The JSON compatible form
    :raises TypeError: If the object cannot be serialized
    :see: https://docs.python.org/3/library/json.html#encoders-and-decoders
    """
    if isinstance(obj, memoryview):
        # Fall through to the bytes handling at the bottom
        obj = obj.tobytes()

    # Accept every NumPy integer width (int8..int64, uint8..uint64), not just
    # int64. np.timedelta64 subclasses np.signedinteger but is a duration,
    # not a plain number, so it is deliberately left unsupported.
    if isinstance(obj, np.integer) and not isinstance(obj, np.timedelta64):
        return int(obj)
    # Likewise for floating scalars such as float16/float32, which are not
    # subclasses of the builtin float.
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.bool_):
        return bool(obj)
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, set):
        return list(obj)
    if isinstance(obj, decimal.Decimal):
        return float(obj)
    if isinstance(obj, (uuid.UUID, time, LazyString)):
        return str(obj)
    if isinstance(obj, timedelta):
        return format_timedelta(obj)
    if isinstance(obj, pd.DateOffset):
        offset_attrs = ", ".join(f"{k}={v}" for k, v in obj.kwds.items())
        return f"DateOffset({offset_attrs})"
    if isinstance(obj, bytes):
        # Best effort: try the likely encodings in order and fall back to a
        # placeholder rather than failing the whole serialization
        for candidate_encoding in ("utf-8", "utf-16"):
            try:
                return obj.decode(candidate_encoding)
            except UnicodeDecodeError:
                continue
        return "[bytes]"

    raise TypeError(f"Unserializable object {obj} of type {type(obj)}")
def json_iso_dttm_ser(obj: Any, pessimistic: bool = False) -> Any:
    """
    A JSON serializer that deals with dates by serializing them to ISO 8601.

    >>> json.dumps({'dttm': datetime(1970, 1, 1)}, default=json_iso_dttm_ser)
    '{"dttm": "1970-01-01T00:00:00"}'

    Non-temporal values are handed to ``base_json_conv``. In pessimistic
    mode a value that cannot be converted is logged and replaced by a
    placeholder string instead of raising.

    :param obj: The serializable object
    :param pessimistic: Whether to be pessimistic regarding serialization
    :returns: The JSON compatible form
    :raises TypeError: If the non-pessimistic object cannot be serialized
    """
    temporal_types = (datetime, date, pd.Timestamp)
    if isinstance(obj, temporal_types):
        return obj.isoformat()

    try:
        converted = base_json_conv(obj)
    except TypeError:
        if not pessimistic:
            raise
        logger.error("Failed to serialize %s", obj)
        return f"Unserializable [{type(obj)}]"
    return converted
def pessimistic_json_iso_dttm_ser(obj: Any) -> Any:
    """
    Call ``json_iso_dttm_ser`` in pessimistic mode.

    A value that cannot be serialized to JSON does not raise; the call
    still succeeds.

    :param obj: The serializable object
    :returns: The JSON compatible form
    """
    serialized = json_iso_dttm_ser(obj, pessimistic=True)
    return serialized
def json_int_dttm_ser(obj: Any) -> Any:
    """
    A JSON serializer that deals with dates by serializing them to EPOCH.

    >>> json.dumps({'dttm': datetime(1970, 1, 1)}, default=json_int_dttm_ser)
    '{"dttm": 0.0}'

    :param obj: The serializable object
    :returns: The JSON compatible form
    :raises TypeError: If the object cannot be serialized
    """
    # datetime must be tested before date: every datetime is also a date
    if isinstance(obj, (datetime, pd.Timestamp)):
        return datetime_to_epoch(obj)
    if not isinstance(obj, date):
        return base_json_conv(obj)

    # Plain dates: time elapsed since the epoch date, scaled by 1000
    elapsed = obj - EPOCH.date()
    return elapsed.total_seconds() * 1000
def json_dumps_w_dates(payload: dict[Any, Any], sort_keys: bool = False) -> str:
    """
    Dump a payload to JSON, converting date-like values with
    ``json_int_dttm_ser``.

    :param payload: The dictionary to serialize
    :param sort_keys: when set to True keys will be sorted
    :returns: The JSON string
    """
    serialized = dumps(payload, sort_keys=sort_keys, default=json_int_dttm_ser)
    return serialized
def validate_json(obj: Union[bytes, bytearray, str]) -> None:
    """
    A JSON Validator that validates an object of bytes, bytes array or string
    to be in valid JSON format

    Falsy input (e.g. an empty string or empty bytes) is accepted without
    being parsed.

    :param obj: an object that should be parseable to JSON
    :raises JSONDecodeError: if obj cannot be parsed as JSON; the error is
        logged and then re-raised unchanged
    """
    if not obj:
        # Nothing to validate
        return

    try:
        loads(obj)
    except JSONDecodeError as ex:
        logger.error("JSON is not valid %s", str(ex), exc_info=True)
        raise
def dumps(  # pylint: disable=too-many-arguments
    obj: Any,
    default: Optional[Callable[[Any], Any]] = json_iso_dttm_ser,
    allow_nan: bool = False,
    ignore_nan: bool = True,
    sort_keys: bool = False,
    indent: Union[str, int, None] = None,
    separators: Union[tuple[str, str], None] = None,
    cls: Union[type[simplejson.JSONEncoder], None] = None,
    encoding: Optional[str] = "utf-8",
) -> str:
    """
    Dumps object to compatible JSON format

    Serialization is first attempted with the requested ``encoding``; if
    that raises ``UnicodeDecodeError`` it is retried once with
    ``encoding=None``.

    :param obj: The serializable object
    :param default: function that should return a serializable version of obj
    :param allow_nan: when set to True NaN values will be serialized
    :param ignore_nan: when set to True nan values will be ignored
    :param sort_keys: when set to True keys will be sorted
    :param indent: when set elements and object members will be pretty-printed
    :param separators: when specified dumps will use (item_separator, key_separator)
    :param cls: custom `JSONEncoder` subclass
    :param encoding: encoding handed to simplejson for the first attempt
    :returns: String object in the JSON compatible form
    """

    def _serialize(with_encoding: Optional[str]) -> str:
        # Single place that forwards every option to simplejson
        return simplejson.dumps(
            obj,
            default=default,
            allow_nan=allow_nan,
            ignore_nan=ignore_nan,
            sort_keys=sort_keys,
            indent=indent,
            separators=separators,
            cls=cls,
            encoding=with_encoding,
        )

    try:
        return _serialize(encoding)
    except UnicodeDecodeError:
        return _serialize(None)
def loads(
    obj: Union[bytes, bytearray, str],
    encoding: Union[str, None] = None,
    allow_nan: bool = False,
    object_hook: Union[Callable[[dict[Any, Any]], Any], None] = None,
) -> Any:
    """
    Deserialize a JSON document into a Python object using simplejson.

    :param obj: The deserializable object
    :param encoding: determines the encoding used to interpret the obj
    :param allow_nan: if True it will allow the parser to accept nan values
    :param object_hook: function that will be called to decode objects values
    :returns: A Python object deserialized from string
    """
    parser_options: dict[str, Any] = {
        "encoding": encoding,
        "allow_nan": allow_nan,
        "object_hook": object_hook,
    }
    return simplejson.loads(obj, **parser_options)
def redact_sensitive(
    payload: dict[str, Any],
    sensitive_fields: set[str],
) -> dict[str, Any]:
    """
    Return a copy of a payload with its sensitive fields masked.

    The input payload is deep-copied first and is never modified; every
    value matched by one of the JSONPath expressions is replaced with
    ``PASSWORD_MASK`` in the copy.

    :param payload: The payload to redact
    :param sensitive_fields: The set of fields to redact, as JSONPath expressions
    :returns: The redacted payload
    """
    redacted = copy.deepcopy(payload)

    for field_path in sensitive_fields:
        for hit in parse(field_path).find(redacted):
            # Overwrite the matched entry inside its parent container
            container = hit.context.value
            field_name = hit.path.fields[0]
            container[field_name] = PASSWORD_MASK

    return redacted
def reveal_sensitive(
    old_payload: dict[str, Any],
    new_payload: dict[str, Any],
    sensitive_fields: set[str],
) -> dict[str, Any]:
    """
    Reveals sensitive fields from a payload when not modified.

    This allows users to perform deep edits on a payload without having to provide
    sensitive information. The old payload is sent to the user with any sensitive fields
    masked, and when the user sends back a modified payload, any fields that were masked
    are replaced with the original values from the old payload.

    For now this is only used to edit `encrypted_extra` fields in the database.

    Neither input payload is modified; the result is built on a deep copy of
    ``new_payload``. A masked field that has no counterpart in ``old_payload``
    is left as submitted.

    :param old_payload: The old payload to reveal
    :param new_payload: The new payload to reveal
    :param sensitive_fields: The set of fields to reveal, as JSONPath expressions
    :returns: The revealed payload
    """
    revealed_payload = copy.deepcopy(new_payload)

    for json_path in sensitive_fields:
        jsonpath_expr = parse(json_path)
        for match in jsonpath_expr.find(revealed_payload):
            if match.value != PASSWORD_MASK:
                # The user supplied a new value; keep it
                continue

            # Look the same concrete path up in the old payload
            old_matches = match.full_path.find(old_payload)
            if not old_matches:
                # The path does not exist in the old payload (e.g. a newly
                # added field), so there is nothing to restore; indexing the
                # empty result would raise IndexError
                continue

            match.context.value[match.path.fields[0]] = old_matches[0].value

    return revealed_payload