Compare commits

...

1 Commit

Author SHA1 Message Date
Claude Code
ace0fdeb55 feat(charts): validate post-processing options against operation schemas [DRAFT]
ChartDataPostProcessingOperationSchema accepted `options` as a free-form Dict;
the per-operation option schemas (aggregate, rolling, prophet, pivot, ...)
existed only for OpenAPI docs and were never wired into validation.

Add a validates_schema hook that maps each operation to its option schema and
validates `options` against it. Validation is lenient (unknown=EXCLUDE) so it
surfaces wrong types / out-of-range values on declared fields without rejecting
payloads that carry extra keys; operations without a dedicated schema are
unaffected.

DRAFT: these option schemas were never used for validation and have latent
issues (e.g. ChartDataAggregateOptionsSchema.groupby is accidentally a tuple,
so it isn't validated). Each option schema should be audited against the real
pandas_postprocessing signatures before strict (unknown=RAISE) validation is
considered, to avoid rejecting currently-valid requests.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-01 21:09:21 -07:00
2 changed files with 95 additions and 1 deletion

View File

@@ -22,7 +22,15 @@ from typing import Any, TYPE_CHECKING
from flask import current_app
from flask_babel import gettext as _
from marshmallow import EXCLUDE, fields, post_load, Schema, validate
from marshmallow import (
EXCLUDE,
fields,
post_load,
Schema,
validate,
validates_schema,
ValidationError,
)
from marshmallow.validate import Length, Range
from marshmallow_union import Union
@@ -937,6 +945,42 @@ class ChartDataPostProcessingOperationSchema(Schema):
},
)
# Registry used by ``validate_options``: maps a post-processing operation name
# (the value of the ``operation`` key) to the schema class used to validate
# that operation's ``options`` dict.
#
# Only operations that declare a dedicated options schema are listed. An
# operation with no entry here is skipped by ``validate_options``, so its
# ``options`` are not structurally validated.
_OPTIONS_SCHEMAS: dict[str, type[Schema]] = {
"aggregate": ChartDataAggregateOptionsSchema,
"rolling": ChartDataRollingOptionsSchema,
"select": ChartDataSelectOptionsSchema,
"sort": ChartDataSortOptionsSchema,
"contribution": ChartDataContributionOptionsSchema,
"prophet": ChartDataProphetOptionsSchema,
"boxplot": ChartDataBoxplotOptionsSchema,
"pivot": ChartDataPivotOptionsSchema,
"geohash_decode": ChartDataGeohashDecodeOptionsSchema,
"geohash_encode": ChartDataGeohashEncodeOptionsSchema,
"geodetic_parse": ChartDataGeodeticParseOptionsSchema,
}
@validates_schema
def validate_options(self, data: dict[str, Any], **kwargs: Any) -> None:
    """Check ``options`` against the schema registered for ``operation``.

    Nothing is checked when ``operation`` is not a string, when ``options``
    is not a dict, or when the operation has no entry in
    ``_OPTIONS_SCHEMAS``. The option schema is instantiated with
    ``unknown=EXCLUDE``, so keys it does not declare are ignored and only
    declared fields can produce errors.

    :param data: the payload being validated by this schema
    :param kwargs: extra keyword arguments passed by marshmallow (unused)
    :raises ValidationError: with the option errors nested under ``options``
    """
    op_name = data.get("operation")
    op_options = data.get("options")

    if isinstance(op_name, str) and isinstance(op_options, dict):
        options_schema_cls = self._OPTIONS_SCHEMAS.get(op_name)
        if options_schema_cls is not None:
            # Lenient mode: undeclared keys are dropped instead of reported.
            lenient_schema = options_schema_cls(unknown=EXCLUDE)
            option_errors = lenient_schema.validate(op_options)
            if option_errors:
                raise ValidationError({"options": option_errors})
class ChartDataFilterSchema(Schema):
col = fields.Raw(

View File

@@ -152,3 +152,53 @@ def test_time_grain_validation_with_config_addons(app_context: None) -> None:
}
result = schema.load(custom_data)
assert result["time_grain"] == "PT10M"
def test_post_processing_operation_validates_options(app_context: None) -> None:
    """Check that ``options`` is leniently validated per operation."""
    from superset.charts.schemas import ChartDataPostProcessingOperationSchema

    def prophet_payload(**overrides: Any) -> dict[str, Any]:
        # Build a fresh prophet payload each time so no dict is shared
        # between the individual load() calls below.
        prophet_options: dict[str, Any] = {
            "time_grain": "P1D",
            "periods": 7,
            "confidence_interval": 0.8,
        }
        prophet_options.update(overrides)
        return {"operation": "prophet", "options": prophet_options}

    operation_schema = ChartDataPostProcessingOperationSchema()

    # Well-formed prophet options are accepted.
    operation_schema.load(prophet_payload())

    # A declared field outside its allowed range (confidence_interval must be
    # 0-1) is reported under the "options" key.
    with pytest.raises(ValidationError) as exc_info:
        operation_schema.load(prophet_payload(confidence_interval=2.0))
    assert "options" in exc_info.value.messages

    # Keys the option schema does not declare are tolerated.
    operation_schema.load(prophet_payload(some_future_option=True))

    # Operations with no dedicated option schema take arbitrary options.
    operation_schema.load(
        {"operation": "flatten", "options": {"anything": [1, 2, 3]}}
    )