Compare commits

...

2 Commits

Author SHA1 Message Date
Evan
9b7f632718 fix(api): restrict rejected-field log token to a safe character set
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-22 21:04:29 -07:00
Amin Ghadersohi
7d9b918f4d feat(api): log rejected related/distinct field access as security events
When a user-supplied column_name fails the related/distinct allowlist check,
the API incremented a statsd counter and returned 404 but emitted no structured
log event, so rejected field-access attempts were absent from the security
audit trail (no user identity, endpoint, or attempted value).

Emit a sanitized security log event (user id, endpoint, attempted column) on
these denials, alongside the existing statsd counter. The column name is
sanitized to a single bounded token to prevent log injection. The allowlist
control itself is unchanged.

Extends the related-field failure integration test to assert the event is
logged with the rejected column name.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-22 17:59:04 -07:00
2 changed files with 34 additions and 1 deletions

View File

@@ -223,6 +223,29 @@ class BaseSupersetApiMixin:
f"{self.__class__.__name__}.{func_name}.{action}"
)
def log_rejected_field_access(self, func_name: str, column_name: str) -> None:
"""Emit a security log event when a related/distinct field is rejected.
The allowlist check itself blocks the request; this records the attempt
in the structured log (alongside the existing statsd counter) so that
rejected field access is visible to security monitoring and forensics,
with the caller's identity, the endpoint, and the attempted value.
"""
# Sanitize the user-supplied column name to a single, bounded token so
# it cannot inject newlines or forge extra key=value tokens in the log
# line. Restrict to a safe character set (column names are alphanumeric
# plus ``_-.``) and replace anything else with ``?``.
sanitized_column = "".join(
ch if (ch.isalnum() or ch in "_-.") else "?" for ch in str(column_name)
)[:200]
logger.warning(
"Rejected disallowed field access: user_id=%s endpoint=%s.%s column=%s",
get_user_id(),
self.__class__.__name__,
func_name,
sanitized_column,
)
def timing_stats(self, action: str, func_name: str, value: float) -> None:
"""
Proxy function for statsd.incr to impose a key structure for REST API's
@@ -619,6 +642,7 @@ class BaseSupersetModelRestApi(BaseSupersetApiMixin, ModelRestApi):
return response
if column_name not in self.allowed_rel_fields:
self.incr_stats("error", self.related.__name__)
self.log_rejected_field_access(self.related.__name__, column_name)
return self.response_404()
args = kwargs.get("rison", {})
@@ -698,6 +722,7 @@ class BaseSupersetModelRestApi(BaseSupersetApiMixin, ModelRestApi):
"""
if column_name not in self.allowed_distinct_fields:
self.incr_stats("error", self.related.__name__)
self.log_rejected_field_access(self.distinct.__name__, column_name)
return self.response_404()
args = kwargs.get("rison", {})
# handle pagination

View File

@@ -423,5 +423,13 @@ class ApiOwnersTestCaseMixin:
self.login(ADMIN_USERNAME)
uri = f"api/v1/{self.resource_name}/related/owner"
rv = self.client.get(uri)
with patch("superset.views.base_api.logger") as mock_logger:
rv = self.client.get(uri)
assert rv.status_code == 404
# A disallowed related field is recorded as a security log event,
# including the rejected column name, in addition to the 404.
mock_logger.warning.assert_called_once()
assert "column=owner" in (
mock_logger.warning.call_args.args[0]
% mock_logger.warning.call_args.args[1:]
)