From af54788b5e0388c36c6709331fd04a5951113867 Mon Sep 17 00:00:00 2001 From: Amin Ghadersohi Date: Fri, 20 Mar 2026 09:41:54 -0400 Subject: [PATCH] fix(mcp): fix dashboard slug null and execute_sql encoding error (#38710) --- superset/mcp_service/dashboard/schemas.py | 2 +- .../mcp_service/sql_lab/tool/execute_sql.py | 21 +- .../dashboard/test_dashboard_schemas.py | 119 ++++++++++ .../sql_lab/tool/test_execute_sql.py | 206 ++++++++++++++++++ 4 files changed, 346 insertions(+), 2 deletions(-) create mode 100644 tests/unit_tests/mcp_service/dashboard/test_dashboard_schemas.py diff --git a/superset/mcp_service/dashboard/schemas.py b/superset/mcp_service/dashboard/schemas.py index ba56cf774b0..97661d1c794 100644 --- a/superset/mcp_service/dashboard/schemas.py +++ b/superset/mcp_service/dashboard/schemas.py @@ -525,7 +525,7 @@ def serialize_dashboard_object(dashboard: Any) -> DashboardInfo: return DashboardInfo( id=dashboard_id, dashboard_title=getattr(dashboard, "dashboard_title", None), - slug=slug, + slug=slug or "", url=dashboard_url, published=getattr(dashboard, "published", None), changed_by=getattr(dashboard, "changed_by_name", None), diff --git a/superset/mcp_service/sql_lab/tool/execute_sql.py b/superset/mcp_service/sql_lab/tool/execute_sql.py index 5490dc1b5d7..61f00889200 100644 --- a/superset/mcp_service/sql_lab/tool/execute_sql.py +++ b/superset/mcp_service/sql_lab/tool/execute_sql.py @@ -25,6 +25,7 @@ Database.execute() API with RLS, template rendering, and security validation. from __future__ import annotations import logging +from decimal import Decimal from typing import Any from fastmcp import Context @@ -159,6 +160,22 @@ async def execute_sql(request: ExecuteSqlRequest, ctx: Context) -> ExecuteSqlRes raise +def _sanitize_row_values(rows: list[dict[str, Any]]) -> None: + """Sanitize non-serializable values in rows for JSON serialization.""" + for row in rows: + for key, value in row.items(): + if isinstance(value, (bytes, memoryview)): + raw = bytes(value) if isinstance(value, memoryview) else value + try: + row[key] = raw.decode("utf-8") + except (UnicodeDecodeError, AttributeError): + row[key] = raw.hex() + elif isinstance(value, Decimal): + row[key] = float(value) + elif not isinstance(value, (str, int, float, bool, type(None), list, dict)): + row[key] = str(value) + + def _convert_to_response(result: QueryResult) -> ExecuteSqlResponse: """Convert QueryResult to ExecuteSqlResponse.""" if result.status != QueryStatus.SUCCESS: @@ -177,8 +194,10 @@ def _convert_to_response(result: QueryResult) -> ExecuteSqlResponse: stmt_data: StatementData | None = None if stmt.data is not None: df = stmt.data + rows_data = df.to_dict(orient="records") + _sanitize_row_values(rows_data) stmt_data = StatementData( - rows=df.to_dict(orient="records"), + rows=rows_data, columns=[ ColumnInfo(name=col, type=str(df[col].dtype)) for col in df.columns ], diff --git a/tests/unit_tests/mcp_service/dashboard/test_dashboard_schemas.py b/tests/unit_tests/mcp_service/dashboard/test_dashboard_schemas.py new file mode 100644 index 00000000000..d5ace35054d --- /dev/null +++ b/tests/unit_tests/mcp_service/dashboard/test_dashboard_schemas.py @@ -0,0 +1,119 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Unit tests for dashboard schema serialization. + +Tests that serialize_dashboard_object correctly handles slug and other fields. +""" + +from typing import Any +from unittest.mock import MagicMock, patch + +from superset.mcp_service.dashboard.schemas import serialize_dashboard_object + + +def _mock_dashboard( + id: int = 1, + title: str = "Test Dashboard", + slug: str | None = None, + owners: list[Any] | None = None, + slices: list[Any] | None = None, + tags: list[Any] | None = None, + roles: list[Any] | None = None, +) -> MagicMock: + """Create a mock Dashboard ORM object.""" + dashboard = MagicMock() + dashboard.id = id + dashboard.dashboard_title = title + dashboard.slug = slug + dashboard.published = True + dashboard.changed_by_name = "admin" + dashboard.changed_on = None + dashboard.changed_on_humanized = "2 hours ago" + dashboard.created_by_name = "admin" + dashboard.created_on = None + dashboard.created_on_humanized = "1 day ago" + dashboard.description = "A test dashboard" + dashboard.css = None + dashboard.certified_by = None + dashboard.certification_details = None + dashboard.json_metadata = None + dashboard.position_json = None + dashboard.is_managed_externally = False + dashboard.external_url = None + dashboard.uuid = None + dashboard.owners = owners or [] + dashboard.slices = slices or [] + dashboard.tags = tags or [] + dashboard.roles = roles or [] + return dashboard + + +class TestSerializeDashboardObject: + """Tests for serialize_dashboard_object slug handling.""" + + @patch("superset.mcp_service.utils.url_utils.get_superset_base_url") + def test_slug_none_returns_empty_string(self, mock_base_url): + """Dashboards with slug=None should return slug="" for consistency + with dashboard_serializer.""" + mock_base_url.return_value = "http://localhost:8088" + + dashboard = _mock_dashboard(id=1, slug=None) + result = serialize_dashboard_object(dashboard) + + assert result.slug == "" + + @patch("superset.mcp_service.utils.url_utils.get_superset_base_url") + def test_slug_empty_string_returns_empty_string(self, mock_base_url): + """Dashboards with slug="" should return slug="".""" + mock_base_url.return_value = "http://localhost:8088" + + dashboard = _mock_dashboard(id=2, slug="") + result = serialize_dashboard_object(dashboard) + + assert result.slug == "" + + @patch("superset.mcp_service.utils.url_utils.get_superset_base_url") + def test_slug_with_value_preserved(self, mock_base_url): + """Dashboards with a real slug should preserve it.""" + mock_base_url.return_value = "http://localhost:8088" + + dashboard = _mock_dashboard(id=3, slug="my-dashboard") + result = serialize_dashboard_object(dashboard) + + assert result.slug == "my-dashboard" + + @patch("superset.mcp_service.utils.url_utils.get_superset_base_url") + def test_url_uses_id_when_no_slug(self, mock_base_url): + """URL should use dashboard id when slug is None.""" + mock_base_url.return_value = "http://localhost:8088" + + dashboard = _mock_dashboard(id=42, slug=None) + result = serialize_dashboard_object(dashboard) + + assert result.url == "http://localhost:8088/superset/dashboard/42/" + + @patch("superset.mcp_service.utils.url_utils.get_superset_base_url") + def test_url_uses_slug_when_available(self, mock_base_url): + """URL should use slug when available.""" + mock_base_url.return_value = "http://localhost:8088" + + dashboard = _mock_dashboard(id=42, slug="my-dashboard") + result = serialize_dashboard_object(dashboard) + + assert result.url == "http://localhost:8088/superset/dashboard/my-dashboard/" diff --git a/tests/unit_tests/mcp_service/sql_lab/tool/test_execute_sql.py b/tests/unit_tests/mcp_service/sql_lab/tool/test_execute_sql.py index 764e1bf2cee..b536578ca1f 100644 --- a/tests/unit_tests/mcp_service/sql_lab/tool/test_execute_sql.py +++ b/tests/unit_tests/mcp_service/sql_lab/tool/test_execute_sql.py @@ -23,6 +23,7 @@ and response conversion logic. """ import logging +from decimal import Decimal from typing import Any from unittest.mock import MagicMock, Mock, patch @@ -883,3 +884,208 @@ class TestExecuteSql: options = call_args[0][1] assert options.cache is not None assert options.cache.force_refresh is True + + @patch("superset.security_manager") + @patch("superset.db") + @pytest.mark.asyncio + async def test_execute_sql_bytes_in_dataframe( + self, mock_db, mock_security_manager, mcp_server + ): + """Test that bytes/memoryview values in DataFrame are sanitized for JSON. + + Regression test: execute_sql fails with 'encoding without a string + argument' when queries return binary/bytea data. + """ + mock_database = _mock_database() + df = pd.DataFrame( + [ + { + "id": 1, + "name": "test", + "utf8_data": b"hello world", + "binary_data": b"\x00\x01\x02\xff", + }, + ] + ) + mock_database.execute.return_value = QueryResult( + status=QueryStatus.SUCCESS, + statements=[ + StatementResult( + original_sql="SELECT * FROM files", + executed_sql="SELECT * FROM files", + data=df, + row_count=1, + execution_time_ms=5.0, + ) + ], + query_id=None, + total_execution_time_ms=5.0, + is_cached=False, + ) + mock_db.session.query.return_value.filter_by.return_value.first.return_value = ( + mock_database + ) + mock_security_manager.can_access_database.return_value = True + + request = { + "database_id": 1, + "sql": "SELECT * FROM files", + "limit": 10, + } + + async with Client(mcp_server) as client: + result = await client.call_tool("execute_sql", {"request": request}) + + data = result.structured_content + assert data["success"] is True + assert data["row_count"] == 1 + row = data["rows"][0] + # UTF-8 decodable bytes should become string + assert row["utf8_data"] == "hello world" + # Non-UTF-8 bytes should become hex + assert row["binary_data"] == "000102ff" + + @patch("superset.security_manager") + @patch("superset.db") + @pytest.mark.asyncio + async def test_execute_sql_decimal_in_dataframe( + self, mock_db, mock_security_manager, mcp_server + ): + """Test that Decimal values in DataFrame are converted to float for JSON. + + Regression test: execute_sql fails with 'encoding without a string + argument' when queries return Decimal types (common with SUM/AVG). + """ + mock_database = _mock_database() + df = pd.DataFrame( + [ + { + "id": 1, + "price": Decimal("19.99"), + "total": Decimal("1234567.89"), + }, + ] + ) + mock_database.execute.return_value = QueryResult( + status=QueryStatus.SUCCESS, + statements=[ + StatementResult( + original_sql="SELECT * FROM orders", + executed_sql="SELECT * FROM orders", + data=df, + row_count=1, + execution_time_ms=5.0, + ) + ], + query_id=None, + total_execution_time_ms=5.0, + is_cached=False, + ) + mock_db.session.query.return_value.filter_by.return_value.first.return_value = ( + mock_database + ) + mock_security_manager.can_access_database.return_value = True + + request = { + "database_id": 1, + "sql": "SELECT * FROM orders", + "limit": 10, + } + + async with Client(mcp_server) as client: + result = await client.call_tool("execute_sql", {"request": request}) + + data = result.structured_content + assert data["success"] is True + assert data["row_count"] == 1 + row = data["rows"][0] + assert row["price"] == 19.99 + assert row["total"] == 1234567.89 + assert isinstance(row["price"], float) + + +class TestSanitizeRowValues: + """Unit tests for _sanitize_row_values helper function.""" + + def test_sanitize_utf8_bytes(self): + from superset.mcp_service.sql_lab.tool.execute_sql import _sanitize_row_values + + rows = [{"data": b"hello"}] + _sanitize_row_values(rows) + assert rows[0]["data"] == "hello" + + def test_sanitize_non_utf8_bytes(self): + from superset.mcp_service.sql_lab.tool.execute_sql import _sanitize_row_values + + rows = [{"data": b"\x00\xff"}] + _sanitize_row_values(rows) + assert rows[0]["data"] == "00ff" + + def test_sanitize_memoryview(self): + from superset.mcp_service.sql_lab.tool.execute_sql import _sanitize_row_values + + rows = [{"data": memoryview(b"test")}] + _sanitize_row_values(rows) + assert rows[0]["data"] == "test" + + def test_sanitize_decimal(self): + from superset.mcp_service.sql_lab.tool.execute_sql import _sanitize_row_values + + rows = [{"price": Decimal("19.99"), "count": Decimal("42")}] + _sanitize_row_values(rows) + assert rows[0]["price"] == 19.99 + assert isinstance(rows[0]["price"], float) + assert rows[0]["count"] == 42.0 + + def test_sanitize_custom_type_uses_str(self): + from superset.mcp_service.sql_lab.tool.execute_sql import _sanitize_row_values + + class CustomType: + def __str__(self): + return "custom_value" + + rows = [{"data": CustomType()}] + _sanitize_row_values(rows) + assert rows[0]["data"] == "custom_value" + + def test_preserves_json_serializable_types(self): + from superset.mcp_service.sql_lab.tool.execute_sql import _sanitize_row_values + + rows = [ + { + "str_val": "hello", + "int_val": 42, + "float_val": 3.14, + "bool_val": True, + "none_val": None, + "list_val": [1, 2], + "dict_val": {"a": 1}, + } + ] + original = [dict(row) for row in rows] + _sanitize_row_values(rows) + assert rows == original + + def test_sanitize_empty_rows(self): + from superset.mcp_service.sql_lab.tool.execute_sql import _sanitize_row_values + + rows: list[dict[str, Any]] = [] + _sanitize_row_values(rows) + assert rows == [] + + def test_sanitize_mixed_types_in_single_row(self): + from superset.mcp_service.sql_lab.tool.execute_sql import _sanitize_row_values + + rows = [ + { + "id": 1, + "name": "test", + "price": Decimal("9.99"), + "blob": b"\x00\x01\x02\xff", + } + ] + _sanitize_row_values(rows) + assert rows[0]["id"] == 1 + assert rows[0]["name"] == "test" + assert rows[0]["price"] == 9.99 + assert rows[0]["blob"] == "000102ff"