mirror of
https://github.com/apache/superset.git
synced 2026-05-12 11:25:56 +00:00
fix(mcp): ASCII chart crashes with NaN when dataset contains null values (#39916)
This commit is contained in:
@@ -79,7 +79,12 @@ def _generate_ascii_bar_chart(data: list[Any], width: int, height: int) -> str:
|
||||
label_val = None
|
||||
|
||||
for _key, val in row.items():
|
||||
if isinstance(val, (int, float)) and numeric_val is None:
|
||||
if (
|
||||
isinstance(val, (int, float))
|
||||
and not isinstance(val, bool)
|
||||
and not _is_nan_value(val)
|
||||
and numeric_val is None
|
||||
):
|
||||
numeric_val = val
|
||||
elif isinstance(val, str) and label_val is None:
|
||||
label_val = val
|
||||
@@ -121,7 +126,10 @@ def _generate_horizontal_bar_chart(
|
||||
# Calculate bar length
|
||||
if max_val > min_val:
|
||||
normalized = (value - min_val) / (max_val - min_val)
|
||||
bar_length = max(1, int(normalized * max_bar_width))
|
||||
if _is_nan_value(normalized):
|
||||
bar_length = 0
|
||||
else:
|
||||
bar_length = max(1, int(normalized * max_bar_width))
|
||||
else:
|
||||
bar_length = 1
|
||||
|
||||
@@ -164,7 +172,10 @@ def _generate_vertical_bar_chart( # noqa: C901
|
||||
for col, value in enumerate(values):
|
||||
if max_val > min_val:
|
||||
normalized = (value - min_val) / (max_val - min_val)
|
||||
bar_height = max(1, int(normalized * chart_height))
|
||||
if _is_nan_value(normalized):
|
||||
bar_height = 0
|
||||
else:
|
||||
bar_height = max(1, int(normalized * chart_height))
|
||||
else:
|
||||
bar_height = 1
|
||||
|
||||
@@ -274,7 +285,12 @@ def _extract_time_series_data(data: list[Any]) -> tuple[list[float], list[str]]:
|
||||
label_val = None
|
||||
|
||||
for key, val in row.items():
|
||||
if isinstance(val, (int, float)) and numeric_val is None:
|
||||
if (
|
||||
isinstance(val, (int, float))
|
||||
and not isinstance(val, bool)
|
||||
and not _is_nan_value(val)
|
||||
and numeric_val is None
|
||||
):
|
||||
numeric_val = val
|
||||
elif isinstance(val, str) and label_val is None:
|
||||
# Use the key name if it looks like a date/time field
|
||||
@@ -520,9 +536,11 @@ def _extract_scatter_data(
|
||||
if data and isinstance(data[0], dict):
|
||||
# Find the first two numeric columns
|
||||
for key, val in data[0].items():
|
||||
if isinstance(val, (int, float)) and not (
|
||||
isinstance(val, float) and (val != val)
|
||||
): # Exclude NaN
|
||||
if (
|
||||
isinstance(val, (int, float))
|
||||
and not isinstance(val, bool)
|
||||
and not _is_nan_value(val)
|
||||
):
|
||||
numeric_columns.append(key)
|
||||
|
||||
if len(numeric_columns) >= 2:
|
||||
@@ -534,15 +552,14 @@ def _extract_scatter_data(
|
||||
if isinstance(row, dict):
|
||||
x_val = row.get(x_column)
|
||||
y_val = row.get(y_column)
|
||||
# Check for valid numbers (not NaN)
|
||||
if (
|
||||
isinstance(x_val, (int, float))
|
||||
and not isinstance(x_val, bool)
|
||||
and not _is_nan_value(x_val)
|
||||
and isinstance(y_val, (int, float))
|
||||
and not (
|
||||
isinstance(x_val, float) and (x_val != x_val)
|
||||
) # Not NaN
|
||||
and not (isinstance(y_val, float) and (y_val != y_val))
|
||||
): # Not NaN
|
||||
and not isinstance(y_val, bool)
|
||||
and not _is_nan_value(y_val)
|
||||
):
|
||||
x_values.append(x_val)
|
||||
y_values.append(y_val)
|
||||
|
||||
|
||||
175
tests/unit_tests/mcp_service/chart/test_ascii_charts.py
Normal file
175
tests/unit_tests/mcp_service/chart/test_ascii_charts.py
Normal file
@@ -0,0 +1,175 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
"""Unit tests for ascii_charts.py NaN/null value handling."""
|
||||
|
||||
from superset.mcp_service.chart.ascii_charts import generate_ascii_chart
|
||||
|
||||
|
||||
def test_bar_chart_with_null_values_does_not_raise() -> None:
|
||||
"""Bar chart renderer must not crash when dataset rows contain NaN values."""
|
||||
data = [
|
||||
{"category": "A", "value": 10.0},
|
||||
{"category": "B", "value": float("nan")},
|
||||
{"category": "C", "value": 30.0},
|
||||
]
|
||||
result = generate_ascii_chart(data, "bar")
|
||||
assert isinstance(result, str)
|
||||
assert len(result) > 0
|
||||
|
||||
|
||||
def test_bar_chart_with_all_null_values_returns_fallback() -> None:
|
||||
"""Bar chart with no valid numeric rows should return the no-data fallback."""
|
||||
data = [
|
||||
{"category": "A", "value": float("nan")},
|
||||
{"category": "B", "value": float("nan")},
|
||||
]
|
||||
result = generate_ascii_chart(data, "bar")
|
||||
assert isinstance(result, str)
|
||||
assert result == "No numeric data found for bar chart"
|
||||
|
||||
|
||||
def test_line_chart_with_null_values_does_not_raise() -> None:
|
||||
"""Line chart renderer must not crash when dataset rows contain NaN values."""
|
||||
data = [
|
||||
{"date": "2024-01", "sales": 100.0},
|
||||
{"date": "2024-02", "sales": float("nan")},
|
||||
{"date": "2024-03", "sales": 300.0},
|
||||
]
|
||||
result = generate_ascii_chart(data, "line")
|
||||
assert isinstance(result, str)
|
||||
assert len(result) > 0
|
||||
|
||||
|
||||
def test_horizontal_bar_chart_nan_rows_are_skipped() -> None:
|
||||
"""NaN rows must be silently skipped; valid rows render normally."""
|
||||
# Use labels longer than 8 chars to force horizontal layout, where full
|
||||
# label text is preserved (vertical layout truncates to 3 chars).
|
||||
data = [
|
||||
{"label": "Alpha Category", "amount": 50.0},
|
||||
{"label": "Beta Category", "amount": float("nan")},
|
||||
{"label": "Gamma Category", "amount": 150.0},
|
||||
]
|
||||
result = generate_ascii_chart(data, "bar")
|
||||
# avg label length is 14 (> 8 threshold), so horizontal layout is chosen;
|
||||
# horizontal mode preserves full label text unlike vertical (3-char truncation).
|
||||
assert "Horizontal Bar Chart" in result
|
||||
# Both valid labels must appear in full; the NaN row (Beta) must be absent
|
||||
assert "Alpha" in result
|
||||
assert "Gamma" in result
|
||||
assert "Beta" not in result
|
||||
|
||||
|
||||
def test_column_chart_with_null_values_does_not_raise() -> None:
|
||||
"""Column (vertical bar) chart must not crash on NaN values."""
|
||||
data = [
|
||||
{"x": "Q1", "y": 10.0},
|
||||
{"x": "Q2", "y": float("nan")},
|
||||
{"x": "Q3", "y": 30.0},
|
||||
{"x": "Q4", "y": 40.0},
|
||||
]
|
||||
result = generate_ascii_chart(data, "column")
|
||||
assert isinstance(result, str)
|
||||
assert len(result) > 0
|
||||
|
||||
|
||||
def test_timeseries_bar_with_null_values_does_not_raise() -> None:
|
||||
"""echarts_timeseries_bar chart type must not crash on NaN values."""
|
||||
data = [
|
||||
{"ts": "2024-01-01", "count": 5.0},
|
||||
{"ts": "2024-01-02", "count": float("nan")},
|
||||
{"ts": "2024-01-03", "count": 15.0},
|
||||
]
|
||||
result = generate_ascii_chart(data, "echarts_timeseries_bar")
|
||||
assert isinstance(result, str)
|
||||
assert len(result) > 0
|
||||
|
||||
|
||||
def test_chart_with_none_values_does_not_raise() -> None:
|
||||
"""None (SQL NULL) values should be treated identically to NaN."""
|
||||
data = [
|
||||
{"name": "X", "metric": 100.0},
|
||||
{"name": "Y", "metric": None},
|
||||
{"name": "Z", "metric": 200.0},
|
||||
]
|
||||
result = generate_ascii_chart(data, "bar")
|
||||
assert isinstance(result, str)
|
||||
assert len(result) > 0
|
||||
|
||||
|
||||
def test_bar_chart_skips_boolean_columns() -> None:
|
||||
"""Boolean fields must not be selected as the numeric metric.
|
||||
|
||||
bool is a subclass of int, so isinstance(True, (int, float)) is True.
|
||||
Without an explicit bool guard the extractor would lock onto a boolean
|
||||
column (e.g. is_active=True -> 1) and ignore the real numeric metric.
|
||||
"""
|
||||
data = [
|
||||
{"label": "Alpha Category", "is_active": True, "revenue": 500.0},
|
||||
{"label": "Beta Category", "is_active": False, "revenue": 1500.0},
|
||||
{"label": "Gamma Category", "is_active": True, "revenue": 1000.0},
|
||||
]
|
||||
result = generate_ascii_chart(data, "bar")
|
||||
# If booleans are correctly skipped, revenue (500/1500/1000) drives the
|
||||
# bars. The max value is 1500, so we expect at least one K-formatted value.
|
||||
assert "1.5K" in result or "1500" in result or "1.0K" in result
|
||||
# The scale min/max would be "0.0" and "1.0" only if booleans were chosen;
|
||||
# with revenue selected the scale starts at 500 (never "Scale: 0.0").
|
||||
assert "Scale: 0.0" not in result
|
||||
|
||||
|
||||
def test_line_chart_skips_boolean_columns() -> None:
|
||||
"""Boolean fields must not be selected as numeric points in line charts."""
|
||||
data = [
|
||||
{"date": "2024-01", "is_active": True, "sales": 100.0},
|
||||
{"date": "2024-02", "is_active": False, "sales": 200.0},
|
||||
{"date": "2024-03", "is_active": True, "sales": 300.0},
|
||||
]
|
||||
result = generate_ascii_chart(data, "line")
|
||||
assert isinstance(result, str)
|
||||
assert len(result) > 0
|
||||
# If booleans were selected, the range would be 0-1; if revenue is
|
||||
# selected the range includes values up to 300.
|
||||
assert "300" in result or "200" in result
|
||||
|
||||
|
||||
def test_scatter_chart_with_nan_values_does_not_raise() -> None:
|
||||
"""Scatter chart renderer must not crash when dataset rows contain NaN values."""
|
||||
data = [
|
||||
{"x": 1.0, "y": 2.0},
|
||||
{"x": float("nan"), "y": 4.0},
|
||||
{"x": 5.0, "y": float("nan")},
|
||||
{"x": 7.0, "y": 8.0},
|
||||
]
|
||||
result = generate_ascii_chart(data, "scatter")
|
||||
assert isinstance(result, str)
|
||||
assert len(result) > 0
|
||||
|
||||
|
||||
def test_scatter_chart_skips_boolean_columns() -> None:
|
||||
"""Boolean fields must not be selected as X/Y axes in scatter charts."""
|
||||
data = [
|
||||
{"is_active": True, "x": 10.0, "y": 20.0},
|
||||
{"is_active": False, "x": 30.0, "y": 40.0},
|
||||
{"is_active": True, "x": 50.0, "y": 60.0},
|
||||
]
|
||||
result = generate_ascii_chart(data, "scatter")
|
||||
# If booleans are correctly skipped, x/y (10-50 / 20-60) drive the axes;
|
||||
# boolean-driven axes would be confined to 0-1.
|
||||
assert isinstance(result, str)
|
||||
assert len(result) > 0
|
||||
assert "10" in result or "30" in result or "50" in result
|
||||
Reference in New Issue
Block a user