fix(mcp): Skip misleading trend analysis for categorical ASCII charts (#39761)

This commit is contained in:
Alexandru Soare
2026-05-20 18:04:21 +03:00
committed by GitHub
parent 6e8b3bf976
commit fb276b08dd
2 changed files with 124 additions and 26 deletions

View File

@@ -251,7 +251,7 @@ def _generate_ascii_line_chart(data: list[Any], width: int, height: int) -> str:
lines.append("" * min(width, 60))
# Extract values and labels for plotting
values, labels = _extract_time_series_data(data)
values, labels, is_temporal = _extract_time_series_data(data)
if not values:
return "No numeric data found for line chart"
@@ -265,7 +265,7 @@ def _generate_ascii_line_chart(data: list[Any], width: int, height: int) -> str:
lines.extend(sparkline_data)
# Add trend analysis
trend_analysis = _analyze_trend(values)
trend_analysis = _analyze_trend(values, is_temporal=is_temporal)
lines.append("")
lines.append("📊 Trend Analysis:")
lines.extend(trend_analysis)
@@ -273,10 +273,17 @@ def _generate_ascii_line_chart(data: list[Any], width: int, height: int) -> str:
return "\n".join(lines)
def _extract_time_series_data(data: list[Any]) -> tuple[list[float], list[str]]:
"""Extract time series data with labels."""
def _extract_time_series_data(
data: list[Any],
) -> tuple[list[float], list[str], bool]:
"""Extract time series data with labels.
Returns a tuple of (values, labels, is_temporal) where is_temporal indicates
whether the label column appears to represent temporal data based on its name.
"""
values = []
labels = []
is_temporal = False
for row in data[:20]: # Limit points for readability
if isinstance(row, dict):
@@ -299,6 +306,7 @@ def _extract_time_series_data(data: list[Any]) -> tuple[list[float], list[str]]:
for date_word in ["date", "time", "month", "day", "year"]
):
label_val = str(val)[:10] # Truncate long dates
is_temporal = True
else:
label_val = str(val)[:8] # Truncate long strings
@@ -306,7 +314,7 @@ def _extract_time_series_data(data: list[Any]) -> tuple[list[float], list[str]]:
values.append(numeric_val)
labels.append(label_val or f"P{len(values)}")
return values, labels
return values, labels, is_temporal
def _create_enhanced_line_chart(
@@ -407,8 +415,15 @@ def _draw_line_segment(
grid[y][x] = ""
def _analyze_trend(values: list[float]) -> list[str]:
"""Analyze trend in the data."""
def _analyze_trend(values: list[float], *, is_temporal: bool = True) -> list[str]:
"""Analyze trend in the data.
Args:
values: Numeric data points to analyze.
is_temporal: Whether the data represents a time series. When False,
directional trend analysis is skipped because the ordering of
categorical data is arbitrary.
"""
if len(values) < 2:
return ["• Insufficient data for trend analysis"]
@@ -421,24 +436,28 @@ def _analyze_trend(values: list[float]) -> list[str]:
max_val = max(values)
avg_val = sum(values) / len(values)
# Overall trend
if last_val > first_val * 1.1:
trend_icon = "📈"
trend_desc = "Strong upward trend"
elif last_val > first_val * 1.05:
trend_icon = "📊"
trend_desc = "Moderate upward trend"
elif last_val < first_val * 0.9:
trend_icon = "📉"
trend_desc = "Strong downward trend"
elif last_val < first_val * 0.95:
trend_icon = "📊"
trend_desc = "Moderate downward trend"
else:
trend_icon = "➡️"
trend_desc = "Relatively stable"
if is_temporal:
# Overall trend (only meaningful for temporally ordered data)
if last_val > first_val * 1.1:
trend_icon = "📈"
trend_desc = "Strong upward trend"
elif last_val > first_val * 1.05:
trend_icon = "📊"
trend_desc = "Moderate upward trend"
elif last_val < first_val * 0.9:
trend_icon = "📉"
trend_desc = "Strong downward trend"
elif last_val < first_val * 0.95:
trend_icon = "📊"
trend_desc = "Moderate downward trend"
else:
trend_icon = "➡️"
trend_desc = "Relatively stable"
analysis.append(f"{trend_icon} {trend_desc}")
else:
analysis.append("• ⚠️ Data is categorical — directional trend not meaningful")
analysis.append(f"{trend_icon} {trend_desc}")
analysis.append(f"• Range: {min_val:.1f} to {max_val:.1f} (avg: {avg_val:.1f})")
# Volatility

View File

@@ -15,9 +15,88 @@
# specific language governing permissions and limitations
# under the License.
"""Unit tests for ascii_charts.py NaN/null value handling."""
"""Unit tests for ASCII charts: trend analysis, NaN/null handling,
and boolean skipping."""
from superset.mcp_service.chart.ascii_charts import generate_ascii_chart
from superset.mcp_service.chart.ascii_charts import (
_analyze_trend,
_extract_time_series_data,
generate_ascii_chart,
)
def test_analyze_trend_temporal_upward():
values = [100.0, 110.0, 120.0, 130.0, 150.0]
result = _analyze_trend(values, is_temporal=True)
trend_line = result[0]
assert "upward trend" in trend_line.lower()
assert "categorical" not in trend_line.lower()
def test_analyze_trend_temporal_downward():
values = [200.0, 180.0, 150.0, 120.0, 100.0]
result = _analyze_trend(values, is_temporal=True)
trend_line = result[0]
assert "downward trend" in trend_line.lower()
def test_analyze_trend_temporal_stable():
values = [100.0, 101.0, 100.5, 100.2, 100.8]
result = _analyze_trend(values, is_temporal=True)
trend_line = result[0]
assert "stable" in trend_line.lower()
def test_analyze_trend_categorical_skips_direction():
values = [200.0, 180.0, 150.0, 120.0, 100.0]
result = _analyze_trend(values, is_temporal=False)
trend_line = result[0]
assert "categorical" in trend_line.lower()
assert "downward" not in trend_line.lower()
assert "upward" not in trend_line.lower()
def test_analyze_trend_categorical_keeps_range_and_volatility():
values = [200.0, 180.0, 150.0, 120.0, 100.0]
result = _analyze_trend(values, is_temporal=False)
full_text = "\n".join(result)
assert "Range:" in full_text
assert "Volatility:" in full_text
def test_analyze_trend_defaults_to_temporal():
values = [100.0, 150.0, 200.0]
result = _analyze_trend(values)
trend_line = result[0]
assert "categorical" not in trend_line.lower()
assert "upward trend" in trend_line.lower()
def test_extract_time_series_data_temporal_column():
data = [
{"date": "2024-01-01", "value": 100},
{"date": "2024-02-01", "value": 200},
]
values, labels, is_temporal = _extract_time_series_data(data)
assert values == [100, 200]
assert is_temporal is True
def test_extract_time_series_data_categorical_column():
data = [
{"country": "Argentina", "value": 500},
{"country": "Brazil", "value": 300},
]
values, labels, is_temporal = _extract_time_series_data(data)
assert values == [500, 300]
assert is_temporal is False
def test_extract_time_series_data_time_keyword_variants():
for key in ["timestamp", "created_time", "month_name", "day_of_week", "year_end"]:
data = [{key: "val", "metric": 42}]
_, _, is_temporal = _extract_time_series_data(data)
assert is_temporal is True, f"Expected temporal for column '{key}'"
def test_bar_chart_with_null_values_does_not_raise() -> None: