Compare commits

...

6 Commits

Author SHA1 Message Date
Evan
6162c90ee9 chore(ci): correct actions/cache version comment to match pinned SHA
The pinned SHA 27d5ce7 actually corresponds to actions/cache v5.0.5,
but the inline comment read "# v5". zizmor's ref-version-mismatch rule
flags this discrepancy. Update the comment to the truthful version.

Resolves code-scanning alert #2551

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-27 01:41:51 -07:00
innovark
0a18779280 fix(echarts): format mixed timeseries value labels by assigned axis (#40420)
Co-authored-by: Evan Rusackas <evan@preset.io>
2026-06-27 01:27:41 -07:00
Krishna Chaitanya
a147079043 fix(bigquery): backslash-escape apostrophes in filter values (#38835)
BigQuery rejects filter values containing apostrophes (e.g. O'Brien): the
sqlalchemy-bigquery dialect renders string literals via repr(), which switches
to double-quote delimiters that BigQuery parses as identifiers, causing a
syntax error.

Monkey-patch the dialect's colspecs with a TypeDecorator whose literal_processor
emits single-quoted literals using backslash escaping ('O\'Brien'). Doubled
single quotes ('O''Brien') are NOT valid in BigQuery (parsed as concatenated
literals). Control characters are emitted as named escapes with a \xhh fallback,
since BigQuery forbids literal control chars in quoted strings. Follows the
existing Databricks dialect pattern.

Fixes #35857
2026-06-27 00:55:47 -07:00
Abdul Rehman
ebb32de625 fix(cachekey): use data_cache for chart query result invalidation (#40493) 2026-06-26 18:01:14 -07:00
Onur Taşhan
1280eaee18 fix(mcp): include embedded_uuid in get_dashboard_info response (#41195)
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-26 18:00:10 -07:00
jesperct
15626a047c fix(sqllab): quote autocomplete table names that need it (#41199) 2026-06-26 17:58:05 -07:00
13 changed files with 795 additions and 26 deletions

View File

@@ -63,7 +63,7 @@ jobs:
yarn install --immutable
- name: Cache pre-commit environments
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
with:
path: ~/.cache/pre-commit
key: pre-commit-v2-${{ runner.os }}-py${{ matrix.python-version }}-${{ hashFiles('.pre-commit-config.yaml') }}

View File

@@ -56,7 +56,7 @@ jobs:
- name: Cache npm
if: env.HAS_TAGS
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
with:
path: ~/.npm # npm cache files are stored in `~/.npm` on Linux/macOS
key: ${{ runner.OS }}-node-${{ hashFiles('**/package-lock.json') }}
@@ -70,7 +70,7 @@ jobs:
run: echo "dir=$(npm config get cache)" >> $GITHUB_OUTPUT
- name: Cache npm
if: env.HAS_TAGS
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
id: npm-cache # use this to check for `cache-hit` (`steps.npm-cache.outputs.cache-hit != 'true'`)
with:
path: ${{ steps.npm-cache-dir-path.outputs.dir }}

View File

@@ -344,6 +344,16 @@ export default function transformProps(
data2,
currencyCodeColumn,
);
const getAxisFormatterConfig = (axisIndex?: number) =>
axisIndex === 1
? {
customFormatters: customFormattersSecondary,
formatter: formatterSecondary,
}
: {
customFormatters,
formatter,
};
const primarySeries = new Set<string>();
const secondarySeries = new Set<string>();
@@ -422,6 +432,8 @@ export default function transformProps(
let [minSecondary, maxSecondary] = (yAxisBoundsSecondary || []).map(
parseAxisBound,
);
const getAxisMax = (axisIndex?: number) =>
axisIndex === 1 ? maxSecondary : yAxisMax;
const array = ensureIsArray(chartProps.rawFormData?.time_compare);
const inverted = invert(verboseMap);
@@ -445,10 +457,11 @@ export default function transformProps(
// When no groupby, format as just the entry name with optional query identifier
displayName = showQueryIdentifiers ? `${entryName} (Query A)` : entryName;
}
const axisFormatterConfig = getAxisFormatterConfig(yAxisIndex);
const seriesFormatter = getFormatter(
customFormatters,
formatter,
axisFormatterConfig.customFormatters,
axisFormatterConfig.formatter,
metrics,
labelMap?.[seriesName]?.[0],
!!contributionMode,
@@ -480,7 +493,7 @@ export default function transformProps(
formatter:
seriesType === EchartsTimeseriesSeriesType.Bar
? getOverMaxHiddenFormatter({
max: yAxisMax,
max: getAxisMax(yAxisIndex),
formatter: seriesFormatter,
})
: seriesFormatter,
@@ -518,10 +531,11 @@ export default function transformProps(
// When no groupby, format as just the entry name with optional query identifier
displayName = showQueryIdentifiers ? `${entryName} (Query B)` : entryName;
}
const axisFormatterConfig = getAxisFormatterConfig(yAxisIndexB);
const seriesFormatter = getFormatter(
customFormattersSecondary,
formatterSecondary,
axisFormatterConfig.customFormatters,
axisFormatterConfig.formatter,
metricsB,
labelMapB?.[seriesName]?.[0],
!!contributionMode,
@@ -554,7 +568,7 @@ export default function transformProps(
formatter:
seriesTypeB === EchartsTimeseriesSeriesType.Bar
? getOverMaxHiddenFormatter({
max: maxSecondary,
max: getAxisMax(yAxisIndexB),
formatter: seriesFormatter,
})
: seriesFormatter,

View File

@@ -35,13 +35,26 @@ import {
} from '../../src';
import transformProps from '../../src/MixedTimeseries/transformProps';
import {
DEFAULT_FORM_DATA,
EchartsMixedTimeseriesFormData,
EchartsMixedTimeseriesProps,
} from '../../src/MixedTimeseries/types';
import { DEFAULT_FORM_DATA } from '../../src/MixedTimeseries/types';
import { createEchartsTimeseriesTestChartProps } from '../helpers';
import type { SeriesOption } from 'echarts';
type LabelFormatterParams = {
value: [number, number];
dataIndex: number;
seriesIndex: number;
seriesName: string;
};
type SeriesWithLabelFormatter = SeriesOption & {
label?: {
formatter?: (params: LabelFormatterParams) => string | number;
};
};
/**
* Creates a partial ChartDataResponseResult for testing.
* Only includes the fields needed for tests, with sensible defaults for required fields.
@@ -148,6 +161,30 @@ const queriesData: ChartDataResponseResult[] = [
createTestQueryData(defaultQueryRows, { label_map: defaultLabelMap }),
];
function getSeriesWithLabelFormatter(
series: SeriesOption[],
name: string,
): SeriesWithLabelFormatter {
const result = series.find(seriesOption => seriesOption.name === name);
expect(result).toBeDefined();
expect((result as SeriesWithLabelFormatter).label?.formatter).toBeDefined();
return result as SeriesWithLabelFormatter;
}
function formatSeriesLabel(
series: SeriesWithLabelFormatter,
value: [number, number],
) {
const formatter = series.label?.formatter;
expect(formatter).toBeDefined();
return formatter?.({
dataIndex: 0,
seriesIndex: 0,
seriesName: String(series.name),
value,
});
}
test('should transform chart props for viz with showQueryIdentifiers=false', () => {
const chartProps = createEchartsTimeseriesTestChartProps<
EchartsMixedTimeseriesFormData,
@@ -232,6 +269,162 @@ test('should transform chart props for viz with showQueryIdentifiers=true', () =
]);
});
test('formats value labels with the formatter for the assigned y-axis', () => {
const timestamp = 1704067200000;
const queryAData = createTestQueryData(
[{ __timestamp: timestamp, lineMetric: 0.25 }],
{
colnames: ['__timestamp', 'lineMetric'],
coltypes: [GenericDataType.Temporal, GenericDataType.Numeric],
label_map: { lineMetric: ['lineMetric'] },
},
);
const queryBData = createTestQueryData(
[{ __timestamp: timestamp, barMetric: 0.5 }],
{
colnames: ['__timestamp', 'barMetric'],
coltypes: [GenericDataType.Temporal, GenericDataType.Numeric],
label_map: { 'barMetric (1)': ['barMetric'] },
},
);
const chartProps = createEchartsTimeseriesTestChartProps<
EchartsMixedTimeseriesFormData,
EchartsMixedTimeseriesProps
>({
...MIXED_TIMESERIES_CHART_PROPS_DEFAULTS,
defaultQueriesData: [queryAData, queryBData],
formData: {
...formData,
groupby: [],
groupbyB: [],
metrics: ['lineMetric'],
metricsB: ['barMetric'],
showValue: true,
showValueB: true,
stack: null,
stackB: null,
x_axis: '__timestamp',
yAxisFormat: '.0%',
yAxisFormatSecondary: ',.1f',
yAxisIndex: 1,
yAxisIndexB: 0,
},
queriesData: [queryAData, queryBData],
});
const { echartOptions } = transformProps(chartProps);
const series = echartOptions.series as SeriesOption[];
const lineSeries = getSeriesWithLabelFormatter(series, 'lineMetric');
const barSeries = getSeriesWithLabelFormatter(series, 'barMetric');
expect(formatSeriesLabel(lineSeries, [timestamp, 0.25])).toBe('0.3');
expect(formatSeriesLabel(barSeries, [timestamp, 0.5])).toBe('50%');
});
test('formats value labels correctly when y-axis assignments are reversed', () => {
const timestamp = 1704067200000;
const queryAData = createTestQueryData(
[{ __timestamp: timestamp, lineMetric: 0.25 }],
{
colnames: ['__timestamp', 'lineMetric'],
coltypes: [GenericDataType.Temporal, GenericDataType.Numeric],
label_map: { lineMetric: ['lineMetric'] },
},
);
const queryBData = createTestQueryData(
[{ __timestamp: timestamp, barMetric: 0.5 }],
{
colnames: ['__timestamp', 'barMetric'],
coltypes: [GenericDataType.Temporal, GenericDataType.Numeric],
label_map: { 'barMetric (1)': ['barMetric'] },
},
);
const chartProps = createEchartsTimeseriesTestChartProps<
EchartsMixedTimeseriesFormData,
EchartsMixedTimeseriesProps
>({
...MIXED_TIMESERIES_CHART_PROPS_DEFAULTS,
defaultQueriesData: [queryAData, queryBData],
formData: {
...formData,
groupby: [],
groupbyB: [],
metrics: ['lineMetric'],
metricsB: ['barMetric'],
showValue: true,
showValueB: true,
stack: null,
stackB: null,
x_axis: '__timestamp',
yAxisFormat: '.0%',
yAxisFormatSecondary: ',.1f',
yAxisIndex: 0,
yAxisIndexB: 1,
},
queriesData: [queryAData, queryBData],
});
const { echartOptions } = transformProps(chartProps);
const series = echartOptions.series as SeriesOption[];
const lineSeries = getSeriesWithLabelFormatter(series, 'lineMetric');
const barSeries = getSeriesWithLabelFormatter(series, 'barMetric');
expect(formatSeriesLabel(lineSeries, [timestamp, 0.25])).toBe('25%');
expect(formatSeriesLabel(barSeries, [timestamp, 0.5])).toBe('0.5');
});
test('keeps bar value label clipping aligned with the assigned y-axis', () => {
const timestamp = 1704067200000;
const queryAData = createTestQueryData(
[{ __timestamp: timestamp, lineMetric: 0.25 }],
{
colnames: ['__timestamp', 'lineMetric'],
coltypes: [GenericDataType.Temporal, GenericDataType.Numeric],
label_map: { lineMetric: ['lineMetric'] },
},
);
const queryBData = createTestQueryData(
[{ __timestamp: timestamp, barMetric: 0.5 }],
{
colnames: ['__timestamp', 'barMetric'],
coltypes: [GenericDataType.Temporal, GenericDataType.Numeric],
label_map: { 'barMetric (1)': ['barMetric'] },
},
);
const chartProps = createEchartsTimeseriesTestChartProps<
EchartsMixedTimeseriesFormData,
EchartsMixedTimeseriesProps
>({
...MIXED_TIMESERIES_CHART_PROPS_DEFAULTS,
defaultQueriesData: [queryAData, queryBData],
formData: {
...formData,
groupby: [],
groupbyB: [],
metrics: ['lineMetric'],
metricsB: ['barMetric'],
showValue: true,
showValueB: true,
stack: null,
stackB: null,
x_axis: '__timestamp',
yAxisBounds: [undefined, 1],
yAxisBoundsSecondary: [undefined, 0.1],
yAxisFormat: '.0%',
yAxisFormatSecondary: ',.1f',
yAxisIndex: 0,
yAxisIndexB: 1,
},
queriesData: [queryAData, queryBData],
});
const { echartOptions } = transformProps(chartProps);
const series = echartOptions.series as SeriesOption[];
const barSeries = getSeriesWithLabelFormatter(series, 'barMetric');
expect(formatSeriesLabel(barSeries, [timestamp, 0.5])).toBe('');
});
describe('legend sorting', () => {
const getChartProps = (overrides = {}) =>
createEchartsTimeseriesTestChartProps<

View File

@@ -44,13 +44,13 @@ const fakeTableApiResult = {
result: [
{
id: 1,
value: 'fake api result1',
value: 'fake_api_result1',
label: 'fake api label1',
type: 'table',
},
{
id: 2,
value: 'fake api result2',
value: 'fake_api_result2',
label: 'fake api label2',
type: 'table',
},
@@ -152,6 +152,64 @@ test('returns keywords including fetched function_names data', async () => {
});
});
test('quotes table identifiers that require quoting in the inserted value', async () => {
const dbFunctionNamesApiRoute = `glob:*/api/v1/database/${expectDbId}/function_names/`;
fetchMock.get(dbFunctionNamesApiRoute, fakeFunctionNamesApiResult);
act(() => {
store.dispatch(
tableApiUtil.upsertQueryData(
'tables',
{ dbId: expectDbId, schema: expectSchema },
{
options: [
{ value: 'COVID Vaccines', label: 'COVID Vaccines', type: 'table' },
{ value: 'simple_table', label: 'simple_table', type: 'table' },
],
hasMore: false,
},
),
);
});
const { result } = renderHook(
() =>
useKeywords({
queryEditorId: 'testqueryid',
dbId: expectDbId,
schema: expectSchema,
}),
{
wrapper: createWrapper({
useRedux: true,
store,
}),
},
);
await waitFor(() =>
expect(fetchMock.callHistory.calls(dbFunctionNamesApiRoute).length).toBe(1),
);
// A name that needs quoting is inserted as a double-quoted identifier,
// while its display name stays human-readable.
expect(result.current).toContainEqual(
expect.objectContaining({
name: 'COVID Vaccines',
value: '"COVID Vaccines"',
meta: 'table',
}),
);
// A simple identifier is inserted as-is, without quotes.
expect(result.current).toContainEqual(
expect.objectContaining({
name: 'simple_table',
value: 'simple_table',
meta: 'table',
}),
);
});
test('skip fetching if autocomplete skipped', () => {
const { result } = renderHook(
() =>

View File

@@ -53,6 +53,14 @@ const getHelperText = (value: string) =>
detail: value,
};
// Names that aren't simple identifiers (spaces, punctuation, leading digits)
// must be double-quoted to be valid SQL, with embedded quotes doubled.
const SIMPLE_IDENTIFIER_RE = /^[A-Za-z_][A-Za-z0-9_]*$/;
const quoteIdentifier = (identifier: string) =>
SIMPLE_IDENTIFIER_RE.test(identifier)
? identifier
: `"${identifier.replace(/"/g, '""')}"`;
const extensionsRegistry = getExtensionsRegistry();
export function useKeywords(
@@ -197,7 +205,7 @@ export function useKeywords(
() =>
allCachedTables.map(({ value, label, schema: tableSchema }) => ({
name: label,
value,
value: quoteIdentifier(value),
schema: tableSchema,
score: TABLE_AUTOCOMPLETE_SCORE,
meta: 'table',

View File

@@ -100,7 +100,10 @@ class CacheRestApi(BaseSupersetModelRestApi):
)
cache_keys = [c.cache_key for c in cache_key_objs]
if cache_key_objs:
all_keys_deleted = cache_manager.cache.delete_many(*cache_keys)
# Chart query results live in ``data_cache``, not the default
# ``cache`` — using the wrong backend silently misses the Redis
# keys when ``CACHE_KEY_PREFIX`` differs between the two configs.
all_keys_deleted = cache_manager.data_cache.delete_many(*cache_keys)
if not all_keys_deleted:
# expected behavior as keys may expire and cache is not a

View File

@@ -23,7 +23,7 @@ import sys
import urllib
from datetime import datetime
from re import Pattern
from typing import Any, TYPE_CHECKING, TypedDict
from typing import Any, Callable, TYPE_CHECKING, TypedDict
import pandas as pd
from apispec import APISpec
@@ -83,6 +83,97 @@ if TYPE_CHECKING:
logger = logging.getLogger()
# BigQuery string escape sequences keyed off documented escapes in
# https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#string_and_bytes_literals.
# Backslash MUST be first so subsequent escapes don't double-escape their own
# backslash. ``\?``, ``\"`` and ``\``` are valid BigQuery escapes but
# intentionally omitted because those characters do not require escaping
# inside a single-quoted literal. ``\0`` is NOT a valid BigQuery escape
# (octal escapes require exactly three digits); the null byte instead falls
# through to the ``\xhh`` fallback below.
_BIGQUERY_STRING_ESCAPES = {
"\\": "\\\\",
"'": "\\'",
"\n": "\\n",
"\r": "\\r",
"\t": "\\t",
"\b": "\\b",
"\f": "\\f",
"\v": "\\v",
"\a": "\\a",
}
def _process_string_literal(value: str) -> str:
"""
Escape a string value for use as a BigQuery SQL literal.
BigQuery requires backslash escaping for single quotes inside string
literals (``'O\\'Brien'``). Doubled single quotes (``'O''Brien'``) are
**not** valid — BigQuery parses them as two concatenated string literals
without whitespace, causing a syntax error:
``concatenated string literals must be separated by whitespace``.
BigQuery also forbids literal newlines, carriage returns, and other
control characters inside a quoted string; those must be written using
escape sequences (``\\n``, ``\\r``, ``\\t`` …). Control characters
without a named escape are emitted as a ``\\xhh`` hex escape; printable
Unicode passes through unchanged because BigQuery accepts UTF-8 inside
string literals.
The upstream ``sqlalchemy-bigquery`` dialect relies on Python's ``repr()``
to quote values, which switches to double-quote delimiters when the
string contains an apostrophe (e.g. ``repr("O'Brien")`` → ``"O'Brien"``).
Double-quoted tokens inside compiled SQL would be parsed as identifiers,
so the query also fails. This helper always produces a single-quoted
literal.
"""
parts = []
for ch in value:
escape = _BIGQUERY_STRING_ESCAPES.get(ch)
if escape is not None:
parts.append(escape)
elif ord(ch) < 0x20 or ord(ch) == 0x7F:
parts.append(f"\\x{ord(ch):02x}")
else:
parts.append(ch)
return f"'{''.join(parts)}'"
def _monkeypatch_bigquery_string_literal() -> None:
"""
Patch the sqlalchemy-bigquery dialect so that string literals containing
apostrophes are rendered correctly when ``literal_binds=True``.
Without this patch, a filter value like ``O'Brien`` is compiled as the
double-quoted identifier ``"O'Brien"`` instead of the single-quoted literal
``'O\\'Brien'``, causing BigQuery to return a syntax error.
This follows the same pattern used for the Databricks dialect fix in
``superset/db_engine_specs/databricks.py``.
"""
try:
from sqlalchemy_bigquery import BigQueryDialect
class BigQuerySafeString(types.TypeDecorator):
impl = types.String
cache_ok = True
def literal_processor(self, dialect: Any) -> Callable[[str], str]:
if dialect.name == "bigquery":
return _process_string_literal
return super().literal_processor(dialect)
BigQueryDialect.colspecs[types.String] = BigQuerySafeString
except ImportError:
pass
_monkeypatch_bigquery_string_literal()
CONNECTION_DATABASE_PERMISSIONS_REGEX = re.compile(
"Access Denied: Project (?P<project_name>.+?): User does not have "
+ "bigquery.jobs.create permission in project (?P<project>.+?)"

View File

@@ -303,6 +303,7 @@ DEFAULT_GET_DASHBOARD_INFO_COLUMNS: List[str] = [
"created_on",
"changed_on",
"uuid",
"embedded_uuid",
"url",
"created_on_humanized",
"changed_on_humanized",
@@ -427,6 +428,18 @@ class DashboardInfo(BaseModel):
created_on: str | datetime | None = None
changed_on: str | datetime | None = None
uuid: str | None = None
embedded_uuid: str | None = Field(
None,
description=(
"Embedded UUID for this dashboard. This is the UUID required when "
"generating guest tokens for embedded dashboards "
"(resources[].id in the guest token payload). "
"Only present when the dashboard has been configured for embedding "
"via the Embed Dashboard UI. Distinct from `uuid` (the internal "
"dashboard UUID) — using the wrong one causes 403 errors in guest "
"token validation."
),
)
url: str | None = None
created_on_humanized: str | None = None
changed_on_humanized: str | None = None
@@ -1352,6 +1365,9 @@ def dashboard_serializer(dashboard: "Dashboard") -> DashboardInfo:
created_on=dashboard.created_on,
changed_on=dashboard.changed_on,
uuid=str(dashboard.uuid) if dashboard.uuid else None,
embedded_uuid=str(dashboard.embedded[0].uuid)
if dashboard.embedded
else None,
url=absolute_url,
created_on_humanized=dashboard.created_on_humanized,
changed_on_humanized=dashboard.changed_on_humanized,

View File

@@ -155,10 +155,11 @@ async def get_dashboard_info(
from superset.models.dashboard import Dashboard
from superset.models.slice import Slice
# Eager load slices and tags to avoid N+1 queries during serialization.
# Eager load slices, tags, and embedded to avoid N+1 queries.
eager_options = [
subqueryload(Dashboard.slices).subqueryload(Slice.tags),
subqueryload(Dashboard.tags),
subqueryload(Dashboard.embedded),
]
with event_logger.log_context(action="mcp.get_dashboard_info.lookup"):

View File

@@ -18,6 +18,7 @@
"""Unit tests for Superset"""
from typing import Any
from unittest.mock import patch
import pytest
@@ -52,17 +53,43 @@ def test_invalidate_cache(invalidate):
def test_invalidate_existing_cache(invalidate):
db.session.add(CacheKey(cache_key="cache_key", datasource_uid="3__table"))
db.session.commit()
cache_manager.cache.set("cache_key", "value")
cache_manager.data_cache.set("cache_key", "value")
rv = invalidate({"datasource_uids": ["3__table"]})
assert rv.status_code == 201
assert cache_manager.cache.get("cache_key") is None # noqa: E711
assert cache_manager.data_cache.get("cache_key") is None # noqa: E711
assert (
not db.session.query(CacheKey).filter(CacheKey.cache_key == "cache_key").first()
)
def test_invalidate_uses_data_cache_not_default_cache(invalidate):
"""Regression test for #40489.
Chart query results are written through ``cache_manager.data_cache``
(``DATA_CACHE_CONFIG``). When ``CACHE_CONFIG`` and ``DATA_CACHE_CONFIG``
use distinct ``CACHE_KEY_PREFIX`` values, deleting via the default
``cache_manager.cache`` silently misses the underlying Redis keys
because flask-caching prepends the wrong prefix to the DEL call.
"""
db.session.add(CacheKey(cache_key="cache_key", datasource_uid="3__table"))
db.session.commit()
with (
patch.object(cache_manager.data_cache, "delete_many") as data_delete,
patch.object(cache_manager.cache, "delete_many") as default_delete,
):
data_delete.return_value = True
rv = invalidate({"datasource_uids": ["3__table"]})
assert rv.status_code == 201
# Chart-data cache backend (the one that wrote the keys) must be hit.
data_delete.assert_called_once_with("cache_key")
# The default cache must NOT be touched — that's the #40489 regression.
default_delete.assert_not_called()
def test_invalidate_cache_empty_input(invalidate):
rv = invalidate({"datasource_uids": []})
assert rv.status_code == 201
@@ -111,10 +138,10 @@ def test_invalidate_existing_caches(invalidate):
db.session.add(CacheKey(cache_key="cache_keyX", datasource_uid="X__table"))
db.session.commit()
cache_manager.cache.set("cache_key1", "value")
cache_manager.cache.set("cache_key2", "value")
cache_manager.cache.set("cache_key4", "value")
cache_manager.cache.set("cache_keyX", "value")
cache_manager.data_cache.set("cache_key1", "value")
cache_manager.data_cache.set("cache_key2", "value")
cache_manager.data_cache.set("cache_key4", "value")
cache_manager.data_cache.set("cache_keyX", "value")
rv = invalidate(
{
@@ -155,10 +182,10 @@ def test_invalidate_existing_caches(invalidate):
)
assert rv.status_code == 201
assert cache_manager.cache.get("cache_key1") is None
assert cache_manager.cache.get("cache_key2") is None
assert cache_manager.cache.get("cache_key4") is None
assert cache_manager.cache.get("cache_keyX") == "value"
assert cache_manager.data_cache.get("cache_key1") is None
assert cache_manager.data_cache.get("cache_key2") is None
assert cache_manager.data_cache.get("cache_key4") is None
assert cache_manager.data_cache.get("cache_keyX") == "value"
assert (
not db.session.query(CacheKey)
.filter(CacheKey.cache_key.in_({"cache_key1", "cache_key2", "cache_key4"}))

View File

@@ -767,3 +767,265 @@ def test_fetch_data_converts_bigquery_row_objects(mocker: MockerFixture) -> None
assert result == [(1, "a"), (2, "b")]
assert flask_g.bq_memory_limited is False
def test_string_literal_with_apostrophe() -> None:
"""
Test that string literals containing apostrophes are properly escaped
for BigQuery using backslash escaping.
BigQuery requires backslash escaping for single quotes ('O\\'Brien').
Doubled single quotes ('O''Brien') are NOT valid — BigQuery parses them
as two concatenated string literals, causing a syntax error.
The upstream sqlalchemy-bigquery dialect uses ``repr()`` which switches
to double-quote delimiters when the value contains an apostrophe.
Double-quoted tokens are identifiers in BigQuery, causing syntax errors.
"""
from sqlalchemy import column as sa_column
from superset.db_engine_specs.bigquery import BigQueryEngineSpec # noqa: F811
# Trigger module load to ensure the monkey-patch is applied
assert BigQueryEngineSpec is not None
dialect = BigQueryDialect()
stmt = select(sa_column("name")).where(sa_column("name") == "Fernando's")
compiled_sql = str(
stmt.compile(dialect=dialect, compile_kwargs={"literal_binds": True})
)
# The compiled SQL must use single-quoted literal with backslash-escaped
# apostrophes. Doubled single quotes are NOT valid in BigQuery.
assert "= 'Fernando\\'s'" in compiled_sql
# Must NOT contain doubled-quote escaping (BigQuery rejects this)
assert "''" not in compiled_sql
# Must NOT contain double-quoted identifiers
assert '\\"' not in compiled_sql
def test_string_literal_without_apostrophe() -> None:
"""
Test that normal string literals (without apostrophes) still compile
correctly after the monkey-patch.
"""
from sqlalchemy import column as sa_column
from superset.db_engine_specs.bigquery import BigQueryEngineSpec # noqa: F811
assert BigQueryEngineSpec is not None
dialect = BigQueryDialect()
stmt = select(sa_column("name")).where(sa_column("name") == "Fernando")
compiled_sql = str(
stmt.compile(dialect=dialect, compile_kwargs={"literal_binds": True})
)
assert "= 'Fernando'" in compiled_sql
def test_string_literal_in_filter_with_apostrophe() -> None:
"""
Test that IN filters with apostrophes in values compile correctly
using backslash escaping.
"""
from sqlalchemy import column as sa_column
from superset.db_engine_specs.bigquery import BigQueryEngineSpec # noqa: F811
assert BigQueryEngineSpec is not None
dialect = BigQueryDialect()
stmt = select(sa_column("name")).where(
sa_column("name").in_(["Fernando's", "O'Brien"])
)
compiled_sql = str(
stmt.compile(dialect=dialect, compile_kwargs={"literal_binds": True})
)
assert "'Fernando\\'s'" in compiled_sql
assert "'O\\'Brien'" in compiled_sql
# Must NOT contain doubled-quote escaping
assert "''" not in compiled_sql
def test_process_string_literal_directly() -> None:
"""
Test _process_string_literal covers backslash escaping for apostrophes,
control-character escaping (newline/CR/tab/etc.), the ``\\xhh`` fallback
for control chars without a named escape, and pass-through for printable
Unicode and other characters BigQuery accepts unescaped.
"""
from superset.db_engine_specs.bigquery import _process_string_literal
# Plain values
assert _process_string_literal("hello") == "'hello'"
assert _process_string_literal("") == "''"
# Apostrophes (the original fix)
assert _process_string_literal("O'Brien") == "'O\\'Brien'"
assert _process_string_literal("it's a test") == "'it\\'s a test'"
# Backslashes must be escaped before apostrophes
assert _process_string_literal("C:\\path") == "'C:\\\\path'"
assert _process_string_literal("it's C:\\path") == "'it\\'s C:\\\\path'"
# Literal backslash followed by 'n' (two characters, not a newline)
# must produce the two-char sequence '\\n' (escaped backslash + n) so
# BigQuery does not misread it as a newline escape.
assert _process_string_literal("\\n") == "'\\\\n'"
# Control characters must be escaped using named escapes — BigQuery
# rejects literal control characters inside quoted strings.
assert _process_string_literal("foo\nbar") == "'foo\\nbar'"
assert _process_string_literal("foo\rbar") == "'foo\\rbar'"
assert _process_string_literal("foo\tbar") == "'foo\\tbar'"
assert _process_string_literal("a\bb\fc\vd\ae") == "'a\\bb\\fc\\vd\\ae'"
# Control characters without a named escape fall through to ``\\xhh``.
assert _process_string_literal("null\0byte") == "'null\\x00byte'"
assert _process_string_literal("a\x01b") == "'a\\x01b'"
assert _process_string_literal("a\x1bb") == "'a\\x1bb'"
assert _process_string_literal("a\x7fb") == "'a\\x7fb'"
# Double quotes do NOT need escaping in single-quoted BigQuery literals.
assert _process_string_literal('say "hello"') == "'say \"hello\"'"
# Printable Unicode and percent signs pass through unchanged.
assert _process_string_literal("café") == "'café'"
assert _process_string_literal("日本") == "'日本'"
assert _process_string_literal("100%") == "'100%'"
# Combined: apostrophe + newline + backslash + unicode.
assert _process_string_literal("it's\nC:\\café") == "'it\\'s\\nC:\\\\café'"
def test_process_string_literal_no_literal_control_chars() -> None:
"""
Regression test for the issue raised in PR #38835 review: BigQuery
rejects literal control characters inside quoted string literals, so the
output must never contain them as literal characters.
"""
from superset.db_engine_specs.bigquery import _process_string_literal
for char in ["\n", "\r", "\t", "\b", "\f", "\v", "\a", "\0", "\x01", "\x7f"]:
result = _process_string_literal(f"prefix{char}suffix")
assert char not in result, (
f"Literal {char!r} leaked into output {result!r}; "
"BigQuery would reject this literal."
)
def test_string_literal_with_newline_in_filter() -> None:
"""
End-to-end regression test for @rusackas's review feedback on PR #38835:
a filter value containing a newline must compile to valid BigQuery SQL
using the ``\\n`` escape sequence, not a literal newline.
"""
from sqlalchemy import column as sa_column
from superset.db_engine_specs.bigquery import BigQueryEngineSpec # noqa: F811
assert BigQueryEngineSpec is not None
dialect = BigQueryDialect()
stmt = select(sa_column("note")).where(sa_column("note") == "line1\nline2")
compiled_sql = str(
stmt.compile(dialect=dialect, compile_kwargs={"literal_binds": True})
)
# Must use the escape sequence form, not a literal newline.
assert "'line1\\nline2'" in compiled_sql
assert "\n" not in compiled_sql.split("note")[-1]
def test_literal_processor_non_bigquery_dialect() -> None:
"""
Test that BigQuerySafeString.literal_processor falls back to the parent
implementation when used with a non-BigQuery dialect.
"""
from sqlalchemy import create_engine
from superset.db_engine_specs.bigquery import (
_monkeypatch_bigquery_string_literal, # noqa: F811
)
_monkeypatch_bigquery_string_literal()
safe_cls = BigQueryDialect.colspecs[sqltypes.String]
instance = safe_cls()
# Use a non-BigQuery dialect (sqlite)
sqlite_dialect = create_engine("sqlite://").dialect
processor = instance.literal_processor(sqlite_dialect)
# The fallback processor should still produce a valid quoted string
assert processor is not None
def test_monkeypatch_is_applied() -> None:
"""
Test that _monkeypatch_bigquery_string_literal installs the custom
type decorator into BigQueryDialect.colspecs.
"""
from sqlalchemy.sql import sqltypes as sa_sqltypes
from superset.db_engine_specs.bigquery import (
BigQueryEngineSpec, # noqa: F811
)
assert BigQueryEngineSpec is not None
colspecs = BigQueryDialect.colspecs
assert sa_sqltypes.String in colspecs
safe_cls = colspecs[sa_sqltypes.String]
assert safe_cls.__name__ == "BigQuerySafeString"
def test_literal_processor_returns_process_string_literal_for_bigquery() -> None:
"""
Test that BigQuerySafeString.literal_processor returns the
_process_string_literal function when given a BigQuery dialect,
and that calling it produces correctly escaped output.
"""
from superset.db_engine_specs.bigquery import (
_monkeypatch_bigquery_string_literal,
_process_string_literal,
)
_monkeypatch_bigquery_string_literal()
safe_cls = BigQueryDialect.colspecs[sqltypes.String]
instance = safe_cls()
dialect = BigQueryDialect()
processor = instance.literal_processor(dialect)
assert processor is _process_string_literal
assert processor("O'Brien") == "'O\\'Brien'"
assert processor("plain") == "'plain'"
def test_monkeypatch_handles_missing_bigquery_package() -> None:
"""
Test that _monkeypatch_bigquery_string_literal gracefully handles
the case where sqlalchemy_bigquery is not installed.
"""
import builtins
from superset.db_engine_specs.bigquery import (
_monkeypatch_bigquery_string_literal,
)
original_import = builtins.__import__
def mock_import(name: str, *args: Any, **kwargs: Any) -> Any:
if name == "sqlalchemy_bigquery":
raise ImportError("mocked missing package")
return original_import(name, *args, **kwargs)
with mock.patch("builtins.__import__", side_effect=mock_import):
# Should not raise — the except ImportError branch handles it
_monkeypatch_bigquery_string_literal()

View File

@@ -96,6 +96,7 @@ async def test_list_dashboards_basic(mock_list, mcp_server):
dashboard.uuid = "test-dashboard-uuid-1"
dashboard.thumbnail_url = None
dashboard.roles = []
dashboard.embedded = []
dashboard.charts = []
dashboard._mapping = {
"id": dashboard.id,
@@ -162,6 +163,7 @@ async def test_list_dashboards_with_filters(mock_list, mcp_server):
dashboard.uuid = None
dashboard.thumbnail_url = None
dashboard.roles = []
dashboard.embedded = []
dashboard.charts = []
dashboard._mapping = {
"id": dashboard.id,
@@ -257,6 +259,7 @@ async def test_list_dashboards_with_search(mock_list, mcp_server):
dashboard.uuid = None
dashboard.thumbnail_url = None
dashboard.roles = []
dashboard.embedded = []
dashboard.charts = []
dashboard._mapping = {
"id": dashboard.id,
@@ -351,6 +354,7 @@ async def test_get_dashboard_info_success(
dashboard.owners = []
dashboard.tags = []
dashboard.roles = []
dashboard.embedded = []
dashboard.charts = []
dashboard._mapping = {
"id": dashboard.id,
@@ -429,6 +433,7 @@ async def test_get_dashboard_info_permalink_does_not_double_sanitize(
dashboard.owners = []
dashboard.tags = []
dashboard.roles = []
dashboard.embedded = []
dashboard.charts = []
mock_info.return_value = dashboard
permalink_value = {
@@ -521,6 +526,7 @@ async def test_get_dashboard_info_permalink_key_includes_filter_state(
dashboard.owners = []
dashboard.tags = []
dashboard.roles = []
dashboard.embedded = []
dashboard.charts = []
mock_info.return_value = dashboard
@@ -767,6 +773,7 @@ async def test_get_dashboard_info_does_not_expose_access_list_or_roles(
dashboard.owners = [owner]
dashboard.tags = []
dashboard.roles = [dashboard_role]
dashboard.embedded = []
mock_info.return_value = dashboard
@@ -838,6 +845,7 @@ async def test_get_dashboard_info_restricted_user_redacts_data_model_metadata(
dashboard.owners = []
dashboard.tags = []
dashboard.roles = []
dashboard.embedded = []
mock_info.return_value = dashboard
@@ -890,6 +898,7 @@ async def test_get_dashboard_info_restricted_user_redacts_permalink_filter_state
dashboard.owners = []
dashboard.tags = []
dashboard.roles = []
dashboard.embedded = []
mock_info.return_value = dashboard
@@ -1012,6 +1021,88 @@ async def test_list_dashboards_omits_requested_user_directory_fields(
assert field not in data["columns_available"]
@patch("superset.mcp_service.mcp_core.ModelGetInfoCore._find_object")
@pytest.mark.asyncio
async def test_get_dashboard_info_includes_embedded_uuid(mock_find_object, mcp_server):
"""Test that get_dashboard_info returns embedded_uuid when set."""
from superset.models.embedded_dashboard import EmbeddedDashboard
dashboard = Mock()
dashboard.id = 1
dashboard.dashboard_title = "Embedded Dashboard"
dashboard.slug = ""
dashboard.description = None
dashboard.css = None
dashboard.certified_by = None
dashboard.certification_details = None
dashboard.json_metadata = "{}"
dashboard.published = True
dashboard.is_managed_externally = False
dashboard.external_url = None
dashboard.created_on = None
dashboard.changed_on = None
dashboard.created_on_humanized = None
dashboard.changed_on_humanized = None
dashboard.uuid = "94b826a5-dbd5-473d-ab58-1af676ee07e4"
dashboard.url = "/dashboard/1"
dashboard.slices = []
dashboard.owners = []
dashboard.tags = []
dashboard.roles = []
dashboard.embedded = []
embedded = Mock(spec=EmbeddedDashboard)
embedded.uuid = "37c56048-d3f1-452d-b3ae-0879802dcb1f"
dashboard.embedded = [embedded]
mock_find_object.return_value = dashboard
async with Client(mcp_server) as client:
result = await client.call_tool(
"get_dashboard_info", {"request": {"identifier": 1}}
)
assert result.data["uuid"] == "94b826a5-dbd5-473d-ab58-1af676ee07e4"
assert result.data["embedded_uuid"] == "37c56048-d3f1-452d-b3ae-0879802dcb1f"
@patch("superset.mcp_service.mcp_core.ModelGetInfoCore._find_object")
@pytest.mark.asyncio
async def test_get_dashboard_info_embedded_uuid_none_when_not_embedded(
mock_find_object, mcp_server
):
"""Test that embedded_uuid is None when the dashboard has not been configured
for embedding."""
dashboard = Mock()
dashboard.id = 2
dashboard.dashboard_title = "Non-Embedded Dashboard"
dashboard.slug = ""
dashboard.description = None
dashboard.css = None
dashboard.certified_by = None
dashboard.certification_details = None
dashboard.json_metadata = "{}"
dashboard.published = True
dashboard.is_managed_externally = False
dashboard.external_url = None
dashboard.created_on = None
dashboard.changed_on = None
dashboard.created_on_humanized = None
dashboard.changed_on_humanized = None
dashboard.uuid = "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee"
dashboard.url = "/dashboard/2"
dashboard.slices = []
dashboard.owners = []
dashboard.tags = []
dashboard.roles = []
dashboard.embedded = []
mock_find_object.return_value = dashboard
async with Client(mcp_server) as client:
result = await client.call_tool(
"get_dashboard_info", {"request": {"identifier": 2}}
)
assert result.data.get("embedded_uuid") is None
# TODO (Phase 3+): Add tests for get_dashboard_available_filters tool
@@ -1044,6 +1135,7 @@ async def test_get_dashboard_info_by_uuid(mock_find_object, mcp_server):
dashboard.owners = []
dashboard.tags = []
dashboard.roles = []
dashboard.embedded = []
mock_find_object.return_value = dashboard
async with Client(mcp_server) as client:
@@ -1083,6 +1175,7 @@ async def test_get_dashboard_info_by_slug(mock_find_object, mcp_server):
dashboard.owners = []
dashboard.tags = []
dashboard.roles = []
dashboard.embedded = []
mock_find_object.return_value = dashboard
async with Client(mcp_server) as client:
@@ -1122,6 +1215,7 @@ async def test_list_dashboards_custom_uuid_slug_columns(mock_list, mcp_server):
dashboard.external_url = None
dashboard.thumbnail_url = None
dashboard.roles = []
dashboard.embedded = []
dashboard.charts = []
dashboard._mapping = {
"id": dashboard.id,
@@ -1203,6 +1297,7 @@ async def test_list_dashboards_sanitizes_dashboard_descriptions_and_filter_text(
dashboard.external_url = None
dashboard.thumbnail_url = None
dashboard.roles = []
dashboard.embedded = []
dashboard.charts = []
dashboard._mapping = {
"id": dashboard.id,
@@ -1343,6 +1438,7 @@ class TestDashboardDefaultColumnFiltering:
dashboard.external_url = None
dashboard.thumbnail_url = None
dashboard.roles = []
dashboard.embedded = []
dashboard.charts = []
mock_list.return_value = ([dashboard], 1)