mirror of
https://github.com/apache/superset.git
synced 2026-06-27 18:35:32 +00:00
Compare commits
3 Commits
dependabot
...
chore/ci-c
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6162c90ee9 | ||
|
|
0a18779280 | ||
|
|
a147079043 |
2
.github/workflows/pre-commit.yml
vendored
2
.github/workflows/pre-commit.yml
vendored
@@ -63,7 +63,7 @@ jobs:
|
||||
yarn install --immutable
|
||||
|
||||
- name: Cache pre-commit environments
|
||||
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5
|
||||
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
|
||||
with:
|
||||
path: ~/.cache/pre-commit
|
||||
key: pre-commit-v2-${{ runner.os }}-py${{ matrix.python-version }}-${{ hashFiles('.pre-commit-config.yaml') }}
|
||||
|
||||
4
.github/workflows/release.yml
vendored
4
.github/workflows/release.yml
vendored
@@ -56,7 +56,7 @@ jobs:
|
||||
|
||||
- name: Cache npm
|
||||
if: env.HAS_TAGS
|
||||
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5
|
||||
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
|
||||
with:
|
||||
path: ~/.npm # npm cache files are stored in `~/.npm` on Linux/macOS
|
||||
key: ${{ runner.OS }}-node-${{ hashFiles('**/package-lock.json') }}
|
||||
@@ -70,7 +70,7 @@ jobs:
|
||||
run: echo "dir=$(npm config get cache)" >> $GITHUB_OUTPUT
|
||||
- name: Cache npm
|
||||
if: env.HAS_TAGS
|
||||
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5
|
||||
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
|
||||
id: npm-cache # use this to check for `cache-hit` (`steps.npm-cache.outputs.cache-hit != 'true'`)
|
||||
with:
|
||||
path: ${{ steps.npm-cache-dir-path.outputs.dir }}
|
||||
|
||||
@@ -344,6 +344,16 @@ export default function transformProps(
|
||||
data2,
|
||||
currencyCodeColumn,
|
||||
);
|
||||
const getAxisFormatterConfig = (axisIndex?: number) =>
|
||||
axisIndex === 1
|
||||
? {
|
||||
customFormatters: customFormattersSecondary,
|
||||
formatter: formatterSecondary,
|
||||
}
|
||||
: {
|
||||
customFormatters,
|
||||
formatter,
|
||||
};
|
||||
|
||||
const primarySeries = new Set<string>();
|
||||
const secondarySeries = new Set<string>();
|
||||
@@ -422,6 +432,8 @@ export default function transformProps(
|
||||
let [minSecondary, maxSecondary] = (yAxisBoundsSecondary || []).map(
|
||||
parseAxisBound,
|
||||
);
|
||||
const getAxisMax = (axisIndex?: number) =>
|
||||
axisIndex === 1 ? maxSecondary : yAxisMax;
|
||||
|
||||
const array = ensureIsArray(chartProps.rawFormData?.time_compare);
|
||||
const inverted = invert(verboseMap);
|
||||
@@ -445,10 +457,11 @@ export default function transformProps(
|
||||
// When no groupby, format as just the entry name with optional query identifier
|
||||
displayName = showQueryIdentifiers ? `${entryName} (Query A)` : entryName;
|
||||
}
|
||||
const axisFormatterConfig = getAxisFormatterConfig(yAxisIndex);
|
||||
|
||||
const seriesFormatter = getFormatter(
|
||||
customFormatters,
|
||||
formatter,
|
||||
axisFormatterConfig.customFormatters,
|
||||
axisFormatterConfig.formatter,
|
||||
metrics,
|
||||
labelMap?.[seriesName]?.[0],
|
||||
!!contributionMode,
|
||||
@@ -480,7 +493,7 @@ export default function transformProps(
|
||||
formatter:
|
||||
seriesType === EchartsTimeseriesSeriesType.Bar
|
||||
? getOverMaxHiddenFormatter({
|
||||
max: yAxisMax,
|
||||
max: getAxisMax(yAxisIndex),
|
||||
formatter: seriesFormatter,
|
||||
})
|
||||
: seriesFormatter,
|
||||
@@ -518,10 +531,11 @@ export default function transformProps(
|
||||
// When no groupby, format as just the entry name with optional query identifier
|
||||
displayName = showQueryIdentifiers ? `${entryName} (Query B)` : entryName;
|
||||
}
|
||||
const axisFormatterConfig = getAxisFormatterConfig(yAxisIndexB);
|
||||
|
||||
const seriesFormatter = getFormatter(
|
||||
customFormattersSecondary,
|
||||
formatterSecondary,
|
||||
axisFormatterConfig.customFormatters,
|
||||
axisFormatterConfig.formatter,
|
||||
metricsB,
|
||||
labelMapB?.[seriesName]?.[0],
|
||||
!!contributionMode,
|
||||
@@ -554,7 +568,7 @@ export default function transformProps(
|
||||
formatter:
|
||||
seriesTypeB === EchartsTimeseriesSeriesType.Bar
|
||||
? getOverMaxHiddenFormatter({
|
||||
max: maxSecondary,
|
||||
max: getAxisMax(yAxisIndexB),
|
||||
formatter: seriesFormatter,
|
||||
})
|
||||
: seriesFormatter,
|
||||
|
||||
@@ -35,13 +35,26 @@ import {
|
||||
} from '../../src';
|
||||
import transformProps from '../../src/MixedTimeseries/transformProps';
|
||||
import {
|
||||
DEFAULT_FORM_DATA,
|
||||
EchartsMixedTimeseriesFormData,
|
||||
EchartsMixedTimeseriesProps,
|
||||
} from '../../src/MixedTimeseries/types';
|
||||
import { DEFAULT_FORM_DATA } from '../../src/MixedTimeseries/types';
|
||||
import { createEchartsTimeseriesTestChartProps } from '../helpers';
|
||||
import type { SeriesOption } from 'echarts';
|
||||
|
||||
type LabelFormatterParams = {
|
||||
value: [number, number];
|
||||
dataIndex: number;
|
||||
seriesIndex: number;
|
||||
seriesName: string;
|
||||
};
|
||||
|
||||
type SeriesWithLabelFormatter = SeriesOption & {
|
||||
label?: {
|
||||
formatter?: (params: LabelFormatterParams) => string | number;
|
||||
};
|
||||
};
|
||||
|
||||
/**
|
||||
* Creates a partial ChartDataResponseResult for testing.
|
||||
* Only includes the fields needed for tests, with sensible defaults for required fields.
|
||||
@@ -148,6 +161,30 @@ const queriesData: ChartDataResponseResult[] = [
|
||||
createTestQueryData(defaultQueryRows, { label_map: defaultLabelMap }),
|
||||
];
|
||||
|
||||
function getSeriesWithLabelFormatter(
|
||||
series: SeriesOption[],
|
||||
name: string,
|
||||
): SeriesWithLabelFormatter {
|
||||
const result = series.find(seriesOption => seriesOption.name === name);
|
||||
expect(result).toBeDefined();
|
||||
expect((result as SeriesWithLabelFormatter).label?.formatter).toBeDefined();
|
||||
return result as SeriesWithLabelFormatter;
|
||||
}
|
||||
|
||||
function formatSeriesLabel(
|
||||
series: SeriesWithLabelFormatter,
|
||||
value: [number, number],
|
||||
) {
|
||||
const formatter = series.label?.formatter;
|
||||
expect(formatter).toBeDefined();
|
||||
return formatter?.({
|
||||
dataIndex: 0,
|
||||
seriesIndex: 0,
|
||||
seriesName: String(series.name),
|
||||
value,
|
||||
});
|
||||
}
|
||||
|
||||
test('should transform chart props for viz with showQueryIdentifiers=false', () => {
|
||||
const chartProps = createEchartsTimeseriesTestChartProps<
|
||||
EchartsMixedTimeseriesFormData,
|
||||
@@ -232,6 +269,162 @@ test('should transform chart props for viz with showQueryIdentifiers=true', () =
|
||||
]);
|
||||
});
|
||||
|
||||
test('formats value labels with the formatter for the assigned y-axis', () => {
|
||||
const timestamp = 1704067200000;
|
||||
const queryAData = createTestQueryData(
|
||||
[{ __timestamp: timestamp, lineMetric: 0.25 }],
|
||||
{
|
||||
colnames: ['__timestamp', 'lineMetric'],
|
||||
coltypes: [GenericDataType.Temporal, GenericDataType.Numeric],
|
||||
label_map: { lineMetric: ['lineMetric'] },
|
||||
},
|
||||
);
|
||||
const queryBData = createTestQueryData(
|
||||
[{ __timestamp: timestamp, barMetric: 0.5 }],
|
||||
{
|
||||
colnames: ['__timestamp', 'barMetric'],
|
||||
coltypes: [GenericDataType.Temporal, GenericDataType.Numeric],
|
||||
label_map: { 'barMetric (1)': ['barMetric'] },
|
||||
},
|
||||
);
|
||||
const chartProps = createEchartsTimeseriesTestChartProps<
|
||||
EchartsMixedTimeseriesFormData,
|
||||
EchartsMixedTimeseriesProps
|
||||
>({
|
||||
...MIXED_TIMESERIES_CHART_PROPS_DEFAULTS,
|
||||
defaultQueriesData: [queryAData, queryBData],
|
||||
formData: {
|
||||
...formData,
|
||||
groupby: [],
|
||||
groupbyB: [],
|
||||
metrics: ['lineMetric'],
|
||||
metricsB: ['barMetric'],
|
||||
showValue: true,
|
||||
showValueB: true,
|
||||
stack: null,
|
||||
stackB: null,
|
||||
x_axis: '__timestamp',
|
||||
yAxisFormat: '.0%',
|
||||
yAxisFormatSecondary: ',.1f',
|
||||
yAxisIndex: 1,
|
||||
yAxisIndexB: 0,
|
||||
},
|
||||
queriesData: [queryAData, queryBData],
|
||||
});
|
||||
|
||||
const { echartOptions } = transformProps(chartProps);
|
||||
const series = echartOptions.series as SeriesOption[];
|
||||
const lineSeries = getSeriesWithLabelFormatter(series, 'lineMetric');
|
||||
const barSeries = getSeriesWithLabelFormatter(series, 'barMetric');
|
||||
|
||||
expect(formatSeriesLabel(lineSeries, [timestamp, 0.25])).toBe('0.3');
|
||||
expect(formatSeriesLabel(barSeries, [timestamp, 0.5])).toBe('50%');
|
||||
});
|
||||
|
||||
test('formats value labels correctly when y-axis assignments are reversed', () => {
|
||||
const timestamp = 1704067200000;
|
||||
const queryAData = createTestQueryData(
|
||||
[{ __timestamp: timestamp, lineMetric: 0.25 }],
|
||||
{
|
||||
colnames: ['__timestamp', 'lineMetric'],
|
||||
coltypes: [GenericDataType.Temporal, GenericDataType.Numeric],
|
||||
label_map: { lineMetric: ['lineMetric'] },
|
||||
},
|
||||
);
|
||||
const queryBData = createTestQueryData(
|
||||
[{ __timestamp: timestamp, barMetric: 0.5 }],
|
||||
{
|
||||
colnames: ['__timestamp', 'barMetric'],
|
||||
coltypes: [GenericDataType.Temporal, GenericDataType.Numeric],
|
||||
label_map: { 'barMetric (1)': ['barMetric'] },
|
||||
},
|
||||
);
|
||||
const chartProps = createEchartsTimeseriesTestChartProps<
|
||||
EchartsMixedTimeseriesFormData,
|
||||
EchartsMixedTimeseriesProps
|
||||
>({
|
||||
...MIXED_TIMESERIES_CHART_PROPS_DEFAULTS,
|
||||
defaultQueriesData: [queryAData, queryBData],
|
||||
formData: {
|
||||
...formData,
|
||||
groupby: [],
|
||||
groupbyB: [],
|
||||
metrics: ['lineMetric'],
|
||||
metricsB: ['barMetric'],
|
||||
showValue: true,
|
||||
showValueB: true,
|
||||
stack: null,
|
||||
stackB: null,
|
||||
x_axis: '__timestamp',
|
||||
yAxisFormat: '.0%',
|
||||
yAxisFormatSecondary: ',.1f',
|
||||
yAxisIndex: 0,
|
||||
yAxisIndexB: 1,
|
||||
},
|
||||
queriesData: [queryAData, queryBData],
|
||||
});
|
||||
|
||||
const { echartOptions } = transformProps(chartProps);
|
||||
const series = echartOptions.series as SeriesOption[];
|
||||
const lineSeries = getSeriesWithLabelFormatter(series, 'lineMetric');
|
||||
const barSeries = getSeriesWithLabelFormatter(series, 'barMetric');
|
||||
|
||||
expect(formatSeriesLabel(lineSeries, [timestamp, 0.25])).toBe('25%');
|
||||
expect(formatSeriesLabel(barSeries, [timestamp, 0.5])).toBe('0.5');
|
||||
});
|
||||
|
||||
test('keeps bar value label clipping aligned with the assigned y-axis', () => {
|
||||
const timestamp = 1704067200000;
|
||||
const queryAData = createTestQueryData(
|
||||
[{ __timestamp: timestamp, lineMetric: 0.25 }],
|
||||
{
|
||||
colnames: ['__timestamp', 'lineMetric'],
|
||||
coltypes: [GenericDataType.Temporal, GenericDataType.Numeric],
|
||||
label_map: { lineMetric: ['lineMetric'] },
|
||||
},
|
||||
);
|
||||
const queryBData = createTestQueryData(
|
||||
[{ __timestamp: timestamp, barMetric: 0.5 }],
|
||||
{
|
||||
colnames: ['__timestamp', 'barMetric'],
|
||||
coltypes: [GenericDataType.Temporal, GenericDataType.Numeric],
|
||||
label_map: { 'barMetric (1)': ['barMetric'] },
|
||||
},
|
||||
);
|
||||
const chartProps = createEchartsTimeseriesTestChartProps<
|
||||
EchartsMixedTimeseriesFormData,
|
||||
EchartsMixedTimeseriesProps
|
||||
>({
|
||||
...MIXED_TIMESERIES_CHART_PROPS_DEFAULTS,
|
||||
defaultQueriesData: [queryAData, queryBData],
|
||||
formData: {
|
||||
...formData,
|
||||
groupby: [],
|
||||
groupbyB: [],
|
||||
metrics: ['lineMetric'],
|
||||
metricsB: ['barMetric'],
|
||||
showValue: true,
|
||||
showValueB: true,
|
||||
stack: null,
|
||||
stackB: null,
|
||||
x_axis: '__timestamp',
|
||||
yAxisBounds: [undefined, 1],
|
||||
yAxisBoundsSecondary: [undefined, 0.1],
|
||||
yAxisFormat: '.0%',
|
||||
yAxisFormatSecondary: ',.1f',
|
||||
yAxisIndex: 0,
|
||||
yAxisIndexB: 1,
|
||||
},
|
||||
queriesData: [queryAData, queryBData],
|
||||
});
|
||||
|
||||
const { echartOptions } = transformProps(chartProps);
|
||||
const series = echartOptions.series as SeriesOption[];
|
||||
const barSeries = getSeriesWithLabelFormatter(series, 'barMetric');
|
||||
|
||||
expect(formatSeriesLabel(barSeries, [timestamp, 0.5])).toBe('');
|
||||
});
|
||||
|
||||
describe('legend sorting', () => {
|
||||
const getChartProps = (overrides = {}) =>
|
||||
createEchartsTimeseriesTestChartProps<
|
||||
|
||||
@@ -23,7 +23,7 @@ import sys
|
||||
import urllib
|
||||
from datetime import datetime
|
||||
from re import Pattern
|
||||
from typing import Any, TYPE_CHECKING, TypedDict
|
||||
from typing import Any, Callable, TYPE_CHECKING, TypedDict
|
||||
|
||||
import pandas as pd
|
||||
from apispec import APISpec
|
||||
@@ -83,6 +83,97 @@ if TYPE_CHECKING:
|
||||
|
||||
logger = logging.getLogger()
|
||||
|
||||
|
||||
# BigQuery string escape sequences keyed off documented escapes in
|
||||
# https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#string_and_bytes_literals.
|
||||
# Backslash MUST be first so subsequent escapes don't double-escape their own
|
||||
# backslash. ``\?``, ``\"`` and ``\``` are valid BigQuery escapes but
|
||||
# intentionally omitted because those characters do not require escaping
|
||||
# inside a single-quoted literal. ``\0`` is NOT a valid BigQuery escape
|
||||
# (octal escapes require exactly three digits); the null byte instead falls
|
||||
# through to the ``\xhh`` fallback below.
|
||||
_BIGQUERY_STRING_ESCAPES = {
|
||||
"\\": "\\\\",
|
||||
"'": "\\'",
|
||||
"\n": "\\n",
|
||||
"\r": "\\r",
|
||||
"\t": "\\t",
|
||||
"\b": "\\b",
|
||||
"\f": "\\f",
|
||||
"\v": "\\v",
|
||||
"\a": "\\a",
|
||||
}
|
||||
|
||||
|
||||
def _process_string_literal(value: str) -> str:
|
||||
"""
|
||||
Escape a string value for use as a BigQuery SQL literal.
|
||||
|
||||
BigQuery requires backslash escaping for single quotes inside string
|
||||
literals (``'O\\'Brien'``). Doubled single quotes (``'O''Brien'``) are
|
||||
**not** valid — BigQuery parses them as two concatenated string literals
|
||||
without whitespace, causing a syntax error:
|
||||
``concatenated string literals must be separated by whitespace``.
|
||||
|
||||
BigQuery also forbids literal newlines, carriage returns, and other
|
||||
control characters inside a quoted string; those must be written using
|
||||
escape sequences (``\\n``, ``\\r``, ``\\t`` …). Control characters
|
||||
without a named escape are emitted as a ``\\xhh`` hex escape; printable
|
||||
Unicode passes through unchanged because BigQuery accepts UTF-8 inside
|
||||
string literals.
|
||||
|
||||
The upstream ``sqlalchemy-bigquery`` dialect relies on Python's ``repr()``
|
||||
to quote values, which switches to double-quote delimiters when the
|
||||
string contains an apostrophe (e.g. ``repr("O'Brien")`` → ``"O'Brien"``).
|
||||
Double-quoted tokens inside compiled SQL would be parsed as identifiers,
|
||||
so the query also fails. This helper always produces a single-quoted
|
||||
literal.
|
||||
"""
|
||||
parts = []
|
||||
for ch in value:
|
||||
escape = _BIGQUERY_STRING_ESCAPES.get(ch)
|
||||
if escape is not None:
|
||||
parts.append(escape)
|
||||
elif ord(ch) < 0x20 or ord(ch) == 0x7F:
|
||||
parts.append(f"\\x{ord(ch):02x}")
|
||||
else:
|
||||
parts.append(ch)
|
||||
return f"'{''.join(parts)}'"
|
||||
|
||||
|
||||
def _monkeypatch_bigquery_string_literal() -> None:
|
||||
"""
|
||||
Patch the sqlalchemy-bigquery dialect so that string literals containing
|
||||
apostrophes are rendered correctly when ``literal_binds=True``.
|
||||
|
||||
Without this patch, a filter value like ``O'Brien`` is compiled as the
|
||||
double-quoted identifier ``"O'Brien"`` instead of the single-quoted literal
|
||||
``'O\\'Brien'``, causing BigQuery to return a syntax error.
|
||||
|
||||
This follows the same pattern used for the Databricks dialect fix in
|
||||
``superset/db_engine_specs/databricks.py``.
|
||||
"""
|
||||
try:
|
||||
from sqlalchemy_bigquery import BigQueryDialect
|
||||
|
||||
class BigQuerySafeString(types.TypeDecorator):
|
||||
impl = types.String
|
||||
cache_ok = True
|
||||
|
||||
def literal_processor(self, dialect: Any) -> Callable[[str], str]:
|
||||
if dialect.name == "bigquery":
|
||||
return _process_string_literal
|
||||
return super().literal_processor(dialect)
|
||||
|
||||
BigQueryDialect.colspecs[types.String] = BigQuerySafeString
|
||||
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
|
||||
_monkeypatch_bigquery_string_literal()
|
||||
|
||||
|
||||
CONNECTION_DATABASE_PERMISSIONS_REGEX = re.compile(
|
||||
"Access Denied: Project (?P<project_name>.+?): User does not have "
|
||||
+ "bigquery.jobs.create permission in project (?P<project>.+?)"
|
||||
|
||||
@@ -767,3 +767,265 @@ def test_fetch_data_converts_bigquery_row_objects(mocker: MockerFixture) -> None
|
||||
|
||||
assert result == [(1, "a"), (2, "b")]
|
||||
assert flask_g.bq_memory_limited is False
|
||||
|
||||
|
||||
def test_string_literal_with_apostrophe() -> None:
|
||||
"""
|
||||
Test that string literals containing apostrophes are properly escaped
|
||||
for BigQuery using backslash escaping.
|
||||
|
||||
BigQuery requires backslash escaping for single quotes ('O\\'Brien').
|
||||
Doubled single quotes ('O''Brien') are NOT valid — BigQuery parses them
|
||||
as two concatenated string literals, causing a syntax error.
|
||||
The upstream sqlalchemy-bigquery dialect uses ``repr()`` which switches
|
||||
to double-quote delimiters when the value contains an apostrophe.
|
||||
Double-quoted tokens are identifiers in BigQuery, causing syntax errors.
|
||||
"""
|
||||
from sqlalchemy import column as sa_column
|
||||
|
||||
from superset.db_engine_specs.bigquery import BigQueryEngineSpec # noqa: F811
|
||||
|
||||
# Trigger module load to ensure the monkey-patch is applied
|
||||
assert BigQueryEngineSpec is not None
|
||||
|
||||
dialect = BigQueryDialect()
|
||||
|
||||
stmt = select(sa_column("name")).where(sa_column("name") == "Fernando's")
|
||||
compiled_sql = str(
|
||||
stmt.compile(dialect=dialect, compile_kwargs={"literal_binds": True})
|
||||
)
|
||||
|
||||
# The compiled SQL must use single-quoted literal with backslash-escaped
|
||||
# apostrophes. Doubled single quotes are NOT valid in BigQuery.
|
||||
assert "= 'Fernando\\'s'" in compiled_sql
|
||||
# Must NOT contain doubled-quote escaping (BigQuery rejects this)
|
||||
assert "''" not in compiled_sql
|
||||
# Must NOT contain double-quoted identifiers
|
||||
assert '\\"' not in compiled_sql
|
||||
|
||||
|
||||
def test_string_literal_without_apostrophe() -> None:
|
||||
"""
|
||||
Test that normal string literals (without apostrophes) still compile
|
||||
correctly after the monkey-patch.
|
||||
"""
|
||||
from sqlalchemy import column as sa_column
|
||||
|
||||
from superset.db_engine_specs.bigquery import BigQueryEngineSpec # noqa: F811
|
||||
|
||||
assert BigQueryEngineSpec is not None
|
||||
|
||||
dialect = BigQueryDialect()
|
||||
|
||||
stmt = select(sa_column("name")).where(sa_column("name") == "Fernando")
|
||||
compiled_sql = str(
|
||||
stmt.compile(dialect=dialect, compile_kwargs={"literal_binds": True})
|
||||
)
|
||||
|
||||
assert "= 'Fernando'" in compiled_sql
|
||||
|
||||
|
||||
def test_string_literal_in_filter_with_apostrophe() -> None:
|
||||
"""
|
||||
Test that IN filters with apostrophes in values compile correctly
|
||||
using backslash escaping.
|
||||
"""
|
||||
from sqlalchemy import column as sa_column
|
||||
|
||||
from superset.db_engine_specs.bigquery import BigQueryEngineSpec # noqa: F811
|
||||
|
||||
assert BigQueryEngineSpec is not None
|
||||
|
||||
dialect = BigQueryDialect()
|
||||
|
||||
stmt = select(sa_column("name")).where(
|
||||
sa_column("name").in_(["Fernando's", "O'Brien"])
|
||||
)
|
||||
compiled_sql = str(
|
||||
stmt.compile(dialect=dialect, compile_kwargs={"literal_binds": True})
|
||||
)
|
||||
|
||||
assert "'Fernando\\'s'" in compiled_sql
|
||||
assert "'O\\'Brien'" in compiled_sql
|
||||
# Must NOT contain doubled-quote escaping
|
||||
assert "''" not in compiled_sql
|
||||
|
||||
|
||||
def test_process_string_literal_directly() -> None:
|
||||
"""
|
||||
Test _process_string_literal covers backslash escaping for apostrophes,
|
||||
control-character escaping (newline/CR/tab/etc.), the ``\\xhh`` fallback
|
||||
for control chars without a named escape, and pass-through for printable
|
||||
Unicode and other characters BigQuery accepts unescaped.
|
||||
"""
|
||||
from superset.db_engine_specs.bigquery import _process_string_literal
|
||||
|
||||
# Plain values
|
||||
assert _process_string_literal("hello") == "'hello'"
|
||||
assert _process_string_literal("") == "''"
|
||||
|
||||
# Apostrophes (the original fix)
|
||||
assert _process_string_literal("O'Brien") == "'O\\'Brien'"
|
||||
assert _process_string_literal("it's a test") == "'it\\'s a test'"
|
||||
|
||||
# Backslashes must be escaped before apostrophes
|
||||
assert _process_string_literal("C:\\path") == "'C:\\\\path'"
|
||||
assert _process_string_literal("it's C:\\path") == "'it\\'s C:\\\\path'"
|
||||
|
||||
# Literal backslash followed by 'n' (two characters, not a newline)
|
||||
# must produce the two-char sequence '\\n' (escaped backslash + n) so
|
||||
# BigQuery does not misread it as a newline escape.
|
||||
assert _process_string_literal("\\n") == "'\\\\n'"
|
||||
|
||||
# Control characters must be escaped using named escapes — BigQuery
|
||||
# rejects literal control characters inside quoted strings.
|
||||
assert _process_string_literal("foo\nbar") == "'foo\\nbar'"
|
||||
assert _process_string_literal("foo\rbar") == "'foo\\rbar'"
|
||||
assert _process_string_literal("foo\tbar") == "'foo\\tbar'"
|
||||
assert _process_string_literal("a\bb\fc\vd\ae") == "'a\\bb\\fc\\vd\\ae'"
|
||||
|
||||
# Control characters without a named escape fall through to ``\\xhh``.
|
||||
assert _process_string_literal("null\0byte") == "'null\\x00byte'"
|
||||
assert _process_string_literal("a\x01b") == "'a\\x01b'"
|
||||
assert _process_string_literal("a\x1bb") == "'a\\x1bb'"
|
||||
assert _process_string_literal("a\x7fb") == "'a\\x7fb'"
|
||||
|
||||
# Double quotes do NOT need escaping in single-quoted BigQuery literals.
|
||||
assert _process_string_literal('say "hello"') == "'say \"hello\"'"
|
||||
|
||||
# Printable Unicode and percent signs pass through unchanged.
|
||||
assert _process_string_literal("café") == "'café'"
|
||||
assert _process_string_literal("日本") == "'日本'"
|
||||
assert _process_string_literal("100%") == "'100%'"
|
||||
|
||||
# Combined: apostrophe + newline + backslash + unicode.
|
||||
assert _process_string_literal("it's\nC:\\café") == "'it\\'s\\nC:\\\\café'"
|
||||
|
||||
|
||||
def test_process_string_literal_no_literal_control_chars() -> None:
|
||||
"""
|
||||
Regression test for the issue raised in PR #38835 review: BigQuery
|
||||
rejects literal control characters inside quoted string literals, so the
|
||||
output must never contain them as literal characters.
|
||||
"""
|
||||
from superset.db_engine_specs.bigquery import _process_string_literal
|
||||
|
||||
for char in ["\n", "\r", "\t", "\b", "\f", "\v", "\a", "\0", "\x01", "\x7f"]:
|
||||
result = _process_string_literal(f"prefix{char}suffix")
|
||||
assert char not in result, (
|
||||
f"Literal {char!r} leaked into output {result!r}; "
|
||||
"BigQuery would reject this literal."
|
||||
)
|
||||
|
||||
|
||||
def test_string_literal_with_newline_in_filter() -> None:
|
||||
"""
|
||||
End-to-end regression test for @rusackas's review feedback on PR #38835:
|
||||
a filter value containing a newline must compile to valid BigQuery SQL
|
||||
using the ``\\n`` escape sequence, not a literal newline.
|
||||
"""
|
||||
from sqlalchemy import column as sa_column
|
||||
|
||||
from superset.db_engine_specs.bigquery import BigQueryEngineSpec # noqa: F811
|
||||
|
||||
assert BigQueryEngineSpec is not None
|
||||
|
||||
dialect = BigQueryDialect()
|
||||
stmt = select(sa_column("note")).where(sa_column("note") == "line1\nline2")
|
||||
compiled_sql = str(
|
||||
stmt.compile(dialect=dialect, compile_kwargs={"literal_binds": True})
|
||||
)
|
||||
|
||||
# Must use the escape sequence form, not a literal newline.
|
||||
assert "'line1\\nline2'" in compiled_sql
|
||||
assert "\n" not in compiled_sql.split("note")[-1]
|
||||
|
||||
|
||||
def test_literal_processor_non_bigquery_dialect() -> None:
|
||||
"""
|
||||
Test that BigQuerySafeString.literal_processor falls back to the parent
|
||||
implementation when used with a non-BigQuery dialect.
|
||||
"""
|
||||
from sqlalchemy import create_engine
|
||||
|
||||
from superset.db_engine_specs.bigquery import (
|
||||
_monkeypatch_bigquery_string_literal, # noqa: F811
|
||||
)
|
||||
|
||||
_monkeypatch_bigquery_string_literal()
|
||||
|
||||
safe_cls = BigQueryDialect.colspecs[sqltypes.String]
|
||||
instance = safe_cls()
|
||||
|
||||
# Use a non-BigQuery dialect (sqlite)
|
||||
sqlite_dialect = create_engine("sqlite://").dialect
|
||||
processor = instance.literal_processor(sqlite_dialect)
|
||||
|
||||
# The fallback processor should still produce a valid quoted string
|
||||
assert processor is not None
|
||||
|
||||
|
||||
def test_monkeypatch_is_applied() -> None:
|
||||
"""
|
||||
Test that _monkeypatch_bigquery_string_literal installs the custom
|
||||
type decorator into BigQueryDialect.colspecs.
|
||||
"""
|
||||
from sqlalchemy.sql import sqltypes as sa_sqltypes
|
||||
|
||||
from superset.db_engine_specs.bigquery import (
|
||||
BigQueryEngineSpec, # noqa: F811
|
||||
)
|
||||
|
||||
assert BigQueryEngineSpec is not None
|
||||
|
||||
colspecs = BigQueryDialect.colspecs
|
||||
assert sa_sqltypes.String in colspecs
|
||||
safe_cls = colspecs[sa_sqltypes.String]
|
||||
assert safe_cls.__name__ == "BigQuerySafeString"
|
||||
|
||||
|
||||
def test_literal_processor_returns_process_string_literal_for_bigquery() -> None:
|
||||
"""
|
||||
Test that BigQuerySafeString.literal_processor returns the
|
||||
_process_string_literal function when given a BigQuery dialect,
|
||||
and that calling it produces correctly escaped output.
|
||||
"""
|
||||
from superset.db_engine_specs.bigquery import (
|
||||
_monkeypatch_bigquery_string_literal,
|
||||
_process_string_literal,
|
||||
)
|
||||
|
||||
_monkeypatch_bigquery_string_literal()
|
||||
|
||||
safe_cls = BigQueryDialect.colspecs[sqltypes.String]
|
||||
instance = safe_cls()
|
||||
|
||||
dialect = BigQueryDialect()
|
||||
processor = instance.literal_processor(dialect)
|
||||
|
||||
assert processor is _process_string_literal
|
||||
assert processor("O'Brien") == "'O\\'Brien'"
|
||||
assert processor("plain") == "'plain'"
|
||||
|
||||
|
||||
def test_monkeypatch_handles_missing_bigquery_package() -> None:
|
||||
"""
|
||||
Test that _monkeypatch_bigquery_string_literal gracefully handles
|
||||
the case where sqlalchemy_bigquery is not installed.
|
||||
"""
|
||||
import builtins
|
||||
|
||||
from superset.db_engine_specs.bigquery import (
|
||||
_monkeypatch_bigquery_string_literal,
|
||||
)
|
||||
|
||||
original_import = builtins.__import__
|
||||
|
||||
def mock_import(name: str, *args: Any, **kwargs: Any) -> Any:
|
||||
if name == "sqlalchemy_bigquery":
|
||||
raise ImportError("mocked missing package")
|
||||
return original_import(name, *args, **kwargs)
|
||||
|
||||
with mock.patch("builtins.__import__", side_effect=mock_import):
|
||||
# Should not raise — the except ImportError branch handles it
|
||||
_monkeypatch_bigquery_string_literal()
|
||||
|
||||
Reference in New Issue
Block a user