mirror of
https://github.com/apache/superset.git
synced 2026-06-14 12:09:14 +00:00
Compare commits
14 Commits
fix/deckgl
...
d3-localiz
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ad2a1fe92f | ||
|
|
7d85b212ee | ||
|
|
9d55bed7ae | ||
|
|
65cfebd889 | ||
|
|
73eb0c11d4 | ||
|
|
6cb9819d10 | ||
|
|
01ac966b83 | ||
|
|
97e5f0631d | ||
|
|
b7acb7984f | ||
|
|
d3919cf24f | ||
|
|
27889651b3 | ||
|
|
361fe6fe89 | ||
|
|
8506d70242 | ||
|
|
00a53eec2d |
2
.github/CODEOWNERS
vendored
2
.github/CODEOWNERS
vendored
@@ -20,7 +20,7 @@
|
||||
|
||||
# Notify PMC members of changes to GitHub Actions
|
||||
|
||||
/.github/ @villebro @geido @eschutho @rusackas @betodealmeida @nytai @mistercrunch @craig-rueda @kgabryje @dpgaspar @sadpandajoe
|
||||
/.github/ @villebro @geido @eschutho @rusackas @betodealmeida @nytai @mistercrunch @craig-rueda @kgabryje @dpgaspar @sadpandajoe @hainenber
|
||||
|
||||
# Notify PMC members of changes to required GitHub Actions
|
||||
|
||||
|
||||
2
.github/workflows/ephemeral-env-pr-close.yml
vendored
2
.github/workflows/ephemeral-env-pr-close.yml
vendored
@@ -33,7 +33,7 @@ jobs:
|
||||
pull-requests: write
|
||||
steps:
|
||||
- name: Configure AWS credentials
|
||||
uses: aws-actions/configure-aws-credentials@v5
|
||||
uses: aws-actions/configure-aws-credentials@v6
|
||||
with:
|
||||
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
||||
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
||||
|
||||
4
.github/workflows/ephemeral-env.yml
vendored
4
.github/workflows/ephemeral-env.yml
vendored
@@ -189,7 +189,7 @@ jobs:
|
||||
--extra-flags "--build-arg INCLUDE_CHROMIUM=false"
|
||||
|
||||
- name: Configure AWS credentials
|
||||
uses: aws-actions/configure-aws-credentials@v5
|
||||
uses: aws-actions/configure-aws-credentials@v6
|
||||
with:
|
||||
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
||||
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
||||
@@ -225,7 +225,7 @@ jobs:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Configure AWS credentials
|
||||
uses: aws-actions/configure-aws-credentials@v5
|
||||
uses: aws-actions/configure-aws-credentials@v6
|
||||
with:
|
||||
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
||||
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
||||
|
||||
1114
superset-embedded-sdk/package-lock.json
generated
1114
superset-embedded-sdk/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
44
superset-frontend/package-lock.json
generated
44
superset-frontend/package-lock.json
generated
@@ -89,7 +89,7 @@
|
||||
"json-stringify-pretty-compact": "^2.0.0",
|
||||
"lodash": "^4.17.23",
|
||||
"mapbox-gl": "^3.18.1",
|
||||
"markdown-to-jsx": "^9.6.1",
|
||||
"markdown-to-jsx": "^9.7.2",
|
||||
"match-sorter": "^6.3.4",
|
||||
"memoize-one": "^5.2.1",
|
||||
"mousetrap": "^1.6.5",
|
||||
@@ -144,7 +144,7 @@
|
||||
"@applitools/eyes-storybook": "^3.63.10",
|
||||
"@babel/cli": "^7.28.6",
|
||||
"@babel/compat-data": "^7.28.4",
|
||||
"@babel/core": "^7.28.6",
|
||||
"@babel/core": "^7.29.0",
|
||||
"@babel/eslint-parser": "^7.28.6",
|
||||
"@babel/node": "^7.28.6",
|
||||
"@babel/plugin-syntax-dynamic-import": "^7.8.3",
|
||||
@@ -1184,21 +1184,21 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@babel/core": {
|
||||
"version": "7.28.6",
|
||||
"resolved": "https://registry.npmjs.org/@babel/core/-/core-7.28.6.tgz",
|
||||
"integrity": "sha512-H3mcG6ZDLTlYfaSNi0iOKkigqMFvkTKlGUYlD8GW7nNOYRrevuA46iTypPyv+06V3fEmvvazfntkBU34L0azAw==",
|
||||
"version": "7.29.0",
|
||||
"resolved": "https://registry.npmjs.org/@babel/core/-/core-7.29.0.tgz",
|
||||
"integrity": "sha512-CGOfOJqWjg2qW/Mb6zNsDm+u5vFQ8DxXfbM09z69p5Z6+mE1ikP2jUXw+j42Pf1XTYED2Rni5f95npYeuwMDQA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@babel/code-frame": "^7.28.6",
|
||||
"@babel/generator": "^7.28.6",
|
||||
"@babel/code-frame": "^7.29.0",
|
||||
"@babel/generator": "^7.29.0",
|
||||
"@babel/helper-compilation-targets": "^7.28.6",
|
||||
"@babel/helper-module-transforms": "^7.28.6",
|
||||
"@babel/helpers": "^7.28.6",
|
||||
"@babel/parser": "^7.28.6",
|
||||
"@babel/parser": "^7.29.0",
|
||||
"@babel/template": "^7.28.6",
|
||||
"@babel/traverse": "^7.28.6",
|
||||
"@babel/types": "^7.28.6",
|
||||
"@babel/traverse": "^7.29.0",
|
||||
"@babel/types": "^7.29.0",
|
||||
"@jridgewell/remapping": "^2.3.5",
|
||||
"convert-source-map": "^2.0.0",
|
||||
"debug": "^4.1.0",
|
||||
@@ -43189,9 +43189,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/markdown-to-jsx": {
|
||||
"version": "9.6.1",
|
||||
"resolved": "https://registry.npmjs.org/markdown-to-jsx/-/markdown-to-jsx-9.6.1.tgz",
|
||||
"integrity": "sha512-FLZPygHQzEGNUwIJ3Egdf3Lzm5M4ebswji8aMfM5tAq1vxkSw31a7OBmYXA3tg264PPyJw5UMDHw1fU+wFaQ9Q==",
|
||||
"version": "9.7.2",
|
||||
"resolved": "https://registry.npmjs.org/markdown-to-jsx/-/markdown-to-jsx-9.7.2.tgz",
|
||||
"integrity": "sha512-DTWRWPPyTVww//5s/dZgq1Pl6i5CclnKvx847Hpj6/GQDpYzM1xkD/Z84PzaVPk/zWfEdJKZSr2G/Y7RTYw7Fw==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 18"
|
||||
@@ -61020,7 +61020,7 @@
|
||||
"license": "ISC",
|
||||
"devDependencies": {
|
||||
"@babel/cli": "^7.28.6",
|
||||
"@babel/core": "^7.28.6",
|
||||
"@babel/core": "^7.29.0",
|
||||
"@babel/preset-env": "^7.29.0",
|
||||
"@babel/preset-react": "^7.28.5",
|
||||
"@babel/preset-typescript": "^7.28.5",
|
||||
@@ -64998,7 +64998,7 @@
|
||||
"react-resizable": "^3.1.3"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@babel/core": "^7.28.6",
|
||||
"@babel/core": "^7.29.0",
|
||||
"@babel/preset-env": "^7.29.0",
|
||||
"@babel/preset-react": "^7.28.5",
|
||||
"@babel/preset-typescript": "^7.28.5",
|
||||
@@ -66400,20 +66400,6 @@
|
||||
"react-dom": "^17.0.2"
|
||||
}
|
||||
},
|
||||
"plugins/plugin-chart-pivot-table/node_modules/@babel/types": {
|
||||
"version": "7.29.0",
|
||||
"resolved": "https://registry.npmjs.org/@babel/types/-/types-7.29.0.tgz",
|
||||
"integrity": "sha512-LwdZHpScM4Qz8Xw2iKSzS+cfglZzJGvofQICy7W7v4caru4EaAmyUuO6BGrbyQ2mYV11W0U8j5mBhd14dd3B0A==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@babel/helper-string-parser": "^7.27.1",
|
||||
"@babel/helper-validator-identifier": "^7.28.5"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=6.9.0"
|
||||
}
|
||||
},
|
||||
"plugins/plugin-chart-table": {
|
||||
"name": "@superset-ui/plugin-chart-table",
|
||||
"version": "0.20.3",
|
||||
|
||||
@@ -171,7 +171,7 @@
|
||||
"json-stringify-pretty-compact": "^2.0.0",
|
||||
"lodash": "^4.17.23",
|
||||
"mapbox-gl": "^3.18.1",
|
||||
"markdown-to-jsx": "^9.6.1",
|
||||
"markdown-to-jsx": "^9.7.2",
|
||||
"match-sorter": "^6.3.4",
|
||||
"memoize-one": "^5.2.1",
|
||||
"mousetrap": "^1.6.5",
|
||||
@@ -226,7 +226,7 @@
|
||||
"@applitools/eyes-storybook": "^3.63.10",
|
||||
"@babel/cli": "^7.28.6",
|
||||
"@babel/compat-data": "^7.28.4",
|
||||
"@babel/core": "^7.28.6",
|
||||
"@babel/core": "^7.29.0",
|
||||
"@babel/eslint-parser": "^7.28.6",
|
||||
"@babel/node": "^7.28.6",
|
||||
"@babel/plugin-syntax-dynamic-import": "^7.8.3",
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
"license": "ISC",
|
||||
"devDependencies": {
|
||||
"@babel/cli": "^7.28.6",
|
||||
"@babel/core": "^7.28.6",
|
||||
"@babel/core": "^7.29.0",
|
||||
"@babel/preset-env": "^7.29.0",
|
||||
"@babel/preset-react": "^7.28.5",
|
||||
"@babel/preset-typescript": "^7.28.5",
|
||||
|
||||
@@ -52,7 +52,7 @@
|
||||
"react-resizable": "^3.1.3"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@babel/core": "^7.28.6",
|
||||
"@babel/core": "^7.29.0",
|
||||
"@babel/preset-env": "^7.29.0",
|
||||
"@babel/preset-react": "^7.28.5",
|
||||
"@babel/preset-typescript": "^7.28.5",
|
||||
|
||||
@@ -35,7 +35,7 @@ import {
|
||||
getStandardizedControls,
|
||||
} from '@superset-ui/chart-controls';
|
||||
import { DEFAULT_FORM_DATA } from './types';
|
||||
import { LABEL_POSITION } from '../constants';
|
||||
import { LabelPositionEnum } from '../types';
|
||||
import { legendSection } from '../controls';
|
||||
|
||||
const { labelType, labelPosition, numberFormat, showLabels, isCircle } =
|
||||
@@ -72,6 +72,22 @@ const radarMetricMinValue: { name: string; config: ControlFormItemSpec } = {
|
||||
},
|
||||
};
|
||||
|
||||
const getLabelPositionOptions = (): [LabelPositionEnum, string][] => [
|
||||
[LabelPositionEnum.Top, t('Top')],
|
||||
[LabelPositionEnum.Left, t('Left')],
|
||||
[LabelPositionEnum.Right, t('Right')],
|
||||
[LabelPositionEnum.Bottom, t('Bottom')],
|
||||
[LabelPositionEnum.Inside, t('Inside')],
|
||||
[LabelPositionEnum.InsideLeft, t('Inside left')],
|
||||
[LabelPositionEnum.InsideRight, t('Inside right')],
|
||||
[LabelPositionEnum.InsideTop, t('Inside top')],
|
||||
[LabelPositionEnum.InsideBottom, t('Inside bottom')],
|
||||
[LabelPositionEnum.InsideTopLeft, t('Inside top left')],
|
||||
[LabelPositionEnum.InsideBottomLeft, t('Inside bottom left')],
|
||||
[LabelPositionEnum.InsideTopRight, t('Inside top right')],
|
||||
[LabelPositionEnum.InsideBottomRight, t('Inside bottom right')],
|
||||
];
|
||||
|
||||
const config: ControlPanelConfig = {
|
||||
controlPanelSections: [
|
||||
{
|
||||
@@ -136,7 +152,7 @@ const config: ControlPanelConfig = {
|
||||
freeForm: false,
|
||||
label: t('Label position'),
|
||||
renderTrigger: true,
|
||||
choices: LABEL_POSITION,
|
||||
choices: getLabelPositionOptions(),
|
||||
default: labelPosition,
|
||||
description: D3_FORMAT_DOCS,
|
||||
},
|
||||
|
||||
@@ -21,7 +21,6 @@ import { t } from '@apache-superset/core';
|
||||
import { JsonValue, TimeGranularity } from '@superset-ui/core';
|
||||
import { ReactNode } from 'react';
|
||||
import {
|
||||
LabelPositionEnum,
|
||||
LegendFormData,
|
||||
LegendOrientation,
|
||||
LegendType,
|
||||
@@ -50,22 +49,6 @@ export const TIMESERIES_CONSTANTS = {
|
||||
horizontalBarLabelRightPadding: 70,
|
||||
};
|
||||
|
||||
export const LABEL_POSITION: [LabelPositionEnum, string][] = [
|
||||
[LabelPositionEnum.Top, 'Top'],
|
||||
[LabelPositionEnum.Left, 'Left'],
|
||||
[LabelPositionEnum.Right, 'Right'],
|
||||
[LabelPositionEnum.Bottom, 'Bottom'],
|
||||
[LabelPositionEnum.Inside, 'Inside'],
|
||||
[LabelPositionEnum.InsideLeft, 'Inside left'],
|
||||
[LabelPositionEnum.InsideRight, 'Inside right'],
|
||||
[LabelPositionEnum.InsideTop, 'Inside top'],
|
||||
[LabelPositionEnum.InsideBottom, 'Inside bottom'],
|
||||
[LabelPositionEnum.InsideTopLeft, 'Inside top left'],
|
||||
[LabelPositionEnum.InsideBottomLeft, 'Inside bottom left'],
|
||||
[LabelPositionEnum.InsideTopRight, 'Inside top right'],
|
||||
[LabelPositionEnum.InsideBottomRight, 'Inside bottom right'],
|
||||
];
|
||||
|
||||
export enum OpacityEnum {
|
||||
Transparent = 0,
|
||||
SemiTransparent = 0.3,
|
||||
|
||||
@@ -86,14 +86,12 @@ export function getTooltipTimeFormatter(
|
||||
return String;
|
||||
}
|
||||
|
||||
export function getXAxisFormatter(
|
||||
format?: string,
|
||||
): TimeFormatter | StringConstructor | undefined {
|
||||
if (format === SMART_DATE_ID || !format) {
|
||||
return undefined;
|
||||
export function getXAxisFormatter(format?: string): TimeFormatter | undefined {
|
||||
if (format === SMART_DATE_ID) {
|
||||
return getSmartDateFormatter();
|
||||
}
|
||||
if (format) {
|
||||
return getTimeFormatter(format);
|
||||
}
|
||||
return String;
|
||||
return undefined;
|
||||
}
|
||||
|
||||
@@ -63,10 +63,14 @@ describe('Scatter Chart X-axis Time Formatting', () => {
|
||||
theme: supersetTheme,
|
||||
};
|
||||
|
||||
test('xAxisTimeFormat has no default formatter', () => {
|
||||
test('xAxisTimeFormat has no formatter when explicitly undefined', () => {
|
||||
const chartProps = new ChartProps({
|
||||
...baseChartPropsConfig,
|
||||
formData: baseFormData,
|
||||
formData: {
|
||||
...baseFormData,
|
||||
// Explicitly override the default smart_date format to test no-format case
|
||||
xAxisTimeFormat: undefined,
|
||||
},
|
||||
});
|
||||
|
||||
const transformedProps = transformProps(
|
||||
@@ -77,6 +81,8 @@ describe('Scatter Chart X-axis Time Formatting', () => {
|
||||
expect(transformedProps.echartOptions.xAxis).toHaveProperty('axisLabel');
|
||||
const xAxis = transformedProps.echartOptions.xAxis as any;
|
||||
expect(xAxis.axisLabel).toHaveProperty('formatter');
|
||||
// When no format is specified, formatter should be undefined
|
||||
// (D3 locale formatting is only applied when a format is explicitly set)
|
||||
expect(xAxis.axisLabel.formatter).toBeUndefined();
|
||||
});
|
||||
|
||||
@@ -98,10 +104,9 @@ describe('Scatter Chart X-axis Time Formatting', () => {
|
||||
|
||||
const xAxis = transformedProps.echartOptions.xAxis as any;
|
||||
expect(xAxis.axisLabel).toHaveProperty('formatter');
|
||||
if (format === SMART_DATE_ID) {
|
||||
expect(xAxis.axisLabel.formatter).toBeUndefined();
|
||||
} else {
|
||||
expect(typeof xAxis.axisLabel.formatter).toBe('function');
|
||||
// All time formats including SMART_DATE should have a formatter function
|
||||
expect(typeof xAxis.axisLabel.formatter).toBe('function');
|
||||
if (format !== SMART_DATE_ID) {
|
||||
expect(xAxis.axisLabel.formatter.id).toBe(format);
|
||||
}
|
||||
},
|
||||
|
||||
@@ -16,8 +16,16 @@
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
import { NumberFormats } from '@superset-ui/core';
|
||||
import { getPercentFormatter } from '../../src/utils/formatters';
|
||||
import {
|
||||
NumberFormats,
|
||||
SMART_DATE_ID,
|
||||
getTimeFormatter,
|
||||
} from '@superset-ui/core';
|
||||
import {
|
||||
getPercentFormatter,
|
||||
getXAxisFormatter,
|
||||
getSmartDateFormatter,
|
||||
} from '../../src/utils/formatters';
|
||||
|
||||
describe('getPercentFormatter', () => {
|
||||
const value = 0.6;
|
||||
@@ -35,3 +43,39 @@ describe('getPercentFormatter', () => {
|
||||
).toEqual('60.00%');
|
||||
});
|
||||
});
|
||||
|
||||
describe('getXAxisFormatter', () => {
|
||||
it('should return smart date formatter when SMART_DATE_ID is specified', () => {
|
||||
const formatter = getXAxisFormatter(SMART_DATE_ID);
|
||||
expect(formatter).toBeDefined();
|
||||
expect(formatter).toBeInstanceOf(Function);
|
||||
// The formatter should be the same as getSmartDateFormatter()
|
||||
const smartDateFormatter = getSmartDateFormatter();
|
||||
expect(formatter).toEqual(smartDateFormatter);
|
||||
});
|
||||
|
||||
it('should return undefined when format is not specified', () => {
|
||||
const formatter = getXAxisFormatter();
|
||||
expect(formatter).toBeUndefined();
|
||||
});
|
||||
|
||||
it('should return undefined when format is empty string', () => {
|
||||
const formatter = getXAxisFormatter('');
|
||||
expect(formatter).toBeUndefined();
|
||||
});
|
||||
|
||||
it('should return the appropriate time formatter for a specific format', () => {
|
||||
const format = '%Y-%m-%d';
|
||||
const formatter = getXAxisFormatter(format);
|
||||
expect(formatter).toBeDefined();
|
||||
expect(formatter).toBeInstanceOf(Function);
|
||||
// Should return the same formatter as getTimeFormatter
|
||||
const expectedFormatter = getTimeFormatter(format);
|
||||
expect(formatter).toEqual(expectedFormatter);
|
||||
});
|
||||
|
||||
it('should return undefined for falsy values', () => {
|
||||
const formatter = getXAxisFormatter(null as any);
|
||||
expect(formatter).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -21,8 +21,6 @@ Pydantic schemas for chart-related responses
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import html
|
||||
import re
|
||||
from datetime import datetime, timezone
|
||||
from typing import Annotated, Any, Dict, List, Literal, Protocol
|
||||
|
||||
@@ -50,6 +48,10 @@ from superset.mcp_service.system.schemas import (
|
||||
TagInfo,
|
||||
UserInfo,
|
||||
)
|
||||
from superset.mcp_service.utils.sanitization import (
|
||||
sanitize_filter_value,
|
||||
sanitize_user_input,
|
||||
)
|
||||
|
||||
|
||||
class ChartLike(Protocol):
|
||||
@@ -357,113 +359,17 @@ class ColumnRef(BaseModel):
|
||||
@classmethod
|
||||
def sanitize_name(cls, v: str) -> str:
|
||||
"""Sanitize column name to prevent XSS and SQL injection."""
|
||||
if not v or not v.strip():
|
||||
raise ValueError("Column name cannot be empty")
|
||||
|
||||
# Length check first to prevent ReDoS attacks
|
||||
if len(v) > 255:
|
||||
raise ValueError(
|
||||
f"Column name too long ({len(v)} characters). "
|
||||
f"Maximum allowed length is 255 characters."
|
||||
)
|
||||
|
||||
# Remove HTML tags and decode entities
|
||||
sanitized = html.escape(v.strip())
|
||||
|
||||
# Check for dangerous HTML tags using substring checks (safe)
|
||||
dangerous_tags = ["<script", "</script>", "<iframe", "<object", "<embed"]
|
||||
v_lower = v.lower()
|
||||
for tag in dangerous_tags:
|
||||
if tag in v_lower:
|
||||
raise ValueError(
|
||||
"Column name contains potentially malicious script content"
|
||||
)
|
||||
|
||||
# Check URL schemes with word boundaries to match only actual URLs
|
||||
if re.search(r"\b(javascript|vbscript|data):", v, re.IGNORECASE):
|
||||
raise ValueError("Column name contains potentially malicious URL scheme")
|
||||
|
||||
# Basic SQL injection patterns (basic protection)
|
||||
# Use simple patterns without backtracking
|
||||
dangerous_patterns = [
|
||||
r"[;|&$`]", # Dangerous shell characters
|
||||
r"\b(DROP|DELETE|INSERT|UPDATE|CREATE|ALTER|EXEC|EXECUTE)\b",
|
||||
r"--", # SQL comment
|
||||
r"/\*", # SQL comment start (just check for start, not full pattern)
|
||||
]
|
||||
|
||||
for pattern in dangerous_patterns:
|
||||
if re.search(pattern, v, re.IGNORECASE):
|
||||
raise ValueError(
|
||||
"Column name contains potentially unsafe characters or SQL keywords"
|
||||
)
|
||||
|
||||
return sanitized
|
||||
# sanitize_user_input raises ValueError when allow_empty=False (default)
|
||||
# so the return value is guaranteed to be a non-None str
|
||||
return sanitize_user_input(
|
||||
v, "Column name", max_length=255, check_sql_keywords=True
|
||||
) # type: ignore[return-value]
|
||||
|
||||
@field_validator("label")
|
||||
@classmethod
|
||||
def sanitize_label(cls, v: str | None) -> str | None:
|
||||
"""Sanitize display label to prevent XSS attacks."""
|
||||
if v is None:
|
||||
return v
|
||||
|
||||
# Strip whitespace
|
||||
v = v.strip()
|
||||
if not v:
|
||||
return None
|
||||
|
||||
# Length check first to prevent ReDoS attacks
|
||||
if len(v) > 500:
|
||||
raise ValueError(
|
||||
f"Label too long ({len(v)} characters). "
|
||||
f"Maximum allowed length is 500 characters."
|
||||
)
|
||||
|
||||
# Check for dangerous HTML tags and JavaScript protocols using substring checks
|
||||
# This avoids ReDoS vulnerabilities from regex patterns
|
||||
dangerous_tags = [
|
||||
"<script",
|
||||
"</script>",
|
||||
"<iframe",
|
||||
"</iframe>",
|
||||
"<object",
|
||||
"</object>",
|
||||
"<embed",
|
||||
"</embed>",
|
||||
"<link",
|
||||
"<meta",
|
||||
]
|
||||
|
||||
v_lower = v.lower()
|
||||
for tag in dangerous_tags:
|
||||
if tag in v_lower:
|
||||
raise ValueError(
|
||||
"Label contains potentially malicious content. "
|
||||
"HTML tags, JavaScript, and event handlers are not allowed "
|
||||
"in labels."
|
||||
)
|
||||
|
||||
# Check URL schemes and event handlers with word boundaries
|
||||
dangerous_patterns = [
|
||||
r"\b(javascript|vbscript|data):", # URL schemes
|
||||
r"on\w+\s*=", # Event handlers
|
||||
]
|
||||
for pattern in dangerous_patterns:
|
||||
if re.search(pattern, v, re.IGNORECASE):
|
||||
raise ValueError(
|
||||
"Label contains potentially malicious content. "
|
||||
"HTML tags, JavaScript, and event handlers are not allowed."
|
||||
)
|
||||
|
||||
# Filter dangerous Unicode characters
|
||||
v = re.sub(
|
||||
r"[\u200B-\u200D\uFEFF\u0000-\u0008\u000B\u000C\u000E-\u001F]", "", v
|
||||
)
|
||||
|
||||
# HTML escape the cleaned content
|
||||
sanitized = html.escape(v)
|
||||
|
||||
return sanitized if sanitized else None
|
||||
return sanitize_user_input(v, "Label", max_length=500, allow_empty=True)
|
||||
|
||||
|
||||
class AxisConfig(BaseModel):
|
||||
@@ -496,112 +402,15 @@ class FilterConfig(BaseModel):
|
||||
@classmethod
|
||||
def sanitize_column(cls, v: str) -> str:
|
||||
"""Sanitize filter column name to prevent injection attacks."""
|
||||
if not v or not v.strip():
|
||||
raise ValueError("Filter column name cannot be empty")
|
||||
|
||||
# Length check first to prevent ReDoS attacks
|
||||
if len(v) > 255:
|
||||
raise ValueError(
|
||||
f"Filter column name too long ({len(v)} characters). "
|
||||
f"Maximum allowed length is 255 characters."
|
||||
)
|
||||
|
||||
# Remove HTML tags and decode entities
|
||||
sanitized = html.escape(v.strip())
|
||||
|
||||
# Check for dangerous HTML tags using substring checks (safe)
|
||||
dangerous_tags = ["<script", "</script>"]
|
||||
v_lower = v.lower()
|
||||
for tag in dangerous_tags:
|
||||
if tag in v_lower:
|
||||
raise ValueError(
|
||||
"Filter column contains potentially malicious script content"
|
||||
)
|
||||
|
||||
# Check URL schemes with word boundaries
|
||||
if re.search(r"\b(javascript|vbscript|data):", v, re.IGNORECASE):
|
||||
raise ValueError("Filter column contains potentially malicious URL scheme")
|
||||
|
||||
return sanitized
|
||||
|
||||
@staticmethod
|
||||
def _validate_string_value(v: str) -> None:
|
||||
"""Validate string filter value for security issues."""
|
||||
# Check for dangerous HTML tags and SQL procedures
|
||||
dangerous_substrings = [
|
||||
"<script",
|
||||
"</script>",
|
||||
"<iframe",
|
||||
"<object",
|
||||
"<embed",
|
||||
"xp_cmdshell",
|
||||
"sp_executesql",
|
||||
]
|
||||
v_lower = v.lower()
|
||||
for substring in dangerous_substrings:
|
||||
if substring in v_lower:
|
||||
raise ValueError(
|
||||
"Filter value contains potentially malicious content. "
|
||||
"HTML tags and JavaScript are not allowed."
|
||||
)
|
||||
|
||||
# Check URL schemes with word boundaries
|
||||
if re.search(r"\b(javascript|vbscript|data):", v, re.IGNORECASE):
|
||||
raise ValueError("Filter value contains potentially malicious URL scheme")
|
||||
|
||||
# SQL injection patterns
|
||||
sql_patterns = [
|
||||
r";\s*(DROP|DELETE|INSERT|UPDATE|CREATE|ALTER|EXEC|EXECUTE)\b",
|
||||
r"'\s*OR\s*'",
|
||||
r"'\s*AND\s*'",
|
||||
r"--\s*",
|
||||
r"/\*",
|
||||
r"UNION\s+SELECT",
|
||||
]
|
||||
for pattern in sql_patterns:
|
||||
if re.search(pattern, v, re.IGNORECASE):
|
||||
raise ValueError(
|
||||
"Filter value contains potentially malicious SQL patterns."
|
||||
)
|
||||
|
||||
# Check for other dangerous patterns
|
||||
if re.search(r"[;&|`$()]", v):
|
||||
raise ValueError(
|
||||
"Filter value contains potentially unsafe shell characters."
|
||||
)
|
||||
if re.search(r"on\w+\s*=", v, re.IGNORECASE):
|
||||
raise ValueError(
|
||||
"Filter value contains potentially malicious event handlers."
|
||||
)
|
||||
if re.search(r"\\x[0-9a-fA-F]{2}", v):
|
||||
raise ValueError("Filter value contains hex encoding which is not allowed.")
|
||||
# sanitize_user_input raises ValueError when allow_empty=False (default)
|
||||
# so the return value is guaranteed to be a non-None str
|
||||
return sanitize_user_input(v, "Filter column", max_length=255) # type: ignore[return-value]
|
||||
|
||||
@field_validator("value")
|
||||
@classmethod
|
||||
def sanitize_value(cls, v: str | int | float | bool) -> str | int | float | bool:
|
||||
"""Sanitize filter value to prevent XSS and SQL injection attacks."""
|
||||
if isinstance(v, str):
|
||||
v = v.strip()
|
||||
|
||||
# Length check FIRST to prevent ReDoS attacks
|
||||
if len(v) > 1000:
|
||||
raise ValueError(
|
||||
f"Filter value too long ({len(v)} characters). "
|
||||
f"Maximum allowed length is 1000 characters."
|
||||
)
|
||||
|
||||
# Validate security
|
||||
cls._validate_string_value(v)
|
||||
|
||||
# Filter dangerous Unicode characters
|
||||
v = re.sub(
|
||||
r"[\u200B-\u200D\uFEFF\u0000-\u0008\u000B\u000C\u000E-\u001F]", "", v
|
||||
)
|
||||
|
||||
# HTML escape the cleaned content
|
||||
return html.escape(v)
|
||||
|
||||
return v # Return non-string values as-is
|
||||
return sanitize_filter_value(v, max_length=1000)
|
||||
|
||||
|
||||
# Actual chart types
|
||||
@@ -848,6 +657,11 @@ class ListChartsRequest(MetadataCacheControl):
|
||||
class GenerateChartRequest(QueryCacheControl):
|
||||
dataset_id: int | str = Field(..., description="Dataset identifier (ID, UUID)")
|
||||
config: ChartConfig = Field(..., description="Chart configuration")
|
||||
chart_name: str | None = Field(
|
||||
None,
|
||||
description="Custom chart name (optional, auto-generates if not provided)",
|
||||
max_length=255,
|
||||
)
|
||||
save_chart: bool = Field(
|
||||
default=False,
|
||||
description="Whether to permanently save the chart in Superset",
|
||||
@@ -861,6 +675,12 @@ class GenerateChartRequest(QueryCacheControl):
|
||||
description="List of preview formats to generate",
|
||||
)
|
||||
|
||||
@field_validator("chart_name")
|
||||
@classmethod
|
||||
def sanitize_chart_name(cls, v: str | None) -> str | None:
|
||||
"""Sanitize chart name to prevent XSS attacks."""
|
||||
return sanitize_user_input(v, "Chart name", max_length=255, allow_empty=True)
|
||||
|
||||
@model_validator(mode="after")
|
||||
def validate_cache_timeout(self) -> "GenerateChartRequest":
|
||||
"""Validate cache timeout is non-negative."""
|
||||
@@ -911,62 +731,7 @@ class UpdateChartRequest(QueryCacheControl):
|
||||
@classmethod
|
||||
def sanitize_chart_name(cls, v: str | None) -> str | None:
|
||||
"""Sanitize chart name to prevent XSS attacks."""
|
||||
if v is None:
|
||||
return v
|
||||
|
||||
# Strip whitespace
|
||||
v = v.strip()
|
||||
if not v:
|
||||
return None
|
||||
|
||||
# Length check first to prevent ReDoS attacks
|
||||
if len(v) > 255:
|
||||
raise ValueError(
|
||||
f"Chart name too long ({len(v)} characters). "
|
||||
f"Maximum allowed length is 255 characters."
|
||||
)
|
||||
|
||||
# Check for dangerous HTML tags using substring checks (safe)
|
||||
dangerous_tags = [
|
||||
"<script",
|
||||
"</script>",
|
||||
"<iframe",
|
||||
"</iframe>",
|
||||
"<object",
|
||||
"</object>",
|
||||
"<embed",
|
||||
"</embed>",
|
||||
"<link",
|
||||
"<meta",
|
||||
]
|
||||
|
||||
v_lower = v.lower()
|
||||
for tag in dangerous_tags:
|
||||
if tag in v_lower:
|
||||
raise ValueError(
|
||||
"Chart name contains potentially malicious content. "
|
||||
"HTML tags and JavaScript are not allowed in chart names."
|
||||
)
|
||||
|
||||
# Check URL schemes with word boundaries
|
||||
if re.search(r"\b(javascript|vbscript|data):", v, re.IGNORECASE):
|
||||
raise ValueError("Chart name contains potentially malicious URL scheme")
|
||||
|
||||
# Check for event handlers with simple regex
|
||||
if re.search(r"on\w+\s*=", v, re.IGNORECASE):
|
||||
raise ValueError(
|
||||
"Chart name contains potentially malicious event handlers."
|
||||
)
|
||||
|
||||
# Filter dangerous Unicode characters
|
||||
v = re.sub(
|
||||
r"[\u200B-\u200D\uFEFF\u0000-\u0008\u000B\u000C\u000E-\u001F]", "", v
|
||||
)
|
||||
|
||||
# HTML escape the cleaned content
|
||||
sanitized = html.escape(v)
|
||||
|
||||
return sanitized if sanitized else None
|
||||
return sanitize_user_input(v, "Chart name", max_length=255, allow_empty=True)
|
||||
|
||||
|
||||
class UpdateChartPreviewRequest(FormDataCacheControl):
|
||||
|
||||
@@ -189,9 +189,9 @@ async def generate_chart( # noqa: C901
|
||||
await ctx.report_progress(2, 5, "Creating chart in database")
|
||||
from superset.commands.chart.create import CreateChartCommand
|
||||
|
||||
# Generate a chart name
|
||||
chart_name = generate_chart_name(request.config)
|
||||
await ctx.debug("Generated chart name: chart_name=%s" % (chart_name,))
|
||||
# Use custom chart name if provided, otherwise auto-generate
|
||||
chart_name = request.chart_name or generate_chart_name(request.config)
|
||||
await ctx.debug("Chart name: chart_name=%s" % (chart_name,))
|
||||
|
||||
# Find the dataset to get its numeric ID
|
||||
from superset.daos.dataset import DatasetDAO
|
||||
|
||||
283
superset/mcp_service/utils/sanitization.py
Normal file
283
superset/mcp_service/utils/sanitization.py
Normal file
@@ -0,0 +1,283 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
"""
|
||||
Centralized sanitization utilities for MCP service user inputs.
|
||||
|
||||
This module uses the nh3 library (Rust-based HTML sanitizer) to strip malicious
|
||||
HTML tags and protocols from user inputs. nh3 is faster and safer than manual
|
||||
regex-based sanitization.
|
||||
|
||||
Key features:
|
||||
- Strips all HTML tags using nh3.clean() with no allowed tags
|
||||
- Blocks dangerous URL schemes (javascript:, vbscript:, data:)
|
||||
- Preserves safe text content (e.g., '&' stays as '&', not '&')
|
||||
- Additional SQL injection protection for database-facing inputs
|
||||
"""
|
||||
|
||||
import html
|
||||
import re
|
||||
|
||||
import nh3
|
||||
|
||||
|
||||
def _strip_html_tags(value: str) -> str:
|
||||
"""
|
||||
Strip all HTML tags from the input using nh3.
|
||||
|
||||
Decodes all layers of HTML entity encoding BEFORE passing to nh3,
|
||||
so entity-encoded tags (e.g., ``<script>``) are decoded into
|
||||
real tags that nh3 can detect and strip. After nh3 removes all tags,
|
||||
we only restore ``&`` back to ``&`` (not a full html.unescape)
|
||||
to preserve ampersands in display text without risking XSS from
|
||||
re-introducing angle brackets or other HTML-significant characters.
|
||||
|
||||
Args:
|
||||
value: The input string that may contain HTML
|
||||
|
||||
Returns:
|
||||
String with all HTML tags removed and ampersands preserved
|
||||
"""
|
||||
# Decode all layers of HTML entity encoding to prevent bypass
|
||||
# via entity-encoded tags (e.g., <script> or &lt;script&gt;)
|
||||
# The loop terminates when unescape produces no change (idempotent on decoded text).
|
||||
# Max iterations cap provides defense-in-depth against pathological inputs.
|
||||
max_iterations = 100
|
||||
decoded = value
|
||||
prev = None
|
||||
iterations = 0
|
||||
while prev != decoded and iterations < max_iterations:
|
||||
prev = decoded
|
||||
decoded = html.unescape(decoded)
|
||||
iterations += 1
|
||||
|
||||
# nh3.clean with tags=set() strips ALL HTML tags from the decoded input
|
||||
# url_schemes=set() blocks all URL schemes in any remaining attributes
|
||||
cleaned = nh3.clean(decoded, tags=set(), url_schemes=set())
|
||||
|
||||
# Only restore & → & to preserve ampersands in display text (e.g. "A & B").
|
||||
# Do NOT use html.unescape() here: nh3 may pass through HTML entities from
|
||||
# the input (e.g. <script>), and a full unescape would re-introduce
|
||||
# raw angle brackets, creating an XSS vector.
|
||||
return cleaned.replace("&", "&")
|
||||
|
||||
|
||||
def _check_dangerous_patterns(value: str, field_name: str) -> None:
|
||||
"""
|
||||
Check for dangerous patterns that nh3 doesn't catch.
|
||||
|
||||
This includes URL schemes in plain text (not in HTML attributes),
|
||||
event handler patterns, and dangerous Unicode characters.
|
||||
|
||||
Args:
|
||||
value: The input string to check
|
||||
field_name: Name of the field (for error messages)
|
||||
|
||||
Raises:
|
||||
ValueError: If dangerous patterns are found
|
||||
"""
|
||||
# Block dangerous URL schemes in plain text (word boundary check)
|
||||
if re.search(r"\b(javascript|vbscript|data):", value, re.IGNORECASE):
|
||||
raise ValueError(f"{field_name} contains potentially malicious URL scheme")
|
||||
|
||||
# Block event handler patterns (onclick=, onerror=, etc.)
|
||||
if re.search(r"on\w+\s*=", value, re.IGNORECASE):
|
||||
raise ValueError(f"{field_name} contains potentially malicious event handler")
|
||||
|
||||
|
||||
def _check_sql_patterns(value: str, field_name: str) -> None:
|
||||
"""
|
||||
Check for SQL injection patterns.
|
||||
|
||||
Args:
|
||||
value: The input string to check
|
||||
field_name: Name of the field (for error messages)
|
||||
|
||||
Raises:
|
||||
ValueError: If SQL injection patterns are found
|
||||
"""
|
||||
# Check for dangerous SQL keywords
|
||||
if re.search(
|
||||
r"\b(DROP|DELETE|INSERT|UPDATE|CREATE|ALTER|EXEC|EXECUTE)\b",
|
||||
value,
|
||||
re.IGNORECASE,
|
||||
):
|
||||
raise ValueError(f"{field_name} contains potentially unsafe SQL keywords")
|
||||
|
||||
# Check for shell metacharacters and SQL comments
|
||||
if re.search(r"[;|&$`]|--", value):
|
||||
raise ValueError(f"{field_name} contains potentially unsafe characters")
|
||||
|
||||
# Check for SQL comment start
|
||||
if "/*" in value:
|
||||
raise ValueError(f"{field_name} contains potentially unsafe SQL comment syntax")
|
||||
|
||||
|
||||
def _remove_dangerous_unicode(value: str) -> str:
|
||||
"""
|
||||
Remove dangerous Unicode characters (zero-width, control chars).
|
||||
|
||||
Args:
|
||||
value: The input string
|
||||
|
||||
Returns:
|
||||
String with dangerous Unicode characters removed
|
||||
"""
|
||||
return re.sub(
|
||||
r"[\u200B-\u200D\uFEFF\u0000-\u0008\u000B\u000C\u000E-\u001F]", "", value
|
||||
)
|
||||
|
||||
|
||||
def sanitize_user_input(
|
||||
value: str | None,
|
||||
field_name: str,
|
||||
max_length: int = 255,
|
||||
check_sql_keywords: bool = False,
|
||||
allow_empty: bool = False,
|
||||
) -> str | None:
|
||||
"""
|
||||
Centralized sanitization for user-provided text inputs.
|
||||
|
||||
Uses nh3 to strip HTML tags and performs additional security checks.
|
||||
|
||||
Args:
|
||||
value: The input string to sanitize
|
||||
field_name: Name of the field (for error messages)
|
||||
max_length: Maximum allowed length
|
||||
check_sql_keywords: Whether to check for SQL injection keywords
|
||||
allow_empty: Whether to allow empty/None values
|
||||
|
||||
Returns:
|
||||
Sanitized string, or None if allow_empty=True and value is empty
|
||||
|
||||
Raises:
|
||||
ValueError: If value fails security validation
|
||||
|
||||
Security checks performed:
|
||||
- Strips all HTML tags using nh3 (Rust-based sanitizer)
|
||||
- Blocks JavaScript/VBScript/data URL schemes
|
||||
- Blocks event handlers (onclick=, onerror=, etc.)
|
||||
- Removes dangerous Unicode characters (zero-width, control chars)
|
||||
- SQL keywords and shell metacharacters (when check_sql_keywords=True)
|
||||
"""
|
||||
if value is None:
|
||||
if allow_empty:
|
||||
return None
|
||||
raise ValueError(f"{field_name} cannot be empty")
|
||||
|
||||
value = value.strip()
|
||||
|
||||
if not value:
|
||||
if allow_empty:
|
||||
return None
|
||||
raise ValueError(f"{field_name} cannot be empty")
|
||||
|
||||
# Length check first to prevent ReDoS attacks
|
||||
if len(value) > max_length:
|
||||
raise ValueError(
|
||||
f"{field_name} too long ({len(value)} characters). "
|
||||
f"Maximum allowed length is {max_length} characters."
|
||||
)
|
||||
|
||||
# Strip all HTML tags using nh3
|
||||
value = _strip_html_tags(value)
|
||||
|
||||
# Check for dangerous patterns (URL schemes, event handlers)
|
||||
_check_dangerous_patterns(value, field_name)
|
||||
|
||||
# SQL keyword and shell metacharacter checks (for column names, etc.)
|
||||
if check_sql_keywords:
|
||||
_check_sql_patterns(value, field_name)
|
||||
|
||||
# Remove dangerous Unicode characters
|
||||
value = _remove_dangerous_unicode(value)
|
||||
|
||||
return value
|
||||
|
||||
|
||||
def sanitize_filter_value(
|
||||
value: str | int | float | bool,
|
||||
max_length: int = 1000,
|
||||
) -> str | int | float | bool:
|
||||
"""
|
||||
Sanitize filter values which can be strings or other types.
|
||||
|
||||
For non-string values, returns as-is (no sanitization needed).
|
||||
For strings, uses nh3 to strip HTML and applies security validation.
|
||||
|
||||
Args:
|
||||
value: The filter value (string, int, float, or bool)
|
||||
max_length: Maximum length for string values
|
||||
|
||||
Returns:
|
||||
Sanitized value
|
||||
|
||||
Raises:
|
||||
ValueError: If string value fails security validation
|
||||
"""
|
||||
if not isinstance(value, str):
|
||||
return value
|
||||
|
||||
value = value.strip()
|
||||
|
||||
# Length check first
|
||||
if len(value) > max_length:
|
||||
raise ValueError(
|
||||
f"Filter value too long ({len(value)} characters). "
|
||||
f"Maximum allowed length is {max_length} characters."
|
||||
)
|
||||
|
||||
# Strip all HTML tags using nh3
|
||||
value = _strip_html_tags(value)
|
||||
|
||||
# Check for dangerous patterns
|
||||
_check_dangerous_patterns(value, "Filter value")
|
||||
|
||||
# Check for dangerous SQL procedures (filter-specific)
|
||||
v_lower = value.lower()
|
||||
if "xp_cmdshell" in v_lower or "sp_executesql" in v_lower:
|
||||
raise ValueError("Filter value contains potentially malicious SQL procedures.")
|
||||
|
||||
# SQL injection patterns specific to filter values
|
||||
sql_patterns = [
|
||||
r";\s*(DROP|DELETE|INSERT|UPDATE|CREATE|ALTER|EXEC|EXECUTE)\b",
|
||||
r"'\s*OR\s*'",
|
||||
r"'\s*AND\s*'",
|
||||
r"--\s*",
|
||||
r"/\*",
|
||||
r"UNION\s+SELECT",
|
||||
]
|
||||
for pattern in sql_patterns:
|
||||
if re.search(pattern, value, re.IGNORECASE):
|
||||
raise ValueError(
|
||||
"Filter value contains potentially malicious SQL patterns."
|
||||
)
|
||||
|
||||
# Check for shell metacharacters that could indicate injection attempts
|
||||
# Note: We allow '&' alone as it's common in text ("A & B") and is only
|
||||
# dangerous in shell contexts, not in database queries
|
||||
if re.search(r"[;|`$()]", value):
|
||||
raise ValueError("Filter value contains potentially unsafe shell characters.")
|
||||
|
||||
# Check for hex encoding
|
||||
if re.search(r"\\x[0-9a-fA-F]{2}", value):
|
||||
raise ValueError("Filter value contains hex encoding which is not allowed.")
|
||||
|
||||
# Remove dangerous Unicode characters
|
||||
value = _remove_dangerous_unicode(value)
|
||||
|
||||
return value
|
||||
@@ -11357,33 +11357,7 @@ msgid "Tree layout"
|
||||
msgstr "Diseño del árbol"
|
||||
|
||||
msgid "Tree orientation"
|
||||
Findings (brief):
|
||||
|
||||
- No git merge conflict markers found (no <<<<<<< / ======= / >>>>>>>).
|
||||
- PO header mismatch: "Language: en" — this is an Spanish file; set to "es".
|
||||
- Duplicate msgid entries with conflicting/empty translations:
|
||||
- " at line %(line)d" — one entry has " en la línea %(line)d", another has an empty msgstr.
|
||||
- "Dashboard cannot be copied due to invalid parameters." — appears multiple times with different/empty msgstr values.
|
||||
- "%(subtitle)s\nThis may be triggered by:\n %(issue)s" — msgstr is empty in one occurrence.
|
||||
- There are other repeated msgids with one occurrence left untranslated (examples: search for repeated msgid strings with one msgstr == "").
|
||||
- Empty translations (examples):
|
||||
- msgid "%(subtitle)s\nThis may be triggered by:\n %(issue)s" → msgstr "".
|
||||
- Several other msgid entries have msgstr "" (scan for msgstr "" occurrences).
|
||||
- Fuzzy entries present (e.g. entries annotated "#, fuzzy") — these need review and removal of the fuzzy flag after correction.
|
||||
- Typo in a translation: msgid "This action will permanently delete the user." → msgstr contains "uduario." (should be "usuario.").
|
||||
|
||||
Recommended next steps:
|
||||
- Fix header Language to "es".
|
||||
- Remove/fix duplicate msgids: consolidate into a single entry and keep the correct translation.
|
||||
- Fill in missing msgstr values (or mark as untranslated intentionally).
|
||||
- Review and resolve fuzzy entries, then remove the "fuzzy" flag.
|
||||
- Fix obvious typos (e.g., "uduario" → "usuario").
|
||||
|
||||
If you want, I can produce a patch that:
|
||||
- updates header Language to "es",
|
||||
- removes duplicate entries by keeping the first translated occurrence,
|
||||
- lists all msgids with empty msgstr for you to translate,
|
||||
or show exact locations (line ranges) for each problem. Which would you prefer?
|
||||
msgstr "Orientación del árbol"
|
||||
|
||||
msgid "Treemap"
|
||||
msgstr "Diagrama de árbol"
|
||||
|
||||
480
tests/unit_tests/mcp_service/utils/test_sanitization.py
Normal file
480
tests/unit_tests/mcp_service/utils/test_sanitization.py
Normal file
@@ -0,0 +1,480 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import pytest
|
||||
|
||||
from superset.mcp_service.utils.sanitization import (
|
||||
_check_dangerous_patterns,
|
||||
_check_sql_patterns,
|
||||
_remove_dangerous_unicode,
|
||||
_strip_html_tags,
|
||||
sanitize_filter_value,
|
||||
sanitize_user_input,
|
||||
)
|
||||
|
||||
# --- _strip_html_tags tests ---
|
||||
|
||||
|
||||
def test_strip_html_tags_plain_text():
|
||||
assert _strip_html_tags("hello world") == "hello world"
|
||||
|
||||
|
||||
def test_strip_html_tags_preserves_ampersand():
|
||||
assert _strip_html_tags("A & B") == "A & B"
|
||||
|
||||
|
||||
def test_strip_html_tags_preserves_multiple_ampersands():
|
||||
assert _strip_html_tags("A & B & C") == "A & B & C"
|
||||
|
||||
|
||||
def test_strip_html_tags_strips_bold_tags():
|
||||
assert _strip_html_tags("<b>hello</b>") == "hello"
|
||||
|
||||
|
||||
def test_strip_html_tags_strips_script_tags():
|
||||
result = _strip_html_tags("<script>alert(1)</script>")
|
||||
assert "<script>" not in result
|
||||
assert "</script>" not in result
|
||||
|
||||
|
||||
def test_strip_html_tags_strips_entity_encoded_script():
|
||||
"""Entity-encoded tags must be decoded and stripped, not passed through."""
|
||||
result = _strip_html_tags("<script>alert(1)</script>")
|
||||
assert "<script>" not in result
|
||||
assert "<script>" not in result
|
||||
|
||||
|
||||
def test_strip_html_tags_strips_double_encoded_script():
|
||||
"""Double-encoded entities must also be decoded and stripped."""
|
||||
result = _strip_html_tags("&lt;script&gt;alert(1)&lt;/script&gt;")
|
||||
assert "<script>" not in result
|
||||
assert "<script>" not in result
|
||||
|
||||
|
||||
def test_strip_html_tags_strips_img_onerror():
|
||||
result = _strip_html_tags('<img src=x onerror="alert(1)">')
|
||||
assert "<img" not in result
|
||||
assert "onerror" not in result
|
||||
|
||||
|
||||
def test_strip_html_tags_strips_div_tags():
|
||||
assert _strip_html_tags("<div>content</div>") == "content"
|
||||
|
||||
|
||||
def test_strip_html_tags_preserves_less_than_in_text():
|
||||
"""A bare < not forming a tag should be preserved."""
|
||||
result = _strip_html_tags("5 < 10")
|
||||
assert "5" in result
|
||||
assert "10" in result
|
||||
|
||||
|
||||
def test_strip_html_tags_empty_string():
|
||||
assert _strip_html_tags("") == ""
|
||||
|
||||
|
||||
def test_strip_html_tags_triple_encoded_script():
|
||||
"""Triple-encoded entities must also be decoded and stripped."""
|
||||
result = _strip_html_tags(
|
||||
"&amp;lt;script&amp;gt;alert(1)&amp;lt;/script&amp;gt;"
|
||||
)
|
||||
assert "<script>" not in result
|
||||
|
||||
|
||||
def test_strip_html_tags_mixed_encoded_and_raw():
|
||||
"""Both raw and entity-encoded tags should be stripped."""
|
||||
result = _strip_html_tags("<b>bold</b> and <i>italic</i>")
|
||||
assert "<b>" not in result
|
||||
assert "<i>" not in result
|
||||
assert "bold" in result
|
||||
assert "italic" in result
|
||||
|
||||
|
||||
def test_strip_html_tags_deep_encoding_terminates():
|
||||
"""Verify the iterative decode loop terminates on many encoding layers."""
|
||||
value = "<script>alert(1)</script>"
|
||||
for _ in range(10):
|
||||
value = value.replace("&", "&").replace("<", "<").replace(">", ">")
|
||||
result = _strip_html_tags(value)
|
||||
assert "<script>" not in result
|
||||
|
||||
|
||||
def test_strip_html_tags_entity_ampersand():
|
||||
"""& in input should become & in output."""
|
||||
assert _strip_html_tags("A & B") == "A & B"
|
||||
|
||||
|
||||
# --- _check_dangerous_patterns tests ---
|
||||
|
||||
|
||||
def test_check_dangerous_patterns_safe_input():
|
||||
_check_dangerous_patterns("hello world", "test")
|
||||
|
||||
|
||||
def test_check_dangerous_patterns_javascript_scheme():
|
||||
with pytest.raises(ValueError, match="malicious URL scheme"):
|
||||
_check_dangerous_patterns("javascript:alert(1)", "test")
|
||||
|
||||
|
||||
def test_check_dangerous_patterns_vbscript_scheme():
|
||||
with pytest.raises(ValueError, match="malicious URL scheme"):
|
||||
_check_dangerous_patterns("vbscript:MsgBox", "test")
|
||||
|
||||
|
||||
def test_check_dangerous_patterns_data_scheme():
|
||||
with pytest.raises(ValueError, match="malicious URL scheme"):
|
||||
_check_dangerous_patterns("data:text/html,<script>", "test")
|
||||
|
||||
|
||||
def test_check_dangerous_patterns_case_insensitive():
|
||||
with pytest.raises(ValueError, match="malicious URL scheme"):
|
||||
_check_dangerous_patterns("JAVASCRIPT:alert(1)", "test")
|
||||
|
||||
|
||||
def test_check_dangerous_patterns_onclick():
|
||||
with pytest.raises(ValueError, match="malicious event handler"):
|
||||
_check_dangerous_patterns("onclick=alert(1)", "test")
|
||||
|
||||
|
||||
def test_check_dangerous_patterns_onerror():
|
||||
with pytest.raises(ValueError, match="malicious event handler"):
|
||||
_check_dangerous_patterns("onerror = alert(1)", "test")
|
||||
|
||||
|
||||
def test_check_dangerous_patterns_onload():
|
||||
with pytest.raises(ValueError, match="malicious event handler"):
|
||||
_check_dangerous_patterns("onload=fetch('x')", "test")
|
||||
|
||||
|
||||
# --- _check_sql_patterns tests ---
|
||||
|
||||
|
||||
def test_check_sql_patterns_safe_input():
|
||||
_check_sql_patterns("revenue_total", "test")
|
||||
|
||||
|
||||
def test_check_sql_patterns_drop_table():
|
||||
with pytest.raises(ValueError, match="unsafe SQL keywords"):
|
||||
_check_sql_patterns("DROP TABLE users", "test")
|
||||
|
||||
|
||||
def test_check_sql_patterns_delete():
|
||||
with pytest.raises(ValueError, match="unsafe SQL keywords"):
|
||||
_check_sql_patterns("DELETE FROM users", "test")
|
||||
|
||||
|
||||
def test_check_sql_patterns_semicolon():
|
||||
with pytest.raises(ValueError, match="unsafe characters"):
|
||||
_check_sql_patterns("value; other", "test")
|
||||
|
||||
|
||||
def test_check_sql_patterns_sql_comment_dash():
|
||||
with pytest.raises(ValueError, match="unsafe characters"):
|
||||
_check_sql_patterns("value -- comment", "test")
|
||||
|
||||
|
||||
def test_check_sql_patterns_sql_comment_block():
|
||||
with pytest.raises(ValueError, match="unsafe SQL comment"):
|
||||
_check_sql_patterns("value /* comment */", "test")
|
||||
|
||||
|
||||
def test_check_sql_patterns_pipe():
|
||||
with pytest.raises(ValueError, match="unsafe characters"):
|
||||
_check_sql_patterns("value | other", "test")
|
||||
|
||||
|
||||
def test_check_sql_patterns_case_insensitive():
|
||||
with pytest.raises(ValueError, match="unsafe SQL keywords"):
|
||||
_check_sql_patterns("drop table users", "test")
|
||||
|
||||
|
||||
# --- _remove_dangerous_unicode tests ---
|
||||
|
||||
|
||||
def test_remove_dangerous_unicode_plain_text():
|
||||
assert _remove_dangerous_unicode("hello") == "hello"
|
||||
|
||||
|
||||
def test_remove_dangerous_unicode_zero_width_space():
|
||||
assert _remove_dangerous_unicode("he\u200bllo") == "hello"
|
||||
|
||||
|
||||
def test_remove_dangerous_unicode_zero_width_joiner():
|
||||
assert _remove_dangerous_unicode("he\u200dllo") == "hello"
|
||||
|
||||
|
||||
def test_remove_dangerous_unicode_bom():
|
||||
assert _remove_dangerous_unicode("\ufeffhello") == "hello"
|
||||
|
||||
|
||||
def test_remove_dangerous_unicode_null_byte():
|
||||
assert _remove_dangerous_unicode("he\x00llo") == "hello"
|
||||
|
||||
|
||||
def test_remove_dangerous_unicode_preserves_normal_unicode():
|
||||
assert _remove_dangerous_unicode("café résumé") == "café résumé"
|
||||
|
||||
|
||||
# --- sanitize_user_input tests ---
|
||||
|
||||
|
||||
def test_sanitize_user_input_plain_text():
|
||||
assert sanitize_user_input("hello", "test") == "hello"
|
||||
|
||||
|
||||
def test_sanitize_user_input_preserves_ampersand():
|
||||
assert sanitize_user_input("A & B", "test") == "A & B"
|
||||
|
||||
|
||||
def test_sanitize_user_input_strips_html():
|
||||
assert sanitize_user_input("<b>hello</b>", "test") == "hello"
|
||||
|
||||
|
||||
def test_sanitize_user_input_none_not_allowed():
|
||||
with pytest.raises(ValueError, match="cannot be empty"):
|
||||
sanitize_user_input(None, "test")
|
||||
|
||||
|
||||
def test_sanitize_user_input_none_allowed():
|
||||
assert sanitize_user_input(None, "test", allow_empty=True) is None
|
||||
|
||||
|
||||
def test_sanitize_user_input_empty_string_not_allowed():
|
||||
with pytest.raises(ValueError, match="cannot be empty"):
|
||||
sanitize_user_input("", "test")
|
||||
|
||||
|
||||
def test_sanitize_user_input_empty_string_allowed():
|
||||
assert sanitize_user_input("", "test", allow_empty=True) is None
|
||||
|
||||
|
||||
def test_sanitize_user_input_whitespace_only():
|
||||
with pytest.raises(ValueError, match="cannot be empty"):
|
||||
sanitize_user_input(" ", "test")
|
||||
|
||||
|
||||
def test_sanitize_user_input_strips_whitespace():
|
||||
assert sanitize_user_input(" hello ", "test") == "hello"
|
||||
|
||||
|
||||
def test_sanitize_user_input_too_long():
|
||||
with pytest.raises(ValueError, match="too long"):
|
||||
sanitize_user_input("a" * 256, "test", max_length=255)
|
||||
|
||||
|
||||
def test_sanitize_user_input_max_length_ok():
|
||||
result = sanitize_user_input("a" * 255, "test", max_length=255)
|
||||
assert result == "a" * 255
|
||||
|
||||
|
||||
def test_sanitize_user_input_blocks_javascript():
|
||||
with pytest.raises(ValueError, match="malicious URL scheme"):
|
||||
sanitize_user_input("javascript:alert(1)", "test")
|
||||
|
||||
|
||||
def test_sanitize_user_input_blocks_event_handler():
|
||||
with pytest.raises(ValueError, match="malicious event handler"):
|
||||
sanitize_user_input("onclick=alert(1)", "test")
|
||||
|
||||
|
||||
def test_sanitize_user_input_sql_keywords_not_checked_by_default():
|
||||
result = sanitize_user_input("DROP TABLE", "test")
|
||||
assert result == "DROP TABLE"
|
||||
|
||||
|
||||
def test_sanitize_user_input_sql_keywords_checked_when_enabled():
|
||||
with pytest.raises(ValueError, match="unsafe SQL keywords"):
|
||||
sanitize_user_input("DROP TABLE users", "test", check_sql_keywords=True)
|
||||
|
||||
|
||||
def test_sanitize_user_input_removes_zero_width_chars():
|
||||
result = sanitize_user_input("hel\u200blo", "test")
|
||||
assert result == "hello"
|
||||
|
||||
|
||||
def test_sanitize_user_input_xss_entity_encoded():
|
||||
"""Entity-encoded XSS attempts must be neutralized."""
|
||||
result = sanitize_user_input("<script>alert(1)</script>", "test")
|
||||
assert "<script>" not in result
|
||||
|
||||
|
||||
def test_sanitize_user_input_entity_encoded_javascript():
|
||||
"""Entity-encoded javascript: scheme should be caught after decoding."""
|
||||
with pytest.raises(ValueError, match="malicious URL scheme"):
|
||||
sanitize_user_input("javascript:alert(1)", "test")
|
||||
|
||||
|
||||
# --- sanitize_filter_value tests ---
|
||||
|
||||
|
||||
def test_sanitize_filter_value_integer():
|
||||
assert sanitize_filter_value(42) == 42
|
||||
|
||||
|
||||
def test_sanitize_filter_value_float():
|
||||
assert sanitize_filter_value(3.14) == 3.14
|
||||
|
||||
|
||||
def test_sanitize_filter_value_bool():
|
||||
assert sanitize_filter_value(True) is True
|
||||
|
||||
|
||||
def test_sanitize_filter_value_plain_string():
|
||||
assert sanitize_filter_value("hello") == "hello"
|
||||
|
||||
|
||||
def test_sanitize_filter_value_preserves_ampersand():
|
||||
assert sanitize_filter_value("A & B") == "A & B"
|
||||
|
||||
|
||||
def test_sanitize_filter_value_strips_html():
|
||||
assert sanitize_filter_value("<b>hello</b>") == "hello"
|
||||
|
||||
|
||||
def test_sanitize_filter_value_too_long():
|
||||
with pytest.raises(ValueError, match="too long"):
|
||||
sanitize_filter_value("a" * 1001)
|
||||
|
||||
|
||||
def test_sanitize_filter_value_blocks_javascript():
|
||||
with pytest.raises(ValueError, match="malicious URL scheme"):
|
||||
sanitize_filter_value("javascript:alert(1)")
|
||||
|
||||
|
||||
def test_sanitize_filter_value_blocks_xp_cmdshell():
|
||||
with pytest.raises(ValueError, match="malicious SQL procedures"):
|
||||
sanitize_filter_value("xp_cmdshell('dir')")
|
||||
|
||||
|
||||
def test_sanitize_filter_value_blocks_sp_executesql():
|
||||
with pytest.raises(ValueError, match="malicious SQL procedures"):
|
||||
sanitize_filter_value("sp_executesql @stmt")
|
||||
|
||||
|
||||
def test_sanitize_filter_value_blocks_union_select():
|
||||
with pytest.raises(ValueError, match="malicious SQL patterns"):
|
||||
sanitize_filter_value("' UNION SELECT * FROM users")
|
||||
|
||||
|
||||
def test_sanitize_filter_value_blocks_sql_comment():
|
||||
with pytest.raises(ValueError, match="malicious SQL patterns"):
|
||||
sanitize_filter_value("value -- drop")
|
||||
|
||||
|
||||
def test_sanitize_filter_value_blocks_shell_semicolon():
|
||||
with pytest.raises(ValueError, match="unsafe shell characters"):
|
||||
sanitize_filter_value("value;rm -rf")
|
||||
|
||||
|
||||
def test_sanitize_filter_value_blocks_shell_pipe():
|
||||
with pytest.raises(ValueError, match="unsafe shell characters"):
|
||||
sanitize_filter_value("value|cat /etc/passwd")
|
||||
|
||||
|
||||
def test_sanitize_filter_value_blocks_backtick():
|
||||
with pytest.raises(ValueError, match="unsafe shell characters"):
|
||||
sanitize_filter_value("`whoami`")
|
||||
|
||||
|
||||
def test_sanitize_filter_value_blocks_hex_encoding():
|
||||
with pytest.raises(ValueError, match="hex encoding"):
|
||||
sanitize_filter_value("\\x41\\x42")
|
||||
|
||||
|
||||
def test_sanitize_filter_value_allows_ampersand_alone():
|
||||
"""Ampersand is safe in filter values (only dangerous in shell contexts)."""
|
||||
assert sanitize_filter_value("AT&T") == "AT&T"
|
||||
|
||||
|
||||
def test_sanitize_filter_value_removes_zero_width_chars():
|
||||
result = sanitize_filter_value("hel\u200blo")
|
||||
assert result == "hello"
|
||||
|
||||
|
||||
def test_sanitize_filter_value_blocks_or_injection():
|
||||
with pytest.raises(ValueError, match="malicious SQL patterns"):
|
||||
sanitize_filter_value("' OR '1'='1")
|
||||
|
||||
|
||||
def test_sanitize_filter_value_blocks_and_injection():
|
||||
with pytest.raises(ValueError, match="malicious SQL patterns"):
|
||||
sanitize_filter_value("' AND '1'='1")
|
||||
|
||||
|
||||
def test_sanitize_filter_value_blocks_block_comment():
|
||||
with pytest.raises(ValueError, match="malicious SQL patterns"):
|
||||
sanitize_filter_value("value /* comment */")
|
||||
|
||||
|
||||
def test_sanitize_filter_value_blocks_semicolon_drop():
|
||||
with pytest.raises(ValueError, match="malicious SQL patterns"):
|
||||
sanitize_filter_value("; DROP TABLE users")
|
||||
|
||||
|
||||
def test_sanitize_filter_value_blocks_parentheses():
|
||||
with pytest.raises(ValueError, match="unsafe shell characters"):
|
||||
sanitize_filter_value("$(whoami)")
|
||||
|
||||
|
||||
def test_sanitize_filter_value_blocks_dollar_sign():
|
||||
with pytest.raises(ValueError, match="unsafe shell characters"):
|
||||
sanitize_filter_value("$HOME")
|
||||
|
||||
|
||||
def test_sanitize_filter_value_blocks_event_handler():
|
||||
with pytest.raises(ValueError, match="malicious event handler"):
|
||||
sanitize_filter_value("onerror=alert(1)")
|
||||
|
||||
|
||||
def test_sanitize_filter_value_xss_entity_encoded():
|
||||
"""Entity-encoded XSS in filter values must be neutralized."""
|
||||
result = sanitize_filter_value("<img src=x onerror=alert(1)>")
|
||||
assert "<img" not in result
|
||||
|
||||
|
||||
# --- Defense-in-depth: verify html.unescape is not used after nh3 ---
|
||||
|
||||
|
||||
def test_strip_html_tags_does_not_unescape_angle_brackets():
|
||||
"""Ensure nh3 entity output is not fully unescaped back to raw HTML.
|
||||
|
||||
nh3.clean may pass through HTML entities (e.g. <script>) from
|
||||
the input without stripping them. A full html.unescape() on nh3's
|
||||
output could reintroduce raw angle brackets, creating an XSS vector.
|
||||
"""
|
||||
# Plain text passes through unchanged
|
||||
result = _strip_html_tags("safe text")
|
||||
assert result == "safe text"
|
||||
|
||||
# Verify ampersand preservation still works
|
||||
result = _strip_html_tags("A & B")
|
||||
assert result == "A & B"
|
||||
|
||||
# Verify real tags are stripped
|
||||
result = _strip_html_tags("<script>alert(1)</script>")
|
||||
assert "<script>" not in result
|
||||
|
||||
# Entity-encoded script tags must not become real tags in the output
|
||||
result = _strip_html_tags("<script>alert(1)</script>")
|
||||
assert "<script>" not in result
|
||||
assert "</script>" not in result
|
||||
|
||||
|
||||
def test_strip_html_tags_img_onerror_entity_bypass():
|
||||
"""Entity-encoded img/onerror should not survive sanitization."""
|
||||
result = _strip_html_tags("<img src=x onerror=alert(1)>")
|
||||
assert "<img" not in result
|
||||
assert "onerror" not in result
|
||||
Reference in New Issue
Block a user