Compare commits

..

1 Commits

Author SHA1 Message Date
Claude Code
8d3c5496f0 test(sql-parser): pin TimescaleDB hyperfunctions parse on postgresql
Closes #32028

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-14 18:50:36 -07:00
16 changed files with 83 additions and 439 deletions

View File

@@ -16,7 +16,6 @@
* specific language governing permissions and limitations
* under the License.
*/
import { SupersetClient } from '@superset-ui/core';
import { logging } from '@apache-superset/core/utils';
import type { common as core } from '@apache-superset/core';
import ExtensionsLoader from './ExtensionsLoader';
@@ -112,33 +111,3 @@ test('handles initialization errors gracefully', async () => {
errorSpy.mockRestore();
appendChildSpy.mockRestore();
});
test('logs success after initializeExtensions completes', async () => {
const loader = ExtensionsLoader.getInstance();
const infoSpy = jest.spyOn(logging, 'info').mockImplementation();
jest.spyOn(SupersetClient, 'get').mockResolvedValue({
json: { result: [] },
} as any);
await loader.initializeExtensions();
expect(infoSpy).toHaveBeenCalledWith('Extensions initialized successfully.');
infoSpy.mockRestore();
});
test('logs error when initializeExtensions fails', async () => {
const loader = ExtensionsLoader.getInstance();
const errorSpy = jest.spyOn(logging, 'error').mockImplementation();
const fetchError = new Error('Network error');
jest.spyOn(SupersetClient, 'get').mockRejectedValue(fetchError);
await loader.initializeExtensions();
expect(errorSpy).toHaveBeenCalledWith(
'Error setting up extensions:',
fetchError,
);
errorSpy.mockRestore();
});

View File

@@ -34,8 +34,6 @@ class ExtensionsLoader {
private extensionIndex: Map<string, Extension> = new Map();
private initializationPromise: Promise<void> | null = null;
// eslint-disable-next-line no-useless-constructor
private constructor() {
// Private constructor for singleton pattern
@@ -56,27 +54,16 @@ class ExtensionsLoader {
* Initializes extensions by fetching the list from the API and loading each one.
* @throws Error if initialization fails.
*/
public initializeExtensions(): Promise<void> {
if (this.initializationPromise) {
return this.initializationPromise;
}
this.initializationPromise = (async () => {
try {
const response = await SupersetClient.get({
endpoint: '/api/v1/extensions/',
});
const extensions: Extension[] = response.json.result;
await Promise.all(
extensions.map(async extension => {
await this.initializeExtension(extension);
}),
);
logging.info('Extensions initialized successfully.');
} catch (error) {
logging.error('Error setting up extensions:', error);
}
})();
return this.initializationPromise;
public async initializeExtensions(): Promise<void> {
const response = await SupersetClient.get({
endpoint: '/api/v1/extensions/',
});
const extensions: Extension[] = response.json.result;
await Promise.all(
extensions.map(async extension => {
await this.initializeExtension(extension);
}),
);
}
/**

View File

@@ -18,6 +18,7 @@
*/
import { render, waitFor } from 'spec/helpers/testing-library';
import { FeatureFlag, isFeatureEnabled } from '@superset-ui/core';
import { logging } from '@apache-superset/core/utils';
import fetchMock from 'fetch-mock';
import ExtensionsStartup from './ExtensionsStartup';
import ExtensionsLoader from './ExtensionsLoader';
@@ -191,12 +192,14 @@ test('only initializes once even with multiple renders', async () => {
loader.initializeExtensions = originalInitialize;
});
test('initializes ExtensionsLoader when EnableExtensions feature flag is enabled', async () => {
test('initializes ExtensionsLoader and logs success when EnableExtensions feature flag is enabled', async () => {
// Ensure feature flag is enabled
mockIsFeatureEnabled.mockImplementation(
(flag: FeatureFlag) => flag === FeatureFlag.EnableExtensions,
);
const infoSpy = jest.spyOn(logging, 'info').mockImplementation();
// Mock the initializeExtensions method to succeed
const originalInitialize = ExtensionsLoader.prototype.initializeExtensions;
ExtensionsLoader.prototype.initializeExtensions = jest
@@ -217,10 +220,15 @@ test('initializes ExtensionsLoader when EnableExtensions feature flag is enabled
expect(
ExtensionsLoader.prototype.initializeExtensions,
).toHaveBeenCalledTimes(1);
// Verify success message was logged
expect(infoSpy).toHaveBeenCalledWith(
'Extensions initialized successfully.',
);
});
// Restore original method
ExtensionsLoader.prototype.initializeExtensions = originalInitialize;
infoSpy.mockRestore();
});
test('does not initialize ExtensionsLoader when EnableExtensions feature flag is disabled', async () => {
@@ -251,36 +259,38 @@ test('does not initialize ExtensionsLoader when EnableExtensions feature flag is
initializeSpy.mockRestore();
});
test('continues rendering children even when ExtensionsLoader initialization fails', async () => {
test('logs error when ExtensionsLoader initialization fails', async () => {
// Ensure feature flag is enabled
mockIsFeatureEnabled.mockReturnValue(true);
// Mock the initializeExtensions method to reject — ExtensionsLoader handles
// its own error logging internally
const errorSpy = jest.spyOn(logging, 'error').mockImplementation();
// Mock the initializeExtensions method to throw an error
const originalInitialize = ExtensionsLoader.prototype.initializeExtensions;
ExtensionsLoader.prototype.initializeExtensions = jest
.fn()
.mockImplementation(() => Promise.resolve());
.mockImplementation(() => {
throw new Error('Test initialization error');
});
const { container } = render(
<ExtensionsStartup>
<div data-testid="child" />
</ExtensionsStartup>,
{
useRedux: true,
initialState: mockInitialState,
},
);
render(<ExtensionsStartup />, {
useRedux: true,
initialState: mockInitialState,
});
await waitFor(() => {
// Verify feature flag was checked
expect(mockIsFeatureEnabled).toHaveBeenCalledWith(
FeatureFlag.EnableExtensions,
);
expect(
container.querySelector('[data-testid="child"]'),
).toBeInTheDocument();
// Verify error was logged
expect(errorSpy).toHaveBeenCalledWith(
'Error setting up extensions:',
expect.any(Error),
);
});
// Restore original method
ExtensionsLoader.prototype.initializeExtensions = originalInitialize;
errorSpy.mockRestore();
});

View File

@@ -82,7 +82,17 @@ const ExtensionsStartup: React.FC<{ children?: React.ReactNode }> = ({
const setup = async () => {
if (isFeatureEnabled(FeatureFlag.EnableExtensions)) {
await ExtensionsLoader.getInstance().initializeExtensions();
try {
await ExtensionsLoader.getInstance().initializeExtensions();
supersetCore.utils.logging.info(
'Extensions initialized successfully.',
);
} catch (error) {
supersetCore.utils.logging.error(
'Error setting up extensions:',
error,
);
}
}
setInitialized(true);
};

View File

@@ -36,7 +36,6 @@ else:
from flask import Flask, Response
from werkzeug.exceptions import NotFound
from superset.extensions.cache_middleware import ExtensionCacheMiddleware
from superset.extensions.local_extensions_watcher import (
start_local_extensions_watcher_thread,
)
@@ -67,6 +66,7 @@ def create_app(
or app.config["APPLICATION_ROOT"],
)
if app_root != "/":
app.wsgi_app = AppRootMiddleware(app.wsgi_app, app_root)
# If not set, manually configure options that depend on the
# value of app_root so things work out of the box
if not app.config["STATIC_ASSETS_PREFIX"]:
@@ -77,13 +77,6 @@ def create_app(
app_initializer = app.config.get("APP_INITIALIZER", SupersetAppInitializer)(app)
app_initializer.init_app()
# Must be applied before AppRootMiddleware so the path prefix
# is stripped before the extension asset path regex runs.
app.wsgi_app = ExtensionCacheMiddleware(app.wsgi_app)
if app_root != "/":
app.wsgi_app = AppRootMiddleware(app.wsgi_app, app_root)
# Set up LOCAL_EXTENSIONS file watcher when in debug mode
if app.debug:
start_local_extensions_watcher_thread(app)

View File

@@ -27,13 +27,9 @@ from sqlalchemy.exc import MultipleResultsFound
from sqlalchemy.sql.visitors import VisitableType
from superset import db, security_manager
from superset.commands.dataset.exceptions import (
DatasetAccessDeniedError,
DatasetForbiddenDataURI,
)
from superset.commands.dataset.exceptions import DatasetForbiddenDataURI
from superset.commands.exceptions import ImportFailedError
from superset.connectors.sqla.models import SqlaTable
from superset.exceptions import SupersetSecurityException
from superset.models.core import Database
from superset.sql.parse import Table
from superset.utils import json
@@ -176,12 +172,6 @@ def import_dataset( # noqa: C901
if dataset.id is None:
db.session.flush()
if not ignore_permissions:
try:
security_manager.raise_for_access(datasource=dataset)
except SupersetSecurityException as ex:
raise DatasetAccessDeniedError() from ex
try:
table_exists = dataset.database.has_table(
Table(dataset.table_name, dataset.schema, dataset.catalog),

View File

@@ -58,11 +58,7 @@ class QueryDAO(BaseDAO[Query]):
@staticmethod
def stop_query(client_id: str) -> None:
query = (
db.session.query(Query)
.filter(Query.client_id == client_id, Query.user_id == get_user_id())
.one_or_none()
)
query = db.session.query(Query).filter_by(client_id=client_id).one_or_none()
if not query:
raise QueryNotFoundException(f"Query with client_id {client_id} not found")

View File

@@ -225,9 +225,4 @@ class ExtensionsRestApi(BaseApi):
if not mimetype:
mimetype = "application/octet-stream"
response = send_file(BytesIO(chunk), mimetype=mimetype)
# Chunk filenames include a content hash, so they are immutable.
response.cache_control.max_age = 31536000
response.cache_control.public = True
response.cache_control.immutable = True
return response
return send_file(BytesIO(chunk), mimetype=mimetype)

View File

@@ -1,73 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations
import re
from types import TracebackType
from typing import Callable, Iterable, TYPE_CHECKING
if TYPE_CHECKING:
from _typeshed.wsgi import StartResponse, WSGIApplication, WSGIEnvironment
# Matches only the static asset endpoint: /api/v1/extensions/<publisher>/<name>/<file>
# Does not match the list (/), get (/<publisher>/<name>), or info (/_info) endpoints.
_ASSET_PATH_RE = re.compile(r"^/api/v1/extensions/[^/]+/[^/]+/[^/]+$")
class ExtensionCacheMiddleware:
"""Strip 'Cookie' from the Vary header on extension asset responses.
Flask's session interface appends Vary: Cookie unconditionally after every
after_request hook runs, so it cannot be removed at the view layer. This
middleware intercepts the WSGI response at the lowest level, after all
Flask processing is complete.
"""
def __init__(self, wsgi_app: WSGIApplication) -> None:
self.wsgi_app = wsgi_app
def __call__(
self, environ: WSGIEnvironment, start_response: StartResponse
) -> Iterable[bytes]:
path = environ.get("PATH_INFO", "")
if not _ASSET_PATH_RE.match(path):
return self.wsgi_app(environ, start_response)
def patched_start_response(
status: str,
response_headers: list[tuple[str, str]],
exc_info: (
tuple[type[BaseException], BaseException, TracebackType]
| tuple[None, None, None]
| None
) = None,
) -> Callable[[bytes], object]:
new_headers = []
for name, value in response_headers:
if name.lower() == "vary":
parts = [
v.strip()
for v in value.split(",")
if v.strip().lower() != "cookie"
]
if parts:
new_headers.append((name, ", ".join(parts)))
else:
new_headers.append((name, value))
return start_response(status, new_headers, exc_info)
return self.wsgi_app(environ, patched_start_response)

View File

@@ -29,7 +29,8 @@ BLOCKLIST = {
# sqlite creates a local DB, which allows mapping server's filesystem
re.compile(r"sqlite(?:\+[^\s]*)?$"),
# shillelagh allows opening local files (eg, 'SELECT * FROM "csv:///etc/passwd"')
re.compile(r"shillelagh(?:\+[^\s]*)?$"),
re.compile(r"shillelagh$"),
re.compile(r"shillelagh\+apsw$"),
}

View File

@@ -72,30 +72,11 @@ from superset.security.analytics_db_safety import check_sqlalchemy_uri
True,
"shillelagh cannot be used as a data source for security reasons.",
),
(
"shillelagh+:///home/superset/bad.db",
True,
"shillelagh cannot be used as a data source for security reasons.",
),
("shillelagh+:///home/superset/bad.db", False, None),
(
"shillelagh+something:///home/superset/bad.db",
True,
"shillelagh cannot be used as a data source for security reasons.",
),
(
"shillelagh+csv:///etc/passwd",
True,
"shillelagh cannot be used as a data source for security reasons.",
),
(
"shillelagh+json:///etc/passwd",
True,
"shillelagh cannot be used as a data source for security reasons.",
),
(
"shillelagh+gsheets:///",
True,
"shillelagh cannot be used as a data source for security reasons.",
False,
None,
),
],
)

View File

@@ -279,49 +279,3 @@ def test_query_dao_stop_query(
QueryDAO.stop_query(query_obj.client_id)
query = db.session.query(Query).one()
assert query.status == QueryStatus.STOPPED
def test_query_dao_stop_query_wrong_user(
mocker: MockerFixture, app: Any, session: Session
) -> None:
"""A user cannot stop a query that belongs to a different user."""
from superset import db
from superset.common.db_query_status import QueryStatus
from superset.models.core import Database
from superset.models.sql_lab import Query
engine = db.session.get_bind()
Query.metadata.create_all(engine) # pylint: disable=no-member
database = Database(database_name="my_database", sqlalchemy_uri="sqlite://")
query_obj = Query(
client_id="foo",
database=database,
tab_name="test_tab",
sql_editor_id="test_editor_id",
sql="select * from bar",
select_sql="select * from bar",
executed_sql="select * from bar",
limit=100,
select_as_cta=False,
rows=100,
error_message="none",
results_key="abc",
status=QueryStatus.RUNNING,
user_id=1,
)
db.session.add(database)
db.session.add(query_obj)
# Simulate a different user (user 2) attempting to stop user 1's query
mocker.patch("superset.daos.query.get_user_id", return_value=2)
from superset.daos.query import QueryDAO
with pytest.raises(QueryNotFoundException):
QueryDAO.stop_query(query_obj.client_id)
query = db.session.query(Query).one()
assert query.status == QueryStatus.RUNNING

View File

@@ -31,7 +31,6 @@ from sqlalchemy.orm.session import Session
from superset import db, security_manager
from superset.commands.dataset.exceptions import (
DatasetAccessDeniedError,
DatasetForbiddenDataURI,
)
from superset.commands.dataset.importers.v1.utils import (
@@ -745,44 +744,6 @@ def test_import_dataset_without_owner_permission(
mock_can_access.assert_called_with("can_write", "Dataset")
def test_import_dataset_access_check(
mocker: MockerFixture,
session: Session,
) -> None:
"""
Test that import_dataset raises DatasetAccessDeniedError when the user does not
have datasource-level access to the target dataset.
"""
from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
from superset.exceptions import SupersetSecurityException
mocker.patch.object(security_manager, "can_access", return_value=True)
mocker.patch.object(
security_manager,
"raise_for_access",
side_effect=SupersetSecurityException(
SupersetError(
error_type=SupersetErrorType.DATASOURCE_SECURITY_ACCESS_ERROR,
message="User does not have access to this datasource",
level=ErrorLevel.ERROR,
)
),
)
engine = db.session.get_bind()
SqlaTable.metadata.create_all(engine) # pylint: disable=no-member
database = Database(database_name="my_database", sqlalchemy_uri="sqlite://")
db.session.add(database)
db.session.flush()
config = copy.deepcopy(dataset_fixture)
config["database_id"] = database.id
with pytest.raises(DatasetAccessDeniedError):
import_dataset(config)
@pytest.mark.parametrize(
"allowed_urls, data_uri, expected, exception_class",
[

View File

@@ -1,156 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from typing import Any, Callable
from superset.extensions.cache_middleware import ExtensionCacheMiddleware
ResponseHeaders = list[tuple[str, str]]
def make_wsgi_app(
status: str = "200 OK",
headers: ResponseHeaders | None = None,
) -> Callable[..., Any]:
"""Returns a minimal WSGI app that calls start_response with the given headers."""
def app(environ, start_response): # noqa: ARG001
start_response(status, headers or [])
return [b"body"]
return app
def call_middleware(
path: str,
upstream_headers: ResponseHeaders,
) -> ResponseHeaders:
"""Run middleware for a given path, return headers passed to start_response."""
captured: list[ResponseHeaders] = []
def start_response(status, headers, exc_info=None): # noqa: ARG001
captured.append(headers)
wsgi_app = make_wsgi_app(headers=upstream_headers)
middleware = ExtensionCacheMiddleware(wsgi_app)
environ = {"PATH_INFO": path}
list(middleware(environ, start_response))
return captured[0]
# --- Path matching ---
def test_asset_path_is_intercepted() -> None:
headers = call_middleware(
"/api/v1/extensions/acme/my-ext/main.js",
[("Vary", "Accept-Encoding, Cookie")],
)
vary = dict(headers).get("Vary", "")
assert "Cookie" not in vary
def test_list_endpoint_is_not_intercepted() -> None:
upstream = [("Vary", "Accept-Encoding, Cookie")]
headers = call_middleware("/api/v1/extensions/", upstream)
assert headers == upstream
def test_get_endpoint_is_not_intercepted() -> None:
upstream = [("Vary", "Accept-Encoding, Cookie")]
headers = call_middleware("/api/v1/extensions/acme/my-ext", upstream)
assert headers == upstream
def test_info_endpoint_is_not_intercepted() -> None:
upstream = [("Vary", "Accept-Encoding, Cookie")]
headers = call_middleware("/api/v1/extensions/_info", upstream)
assert headers == upstream
def test_unrelated_path_is_not_intercepted() -> None:
upstream = [("Vary", "Accept-Encoding, Cookie")]
headers = call_middleware("/api/v1/dashboard/", upstream)
assert headers == upstream
# --- Vary stripping logic ---
def test_strips_cookie_from_vary() -> None:
headers = call_middleware(
"/api/v1/extensions/acme/my-ext/chunk.js",
[("Vary", "Accept-Encoding, Cookie")],
)
assert dict(headers)["Vary"] == "Accept-Encoding"
def test_strips_cookie_case_insensitive() -> None:
headers = call_middleware(
"/api/v1/extensions/acme/my-ext/chunk.js",
[("Vary", "Accept-Encoding, COOKIE")],
)
assert dict(headers)["Vary"] == "Accept-Encoding"
def test_removes_vary_header_when_cookie_is_only_value() -> None:
headers = call_middleware(
"/api/v1/extensions/acme/my-ext/chunk.js",
[("Vary", "Cookie")],
)
assert "Vary" not in dict(headers)
def test_multiple_vary_headers_all_stripped() -> None:
"""Some middleware stacks emit multiple separate Vary headers."""
headers = call_middleware(
"/api/v1/extensions/acme/my-ext/chunk.js",
[("Vary", "Cookie"), ("Vary", "Accept-Encoding, Cookie")],
)
vary_values = [v for k, v in headers if k == "Vary"]
assert all("Cookie" not in v for v in vary_values)
assert vary_values == ["Accept-Encoding"]
def test_non_vary_headers_are_preserved() -> None:
headers = call_middleware(
"/api/v1/extensions/acme/my-ext/chunk.wasm",
[
("Content-Type", "application/wasm"),
("Cache-Control", "public, max-age=31536000, immutable"),
("Vary", "Accept-Encoding, Cookie"),
],
)
d = dict(headers)
assert d["Content-Type"] == "application/wasm"
assert d["Cache-Control"] == "public, max-age=31536000, immutable"
def test_vary_without_cookie_is_unchanged() -> None:
headers = call_middleware(
"/api/v1/extensions/acme/my-ext/chunk.js",
[("Vary", "Accept-Encoding")],
)
assert dict(headers)["Vary"] == "Accept-Encoding"
def test_no_vary_header_produces_no_vary() -> None:
headers = call_middleware(
"/api/v1/extensions/acme/my-ext/chunk.js",
[("Content-Type", "application/javascript")],
)
assert "Vary" not in dict(headers)

View File

@@ -1164,6 +1164,32 @@ def test_has_mutation(engine: str, sql: str, expected: bool) -> None:
assert SQLScript(sql, engine).has_mutation() == expected
@pytest.mark.parametrize(
"sql",
[
"SELECT last(my_value_column, my_time_column) FROM my_table",
"SELECT first(my_value_column, my_time_column) FROM my_table",
"SELECT time_bucket('1 hour', my_time_column) AS bucket FROM my_table",
],
)
def test_postgres_parses_timescaledb_hyperfunctions(sql: str) -> None:
"""
Regression for #32028: TimescaleDB extends Postgres with hyperfunctions
(``last``, ``first``, ``time_bucket``, etc.) that take more arguments
than vanilla Postgres equivalents. SQL Lab tolerates them (it routes
raw SQL straight to the engine), but the dashboard chart path runs the
SQL through ``SQLScript`` for inspection. A strict per-function arity
check in sqlglot was rejecting these queries with ``The number of
provided arguments (2) is greater than the maximum number of supported
arguments (1)``, which broke dashboards built on TimescaleDB datasets.
These tests pin that the parse path tolerates Postgres-dialect SQL
using TimescaleDB hyperfunction signatures. If a future sqlglot
upgrade reintroduces the strict arity check, this fails immediately.
"""
SQLScript(sql, "postgresql") # Must not raise.
def test_get_settings() -> None:
"""
Test `get_settings` in some edge cases.