mirror of
https://github.com/apache/superset.git
synced 2026-06-27 18:35:32 +00:00
Compare commits
15 Commits
chore/ci-c
...
adopt/data
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9774116462 | ||
|
|
b22e43185b | ||
|
|
a42a54905d | ||
|
|
1ded69415c | ||
|
|
d4b9081712 | ||
|
|
fcb9a431de | ||
|
|
90f86f0de0 | ||
|
|
be8eb2ef8e | ||
|
|
d46ad2c757 | ||
|
|
37c2b65b31 | ||
|
|
79f6b3632a | ||
|
|
be4db99949 | ||
|
|
333b7d92cf | ||
|
|
8383ef41e0 | ||
|
|
b21d907bcb |
@@ -519,6 +519,80 @@ For a connection to a SQL endpoint you need to use the HTTP path from the endpoi
|
||||
{"connect_args": {"http_path": "/sql/1.0/endpoints/****", "driver_path": "/path/to/odbc/driver"}}
|
||||
```
|
||||
|
||||
##### OAuth2 Authentication
|
||||
|
||||
Superset supports OAuth2 authentication for Databricks, allowing users to authenticate with their personal Databricks accounts instead of using shared access tokens. This provides better security and audit capabilities.
|
||||
|
||||
###### Prerequisites
|
||||
|
||||
1. Create an OAuth2 application in your Databricks account:
|
||||
- Go to your Databricks account console
|
||||
- Navigate to **Settings** → **Developer** → **OAuth apps**
|
||||
- Create a new OAuth app with the redirect URI: `http://your-superset-host:port/api/v1/database/oauth2/`
|
||||
|
||||
2. Configure OAuth2 in your `superset_config.py`:
|
||||
|
||||
```python
|
||||
from datetime import timedelta
|
||||
|
||||
# OAuth2 configuration for Databricks
|
||||
# The authorization endpoint is derived from your Databricks workspace host; the
|
||||
# token endpoint must be set explicitly (see notes below).
|
||||
DATABASE_OAUTH2_CLIENTS = {
|
||||
"Databricks (legacy)": {
|
||||
"id": "your-databricks-client-id",
|
||||
"secret": "your-databricks-client-secret",
|
||||
"scope": "sql",
|
||||
"token_request_uri": "https://your-workspace-host/oidc/v1/token",
|
||||
},
|
||||
"Databricks": {
|
||||
"id": "your-databricks-client-id",
|
||||
"secret": "your-databricks-client-secret",
|
||||
"scope": "sql",
|
||||
"token_request_uri": "https://your-workspace-host/oidc/v1/token",
|
||||
},
|
||||
}
|
||||
|
||||
# OAuth2 redirect URI (adjust hostname/port for your setup)
|
||||
DATABASE_OAUTH2_REDIRECT_URI = "http://your-superset-host:port/api/v1/database/oauth2/"
|
||||
|
||||
# Optional: OAuth2 timeout
|
||||
DATABASE_OAUTH2_TIMEOUT = timedelta(seconds=30)
|
||||
```
|
||||
|
||||
Replace the following placeholders:
|
||||
- `your-databricks-client-id`: Your Databricks OAuth2 application client ID
|
||||
- `your-databricks-client-secret`: Your Databricks OAuth2 application client secret
|
||||
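- `your-superset-host:port`: Your Superset instance hostname and port
- `your-workspace-host`: Your Databricks workspace hostname (the same host used in the database connection), for example `dbc-abc.cloud.databricks.com`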
|
||||
|
||||
**Multi-Cloud Provider Support**
|
||||
|
||||
Databricks fronts the user-to-machine (U2M) OAuth2 flow on every workspace at
|
||||
`https://<workspace-host>/oidc/v1/authorize` and
|
||||
`https://<workspace-host>/oidc/v1/token`, regardless of whether the workspace
|
||||
runs on AWS, Azure, or GCP. Superset derives the **authorization** endpoint
|
||||
directly from your connection's host, so no cloud provider or account/tenant
|
||||
identifier needs to be configured.
|
||||
|
||||
The **token** endpoint cannot be auto-derived (token exchange has no database
|
||||
context to read the host), so you must supply `token_request_uri` in
|
||||
`DATABASE_OAUTH2_CLIENTS`, set to `https://<workspace-host>/oidc/v1/token` for
|
||||
your workspace.
|
||||
|
||||
If you supply a fully-resolved `authorization_request_uri`, it takes
precedence over the host-derived default. The `token_request_uri` you
configure is always used as-is, because the token endpoint has no
host-derived default.
|
||||
|
||||
###### Usage
|
||||
|
||||
Once configured, users can:
|
||||
|
||||
1. Connect to Databricks databases normally using access tokens
|
||||
2. When querying data, Superset will automatically redirect users to authenticate with Databricks if needed
|
||||
3. User-specific OAuth2 tokens will be used for database connections, providing better security and audit trails
|
||||
|
||||
This feature works with both "Databricks (legacy)" and "Databricks" engine types and automatically supports all major cloud providers (AWS, Azure, GCP).
|
||||
|
||||
#### Denodo
|
||||
|
||||
The recommended connector library for Denodo is
|
||||
|
||||
@@ -17,10 +17,11 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Any, Callable, TYPE_CHECKING, TypedDict, Union
|
||||
from typing import Any, Callable, cast, TYPE_CHECKING, TypedDict, Union
|
||||
|
||||
from apispec import APISpec
|
||||
from apispec.ext.marshmallow import MarshmallowPlugin
|
||||
from flask import g
|
||||
from flask_babel import gettext as __
|
||||
from marshmallow import fields, Schema
|
||||
from marshmallow.validate import Range
|
||||
@@ -38,12 +39,18 @@ from superset.db_engine_specs.base import (
|
||||
)
|
||||
from superset.db_engine_specs.hive import HiveEngineSpec
|
||||
from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
|
||||
from superset.exceptions import OAuth2Error
|
||||
from superset.utils import json
|
||||
from superset.utils.core import get_user_agent, QuerySource
|
||||
from superset.utils.network import is_hostname_valid, is_port_open
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from superset.models.core import Database
|
||||
from superset.superset_typing import (
|
||||
OAuth2ClientConfig,
|
||||
OAuth2State,
|
||||
OAuth2TokenResponse,
|
||||
)
|
||||
|
||||
|
||||
try:
|
||||
@@ -277,6 +284,135 @@ class DatabricksDynamicBaseEngineSpec(BasicParametersMixin, DatabricksBaseEngine
|
||||
"port": "port",
|
||||
}
|
||||
|
||||
# The Databricks SQL driver has no dedicated authentication exception, so an
|
||||
# expired or missing token surfaces as a generic driver error. These case-
|
||||
# insensitive substrings flag the errors that should bootstrap a re-auth.
|
||||
oauth2_auth_failure_signals = (
|
||||
"http 401",
|
||||
"unauthorized",
|
||||
"unauthenticated",
|
||||
"invalid access token",
|
||||
"invalid token",
|
||||
"expired token",
|
||||
"token expired",
|
||||
)
|
||||
|
||||
@classmethod
def _workspace_oauth2_endpoint(cls, database: Database, path: str) -> str:
    """
    Resolve a workspace-level Databricks OAuth2 (U2M) endpoint.

    The endpoint is ``https://<workspace-host>/oidc/v1/<path>``, where the
    workspace host is the host of the database's connection URL. Databricks
    serves the user-to-machine flow at this location on AWS, Azure and GCP
    workspaces alike, so no account or tenant identifier is involved.

    :param database: database whose ``url_object.host`` is the workspace host
    :param path: final path segment of the endpoint, e.g. ``authorize``
    :returns: the absolute endpoint URL
    :raises OAuth2Error: if the connection URL has no host
    """
    workspace_host = database.url_object.host
    if workspace_host:
        return f"https://{workspace_host}/oidc/v1/{path}"

    # Without a host the f-string above would yield ``https:///oidc/...``.
    raise OAuth2Error(
        "Databricks OAuth2 endpoint could not be resolved: the database "
        "connection has no host."
    )
|
||||
|
||||
@classmethod
def needs_oauth2(cls, ex: Exception) -> bool:
    """
    Decide whether a driver error should start the OAuth2 dance.

    The Databricks driver raises no dedicated authentication exception
    (unlike Trino's ``TrinoAuthError`` or GSheets' ``UnauthenticatedError``).
    So besides the ``isinstance`` check against ``oauth2_exception``, the
    lower-cased error message is searched for any of the substrings in
    ``oauth2_auth_failure_signals`` (mirrors ``GSheetsEngineSpec.needs_oauth2``).

    :param ex: exception raised while talking to the database
    :returns: ``True`` if the error looks like an authentication failure
    """
    # No ``user`` on the Flask ``g`` context: never trigger OAuth2.
    if not g or not hasattr(g, "user"):
        return False

    if isinstance(ex, cls.oauth2_exception):
        return True

    # The signals are lowercase, so lower-casing the message makes the
    # substring match case-insensitive.
    lowered = str(ex).lower()
    for signal in cls.oauth2_auth_failure_signals:
        if signal in lowered:
            return True
    return False
|
||||
|
||||
@classmethod
def get_oauth2_authorization_uri(
    cls,
    config: OAuth2ClientConfig,
    state: OAuth2State,
    code_verifier: str | None = None,
) -> str:
    """
    Return the URI for the initial OAuth2 request.

    A non-empty ``authorization_request_uri`` in ``config`` is used as-is.
    Otherwise the endpoint is derived from the host of the database named by
    ``state["database_id"]`` (``https://<host>/oidc/v1/authorize``).

    :param config: OAuth2 client configuration for this engine
    :param state: OAuth2 state; ``database_id`` selects the database whose
        host is used when the endpoint has to be derived
    :param code_verifier: optional PKCE code verifier, forwarded unchanged
    :returns: the authorization URI built by the parent implementation
    :raises OAuth2Error: if the endpoint has to be derived but the database
        cannot be found, or its connection has no host
    """
    if not config.get("authorization_request_uri"):
        # Imported at call time rather than at module import.
        from superset import db
        from superset.models.core import Database

        database_id = state["database_id"]
        database = db.session.get(Database, database_id)
        if not database:
            # Fail fast instead of handing the parent an empty endpoint,
            # which would produce an unusable authorization URL.
            raise OAuth2Error(
                "Databricks OAuth2 authorization endpoint could not be "
                f"resolved: database {database_id} was not found."
            )

        # Build a new mapping so the caller's config is left untouched.
        config = cast(
            "OAuth2ClientConfig",
            {
                **config,
                "authorization_request_uri": cls._workspace_oauth2_endpoint(
                    database, "authorize"
                ),
            },
        )

    return super().get_oauth2_authorization_uri(config, state, code_verifier)
|
||||
|
||||
@classmethod
def get_oauth2_token(
    cls,
    config: OAuth2ClientConfig,
    code: str,
    code_verifier: str | None = None,
) -> OAuth2TokenResponse:
    """
    Exchange the authorization code for refresh/access tokens.

    The exchange happens in a separate request with no database context, so
    the workspace host cannot be used to derive the endpoint here. A
    configured ``token_request_uri``
    (``https://<workspace-host>/oidc/v1/token``) is therefore mandatory.

    :param config: OAuth2 client configuration for this engine
    :param code: authorization code returned by the authorization endpoint
    :param code_verifier: optional PKCE code verifier, forwarded unchanged
    :returns: the token response produced by the parent implementation
    :raises OAuth2Error: if ``token_request_uri`` is missing or empty
    """
    token_endpoint = config.get("token_request_uri")
    if token_endpoint:
        return super().get_oauth2_token(config, code, code_verifier)

    # Refuse to POST to an unresolved endpoint.
    raise OAuth2Error(
        "Databricks OAuth2 token endpoint is not configured: set "
        "`token_request_uri` to https://<workspace-host>/oidc/v1/token "
        "in DATABASE_OAUTH2_CLIENTS."
    )
|
||||
|
||||
@classmethod
def impersonate_user(
    cls,
    database: Database,
    username: str | None,
    user_token: str | None,
    url: URL,
    engine_kwargs: dict[str, Any],
) -> tuple[URL, dict[str, Any]]:
    """
    Swap the connection's access token for the user's OAuth2 token.

    :param database: database being connected to (not used here)
    :param username: name of the impersonated user (not used here)
    :param user_token: the user's OAuth2 access token, if any
    :param url: SQLAlchemy URL of the connection
    :param engine_kwargs: engine keyword arguments; ``connect_args`` is
        created if absent and updated in place
    :returns: the (possibly updated) URL and the engine keyword arguments
    """
    # Without a user token the connection is returned untouched.
    if not user_token:
        return url, engine_kwargs

    # The token travels as the URL password.
    url = url.set(password=user_token)

    # Only overwrite an ``access_token`` that is already present.
    driver_args = engine_kwargs.setdefault("connect_args", {})
    if "access_token" in driver_args:
        driver_args["access_token"] = user_token

    return url, engine_kwargs
|
||||
|
||||
@staticmethod
|
||||
def get_extra_params(
|
||||
database: Database, source: QuerySource | None = None
|
||||
@@ -474,6 +610,16 @@ class DatabricksNativeEngineSpec(DatabricksDynamicBaseEngineSpec):
|
||||
supports_dynamic_catalog = True
|
||||
supports_cross_catalog_queries = True
|
||||
|
||||
# OAuth 2.0 support. The flow (endpoint resolution from the workspace host,
|
||||
# `needs_oauth2` detection) is shared via `DatabricksDynamicBaseEngineSpec`.
|
||||
supports_oauth2 = True
|
||||
oauth2_scope = "sql"
|
||||
|
||||
# Authorization endpoint is derived from the workspace host at runtime; the
|
||||
# token endpoint must be configured (no DB context at exchange time).
|
||||
oauth2_authorization_request_uri = ""
|
||||
oauth2_token_request_uri = ""
|
||||
|
||||
@classmethod
|
||||
def build_sqlalchemy_uri( # type: ignore
|
||||
cls, parameters: DatabricksNativeParametersType, *_
|
||||
@@ -685,6 +831,16 @@ class DatabricksPythonConnectorEngineSpec(DatabricksDynamicBaseEngineSpec):
|
||||
|
||||
supports_dynamic_schema = supports_catalog = supports_dynamic_catalog = True
|
||||
|
||||
# OAuth 2.0 support. The flow (endpoint resolution from the workspace host,
|
||||
# `needs_oauth2` detection) is shared via `DatabricksDynamicBaseEngineSpec`.
|
||||
supports_oauth2 = True
|
||||
oauth2_scope = "sql"
|
||||
|
||||
# Authorization endpoint is derived from the workspace host at runtime; the
|
||||
# token endpoint must be configured (no DB context at exchange time).
|
||||
oauth2_authorization_request_uri = ""
|
||||
oauth2_token_request_uri = ""
|
||||
|
||||
@classmethod
|
||||
def build_sqlalchemy_uri( # type: ignore
|
||||
cls, parameters: DatabricksPythonConnectorParametersType, *_
|
||||
|
||||
@@ -17,14 +17,23 @@
|
||||
# pylint: disable=unused-argument, import-outside-toplevel, protected-access
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
from typing import Any, Optional
|
||||
from urllib.parse import parse_qs, urlparse
|
||||
|
||||
import pytest
|
||||
from pytest_mock import MockerFixture
|
||||
from sqlalchemy.engine.url import make_url
|
||||
|
||||
from superset.db_engine_specs.databricks import DatabricksNativeEngineSpec
|
||||
from superset.db_engine_specs.base import OAuth2State
|
||||
from superset.db_engine_specs.databricks import (
|
||||
DatabricksNativeEngineSpec,
|
||||
DatabricksPythonConnectorEngineSpec,
|
||||
)
|
||||
from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
|
||||
from superset.exceptions import OAuth2Error, OAuth2RedirectError
|
||||
from superset.superset_typing import OAuth2ClientConfig
|
||||
from superset.utils import json
|
||||
from superset.utils.oauth2 import decode_oauth2_state
|
||||
from tests.unit_tests.db_engine_specs.utils import assert_convert_dttm
|
||||
from tests.unit_tests.fixtures.common import dttm # noqa: F401
|
||||
|
||||
@@ -291,3 +300,595 @@ def test_get_prequeries(mocker: MockerFixture) -> None:
|
||||
"USE CATALOG `evil`` USE CATALOG bad`",
|
||||
"USE SCHEMA `evil`` USE SCHEMA bad`",
|
||||
]
|
||||
|
||||
|
||||
# OAuth2 Tests
|
||||
|
||||
|
||||
def test_oauth2_attributes() -> None:
|
||||
"""
|
||||
Test that OAuth2 attributes are properly set for both engine specs.
|
||||
"""
|
||||
# Test DatabricksNativeEngineSpec
|
||||
assert DatabricksNativeEngineSpec.supports_oauth2 is True
|
||||
assert DatabricksNativeEngineSpec.oauth2_scope == "sql"
|
||||
# The authorization endpoint is derived from the workspace host at runtime;
|
||||
# the token endpoint must be configured explicitly.
|
||||
assert DatabricksNativeEngineSpec.oauth2_authorization_request_uri == ""
|
||||
assert DatabricksNativeEngineSpec.oauth2_token_request_uri == ""
|
||||
|
||||
# Test DatabricksPythonConnectorEngineSpec
|
||||
assert DatabricksPythonConnectorEngineSpec.supports_oauth2 is True
|
||||
assert DatabricksPythonConnectorEngineSpec.oauth2_scope == "sql"
|
||||
assert DatabricksPythonConnectorEngineSpec.oauth2_authorization_request_uri == ""
|
||||
assert DatabricksPythonConnectorEngineSpec.oauth2_token_request_uri == ""
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"spec",
|
||||
[DatabricksNativeEngineSpec, DatabricksPythonConnectorEngineSpec],
|
||||
)
|
||||
@pytest.mark.parametrize(
|
||||
"message",
|
||||
[
|
||||
"Error during request to server: HTTP 401 Unauthorized",
|
||||
"Invalid access token",
|
||||
"The access token expired",
|
||||
"UNAUTHENTICATED: token is no longer valid",
|
||||
],
|
||||
)
|
||||
def test_needs_oauth2_detects_auth_failure_from_message(
|
||||
mocker: MockerFixture,
|
||||
spec: Any,
|
||||
message: str,
|
||||
) -> None:
|
||||
"""
|
||||
The Databricks driver has no dedicated auth exception, so `needs_oauth2`
|
||||
matches auth-failure signals in the error message to bootstrap a re-auth.
|
||||
"""
|
||||
g = mocker.patch("superset.db_engine_specs.databricks.g")
|
||||
g.user = mocker.MagicMock()
|
||||
|
||||
assert spec.needs_oauth2(Exception(message)) is True
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"spec",
|
||||
[DatabricksNativeEngineSpec, DatabricksPythonConnectorEngineSpec],
|
||||
)
|
||||
@pytest.mark.parametrize(
|
||||
"message",
|
||||
[
|
||||
"Table not found",
|
||||
# A bare 401 in an unrelated position must not look like an auth failure.
|
||||
"Query failed at line 401: syntax error",
|
||||
],
|
||||
)
|
||||
def test_needs_oauth2_ignores_unrelated_errors(
|
||||
mocker: MockerFixture,
|
||||
spec: Any,
|
||||
message: str,
|
||||
) -> None:
|
||||
"""
|
||||
A non-auth driver error must not trigger the OAuth2 dance.
|
||||
"""
|
||||
g = mocker.patch("superset.db_engine_specs.databricks.g")
|
||||
g.user = mocker.MagicMock()
|
||||
|
||||
assert spec.needs_oauth2(Exception(message)) is False
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"spec",
|
||||
[DatabricksNativeEngineSpec, DatabricksPythonConnectorEngineSpec],
|
||||
)
|
||||
def test_needs_oauth2_matches_oauth2_redirect_error(
|
||||
mocker: MockerFixture,
|
||||
spec: Any,
|
||||
) -> None:
|
||||
"""
|
||||
The inherited `isinstance` check against `oauth2_exception` still holds.
|
||||
"""
|
||||
g = mocker.patch("superset.db_engine_specs.databricks.g")
|
||||
g.user = mocker.MagicMock()
|
||||
|
||||
ex = OAuth2RedirectError("https://example/authorize", "tab", "redirect")
|
||||
assert spec.needs_oauth2(ex) is True
|
||||
|
||||
|
||||
def test_impersonate_user_with_token(mocker: MockerFixture) -> None:
|
||||
"""
|
||||
Test impersonate_user method with OAuth2 token for DatabricksNativeEngineSpec.
|
||||
"""
|
||||
database = mocker.MagicMock()
|
||||
original_url = make_url(
|
||||
"databricks+connector://token:original-token@host:443/database"
|
||||
)
|
||||
engine_kwargs = {"connect_args": {"access_token": "original-token"}}
|
||||
|
||||
# Test with user token
|
||||
url, kwargs = DatabricksNativeEngineSpec.impersonate_user(
|
||||
database=database,
|
||||
username="user1",
|
||||
user_token="user-oauth-token", # noqa: S106
|
||||
url=original_url,
|
||||
engine_kwargs=engine_kwargs,
|
||||
)
|
||||
|
||||
# Check that the password (token) was updated in the URL
|
||||
assert url.password == "user-oauth-token" # noqa: S105
|
||||
# Check that access_token was updated in connect_args
|
||||
assert kwargs["connect_args"]["access_token"] == "user-oauth-token" # noqa: S105
|
||||
|
||||
|
||||
def test_impersonate_user_without_token(mocker: MockerFixture) -> None:
|
||||
"""
|
||||
Test impersonate_user method without OAuth2 token.
|
||||
"""
|
||||
database = mocker.MagicMock()
|
||||
original_url = make_url(
|
||||
"databricks+connector://token:original-token@host:443/database"
|
||||
)
|
||||
engine_kwargs = {"connect_args": {"access_token": "original-token"}}
|
||||
|
||||
# Test without user token
|
||||
url, kwargs = DatabricksNativeEngineSpec.impersonate_user(
|
||||
database=database,
|
||||
username="user1",
|
||||
user_token=None,
|
||||
url=original_url,
|
||||
engine_kwargs=engine_kwargs,
|
||||
)
|
||||
|
||||
# Check that nothing was changed
|
||||
assert url.password == "original-token" # noqa: S105
|
||||
assert kwargs["connect_args"]["access_token"] == "original-token" # noqa: S105
|
||||
|
||||
|
||||
def test_impersonate_user_python_connector(mocker: MockerFixture) -> None:
|
||||
"""
|
||||
Test impersonate_user method for DatabricksPythonConnectorEngineSpec.
|
||||
"""
|
||||
database = mocker.MagicMock()
|
||||
original_url = make_url(
|
||||
"databricks://token:original-token@host:443?http_path=path&catalog=main&schema=default"
|
||||
)
|
||||
engine_kwargs = {"connect_args": {"access_token": "original-token"}}
|
||||
|
||||
# Test with user token
|
||||
url, kwargs = DatabricksPythonConnectorEngineSpec.impersonate_user(
|
||||
database=database,
|
||||
username="user1",
|
||||
user_token="user-oauth-token", # noqa: S106
|
||||
url=original_url,
|
||||
engine_kwargs=engine_kwargs,
|
||||
)
|
||||
|
||||
# Check that the password (token) was updated in the URL
|
||||
assert url.password == "user-oauth-token" # noqa: S105
|
||||
# Check that access_token was updated in connect_args
|
||||
assert kwargs["connect_args"]["access_token"] == "user-oauth-token" # noqa: S105
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def oauth2_config_native() -> OAuth2ClientConfig:
|
||||
"""
|
||||
Config for Databricks Native OAuth2.
|
||||
"""
|
||||
return {
|
||||
"id": "databricks-client-id",
|
||||
"secret": "databricks-client-secret",
|
||||
"scope": "sql",
|
||||
"redirect_uri": "http://localhost:8088/api/v1/database/oauth2/",
|
||||
"authorization_request_uri": "https://accounts.cloud.databricks.com/oidc/accounts/12345/v1/authorize",
|
||||
"token_request_uri": "https://accounts.cloud.databricks.com/oidc/accounts/12345/v1/token",
|
||||
"request_content_type": "json",
|
||||
}
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def oauth2_config_python() -> OAuth2ClientConfig:
|
||||
"""
|
||||
Config for Databricks Python Connector OAuth2.
|
||||
"""
|
||||
return {
|
||||
"id": "databricks-client-id",
|
||||
"secret": "databricks-client-secret",
|
||||
"scope": "sql",
|
||||
"redirect_uri": "http://localhost:8088/api/v1/database/oauth2/",
|
||||
"authorization_request_uri": "https://accounts.cloud.databricks.com/oidc/accounts/12345/v1/authorize",
|
||||
"token_request_uri": "https://accounts.cloud.databricks.com/oidc/accounts/12345/v1/token",
|
||||
"request_content_type": "json",
|
||||
}
|
||||
|
||||
|
||||
def test_is_oauth2_enabled_no_config_native(mocker: MockerFixture) -> None:
|
||||
"""
|
||||
Test `is_oauth2_enabled` when OAuth2 is not configured for Native engine.
|
||||
"""
|
||||
mocker.patch(
|
||||
"flask.current_app.config",
|
||||
new={"DATABASE_OAUTH2_CLIENTS": {}},
|
||||
)
|
||||
|
||||
assert DatabricksNativeEngineSpec.is_oauth2_enabled() is False
|
||||
|
||||
|
||||
def test_is_oauth2_enabled_config_native(mocker: MockerFixture) -> None:
|
||||
"""
|
||||
Test `is_oauth2_enabled` when OAuth2 is configured for Native engine.
|
||||
"""
|
||||
mocker.patch(
|
||||
"flask.current_app.config",
|
||||
new={
|
||||
"DATABASE_OAUTH2_CLIENTS": {
|
||||
"Databricks (legacy)": {
|
||||
"id": "client-id",
|
||||
"secret": "client-secret",
|
||||
},
|
||||
}
|
||||
},
|
||||
)
|
||||
|
||||
assert DatabricksNativeEngineSpec.is_oauth2_enabled() is True
|
||||
|
||||
|
||||
def test_is_oauth2_enabled_no_config_python(mocker: MockerFixture) -> None:
|
||||
"""
|
||||
Test `is_oauth2_enabled` when OAuth2 is not configured for Python Connector engine.
|
||||
"""
|
||||
mocker.patch(
|
||||
"flask.current_app.config",
|
||||
new={"DATABASE_OAUTH2_CLIENTS": {}},
|
||||
)
|
||||
|
||||
assert DatabricksPythonConnectorEngineSpec.is_oauth2_enabled() is False
|
||||
|
||||
|
||||
def test_is_oauth2_enabled_config_python(mocker: MockerFixture) -> None:
|
||||
"""
|
||||
Test `is_oauth2_enabled` when OAuth2 is configured for Python Connector engine.
|
||||
"""
|
||||
mocker.patch(
|
||||
"flask.current_app.config",
|
||||
new={
|
||||
"DATABASE_OAUTH2_CLIENTS": {
|
||||
"Databricks": {
|
||||
"id": "client-id",
|
||||
"secret": "client-secret",
|
||||
},
|
||||
}
|
||||
},
|
||||
)
|
||||
|
||||
assert DatabricksPythonConnectorEngineSpec.is_oauth2_enabled() is True
|
||||
|
||||
|
||||
def test_get_oauth2_authorization_uri_native(
|
||||
mocker: MockerFixture,
|
||||
oauth2_config_native: OAuth2ClientConfig,
|
||||
) -> None:
|
||||
"""
|
||||
Test `get_oauth2_authorization_uri` for Native engine.
|
||||
"""
|
||||
from superset.db_engine_specs.base import OAuth2State
|
||||
|
||||
state: OAuth2State = {
|
||||
"database_id": 1,
|
||||
"user_id": 1,
|
||||
"default_redirect_uri": "http://localhost:8088/api/v1/database/oauth2/",
|
||||
"tab_id": "1234",
|
||||
}
|
||||
|
||||
url = DatabricksNativeEngineSpec.get_oauth2_authorization_uri(
|
||||
oauth2_config_native, state
|
||||
)
|
||||
parsed = urlparse(url)
|
||||
assert parsed.netloc == "accounts.cloud.databricks.com"
|
||||
assert parsed.path == "/oidc/accounts/12345/v1/authorize"
|
||||
|
||||
query = parse_qs(parsed.query)
|
||||
assert query["scope"][0] == "sql"
|
||||
encoded_state = query["state"][0].replace("%2E", ".")
|
||||
assert decode_oauth2_state(encoded_state) == state
|
||||
|
||||
|
||||
def test_get_oauth2_authorization_uri_python(
|
||||
mocker: MockerFixture,
|
||||
oauth2_config_python: OAuth2ClientConfig,
|
||||
) -> None:
|
||||
"""
|
||||
Test `get_oauth2_authorization_uri` for Python Connector engine.
|
||||
"""
|
||||
from superset.db_engine_specs.base import OAuth2State
|
||||
|
||||
state: OAuth2State = {
|
||||
"database_id": 1,
|
||||
"user_id": 1,
|
||||
"default_redirect_uri": "http://localhost:8088/api/v1/database/oauth2/",
|
||||
"tab_id": "1234",
|
||||
}
|
||||
|
||||
url = DatabricksPythonConnectorEngineSpec.get_oauth2_authorization_uri(
|
||||
oauth2_config_python, state
|
||||
)
|
||||
parsed = urlparse(url)
|
||||
assert parsed.netloc == "accounts.cloud.databricks.com"
|
||||
assert parsed.path == "/oidc/accounts/12345/v1/authorize"
|
||||
|
||||
query = parse_qs(parsed.query)
|
||||
assert query["scope"][0] == "sql"
|
||||
encoded_state = query["state"][0].replace("%2E", ".")
|
||||
assert decode_oauth2_state(encoded_state) == state
|
||||
|
||||
|
||||
def test_get_oauth2_token_native(
|
||||
mocker: MockerFixture,
|
||||
oauth2_config_native: OAuth2ClientConfig,
|
||||
) -> None:
|
||||
"""
|
||||
Test `get_oauth2_token` for Native engine.
|
||||
"""
|
||||
requests = mocker.patch("superset.db_engine_specs.base.requests")
|
||||
requests.post().json.return_value = {
|
||||
"access_token": "access-token",
|
||||
"expires_in": 3600,
|
||||
"scope": "sql",
|
||||
"token_type": "Bearer",
|
||||
"refresh_token": "refresh-token",
|
||||
}
|
||||
|
||||
assert DatabricksNativeEngineSpec.get_oauth2_token(
|
||||
oauth2_config_native, "authorization-code"
|
||||
) == {
|
||||
"access_token": "access-token",
|
||||
"expires_in": 3600,
|
||||
"scope": "sql",
|
||||
"token_type": "Bearer",
|
||||
"refresh_token": "refresh-token",
|
||||
}
|
||||
requests.post.assert_called_with(
|
||||
"https://accounts.cloud.databricks.com/oidc/accounts/12345/v1/token",
|
||||
json={
|
||||
"code": "authorization-code",
|
||||
"client_id": "databricks-client-id",
|
||||
"client_secret": "databricks-client-secret",
|
||||
"redirect_uri": "http://localhost:8088/api/v1/database/oauth2/",
|
||||
"grant_type": "authorization_code",
|
||||
},
|
||||
timeout=30.0,
|
||||
)
|
||||
|
||||
|
||||
def test_get_oauth2_token_python(
|
||||
mocker: MockerFixture,
|
||||
oauth2_config_python: OAuth2ClientConfig,
|
||||
) -> None:
|
||||
"""
|
||||
Test `get_oauth2_token` for Python Connector engine.
|
||||
"""
|
||||
requests = mocker.patch("superset.db_engine_specs.base.requests")
|
||||
requests.post().json.return_value = {
|
||||
"access_token": "access-token",
|
||||
"expires_in": 3600,
|
||||
"scope": "sql",
|
||||
"token_type": "Bearer",
|
||||
"refresh_token": "refresh-token",
|
||||
}
|
||||
|
||||
assert DatabricksPythonConnectorEngineSpec.get_oauth2_token(
|
||||
oauth2_config_python, "authorization-code"
|
||||
) == {
|
||||
"access_token": "access-token",
|
||||
"expires_in": 3600,
|
||||
"scope": "sql",
|
||||
"token_type": "Bearer",
|
||||
"refresh_token": "refresh-token",
|
||||
}
|
||||
requests.post.assert_called_with(
|
||||
"https://accounts.cloud.databricks.com/oidc/accounts/12345/v1/token",
|
||||
json={
|
||||
"code": "authorization-code",
|
||||
"client_id": "databricks-client-id",
|
||||
"client_secret": "databricks-client-secret",
|
||||
"redirect_uri": "http://localhost:8088/api/v1/database/oauth2/",
|
||||
"grant_type": "authorization_code",
|
||||
},
|
||||
timeout=30.0,
|
||||
)
|
||||
|
||||
|
||||
def test_get_oauth2_fresh_token_native(
|
||||
mocker: MockerFixture,
|
||||
oauth2_config_native: OAuth2ClientConfig,
|
||||
) -> None:
|
||||
"""
|
||||
Test `get_oauth2_fresh_token` for Native engine.
|
||||
"""
|
||||
requests = mocker.patch("superset.db_engine_specs.base.requests")
|
||||
requests.post().json.return_value = {
|
||||
"access_token": "new-access-token",
|
||||
"expires_in": 3600,
|
||||
"scope": "sql",
|
||||
"token_type": "Bearer",
|
||||
"refresh_token": "new-refresh-token",
|
||||
}
|
||||
|
||||
assert DatabricksNativeEngineSpec.get_oauth2_fresh_token(
|
||||
oauth2_config_native, "old-refresh-token"
|
||||
) == {
|
||||
"access_token": "new-access-token",
|
||||
"expires_in": 3600,
|
||||
"scope": "sql",
|
||||
"token_type": "Bearer",
|
||||
"refresh_token": "new-refresh-token",
|
||||
}
|
||||
requests.post.assert_called_with(
|
||||
"https://accounts.cloud.databricks.com/oidc/accounts/12345/v1/token",
|
||||
json={
|
||||
"client_id": "databricks-client-id",
|
||||
"client_secret": "databricks-client-secret",
|
||||
"refresh_token": "old-refresh-token",
|
||||
"grant_type": "refresh_token",
|
||||
},
|
||||
timeout=30.0,
|
||||
)
|
||||
|
||||
|
||||
def _oauth2_state() -> OAuth2State:
    """
    Return a fresh OAuth2 state dictionary for the OAuth2 tests.

    A new dictionary is built on every call, so tests can modify the result
    without affecting each other.
    """
    redirect_uri = "http://localhost:8088/api/v1/database/oauth2/"
    return {
        "database_id": 1,
        "user_id": 1,
        "default_redirect_uri": redirect_uri,
        "tab_id": "1234",
    }
|
||||
|
||||
|
||||
def _unresolved_oauth2_config() -> OAuth2ClientConfig:
    """
    Return an OAuth2 client config whose endpoint URIs are both empty.

    This is the shape `get_oauth2_config` is expected to produce when no
    endpoints are overridden, since the spec's URI class attributes are empty
    strings. A new dictionary is built on every call.
    """
    unresolved = ""
    redirect_uri = "http://localhost:8088/api/v1/database/oauth2/"
    config: OAuth2ClientConfig = {
        "id": "databricks-client-id",
        "secret": "databricks-client-secret",
        "scope": "sql",
        "redirect_uri": redirect_uri,
        "authorization_request_uri": unresolved,
        "token_request_uri": unresolved,
        "request_content_type": "json",
    }
    return config
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"spec",
|
||||
[DatabricksNativeEngineSpec, DatabricksPythonConnectorEngineSpec],
|
||||
)
|
||||
@pytest.mark.parametrize(
|
||||
"host",
|
||||
[
|
||||
"dbc-abc.cloud.databricks.com",
|
||||
"adb-123456789.12.azuredatabricks.net",
|
||||
"123456789.gcp.databricks.com",
|
||||
],
|
||||
)
|
||||
def test_get_oauth2_authorization_uri_derives_from_workspace_host(
|
||||
mocker: MockerFixture,
|
||||
spec: Any,
|
||||
host: str,
|
||||
) -> None:
|
||||
"""
|
||||
With no configured `authorization_request_uri`, the endpoint is derived from
|
||||
the workspace host (`https://<host>/oidc/v1/authorize`) on every cloud, with
|
||||
no account/tenant identifier required.
|
||||
"""
|
||||
database = mocker.MagicMock()
|
||||
database.url_object.host = host
|
||||
mocker.patch("superset.db.session.get", return_value=database)
|
||||
|
||||
url = spec.get_oauth2_authorization_uri(
|
||||
_unresolved_oauth2_config(), _oauth2_state()
|
||||
)
|
||||
parsed = urlparse(url)
|
||||
assert parsed.netloc == host
|
||||
assert parsed.path == "/oidc/v1/authorize"
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"spec",
|
||||
[DatabricksNativeEngineSpec, DatabricksPythonConnectorEngineSpec],
|
||||
)
|
||||
def test_get_oauth2_authorization_uri_preserves_configured(
|
||||
mocker: MockerFixture,
|
||||
spec: Any,
|
||||
) -> None:
|
||||
"""
|
||||
A fully-resolved `authorization_request_uri` is never overwritten by the
|
||||
host-derived endpoint, and no database lookup is needed.
|
||||
"""
|
||||
session_get = mocker.patch("superset.db.session.get")
|
||||
config = _unresolved_oauth2_config()
|
||||
config["authorization_request_uri"] = (
|
||||
"https://accounts.cloud.databricks.com/oidc/accounts/override/v1/authorize"
|
||||
)
|
||||
|
||||
url = spec.get_oauth2_authorization_uri(config, _oauth2_state())
|
||||
assert urlparse(url).path == "/oidc/accounts/override/v1/authorize"
|
||||
session_get.assert_not_called()
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"spec",
|
||||
[DatabricksNativeEngineSpec, DatabricksPythonConnectorEngineSpec],
|
||||
)
|
||||
def test_get_oauth2_authorization_uri_fails_without_host(
|
||||
mocker: MockerFixture,
|
||||
spec: Any,
|
||||
) -> None:
|
||||
"""
|
||||
When the endpoint must be derived but the connection has no host, fail fast
|
||||
instead of emitting an invalid `https:///oidc/v1/authorize` URL.
|
||||
"""
|
||||
database = mocker.MagicMock()
|
||||
database.url_object.host = None
|
||||
mocker.patch("superset.db.session.get", return_value=database)
|
||||
|
||||
with pytest.raises(OAuth2Error):
|
||||
spec.get_oauth2_authorization_uri(_unresolved_oauth2_config(), _oauth2_state())
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "spec",
    [DatabricksNativeEngineSpec, DatabricksPythonConnectorEngineSpec],
)
def test_get_oauth2_token_fails_without_uri(
    mocker: MockerFixture,
    spec: Any,
) -> None:
    """
    The token exchange has no database context from which to auto-detect the
    endpoint, so an absent `token_request_uri` must raise `OAuth2Error` instead
    of POSTing to `.../{}/...`.
    """
    with pytest.raises(OAuth2Error):
        oauth2_config = _unresolved_oauth2_config()
        authorization_code = "authorization-code"
        spec.get_oauth2_token(oauth2_config, authorization_code)
|
||||
|
||||
|
||||
def test_get_oauth2_fresh_token_python(
    mocker: MockerFixture,
    oauth2_config_python: OAuth2ClientConfig,
) -> None:
    """
    Test `get_oauth2_fresh_token` for Python Connector engine.

    The mocked token response must be returned unchanged, and the refresh
    grant must be POSTed as JSON to the expected token endpoint.
    """
    expected_token = {
        "access_token": "new-access-token",
        "expires_in": 3600,
        "scope": "sql",
        "token_type": "Bearer",
        "refresh_token": "new-refresh-token",
    }
    requests = mocker.patch("superset.db_engine_specs.base.requests")
    # Hand the mock its own copy so the expectation stays independent of
    # whatever the code under test does with the response payload.
    requests.post().json.return_value = dict(expected_token)

    fresh_token = DatabricksPythonConnectorEngineSpec.get_oauth2_fresh_token(
        oauth2_config_python,
        "old-refresh-token",
    )
    assert fresh_token == expected_token

    refresh_grant = {
        "client_id": "databricks-client-id",
        "client_secret": "databricks-client-secret",
        "refresh_token": "old-refresh-token",
        "grant_type": "refresh_token",
    }
    requests.post.assert_called_with(
        "https://accounts.cloud.databricks.com/oidc/accounts/12345/v1/token",
        json=refresh_grant,
        timeout=30.0,
    )
|
||||
|
||||
127
tests/unit_tests/db_engine_specs/test_databricks_multi_cloud.py
Normal file
127
tests/unit_tests/db_engine_specs/test_databricks_multi_cloud.py
Normal file
@@ -0,0 +1,127 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
# pylint: disable=unused-argument, import-outside-toplevel, protected-access
|
||||
|
||||
from typing import Any
|
||||
from unittest.mock import MagicMock
|
||||
from urllib.parse import parse_qs, urlparse
|
||||
|
||||
import pytest
|
||||
from pytest_mock import MockerFixture
|
||||
|
||||
from superset.db_engine_specs.databricks import (
|
||||
DatabricksNativeEngineSpec,
|
||||
DatabricksPythonConnectorEngineSpec,
|
||||
)
|
||||
from superset.superset_typing import OAuth2ClientConfig
|
||||
from superset.utils.oauth2 import decode_oauth2_state
|
||||
|
||||
# Multi-Cloud Provider Tests
|
||||
#
|
||||
# Databricks fronts the user-to-machine OAuth2 flow on every workspace at
|
||||
# `https://<workspace-host>/oidc/v1/{authorize,token}`, regardless of cloud, so
|
||||
# the authorization endpoint derives from the connection host with no per-cloud
|
||||
# account/tenant identifier.
|
||||
|
||||
# Engine specs that the parametrized tests in this module run against.
SPECS = [
    DatabricksNativeEngineSpec,
    DatabricksPythonConnectorEngineSpec,
]

# Representative workspace hosts, one per cloud provider.
CLOUD_HOSTS = [
    # AWS
    "my-cluster.cloud.databricks.com",
    # Azure
    "adb-123456789.12.azuredatabricks.net",
    # GCP
    "123456789.gcp.databricks.com",
]
|
||||
|
||||
|
||||
@pytest.fixture
def oauth2_config_no_uri() -> OAuth2ClientConfig:
    """
    Databricks OAuth2 client config with both endpoint URIs left empty, so the
    authorization endpoint has to be derived from the workspace host.
    """
    config: OAuth2ClientConfig = {
        "id": "databricks-client-id",
        "secret": "databricks-client-secret",
        "scope": "sql",
        "redirect_uri": "http://localhost:8088/api/v1/database/oauth2/",
        # Intentionally blank: nothing is pre-configured for either endpoint.
        "authorization_request_uri": "",
        "token_request_uri": "",
        "request_content_type": "json",
    }
    return config
|
||||
|
||||
|
||||
def _mock_database(mocker: MockerFixture, host: str) -> MagicMock:
    """
    Create a mock database with `id` 1 whose `url_object.host` is `host`.
    """
    # The dotted keyword sets the attribute on the nested `url_object` mock.
    return mocker.MagicMock(id=1, **{"url_object.host": host})
|
||||
|
||||
|
||||
@pytest.mark.parametrize("spec", SPECS)
@pytest.mark.parametrize("host", CLOUD_HOSTS)
def test_get_oauth2_authorization_uri_uses_workspace_host(
    mocker: MockerFixture,
    spec: Any,
    host: str,
    oauth2_config_no_uri: OAuth2ClientConfig,
) -> None:
    """
    With no endpoint configured, the authorization URL targets the workspace
    host itself (AWS, Azure, and GCP alike) and carries the scope and the
    encoded OAuth2 state in its query string.
    """
    from superset.db_engine_specs.base import OAuth2State

    workspace_database = _mock_database(mocker, host)
    mocker.patch(
        "superset.extensions.db.session.get",
        return_value=workspace_database,
    )

    expected_state: OAuth2State = {
        "database_id": 1,
        "user_id": 1,
        "default_redirect_uri": "http://localhost:8088/api/v1/database/oauth2/",
        "tab_id": "1234",
    }

    authorization_url = spec.get_oauth2_authorization_uri(
        oauth2_config_no_uri,
        expected_state,
    )
    url_parts = urlparse(authorization_url)
    assert url_parts.netloc == host
    assert url_parts.path == "/oidc/v1/authorize"

    params = parse_qs(url_parts.query)
    assert params["scope"][0] == "sql"
    # Restore any dots still escaped as %2E before decoding the state token.
    raw_state = params["state"][0]
    assert decode_oauth2_state(raw_state.replace("%2E", ".")) == expected_state
|
||||
|
||||
|
||||
@pytest.mark.parametrize("spec", SPECS)
@pytest.mark.parametrize("host", CLOUD_HOSTS)
def test_workspace_oauth2_endpoint_builds_token_uri(
    mocker: MockerFixture,
    spec: Any,
    host: str,
) -> None:
    """
    `_workspace_oauth2_endpoint` yields the token endpoint on the same
    workspace host that the database URL points at.
    """
    workspace_database = _mock_database(mocker, host)

    token_uri = spec._workspace_oauth2_endpoint(workspace_database, "token")

    assert token_uri == f"https://{host}/oidc/v1/token"
|
||||
Reference in New Issue
Block a user