fix: Check migration status before initializing database-dependent features (#34679)

Co-authored-by: Claude <noreply@anthropic.com>
(cherry picked from commit c568d463b9)
This commit is contained in:
Maxime Beauchemin
2025-08-21 11:25:47 -07:00
committed by Joe Li
parent 3b3aa1e302
commit 2ffc1b95ba
3 changed files with 148 additions and 363 deletions

View File

@@ -21,6 +21,10 @@ import os
import sys
from typing import cast, Iterable, Optional
from alembic.config import Config
from alembic.runtime.migration import MigrationContext
from alembic.script import ScriptDirectory
if sys.version_info >= (3, 11):
from wsgiref.types import StartResponse, WSGIApplication, WSGIEnvironment
else:
@@ -96,6 +100,78 @@ class SupersetApp(Flask):
return Response("", status=204) # No Content
return super().send_static_file(filename)
def _is_database_up_to_date(self) -> bool:
"""
Check if database migrations are up to date.
Returns False if there are pending migrations or unable to determine.
"""
try:
# Import here to avoid circular import issues
from superset.extensions import db
# Get current revision from database
with db.engine.connect() as connection:
context = MigrationContext.configure(connection)
current_rev = context.get_current_revision()
# Get head revision from migration files
alembic_cfg = Config()
alembic_cfg.set_main_option("script_location", "superset:migrations")
script = ScriptDirectory.from_config(alembic_cfg)
head_rev = script.get_current_head()
# Database is up-to-date if current revision matches head
is_current = current_rev == head_rev
if not is_current:
logger.debug(
"Pending migrations. Current: %s, Head: %s",
current_rev,
head_rev,
)
return is_current
except Exception as e:
logger.debug("Could not check migration status: %s", e)
return False
def sync_config_to_db(self) -> None:
"""
Synchronize configuration to database.
This method handles database-dependent features that need to be synced
after the app is initialized and database connection is available.
This is separated from app initialization to support multi-tenant
environments where database connection might not be available during
app startup.
"""
try:
# Import here to avoid circular import issues
from superset.extensions import feature_flag_manager
# Check if database is up-to-date with migrations
if not self._is_database_up_to_date():
logger.info("Pending database migrations: run 'superset db upgrade'")
return
logger.info("Syncing configuration to database...")
# Register SQLA event listeners for tagging system
if feature_flag_manager.is_feature_enabled("TAGGING_SYSTEM"):
from superset.tags.core import register_sqla_event_listeners
register_sqla_event_listeners()
# Seed system themes from configuration
from superset.commands.theme.seed import SeedSystemThemesCommand
SeedSystemThemesCommand().run()
logger.info("Configuration sync to database completed successfully")
except Exception as e:
logger.error("Failed to sync configuration to database: %s", e)
# Don't raise the exception to avoid breaking app startup
# in multi-tenant environments
class AppRootMiddleware:
"""A middleware that attaches the application to a fixed prefix location.

View File

@@ -35,8 +35,6 @@ from flask_appbuilder.utils.base import get_safe_redirect
from flask_babel import lazy_gettext as _, refresh
from flask_compress import Compress
from flask_session import Session
from sqlalchemy import inspect
from sqlalchemy.exc import OperationalError
from werkzeug.middleware.proxy_fix import ProxyFix
from superset.constants import CHANGE_ME_SECRET_KEY
@@ -64,7 +62,6 @@ from superset.extensions import (
from superset.security import SupersetSecurityManager
from superset.sql.parse import SQLGLOT_DIALECTS
from superset.superset_typing import FlaskResponse
from superset.tags.core import register_sqla_event_listeners
from superset.utils.core import is_test, pessimistic_connection_handling
from superset.utils.decorators import transaction
from superset.utils.log import DBEventLogger, get_event_logger_from_cfg_value
@@ -503,54 +500,6 @@ class SupersetAppInitializer: # pylint: disable=too-many-public-methods
icon="fa-lock",
)
def _init_database_dependent_features(self) -> None:
"""
Initialize features that require database tables to exist.
This is called during app initialization but checks table existence
to handle cases where the app starts before database migration.
"""
# Check if database URI is a fallback value before trying to connect
db_uri = self.database_uri
if not db_uri or any(
fallback in db_uri.lower()
for fallback in ["nouser", "nopassword", "nohost", "nodb"]
):
logger.warning(
"Database URI appears to be a fallback value. "
"Skipping database-dependent initialization. "
"This may indicate the workspace context is not ready yet."
)
return
try:
inspector = inspect(db.engine)
# Check if core tables exist (use 'dashboards' as proxy for Superset tables)
if not inspector.has_table("dashboards"):
logger.debug(
"Superset tables not yet created. Skipping database-dependent "
"initialization. These features will be initialized after "
"migration."
)
return
except OperationalError as e:
logger.debug(
"Error inspecting database tables. Skipping database-dependent "
"initialization: %s",
e,
)
return
# Register SQLA event listeners for tagging system
if feature_flag_manager.is_feature_enabled("TAGGING_SYSTEM"):
register_sqla_event_listeners()
# Seed system themes from configuration
from superset.commands.theme.seed import SeedSystemThemesCommand
if inspector.has_table("themes"):
SeedSystemThemesCommand().run()
def init_app_in_ctx(self) -> None:
"""
Runs init logic in the context of the app
@@ -568,8 +517,9 @@ class SupersetAppInitializer: # pylint: disable=too-many-public-methods
if flask_app_mutator := self.config["FLASK_APP_MUTATOR"]:
flask_app_mutator(self.superset_app)
# Initialize database-dependent features only if database is ready
self._init_database_dependent_features()
# Sync configuration to database (themes, etc.)
# This can be called separately in multi-tenant environments
self.superset_app.sync_config_to_db()
self.init_views()

View File

@@ -19,98 +19,77 @@ from unittest.mock import MagicMock, patch
from sqlalchemy.exc import OperationalError
from superset.app import SupersetApp
from superset.initialization import SupersetAppInitializer
class TestSupersetAppInitializer:
@patch("superset.initialization.db")
@patch("superset.initialization.inspect")
@patch("superset.initialization.logger")
def test_init_database_dependent_features_skips_when_no_tables(
self, mock_logger, mock_inspect_func, mock_db
):
"""Test that initialization is skipped when core tables don't exist."""
class TestSupersetApp:
@patch("superset.app.logger")
def test_sync_config_to_db_skips_when_no_tables(self, mock_logger):
"""Test that sync is skipped when database is not up-to-date."""
# Setup
mock_app = MagicMock()
app_initializer = SupersetAppInitializer(mock_app)
mock_inspector = MagicMock()
mock_inspector.has_table.return_value = False
mock_inspect_func.return_value = mock_inspector
mock_db.engine = MagicMock()
app = SupersetApp(__name__)
app.config = {"SQLALCHEMY_DATABASE_URI": "postgresql://user:pass@host:5432/db"}
# Execute
app_initializer._init_database_dependent_features()
# Mock _is_database_up_to_date to return False
with patch.object(app, "_is_database_up_to_date", return_value=False):
# Execute
app.sync_config_to_db()
# Assert
mock_inspect_func.assert_called_once_with(mock_db.engine)
mock_inspector.has_table.assert_called_once_with("dashboards")
mock_logger.debug.assert_called_once_with(
"Superset tables not yet created. Skipping database-dependent "
"initialization. These features will be initialized after "
"migration."
mock_logger.info.assert_called_once_with(
"Pending database migrations: run 'superset db upgrade'"
)
@patch("superset.initialization.db")
@patch("superset.initialization.inspect")
@patch("superset.initialization.logger")
def test_init_database_dependent_features_handles_operational_error(
self, mock_logger, mock_inspect_func, mock_db
):
"""Test that OperationalError during inspection is handled gracefully."""
@patch("superset.extensions.db")
@patch("superset.app.logger")
def test_sync_config_to_db_handles_operational_error(self, mock_logger, mock_db):
"""Test that OperationalError during migration check is handled gracefully."""
# Setup
mock_app = MagicMock()
app_initializer = SupersetAppInitializer(mock_app)
app = SupersetApp(__name__)
app.config = {"SQLALCHEMY_DATABASE_URI": "postgresql://user:pass@host:5432/db"}
error_msg = "Cannot connect to database"
mock_inspect_func.side_effect = OperationalError(error_msg, None, None)
mock_db.engine = MagicMock()
# Mock db.engine.connect to raise an OperationalError
mock_db.engine.connect.side_effect = OperationalError(error_msg, None, None)
# Execute
app_initializer._init_database_dependent_features()
app.sync_config_to_db()
# Assert
mock_inspect_func.assert_called_once_with(mock_db.engine)
mock_logger.debug.assert_called_once()
call_args = mock_logger.debug.call_args
assert "Error inspecting database tables" in call_args[0][0]
# The error is passed as second argument with %s formatting
assert str(call_args[0][1]) == str(OperationalError(error_msg, None, None))
# Assert - _is_database_up_to_date should catch the error and return False
# which causes the info log about pending migrations
mock_logger.info.assert_called_once_with(
"Pending database migrations: run 'superset db upgrade'"
)
@patch("superset.initialization.db")
@patch("superset.initialization.inspect")
@patch("superset.initialization.feature_flag_manager")
@patch("superset.initialization.register_sqla_event_listeners")
@patch("superset.initialization.logger")
@patch("superset.extensions.feature_flag_manager")
@patch("superset.app.logger")
@patch("superset.commands.theme.seed.SeedSystemThemesCommand")
def test_init_database_dependent_features_initializes_when_tables_exist(
def test_sync_config_to_db_initializes_when_tables_exist(
self,
mock_seed_themes_command,
mock_logger,
mock_register_listeners,
mock_feature_flag_manager,
mock_inspect_func,
mock_db,
):
"""Test that features are initialized when database tables exist."""
"""Test that features are initialized when database is up-to-date."""
# Setup
mock_app = MagicMock()
app_initializer = SupersetAppInitializer(mock_app)
mock_inspector = MagicMock()
mock_inspector.has_table.return_value = True
mock_inspect_func.return_value = mock_inspector
mock_db.engine = MagicMock()
app = SupersetApp(__name__)
app.config = {"SQLALCHEMY_DATABASE_URI": "postgresql://user:pass@host:5432/db"}
mock_feature_flag_manager.is_feature_enabled.return_value = True
mock_seed_themes = MagicMock()
mock_seed_themes_command.return_value = mock_seed_themes
# Execute
app_initializer._init_database_dependent_features()
# Mock _is_database_up_to_date to return True
with (
patch.object(app, "_is_database_up_to_date", return_value=True),
patch(
"superset.tags.core.register_sqla_event_listeners"
) as mock_register_listeners,
):
# Execute
app.sync_config_to_db()
# Assert
mock_inspect_func.assert_called_once_with(mock_db.engine)
# Check both tables are checked
assert mock_inspector.has_table.call_count == 2
mock_inspector.has_table.assert_any_call("dashboards")
mock_inspector.has_table.assert_any_call("themes")
mock_feature_flag_manager.is_feature_enabled.assert_called_with(
"TAGGING_SYSTEM"
)
@@ -118,73 +97,40 @@ class TestSupersetAppInitializer:
# Should seed themes
mock_seed_themes_command.assert_called_once()
mock_seed_themes.run.assert_called_once()
# Should not log skip message when tables exist
mock_logger.debug.assert_not_called()
@patch("superset.initialization.db")
@patch("superset.initialization.inspect")
@patch("superset.initialization.feature_flag_manager")
@patch("superset.initialization.register_sqla_event_listeners")
@patch("superset.commands.theme.seed.SeedSystemThemesCommand")
def test_init_database_dependent_features_skips_tagging_when_disabled(
self,
mock_seed_themes_command,
mock_register_listeners,
mock_feature_flag_manager,
mock_inspect_func,
mock_db,
):
"""Test that tagging system is not initialized when feature flag is disabled."""
# Setup
mock_app = MagicMock()
app_initializer = SupersetAppInitializer(mock_app)
mock_inspector = MagicMock()
mock_inspector.has_table.return_value = True
mock_inspect_func.return_value = mock_inspector
mock_db.engine = MagicMock()
mock_feature_flag_manager.is_feature_enabled.return_value = False
mock_seed_themes = MagicMock()
mock_seed_themes_command.return_value = mock_seed_themes
# Execute
app_initializer._init_database_dependent_features()
# Assert
mock_feature_flag_manager.is_feature_enabled.assert_called_with(
"TAGGING_SYSTEM"
# Should log successful completion
mock_logger.info.assert_any_call("Syncing configuration to database...")
mock_logger.info.assert_any_call(
"Configuration sync to database completed successfully"
)
mock_register_listeners.assert_not_called()
# Check both tables are checked
assert mock_inspector.has_table.call_count == 2
mock_inspector.has_table.assert_any_call("dashboards")
mock_inspector.has_table.assert_any_call("themes")
@patch("superset.initialization.db")
@patch("superset.initialization.inspect")
class TestSupersetAppInitializer:
@patch("superset.initialization.logger")
def test_init_database_dependent_features_handles_inspector_error_in_has_table(
self, mock_logger, mock_inspect_func, mock_db
):
"""Test that OperationalError from has_table check is handled gracefully."""
def test_init_app_in_ctx_calls_sync_config_to_db(self, mock_logger):
"""Test that initialization calls app.sync_config_to_db()."""
# Setup
mock_app = MagicMock()
mock_app.config = {
"SQLALCHEMY_DATABASE_URI": "postgresql://user:pass@host:5432/db",
"FLASK_APP_MUTATOR": None,
}
app_initializer = SupersetAppInitializer(mock_app)
mock_inspector = MagicMock()
error_msg = "Table check failed"
mock_inspector.has_table.side_effect = OperationalError(error_msg, None, None)
mock_inspect_func.return_value = mock_inspector
mock_db.engine = MagicMock()
# Execute
app_initializer._init_database_dependent_features()
# Execute init_app_in_ctx which calls sync_config_to_db
with (
patch.object(app_initializer, "configure_fab"),
patch.object(app_initializer, "configure_url_map_converters"),
patch.object(app_initializer, "configure_data_sources"),
patch.object(app_initializer, "configure_auth_provider"),
patch.object(app_initializer, "configure_async_queries"),
patch.object(app_initializer, "configure_ssh_manager"),
patch.object(app_initializer, "configure_stats_manager"),
patch.object(app_initializer, "init_views"),
):
app_initializer.init_app_in_ctx()
# Assert
mock_inspect_func.assert_called_once_with(mock_db.engine)
mock_inspector.has_table.assert_called_once_with("dashboards")
# Should handle the error gracefully
mock_logger.debug.assert_called_once()
call_args = mock_logger.debug.call_args
assert "Error inspecting database tables" in call_args[0][0]
# Assert that sync_config_to_db was called on the app
mock_app.sync_config_to_db.assert_called_once()
def test_database_uri_lazy_property(self):
"""Test database_uri property uses lazy initialization with smart caching."""
@@ -211,110 +157,6 @@ class TestSupersetAppInitializer:
uri2 == test_uri
) # Should still return cached value (not "different_uri")
def test_database_uri_lazy_property_with_missing_config(self):
"""Test that database_uri property returns empty string when config missing."""
# Setup
mock_app = MagicMock()
mock_app.config = {} # Empty config
app_initializer = SupersetAppInitializer(mock_app)
# Should return empty string when config key doesn't exist
uri = app_initializer.database_uri
assert uri == ""
# Empty string is a fallback value, so it should NOT be cached
assert app_initializer._db_uri_cache is None
def test_database_uri_prevents_nouser_fallback(self):
"""Test that lazy initialization prevents nouser fallback during deployment."""
# Setup - simulate deployment scenario where config is loaded properly
mock_app = MagicMock()
# Config is properly loaded with real database URI (not nouser)
mock_app.config = {
"SQLALCHEMY_DATABASE_URI": "postgresql://realuser:realpass@realhost:5432/realdb"
}
app_initializer = SupersetAppInitializer(mock_app)
# Access the database URI - should get the real URI, not nouser
uri = app_initializer.database_uri
assert uri == "postgresql://realuser:realpass@realhost:5432/realdb"
assert "nouser" not in uri
assert "nopassword" not in uri
assert "nohost" not in uri
assert "nodb" not in uri
# Verify cache is working
assert app_initializer._db_uri_cache is not None
assert (
app_initializer._db_uri_cache
== "postgresql://realuser:realpass@realhost:5432/realdb"
)
@patch("superset.initialization.make_url_safe")
@patch("superset.initialization.db")
def test_set_db_default_isolation_uses_lazy_property(
self, mock_db, mock_make_url_safe
):
"""Test that set_db_default_isolation uses the lazy database_uri property."""
# Setup
mock_app = MagicMock()
test_uri = "postgresql://user:pass@host:5432/testdb"
mock_app.config = {
"SQLALCHEMY_DATABASE_URI": test_uri,
"SQLALCHEMY_ENGINE_OPTIONS": {},
}
app_initializer = SupersetAppInitializer(mock_app)
# Mock make_url_safe to return a URL with postgresql backend
mock_url = MagicMock()
mock_url.get_backend_name.return_value = "postgresql"
mock_make_url_safe.return_value = mock_url
# Mock db.engine
mock_engine = MagicMock()
mock_db.engine = mock_engine
# Execute
app_initializer.set_db_default_isolation()
# Assert that make_url_safe was called with the lazy property value
mock_make_url_safe.assert_called_once_with(test_uri)
# Should set isolation level to READ COMMITTED for postgresql
mock_engine.execution_options.assert_called_once_with(
isolation_level="READ COMMITTED"
)
# Verify the cache was created
assert app_initializer._db_uri_cache is not None
assert app_initializer._db_uri_cache == test_uri
@patch("superset.initialization.make_url_safe")
@patch("superset.initialization.db")
def test_set_db_default_isolation_with_empty_uri(self, mock_db, mock_make_url_safe):
"""Test that set_db_default_isolation handles empty URI gracefully."""
# Setup
mock_app = MagicMock()
mock_app.config = {
"SQLALCHEMY_DATABASE_URI": "", # Empty URI
"SQLALCHEMY_ENGINE_OPTIONS": {},
}
app_initializer = SupersetAppInitializer(mock_app)
# Mock make_url_safe to return a URL with no backend
mock_url = MagicMock()
mock_url.get_backend_name.return_value = None
mock_make_url_safe.return_value = mock_url
# Execute
app_initializer.set_db_default_isolation()
# Should handle empty URI gracefully
mock_make_url_safe.assert_called_once_with("")
# Should not set isolation level for empty/unknown backend
mock_db.engine.execution_options.assert_not_called()
def test_database_uri_doesnt_cache_fallback_values(self):
"""Test that fallback values like 'nouser' are not cached."""
# Setup
@@ -345,86 +187,3 @@ class TestSupersetAppInitializer:
app_initializer._db_uri_cache
== "postgresql://realuser:realpass@realhost:5432/realdb"
)
# Third access should use cache even if config changes again
config_dict["SQLALCHEMY_DATABASE_URI"] = (
"postgresql://different:uri@host:5432/db"
)
uri3 = app_initializer.database_uri
assert (
uri3 == "postgresql://realuser:realpass@realhost:5432/realdb"
) # Still cached value
def test_database_uri_caches_valid_uri(self):
"""Test that valid URIs are properly cached."""
# Setup
mock_app = MagicMock()
valid_uri = "postgresql://validuser:validpass@validhost:5432/validdb"
mock_app.config = {"SQLALCHEMY_DATABASE_URI": valid_uri}
app_initializer = SupersetAppInitializer(mock_app)
# First access should cache valid URI
uri1 = app_initializer.database_uri
assert uri1 == valid_uri
assert app_initializer._db_uri_cache is not None
assert app_initializer._db_uri_cache == valid_uri
# Change config
mock_app.config = {
"SQLALCHEMY_DATABASE_URI": "postgresql://changed:uri@host:5432/db"
}
# Second access should still return cached value
uri2 = app_initializer.database_uri
assert uri2 == valid_uri # Still the cached value, not the changed one
def test_database_uri_fallback_patterns(self):
"""Test that various fallback patterns are not cached."""
# Test various fallback patterns
fallback_uris = [
"postgresql://nouser:nopassword@nohost:5432/nodb",
"mysql://NOUSER:NOPASSWORD@NOHOST:3306/NODB",
"postgresql://noUser:pass@host:5432/db", # Contains 'nouser' (case insens.)
"sqlite:///nohost.db", # Contains 'nohost'
"", # Empty string
]
for fallback_uri in fallback_uris:
# Create a fresh initializer for each test
mock_app = MagicMock()
mock_app.config = {"SQLALCHEMY_DATABASE_URI": fallback_uri}
app_initializer = SupersetAppInitializer(mock_app)
uri = app_initializer.database_uri
# Should return the value but not cache it
assert uri == fallback_uri
assert app_initializer._db_uri_cache is None, (
f"Should not cache: {fallback_uri}"
)
@patch("superset.initialization.logger")
@patch("superset.initialization.inspect")
@patch("superset.initialization.db")
def test_init_database_dependent_features_skips_with_fallback_uri(
self, mock_db, mock_inspect, mock_logger
):
"""Test that database-dependent features are skipped when URI is a fallback."""
# Setup
mock_app = MagicMock()
# Set a fallback URI that would cause connection to fail
mock_app.config = {
"SQLALCHEMY_DATABASE_URI": "postgresql://nouser:nopassword@nohost:5432/nodb"
}
app_initializer = SupersetAppInitializer(mock_app)
# Execute
app_initializer._init_database_dependent_features()
# Assert - should not try to inspect database
mock_inspect.assert_not_called()
# Should log warning about fallback URI
mock_logger.warning.assert_called_once()
warning_message = mock_logger.warning.call_args[0][0]
assert "fallback value" in warning_message
assert "workspace context" in warning_message