diff --git a/superset/app.py b/superset/app.py index 0a8f73d53e7..b1fec63cf30 100644 --- a/superset/app.py +++ b/superset/app.py @@ -21,6 +21,10 @@ import os import sys from typing import cast, Iterable, Optional +from alembic.config import Config +from alembic.runtime.migration import MigrationContext +from alembic.script import ScriptDirectory + if sys.version_info >= (3, 11): from wsgiref.types import StartResponse, WSGIApplication, WSGIEnvironment else: @@ -96,6 +100,78 @@ class SupersetApp(Flask): return Response("", status=204) # No Content return super().send_static_file(filename) + def _is_database_up_to_date(self) -> bool: + """ + Check if database migrations are up to date. + Returns False if there are pending migrations or unable to determine. + """ + try: + # Import here to avoid circular import issues + from superset.extensions import db + + # Get current revision from database + with db.engine.connect() as connection: + context = MigrationContext.configure(connection) + current_rev = context.get_current_revision() + + # Get head revision from migration files + alembic_cfg = Config() + alembic_cfg.set_main_option("script_location", "superset:migrations") + script = ScriptDirectory.from_config(alembic_cfg) + head_rev = script.get_current_head() + + # Database is up-to-date if current revision matches head + is_current = current_rev == head_rev + if not is_current: + logger.debug( + "Pending migrations. Current: %s, Head: %s", + current_rev, + head_rev, + ) + return is_current + except Exception as e: + logger.debug("Could not check migration status: %s", e) + return False + + def sync_config_to_db(self) -> None: + """ + Synchronize configuration to database. + This method handles database-dependent features that need to be synced + after the app is initialized and database connection is available. + + This is separated from app initialization to support multi-tenant + environments where database connection might not be available during + app startup. + """ + try: + # Import here to avoid circular import issues + from superset.extensions import feature_flag_manager + + # Check if database is up-to-date with migrations + if not self._is_database_up_to_date(): + logger.info("Pending database migrations: run 'superset db upgrade'") + return + + logger.info("Syncing configuration to database...") + + # Register SQLA event listeners for tagging system + if feature_flag_manager.is_feature_enabled("TAGGING_SYSTEM"): + from superset.tags.core import register_sqla_event_listeners + + register_sqla_event_listeners() + + # Seed system themes from configuration + from superset.commands.theme.seed import SeedSystemThemesCommand + + SeedSystemThemesCommand().run() + + logger.info("Configuration sync to database completed successfully") + + except Exception as e: + logger.error("Failed to sync configuration to database: %s", e) + # Don't raise the exception to avoid breaking app startup + # in multi-tenant environments + class AppRootMiddleware: """A middleware that attaches the application to a fixed prefix location. diff --git a/superset/initialization/__init__.py b/superset/initialization/__init__.py index 51adf27ccab..e59b80fecff 100644 --- a/superset/initialization/__init__.py +++ b/superset/initialization/__init__.py @@ -35,8 +35,6 @@ from flask_appbuilder.utils.base import get_safe_redirect from flask_babel import lazy_gettext as _, refresh from flask_compress import Compress from flask_session import Session -from sqlalchemy import inspect -from sqlalchemy.exc import OperationalError from werkzeug.middleware.proxy_fix import ProxyFix from superset.constants import CHANGE_ME_SECRET_KEY @@ -64,7 +62,6 @@ from superset.extensions import ( from superset.security import SupersetSecurityManager from superset.sql.parse import SQLGLOT_DIALECTS from superset.superset_typing import FlaskResponse -from superset.tags.core import register_sqla_event_listeners from superset.utils.core import is_test, pessimistic_connection_handling from superset.utils.decorators import transaction from superset.utils.log import DBEventLogger, get_event_logger_from_cfg_value @@ -503,54 +500,6 @@ class SupersetAppInitializer: # pylint: disable=too-many-public-methods icon="fa-lock", ) - def _init_database_dependent_features(self) -> None: - """ - Initialize features that require database tables to exist. - This is called during app initialization but checks table existence - to handle cases where the app starts before database migration. - """ - # Check if database URI is a fallback value before trying to connect - db_uri = self.database_uri - if not db_uri or any( - fallback in db_uri.lower() - for fallback in ["nouser", "nopassword", "nohost", "nodb"] - ): - logger.warning( - "Database URI appears to be a fallback value. " - "Skipping database-dependent initialization. " - "This may indicate the workspace context is not ready yet." - ) - return - - try: - inspector = inspect(db.engine) - - # Check if core tables exist (use 'dashboards' as proxy for Superset tables) - if not inspector.has_table("dashboards"): - logger.debug( - "Superset tables not yet created. Skipping database-dependent " - "initialization. These features will be initialized after " - "migration." - ) - return - except OperationalError as e: - logger.debug( - "Error inspecting database tables. Skipping database-dependent " - "initialization: %s", - e, - ) - return - - # Register SQLA event listeners for tagging system - if feature_flag_manager.is_feature_enabled("TAGGING_SYSTEM"): - register_sqla_event_listeners() - - # Seed system themes from configuration - from superset.commands.theme.seed import SeedSystemThemesCommand - - if inspector.has_table("themes"): - SeedSystemThemesCommand().run() - def init_app_in_ctx(self) -> None: """ Runs init logic in the context of the app @@ -568,8 +517,9 @@ class SupersetAppInitializer: # pylint: disable=too-many-public-methods if flask_app_mutator := self.config["FLASK_APP_MUTATOR"]: flask_app_mutator(self.superset_app) - # Initialize database-dependent features only if database is ready - self._init_database_dependent_features() + # Sync configuration to database (themes, etc.) + # This can be called separately in multi-tenant environments + self.superset_app.sync_config_to_db() self.init_views() diff --git a/tests/unit_tests/initialization_test.py b/tests/unit_tests/initialization_test.py index bc955b4b310..01fde0967c9 100644 --- a/tests/unit_tests/initialization_test.py +++ b/tests/unit_tests/initialization_test.py @@ -19,98 +19,77 @@ from unittest.mock import MagicMock, patch from sqlalchemy.exc import OperationalError +from superset.app import SupersetApp from superset.initialization import SupersetAppInitializer -class TestSupersetAppInitializer: - @patch("superset.initialization.db") - @patch("superset.initialization.inspect") - @patch("superset.initialization.logger") - def test_init_database_dependent_features_skips_when_no_tables( - self, mock_logger, mock_inspect_func, mock_db - ): - """Test that initialization is skipped when core tables don't exist.""" +class TestSupersetApp: + @patch("superset.app.logger") + def test_sync_config_to_db_skips_when_no_tables(self, mock_logger): + """Test that sync is skipped when database is not up-to-date.""" # Setup - mock_app = MagicMock() - app_initializer = SupersetAppInitializer(mock_app) - mock_inspector = MagicMock() - mock_inspector.has_table.return_value = False - mock_inspect_func.return_value = mock_inspector - mock_db.engine = MagicMock() + app = SupersetApp(__name__) + app.config = {"SQLALCHEMY_DATABASE_URI": "postgresql://user:pass@host:5432/db"} - # Execute - app_initializer._init_database_dependent_features() + # Mock _is_database_up_to_date to return False + with patch.object(app, "_is_database_up_to_date", return_value=False): + # Execute + app.sync_config_to_db() # Assert - mock_inspect_func.assert_called_once_with(mock_db.engine) - mock_inspector.has_table.assert_called_once_with("dashboards") - mock_logger.debug.assert_called_once_with( - "Superset tables not yet created. Skipping database-dependent " - "initialization. These features will be initialized after " - "migration." + mock_logger.info.assert_called_once_with( + "Pending database migrations: run 'superset db upgrade'" ) - @patch("superset.initialization.db") - @patch("superset.initialization.inspect") - @patch("superset.initialization.logger") - def test_init_database_dependent_features_handles_operational_error( - self, mock_logger, mock_inspect_func, mock_db - ): - """Test that OperationalError during inspection is handled gracefully.""" + @patch("superset.extensions.db") + @patch("superset.app.logger") + def test_sync_config_to_db_handles_operational_error(self, mock_logger, mock_db): + """Test that OperationalError during migration check is handled gracefully.""" # Setup - mock_app = MagicMock() - app_initializer = SupersetAppInitializer(mock_app) + app = SupersetApp(__name__) + app.config = {"SQLALCHEMY_DATABASE_URI": "postgresql://user:pass@host:5432/db"} error_msg = "Cannot connect to database" - mock_inspect_func.side_effect = OperationalError(error_msg, None, None) - mock_db.engine = MagicMock() + + # Mock db.engine.connect to raise an OperationalError + mock_db.engine.connect.side_effect = OperationalError(error_msg, None, None) # Execute - app_initializer._init_database_dependent_features() + app.sync_config_to_db() - # Assert - mock_inspect_func.assert_called_once_with(mock_db.engine) - mock_logger.debug.assert_called_once() - call_args = mock_logger.debug.call_args - assert "Error inspecting database tables" in call_args[0][0] - # The error is passed as second argument with %s formatting - assert str(call_args[0][1]) == str(OperationalError(error_msg, None, None)) + # Assert - _is_database_up_to_date should catch the error and return False + # which causes the info log about pending migrations + mock_logger.info.assert_called_once_with( + "Pending database migrations: run 'superset db upgrade'" + ) - @patch("superset.initialization.db") - @patch("superset.initialization.inspect") - @patch("superset.initialization.feature_flag_manager") - @patch("superset.initialization.register_sqla_event_listeners") - @patch("superset.initialization.logger") + @patch("superset.extensions.feature_flag_manager") + @patch("superset.app.logger") @patch("superset.commands.theme.seed.SeedSystemThemesCommand") - def test_init_database_dependent_features_initializes_when_tables_exist( + def test_sync_config_to_db_initializes_when_tables_exist( self, mock_seed_themes_command, mock_logger, - mock_register_listeners, mock_feature_flag_manager, - mock_inspect_func, - mock_db, ): - """Test that features are initialized when database tables exist.""" + """Test that features are initialized when database is up-to-date.""" # Setup - mock_app = MagicMock() - app_initializer = SupersetAppInitializer(mock_app) - mock_inspector = MagicMock() - mock_inspector.has_table.return_value = True - mock_inspect_func.return_value = mock_inspector - mock_db.engine = MagicMock() + app = SupersetApp(__name__) + app.config = {"SQLALCHEMY_DATABASE_URI": "postgresql://user:pass@host:5432/db"} mock_feature_flag_manager.is_feature_enabled.return_value = True mock_seed_themes = MagicMock() mock_seed_themes_command.return_value = mock_seed_themes - # Execute - app_initializer._init_database_dependent_features() + # Mock _is_database_up_to_date to return True + with ( + patch.object(app, "_is_database_up_to_date", return_value=True), + patch( + "superset.tags.core.register_sqla_event_listeners" + ) as mock_register_listeners, + ): + # Execute + app.sync_config_to_db() # Assert - mock_inspect_func.assert_called_once_with(mock_db.engine) - # Check both tables are checked - assert mock_inspector.has_table.call_count == 2 - mock_inspector.has_table.assert_any_call("dashboards") - mock_inspector.has_table.assert_any_call("themes") mock_feature_flag_manager.is_feature_enabled.assert_called_with( "TAGGING_SYSTEM" ) @@ -118,73 +97,40 @@ class TestSupersetAppInitializer: # Should seed themes mock_seed_themes_command.assert_called_once() mock_seed_themes.run.assert_called_once() - # Should not log skip message when tables exist - mock_logger.debug.assert_not_called() - - @patch("superset.initialization.db") - @patch("superset.initialization.inspect") - @patch("superset.initialization.feature_flag_manager") - @patch("superset.initialization.register_sqla_event_listeners") - @patch("superset.commands.theme.seed.SeedSystemThemesCommand") - def test_init_database_dependent_features_skips_tagging_when_disabled( - self, - mock_seed_themes_command, - mock_register_listeners, - mock_feature_flag_manager, - mock_inspect_func, - mock_db, - ): - """Test that tagging system is not initialized when feature flag is disabled.""" - # Setup - mock_app = MagicMock() - app_initializer = SupersetAppInitializer(mock_app) - mock_inspector = MagicMock() - mock_inspector.has_table.return_value = True - mock_inspect_func.return_value = mock_inspector - mock_db.engine = MagicMock() - mock_feature_flag_manager.is_feature_enabled.return_value = False - mock_seed_themes = MagicMock() - mock_seed_themes_command.return_value = mock_seed_themes - - # Execute - app_initializer._init_database_dependent_features() - - # Assert - mock_feature_flag_manager.is_feature_enabled.assert_called_with( - "TAGGING_SYSTEM" + # Should log successful completion + mock_logger.info.assert_any_call("Syncing configuration to database...") + mock_logger.info.assert_any_call( + "Configuration sync to database completed successfully" ) - mock_register_listeners.assert_not_called() - # Check both tables are checked - assert mock_inspector.has_table.call_count == 2 - mock_inspector.has_table.assert_any_call("dashboards") - mock_inspector.has_table.assert_any_call("themes") - @patch("superset.initialization.db") - @patch("superset.initialization.inspect") + +class TestSupersetAppInitializer: @patch("superset.initialization.logger") - def test_init_database_dependent_features_handles_inspector_error_in_has_table( - self, mock_logger, mock_inspect_func, mock_db - ): - """Test that OperationalError from has_table check is handled gracefully.""" + def test_init_app_in_ctx_calls_sync_config_to_db(self, mock_logger): + """Test that initialization calls app.sync_config_to_db().""" # Setup mock_app = MagicMock() + mock_app.config = { + "SQLALCHEMY_DATABASE_URI": "postgresql://user:pass@host:5432/db", + "FLASK_APP_MUTATOR": None, + } app_initializer = SupersetAppInitializer(mock_app) - mock_inspector = MagicMock() - error_msg = "Table check failed" - mock_inspector.has_table.side_effect = OperationalError(error_msg, None, None) - mock_inspect_func.return_value = mock_inspector - mock_db.engine = MagicMock() - # Execute - app_initializer._init_database_dependent_features() + # Execute init_app_in_ctx which calls sync_config_to_db + with ( + patch.object(app_initializer, "configure_fab"), + patch.object(app_initializer, "configure_url_map_converters"), + patch.object(app_initializer, "configure_data_sources"), + patch.object(app_initializer, "configure_auth_provider"), + patch.object(app_initializer, "configure_async_queries"), + patch.object(app_initializer, "configure_ssh_manager"), + patch.object(app_initializer, "configure_stats_manager"), + patch.object(app_initializer, "init_views"), + ): + app_initializer.init_app_in_ctx() - # Assert - mock_inspect_func.assert_called_once_with(mock_db.engine) - mock_inspector.has_table.assert_called_once_with("dashboards") - # Should handle the error gracefully - mock_logger.debug.assert_called_once() - call_args = mock_logger.debug.call_args - assert "Error inspecting database tables" in call_args[0][0] + # Assert that sync_config_to_db was called on the app + mock_app.sync_config_to_db.assert_called_once() def test_database_uri_lazy_property(self): """Test database_uri property uses lazy initialization with smart caching.""" @@ -211,110 +157,6 @@ class TestSupersetAppInitializer: uri2 == test_uri ) # Should still return cached value (not "different_uri") - def test_database_uri_lazy_property_with_missing_config(self): - """Test that database_uri property returns empty string when config missing.""" - # Setup - mock_app = MagicMock() - mock_app.config = {} # Empty config - app_initializer = SupersetAppInitializer(mock_app) - - # Should return empty string when config key doesn't exist - uri = app_initializer.database_uri - assert uri == "" - # Empty string is a fallback value, so it should NOT be cached - assert app_initializer._db_uri_cache is None - - def test_database_uri_prevents_nouser_fallback(self): - """Test that lazy initialization prevents nouser fallback during deployment.""" - # Setup - simulate deployment scenario where config is loaded properly - mock_app = MagicMock() - - # Config is properly loaded with real database URI (not nouser) - mock_app.config = { - "SQLALCHEMY_DATABASE_URI": "postgresql://realuser:realpass@realhost:5432/realdb" - } - app_initializer = SupersetAppInitializer(mock_app) - - # Access the database URI - should get the real URI, not nouser - uri = app_initializer.database_uri - assert uri == "postgresql://realuser:realpass@realhost:5432/realdb" - assert "nouser" not in uri - assert "nopassword" not in uri - assert "nohost" not in uri - assert "nodb" not in uri - - # Verify cache is working - assert app_initializer._db_uri_cache is not None - assert ( - app_initializer._db_uri_cache - == "postgresql://realuser:realpass@realhost:5432/realdb" - ) - - @patch("superset.initialization.make_url_safe") - @patch("superset.initialization.db") - def test_set_db_default_isolation_uses_lazy_property( - self, mock_db, mock_make_url_safe - ): - """Test that set_db_default_isolation uses the lazy database_uri property.""" - # Setup - mock_app = MagicMock() - test_uri = "postgresql://user:pass@host:5432/testdb" - mock_app.config = { - "SQLALCHEMY_DATABASE_URI": test_uri, - "SQLALCHEMY_ENGINE_OPTIONS": {}, - } - app_initializer = SupersetAppInitializer(mock_app) - - # Mock make_url_safe to return a URL with postgresql backend - mock_url = MagicMock() - mock_url.get_backend_name.return_value = "postgresql" - mock_make_url_safe.return_value = mock_url - - # Mock db.engine - mock_engine = MagicMock() - mock_db.engine = mock_engine - - # Execute - app_initializer.set_db_default_isolation() - - # Assert that make_url_safe was called with the lazy property value - mock_make_url_safe.assert_called_once_with(test_uri) - - # Should set isolation level to READ COMMITTED for postgresql - mock_engine.execution_options.assert_called_once_with( - isolation_level="READ COMMITTED" - ) - - # Verify the cache was created - assert app_initializer._db_uri_cache is not None - assert app_initializer._db_uri_cache == test_uri - - @patch("superset.initialization.make_url_safe") - @patch("superset.initialization.db") - def test_set_db_default_isolation_with_empty_uri(self, mock_db, mock_make_url_safe): - """Test that set_db_default_isolation handles empty URI gracefully.""" - # Setup - mock_app = MagicMock() - mock_app.config = { - "SQLALCHEMY_DATABASE_URI": "", # Empty URI - "SQLALCHEMY_ENGINE_OPTIONS": {}, - } - app_initializer = SupersetAppInitializer(mock_app) - - # Mock make_url_safe to return a URL with no backend - mock_url = MagicMock() - mock_url.get_backend_name.return_value = None - mock_make_url_safe.return_value = mock_url - - # Execute - app_initializer.set_db_default_isolation() - - # Should handle empty URI gracefully - mock_make_url_safe.assert_called_once_with("") - - # Should not set isolation level for empty/unknown backend - mock_db.engine.execution_options.assert_not_called() - def test_database_uri_doesnt_cache_fallback_values(self): """Test that fallback values like 'nouser' are not cached.""" # Setup @@ -345,86 +187,3 @@ class TestSupersetAppInitializer: app_initializer._db_uri_cache == "postgresql://realuser:realpass@realhost:5432/realdb" ) - - # Third access should use cache even if config changes again - config_dict["SQLALCHEMY_DATABASE_URI"] = ( - "postgresql://different:uri@host:5432/db" - ) - uri3 = app_initializer.database_uri - assert ( - uri3 == "postgresql://realuser:realpass@realhost:5432/realdb" - ) # Still cached value - - def test_database_uri_caches_valid_uri(self): - """Test that valid URIs are properly cached.""" - # Setup - mock_app = MagicMock() - valid_uri = "postgresql://validuser:validpass@validhost:5432/validdb" - mock_app.config = {"SQLALCHEMY_DATABASE_URI": valid_uri} - app_initializer = SupersetAppInitializer(mock_app) - - # First access should cache valid URI - uri1 = app_initializer.database_uri - assert uri1 == valid_uri - assert app_initializer._db_uri_cache is not None - assert app_initializer._db_uri_cache == valid_uri - - # Change config - mock_app.config = { - "SQLALCHEMY_DATABASE_URI": "postgresql://changed:uri@host:5432/db" - } - - # Second access should still return cached value - uri2 = app_initializer.database_uri - assert uri2 == valid_uri # Still the cached value, not the changed one - - def test_database_uri_fallback_patterns(self): - """Test that various fallback patterns are not cached.""" - # Test various fallback patterns - fallback_uris = [ - "postgresql://nouser:nopassword@nohost:5432/nodb", - "mysql://NOUSER:NOPASSWORD@NOHOST:3306/NODB", - "postgresql://noUser:pass@host:5432/db", # Contains 'nouser' (case insens.) - "sqlite:///nohost.db", # Contains 'nohost' - "", # Empty string - ] - - for fallback_uri in fallback_uris: - # Create a fresh initializer for each test - mock_app = MagicMock() - mock_app.config = {"SQLALCHEMY_DATABASE_URI": fallback_uri} - app_initializer = SupersetAppInitializer(mock_app) - - uri = app_initializer.database_uri - - # Should return the value but not cache it - assert uri == fallback_uri - assert app_initializer._db_uri_cache is None, ( - f"Should not cache: {fallback_uri}" - ) - - @patch("superset.initialization.logger") - @patch("superset.initialization.inspect") - @patch("superset.initialization.db") - def test_init_database_dependent_features_skips_with_fallback_uri( - self, mock_db, mock_inspect, mock_logger - ): - """Test that database-dependent features are skipped when URI is a fallback.""" - # Setup - mock_app = MagicMock() - # Set a fallback URI that would cause connection to fail - mock_app.config = { - "SQLALCHEMY_DATABASE_URI": "postgresql://nouser:nopassword@nohost:5432/nodb" - } - app_initializer = SupersetAppInitializer(mock_app) - - # Execute - app_initializer._init_database_dependent_features() - - # Assert - should not try to inspect database - mock_inspect.assert_not_called() - # Should log warning about fallback URI - mock_logger.warning.assert_called_once() - warning_message = mock_logger.warning.call_args[0][0] - assert "fallback value" in warning_message - assert "workspace context" in warning_message