# Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. from __future__ import annotations import logging from abc import ABC, abstractmethod from enum import Enum from time import sleep from typing import Any, TYPE_CHECKING from flask import current_app as app from packaging import version from selenium import __version__ as selenium_version from selenium.common.exceptions import ( StaleElementReferenceException, TimeoutException, WebDriverException, ) from selenium.webdriver import chrome, firefox, FirefoxProfile from selenium.webdriver.common.by import By from selenium.webdriver.common.service import Service from selenium.webdriver.remote.webdriver import WebDriver from selenium.webdriver.support import expected_conditions as EC # noqa: N812 from selenium.webdriver.support.ui import WebDriverWait from superset.extensions import machine_auth_provider_factory from superset.utils.retries import retry_call from superset.utils.screenshot_utils import take_tiled_screenshot WindowSize = tuple[int, int] logger = logging.getLogger(__name__) # Installation message for missing Playwright (Cypress doesn't work with DeckGL) PLAYWRIGHT_INSTALL_MESSAGE = ( "To complete the migration from Cypress " "and enable WebGL/DeckGL screenshot support, install Playwright with: " "pip install playwright && playwright install chromium" ) if TYPE_CHECKING: from typing import Any from flask_appbuilder.security.sqla.models import User try: from playwright.sync_api import ( BrowserContext, Error as PlaywrightError, Locator, Page, sync_playwright, TimeoutError as PlaywrightTimeout, ) except ImportError: from typing import Any # Define dummy classes when playwright is not available BrowserContext = Any PlaywrightError = Exception PlaywrightTimeout = Exception Locator = Any Page = Any sync_playwright = None def check_playwright_availability() -> bool: """ Lightweight check for Playwright availability. First checks if browser binary exists, falls back to launch test if needed. """ if sync_playwright is None: return False try: with sync_playwright() as p: # First try lightweight check - just verify executable exists try: executable_path = p.chromium.executable_path if executable_path: return True except Exception: # Fall back to full launch test if executable_path fails logger.debug( "Executable path check failed, falling back to launch test" ) # Fallback: actually launch browser to ensure it works browser = p.chromium.launch(headless=True) browser.close() return True except Exception as e: logger.warning( "Playwright module is installed but browser launch failed. " "Run 'playwright install chromium' to install browser binaries. " "Error: %s", str(e), ) return False PLAYWRIGHT_AVAILABLE = check_playwright_availability() def validate_webdriver_config() -> dict[str, Any]: """ Validate webdriver configuration and dependencies. Used to check migration status from Cypress to Playwright. Returns a dictionary with the status of available webdrivers and feature flags. """ from superset import feature_flag_manager return { "selenium_available": True, # Always available as required dependency "playwright_available": PLAYWRIGHT_AVAILABLE, "playwright_feature_enabled": feature_flag_manager.is_feature_enabled( "PLAYWRIGHT_REPORTS_AND_THUMBNAILS" ), "recommended_action": ( PLAYWRIGHT_INSTALL_MESSAGE if not PLAYWRIGHT_AVAILABLE else None ), } class DashboardStandaloneMode(Enum): HIDE_NAV = 1 HIDE_NAV_AND_TITLE = 2 REPORT = 3 class ChartStandaloneMode(Enum): HIDE_NAV = "true" SHOW_NAV = 0 # pylint: disable=too-few-public-methods class WebDriverProxy(ABC): def __init__(self, driver_type: str, window: WindowSize | None = None): self._driver_type = driver_type self._window: WindowSize = window or (800, 600) self._screenshot_locate_wait = app.config["SCREENSHOT_LOCATE_WAIT"] self._screenshot_load_wait = app.config["SCREENSHOT_LOAD_WAIT"] @abstractmethod def get_screenshot(self, url: str, element_name: str, user: User) -> bytes | None: """ Run webdriver and return a screenshot """ class WebDriverPlaywright(WebDriverProxy): @staticmethod def auth(user: User, context: BrowserContext) -> BrowserContext: return machine_auth_provider_factory.instance.authenticate_browser_context( context, user ) @staticmethod def find_unexpected_errors(page: Page) -> list[str]: error_messages = [] try: alert_divs = page.get_by_role("alert").all() logger.debug( "%i alert elements have been found in the screenshot", len(alert_divs) ) for alert_div in alert_divs: # See More button alert_div.get_by_role("button").click() # wait for modal to show up page.locator(".ant-modal-content").wait_for(state="visible") err_msg_div = page.locator(".ant-modal-content .ant-modal-body") # # # collect error message error_messages.append(err_msg_div.text_content()) # # # Use HTML so that error messages are shown in the same style (color) error_as_html = err_msg_div.inner_html().replace("'", "\\'") # # # close modal after collecting error messages page.locator(".ant-modal-content .ant-modal-close").click() # # # wait until the modal becomes invisible page.locator(".ant-modal-content").wait_for(state="detached") try: # Even if some errors can't be updated in the screenshot, # keep all the errors in the server log and do not fail the loop alert_div.evaluate( "(node, error_html) => node.innerHtml = error_html", [error_as_html], ) except PlaywrightError: logger.exception("Failed to update error messages using alert_div") except PlaywrightError: logger.exception("Failed to capture unexpected errors") return error_messages @staticmethod def _get_screenshot(page: Page, element: Locator, element_name: str) -> bytes: if element_name == "standalone": return page.screenshot(full_page=True) else: return element.screenshot() def get_screenshot( # pylint: disable=too-many-locals, too-many-statements # noqa: C901 self, url: str, element_name: str, user: User ) -> bytes | None: if not PLAYWRIGHT_AVAILABLE: logger.info( "Playwright not available - falling back to Selenium. " "Note: WebGL/Canvas charts may not render correctly with Selenium. " "%s", PLAYWRIGHT_INSTALL_MESSAGE, ) return None with sync_playwright() as playwright: browser_args = app.config["WEBDRIVER_OPTION_ARGS"] browser = playwright.chromium.launch(args=browser_args) pixel_density = app.config["WEBDRIVER_WINDOW"].get("pixel_density", 1) viewport_height = self._window[1] viewport_width = self._window[0] context = browser.new_context( bypass_csp=True, viewport={ "height": viewport_height, "width": viewport_width, }, device_scale_factor=pixel_density, ) context.set_default_timeout( app.config["SCREENSHOT_PLAYWRIGHT_DEFAULT_TIMEOUT"] ) self.auth(user, context) page = context.new_page() try: page.goto( url, wait_until=app.config["SCREENSHOT_PLAYWRIGHT_WAIT_EVENT"], ) except PlaywrightTimeout: logger.exception( "Web event %s not detected. Page %s might not have been fully loaded", # noqa: E501 app.config["SCREENSHOT_PLAYWRIGHT_WAIT_EVENT"], url, ) img: bytes | None = None selenium_headstart = app.config["SCREENSHOT_SELENIUM_HEADSTART"] logger.debug("Sleeping for %i seconds", selenium_headstart) page.wait_for_timeout(selenium_headstart * 1000) element: Locator try: try: # page didn't load logger.debug( "Wait for the presence of %s at url: %s", element_name, url ) element = page.locator(f".{element_name}") element.wait_for() except PlaywrightTimeout: logger.exception("Timed out requesting url %s", url) raise try: # chart containers didn't render logger.debug("Wait for chart containers to draw at url: %s", url) slice_container_locator = page.locator(".chart-container") for slice_container_elem in slice_container_locator.all(): slice_container_elem.wait_for() except PlaywrightTimeout: logger.exception( "Timed out waiting for chart containers to draw at url %s", url, ) raise try: # charts took too long to load logger.debug( "Wait for loading element of charts to be gone at url: %s", url ) for loading_element in page.locator(".loading").all(): loading_element.wait_for(state="detached") except PlaywrightTimeout: logger.exception( "Timed out waiting for charts to load at url %s", url ) raise selenium_animation_wait = app.config[ "SCREENSHOT_SELENIUM_ANIMATION_WAIT" ] logger.debug( "Wait %i seconds for chart animation", selenium_animation_wait ) page.wait_for_timeout(selenium_animation_wait * 1000) logger.debug( "Taking a PNG screenshot of url %s as user %s", url, user.username, ) if app.config["SCREENSHOT_REPLACE_UNEXPECTED_ERRORS"]: unexpected_errors = WebDriverPlaywright.find_unexpected_errors(page) if unexpected_errors: logger.warning( "%i errors found in the screenshot. URL: %s. Errors are: %s", # noqa: E501 len(unexpected_errors), url, unexpected_errors, ) # Detect large dashboards and use tiled screenshots if enabled tiled_enabled = app.config.get("SCREENSHOT_TILED_ENABLED", False) if tiled_enabled: chart_count = page.evaluate( 'document.querySelectorAll(".chart-container").length' ) dashboard_height = page.evaluate( f'document.querySelector(".{element_name}").scrollHeight || 0' ) chart_threshold = app.config.get( "SCREENSHOT_TILED_CHART_THRESHOLD", 20 ) height_threshold = app.config.get( "SCREENSHOT_TILED_HEIGHT_THRESHOLD", 5000 ) tile_height = app.config.get( "SCREENSHOT_TILED_VIEWPORT_HEIGHT", viewport_height ) # Use tiled screenshots for large dashboards use_tiled = ( chart_count >= chart_threshold or dashboard_height > height_threshold ) and dashboard_height > tile_height if use_tiled: logger.info( "Large dashboard detected: %s charts, %spx height. " "Using tiled screenshots.", chart_count, dashboard_height, ) # set viewport height to tile height for easier calculations page.set_viewport_size( {"height": tile_height, "width": viewport_width} ) img = take_tiled_screenshot(page, element_name, tile_height) if img is None: logger.warning( ( "Tiled screenshot failed, " "falling back to standard screenshot" ) ) img = WebDriverPlaywright._get_screenshot( page, element, element_name ) else: img = WebDriverPlaywright._get_screenshot( page, element, element_name ) else: img = WebDriverPlaywright._get_screenshot( page, element, element_name ) except PlaywrightTimeout: # raise again for the finally block, but handled above pass except PlaywrightError: logger.exception( "Encountered an unexpected error when requesting url %s", url ) return img class WebDriverSelenium(WebDriverProxy): def _create_firefox_driver( self, pixel_density: float ) -> tuple[type[WebDriver], type[Service], dict[str, Any]]: """Create Firefox driver configuration.""" options = firefox.options.Options() profile = FirefoxProfile() profile.set_preference("layout.css.devPixelsPerPx", str(pixel_density)) options.profile = profile return ( firefox.webdriver.WebDriver, firefox.service.Service, {"options": options}, ) def _create_chrome_driver( self, pixel_density: float ) -> tuple[type[WebDriver], type[Service], dict[str, Any]]: """Create Chrome driver configuration.""" options = chrome.options.Options() options.add_argument(f"--force-device-scale-factor={pixel_density}") options.add_argument(f"--window-size={self._window[0]},{self._window[1]}") return ( chrome.webdriver.WebDriver, chrome.service.Service, {"options": options}, ) def _normalize_timeout_values(self, config: dict[str, Any]) -> dict[str, Any]: """Convert timeout values to float for urllib3 2.x compatibility.""" timeout_keys = [ "timeout", "connect_timeout", "socket_timeout", "read_timeout", "page_load_timeout", "implicit_wait", "command_executor_timeout", "connection_timeout", ] for key, value in config.items(): if any(timeout_key in key.lower() for timeout_key in timeout_keys): if value is None or value == "None" or value == "null": config[key] = None else: try: config[key] = float(value) except (ValueError, TypeError): config[key] = None logger.warning( "Invalid timeout value for %s: %s, setting to None", key, value, ) return config def create(self) -> WebDriver: pixel_density = app.config["WEBDRIVER_WINDOW"].get("pixel_density", 1) # Get driver class and initial kwargs based on driver type if self._driver_type == "firefox": driver_class, service_class, kwargs = self._create_firefox_driver( pixel_density ) elif self._driver_type == "chrome": driver_class, service_class, kwargs = self._create_chrome_driver( pixel_density ) else: raise Exception( # pylint: disable=broad-exception-raised f"Webdriver name ({self._driver_type}) not supported" ) # Add additional arguments from config options = kwargs["options"] for arg in list(app.config["WEBDRIVER_OPTION_ARGS"]): options.add_argument(arg) # Fix timeout values for urllib3 2.x compatibility webdriver_config = app.config["WEBDRIVER_CONFIGURATION"].copy() webdriver_config = self._normalize_timeout_values(webdriver_config) kwargs.update(webdriver_config) # Set the binary location if provided # We need to pop it from the dict due to selenium_version < 4.10.0 options.binary_location = webdriver_config.pop("binary_location", "") if version.parse(selenium_version) < version.parse("4.10.0"): kwargs |= webdriver_config else: driver_opts = dict( webdriver_config.get("options", {"capabilities": {}, "preferences": {}}) ) driver_srv = dict( webdriver_config.get( "service", { "log_output": "/dev/null", "service_args": [], "port": 0, "env": {}, }, ) ) for name, value in driver_opts.get("capabilities", {}).items(): options.set_capability(name, value) if hasattr(options, "profile"): for name, value in driver_opts.get("preferences", {}).items(): options.profile.set_preference(str(name), value) kwargs |= { "options": options, "service": service_class(**driver_srv), } logger.debug("Init selenium driver") return driver_class(**kwargs) def auth(self, user: User) -> WebDriver: driver = self.create() return machine_auth_provider_factory.instance.authenticate_webdriver( driver, user ) @staticmethod def destroy(driver: WebDriver, tries: int = 2) -> None: """Destroy a driver""" # This is some very flaky code in selenium. Hence the retries # and catch-all exceptions try: retry_call(driver.close, max_tries=tries) except Exception: # pylint: disable=broad-except # noqa: S110 pass try: driver.quit() except Exception: # pylint: disable=broad-except # noqa: S110 pass @staticmethod def find_unexpected_errors(driver: WebDriver) -> list[str]: error_messages = [] try: alert_divs = driver.find_elements(By.XPATH, "//div[@role = 'alert']") logger.debug( "%i alert elements have been found in the screenshot", len(alert_divs) ) for alert_div in alert_divs: # See More button alert_div.find_element(By.XPATH, ".//*[@role = 'button']").click() # wait for modal to show up modal = WebDriverWait( driver, app.config["SCREENSHOT_WAIT_FOR_ERROR_MODAL_VISIBLE"], ).until( EC.visibility_of_any_elements_located( (By.CLASS_NAME, "ant-modal-content") ) )[0] err_msg_div = modal.find_element(By.CLASS_NAME, "ant-modal-body") # collect error message error_messages.append(err_msg_div.text) # close modal after collecting error messages modal.find_element(By.CLASS_NAME, "ant-modal-close").click() # wait until the modal becomes invisible WebDriverWait( driver, app.config["SCREENSHOT_WAIT_FOR_ERROR_MODAL_INVISIBLE"], ).until(EC.invisibility_of_element(modal)) # Use HTML so that error messages are shown in the same style (color) error_as_html = err_msg_div.get_attribute("innerHTML").replace( "'", "\\'" ) try: # Even if some errors can't be updated in the screenshot, # keep all the errors in the server log and do not fail the loop driver.execute_script( f"arguments[0].innerHTML = '{error_as_html}'", alert_div ) except WebDriverException: logger.exception("Failed to update error messages using alert_div") except WebDriverException: logger.exception("Failed to capture unexpected errors") return error_messages def get_screenshot(self, url: str, element_name: str, user: User) -> bytes | None: # noqa: C901 driver = self.auth(user) driver.set_window_size(*self._window) driver.get(url) img: bytes | None = None selenium_headstart = app.config["SCREENSHOT_SELENIUM_HEADSTART"] logger.debug("Sleeping for %i seconds", selenium_headstart) sleep(selenium_headstart) try: try: # page didn't load logger.debug( "Wait for the presence of %s at url: %s", element_name, url ) element = WebDriverWait(driver, self._screenshot_locate_wait).until( EC.presence_of_element_located((By.CLASS_NAME, element_name)) ) except TimeoutException: logger.exception("Selenium timed out requesting url %s", url) raise try: # chart containers didn't render logger.debug("Wait for chart containers to draw at url: %s", url) WebDriverWait(driver, self._screenshot_locate_wait).until( EC.visibility_of_all_elements_located( (By.CLASS_NAME, "chart-container") ) ) except TimeoutException: logger.info("Timeout Exception caught") # Fallback to allow a screenshot of an empty dashboard try: WebDriverWait(driver, 0).until( EC.visibility_of_all_elements_located( (By.CLASS_NAME, "grid-container") ) ) except: logger.exception( "Selenium timed out waiting for dashboard to draw at url %s", url, ) raise try: # charts took too long to load logger.debug( "Wait for loading element of charts to be gone at url: %s", url ) WebDriverWait(driver, self._screenshot_load_wait).until_not( EC.presence_of_all_elements_located((By.CLASS_NAME, "loading")) ) except TimeoutException: logger.exception( "Selenium timed out waiting for charts to load at url %s", url ) raise selenium_animation_wait = app.config["SCREENSHOT_SELENIUM_ANIMATION_WAIT"] logger.debug("Wait %i seconds for chart animation", selenium_animation_wait) sleep(selenium_animation_wait) logger.debug( "Taking a PNG screenshot of url %s as user %s", url, user.username, ) if app.config["SCREENSHOT_REPLACE_UNEXPECTED_ERRORS"]: unexpected_errors = WebDriverSelenium.find_unexpected_errors(driver) if unexpected_errors: logger.warning( "%i errors found in the screenshot. URL: %s. Errors are: %s", len(unexpected_errors), url, unexpected_errors, ) img = element.screenshot_as_png except Exception as ex: logger.warning("exception in webdriver", exc_info=ex) raise except TimeoutException: # raise again for the finally block, but handled above raise except StaleElementReferenceException: logger.exception( "Selenium got a stale element while requesting url %s", url, ) raise except WebDriverException: logger.exception( "Encountered an unexpected error when requesting url %s", url ) raise finally: self.destroy(driver, app.config["SCREENSHOT_SELENIUM_RETRIES"]) return img