Files
superset2/superset/utils/webdriver.py
2025-12-18 17:30:22 +01:00

695 lines
27 KiB
Python

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations
import logging
from abc import ABC, abstractmethod
from enum import Enum
from time import sleep
from typing import Any, TYPE_CHECKING
from flask import current_app as app
from packaging import version
from selenium import __version__ as selenium_version
from selenium.common.exceptions import (
StaleElementReferenceException,
TimeoutException,
WebDriverException,
)
from selenium.webdriver import chrome, firefox, FirefoxProfile
from selenium.webdriver.common.by import By
from selenium.webdriver.common.service import Service
from selenium.webdriver.remote.webdriver import WebDriver
from selenium.webdriver.support import expected_conditions as EC # noqa: N812
from selenium.webdriver.support.ui import WebDriverWait
from superset.extensions import machine_auth_provider_factory
from superset.utils.retries import retry_call
from superset.utils.screenshot_utils import take_tiled_screenshot
# Browser window dimensions as a ``(width, height)`` pair: index 0 is used as
# the width and index 1 as the height by the drivers below.
WindowSize = tuple[int, int]
# Module-level logger shared by every function and class in this file.
logger = logging.getLogger(__name__)
# Installation hint logged/returned when Playwright is missing.
# NOTE(review): the text mentions Cypress, whereas the fallback implemented in
# this module is Selenium - confirm the intended wording.
PLAYWRIGHT_INSTALL_MESSAGE = (
    "To complete the migration from Cypress "
    "and enable WebGL/DeckGL screenshot support, install Playwright with: "
    "pip install playwright && playwright install chromium"
)
if TYPE_CHECKING:
    # Needed for annotations only; skipped at runtime.
    from typing import Any

    from flask_appbuilder.security.sqla.models import User

# Playwright is optional: use the real API when it can be imported, otherwise
# bind placeholders so the rest of this module still imports cleanly.
try:
    from playwright.sync_api import (
        BrowserContext,
        Error as PlaywrightError,
        Locator,
        Page,
        sync_playwright,
        TimeoutError as PlaywrightTimeout,
    )
except ImportError:
    from typing import Any

    # Define dummy classes when playwright is not available
    BrowserContext = Any
    # With both names bound to ``Exception`` the ``except`` clauses that
    # reference them remain valid Python.
    PlaywrightError = Exception
    PlaywrightTimeout = Exception
    Locator = Any
    Page = Any
    # ``None`` is the sentinel that check_playwright_availability() tests for.
    sync_playwright = None
def check_playwright_availability() -> bool:
    """
    Report whether Playwright and its Chromium browser can be used.

    The check runs in two stages. First the Chromium executable that
    Playwright points at is looked up on disk. If that lookup is inconclusive
    (no path, missing file, or the lookup itself fails) a browser is launched
    and closed again as the definitive test.

    :returns: ``True`` when the Chromium binary exists on disk or a test
        launch succeeds; ``False`` when the ``playwright`` package could not
        be imported or the browser cannot be started (a warning is logged in
        the latter case).
    """
    import os  # local import keeps this function self-contained

    if sync_playwright is None:
        return False

    try:
        with sync_playwright() as p:
            # First try lightweight check - verify the executable exists.
            # ``executable_path`` only tells where Playwright *expects* the
            # browser to be, so a non-empty path alone proves nothing: the
            # file itself has to be present.
            try:
                executable_path = p.chromium.executable_path
                if executable_path and os.path.isfile(executable_path):
                    return True
            except Exception:  # pylint: disable=broad-except
                # Fall back to full launch test if executable_path fails
                logger.debug(
                    "Executable path check failed, falling back to launch test"
                )

            # Fallback: actually launch browser to ensure it works
            browser = p.chromium.launch(headless=True)
            browser.close()
            return True
    except Exception as e:  # pylint: disable=broad-except
        logger.warning(
            "Playwright module is installed but browser launch failed. "
            "Run 'playwright install chromium' to install browser binaries. "
            "Error: %s",
            str(e),
        )
        return False
# Computed once when this module is imported; validate_webdriver_config() and
# WebDriverPlaywright.get_screenshot() read this cached result.
PLAYWRIGHT_AVAILABLE = check_playwright_availability()
def validate_webdriver_config() -> dict[str, Any]:
    """
    Summarise which screenshot drivers are usable in this deployment.

    :returns: a dictionary with four keys: ``selenium_available`` (always
        ``True``), ``playwright_available`` (the module-level availability
        flag), ``playwright_feature_enabled`` (state of the
        ``PLAYWRIGHT_REPORTS_AND_THUMBNAILS`` feature flag) and
        ``recommended_action`` (the install hint when Playwright is not
        available, otherwise ``None``).
    """
    from superset import feature_flag_manager

    status: dict[str, Any] = {
        "selenium_available": True,  # Always available as required dependency
        "playwright_available": PLAYWRIGHT_AVAILABLE,
    }
    status["playwright_feature_enabled"] = feature_flag_manager.is_feature_enabled(
        "PLAYWRIGHT_REPORTS_AND_THUMBNAILS"
    )
    if PLAYWRIGHT_AVAILABLE:
        status["recommended_action"] = None
    else:
        status["recommended_action"] = PLAYWRIGHT_INSTALL_MESSAGE
    return status
class DashboardStandaloneMode(Enum):
    """Standalone display modes for dashboards, identified by integer value."""

    # NOTE(review): the meanings below are inferred from the member names -
    # confirm against the code that consumes these values.
    HIDE_NAV = 1  # presumably hides the navigation
    HIDE_NAV_AND_TITLE = 2  # presumably hides the navigation and the title
    REPORT = 3  # presumably the layout used when rendering reports
class ChartStandaloneMode(Enum):
    """Standalone display modes for charts."""

    # The two members deliberately keep the value types found in the source:
    # a string for HIDE_NAV and an integer for SHOW_NAV.
    # NOTE(review): presumably these are serialised into a URL parameter -
    # confirm against the callers before normalising the types.
    HIDE_NAV = "true"
    SHOW_NAV = 0
# pylint: disable=too-few-public-methods
class WebDriverProxy(ABC):
    """
    Abstract base of the screenshot drivers defined in this module.

    Stores the driver type, the window size and the two screenshot wait
    settings read from the Flask application config.
    """

    def __init__(self, driver_type: str, window: WindowSize | None = None):
        """
        :param driver_type: name of the browser driver to use
        :param window: ``(width, height)`` of the browser window; a falsy
            value selects the ``(800, 600)`` default
        """
        config = app.config
        self._driver_type = driver_type
        self._window: WindowSize = window if window else (800, 600)
        self._screenshot_locate_wait = config["SCREENSHOT_LOCATE_WAIT"]
        self._screenshot_load_wait = config["SCREENSHOT_LOAD_WAIT"]

    @abstractmethod
    def get_screenshot(self, url: str, element_name: str, user: User) -> bytes | None:
        """
        Run webdriver and return a screenshot
        """
class WebDriverPlaywright(WebDriverProxy):
    """Screenshot driver that renders pages with Playwright's Chromium."""

    @staticmethod
    def auth(user: User, context: BrowserContext) -> BrowserContext:
        """
        Authenticate ``context`` on behalf of ``user``.

        :param user: user to authenticate as
        :param context: Playwright browser context to authenticate
        :returns: whatever the machine auth provider returns for the context
        """
        return machine_auth_provider_factory.instance.authenticate_browser_context(
            context, user
        )

    @staticmethod
    def find_unexpected_errors(page: Page) -> list[str]:
        """
        Collect the details of every alert shown on ``page``.

        For each element with the ``alert`` role the detail modal is opened
        through the alert's button, its text is recorded, the modal is closed
        and the alert's content is replaced with the modal's HTML so that the
        full message is visible in the screenshot.

        Playwright errors are logged and never propagated: a failure while
        rewriting one alert does not stop the loop, any other failure ends
        the collection early.

        :param page: page to inspect
        :returns: the text of every error modal that could be read
        """
        error_messages = []

        try:
            alert_divs = page.get_by_role("alert").all()

            logger.debug(
                "%i alert elements have been found in the screenshot", len(alert_divs)
            )

            for alert_div in alert_divs:
                # See More button
                alert_div.get_by_role("button").click()

                # wait for modal to show up
                page.locator(".ant-modal-content").wait_for(state="visible")
                err_msg_div = page.locator(".ant-modal-content .ant-modal-body")

                # collect error message
                error_messages.append(err_msg_div.text_content())

                # Use HTML so that error messages are shown in the same style
                # (color). The markup is handed to the browser as a function
                # argument, not spliced into script source, so it must not be
                # escaped.
                error_as_html = err_msg_div.inner_html()

                # close modal after collecting error messages
                page.locator(".ant-modal-content .ant-modal-close").click()

                # wait until the modal becomes invisible
                page.locator(".ant-modal-content").wait_for(state="detached")
                try:
                    # Even if some errors can't be updated in the screenshot,
                    # keep all the errors in the server log and do not fail the loop
                    alert_div.evaluate(
                        "(node, error_html) => { node.innerHTML = error_html; }",
                        error_as_html,
                    )
                except PlaywrightError:
                    logger.exception("Failed to update error messages using alert_div")
        except PlaywrightError:
            logger.exception("Failed to capture unexpected errors")

        return error_messages

    @staticmethod
    def _get_screenshot(page: Page, element: Locator, element_name: str) -> bytes:
        """
        Take a non-tiled screenshot.

        :param page: page being captured
        :param element: locator of the element to capture
        :param element_name: ``"standalone"`` captures the full page, any
            other value captures only ``element``
        :returns: the screenshot bytes
        """
        if element_name == "standalone":
            return page.screenshot(full_page=True)
        return element.screenshot()

    def get_screenshot(  # pylint: disable=too-many-locals, too-many-statements  # noqa: C901
        self, url: str, element_name: str, user: User
    ) -> bytes | None:
        """
        Render ``url`` with Playwright's Chromium and capture a screenshot.

        After navigating, the method waits in turn for the element with CSS
        class ``element_name``, for every ``.chart-container`` and for every
        ``.loading`` element to be detached, then pauses for the configured
        animation wait. Alerts are optionally expanded in place, and large
        dashboards are captured in tiles when tiled screenshots are enabled.

        :param url: address of the page to capture
        :param element_name: CSS class of the element to capture
        :param user: user the browser context is authenticated as
        :returns: the screenshot bytes, or ``None`` when Playwright is not
            available, one of the waits timed out or Playwright raised an
            error while capturing
        """
        if not PLAYWRIGHT_AVAILABLE:
            logger.info(
                "Playwright not available - falling back to Selenium. "
                "Note: WebGL/Canvas charts may not render correctly with Selenium. "
                "%s",
                PLAYWRIGHT_INSTALL_MESSAGE,
            )
            return None

        with sync_playwright() as playwright:
            browser_args = app.config["WEBDRIVER_OPTION_ARGS"]
            browser = playwright.chromium.launch(args=browser_args)
            pixel_density = app.config["WEBDRIVER_WINDOW"].get("pixel_density", 1)
            viewport_height = self._window[1]
            viewport_width = self._window[0]
            context = browser.new_context(
                bypass_csp=True,
                viewport={
                    "height": viewport_height,
                    "width": viewport_width,
                },
                device_scale_factor=pixel_density,
            )
            context.set_default_timeout(
                app.config["SCREENSHOT_PLAYWRIGHT_DEFAULT_TIMEOUT"]
            )
            self.auth(user, context)
            page = context.new_page()
            try:
                page.goto(
                    url,
                    wait_until=app.config["SCREENSHOT_PLAYWRIGHT_WAIT_EVENT"],
                )
            except PlaywrightTimeout:
                # Not fatal: the waits below decide whether the page is usable.
                logger.exception(
                    "Web event %s not detected. Page %s might not have been fully loaded",  # noqa: E501
                    app.config["SCREENSHOT_PLAYWRIGHT_WAIT_EVENT"],
                    url,
                )

            img: bytes | None = None
            selenium_headstart = app.config["SCREENSHOT_SELENIUM_HEADSTART"]
            logger.debug("Sleeping for %i seconds", selenium_headstart)
            page.wait_for_timeout(selenium_headstart * 1000)
            element: Locator
            try:
                try:
                    # page didn't load
                    logger.debug(
                        "Wait for the presence of %s at url: %s", element_name, url
                    )
                    element = page.locator(f".{element_name}")
                    element.wait_for()
                except PlaywrightTimeout:
                    logger.exception("Timed out requesting url %s", url)
                    raise

                try:
                    # chart containers didn't render
                    logger.debug("Wait for chart containers to draw at url: %s", url)
                    slice_container_locator = page.locator(".chart-container")
                    for slice_container_elem in slice_container_locator.all():
                        slice_container_elem.wait_for()
                except PlaywrightTimeout:
                    logger.exception(
                        "Timed out waiting for chart containers to draw at url %s",
                        url,
                    )
                    raise

                try:
                    # charts took too long to load
                    logger.debug(
                        "Wait for loading element of charts to be gone at url: %s", url
                    )
                    for loading_element in page.locator(".loading").all():
                        loading_element.wait_for(state="detached")
                except PlaywrightTimeout:
                    logger.exception(
                        "Timed out waiting for charts to load at url %s", url
                    )
                    raise

                selenium_animation_wait = app.config[
                    "SCREENSHOT_SELENIUM_ANIMATION_WAIT"
                ]
                logger.debug(
                    "Wait %i seconds for chart animation", selenium_animation_wait
                )
                page.wait_for_timeout(selenium_animation_wait * 1000)
                logger.debug(
                    "Taking a PNG screenshot of url %s as user %s",
                    url,
                    user.username,
                )
                if app.config["SCREENSHOT_REPLACE_UNEXPECTED_ERRORS"]:
                    unexpected_errors = WebDriverPlaywright.find_unexpected_errors(page)
                    if unexpected_errors:
                        logger.warning(
                            "%i errors found in the screenshot. URL: %s. Errors are: %s",  # noqa: E501
                            len(unexpected_errors),
                            url,
                            unexpected_errors,
                        )

                # Detect large dashboards and use tiled screenshots if enabled
                tiled_enabled = app.config.get("SCREENSHOT_TILED_ENABLED", False)
                if tiled_enabled:
                    chart_count = page.evaluate(
                        'document.querySelectorAll(".chart-container").length'
                    )
                    dashboard_height = page.evaluate(
                        f'document.querySelector(".{element_name}").scrollHeight || 0'
                    )
                    chart_threshold = app.config.get(
                        "SCREENSHOT_TILED_CHART_THRESHOLD", 20
                    )
                    height_threshold = app.config.get(
                        "SCREENSHOT_TILED_HEIGHT_THRESHOLD", 5000
                    )
                    tile_height = app.config.get(
                        "SCREENSHOT_TILED_VIEWPORT_HEIGHT", viewport_height
                    )

                    # Use tiled screenshots for large dashboards: either
                    # threshold qualifies, but tiling is pointless when the
                    # dashboard already fits into a single tile.
                    use_tiled = (
                        chart_count >= chart_threshold
                        or dashboard_height > height_threshold
                    ) and dashboard_height > tile_height

                    if use_tiled:
                        logger.info(
                            "Large dashboard detected: %s charts, %spx height. "
                            "Using tiled screenshots.",
                            chart_count,
                            dashboard_height,
                        )
                        # set viewport height to tile height for easier calculations
                        page.set_viewport_size(
                            {"height": tile_height, "width": viewport_width}
                        )
                        img = take_tiled_screenshot(page, element_name, tile_height)
                        if img is None:
                            logger.warning(
                                (
                                    "Tiled screenshot failed, "
                                    "falling back to standard screenshot"
                                )
                            )
                            img = WebDriverPlaywright._get_screenshot(
                                page, element, element_name
                            )
                    else:
                        img = WebDriverPlaywright._get_screenshot(
                            page, element, element_name
                        )
                else:
                    img = WebDriverPlaywright._get_screenshot(
                        page, element, element_name
                    )
            except PlaywrightTimeout:
                # Already logged by the inner handler that re-raised it; the
                # timeout is swallowed here so that ``None`` is returned.
                pass
            except PlaywrightError:
                logger.exception(
                    "Encountered an unexpected error when requesting url %s", url
                )
            finally:
                browser.close()

            return img
class WebDriverSelenium(WebDriverProxy):
    """Screenshot driver that renders pages with Selenium (Firefox or Chrome)."""

    def _create_firefox_driver(
        self, pixel_density: float
    ) -> tuple[type[WebDriver], type[Service], dict[str, Any]]:
        """
        Create Firefox driver configuration.

        :param pixel_density: value written to the ``layout.css.devPixelsPerPx``
            profile preference
        :returns: driver class, service class and the initial constructor
            kwargs (containing the ``options`` object)
        """
        options = firefox.options.Options()
        profile = FirefoxProfile()
        profile.set_preference("layout.css.devPixelsPerPx", str(pixel_density))
        options.profile = profile

        return (
            firefox.webdriver.WebDriver,
            firefox.service.Service,
            {"options": options},
        )

    def _create_chrome_driver(
        self, pixel_density: float
    ) -> tuple[type[WebDriver], type[Service], dict[str, Any]]:
        """
        Create Chrome driver configuration.

        :param pixel_density: value passed as ``--force-device-scale-factor``
        :returns: driver class, service class and the initial constructor
            kwargs (containing the ``options`` object)
        """
        options = chrome.options.Options()
        options.add_argument(f"--force-device-scale-factor={pixel_density}")
        options.add_argument(f"--window-size={self._window[0]},{self._window[1]}")

        return (
            chrome.webdriver.WebDriver,
            chrome.service.Service,
            {"options": options},
        )

    def _normalize_timeout_values(self, config: dict[str, Any]) -> dict[str, Any]:
        """
        Convert timeout values to float for urllib3 2.x compatibility.

        A key is treated as a timeout when its lower-cased name contains one
        of the known timeout key names. ``None`` and the strings ``"None"`` /
        ``"null"`` become ``None``; values that cannot be converted to float
        also become ``None`` and are reported with a warning.

        :param config: webdriver configuration; it is modified in place
        :returns: the same ``config`` object
        """
        timeout_keys = (
            "timeout",
            "connect_timeout",
            "socket_timeout",
            "read_timeout",
            "page_load_timeout",
            "implicit_wait",
            "command_executor_timeout",
            "connection_timeout",
        )

        # Only values of existing keys are reassigned, so the dict size never
        # changes while it is being iterated.
        for key, value in config.items():
            if not any(timeout_key in key.lower() for timeout_key in timeout_keys):
                continue
            if value is None or value == "None" or value == "null":
                config[key] = None
                continue
            try:
                config[key] = float(value)
            except (ValueError, TypeError):
                config[key] = None
                logger.warning(
                    "Invalid timeout value for %s: %s, setting to None",
                    key,
                    value,
                )
        return config

    def create(self) -> WebDriver:
        """
        Instantiate the Selenium driver described by the application config.

        :returns: a new driver instance of the configured type
        :raises Exception: when the driver type is neither ``firefox`` nor
            ``chrome``
        """
        pixel_density = app.config["WEBDRIVER_WINDOW"].get("pixel_density", 1)

        # Get driver class and initial kwargs based on driver type
        if self._driver_type == "firefox":
            driver_class, service_class, kwargs = self._create_firefox_driver(
                pixel_density
            )
        elif self._driver_type == "chrome":
            driver_class, service_class, kwargs = self._create_chrome_driver(
                pixel_density
            )
        else:
            raise Exception(  # pylint: disable=broad-exception-raised
                f"Webdriver name ({self._driver_type}) not supported"
            )

        # Add additional arguments from config
        options = kwargs["options"]
        for arg in list(app.config["WEBDRIVER_OPTION_ARGS"]):
            options.add_argument(arg)

        # Fix timeout values for urllib3 2.x compatibility
        webdriver_config = app.config["WEBDRIVER_CONFIGURATION"].copy()
        webdriver_config = self._normalize_timeout_values(webdriver_config)

        # Set the binary location if provided.
        # It belongs on the options object and has to be removed from the
        # config *before* the config is merged into the constructor kwargs,
        # otherwise it would be passed on as a driver keyword argument.
        options.binary_location = webdriver_config.pop("binary_location", "")
        kwargs.update(webdriver_config)

        if version.parse(selenium_version) >= version.parse("4.10.0"):
            driver_opts = dict(
                webdriver_config.get("options", {"capabilities": {}, "preferences": {}})
            )
            driver_srv = dict(
                webdriver_config.get(
                    "service",
                    {
                        "log_output": "/dev/null",
                        "service_args": [],
                        "port": 0,
                        "env": {},
                    },
                )
            )
            for name, value in driver_opts.get("capabilities", {}).items():
                options.set_capability(name, value)
            if hasattr(options, "profile"):
                for name, value in driver_opts.get("preferences", {}).items():
                    options.profile.set_preference(str(name), value)
            # Replace the raw "options"/"service" config entries with the
            # objects built from them.
            kwargs |= {
                "options": options,
                "service": service_class(**driver_srv),
            }

        logger.debug("Init selenium driver")
        return driver_class(**kwargs)

    def auth(self, user: User) -> WebDriver:
        """
        Create a driver and authenticate it on behalf of ``user``.

        :param user: user to authenticate as
        :returns: whatever the machine auth provider returns for the driver
        """
        driver = self.create()
        return machine_auth_provider_factory.instance.authenticate_webdriver(
            driver, user
        )

    @staticmethod
    def destroy(driver: WebDriver, tries: int = 2) -> None:
        """
        Destroy a driver

        :param driver: driver to close and quit
        :param tries: maximum number of attempts to close the driver
        """
        # This is some very flaky code in selenium. Hence the retries
        # and catch-all exceptions
        try:
            retry_call(driver.close, max_tries=tries)
        except Exception:  # pylint: disable=broad-except  # noqa: S110
            pass
        try:
            driver.quit()
        except Exception:  # pylint: disable=broad-except  # noqa: S110
            pass

    @staticmethod
    def find_unexpected_errors(driver: WebDriver) -> list[str]:
        """
        Collect the details of every alert shown on the current page.

        For each ``div`` with the ``alert`` role the detail modal is opened
        through the alert's button, its text is recorded, the modal is closed
        and the alert's content is replaced with the modal's HTML so that the
        full message is visible in the screenshot.

        Selenium errors are logged and never propagated: a failure while
        rewriting one alert does not stop the loop, any other failure ends
        the collection early.

        :param driver: driver whose current page is inspected
        :returns: the text of every error modal that could be read
        """
        error_messages = []

        try:
            alert_divs = driver.find_elements(By.XPATH, "//div[@role = 'alert']")
            logger.debug(
                "%i alert elements have been found in the screenshot", len(alert_divs)
            )

            for alert_div in alert_divs:
                # See More button
                alert_div.find_element(By.XPATH, ".//*[@role = 'button']").click()

                # wait for modal to show up
                modal = WebDriverWait(
                    driver,
                    app.config["SCREENSHOT_WAIT_FOR_ERROR_MODAL_VISIBLE"],
                ).until(
                    EC.visibility_of_any_elements_located(
                        (By.CLASS_NAME, "ant-modal-content")
                    )
                )[0]

                err_msg_div = modal.find_element(By.CLASS_NAME, "ant-modal-body")

                # collect error message
                error_messages.append(err_msg_div.text)

                # Use HTML so that error messages are shown in the same style
                # (color). Read it while the modal is still open: once the
                # modal is gone the element reference may be stale.
                error_as_html = err_msg_div.get_attribute("innerHTML")

                # close modal after collecting error messages
                modal.find_element(By.CLASS_NAME, "ant-modal-close").click()

                # wait until the modal becomes invisible
                WebDriverWait(
                    driver,
                    app.config["SCREENSHOT_WAIT_FOR_ERROR_MODAL_INVISIBLE"],
                ).until(EC.invisibility_of_element(modal))

                try:
                    # Even if some errors can't be updated in the screenshot,
                    # keep all the errors in the server log and do not fail the loop.
                    # The markup is passed as a script argument instead of
                    # being spliced into the script source, so quotes,
                    # backslashes and newlines cannot break the script.
                    driver.execute_script(
                        "arguments[0].innerHTML = arguments[1]",
                        alert_div,
                        error_as_html,
                    )
                except WebDriverException:
                    logger.exception("Failed to update error messages using alert_div")
        except WebDriverException:
            logger.exception("Failed to capture unexpected errors")

        return error_messages

    def get_screenshot(  # noqa: C901
        self, url: str, element_name: str, user: User
    ) -> bytes | None:
        """
        Render ``url`` with Selenium and capture ``element_name`` as a PNG.

        After navigating, the method waits in turn for the element with CSS
        class ``element_name``, for the chart containers (falling back to the
        grid container so that an empty dashboard can still be captured) and
        for the ``loading`` elements to disappear, then pauses for the
        configured animation wait. Alerts are optionally expanded in place
        before the element is captured.

        The driver is always destroyed, whether or not the capture succeeds.

        :param url: address of the page to capture
        :param element_name: CSS class of the element to capture
        :param user: user the driver is authenticated as
        :returns: the PNG bytes of the element
        :raises TimeoutException: when one of the waits times out
        :raises WebDriverException: on any other Selenium failure
        """
        driver = self.auth(user)
        img: bytes | None = None

        try:
            # Navigation happens inside the try block so that the driver is
            # destroyed even when loading the page fails.
            driver.set_window_size(*self._window)
            driver.get(url)
            selenium_headstart = app.config["SCREENSHOT_SELENIUM_HEADSTART"]
            logger.debug("Sleeping for %i seconds", selenium_headstart)
            sleep(selenium_headstart)

            try:
                # page didn't load
                logger.debug(
                    "Wait for the presence of %s at url: %s", element_name, url
                )
                element = WebDriverWait(driver, self._screenshot_locate_wait).until(
                    EC.presence_of_element_located((By.CLASS_NAME, element_name))
                )
            except TimeoutException:
                logger.exception("Selenium timed out requesting url %s", url)
                raise

            try:
                # chart containers didn't render
                logger.debug("Wait for chart containers to draw at url: %s", url)
                WebDriverWait(driver, self._screenshot_locate_wait).until(
                    EC.visibility_of_all_elements_located(
                        (By.CLASS_NAME, "chart-container")
                    )
                )
            except TimeoutException:
                logger.info("Timeout Exception caught")
                # Fallback to allow a screenshot of an empty dashboard
                try:
                    WebDriverWait(driver, 0).until(
                        EC.visibility_of_all_elements_located(
                            (By.CLASS_NAME, "grid-container")
                        )
                    )
                except TimeoutException:
                    logger.exception(
                        "Selenium timed out waiting for dashboard to draw at url %s",
                        url,
                    )
                    raise

            try:
                # charts took too long to load
                logger.debug(
                    "Wait for loading element of charts to be gone at url: %s", url
                )
                WebDriverWait(driver, self._screenshot_load_wait).until_not(
                    EC.presence_of_all_elements_located((By.CLASS_NAME, "loading"))
                )
            except TimeoutException:
                logger.exception(
                    "Selenium timed out waiting for charts to load at url %s", url
                )
                raise

            selenium_animation_wait = app.config["SCREENSHOT_SELENIUM_ANIMATION_WAIT"]
            logger.debug("Wait %i seconds for chart animation", selenium_animation_wait)
            sleep(selenium_animation_wait)
            logger.debug(
                "Taking a PNG screenshot of url %s as user %s",
                url,
                user.username,
            )

            if app.config["SCREENSHOT_REPLACE_UNEXPECTED_ERRORS"]:
                unexpected_errors = WebDriverSelenium.find_unexpected_errors(driver)
                if unexpected_errors:
                    logger.warning(
                        "%i errors found in the screenshot. URL: %s. Errors are: %s",
                        len(unexpected_errors),
                        url,
                        unexpected_errors,
                    )

            img = element.screenshot_as_png
        # Handlers are ordered from most to least specific; every one of them
        # re-raises, so ``finally`` is the only place that cleans up.
        except TimeoutException:
            # Timeouts of the wait stages were logged where they occurred.
            raise
        except StaleElementReferenceException:
            logger.exception(
                "Selenium got a stale element while requesting url %s",
                url,
            )
            raise
        except WebDriverException:
            logger.exception(
                "Encountered an unexpected error when requesting url %s", url
            )
            raise
        except Exception as ex:
            logger.warning("exception in webdriver", exc_info=ex)
            raise
        finally:
            self.destroy(driver, app.config["SCREENSHOT_SELENIUM_RETRIES"])

        return img