diff --git a/UPDATING.md b/UPDATING.md index 2100aef2671..fcd69c18fd4 100644 --- a/UPDATING.md +++ b/UPDATING.md @@ -23,6 +23,9 @@ This file documents any backwards-incompatible changes in Superset and assists people when migrating to a new version. ## Next +- [34561](https://github.com/apache/superset/pull/34561) Added tiled screenshot functionality for Playwright-based reports to handle large dashboards more efficiently. When enabled (default: `SCREENSHOT_TILED_ENABLED = True`), dashboards with 20+ charts or height exceeding 5000px will be captured using multiple viewport-sized tiles and combined into a single image. This improves report generation performance and reliability for large dashboards. +Note: Pillow is now a required dependency (previously optional) to support image processing for tiled screenshots. +`thumbnails` optional dependency is now deprecated and will be removed in the next major release (7.0). - [33084](https://github.com/apache/superset/pull/33084) The DISALLOWED_SQL_FUNCTIONS configuration now includes additional potentially sensitive database functions across PostgreSQL, MySQL, SQLite, MS SQL Server, and ClickHouse. Existing queries using these functions may now be blocked. Review your SQL Lab queries and dashboards if you encounter "disallowed function" errors after upgrading - [34235](https://github.com/apache/superset/pull/34235) CSV exports now use `utf-8-sig` encoding by default to include a UTF-8 BOM, improving compatibility with Excel. - [34258](https://github.com/apache/superset/pull/34258) changing the default in Dockerfile to INCLUDE_CHROMIUM="false" (from "true") in the past. This ensures the `lean` layer is lean by default, and people can opt-in to the `chromium` layer by setting the build arg `INCLUDE_CHROMIUM=true`. This is a breaking change for anyone using the `lean` layer, as it will no longer include Chromium by default. 
diff --git a/pyproject.toml b/pyproject.toml index edfcd145aef..a8b605e888f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -79,6 +79,7 @@ dependencies = [ "parsedatetime", "paramiko>=3.4.0", "pgsanity", + "Pillow>=11.0.0, <12", "polyline>=2.0.0, <3.0", "pyparsing>=3.0.6, <4", "python-dateutil", @@ -181,7 +182,7 @@ tdengine = [ "taos-ws-py>=0.3.8" ] teradata = ["teradatasql>=16.20.0.23"] -thumbnails = ["Pillow>=10.0.1, <11"] +thumbnails = [] # deprecated, will be removed in 7.0 vertica = ["sqlalchemy-vertica-python>=0.5.9, < 0.6"] netezza = ["nzalchemy>=11.0.2"] starrocks = ["starrocks>=1.0.0"] @@ -400,6 +401,7 @@ authorized_licenses = [ "isc license (iscl)", "isc license", "mit", + "mit-cmu", "mozilla public license 2.0 (mpl 2.0)", "osi approved", "osi approved", diff --git a/requirements/base.txt b/requirements/base.txt index 7126e0fc391..2041f7dea9d 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -266,6 +266,8 @@ parsedatetime==2.6 # via apache-superset (pyproject.toml) pgsanity==0.2.9 # via apache-superset (pyproject.toml) +pillow==11.3.0 + # via apache_superset (pyproject.toml) platformdirs==4.3.8 # via requests-cache ply==3.11 diff --git a/requirements/development.txt b/requirements/development.txt index 41b065cd67f..a35654c374c 100644 --- a/requirements/development.txt +++ b/requirements/development.txt @@ -537,7 +537,7 @@ pgsanity==0.2.9 # via # -c requirements/base.txt # apache-superset -pillow==10.3.0 +pillow==11.3.0 # via # apache-superset # matplotlib diff --git a/superset/config.py b/superset/config.py index ebc772a7412..7b33423acd4 100644 --- a/superset/config.py +++ b/superset/config.py @@ -868,6 +868,14 @@ SCREENSHOT_PLAYWRIGHT_DEFAULT_TIMEOUT = int( timedelta(seconds=60).total_seconds() * 1000 ) +# Tiled screenshot configuration for large dashboards +SCREENSHOT_TILED_ENABLED = True # Enable tiled screenshots for large dashboards +SCREENSHOT_TILED_CHART_THRESHOLD = 20 # Minimum charts to trigger tiled screenshots 
+SCREENSHOT_TILED_HEIGHT_THRESHOLD = ( + 5000 # Minimum height (px) to trigger tiled screenshots +) +SCREENSHOT_TILED_VIEWPORT_HEIGHT = 2000 # Height of each tile in pixels + # --------------------------------------------------- # Image and file configuration # --------------------------------------------------- diff --git a/superset/utils/screenshot_utils.py b/superset/utils/screenshot_utils.py new file mode 100644 index 00000000000..e84eb12cfa8 --- /dev/null +++ b/superset/utils/screenshot_utils.py @@ -0,0 +1,179 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import annotations + +import io +import logging +from typing import TYPE_CHECKING + +from PIL import Image + +logger = logging.getLogger(__name__) + +if TYPE_CHECKING: + try: + from playwright.sync_api import Page + except ImportError: + Page = None + + +def combine_screenshot_tiles(screenshot_tiles: list[bytes]) -> bytes: + """ + Combine multiple screenshot tiles into a single vertical image. 
+ + Args: + screenshot_tiles: List of screenshot bytes in PNG format + + Returns: + Combined screenshot as bytes + """ + if not screenshot_tiles: + return b"" + + if len(screenshot_tiles) == 1: + return screenshot_tiles[0] + + try: + # Open all images + images = [Image.open(io.BytesIO(tile)) for tile in screenshot_tiles] + + # Calculate total dimensions + total_width = max(img.width for img in images) + total_height = sum(img.height for img in images) + + # Create combined image + combined = Image.new("RGB", (total_width, total_height), "white") + + # Paste each tile + y_offset = 0 + for img in images: + combined.paste(img, (0, y_offset)) + y_offset += img.height + + # Convert back to bytes + output = io.BytesIO() + combined.save(output, format="PNG") + return output.getvalue() + + except Exception as e: + logger.exception(f"Failed to combine screenshot tiles: {e}") + # Return the first tile as fallback + return screenshot_tiles[0] + + +def take_tiled_screenshot( + page: "Page", element_name: str, viewport_height: int = 2000 +) -> bytes | None: + """ + Take a tiled screenshot of a large dashboard by scrolling and capturing sections. 
+ + Args: + page: Playwright page object + element_name: CSS class name of the element to screenshot + viewport_height: Height of each tile in pixels + + Returns: + Combined screenshot bytes or None if failed + """ + try: + # Get the target element + element = page.locator(f".{element_name}") + element.wait_for(timeout=30000) # 30 second timeout + + # Get dashboard dimensions and position + element_info = page.evaluate(f"""() => {{ + const el = document.querySelector(".{element_name}"); + const rect = el.getBoundingClientRect(); + return {{ + height: el.scrollHeight, + top: rect.top + window.scrollY, + left: rect.left + window.scrollX, + width: el.scrollWidth + }}; + }}""") + + dashboard_height = element_info["height"] + dashboard_top = element_info["top"] + dashboard_left = element_info["left"] + dashboard_width = element_info["width"] + + logger.info( + f"Dashboard: {dashboard_width}x{dashboard_height}px at " + f"({dashboard_left}, {dashboard_top})" + ) + + # Calculate number of tiles needed + num_tiles = max(1, (dashboard_height + viewport_height - 1) // viewport_height) + logger.info(f"Taking {num_tiles} screenshot tiles") + + screenshot_tiles = [] + + for i in range(num_tiles): + # Calculate scroll position to show this tile's content + scroll_y = dashboard_top + (i * viewport_height) + + # Scroll the window to the desired position + page.evaluate(f"window.scrollTo(0, {scroll_y})") + logger.debug(f"Scrolled window to {scroll_y} for tile {i + 1}/{num_tiles}") + + # Wait for scroll to settle and content to load + page.wait_for_timeout(2000) # 2 second wait per tile + + # Get the current element position after scroll + current_element_box = page.evaluate(f"""() => {{ + const el = document.querySelector(".{element_name}"); + const rect = el.getBoundingClientRect(); + return {{ + x: rect.left, + y: rect.top, + width: rect.width, + height: rect.height + }}; + }}""") + + # Calculate what portion of the element we want to capture for this tile + tile_start_in_element 
= i * viewport_height + remaining_content = dashboard_height - tile_start_in_element + tile_content_height = min(viewport_height, remaining_content) + + # Clip to capture only the current tile portion of the element + clip = { + "x": current_element_box["x"], + "y": current_element_box["y"], + "width": current_element_box["width"], + "height": min(tile_content_height, current_element_box["height"]), + } + + # Take screenshot with clipping to capture only this tile's content + tile_screenshot = page.screenshot(type="png", clip=clip) + screenshot_tiles.append(tile_screenshot) + + logger.debug(f"Captured tile {i + 1}/{num_tiles} with clip {clip}") + + # Combine all tiles + logger.info("Combining screenshot tiles...") + combined_screenshot = combine_screenshot_tiles(screenshot_tiles) + + # Reset window scroll position + page.evaluate("window.scrollTo(0, 0)") + + return combined_screenshot + + except Exception as e: + logger.exception(f"Tiled screenshot failed: {e}") + return None diff --git a/superset/utils/webdriver.py b/superset/utils/webdriver.py index 6570e550f47..19770f0f99f 100644 --- a/superset/utils/webdriver.py +++ b/superset/utils/webdriver.py @@ -41,6 +41,7 @@ from selenium.webdriver.support.ui import WebDriverWait from superset import feature_flag_manager from superset.extensions import machine_auth_provider_factory from superset.utils.retries import retry_call +from superset.utils.screenshot_utils import take_tiled_screenshot WindowSize = tuple[int, int] logger = logging.getLogger(__name__) @@ -231,7 +232,54 @@ class WebDriverPlaywright(WebDriverProxy): url, unexpected_errors, ) - img = element.screenshot() + # Detect large dashboards and use tiled screenshots if enabled + tiled_enabled = app.config.get("SCREENSHOT_TILED_ENABLED", False) + + if tiled_enabled: + chart_count = page.evaluate( + 'document.querySelectorAll(".chart-container").length' + ) + dashboard_height = page.evaluate( + f'document.querySelector(".{element_name}").scrollHeight || 0' + ) + 
chart_threshold = app.config.get( + "SCREENSHOT_TILED_CHART_THRESHOLD", 20 + ) + height_threshold = app.config.get( + "SCREENSHOT_TILED_HEIGHT_THRESHOLD", 5000 + ) + viewport_height = app.config.get( + "SCREENSHOT_TILED_VIEWPORT_HEIGHT", self._window[1] + ) + + # Use tiled screenshots for large dashboards + use_tiled = ( + chart_count >= chart_threshold + or dashboard_height > height_threshold + ) + + if use_tiled: + logger.info( + ( + f"Large dashboard detected: {chart_count} charts, " + f"{dashboard_height}px height. Using tiled screenshots." + ) + ) + img = take_tiled_screenshot( + page, element_name, viewport_height=viewport_height + ) + if img is None: + logger.warning( + ( + "Tiled screenshot failed, " + "falling back to standard screenshot" + ) + ) + img = element.screenshot() + else: + img = element.screenshot() + else: + img = element.screenshot() except PlaywrightTimeout: # raise again for the finally block, but handled above pass diff --git a/tests/unit_tests/utils/test_screenshot_utils.py b/tests/unit_tests/utils/test_screenshot_utils.py new file mode 100644 index 00000000000..19ca889bea5 --- /dev/null +++ b/tests/unit_tests/utils/test_screenshot_utils.py @@ -0,0 +1,314 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
import io
from unittest.mock import MagicMock, patch

import pytest
from PIL import Image

from superset.utils.screenshot_utils import (
    combine_screenshot_tiles,
    take_tiled_screenshot,
)

# Patch targets shared by the tests below
COMBINE_TARGET = "superset.utils.screenshot_utils.combine_screenshot_tiles"
LOGGER_TARGET = "superset.utils.screenshot_utils.logger"

# Bounding box reported by the mocked page after every scroll
ELEMENT_BOX = {"x": 50, "y": 200, "width": 800, "height": 600}


def _evaluate_results(dashboard_height: int, tile_count: int) -> list:
    """
    Build the ordered return values for ``page.evaluate``.

    The sequence is: one dimensions lookup, then a (scroll, element box) pair
    per tile, then the final scroll reset.
    """
    dimensions = {"height": dashboard_height, "top": 100, "left": 50, "width": 800}
    return [dimensions, *([None, ELEMENT_BOX] * tile_count), None]


class TestCombineScreenshotTiles:
    def _create_test_image(self, width: int, height: int, color: str = "red") -> bytes:
        """Return PNG bytes for a solid-colour image of the given size."""
        buffer = io.BytesIO()
        Image.new("RGB", (width, height), color).save(buffer, format="PNG")
        return buffer.getvalue()

    def test_empty_tiles_returns_empty_bytes(self):
        """An empty tile list produces empty bytes."""
        assert combine_screenshot_tiles([]) == b""

    def test_single_tile_returns_original(self):
        """A lone tile is handed back untouched."""
        only_tile = self._create_test_image(100, 100)
        assert combine_screenshot_tiles([only_tile]) == only_tile

    def test_combine_multiple_tiles_vertically(self):
        """Several tiles are stacked into one vertical PNG."""
        tiles = [
            self._create_test_image(100, 50, "red"),
            self._create_test_image(100, 75, "green"),
            self._create_test_image(100, 25, "blue"),
        ]

        stitched = combine_screenshot_tiles(tiles)
        assert stitched != b""

        decoded = Image.open(io.BytesIO(stitched))
        assert decoded.width == 100  # widest tile
        assert decoded.height == 150  # 50 + 75 + 25
        assert decoded.format == "PNG"

    def test_combine_tiles_different_widths(self):
        """The combined width is the maximum tile width."""
        tiles = [
            self._create_test_image(50, 100, "red"),
            self._create_test_image(150, 100, "green"),
            self._create_test_image(100, 100, "blue"),
        ]

        decoded = Image.open(io.BytesIO(combine_screenshot_tiles(tiles)))

        assert decoded.width == 150
        assert decoded.height == 300

    def test_combine_tiles_handles_pil_error(self):
        """An undecodable tile makes the function fall back to the first tile."""
        good_tile = self._create_test_image(100, 100)
        bad_tile = b"invalid_image_data"

        assert combine_screenshot_tiles([good_tile, bad_tile]) == good_tile

    def test_combine_tiles_logs_exception(self):
        """A decoding failure is logged exactly once via logger.exception."""
        with patch(LOGGER_TARGET) as mock_logger:
            bad_tile = b"definitely_not_an_image"
            good_tile = self._create_test_image(100, 100)

            outcome = combine_screenshot_tiles([good_tile, bad_tile])

            mock_logger.exception.assert_called_once()
            assert outcome == good_tile


class TestTakeTiledScreenshot:
    @pytest.fixture
    def mock_page(self):
        """Mock Playwright page simulating a 5000px dashboard (3 tiles of 2000px)."""
        page = MagicMock()
        page.locator.return_value = MagicMock()
        # 1 dimensions call + 3 * (scroll, box) + 1 reset = 8 evaluate calls
        page.evaluate.side_effect = _evaluate_results(5000, 3)
        page.screenshot.return_value = b"fake_screenshot_data"
        return page

    def test_successful_tiled_screenshot(self, mock_page):
        """The combined image is returned after one screenshot per tile."""
        with patch(COMBINE_TARGET, return_value=b"combined_screenshot") as mock_combine:
            outcome = take_tiled_screenshot(mock_page, "dashboard", viewport_height=2000)

            assert outcome == b"combined_screenshot"
            assert mock_page.screenshot.call_count == 3
            mock_combine.assert_called_once()

    def test_element_not_found_returns_none(self):
        """A failing wait on the element yields None."""
        page = MagicMock()
        missing = MagicMock()
        missing.wait_for.side_effect = Exception("Element not found")
        page.locator.return_value = missing

        assert take_tiled_screenshot(page, "nonexistent", viewport_height=2000) is None

    def test_tile_calculation_logic(self, mock_page):
        """3500px at 2000px per tile rounds up to two tiles."""
        # 1 dimensions call + 2 * (scroll, box) + 1 reset = 6 evaluate calls
        mock_page.evaluate.side_effect = _evaluate_results(3500, 2)

        with patch(COMBINE_TARGET, return_value=b"combined"):
            take_tiled_screenshot(mock_page, "dashboard", viewport_height=2000)

            assert mock_page.screenshot.call_count == 2

    def test_scroll_positions_calculated_correctly(self, mock_page):
        """Each tile scrolls to dashboard_top + i * viewport_height."""
        mock_page.evaluate.side_effect = _evaluate_results(5000, 3)

        with patch(COMBINE_TARGET):
            take_tiled_screenshot(mock_page, "dashboard", viewport_height=2000)

            scroll_scripts = [
                recorded[0][0]
                for recorded in mock_page.evaluate.call_args_list
                if "scrollTo" in str(recorded)
            ]

            # dashboard_top is 100, so tiles start at 100, 2100 and 4100
            assert len(scroll_scripts) == 4  # 3 tile scrolls + 1 reset
            for wanted in (
                "window.scrollTo(0, 100)",
                "window.scrollTo(0, 2100)",
                "window.scrollTo(0, 4100)",
            ):
                assert wanted in scroll_scripts

    def test_reset_scroll_position(self, mock_page):
        """The last evaluate call scrolls the window back to the top."""
        mock_page.evaluate.side_effect = _evaluate_results(5000, 3)

        with patch(COMBINE_TARGET):
            take_tiled_screenshot(mock_page, "dashboard", viewport_height=2000)

            last_evaluate = mock_page.evaluate.call_args_list[-1]
            assert "window.scrollTo(0, 0)" in str(last_evaluate)

    def test_logs_dashboard_info(self, mock_page):
        """Dashboard geometry and the tile count are logged at info level."""
        with patch(LOGGER_TARGET) as mock_logger, patch(COMBINE_TARGET):
            take_tiled_screenshot(mock_page, "dashboard", viewport_height=2000)

            mock_logger.info.assert_any_call("Dashboard: 800x5000px at (50, 100)")
            mock_logger.info.assert_any_call("Taking 3 screenshot tiles")

    def test_exception_handling_returns_none(self):
        """Unexpected errors are logged and converted into a None result."""
        page = MagicMock()
        page.locator.side_effect = Exception("Unexpected error")

        with patch(LOGGER_TARGET) as mock_logger:
            outcome = take_tiled_screenshot(page, "dashboard", viewport_height=2000)

            assert outcome is None
            mock_logger.exception.assert_called_once_with(
                "Tiled screenshot failed: Unexpected error"
            )

    def test_wait_timeouts_between_tiles(self, mock_page):
        """Every tile is preceded by a 2000ms settle wait."""
        with patch(COMBINE_TARGET):
            take_tiled_screenshot(mock_page, "dashboard", viewport_height=2000)

            waits = mock_page.wait_for_timeout.call_args_list
            assert mock_page.wait_for_timeout.call_count == 3
            for recorded in waits:
                assert recorded[0][0] == 2000

    def test_screenshot_clip_parameters(self, mock_page):
        """Each screenshot is a PNG clipped to the mocked element box."""
        with patch(COMBINE_TARGET):
            take_tiled_screenshot(mock_page, "dashboard", viewport_height=2000)

            for recorded in mock_page.screenshot.call_args_list:
                options = recorded[1]
                assert options["type"] == "png"
                assert "clip" in options

                region = options["clip"]
                assert region["x"] == 50
                assert region["y"] == 200
                assert region["width"] == 800
                # Height is capped by the element height from the mock
                assert region["height"] <= 600