fix: Flakiness around scrolling during taking tiled screenshots with Playwright (#36051)

(cherry picked from commit 63dfd95aa2)
This commit is contained in:
Kamil Gabryjelski
2025-11-10 12:57:28 +01:00
committed by Joe Li
parent 70117eb55f
commit 933ec0a918
3 changed files with 151 additions and 203 deletions

View File

@@ -25,6 +25,9 @@ from PIL import Image
logger = logging.getLogger(__name__)
# Time to wait after scrolling for content to settle and load (in milliseconds)
SCROLL_SETTLE_TIMEOUT_MS = 1000
if TYPE_CHECKING:
try:
from playwright.sync_api import Page
@@ -77,7 +80,7 @@ def combine_screenshot_tiles(screenshot_tiles: list[bytes]) -> bytes:
def take_tiled_screenshot(
page: "Page", element_name: str, viewport_height: int = 2000
page: "Page", element_name: str, tile_height: int
) -> bytes | None:
"""
Take a tiled screenshot of a large dashboard by scrolling and capturing sections.
@@ -85,7 +88,7 @@ def take_tiled_screenshot(
Args:
page: Playwright page object
element_name: CSS class name of the element to screenshot
viewport_height: Height of each tile in pixels
tile_height: Height of each tile in pixels
Returns:
Combined screenshot bytes or None if failed
@@ -100,17 +103,17 @@ def take_tiled_screenshot(
const el = document.querySelector(".{element_name}");
const rect = el.getBoundingClientRect();
return {{
width: el.scrollWidth,
height: el.scrollHeight,
top: rect.top + window.scrollY,
left: rect.left + window.scrollX,
width: el.scrollWidth
top: rect.top + window.scrollY,
}};
}}""")
dashboard_height = element_info["height"]
dashboard_top = element_info["top"]
dashboard_left = element_info["left"]
dashboard_width = element_info["width"]
dashboard_height = element_info["height"]
dashboard_left = element_info["left"]
dashboard_top = element_info["top"]
logger.info(
f"Dashboard: {dashboard_width}x{dashboard_height}px at "
@@ -118,60 +121,54 @@ def take_tiled_screenshot(
)
# Calculate number of tiles needed
num_tiles = max(1, (dashboard_height + viewport_height - 1) // viewport_height)
logger.info(f"Taking {num_tiles} screenshot tiles")
num_tiles = max(1, (dashboard_height + tile_height - 1) // tile_height)
logger.info("Taking %s screenshot tiles", num_tiles)
screenshot_tiles = []
for i in range(num_tiles):
# Calculate scroll position to show this tile's content
scroll_y = dashboard_top + (i * viewport_height)
scroll_y = dashboard_top + (i * tile_height)
# Scroll the window to the desired position
page.evaluate(f"window.scrollTo(0, {scroll_y})")
logger.debug(f"Scrolled window to {scroll_y} for tile {i + 1}/{num_tiles}")
logger.debug(
"Scrolled window to %s for tile %s/%s", scroll_y, i + 1, num_tiles
)
# Wait for scroll to settle and content to load
page.wait_for_timeout(2000) # 2 second wait per tile
# Get the current element position after scroll
current_element_box = page.evaluate(f"""() => {{
const el = document.querySelector(".{element_name}");
const rect = el.getBoundingClientRect();
return {{
x: rect.left,
y: rect.top,
width: rect.width,
height: rect.height
}};
}}""")
page.wait_for_timeout(SCROLL_SETTLE_TIMEOUT_MS)
# Calculate what portion of the element we want to capture for this tile
tile_start_in_element = i * viewport_height
tile_start_in_element = i * tile_height
remaining_content = dashboard_height - tile_start_in_element
tile_content_height = min(viewport_height, remaining_content)
# Determine clip height (use visible element height in viewport)
clip_height = min(tile_content_height, current_element_box["height"])
clip_height = min(tile_height, remaining_content)
clip_y = (
0
if tile_height < remaining_content
else tile_height - remaining_content
)
clip_x = dashboard_left
# Skip tile if dimensions are invalid (width or height <= 0)
# This can happen if element is completely scrolled out of viewport
if clip_height <= 0:
if clip_height <= 0 or clip_y < 0:
logger.warning(
"Skipping tile %s/%s due to invalid clip dimensions: "
"width=%s, height=%s (element may be scrolled out of viewport)",
"x=%s, y=%s, width=%s, height=%s "
"(element may be scrolled out of viewport)",
i + 1,
num_tiles,
current_element_box["width"],
clip_x,
clip_y,
dashboard_width,
clip_height,
)
continue
# Clip to capture only the current tile portion of the element
clip = {
"x": current_element_box["x"],
"y": current_element_box["y"],
"width": current_element_box["width"],
"x": clip_x,
"y": clip_y,
"width": dashboard_width,
"height": clip_height,
}
@@ -182,12 +179,9 @@ def take_tiled_screenshot(
logger.debug(f"Captured tile {i + 1}/{num_tiles} with clip {clip}")
# Combine all tiles
logger.info("Combining screenshot tiles...")
logger.debug("Captured tile %s/%s with clip %s", i + 1, num_tiles, clip)
combined_screenshot = combine_screenshot_tiles(screenshot_tiles)
# Reset window scroll position
page.evaluate("window.scrollTo(0, 0)")
return combined_screenshot
except Exception as e:

View File

@@ -162,11 +162,13 @@ class WebDriverPlaywright(WebDriverProxy):
browser_args = app.config["WEBDRIVER_OPTION_ARGS"]
browser = playwright.chromium.launch(args=browser_args)
pixel_density = app.config["WEBDRIVER_WINDOW"].get("pixel_density", 1)
viewport_height = self._window[1]
viewport_width = self._window[0]
context = browser.new_context(
bypass_csp=True,
viewport={
"height": self._window[1],
"width": self._window[0],
"height": viewport_height,
"width": viewport_width,
},
device_scale_factor=pixel_density,
)
@@ -266,15 +268,15 @@ class WebDriverPlaywright(WebDriverProxy):
height_threshold = app.config.get(
"SCREENSHOT_TILED_HEIGHT_THRESHOLD", 5000
)
viewport_height = app.config.get(
"SCREENSHOT_TILED_VIEWPORT_HEIGHT", self._window[1]
tile_height = app.config.get(
"SCREENSHOT_TILED_VIEWPORT_HEIGHT", viewport_height
)
# Use tiled screenshots for large dashboards
use_tiled = (
chart_count >= chart_threshold
or dashboard_height > height_threshold
)
) and dashboard_height > tile_height
if use_tiled:
logger.info(
@@ -283,9 +285,11 @@ class WebDriverPlaywright(WebDriverProxy):
f"{dashboard_height}px height. Using tiled screenshots."
)
)
img = take_tiled_screenshot(
page, element_name, viewport_height=viewport_height
# set viewport height to tile height for easier calculations
page.set_viewport_size(
{"height": tile_height, "width": viewport_width}
)
img = take_tiled_screenshot(page, element_name, tile_height)
if img is None:
logger.warning(
(

View File

@@ -23,6 +23,7 @@ from PIL import Image
from superset.utils.screenshot_utils import (
combine_screenshot_tiles,
SCROLL_SETTLE_TIMEOUT_MS,
take_tiled_screenshot,
)
@@ -114,22 +115,11 @@ class TestTakeTiledScreenshot:
element = MagicMock()
page.locator.return_value = element
# Mock element info - simulating a 5000px tall dashboard
# Mock element info - simulating a 5000px tall dashboard at position 100
element_info = {"height": 5000, "top": 100, "left": 50, "width": 800}
element_box = {"x": 50, "y": 200, "width": 800, "height": 600}
# For 3 tiles (5000px / 2000px = 2.5, rounded up to 3):
# 1 initial call + 3 scroll + 3 element box + 1 reset scroll = 8 calls
page.evaluate.side_effect = [
element_info, # Initial call for dashboard dimensions
None, # First scroll call
element_box, # First element box call
None, # Second scroll call
element_box, # Second element box call
None, # Third scroll call
element_box, # Third element box call
None, # Final reset scroll call
]
# Only one evaluate call needed for dashboard dimensions
page.evaluate.return_value = element_info
# Mock screenshot method
fake_screenshot = b"fake_screenshot_data"
@@ -144,7 +134,7 @@ class TestTakeTiledScreenshot:
) as mock_combine:
mock_combine.return_value = b"combined_screenshot"
result = take_tiled_screenshot(mock_page, "dashboard", viewport_height=2000)
result = take_tiled_screenshot(mock_page, "dashboard", tile_height=2000)
# Should return combined screenshot
assert result == b"combined_screenshot"
@@ -163,7 +153,7 @@ class TestTakeTiledScreenshot:
element.wait_for.side_effect = Exception("Element not found")
mock_page.locator.return_value = element
result = take_tiled_screenshot(mock_page, "nonexistent", viewport_height=2000)
result = take_tiled_screenshot(mock_page, "nonexistent", tile_height=2000)
assert result is None
@@ -171,102 +161,30 @@ class TestTakeTiledScreenshot:
"""Test that tiles are calculated correctly."""
# Mock dashboard height of 3500px with viewport of 2000px
element_info = {"height": 3500, "top": 100, "left": 50, "width": 800}
element_box = {"x": 50, "y": 200, "width": 800, "height": 600}
# For 2 tiles (3500px / 2000px = 1.75, rounded up to 2):
# 1 initial call + 2 scroll + 2 element box + 1 reset scroll = 6 calls
mock_page.evaluate.side_effect = [
element_info,
None, # First scroll call
element_box, # First element box call
None, # Second scroll call
element_box, # Second element box call
None, # Reset scroll call
]
# Override the fixture's evaluate return for this test
mock_page.evaluate.return_value = element_info
with patch(
"superset.utils.screenshot_utils.combine_screenshot_tiles"
) as mock_combine:
mock_combine.return_value = b"combined"
take_tiled_screenshot(mock_page, "dashboard", viewport_height=2000)
take_tiled_screenshot(mock_page, "dashboard", tile_height=2000)
# Should take 2 screenshots (3500px / 2000px = 1.75, rounded up to 2)
assert mock_page.screenshot.call_count == 2
def test_scroll_positions_calculated_correctly(self, mock_page):
"""Test that scroll positions are calculated correctly."""
# Override the fixture's side_effect for this specific test
element_info = {"height": 5000, "top": 100, "left": 50, "width": 800}
element_box = {"x": 50, "y": 200, "width": 800, "height": 600}
mock_page.evaluate.side_effect = [
element_info, # Initial call for dashboard dimensions
None, # First scroll call
element_box, # First element box call
None, # Second scroll call
element_box, # Second element box call
None, # Third scroll call
element_box, # Third element box call
None, # Reset scroll call
]
with patch("superset.utils.screenshot_utils.combine_screenshot_tiles"):
take_tiled_screenshot(mock_page, "dashboard", viewport_height=2000)
# Check scroll positions (dashboard_top = 100)
scroll_calls = [
call
for call in mock_page.evaluate.call_args_list
if "scrollTo" in str(call)
]
# Should have scrolled to positions: 100, 2100, 4100
expected_scrolls = [
"window.scrollTo(0, 100)",
"window.scrollTo(0, 2100)",
"window.scrollTo(0, 4100)",
]
actual_scrolls = [call[0][0] for call in scroll_calls]
assert len(actual_scrolls) == 4 # 3 tile scrolls + 1 reset
for expected in expected_scrolls:
assert expected in actual_scrolls
def test_reset_scroll_position(self, mock_page):
"""Test that scroll position is reset after screenshot."""
# Override the fixture's side_effect for this specific test
element_info = {"height": 5000, "top": 100, "left": 50, "width": 800}
element_box = {"x": 50, "y": 200, "width": 800, "height": 600}
mock_page.evaluate.side_effect = [
element_info, # Initial call for dashboard dimensions
None, # First scroll call
element_box, # First element box call
None, # Second scroll call
element_box, # Second element box call
None, # Third scroll call
element_box, # Third element box call
None, # Reset scroll call
]
with patch("superset.utils.screenshot_utils.combine_screenshot_tiles"):
take_tiled_screenshot(mock_page, "dashboard", viewport_height=2000)
# Check that final call resets scroll to top
final_call = mock_page.evaluate.call_args_list[-1]
assert "window.scrollTo(0, 0)" in str(final_call)
def test_logs_dashboard_info(self, mock_page):
"""Test that dashboard info is logged."""
with patch("superset.utils.screenshot_utils.logger") as mock_logger:
with patch("superset.utils.screenshot_utils.combine_screenshot_tiles"):
take_tiled_screenshot(mock_page, "dashboard", viewport_height=2000)
take_tiled_screenshot(mock_page, "dashboard", tile_height=2000)
# Should log dashboard dimensions
mock_logger.info.assert_any_call("Dashboard: 800x5000px at (50, 100)")
# Should log number of tiles
mock_logger.info.assert_any_call("Taking 3 screenshot tiles")
mock_logger.info.assert_any_call("Taking %s screenshot tiles", 3)
def test_exception_handling_returns_none(self):
"""Test that exceptions are handled and None is returned."""
@@ -274,74 +192,51 @@ class TestTakeTiledScreenshot:
mock_page.locator.side_effect = Exception("Unexpected error")
with patch("superset.utils.screenshot_utils.logger") as mock_logger:
result = take_tiled_screenshot(mock_page, "dashboard", viewport_height=2000)
result = take_tiled_screenshot(mock_page, "dashboard", tile_height=2000)
assert result is None
mock_logger.exception.assert_called_once_with(
"Tiled screenshot failed: Unexpected error"
)
def test_wait_timeouts_between_tiles(self, mock_page):
"""Test that there are appropriate waits between tiles."""
with patch("superset.utils.screenshot_utils.combine_screenshot_tiles"):
take_tiled_screenshot(mock_page, "dashboard", viewport_height=2000)
# Should have called wait_for_timeout for each tile (3 tiles)
assert mock_page.wait_for_timeout.call_count == 3
# Each wait should be 2000ms (2 seconds)
for call in mock_page.wait_for_timeout.call_args_list:
assert call[0][0] == 2000
def test_screenshot_clip_parameters(self, mock_page):
"""Test that screenshot clipping parameters are correct."""
with patch("superset.utils.screenshot_utils.combine_screenshot_tiles"):
take_tiled_screenshot(mock_page, "dashboard", viewport_height=2000)
take_tiled_screenshot(mock_page, "dashboard", tile_height=2000)
# Check screenshot calls have correct clip parameters
screenshot_calls = mock_page.screenshot.call_args_list
for call in screenshot_calls:
# Should have 3 tiles (5000px / 2000px = 2.5, rounded up to 3)
assert len(screenshot_calls) == 3
# All tiles use the same x and width
for _, call in enumerate(screenshot_calls):
kwargs = call[1]
assert kwargs["type"] == "png"
assert "clip" in kwargs
assert kwargs["clip"]["x"] == 50
assert kwargs["clip"]["width"] == 800
clip = kwargs["clip"]
assert clip["x"] == 50
assert clip["y"] == 200
assert clip["width"] == 800
# Height should be min of viewport_height and remaining content
assert clip["height"] <= 600 # Element height from mock
# Check y positions and heights for each tile
# Tile 1: clip_y=0, height=2000 (tile_height < remaining: 5000)
assert screenshot_calls[0][1]["clip"]["y"] == 0
assert screenshot_calls[0][1]["clip"]["height"] == 2000
def test_skips_tiles_with_zero_height(self):
"""Test that tiles with zero height are skipped."""
mock_page = MagicMock()
# Tile 2: clip_y=0, height=2000 (tile_height < remaining: 3000)
assert screenshot_calls[1][1]["clip"]["y"] == 0
assert screenshot_calls[1][1]["clip"]["height"] == 2000
# Mock element locator
element = MagicMock()
mock_page.locator.return_value = element
# Tile 3: clip_y=1000 (tile_height - remaining: 2000 - 1000)
# height=1000 (remaining content)
assert screenshot_calls[2][1]["clip"]["y"] == 1000
assert screenshot_calls[2][1]["clip"]["height"] == 1000
# Mock element info - 4000px tall dashboard
def test_handles_invalid_tile_dimensions(self, mock_page):
"""Test that tiles with invalid dimensions are skipped."""
# Mock a dashboard where the last tile would have 0 or negative height
# This simulates edge cases in height calculations
element_info = {"height": 4000, "top": 100, "left": 50, "width": 800}
# First tile: valid clip region
valid_element_box = {"x": 50, "y": 200, "width": 800, "height": 600}
# Second tile: element scrolled completely out (zero height)
invalid_element_box = {"x": 50, "y": -100, "width": 800, "height": 0}
# For 2 tiles (4000px / 2000px = 2):
# 1 initial + 2 scroll + 2 element box + 1 reset = 6 calls
mock_page.evaluate.side_effect = [
element_info, # Initial call for dashboard dimensions
None, # First scroll call
valid_element_box, # First element box (valid)
None, # Second scroll call
invalid_element_box, # Second element box (zero height)
None, # Reset scroll call
]
mock_page.screenshot.return_value = b"fake_screenshot"
mock_page.evaluate.return_value = element_info
with patch("superset.utils.screenshot_utils.logger") as mock_logger:
with patch(
@@ -349,21 +244,76 @@ class TestTakeTiledScreenshot:
) as mock_combine:
mock_combine.return_value = b"combined"
result = take_tiled_screenshot(
mock_page, "dashboard", viewport_height=2000
)
# Use exact viewport height that divides evenly
result = take_tiled_screenshot(mock_page, "dashboard", tile_height=2000)
# Should still succeed with partial tiles
# Should succeed
assert result == b"combined"
# Should only take 1 screenshot (second tile skipped)
assert mock_page.screenshot.call_count == 1
# Should take 2 screenshots (4000px / 2000px = 2)
assert mock_page.screenshot.call_count == 2
# Should log warning about skipped tile
mock_logger.warning.assert_called_once()
warning_call = mock_logger.warning.call_args
# Check the format string
assert "invalid clip dimensions" in warning_call[0][0]
# Check the arguments (tile 2/2)
assert warning_call[0][1] == 2 # tile number (i + 1)
assert warning_call[0][2] == 2 # num_tiles
# Should not log any warnings about invalid dimensions
warning_calls = [
call
for call in mock_logger.warning.call_args_list
if "invalid clip dimensions" in str(call)
]
assert len(warning_calls) == 0
def test_skips_tile_with_zero_height(self, mock_page):
"""Test that a tile with zero or negative height is skipped."""
# This test verifies the clip_height <= 0 check
# We'll manually test the logic by creating a scenario where
# remaining_content becomes <= 0
element_info = {"height": 2000, "top": 100, "left": 50, "width": 800}
mock_page.evaluate.return_value = element_info
with patch(
"superset.utils.screenshot_utils.combine_screenshot_tiles"
) as mock_combine:
mock_combine.return_value = b"combined"
# Use viewport height equal to element height
result = take_tiled_screenshot(mock_page, "dashboard", tile_height=2000)
# Should succeed with 1 tile
assert result == b"combined"
assert mock_page.screenshot.call_count == 1
def test_scroll_positions_calculated_correctly(self, mock_page):
"""Test that window scroll positions are calculated correctly."""
with patch("superset.utils.screenshot_utils.combine_screenshot_tiles"):
take_tiled_screenshot(mock_page, "dashboard", tile_height=2000)
# Check page.evaluate calls for scrolling
# First call is for dimensions, subsequent are for scrolling
evaluate_calls = mock_page.evaluate.call_args_list
# Should have 1 dimension query + 3 scroll calls
assert len(evaluate_calls) == 4
# First call is for dimensions (contains querySelector)
assert "querySelector" in str(evaluate_calls[0])
# Subsequent calls are scroll positions
# Tile 1: scroll to y=100 (dashboard_top + 0 * tile_height)
assert evaluate_calls[1][0][0] == "window.scrollTo(0, 100)"
# Tile 2: scroll to y=2100 (dashboard_top + 1 * tile_height)
assert evaluate_calls[2][0][0] == "window.scrollTo(0, 2100)"
# Tile 3: scroll to y=4100 (dashboard_top + 2 * tile_height)
assert evaluate_calls[3][0][0] == "window.scrollTo(0, 4100)"
def test_reset_scroll_position(self, mock_page):
"""Test that scroll position waits are called after each scroll."""
with patch("superset.utils.screenshot_utils.combine_screenshot_tiles"):
take_tiled_screenshot(mock_page, "dashboard", tile_height=2000)
# Should call wait_for_timeout 3 times (once per tile)
assert mock_page.wait_for_timeout.call_count == 3
# Each wait should use the scroll settle timeout constant
for call in mock_page.wait_for_timeout.call_args_list:
assert call[0][0] == SCROLL_SETTLE_TIMEOUT_MS