Files
superset2/superset/utils/screenshots.py
Will Barrett 8e23d4f369 chore: Upgrade pylint to 2.5.3 and fix most new rules (#10101)
* Bump pylint version to 2.5.3

* Add a global disable for the most common new pylint error

* Fix a bunch of files containing very few errors

* More pylint tweakage, low-hanging fruit

* More easy stuff...

* Fix more erroring files

* Fix the last couple of errors, clean pylint!

* Black

* Fix mypy issue in connectors/druid/models.py
2020-06-18 14:03:42 -07:00

330 lines
12 KiB
Python

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import logging
import time
import urllib.parse
from io import BytesIO
from typing import Any, Callable, Dict, List, Optional, Tuple, TYPE_CHECKING
from flask import current_app, request, Response, session, url_for
from flask_login import login_user
from retry.api import retry_call
from selenium.common.exceptions import TimeoutException, WebDriverException
from selenium.webdriver import chrome, firefox
from selenium.webdriver.common.by import By
from selenium.webdriver.remote.webdriver import WebDriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from werkzeug.http import parse_cookie
logger = logging.getLogger(__name__)
try:
from PIL import Image # pylint: disable=import-error
except ModuleNotFoundError:
logger.info("No PIL installation found")
if TYPE_CHECKING:
# pylint: disable=unused-import
from flask_appbuilder.security.sqla.models import User
from flask_caching import Cache
# Selenium tuning constants.
# NOTE(review): SELENIUM_CHECK_INTERVAL is not referenced in the portion of
# the module reviewed here; presumably a polling interval in seconds - confirm.
SELENIUM_CHECK_INTERVAL = 2
# Default `retries` of AuthWebDriverProxy.get_screenshot, which forwards it to
# AuthWebDriverProxy.destroy as the number of attempts at closing the driver.
SELENIUM_RETRIES = 5
# Seconds slept after requesting the page, before waiting for the target
# element, to give the page time to load and render.
SELENIUM_HEADSTART = 3
# (width, height) pair used for browser windows and thumbnails.
WindowSize = Tuple[int, int]
def get_auth_cookies(user: "User") -> List[Dict[Any, Any]]:
    """
    Log the given user in within a throwaway request context and collect the
    ``session`` cookie values that the app's session interface would send back.

    :param user: The user to log in as
    :return: The ``session`` value of every ``Set-Cookie`` response header
    """
    with current_app.test_request_context("/login"):
        login_user(user)
        # The response is never sent; it only receives the Set-Cookie headers
        # written by the session interface.
        mock_response = Response()
        current_app.session_interface.save_session(current_app, session, mock_response)

    return [
        parse_cookie(header_value)["session"]
        for header_name, header_value in mock_response.headers
        if header_name.lower() == "set-cookie"
    ]
def auth_driver(driver: WebDriver, user: "User") -> WebDriver:
    """
    Default AuthDriverFuncType: authenticates the driver by installing cookies,
    flask-login style.

    With a user, the session cookies generated for that user are added to the
    driver. Without one, the cookies of the current request (if any) are
    copied over instead.

    :param driver: The webdriver to authenticate
    :param user: The user to authenticate as, or a falsy value to reuse the
        cookies of the current request
    :return: WebDriver
    """
    if user:
        for session_value in get_auth_cookies(user):
            driver.add_cookie({"name": "session", "value": session_value})
        return driver

    if request.cookies:
        for cookie_name, cookie_value in request.cookies.items():
            driver.add_cookie({"name": cookie_name, "value": cookie_value})
    return driver
def headless_url(path: str) -> str:
    """
    Join ``path`` onto the ``WEBDRIVER_BASEURL`` app setting (empty string
    when the setting is absent).

    :param path: The URL path to resolve
    :return: The joined URL
    """
    base_url = current_app.config.get("WEBDRIVER_BASEURL", "")
    return urllib.parse.urljoin(base_url, path)
def get_url_path(view: str, **kwargs: Any) -> str:
    """
    Build the webdriver-facing URL of a view.

    :param view: The endpoint name handed to ``url_for``
    :param kwargs: The values forwarded to ``url_for``
    :return: The view URL joined onto the webdriver base URL
    """
    # url_for is called inside a test request context.
    with current_app.test_request_context():
        view_path = url_for(view, **kwargs)
        return headless_url(view_path)
class AuthWebDriverProxy:
    """
    Creates headless Selenium webdrivers, authenticates them and uses them to
    take PNG screenshots of a page element.
    """

    def __init__(
        self,
        driver_type: str,
        window: Optional[WindowSize] = None,
        auth_func: Optional[
            Callable[..., Any]
        ] = None,  # pylint: disable=bad-whitespace
    ):
        """
        :param driver_type: Either "firefox" or "chrome"
        :param window: The browser window size, defaults to (800, 600)
        :param auth_func: Callable taking (driver, user) and returning the
            authenticated driver; defaults to the ``WEBDRIVER_AUTH_FUNC`` app
            setting, and to ``auth_driver`` when that is not set
        """
        self._driver_type = driver_type
        self._window: WindowSize = window or (800, 600)
        config_auth_func = current_app.config.get("WEBDRIVER_AUTH_FUNC", auth_driver)
        self._auth_func = auth_func or config_auth_func

    def create(self) -> WebDriver:
        """
        Instantiate a headless webdriver of the configured type.

        :return: The new webdriver
        :raises Exception: When the driver type is neither "firefox" nor "chrome"
        """
        if self._driver_type == "firefox":
            driver_class = firefox.webdriver.WebDriver
            options = firefox.options.Options()
        elif self._driver_type == "chrome":
            driver_class = chrome.webdriver.WebDriver
            options = chrome.options.Options()
            arg: str = f"--window-size={self._window[0]},{self._window[1]}"
            options.add_argument(arg)
        else:
            raise Exception(f"Webdriver name ({self._driver_type}) not supported")
        # Prepare args for the webdriver init
        options.add_argument("--headless")
        kwargs: Dict[Any, Any] = dict(options=options)
        # The app configuration may add to or override the default kwargs
        kwargs.update(current_app.config["WEBDRIVER_CONFIGURATION"])
        logger.info("Init selenium driver")
        return driver_class(**kwargs)

    def auth(self, user: "User") -> WebDriver:
        """
        Create a webdriver and authenticate it with the configured auth function.

        :param user: The user handed to the auth function
        :return: Whatever the auth function returns (the authenticated driver)
        """
        driver = self.create()
        try:
            # Setting cookies requires doing a request first
            driver.get(headless_url("/login/"))
            return self._auth_func(driver, user)
        except Exception:  # pylint: disable=broad-except
            # Do not leak the browser process when authentication fails
            self.destroy(driver)
            raise

    @staticmethod
    def destroy(driver: WebDriver, tries: int = 2) -> None:
        """
        Destroy a driver, best effort: failures are logged and never raised.

        :param driver: The webdriver to close and quit
        :param tries: The number of attempts at closing the driver
        """
        # This is some very flaky code in selenium. Hence the retries
        # and catch-all exceptions
        try:
            retry_call(driver.close, tries=tries)
        except Exception:  # pylint: disable=broad-except
            logger.debug("Failed to close the webdriver", exc_info=True)
        try:
            driver.quit()
        except Exception:  # pylint: disable=broad-except
            logger.debug("Failed to quit the webdriver", exc_info=True)

    def get_screenshot(
        self, url: str, element_name: str, user: "User", retries: int = SELENIUM_RETRIES
    ) -> Optional[bytes]:
        """
        Take a PNG screenshot of the element with the given class name.

        :param url: The page to load
        :param element_name: The class name of the element to capture
        :param user: The user to authenticate the driver with
        :param retries: The number of attempts at closing the driver afterwards
        :return: The PNG payload, or None when Selenium timed out
        """
        driver = self.auth(user)
        img: Optional[bytes] = None
        # The outer try only guarantees that the driver is destroyed; errors
        # raised while loading the page still propagate to the caller.
        try:
            driver.set_window_size(*self._window)
            driver.get(url)
            logger.debug("Sleeping for %i seconds", SELENIUM_HEADSTART)
            time.sleep(SELENIUM_HEADSTART)
            try:
                logger.debug("Wait for the presence of %s", element_name)
                element = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.CLASS_NAME, element_name))
                )
                logger.debug("Wait for .loading to be done")
                WebDriverWait(driver, 60).until_not(
                    EC.presence_of_all_elements_located((By.CLASS_NAME, "loading"))
                )
                logger.info("Taking a PNG screenshot")
                img = element.screenshot_as_png
            except TimeoutException:
                logger.error("Selenium timed out")
            except WebDriverException as ex:
                logger.error(ex)
                # Some webdrivers do not support screenshots for elements.
                # In such cases, take a screenshot of the entire page.
                img = driver.get_screenshot_as_png()
        finally:
            self.destroy(driver, retries)
        return img
class BaseScreenshot:
    """
    Base class for thumbnails of a model rendered through a webdriver.

    Subclasses set ``thumbnail_type`` (part of the cache key), ``element``
    (class name of the element to capture) and implement ``url``.
    """

    driver_type = "chrome"
    thumbnail_type: str = ""
    element: str = ""
    window_size: WindowSize = (800, 600)
    thumb_size: WindowSize = (400, 300)

    def __init__(self, model_id: int):
        """
        :param model_id: The id of the model to take the screenshot of
        """
        self.model_id: int = model_id
        self.screenshot: Optional[bytes] = None
        self._driver = AuthWebDriverProxy(self.driver_type, self.window_size)

    @property
    def cache_key(self) -> str:
        """The cache key of the thumbnail, built from its type and model id"""
        return f"thumb__{self.thumbnail_type}__{self.model_id}"

    @property
    def url(self) -> str:
        """The URL of the page to capture, provided by subclasses"""
        raise NotImplementedError()

    def get_screenshot(self, user: "User") -> Optional[bytes]:
        """
        Take the screenshot and keep it on ``self.screenshot``

        :param user: The user to authenticate the webdriver with
        :return: The screenshot payload, None when it could not be taken
        """
        self.screenshot = self._driver.get_screenshot(self.url, self.element, user)
        return self.screenshot

    def get(
        self,
        user: Optional["User"] = None,
        cache: Optional["Cache"] = None,
        thumb_size: Optional[WindowSize] = None,
    ) -> Optional[BytesIO]:
        """
        Get thumbnail screenshot as BytesIO from cache or fetch

        :param user: None to use current user or User Model to login and fetch
        :param cache: The cache to use
        :param thumb_size: Override thumbnail size
        :return: The thumbnail, None when it could not be produced
        """
        payload: Optional[bytes] = None
        thumb_size = thumb_size or self.thumb_size
        if cache:
            payload = cache.get(self.cache_key)
        if not payload:
            payload = self.compute_and_cache(
                user=user, thumb_size=thumb_size, cache=cache
            )
        else:
            logger.info("Loaded thumbnail from cache: %s", self.cache_key)
        if payload:
            return BytesIO(payload)
        return None

    def get_from_cache(self, cache: "Cache") -> Optional[BytesIO]:
        """
        Get the thumbnail from the cache only, never computing it

        :param cache: The cache to read from
        :return: The cached thumbnail, None on a cache miss
        """
        payload = cache.get(self.cache_key)
        if payload:
            return BytesIO(payload)
        return None

    def compute_and_cache(  # pylint: disable=too-many-arguments
        self,
        user: Optional["User"] = None,
        thumb_size: Optional[WindowSize] = None,
        cache: Optional["Cache"] = None,
        force: bool = True,
    ) -> Optional[bytes]:
        """
        Fetches the screenshot, computes the thumbnail and caches the result

        :param user: If no user is given will use the current context
        :param thumb_size: The final thumbnail size
        :param cache: The cache to keep the thumbnail payload
        :param force: Will force the computation even if it's already cached
        :return: Image payload, None when the computation was skipped or failed
        """
        cache_key = self.cache_key
        if not force and cache and cache.get(cache_key):
            logger.info("Thumb already cached, skipping...")
            return None
        thumb_size = thumb_size or self.thumb_size
        logger.info("Processing url for thumbnail: %s", cache_key)

        payload = None

        # Assuming all sorts of things can go wrong with Selenium
        try:
            payload = self.get_screenshot(user=user)
        except Exception as ex:  # pylint: disable=broad-except
            logger.error("Failed at generating thumbnail %s", ex)

        # Resizing is only attempted when the thumbnail size differs from
        # the window size.
        if payload and self.window_size != thumb_size:
            try:
                payload = self.resize_image(payload, thumb_size=thumb_size)
            except Exception as ex:  # pylint: disable=broad-except
                logger.error("Failed at resizing thumbnail %s", ex)
                payload = None

        if payload and cache:
            logger.info("Caching thumbnail: %s %s", cache_key, str(cache))
            cache.set(cache_key, payload)
        return payload

    @classmethod
    def resize_image(
        cls,
        img_bytes: bytes,
        output: str = "png",
        thumb_size: Optional[WindowSize] = None,
        crop: bool = True,
    ) -> bytes:
        """
        Crop the image to the aspect ratio of the window and resize it

        :param img_bytes: The payload of the image to resize
        :param output: The image format of the returned payload
        :param thumb_size: The final size, defaults to the class thumb size
        :param crop: Whether to crop to the window aspect ratio when the image
            height differs from the window height
        :return: The payload of the resized image
        """
        thumb_size = thumb_size or cls.thumb_size
        img = Image.open(BytesIO(img_bytes))
        logger.debug("Selenium image size: %s", str(img.size))
        if crop and img.size[1] != cls.window_size[1]:
            # Keep the full width and cut the height down to the window ratio
            desired_ratio = float(cls.window_size[1]) / cls.window_size[0]
            desired_height = int(img.size[0] * desired_ratio)
            logger.debug("Cropping to: %s*%s", str(img.size[0]), str(desired_height))
            img = img.crop((0, 0, img.size[0], desired_height))
        logger.debug("Resizing to %s", str(thumb_size))
        # Image.ANTIALIAS was an alias of LANCZOS and is gone in Pillow 10
        img = img.resize(thumb_size, Image.LANCZOS)
        new_img = BytesIO()
        if output != "png":
            img = img.convert("RGB")
        img.save(new_img, output)
        new_img.seek(0)
        return new_img.read()
class ChartScreenshot(BaseScreenshot):
    """Thumbnail of a single chart, captured from its ``chart-container`` element"""

    thumbnail_type: str = "chart"
    element: str = "chart-container"
    # Heights are 0.75 times the widths
    window_size: WindowSize = (600, 450)
    thumb_size: WindowSize = (300, 225)

    @property
    def url(self) -> str:
        """The URL of the standalone view of the chart"""
        view_args = {"slice_id": self.model_id, "standalone": "true"}
        return get_url_path("Superset.slice", **view_args)
class DashboardScreenshot(BaseScreenshot):
    """Thumbnail of a dashboard, captured from its ``grid-container`` element"""

    thumbnail_type: str = "dashboard"
    element: str = "grid-container"
    # Heights are 0.75 times the widths
    window_size: WindowSize = (1600, 1200)
    thumb_size: WindowSize = (400, 300)

    @property
    def url(self) -> str:
        """The URL of the dashboard view"""
        view_args = {"dashboard_id": self.model_id}
        return get_url_path("Superset.dashboard", **view_args)