mirror of
https://github.com/apache/superset.git
synced 2026-04-20 08:34:37 +00:00
chore: Migrating reports to AuthWebdriverProxy (#10567)
* Migrating reports to AuthWebdriverProxy * Extracting out webdriver proxy / Adding thumbnail tests to CI * Adding license * Adding license again * Empty commit * Adding thumbnail tests to CI * Switching thumbnail test to Postgres * Linting * Adding mypy:ignore / removing thumbnail tests from CI * Putting ignore statement back * Updating docs * First cut at authprovider * First cut at authprovider mostly working - still needs more tests * Auth provider tests added * Linting * Linting again... * Linting again... * Busting CI cache * Reverting workflow change * Fixing dataclasses * Reverting back to master * linting? * Reverting installation.rst * Reverting package-lock.json * Addressing feedback * Blacking * Lazy logging strings * UPDATING.md note
This commit is contained in:
113
superset/utils/machine_auth.py
Normal file
113
superset/utils/machine_auth.py
Normal file
@@ -0,0 +1,113 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import importlib
|
||||
import logging
|
||||
from typing import Callable, Dict, TYPE_CHECKING
|
||||
|
||||
from flask import current_app, Flask, request, Response, session
|
||||
from flask_login import login_user
|
||||
from selenium.webdriver.remote.webdriver import WebDriver
|
||||
from werkzeug.http import parse_cookie
|
||||
|
||||
from superset.utils.urls import headless_url
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
# pylint: disable=unused-import
|
||||
from flask_appbuilder.security.sqla.models import User
|
||||
|
||||
|
||||
class MachineAuthProvider:
    """
    Authenticates a Selenium WebDriver so it can load pages on behalf of a user.

    The default strategy logs the user in under a Flask test request context,
    saves the resulting session onto a throwaway response and copies the
    cookies found there into the driver. A callable supplied at construction
    time replaces that strategy entirely.
    """

    def __init__(
        self, auth_webdriver_func_override: Callable[[WebDriver, "User"], WebDriver]
    ):
        """
        :param auth_webdriver_func_override: Callable invoked as
            ``func(driver, user)`` instead of the default logic; a falsy value
            keeps the default behaviour
        """
        # This is here in order to allow for the authenticate_webdriver func to be
        # overridden via config, as opposed to the entire provider implementation
        self._auth_webdriver_func_override = auth_webdriver_func_override

    def authenticate_webdriver(self, driver: WebDriver, user: "User",) -> WebDriver:
        """
        Default AuthDriverFuncType type that sets a session cookie flask-login style

        :param driver: The WebDriver to authenticate
        :param user: The user to log in as; when falsy, the cookies of the
            current Flask request (if any) are copied instead
        :return: The WebDriver passed in (fluent), or whatever the configured
            override returns
        """
        # Short-circuit this method if we have an override configured
        if self._auth_webdriver_func_override:
            return self._auth_webdriver_func_override(driver, user)

        # Setting cookies requires doing a request first
        driver.get(headless_url("/login/"))

        if user:
            cookies = self.get_auth_cookies(user)
        elif request.cookies:
            cookies = request.cookies
        else:
            cookies = {}

        for cookie_name, cookie_val in cookies.items():
            driver.add_cookie(dict(name=cookie_name, value=cookie_val))

        return driver

    @staticmethod
    def _leading_cookie(parsed: Dict[str, str]) -> Dict[str, str]:
        """
        Return the first name/value pair of a parsed cookie header.

        :param parsed: Mapping produced by parsing one "set-cookie" value
        :return: A dict holding only the first pair, or an empty dict when the
            header contained nothing (instead of raising IndexError)
        """
        first_pair = next(iter(parsed.items()), None)
        if first_pair is None:
            return {}
        return {first_pair[0]: first_pair[1]}

    @staticmethod
    def get_auth_cookies(user: "User") -> Dict[str, str]:
        """
        Log ``user`` in under a test request context and collect the cookies
        that the session interface would send back.

        :param user: The user to log in
        :return: Cookie names mapped to values, one entry per "set-cookie"
            header that carried a cookie
        """
        # Login with the user specified to get the reports
        with current_app.test_request_context("/login"):
            login_user(user)
            # A mock response object to get the cookie information from
            response = Response()
            current_app.session_interface.save_session(current_app, session, response)

        cookies: Dict[str, str] = {}

        # Grab any "set-cookie" headers from the login response
        for name, value in response.headers:
            if name.lower() == "set-cookie":
                # This yields a MultiDict, which is ordered -- something like
                # MultiDict([('session', 'value-we-want), ('HttpOnly', ''), etc...
                # Therefore, we just need to grab the first tuple and add it to our
                # final dict. An empty header contributes nothing.
                cookies.update(MachineAuthProvider._leading_cookie(parse_cookie(value)))

        return cookies
|
||||
|
||||
|
||||
class MachineAuthProviderFactory:
    """
    Builds the machine auth provider named in the Flask app configuration.

    The provider is created by ``init_app`` and exposed through ``instance``.
    """

    def __init__(self) -> None:
        # Stays None until init_app() has been called
        self._auth_provider = None

    def init_app(self, app: Flask) -> None:
        """
        Import the class named by ``MACHINE_AUTH_PROVIDER_CLASS`` and
        instantiate it with the ``WEBDRIVER_AUTH_FUNC`` config value.

        :param app: The Flask application whose config is read
        :raises ImportError: If the configured path is not of the form
            ``package.module.ClassName`` or the module cannot be imported
        """
        auth_provider_fqclass = app.config["MACHINE_AUTH_PROVIDER_CLASS"]

        # Split on the LAST dot: everything before it is the module path,
        # everything after it is the class name
        module_name, separator, class_name = auth_provider_fqclass.rpartition(".")
        if not separator or not module_name or not class_name:
            # Slicing on a missing dot used to chop the last character off the
            # value and try to import that, giving a misleading error
            raise ImportError(
                "MACHINE_AUTH_PROVIDER_CLASS must be a dotted 'module.ClassName' "
                f"path, got {auth_provider_fqclass!r}"
            )

        auth_provider_class = getattr(importlib.import_module(module_name), class_name)

        self._auth_provider = auth_provider_class(app.config["WEBDRIVER_AUTH_FUNC"])

    @property
    def instance(self) -> MachineAuthProvider:
        """
        :return: The provider built by ``init_app`` (None if it was never called)
        """
        return self._auth_provider  # type: ignore
|
||||
@@ -15,23 +15,13 @@
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
import logging
|
||||
import time
|
||||
from io import BytesIO
|
||||
from typing import Any, Callable, Dict, List, Optional, Tuple, TYPE_CHECKING, Union
|
||||
from typing import Optional, TYPE_CHECKING, Union
|
||||
|
||||
from flask import current_app, request, Response, session
|
||||
from flask_login import login_user
|
||||
from retry.api import retry_call
|
||||
from selenium.common.exceptions import TimeoutException, WebDriverException
|
||||
from selenium.webdriver import chrome, firefox
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.remote.webdriver import WebDriver
|
||||
from selenium.webdriver.support import expected_conditions as EC
|
||||
from selenium.webdriver.support.ui import WebDriverWait
|
||||
from werkzeug.http import parse_cookie
|
||||
from flask import current_app
|
||||
|
||||
from superset.utils.hashing import md5_sha_from_dict
|
||||
from superset.utils.urls import headless_url
|
||||
from superset.utils.webdriver import WebDriverProxy, WindowSize
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -45,140 +35,6 @@ if TYPE_CHECKING:
|
||||
from flask_appbuilder.security.sqla.models import User
|
||||
from flask_caching import Cache
|
||||
|
||||
# Selenium tuning constants: intervals are in seconds; SELENIUM_RETRIES is a count
|
||||
SELENIUM_CHECK_INTERVAL = 2
|
||||
SELENIUM_RETRIES = 5
|
||||
SELENIUM_HEADSTART = 3
|
||||
|
||||
WindowSize = Tuple[int, int]
|
||||
|
||||
|
||||
def get_auth_cookies(user: "User") -> List[str]:
    """
    Log ``user`` in under a test request context and collect the values of the
    ``session`` cookies the session interface would send back.

    :param user: The user to log in
    :return: One ``session`` cookie value per "set-cookie" header that carries
        one; headers for other cookies are skipped
    """
    # Login with the user specified to get the reports
    with current_app.test_request_context("/login"):
        login_user(user)
        # A mock response object to get the cookie information from
        response = Response()
        current_app.session_interface.save_session(current_app, session, response)

    cookies = []

    # Collect the session cookie values from the mock response
    for name, value in response.headers:
        if name.lower() == "set-cookie":
            # Indexing with ["session"] raised KeyError for any "set-cookie"
            # header that was not the session cookie; skip those instead
            session_value = parse_cookie(value).get("session")
            if session_value is not None:
                cookies.append(session_value)
    return cookies
|
||||
|
||||
|
||||
def auth_driver(driver: WebDriver, user: "User") -> WebDriver:
    """
    Default AuthDriverFuncType: copy authentication cookies into the driver,
    flask-login style.

    With a user, every value returned by ``get_auth_cookies`` is added as a
    cookie named "session". Without one, the cookies of the current Flask
    request (if any) are copied over unchanged.

    :return: The same WebDriver that was passed in
    """
    if user:
        for session_value in get_auth_cookies(user):
            driver.add_cookie({"name": "session", "value": session_value})
        return driver

    # No user given: fall back to whatever the incoming request carries
    incoming_cookies = request.cookies
    if incoming_cookies:
        for cookie_name, cookie_value in incoming_cookies.items():
            driver.add_cookie({"name": cookie_name, "value": cookie_value})
    return driver
|
||||
|
||||
|
||||
class AuthWebDriverProxy:
    """
    Creates headless Selenium drivers, authenticates them and captures a PNG
    screenshot of a single page element.
    """

    def __init__(
        self,
        driver_type: str,
        window: Optional[WindowSize] = None,
        auth_func: Optional[
            Callable[..., Any]
        ] = None,  # pylint: disable=bad-whitespace
    ):
        """
        :param driver_type: "firefox" or "chrome"; any other value makes
            ``create`` raise
        :param window: Window size passed to the driver; defaults to (800, 600)
        :param auth_func: Callable invoked as ``auth_func(driver, user)``.
            Falls back to the ``WEBDRIVER_AUTH_FUNC`` config entry and then to
            ``auth_driver``
        """
        self._driver_type = driver_type
        self._window: WindowSize = window or (800, 600)
        config_auth_func = current_app.config.get("WEBDRIVER_AUTH_FUNC", auth_driver)
        self._auth_func = auth_func or config_auth_func

    def create(self) -> WebDriver:
        """
        Instantiate a headless driver of the configured type.

        :return: A new WebDriver built with the ``WEBDRIVER_CONFIGURATION``
            config entries as extra keyword arguments
        :raises Exception: If the driver type is neither "firefox" nor "chrome"
        """
        if self._driver_type == "firefox":
            driver_class = firefox.webdriver.WebDriver
            options = firefox.options.Options()
        elif self._driver_type == "chrome":
            driver_class = chrome.webdriver.WebDriver
            options = chrome.options.Options()
            arg: str = f"--window-size={self._window[0]},{self._window[1]}"
            options.add_argument(arg)
            # TODO: 2 lines attempting retina PPI don't seem to be working
            options.add_argument("--force-device-scale-factor=2.0")
            options.add_argument("--high-dpi-support=2.0")
        else:
            raise Exception(f"Webdriver name ({self._driver_type}) not supported")
        # Prepare args for the webdriver init
        options.add_argument("--headless")
        kwargs: Dict[Any, Any] = dict(options=options)
        kwargs.update(current_app.config["WEBDRIVER_CONFIGURATION"])
        logger.info("Init selenium driver")
        return driver_class(**kwargs)

    def auth(self, user: "User") -> WebDriver:
        """
        Create a driver and authenticate it for ``user``.

        :return: Whatever the auth function returns for the new driver
        """
        driver = self.create()
        try:
            # Setting cookies requires doing a request first
            driver.get(headless_url("/login/"))
            return self._auth_func(driver, user)
        except Exception:
            # Do not leak the browser process when authentication fails
            self.destroy(driver)
            raise

    @staticmethod
    def destroy(driver: WebDriver, tries: int = 2) -> None:
        """
        Destroy a driver

        :param driver: The driver to close and quit
        :param tries: Number of attempts made at closing the driver
        """
        # This is some very flaky code in selenium. Hence the retries
        # and catch-all exceptions
        try:
            retry_call(driver.close, tries=tries)
        except Exception:  # pylint: disable=broad-except
            # Best effort: keep going, but leave a trace for debugging
            logger.debug("Failed to close selenium driver", exc_info=True)
        try:
            driver.quit()
        except Exception:  # pylint: disable=broad-except
            logger.debug("Failed to quit selenium driver", exc_info=True)

    def _capture_element(self, driver: WebDriver, element_name: str) -> Optional[bytes]:
        """Wait for the element and the page to settle, then return its PNG bytes."""
        img: Optional[bytes] = None
        try:
            logger.debug("Wait for the presence of %s", element_name)
            element = WebDriverWait(
                driver, current_app.config["SCREENSHOT_LOCATE_WAIT"]
            ).until(EC.presence_of_element_located((By.CLASS_NAME, element_name)))
            logger.debug("Wait for .loading to be done")
            WebDriverWait(driver, current_app.config["SCREENSHOT_LOAD_WAIT"]).until_not(
                EC.presence_of_all_elements_located((By.CLASS_NAME, "loading"))
            )
            logger.info("Taking a PNG screenshot")
            img = element.screenshot_as_png
        except TimeoutException:
            logger.error("Selenium timed out")
        except WebDriverException as ex:
            logger.error(ex)
            # Some webdrivers do not support screenshots for elements.
            # In such cases, take a screenshot of the entire page.
            # WebDriver has no screenshot() method; get_screenshot_as_png()
            # is the call that returns the page as PNG bytes.
            try:
                img = driver.get_screenshot_as_png()
            except WebDriverException as fallback_ex:
                logger.error(fallback_ex)
        return img

    def get_screenshot(
        self,
        url: str,
        element_name: str,
        user: "User",
        retries: int = SELENIUM_RETRIES,
    ) -> Optional[bytes]:
        """
        Load ``url`` as ``user`` and screenshot the element with the given class.

        :param url: The page to load
        :param element_name: CSS class name of the element to capture
        :param user: The user the driver is authenticated as
        :param retries: Number of attempts made at closing the driver afterwards
        :return: PNG bytes, or None when the element did not show up in time
            or no screenshot could be taken
        """
        driver = self.auth(user)
        # Everything after the driver exists runs under finally so the browser
        # is torn down even when resizing or navigation raises
        try:
            driver.set_window_size(*self._window)
            driver.get(url)
            logger.debug("Sleeping for %i seconds", SELENIUM_HEADSTART)
            time.sleep(SELENIUM_HEADSTART)
            return self._capture_element(driver, element_name)
        finally:
            self.destroy(driver, retries)
|
||||
|
||||
|
||||
class BaseScreenshot:
|
||||
driver_type = current_app.config.get("EMAIL_REPORTS_WEBDRIVER", "chrome")
|
||||
@@ -192,9 +48,9 @@ class BaseScreenshot:
|
||||
self.url = url
|
||||
self.screenshot: Optional[bytes] = None
|
||||
|
||||
def driver(self, window_size: Optional[WindowSize] = None) -> AuthWebDriverProxy:
|
||||
def driver(self, window_size: Optional[WindowSize] = None) -> WebDriverProxy:
|
||||
window_size = window_size or self.window_size
|
||||
return AuthWebDriverProxy(self.driver_type, window_size)
|
||||
return WebDriverProxy(self.driver_type, window_size)
|
||||
|
||||
def cache_key(
|
||||
self,
|
||||
|
||||
131
superset/utils/webdriver.py
Normal file
131
superset/utils/webdriver.py
Normal file
@@ -0,0 +1,131 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import logging
|
||||
import time
|
||||
from typing import Any, Dict, Optional, Tuple, TYPE_CHECKING
|
||||
|
||||
from flask import current_app
|
||||
from retry.api import retry_call
|
||||
from selenium.common.exceptions import TimeoutException, WebDriverException
|
||||
from selenium.webdriver import chrome, firefox
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.remote.webdriver import WebDriver
|
||||
from selenium.webdriver.support import expected_conditions as EC
|
||||
from selenium.webdriver.support.ui import WebDriverWait
|
||||
|
||||
from superset.extensions import machine_auth_provider_factory
|
||||
|
||||
WindowSize = Tuple[int, int]
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Selenium tuning constants: intervals are in seconds; SELENIUM_RETRIES is a count
|
||||
SELENIUM_CHECK_INTERVAL = 2
|
||||
SELENIUM_RETRIES = 5
|
||||
SELENIUM_HEADSTART = 3
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
# pylint: disable=unused-import
|
||||
from flask_appbuilder.security.sqla.models import User
|
||||
|
||||
|
||||
class WebDriverProxy:
    """
    Creates Selenium drivers, authenticates them through the machine auth
    provider and captures a PNG screenshot of a single page element.
    """

    def __init__(
        self, driver_type: str, window: Optional[WindowSize] = None,
    ):
        """
        :param driver_type: "firefox" or "chrome"; any other value makes
            ``create`` raise
        :param window: Window size passed to the driver; defaults to (800, 600)
        """
        self._driver_type = driver_type
        self._window: WindowSize = window or (800, 600)
        # Wait limits are read once, at construction time
        self._screenshot_locate_wait = current_app.config["SCREENSHOT_LOCATE_WAIT"]
        self._screenshot_load_wait = current_app.config["SCREENSHOT_LOAD_WAIT"]

    def create(self) -> WebDriver:
        """
        Instantiate a driver of the configured type.

        :return: A new WebDriver built with the ``WEBDRIVER_OPTION_ARGS``
            option arguments and the ``WEBDRIVER_CONFIGURATION`` entries as
            extra keyword arguments
        :raises Exception: If the driver type is neither "firefox" nor "chrome"
        """
        if self._driver_type == "firefox":
            driver_class = firefox.webdriver.WebDriver
            options = firefox.options.Options()
        elif self._driver_type == "chrome":
            driver_class = chrome.webdriver.WebDriver
            options = chrome.options.Options()
            options.add_argument(f"--window-size={self._window[0]},{self._window[1]}")
        else:
            raise Exception(f"Webdriver name ({self._driver_type}) not supported")
        # Prepare args for the webdriver init

        # Add additional configured options
        for arg in current_app.config["WEBDRIVER_OPTION_ARGS"]:
            options.add_argument(arg)

        kwargs: Dict[Any, Any] = dict(options=options)
        kwargs.update(current_app.config["WEBDRIVER_CONFIGURATION"])
        logger.info("Init selenium driver")

        return driver_class(**kwargs)

    def auth(self, user: "User") -> WebDriver:
        """
        Create a driver and authenticate it for ``user``.

        :return: Whatever the machine auth provider returns for the new driver
        """
        driver = self.create()
        try:
            return machine_auth_provider_factory.instance.authenticate_webdriver(
                driver, user
            )
        except Exception:
            # Do not leak the browser process when authentication fails
            self.destroy(driver)
            raise

    @staticmethod
    def destroy(driver: WebDriver, tries: int = 2) -> None:
        """
        Destroy a driver

        :param driver: The driver to close and quit
        :param tries: Number of attempts made at closing the driver
        """
        # This is some very flaky code in selenium. Hence the retries
        # and catch-all exceptions
        try:
            retry_call(driver.close, tries=tries)
        except Exception:  # pylint: disable=broad-except
            # Best effort: keep going, but leave a trace for debugging
            logger.debug("Failed to close selenium driver", exc_info=True)
        try:
            driver.quit()
        except Exception:  # pylint: disable=broad-except
            logger.debug("Failed to quit selenium driver", exc_info=True)

    def _capture_element(
        self, driver: WebDriver, url: str, element_name: str
    ) -> Optional[bytes]:
        """Wait for the element and the page to settle, then return its PNG bytes."""
        img: Optional[bytes] = None
        try:
            logger.debug("Wait for the presence of %s", element_name)
            element = WebDriverWait(driver, self._screenshot_locate_wait).until(
                EC.presence_of_element_located((By.CLASS_NAME, element_name))
            )
            logger.debug("Wait for .loading to be done")
            WebDriverWait(driver, self._screenshot_load_wait).until_not(
                EC.presence_of_all_elements_located((By.CLASS_NAME, "loading"))
            )
            logger.info("Taking a PNG screenshot of url %s", url)
            img = element.screenshot_as_png
        except TimeoutException:
            logger.error("Selenium timed out requesting url %s", url)
        except WebDriverException as ex:
            logger.error(ex)
            # Some webdrivers do not support screenshots for elements.
            # In such cases, take a screenshot of the entire page.
            # WebDriver has no screenshot() method; get_screenshot_as_png()
            # is the call that returns the page as PNG bytes.
            try:
                img = driver.get_screenshot_as_png()
            except WebDriverException as fallback_ex:
                logger.error(fallback_ex)
        return img

    def get_screenshot(
        self,
        url: str,
        element_name: str,
        user: "User",
        retries: int = SELENIUM_RETRIES,
    ) -> Optional[bytes]:
        """
        Load ``url`` as ``user`` and screenshot the element with the given class.

        :param url: The page to load
        :param element_name: CSS class name of the element to capture
        :param user: The user the driver is authenticated as
        :param retries: Number of attempts made at closing the driver afterwards
        :return: PNG bytes, or None when the element did not show up in time
            or no screenshot could be taken
        """
        driver = self.auth(user)
        # Everything after the driver exists runs under finally so the browser
        # is torn down even when resizing or navigation raises
        try:
            driver.set_window_size(*self._window)
            driver.get(url)
            logger.debug("Sleeping for %i seconds", SELENIUM_HEADSTART)
            time.sleep(SELENIUM_HEADSTART)
            return self._capture_element(driver, url, element_name)
        finally:
            self.destroy(driver, retries)
|
||||
Reference in New Issue
Block a user