Files
superset2/.github/workflows/bashlib.sh
Claude Code a43f3a421b ci: run E2E backend under gunicorn instead of flask dev server
Both `cypress-run-all` and `playwright-run` started the Superset backend
with `flask run --no-debugger -p $port`. The Flask development server is
single-threaded and has no crash-recovery, so heavy tests — most notably
`playwright/tests/dashboard/export.spec.ts:61` (Export YAML) and
`dashboard-list.spec.ts:266` (Import zip) — can knock the backend offline
for the rest of the run. Subsequent tests then cascade-fail with
`ECONNREFUSED`, `socket hang up`, `Missing CSRF token`, and
`page.goto: net::ERR_ABORTED; maybe frame was detached`.

Across the last 50 master runs of the E2E workflow, 6 failed (12%),
every single one with this signature.

Switch both runners to gunicorn with the same shape used in
`docker/entrypoints/run-server.sh`:

- `--workers 4 --worker-class gthread --threads 20` — concurrency that
  matches what the real product runs.
- `--timeout 120` — kill stuck workers instead of letting them hang the
  entire suite.
- `--max-requests 500 --max-requests-jitter 50` — recycle workers
  periodically so memory accumulation from long suites doesn't OOM the
  process.
- `--access-logfile - --error-logfile -` — keep the same per-run log
  capture pattern.

Only frontend (JS) coverage is captured in E2E (verified — bashlib.sh
only instruments the JS assets), so multi-worker gunicorn doesn't break
the existing coverage path.
2026-05-18 23:08:14 -05:00

348 lines
11 KiB
Bash

#!/bin/bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
set -e
GITHUB_WORKSPACE=${GITHUB_WORKSPACE:-.}
ASSETS_MANIFEST="$GITHUB_WORKSPACE/superset/static/assets/manifest.json"
# Rounded job start time, used to create a unique Cypress build id for
# parallelization so we can manually rerun a job after 20 minutes
NONCE=$(echo "$(date "+%Y%m%d%H%M") - ($(date +%M)%20)" | bc)
# Echo only when not in parallel mode
say() {
if [[ $(echo "$INPUT_PARALLEL" | tr '[:lower:]' '[:upper:]') != 'TRUE' ]]; then
echo "$1"
fi
}
pip-upgrade() {
say "::group::Upgrade pip"
pip install --upgrade pip
say "::endgroup::"
}
# prepare (lint and build) frontend code
npm-install() {
cd "$GITHUB_WORKSPACE/superset-frontend"
# cache-restore npm
say "::group::Install npm packages"
echo "npm: $(npm --version)"
echo "node: $(node --version)"
npm ci
say "::endgroup::"
# cache-save npm
}
build-assets() {
cd "$GITHUB_WORKSPACE/superset-frontend"
say "::group::Build static assets"
npm run build
say "::endgroup::"
}
build-instrumented-assets() {
cd "$GITHUB_WORKSPACE/superset-frontend"
say "::group::Build static assets with JS instrumented for test coverage"
cache-restore instrumented-assets
if [[ -f "$ASSETS_MANIFEST" ]]; then
echo 'Skip frontend build because instrumented static assets already exist.'
else
npm run build-instrumented
cache-save instrumented-assets
fi
say "::endgroup::"
}
setup-postgres() {
say "::group::Install dependency for unit tests"
sudo apt-get update && sudo apt-get install --yes libecpg-dev
say "::group::Initialize database"
psql "postgresql://superset:superset@127.0.0.1:15432/superset" <<-EOF
DROP SCHEMA IF EXISTS sqllab_test_db CASCADE;
DROP SCHEMA IF EXISTS admin_database CASCADE;
CREATE SCHEMA sqllab_test_db;
CREATE SCHEMA admin_database;
EOF
say "::endgroup::"
}
setup-mysql() {
say "::group::Initialize database"
mysql -h 127.0.0.1 -P 13306 -u root --password=root <<-EOF
SET GLOBAL transaction_isolation='READ-COMMITTED';
SET GLOBAL TRANSACTION ISOLATION LEVEL READ COMMITTED;
DROP DATABASE IF EXISTS superset;
CREATE DATABASE superset DEFAULT CHARACTER SET utf8 COLLATE utf8_unicode_ci;
DROP DATABASE IF EXISTS sqllab_test_db;
CREATE DATABASE sqllab_test_db DEFAULT CHARACTER SET utf8 COLLATE utf8_unicode_ci;
DROP DATABASE IF EXISTS admin_database;
CREATE DATABASE admin_database DEFAULT CHARACTER SET utf8 COLLATE utf8_unicode_ci;
CREATE USER 'superset'@'%' IDENTIFIED BY 'superset';
GRANT ALL ON *.* TO 'superset'@'%';
FLUSH PRIVILEGES;
EOF
say "::endgroup::"
}
testdata() {
cd "$GITHUB_WORKSPACE"
say "::group::Load test data"
# must specify PYTHONPATH to make `tests.superset_test_config` importable
export PYTHONPATH="$GITHUB_WORKSPACE"
pip install -e .
superset db upgrade
superset load_test_users
superset load_examples --load-test-data
superset init
say "::endgroup::"
}
playwright_testdata() {
cd "$GITHUB_WORKSPACE"
say "::group::Load all examples for Playwright tests"
# must specify PYTHONPATH to make `tests.superset_test_config` importable
export PYTHONPATH="$GITHUB_WORKSPACE"
pip install -e .
superset db upgrade
superset load_test_users
superset load_examples
superset init
# Enable DML on the examples database so Playwright tests can create/drop
# temporary tables via SQL Lab without depending on external data sources.
superset shell <<'PYEOF'
import sys
from superset.extensions import db
from superset.models.core import Database
examples_db = db.session.query(Database).filter_by(database_name='examples').first()
if not examples_db:
sys.exit('ERROR: examples database not found. load_examples may have failed.')
examples_db.allow_dml = True
db.session.commit()
print('Enabled allow_dml on examples database')
PYEOF
say "::endgroup::"
}
celery-worker() {
cd "$GITHUB_WORKSPACE"
say "::group::Start Celery worker"
# must specify PYTHONPATH to make `tests.superset_test_config` importable
export PYTHONPATH="$GITHUB_WORKSPACE"
celery \
--app=superset.tasks.celery_app:app \
worker \
--concurrency=2 \
--detach \
--optimization=fair
say "::endgroup::"
}
cypress-install() {
cd "$GITHUB_WORKSPACE/superset-frontend/cypress-base"
cache-restore cypress
say "::group::Install Cypress"
npm ci
say "::endgroup::"
cache-save cypress
}
cypress-run-all() {
local USE_DASHBOARD=$1
local APP_ROOT=$2
cd "$GITHUB_WORKSPACE/superset-frontend/cypress-base"
# Start the Superset backend via gunicorn (not `flask run`). The Flask
# development server is single-threaded and has no crash-recovery, so
# heavy tests (dashboard import/export, SQL Lab) can knock it offline
# for the rest of the run — surfacing as `ECONNREFUSED` / `socket hang up`
# / `Missing CSRF token` cascades. Gunicorn gives us multiple workers,
# a request timeout, and worker-recycling under load.
local flasklog="${HOME}/flask.log"
local port=8081
CYPRESS_BASE_URL="http://localhost:${port}"
if [ -n "$APP_ROOT" ]; then
export SUPERSET_APP_ROOT=$APP_ROOT
CYPRESS_BASE_URL=${CYPRESS_BASE_URL}${APP_ROOT}
fi
export CYPRESS_BASE_URL
nohup gunicorn \
--bind "127.0.0.1:$port" \
--workers 4 \
--worker-class gthread \
--threads 20 \
--timeout 120 \
--max-requests 500 \
--max-requests-jitter 50 \
--access-logfile - \
--error-logfile - \
"superset.app:create_app()" \
>"$flasklog" 2>&1 </dev/null &
local flaskProcessId=$!
USE_DASHBOARD_FLAG=''
if [ "$USE_DASHBOARD" = "true" ]; then
USE_DASHBOARD_FLAG='--use-dashboard'
fi
# UNCOMMENT the next few commands to monitor memory usage
# monitor_memory & # Start memory monitoring in the background
# memoryMonitorPid=$!
python ../../scripts/cypress_run.py --parallelism $PARALLELISM --parallelism-id $PARALLEL_ID --group $PARALLEL_ID --retries 5 $USE_DASHBOARD_FLAG
# kill $memoryMonitorPid
# After job is done, print out Flask log for debugging
echo "::group::Flask log for default run"
cat "$flasklog"
echo "::endgroup::"
# make sure the program exits
kill $flaskProcessId
}
playwright-install() {
cd "$GITHUB_WORKSPACE/superset-frontend"
say "::group::Install Playwright browsers"
npx playwright install --with-deps chromium
# Create output directories for test results and debugging
mkdir -p playwright-results
mkdir -p test-results
say "::endgroup::"
}
playwright-run() {
local APP_ROOT=$1
local TEST_PATH=$2
# Start the Superset backend via gunicorn from the project root.
# See cypress-run-all() above for the rationale — the Flask dev server
# cannot survive the dashboard import/export tests under load.
cd "$GITHUB_WORKSPACE"
local flasklog="${HOME}/flask-playwright.log"
local port=8081
PLAYWRIGHT_BASE_URL="http://localhost:${port}"
if [ -n "$APP_ROOT" ]; then
export SUPERSET_APP_ROOT=$APP_ROOT
PLAYWRIGHT_BASE_URL=${PLAYWRIGHT_BASE_URL}${APP_ROOT}/
fi
export PLAYWRIGHT_BASE_URL
nohup gunicorn \
--bind "127.0.0.1:$port" \
--workers 4 \
--worker-class gthread \
--threads 20 \
--timeout 120 \
--max-requests 500 \
--max-requests-jitter 50 \
--access-logfile - \
--error-logfile - \
"superset.app:create_app()" \
>"$flasklog" 2>&1 </dev/null &
local flaskProcessId=$!
# Ensure cleanup on exit
trap "kill $flaskProcessId 2>/dev/null || true" EXIT
# Wait for server to be ready with health check
local timeout=60
say "Waiting for gunicorn server to start on port $port..."
while [ $timeout -gt 0 ]; do
if curl -f ${PLAYWRIGHT_BASE_URL}/health >/dev/null 2>&1; then
say "gunicorn server is ready"
break
fi
sleep 1
timeout=$((timeout - 1))
done
if [ $timeout -eq 0 ]; then
echo "::error::gunicorn server failed to start within 60 seconds"
echo "::group::Server startup log"
cat "$flasklog"
echo "::endgroup::"
return 1
fi
# Change to frontend directory for Playwright execution
cd "$GITHUB_WORKSPACE/superset-frontend"
say "::group::Run Playwright tests"
echo "Running Playwright with baseURL: ${PLAYWRIGHT_BASE_URL}"
if [ -n "$TEST_PATH" ]; then
# Check if there are any test files in the specified path
if ! find "playwright/tests/${TEST_PATH}" -name "*.spec.ts" -type f 2>/dev/null | grep -q .; then
echo "No test files found in ${TEST_PATH} - skipping test run"
say "::endgroup::"
kill $flaskProcessId
return 0
fi
echo "Running tests: ${TEST_PATH}"
# Set INCLUDE_EXPERIMENTAL=true to allow experimental tests to run
export INCLUDE_EXPERIMENTAL=true
npx playwright test "${TEST_PATH}" --output=playwright-results
local status=$?
# Unset to prevent leaking into subsequent commands
unset INCLUDE_EXPERIMENTAL
else
echo "Running all required tests (experimental/ excluded via playwright.config.ts)"
npx playwright test --output=playwright-results
local status=$?
fi
say "::endgroup::"
# After job is done, print out Flask log for debugging
echo "::group::Flask log for Playwright run"
cat "$flasklog"
echo "::endgroup::"
# make sure the program exits
kill $flaskProcessId
return $status
}
eyes-storybook-dependencies() {
say "::group::install eyes-storyook dependencies"
sudo apt-get update -y && sudo apt-get -y install gconf-service ca-certificates libxshmfence-dev fonts-liberation libappindicator3-1 libasound2 libatk-bridge2.0-0 libatk1.0-0 libc6 libcairo2 libcups2 libdbus-1-3 libexpat1 libfontconfig1 libgbm1 libgcc1 libgconf-2-4 libglib2.0-0 libgdk-pixbuf2.0-0 libgtk-3-0 libnspr4 libnss3 libpangocairo-1.0-0 libstdc++6 libx11-6 libx11-xcb1 libxcb1 libxcomposite1 libxcursor1 libxdamage1 libxext6 libxfixes3 libxi6 libxrandr2 libxrender1 libxss1 libxtst6 lsb-release xdg-utils libappindicator1
say "::endgroup::"
}
monitor_memory() {
# This is a small utility to monitor memory usage. Useful for debugging memory in GHA.
# To use wrap your command as follows
#
# monitor_memory & # Start memory monitoring in the background
# memoryMonitorPid=$!
# YOUR_COMMAND_HERE
# kill $memoryMonitorPid
while true; do
echo "$(date) - Top 5 memory-consuming processes:"
ps -eo pid,comm,%mem --sort=-%mem | head -n 6 # First line is the header, next 5 are top processes
sleep 2
done
}