From 8d985d223bc21f7f5c6cd9f2b88f61940ba3b88a Mon Sep 17 00:00:00 2001 From: Evan Rusackas Date: Fri, 22 May 2026 21:09:14 -0700 Subject: [PATCH] ci(e2e): run backend under gunicorn instead of flask dev server (#40234) Co-authored-by: Claude Code --- .github/workflows/bashlib.sh | 123 ++++++++++++++++++++++++++--------- 1 file changed, 91 insertions(+), 32 deletions(-) diff --git a/.github/workflows/bashlib.sh b/.github/workflows/bashlib.sh index 76f44d28f1b..3b2d1af2f8e 100644 --- a/.github/workflows/bashlib.sh +++ b/.github/workflows/bashlib.sh @@ -175,10 +175,13 @@ cypress-run-all() { local APP_ROOT=$2 cd "$GITHUB_WORKSPACE/superset-frontend/cypress-base" - # Start Flask and run it in background - # --no-debugger means disable the interactive debugger on the 500 page - # so errors can print to stderr. - local flasklog="${HOME}/flask.log" + # Start the Superset backend via gunicorn (not `flask run`). The Flask + # development server is single-threaded and has no crash-recovery, so + # heavy tests (dashboard import/export, SQL Lab) can knock it offline + # for the rest of the run — surfacing as `ECONNREFUSED` / `socket hang up` + # / `Missing CSRF token` cascades. Gunicorn gives us multiple workers, + # a request timeout, and worker-recycling under load. + local serverlog="${HOME}/superset-cypress.log" local port=8081 CYPRESS_BASE_URL="http://localhost:${port}" if [ -n "$APP_ROOT" ]; then @@ -187,8 +190,58 @@ cypress-run-all() { fi export CYPRESS_BASE_URL - nohup flask run --no-debugger -p $port >"$flasklog" 2>&1 "$serverlog" 2>&1 /dev/null || true + ' EXIT + + # Wait for the backend to be ready before launching Cypress; otherwise + # the first spec can race the server bind and see connection errors. + local timeout=60 + say "Waiting for gunicorn server to start on port $port..." + while [ $timeout -gt 0 ]; do + if curl -f "http://localhost:${port}${APP_ROOT}/health" >/dev/null 2>&1; then + say "gunicorn server is ready" + break + fi + sleep 1 + timeout=$((timeout - 1)) + done + if [ $timeout -eq 0 ]; then + echo "::error::gunicorn server failed to start within 60 seconds" + echo "::group::Server startup log" + cat "$serverlog" + echo "::endgroup::" + return 1 + fi USE_DASHBOARD_FLAG='' if [ "$USE_DASHBOARD" = "true" ]; then @@ -200,13 +253,6 @@ cypress-run-all() { # memoryMonitorPid=$! python ../../scripts/cypress_run.py --parallelism $PARALLELISM --parallelism-id $PARALLEL_ID --group $PARALLEL_ID --retries 5 $USE_DASHBOARD_FLAG # kill $memoryMonitorPid - - # After job is done, print out Flask log for debugging - echo "::group::Flask log for default run" - cat "$flasklog" - echo "::endgroup::" - # make sure the program exits - kill $flaskProcessId } playwright-install() { @@ -224,9 +270,11 @@ playwright-run() { local APP_ROOT=$1 local TEST_PATH=$2 - # Start Flask from the project root (same as Cypress) + # Start the Superset backend via gunicorn from the project root. + # See cypress-run-all() above for the rationale — the Flask dev server + # cannot survive the dashboard import/export tests under load. cd "$GITHUB_WORKSPACE" - local flasklog="${HOME}/flask-playwright.log" + local serverlog="${HOME}/superset-playwright.log" local port=8081 PLAYWRIGHT_BASE_URL="http://localhost:${port}" if [ -n "$APP_ROOT" ]; then @@ -235,18 +283,37 @@ playwright-run() { fi export PLAYWRIGHT_BASE_URL - nohup flask run --no-debugger -p $port >"$flasklog" 2>&1 "$serverlog" 2>&1 /dev/null || true" EXIT + # Ensure cleanup on exit (and emit the server log on failure) + trap ' + echo "::group::gunicorn log for Playwright run" + cat "'"$serverlog"'" || true + echo "::endgroup::" + kill '"$serverPid"' 2>/dev/null || true + ' EXIT # Wait for server to be ready with health check local timeout=60 - say "Waiting for Flask server to start on port $port..." + say "Waiting for gunicorn server to start on port $port..." while [ $timeout -gt 0 ]; do if curl -f ${PLAYWRIGHT_BASE_URL}/health >/dev/null 2>&1; then - say "Flask server is ready" + say "gunicorn server is ready" break fi sleep 1 @@ -254,9 +321,9 @@ playwright-run() { done if [ $timeout -eq 0 ]; then - echo "::error::Flask server failed to start within 60 seconds" - echo "::group::Flask startup log" - cat "$flasklog" + echo "::error::gunicorn server failed to start within 60 seconds" + echo "::group::Server startup log" + cat "$serverlog" echo "::endgroup::" return 1 fi @@ -271,7 +338,6 @@ playwright-run() { if ! find "playwright/tests/${TEST_PATH}" -name "*.spec.ts" -type f 2>/dev/null | grep -q .; then echo "No test files found in ${TEST_PATH} - skipping test run" say "::endgroup::" - kill $flaskProcessId return 0 fi echo "Running tests: ${TEST_PATH}" @@ -288,13 +354,6 @@ playwright-run() { fi say "::endgroup::" - # After job is done, print out Flask log for debugging - echo "::group::Flask log for Playwright run" - cat "$flasklog" - echo "::endgroup::" - # make sure the program exits - kill $flaskProcessId - return $status }