From d74b1b2a11e3c4d96ce9c3593d6b5ca2c0724c2d Mon Sep 17 00:00:00 2001 From: "Sure Admin (bot)" Date: Sat, 16 May 2026 15:26:30 +0200 Subject: [PATCH] fix(preview): use worker list metadata for cleanup (#1799) * fix(preview): use worker list metadata for cleanup * fix(preview): handle cleanup edge cases * fix(preview): harden scheduled cleanup errors * feat(preview): add warmup screen and readiness gate * fix(preview): report success after image deploy * fix(preview): stop blocking healthy previews on stale status --- .github/workflows/preview-cleanup.yml | 66 ++++--- .github/workflows/preview-deploy.yml | 9 +- workers/preview/src/index.ts | 249 +++++++++++++++++++++++++- 3 files changed, 294 insertions(+), 30 deletions(-) diff --git a/.github/workflows/preview-cleanup.yml b/.github/workflows/preview-cleanup.yml index 6c9cbf753..dba8ddccd 100644 --- a/.github/workflows/preview-cleanup.yml +++ b/.github/workflows/preview-cleanup.yml @@ -139,11 +139,28 @@ jobs: # Get list of all preview workers echo "Fetching list of preview workers..." - # Use Cloudflare API to list workers - WORKERS=$(curl -s -X GET \ + # Use Cloudflare API to list workers and read modified_on from the list response. + # The per-script endpoint returns raw script content, not JSON metadata. + WORKERS_RESPONSE=$(curl -fsS -X GET \ "https://api.cloudflare.com/client/v4/accounts/$CLOUDFLARE_ACCOUNT_ID/workers/scripts" \ -H "Authorization: Bearer $CLOUDFLARE_API_TOKEN" \ - -H "Content-Type: application/json" | jq -r '.result[] | select(.id | startswith("sure-preview-")) | .id') + -H "Content-Type: application/json") || { + echo "Failed to fetch preview worker list from Cloudflare" + exit 1 + } + + if ! echo "$WORKERS_RESPONSE" | jq -e '.success == true and (.result | type == "array")' >/dev/null 2>&1; then + echo "Cloudflare API returned an invalid worker list response" + echo "$WORKERS_RESPONSE" | jq -c '.errors // .' + exit 1 + fi + + WORKERS=$(echo "$WORKERS_RESPONSE" | jq -r ' + .result[] + | select(.id | startswith("sure-preview-")) + | [.id, (.modified_on // "")] + | @tsv + ') if [ -z "$WORKERS" ]; then echo "No preview workers found" @@ -151,46 +168,49 @@ jobs: fi echo "Found preview workers:" - echo "$WORKERS" + echo "$WORKERS" | cut -f1 # Check each worker's deployment time CUTOFF_TIME=$(date -d '24 hours ago' +%s) - for WORKER in $WORKERS; do + while IFS=$'\t' read -r WORKER MODIFIED_ON; do + [ -n "$WORKER" ] || continue echo "Checking $WORKER..." - # Get worker details to find last deployment time - WORKER_INFO=$(curl -s -X GET \ - "https://api.cloudflare.com/client/v4/accounts/$CLOUDFLARE_ACCOUNT_ID/workers/scripts/$WORKER" \ - -H "Authorization: Bearer $CLOUDFLARE_API_TOKEN" \ - -H "Content-Type: application/json") + if [ -z "$MODIFIED_ON" ]; then + echo "No modified_on timestamp for $WORKER; skipping" + continue + fi - MODIFIED_ON=$(echo "$WORKER_INFO" | jq -r '.result.modified_on // empty') - - if [ -n "$MODIFIED_ON" ]; then - MODIFIED_TS=$(date -d "$MODIFIED_ON" +%s 2>/dev/null || echo "0") - - if [ "$MODIFIED_TS" -lt "$CUTOFF_TIME" ]; then - echo "Worker $WORKER is older than 24 hours, deleting..." - wrangler delete --name "$WORKER" --force || echo "Failed to delete $WORKER" + if ! MODIFIED_TS=$(date -d "$MODIFIED_ON" +%s 2>/dev/null); then + echo "Invalid modified_on timestamp for $WORKER ($MODIFIED_ON); skipping" + continue + fi + if [ "$MODIFIED_TS" -lt "$CUTOFF_TIME" ]; then + echo "Worker $WORKER is older than 24 hours, deleting..." + if wrangler delete --name "$WORKER" --force; then # Extract PR number and cleanup GitHub deployment PR_NUM=$(echo "$WORKER" | sed 's/sure-preview-//') - if [ -n "$PR_NUM" ]; then + if [[ "$PR_NUM" =~ ^[1-9][0-9]*$ ]]; then echo "Cleaning up GitHub deployment for PR #$PR_NUM" gh api \ -X GET "/repos/${{ github.repository }}/deployments?environment=preview-pr-$PR_NUM" \ - --jq '.[].id' | while read -r DEPLOY_ID; do + --jq '.[].id' 2>/dev/null | while read -r DEPLOY_ID; do gh api \ -X POST "/repos/${{ github.repository }}/deployments/$DEPLOY_ID/statuses" \ -f state=inactive \ -f description="Preview expired after 24 hours" || true - done + done || echo "No deployments to cleanup or error occurred" + else + echo "Could not extract a valid PR number from $WORKER; skipping deployment cleanup" fi else - echo "Worker $WORKER is still within 24-hour window, keeping..." + echo "Failed to delete $WORKER; skipping deployment status update" fi + else + echo "Worker $WORKER is still within 24-hour window, keeping..." fi - done + done <<< "$WORKERS" echo "Cleanup complete" diff --git a/.github/workflows/preview-deploy.yml b/.github/workflows/preview-deploy.yml index 9d89ff413..4ecf03b5e 100644 --- a/.github/workflows/preview-deploy.yml +++ b/.github/workflows/preview-deploy.yml @@ -112,6 +112,13 @@ jobs: PREVIEW_URL="https://sure-preview-${{ github.event.pull_request.number }}.${{ secrets.CLOUDFLARE_WORKERS_SUBDOMAIN }}.workers.dev" echo "preview_url=${PREVIEW_URL}" >> "$GITHUB_OUTPUT" + - name: Warm preview container + env: + PREVIEW_URL: ${{ steps.deploy.outputs.preview_url }} + run: | + echo "Triggering preview wake-up..." + curl -fsS "$PREVIEW_URL/" >/dev/null || true + - name: Update Deployment Status if: always() && steps.deployment.outputs.result uses: actions/github-script@v7 @@ -135,7 +142,7 @@ jobs: const previewUrl = '${{ steps.deploy.outputs.preview_url }}'; const commentBody = `## 🚀 Preview Deployment Ready - Your preview environment has been deployed to Cloudflare Containers. + Your preview environment has been deployed to Cloudflare Containers with the PR's Docker image. **Preview URL:** ${previewUrl} diff --git a/workers/preview/src/index.ts b/workers/preview/src/index.ts index becb8fbe7..872cf1a99 100644 --- a/workers/preview/src/index.ts +++ b/workers/preview/src/index.ts @@ -4,8 +4,61 @@ interface Env { RAILS_CONTAINER: DurableObjectNamespace; } +interface DiagnosticPayload { + stage?: string; + detail?: string; +} + +interface DiagnosticRecord { + event?: string; + at?: string; + payload?: DiagnosticPayload; + state?: { status?: string; lastChange?: number }; + message?: string; +} + +interface PreviewProgress { + phase: "cold" | "warming" | "loading-demo-data" | "ready" | "failed"; + stage: string | null; + message: string; + detail: string; +} + +interface PreviewStatusPayload { + state: unknown; + containerRunning: boolean; + diagnostics: DiagnosticRecord | null; + diagnosticsHistory: DiagnosticRecord[]; + previewReady: boolean; + previewFailed: boolean; + progress: PreviewProgress; +} + const DIAGNOSTICS_KEY = "preview-diagnostics"; const DIAGNOSTICS_HISTORY_KEY = "preview-diagnostics-history"; +const READY_STAGES = new Set(["demo-data-ready", "demo-data-skip"]); +const FAILED_STAGES = new Set(["demo-data-failed", "failed"]); +const WAITING_MESSAGES: Record = { + boot: "Waking preview…", + "redis-start": "Starting Redis…", + "redis-ready": "Redis is ready.", + "postgres-start": "Starting PostgreSQL…", + "postgres-ready": "PostgreSQL is ready.", + "postgres-already-running": "PostgreSQL is already running.", + "db-setup": "Setting up the preview database…", + "db-prepare": "Running database setup…", + "db-prepare-done": "Database setup finished.", + "demo-data-check": "Checking sample data…", + "demo-data-user-present": "Found the demo user. Verifying sample data…", + "demo-data-deferred": "Rails is up. Loading sample data…", + "demo-data-load": "Loading sample data…", + "demo-data-ready": "Sample data is ready.", + "demo-data-skip": "Sample data is already ready.", + "demo-data-failed": "Sample data failed to load.", + "rails-start": "Starting Rails…", + "rails-up-ready": "Rails is up. Finishing sample data…", + "rails-up-timeout": "Rails is taking longer than expected to start.", +}; export class RailsContainer extends Container { defaultPort = 3000; @@ -53,16 +106,191 @@ export class RailsContainer extends Container { await this.ctx.storage.put(DIAGNOSTICS_HISTORY_KEY, history); } + private async getDiagnostics(): Promise<{ + state: unknown; + containerRunning: boolean; + diagnostics: DiagnosticRecord | null; + diagnosticsHistory: DiagnosticRecord[]; + }> { + return { + state: await this.getState(), + containerRunning: this.runtimeContainer.running, + diagnostics: ((await this.ctx.storage.get(DIAGNOSTICS_KEY)) as DiagnosticRecord | undefined) ?? null, + diagnosticsHistory: + ((await this.ctx.storage.get(DIAGNOSTICS_HISTORY_KEY)) as DiagnosticRecord[] | undefined) ?? [], + }; + } + + private async probeRailsUp(): Promise { + try { + const response = await this.containerFetch(new Request("https://container.internal/up"), this.defaultPort); + return response.ok; + } catch { + return false; + } + } + + private async buildPreviewStatus(base: { + state: unknown; + containerRunning: boolean; + diagnostics: DiagnosticRecord | null; + diagnosticsHistory: DiagnosticRecord[]; + }, options?: { probe?: boolean }): Promise { + const allDiagnostics = [...base.diagnosticsHistory, ...(base.diagnostics ? [base.diagnostics] : [])]; + const entrypointDiagnostics = allDiagnostics.filter( + (item) => item.event === "entrypoint" && typeof item.payload?.stage === "string" + ); + const latestEntrypoint = entrypointDiagnostics.at(-1) ?? null; + const latestStage = latestEntrypoint?.payload?.stage ?? null; + const latestDetail = latestEntrypoint?.payload?.detail ?? base.diagnostics?.message ?? ""; + const sampleDataReady = entrypointDiagnostics.some((item) => READY_STAGES.has(item.payload?.stage ?? "")); + const liveProbeReady = options?.probe ? await this.probeRailsUp() : false; + const railsResponding = + liveProbeReady || + (typeof base.state === "object" && base.state !== null && "status" in base.state + ? (base.state as { status?: string }).status === "healthy" + : false) || + entrypointDiagnostics.some((item) => item.payload?.stage === "rails-up-ready"); + const previewReady = liveProbeReady || (sampleDataReady && railsResponding); + const previewFailed = + entrypointDiagnostics.some((item) => FAILED_STAGES.has(item.payload?.stage ?? "")) || + base.diagnostics?.event === "error"; + + let phase: PreviewProgress["phase"] = "cold"; + if (previewFailed) { + phase = "failed"; + } else if (previewReady) { + phase = "ready"; + } else if ( + latestStage === "demo-data-load" || + latestStage === "demo-data-deferred" || + latestStage === "rails-up-ready" || + latestStage === "demo-data-check" || + latestStage === "demo-data-user-present" + ) { + phase = "loading-demo-data"; + } else if (base.containerRunning || latestEntrypoint) { + phase = "warming"; + } + + const message = sampleDataReady && !previewReady + ? "Finishing preview startup…" + : (latestStage ? WAITING_MESSAGES[latestStage] : undefined) ?? + (previewFailed + ? "Preview startup hit an error." + : previewReady + ? "Preview is ready." + : base.containerRunning + ? "Warming preview…" + : "Starting preview…"); + + return { + ...base, + previewReady, + previewFailed, + progress: { + phase, + stage: latestStage, + message, + detail: latestDetail, + }, + }; + } + + private wantsHtml(request: Request): boolean { + if (request.method !== "GET") return false; + const accept = request.headers.get("accept") ?? ""; + const secFetchDest = request.headers.get("sec-fetch-dest") ?? ""; + return accept.includes("text/html") || secFetchDest === "document"; + } + + private renderWaitPage(request: Request, status: PreviewStatusPayload, errorMessage?: string): Response { + const targetPath = new URL(request.url).pathname + new URL(request.url).search; + const escapedTargetPath = JSON.stringify(targetPath); + const escapedMessage = JSON.stringify(status.progress.message); + const escapedDetail = JSON.stringify( + status.progress.detail || errorMessage || "This preview is waking up and loading sample data." + ); + + const html = ` + + + + + Waking preview… + + + +
+
+ +

+

+

Please wait — this preview is cold-starting and will redirect automatically when the sample data is ready.

+

+
+
+ + +`; + + return new Response(html, { + status: status.previewFailed ? 503 : 202, + headers: { + "content-type": "text/html; charset=utf-8", + "cache-control": "no-store, max-age=0", + "retry-after": "3", + }, + }); + } + override async fetch(request: Request): Promise { const url = new URL(request.url); if (url.pathname === "/_container_status") { - return Response.json({ - state: await this.getState(), - containerRunning: this.runtimeContainer.running, - diagnostics: (await this.ctx.storage.get(DIAGNOSTICS_KEY)) ?? null, - diagnosticsHistory: (await this.ctx.storage.get(DIAGNOSTICS_HISTORY_KEY)) ?? [], - }); + return Response.json(await this.buildPreviewStatus(await this.getDiagnostics(), { probe: true })); } if (url.pathname === "/_container_event" && request.method === "POST") { @@ -84,6 +312,15 @@ export class RailsContainer extends Container { message: error instanceof Error ? error.message : String(error), }); + const status = await this.buildPreviewStatus(await this.getDiagnostics()); + if (this.wantsHtml(request) && !status.previewReady) { + return this.renderWaitPage( + request, + status, + error instanceof Error ? error.message : String(error) + ); + } + return new Response( `Failed to serve preview container: ${error instanceof Error ? error.message : String(error)}`, { status: 500 }