diff --git a/.github/workflows/preview-deploy.yml b/.github/workflows/preview-deploy.yml index 76fd9ee56..63ea9590d 100644 --- a/.github/workflows/preview-deploy.yml +++ b/.github/workflows/preview-deploy.yml @@ -203,8 +203,15 @@ jobs: cp trusted/workers/preview/wrangler.toml "$preview_dir/wrangler.toml" cp -R trusted/workers/preview/src "$preview_dir/src" + diagnostics_nonce="$(openssl rand -hex 32)" sed -i "s/\${PR_NUMBER}/${PR_NUMBER}/g" "$preview_dir/wrangler.toml" sed -i "s/\${PR_NUMBER}/${PR_NUMBER}/g" "$preview_dir/src/index.ts" + sed -i "s/\${PREVIEW_DIAGNOSTICS_NONCE}/${diagnostics_nonce}/g" "$preview_dir/src/index.ts" + + if grep -F "\${PREVIEW_DIAGNOSTICS_NONCE}" "$preview_dir/src/index.ts" >/dev/null; then + echo "Preview diagnostics nonce placeholder was not replaced" >&2 + exit 1 + fi cd "$preview_dir" npm ci --ignore-scripts --no-audit --no-fund @@ -236,10 +243,31 @@ jobs: set -euo pipefail cd "$RUNNER_TEMP/sure-preview-worker" + config_path="$RUNNER_TEMP/sure-preview-worker/wrangler.toml" image_tag="sure-preview-pr-${PR_NUMBER}:${HEAD_SHA}" push_log="$RUNNER_TEMP/wrangler-containers-push.log" clean_log="$RUNNER_TEMP/wrangler-containers-push.clean.log" + # wrangler containers push validates wrangler.toml, so point the trusted + # config at the loaded CI image before replacing it with the registry ref. + LOCAL_IMAGE_TAG="$image_tag" node - "$config_path" <<'NODE' + const fs = require('node:fs'); + + const configPath = process.argv[2]; + const imageTag = process.env.LOCAL_IMAGE_TAG; + + if (!/^sure-preview-pr-[1-9][0-9]*:[a-f0-9]{40}$/.test(imageTag || '')) { + throw new Error('Expected local preview image tag for wrangler containers push'); + } + + const original = fs.readFileSync(configPath, 'utf8'); + const updated = original.replace(/image = "[^"]+"/, `image = ${JSON.stringify(imageTag)}`); + if (updated === original) { + throw new Error('Expected wrangler.toml to contain an image entry to rewrite before push'); + } + fs.writeFileSync(configPath, updated); + NODE + ./node_modules/.bin/wrangler containers push "$image_tag" 2>&1 | tee "$push_log" perl -pe 's/\e\[[0-9;]*[A-Za-z]//g' "$push_log" > "$clean_log" image_ref=$(grep -Eo 'registry\.cloudflare\.com/[^[:space:]]+' "$clean_log" | tail -n 1 | tr -d '\r') @@ -331,7 +359,7 @@ jobs: diagnostics_file="$RUNNER_TEMP/preview-diagnostics.json" last_error="" - for attempt in $(seq 1 20); do + for attempt in $(seq 1 40); do if curl -fsS --connect-timeout 5 --max-time 15 "$PREVIEW_URL/_container_status" -o "$diagnostics_file"; then if jq -e '.previewReady == true or .previewFailed == true' "$diagnostics_file" >/dev/null; then break @@ -357,8 +385,20 @@ jobs: exit 1 fi + if ! jq -e '.previewReady == true' "$diagnostics_file" >/dev/null; then + echo "Preview diagnostics from _container_status did not reach previewReady=true:" >&2 + jq -c . "$diagnostics_file" >&2 + exit 1 + fi + + if ! jq -e '.timings.previewReadyAt != null and .timings.secondsToPreviewReady != null' "$diagnostics_file" >/dev/null; then + echo "Preview diagnostics are missing readiness timing fields:" >&2 + jq -c . "$diagnostics_file" >&2 + exit 1 + fi + - name: Upload preview diagnostics - if: success() + if: always() && steps.deploy.outputs.preview_url != '' uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6 with: name: preview-diagnostics-pr-${{ env.PR_NUMBER }}-${{ env.HEAD_SHA }} diff --git a/Dockerfile.preview b/Dockerfile.preview index 3b687b964..ace289b12 100644 --- a/Dockerfile.preview +++ b/Dockerfile.preview @@ -73,13 +73,14 @@ set -e cd /rails emit_status() { - if [ -n "$PREVIEW_ORIGIN" ]; then + if [ -n "$PREVIEW_ORIGIN" ] && [ -n "$PREVIEW_DIAGNOSTICS_NONCE" ]; then local stage="$1" local detail="$2" local payload payload=$(STAGE="$stage" DETAIL="$detail" ruby -rjson -e 'print JSON.generate({stage: ENV.fetch("STAGE"), detail: ENV.fetch("DETAIL", "")})' 2>/dev/null) || return 0 curl -fsS -X POST "$PREVIEW_ORIGIN/_container_event" \ -H 'content-type: application/json' \ + -H "x-preview-diagnostics-nonce: $PREVIEW_DIAGNOSTICS_NONCE" \ --data "$payload" >/dev/null || true fi } diff --git a/bin/preview_deploy_security_check.rb b/bin/preview_deploy_security_check.rb index 877300637..5b4b5b3b1 100644 --- a/bin/preview_deploy_security_check.rb +++ b/bin/preview_deploy_security_check.rb @@ -8,6 +8,8 @@ PREVIEW_WORKFLOW_PATH = File.join(ROOT, ".github/workflows/preview-deploy.yml") PR_WORKFLOW_PATH = File.join(ROOT, ".github/workflows/pr.yml") LOCKFILE_PATH = File.join(ROOT, "workers/preview/package-lock.json") RESOLVER_PATH = File.join(ROOT, "workers/preview/deploy/resolve_preview_request.cjs") +PREVIEW_WORKER_PATH = File.join(ROOT, "workers/preview/src/index.ts") +PREVIEW_DOCKERFILE_PATH = File.join(ROOT, "Dockerfile.preview") PINNED_ACTION = /\A[^@\s]+@[a-f0-9]{40}\z/ EXPECTED_ACTION_PINS = { "actions/checkout" => "93cb6efe18208431cddfb8368fd83d5badbf9bfd", # v5 @@ -55,6 +57,9 @@ REQUIRED_PREPARE_LINES = [ 'cp trusted/workers/preview/tsconfig.json "$preview_dir/tsconfig.json"', 'cp trusted/workers/preview/wrangler.toml "$preview_dir/wrangler.toml"', 'cp -R trusted/workers/preview/src "$preview_dir/src"', + 'diagnostics_nonce="$(openssl rand -hex 32)"', + 'sed -i "s/\${PREVIEW_DIAGNOSTICS_NONCE}/${diagnostics_nonce}/g" "$preview_dir/src/index.ts"', + "Preview diagnostics nonce placeholder was not replaced", "npm ci --ignore-scripts --no-audit --no-fund" ].freeze REQUIRED_IMAGE_BUILD_LINES = [ @@ -149,6 +154,8 @@ preview_workflow = YAML.safe_load_file(PREVIEW_WORKFLOW_PATH, aliases: true) pr_workflow = YAML.safe_load_file(PR_WORKFLOW_PATH, aliases: true) lockfile = JSON.parse(File.read(LOCKFILE_PATH)) resolver_script = File.read(RESOLVER_PATH) +preview_worker_script = File.read(PREVIEW_WORKER_PATH) +preview_dockerfile = File.read(PREVIEW_DOCKERFILE_PATH) preview_on = workflow_on(preview_workflow) pr_on = workflow_on(pr_workflow) @@ -260,6 +267,7 @@ comment_on_pr = step!(preview_comment_steps, "Comment on PR") [ "download artifact token", download_artifact.dig("with", "github-token"), "${{ github.token }}" ], [ "download artifact path", download_artifact.dig("with", "path"), "${{ runner.temp }}/preview-image" ], [ "fork deployment record guard", create_deployment.fetch("if"), "env.IS_FORK == 'false'" ], + [ "diagnostics upload if", upload_diagnostics.fetch("if"), "always() && steps.deploy.outputs.preview_url != ''" ], [ "diagnostics upload name", upload_diagnostics.dig("with", "name"), "preview-diagnostics-pr-${{ env.PR_NUMBER }}-${{ env.HEAD_SHA }}" ], [ "diagnostics upload path", upload_diagnostics.dig("with", "path"), "${{ runner.temp }}/preview-diagnostics.json" ], [ "diagnostics upload retention", upload_diagnostics.dig("with", "retention-days"), 3 ], @@ -360,15 +368,56 @@ assert(!image_build_run.include?('cat > "$manifest_file" < kept.has(record))", + 'const PREVIEW_DIAGNOSTICS_NONCE = "${PREVIEW_DIAGNOSTICS_NONCE}"', + "PREVIEW_DIAGNOSTICS_NONCE", + 'request.headers.get("x-preview-diagnostics-nonce")', + "return new Response(\"not found\", { status: 404 })", + "timings: PreviewTimings", + "buildPreviewTimings", + "const previewReady = sampleDataReady && railsResponding", + "previewReadyAt", + "secondsToRailsReady", + "secondsToDemoDataReady", + "secondsFromRailsReadyToDemoDataReady", + "secondsToPreviewReady" +].each { |needle| assert(preview_worker_script.include?(needle), "preview worker must include #{needle.inspect}") } + +[ + '[ -n "$PREVIEW_ORIGIN" ] && [ -n "$PREVIEW_DIAGNOSTICS_NONCE" ]', + '-H "x-preview-diagnostics-nonce: $PREVIEW_DIAGNOSTICS_NONCE"' +].each { |needle| assert(preview_dockerfile.include?(needle), "preview Dockerfile entrypoint must include #{needle.inspect}") } + secret_steps = deploy_steps.select { |step| env_hash(step).then { |env| env.key?("CLOUDFLARE_API_TOKEN") || env.key?("CLOUDFLARE_ACCOUNT_ID") } } assert(secret_steps.map { |step| step["name"] } == [ push_image["name"], deploy["name"] ], "only image push and deploy may receive Cloudflare secrets") assert_secret_env_sources!(push_image, EXPECTED_PUSH_SECRET_ENV) diff --git a/workers/preview/src/index.ts b/workers/preview/src/index.ts index 5e60a8757..93c9c8991 100644 --- a/workers/preview/src/index.ts +++ b/workers/preview/src/index.ts @@ -24,6 +24,22 @@ interface PreviewProgress { detail: string; } +interface PreviewTimings { + containerStartedAt: string | null; + bootStartedAt: string | null; + railsStartedAt: string | null; + railsReadyAt: string | null; + demoDataStartedAt: string | null; + demoDataReadyAt: string | null; + demoDataReadyStage: string | null; + demoDataFailedAt: string | null; + previewReadyAt: string | null; + secondsToRailsReady: number | null; + secondsToDemoDataReady: number | null; + secondsFromRailsReadyToDemoDataReady: number | null; + secondsToPreviewReady: number | null; +} + interface PreviewStatusPayload { state: unknown; containerRunning: boolean; @@ -31,13 +47,28 @@ interface PreviewStatusPayload { diagnosticsHistory: DiagnosticRecord[]; previewReady: boolean; previewFailed: boolean; + timings: PreviewTimings; progress: PreviewProgress; } const DIAGNOSTICS_KEY = "preview-diagnostics"; const DIAGNOSTICS_HISTORY_KEY = "preview-diagnostics-history"; +const DIAGNOSTICS_HISTORY_LIMIT = 50; +const PREVIEW_DIAGNOSTICS_NONCE = "${PREVIEW_DIAGNOSTICS_NONCE}"; const READY_STAGES = new Set(["demo-data-ready", "demo-data-skip"]); const FAILED_STAGES = new Set(["demo-data-failed", "failed"]); +const TIMING_ANCHOR_STAGES = new Set([ + "boot", + "rails-start", + "rails-up-ready", + "demo-data-check", + "demo-data-deferred", + "demo-data-load", + "demo-data-ready", + "demo-data-skip", + "demo-data-failed", + "failed", +]); const WAITING_MESSAGES: Record = { boot: "Waking preview…", "redis-start": "Starting Redis…", @@ -78,6 +109,7 @@ export class RailsContainer extends Container { BINDING: "::", DEMO_DATA_SEED: "${PR_NUMBER}", PREVIEW_ORIGIN: "https://sure-preview-${PR_NUMBER}.sure-finances.workers.dev", + PREVIEW_DIAGNOSTICS_NONCE, }; sleepAfter = "30m"; enableInternet = true; @@ -90,20 +122,37 @@ export class RailsContainer extends Container { const diagnostic = { ...payload, state: await this.getState(), - }; + } as DiagnosticRecord; await this.ctx.storage.put(DIAGNOSTICS_KEY, diagnostic); const history = - ((await this.ctx.storage.get(DIAGNOSTICS_HISTORY_KEY)) as Record[] | undefined) ?? []; + ((await this.ctx.storage.get(DIAGNOSTICS_HISTORY_KEY)) as DiagnosticRecord[] | undefined) ?? []; history.push(diagnostic); + await this.ctx.storage.put(DIAGNOSTICS_HISTORY_KEY, this.trimDiagnosticsHistory(history)); + } - if (history.length > 20) { - history.splice(0, history.length - 20); - } + private isTimingAnchor(record: DiagnosticRecord): boolean { + return ( + record.event === "start" || + (record.event === "entrypoint" && + typeof record.payload?.stage === "string" && + TIMING_ANCHOR_STAGES.has(record.payload.stage)) + ); + } - await this.ctx.storage.put(DIAGNOSTICS_HISTORY_KEY, history); + private trimDiagnosticsHistory(history: DiagnosticRecord[]): DiagnosticRecord[] { + if (history.length <= DIAGNOSTICS_HISTORY_LIMIT) return history; + + const anchors = history.filter((record) => this.isTimingAnchor(record)).slice(-DIAGNOSTICS_HISTORY_LIMIT); + const anchored = new Set(anchors); + const remainingSlots = Math.max(DIAGNOSTICS_HISTORY_LIMIT - anchors.length, 0); + const recentNonAnchors = + remainingSlots > 0 ? history.filter((record) => !anchored.has(record)).slice(-remainingSlots) : []; + const kept = new Set([...anchors, ...recentNonAnchors]); + + return history.filter((record) => kept.has(record)); } private async getDiagnostics(): Promise<{ @@ -130,6 +179,60 @@ export class RailsContainer extends Container { } } + private validTimestamp(value: string | undefined): string | null { + if (!value) return null; + + const timestamp = Date.parse(value); + return Number.isNaN(timestamp) ? null : value; + } + + private secondsBetween(startAt: string | null, endAt: string | null): number | null { + if (!startAt || !endAt) return null; + + const start = Date.parse(startAt); + const end = Date.parse(endAt); + if (Number.isNaN(start) || Number.isNaN(end) || end < start) return null; + + return Math.round(((end - start) / 1000) * 100) / 100; + } + + private buildPreviewTimings(allDiagnostics: DiagnosticRecord[], previewReady: boolean): PreviewTimings { + const entrypointDiagnostics = allDiagnostics.filter( + (item) => item.event === "entrypoint" && typeof item.payload?.stage === "string" + ); + const firstEventAt = (event: string) => this.validTimestamp(allDiagnostics.find((item) => item.event === event)?.at); + const firstStageAt = (...stages: string[]) => + this.validTimestamp(entrypointDiagnostics.find((item) => stages.includes(item.payload?.stage ?? ""))?.at); + + const containerStartedAt = firstEventAt("start"); + const bootStartedAt = firstStageAt("boot") ?? containerStartedAt; + const railsStartedAt = firstStageAt("rails-start"); + const railsReadyAt = firstStageAt("rails-up-ready"); + const demoDataStartedAt = + firstStageAt("demo-data-load") ?? firstStageAt("demo-data-deferred") ?? firstStageAt("demo-data-check"); + const demoDataReady = entrypointDiagnostics.find((item) => READY_STAGES.has(item.payload?.stage ?? "")); + const demoDataReadyAt = this.validTimestamp(demoDataReady?.at); + const demoDataReadyStage = demoDataReady?.payload?.stage ?? null; + const demoDataFailedAt = firstStageAt("demo-data-failed"); + const previewReadyAt = previewReady ? (demoDataReadyAt ?? railsReadyAt) : null; + + return { + containerStartedAt, + bootStartedAt, + railsStartedAt, + railsReadyAt, + demoDataStartedAt, + demoDataReadyAt, + demoDataReadyStage, + demoDataFailedAt, + previewReadyAt, + secondsToRailsReady: this.secondsBetween(bootStartedAt, railsReadyAt), + secondsToDemoDataReady: this.secondsBetween(demoDataStartedAt, demoDataReadyAt), + secondsFromRailsReadyToDemoDataReady: this.secondsBetween(railsReadyAt, demoDataReadyAt), + secondsToPreviewReady: this.secondsBetween(bootStartedAt, previewReadyAt), + }; + } + private async buildPreviewStatus(base: { state: unknown; containerRunning: boolean; @@ -151,10 +254,11 @@ export class RailsContainer extends Container { ? (base.state as { status?: string }).status === "healthy" : false) || entrypointDiagnostics.some((item) => item.payload?.stage === "rails-up-ready"); - const previewReady = liveProbeReady || (sampleDataReady && railsResponding); + const previewReady = sampleDataReady && railsResponding; const previewFailed = entrypointDiagnostics.some((item) => FAILED_STAGES.has(item.payload?.stage ?? "")) || base.diagnostics?.event === "error"; + const timings = this.buildPreviewTimings(allDiagnostics, previewReady); let phase: PreviewProgress["phase"] = "cold"; if (previewFailed) { @@ -188,6 +292,7 @@ export class RailsContainer extends Container { ...base, previewReady, previewFailed, + timings, progress: { phase, stage: latestStage, @@ -294,6 +399,10 @@ export class RailsContainer extends Container { } if (url.pathname === "/_container_event" && request.method === "POST") { + if (request.headers.get("x-preview-diagnostics-nonce") !== PREVIEW_DIAGNOSTICS_NONCE) { + return new Response("not found", { status: 404 }); + } + const payload = await request.json(); await this.recordDiagnostic({ event: "entrypoint",