Compare commits

...

3 Commits

Author SHA1 Message Date
Evan
fed40796fd ci(mypy): add scripts/__init__.py to fix duplicate-module error
mypy was passed scripts/change_detector.py (resolved as top-level
module "change_detector") and the new test importing it as
"scripts.change_detector", causing a "Source file found twice under
different module names" error. Making scripts/ an explicit package
resolves the ambiguity. The script is still invoked path-based in CI
(python scripts/change_detector.py), so this is non-breaking.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-03 14:50:42 -07:00
Evan
5a11cc2177 test(ci): add unit tests for change_detector workflow_run resolution
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-03 14:50:42 -07:00
Claude Code
39f93eb63c ci: gate Cypress/Playwright behind pre-commit via workflow_run
Make the E2E workflow run only after the "pre-commit checks" workflow
completes successfully, instead of in parallel with it. When a PR has
formatting/lint/type errors that pre-commit catches in ~4 minutes, the
Cypress shards and Playwright jobs (~15-20 min each, full setup) no longer
spin up only to be wasted.

Mechanism:
- Trigger switches from push/pull_request to `workflow_run` on "pre-commit
  checks" (which itself runs on push + pull_request, preserving coverage).
- The entry `changes` job gates on
  `github.event.workflow_run.conclusion == 'success'`; on pre-commit failure
  it (and every downstream `needs: changes` job) is skipped, provisioning no
  runners.
- change_detector.py learns a `workflow_run` event path: it recovers the
  originating event/SHA/PR from WF_RUN_* env vars (fork PRs lack PR context
  in the payload, so they conservatively assume-all-changed).
- Checkouts and the push-only /app/prefix matrix switch read
  `github.event.workflow_run.*` instead of the live event.
- A `report-status` job posts an aggregate "E2E / required" commit status
  back to the PR head SHA, since workflow_run checks don't attach to PRs
  automatically. This becomes the required check in branch protection.

This stacks on #40718 (the job-level `changes` gating) and also contains
those changes; the net-new here is the workflow_run gate plus the
change_detector workflow_run support.

Known tradeoffs (see PR description): GitHub uses the default-branch copy of
the workflow for workflow_run, so edits won't take effect until merged; fork
PRs run the full suite; branch protection must require "E2E / required".

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-03 14:50:41 -07:00
4 changed files with 220 additions and 18 deletions

View File

@@ -1,12 +1,16 @@
name: E2E
on:
push:
branches:
- "master"
- "[0-9].[0-9]*"
pull_request:
types: [synchronize, opened, reopened, ready_for_review]
# Gated behind pre-commit: this workflow runs only after the "pre-commit
# checks" workflow completes, and (via the job-level `if` below) only when
# it succeeded. That keeps the expensive Cypress/Playwright runners from
# spinning up while a PR still has formatting/lint/type errors that
# pre-commit catches in a fraction of the time. pre-commit itself runs on
# push (master/release) and pull_request, so this preserves coverage for
# both event types.
workflow_run:
workflows: ["pre-commit checks"]
types: [completed]
workflow_dispatch:
inputs:
use_dashboard:
@@ -23,11 +27,18 @@ on:
default: ''
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.run_id }}
# workflow_run has no PR number in context; key on the originating branch.
group: ${{ github.workflow }}-${{ github.event.workflow_run.head_branch || github.run_id }}
cancel-in-progress: true
jobs:
changes:
# The pre-commit gate: only proceed when pre-commit succeeded (or on a
# manual dispatch). On failure this job is skipped, and every downstream
# job (needs: changes) is skipped with it — no runners are provisioned.
if: >-
github.event_name == 'workflow_dispatch' ||
github.event.workflow_run.conclusion == 'success'
runs-on: ubuntu-24.04
permissions:
contents: read
@@ -40,11 +51,18 @@ jobs:
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
with:
persist-credentials: false
ref: ${{ github.event_name == 'workflow_run' && github.event.workflow_run.head_sha || github.sha }}
# The shared change-detector action reads the live event context, which
# under workflow_run points at the default branch. Call the script
# directly instead, passing the originating event/SHA/PR via WF_RUN_*.
- name: Check for file changes
id: check
uses: ./.github/actions/change-detector/
with:
token: ${{ secrets.GITHUB_TOKEN }}
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
WF_RUN_EVENT: ${{ github.event.workflow_run.event }}
WF_RUN_HEAD_SHA: ${{ github.event.workflow_run.head_sha }}
WF_RUN_PR_NUMBER: ${{ github.event.workflow_run.pull_requests[0].number }}
run: python scripts/change_detector.py
cypress-matrix:
needs: changes
@@ -63,7 +81,7 @@ jobs:
matrix:
parallel_id: [0, 1]
browser: ["chrome"]
app_root: ${{ github.event_name == 'push' && fromJSON('["", "/app/prefix"]') || fromJSON('[""]') }}
app_root: ${{ github.event.workflow_run.event == 'push' && fromJSON('["", "/app/prefix"]') || fromJSON('[""]') }}
# The /app/prefix variant (push events only) is smoke-tested on a single
# shard rather than the full matrix, so exclude it from the other shards.
exclude:
@@ -93,13 +111,13 @@ jobs:
steps:
# -------------------------------------------------------
# Conditional checkout based on context
- name: Checkout for push or pull_request event
if: github.event_name == 'push' || github.event_name == 'pull_request'
- name: Checkout (gated by pre-commit via workflow_run)
if: github.event_name == 'workflow_run'
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
with:
persist-credentials: false
submodules: recursive
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
ref: ${{ github.event.workflow_run.head_sha }}
- name: Checkout using ref (workflow_dispatch)
if: github.event_name == 'workflow_dispatch' && github.event.inputs.ref != ''
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
@@ -177,7 +195,7 @@ jobs:
fail-fast: false
matrix:
browser: ["chromium"]
app_root: ${{ github.event_name == 'push' && fromJSON('["", "/app/prefix"]') || fromJSON('[""]') }}
app_root: ${{ github.event.workflow_run.event == 'push' && fromJSON('["", "/app/prefix"]') || fromJSON('[""]') }}
env:
SUPERSET_ENV: development
SUPERSET_CONFIG: tests.integration_tests.superset_test_config
@@ -200,13 +218,13 @@ jobs:
steps:
# -------------------------------------------------------
# Conditional checkout based on context (same as Cypress workflow)
- name: Checkout for push or pull_request event
if: github.event_name == 'push' || github.event_name == 'pull_request'
- name: Checkout (gated by pre-commit via workflow_run)
if: github.event_name == 'workflow_run'
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
with:
persist-credentials: false
submodules: recursive
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
ref: ${{ github.event.workflow_run.head_sha }}
- name: Checkout using ref (workflow_dispatch)
if: github.event_name == 'workflow_dispatch' && github.event.inputs.ref != ''
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
@@ -274,3 +292,34 @@ jobs:
${{ github.workspace }}/superset-frontend/playwright-results/
${{ github.workspace }}/superset-frontend/test-results/
name: playwright-artifact-${{ github.run_id }}-${{ github.job }}-${{ matrix.browser }}--${{ steps.set-safe-app-root.outputs.safe_app_root }}
# workflow_run runs don't attach their checks to the originating PR, so post
# an aggregate commit status back onto the PR head SHA. Make THIS the required
# status check in branch protection (in place of the individual E2E jobs).
report-status:
needs: [cypress-matrix, playwright-tests]
if: always() && github.event_name == 'workflow_run'
runs-on: ubuntu-24.04
permissions:
statuses: write
steps:
- name: Report aggregate E2E status to PR commit
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
with:
script: |
// 'skipped' is acceptable: the change-detector legitimately skips
// jobs when no relevant files changed. Only real failures fail.
const results = [
'${{ needs.cypress-matrix.result }}',
'${{ needs.playwright-tests.result }}',
];
const ok = results.every((r) => r === 'success' || r === 'skipped');
await github.rest.repos.createCommitStatus({
owner: context.repo.owner,
repo: context.repo.repo,
sha: context.payload.workflow_run.head_sha,
state: ok ? 'success' : 'failure',
context: 'E2E / required',
description: ok ? 'E2E passed (or skipped)' : 'E2E failed',
target_url: `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`,
});

16
scripts/__init__.py Normal file
View File

@@ -0,0 +1,16 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

View File

@@ -109,6 +109,37 @@ def is_int(s: str) -> bool:
return bool(re.match(r"^-?\d+$", s))
def resolve_workflow_run_files(repo: str, sha: str) -> Optional[List[str]]:
"""Resolve changed files for a workflow_run-triggered run.
When a workflow is gated behind another (e.g. running only after
pre-commit succeeds), GitHub re-dispatches it as a `workflow_run` event
whose context points at the default branch rather than the originating
diff. Recover the original event and head SHA from the workflow_run
payload, exposed via the WF_RUN_* env vars. Returns ``None`` (meaning
"assume all changed") when the diff can't be resolved.
"""
original_event = os.getenv("WF_RUN_EVENT") or "push"
print("ORIGINAL_EVENT", original_event)
if original_event == "pull_request":
pr_number = os.getenv("WF_RUN_PR_NUMBER", "")
if not is_int(pr_number):
# Fork PRs don't populate workflow_run.pull_requests, so we can't
# resolve the diff -> assume all changed (run everything).
print("workflow_run without PR context, assuming all changed")
return None
files = fetch_changed_files_pr(repo, pr_number)
print("PR files:")
print_files(files)
return files
head_sha = os.getenv("WF_RUN_HEAD_SHA") or sha
files = fetch_changed_files_push(repo, head_sha)
print("Files touched since previous commit:")
print_files(files)
return files
def main(event_type: str, sha: str, repo: str) -> None:
"""Main function to check for file changes based on event context."""
print("SHA:", sha)
@@ -126,6 +157,9 @@ def main(event_type: str, sha: str, repo: str) -> None:
print("Files touched since previous commit:")
print_files(files)
elif event_type == "workflow_run":
files = resolve_workflow_run_files(repo, sha)
elif event_type in ("workflow_dispatch", "schedule"):
# Manual or cron-triggered runs aren't tied to a specific diff, so
# treat every group as changed. `files = None` makes the loop below

View File

@@ -0,0 +1,103 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from unittest import mock
from scripts import change_detector
REPO = "apache/superset"
def test_resolve_workflow_run_files_pull_request(monkeypatch) -> None:
"""A workflow_run originating from a pull_request resolves the PR diff."""
monkeypatch.setenv("WF_RUN_EVENT", "pull_request")
monkeypatch.setenv("WF_RUN_PR_NUMBER", "123")
with (
mock.patch.object(
change_detector,
"fetch_changed_files_pr",
return_value=["superset/foo.py"],
) as fetch_pr,
mock.patch.object(change_detector, "fetch_changed_files_push") as fetch_push,
):
files = change_detector.resolve_workflow_run_files(REPO, "deadbeef")
assert files == ["superset/foo.py"]
fetch_pr.assert_called_once_with(REPO, "123")
fetch_push.assert_not_called()
def test_resolve_workflow_run_files_push(monkeypatch) -> None:
"""A workflow_run originating from a push resolves the push diff via head SHA."""
monkeypatch.setenv("WF_RUN_EVENT", "push")
monkeypatch.setenv("WF_RUN_HEAD_SHA", "abc123")
with (
mock.patch.object(
change_detector,
"fetch_changed_files_push",
return_value=["superset-frontend/bar.tsx"],
) as fetch_push,
mock.patch.object(change_detector, "fetch_changed_files_pr") as fetch_pr,
):
files = change_detector.resolve_workflow_run_files(REPO, "fallback-sha")
assert files == ["superset-frontend/bar.tsx"]
# The originating head SHA wins over the (default-branch) fallback SHA.
fetch_push.assert_called_once_with(REPO, "abc123")
fetch_pr.assert_not_called()
def test_resolve_workflow_run_files_push_defaults_to_fallback_sha(
monkeypatch,
) -> None:
"""Without WF_RUN_HEAD_SHA the push path falls back to the passed SHA."""
monkeypatch.delenv("WF_RUN_EVENT", raising=False)
monkeypatch.delenv("WF_RUN_HEAD_SHA", raising=False)
with mock.patch.object(
change_detector,
"fetch_changed_files_push",
return_value=[],
) as fetch_push:
change_detector.resolve_workflow_run_files(REPO, "fallback-sha")
fetch_push.assert_called_once_with(REPO, "fallback-sha")
def test_resolve_workflow_run_files_pr_missing_number(monkeypatch) -> None:
"""A fork PR without a resolvable number returns None (assume all changed)."""
monkeypatch.setenv("WF_RUN_EVENT", "pull_request")
monkeypatch.delenv("WF_RUN_PR_NUMBER", raising=False)
with mock.patch.object(change_detector, "fetch_changed_files_pr") as fetch_pr:
files = change_detector.resolve_workflow_run_files(REPO, "deadbeef")
assert files is None
fetch_pr.assert_not_called()
def test_resolve_workflow_run_files_pr_invalid_number(monkeypatch) -> None:
"""A non-integer PR number is treated as unresolvable and returns None."""
monkeypatch.setenv("WF_RUN_EVENT", "pull_request")
monkeypatch.setenv("WF_RUN_PR_NUMBER", "not-a-number")
with mock.patch.object(change_detector, "fetch_changed_files_pr") as fetch_pr:
files = change_detector.resolve_workflow_run_files(REPO, "deadbeef")
assert files is None
fetch_pr.assert_not_called()