mirror of
https://github.com/apache/superset.git
synced 2026-06-01 21:59:26 +00:00
Co-authored-by: Claude Code <noreply@anthropic.com> Co-authored-by: Đỗ Trọng Hải <41283691+hainenber@users.noreply.github.com>
251 lines
9.0 KiB
Python
Executable File
251 lines
9.0 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
"""
|
|
Check that source-code changes don't cause translation regressions.
|
|
|
|
Usage
|
|
-----
|
|
Count non-fuzzy translated entries in all .po files and write JSON to stdout:
|
|
|
|
python check_translation_regression.py --count
|
|
|
|
Compare the current .po state against a previously-recorded baseline and fail
|
|
if any language lost translations:
|
|
|
|
python check_translation_regression.py --compare /path/to/before.json
|
|
|
|
Optionally write a markdown report to a file (used by CI to post a PR comment):
|
|
|
|
python check_translation_regression.py --compare before.json --report report.md
|
|
|
|
Use a translations directory other than the repo default (used by CI to count
|
|
against a separate base-branch worktree):
|
|
|
|
python check_translation_regression.py --count \\
|
|
--translations-dir /tmp/base-worktree/superset/translations
|
|
|
|
Typical CI workflow
|
|
-------------------
|
|
1. Create a base-branch worktree alongside the PR worktree
|
|
2. Run babel_update.sh in the base worktree (extract from BASE source)
|
|
3. Record baseline: python ... --count --translations-dir BASE_TREE > before.json
|
|
4. Run babel_update.sh in the PR worktree (extract from PR source) starting
|
|
from the same pristine BASE translations
|
|
5. Compare: python ... --compare before.json [--report report.md]
|
|
|
|
Comparing two babel_update outputs that started from the same BASE .po files
|
|
isolates regressions caused by the PR's source diff from any pre-existing
|
|
drift on the base branch.
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import re
|
|
import subprocess
|
|
import sys
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
|
|
# Default catalog location: "superset/translations" under the directory two
# levels above the folder that contains this script (presumably the repository
# root, matching the "<repo>/superset/translations" wording in the CLI help).
DEFAULT_TRANSLATIONS_DIR = (
    Path(__file__).resolve().parent.parent.parent / "superset" / "translations"
)

# English .po files use empty msgstr by convention (source language == target),
# so they always show 0 translated entries and should not be checked.
# Language directory names listed here are skipped when counts are collected.
SKIP_LANGS = {"en"}
|
|
|
|
|
|
def count_translated(po_file: Path) -> int:
    """Return the number of non-fuzzy translated messages in a .po file.

    Runs ``msgfmt --statistics`` on *po_file*, discards the compiled catalog,
    and parses the summary line that msgfmt writes to stderr.

    The child process is forced into the ``C`` locale. msgfmt localizes its
    statistics line, so under e.g. a German or French environment the English
    phrase this function looks for would never appear and every language would
    be reported as unparseable.

    Args:
        po_file: Path to the ``messages.po`` catalog to inspect.

    Returns:
        The count msgfmt reports as "translated" (fuzzy and untranslated
        entries are reported separately by msgfmt and are not included).

    Raises:
        subprocess.CalledProcessError: if ``msgfmt`` exits non-zero (e.g. a
            malformed .po file). The regression check exists to surface
            translation problems, so a silent zero would defeat its purpose;
            the failure is propagated to the caller instead.
        RuntimeError: if msgfmt succeeds but its stderr does not contain a
            recognizable "N translated message(s)" summary.
    """
    import os  # noqa: PLC0415
    import shutil  # noqa: PLC0415

    # Prefer an absolute path when msgfmt is on PATH; otherwise fall back to
    # the bare name and let subprocess report the missing executable.
    msgfmt = shutil.which("msgfmt") or "msgfmt"

    # LC_ALL=C overrides LANG/LC_MESSAGES so the summary is always English.
    child_env = {**os.environ, "LC_ALL": "C"}

    result = subprocess.run(  # noqa: S603
        [msgfmt, "--statistics", "-o", os.devnull, str(po_file)],
        capture_output=True,
        text=True,
        check=True,
        env=child_env,
    )
    # stderr: "123 translated messages, 4 fuzzy translations, 56 untranslated messages."
    match = re.search(r"(\d+) translated message", result.stderr)
    if not match:
        raise RuntimeError(
            f"Could not parse msgfmt --statistics output for {po_file}: "
            f"{result.stderr!r}"
        )
    return int(match.group(1))
|
|
|
|
|
|
def get_counts(translations_dir: Path) -> dict[str, int]:
    """Map each language code to its number of translated messages.

    Scans ``<translations_dir>/<lang>/LC_MESSAGES/messages.po`` in sorted
    order, ignoring any language listed in ``SKIP_LANGS``.

    A catalog that cannot be counted (msgfmt exits non-zero, or its output
    cannot be parsed) does not abort the scan: a warning is written to stderr
    and that language is simply left out of the returned mapping, so the
    remaining languages are still reported.
    """
    totals: dict[str, int] = {}
    catalogs = sorted(translations_dir.glob("*/LC_MESSAGES/messages.po"))

    for catalog in catalogs:
        # <lang>/LC_MESSAGES/messages.po -> the grandparent folder is the language.
        lang = catalog.parent.parent.name
        if lang not in SKIP_LANGS:
            try:
                translated = count_translated(catalog)
            except (subprocess.CalledProcessError, RuntimeError) as exc:
                warning = (
                    f"WARNING: skipping {lang} — {catalog} could not be counted: {exc}"
                )
                print(warning, file=sys.stderr)
            else:
                totals[lang] = translated

    return totals
|
|
|
|
|
|
def build_regression_report(regressions: list[tuple[str, int, int]]) -> str:
    """Render the markdown body of the PR comment describing lost translations.

    Each item of *regressions* is ``(lang, before_count, after_count)``. The
    report contains a per-language table of before/after/lost counts followed
    by fixed step-by-step instructions for repairing the ``.po`` files.
    """
    table_lines = []
    lang_tags = []
    for lang, before_count, after_count in regressions:
        lost = before_count - after_count
        table_lines.append(f"| `{lang}` | {before_count} | {after_count} | -{lost} |")
        lang_tags.append(f"`{lang}`")

    rows = "\n".join(table_lines)
    affected = ", ".join(lang_tags)

    sections = [
        # Headline and summary sentence.
        "## ⚠️ Translation Regression Detected\n\n",
        "This PR causes existing translations to become fuzzy or be removed ",
        f"in {affected}. Please fix the affected `.po` files before merging.\n\n",
        # Per-language table.
        "| Language | Before | After | Lost |\n",
        "|----------|-------:|------:|-----:|\n",
        f"{rows}\n\n",
        # Remediation steps.
        "### How to fix\n\n",
        "**1. Install dependencies** (if not already set up):\n\n",
        "```bash\n",
        "pip install -r superset/translations/requirements.txt\n",
        "sudo apt-get install gettext # or: brew install gettext\n",
        "```\n\n",
        "**2. Re-extract strings and sync `.po` files:**\n\n",
        "```bash\n",
        "./scripts/translations/babel_update.sh\n",
        "```\n\n",
        "This rewrites `superset/translations/messages.pot` from the current ",
        "source files and merges the changes into every `.po` file. Strings ",
        "whose `msgid` changed will be marked `#, fuzzy`.\n\n",
        "**3. Resolve the fuzzy entries** in the affected language files ",
        f"({affected}):\n\n",
        "```bash\n",
        "grep -n '#, fuzzy' superset/translations/<lang>/LC_MESSAGES/messages.po\n",
        "```\n\n",
        "For each fuzzy entry, either rewrite the `msgstr` to match the new ",
        "string and remove the `#, fuzzy` line, or clear the `msgstr` to ",
        '`""` if you cannot provide a translation.\n\n',
        "**4. Commit your changes to the `.po` files.**\n",
    ]
    return "".join(sections)
|
|
|
|
|
|
def cmd_count(translations_dir: Path) -> None:
    """Print the per-language translated-message counts as indented JSON."""
    per_language = get_counts(translations_dir)
    serialized = json.dumps(per_language, indent=2)
    print(serialized)
|
|
|
|
|
|
def cmd_compare(
    before_path: str,
    translations_dir: Path,
    report_path: Optional[str] = None,
) -> None:
    """Compare current counts with a recorded baseline and fail on any loss.

    Loads the baseline mapping from the JSON file at *before_path*, recounts
    the catalogs under *translations_dir*, and flags every baseline language
    whose count dropped. A language absent from the new counts is treated as
    having 0 translated messages.

    With no regressions, a per-language summary is printed and the function
    returns. Otherwise the regressions are printed, a markdown report is
    written to *report_path* when one was given, and the process exits with
    status 1.
    """
    with open(before_path) as handle:
        before: dict[str, int] = json.load(handle)

    after = get_counts(translations_dir)

    regressions: list[tuple[str, int, int]] = [
        (lang, baseline, after.get(lang, 0))
        for lang, baseline in sorted(before.items())
        if after.get(lang, 0) < baseline
    ]

    if not regressions:
        # All good — print a summary so it's easy to read in CI logs.
        print("No translation regressions.\n")
        for lang in sorted(after):
            old = before.get(lang, 0)
            new = after[lang]
            if new > old:
                delta = f"+{new - old}"
            else:
                delta = "no change" if new == old else f"-{old - new}"
            print(f" {lang}: {old} -> {new} ({delta})")
        return

    print("Translation regression detected!\n")
    for lang, old, new in regressions:
        print(
            f" {lang}: {old} -> {new} (-{old - new} string(s) became fuzzy or removed)"
        )
    print("\nStrings renamed or deleted by this PR invalidated existing translations.")
    print("Update the affected .po files to restore the lost entries before merging.")

    if report_path:
        markdown = build_regression_report(regressions)
        Path(report_path).write_text(markdown, encoding="utf-8")

    # Non-zero exit status marks the run as failed.
    sys.exit(1)
|
|
|
|
|
|
def main() -> None:
    """Parse the command line and dispatch to the count or compare command.

    Exactly one of ``--count`` / ``--compare BEFORE_JSON`` is required.
    ``--report`` and ``--translations-dir`` are optional; the report path is
    only forwarded to the compare command.
    """
    cli = argparse.ArgumentParser(
        description="Check for translation regressions in .po files."
    )

    # The two modes are mutually exclusive and one of them must be chosen.
    mode = cli.add_mutually_exclusive_group(required=True)
    mode.add_argument(
        "--count",
        action="store_true",
        help="Output translation counts per language as JSON.",
    )
    mode.add_argument(
        "--compare",
        metavar="BEFORE_JSON",
        help="Compare current counts against a baseline JSON file.",
    )

    cli.add_argument(
        "--report",
        metavar="REPORT_MD",
        help="When --compare detects regressions, write a markdown report here.",
    )

    translations_help = (
        "Path to the translations directory containing per-language "
        "LC_MESSAGES/messages.po files (default: <repo>/superset/translations)."
    )
    cli.add_argument(
        "--translations-dir",
        type=Path,
        default=DEFAULT_TRANSLATIONS_DIR,
        help=translations_help,
    )

    options = cli.parse_args()

    if args_count := options.count:
        cmd_count(options.translations_dir)
        return

    cmd_compare(options.compare, options.translations_dir, options.report)
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|