Mirror of https://github.com/apache/superset.git (synced 2026-05-02 22:44:28 +00:00)

Compare commits: docs/testi...more-scrap (4 commits)
| Author | SHA1 | Date |
|---|---|---|
| | 70bb8f5b1c | |
| | 26e4c52b13 | |
| | a3caecbfea | |
| | e0ab62f1a0 | |
.github/workflows/get_star_ranking.py (vendored, new file, 152 lines)
@@ -0,0 +1,152 @@
```python
import csv
import json
import os
import time
from typing import Any, Dict, List, Optional, Tuple

import requests
from requests.exceptions import RequestException

# Configuration
GITHUB_API_URL = "https://api.github.com/graphql"
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")  # Replace with your actual token
if GITHUB_TOKEN is None:
    raise ValueError("GITHUB_TOKEN environment variable not set")

QUERY = """
{
  search(query: "stars:>1", type: REPOSITORY, first: 100, after: AFTER_CURSOR) {
    edges {
      cursor
      node {
        ... on Repository {
          nameWithOwner
          stargazers {
            totalCount
          }
          watchers {
            totalCount
          }
          openIssues: issues(states: OPEN) {
            totalCount
          }
          discussions {
            totalCount
          }
        }
      }
    }
  }
}
"""
HEADERS = {"Authorization": f"Bearer {GITHUB_TOKEN}"}
TARGET_REPOSITORY: Optional[str] = None  # Set this to None to print all repositories
MAX_REPOS = 200  # Limit to the first 200 repositories
MAX_RETRIES = 3
RETRY_DELAY = 5  # seconds


class RepositoryFetchError(Exception):
    """Custom exception for repository fetching errors."""


def fetch_repositories(after_cursor: Optional[str] = None) -> Dict[str, Any]:
    query = QUERY.replace(
        "AFTER_CURSOR", f'"{after_cursor}"' if after_cursor else "null"
    )
    for attempt in range(MAX_RETRIES):
        try:
            response = requests.post(
                GITHUB_API_URL, json={"query": query}, headers=HEADERS, timeout=30
            )
            response.raise_for_status()
            result = response.json()
            if "errors" in result:
                print(f"GraphQL errors: {json.dumps(result['errors'], indent=2)}")
                raise RepositoryFetchError("GraphQL query returned errors")
            if "data" not in result or "search" not in result["data"]:
                print(f"Unexpected response structure: {json.dumps(result, indent=2)}")
                raise RepositoryFetchError("Unexpected response structure")
            return result["data"]["search"]
        except RequestException as request_error:
            print(f"Attempt {attempt + 1} failed: {str(request_error)}")
            if attempt < MAX_RETRIES - 1:
                print(f"Retrying in {RETRY_DELAY} seconds...")
                time.sleep(RETRY_DELAY)
            else:
                print("Max retries reached. Exiting.")
                raise RepositoryFetchError(
                    "Failed to fetch repositories after all retries"
                ) from request_error
        except json.JSONDecodeError as json_error:
            print(f"Error decoding JSON response: {str(json_error)}")
            if attempt < MAX_RETRIES - 1:
                print(f"Retrying in {RETRY_DELAY} seconds...")
                time.sleep(RETRY_DELAY)
            else:
                print("Max retries reached. Exiting.")
                raise RepositoryFetchError(
                    "Failed to decode JSON response"
                ) from json_error

    raise RepositoryFetchError("Failed to fetch repositories after all retries")


def main() -> None:
    repositories: List[Tuple[int, str, int, int, int, int]] = []
    after_cursor: Optional[str] = None
    rank = 0
    target_repo_found = False

    while not target_repo_found and rank < MAX_REPOS:
        try:
            result = fetch_repositories(after_cursor)
        except RepositoryFetchError as fetch_error:
            print(f"Error fetching repositories: {str(fetch_error)}")
            break

        for edge in result["edges"]:
            rank += 1
            repo = edge["node"]
            repo_name = repo["nameWithOwner"]
            stars = repo["stargazers"]["totalCount"]
            watchers = repo["watchers"]["totalCount"]
            open_issues = repo["openIssues"]["totalCount"]
            discussions = repo["discussions"]["totalCount"]

            repositories.append(
                (rank, repo_name, stars, watchers, open_issues, discussions)
            )

            if TARGET_REPOSITORY and repo_name == TARGET_REPOSITORY:
                target_repo_found = True
                print(
                    f"Found target repository: {repo_name} at rank {rank} with {stars} stars"
                )
                break

            if rank >= MAX_REPOS:
                break

        if target_repo_found or not result["edges"] or rank >= MAX_REPOS:
            break
        after_cursor = result["edges"][-1]["cursor"]

    # Save to CSV
    with open("github_top_repositories.csv", "w", newline="") as csvfile:
        csvwriter = csv.writer(csvfile)
        csvwriter.writerow(
            ["Rank", "Repository", "Stars", "Watchers", "Open Issues", "Discussions"]
        )
        csvwriter.writerows(repositories)

    # Print all rows if TARGET_REPOSITORY is None or False
    if not TARGET_REPOSITORY:
        for repo in repositories:
            print(
                f"Rank: {repo[0]}, Repository: {repo[1]}, Stars: {repo[2]}, Watchers: {repo[3]}, Open Issues: {repo[4]}, Discussions: {repo[5]}"
            )


if __name__ == "__main__":
    main()
```
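For reference, a minimal sketch of running this script locally rather than from a workflow. It assumes `requests` is installed and a GitHub personal access token is at hand; the token value below is a placeholder, and the output filename comes from the script above.

```bash
# Hypothetical local run; the token value is a placeholder, not a real credential.
pip install requests
export GITHUB_TOKEN="ghp_your_token_here"
python .github/workflows/get_star_ranking.py

# The script writes its results to github_top_repositories.csv in the working directory.
head -n 5 github_top_repositories.csv
```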
.github/workflows/get_star_ranking.sh (vendored, new executable file, 78 lines)
@@ -0,0 +1,78 @@
```bash
#!/bin/bash

# GitHub token (replace with your own token)
GITHUB_TOKEN="blah"

OUTPUT_FILE="top_repos.csv"

# Function to perform the GraphQL query
perform_query() {
    local cursor=$1
    local query

    if [ "$cursor" == "null" ]; then
        query='{"query": "query { search(query: \"stars:>0\", type: REPOSITORY, first: 100) { edges { cursor node { ... on Repository { nameWithOwner stargazers { totalCount } } } } pageInfo { endCursor hasNextPage } } }"}'
    else
        query=$(jq -n --arg cursor "$cursor" \
            '{query: "query { search(query: \"stars:>0\", type: REPOSITORY, first: 100, after: \($cursor|@json)) { edges { cursor node { ... on Repository { nameWithOwner stargazers { totalCount } } } } pageInfo { endCursor hasNextPage } } }"}')
    fi

    curl -s -H "Authorization: bearer $GITHUB_TOKEN" -H "Content-Type: application/json" \
        --data "$query" \
        https://api.github.com/graphql
}

# Initial query without a cursor
result=$(perform_query "null")

# Print the raw result for debugging
echo "Initial result: $result"

# Check if the result contains errors
if echo "$result" | jq -e '.errors' > /dev/null; then
    echo "Error: $(echo "$result" | jq -r '.errors[0].message')"
    exit 1
fi

# Extract repositories and next cursor
repositories=$(echo "$result" | jq -r '.data.search.edges[] | "\(.node.nameWithOwner), \(.node.stargazers.totalCount)"')
next_cursor=$(echo "$result" | jq -r '.data.search.pageInfo.endCursor')
has_next_page=$(echo "$result" | jq -r '.data.search.pageInfo.hasNextPage')

# Print CSV header to file
echo "Rank,Repository,Stars" > "$OUTPUT_FILE"

# Initialize rank
rank=1

# Continue fetching pages until no more results
while true; do
    # Append repositories with rank to file
    while IFS= read -r line; do
        echo "$rank,$line" >> "$OUTPUT_FILE"
        rank=$((rank + 1))
    done <<< "$repositories"

    # Break the loop if no more pages
    if [ "$has_next_page" != "true" ]; then
        break
    fi

    # Fetch next page
    result=$(perform_query "$next_cursor")

    # Print the raw result for debugging
    echo "Result: $result"

    # Check if the result contains errors
    if echo "$result" | jq -e '.errors' > /dev/null; then
        echo "Error: $(echo "$result" | jq -r '.errors[0].message')"
        exit 1
    fi

    repositories=$(echo "$result" | jq -r '.data.search.edges[] | "\(.node.nameWithOwner), \(.node.stargazers.totalCount)"')
    next_cursor=$(echo "$result" | jq -r '.data.search.pageInfo.endCursor')
    has_next_page=$(echo "$result" | jq -r '.data.search.pageInfo.hasNextPage')
done

echo "Repositories written to $OUTPUT_FILE"
```
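Likewise, a rough sketch of invoking the shell variant, assuming `curl` and `jq` are available on the PATH. The hard-coded token inside the script is a placeholder and would need to be replaced before running; the output filename comes from the script itself.

```bash
# Hypothetical invocation; edit GITHUB_TOKEN inside the script to a real token first.
chmod +x .github/workflows/get_star_ranking.sh
./.github/workflows/get_star_ranking.sh

# Results are appended to top_repos.csv in the working directory.
head -n 5 top_repos.csv
```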
.gitignore (vendored, 1 line added)

```diff
@@ -121,3 +121,4 @@ docker/*local*
 
 # Jest test report
 test-report.html
+.github/workflows/github_top_repositories.csv
```