mirror of
https://github.com/apache/superset.git
synced 2026-04-28 12:34:23 +00:00
Compare commits
4 Commits
semantic-l
...
more-scrap
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
70bb8f5b1c | ||
|
|
26e4c52b13 | ||
|
|
a3caecbfea | ||
|
|
e0ab62f1a0 |
152
.github/workflows/get_star_ranking.py
vendored
Normal file
152
.github/workflows/get_star_ranking.py
vendored
Normal file
@@ -0,0 +1,152 @@
|
||||
import csv
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
import requests
|
||||
from requests.exceptions import RequestException
|
||||
|
||||
# Configuration
GITHUB_API_URL = "https://api.github.com/graphql"
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")  # Replace with your actual token
# Fail fast at import time: every request below needs the token.
if GITHUB_TOKEN is None:
    raise ValueError("GITHUB_TOKEN environment variable not set")

# GraphQL search for repositories with more than one star, 100 per page.
# AFTER_CURSOR is a placeholder that fetch_repositories() replaces with
# either the literal null (first page) or a quoted pagination cursor.
QUERY = """
{
search(query: "stars:>1", type: REPOSITORY, first: 100, after: AFTER_CURSOR) {
edges {
cursor
node {
... on Repository {
nameWithOwner
stargazers {
totalCount
}
watchers {
totalCount
}
openIssues: issues(states: OPEN) {
totalCount
}
discussions {
totalCount
}
}
}
}
}
}
"""
HEADERS = {"Authorization": f"Bearer {GITHUB_TOKEN}"}
TARGET_REPOSITORY: Optional[str] = None  # Set this to None to print all repositories
MAX_REPOS = 200  # Limit to the first 200 repositories
MAX_RETRIES = 3  # attempts per page before giving up
RETRY_DELAY = 5  # seconds
|
||||
|
||||
|
||||
class RepositoryFetchError(Exception):
    """Custom exception for repository fetching errors.

    Raised when a page of the GraphQL search cannot be retrieved: transport
    failures after all retries, GraphQL-level errors in the response, or a
    response body without the expected ``data.search`` structure.
    """
|
||||
|
||||
|
||||
def fetch_repositories(after_cursor: Optional[str] = None) -> Dict[str, Any]:
    """Fetch one page (up to 100 repos) of the GitHub GraphQL star search.

    Args:
        after_cursor: Pagination cursor from the previous page, or ``None``
            for the first page.

    Returns:
        The ``data.search`` payload of the GraphQL response (the edges with
        repository nodes).

    Raises:
        RepositoryFetchError: If the request keeps failing after
            ``MAX_RETRIES`` attempts, the response carries GraphQL errors,
            or the response structure is not the expected one.
    """
    # Splice the cursor into the query text: unquoted null means "first
    # page" in GraphQL, an actual cursor must be a quoted string.
    query = QUERY.replace(
        "AFTER_CURSOR", f'"{after_cursor}"' if after_cursor else "null"
    )
    last_error: Optional[Exception] = None
    for attempt in range(MAX_RETRIES):
        try:
            response = requests.post(
                GITHUB_API_URL, json={"query": query}, headers=HEADERS, timeout=30
            )
            response.raise_for_status()
            result = response.json()
            if "errors" in result:
                print(f"GraphQL errors: {json.dumps(result['errors'], indent=2)}")
                raise RepositoryFetchError("GraphQL query returned errors")
            if "data" not in result or "search" not in result["data"]:
                print(f"Unexpected response structure: {json.dumps(result, indent=2)}")
                raise RepositoryFetchError("Unexpected response structure")
            return result["data"]["search"]
        except (RequestException, json.JSONDecodeError) as err:
            # Network/HTTP failures and malformed JSON bodies get the same
            # retry-with-delay treatment; previously this logic was
            # duplicated verbatim in two separate except blocks.
            # (RepositoryFetchError raised above is NOT caught here and
            # propagates immediately, as before.)
            last_error = err
            print(f"Attempt {attempt + 1} failed: {err}")
            if attempt < MAX_RETRIES - 1:
                print(f"Retrying in {RETRY_DELAY} seconds...")
                time.sleep(RETRY_DELAY)

    print("Max retries reached. Exiting.")
    raise RepositoryFetchError(
        "Failed to fetch repositories after all retries"
    ) from last_error
|
||||
|
||||
|
||||
def main() -> None:
    """Collect top-starred GitHub repositories and write them to a CSV.

    Pages through the GraphQL search until either ``MAX_REPOS`` repositories
    have been collected, ``TARGET_REPOSITORY`` (if set) is found, or a page
    fetch fails. Whatever was collected is written to
    ``github_top_repositories.csv``; when no target repository is configured
    the rows are also printed to stdout.
    """
    repositories: List[Tuple[int, str, int, int, int, int]] = []
    after_cursor: Optional[str] = None
    rank = 0
    target_repo_found = False

    while not target_repo_found and rank < MAX_REPOS:
        try:
            result = fetch_repositories(after_cursor)
        except RepositoryFetchError as fetch_error:
            # Best effort: keep whatever was collected so far and fall
            # through to the CSV write below.
            print(f"Error fetching repositories: {str(fetch_error)}")
            break

        for edge in result["edges"]:
            rank += 1
            repo = edge["node"]
            repo_name = repo["nameWithOwner"]
            stars = repo["stargazers"]["totalCount"]
            watchers = repo["watchers"]["totalCount"]
            open_issues = repo["openIssues"]["totalCount"]
            discussions = repo["discussions"]["totalCount"]

            repositories.append(
                (rank, repo_name, stars, watchers, open_issues, discussions)
            )

            if TARGET_REPOSITORY and repo_name == TARGET_REPOSITORY:
                target_repo_found = True
                print(
                    f"Found target repository: {repo_name} at rank {rank} with {stars} stars"
                )
                break

            if rank >= MAX_REPOS:
                break

        # Stop when done; otherwise advance the cursor past the last edge.
        if target_repo_found or not result["edges"] or rank >= MAX_REPOS:
            break
        after_cursor = result["edges"][-1]["cursor"]

    # Save to CSV. Explicit UTF-8 so non-ASCII repository names are written
    # identically on every platform (the default encoding is locale-dependent).
    with open(
        "github_top_repositories.csv", "w", newline="", encoding="utf-8"
    ) as csvfile:
        csvwriter = csv.writer(csvfile)
        csvwriter.writerow(
            ["Rank", "Repository", "Stars", "Watchers", "Open Issues", "Discussions"]
        )
        csvwriter.writerows(repositories)

    # Print all rows if TARGET_REPOSITORY is None or False
    if not TARGET_REPOSITORY:
        for repo in repositories:
            print(
                f"Rank: {repo[0]}, Repository: {repo[1]}, Stars: {repo[2]}, Watchers: {repo[3]}, Open Issues: {repo[4]}, Discussions: {repo[5]}"
            )
|
||||
|
||||
|
||||
# Script entry point: run only when executed directly, not when imported.
if __name__ == "__main__":
    main()
|
||||
78
.github/workflows/get_star_ranking.sh
vendored
Executable file
78
.github/workflows/get_star_ranking.sh
vendored
Executable file
@@ -0,0 +1,78 @@
|
||||
#!/bin/bash
# Fetch the most-starred GitHub repositories via the GraphQL search API and
# write them, ranked, to a CSV file. Requires curl and jq.

# Read the token from the environment rather than hard-coding a secret in
# the script (the original shipped a "blah" placeholder that both never
# worked and invited committing a real token). Abort with a message if unset.
GITHUB_TOKEN="${GITHUB_TOKEN:?GITHUB_TOKEN environment variable not set}"

OUTPUT_FILE="top_repos.csv"

# Function to perform the GraphQL query.
# $1: pagination cursor, or the literal string "null" for the first page.
perform_query() {
    local cursor=$1
    local query

    if [ "$cursor" == "null" ]; then
        query='{"query": "query { search(query: \"stars:>0\", type: REPOSITORY, first: 100) { edges { cursor node { ... on Repository { nameWithOwner stargazers { totalCount } } } } pageInfo { endCursor hasNextPage } } }"}'
    else
        # jq's @json quotes and escapes the cursor safely inside the query.
        query=$(jq -n --arg cursor "$cursor" \
            '{query: "query { search(query: \"stars:>0\", type: REPOSITORY, first: 100, after: \($cursor|@json)) { edges { cursor node { ... on Repository { nameWithOwner stargazers { totalCount } } } } pageInfo { endCursor hasNextPage } } }"}')
    fi

    curl -s -H "Authorization: bearer $GITHUB_TOKEN" -H "Content-Type: application/json" \
        --data "$query" \
        https://api.github.com/graphql
}

# Initial query without a cursor
result=$(perform_query "null")

# Print the raw result for debugging
echo "Initial result: $result"

# Check if the result contains errors
if echo "$result" | jq -e '.errors' > /dev/null; then
    echo "Error: $(echo "$result" | jq -r '.errors[0].message')"
    exit 1
fi

# Extract repositories and next cursor
repositories=$(echo "$result" | jq -r '.data.search.edges[] | "\(.node.nameWithOwner), \(.node.stargazers.totalCount)"')
next_cursor=$(echo "$result" | jq -r '.data.search.pageInfo.endCursor')
has_next_page=$(echo "$result" | jq -r '.data.search.pageInfo.hasNextPage')

# Print CSV header to file
echo "Rank,Repository,Stars" > "$OUTPUT_FILE"

# Initialize rank
rank=1

# Continue fetching pages until no more results
while true; do
    # Append repositories with rank to file
    while IFS= read -r line; do
        echo "$rank,$line" >> "$OUTPUT_FILE"
        rank=$((rank + 1))
    done <<< "$repositories"

    # Break the loop if no more pages
    if [ "$has_next_page" != "true" ]; then
        break
    fi

    # Fetch next page
    result=$(perform_query "$next_cursor")

    # Print the raw result for debugging
    echo "Result: $result"

    # Check if the result contains errors
    if echo "$result" | jq -e '.errors' > /dev/null; then
        echo "Error: $(echo "$result" | jq -r '.errors[0].message')"
        exit 1
    fi

    repositories=$(echo "$result" | jq -r '.data.search.edges[] | "\(.node.nameWithOwner), \(.node.stargazers.totalCount)"')
    next_cursor=$(echo "$result" | jq -r '.data.search.pageInfo.endCursor')
    has_next_page=$(echo "$result" | jq -r '.data.search.pageInfo.hasNextPage')
done

echo "Repositories written to $OUTPUT_FILE"
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -121,3 +121,4 @@ docker/*local*
|
||||
|
||||
# Jest test report
|
||||
test-report.html
|
||||
.github/workflows/github_top_repositories.csv
|
||||
|
||||
Reference in New Issue
Block a user