Compare commits

...

4 Commits

Author          SHA1        Message                     Date
Evan Rusackas   70bb8f5b1c  pull token from env var.    2024-07-17 10:01:22 -06:00
Evan Rusackas   26e4c52b13  script works!               2024-07-12 08:54:48 -06:00
Evan Rusackas   a3caecbfea  ignore script csv file(s)   2024-07-12 08:54:32 -06:00
Evan Rusackas   e0ab62f1a0  getting star ranking, v0.1  2024-07-09 22:31:53 -06:00
3 changed files with 231 additions and 0 deletions

152  .github/workflows/get_star_ranking.py  vendored  Normal file

@@ -0,0 +1,152 @@
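"""Rank GitHub repositories by star count via the GraphQL search API and save them to CSV."""
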
import csv
import json
import os
import time
from typing import Any, Dict, List, Optional, Tuple

import requests
from requests.exceptions import RequestException

# Configuration
GITHUB_API_URL = "https://api.github.com/graphql"
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")  # Supplied via environment variable; never hard-code tokens
if GITHUB_TOKEN is None:
    raise ValueError("GITHUB_TOKEN environment variable not set")

QUERY = """
{
  search(query: "stars:>1", type: REPOSITORY, first: 100, after: AFTER_CURSOR) {
    edges {
      cursor
      node {
        ... on Repository {
          nameWithOwner
          stargazers {
            totalCount
          }
          watchers {
            totalCount
          }
          openIssues: issues(states: OPEN) {
            totalCount
          }
          discussions {
            totalCount
          }
        }
      }
    }
  }
}
"""
HEADERS = {"Authorization": f"Bearer {GITHUB_TOKEN}"}
TARGET_REPOSITORY: Optional[str] = None  # Set this to None to print all repositories
MAX_REPOS = 200  # Limit to the first 200 repositories
MAX_RETRIES = 3
RETRY_DELAY = 5  # seconds


class RepositoryFetchError(Exception):
    """Custom exception for repository fetching errors."""


def fetch_repositories(after_cursor: Optional[str] = None) -> Dict[str, Any]:
    query = QUERY.replace(
        "AFTER_CURSOR", f'"{after_cursor}"' if after_cursor else "null"
    )
    for attempt in range(MAX_RETRIES):
        try:
            response = requests.post(
                GITHUB_API_URL, json={"query": query}, headers=HEADERS, timeout=30
            )
            response.raise_for_status()
            result = response.json()
            if "errors" in result:
                print(f"GraphQL errors: {json.dumps(result['errors'], indent=2)}")
                raise RepositoryFetchError("GraphQL query returned errors")
            if "data" not in result or "search" not in result["data"]:
                print(f"Unexpected response structure: {json.dumps(result, indent=2)}")
                raise RepositoryFetchError("Unexpected response structure")
            return result["data"]["search"]
        except RequestException as request_error:
            print(f"Attempt {attempt + 1} failed: {str(request_error)}")
            if attempt < MAX_RETRIES - 1:
                print(f"Retrying in {RETRY_DELAY} seconds...")
                time.sleep(RETRY_DELAY)
            else:
                print("Max retries reached. Exiting.")
                raise RepositoryFetchError(
                    "Failed to fetch repositories after all retries"
                ) from request_error
        except json.JSONDecodeError as json_error:
            print(f"Error decoding JSON response: {str(json_error)}")
            if attempt < MAX_RETRIES - 1:
                print(f"Retrying in {RETRY_DELAY} seconds...")
                time.sleep(RETRY_DELAY)
            else:
                print("Max retries reached. Exiting.")
                raise RepositoryFetchError(
                    "Failed to decode JSON response"
                ) from json_error
    raise RepositoryFetchError("Failed to fetch repositories after all retries")


def main() -> None:
    repositories: List[Tuple[int, str, int, int, int, int]] = []
    after_cursor: Optional[str] = None
    rank = 0
    target_repo_found = False
    while not target_repo_found and rank < MAX_REPOS:
        try:
            result = fetch_repositories(after_cursor)
        except RepositoryFetchError as fetch_error:
            print(f"Error fetching repositories: {str(fetch_error)}")
            break
        for edge in result["edges"]:
            rank += 1
            repo = edge["node"]
            repo_name = repo["nameWithOwner"]
            stars = repo["stargazers"]["totalCount"]
            watchers = repo["watchers"]["totalCount"]
            open_issues = repo["openIssues"]["totalCount"]
            discussions = repo["discussions"]["totalCount"]
            repositories.append(
                (rank, repo_name, stars, watchers, open_issues, discussions)
            )
            if TARGET_REPOSITORY and repo_name == TARGET_REPOSITORY:
                target_repo_found = True
                print(
                    f"Found target repository: {repo_name} at rank {rank} "
                    f"with {stars} stars"
                )
                break
            if rank >= MAX_REPOS:
                break
        if target_repo_found or not result["edges"] or rank >= MAX_REPOS:
            break
        # Paginate using the cursor of the last edge on this page
        after_cursor = result["edges"][-1]["cursor"]

    # Save to CSV
    with open("github_top_repositories.csv", "w", newline="") as csvfile:
        csvwriter = csv.writer(csvfile)
        csvwriter.writerow(
            ["Rank", "Repository", "Stars", "Watchers", "Open Issues", "Discussions"]
        )
        csvwriter.writerows(repositories)

    # Print all rows if no target repository was set
    if not TARGET_REPOSITORY:
        for repo in repositories:
            print(
                f"Rank: {repo[0]}, Repository: {repo[1]}, Stars: {repo[2]}, "
                f"Watchers: {repo[3]}, Open Issues: {repo[4]}, Discussions: {repo[5]}"
            )


if __name__ == "__main__":
    main()
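
A minimal way to try the Python script, assuming it is invoked from the directory where the CSV should land and that a valid token is exported first (the value shown is a hypothetical placeholder, not a real token):

export GITHUB_TOKEN="<your-token>"   # hypothetical placeholder; any valid GitHub API token
python .github/workflows/get_star_ranking.py
# Writes github_top_repositories.csv and, with TARGET_REPOSITORY unset, prints every ranked row.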

78  .github/workflows/get_star_ranking.sh  vendored  Executable file

@@ -0,0 +1,78 @@
#!/bin/bash
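
# Rank GitHub repositories by star count via the GraphQL search API
# and write the paginated results to a CSV file.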
# GitHub token (replace with your own token)
GITHUB_TOKEN="blah"
OUTPUT_FILE="top_repos.csv"

# Function to perform the GraphQL query
perform_query() {
    local cursor=$1
    local query
    if [ "$cursor" == "null" ]; then
        query='{"query": "query { search(query: \"stars:>0\", type: REPOSITORY, first: 100) { edges { cursor node { ... on Repository { nameWithOwner stargazers { totalCount } } } } pageInfo { endCursor hasNextPage } } }"}'
    else
        query=$(jq -n --arg cursor "$cursor" \
            '{query: "query { search(query: \"stars:>0\", type: REPOSITORY, first: 100, after: \($cursor|@json)) { edges { cursor node { ... on Repository { nameWithOwner stargazers { totalCount } } } } pageInfo { endCursor hasNextPage } } }"}')
    fi
    curl -s -H "Authorization: bearer $GITHUB_TOKEN" -H "Content-Type: application/json" \
        --data "$query" \
        https://api.github.com/graphql
}

# Initial query without a cursor
result=$(perform_query "null")

# Print the raw result for debugging
echo "Initial result: $result"

# Check if the result contains errors
if echo "$result" | jq -e '.errors' > /dev/null; then
    echo "Error: $(echo "$result" | jq -r '.errors[0].message')"
    exit 1
fi

# Extract repositories and next cursor
repositories=$(echo "$result" | jq -r '.data.search.edges[] | "\(.node.nameWithOwner), \(.node.stargazers.totalCount)"')
next_cursor=$(echo "$result" | jq -r '.data.search.pageInfo.endCursor')
has_next_page=$(echo "$result" | jq -r '.data.search.pageInfo.hasNextPage')

# Print CSV header to file
echo "Rank,Repository,Stars" > "$OUTPUT_FILE"

# Initialize rank
rank=1

# Continue fetching pages until no more results
while true; do
    # Append repositories with rank to file
    while IFS= read -r line; do
        echo "$rank,$line" >> "$OUTPUT_FILE"
        rank=$((rank + 1))
    done <<< "$repositories"

    # Break the loop if no more pages
    if [ "$has_next_page" != "true" ]; then
        break
    fi

    # Fetch next page
    result=$(perform_query "$next_cursor")

    # Print the raw result for debugging
    echo "Result: $result"

    # Check if the result contains errors
    if echo "$result" | jq -e '.errors' > /dev/null; then
        echo "Error: $(echo "$result" | jq -r '.errors[0].message')"
        exit 1
    fi

    repositories=$(echo "$result" | jq -r '.data.search.edges[] | "\(.node.nameWithOwner), \(.node.stargazers.totalCount)"')
    next_cursor=$(echo "$result" | jq -r '.data.search.pageInfo.endCursor')
    has_next_page=$(echo "$result" | jq -r '.data.search.pageInfo.hasNextPage')
done

echo "Repositories written to $OUTPUT_FILE"

1  .gitignore  vendored

@@ -121,3 +121,4 @@ docker/*local*
# Jest test report
test-report.html
.github/workflows/github_top_repositories.csv