Mirror of https://github.com/apache/superset.git (synced 2026-05-02 22:44:28 +00:00)

Compare commits: docs/testi...more-scrap (4 commits)
| Author | SHA1 | Date |
|---|---|---|
| | 70bb8f5b1c | |
| | 26e4c52b13 | |
| | a3caecbfea | |
| | e0ab62f1a0 | |
.github/workflows/get_star_ranking.py (vendored, new file, 152 lines)
@@ -0,0 +1,152 @@
```python
import csv
import json
import os
import time
from typing import Any, Dict, List, Optional, Tuple

import requests
from requests.exceptions import RequestException

# Configuration
GITHUB_API_URL = "https://api.github.com/graphql"
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")  # Replace with your actual token
if GITHUB_TOKEN is None:
    raise ValueError("GITHUB_TOKEN environment variable not set")

QUERY = """
{
  search(query: "stars:>1", type: REPOSITORY, first: 100, after: AFTER_CURSOR) {
    edges {
      cursor
      node {
        ... on Repository {
          nameWithOwner
          stargazers {
            totalCount
          }
          watchers {
            totalCount
          }
          openIssues: issues(states: OPEN) {
            totalCount
          }
          discussions {
            totalCount
          }
        }
      }
    }
  }
}
"""
HEADERS = {"Authorization": f"Bearer {GITHUB_TOKEN}"}
TARGET_REPOSITORY: Optional[str] = None  # Set this to None to print all repositories
MAX_REPOS = 200  # Limit to the first 200 repositories
MAX_RETRIES = 3
RETRY_DELAY = 5  # seconds


class RepositoryFetchError(Exception):
    """Custom exception for repository fetching errors."""


def fetch_repositories(after_cursor: Optional[str] = None) -> Dict[str, Any]:
    query = QUERY.replace(
        "AFTER_CURSOR", f'"{after_cursor}"' if after_cursor else "null"
    )
    for attempt in range(MAX_RETRIES):
        try:
            response = requests.post(
                GITHUB_API_URL, json={"query": query}, headers=HEADERS, timeout=30
            )
            response.raise_for_status()
            result = response.json()
            if "errors" in result:
                print(f"GraphQL errors: {json.dumps(result['errors'], indent=2)}")
                raise RepositoryFetchError("GraphQL query returned errors")
            if "data" not in result or "search" not in result["data"]:
                print(f"Unexpected response structure: {json.dumps(result, indent=2)}")
                raise RepositoryFetchError("Unexpected response structure")
            return result["data"]["search"]
        except RequestException as request_error:
            print(f"Attempt {attempt + 1} failed: {str(request_error)}")
            if attempt < MAX_RETRIES - 1:
                print(f"Retrying in {RETRY_DELAY} seconds...")
                time.sleep(RETRY_DELAY)
            else:
                print("Max retries reached. Exiting.")
                raise RepositoryFetchError(
                    "Failed to fetch repositories after all retries"
                ) from request_error
        except json.JSONDecodeError as json_error:
            print(f"Error decoding JSON response: {str(json_error)}")
            if attempt < MAX_RETRIES - 1:
                print(f"Retrying in {RETRY_DELAY} seconds...")
                time.sleep(RETRY_DELAY)
            else:
                print("Max retries reached. Exiting.")
                raise RepositoryFetchError(
                    "Failed to decode JSON response"
                ) from json_error

    raise RepositoryFetchError("Failed to fetch repositories after all retries")


def main() -> None:
    repositories: List[Tuple[int, str, int, int, int, int]] = []
    after_cursor: Optional[str] = None
    rank = 0
    target_repo_found = False

    while not target_repo_found and rank < MAX_REPOS:
        try:
            result = fetch_repositories(after_cursor)
        except RepositoryFetchError as fetch_error:
            print(f"Error fetching repositories: {str(fetch_error)}")
            break

        for edge in result["edges"]:
            rank += 1
            repo = edge["node"]
            repo_name = repo["nameWithOwner"]
            stars = repo["stargazers"]["totalCount"]
            watchers = repo["watchers"]["totalCount"]
            open_issues = repo["openIssues"]["totalCount"]
            discussions = repo["discussions"]["totalCount"]

            repositories.append(
                (rank, repo_name, stars, watchers, open_issues, discussions)
            )

            if TARGET_REPOSITORY and repo_name == TARGET_REPOSITORY:
                target_repo_found = True
                print(
                    f"Found target repository: {repo_name} at rank {rank} with {stars} stars"
                )
                break

            if rank >= MAX_REPOS:
                break

        if target_repo_found or not result["edges"] or rank >= MAX_REPOS:
            break
        after_cursor = result["edges"][-1]["cursor"]

    # Save to CSV
    with open("github_top_repositories.csv", "w", newline="") as csvfile:
        csvwriter = csv.writer(csvfile)
        csvwriter.writerow(
            ["Rank", "Repository", "Stars", "Watchers", "Open Issues", "Discussions"]
        )
        csvwriter.writerows(repositories)

    # Print all rows if TARGET_REPOSITORY is None or False
    if not TARGET_REPOSITORY:
        for repo in repositories:
            print(
                f"Rank: {repo[0]}, Repository: {repo[1]}, Stars: {repo[2]}, Watchers: {repo[3]}, Open Issues: {repo[4]}, Discussions: {repo[5]}"
            )


if __name__ == "__main__":
    main()
```
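For reference, a minimal sketch of running this script locally rather than from a workflow. It assumes `requests` is installed and a GitHub personal access token is at hand; the token value below is a placeholder, and the output filename comes from the script above.

```bash
# Hypothetical local run; the token value is a placeholder, not a real credential.
pip install requests
export GITHUB_TOKEN="ghp_your_token_here"
python .github/workflows/get_star_ranking.py

# The script writes its results to github_top_repositories.csv in the working directory.
head -n 5 github_top_repositories.csv
```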
.github/workflows/get_star_ranking.sh (vendored, new executable file, 78 lines)
@@ -0,0 +1,78 @@
```bash
#!/bin/bash

# GitHub token (replace with your own token)
GITHUB_TOKEN="blah"

OUTPUT_FILE="top_repos.csv"

# Function to perform the GraphQL query
perform_query() {
    local cursor=$1
    local query

    if [ "$cursor" == "null" ]; then
        query='{"query": "query { search(query: \"stars:>0\", type: REPOSITORY, first: 100) { edges { cursor node { ... on Repository { nameWithOwner stargazers { totalCount } } } } pageInfo { endCursor hasNextPage } } }"}'
    else
        query=$(jq -n --arg cursor "$cursor" \
            '{query: "query { search(query: \"stars:>0\", type: REPOSITORY, first: 100, after: \($cursor|@json)) { edges { cursor node { ... on Repository { nameWithOwner stargazers { totalCount } } } } pageInfo { endCursor hasNextPage } } }"}')
    fi

    curl -s -H "Authorization: bearer $GITHUB_TOKEN" -H "Content-Type: application/json" \
        --data "$query" \
        https://api.github.com/graphql
}

# Initial query without a cursor
result=$(perform_query "null")

# Print the raw result for debugging
echo "Initial result: $result"

# Check if the result contains errors
if echo "$result" | jq -e '.errors' > /dev/null; then
    echo "Error: $(echo "$result" | jq -r '.errors[0].message')"
    exit 1
fi

# Extract repositories and next cursor
repositories=$(echo "$result" | jq -r '.data.search.edges[] | "\(.node.nameWithOwner), \(.node.stargazers.totalCount)"')
next_cursor=$(echo "$result" | jq -r '.data.search.pageInfo.endCursor')
has_next_page=$(echo "$result" | jq -r '.data.search.pageInfo.hasNextPage')

# Print CSV header to file
echo "Rank,Repository,Stars" > "$OUTPUT_FILE"

# Initialize rank
rank=1

# Continue fetching pages until no more results
while true; do
    # Append repositories with rank to file
    while IFS= read -r line; do
        echo "$rank,$line" >> "$OUTPUT_FILE"
        rank=$((rank + 1))
    done <<< "$repositories"

    # Break the loop if no more pages
    if [ "$has_next_page" != "true" ]; then
        break
    fi

    # Fetch next page
    result=$(perform_query "$next_cursor")

    # Print the raw result for debugging
    echo "Result: $result"

    # Check if the result contains errors
    if echo "$result" | jq -e '.errors' > /dev/null; then
        echo "Error: $(echo "$result" | jq -r '.errors[0].message')"
        exit 1
    fi

    repositories=$(echo "$result" | jq -r '.data.search.edges[] | "\(.node.nameWithOwner), \(.node.stargazers.totalCount)"')
    next_cursor=$(echo "$result" | jq -r '.data.search.pageInfo.endCursor')
    has_next_page=$(echo "$result" | jq -r '.data.search.pageInfo.hasNextPage')
done

echo "Repositories written to $OUTPUT_FILE"
```
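Likewise, a rough sketch of invoking the shell variant, assuming `curl` and `jq` are available on the PATH. The hard-coded token inside the script is a placeholder and would need to be replaced before running; the output filename comes from the script itself.

```bash
# Hypothetical invocation; edit GITHUB_TOKEN inside the script to a real token first.
chmod +x .github/workflows/get_star_ranking.sh
./.github/workflows/get_star_ranking.sh

# Results are appended to top_repos.csv in the working directory.
head -n 5 top_repos.csv
```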
.gitignore (vendored, 1 line added)

```diff
@@ -121,3 +121,4 @@ docker/*local*
 
 # Jest test report
 test-report.html
+.github/workflows/github_top_repositories.csv
```