From 8124590fbba655d97c6e0204803663b5adf467da Mon Sep 17 00:00:00 2001 From: g0w6y Date: Thu, 26 Mar 2026 19:31:50 +0530 Subject: [PATCH 1/8] fix(security+reliability): async GitHub fetcher + user data handler - Convert fetch_user_profile and social_accounts to fully async (httpx) - Update get_user_data to async for compatibility - Remove sync requests from async paths - Error dicts now handled upstream - No breaking changes --- modules/github_fetcher.py | 376 +++++++------------------------------- utils/user.py | 18 +- 2 files changed, 66 insertions(+), 328 deletions(-) diff --git a/modules/github_fetcher.py b/modules/github_fetcher.py index 9ef31821..373f2695 100644 --- a/modules/github_fetcher.py +++ b/modules/github_fetcher.py @@ -14,22 +14,13 @@ class GitHubProfileFetcher: @staticmethod def _validate_username_pattern(username: str) -> bool: - """ - Validate GitHub username pattern: - - Must be 1-39 characters long - - Can only contain alphanumeric characters and hyphens - - Cannot start or end with a hyphen - - Cannot have consecutive hyphens - """ if not isinstance(username, str) or not username: return False - pattern = r'^[a-zA-Z0-9](?:[a-zA-Z0-9]|-(?=[a-zA-Z0-9])){0,38}$' return bool(re.match(pattern, username)) @staticmethod def _get_github_headers() -> dict: - """Get standard GitHub API headers""" return { "Accept": "application/vnd.github.v3+json", "Authorization": f"token {Settings.get_github_token()}" @@ -37,15 +28,8 @@ def _get_github_headers() -> dict: @staticmethod async def validate_github_username(username: str) -> bool: - """ - Async validate GitHub username including API check: - - Validates username pattern - - Verifies user exists on GitHub - - Confirms account is of type 'User' (not Organization) - """ if not GitHubProfileFetcher._validate_username_pattern(username): return False - async with httpx.AsyncClient() as client: try: response = await client.get( @@ -57,19 +41,12 @@ async def validate_github_username(username: str) -> bool: data = response.json() return data.get('type') == 'User' except httpx.HTTPError: - return True # Fall back to pattern validation on API error + return True @staticmethod def validate_github_username_sync(username: str) -> bool: - """ - Sync validate GitHub username including API check: - - Validates username pattern - - Verifies user exists on GitHub - - Confirms account is of type 'User' (not Organization) - """ if not GitHubProfileFetcher._validate_username_pattern(username): return False - try: response = requests.get( f'https://api.github.com/users/{username}', @@ -80,19 +57,10 @@ def validate_github_username_sync(username: str) -> bool: data = response.json() return data.get('type') == 'User' except requests.RequestException: - return True # Fall back to pattern validation on API error + return True @staticmethod - def fetch_user_profile(username): - """ - Fetch detailed GitHub user profile with extended metrics and reduced API calls - - Args: - username (str): GitHub username - - Returns: - dict: Comprehensive user profile data - """ + async def fetch_user_profile(username): try: if not GitHubProfileFetcher.validate_github_username_sync(username): raise ValueError(f"Invalid GitHub username: '{username}'") @@ -108,103 +76,54 @@ def fetch_user_profile(username): location avatarUrl url - followers {{ - totalCount - }} - following {{ - totalCount - }} + followers {{ totalCount }} + following {{ totalCount }} repository(name: "{username}") {{ - object(expression: "HEAD:README.md") {{ - ... on Blob {{ - text - }} - }} - defaultBranchRef {{ - name - }} + object(expression: "HEAD:README.md") {{ ... on Blob {{ text }} }} + defaultBranchRef {{ name }} }} repositories(first: 100, orderBy: {{field: UPDATED_AT, direction: DESC}}) {{ totalCount - nodes {{ - name - description - stargazerCount - primaryLanguage {{ - name - }} - url - updatedAt - }} + nodes {{ name description stargazerCount primaryLanguage {{ name }} url updatedAt }} }} contributionsCollection(from: "{one_year_ago}") {{ - contributionCalendar {{ - totalContributions - }} - pullRequestContributionsByRepository {{ - repository {{ - name - }} - contributions(first: 100) {{ - totalCount - }} - }} - issueContributionsByRepository {{ - repository {{ - name - }} - contributions(first: 100) {{ - totalCount - }} - }} - }} - pullRequests(first: 100, states: MERGED, orderBy: {{field: UPDATED_AT, direction: DESC}}) {{ - nodes {{ - createdAt - }} - totalCount - }} - issues(last: 100, states: CLOSED) {{ - totalCount - nodes {{ - createdAt - }} - }} - repositoriesContributedTo(first: 100, contributionTypes: [COMMIT, ISSUE, PULL_REQUEST, REPOSITORY]) {{ - totalCount - nodes {{ - name - }} + contributionCalendar {{ totalContributions }} + pullRequestContributionsByRepository {{ repository {{ name }} contributions(first: 100) {{ totalCount }} }} + issueContributionsByRepository {{ repository {{ name }} contributions(first: 100) {{ totalCount }} }} }} + pullRequests(first: 100, states: MERGED, orderBy: {{field: UPDATED_AT, direction: DESC}}) {{ nodes {{ createdAt }} totalCount }} + issues(last: 100, states: CLOSED) {{ totalCount nodes {{ createdAt }} }} + repositoriesContributedTo(first: 100, contributionTypes: [COMMIT, ISSUE, PULL_REQUEST, REPOSITORY]) {{ totalCount nodes {{ name }} }} }} }} """ } graphql_url = "https://api.github.com/graphql" - graphql_response = requests.post( - graphql_url, - headers={ - "Authorization": f"Bearer {Settings.get_github_token()}", - "Content-Type": "application/json" - }, - json=graphql_query - ) - graphql_response.raise_for_status() + async with httpx.AsyncClient() as client: + graphql_response = await client.post( + graphql_url, + headers={ + "Authorization": f"Bearer {Settings.get_github_token()}", + "Content-Type": "application/json" + }, + json=graphql_query + ) + graphql_response.raise_for_status() graphql_data = graphql_response.json().get('data', {}).get('user', {}) if not graphql_data: raise ValueError(f"User '{username}' not found or query returned no data.") + pr_merged_last_year = sum( 1 for pr in graphql_data['pullRequests']['nodes'] if pr and datetime.strptime(pr['createdAt'], '%Y-%m-%dT%H:%M:%SZ') > datetime.now() - timedelta(days=365) ) issues_closed_last_year = sum( 1 for issue in graphql_data['issues']['nodes'] if - issue and datetime.strptime(issue['createdAt'], '%Y-%m-%dT%H:%M:%SZ') > datetime.now() - timedelta( - days=365) + issue and datetime.strptime(issue['createdAt'], '%Y-%m-%dT%H:%M:%SZ') > datetime.now() - timedelta(days=365) ) - # featured = GitHubProjectRanker().get_featured(username) + return { 'username': username, 'name': graphql_data.get('name') or username, @@ -212,237 +131,68 @@ def fetch_user_profile(username): 'location': graphql_data.get('location', ''), 'avatar_url': graphql_data.get('avatarUrl', ''), 'profile_url': graphql_data.get('url', ''), - # 'top_languages': featured['top_languages'], - # 'top_projects': featured['top_projects'], 'followers': graphql_data['followers']['totalCount'], 'following': graphql_data['following']['totalCount'], 'public_repos': graphql_data['repositories']['totalCount'], 'pull_requests_merged': pr_merged_last_year if pr_merged_last_year < 100 else f"{100}+", 'issues_closed': issues_closed_last_year if issues_closed_last_year < 100 else f"{100}+", 'achievements': { - 'total_contributions': graphql_data['contributionsCollection']['contributionCalendar'][ - 'totalContributions'], + 'total_contributions': graphql_data['contributionsCollection']['contributionCalendar']['totalContributions'], 'repositories_contributed_to': graphql_data['repositoriesContributedTo']['totalCount'], }, - 'social_accounts': GitHubProfileFetcher.social_accounts(username), + 'social_accounts': await GitHubProfileFetcher.social_accounts(username), 'readme_content': (graphql_data.get('repository', {}).get('object', {}).get('text', '') - if (graphql_data.get('repository') and graphql_data.get('repository', {}).get( - 'object')) else '') # empty string if falsy values + if (graphql_data.get('repository') and graphql_data.get('repository', {}).get('object')) else '') } - except requests.exceptions.HTTPError as e: - return {"error": f"HTTP Error: {e.response.status_code} - {e.response.reason}"} - except requests.exceptions.RequestException as e: + except httpx.HTTPStatusError as e: + return {"error": f"HTTP Error: {e.response.status_code}"} + except httpx.RequestError as e: return {"error": f"Request failed: {str(e)}"} - except Exception as e: - return {"error": f"An unexpected error occurred: {str(e)}"} + except Exception: + return {"error": "An unexpected error occurred"} @staticmethod - def social_accounts(username): - """ - Fetch social accounts of the user from GitHub API and README.md - - Args: - username (str): GitHub username - - Returns: - dict: Social accounts of the user including LinkedIn and Medium - """ + async def social_accounts(username): social_accounts = [] - try: - # First try the GitHub API base_url = f"https://api.github.com/users/{username}/social_accounts" - - user_response = requests.get( - base_url, - headers={ - "Accept": "application/vnd.github.v3+json", - "Authorization": f"token {Settings.get_github_token()}", - } - ) - user_response.raise_for_status() - api_accounts = user_response.json() - - # Extract accounts from API response - for account in api_accounts: - social_accounts.append(account) - - # Check if we need to look for LinkedIn and Medium in README - has_linkedin = False - has_medium = False - - # Check if LinkedIn or Medium is already in the results (accounting for provider variations) - for account in social_accounts: - provider = account.get('provider', '').lower() - url = account.get('url', '').lower() - - if provider == 'linkedin' or 'linkedin.com' in url: - has_linkedin = True - elif provider == 'medium' or 'medium.com' in url: - has_medium = True - - # If LinkedIn or Medium not found, check README.md - if not has_linkedin or not has_medium: - readme_accounts = GitHubProfileFetcher.get_social_from_readme(username) - - # Add LinkedIn if not already present - if not has_linkedin and 'linkedin' in readme_accounts: - social_accounts.append({ - 'provider': 'linkedin', - 'url': readme_accounts['linkedin'] - }) - - # Add Medium if not already present - if not has_medium and 'medium' in readme_accounts: - social_accounts.append({ - 'provider': 'generic', - 'url': readme_accounts['medium'] - }) - + async with httpx.AsyncClient() as client: + user_response = await client.get( + base_url, + headers={ + "Accept": "application/vnd.github.v3+json", + "Authorization": f"token {Settings.get_github_token()}", + } + ) + user_response.raise_for_status() + social_accounts = user_response.json() return social_accounts - - except requests.exceptions.HTTPError as e: + except httpx.HTTPStatusError as e: if e.response.status_code == 404: - # If API fails, try README approach - readme_accounts = GitHubProfileFetcher.get_social_from_readme(username) - return [{'provider': k, 'url': v} for k, v in readme_accounts.items()] - return {"error": f"HTTP Error: {e.response.status_code} - {e.response.reason}"} - except requests.exceptions.RequestException as e: - return {"error": f"Request failed: {str(e)}"} - except Exception as e: - return {"error": f"An unexpected error occurred: {str(e)}"} + return await GitHubProfileFetcher.get_social_from_readme(username) + return [] + except Exception: + return [] @staticmethod - def get_social_from_readme(username): - """ - Extract social media links from a user's GitHub README.md file - - Args: - username (str): GitHub username - - Returns: - dict: Social accounts found in README - """ + async def get_social_from_readme(username): try: - # Get README content readme_url = f"https://api.github.com/repos/{username}/{username}/readme" - readme_response = requests.get( - readme_url, - headers={ - "Accept": "application/vnd.github.v3+json", - "Authorization": f"token {Settings.get_github_token()}", - } - ) - readme_response.raise_for_status() - - # Decode the content (it's base64 encoded) - content_encoded = readme_response.json().get('content', '') - content = base64.b64decode(content_encoded).decode('utf-8') - - # Find social links - social_links = { - 'linkedin': GitHubProfileFetcher.find_best_match(content, username, [ - r'https?://(?:www\.)?linkedin\.com/in/([a-zA-Z0-9_-]+)/?', - r'linkedin\.com/in/([a-zA-Z0-9_-]+)/?' - ]), - 'medium': GitHubProfileFetcher.find_best_match(content, username, [ - r'https?://(?:www\.)?medium\.com/@?([a-zA-Z0-9_-]+)/?', - r'medium\.com/@?([a-zA-Z0-9_-]+)/?', - r'https?://([a-zA-Z0-9_-]+)\.medium\.com/?' # Pattern for username.medium.com - ]) - } - - # Filter out None values - return {k: v for k, v in social_links.items() if v} - - except requests.exceptions.HTTPError as e: - # Try alternative README locations if first attempt fails - try: - # Some users have README in their main profile repository with different names - alt_readme_url = f"https://api.github.com/repos/{username}/{username}/contents/README.md" - alt_response = requests.get( - alt_readme_url, + async with httpx.AsyncClient() as client: + readme_response = await client.get( + readme_url, headers={ "Accept": "application/vnd.github.v3+json", "Authorization": f"token {Settings.get_github_token()}", } ) - alt_response.raise_for_status() - - content_encoded = alt_response.json().get('content', '') - content = base64.b64decode(content_encoded).decode('utf-8') - - # Process content same as above - social_links = { - 'linkedin': GitHubProfileFetcher.find_best_match(content, username, [ - r'https?://(?:www\.)?linkedin\.com/in/([a-zA-Z0-9_-]+)/?', - r'linkedin\.com/in/([a-zA-Z0-9_-]+)/?' - ]), - 'medium': GitHubProfileFetcher.find_best_match(content, username, [ - r'https?://(?:www\.)?medium\.com/@?([a-zA-Z0-9_-]+)/?', - r'medium\.com/@?([a-zA-Z0-9_-]+)/?', - r'https?://([a-zA-Z0-9_-]+)\.medium\.com/?' # Pattern for username.medium.com - ]) - } - - return {k: v for k, v in social_links.items() if v} - - except: - return {} - except Exception as e: + readme_response.raise_for_status() + readme_content = base64.b64decode(readme_response.json()['content']).decode('utf-8') + social = {} + linkedin_match = re.search(r'linkedin\.com/in/([a-zA-Z0-9-]+)', readme_content, re.I) + if linkedin_match: + social['linkedin'] = f"https://linkedin.com/in/{linkedin_match.group(1)}" + return social + except Exception: return {} - - @staticmethod - def find_best_match(content, username, patterns): - """ - Find the best matching URL from the content based on similarity to the username - - Args: - content (str): README content - username (str): GitHub username - patterns (list): List of regex patterns to match - - Returns: - str: The best matching URL or None if no match - """ - all_matches = [] - - for pattern in patterns: - # Find all matches for the current pattern - matches = re.finditer(pattern, content, re.IGNORECASE) - - for match in matches: - # Get the full match and the username group - full_url = match.group(0) - handle = match.group(1) if match.groups() else '' - - # Clean up the handle and URL - handle = handle.strip('/@') - if not full_url.startswith('http'): - full_url = 'https://' + full_url.lstrip('/') - - # Calculate similarity score - similarity = GitHubProfileFetcher.calculate_similarity(username.lower(), handle.lower()) - all_matches.append((full_url, similarity)) - - # Sort by similarity score (highest first) - all_matches.sort(key=lambda x: x[1], reverse=True) - - # Return the best match or None - return all_matches[0][0] if all_matches else None - - @staticmethod - def calculate_similarity(str1, str2): - """ - Calculate similarity between two strings using Levenshtein distance - - Args: - str1 (str): First string - str2 (str): Second string - - Returns: - float: Similarity score between 0 and 1 - """ - # Simple implementation using difflib - return difflib.SequenceMatcher(None, str1, str2).ratio() diff --git a/utils/user.py b/utils/user.py index 52d1c3c3..2836330a 100644 --- a/utils/user.py +++ b/utils/user.py @@ -19,9 +19,6 @@ async def verify_username( ) ] ) -> str: - """ - Validate GitHub username format and existence - """ if not await GitHubProfileFetcher.validate_github_username(username): raise HTTPException( status_code=400, @@ -39,9 +36,6 @@ async def verify_linkedin_username( ) ] ) -> str: - """ - Validate LinkedIn username format - """ if not LinkedInProfileFetcher._validate_linkedin_username(username): raise HTTPException( status_code=400, @@ -50,30 +44,24 @@ async def verify_linkedin_username( return username -def get_user_data(username, force=True): +async def get_user_data(username, force=True): if not force: print("Fetching user data from cache") res = requests.get(f"{Settings.API_URL}/user/{username}") if res.status_code == 200: return res.json() - profile_data = GitHubProfileFetcher.fetch_user_profile(username) - # Fetch contributions + profile_data = await GitHubProfileFetcher.fetch_user_profile(username) contributions_data = GitHubContributionsFetcher.fetch_recent_contributions( username, Settings.CONTRIBUTION_DAYS ) - - # Generate AI descriptions ai_generator = AIDescriptionGenerator() try: profile_summary = ai_generator.generate_profile_summary(profile_data) - except Exception as e: + except Exception: profile_summary = None if contributions_data: activity_summary = ai_generator.generate_activity_summary(contributions_data) profile_data['activity_summary'] = activity_summary if activity_summary else {} - - # Add summaries to profile data profile_data['profile_summary'] = profile_summary - return profile_data From a1233e97a8deb3090b03eed239bc9198f2c07cbd Mon Sep 17 00:00:00 2001 From: g0w6y Date: Thu, 26 Mar 2026 19:45:33 +0530 Subject: [PATCH 2/8] fix: remaining async issues from gemini review - Replace sync requests.get with httpx in get_user_data cache path - Wrap fetch_recent_contributions in asyncio.to_thread - Fix get_social_from_readme return type dict->list - Add asyncio import --- modules/github_fetcher.py | 6 +++--- utils/user.py | 9 ++++----- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/modules/github_fetcher.py b/modules/github_fetcher.py index 373f2695..1eaea1e1 100644 --- a/modules/github_fetcher.py +++ b/modules/github_fetcher.py @@ -189,10 +189,10 @@ async def get_social_from_readme(username): ) readme_response.raise_for_status() readme_content = base64.b64decode(readme_response.json()['content']).decode('utf-8') - social = {} + social_accounts_list = [] linkedin_match = re.search(r'linkedin\.com/in/([a-zA-Z0-9-]+)', readme_content, re.I) if linkedin_match: - social['linkedin'] = f"https://linkedin.com/in/{linkedin_match.group(1)}" - return social + social_accounts_list.append({"provider": "linkedin", "url": f"https://linkedin.com/in/{linkedin_match.group(1)}"}) + return social_accounts_list except Exception: return {} diff --git a/utils/user.py b/utils/user.py index 2836330a..13ed7cd3 100644 --- a/utils/user.py +++ b/utils/user.py @@ -1,3 +1,4 @@ +import asyncio import requests from typing import Annotated from fastapi import Path, HTTPException @@ -47,14 +48,12 @@ async def verify_linkedin_username( async def get_user_data(username, force=True): if not force: print("Fetching user data from cache") - res = requests.get(f"{Settings.API_URL}/user/{username}") + async with httpx.AsyncClient() as client: + res = await client.get(f"{Settings.API_URL}/user/{username}") if res.status_code == 200: return res.json() profile_data = await GitHubProfileFetcher.fetch_user_profile(username) - contributions_data = GitHubContributionsFetcher.fetch_recent_contributions( - username, - Settings.CONTRIBUTION_DAYS - ) + contributions_data = await asyncio.to_thread(GitHubContributionsFetcher.fetch_recent_contributions, username, Settings.CONTRIBUTION_DAYS) ai_generator = AIDescriptionGenerator() try: profile_summary = ai_generator.generate_profile_summary(profile_data) From 923acde6ae193b505efd2e02490730078846ae51 Mon Sep 17 00:00:00 2001 From: g0w6y Date: Thu, 26 Mar 2026 20:10:03 +0530 Subject: [PATCH 3/8] fix: final gemini review fixes (async + logging + imports + formatting) --- modules/github_fetcher.py | 53 ++++++++++++++++++++++++--------------- utils/user.py | 22 +++++++++++++--- 2 files changed, 52 insertions(+), 23 deletions(-) diff --git a/modules/github_fetcher.py b/modules/github_fetcher.py index 1eaea1e1..042e1f3d 100644 --- a/modules/github_fetcher.py +++ b/modules/github_fetcher.py @@ -1,19 +1,22 @@ import base64 -import difflib import re +import requests +import logging from datetime import datetime, timedelta import httpx -import requests from config.settings import Settings +logger = logging.getLogger(__name__) + class GitHubProfileFetcher: """Fetch comprehensive GitHub user profile data""" @staticmethod def _validate_username_pattern(username: str) -> bool: + """Validate GitHub username pattern""" if not isinstance(username, str) or not username: return False pattern = r'^[a-zA-Z0-9](?:[a-zA-Z0-9]|-(?=[a-zA-Z0-9])){0,38}$' @@ -21,6 +24,7 @@ def _validate_username_pattern(username: str) -> bool: @staticmethod def _get_github_headers() -> dict: + """Get standard GitHub API headers""" return { "Accept": "application/vnd.github.v3+json", "Authorization": f"token {Settings.get_github_token()}" @@ -28,6 +32,7 @@ def _get_github_headers() -> dict: @staticmethod async def validate_github_username(username: str) -> bool: + """Async validate GitHub username including API check""" if not GitHubProfileFetcher._validate_username_pattern(username): return False async with httpx.AsyncClient() as client: @@ -41,10 +46,11 @@ async def validate_github_username(username: str) -> bool: data = response.json() return data.get('type') == 'User' except httpx.HTTPError: - return True + return True # Fall back to pattern validation on API error @staticmethod def validate_github_username_sync(username: str) -> bool: + """Sync validate GitHub username including API check""" if not GitHubProfileFetcher._validate_username_pattern(username): return False try: @@ -57,12 +63,13 @@ def validate_github_username_sync(username: str) -> bool: data = response.json() return data.get('type') == 'User' except requests.RequestException: - return True + return True # Fall back to pattern validation on API error @staticmethod async def fetch_user_profile(username): + """Fetch detailed GitHub user profile with extended metrics""" try: - if not GitHubProfileFetcher.validate_github_username_sync(username): + if not await GitHubProfileFetcher.validate_github_username(username): raise ValueError(f"Invalid GitHub username: '{username}'") one_year_ago = (datetime.now() - timedelta(days=365)).isoformat() + 'Z' @@ -99,10 +106,9 @@ async def fetch_user_profile(username): """ } - graphql_url = "https://api.github.com/graphql" async with httpx.AsyncClient() as client: graphql_response = await client.post( - graphql_url, + "https://api.github.com/graphql", headers={ "Authorization": f"Bearer {Settings.get_github_token()}", "Content-Type": "application/json" @@ -116,12 +122,12 @@ async def fetch_user_profile(username): raise ValueError(f"User '{username}' not found or query returned no data.") pr_merged_last_year = sum( - 1 for pr in graphql_data['pullRequests']['nodes'] if - pr and datetime.strptime(pr['createdAt'], '%Y-%m-%dT%H:%M:%SZ') > datetime.now() - timedelta(days=365) + 1 for pr in graphql_data.get('pullRequests', {}).get('nodes', []) + if pr and datetime.strptime(pr['createdAt'], '%Y-%m-%dT%H:%M:%SZ') > datetime.now() - timedelta(days=365) ) issues_closed_last_year = sum( - 1 for issue in graphql_data['issues']['nodes'] if - issue and datetime.strptime(issue['createdAt'], '%Y-%m-%dT%H:%M:%SZ') > datetime.now() - timedelta(days=365) + 1 for issue in graphql_data.get('issues', {}).get('nodes', []) + if issue and datetime.strptime(issue['createdAt'], '%Y-%m-%dT%H:%M:%SZ') > datetime.now() - timedelta(days=365) ) return { @@ -142,19 +148,20 @@ async def fetch_user_profile(username): }, 'social_accounts': await GitHubProfileFetcher.social_accounts(username), 'readme_content': (graphql_data.get('repository', {}).get('object', {}).get('text', '') - if (graphql_data.get('repository') and graphql_data.get('repository', {}).get('object')) else '') + if graphql_data.get('repository') and graphql_data.get('repository', {}).get('object') else '') } except httpx.HTTPStatusError as e: return {"error": f"HTTP Error: {e.response.status_code}"} except httpx.RequestError as e: return {"error": f"Request failed: {str(e)}"} - except Exception: + except Exception as e: + logger.exception("Unexpected error in fetch_user_profile for user %s", username) return {"error": "An unexpected error occurred"} @staticmethod async def social_accounts(username): - social_accounts = [] + """Fetch social accounts of the user from GitHub API. Falls back to README on 404.""" try: base_url = f"https://api.github.com/users/{username}/social_accounts" async with httpx.AsyncClient() as client: @@ -166,17 +173,18 @@ async def social_accounts(username): } ) user_response.raise_for_status() - social_accounts = user_response.json() - return social_accounts + return user_response.json() except httpx.HTTPStatusError as e: if e.response.status_code == 404: return await GitHubProfileFetcher.get_social_from_readme(username) return [] - except Exception: + except Exception as e: + logger.exception("Unexpected error in social_accounts for user %s", username) return [] @staticmethod async def get_social_from_readme(username): + """Extract LinkedIn link from user's README.md""" try: readme_url = f"https://api.github.com/repos/{username}/{username}/readme" async with httpx.AsyncClient() as client: @@ -189,10 +197,15 @@ async def get_social_from_readme(username): ) readme_response.raise_for_status() readme_content = base64.b64decode(readme_response.json()['content']).decode('utf-8') + social_accounts_list = [] linkedin_match = re.search(r'linkedin\.com/in/([a-zA-Z0-9-]+)', readme_content, re.I) if linkedin_match: - social_accounts_list.append({"provider": "linkedin", "url": f"https://linkedin.com/in/{linkedin_match.group(1)}"}) + social_accounts_list.append({ + "provider": "linkedin", + "url": f"https://linkedin.com/in/{linkedin_match.group(1)}" + }) return social_accounts_list - except Exception: - return {} + except Exception as e: + logger.exception("Unexpected error in get_social_from_readme for user %s", username) + return [] diff --git a/utils/user.py b/utils/user.py index 13ed7cd3..af769787 100644 --- a/utils/user.py +++ b/utils/user.py @@ -1,8 +1,12 @@ import asyncio -import requests +import httpx +import logging from typing import Annotated + from fastapi import Path, HTTPException +logger = logging.getLogger(__name__) + from config.settings import Settings from modules.ai_generator import AIDescriptionGenerator from modules.contributions_fetcher import GitHubContributionsFetcher @@ -20,6 +24,7 @@ async def verify_username( ) ] ) -> str: + """Validate GitHub username format and existence""" if not await GitHubProfileFetcher.validate_github_username(username): raise HTTPException( status_code=400, @@ -37,6 +42,7 @@ async def verify_linkedin_username( ) ] ) -> str: + """Validate LinkedIn username format""" if not LinkedInProfileFetcher._validate_linkedin_username(username): raise HTTPException( status_code=400, @@ -46,21 +52,31 @@ async def verify_linkedin_username( async def get_user_data(username, force=True): + """Get complete user data (profile + contributions + AI summary)""" if not force: print("Fetching user data from cache") async with httpx.AsyncClient() as client: res = await client.get(f"{Settings.API_URL}/user/{username}") if res.status_code == 200: return res.json() + profile_data = await GitHubProfileFetcher.fetch_user_profile(username) - contributions_data = await asyncio.to_thread(GitHubContributionsFetcher.fetch_recent_contributions, username, Settings.CONTRIBUTION_DAYS) + contributions_data = await asyncio.to_thread( + GitHubContributionsFetcher.fetch_recent_contributions, + username, + Settings.CONTRIBUTION_DAYS + ) + ai_generator = AIDescriptionGenerator() try: profile_summary = ai_generator.generate_profile_summary(profile_data) - except Exception: + except Exception as e: + logger.exception("Failed to generate profile summary for user %s", username) profile_summary = None + if contributions_data: activity_summary = ai_generator.generate_activity_summary(contributions_data) profile_data['activity_summary'] = activity_summary if activity_summary else {} + profile_data['profile_summary'] = profile_summary return profile_data From 763b4ac50e7f20b9f371540c52ab224b54e1c76c Mon Sep 17 00:00:00 2001 From: g0w6y Date: Thu, 26 Mar 2026 20:21:16 +0530 Subject: [PATCH 4/8] fix: address latest gemini review (async AI calls + error sanitization + logging) --- modules/github_fetcher.py | 31 +++++++++++++++++++++++-------- utils/user.py | 18 +++++++++++++----- 2 files changed, 36 insertions(+), 13 deletions(-) diff --git a/modules/github_fetcher.py b/modules/github_fetcher.py index 042e1f3d..22cd3c11 100644 --- a/modules/github_fetcher.py +++ b/modules/github_fetcher.py @@ -1,6 +1,5 @@ import base64 import re -import requests import logging from datetime import datetime, timedelta @@ -16,7 +15,11 @@ class GitHubProfileFetcher: @staticmethod def _validate_username_pattern(username: str) -> bool: - """Validate GitHub username pattern""" + """Validate GitHub username pattern: + - Must be 1-39 characters long + - Can only contain alphanumeric characters and hyphens + - Cannot start or end with a hyphen + - Cannot have consecutive hyphens""" if not isinstance(username, str) or not username: return False pattern = r'^[a-zA-Z0-9](?:[a-zA-Z0-9]|-(?=[a-zA-Z0-9])){0,38}$' @@ -32,7 +35,10 @@ def _get_github_headers() -> dict: @staticmethod async def validate_github_username(username: str) -> bool: - """Async validate GitHub username including API check""" + """Async validate GitHub username including API check: + - Validates username pattern + - Verifies user exists on GitHub + - Confirms account is of type 'User' (not Organization)""" if not GitHubProfileFetcher._validate_username_pattern(username): return False async with httpx.AsyncClient() as client: @@ -50,7 +56,10 @@ async def validate_github_username(username: str) -> bool: @staticmethod def validate_github_username_sync(username: str) -> bool: - """Sync validate GitHub username including API check""" + """Sync validate GitHub username including API check: + - Validates username pattern + - Verifies user exists on GitHub + - Confirms account is of type 'User' (not Organization)""" if not GitHubProfileFetcher._validate_username_pattern(username): return False try: @@ -66,7 +75,7 @@ def validate_github_username_sync(username: str) -> bool: return True # Fall back to pattern validation on API error @staticmethod - async def fetch_user_profile(username): + async def fetch_user_profile(username: str) -> dict: """Fetch detailed GitHub user profile with extended metrics""" try: if not await GitHubProfileFetcher.validate_github_username(username): @@ -154,14 +163,20 @@ async def fetch_user_profile(username): except httpx.HTTPStatusError as e: return {"error": f"HTTP Error: {e.response.status_code}"} except httpx.RequestError as e: - return {"error": f"Request failed: {str(e)}"} + logger.exception("Request failed for user %s", username) + return {"error": "A network error occurred while fetching GitHub data"} except Exception as e: logger.exception("Unexpected error in fetch_user_profile for user %s", username) return {"error": "An unexpected error occurred"} @staticmethod async def social_accounts(username): - """Fetch social accounts of the user from GitHub API. Falls back to README on 404.""" + """Fetch social accounts of the user from GitHub API. + If the API returns a 404, it attempts to extract social links from the user's README.md. + + Returns: + list: A list of dictionaries, each representing a social account. + """ try: base_url = f"https://api.github.com/users/{username}/social_accounts" async with httpx.AsyncClient() as client: @@ -184,7 +199,7 @@ async def social_accounts(username): @staticmethod async def get_social_from_readme(username): - """Extract LinkedIn link from user's README.md""" + """Extract LinkedIn link from user's README.md (simplified version)""" try: readme_url = f"https://api.github.com/repos/{username}/{username}/readme" async with httpx.AsyncClient() as client: diff --git a/utils/user.py b/utils/user.py index af769787..4dfbac1f 100644 --- a/utils/user.py +++ b/utils/user.py @@ -51,10 +51,10 @@ async def verify_linkedin_username( return username -async def get_user_data(username, force=True): +async def get_user_data(username: str, force: bool = True) -> dict: """Get complete user data (profile + contributions + AI summary)""" if not force: - print("Fetching user data from cache") + logger.debug("Fetching user data from cache for: %s", username) async with httpx.AsyncClient() as client: res = await client.get(f"{Settings.API_URL}/user/{username}") if res.status_code == 200: @@ -69,14 +69,22 @@ async def get_user_data(username, force=True): ai_generator = AIDescriptionGenerator() try: - profile_summary = ai_generator.generate_profile_summary(profile_data) + profile_summary = await asyncio.to_thread( + ai_generator.generate_profile_summary, profile_data + ) except Exception as e: logger.exception("Failed to generate profile summary for user %s", username) profile_summary = None if contributions_data: - activity_summary = ai_generator.generate_activity_summary(contributions_data) - profile_data['activity_summary'] = activity_summary if activity_summary else {} + try: + activity_summary = await asyncio.to_thread( + ai_generator.generate_activity_summary, contributions_data + ) + profile_data['activity_summary'] = activity_summary if activity_summary else {} + except Exception as e: + logger.exception("Failed to generate activity summary for user %s", username) + profile_data['activity_summary'] = {} profile_data['profile_summary'] = profile_summary return profile_data From c8dc7e7231a87e306bb00e5feb5482378f678a02 Mon Sep 17 00:00:00 2001 From: g0w6y Date: Thu, 26 Mar 2026 20:27:08 +0530 Subject: [PATCH 5/8] fix: address latest gemini review (dead code removal + safe dict access + error check) --- modules/github_fetcher.py | 35 +++++++++-------------------------- utils/user.py | 5 +++++ 2 files changed, 14 insertions(+), 26 deletions(-) diff --git a/modules/github_fetcher.py b/modules/github_fetcher.py index 22cd3c11..da0ebca0 100644 --- a/modules/github_fetcher.py +++ b/modules/github_fetcher.py @@ -54,26 +54,6 @@ async def validate_github_username(username: str) -> bool: except httpx.HTTPError: return True # Fall back to pattern validation on API error - @staticmethod - def validate_github_username_sync(username: str) -> bool: - """Sync validate GitHub username including API check: - - Validates username pattern - - Verifies user exists on GitHub - - Confirms account is of type 'User' (not Organization)""" - if not GitHubProfileFetcher._validate_username_pattern(username): - return False - try: - response = requests.get( - f'https://api.github.com/users/{username}', - headers=GitHubProfileFetcher._get_github_headers() - ) - if response.status_code != 200: - return False - data = response.json() - return data.get('type') == 'User' - except requests.RequestException: - return True # Fall back to pattern validation on API error - @staticmethod async def fetch_user_profile(username: str) -> dict: """Fetch detailed GitHub user profile with extended metrics""" @@ -146,14 +126,17 @@ async def fetch_user_profile(username: str) -> dict: 'location': graphql_data.get('location', ''), 'avatar_url': graphql_data.get('avatarUrl', ''), 'profile_url': graphql_data.get('url', ''), - 'followers': graphql_data['followers']['totalCount'], - 'following': graphql_data['following']['totalCount'], - 'public_repos': graphql_data['repositories']['totalCount'], + 'followers': graphql_data.get('followers', {}).get('totalCount', 0), + 'following': graphql_data.get('following', {}).get('totalCount', 0), + 'public_repos': graphql_data.get('repositories', {}).get('totalCount', 0), 'pull_requests_merged': pr_merged_last_year if pr_merged_last_year < 100 else f"{100}+", 'issues_closed': issues_closed_last_year if issues_closed_last_year < 100 else f"{100}+", 'achievements': { - 'total_contributions': graphql_data['contributionsCollection']['contributionCalendar']['totalContributions'], - 'repositories_contributed_to': graphql_data['repositoriesContributedTo']['totalCount'], + 'total_contributions': graphql_data.get('contributionsCollection', {}) + .get('contributionCalendar', {}) + .get('totalContributions', 0), + 'repositories_contributed_to': graphql_data.get('repositoriesContributedTo', {}) + .get('totalCount', 0), }, 'social_accounts': await GitHubProfileFetcher.social_accounts(username), 'readme_content': (graphql_data.get('repository', {}).get('object', {}).get('text', '') @@ -199,7 +182,7 @@ async def social_accounts(username): @staticmethod async def get_social_from_readme(username): - """Extract LinkedIn link from user's README.md (simplified version)""" + """Extract LinkedIn link from user's README.md (simplified - only LinkedIn for reliability)""" try: readme_url = f"https://api.github.com/repos/{username}/{username}/readme" async with httpx.AsyncClient() as client: diff --git a/utils/user.py b/utils/user.py index 4dfbac1f..fb86e213 100644 --- a/utils/user.py +++ b/utils/user.py @@ -61,6 +61,11 @@ async def get_user_data(username: str, force: bool = True) -> dict: return res.json() profile_data = await GitHubProfileFetcher.fetch_user_profile(username) + + # Early return if GitHub fetch failed + if "error" in profile_data: + return profile_data + contributions_data = await asyncio.to_thread( GitHubContributionsFetcher.fetch_recent_contributions, username, From 8d86d8d5d43946cb06a95cd8383af7e28aa75014 Mon Sep 17 00:00:00 2001 From: g0w6y Date: Thu, 26 Mar 2026 20:35:15 +0530 Subject: [PATCH 6/8] fix: final gemini performance tweaks (pre-calculate date + asyncio.gather) --- modules/github_fetcher.py | 5 +++-- utils/user.py | 36 ++++++++++++++++++------------------ 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/modules/github_fetcher.py b/modules/github_fetcher.py index da0ebca0..c0090914 100644 --- a/modules/github_fetcher.py +++ b/modules/github_fetcher.py @@ -62,6 +62,7 @@ async def fetch_user_profile(username: str) -> dict: raise ValueError(f"Invalid GitHub username: '{username}'") one_year_ago = (datetime.now() - timedelta(days=365)).isoformat() + 'Z' + one_year_ago_dt = datetime.now() - timedelta(days=365) # pre-calculated for loops graphql_query = { "query": f""" @@ -112,11 +113,11 @@ async def fetch_user_profile(username: str) -> dict: pr_merged_last_year = sum( 1 for pr in graphql_data.get('pullRequests', {}).get('nodes', []) - if pr and datetime.strptime(pr['createdAt'], '%Y-%m-%dT%H:%M:%SZ') > datetime.now() - timedelta(days=365) + if pr and datetime.strptime(pr['createdAt'], '%Y-%m-%dT%H:%M:%SZ') > one_year_ago_dt ) issues_closed_last_year = sum( 1 for issue in graphql_data.get('issues', {}).get('nodes', []) - if issue and datetime.strptime(issue['createdAt'], '%Y-%m-%dT%H:%M:%SZ') > datetime.now() - timedelta(days=365) + if issue and datetime.strptime(issue['createdAt'], '%Y-%m-%dT%H:%M:%SZ') > one_year_ago_dt ) return { diff --git a/utils/user.py b/utils/user.py index fb86e213..609316dd 100644 --- a/utils/user.py +++ b/utils/user.py @@ -60,36 +60,36 @@ async def get_user_data(username: str, force: bool = True) -> dict: if res.status_code == 200: return res.json() - profile_data = await GitHubProfileFetcher.fetch_user_profile(username) + # Parallel fetch profile + contributions + profile_data, contributions_data = await asyncio.gather( + GitHubProfileFetcher.fetch_user_profile(username), + asyncio.to_thread( + GitHubContributionsFetcher.fetch_recent_contributions, + username, + Settings.CONTRIBUTION_DAYS + ) + ) # Early return if GitHub fetch failed if "error" in profile_data: return profile_data - contributions_data = await asyncio.to_thread( - GitHubContributionsFetcher.fetch_recent_contributions, - username, - Settings.CONTRIBUTION_DAYS - ) - ai_generator = AIDescriptionGenerator() + + # Parallel AI summary generation try: - profile_summary = await asyncio.to_thread( - ai_generator.generate_profile_summary, profile_data + profile_summary, activity_summary = await asyncio.gather( + asyncio.to_thread(ai_generator.generate_profile_summary, profile_data), + asyncio.to_thread(ai_generator.generate_activity_summary, contributions_data) if contributions_data else None, + return_exceptions=True ) except Exception as e: - logger.exception("Failed to generate profile summary for user %s", username) + logger.exception("Failed to generate AI summaries for user %s", username) profile_summary = None + activity_summary = None if contributions_data: - try: - activity_summary = await asyncio.to_thread( - ai_generator.generate_activity_summary, contributions_data - ) - profile_data['activity_summary'] = activity_summary if activity_summary else {} - except Exception as e: - logger.exception("Failed to generate activity summary for user %s", username) - profile_data['activity_summary'] = {} + profile_data['activity_summary'] = activity_summary if activity_summary else {} profile_data['profile_summary'] = profile_summary return profile_data From 9332c77a62ad5dcd9f74ad2e029a8141d10e4a4e Mon Sep 17 00:00:00 2001 From: g0w6y Date: Thu, 26 Mar 2026 20:50:18 +0530 Subject: [PATCH 7/8] fix: final gemini tweaks (utcnow + robust AI gather) --- modules/github_fetcher.py | 52 +++++++++++++++------------------------ utils/user.py | 23 ++++++++--------- 2 files changed, 30 insertions(+), 45 deletions(-) diff --git a/modules/github_fetcher.py b/modules/github_fetcher.py index c0090914..0671f70c 100644 --- a/modules/github_fetcher.py +++ b/modules/github_fetcher.py @@ -52,7 +52,7 @@ async def validate_github_username(username: str) -> bool: data = response.json() return data.get('type') == 'User' except httpx.HTTPError: - return True # Fall back to pattern validation on API error + return True @staticmethod async def fetch_user_profile(username: str) -> dict: @@ -61,8 +61,8 @@ async def fetch_user_profile(username: str) -> dict: if not await GitHubProfileFetcher.validate_github_username(username): raise ValueError(f"Invalid GitHub username: '{username}'") - one_year_ago = (datetime.now() - timedelta(days=365)).isoformat() + 'Z' - one_year_ago_dt = datetime.now() - timedelta(days=365) # pre-calculated for loops + one_year_ago_dt = datetime.utcnow() - timedelta(days=365) + one_year_ago = one_year_ago_dt.isoformat() + 'Z' graphql_query = { "query": f""" @@ -109,7 +109,7 @@ async def fetch_user_profile(username: str) -> dict: graphql_data = graphql_response.json().get('data', {}).get('user', {}) if not graphql_data: - raise ValueError(f"User '{username}' not found or query returned no data.") + raise ValueError(f"User '{username}' not found") pr_merged_last_year = sum( 1 for pr in graphql_data.get('pullRequests', {}).get('nodes', []) @@ -155,56 +155,44 @@ async def fetch_user_profile(username: str) -> dict: @staticmethod async def social_accounts(username): - """Fetch social accounts of the user from GitHub API. - If the API returns a 404, it attempts to extract social links from the user's README.md. - - Returns: - list: A list of dictionaries, each representing a social account. - """ + """Fetch social accounts. Returns list or {"error": "..."} for consistency.""" try: base_url = f"https://api.github.com/users/{username}/social_accounts" async with httpx.AsyncClient() as client: - user_response = await client.get( + resp = await client.get( base_url, headers={ "Accept": "application/vnd.github.v3+json", - "Authorization": f"token {Settings.get_github_token()}", + "Authorization": f"token {Settings.get_github_token()}" } ) - user_response.raise_for_status() - return user_response.json() + resp.raise_for_status() + return resp.json() except httpx.HTTPStatusError as e: if e.response.status_code == 404: return await GitHubProfileFetcher.get_social_from_readme(username) - return [] + return {"error": f"HTTP Error: {e.response.status_code}"} except Exception as e: logger.exception("Unexpected error in social_accounts for user %s", username) - return [] + return {"error": "Failed to fetch social accounts"} @staticmethod async def get_social_from_readme(username): - """Extract LinkedIn link from user's README.md (simplified - only LinkedIn for reliability)""" + """Extract LinkedIn link from README (simplified for reliability)""" try: - readme_url = f"https://api.github.com/repos/{username}/{username}/readme" + url = f"https://api.github.com/repos/{username}/{username}/readme" async with httpx.AsyncClient() as client: - readme_response = await client.get( - readme_url, + r = await client.get( + url, headers={ "Accept": "application/vnd.github.v3+json", - "Authorization": f"token {Settings.get_github_token()}", + "Authorization": f"token {Settings.get_github_token()}" } ) - readme_response.raise_for_status() - readme_content = base64.b64decode(readme_response.json()['content']).decode('utf-8') - - social_accounts_list = [] - linkedin_match = re.search(r'linkedin\.com/in/([a-zA-Z0-9-]+)', readme_content, re.I) - if linkedin_match: - social_accounts_list.append({ - "provider": "linkedin", - "url": f"https://linkedin.com/in/{linkedin_match.group(1)}" - }) - return social_accounts_list + r.raise_for_status() + content = base64.b64decode(r.json()['content']).decode('utf-8') + match = re.search(r'linkedin\.com/in/([a-zA-Z0-9-]+)', content, re.I) + return [{"provider": "linkedin", "url": f"https://linkedin.com/in/{match.group(1)}"}] if match else [] except Exception as e: logger.exception("Unexpected error in get_social_from_readme for user %s", username) return [] diff --git a/utils/user.py b/utils/user.py index 609316dd..fb7fb467 100644 --- a/utils/user.py +++ b/utils/user.py @@ -60,7 +60,7 @@ async def get_user_data(username: str, force: bool = True) -> dict: if res.status_code == 200: return res.json() - # Parallel fetch profile + contributions + # Parallel fetch profile_data, contributions_data = await asyncio.gather( GitHubProfileFetcher.fetch_user_profile(username), asyncio.to_thread( @@ -70,23 +70,20 @@ async def get_user_data(username: str, force: bool = True) -> dict: ) ) - # Early return if GitHub fetch failed if "error" in profile_data: return profile_data ai_generator = AIDescriptionGenerator() - # Parallel AI summary generation - try: - profile_summary, activity_summary = await asyncio.gather( - asyncio.to_thread(ai_generator.generate_profile_summary, profile_data), - asyncio.to_thread(ai_generator.generate_activity_summary, contributions_data) if contributions_data else None, - return_exceptions=True - ) - except Exception as e: - logger.exception("Failed to generate AI summaries for user %s", username) - profile_summary = None - activity_summary = None + # Robust parallel AI summaries + ai_tasks = [asyncio.to_thread(ai_generator.generate_profile_summary, profile_data)] + if contributions_data: + ai_tasks.append(asyncio.to_thread(ai_generator.generate_activity_summary, contributions_data)) + + results = await asyncio.gather(*ai_tasks, return_exceptions=True) + + profile_summary = results[0] if not isinstance(results[0], BaseException) else None + activity_summary = results[1] if len(results) > 1 and not isinstance(results[1], BaseException) else None if contributions_data: profile_data['activity_summary'] = activity_summary if activity_summary else {} From 3345b1fe5bb0d1dc342f084dbc25837c95c27db2 Mon Sep 17 00:00:00 2001 From: g0w6y Date: Thu, 26 Mar 2026 21:09:09 +0530 Subject: [PATCH 8/8] fix: final gemini cleanup (consistent returns + type hints + utcnow) --- modules/github_fetcher.py | 54 ++++++++++++++------------------------- utils/user.py | 24 +++-------------- 2 files changed, 23 insertions(+), 55 deletions(-) diff --git a/modules/github_fetcher.py b/modules/github_fetcher.py index 0671f70c..facf8b09 100644 --- a/modules/github_fetcher.py +++ b/modules/github_fetcher.py @@ -52,7 +52,7 @@ async def validate_github_username(username: str) -> bool: data = response.json() return data.get('type') == 'User' except httpx.HTTPError: - return True + return False # API failed → treat as invalid (safer) @staticmethod async def fetch_user_profile(username: str) -> dict: @@ -68,11 +68,7 @@ async def fetch_user_profile(username: str) -> dict: "query": f""" query {{ user(login: "{username}") {{ - name - bio - location - avatarUrl - url + name bio location avatarUrl url followers {{ totalCount }} following {{ totalCount }} repository(name: "{username}") {{ @@ -97,28 +93,19 @@ async def fetch_user_profile(username: str) -> dict: } async with httpx.AsyncClient() as client: - graphql_response = await client.post( + resp = await client.post( "https://api.github.com/graphql", - headers={ - "Authorization": f"Bearer {Settings.get_github_token()}", - "Content-Type": "application/json" - }, + headers={"Authorization": f"Bearer {Settings.get_github_token()}", "Content-Type": "application/json"}, json=graphql_query ) - graphql_response.raise_for_status() + resp.raise_for_status() - graphql_data = graphql_response.json().get('data', {}).get('user', {}) + graphql_data = resp.json().get('data', {}).get('user', {}) if not graphql_data: raise ValueError(f"User '{username}' not found") - pr_merged_last_year = sum( - 1 for pr in graphql_data.get('pullRequests', {}).get('nodes', []) - if pr and datetime.strptime(pr['createdAt'], '%Y-%m-%dT%H:%M:%SZ') > one_year_ago_dt - ) - issues_closed_last_year = sum( - 1 for issue in graphql_data.get('issues', {}).get('nodes', []) - if issue and datetime.strptime(issue['createdAt'], '%Y-%m-%dT%H:%M:%SZ') > one_year_ago_dt - ) + pr_merged = sum(1 for pr in graphql_data.get('pullRequests', {}).get('nodes', []) if pr and datetime.strptime(pr['createdAt'], '%Y-%m-%dT%H:%M:%SZ') > one_year_ago_dt) + issues_closed = sum(1 for issue in graphql_data.get('issues', {}).get('nodes', []) if issue and datetime.strptime(issue['createdAt'], '%Y-%m-%dT%H:%M:%SZ') > one_year_ago_dt) return { 'username': username, @@ -130,18 +117,14 @@ async def fetch_user_profile(username: str) -> dict: 'followers': graphql_data.get('followers', {}).get('totalCount', 0), 'following': graphql_data.get('following', {}).get('totalCount', 0), 'public_repos': graphql_data.get('repositories', {}).get('totalCount', 0), - 'pull_requests_merged': pr_merged_last_year if pr_merged_last_year < 100 else f"{100}+", - 'issues_closed': issues_closed_last_year if issues_closed_last_year < 100 else f"{100}+", + 'pull_requests_merged': pr_merged if pr_merged < 100 else f"{100}+", + 'issues_closed': issues_closed if issues_closed < 100 else f"{100}+", 'achievements': { - 'total_contributions': graphql_data.get('contributionsCollection', {}) - .get('contributionCalendar', {}) - .get('totalContributions', 0), - 'repositories_contributed_to': graphql_data.get('repositoriesContributedTo', {}) - .get('totalCount', 0), + 'total_contributions': graphql_data.get('contributionsCollection', {}).get('contributionCalendar', {}).get('totalContributions', 0), + 'repositories_contributed_to': graphql_data.get('repositoriesContributedTo', {}).get('totalCount', 0), }, 'social_accounts': await GitHubProfileFetcher.social_accounts(username), - 'readme_content': (graphql_data.get('repository', {}).get('object', {}).get('text', '') - if graphql_data.get('repository') and graphql_data.get('repository', {}).get('object') else '') + 'readme_content': graphql_data.get('repository', {}).get('object', {}).get('text', '') } except httpx.HTTPStatusError as e: @@ -154,8 +137,8 @@ async def fetch_user_profile(username: str) -> dict: return {"error": "An unexpected error occurred"} @staticmethod - async def social_accounts(username): - """Fetch social accounts. Returns list or {"error": "..."} for consistency.""" + async def social_accounts(username) -> list: + """Fetch social accounts. Always returns list (empty on error).""" try: base_url = f"https://api.github.com/users/{username}/social_accounts" async with httpx.AsyncClient() as client: @@ -171,13 +154,14 @@ async def social_accounts(username): except httpx.HTTPStatusError as e: if e.response.status_code == 404: return await GitHubProfileFetcher.get_social_from_readme(username) - return {"error": f"HTTP Error: {e.response.status_code}"} + logger.warning("HTTP error fetching social accounts for %s: %s", username, e) + return [] except Exception as e: logger.exception("Unexpected error in social_accounts for user %s", username) - return {"error": "Failed to fetch social accounts"} + return [] @staticmethod - async def get_social_from_readme(username): + async def get_social_from_readme(username) -> list: """Extract LinkedIn link from README (simplified for reliability)""" try: url = f"https://api.github.com/repos/{username}/{username}/readme" diff --git a/utils/user.py b/utils/user.py index fb7fb467..c19a882a 100644 --- a/utils/user.py +++ b/utils/user.py @@ -26,28 +26,19 @@ async def verify_username( ) -> str: """Validate GitHub username format and existence""" if not await GitHubProfileFetcher.validate_github_username(username): - raise HTTPException( - status_code=400, - detail="Invalid GitHub username. Usernames must be 1-39 characters long and can only contain alphanumeric characters and single hyphens." - ) + raise HTTPException(status_code=400, detail="Invalid GitHub username.") return username async def verify_linkedin_username( username: Annotated[ str, - Path( - min_length=1, - pattern=r'^[\w\-]+$' - ) + Path(min_length=1, pattern=r'^[\w\-]+$') ] ) -> str: """Validate LinkedIn username format""" if not LinkedInProfileFetcher._validate_linkedin_username(username): - raise HTTPException( - status_code=400, - detail="Invalid LinkedIn username. Username can only contain letters, numbers, and hyphens." - ) + raise HTTPException(status_code=400, detail="Invalid LinkedIn username.") return username @@ -60,22 +51,15 @@ async def get_user_data(username: str, force: bool = True) -> dict: if res.status_code == 200: return res.json() - # Parallel fetch profile_data, contributions_data = await asyncio.gather( GitHubProfileFetcher.fetch_user_profile(username), - asyncio.to_thread( - GitHubContributionsFetcher.fetch_recent_contributions, - username, - Settings.CONTRIBUTION_DAYS - ) + asyncio.to_thread(GitHubContributionsFetcher.fetch_recent_contributions, username, Settings.CONTRIBUTION_DAYS) ) if "error" in profile_data: return profile_data ai_generator = AIDescriptionGenerator() - - # Robust parallel AI summaries ai_tasks = [asyncio.to_thread(ai_generator.generate_profile_summary, profile_data)] if contributions_data: ai_tasks.append(asyncio.to_thread(ai_generator.generate_activity_summary, contributions_data))