mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-09 13:25:11 +00:00
Fix passing auth token to Github API to increase rate limits by x85
- Previously, the "token" prefix was not added to the PAT in the Authorization header, so requests were treated as unauthenticated.
- Unauthenticated requests to the GitHub API are limited to 60 requests/hour; authenticated requests are allowed 5000 requests/hour.
This commit is contained in:
@@ -77,12 +77,9 @@ class GithubToJsonl(TextToJsonl):
|
|||||||
return entries_with_ids
|
return entries_with_ids
|
||||||
|
|
||||||
def get_markdown_files(self):
|
def get_markdown_files(self):
|
||||||
# set the url to get the contents of the repository
|
# Get the contents of the repository
|
||||||
repo_content_url = f"{self.repo_url}/git/trees/{self.config.repo_branch}"
|
repo_content_url = f"{self.repo_url}/git/trees/{self.config.repo_branch}"
|
||||||
# set the headers to include the authentication token
|
headers = {"Authorization": f"token {self.config.pat_token}"}
|
||||||
headers = {"Authorization": f"{self.config.pat_token}"}
|
|
||||||
|
|
||||||
# get the contents of the repository
|
|
||||||
response = requests.get(repo_content_url, headers=headers)
|
response = requests.get(repo_content_url, headers=headers)
|
||||||
contents = response.json()
|
contents = response.json()
|
||||||
|
|
||||||
@@ -91,6 +88,7 @@ class GithubToJsonl(TextToJsonl):
|
|||||||
if result is not None:
|
if result is not None:
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
# Extract markdown files from the repository
|
||||||
markdown_files = []
|
markdown_files = []
|
||||||
for item in contents["tree"]:
|
for item in contents["tree"]:
|
||||||
# Find all markdown files in the repository
|
# Find all markdown files in the repository
|
||||||
@@ -105,7 +103,7 @@ class GithubToJsonl(TextToJsonl):
|
|||||||
|
|
||||||
def get_file_contents(self, file_url):
|
def get_file_contents(self, file_url):
|
||||||
# Get text from each markdown file
|
# Get text from each markdown file
|
||||||
headers = {"Authorization": f"{self.config.pat_token}", "Accept": "application/vnd.github.v3.raw"}
|
headers = {"Authorization": f"token {self.config.pat_token}", "Accept": "application/vnd.github.v3.raw"}
|
||||||
response = requests.get(file_url, headers=headers)
|
response = requests.get(file_url, headers=headers)
|
||||||
|
|
||||||
# Wait for rate limit reset if needed
|
# Wait for rate limit reset if needed
|
||||||
@@ -117,8 +115,8 @@ class GithubToJsonl(TextToJsonl):
|
|||||||
|
|
||||||
def get_commits(self) -> List[Dict]:
|
def get_commits(self) -> List[Dict]:
|
||||||
# Get commit messages from the repository using the Github API
|
# Get commit messages from the repository using the Github API
|
||||||
headers = {"Authorization": f"{self.config.pat_token}"}
|
|
||||||
commits_url = f"{self.repo_url}/commits"
|
commits_url = f"{self.repo_url}/commits"
|
||||||
|
headers = {"Authorization": f"token {self.config.pat_token}"}
|
||||||
commits = []
|
commits = []
|
||||||
|
|
||||||
while commits_url is not None:
|
while commits_url is not None:
|
||||||
|
|||||||
Reference in New Issue
Block a user