mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-07 05:40:17 +00:00
Set page size to 100 to reduce requests required to Github API to 1/3
- Default is 30, so the number of paginated requests required to get all items (commits, files) will reduce by 67%.
- No need to increase the page size for the get tree Github API request from `get_markdown_files`. The get tree Github API doesn't support pagination and returns up to 100K items in a single response. This should be way more than enough for our current use-cases.
This commit is contained in:
@@ -117,11 +117,12 @@ class GithubToJsonl(TextToJsonl):
|
|||||||
# Get commit messages from the repository using the Github API
|
# Get commit messages from the repository using the Github API
|
||||||
commits_url = f"{self.repo_url}/commits"
|
commits_url = f"{self.repo_url}/commits"
|
||||||
headers = {"Authorization": f"token {self.config.pat_token}"}
|
headers = {"Authorization": f"token {self.config.pat_token}"}
|
||||||
|
params = {"per_page": 100}
|
||||||
commits = []
|
commits = []
|
||||||
|
|
||||||
while commits_url is not None:
|
while commits_url is not None:
|
||||||
# Get the next page of commits
|
# Get the next page of commits
|
||||||
response = requests.get(commits_url, headers=headers)
|
response = requests.get(commits_url, headers=headers, params=params)
|
||||||
raw_commits = response.json()
|
raw_commits = response.json()
|
||||||
|
|
||||||
# Wait for rate limit reset if needed
|
# Wait for rate limit reset if needed
|
||||||
|
|||||||
Reference in New Issue
Block a user