mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-07 21:29:13 +00:00
Extract logic to get file content from Github into a separate method
This commit is contained in:
@@ -98,16 +98,23 @@ class GithubToJsonl(TextToJsonl):
|
|||||||
# Create URL for each markdown file on Github
|
# Create URL for each markdown file on Github
|
||||||
url_path = f'https://github.com/{self.config.repo_owner}/{self.config.repo_name}/blob/{self.config.repo_branch}/{item["path"]}'
|
url_path = f'https://github.com/{self.config.repo_owner}/{self.config.repo_name}/blob/{self.config.repo_branch}/{item["path"]}'
|
||||||
|
|
||||||
# Get text from each markdown file
|
|
||||||
file_content_url = f'{self.repo_url}/contents/{item["path"]}'
|
|
||||||
headers["Accept"] = "application/vnd.github.v3.raw"
|
|
||||||
markdown_file_contents = requests.get(file_content_url, headers=headers).content.decode("utf-8")
|
|
||||||
|
|
||||||
# Add markdown file contents and URL to list
|
# Add markdown file contents and URL to list
|
||||||
markdown_files += [{"content": markdown_file_contents, "path": url_path}]
|
markdown_files += [{"content": self.get_file_contents(item["url"]), "path": url_path}]
|
||||||
|
|
||||||
return markdown_files
|
return markdown_files
|
||||||
|
|
||||||
|
def get_file_contents(self, file_url):
    """Download a file through the Github API and return its body as text.

    Args:
        file_url: URL of the file to request from the Github API.

    Returns:
        Whatever ``wait_for_rate_limit_reset`` hands back when that is not
        ``None``; otherwise the body of the response, decoded as UTF-8.
    """
    # The raw media type asks Github for the file body itself rather than
    # a JSON description of the file.
    request_headers = {
        "Authorization": f"{self.config.pat_token}",
        "Accept": "application/vnd.github.v3.raw",
    }
    api_response = requests.get(file_url, headers=request_headers)

    # Wait for rate limit reset if needed. The helper is given this method
    # and its argument; presumably it re-invokes them after waiting and
    # returns None when no wait was required -- confirm against the helper.
    retried = self.wait_for_rate_limit_reset(api_response, self.get_file_contents, file_url)
    if retried is None:
        return api_response.content.decode("utf-8")
    return retried
|
||||||
|
|
||||||
def get_commits(self) -> List[Dict]:
|
def get_commits(self) -> List[Dict]:
|
||||||
# Get commit messages from the repository using the Github API
|
# Get commit messages from the repository using the Github API
|
||||||
headers = {"Authorization": f"{self.config.pat_token}"}
|
headers = {"Authorization": f"{self.config.pat_token}"}
|
||||||
|
|||||||
Reference in New Issue
Block a user