mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-06 21:29:12 +00:00
Split entries by max tokens while converting Markdown entries to JSONL
This commit is contained in:
@@ -35,6 +35,12 @@ class MarkdownToJsonl(TextToJsonl):
|
|||||||
end = time.time()
|
end = time.time()
|
||||||
logger.debug(f"Parse entries from Markdown files into dictionaries: {end - start} seconds")
|
logger.debug(f"Parse entries from Markdown files into dictionaries: {end - start} seconds")
|
||||||
|
|
||||||
|
# Split entries by max tokens supported by model
|
||||||
|
start = time.time()
|
||||||
|
current_entries = self.split_entries_by_max_tokens(current_entries, max_tokens=256)
|
||||||
|
end = time.time()
|
||||||
|
logger.debug(f"Split entries by max token size supported by model: {end - start} seconds")
|
||||||
|
|
||||||
# Identify, mark and merge any new entries with previous entries
|
# Identify, mark and merge any new entries with previous entries
|
||||||
start = time.time()
|
start = time.time()
|
||||||
if not previous_entries:
|
if not previous_entries:
|
||||||
|
|||||||
Reference in New Issue
Block a user