mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-07 13:23:15 +00:00
Use a more accurate model for symmetric semantic search
- The all-MiniLM-L6-v2 is more accurate
- The exact previous model isn't benchmarked but based on the
performance of the closest model to it. Seems like the new model
maybe similar in speed and size
- On very preliminary evaluation of the model, the new model seems
faster, with pretty decent results
This commit is contained in:
@@ -59,7 +59,7 @@ if __name__ == '__main__':
|
||||
parser.add_argument('--dataset', type=str, default="./.dataset", help="Path to dataset to generate index from")
|
||||
parser.add_argument('--column', type=str, default="DATA", help="Name of dataset column to index")
|
||||
parser.add_argument('--num_results', type=int, default=10, help="Number of most suitable matches to show")
|
||||
parser.add_argument('--model_name', type=str, default='paraphrase-distilroberta-base-v1', help="Specify name of the SentenceTransformer model to use for encoding")
|
||||
parser.add_argument('--model_name', type=str, default='all-MiniLM-L6-v2', help="Specify name of the SentenceTransformer model to use for encoding")
|
||||
args = parser.parse_args()
|
||||
|
||||
model = SentenceTransformer(args.model_name)
|
||||
|
||||
Reference in New Issue
Block a user