Allow using OpenAI (compatible) API for Speech to Text transcription

This commit is contained in:
Debanjum
2025-01-15 18:31:37 +07:00
parent 182c49b41c
commit f8b887cabd
4 changed files with 39 additions and 4 deletions

View File

@@ -1288,7 +1288,7 @@ class ConversationAdapters:
@staticmethod
async def get_speech_to_text_config():
    """Return the first configured speech-to-text model option, or None.

    Prefetches the related ``ai_model_api`` row so callers can read the
    API key / base URL (e.g. the transcribe endpoint) without an extra
    async DB round-trip.
    """
    # The diff artifact left two return statements here; only the
    # prefetch_related variant (the commit's intent) is kept.
    return await SpeechToTextModelOptions.objects.filter().prefetch_related("ai_model_api").afirst()
@staticmethod
@arequire_valid_user

View File

@@ -0,0 +1,24 @@
# Generated by Django 5.0.10 on 2025-01-15 11:05
import django.db.models.deletion
from django.db import migrations, models
class Migration(migrations.Migration):
    # Adds an optional ForeignKey from SpeechToTextModelOptions to AiModelApi
    # so a speech-to-text model can be served by an OpenAI-compatible API
    # (key + base URL) instead of only the server's default client.

    dependencies = [
        ("database", "0079_searchmodelconfig_embeddings_inference_endpoint_type"),
    ]

    operations = [
        migrations.AddField(
            model_name="speechtotextmodeloptions",
            name="ai_model_api",
            field=models.ForeignKey(
                blank=True,  # optional in forms/admin
                default=None,  # existing rows keep no API config
                null=True,  # NULL = fall back to the server's default client
                on_delete=django.db.models.deletion.CASCADE,  # deleting the API config removes dependent options
                to="database.aimodelapi",
            ),
        ),
    ]

View File

@@ -566,6 +566,7 @@ class SpeechToTextModelOptions(DbBaseModel):
# Interior of SpeechToTextModelOptions (class header outside this view).
# Name of the speech-to-text model to use (e.g. a whisper model id); "base" is the offline default.
model_name = models.CharField(max_length=200, default="base")
# Whether transcription runs offline or via an OpenAI(-compatible) API.
model_type = models.CharField(max_length=200, choices=ModelType.choices, default=ModelType.OFFLINE)
# Optional OpenAI-compatible API config (key + base URL). NULL means the
# server falls back to its default OpenAI client for OPENAI-type models.
ai_model_api = models.ForeignKey(AiModelApi, on_delete=models.CASCADE, default=None, null=True, blank=True)

def __str__(self):
    # Human-readable label: "<model_name> - <model_type>".
    return f"{self.model_name} - {self.model_type}"

View File

@@ -9,6 +9,7 @@ import uuid
from typing import Any, Callable, List, Optional, Set, Union
import cron_descriptor
import openai
import pytz
from apscheduler.job import Job
from apscheduler.triggers.cron import CronTrigger
@@ -264,12 +265,21 @@ async def transcribe(
if not speech_to_text_config:
# If the user has not configured a speech to text model, return an unsupported on server error
status_code = 501
elif state.openai_client and speech_to_text_config.model_type == SpeechToTextModelOptions.ModelType.OPENAI:
speech2text_model = speech_to_text_config.model_name
user_message = await transcribe_audio(audio_file, speech2text_model, client=state.openai_client)
elif speech_to_text_config.model_type == SpeechToTextModelOptions.ModelType.OFFLINE:
speech2text_model = speech_to_text_config.model_name
user_message = await transcribe_audio_offline(audio_filename, speech2text_model)
elif speech_to_text_config.model_type == SpeechToTextModelOptions.ModelType.OPENAI:
speech2text_model = speech_to_text_config.model_name
if speech_to_text_config.ai_model_api:
api_key = speech_to_text_config.ai_model_api.api_key
api_base_url = speech_to_text_config.ai_model_api.api_base_url
openai_client = openai.OpenAI(api_key=api_key, base_url=api_base_url)
elif state.openai_client:
openai_client = state.openai_client
if openai_client:
user_message = await transcribe_audio(audio_file, speech2text_model, client=openai_client)
else:
status_code = 501
finally:
# Close and Delete the temporary audio file
audio_file.close()