refac: audio lang fallback logic

This commit is contained in:
Timothy Jaeryang Baek 2025-08-17 04:33:42 +04:00
parent ccd2a0be5b
commit f23eb2a31c

View file

@ -550,6 +550,11 @@ def transcription_handler(request, file_path, metadata):
metadata = metadata or {} metadata = metadata or {}
languages = [
metadata.get("language", None) if WHISPER_LANGUAGE == "" else WHISPER_LANGUAGE,
None, # Always fallback to None in case transcription fails
]
if request.app.state.config.STT_ENGINE == "": if request.app.state.config.STT_ENGINE == "":
if request.app.state.faster_whisper_model is None: if request.app.state.faster_whisper_model is None:
request.app.state.faster_whisper_model = set_faster_whisper_model( request.app.state.faster_whisper_model = set_faster_whisper_model(
@ -561,11 +566,7 @@ def transcription_handler(request, file_path, metadata):
file_path, file_path,
beam_size=5, beam_size=5,
vad_filter=request.app.state.config.WHISPER_VAD_FILTER, vad_filter=request.app.state.config.WHISPER_VAD_FILTER,
language=( language=languages[0],
metadata.get("language", None)
if WHISPER_LANGUAGE == ""
else WHISPER_LANGUAGE
),
) )
log.info( log.info(
"Detected language '%s' with probability %f" "Detected language '%s' with probability %f"
@ -585,21 +586,26 @@ def transcription_handler(request, file_path, metadata):
elif request.app.state.config.STT_ENGINE == "openai": elif request.app.state.config.STT_ENGINE == "openai":
r = None r = None
try: try:
r = requests.post( for language in languages:
url=f"{request.app.state.config.STT_OPENAI_API_BASE_URL}/audio/transcriptions", payload = {
headers={
"Authorization": f"Bearer {request.app.state.config.STT_OPENAI_API_KEY}"
},
files={"file": (filename, open(file_path, "rb"))},
data={
"model": request.app.state.config.STT_MODEL, "model": request.app.state.config.STT_MODEL,
**( }
{"language": metadata.get("language")}
if metadata.get("language") if language:
else {} payload["language"] = language
),
}, r = requests.post(
) url=f"{request.app.state.config.STT_OPENAI_API_BASE_URL}/audio/transcriptions",
headers={
"Authorization": f"Bearer {request.app.state.config.STT_OPENAI_API_KEY}"
},
files={"file": (filename, open(file_path, "rb"))},
data=payload,
)
if r.status_code == 200:
# Successful transcription
break
r.raise_for_status() r.raise_for_status()
data = r.json() data = r.json()
@ -641,18 +647,26 @@ def transcription_handler(request, file_path, metadata):
"Content-Type": mime, "Content-Type": mime,
} }
# Add model if specified for language in languages:
params = {} params = {}
if request.app.state.config.STT_MODEL: if request.app.state.config.STT_MODEL:
params["model"] = request.app.state.config.STT_MODEL params["model"] = request.app.state.config.STT_MODEL
if language:
params["language"] = language
# Make request to Deepgram API
r = requests.post(
"https://api.deepgram.com/v1/listen?smart_format=true",
headers=headers,
params=params,
data=file_data,
)
if r.status_code == 200:
# Successful transcription
break
# Make request to Deepgram API
r = requests.post(
"https://api.deepgram.com/v1/listen?smart_format=true",
headers=headers,
params=params,
data=file_data,
)
r.raise_for_status() r.raise_for_status()
response_data = r.json() response_data = r.json()