refac: audio lang fallback logic

This commit is contained in:
Timothy Jaeryang Baek 2025-08-17 04:33:42 +04:00
parent ccd2a0be5b
commit f23eb2a31c

View file

@ -550,6 +550,11 @@ def transcription_handler(request, file_path, metadata):
metadata = metadata or {}
languages = [
metadata.get("language", None) if WHISPER_LANGUAGE == "" else WHISPER_LANGUAGE,
None, # Always fallback to None in case transcription fails
]
if request.app.state.config.STT_ENGINE == "":
if request.app.state.faster_whisper_model is None:
request.app.state.faster_whisper_model = set_faster_whisper_model(
@ -561,11 +566,7 @@ def transcription_handler(request, file_path, metadata):
file_path,
beam_size=5,
vad_filter=request.app.state.config.WHISPER_VAD_FILTER,
language=(
metadata.get("language", None)
if WHISPER_LANGUAGE == ""
else WHISPER_LANGUAGE
),
language=languages[0],
)
log.info(
"Detected language '%s' with probability %f"
@ -585,22 +586,27 @@ def transcription_handler(request, file_path, metadata):
elif request.app.state.config.STT_ENGINE == "openai":
r = None
try:
for language in languages:
payload = {
"model": request.app.state.config.STT_MODEL,
}
if language:
payload["language"] = language
r = requests.post(
url=f"{request.app.state.config.STT_OPENAI_API_BASE_URL}/audio/transcriptions",
headers={
"Authorization": f"Bearer {request.app.state.config.STT_OPENAI_API_KEY}"
},
files={"file": (filename, open(file_path, "rb"))},
data={
"model": request.app.state.config.STT_MODEL,
**(
{"language": metadata.get("language")}
if metadata.get("language")
else {}
),
},
data=payload,
)
if r.status_code == 200:
# Successful transcription
break
r.raise_for_status()
data = r.json()
@ -641,11 +647,14 @@ def transcription_handler(request, file_path, metadata):
"Content-Type": mime,
}
# Try each candidate language in turn, adding model and language params when set
for language in languages:
params = {}
if request.app.state.config.STT_MODEL:
params["model"] = request.app.state.config.STT_MODEL
if language:
params["language"] = language
# Make request to Deepgram API
r = requests.post(
"https://api.deepgram.com/v1/listen?smart_format=true",
@ -653,6 +662,11 @@ def transcription_handler(request, file_path, metadata):
params=params,
data=file_data,
)
if r.status_code == 200:
# Successful transcription
break
r.raise_for_status()
response_data = r.json()