Compare commits

...

5 commits

Author SHA1 Message Date
Shirasawa
4e3f129b3b
Merge 153240c8d6 into 1ea555a5ac 2025-12-09 22:23:33 +01:00
Shirasawa
1ea555a5ac
i18n: improve Chinese translation (#19830) 2025-12-09 15:29:43 -05:00
Shirasawa
c24b1207a0
fix: fixed missing text in the explanation feature (#19829) 2025-12-09 15:29:27 -05:00
Classic298
44e41806f2
chore: dep bump across many dependencies (#19850)
* Update pyproject.toml (#101)

* Update pyproject.toml

* Update requirements.txt

* Update requirements-min.txt

* Upgrade Playwright version to 1.57.0

* Update langchain-community version to 0.3.29

* Update requirements.txt

* Update requirements-min.txt
2025-12-09 15:28:21 -05:00
Shirasawa
153240c8d6 fix: fixed the issue of mismatched spaces in audio MIME types 2025-12-08 09:45:58 +00:00
9 changed files with 140 additions and 115 deletions

View file

@ -33,6 +33,7 @@ from fastapi.responses import FileResponse
from pydantic import BaseModel
from open_webui.utils.misc import strict_match_mime_type
from open_webui.utils.auth import get_admin_user, get_verified_user
from open_webui.utils.headers import include_user_info_headers
from open_webui.config import (
@ -1155,17 +1156,9 @@ def transcription(
stt_supported_content_types = getattr(
request.app.state.config, "STT_SUPPORTED_CONTENT_TYPES", []
)
) or ["audio/*", "video/webm"]
if not any(
fnmatch(file.content_type, content_type)
for content_type in (
stt_supported_content_types
if stt_supported_content_types
and any(t.strip() for t in stt_supported_content_types)
else ["audio/*", "video/webm"]
)
):
if not strict_match_mime_type(stt_supported_content_types, file.content_type):
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=ERROR_MESSAGES.FILE_NOT_SUPPORTED,

View file

@ -47,7 +47,7 @@ from open_webui.storage.provider import Storage
from open_webui.utils.auth import get_admin_user, get_verified_user
from open_webui.utils.access_control import has_access
from open_webui.utils.misc import strict_match_mime_type
from pydantic import BaseModel
log = logging.getLogger(__name__)
@ -104,17 +104,9 @@ def process_uploaded_file(request, file, file_path, file_item, file_metadata, us
if file.content_type:
stt_supported_content_types = getattr(
request.app.state.config, "STT_SUPPORTED_CONTENT_TYPES", []
)
) or ["audio/*", "video/webm"]
if any(
fnmatch(file.content_type, content_type)
for content_type in (
stt_supported_content_types
if stt_supported_content_types
and any(t.strip() for t in stt_supported_content_types)
else ["audio/*", "video/webm"]
)
):
if strict_match_mime_type(stt_supported_content_types, file.content_type):
file_path = Storage.get_file(file_path)
result = transcribe(request, file_path, file_metadata, user)

View file

@ -9,6 +9,7 @@ from pathlib import Path
from typing import Callable, Optional, Sequence, Union
import json
import aiohttp
import mimeparse
import collections.abc
@ -577,6 +578,37 @@ def throttle(interval: float = 10.0):
return decorator
def strict_match_mime_type(supported: list[str] | str, header: str) -> Optional[str]:
"""
Strictly match the mime type with the supported mime types.
:param supported: The supported mime types.
:param header: The header to match.
:return: The matched mime type or None if no match is found.
"""
try:
if isinstance(supported, str):
supported = supported.split(",")
supported = [s for s in supported if s.strip() and "/" in s]
match = mimeparse.best_match(supported, header)
if not match:
return None
_, _, match_params = mimeparse.parse_mime_type(match)
_, _, header_params = mimeparse.parse_mime_type(header)
for k, v in match_params.items():
if header_params.get(k) != v:
return None
return match
except Exception as e:
log.exception(f"Failed to match mime type {header}: {e}")
return None
def extract_urls(text: str) -> list[str]:
# Regex pattern to match URLs
url_pattern = re.compile(
@ -624,14 +656,17 @@ def stream_chunks_handler(stream: aiohttp.StreamReader):
yield line
else:
yield b"data: {}"
yield b"\n"
else:
# Normal mode: check if line exceeds limit
if len(line) > max_buffer_size:
skip_mode = True
yield b"data: {}"
yield b"\n"
log.info(f"Skip mode triggered, line size: {len(line)}")
else:
yield line
yield b"\n"
# Save the last incomplete fragment
buffer = lines[-1]
@ -646,5 +681,6 @@ def stream_chunks_handler(stream: aiohttp.StreamReader):
# Process remaining buffer data
if buffer and not skip_mode:
yield buffer
yield b"\n"
return yield_safe_stream_chunks()

View file

@ -1,7 +1,7 @@
# Minimal requirements for backend to run
# WIP: use this as a reference to build a minimal docker image
fastapi==0.123.0
fastapi==0.124.0
uvicorn[standard]==0.37.0
pydantic==2.12.5
python-multipart==0.0.20
@ -16,7 +16,7 @@ PyJWT[crypto]==2.10.1
authlib==1.6.5
requests==2.32.5
aiohttp==3.12.15
aiohttp==3.13.2
async-timeout
aiocache
aiofiles
@ -24,21 +24,21 @@ starlette-compress==1.6.1
httpx[socks,http2,zstd,cli,brotli]==0.28.1
starsessions[redis]==2.2.1
sqlalchemy==2.0.38
sqlalchemy==2.0.44
alembic==1.17.2
peewee==3.18.3
peewee-migrate==1.14.3
pycrdt==0.12.25
pycrdt==0.12.44
redis
APScheduler==3.10.4
RestrictedPython==8.0
APScheduler==3.11.1
RestrictedPython==8.1
loguru==0.7.3
asgiref==3.11.0
mcp==1.22.0
mcp==1.23.1
openai
langchain==0.3.27
@ -46,6 +46,6 @@ langchain-community==0.3.29
fake-useragent==2.2.0
chromadb==1.3.5
black==25.11.0
black==25.12.0
pydub
chardet==5.2.0

View file

@ -1,4 +1,4 @@
fastapi==0.123.0
fastapi==0.124.0
uvicorn[standard]==0.37.0
pydantic==2.12.5
python-multipart==0.0.20
@ -13,35 +13,36 @@ PyJWT[crypto]==2.10.1
authlib==1.6.5
requests==2.32.5
aiohttp==3.12.15
aiohttp==3.13.2
async-timeout
aiocache
aiofiles
starlette-compress==1.6.1
httpx[socks,http2,zstd,cli,brotli]==0.28.1
starsessions[redis]==2.2.1
python-mimeparse==2.0.0
sqlalchemy==2.0.38
sqlalchemy==2.0.44
alembic==1.17.2
peewee==3.18.3
peewee-migrate==1.14.3
pycrdt==0.12.25
pycrdt==0.12.44
redis
APScheduler==3.10.4
RestrictedPython==8.0
APScheduler==3.11.1
RestrictedPython==8.1
loguru==0.7.3
asgiref==3.11.0
# AI libraries
tiktoken
mcp==1.22.0
mcp==1.23.3
openai
anthropic
google-genai==1.52.0
google-genai==1.54.0
google-generativeai==0.8.5
langchain==0.3.27
@ -49,8 +50,8 @@ langchain-community==0.3.29
fake-useragent==2.2.0
chromadb==1.3.5
weaviate-client==4.17.0
opensearch-py==2.8.0
weaviate-client==4.18.3
opensearch-py==3.1.0
transformers==4.57.3
sentence-transformers==5.1.2
@ -60,43 +61,43 @@ einops==0.8.1
ftfy==6.3.1
chardet==5.2.0
pypdf==6.4.0
fpdf2==2.8.2
pymdown-extensions==10.17.2
docx2txt==0.8
pypdf==6.4.1
fpdf2==2.8.5
pymdown-extensions==10.18
docx2txt==0.9
python-pptx==1.0.2
unstructured==0.18.21
msoffcrypto-tool==5.4.2
nltk==3.9.1
nltk==3.9.2
Markdown==3.10
pypandoc==1.16.2
pandas==2.2.3
pandas==2.3.3
openpyxl==3.1.5
pyxlsb==1.0.10
xlrd==2.0.1
xlrd==2.0.2
validators==0.35.0
psutil
sentencepiece
soundfile==0.13.1
pillow==11.3.0
opencv-python-headless==4.11.0.86
pillow==12.0.0
opencv-python-headless==4.12.0.88
rapidocr-onnxruntime==1.4.4
rank-bm25==0.2.2
onnxruntime==1.20.1
faster-whisper==1.1.1
onnxruntime==1.23.2
faster-whisper==1.2.1
black==25.11.0
youtube-transcript-api==1.2.2
black==25.12.0
youtube-transcript-api==1.2.3
pytube==15.0.0
pydub
ddgs==9.9.2
ddgs==9.9.3
azure-ai-documentintelligence==1.0.2
azure-identity==1.25.0
azure-storage-blob==12.24.1
azure-identity==1.25.1
azure-storage-blob==12.27.1
azure-search-documents==11.6.0
## Google Drive
@ -105,26 +106,26 @@ google-auth-httplib2
google-auth-oauthlib
googleapis-common-protos==1.72.0
google-cloud-storage==2.19.0
google-cloud-storage==3.7.0
## Databases
pymongo
psycopg2-binary==2.9.10
pgvector==0.4.1
psycopg2-binary==2.9.11
pgvector==0.4.2
PyMySQL==1.1.1
boto3==1.41.5
PyMySQL==1.1.2
boto3==1.42.5
pymilvus==2.6.5
qdrant-client==1.16.1
playwright==1.56.0 # Caution: version must match docker-compose.playwright.yaml
elasticsearch==9.1.0
playwright==1.57.0 # Caution: version must match docker-compose.playwright.yaml - Update the docker-compose.yaml if necessary
elasticsearch==9.2.0
pinecone==6.0.2
oracledb==3.2.0
oracledb==3.4.1
av==14.0.1 # Caution: Set due to FATAL FIPS SELFTEST FAILURE, see discussion https://github.com/open-webui/open-webui/discussions/15720
colbert-ai==0.2.21
colbert-ai==0.2.22
## Tests
@ -136,17 +137,17 @@ pytest-docker~=3.2.5
ldap3==2.9.1
## Firecrawl
firecrawl-py==4.10.0
firecrawl-py==4.10.4
## Trace
opentelemetry-api==1.38.0
opentelemetry-sdk==1.38.0
opentelemetry-exporter-otlp==1.38.0
opentelemetry-instrumentation==0.59b0
opentelemetry-instrumentation-fastapi==0.59b0
opentelemetry-instrumentation-sqlalchemy==0.59b0
opentelemetry-instrumentation-redis==0.59b0
opentelemetry-instrumentation-requests==0.59b0
opentelemetry-instrumentation-logging==0.59b0
opentelemetry-instrumentation-httpx==0.59b0
opentelemetry-instrumentation-aiohttp-client==0.59b0
opentelemetry-api==1.39.0
opentelemetry-sdk==1.39.0
opentelemetry-exporter-otlp==1.39.0
opentelemetry-instrumentation==0.60b0
opentelemetry-instrumentation-fastapi==0.60b0
opentelemetry-instrumentation-sqlalchemy==0.60b0
opentelemetry-instrumentation-redis==0.60b0
opentelemetry-instrumentation-requests==0.60b0
opentelemetry-instrumentation-logging==0.60b0
opentelemetry-instrumentation-httpx==0.60b0
opentelemetry-instrumentation-aiohttp-client==0.60b0

View file

@ -1,8 +1,8 @@
services:
playwright:
image: mcr.microsoft.com/playwright:v1.56.0-noble # Version must match requirements.txt
image: mcr.microsoft.com/playwright:v1.57.0-noble # Version must match requirements.txt
container_name: playwright
command: npx -y playwright@1.56.0 run-server --port 3000 --host 0.0.0.0
command: npx -y playwright@1.57.0 run-server --port 3000 --host 0.0.0.0
open-webui:
environment:

View file

@ -6,7 +6,7 @@ authors = [
]
license = { file = "LICENSE" }
dependencies = [
"fastapi==0.123.0",
"fastapi==0.124.0",
"uvicorn[standard]==0.37.0",
"pydantic==2.12.5",
"python-multipart==0.0.20",
@ -21,7 +21,7 @@ dependencies = [
"authlib==1.6.5",
"requests==2.32.5",
"aiohttp==3.12.15",
"aiohttp==3.13.2",
"async-timeout",
"aiocache",
"aiofiles",
@ -29,26 +29,26 @@ dependencies = [
"httpx[socks,http2,zstd,cli,brotli]==0.28.1",
"starsessions[redis]==2.2.1",
"sqlalchemy==2.0.38",
"sqlalchemy==2.0.44",
"alembic==1.17.2",
"peewee==3.18.3",
"peewee-migrate==1.14.3",
"pycrdt==0.12.25",
"pycrdt==0.12.44",
"redis",
"APScheduler==3.10.4",
"RestrictedPython==8.0",
"APScheduler==3.11.1",
"RestrictedPython==8.1",
"loguru==0.7.3",
"asgiref==3.11.0",
"tiktoken",
"mcp==1.22.0",
"mcp==1.23.3",
"openai",
"anthropic",
"google-genai==1.52.0",
"google-genai==1.54.0",
"google-generativeai==0.8.5",
"langchain==0.3.27",
@ -56,62 +56,62 @@ dependencies = [
"fake-useragent==2.2.0",
"chromadb==1.3.5",
"opensearch-py==2.8.0",
"PyMySQL==1.1.1",
"boto3==1.41.5",
"opensearch-py==3.1.0",
"PyMySQL==1.1.2",
"boto3==1.42.5",
"transformers==4.57.3",
"sentence-transformers==5.1.2",
"accelerate",
"pyarrow==20.0.0",
"pyarrow==20.0.0", # fix: pin pyarrow version to 20 for rpi compatibility #15897
"einops==0.8.1",
"ftfy==6.3.1",
"chardet==5.2.0",
"pypdf==6.4.0",
"fpdf2==2.8.2",
"pymdown-extensions==10.17.2",
"docx2txt==0.8",
"pypdf==6.4.1",
"fpdf2==2.8.5",
"pymdown-extensions==10.18",
"docx2txt==0.9",
"python-pptx==1.0.2",
"unstructured==0.18.21",
"msoffcrypto-tool==5.4.2",
"nltk==3.9.1",
"nltk==3.9.2",
"Markdown==3.10",
"pypandoc==1.16.2",
"pandas==2.2.3",
"pandas==2.3.3",
"openpyxl==3.1.5",
"pyxlsb==1.0.10",
"xlrd==2.0.1",
"xlrd==2.0.2",
"validators==0.35.0",
"psutil",
"sentencepiece",
"soundfile==0.13.1",
"azure-ai-documentintelligence==1.0.2",
"pillow==11.3.0",
"opencv-python-headless==4.11.0.86",
"pillow==12.0.0",
"opencv-python-headless==4.12.0.88",
"rapidocr-onnxruntime==1.4.4",
"rank-bm25==0.2.2",
"onnxruntime==1.20.1",
"faster-whisper==1.1.1",
"onnxruntime==1.23.2",
"faster-whisper==1.2.1",
"black==25.11.0",
"youtube-transcript-api==1.2.2",
"black==25.12.0",
"youtube-transcript-api==1.2.3",
"pytube==15.0.0",
"pydub",
"ddgs==9.9.2",
"ddgs==9.9.3",
"google-api-python-client",
"google-auth-httplib2",
"google-auth-oauthlib",
"googleapis-common-protos==1.72.0",
"google-cloud-storage==2.19.0",
"google-cloud-storage==3.7.0",
"azure-identity==1.25.0",
"azure-storage-blob==12.24.1",
"azure-identity==1.25.1",
"azure-storage-blob==12.27.1",
"ldap3==2.9.1",
]
@ -130,8 +130,8 @@ classifiers = [
[project.optional-dependencies]
postgres = [
"psycopg2-binary==2.9.10",
"pgvector==0.4.1",
"psycopg2-binary==2.9.11",
"pgvector==0.4.2",
]
all = [
@ -143,17 +143,18 @@ all = [
"docker~=7.1.0",
"pytest~=8.3.2",
"pytest-docker~=3.2.5",
"playwright==1.56.0",
"elasticsearch==9.1.0",
"playwright==1.57.0", # Caution: version must match docker-compose.playwright.yaml - Update the docker-compose.yaml if necessary
"elasticsearch==9.2.0",
"qdrant-client==1.16.1",
"weaviate-client==4.17.0",
"pymilvus==2.6.4",
"weaviate-client==4.18.3",
"pymilvus==2.6.5",
"pinecone==6.0.2",
"oracledb==3.2.0",
"colbert-ai==0.2.21",
"oracledb==3.4.1",
"colbert-ai==0.2.22",
"firecrawl-py==4.10.0",
"firecrawl-py==4.10.4",
"azure-search-documents==11.6.0",
]

View file

@ -1624,6 +1624,7 @@
"Tika": "Tika",
"Tika Server URL required.": "请输入 Tika 服务器接口地址",
"Tiktoken": "Tiktoken",
"Timeout": "超时时间",
"Title": "标题",
"Title Auto-Generation": "自动生成标题",
"Title cannot be an empty string.": "标题不能为空",

View file

@ -1624,6 +1624,7 @@
"Tika": "Tika",
"Tika Server URL required.": "需要提供 Tika 伺服器 URL。",
"Tiktoken": "Tiktoken",
"Timeout": "逾時時間",
"Title": "標題",
"Title Auto-Generation": "自動產生標題",
"Title cannot be an empty string.": "標題不能是空字串。",