Compare commits

...

5 commits

Author SHA1 Message Date
Shirasawa
4e3f129b3b
Merge 153240c8d6 into 1ea555a5ac 2025-12-09 22:23:33 +01:00
Shirasawa
1ea555a5ac
i18n: improve Chinese translation (#19830) 2025-12-09 15:29:43 -05:00
Shirasawa
c24b1207a0
fix: fixed missing text in the explanation feature (#19829) 2025-12-09 15:29:27 -05:00
Classic298
44e41806f2
chore: dep bump across many dependencies (#19850)
* Update pyproject.toml (#101)

* Update pyproject.toml

* Update requirements.txt

* Update requirements-min.txt

* Upgrade Playwright version to 1.57.0

* Update langchain-community version to 0.3.29

* Update requirements.txt

* Update requirements-min.txt
2025-12-09 15:28:21 -05:00
Shirasawa
153240c8d6 fix: fixed the issue of mismatched spaces in audio MIME types 2025-12-08 09:45:58 +00:00
9 changed files with 140 additions and 115 deletions

View file

@ -33,6 +33,7 @@ from fastapi.responses import FileResponse
from pydantic import BaseModel from pydantic import BaseModel
from open_webui.utils.misc import strict_match_mime_type
from open_webui.utils.auth import get_admin_user, get_verified_user from open_webui.utils.auth import get_admin_user, get_verified_user
from open_webui.utils.headers import include_user_info_headers from open_webui.utils.headers import include_user_info_headers
from open_webui.config import ( from open_webui.config import (
@ -1155,17 +1156,9 @@ def transcription(
stt_supported_content_types = getattr( stt_supported_content_types = getattr(
request.app.state.config, "STT_SUPPORTED_CONTENT_TYPES", [] request.app.state.config, "STT_SUPPORTED_CONTENT_TYPES", []
) ) or ["audio/*", "video/webm"]
if not any( if not strict_match_mime_type(stt_supported_content_types, file.content_type):
fnmatch(file.content_type, content_type)
for content_type in (
stt_supported_content_types
if stt_supported_content_types
and any(t.strip() for t in stt_supported_content_types)
else ["audio/*", "video/webm"]
)
):
raise HTTPException( raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST, status_code=status.HTTP_400_BAD_REQUEST,
detail=ERROR_MESSAGES.FILE_NOT_SUPPORTED, detail=ERROR_MESSAGES.FILE_NOT_SUPPORTED,

View file

@ -47,7 +47,7 @@ from open_webui.storage.provider import Storage
from open_webui.utils.auth import get_admin_user, get_verified_user from open_webui.utils.auth import get_admin_user, get_verified_user
from open_webui.utils.access_control import has_access from open_webui.utils.access_control import has_access
from open_webui.utils.misc import strict_match_mime_type
from pydantic import BaseModel from pydantic import BaseModel
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@ -104,17 +104,9 @@ def process_uploaded_file(request, file, file_path, file_item, file_metadata, us
if file.content_type: if file.content_type:
stt_supported_content_types = getattr( stt_supported_content_types = getattr(
request.app.state.config, "STT_SUPPORTED_CONTENT_TYPES", [] request.app.state.config, "STT_SUPPORTED_CONTENT_TYPES", []
) ) or ["audio/*", "video/webm"]
if any( if strict_match_mime_type(stt_supported_content_types, file.content_type):
fnmatch(file.content_type, content_type)
for content_type in (
stt_supported_content_types
if stt_supported_content_types
and any(t.strip() for t in stt_supported_content_types)
else ["audio/*", "video/webm"]
)
):
file_path = Storage.get_file(file_path) file_path = Storage.get_file(file_path)
result = transcribe(request, file_path, file_metadata, user) result = transcribe(request, file_path, file_metadata, user)

View file

@ -9,6 +9,7 @@ from pathlib import Path
from typing import Callable, Optional, Sequence, Union from typing import Callable, Optional, Sequence, Union
import json import json
import aiohttp import aiohttp
import mimeparse
import collections.abc import collections.abc
@ -577,6 +578,37 @@ def throttle(interval: float = 10.0):
return decorator return decorator
def strict_match_mime_type(supported: list[str] | str, header: str) -> Optional[str]:
"""
Strictly match the mime type with the supported mime types.
:param supported: The supported mime types.
:param header: The header to match.
:return: The matched mime type or None if no match is found.
"""
try:
if isinstance(supported, str):
supported = supported.split(",")
supported = [s for s in supported if s.strip() and "/" in s]
match = mimeparse.best_match(supported, header)
if not match:
return None
_, _, match_params = mimeparse.parse_mime_type(match)
_, _, header_params = mimeparse.parse_mime_type(header)
for k, v in match_params.items():
if header_params.get(k) != v:
return None
return match
except Exception as e:
log.exception(f"Failed to match mime type {header}: {e}")
return None
def extract_urls(text: str) -> list[str]: def extract_urls(text: str) -> list[str]:
# Regex pattern to match URLs # Regex pattern to match URLs
url_pattern = re.compile( url_pattern = re.compile(
@ -624,14 +656,17 @@ def stream_chunks_handler(stream: aiohttp.StreamReader):
yield line yield line
else: else:
yield b"data: {}" yield b"data: {}"
yield b"\n"
else: else:
# Normal mode: check if line exceeds limit # Normal mode: check if line exceeds limit
if len(line) > max_buffer_size: if len(line) > max_buffer_size:
skip_mode = True skip_mode = True
yield b"data: {}" yield b"data: {}"
yield b"\n"
log.info(f"Skip mode triggered, line size: {len(line)}") log.info(f"Skip mode triggered, line size: {len(line)}")
else: else:
yield line yield line
yield b"\n"
# Save the last incomplete fragment # Save the last incomplete fragment
buffer = lines[-1] buffer = lines[-1]
@ -646,5 +681,6 @@ def stream_chunks_handler(stream: aiohttp.StreamReader):
# Process remaining buffer data # Process remaining buffer data
if buffer and not skip_mode: if buffer and not skip_mode:
yield buffer yield buffer
yield b"\n"
return yield_safe_stream_chunks() return yield_safe_stream_chunks()

View file

@ -1,7 +1,7 @@
# Minimal requirements for backend to run # Minimal requirements for backend to run
# WIP: use this as a reference to build a minimal docker image # WIP: use this as a reference to build a minimal docker image
fastapi==0.123.0 fastapi==0.124.0
uvicorn[standard]==0.37.0 uvicorn[standard]==0.37.0
pydantic==2.12.5 pydantic==2.12.5
python-multipart==0.0.20 python-multipart==0.0.20
@ -16,7 +16,7 @@ PyJWT[crypto]==2.10.1
authlib==1.6.5 authlib==1.6.5
requests==2.32.5 requests==2.32.5
aiohttp==3.12.15 aiohttp==3.13.2
async-timeout async-timeout
aiocache aiocache
aiofiles aiofiles
@ -24,21 +24,21 @@ starlette-compress==1.6.1
httpx[socks,http2,zstd,cli,brotli]==0.28.1 httpx[socks,http2,zstd,cli,brotli]==0.28.1
starsessions[redis]==2.2.1 starsessions[redis]==2.2.1
sqlalchemy==2.0.38 sqlalchemy==2.0.44
alembic==1.17.2 alembic==1.17.2
peewee==3.18.3 peewee==3.18.3
peewee-migrate==1.14.3 peewee-migrate==1.14.3
pycrdt==0.12.25 pycrdt==0.12.44
redis redis
APScheduler==3.10.4 APScheduler==3.11.1
RestrictedPython==8.0 RestrictedPython==8.1
loguru==0.7.3 loguru==0.7.3
asgiref==3.11.0 asgiref==3.11.0
mcp==1.22.0 mcp==1.23.1
openai openai
langchain==0.3.27 langchain==0.3.27
@ -46,6 +46,6 @@ langchain-community==0.3.29
fake-useragent==2.2.0 fake-useragent==2.2.0
chromadb==1.3.5 chromadb==1.3.5
black==25.11.0 black==25.12.0
pydub pydub
chardet==5.2.0 chardet==5.2.0

View file

@ -1,4 +1,4 @@
fastapi==0.123.0 fastapi==0.124.0
uvicorn[standard]==0.37.0 uvicorn[standard]==0.37.0
pydantic==2.12.5 pydantic==2.12.5
python-multipart==0.0.20 python-multipart==0.0.20
@ -13,35 +13,36 @@ PyJWT[crypto]==2.10.1
authlib==1.6.5 authlib==1.6.5
requests==2.32.5 requests==2.32.5
aiohttp==3.12.15 aiohttp==3.13.2
async-timeout async-timeout
aiocache aiocache
aiofiles aiofiles
starlette-compress==1.6.1 starlette-compress==1.6.1
httpx[socks,http2,zstd,cli,brotli]==0.28.1 httpx[socks,http2,zstd,cli,brotli]==0.28.1
starsessions[redis]==2.2.1 starsessions[redis]==2.2.1
python-mimeparse==2.0.0
sqlalchemy==2.0.38 sqlalchemy==2.0.44
alembic==1.17.2 alembic==1.17.2
peewee==3.18.3 peewee==3.18.3
peewee-migrate==1.14.3 peewee-migrate==1.14.3
pycrdt==0.12.25 pycrdt==0.12.44
redis redis
APScheduler==3.10.4 APScheduler==3.11.1
RestrictedPython==8.0 RestrictedPython==8.1
loguru==0.7.3 loguru==0.7.3
asgiref==3.11.0 asgiref==3.11.0
# AI libraries # AI libraries
tiktoken tiktoken
mcp==1.22.0 mcp==1.23.3
openai openai
anthropic anthropic
google-genai==1.52.0 google-genai==1.54.0
google-generativeai==0.8.5 google-generativeai==0.8.5
langchain==0.3.27 langchain==0.3.27
@ -49,8 +50,8 @@ langchain-community==0.3.29
fake-useragent==2.2.0 fake-useragent==2.2.0
chromadb==1.3.5 chromadb==1.3.5
weaviate-client==4.17.0 weaviate-client==4.18.3
opensearch-py==2.8.0 opensearch-py==3.1.0
transformers==4.57.3 transformers==4.57.3
sentence-transformers==5.1.2 sentence-transformers==5.1.2
@ -60,43 +61,43 @@ einops==0.8.1
ftfy==6.3.1 ftfy==6.3.1
chardet==5.2.0 chardet==5.2.0
pypdf==6.4.0 pypdf==6.4.1
fpdf2==2.8.2 fpdf2==2.8.5
pymdown-extensions==10.17.2 pymdown-extensions==10.18
docx2txt==0.8 docx2txt==0.9
python-pptx==1.0.2 python-pptx==1.0.2
unstructured==0.18.21 unstructured==0.18.21
msoffcrypto-tool==5.4.2 msoffcrypto-tool==5.4.2
nltk==3.9.1 nltk==3.9.2
Markdown==3.10 Markdown==3.10
pypandoc==1.16.2 pypandoc==1.16.2
pandas==2.2.3 pandas==2.3.3
openpyxl==3.1.5 openpyxl==3.1.5
pyxlsb==1.0.10 pyxlsb==1.0.10
xlrd==2.0.1 xlrd==2.0.2
validators==0.35.0 validators==0.35.0
psutil psutil
sentencepiece sentencepiece
soundfile==0.13.1 soundfile==0.13.1
pillow==11.3.0 pillow==12.0.0
opencv-python-headless==4.11.0.86 opencv-python-headless==4.12.0.88
rapidocr-onnxruntime==1.4.4 rapidocr-onnxruntime==1.4.4
rank-bm25==0.2.2 rank-bm25==0.2.2
onnxruntime==1.20.1 onnxruntime==1.23.2
faster-whisper==1.1.1 faster-whisper==1.2.1
black==25.11.0 black==25.12.0
youtube-transcript-api==1.2.2 youtube-transcript-api==1.2.3
pytube==15.0.0 pytube==15.0.0
pydub pydub
ddgs==9.9.2 ddgs==9.9.3
azure-ai-documentintelligence==1.0.2 azure-ai-documentintelligence==1.0.2
azure-identity==1.25.0 azure-identity==1.25.1
azure-storage-blob==12.24.1 azure-storage-blob==12.27.1
azure-search-documents==11.6.0 azure-search-documents==11.6.0
## Google Drive ## Google Drive
@ -105,26 +106,26 @@ google-auth-httplib2
google-auth-oauthlib google-auth-oauthlib
googleapis-common-protos==1.72.0 googleapis-common-protos==1.72.0
google-cloud-storage==2.19.0 google-cloud-storage==3.7.0
## Databases ## Databases
pymongo pymongo
psycopg2-binary==2.9.10 psycopg2-binary==2.9.11
pgvector==0.4.1 pgvector==0.4.2
PyMySQL==1.1.1 PyMySQL==1.1.2
boto3==1.41.5 boto3==1.42.5
pymilvus==2.6.5 pymilvus==2.6.5
qdrant-client==1.16.1 qdrant-client==1.16.1
playwright==1.56.0 # Caution: version must match docker-compose.playwright.yaml playwright==1.57.0 # Caution: version must match docker-compose.playwright.yaml - Update the docker-compose.yaml if necessary
elasticsearch==9.1.0 elasticsearch==9.2.0
pinecone==6.0.2 pinecone==6.0.2
oracledb==3.2.0 oracledb==3.4.1
av==14.0.1 # Caution: Set due to FATAL FIPS SELFTEST FAILURE, see discussion https://github.com/open-webui/open-webui/discussions/15720 av==14.0.1 # Caution: Set due to FATAL FIPS SELFTEST FAILURE, see discussion https://github.com/open-webui/open-webui/discussions/15720
colbert-ai==0.2.21 colbert-ai==0.2.22
## Tests ## Tests
@ -136,17 +137,17 @@ pytest-docker~=3.2.5
ldap3==2.9.1 ldap3==2.9.1
## Firecrawl ## Firecrawl
firecrawl-py==4.10.0 firecrawl-py==4.10.4
## Trace ## Trace
opentelemetry-api==1.38.0 opentelemetry-api==1.39.0
opentelemetry-sdk==1.38.0 opentelemetry-sdk==1.39.0
opentelemetry-exporter-otlp==1.38.0 opentelemetry-exporter-otlp==1.39.0
opentelemetry-instrumentation==0.59b0 opentelemetry-instrumentation==0.60b0
opentelemetry-instrumentation-fastapi==0.59b0 opentelemetry-instrumentation-fastapi==0.60b0
opentelemetry-instrumentation-sqlalchemy==0.59b0 opentelemetry-instrumentation-sqlalchemy==0.60b0
opentelemetry-instrumentation-redis==0.59b0 opentelemetry-instrumentation-redis==0.60b0
opentelemetry-instrumentation-requests==0.59b0 opentelemetry-instrumentation-requests==0.60b0
opentelemetry-instrumentation-logging==0.59b0 opentelemetry-instrumentation-logging==0.60b0
opentelemetry-instrumentation-httpx==0.59b0 opentelemetry-instrumentation-httpx==0.60b0
opentelemetry-instrumentation-aiohttp-client==0.59b0 opentelemetry-instrumentation-aiohttp-client==0.60b0

View file

@ -1,8 +1,8 @@
services: services:
playwright: playwright:
image: mcr.microsoft.com/playwright:v1.56.0-noble # Version must match requirements.txt image: mcr.microsoft.com/playwright:v1.57.0-noble # Version must match requirements.txt
container_name: playwright container_name: playwright
command: npx -y playwright@1.56.0 run-server --port 3000 --host 0.0.0.0 command: npx -y playwright@1.57.0 run-server --port 3000 --host 0.0.0.0
open-webui: open-webui:
environment: environment:

View file

@ -6,7 +6,7 @@ authors = [
] ]
license = { file = "LICENSE" } license = { file = "LICENSE" }
dependencies = [ dependencies = [
"fastapi==0.123.0", "fastapi==0.124.0",
"uvicorn[standard]==0.37.0", "uvicorn[standard]==0.37.0",
"pydantic==2.12.5", "pydantic==2.12.5",
"python-multipart==0.0.20", "python-multipart==0.0.20",
@ -21,7 +21,7 @@ dependencies = [
"authlib==1.6.5", "authlib==1.6.5",
"requests==2.32.5", "requests==2.32.5",
"aiohttp==3.12.15", "aiohttp==3.13.2",
"async-timeout", "async-timeout",
"aiocache", "aiocache",
"aiofiles", "aiofiles",
@ -29,26 +29,26 @@ dependencies = [
"httpx[socks,http2,zstd,cli,brotli]==0.28.1", "httpx[socks,http2,zstd,cli,brotli]==0.28.1",
"starsessions[redis]==2.2.1", "starsessions[redis]==2.2.1",
"sqlalchemy==2.0.38", "sqlalchemy==2.0.44",
"alembic==1.17.2", "alembic==1.17.2",
"peewee==3.18.3", "peewee==3.18.3",
"peewee-migrate==1.14.3", "peewee-migrate==1.14.3",
"pycrdt==0.12.25", "pycrdt==0.12.44",
"redis", "redis",
"APScheduler==3.10.4", "APScheduler==3.11.1",
"RestrictedPython==8.0", "RestrictedPython==8.1",
"loguru==0.7.3", "loguru==0.7.3",
"asgiref==3.11.0", "asgiref==3.11.0",
"tiktoken", "tiktoken",
"mcp==1.22.0", "mcp==1.23.3",
"openai", "openai",
"anthropic", "anthropic",
"google-genai==1.52.0", "google-genai==1.54.0",
"google-generativeai==0.8.5", "google-generativeai==0.8.5",
"langchain==0.3.27", "langchain==0.3.27",
@ -56,62 +56,62 @@ dependencies = [
"fake-useragent==2.2.0", "fake-useragent==2.2.0",
"chromadb==1.3.5", "chromadb==1.3.5",
"opensearch-py==2.8.0", "opensearch-py==3.1.0",
"PyMySQL==1.1.1", "PyMySQL==1.1.2",
"boto3==1.41.5", "boto3==1.42.5",
"transformers==4.57.3", "transformers==4.57.3",
"sentence-transformers==5.1.2", "sentence-transformers==5.1.2",
"accelerate", "accelerate",
"pyarrow==20.0.0", "pyarrow==20.0.0", # fix: pin pyarrow version to 20 for rpi compatibility #15897
"einops==0.8.1", "einops==0.8.1",
"ftfy==6.3.1", "ftfy==6.3.1",
"chardet==5.2.0", "chardet==5.2.0",
"pypdf==6.4.0", "pypdf==6.4.1",
"fpdf2==2.8.2", "fpdf2==2.8.5",
"pymdown-extensions==10.17.2", "pymdown-extensions==10.18",
"docx2txt==0.8", "docx2txt==0.9",
"python-pptx==1.0.2", "python-pptx==1.0.2",
"unstructured==0.18.21", "unstructured==0.18.21",
"msoffcrypto-tool==5.4.2", "msoffcrypto-tool==5.4.2",
"nltk==3.9.1", "nltk==3.9.2",
"Markdown==3.10", "Markdown==3.10",
"pypandoc==1.16.2", "pypandoc==1.16.2",
"pandas==2.2.3", "pandas==2.3.3",
"openpyxl==3.1.5", "openpyxl==3.1.5",
"pyxlsb==1.0.10", "pyxlsb==1.0.10",
"xlrd==2.0.1", "xlrd==2.0.2",
"validators==0.35.0", "validators==0.35.0",
"psutil", "psutil",
"sentencepiece", "sentencepiece",
"soundfile==0.13.1", "soundfile==0.13.1",
"azure-ai-documentintelligence==1.0.2", "azure-ai-documentintelligence==1.0.2",
"pillow==11.3.0", "pillow==12.0.0",
"opencv-python-headless==4.11.0.86", "opencv-python-headless==4.12.0.88",
"rapidocr-onnxruntime==1.4.4", "rapidocr-onnxruntime==1.4.4",
"rank-bm25==0.2.2", "rank-bm25==0.2.2",
"onnxruntime==1.20.1", "onnxruntime==1.23.2",
"faster-whisper==1.1.1", "faster-whisper==1.2.1",
"black==25.11.0", "black==25.12.0",
"youtube-transcript-api==1.2.2", "youtube-transcript-api==1.2.3",
"pytube==15.0.0", "pytube==15.0.0",
"pydub", "pydub",
"ddgs==9.9.2", "ddgs==9.9.3",
"google-api-python-client", "google-api-python-client",
"google-auth-httplib2", "google-auth-httplib2",
"google-auth-oauthlib", "google-auth-oauthlib",
"googleapis-common-protos==1.72.0", "googleapis-common-protos==1.72.0",
"google-cloud-storage==2.19.0", "google-cloud-storage==3.7.0",
"azure-identity==1.25.0", "azure-identity==1.25.1",
"azure-storage-blob==12.24.1", "azure-storage-blob==12.27.1",
"ldap3==2.9.1", "ldap3==2.9.1",
] ]
@ -130,8 +130,8 @@ classifiers = [
[project.optional-dependencies] [project.optional-dependencies]
postgres = [ postgres = [
"psycopg2-binary==2.9.10", "psycopg2-binary==2.9.11",
"pgvector==0.4.1", "pgvector==0.4.2",
] ]
all = [ all = [
@ -143,17 +143,18 @@ all = [
"docker~=7.1.0", "docker~=7.1.0",
"pytest~=8.3.2", "pytest~=8.3.2",
"pytest-docker~=3.2.5", "pytest-docker~=3.2.5",
"playwright==1.56.0", "playwright==1.57.0", # Caution: version must match docker-compose.playwright.yaml - Update the docker-compose.yaml if necessary
"elasticsearch==9.1.0", "elasticsearch==9.2.0",
"qdrant-client==1.16.1", "qdrant-client==1.16.1",
"weaviate-client==4.17.0", "pymilvus==2.6.4",
"weaviate-client==4.18.3",
"pymilvus==2.6.5", "pymilvus==2.6.5",
"pinecone==6.0.2", "pinecone==6.0.2",
"oracledb==3.2.0", "oracledb==3.4.1",
"colbert-ai==0.2.21", "colbert-ai==0.2.22",
"firecrawl-py==4.10.0", "firecrawl-py==4.10.4",
"azure-search-documents==11.6.0", "azure-search-documents==11.6.0",
] ]

View file

@ -1624,6 +1624,7 @@
"Tika": "Tika", "Tika": "Tika",
"Tika Server URL required.": "请输入 Tika 服务器接口地址", "Tika Server URL required.": "请输入 Tika 服务器接口地址",
"Tiktoken": "Tiktoken", "Tiktoken": "Tiktoken",
"Timeout": "超时时间",
"Title": "标题", "Title": "标题",
"Title Auto-Generation": "自动生成标题", "Title Auto-Generation": "自动生成标题",
"Title cannot be an empty string.": "标题不能为空", "Title cannot be an empty string.": "标题不能为空",

View file

@ -1624,6 +1624,7 @@
"Tika": "Tika", "Tika": "Tika",
"Tika Server URL required.": "需要提供 Tika 伺服器 URL。", "Tika Server URL required.": "需要提供 Tika 伺服器 URL。",
"Tiktoken": "Tiktoken", "Tiktoken": "Tiktoken",
"Timeout": "逾時時間",
"Title": "標題", "Title": "標題",
"Title Auto-Generation": "自動產生標題", "Title Auto-Generation": "自動產生標題",
"Title cannot be an empty string.": "標題不能是空字串。", "Title cannot be an empty string.": "標題不能是空字串。",