mirror of
https://github.com/open-webui/open-webui.git
synced 2025-12-11 20:05:19 +00:00
Compare commits
5 commits
2daa3a617f
...
4e3f129b3b
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4e3f129b3b | ||
|
|
1ea555a5ac | ||
|
|
c24b1207a0 | ||
|
|
44e41806f2 | ||
|
|
153240c8d6 |
9 changed files with 140 additions and 115 deletions
|
|
@ -33,6 +33,7 @@ from fastapi.responses import FileResponse
|
|||
from pydantic import BaseModel
|
||||
|
||||
|
||||
from open_webui.utils.misc import strict_match_mime_type
|
||||
from open_webui.utils.auth import get_admin_user, get_verified_user
|
||||
from open_webui.utils.headers import include_user_info_headers
|
||||
from open_webui.config import (
|
||||
|
|
@ -1155,17 +1156,9 @@ def transcription(
|
|||
|
||||
stt_supported_content_types = getattr(
|
||||
request.app.state.config, "STT_SUPPORTED_CONTENT_TYPES", []
|
||||
)
|
||||
) or ["audio/*", "video/webm"]
|
||||
|
||||
if not any(
|
||||
fnmatch(file.content_type, content_type)
|
||||
for content_type in (
|
||||
stt_supported_content_types
|
||||
if stt_supported_content_types
|
||||
and any(t.strip() for t in stt_supported_content_types)
|
||||
else ["audio/*", "video/webm"]
|
||||
)
|
||||
):
|
||||
if not strict_match_mime_type(stt_supported_content_types, file.content_type):
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail=ERROR_MESSAGES.FILE_NOT_SUPPORTED,
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@ from open_webui.storage.provider import Storage
|
|||
|
||||
from open_webui.utils.auth import get_admin_user, get_verified_user
|
||||
from open_webui.utils.access_control import has_access
|
||||
|
||||
from open_webui.utils.misc import strict_match_mime_type
|
||||
from pydantic import BaseModel
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
|
@ -104,17 +104,9 @@ def process_uploaded_file(request, file, file_path, file_item, file_metadata, us
|
|||
if file.content_type:
|
||||
stt_supported_content_types = getattr(
|
||||
request.app.state.config, "STT_SUPPORTED_CONTENT_TYPES", []
|
||||
)
|
||||
) or ["audio/*", "video/webm"]
|
||||
|
||||
if any(
|
||||
fnmatch(file.content_type, content_type)
|
||||
for content_type in (
|
||||
stt_supported_content_types
|
||||
if stt_supported_content_types
|
||||
and any(t.strip() for t in stt_supported_content_types)
|
||||
else ["audio/*", "video/webm"]
|
||||
)
|
||||
):
|
||||
if strict_match_mime_type(stt_supported_content_types, file.content_type):
|
||||
file_path = Storage.get_file(file_path)
|
||||
result = transcribe(request, file_path, file_metadata, user)
|
||||
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ from pathlib import Path
|
|||
from typing import Callable, Optional, Sequence, Union
|
||||
import json
|
||||
import aiohttp
|
||||
import mimeparse
|
||||
|
||||
|
||||
import collections.abc
|
||||
|
|
@ -577,6 +578,37 @@ def throttle(interval: float = 10.0):
|
|||
return decorator
|
||||
|
||||
|
||||
def strict_match_mime_type(supported: list[str] | str, header: str) -> Optional[str]:
|
||||
"""
|
||||
Strictly match the mime type with the supported mime types.
|
||||
|
||||
:param supported: The supported mime types.
|
||||
:param header: The header to match.
|
||||
:return: The matched mime type or None if no match is found.
|
||||
"""
|
||||
|
||||
try:
|
||||
if isinstance(supported, str):
|
||||
supported = supported.split(",")
|
||||
|
||||
supported = [s for s in supported if s.strip() and "/" in s]
|
||||
|
||||
match = mimeparse.best_match(supported, header)
|
||||
if not match:
|
||||
return None
|
||||
|
||||
_, _, match_params = mimeparse.parse_mime_type(match)
|
||||
_, _, header_params = mimeparse.parse_mime_type(header)
|
||||
for k, v in match_params.items():
|
||||
if header_params.get(k) != v:
|
||||
return None
|
||||
|
||||
return match
|
||||
except Exception as e:
|
||||
log.exception(f"Failed to match mime type {header}: {e}")
|
||||
return None
|
||||
|
||||
|
||||
def extract_urls(text: str) -> list[str]:
|
||||
# Regex pattern to match URLs
|
||||
url_pattern = re.compile(
|
||||
|
|
@ -624,14 +656,17 @@ def stream_chunks_handler(stream: aiohttp.StreamReader):
|
|||
yield line
|
||||
else:
|
||||
yield b"data: {}"
|
||||
yield b"\n"
|
||||
else:
|
||||
# Normal mode: check if line exceeds limit
|
||||
if len(line) > max_buffer_size:
|
||||
skip_mode = True
|
||||
yield b"data: {}"
|
||||
yield b"\n"
|
||||
log.info(f"Skip mode triggered, line size: {len(line)}")
|
||||
else:
|
||||
yield line
|
||||
yield b"\n"
|
||||
|
||||
# Save the last incomplete fragment
|
||||
buffer = lines[-1]
|
||||
|
|
@ -646,5 +681,6 @@ def stream_chunks_handler(stream: aiohttp.StreamReader):
|
|||
# Process remaining buffer data
|
||||
if buffer and not skip_mode:
|
||||
yield buffer
|
||||
yield b"\n"
|
||||
|
||||
return yield_safe_stream_chunks()
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
# Minimal requirements for backend to run
|
||||
# WIP: use this as a reference to build a minimal docker image
|
||||
|
||||
fastapi==0.123.0
|
||||
fastapi==0.124.0
|
||||
uvicorn[standard]==0.37.0
|
||||
pydantic==2.12.5
|
||||
python-multipart==0.0.20
|
||||
|
|
@ -16,7 +16,7 @@ PyJWT[crypto]==2.10.1
|
|||
authlib==1.6.5
|
||||
|
||||
requests==2.32.5
|
||||
aiohttp==3.12.15
|
||||
aiohttp==3.13.2
|
||||
async-timeout
|
||||
aiocache
|
||||
aiofiles
|
||||
|
|
@ -24,21 +24,21 @@ starlette-compress==1.6.1
|
|||
httpx[socks,http2,zstd,cli,brotli]==0.28.1
|
||||
starsessions[redis]==2.2.1
|
||||
|
||||
sqlalchemy==2.0.38
|
||||
sqlalchemy==2.0.44
|
||||
alembic==1.17.2
|
||||
peewee==3.18.3
|
||||
peewee-migrate==1.14.3
|
||||
|
||||
pycrdt==0.12.25
|
||||
pycrdt==0.12.44
|
||||
redis
|
||||
|
||||
APScheduler==3.10.4
|
||||
RestrictedPython==8.0
|
||||
APScheduler==3.11.1
|
||||
RestrictedPython==8.1
|
||||
|
||||
loguru==0.7.3
|
||||
asgiref==3.11.0
|
||||
|
||||
mcp==1.22.0
|
||||
mcp==1.23.1
|
||||
openai
|
||||
|
||||
langchain==0.3.27
|
||||
|
|
@ -46,6 +46,6 @@ langchain-community==0.3.29
|
|||
fake-useragent==2.2.0
|
||||
|
||||
chromadb==1.3.5
|
||||
black==25.11.0
|
||||
black==25.12.0
|
||||
pydub
|
||||
chardet==5.2.0
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
fastapi==0.123.0
|
||||
fastapi==0.124.0
|
||||
uvicorn[standard]==0.37.0
|
||||
pydantic==2.12.5
|
||||
python-multipart==0.0.20
|
||||
|
|
@ -13,35 +13,36 @@ PyJWT[crypto]==2.10.1
|
|||
authlib==1.6.5
|
||||
|
||||
requests==2.32.5
|
||||
aiohttp==3.12.15
|
||||
aiohttp==3.13.2
|
||||
async-timeout
|
||||
aiocache
|
||||
aiofiles
|
||||
starlette-compress==1.6.1
|
||||
httpx[socks,http2,zstd,cli,brotli]==0.28.1
|
||||
starsessions[redis]==2.2.1
|
||||
python-mimeparse==2.0.0
|
||||
|
||||
sqlalchemy==2.0.38
|
||||
sqlalchemy==2.0.44
|
||||
alembic==1.17.2
|
||||
peewee==3.18.3
|
||||
peewee-migrate==1.14.3
|
||||
|
||||
pycrdt==0.12.25
|
||||
pycrdt==0.12.44
|
||||
redis
|
||||
|
||||
APScheduler==3.10.4
|
||||
RestrictedPython==8.0
|
||||
APScheduler==3.11.1
|
||||
RestrictedPython==8.1
|
||||
|
||||
loguru==0.7.3
|
||||
asgiref==3.11.0
|
||||
|
||||
# AI libraries
|
||||
tiktoken
|
||||
mcp==1.22.0
|
||||
mcp==1.23.3
|
||||
|
||||
openai
|
||||
anthropic
|
||||
google-genai==1.52.0
|
||||
google-genai==1.54.0
|
||||
google-generativeai==0.8.5
|
||||
|
||||
langchain==0.3.27
|
||||
|
|
@ -49,8 +50,8 @@ langchain-community==0.3.29
|
|||
|
||||
fake-useragent==2.2.0
|
||||
chromadb==1.3.5
|
||||
weaviate-client==4.17.0
|
||||
opensearch-py==2.8.0
|
||||
weaviate-client==4.18.3
|
||||
opensearch-py==3.1.0
|
||||
|
||||
transformers==4.57.3
|
||||
sentence-transformers==5.1.2
|
||||
|
|
@ -60,43 +61,43 @@ einops==0.8.1
|
|||
|
||||
ftfy==6.3.1
|
||||
chardet==5.2.0
|
||||
pypdf==6.4.0
|
||||
fpdf2==2.8.2
|
||||
pymdown-extensions==10.17.2
|
||||
docx2txt==0.8
|
||||
pypdf==6.4.1
|
||||
fpdf2==2.8.5
|
||||
pymdown-extensions==10.18
|
||||
docx2txt==0.9
|
||||
python-pptx==1.0.2
|
||||
unstructured==0.18.21
|
||||
msoffcrypto-tool==5.4.2
|
||||
nltk==3.9.1
|
||||
nltk==3.9.2
|
||||
Markdown==3.10
|
||||
pypandoc==1.16.2
|
||||
pandas==2.2.3
|
||||
pandas==2.3.3
|
||||
openpyxl==3.1.5
|
||||
pyxlsb==1.0.10
|
||||
xlrd==2.0.1
|
||||
xlrd==2.0.2
|
||||
validators==0.35.0
|
||||
psutil
|
||||
sentencepiece
|
||||
soundfile==0.13.1
|
||||
|
||||
pillow==11.3.0
|
||||
opencv-python-headless==4.11.0.86
|
||||
pillow==12.0.0
|
||||
opencv-python-headless==4.12.0.88
|
||||
rapidocr-onnxruntime==1.4.4
|
||||
rank-bm25==0.2.2
|
||||
|
||||
onnxruntime==1.20.1
|
||||
faster-whisper==1.1.1
|
||||
onnxruntime==1.23.2
|
||||
faster-whisper==1.2.1
|
||||
|
||||
black==25.11.0
|
||||
youtube-transcript-api==1.2.2
|
||||
black==25.12.0
|
||||
youtube-transcript-api==1.2.3
|
||||
pytube==15.0.0
|
||||
|
||||
pydub
|
||||
ddgs==9.9.2
|
||||
ddgs==9.9.3
|
||||
|
||||
azure-ai-documentintelligence==1.0.2
|
||||
azure-identity==1.25.0
|
||||
azure-storage-blob==12.24.1
|
||||
azure-identity==1.25.1
|
||||
azure-storage-blob==12.27.1
|
||||
azure-search-documents==11.6.0
|
||||
|
||||
## Google Drive
|
||||
|
|
@ -105,26 +106,26 @@ google-auth-httplib2
|
|||
google-auth-oauthlib
|
||||
|
||||
googleapis-common-protos==1.72.0
|
||||
google-cloud-storage==2.19.0
|
||||
google-cloud-storage==3.7.0
|
||||
|
||||
## Databases
|
||||
pymongo
|
||||
psycopg2-binary==2.9.10
|
||||
pgvector==0.4.1
|
||||
psycopg2-binary==2.9.11
|
||||
pgvector==0.4.2
|
||||
|
||||
PyMySQL==1.1.1
|
||||
boto3==1.41.5
|
||||
PyMySQL==1.1.2
|
||||
boto3==1.42.5
|
||||
|
||||
pymilvus==2.6.5
|
||||
qdrant-client==1.16.1
|
||||
playwright==1.56.0 # Caution: version must match docker-compose.playwright.yaml
|
||||
elasticsearch==9.1.0
|
||||
playwright==1.57.0 # Caution: version must match docker-compose.playwright.yaml - Update the docker-compose.yaml if necessary
|
||||
elasticsearch==9.2.0
|
||||
pinecone==6.0.2
|
||||
oracledb==3.2.0
|
||||
oracledb==3.4.1
|
||||
|
||||
av==14.0.1 # Caution: Set due to FATAL FIPS SELFTEST FAILURE, see discussion https://github.com/open-webui/open-webui/discussions/15720
|
||||
|
||||
colbert-ai==0.2.21
|
||||
colbert-ai==0.2.22
|
||||
|
||||
|
||||
## Tests
|
||||
|
|
@ -136,17 +137,17 @@ pytest-docker~=3.2.5
|
|||
ldap3==2.9.1
|
||||
|
||||
## Firecrawl
|
||||
firecrawl-py==4.10.0
|
||||
firecrawl-py==4.10.4
|
||||
|
||||
## Trace
|
||||
opentelemetry-api==1.38.0
|
||||
opentelemetry-sdk==1.38.0
|
||||
opentelemetry-exporter-otlp==1.38.0
|
||||
opentelemetry-instrumentation==0.59b0
|
||||
opentelemetry-instrumentation-fastapi==0.59b0
|
||||
opentelemetry-instrumentation-sqlalchemy==0.59b0
|
||||
opentelemetry-instrumentation-redis==0.59b0
|
||||
opentelemetry-instrumentation-requests==0.59b0
|
||||
opentelemetry-instrumentation-logging==0.59b0
|
||||
opentelemetry-instrumentation-httpx==0.59b0
|
||||
opentelemetry-instrumentation-aiohttp-client==0.59b0
|
||||
opentelemetry-api==1.39.0
|
||||
opentelemetry-sdk==1.39.0
|
||||
opentelemetry-exporter-otlp==1.39.0
|
||||
opentelemetry-instrumentation==0.60b0
|
||||
opentelemetry-instrumentation-fastapi==0.60b0
|
||||
opentelemetry-instrumentation-sqlalchemy==0.60b0
|
||||
opentelemetry-instrumentation-redis==0.60b0
|
||||
opentelemetry-instrumentation-requests==0.60b0
|
||||
opentelemetry-instrumentation-logging==0.60b0
|
||||
opentelemetry-instrumentation-httpx==0.60b0
|
||||
opentelemetry-instrumentation-aiohttp-client==0.60b0
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
services:
|
||||
playwright:
|
||||
image: mcr.microsoft.com/playwright:v1.56.0-noble # Version must match requirements.txt
|
||||
image: mcr.microsoft.com/playwright:v1.57.0-noble # Version must match requirements.txt
|
||||
container_name: playwright
|
||||
command: npx -y playwright@1.56.0 run-server --port 3000 --host 0.0.0.0
|
||||
command: npx -y playwright@1.57.0 run-server --port 3000 --host 0.0.0.0
|
||||
|
||||
open-webui:
|
||||
environment:
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ authors = [
|
|||
]
|
||||
license = { file = "LICENSE" }
|
||||
dependencies = [
|
||||
"fastapi==0.123.0",
|
||||
"fastapi==0.124.0",
|
||||
"uvicorn[standard]==0.37.0",
|
||||
"pydantic==2.12.5",
|
||||
"python-multipart==0.0.20",
|
||||
|
|
@ -21,7 +21,7 @@ dependencies = [
|
|||
"authlib==1.6.5",
|
||||
|
||||
"requests==2.32.5",
|
||||
"aiohttp==3.12.15",
|
||||
"aiohttp==3.13.2",
|
||||
"async-timeout",
|
||||
"aiocache",
|
||||
"aiofiles",
|
||||
|
|
@ -29,26 +29,26 @@ dependencies = [
|
|||
"httpx[socks,http2,zstd,cli,brotli]==0.28.1",
|
||||
"starsessions[redis]==2.2.1",
|
||||
|
||||
"sqlalchemy==2.0.38",
|
||||
"sqlalchemy==2.0.44",
|
||||
"alembic==1.17.2",
|
||||
"peewee==3.18.3",
|
||||
"peewee-migrate==1.14.3",
|
||||
|
||||
"pycrdt==0.12.25",
|
||||
"pycrdt==0.12.44",
|
||||
"redis",
|
||||
|
||||
"APScheduler==3.10.4",
|
||||
"RestrictedPython==8.0",
|
||||
"APScheduler==3.11.1",
|
||||
"RestrictedPython==8.1",
|
||||
|
||||
"loguru==0.7.3",
|
||||
"asgiref==3.11.0",
|
||||
|
||||
"tiktoken",
|
||||
"mcp==1.22.0",
|
||||
"mcp==1.23.3",
|
||||
|
||||
"openai",
|
||||
"anthropic",
|
||||
"google-genai==1.52.0",
|
||||
"google-genai==1.54.0",
|
||||
"google-generativeai==0.8.5",
|
||||
|
||||
"langchain==0.3.27",
|
||||
|
|
@ -56,62 +56,62 @@ dependencies = [
|
|||
|
||||
"fake-useragent==2.2.0",
|
||||
"chromadb==1.3.5",
|
||||
"opensearch-py==2.8.0",
|
||||
"PyMySQL==1.1.1",
|
||||
"boto3==1.41.5",
|
||||
"opensearch-py==3.1.0",
|
||||
"PyMySQL==1.1.2",
|
||||
"boto3==1.42.5",
|
||||
|
||||
"transformers==4.57.3",
|
||||
"sentence-transformers==5.1.2",
|
||||
"accelerate",
|
||||
"pyarrow==20.0.0",
|
||||
"pyarrow==20.0.0", # fix: pin pyarrow version to 20 for rpi compatibility #15897
|
||||
"einops==0.8.1",
|
||||
|
||||
"ftfy==6.3.1",
|
||||
"chardet==5.2.0",
|
||||
"pypdf==6.4.0",
|
||||
"fpdf2==2.8.2",
|
||||
"pymdown-extensions==10.17.2",
|
||||
"docx2txt==0.8",
|
||||
"pypdf==6.4.1",
|
||||
"fpdf2==2.8.5",
|
||||
"pymdown-extensions==10.18",
|
||||
"docx2txt==0.9",
|
||||
"python-pptx==1.0.2",
|
||||
"unstructured==0.18.21",
|
||||
"msoffcrypto-tool==5.4.2",
|
||||
"nltk==3.9.1",
|
||||
"nltk==3.9.2",
|
||||
"Markdown==3.10",
|
||||
"pypandoc==1.16.2",
|
||||
"pandas==2.2.3",
|
||||
"pandas==2.3.3",
|
||||
"openpyxl==3.1.5",
|
||||
"pyxlsb==1.0.10",
|
||||
"xlrd==2.0.1",
|
||||
"xlrd==2.0.2",
|
||||
"validators==0.35.0",
|
||||
"psutil",
|
||||
"sentencepiece",
|
||||
"soundfile==0.13.1",
|
||||
"azure-ai-documentintelligence==1.0.2",
|
||||
|
||||
"pillow==11.3.0",
|
||||
"opencv-python-headless==4.11.0.86",
|
||||
"pillow==12.0.0",
|
||||
"opencv-python-headless==4.12.0.88",
|
||||
"rapidocr-onnxruntime==1.4.4",
|
||||
"rank-bm25==0.2.2",
|
||||
|
||||
"onnxruntime==1.20.1",
|
||||
"faster-whisper==1.1.1",
|
||||
"onnxruntime==1.23.2",
|
||||
"faster-whisper==1.2.1",
|
||||
|
||||
"black==25.11.0",
|
||||
"youtube-transcript-api==1.2.2",
|
||||
"black==25.12.0",
|
||||
"youtube-transcript-api==1.2.3",
|
||||
"pytube==15.0.0",
|
||||
|
||||
"pydub",
|
||||
"ddgs==9.9.2",
|
||||
"ddgs==9.9.3",
|
||||
|
||||
"google-api-python-client",
|
||||
"google-auth-httplib2",
|
||||
"google-auth-oauthlib",
|
||||
|
||||
"googleapis-common-protos==1.72.0",
|
||||
"google-cloud-storage==2.19.0",
|
||||
"google-cloud-storage==3.7.0",
|
||||
|
||||
"azure-identity==1.25.0",
|
||||
"azure-storage-blob==12.24.1",
|
||||
"azure-identity==1.25.1",
|
||||
"azure-storage-blob==12.27.1",
|
||||
|
||||
"ldap3==2.9.1",
|
||||
]
|
||||
|
|
@ -130,8 +130,8 @@ classifiers = [
|
|||
|
||||
[project.optional-dependencies]
|
||||
postgres = [
|
||||
"psycopg2-binary==2.9.10",
|
||||
"pgvector==0.4.1",
|
||||
"psycopg2-binary==2.9.11",
|
||||
"pgvector==0.4.2",
|
||||
]
|
||||
|
||||
all = [
|
||||
|
|
@ -143,17 +143,18 @@ all = [
|
|||
"docker~=7.1.0",
|
||||
"pytest~=8.3.2",
|
||||
"pytest-docker~=3.2.5",
|
||||
"playwright==1.56.0",
|
||||
"elasticsearch==9.1.0",
|
||||
"playwright==1.57.0", # Caution: version must match docker-compose.playwright.yaml - Update the docker-compose.yaml if necessary
|
||||
"elasticsearch==9.2.0",
|
||||
|
||||
"qdrant-client==1.16.1",
|
||||
"weaviate-client==4.17.0",
|
||||
"pymilvus==2.6.4",
|
||||
"weaviate-client==4.18.3",
|
||||
"pymilvus==2.6.5",
|
||||
"pinecone==6.0.2",
|
||||
"oracledb==3.2.0",
|
||||
"colbert-ai==0.2.21",
|
||||
"oracledb==3.4.1",
|
||||
"colbert-ai==0.2.22",
|
||||
|
||||
"firecrawl-py==4.10.0",
|
||||
"firecrawl-py==4.10.4",
|
||||
"azure-search-documents==11.6.0",
|
||||
]
|
||||
|
||||
|
|
|
|||
|
|
@ -1624,6 +1624,7 @@
|
|||
"Tika": "Tika",
|
||||
"Tika Server URL required.": "请输入 Tika 服务器接口地址",
|
||||
"Tiktoken": "Tiktoken",
|
||||
"Timeout": "超时时间",
|
||||
"Title": "标题",
|
||||
"Title Auto-Generation": "自动生成标题",
|
||||
"Title cannot be an empty string.": "标题不能为空",
|
||||
|
|
|
|||
|
|
@ -1624,6 +1624,7 @@
|
|||
"Tika": "Tika",
|
||||
"Tika Server URL required.": "需要提供 Tika 伺服器 URL。",
|
||||
"Tiktoken": "Tiktoken",
|
||||
"Timeout": "逾時時間",
|
||||
"Title": "標題",
|
||||
"Title Auto-Generation": "自動產生標題",
|
||||
"Title cannot be an empty string.": "標題不能是空字串。",
|
||||
|
|
|
|||
Loading…
Reference in a new issue