mirror of
https://github.com/open-webui/open-webui.git
synced 2025-12-12 20:35:19 +00:00
Compare commits
5 commits
2daa3a617f
...
4e3f129b3b
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4e3f129b3b | ||
|
|
1ea555a5ac | ||
|
|
c24b1207a0 | ||
|
|
44e41806f2 | ||
|
|
153240c8d6 |
9 changed files with 140 additions and 115 deletions
|
|
@ -33,6 +33,7 @@ from fastapi.responses import FileResponse
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
|
||||||
|
from open_webui.utils.misc import strict_match_mime_type
|
||||||
from open_webui.utils.auth import get_admin_user, get_verified_user
|
from open_webui.utils.auth import get_admin_user, get_verified_user
|
||||||
from open_webui.utils.headers import include_user_info_headers
|
from open_webui.utils.headers import include_user_info_headers
|
||||||
from open_webui.config import (
|
from open_webui.config import (
|
||||||
|
|
@ -1155,17 +1156,9 @@ def transcription(
|
||||||
|
|
||||||
stt_supported_content_types = getattr(
|
stt_supported_content_types = getattr(
|
||||||
request.app.state.config, "STT_SUPPORTED_CONTENT_TYPES", []
|
request.app.state.config, "STT_SUPPORTED_CONTENT_TYPES", []
|
||||||
)
|
) or ["audio/*", "video/webm"]
|
||||||
|
|
||||||
if not any(
|
if not strict_match_mime_type(stt_supported_content_types, file.content_type):
|
||||||
fnmatch(file.content_type, content_type)
|
|
||||||
for content_type in (
|
|
||||||
stt_supported_content_types
|
|
||||||
if stt_supported_content_types
|
|
||||||
and any(t.strip() for t in stt_supported_content_types)
|
|
||||||
else ["audio/*", "video/webm"]
|
|
||||||
)
|
|
||||||
):
|
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=status.HTTP_400_BAD_REQUEST,
|
status_code=status.HTTP_400_BAD_REQUEST,
|
||||||
detail=ERROR_MESSAGES.FILE_NOT_SUPPORTED,
|
detail=ERROR_MESSAGES.FILE_NOT_SUPPORTED,
|
||||||
|
|
|
||||||
|
|
@ -47,7 +47,7 @@ from open_webui.storage.provider import Storage
|
||||||
|
|
||||||
from open_webui.utils.auth import get_admin_user, get_verified_user
|
from open_webui.utils.auth import get_admin_user, get_verified_user
|
||||||
from open_webui.utils.access_control import has_access
|
from open_webui.utils.access_control import has_access
|
||||||
|
from open_webui.utils.misc import strict_match_mime_type
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
@ -104,17 +104,9 @@ def process_uploaded_file(request, file, file_path, file_item, file_metadata, us
|
||||||
if file.content_type:
|
if file.content_type:
|
||||||
stt_supported_content_types = getattr(
|
stt_supported_content_types = getattr(
|
||||||
request.app.state.config, "STT_SUPPORTED_CONTENT_TYPES", []
|
request.app.state.config, "STT_SUPPORTED_CONTENT_TYPES", []
|
||||||
)
|
) or ["audio/*", "video/webm"]
|
||||||
|
|
||||||
if any(
|
if strict_match_mime_type(stt_supported_content_types, file.content_type):
|
||||||
fnmatch(file.content_type, content_type)
|
|
||||||
for content_type in (
|
|
||||||
stt_supported_content_types
|
|
||||||
if stt_supported_content_types
|
|
||||||
and any(t.strip() for t in stt_supported_content_types)
|
|
||||||
else ["audio/*", "video/webm"]
|
|
||||||
)
|
|
||||||
):
|
|
||||||
file_path = Storage.get_file(file_path)
|
file_path = Storage.get_file(file_path)
|
||||||
result = transcribe(request, file_path, file_metadata, user)
|
result = transcribe(request, file_path, file_metadata, user)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@ from pathlib import Path
|
||||||
from typing import Callable, Optional, Sequence, Union
|
from typing import Callable, Optional, Sequence, Union
|
||||||
import json
|
import json
|
||||||
import aiohttp
|
import aiohttp
|
||||||
|
import mimeparse
|
||||||
|
|
||||||
|
|
||||||
import collections.abc
|
import collections.abc
|
||||||
|
|
@ -577,6 +578,37 @@ def throttle(interval: float = 10.0):
|
||||||
return decorator
|
return decorator
|
||||||
|
|
||||||
|
|
||||||
|
def strict_match_mime_type(supported: list[str] | str, header: str) -> Optional[str]:
|
||||||
|
"""
|
||||||
|
Strictly match the mime type with the supported mime types.
|
||||||
|
|
||||||
|
:param supported: The supported mime types.
|
||||||
|
:param header: The header to match.
|
||||||
|
:return: The matched mime type or None if no match is found.
|
||||||
|
"""
|
||||||
|
|
||||||
|
try:
|
||||||
|
if isinstance(supported, str):
|
||||||
|
supported = supported.split(",")
|
||||||
|
|
||||||
|
supported = [s for s in supported if s.strip() and "/" in s]
|
||||||
|
|
||||||
|
match = mimeparse.best_match(supported, header)
|
||||||
|
if not match:
|
||||||
|
return None
|
||||||
|
|
||||||
|
_, _, match_params = mimeparse.parse_mime_type(match)
|
||||||
|
_, _, header_params = mimeparse.parse_mime_type(header)
|
||||||
|
for k, v in match_params.items():
|
||||||
|
if header_params.get(k) != v:
|
||||||
|
return None
|
||||||
|
|
||||||
|
return match
|
||||||
|
except Exception as e:
|
||||||
|
log.exception(f"Failed to match mime type {header}: {e}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
def extract_urls(text: str) -> list[str]:
|
def extract_urls(text: str) -> list[str]:
|
||||||
# Regex pattern to match URLs
|
# Regex pattern to match URLs
|
||||||
url_pattern = re.compile(
|
url_pattern = re.compile(
|
||||||
|
|
@ -624,14 +656,17 @@ def stream_chunks_handler(stream: aiohttp.StreamReader):
|
||||||
yield line
|
yield line
|
||||||
else:
|
else:
|
||||||
yield b"data: {}"
|
yield b"data: {}"
|
||||||
|
yield b"\n"
|
||||||
else:
|
else:
|
||||||
# Normal mode: check if line exceeds limit
|
# Normal mode: check if line exceeds limit
|
||||||
if len(line) > max_buffer_size:
|
if len(line) > max_buffer_size:
|
||||||
skip_mode = True
|
skip_mode = True
|
||||||
yield b"data: {}"
|
yield b"data: {}"
|
||||||
|
yield b"\n"
|
||||||
log.info(f"Skip mode triggered, line size: {len(line)}")
|
log.info(f"Skip mode triggered, line size: {len(line)}")
|
||||||
else:
|
else:
|
||||||
yield line
|
yield line
|
||||||
|
yield b"\n"
|
||||||
|
|
||||||
# Save the last incomplete fragment
|
# Save the last incomplete fragment
|
||||||
buffer = lines[-1]
|
buffer = lines[-1]
|
||||||
|
|
@ -646,5 +681,6 @@ def stream_chunks_handler(stream: aiohttp.StreamReader):
|
||||||
# Process remaining buffer data
|
# Process remaining buffer data
|
||||||
if buffer and not skip_mode:
|
if buffer and not skip_mode:
|
||||||
yield buffer
|
yield buffer
|
||||||
|
yield b"\n"
|
||||||
|
|
||||||
return yield_safe_stream_chunks()
|
return yield_safe_stream_chunks()
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
# Minimal requirements for backend to run
|
# Minimal requirements for backend to run
|
||||||
# WIP: use this as a reference to build a minimal docker image
|
# WIP: use this as a reference to build a minimal docker image
|
||||||
|
|
||||||
fastapi==0.123.0
|
fastapi==0.124.0
|
||||||
uvicorn[standard]==0.37.0
|
uvicorn[standard]==0.37.0
|
||||||
pydantic==2.12.5
|
pydantic==2.12.5
|
||||||
python-multipart==0.0.20
|
python-multipart==0.0.20
|
||||||
|
|
@ -16,7 +16,7 @@ PyJWT[crypto]==2.10.1
|
||||||
authlib==1.6.5
|
authlib==1.6.5
|
||||||
|
|
||||||
requests==2.32.5
|
requests==2.32.5
|
||||||
aiohttp==3.12.15
|
aiohttp==3.13.2
|
||||||
async-timeout
|
async-timeout
|
||||||
aiocache
|
aiocache
|
||||||
aiofiles
|
aiofiles
|
||||||
|
|
@ -24,21 +24,21 @@ starlette-compress==1.6.1
|
||||||
httpx[socks,http2,zstd,cli,brotli]==0.28.1
|
httpx[socks,http2,zstd,cli,brotli]==0.28.1
|
||||||
starsessions[redis]==2.2.1
|
starsessions[redis]==2.2.1
|
||||||
|
|
||||||
sqlalchemy==2.0.38
|
sqlalchemy==2.0.44
|
||||||
alembic==1.17.2
|
alembic==1.17.2
|
||||||
peewee==3.18.3
|
peewee==3.18.3
|
||||||
peewee-migrate==1.14.3
|
peewee-migrate==1.14.3
|
||||||
|
|
||||||
pycrdt==0.12.25
|
pycrdt==0.12.44
|
||||||
redis
|
redis
|
||||||
|
|
||||||
APScheduler==3.10.4
|
APScheduler==3.11.1
|
||||||
RestrictedPython==8.0
|
RestrictedPython==8.1
|
||||||
|
|
||||||
loguru==0.7.3
|
loguru==0.7.3
|
||||||
asgiref==3.11.0
|
asgiref==3.11.0
|
||||||
|
|
||||||
mcp==1.22.0
|
mcp==1.23.1
|
||||||
openai
|
openai
|
||||||
|
|
||||||
langchain==0.3.27
|
langchain==0.3.27
|
||||||
|
|
@ -46,6 +46,6 @@ langchain-community==0.3.29
|
||||||
fake-useragent==2.2.0
|
fake-useragent==2.2.0
|
||||||
|
|
||||||
chromadb==1.3.5
|
chromadb==1.3.5
|
||||||
black==25.11.0
|
black==25.12.0
|
||||||
pydub
|
pydub
|
||||||
chardet==5.2.0
|
chardet==5.2.0
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
fastapi==0.123.0
|
fastapi==0.124.0
|
||||||
uvicorn[standard]==0.37.0
|
uvicorn[standard]==0.37.0
|
||||||
pydantic==2.12.5
|
pydantic==2.12.5
|
||||||
python-multipart==0.0.20
|
python-multipart==0.0.20
|
||||||
|
|
@ -13,35 +13,36 @@ PyJWT[crypto]==2.10.1
|
||||||
authlib==1.6.5
|
authlib==1.6.5
|
||||||
|
|
||||||
requests==2.32.5
|
requests==2.32.5
|
||||||
aiohttp==3.12.15
|
aiohttp==3.13.2
|
||||||
async-timeout
|
async-timeout
|
||||||
aiocache
|
aiocache
|
||||||
aiofiles
|
aiofiles
|
||||||
starlette-compress==1.6.1
|
starlette-compress==1.6.1
|
||||||
httpx[socks,http2,zstd,cli,brotli]==0.28.1
|
httpx[socks,http2,zstd,cli,brotli]==0.28.1
|
||||||
starsessions[redis]==2.2.1
|
starsessions[redis]==2.2.1
|
||||||
|
python-mimeparse==2.0.0
|
||||||
|
|
||||||
sqlalchemy==2.0.38
|
sqlalchemy==2.0.44
|
||||||
alembic==1.17.2
|
alembic==1.17.2
|
||||||
peewee==3.18.3
|
peewee==3.18.3
|
||||||
peewee-migrate==1.14.3
|
peewee-migrate==1.14.3
|
||||||
|
|
||||||
pycrdt==0.12.25
|
pycrdt==0.12.44
|
||||||
redis
|
redis
|
||||||
|
|
||||||
APScheduler==3.10.4
|
APScheduler==3.11.1
|
||||||
RestrictedPython==8.0
|
RestrictedPython==8.1
|
||||||
|
|
||||||
loguru==0.7.3
|
loguru==0.7.3
|
||||||
asgiref==3.11.0
|
asgiref==3.11.0
|
||||||
|
|
||||||
# AI libraries
|
# AI libraries
|
||||||
tiktoken
|
tiktoken
|
||||||
mcp==1.22.0
|
mcp==1.23.3
|
||||||
|
|
||||||
openai
|
openai
|
||||||
anthropic
|
anthropic
|
||||||
google-genai==1.52.0
|
google-genai==1.54.0
|
||||||
google-generativeai==0.8.5
|
google-generativeai==0.8.5
|
||||||
|
|
||||||
langchain==0.3.27
|
langchain==0.3.27
|
||||||
|
|
@ -49,8 +50,8 @@ langchain-community==0.3.29
|
||||||
|
|
||||||
fake-useragent==2.2.0
|
fake-useragent==2.2.0
|
||||||
chromadb==1.3.5
|
chromadb==1.3.5
|
||||||
weaviate-client==4.17.0
|
weaviate-client==4.18.3
|
||||||
opensearch-py==2.8.0
|
opensearch-py==3.1.0
|
||||||
|
|
||||||
transformers==4.57.3
|
transformers==4.57.3
|
||||||
sentence-transformers==5.1.2
|
sentence-transformers==5.1.2
|
||||||
|
|
@ -60,43 +61,43 @@ einops==0.8.1
|
||||||
|
|
||||||
ftfy==6.3.1
|
ftfy==6.3.1
|
||||||
chardet==5.2.0
|
chardet==5.2.0
|
||||||
pypdf==6.4.0
|
pypdf==6.4.1
|
||||||
fpdf2==2.8.2
|
fpdf2==2.8.5
|
||||||
pymdown-extensions==10.17.2
|
pymdown-extensions==10.18
|
||||||
docx2txt==0.8
|
docx2txt==0.9
|
||||||
python-pptx==1.0.2
|
python-pptx==1.0.2
|
||||||
unstructured==0.18.21
|
unstructured==0.18.21
|
||||||
msoffcrypto-tool==5.4.2
|
msoffcrypto-tool==5.4.2
|
||||||
nltk==3.9.1
|
nltk==3.9.2
|
||||||
Markdown==3.10
|
Markdown==3.10
|
||||||
pypandoc==1.16.2
|
pypandoc==1.16.2
|
||||||
pandas==2.2.3
|
pandas==2.3.3
|
||||||
openpyxl==3.1.5
|
openpyxl==3.1.5
|
||||||
pyxlsb==1.0.10
|
pyxlsb==1.0.10
|
||||||
xlrd==2.0.1
|
xlrd==2.0.2
|
||||||
validators==0.35.0
|
validators==0.35.0
|
||||||
psutil
|
psutil
|
||||||
sentencepiece
|
sentencepiece
|
||||||
soundfile==0.13.1
|
soundfile==0.13.1
|
||||||
|
|
||||||
pillow==11.3.0
|
pillow==12.0.0
|
||||||
opencv-python-headless==4.11.0.86
|
opencv-python-headless==4.12.0.88
|
||||||
rapidocr-onnxruntime==1.4.4
|
rapidocr-onnxruntime==1.4.4
|
||||||
rank-bm25==0.2.2
|
rank-bm25==0.2.2
|
||||||
|
|
||||||
onnxruntime==1.20.1
|
onnxruntime==1.23.2
|
||||||
faster-whisper==1.1.1
|
faster-whisper==1.2.1
|
||||||
|
|
||||||
black==25.11.0
|
black==25.12.0
|
||||||
youtube-transcript-api==1.2.2
|
youtube-transcript-api==1.2.3
|
||||||
pytube==15.0.0
|
pytube==15.0.0
|
||||||
|
|
||||||
pydub
|
pydub
|
||||||
ddgs==9.9.2
|
ddgs==9.9.3
|
||||||
|
|
||||||
azure-ai-documentintelligence==1.0.2
|
azure-ai-documentintelligence==1.0.2
|
||||||
azure-identity==1.25.0
|
azure-identity==1.25.1
|
||||||
azure-storage-blob==12.24.1
|
azure-storage-blob==12.27.1
|
||||||
azure-search-documents==11.6.0
|
azure-search-documents==11.6.0
|
||||||
|
|
||||||
## Google Drive
|
## Google Drive
|
||||||
|
|
@ -105,26 +106,26 @@ google-auth-httplib2
|
||||||
google-auth-oauthlib
|
google-auth-oauthlib
|
||||||
|
|
||||||
googleapis-common-protos==1.72.0
|
googleapis-common-protos==1.72.0
|
||||||
google-cloud-storage==2.19.0
|
google-cloud-storage==3.7.0
|
||||||
|
|
||||||
## Databases
|
## Databases
|
||||||
pymongo
|
pymongo
|
||||||
psycopg2-binary==2.9.10
|
psycopg2-binary==2.9.11
|
||||||
pgvector==0.4.1
|
pgvector==0.4.2
|
||||||
|
|
||||||
PyMySQL==1.1.1
|
PyMySQL==1.1.2
|
||||||
boto3==1.41.5
|
boto3==1.42.5
|
||||||
|
|
||||||
pymilvus==2.6.5
|
pymilvus==2.6.5
|
||||||
qdrant-client==1.16.1
|
qdrant-client==1.16.1
|
||||||
playwright==1.56.0 # Caution: version must match docker-compose.playwright.yaml
|
playwright==1.57.0 # Caution: version must match docker-compose.playwright.yaml - Update the docker-compose.yaml if necessary
|
||||||
elasticsearch==9.1.0
|
elasticsearch==9.2.0
|
||||||
pinecone==6.0.2
|
pinecone==6.0.2
|
||||||
oracledb==3.2.0
|
oracledb==3.4.1
|
||||||
|
|
||||||
av==14.0.1 # Caution: Set due to FATAL FIPS SELFTEST FAILURE, see discussion https://github.com/open-webui/open-webui/discussions/15720
|
av==14.0.1 # Caution: Set due to FATAL FIPS SELFTEST FAILURE, see discussion https://github.com/open-webui/open-webui/discussions/15720
|
||||||
|
|
||||||
colbert-ai==0.2.21
|
colbert-ai==0.2.22
|
||||||
|
|
||||||
|
|
||||||
## Tests
|
## Tests
|
||||||
|
|
@ -136,17 +137,17 @@ pytest-docker~=3.2.5
|
||||||
ldap3==2.9.1
|
ldap3==2.9.1
|
||||||
|
|
||||||
## Firecrawl
|
## Firecrawl
|
||||||
firecrawl-py==4.10.0
|
firecrawl-py==4.10.4
|
||||||
|
|
||||||
## Trace
|
## Trace
|
||||||
opentelemetry-api==1.38.0
|
opentelemetry-api==1.39.0
|
||||||
opentelemetry-sdk==1.38.0
|
opentelemetry-sdk==1.39.0
|
||||||
opentelemetry-exporter-otlp==1.38.0
|
opentelemetry-exporter-otlp==1.39.0
|
||||||
opentelemetry-instrumentation==0.59b0
|
opentelemetry-instrumentation==0.60b0
|
||||||
opentelemetry-instrumentation-fastapi==0.59b0
|
opentelemetry-instrumentation-fastapi==0.60b0
|
||||||
opentelemetry-instrumentation-sqlalchemy==0.59b0
|
opentelemetry-instrumentation-sqlalchemy==0.60b0
|
||||||
opentelemetry-instrumentation-redis==0.59b0
|
opentelemetry-instrumentation-redis==0.60b0
|
||||||
opentelemetry-instrumentation-requests==0.59b0
|
opentelemetry-instrumentation-requests==0.60b0
|
||||||
opentelemetry-instrumentation-logging==0.59b0
|
opentelemetry-instrumentation-logging==0.60b0
|
||||||
opentelemetry-instrumentation-httpx==0.59b0
|
opentelemetry-instrumentation-httpx==0.60b0
|
||||||
opentelemetry-instrumentation-aiohttp-client==0.59b0
|
opentelemetry-instrumentation-aiohttp-client==0.60b0
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,8 @@
|
||||||
services:
|
services:
|
||||||
playwright:
|
playwright:
|
||||||
image: mcr.microsoft.com/playwright:v1.56.0-noble # Version must match requirements.txt
|
image: mcr.microsoft.com/playwright:v1.57.0-noble # Version must match requirements.txt
|
||||||
container_name: playwright
|
container_name: playwright
|
||||||
command: npx -y playwright@1.56.0 run-server --port 3000 --host 0.0.0.0
|
command: npx -y playwright@1.57.0 run-server --port 3000 --host 0.0.0.0
|
||||||
|
|
||||||
open-webui:
|
open-webui:
|
||||||
environment:
|
environment:
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ authors = [
|
||||||
]
|
]
|
||||||
license = { file = "LICENSE" }
|
license = { file = "LICENSE" }
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"fastapi==0.123.0",
|
"fastapi==0.124.0",
|
||||||
"uvicorn[standard]==0.37.0",
|
"uvicorn[standard]==0.37.0",
|
||||||
"pydantic==2.12.5",
|
"pydantic==2.12.5",
|
||||||
"python-multipart==0.0.20",
|
"python-multipart==0.0.20",
|
||||||
|
|
@ -21,7 +21,7 @@ dependencies = [
|
||||||
"authlib==1.6.5",
|
"authlib==1.6.5",
|
||||||
|
|
||||||
"requests==2.32.5",
|
"requests==2.32.5",
|
||||||
"aiohttp==3.12.15",
|
"aiohttp==3.13.2",
|
||||||
"async-timeout",
|
"async-timeout",
|
||||||
"aiocache",
|
"aiocache",
|
||||||
"aiofiles",
|
"aiofiles",
|
||||||
|
|
@ -29,26 +29,26 @@ dependencies = [
|
||||||
"httpx[socks,http2,zstd,cli,brotli]==0.28.1",
|
"httpx[socks,http2,zstd,cli,brotli]==0.28.1",
|
||||||
"starsessions[redis]==2.2.1",
|
"starsessions[redis]==2.2.1",
|
||||||
|
|
||||||
"sqlalchemy==2.0.38",
|
"sqlalchemy==2.0.44",
|
||||||
"alembic==1.17.2",
|
"alembic==1.17.2",
|
||||||
"peewee==3.18.3",
|
"peewee==3.18.3",
|
||||||
"peewee-migrate==1.14.3",
|
"peewee-migrate==1.14.3",
|
||||||
|
|
||||||
"pycrdt==0.12.25",
|
"pycrdt==0.12.44",
|
||||||
"redis",
|
"redis",
|
||||||
|
|
||||||
"APScheduler==3.10.4",
|
"APScheduler==3.11.1",
|
||||||
"RestrictedPython==8.0",
|
"RestrictedPython==8.1",
|
||||||
|
|
||||||
"loguru==0.7.3",
|
"loguru==0.7.3",
|
||||||
"asgiref==3.11.0",
|
"asgiref==3.11.0",
|
||||||
|
|
||||||
"tiktoken",
|
"tiktoken",
|
||||||
"mcp==1.22.0",
|
"mcp==1.23.3",
|
||||||
|
|
||||||
"openai",
|
"openai",
|
||||||
"anthropic",
|
"anthropic",
|
||||||
"google-genai==1.52.0",
|
"google-genai==1.54.0",
|
||||||
"google-generativeai==0.8.5",
|
"google-generativeai==0.8.5",
|
||||||
|
|
||||||
"langchain==0.3.27",
|
"langchain==0.3.27",
|
||||||
|
|
@ -56,62 +56,62 @@ dependencies = [
|
||||||
|
|
||||||
"fake-useragent==2.2.0",
|
"fake-useragent==2.2.0",
|
||||||
"chromadb==1.3.5",
|
"chromadb==1.3.5",
|
||||||
"opensearch-py==2.8.0",
|
"opensearch-py==3.1.0",
|
||||||
"PyMySQL==1.1.1",
|
"PyMySQL==1.1.2",
|
||||||
"boto3==1.41.5",
|
"boto3==1.42.5",
|
||||||
|
|
||||||
"transformers==4.57.3",
|
"transformers==4.57.3",
|
||||||
"sentence-transformers==5.1.2",
|
"sentence-transformers==5.1.2",
|
||||||
"accelerate",
|
"accelerate",
|
||||||
"pyarrow==20.0.0",
|
"pyarrow==20.0.0", # fix: pin pyarrow version to 20 for rpi compatibility #15897
|
||||||
"einops==0.8.1",
|
"einops==0.8.1",
|
||||||
|
|
||||||
"ftfy==6.3.1",
|
"ftfy==6.3.1",
|
||||||
"chardet==5.2.0",
|
"chardet==5.2.0",
|
||||||
"pypdf==6.4.0",
|
"pypdf==6.4.1",
|
||||||
"fpdf2==2.8.2",
|
"fpdf2==2.8.5",
|
||||||
"pymdown-extensions==10.17.2",
|
"pymdown-extensions==10.18",
|
||||||
"docx2txt==0.8",
|
"docx2txt==0.9",
|
||||||
"python-pptx==1.0.2",
|
"python-pptx==1.0.2",
|
||||||
"unstructured==0.18.21",
|
"unstructured==0.18.21",
|
||||||
"msoffcrypto-tool==5.4.2",
|
"msoffcrypto-tool==5.4.2",
|
||||||
"nltk==3.9.1",
|
"nltk==3.9.2",
|
||||||
"Markdown==3.10",
|
"Markdown==3.10",
|
||||||
"pypandoc==1.16.2",
|
"pypandoc==1.16.2",
|
||||||
"pandas==2.2.3",
|
"pandas==2.3.3",
|
||||||
"openpyxl==3.1.5",
|
"openpyxl==3.1.5",
|
||||||
"pyxlsb==1.0.10",
|
"pyxlsb==1.0.10",
|
||||||
"xlrd==2.0.1",
|
"xlrd==2.0.2",
|
||||||
"validators==0.35.0",
|
"validators==0.35.0",
|
||||||
"psutil",
|
"psutil",
|
||||||
"sentencepiece",
|
"sentencepiece",
|
||||||
"soundfile==0.13.1",
|
"soundfile==0.13.1",
|
||||||
"azure-ai-documentintelligence==1.0.2",
|
"azure-ai-documentintelligence==1.0.2",
|
||||||
|
|
||||||
"pillow==11.3.0",
|
"pillow==12.0.0",
|
||||||
"opencv-python-headless==4.11.0.86",
|
"opencv-python-headless==4.12.0.88",
|
||||||
"rapidocr-onnxruntime==1.4.4",
|
"rapidocr-onnxruntime==1.4.4",
|
||||||
"rank-bm25==0.2.2",
|
"rank-bm25==0.2.2",
|
||||||
|
|
||||||
"onnxruntime==1.20.1",
|
"onnxruntime==1.23.2",
|
||||||
"faster-whisper==1.1.1",
|
"faster-whisper==1.2.1",
|
||||||
|
|
||||||
"black==25.11.0",
|
"black==25.12.0",
|
||||||
"youtube-transcript-api==1.2.2",
|
"youtube-transcript-api==1.2.3",
|
||||||
"pytube==15.0.0",
|
"pytube==15.0.0",
|
||||||
|
|
||||||
"pydub",
|
"pydub",
|
||||||
"ddgs==9.9.2",
|
"ddgs==9.9.3",
|
||||||
|
|
||||||
"google-api-python-client",
|
"google-api-python-client",
|
||||||
"google-auth-httplib2",
|
"google-auth-httplib2",
|
||||||
"google-auth-oauthlib",
|
"google-auth-oauthlib",
|
||||||
|
|
||||||
"googleapis-common-protos==1.72.0",
|
"googleapis-common-protos==1.72.0",
|
||||||
"google-cloud-storage==2.19.0",
|
"google-cloud-storage==3.7.0",
|
||||||
|
|
||||||
"azure-identity==1.25.0",
|
"azure-identity==1.25.1",
|
||||||
"azure-storage-blob==12.24.1",
|
"azure-storage-blob==12.27.1",
|
||||||
|
|
||||||
"ldap3==2.9.1",
|
"ldap3==2.9.1",
|
||||||
]
|
]
|
||||||
|
|
@ -130,8 +130,8 @@ classifiers = [
|
||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
postgres = [
|
postgres = [
|
||||||
"psycopg2-binary==2.9.10",
|
"psycopg2-binary==2.9.11",
|
||||||
"pgvector==0.4.1",
|
"pgvector==0.4.2",
|
||||||
]
|
]
|
||||||
|
|
||||||
all = [
|
all = [
|
||||||
|
|
@ -143,17 +143,18 @@ all = [
|
||||||
"docker~=7.1.0",
|
"docker~=7.1.0",
|
||||||
"pytest~=8.3.2",
|
"pytest~=8.3.2",
|
||||||
"pytest-docker~=3.2.5",
|
"pytest-docker~=3.2.5",
|
||||||
"playwright==1.56.0",
|
"playwright==1.57.0", # Caution: version must match docker-compose.playwright.yaml - Update the docker-compose.yaml if necessary
|
||||||
"elasticsearch==9.1.0",
|
"elasticsearch==9.2.0",
|
||||||
|
|
||||||
"qdrant-client==1.16.1",
|
"qdrant-client==1.16.1",
|
||||||
"weaviate-client==4.17.0",
|
"pymilvus==2.6.4",
|
||||||
|
"weaviate-client==4.18.3",
|
||||||
"pymilvus==2.6.5",
|
"pymilvus==2.6.5",
|
||||||
"pinecone==6.0.2",
|
"pinecone==6.0.2",
|
||||||
"oracledb==3.2.0",
|
"oracledb==3.4.1",
|
||||||
"colbert-ai==0.2.21",
|
"colbert-ai==0.2.22",
|
||||||
|
|
||||||
"firecrawl-py==4.10.0",
|
"firecrawl-py==4.10.4",
|
||||||
"azure-search-documents==11.6.0",
|
"azure-search-documents==11.6.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1624,6 +1624,7 @@
|
||||||
"Tika": "Tika",
|
"Tika": "Tika",
|
||||||
"Tika Server URL required.": "请输入 Tika 服务器接口地址",
|
"Tika Server URL required.": "请输入 Tika 服务器接口地址",
|
||||||
"Tiktoken": "Tiktoken",
|
"Tiktoken": "Tiktoken",
|
||||||
|
"Timeout": "超时时间",
|
||||||
"Title": "标题",
|
"Title": "标题",
|
||||||
"Title Auto-Generation": "自动生成标题",
|
"Title Auto-Generation": "自动生成标题",
|
||||||
"Title cannot be an empty string.": "标题不能为空",
|
"Title cannot be an empty string.": "标题不能为空",
|
||||||
|
|
|
||||||
|
|
@ -1624,6 +1624,7 @@
|
||||||
"Tika": "Tika",
|
"Tika": "Tika",
|
||||||
"Tika Server URL required.": "需要提供 Tika 伺服器 URL。",
|
"Tika Server URL required.": "需要提供 Tika 伺服器 URL。",
|
||||||
"Tiktoken": "Tiktoken",
|
"Tiktoken": "Tiktoken",
|
||||||
|
"Timeout": "逾時時間",
|
||||||
"Title": "標題",
|
"Title": "標題",
|
||||||
"Title Auto-Generation": "自動產生標題",
|
"Title Auto-Generation": "自動產生標題",
|
||||||
"Title cannot be an empty string.": "標題不能是空字串。",
|
"Title cannot be an empty string.": "標題不能是空字串。",
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue