diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py index a3a9050f78..41e88df5d2 100644 --- a/backend/open_webui/config.py +++ b/backend/open_webui/config.py @@ -2590,6 +2590,12 @@ DOCUMENT_INTELLIGENCE_KEY = PersistentConfig( os.getenv("DOCUMENT_INTELLIGENCE_KEY", ""), ) +DOCUMENT_INTELLIGENCE_MODEL = PersistentConfig( + "DOCUMENT_INTELLIGENCE_MODEL", + "rag.document_intelligence_model", + os.getenv("DOCUMENT_INTELLIGENCE_MODEL", "prebuilt-layout"), +) + MISTRAL_OCR_API_BASE_URL = PersistentConfig( "MISTRAL_OCR_API_BASE_URL", "rag.MISTRAL_OCR_API_BASE_URL", diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index 087dc5fb03..e1f3b39a3e 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -273,6 +273,7 @@ from open_webui.config import ( DOCLING_PARAMS, DOCUMENT_INTELLIGENCE_ENDPOINT, DOCUMENT_INTELLIGENCE_KEY, + DOCUMENT_INTELLIGENCE_MODEL, MISTRAL_OCR_API_BASE_URL, MISTRAL_OCR_API_KEY, RAG_TEXT_SPLITTER, @@ -871,6 +872,7 @@ app.state.config.DOCLING_API_KEY = DOCLING_API_KEY app.state.config.DOCLING_PARAMS = DOCLING_PARAMS app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT = DOCUMENT_INTELLIGENCE_ENDPOINT app.state.config.DOCUMENT_INTELLIGENCE_KEY = DOCUMENT_INTELLIGENCE_KEY +app.state.config.DOCUMENT_INTELLIGENCE_MODEL = DOCUMENT_INTELLIGENCE_MODEL app.state.config.MISTRAL_OCR_API_BASE_URL = MISTRAL_OCR_API_BASE_URL app.state.config.MISTRAL_OCR_API_KEY = MISTRAL_OCR_API_KEY app.state.config.MINERU_API_MODE = MINERU_API_MODE diff --git a/backend/open_webui/retrieval/loaders/main.py b/backend/open_webui/retrieval/loaders/main.py index fcc507e088..1346cd065c 100644 --- a/backend/open_webui/retrieval/loaders/main.py +++ b/backend/open_webui/retrieval/loaders/main.py @@ -322,12 +322,14 @@ class Loader: file_path=file_path, api_endpoint=self.kwargs.get("DOCUMENT_INTELLIGENCE_ENDPOINT"), api_key=self.kwargs.get("DOCUMENT_INTELLIGENCE_KEY"), + api_model=self.kwargs.get("DOCUMENT_INTELLIGENCE_MODEL"), ) else: loader = AzureAIDocumentIntelligenceLoader( file_path=file_path, api_endpoint=self.kwargs.get("DOCUMENT_INTELLIGENCE_ENDPOINT"), azure_credential=DefaultAzureCredential(), + api_model=self.kwargs.get("DOCUMENT_INTELLIGENCE_MODEL"), ) elif self.engine == "mineru" and file_ext in [ "pdf" diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index 190f001edd..b7ed993895 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -468,6 +468,7 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)): "DOCLING_PARAMS": request.app.state.config.DOCLING_PARAMS, "DOCUMENT_INTELLIGENCE_ENDPOINT": request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT, "DOCUMENT_INTELLIGENCE_KEY": request.app.state.config.DOCUMENT_INTELLIGENCE_KEY, + "DOCUMENT_INTELLIGENCE_MODEL": request.app.state.config.DOCUMENT_INTELLIGENCE_MODEL, "MISTRAL_OCR_API_BASE_URL": request.app.state.config.MISTRAL_OCR_API_BASE_URL, "MISTRAL_OCR_API_KEY": request.app.state.config.MISTRAL_OCR_API_KEY, # MinerU settings @@ -647,6 +648,7 @@ class ConfigForm(BaseModel): DOCLING_PARAMS: Optional[dict] = None DOCUMENT_INTELLIGENCE_ENDPOINT: Optional[str] = None DOCUMENT_INTELLIGENCE_KEY: Optional[str] = None + DOCUMENT_INTELLIGENCE_MODEL: Optional[str] = None MISTRAL_OCR_API_BASE_URL: Optional[str] = None MISTRAL_OCR_API_KEY: Optional[str] = None @@ -842,6 +844,11 @@ async def update_rag_config( if form_data.DOCUMENT_INTELLIGENCE_KEY is not None else request.app.state.config.DOCUMENT_INTELLIGENCE_KEY ) + request.app.state.config.DOCUMENT_INTELLIGENCE_MODEL = ( + form_data.DOCUMENT_INTELLIGENCE_MODEL + if form_data.DOCUMENT_INTELLIGENCE_MODEL is not None + else request.app.state.config.DOCUMENT_INTELLIGENCE_MODEL + ) request.app.state.config.MISTRAL_OCR_API_BASE_URL = ( form_data.MISTRAL_OCR_API_BASE_URL @@ -1131,6 +1138,7 @@ async def update_rag_config( "DOCLING_PARAMS": request.app.state.config.DOCLING_PARAMS, "DOCUMENT_INTELLIGENCE_ENDPOINT": request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT, "DOCUMENT_INTELLIGENCE_KEY": request.app.state.config.DOCUMENT_INTELLIGENCE_KEY, + "DOCUMENT_INTELLIGENCE_MODEL": request.app.state.config.DOCUMENT_INTELLIGENCE_MODEL, "MISTRAL_OCR_API_BASE_URL": request.app.state.config.MISTRAL_OCR_API_BASE_URL, "MISTRAL_OCR_API_KEY": request.app.state.config.MISTRAL_OCR_API_KEY, # MinerU settings @@ -1543,6 +1551,7 @@ def process_file( PDF_EXTRACT_IMAGES=request.app.state.config.PDF_EXTRACT_IMAGES, DOCUMENT_INTELLIGENCE_ENDPOINT=request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT, DOCUMENT_INTELLIGENCE_KEY=request.app.state.config.DOCUMENT_INTELLIGENCE_KEY, + DOCUMENT_INTELLIGENCE_MODEL=request.app.state.config.DOCUMENT_INTELLIGENCE_MODEL, MISTRAL_OCR_API_BASE_URL=request.app.state.config.MISTRAL_OCR_API_BASE_URL, MISTRAL_OCR_API_KEY=request.app.state.config.MISTRAL_OCR_API_KEY, MINERU_API_MODE=request.app.state.config.MINERU_API_MODE, diff --git a/src/lib/apis/retrieval/index.ts b/src/lib/apis/retrieval/index.ts index 5cb0f60a72..75065910d6 100644 --- a/src/lib/apis/retrieval/index.ts +++ b/src/lib/apis/retrieval/index.ts @@ -35,6 +35,7 @@ type ChunkConfigForm = { type DocumentIntelligenceConfigForm = { key: string; endpoint: string; + model: string; }; type ContentExtractConfigForm = { diff --git a/src/lib/components/admin/Settings/Documents.svelte b/src/lib/components/admin/Settings/Documents.svelte index 26c23028ed..0b9accd4bf 100644 --- a/src/lib/components/admin/Settings/Documents.svelte +++ b/src/lib/components/admin/Settings/Documents.svelte @@ -597,6 +597,18 @@ required={false} /> +