feat: Adds document intelligence model configuration (#19692)

* Adds document intelligence model configuration

Enables the configuration of the Document Intelligence model to be used by the RAG pipeline.

This allows users to specify the model they want to use for document processing, providing flexibility and control over the extraction process.

* Added Titel to Document Intelligence Model Config

Added Titel to Document Intelligence Model Config
This commit is contained in:
Henne 2025-12-02 20:41:09 +01:00 committed by GitHub
parent e5c6b739c2
commit a7e614ca4c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 36 additions and 0 deletions

View file

@ -2590,6 +2590,12 @@ DOCUMENT_INTELLIGENCE_KEY = PersistentConfig(
os.getenv("DOCUMENT_INTELLIGENCE_KEY", ""), os.getenv("DOCUMENT_INTELLIGENCE_KEY", ""),
) )
DOCUMENT_INTELLIGENCE_MODEL = PersistentConfig(
"DOCUMENT_INTELLIGENCE_MODEL",
"rag.document_intelligence_model",
os.getenv("DOCUMENT_INTELLIGENCE_MODEL", "prebuilt-layout"),
)
MISTRAL_OCR_API_BASE_URL = PersistentConfig( MISTRAL_OCR_API_BASE_URL = PersistentConfig(
"MISTRAL_OCR_API_BASE_URL", "MISTRAL_OCR_API_BASE_URL",
"rag.MISTRAL_OCR_API_BASE_URL", "rag.MISTRAL_OCR_API_BASE_URL",

View file

@ -273,6 +273,7 @@ from open_webui.config import (
DOCLING_PARAMS, DOCLING_PARAMS,
DOCUMENT_INTELLIGENCE_ENDPOINT, DOCUMENT_INTELLIGENCE_ENDPOINT,
DOCUMENT_INTELLIGENCE_KEY, DOCUMENT_INTELLIGENCE_KEY,
DOCUMENT_INTELLIGENCE_MODEL,
MISTRAL_OCR_API_BASE_URL, MISTRAL_OCR_API_BASE_URL,
MISTRAL_OCR_API_KEY, MISTRAL_OCR_API_KEY,
RAG_TEXT_SPLITTER, RAG_TEXT_SPLITTER,
@ -871,6 +872,7 @@ app.state.config.DOCLING_API_KEY = DOCLING_API_KEY
app.state.config.DOCLING_PARAMS = DOCLING_PARAMS app.state.config.DOCLING_PARAMS = DOCLING_PARAMS
app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT = DOCUMENT_INTELLIGENCE_ENDPOINT app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT = DOCUMENT_INTELLIGENCE_ENDPOINT
app.state.config.DOCUMENT_INTELLIGENCE_KEY = DOCUMENT_INTELLIGENCE_KEY app.state.config.DOCUMENT_INTELLIGENCE_KEY = DOCUMENT_INTELLIGENCE_KEY
app.state.config.DOCUMENT_INTELLIGENCE_MODEL = DOCUMENT_INTELLIGENCE_MODEL
app.state.config.MISTRAL_OCR_API_BASE_URL = MISTRAL_OCR_API_BASE_URL app.state.config.MISTRAL_OCR_API_BASE_URL = MISTRAL_OCR_API_BASE_URL
app.state.config.MISTRAL_OCR_API_KEY = MISTRAL_OCR_API_KEY app.state.config.MISTRAL_OCR_API_KEY = MISTRAL_OCR_API_KEY
app.state.config.MINERU_API_MODE = MINERU_API_MODE app.state.config.MINERU_API_MODE = MINERU_API_MODE

View file

@ -322,12 +322,14 @@ class Loader:
file_path=file_path, file_path=file_path,
api_endpoint=self.kwargs.get("DOCUMENT_INTELLIGENCE_ENDPOINT"), api_endpoint=self.kwargs.get("DOCUMENT_INTELLIGENCE_ENDPOINT"),
api_key=self.kwargs.get("DOCUMENT_INTELLIGENCE_KEY"), api_key=self.kwargs.get("DOCUMENT_INTELLIGENCE_KEY"),
api_model=self.kwargs.get("DOCUMENT_INTELLIGENCE_MODEL"),
) )
else: else:
loader = AzureAIDocumentIntelligenceLoader( loader = AzureAIDocumentIntelligenceLoader(
file_path=file_path, file_path=file_path,
api_endpoint=self.kwargs.get("DOCUMENT_INTELLIGENCE_ENDPOINT"), api_endpoint=self.kwargs.get("DOCUMENT_INTELLIGENCE_ENDPOINT"),
azure_credential=DefaultAzureCredential(), azure_credential=DefaultAzureCredential(),
api_model=self.kwargs.get("DOCUMENT_INTELLIGENCE_MODEL"),
) )
elif self.engine == "mineru" and file_ext in [ elif self.engine == "mineru" and file_ext in [
"pdf" "pdf"

View file

@ -468,6 +468,7 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)):
"DOCLING_PARAMS": request.app.state.config.DOCLING_PARAMS, "DOCLING_PARAMS": request.app.state.config.DOCLING_PARAMS,
"DOCUMENT_INTELLIGENCE_ENDPOINT": request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT, "DOCUMENT_INTELLIGENCE_ENDPOINT": request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT,
"DOCUMENT_INTELLIGENCE_KEY": request.app.state.config.DOCUMENT_INTELLIGENCE_KEY, "DOCUMENT_INTELLIGENCE_KEY": request.app.state.config.DOCUMENT_INTELLIGENCE_KEY,
"DOCUMENT_INTELLIGENCE_MODEL": request.app.state.config.DOCUMENT_INTELLIGENCE_MODEL,
"MISTRAL_OCR_API_BASE_URL": request.app.state.config.MISTRAL_OCR_API_BASE_URL, "MISTRAL_OCR_API_BASE_URL": request.app.state.config.MISTRAL_OCR_API_BASE_URL,
"MISTRAL_OCR_API_KEY": request.app.state.config.MISTRAL_OCR_API_KEY, "MISTRAL_OCR_API_KEY": request.app.state.config.MISTRAL_OCR_API_KEY,
# MinerU settings # MinerU settings
@ -647,6 +648,7 @@ class ConfigForm(BaseModel):
DOCLING_PARAMS: Optional[dict] = None DOCLING_PARAMS: Optional[dict] = None
DOCUMENT_INTELLIGENCE_ENDPOINT: Optional[str] = None DOCUMENT_INTELLIGENCE_ENDPOINT: Optional[str] = None
DOCUMENT_INTELLIGENCE_KEY: Optional[str] = None DOCUMENT_INTELLIGENCE_KEY: Optional[str] = None
DOCUMENT_INTELLIGENCE_MODEL: Optional[str] = None
MISTRAL_OCR_API_BASE_URL: Optional[str] = None MISTRAL_OCR_API_BASE_URL: Optional[str] = None
MISTRAL_OCR_API_KEY: Optional[str] = None MISTRAL_OCR_API_KEY: Optional[str] = None
@ -842,6 +844,11 @@ async def update_rag_config(
if form_data.DOCUMENT_INTELLIGENCE_KEY is not None if form_data.DOCUMENT_INTELLIGENCE_KEY is not None
else request.app.state.config.DOCUMENT_INTELLIGENCE_KEY else request.app.state.config.DOCUMENT_INTELLIGENCE_KEY
) )
request.app.state.config.DOCUMENT_INTELLIGENCE_MODEL = (
form_data.DOCUMENT_INTELLIGENCE_MODEL
if form_data.DOCUMENT_INTELLIGENCE_MODEL is not None
else request.app.state.config.DOCUMENT_INTELLIGENCE_MODEL
)
request.app.state.config.MISTRAL_OCR_API_BASE_URL = ( request.app.state.config.MISTRAL_OCR_API_BASE_URL = (
form_data.MISTRAL_OCR_API_BASE_URL form_data.MISTRAL_OCR_API_BASE_URL
@ -1131,6 +1138,7 @@ async def update_rag_config(
"DOCLING_PARAMS": request.app.state.config.DOCLING_PARAMS, "DOCLING_PARAMS": request.app.state.config.DOCLING_PARAMS,
"DOCUMENT_INTELLIGENCE_ENDPOINT": request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT, "DOCUMENT_INTELLIGENCE_ENDPOINT": request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT,
"DOCUMENT_INTELLIGENCE_KEY": request.app.state.config.DOCUMENT_INTELLIGENCE_KEY, "DOCUMENT_INTELLIGENCE_KEY": request.app.state.config.DOCUMENT_INTELLIGENCE_KEY,
"DOCUMENT_INTELLIGENCE_MODEL": request.app.state.config.DOCUMENT_INTELLIGENCE_MODEL,
"MISTRAL_OCR_API_BASE_URL": request.app.state.config.MISTRAL_OCR_API_BASE_URL, "MISTRAL_OCR_API_BASE_URL": request.app.state.config.MISTRAL_OCR_API_BASE_URL,
"MISTRAL_OCR_API_KEY": request.app.state.config.MISTRAL_OCR_API_KEY, "MISTRAL_OCR_API_KEY": request.app.state.config.MISTRAL_OCR_API_KEY,
# MinerU settings # MinerU settings
@ -1543,6 +1551,7 @@ def process_file(
PDF_EXTRACT_IMAGES=request.app.state.config.PDF_EXTRACT_IMAGES, PDF_EXTRACT_IMAGES=request.app.state.config.PDF_EXTRACT_IMAGES,
DOCUMENT_INTELLIGENCE_ENDPOINT=request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT, DOCUMENT_INTELLIGENCE_ENDPOINT=request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT,
DOCUMENT_INTELLIGENCE_KEY=request.app.state.config.DOCUMENT_INTELLIGENCE_KEY, DOCUMENT_INTELLIGENCE_KEY=request.app.state.config.DOCUMENT_INTELLIGENCE_KEY,
DOCUMENT_INTELLIGENCE_MODEL=request.app.state.config.DOCUMENT_INTELLIGENCE_MODEL,
MISTRAL_OCR_API_BASE_URL=request.app.state.config.MISTRAL_OCR_API_BASE_URL, MISTRAL_OCR_API_BASE_URL=request.app.state.config.MISTRAL_OCR_API_BASE_URL,
MISTRAL_OCR_API_KEY=request.app.state.config.MISTRAL_OCR_API_KEY, MISTRAL_OCR_API_KEY=request.app.state.config.MISTRAL_OCR_API_KEY,
MINERU_API_MODE=request.app.state.config.MINERU_API_MODE, MINERU_API_MODE=request.app.state.config.MINERU_API_MODE,

View file

@ -35,6 +35,7 @@ type ChunkConfigForm = {
type DocumentIntelligenceConfigForm = { type DocumentIntelligenceConfigForm = {
key: string; key: string;
endpoint: string; endpoint: string;
model: string;
}; };
type ContentExtractConfigForm = { type ContentExtractConfigForm = {

View file

@ -597,6 +597,18 @@
required={false} required={false}
/> />
</div> </div>
<div class="my-0.5 flex flex-col w-full">
<div class=" mb-1 text-xs font-medium">{$i18n.t('Document Intelligence Model')}</div>
<div class="flex w-full">
<div class="flex-1 mr-2">
<input
class="flex-1 w-full text-sm bg-transparent outline-hidden"
placeholder={$i18n.t('Enter Document Intelligence Model')}
bind:value={RAGConfig.DOCUMENT_INTELLIGENCE_MODEL}
/>
</div>
</div>
</div>
{:else if RAGConfig.CONTENT_EXTRACTION_ENGINE === 'mistral_ocr'} {:else if RAGConfig.CONTENT_EXTRACTION_ENGINE === 'mistral_ocr'}
<div class="my-0.5 flex gap-2 pr-2"> <div class="my-0.5 flex gap-2 pr-2">
<input <input

View file

@ -462,6 +462,7 @@
"Docling Server URL required.": "", "Docling Server URL required.": "",
"Document": "", "Document": "",
"Document Intelligence": "", "Document Intelligence": "",
"Document Intelligence Model": "",
"Document Intelligence endpoint required.": "", "Document Intelligence endpoint required.": "",
"Documentation": "", "Documentation": "",
"Documents": "", "Documents": "",
@ -564,6 +565,7 @@
"Enter Docling Server URL": "", "Enter Docling Server URL": "",
"Enter Document Intelligence Endpoint": "", "Enter Document Intelligence Endpoint": "",
"Enter Document Intelligence Key": "", "Enter Document Intelligence Key": "",
"Enter Document Intelligence Model": "",
"Enter domains separated by commas (e.g., example.com,site.org,!excludedsite.com)": "", "Enter domains separated by commas (e.g., example.com,site.org,!excludedsite.com)": "",
"Enter Exa API Key": "", "Enter Exa API Key": "",
"Enter External Document Loader API Key": "", "Enter External Document Loader API Key": "",

View file

@ -462,6 +462,7 @@
"Docling Server URL required.": "", "Docling Server URL required.": "",
"Document": "", "Document": "",
"Document Intelligence": "", "Document Intelligence": "",
"Document Intelligence Model": "",
"Document Intelligence endpoint required.": "", "Document Intelligence endpoint required.": "",
"Documentation": "", "Documentation": "",
"Documents": "", "Documents": "",
@ -564,6 +565,7 @@
"Enter Docling Server URL": "", "Enter Docling Server URL": "",
"Enter Document Intelligence Endpoint": "", "Enter Document Intelligence Endpoint": "",
"Enter Document Intelligence Key": "", "Enter Document Intelligence Key": "",
"Enter Document Intelligence Model": "",
"Enter domains separated by commas (e.g., example.com,site.org,!excludedsite.com)": "", "Enter domains separated by commas (e.g., example.com,site.org,!excludedsite.com)": "",
"Enter Exa API Key": "", "Enter Exa API Key": "",
"Enter External Document Loader API Key": "", "Enter External Document Loader API Key": "",