feat: Adds document intelligence model configuration (#19692)

* Adds document intelligence model configuration

Enables the configuration of the Document Intelligence model to be used by the RAG pipeline.

This allows users to specify the model they want to use for document processing, providing flexibility and control over the extraction process.

* Added Titel to Document Intelligence Model Config

Added Titel to Document Intelligence Model Config
This commit is contained in:
Henne 2025-12-02 20:41:09 +01:00 committed by GitHub
parent e5c6b739c2
commit a7e614ca4c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 36 additions and 0 deletions

View file

@ -2590,6 +2590,12 @@ DOCUMENT_INTELLIGENCE_KEY = PersistentConfig(
os.getenv("DOCUMENT_INTELLIGENCE_KEY", ""),
)
DOCUMENT_INTELLIGENCE_MODEL = PersistentConfig(
"DOCUMENT_INTELLIGENCE_MODEL",
"rag.document_intelligence_model",
os.getenv("DOCUMENT_INTELLIGENCE_MODEL", "prebuilt-layout"),
)
MISTRAL_OCR_API_BASE_URL = PersistentConfig(
"MISTRAL_OCR_API_BASE_URL",
"rag.MISTRAL_OCR_API_BASE_URL",

View file

@ -273,6 +273,7 @@ from open_webui.config import (
DOCLING_PARAMS,
DOCUMENT_INTELLIGENCE_ENDPOINT,
DOCUMENT_INTELLIGENCE_KEY,
DOCUMENT_INTELLIGENCE_MODEL,
MISTRAL_OCR_API_BASE_URL,
MISTRAL_OCR_API_KEY,
RAG_TEXT_SPLITTER,
@ -871,6 +872,7 @@ app.state.config.DOCLING_API_KEY = DOCLING_API_KEY
app.state.config.DOCLING_PARAMS = DOCLING_PARAMS
app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT = DOCUMENT_INTELLIGENCE_ENDPOINT
app.state.config.DOCUMENT_INTELLIGENCE_KEY = DOCUMENT_INTELLIGENCE_KEY
app.state.config.DOCUMENT_INTELLIGENCE_MODEL = DOCUMENT_INTELLIGENCE_MODEL
app.state.config.MISTRAL_OCR_API_BASE_URL = MISTRAL_OCR_API_BASE_URL
app.state.config.MISTRAL_OCR_API_KEY = MISTRAL_OCR_API_KEY
app.state.config.MINERU_API_MODE = MINERU_API_MODE

View file

@ -322,12 +322,14 @@ class Loader:
file_path=file_path,
api_endpoint=self.kwargs.get("DOCUMENT_INTELLIGENCE_ENDPOINT"),
api_key=self.kwargs.get("DOCUMENT_INTELLIGENCE_KEY"),
api_model=self.kwargs.get("DOCUMENT_INTELLIGENCE_MODEL"),
)
else:
loader = AzureAIDocumentIntelligenceLoader(
file_path=file_path,
api_endpoint=self.kwargs.get("DOCUMENT_INTELLIGENCE_ENDPOINT"),
azure_credential=DefaultAzureCredential(),
api_model=self.kwargs.get("DOCUMENT_INTELLIGENCE_MODEL"),
)
elif self.engine == "mineru" and file_ext in [
"pdf"

View file

@ -468,6 +468,7 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)):
"DOCLING_PARAMS": request.app.state.config.DOCLING_PARAMS,
"DOCUMENT_INTELLIGENCE_ENDPOINT": request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT,
"DOCUMENT_INTELLIGENCE_KEY": request.app.state.config.DOCUMENT_INTELLIGENCE_KEY,
"DOCUMENT_INTELLIGENCE_MODEL": request.app.state.config.DOCUMENT_INTELLIGENCE_MODEL,
"MISTRAL_OCR_API_BASE_URL": request.app.state.config.MISTRAL_OCR_API_BASE_URL,
"MISTRAL_OCR_API_KEY": request.app.state.config.MISTRAL_OCR_API_KEY,
# MinerU settings
@ -647,6 +648,7 @@ class ConfigForm(BaseModel):
DOCLING_PARAMS: Optional[dict] = None
DOCUMENT_INTELLIGENCE_ENDPOINT: Optional[str] = None
DOCUMENT_INTELLIGENCE_KEY: Optional[str] = None
DOCUMENT_INTELLIGENCE_MODEL: Optional[str] = None
MISTRAL_OCR_API_BASE_URL: Optional[str] = None
MISTRAL_OCR_API_KEY: Optional[str] = None
@ -842,6 +844,11 @@ async def update_rag_config(
if form_data.DOCUMENT_INTELLIGENCE_KEY is not None
else request.app.state.config.DOCUMENT_INTELLIGENCE_KEY
)
request.app.state.config.DOCUMENT_INTELLIGENCE_MODEL = (
form_data.DOCUMENT_INTELLIGENCE_MODEL
if form_data.DOCUMENT_INTELLIGENCE_MODEL is not None
else request.app.state.config.DOCUMENT_INTELLIGENCE_MODEL
)
request.app.state.config.MISTRAL_OCR_API_BASE_URL = (
form_data.MISTRAL_OCR_API_BASE_URL
@ -1131,6 +1138,7 @@ async def update_rag_config(
"DOCLING_PARAMS": request.app.state.config.DOCLING_PARAMS,
"DOCUMENT_INTELLIGENCE_ENDPOINT": request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT,
"DOCUMENT_INTELLIGENCE_KEY": request.app.state.config.DOCUMENT_INTELLIGENCE_KEY,
"DOCUMENT_INTELLIGENCE_MODEL": request.app.state.config.DOCUMENT_INTELLIGENCE_MODEL,
"MISTRAL_OCR_API_BASE_URL": request.app.state.config.MISTRAL_OCR_API_BASE_URL,
"MISTRAL_OCR_API_KEY": request.app.state.config.MISTRAL_OCR_API_KEY,
# MinerU settings
@ -1543,6 +1551,7 @@ def process_file(
PDF_EXTRACT_IMAGES=request.app.state.config.PDF_EXTRACT_IMAGES,
DOCUMENT_INTELLIGENCE_ENDPOINT=request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT,
DOCUMENT_INTELLIGENCE_KEY=request.app.state.config.DOCUMENT_INTELLIGENCE_KEY,
DOCUMENT_INTELLIGENCE_MODEL=request.app.state.config.DOCUMENT_INTELLIGENCE_MODEL,
MISTRAL_OCR_API_BASE_URL=request.app.state.config.MISTRAL_OCR_API_BASE_URL,
MISTRAL_OCR_API_KEY=request.app.state.config.MISTRAL_OCR_API_KEY,
MINERU_API_MODE=request.app.state.config.MINERU_API_MODE,

View file

@ -35,6 +35,7 @@ type ChunkConfigForm = {
type DocumentIntelligenceConfigForm = {
key: string;
endpoint: string;
model: string;
};
type ContentExtractConfigForm = {

View file

@ -597,6 +597,18 @@
required={false}
/>
</div>
<div class="my-0.5 flex flex-col w-full">
<div class=" mb-1 text-xs font-medium">{$i18n.t('Document Intelligence Model')}</div>
<div class="flex w-full">
<div class="flex-1 mr-2">
<input
class="flex-1 w-full text-sm bg-transparent outline-hidden"
placeholder={$i18n.t('Enter Document Intelligence Model')}
bind:value={RAGConfig.DOCUMENT_INTELLIGENCE_MODEL}
/>
</div>
</div>
</div>
{:else if RAGConfig.CONTENT_EXTRACTION_ENGINE === 'mistral_ocr'}
<div class="my-0.5 flex gap-2 pr-2">
<input

View file

@ -462,6 +462,7 @@
"Docling Server URL required.": "",
"Document": "",
"Document Intelligence": "",
"Document Intelligence Model": "",
"Document Intelligence endpoint required.": "",
"Documentation": "",
"Documents": "",
@ -564,6 +565,7 @@
"Enter Docling Server URL": "",
"Enter Document Intelligence Endpoint": "",
"Enter Document Intelligence Key": "",
"Enter Document Intelligence Model": "",
"Enter domains separated by commas (e.g., example.com,site.org,!excludedsite.com)": "",
"Enter Exa API Key": "",
"Enter External Document Loader API Key": "",

View file

@ -462,6 +462,7 @@
"Docling Server URL required.": "",
"Document": "",
"Document Intelligence": "",
"Document Intelligence Model": "",
"Document Intelligence endpoint required.": "",
"Documentation": "",
"Documents": "",
@ -564,6 +565,7 @@
"Enter Docling Server URL": "",
"Enter Document Intelligence Endpoint": "",
"Enter Document Intelligence Key": "",
"Enter Document Intelligence Model": "",
"Enter domains separated by commas (e.g., example.com,site.org,!excludedsite.com)": "",
"Enter Exa API Key": "",
"Enter External Document Loader API Key": "",