Merge pull request #17276 from Elettrotecnica/extend-docling-configuration

feat: Extend docling configuration options
This commit is contained in:
Tim Jaeryang Baek 2025-09-09 18:04:30 +04:00 committed by GitHub
commit 71fd483fba
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 198 additions and 17 deletions

View file

@ -2232,6 +2232,18 @@ DOCLING_SERVER_URL = PersistentConfig(
os.getenv("DOCLING_SERVER_URL", "http://docling:5001"), os.getenv("DOCLING_SERVER_URL", "http://docling:5001"),
) )
# Whether Docling should run OCR at all.
# The default is read from the DOCLING_DO_OCR environment variable ("True" when
# unset) and compared case-insensitively against "true", so any other value
# (e.g. "1", "yes") evaluates to False.
DOCLING_DO_OCR = PersistentConfig(
"DOCLING_DO_OCR",
"rag.docling_do_ocr",
os.getenv("DOCLING_DO_OCR", "True").lower() == "true",
)
# Whether Docling should be asked to force OCR.
# The default is read from the DOCLING_FORCE_OCR environment variable ("False"
# when unset); only the string "true" (any letter case) enables it.
# NOTE(review): the admin UI rejects force-OCR while OCR is disabled, but no
# such check is applied to the environment-derived defaults here.
DOCLING_FORCE_OCR = PersistentConfig(
"DOCLING_FORCE_OCR",
"rag.docling_force_ocr",
os.getenv("DOCLING_FORCE_OCR", "False").lower() == "true",
)
DOCLING_OCR_ENGINE = PersistentConfig( DOCLING_OCR_ENGINE = PersistentConfig(
"DOCLING_OCR_ENGINE", "DOCLING_OCR_ENGINE",
"rag.docling_ocr_engine", "rag.docling_ocr_engine",
@ -2244,6 +2256,24 @@ DOCLING_OCR_LANG = PersistentConfig(
os.getenv("DOCLING_OCR_LANG", "eng,fra,deu,spa"), os.getenv("DOCLING_OCR_LANG", "eng,fra,deu,spa"),
) )
# Name of the PDF backend to request from Docling.
# The default comes from the DOCLING_PDF_BACKEND environment variable and falls
# back to "dlparse_v4". The string is stored as-is; it is not validated here.
DOCLING_PDF_BACKEND = PersistentConfig(
"DOCLING_PDF_BACKEND",
"rag.docling_pdf_backend",
os.getenv("DOCLING_PDF_BACKEND", "dlparse_v4"),
)
# Table extraction mode to request from Docling.
# The default comes from the DOCLING_TABLE_MODE environment variable and falls
# back to "accurate". The string is stored as-is; it is not validated here.
DOCLING_TABLE_MODE = PersistentConfig(
"DOCLING_TABLE_MODE",
"rag.docling_table_mode",
os.getenv("DOCLING_TABLE_MODE", "accurate"),
)
# Processing pipeline to request from Docling.
# The default comes from the DOCLING_PIPELINE environment variable and falls
# back to "standard". The string is stored as-is; it is not validated here.
DOCLING_PIPELINE = PersistentConfig(
"DOCLING_PIPELINE",
"rag.docling_pipeline",
os.getenv("DOCLING_PIPELINE", "standard"),
)
DOCLING_DO_PICTURE_DESCRIPTION = PersistentConfig( DOCLING_DO_PICTURE_DESCRIPTION = PersistentConfig(
"DOCLING_DO_PICTURE_DESCRIPTION", "DOCLING_DO_PICTURE_DESCRIPTION",
"rag.docling_do_picture_description", "rag.docling_do_picture_description",

View file

@ -244,8 +244,13 @@ from open_webui.config import (
EXTERNAL_DOCUMENT_LOADER_API_KEY, EXTERNAL_DOCUMENT_LOADER_API_KEY,
TIKA_SERVER_URL, TIKA_SERVER_URL,
DOCLING_SERVER_URL, DOCLING_SERVER_URL,
DOCLING_DO_OCR,
DOCLING_FORCE_OCR,
DOCLING_OCR_ENGINE, DOCLING_OCR_ENGINE,
DOCLING_OCR_LANG, DOCLING_OCR_LANG,
DOCLING_PDF_BACKEND,
DOCLING_TABLE_MODE,
DOCLING_PIPELINE,
DOCLING_DO_PICTURE_DESCRIPTION, DOCLING_DO_PICTURE_DESCRIPTION,
DOCLING_PICTURE_DESCRIPTION_MODE, DOCLING_PICTURE_DESCRIPTION_MODE,
DOCLING_PICTURE_DESCRIPTION_LOCAL, DOCLING_PICTURE_DESCRIPTION_LOCAL,
@ -812,8 +817,13 @@ app.state.config.EXTERNAL_DOCUMENT_LOADER_URL = EXTERNAL_DOCUMENT_LOADER_URL
app.state.config.EXTERNAL_DOCUMENT_LOADER_API_KEY = EXTERNAL_DOCUMENT_LOADER_API_KEY app.state.config.EXTERNAL_DOCUMENT_LOADER_API_KEY = EXTERNAL_DOCUMENT_LOADER_API_KEY
app.state.config.TIKA_SERVER_URL = TIKA_SERVER_URL app.state.config.TIKA_SERVER_URL = TIKA_SERVER_URL
app.state.config.DOCLING_SERVER_URL = DOCLING_SERVER_URL app.state.config.DOCLING_SERVER_URL = DOCLING_SERVER_URL
# Publish the Docling OCR toggles on the application's runtime config object.
app.state.config.DOCLING_DO_OCR = DOCLING_DO_OCR
app.state.config.DOCLING_FORCE_OCR = DOCLING_FORCE_OCR
app.state.config.DOCLING_OCR_ENGINE = DOCLING_OCR_ENGINE app.state.config.DOCLING_OCR_ENGINE = DOCLING_OCR_ENGINE
app.state.config.DOCLING_OCR_LANG = DOCLING_OCR_LANG app.state.config.DOCLING_OCR_LANG = DOCLING_OCR_LANG
# Publish the Docling PDF backend, table mode and pipeline selections on the
# application's runtime config object.
app.state.config.DOCLING_PDF_BACKEND = DOCLING_PDF_BACKEND
app.state.config.DOCLING_TABLE_MODE = DOCLING_TABLE_MODE
app.state.config.DOCLING_PIPELINE = DOCLING_PIPELINE
app.state.config.DOCLING_DO_PICTURE_DESCRIPTION = DOCLING_DO_PICTURE_DESCRIPTION app.state.config.DOCLING_DO_PICTURE_DESCRIPTION = DOCLING_DO_PICTURE_DESCRIPTION
app.state.config.DOCLING_PICTURE_DESCRIPTION_MODE = DOCLING_PICTURE_DESCRIPTION_MODE app.state.config.DOCLING_PICTURE_DESCRIPTION_MODE = DOCLING_PICTURE_DESCRIPTION_MODE
app.state.config.DOCLING_PICTURE_DESCRIPTION_LOCAL = DOCLING_PICTURE_DESCRIPTION_LOCAL app.state.config.DOCLING_PICTURE_DESCRIPTION_LOCAL = DOCLING_PICTURE_DESCRIPTION_LOCAL

View file

@ -148,7 +148,7 @@ class DoclingLoader:
) )
} }
params = {"image_export_mode": "placeholder", "table_mode": "accurate"} params = {"image_export_mode": "placeholder"}
if self.params: if self.params:
if self.params.get("do_picture_description"): if self.params.get("do_picture_description"):
@ -174,7 +174,11 @@ class DoclingLoader:
self.params.get("picture_description_api", {}) self.params.get("picture_description_api", {})
) )
if self.params.get("ocr_engine") and self.params.get("ocr_lang"): params["do_ocr"] = self.params.get("do_ocr")
params["force_ocr"] = self.params.get("force_ocr")
if self.params.get("do_ocr") and self.params.get("ocr_engine") and self.params.get("ocr_lang"):
params["ocr_engine"] = self.params.get("ocr_engine") params["ocr_engine"] = self.params.get("ocr_engine")
params["ocr_lang"] = [ params["ocr_lang"] = [
lang.strip() lang.strip()
@ -182,6 +186,16 @@ class DoclingLoader:
if lang.strip() if lang.strip()
] ]
if self.params.get("pdf_backend"):
params["pdf_backend"] = self.params.get("pdf_backend")
if self.params.get("table_mode"):
params["table_mode"] = self.params.get("table_mode")
if self.params.get("pipeline"):
params["pipeline"] = self.params.get("pipeline")
endpoint = f"{self.url}/v1/convert/file" endpoint = f"{self.url}/v1/convert/file"
r = requests.post(endpoint, files=files, data=params) r = requests.post(endpoint, files=files, data=params)

View file

@ -426,8 +426,13 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)):
"EXTERNAL_DOCUMENT_LOADER_API_KEY": request.app.state.config.EXTERNAL_DOCUMENT_LOADER_API_KEY, "EXTERNAL_DOCUMENT_LOADER_API_KEY": request.app.state.config.EXTERNAL_DOCUMENT_LOADER_API_KEY,
"TIKA_SERVER_URL": request.app.state.config.TIKA_SERVER_URL, "TIKA_SERVER_URL": request.app.state.config.TIKA_SERVER_URL,
"DOCLING_SERVER_URL": request.app.state.config.DOCLING_SERVER_URL, "DOCLING_SERVER_URL": request.app.state.config.DOCLING_SERVER_URL,
"DOCLING_DO_OCR": request.app.state.config.DOCLING_DO_OCR,
"DOCLING_FORCE_OCR": request.app.state.config.DOCLING_FORCE_OCR,
"DOCLING_OCR_ENGINE": request.app.state.config.DOCLING_OCR_ENGINE, "DOCLING_OCR_ENGINE": request.app.state.config.DOCLING_OCR_ENGINE,
"DOCLING_OCR_LANG": request.app.state.config.DOCLING_OCR_LANG, "DOCLING_OCR_LANG": request.app.state.config.DOCLING_OCR_LANG,
"DOCLING_PDF_BACKEND": request.app.state.config.DOCLING_PDF_BACKEND,
"DOCLING_TABLE_MODE": request.app.state.config.DOCLING_TABLE_MODE,
"DOCLING_PIPELINE": request.app.state.config.DOCLING_PIPELINE,
"DOCLING_DO_PICTURE_DESCRIPTION": request.app.state.config.DOCLING_DO_PICTURE_DESCRIPTION, "DOCLING_DO_PICTURE_DESCRIPTION": request.app.state.config.DOCLING_DO_PICTURE_DESCRIPTION,
"DOCLING_PICTURE_DESCRIPTION_MODE": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_MODE, "DOCLING_PICTURE_DESCRIPTION_MODE": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_MODE,
"DOCLING_PICTURE_DESCRIPTION_LOCAL": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_LOCAL, "DOCLING_PICTURE_DESCRIPTION_LOCAL": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_LOCAL,
@ -596,8 +601,13 @@ class ConfigForm(BaseModel):
TIKA_SERVER_URL: Optional[str] = None TIKA_SERVER_URL: Optional[str] = None
DOCLING_SERVER_URL: Optional[str] = None DOCLING_SERVER_URL: Optional[str] = None
DOCLING_DO_OCR: Optional[bool] = None
DOCLING_FORCE_OCR: Optional[bool] = None
DOCLING_OCR_ENGINE: Optional[str] = None DOCLING_OCR_ENGINE: Optional[str] = None
DOCLING_OCR_LANG: Optional[str] = None DOCLING_OCR_LANG: Optional[str] = None
DOCLING_PDF_BACKEND: Optional[str] = None
DOCLING_TABLE_MODE: Optional[str] = None
DOCLING_PIPELINE: Optional[str] = None
DOCLING_DO_PICTURE_DESCRIPTION: Optional[bool] = None DOCLING_DO_PICTURE_DESCRIPTION: Optional[bool] = None
DOCLING_PICTURE_DESCRIPTION_MODE: Optional[str] = None DOCLING_PICTURE_DESCRIPTION_MODE: Optional[str] = None
DOCLING_PICTURE_DESCRIPTION_LOCAL: Optional[dict] = None DOCLING_PICTURE_DESCRIPTION_LOCAL: Optional[dict] = None
@ -767,6 +777,16 @@ async def update_rag_config(
if form_data.DOCLING_SERVER_URL is not None if form_data.DOCLING_SERVER_URL is not None
else request.app.state.config.DOCLING_SERVER_URL else request.app.state.config.DOCLING_SERVER_URL
) )
request.app.state.config.DOCLING_DO_OCR = (
form_data.DOCLING_DO_OCR
if form_data.DOCLING_DO_OCR is not None
else request.app.state.config.DOCLING_DO_OCR
)
request.app.state.config.DOCLING_FORCE_OCR = (
form_data.DOCLING_FORCE_OCR
if form_data.DOCLING_FORCE_OCR is not None
else request.app.state.config.DOCLING_FORCE_OCR
)
request.app.state.config.DOCLING_OCR_ENGINE = ( request.app.state.config.DOCLING_OCR_ENGINE = (
form_data.DOCLING_OCR_ENGINE form_data.DOCLING_OCR_ENGINE
if form_data.DOCLING_OCR_ENGINE is not None if form_data.DOCLING_OCR_ENGINE is not None
@ -777,7 +797,21 @@ async def update_rag_config(
if form_data.DOCLING_OCR_LANG is not None if form_data.DOCLING_OCR_LANG is not None
else request.app.state.config.DOCLING_OCR_LANG else request.app.state.config.DOCLING_OCR_LANG
) )
request.app.state.config.DOCLING_PDF_BACKEND = (
form_data.DOCLING_PDF_BACKEND
if form_data.DOCLING_PDF_BACKEND is not None
else request.app.state.config.DOCLING_PDF_BACKEND
)
request.app.state.config.DOCLING_TABLE_MODE = (
form_data.DOCLING_TABLE_MODE
if form_data.DOCLING_TABLE_MODE is not None
else request.app.state.config.DOCLING_TABLE_MODE
)
request.app.state.config.DOCLING_PIPELINE = (
form_data.DOCLING_PIPELINE
if form_data.DOCLING_PIPELINE is not None
else request.app.state.config.DOCLING_PIPELINE
)
request.app.state.config.DOCLING_DO_PICTURE_DESCRIPTION = ( request.app.state.config.DOCLING_DO_PICTURE_DESCRIPTION = (
form_data.DOCLING_DO_PICTURE_DESCRIPTION form_data.DOCLING_DO_PICTURE_DESCRIPTION
if form_data.DOCLING_DO_PICTURE_DESCRIPTION is not None if form_data.DOCLING_DO_PICTURE_DESCRIPTION is not None
@ -1062,8 +1096,13 @@ async def update_rag_config(
"EXTERNAL_DOCUMENT_LOADER_API_KEY": request.app.state.config.EXTERNAL_DOCUMENT_LOADER_API_KEY, "EXTERNAL_DOCUMENT_LOADER_API_KEY": request.app.state.config.EXTERNAL_DOCUMENT_LOADER_API_KEY,
"TIKA_SERVER_URL": request.app.state.config.TIKA_SERVER_URL, "TIKA_SERVER_URL": request.app.state.config.TIKA_SERVER_URL,
"DOCLING_SERVER_URL": request.app.state.config.DOCLING_SERVER_URL, "DOCLING_SERVER_URL": request.app.state.config.DOCLING_SERVER_URL,
"DOCLING_DO_OCR": request.app.state.config.DOCLING_DO_OCR,
"DOCLING_FORCE_OCR": request.app.state.config.DOCLING_FORCE_OCR,
"DOCLING_OCR_ENGINE": request.app.state.config.DOCLING_OCR_ENGINE, "DOCLING_OCR_ENGINE": request.app.state.config.DOCLING_OCR_ENGINE,
"DOCLING_OCR_LANG": request.app.state.config.DOCLING_OCR_LANG, "DOCLING_OCR_LANG": request.app.state.config.DOCLING_OCR_LANG,
"DOCLING_PDF_BACKEND": request.app.state.config.DOCLING_PDF_BACKEND,
"DOCLING_TABLE_MODE": request.app.state.config.DOCLING_TABLE_MODE,
"DOCLING_PIPELINE": request.app.state.config.DOCLING_PIPELINE,
"DOCLING_DO_PICTURE_DESCRIPTION": request.app.state.config.DOCLING_DO_PICTURE_DESCRIPTION, "DOCLING_DO_PICTURE_DESCRIPTION": request.app.state.config.DOCLING_DO_PICTURE_DESCRIPTION,
"DOCLING_PICTURE_DESCRIPTION_MODE": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_MODE, "DOCLING_PICTURE_DESCRIPTION_MODE": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_MODE,
"DOCLING_PICTURE_DESCRIPTION_LOCAL": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_LOCAL, "DOCLING_PICTURE_DESCRIPTION_LOCAL": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_LOCAL,
@ -1453,8 +1492,13 @@ def process_file(
TIKA_SERVER_URL=request.app.state.config.TIKA_SERVER_URL, TIKA_SERVER_URL=request.app.state.config.TIKA_SERVER_URL,
DOCLING_SERVER_URL=request.app.state.config.DOCLING_SERVER_URL, DOCLING_SERVER_URL=request.app.state.config.DOCLING_SERVER_URL,
DOCLING_PARAMS={ DOCLING_PARAMS={
"do_ocr": request.app.state.config.DOCLING_DO_OCR,
"force_ocr": request.app.state.config.DOCLING_FORCE_OCR,
"ocr_engine": request.app.state.config.DOCLING_OCR_ENGINE, "ocr_engine": request.app.state.config.DOCLING_OCR_ENGINE,
"ocr_lang": request.app.state.config.DOCLING_OCR_LANG, "ocr_lang": request.app.state.config.DOCLING_OCR_LANG,
"pdf_backend": request.app.state.config.DOCLING_PDF_BACKEND,
"table_mode": request.app.state.config.DOCLING_TABLE_MODE,
"pipeline": request.app.state.config.DOCLING_PIPELINE,
"do_picture_description": request.app.state.config.DOCLING_DO_PICTURE_DESCRIPTION, "do_picture_description": request.app.state.config.DOCLING_DO_PICTURE_DESCRIPTION,
"picture_description_mode": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_MODE, "picture_description_mode": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_MODE,
"picture_description_local": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_LOCAL, "picture_description_local": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_LOCAL,

View file

@ -153,6 +153,7 @@
} }
if ( if (
RAGConfig.CONTENT_EXTRACTION_ENGINE === 'docling' && RAGConfig.CONTENT_EXTRACTION_ENGINE === 'docling' &&
RAGConfig.DOCLING_DO_OCR &&
((RAGConfig.DOCLING_OCR_ENGINE === '' && RAGConfig.DOCLING_OCR_LANG !== '') || ((RAGConfig.DOCLING_OCR_ENGINE === '' && RAGConfig.DOCLING_OCR_LANG !== '') ||
(RAGConfig.DOCLING_OCR_ENGINE !== '' && RAGConfig.DOCLING_OCR_LANG === '')) (RAGConfig.DOCLING_OCR_ENGINE !== '' && RAGConfig.DOCLING_OCR_LANG === ''))
) { ) {
@ -161,6 +162,16 @@
); );
return; return;
} }
if (
RAGConfig.CONTENT_EXTRACTION_ENGINE === 'docling' &&
RAGConfig.DOCLING_DO_OCR === false &&
RAGConfig.DOCLING_FORCE_OCR === true
) {
toast.error(
$i18n.t('In order to force OCR, performing OCR must be enabled.')
);
return;
}
if ( if (
RAGConfig.CONTENT_EXTRACTION_ENGINE === 'datalab_marker' && RAGConfig.CONTENT_EXTRACTION_ENGINE === 'datalab_marker' &&
@ -545,6 +556,18 @@
bind:value={RAGConfig.DOCLING_SERVER_URL} bind:value={RAGConfig.DOCLING_SERVER_URL}
/> />
</div> </div>
<div class="flex w-full mt-2">
<div class="flex-1 flex justify-between">
<div class=" self-center text-xs font-medium">
{$i18n.t('Perform OCR')}
</div>
<div class="flex items-center relative">
<Switch bind:state={RAGConfig.DOCLING_DO_OCR} />
</div>
</div>
</div>
{#if RAGConfig.DOCLING_DO_OCR}
<div class="flex w-full mt-2"> <div class="flex w-full mt-2">
<input <input
class="flex-1 w-full text-sm bg-transparent outline-hidden" class="flex-1 w-full text-sm bg-transparent outline-hidden"
@ -557,7 +580,67 @@
bind:value={RAGConfig.DOCLING_OCR_LANG} bind:value={RAGConfig.DOCLING_OCR_LANG}
/> />
</div> </div>
{/if}
<div class="flex w-full mt-2">
<div class="flex-1 flex justify-between">
<div class=" self-center text-xs font-medium">
{$i18n.t('Force OCR')}
</div>
<div class="flex items-center relative">
<Switch bind:state={RAGConfig.DOCLING_FORCE_OCR} />
</div>
</div>
</div>
<div class="flex justify-between w-full mt-2">
<div class="self-center text-xs font-medium">
<Tooltip content={''} placement="top-start">
{$i18n.t('PDF Backend')}
</Tooltip>
</div>
<div class="">
<select
class="dark:bg-gray-900 w-fit pr-8 rounded-sm px-2 text-xs bg-transparent outline-hidden text-right"
bind:value={RAGConfig.DOCLING_PDF_BACKEND}
>
<option value="pypdfium2">{$i18n.t('pypdfium2')}</option>
<option value="dlparse_v1">{$i18n.t('dlparse_v1')}</option>
<option value="dlparse_v2">{$i18n.t('dlparse_v2')}</option>
<option value="dlparse_v4">{$i18n.t('dlparse_v4')}</option>
</select>
</div>
</div>
<div class="flex justify-between w-full mt-2">
<div class="self-center text-xs font-medium">
<Tooltip content={''} placement="top-start">
{$i18n.t('Table Mode')}
</Tooltip>
</div>
<div class="">
<select
class="dark:bg-gray-900 w-fit pr-8 rounded-sm px-2 text-xs bg-transparent outline-hidden text-right"
bind:value={RAGConfig.DOCLING_TABLE_MODE}
>
<option value="fast">{$i18n.t('fast')}</option>
<option value="accurate">{$i18n.t('accurate')}</option>
</select>
</div>
</div>
<div class="flex justify-between w-full mt-2">
<div class="self-center text-xs font-medium">
<Tooltip content={''} placement="top-start">
{$i18n.t('Pipeline')}
</Tooltip>
</div>
<div class="">
<select
class="dark:bg-gray-900 w-fit pr-8 rounded-sm px-2 text-xs bg-transparent outline-hidden text-right"
bind:value={RAGConfig.DOCLING_PIPELINE}
>
<option value="standard">{$i18n.t('standard')}</option>
<option value="vlm">{$i18n.t('vlm')}</option>
</select>
</div>
</div>
<div class="flex w-full mt-2"> <div class="flex w-full mt-2">
<div class="flex-1 flex justify-between"> <div class="flex-1 flex justify-between">
<div class=" self-center text-xs font-medium"> <div class=" self-center text-xs font-medium">