diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py index 069faab439..11698d87af 100644 --- a/backend/open_webui/config.py +++ b/backend/open_webui/config.py @@ -2232,6 +2232,18 @@ DOCLING_SERVER_URL = PersistentConfig( os.getenv("DOCLING_SERVER_URL", "http://docling:5001"), ) +DOCLING_DO_OCR = PersistentConfig( + "DOCLING_DO_OCR", + "rag.docling_do_ocr", + os.getenv("DOCLING_DO_OCR", "True").lower() == "true", +) + +DOCLING_FORCE_OCR = PersistentConfig( + "DOCLING_FORCE_OCR", + "rag.docling_force_ocr", + os.getenv("DOCLING_FORCE_OCR", "False").lower() == "true", +) + DOCLING_OCR_ENGINE = PersistentConfig( "DOCLING_OCR_ENGINE", "rag.docling_ocr_engine", @@ -2244,6 +2256,24 @@ DOCLING_OCR_LANG = PersistentConfig( os.getenv("DOCLING_OCR_LANG", "eng,fra,deu,spa"), ) +DOCLING_PDF_BACKEND = PersistentConfig( + "DOCLING_PDF_BACKEND", + "rag.docling_pdf_backend", + os.getenv("DOCLING_PDF_BACKEND", "dlparse_v4"), +) + +DOCLING_TABLE_MODE = PersistentConfig( + "DOCLING_TABLE_MODE", + "rag.docling_table_mode", + os.getenv("DOCLING_TABLE_MODE", "accurate"), +) + +DOCLING_PIPELINE = PersistentConfig( + "DOCLING_PIPELINE", + "rag.docling_pipeline", + os.getenv("DOCLING_PIPELINE", "standard"), +) + DOCLING_DO_PICTURE_DESCRIPTION = PersistentConfig( "DOCLING_DO_PICTURE_DESCRIPTION", "rag.docling_do_picture_description", diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index ea60900c9c..a5d55f75ab 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -244,8 +244,13 @@ from open_webui.config import ( EXTERNAL_DOCUMENT_LOADER_API_KEY, TIKA_SERVER_URL, DOCLING_SERVER_URL, + DOCLING_DO_OCR, + DOCLING_FORCE_OCR, DOCLING_OCR_ENGINE, DOCLING_OCR_LANG, + DOCLING_PDF_BACKEND, + DOCLING_TABLE_MODE, + DOCLING_PIPELINE, DOCLING_DO_PICTURE_DESCRIPTION, DOCLING_PICTURE_DESCRIPTION_MODE, DOCLING_PICTURE_DESCRIPTION_LOCAL, @@ -812,8 +817,13 @@ app.state.config.EXTERNAL_DOCUMENT_LOADER_URL = EXTERNAL_DOCUMENT_LOADER_URL app.state.config.EXTERNAL_DOCUMENT_LOADER_API_KEY = EXTERNAL_DOCUMENT_LOADER_API_KEY app.state.config.TIKA_SERVER_URL = TIKA_SERVER_URL app.state.config.DOCLING_SERVER_URL = DOCLING_SERVER_URL +app.state.config.DOCLING_DO_OCR = DOCLING_DO_OCR +app.state.config.DOCLING_FORCE_OCR = DOCLING_FORCE_OCR app.state.config.DOCLING_OCR_ENGINE = DOCLING_OCR_ENGINE app.state.config.DOCLING_OCR_LANG = DOCLING_OCR_LANG +app.state.config.DOCLING_PDF_BACKEND = DOCLING_PDF_BACKEND +app.state.config.DOCLING_TABLE_MODE = DOCLING_TABLE_MODE +app.state.config.DOCLING_PIPELINE = DOCLING_PIPELINE app.state.config.DOCLING_DO_PICTURE_DESCRIPTION = DOCLING_DO_PICTURE_DESCRIPTION app.state.config.DOCLING_PICTURE_DESCRIPTION_MODE = DOCLING_PICTURE_DESCRIPTION_MODE app.state.config.DOCLING_PICTURE_DESCRIPTION_LOCAL = DOCLING_PICTURE_DESCRIPTION_LOCAL diff --git a/backend/open_webui/retrieval/loaders/main.py b/backend/open_webui/retrieval/loaders/main.py index 9b90dca041..a459274a09 100644 --- a/backend/open_webui/retrieval/loaders/main.py +++ b/backend/open_webui/retrieval/loaders/main.py @@ -148,7 +148,7 @@ class DoclingLoader: ) } - params = {"image_export_mode": "placeholder", "table_mode": "accurate"} + params = {"image_export_mode": "placeholder"} if self.params: if self.params.get("do_picture_description"): @@ -174,7 +174,11 @@ class DoclingLoader: self.params.get("picture_description_api", {}) ) - if self.params.get("ocr_engine") and self.params.get("ocr_lang"): + params["do_ocr"] = self.params.get("do_ocr") + + params["force_ocr"] = self.params.get("force_ocr") + + if self.params.get("do_ocr") and self.params.get("ocr_engine") and self.params.get("ocr_lang"): params["ocr_engine"] = self.params.get("ocr_engine") params["ocr_lang"] = [ lang.strip() @@ -182,6 +186,16 @@ class DoclingLoader: if lang.strip() ] + if self.params.get("pdf_backend"): + params["pdf_backend"] = self.params.get("pdf_backend") + + if self.params.get("table_mode"): + params["table_mode"] = self.params.get("table_mode") + + if self.params.get("pipeline"): + params["pipeline"] = self.params.get("pipeline") + + endpoint = f"{self.url}/v1/convert/file" r = requests.post(endpoint, files=files, data=params) diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index dd1da9db40..dd5e2d5bc4 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -426,8 +426,13 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)): "EXTERNAL_DOCUMENT_LOADER_API_KEY": request.app.state.config.EXTERNAL_DOCUMENT_LOADER_API_KEY, "TIKA_SERVER_URL": request.app.state.config.TIKA_SERVER_URL, "DOCLING_SERVER_URL": request.app.state.config.DOCLING_SERVER_URL, + "DOCLING_DO_OCR": request.app.state.config.DOCLING_DO_OCR, + "DOCLING_FORCE_OCR": request.app.state.config.DOCLING_FORCE_OCR, "DOCLING_OCR_ENGINE": request.app.state.config.DOCLING_OCR_ENGINE, "DOCLING_OCR_LANG": request.app.state.config.DOCLING_OCR_LANG, + "DOCLING_PDF_BACKEND": request.app.state.config.DOCLING_PDF_BACKEND, + "DOCLING_TABLE_MODE": request.app.state.config.DOCLING_TABLE_MODE, + "DOCLING_PIPELINE": request.app.state.config.DOCLING_PIPELINE, "DOCLING_DO_PICTURE_DESCRIPTION": request.app.state.config.DOCLING_DO_PICTURE_DESCRIPTION, "DOCLING_PICTURE_DESCRIPTION_MODE": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_MODE, "DOCLING_PICTURE_DESCRIPTION_LOCAL": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_LOCAL, @@ -596,8 +601,13 @@ class ConfigForm(BaseModel): TIKA_SERVER_URL: Optional[str] = None DOCLING_SERVER_URL: Optional[str] = None + DOCLING_DO_OCR: Optional[bool] = None + DOCLING_FORCE_OCR: Optional[bool] = None DOCLING_OCR_ENGINE: Optional[str] = None DOCLING_OCR_LANG: Optional[str] = None + DOCLING_PDF_BACKEND: Optional[str] = None + DOCLING_TABLE_MODE: Optional[str] = None + DOCLING_PIPELINE: Optional[str] = None DOCLING_DO_PICTURE_DESCRIPTION: Optional[bool] = None DOCLING_PICTURE_DESCRIPTION_MODE: Optional[str] = None DOCLING_PICTURE_DESCRIPTION_LOCAL: Optional[dict] = None @@ -767,6 +777,16 @@ async def update_rag_config( if form_data.DOCLING_SERVER_URL is not None else request.app.state.config.DOCLING_SERVER_URL ) + request.app.state.config.DOCLING_DO_OCR = ( + form_data.DOCLING_DO_OCR + if form_data.DOCLING_DO_OCR is not None + else request.app.state.config.DOCLING_DO_OCR + ) + request.app.state.config.DOCLING_FORCE_OCR = ( + form_data.DOCLING_FORCE_OCR + if form_data.DOCLING_FORCE_OCR is not None + else request.app.state.config.DOCLING_FORCE_OCR + ) request.app.state.config.DOCLING_OCR_ENGINE = ( form_data.DOCLING_OCR_ENGINE if form_data.DOCLING_OCR_ENGINE is not None @@ -777,7 +797,21 @@ async def update_rag_config( if form_data.DOCLING_OCR_LANG is not None else request.app.state.config.DOCLING_OCR_LANG ) - + request.app.state.config.DOCLING_PDF_BACKEND = ( + form_data.DOCLING_PDF_BACKEND + if form_data.DOCLING_PDF_BACKEND is not None + else request.app.state.config.DOCLING_PDF_BACKEND + ) + request.app.state.config.DOCLING_TABLE_MODE = ( + form_data.DOCLING_TABLE_MODE + if form_data.DOCLING_TABLE_MODE is not None + else request.app.state.config.DOCLING_TABLE_MODE + ) + request.app.state.config.DOCLING_PIPELINE = ( + form_data.DOCLING_PIPELINE + if form_data.DOCLING_PIPELINE is not None + else request.app.state.config.DOCLING_PIPELINE + ) request.app.state.config.DOCLING_DO_PICTURE_DESCRIPTION = ( form_data.DOCLING_DO_PICTURE_DESCRIPTION if form_data.DOCLING_DO_PICTURE_DESCRIPTION is not None @@ -1062,8 +1096,13 @@ async def update_rag_config( "EXTERNAL_DOCUMENT_LOADER_API_KEY": request.app.state.config.EXTERNAL_DOCUMENT_LOADER_API_KEY, "TIKA_SERVER_URL": request.app.state.config.TIKA_SERVER_URL, "DOCLING_SERVER_URL": request.app.state.config.DOCLING_SERVER_URL, + "DOCLING_DO_OCR": request.app.state.config.DOCLING_DO_OCR, + "DOCLING_FORCE_OCR": request.app.state.config.DOCLING_FORCE_OCR, "DOCLING_OCR_ENGINE": request.app.state.config.DOCLING_OCR_ENGINE, "DOCLING_OCR_LANG": request.app.state.config.DOCLING_OCR_LANG, + "DOCLING_PDF_BACKEND": request.app.state.config.DOCLING_PDF_BACKEND, + "DOCLING_TABLE_MODE": request.app.state.config.DOCLING_TABLE_MODE, + "DOCLING_PIPELINE": request.app.state.config.DOCLING_PIPELINE, "DOCLING_DO_PICTURE_DESCRIPTION": request.app.state.config.DOCLING_DO_PICTURE_DESCRIPTION, "DOCLING_PICTURE_DESCRIPTION_MODE": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_MODE, "DOCLING_PICTURE_DESCRIPTION_LOCAL": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_LOCAL, @@ -1453,8 +1492,13 @@ def process_file( TIKA_SERVER_URL=request.app.state.config.TIKA_SERVER_URL, DOCLING_SERVER_URL=request.app.state.config.DOCLING_SERVER_URL, DOCLING_PARAMS={ + "do_ocr": request.app.state.config.DOCLING_DO_OCR, + "force_ocr": request.app.state.config.DOCLING_FORCE_OCR, "ocr_engine": request.app.state.config.DOCLING_OCR_ENGINE, "ocr_lang": request.app.state.config.DOCLING_OCR_LANG, + "pdf_backend": request.app.state.config.DOCLING_PDF_BACKEND, + "table_mode": request.app.state.config.DOCLING_TABLE_MODE, + "pipeline": request.app.state.config.DOCLING_PIPELINE, "do_picture_description": request.app.state.config.DOCLING_DO_PICTURE_DESCRIPTION, "picture_description_mode": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_MODE, "picture_description_local": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_LOCAL, diff --git a/src/lib/components/admin/Settings/Documents.svelte b/src/lib/components/admin/Settings/Documents.svelte index 087a9bb950..31a529140e 100644 --- a/src/lib/components/admin/Settings/Documents.svelte +++ b/src/lib/components/admin/Settings/Documents.svelte @@ -152,7 +152,8 @@ return; } if ( - RAGConfig.CONTENT_EXTRACTION_ENGINE === 'docling' && + RAGConfig.CONTENT_EXTRACTION_ENGINE === 'docling' && + RAGConfig.DOCLING_DO_OCR && ((RAGConfig.DOCLING_OCR_ENGINE === '' && RAGConfig.DOCLING_OCR_LANG !== '') || (RAGConfig.DOCLING_OCR_ENGINE !== '' && RAGConfig.DOCLING_OCR_LANG === '')) ) { @@ -161,6 +162,16 @@ ); return; } + if ( + RAGConfig.CONTENT_EXTRACTION_ENGINE === 'docling' && + RAGConfig.DOCLING_DO_OCR === false && + RAGConfig.DOCLING_FORCE_OCR === true + ) { + toast.error( + $i18n.t('In order to force OCR, performing OCR must be enabled.') + ); + return; + } if ( RAGConfig.CONTENT_EXTRACTION_ENGINE === 'datalab_marker' && @@ -544,21 +555,93 @@ placeholder={$i18n.t('Enter Docling Server URL')} bind:value={RAGConfig.DOCLING_SERVER_URL} /> - -
- - -
+
+
+
+ {$i18n.t('Perform OCR')} +
+
+ +
+
+
+ {#if RAGConfig.DOCLING_DO_OCR} +
+ + +
+ {/if} +
+
+
+ {$i18n.t('Force OCR')} +
+
+ +
+
+
+
+
+ + {$i18n.t('PDF Backend')} + +
+
+ +
+
+
+
+ + {$i18n.t('Table Mode')} + +
+
+ +
+
+
+
+ + {$i18n.t('Pipeline')} + +
+
+ +
+
+
{$i18n.t('Describe Pictures in Documents')}