refac/enh: docling params

This commit is contained in:
Timothy Jaeryang Baek 2025-10-02 16:28:06 -05:00
parent 2494de8f12
commit 339e95e9d7
5 changed files with 41 additions and 1 deletions

View file

@ -2310,6 +2310,18 @@ DOCLING_SERVER_URL = PersistentConfig(
os.getenv("DOCLING_SERVER_URL", "http://docling:5001"),
)
docling_params = os.getenv("DOCLING_PARAMS", "")
try:
docling_params = json.loads(docling_params)
except json.JSONDecodeError:
docling_params = {}
DOCLING_PARAMS = PersistentConfig(
"DOCLING_PARAMS",
"rag.docling_params",
docling_params,
)
DOCLING_DO_OCR = PersistentConfig(
"DOCLING_DO_OCR",
"rag.docling_do_ocr",

View file

@ -248,6 +248,7 @@ from open_webui.config import (
EXTERNAL_DOCUMENT_LOADER_API_KEY,
TIKA_SERVER_URL,
DOCLING_SERVER_URL,
DOCLING_PARAMS,
DOCLING_DO_OCR,
DOCLING_FORCE_OCR,
DOCLING_OCR_ENGINE,
@ -836,6 +837,7 @@ app.state.config.EXTERNAL_DOCUMENT_LOADER_URL = EXTERNAL_DOCUMENT_LOADER_URL
app.state.config.EXTERNAL_DOCUMENT_LOADER_API_KEY = EXTERNAL_DOCUMENT_LOADER_API_KEY
app.state.config.TIKA_SERVER_URL = TIKA_SERVER_URL
app.state.config.DOCLING_SERVER_URL = DOCLING_SERVER_URL
app.state.config.DOCLING_PARAMS = DOCLING_PARAMS
app.state.config.DOCLING_DO_OCR = DOCLING_DO_OCR
app.state.config.DOCLING_FORCE_OCR = DOCLING_FORCE_OCR
app.state.config.DOCLING_OCR_ENGINE = DOCLING_OCR_ENGINE

View file

@ -429,6 +429,7 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)):
"EXTERNAL_DOCUMENT_LOADER_API_KEY": request.app.state.config.EXTERNAL_DOCUMENT_LOADER_API_KEY,
"TIKA_SERVER_URL": request.app.state.config.TIKA_SERVER_URL,
"DOCLING_SERVER_URL": request.app.state.config.DOCLING_SERVER_URL,
"DOCLING_PARAMS": request.app.state.config.DOCLING_PARAMS,
"DOCLING_DO_OCR": request.app.state.config.DOCLING_DO_OCR,
"DOCLING_FORCE_OCR": request.app.state.config.DOCLING_FORCE_OCR,
"DOCLING_OCR_ENGINE": request.app.state.config.DOCLING_OCR_ENGINE,
@ -590,6 +591,7 @@ class ConfigForm(BaseModel):
# Content extraction settings
CONTENT_EXTRACTION_ENGINE: Optional[str] = None
PDF_EXTRACT_IMAGES: Optional[bool] = None
DATALAB_MARKER_API_KEY: Optional[str] = None
DATALAB_MARKER_API_BASE_URL: Optional[str] = None
DATALAB_MARKER_ADDITIONAL_CONFIG: Optional[str] = None
@ -601,11 +603,13 @@ class ConfigForm(BaseModel):
DATALAB_MARKER_FORMAT_LINES: Optional[bool] = None
DATALAB_MARKER_USE_LLM: Optional[bool] = None
DATALAB_MARKER_OUTPUT_FORMAT: Optional[str] = None
EXTERNAL_DOCUMENT_LOADER_URL: Optional[str] = None
EXTERNAL_DOCUMENT_LOADER_API_KEY: Optional[str] = None
TIKA_SERVER_URL: Optional[str] = None
DOCLING_SERVER_URL: Optional[str] = None
DOCLING_PARAMS: Optional[dict] = None
DOCLING_DO_OCR: Optional[bool] = None
DOCLING_FORCE_OCR: Optional[bool] = None
DOCLING_OCR_ENGINE: Optional[str] = None
@ -782,6 +786,11 @@ async def update_rag_config(
if form_data.DOCLING_SERVER_URL is not None
else request.app.state.config.DOCLING_SERVER_URL
)
request.app.state.config.DOCLING_PARAMS = (
form_data.DOCLING_PARAMS
if form_data.DOCLING_PARAMS is not None
else request.app.state.config.DOCLING_PARAMS
)
request.app.state.config.DOCLING_DO_OCR = (
form_data.DOCLING_DO_OCR
if form_data.DOCLING_DO_OCR is not None
@ -1104,6 +1113,7 @@ async def update_rag_config(
"EXTERNAL_DOCUMENT_LOADER_API_KEY": request.app.state.config.EXTERNAL_DOCUMENT_LOADER_API_KEY,
"TIKA_SERVER_URL": request.app.state.config.TIKA_SERVER_URL,
"DOCLING_SERVER_URL": request.app.state.config.DOCLING_SERVER_URL,
"DOCLING_PARAMS": request.app.state.config.DOCLING_PARAMS,
"DOCLING_DO_OCR": request.app.state.config.DOCLING_DO_OCR,
"DOCLING_FORCE_OCR": request.app.state.config.DOCLING_FORCE_OCR,
"DOCLING_OCR_ENGINE": request.app.state.config.DOCLING_OCR_ENGINE,
@ -1522,6 +1532,7 @@ def process_file(
"picture_description_mode": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_MODE,
"picture_description_local": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_LOCAL,
"picture_description_api": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_API,
**request.app.state.config.DOCLING_PARAMS,
},
PDF_EXTRACT_IMAGES=request.app.state.config.PDF_EXTRACT_IMAGES,
DOCUMENT_INTELLIGENCE_ENDPOINT=request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT,

View file

@ -634,7 +634,7 @@
<Textarea
className="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
bind:value={TTS_OPENAI_PARAMS}
placeholder={$i18n.t('Enter additional parameters')}
placeholder={$i18n.t('Enter additional parameters in JSON format')}
minSize={100}
/>
</div>

View file

@ -714,6 +714,21 @@
</div>
{/if}
{/if}
<div class="flex justify-between w-full mt-2">
<div class="self-center text-xs font-medium">
<Tooltip content={''} placement="top-start">
{$i18n.t('Parameters')}
</Tooltip>
</div>
<div class="">
<Textarea
bind:value={RAGConfig.DOCLING_PARAMETERS}
placeholder={$i18n.t('Enter additional parameters in JSON format')}
minSize={100}
/>
</div>
</div>
{:else if RAGConfig.CONTENT_EXTRACTION_ENGINE === 'document_intelligence'}
<div class="my-0.5 flex gap-2 pr-2">
<input