mirror of
https://github.com/open-webui/open-webui.git
synced 2025-12-12 12:25:20 +00:00
feat: use MINERU_PARAMS json field for mineru settings
This commit is contained in:
parent
40e9d9c330
commit
288b323df8
6 changed files with 77 additions and 213 deletions
|
|
@ -2291,7 +2291,6 @@ DATALAB_MARKER_OUTPUT_FORMAT = PersistentConfig(
|
|||
os.environ.get("DATALAB_MARKER_OUTPUT_FORMAT", "markdown"),
|
||||
)
|
||||
|
||||
# MinerU Configuration
|
||||
MINERU_API_MODE = PersistentConfig(
|
||||
"MINERU_API_MODE",
|
||||
"rag.mineru_api_mode",
|
||||
|
|
@ -2310,40 +2309,16 @@ MINERU_API_KEY = PersistentConfig(
|
|||
os.environ.get("MINERU_API_KEY", ""),
|
||||
)
|
||||
|
||||
MINERU_ENABLE_OCR = PersistentConfig(
|
||||
"MINERU_ENABLE_OCR",
|
||||
"rag.mineru_enable_ocr",
|
||||
os.environ.get("MINERU_ENABLE_OCR", "false").lower() == "true",
|
||||
)
|
||||
mineru_params = os.getenv("MINERU_PARAMS", "")
|
||||
try:
|
||||
mineru_params = json.loads(mineru_params)
|
||||
except json.JSONDecodeError:
|
||||
mineru_params = {}
|
||||
|
||||
MINERU_ENABLE_FORMULA = PersistentConfig(
|
||||
"MINERU_ENABLE_FORMULA",
|
||||
"rag.mineru_enable_formula",
|
||||
os.environ.get("MINERU_ENABLE_FORMULA", "true").lower() == "true",
|
||||
)
|
||||
|
||||
MINERU_ENABLE_TABLE = PersistentConfig(
|
||||
"MINERU_ENABLE_TABLE",
|
||||
"rag.mineru_enable_table",
|
||||
os.environ.get("MINERU_ENABLE_TABLE", "true").lower() == "true",
|
||||
)
|
||||
|
||||
MINERU_LANGUAGE = PersistentConfig(
|
||||
"MINERU_LANGUAGE",
|
||||
"rag.mineru_language",
|
||||
os.environ.get("MINERU_LANGUAGE", "en"),
|
||||
)
|
||||
|
||||
MINERU_MODEL_VERSION = PersistentConfig(
|
||||
"MINERU_MODEL_VERSION",
|
||||
"rag.mineru_model_version",
|
||||
os.environ.get("MINERU_MODEL_VERSION", "pipeline"), # "pipeline" or "vlm"
|
||||
)
|
||||
|
||||
MINERU_PAGE_RANGES = PersistentConfig(
|
||||
"MINERU_PAGE_RANGES",
|
||||
"rag.mineru_page_ranges",
|
||||
os.environ.get("MINERU_PAGE_RANGES", ""),
|
||||
MINERU_PARAMS = PersistentConfig(
|
||||
"MINERU_PARAMS",
|
||||
"rag.mineru_params",
|
||||
mineru_params,
|
||||
)
|
||||
|
||||
EXTERNAL_DOCUMENT_LOADER_URL = PersistentConfig(
|
||||
|
|
|
|||
|
|
@ -246,12 +246,7 @@ from open_webui.config import (
|
|||
MINERU_API_MODE,
|
||||
MINERU_API_URL,
|
||||
MINERU_API_KEY,
|
||||
MINERU_ENABLE_OCR,
|
||||
MINERU_ENABLE_FORMULA,
|
||||
MINERU_ENABLE_TABLE,
|
||||
MINERU_LANGUAGE,
|
||||
MINERU_MODEL_VERSION,
|
||||
MINERU_PAGE_RANGES,
|
||||
MINERU_PARAMS,
|
||||
DATALAB_MARKER_USE_LLM,
|
||||
EXTERNAL_DOCUMENT_LOADER_URL,
|
||||
EXTERNAL_DOCUMENT_LOADER_API_KEY,
|
||||
|
|
@ -865,12 +860,7 @@ app.state.config.MISTRAL_OCR_API_KEY = MISTRAL_OCR_API_KEY
|
|||
app.state.config.MINERU_API_MODE = MINERU_API_MODE
|
||||
app.state.config.MINERU_API_URL = MINERU_API_URL
|
||||
app.state.config.MINERU_API_KEY = MINERU_API_KEY
|
||||
app.state.config.MINERU_ENABLE_OCR = MINERU_ENABLE_OCR
|
||||
app.state.config.MINERU_ENABLE_FORMULA = MINERU_ENABLE_FORMULA
|
||||
app.state.config.MINERU_ENABLE_TABLE = MINERU_ENABLE_TABLE
|
||||
app.state.config.MINERU_LANGUAGE = MINERU_LANGUAGE
|
||||
app.state.config.MINERU_MODEL_VERSION = MINERU_MODEL_VERSION
|
||||
app.state.config.MINERU_PAGE_RANGES = MINERU_PAGE_RANGES
|
||||
app.state.config.MINERU_PARAMS = MINERU_PARAMS
|
||||
|
||||
app.state.config.TEXT_SPLITTER = RAG_TEXT_SPLITTER
|
||||
app.state.config.TIKTOKEN_ENCODING_NAME = TIKTOKEN_ENCODING_NAME
|
||||
|
|
|
|||
|
|
@ -382,12 +382,7 @@ class Loader:
|
|||
api_mode=self.kwargs.get("MINERU_API_MODE", "local"),
|
||||
api_url=self.kwargs.get("MINERU_API_URL", "http://localhost:8000"),
|
||||
api_key=self.kwargs.get("MINERU_API_KEY", ""),
|
||||
enable_ocr=self.kwargs.get("MINERU_ENABLE_OCR", False),
|
||||
enable_formula=self.kwargs.get("MINERU_ENABLE_FORMULA", True),
|
||||
enable_table=self.kwargs.get("MINERU_ENABLE_TABLE", True),
|
||||
language=self.kwargs.get("MINERU_LANGUAGE", "en"),
|
||||
model_version=self.kwargs.get("MINERU_MODEL_VERSION", "pipeline"),
|
||||
page_ranges=self.kwargs.get("MINERU_PAGE_RANGES", ""),
|
||||
params=self.kwargs.get("MINERU_PARAMS", {}),
|
||||
)
|
||||
elif (
|
||||
self.engine == "mistral_ocr"
|
||||
|
|
|
|||
|
|
@ -25,23 +25,21 @@ class MinerULoader:
|
|||
api_mode: str = "local",
|
||||
api_url: str = "http://localhost:8000",
|
||||
api_key: str = "",
|
||||
enable_ocr: bool = False,
|
||||
enable_formula: bool = True,
|
||||
enable_table: bool = True,
|
||||
language: str = "en",
|
||||
model_version: str = "pipeline",
|
||||
page_ranges: str = "",
|
||||
params: dict = None,
|
||||
):
|
||||
self.file_path = file_path
|
||||
self.api_mode = api_mode.lower()
|
||||
self.api_url = api_url.rstrip("/")
|
||||
self.api_key = api_key
|
||||
self.enable_ocr = enable_ocr
|
||||
self.enable_formula = enable_formula
|
||||
self.enable_table = enable_table
|
||||
self.language = language
|
||||
self.model_version = model_version
|
||||
self.page_ranges = page_ranges
|
||||
|
||||
# Parse params dict with defaults
|
||||
params = params or {}
|
||||
self.enable_ocr = params.get("enable_ocr", False)
|
||||
self.enable_formula = params.get("enable_formula", True)
|
||||
self.enable_table = params.get("enable_table", True)
|
||||
self.language = params.get("language", "en")
|
||||
self.model_version = params.get("model_version", "pipeline")
|
||||
self.page_ranges = params.get("page_ranges", "")
|
||||
|
||||
# Validate API mode
|
||||
if self.api_mode not in ["local", "cloud"]:
|
||||
|
|
|
|||
|
|
@ -470,12 +470,7 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)):
|
|||
"MINERU_API_MODE": request.app.state.config.MINERU_API_MODE,
|
||||
"MINERU_API_URL": request.app.state.config.MINERU_API_URL,
|
||||
"MINERU_API_KEY": request.app.state.config.MINERU_API_KEY,
|
||||
"MINERU_ENABLE_OCR": request.app.state.config.MINERU_ENABLE_OCR,
|
||||
"MINERU_ENABLE_FORMULA": request.app.state.config.MINERU_ENABLE_FORMULA,
|
||||
"MINERU_ENABLE_TABLE": request.app.state.config.MINERU_ENABLE_TABLE,
|
||||
"MINERU_LANGUAGE": request.app.state.config.MINERU_LANGUAGE,
|
||||
"MINERU_MODEL_VERSION": request.app.state.config.MINERU_MODEL_VERSION,
|
||||
"MINERU_PAGE_RANGES": request.app.state.config.MINERU_PAGE_RANGES,
|
||||
"MINERU_PARAMS": request.app.state.config.MINERU_PARAMS,
|
||||
# Reranking settings
|
||||
"RAG_RERANKING_MODEL": request.app.state.config.RAG_RERANKING_MODEL,
|
||||
"RAG_RERANKING_ENGINE": request.app.state.config.RAG_RERANKING_ENGINE,
|
||||
|
|
@ -661,12 +656,7 @@ class ConfigForm(BaseModel):
|
|||
MINERU_API_MODE: Optional[str] = None
|
||||
MINERU_API_URL: Optional[str] = None
|
||||
MINERU_API_KEY: Optional[str] = None
|
||||
MINERU_ENABLE_OCR: Optional[bool] = None
|
||||
MINERU_ENABLE_FORMULA: Optional[bool] = None
|
||||
MINERU_ENABLE_TABLE: Optional[bool] = None
|
||||
MINERU_LANGUAGE: Optional[str] = None
|
||||
MINERU_MODEL_VERSION: Optional[str] = None
|
||||
MINERU_PAGE_RANGES: Optional[str] = None
|
||||
MINERU_PARAMS: Optional[dict] = None
|
||||
|
||||
# Reranking settings
|
||||
RAG_RERANKING_MODEL: Optional[str] = None
|
||||
|
|
@ -923,35 +913,10 @@ async def update_rag_config(
|
|||
if form_data.MINERU_API_KEY is not None
|
||||
else request.app.state.config.MINERU_API_KEY
|
||||
)
|
||||
request.app.state.config.MINERU_ENABLE_OCR = (
|
||||
form_data.MINERU_ENABLE_OCR
|
||||
if form_data.MINERU_ENABLE_OCR is not None
|
||||
else request.app.state.config.MINERU_ENABLE_OCR
|
||||
)
|
||||
request.app.state.config.MINERU_ENABLE_FORMULA = (
|
||||
form_data.MINERU_ENABLE_FORMULA
|
||||
if form_data.MINERU_ENABLE_FORMULA is not None
|
||||
else request.app.state.config.MINERU_ENABLE_FORMULA
|
||||
)
|
||||
request.app.state.config.MINERU_ENABLE_TABLE = (
|
||||
form_data.MINERU_ENABLE_TABLE
|
||||
if form_data.MINERU_ENABLE_TABLE is not None
|
||||
else request.app.state.config.MINERU_ENABLE_TABLE
|
||||
)
|
||||
request.app.state.config.MINERU_LANGUAGE = (
|
||||
form_data.MINERU_LANGUAGE
|
||||
if form_data.MINERU_LANGUAGE is not None
|
||||
else request.app.state.config.MINERU_LANGUAGE
|
||||
)
|
||||
request.app.state.config.MINERU_MODEL_VERSION = (
|
||||
form_data.MINERU_MODEL_VERSION
|
||||
if form_data.MINERU_MODEL_VERSION is not None
|
||||
else request.app.state.config.MINERU_MODEL_VERSION
|
||||
)
|
||||
request.app.state.config.MINERU_PAGE_RANGES = (
|
||||
form_data.MINERU_PAGE_RANGES
|
||||
if form_data.MINERU_PAGE_RANGES is not None
|
||||
else request.app.state.config.MINERU_PAGE_RANGES
|
||||
request.app.state.config.MINERU_PARAMS = (
|
||||
form_data.MINERU_PARAMS
|
||||
if form_data.MINERU_PARAMS is not None
|
||||
else request.app.state.config.MINERU_PARAMS
|
||||
)
|
||||
|
||||
# Reranking settings
|
||||
|
|
@ -1222,12 +1187,7 @@ async def update_rag_config(
|
|||
"MINERU_API_MODE": request.app.state.config.MINERU_API_MODE,
|
||||
"MINERU_API_URL": request.app.state.config.MINERU_API_URL,
|
||||
"MINERU_API_KEY": request.app.state.config.MINERU_API_KEY,
|
||||
"MINERU_ENABLE_OCR": request.app.state.config.MINERU_ENABLE_OCR,
|
||||
"MINERU_ENABLE_FORMULA": request.app.state.config.MINERU_ENABLE_FORMULA,
|
||||
"MINERU_ENABLE_TABLE": request.app.state.config.MINERU_ENABLE_TABLE,
|
||||
"MINERU_LANGUAGE": request.app.state.config.MINERU_LANGUAGE,
|
||||
"MINERU_MODEL_VERSION": request.app.state.config.MINERU_MODEL_VERSION,
|
||||
"MINERU_PAGE_RANGES": request.app.state.config.MINERU_PAGE_RANGES,
|
||||
"MINERU_PARAMS": request.app.state.config.MINERU_PARAMS,
|
||||
# Reranking settings
|
||||
"RAG_RERANKING_MODEL": request.app.state.config.RAG_RERANKING_MODEL,
|
||||
"RAG_RERANKING_ENGINE": request.app.state.config.RAG_RERANKING_ENGINE,
|
||||
|
|
@ -1641,12 +1601,7 @@ def process_file(
|
|||
MINERU_API_MODE=request.app.state.config.MINERU_API_MODE,
|
||||
MINERU_API_URL=request.app.state.config.MINERU_API_URL,
|
||||
MINERU_API_KEY=request.app.state.config.MINERU_API_KEY,
|
||||
MINERU_ENABLE_OCR=request.app.state.config.MINERU_ENABLE_OCR,
|
||||
MINERU_ENABLE_FORMULA=request.app.state.config.MINERU_ENABLE_FORMULA,
|
||||
MINERU_ENABLE_TABLE=request.app.state.config.MINERU_ENABLE_TABLE,
|
||||
MINERU_LANGUAGE=request.app.state.config.MINERU_LANGUAGE,
|
||||
MINERU_MODEL_VERSION=request.app.state.config.MINERU_MODEL_VERSION,
|
||||
MINERU_PAGE_RANGES=request.app.state.config.MINERU_PAGE_RANGES,
|
||||
MINERU_PARAMS=request.app.state.config.MINERU_PARAMS,
|
||||
)
|
||||
docs = loader.load(
|
||||
file.filename, file.meta.get("content_type"), file_path
|
||||
|
|
|
|||
|
|
@ -791,106 +791,57 @@
|
|||
</div>
|
||||
</div>
|
||||
|
||||
<!-- API URL -->
|
||||
<!-- API URL -->
|
||||
<div class="flex w-full mt-2">
|
||||
<input
|
||||
class="flex-1 w-full text-sm bg-transparent outline-hidden"
|
||||
placeholder={RAGConfig.MINERU_API_MODE === 'cloud'
|
||||
? $i18n.t('https://mineru.net/api/v4')
|
||||
: $i18n.t('http://localhost:8000')}
|
||||
bind:value={RAGConfig.MINERU_API_URL}
|
||||
/>
|
||||
</div>
|
||||
|
||||
<!-- API Key (Cloud only) -->
|
||||
{#if RAGConfig.MINERU_API_MODE === 'cloud'}
|
||||
<div class="flex w-full mt-2">
|
||||
<input
|
||||
class="flex-1 w-full text-sm bg-transparent outline-hidden"
|
||||
placeholder={RAGConfig.MINERU_API_MODE === 'cloud'
|
||||
? $i18n.t('https://mineru.net/api/v4')
|
||||
: $i18n.t('http://localhost:8000')}
|
||||
bind:value={RAGConfig.MINERU_API_URL}
|
||||
<SensitiveInput
|
||||
placeholder={$i18n.t('Enter MinerU API Key')}
|
||||
bind:value={RAGConfig.MINERU_API_KEY}
|
||||
/>
|
||||
</div>
|
||||
|
||||
<!-- API Key (Cloud only) -->
|
||||
{#if RAGConfig.MINERU_API_MODE === 'cloud'}
|
||||
<div class="flex w-full mt-2">
|
||||
<SensitiveInput
|
||||
placeholder={$i18n.t('Enter MinerU API Key')}
|
||||
bind:value={RAGConfig.MINERU_API_KEY}
|
||||
/>
|
||||
</div>
|
||||
{/if}
|
||||
|
||||
<!-- OCR Toggle -->
|
||||
<div class="flex w-full mt-2">
|
||||
<div class="flex-1 flex justify-between">
|
||||
<div class="self-center text-xs font-medium">
|
||||
{$i18n.t('Enable OCR (for scanned documents)')}
|
||||
</div>
|
||||
<div class="flex items-center relative">
|
||||
<Switch bind:state={RAGConfig.MINERU_ENABLE_OCR} />
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Formula Recognition -->
|
||||
<div class="flex w-full mt-2">
|
||||
<div class="flex-1 flex justify-between">
|
||||
<div class="self-center text-xs font-medium">
|
||||
{$i18n.t('Enable Formula Recognition')}
|
||||
</div>
|
||||
<div class="flex items-center relative">
|
||||
<Switch bind:state={RAGConfig.MINERU_ENABLE_FORMULA} />
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Table Recognition -->
|
||||
<div class="flex w-full mt-2">
|
||||
<div class="flex-1 flex justify-between">
|
||||
<div class="self-center text-xs font-medium">
|
||||
{$i18n.t('Enable Table Recognition')}
|
||||
</div>
|
||||
<div class="flex items-center relative">
|
||||
<Switch bind:state={RAGConfig.MINERU_ENABLE_TABLE} />
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Advanced Settings Toggle -->
|
||||
<details class="w-full mt-2">
|
||||
<summary class="text-xs font-medium cursor-pointer hover:text-gray-600 dark:hover:text-gray-300">
|
||||
{$i18n.t('Advanced Settings')}
|
||||
</summary>
|
||||
|
||||
<div class="mt-2 space-y-2 pl-2 border-l-2 border-gray-200 dark:border-gray-700">
|
||||
<!-- Model Version -->
|
||||
<div class="flex w-full">
|
||||
<div class="flex-1 flex justify-between">
|
||||
<div class="self-center text-xs font-medium">
|
||||
{$i18n.t('Model Version')}
|
||||
</div>
|
||||
<select
|
||||
class="dark:bg-gray-900 w-fit pr-8 rounded-sm px-2 text-xs bg-transparent outline-hidden"
|
||||
bind:value={RAGConfig.MINERU_MODEL_VERSION}
|
||||
>
|
||||
<option value="pipeline">{$i18n.t('Pipeline (Faster, CPU-friendly)')}</option>
|
||||
<option value="vlm">{$i18n.t('VLM (More Accurate, GPU required)')}</option>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Language -->
|
||||
<div class="flex w-full">
|
||||
<input
|
||||
class="flex-1 w-full text-xs bg-transparent outline-hidden"
|
||||
placeholder={$i18n.t('Language: en, ch, japan, korean, etc. (default: en)')}
|
||||
bind:value={RAGConfig.MINERU_LANGUAGE}
|
||||
/>
|
||||
</div>
|
||||
|
||||
<!-- Page Ranges (Optional) -->
|
||||
<div class="flex w-full">
|
||||
<input
|
||||
class="flex-1 w-full text-xs bg-transparent outline-hidden"
|
||||
placeholder={$i18n.t('Page ranges (optional): e.g., 1-10,15,20-25')}
|
||||
bind:value={RAGConfig.MINERU_PAGE_RANGES}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
</details>
|
||||
{/if}
|
||||
|
||||
<!-- Parameters -->
|
||||
<div class="flex justify-between w-full mt-2">
|
||||
<div class="self-center text-xs font-medium">
|
||||
<Tooltip
|
||||
content={$i18n.t('Advanced parameters for MinerU parsing (enable_ocr, enable_formula, enable_table, language, model_version, page_ranges)')}
|
||||
placement="top-start"
|
||||
>
|
||||
{$i18n.t('Parameters')}
|
||||
</Tooltip>
|
||||
</div>
|
||||
<div class="">
|
||||
<Textarea
|
||||
value={typeof RAGConfig.MINERU_PARAMS === 'object' && RAGConfig.MINERU_PARAMS !== null && Object.keys(RAGConfig.MINERU_PARAMS).length > 0
|
||||
? JSON.stringify(RAGConfig.MINERU_PARAMS, null, 2)
|
||||
: ''}
|
||||
on:input={(e) => {
|
||||
try {
|
||||
const value = e.target.value.trim();
|
||||
RAGConfig.MINERU_PARAMS = value ? JSON.parse(value) : {};
|
||||
} catch (err) {
|
||||
// Keep the string value if JSON is invalid (user is still typing)
|
||||
RAGConfig.MINERU_PARAMS = e.target.value;
|
||||
}
|
||||
}}
|
||||
placeholder={`{\n "enable_ocr": false,\n "enable_formula": true,\n "enable_table": true,\n "language": "en",\n "model_version": "pipeline",\n "page_ranges": ""\n}`}
|
||||
minSize={100}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
|
||||
<div class=" mb-2.5 flex w-full justify-between">
|
||||
|
|
|
|||
Loading…
Reference in a new issue