diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py index 0d21913463..a40d3bf2e3 100644 --- a/backend/open_webui/config.py +++ b/backend/open_webui/config.py @@ -2291,7 +2291,6 @@ DATALAB_MARKER_OUTPUT_FORMAT = PersistentConfig( os.environ.get("DATALAB_MARKER_OUTPUT_FORMAT", "markdown"), ) -# MinerU Configuration MINERU_API_MODE = PersistentConfig( "MINERU_API_MODE", "rag.mineru_api_mode", @@ -2310,40 +2309,19 @@ MINERU_API_KEY = PersistentConfig( os.environ.get("MINERU_API_KEY", ""), ) -MINERU_ENABLE_OCR = PersistentConfig( - "MINERU_ENABLE_OCR", - "rag.mineru_enable_ocr", - os.environ.get("MINERU_ENABLE_OCR", "false").lower() == "true", -) +# MINERU_PARAMS must be a JSON object; unset, malformed, or non-object values fall back to {}. +mineru_params = os.getenv("MINERU_PARAMS", "") +try: + mineru_params = json.loads(mineru_params) +except json.JSONDecodeError: + mineru_params = {} +if not isinstance(mineru_params, dict): + mineru_params = {} -MINERU_ENABLE_FORMULA = PersistentConfig( - "MINERU_ENABLE_FORMULA", - "rag.mineru_enable_formula", - os.environ.get("MINERU_ENABLE_FORMULA", "true").lower() == "true", -) - -MINERU_ENABLE_TABLE = PersistentConfig( - "MINERU_ENABLE_TABLE", - "rag.mineru_enable_table", - os.environ.get("MINERU_ENABLE_TABLE", "true").lower() == "true", -) - -MINERU_LANGUAGE = PersistentConfig( - "MINERU_LANGUAGE", - "rag.mineru_language", - os.environ.get("MINERU_LANGUAGE", "en"), -) - -MINERU_MODEL_VERSION = PersistentConfig( - "MINERU_MODEL_VERSION", - "rag.mineru_model_version", - os.environ.get("MINERU_MODEL_VERSION", "pipeline"), # "pipeline" or "vlm" -) - -MINERU_PAGE_RANGES = PersistentConfig( - "MINERU_PAGE_RANGES", - "rag.mineru_page_ranges", - os.environ.get("MINERU_PAGE_RANGES", ""), +MINERU_PARAMS = PersistentConfig( + "MINERU_PARAMS", + "rag.mineru_params", + mineru_params, ) EXTERNAL_DOCUMENT_LOADER_URL = PersistentConfig( diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index efb897d2be..9998af0e73 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -246,12 +246,7 @@ from open_webui.config import ( MINERU_API_MODE, 
MINERU_API_URL, MINERU_API_KEY, - MINERU_ENABLE_OCR, - MINERU_ENABLE_FORMULA, - MINERU_ENABLE_TABLE, - MINERU_LANGUAGE, - MINERU_MODEL_VERSION, - MINERU_PAGE_RANGES, + MINERU_PARAMS, DATALAB_MARKER_USE_LLM, EXTERNAL_DOCUMENT_LOADER_URL, EXTERNAL_DOCUMENT_LOADER_API_KEY, @@ -865,12 +860,7 @@ app.state.config.MISTRAL_OCR_API_KEY = MISTRAL_OCR_API_KEY app.state.config.MINERU_API_MODE = MINERU_API_MODE app.state.config.MINERU_API_URL = MINERU_API_URL app.state.config.MINERU_API_KEY = MINERU_API_KEY -app.state.config.MINERU_ENABLE_OCR = MINERU_ENABLE_OCR -app.state.config.MINERU_ENABLE_FORMULA = MINERU_ENABLE_FORMULA -app.state.config.MINERU_ENABLE_TABLE = MINERU_ENABLE_TABLE -app.state.config.MINERU_LANGUAGE = MINERU_LANGUAGE -app.state.config.MINERU_MODEL_VERSION = MINERU_MODEL_VERSION -app.state.config.MINERU_PAGE_RANGES = MINERU_PAGE_RANGES +app.state.config.MINERU_PARAMS = MINERU_PARAMS app.state.config.TEXT_SPLITTER = RAG_TEXT_SPLITTER app.state.config.TIKTOKEN_ENCODING_NAME = TIKTOKEN_ENCODING_NAME diff --git a/backend/open_webui/retrieval/loaders/main.py b/backend/open_webui/retrieval/loaders/main.py index cb41cea843..2ef1d75e02 100644 --- a/backend/open_webui/retrieval/loaders/main.py +++ b/backend/open_webui/retrieval/loaders/main.py @@ -382,12 +382,7 @@ class Loader: api_mode=self.kwargs.get("MINERU_API_MODE", "local"), api_url=self.kwargs.get("MINERU_API_URL", "http://localhost:8000"), api_key=self.kwargs.get("MINERU_API_KEY", ""), - enable_ocr=self.kwargs.get("MINERU_ENABLE_OCR", False), - enable_formula=self.kwargs.get("MINERU_ENABLE_FORMULA", True), - enable_table=self.kwargs.get("MINERU_ENABLE_TABLE", True), - language=self.kwargs.get("MINERU_LANGUAGE", "en"), - model_version=self.kwargs.get("MINERU_MODEL_VERSION", "pipeline"), - page_ranges=self.kwargs.get("MINERU_PAGE_RANGES", ""), + params=self.kwargs.get("MINERU_PARAMS", {}), ) elif ( self.engine == "mistral_ocr" diff --git a/backend/open_webui/retrieval/loaders/mineru.py 
b/backend/open_webui/retrieval/loaders/mineru.py index 0dbf12d874..437f44ae6b 100644 --- a/backend/open_webui/retrieval/loaders/mineru.py +++ b/backend/open_webui/retrieval/loaders/mineru.py @@ -25,23 +25,21 @@ class MinerULoader: api_mode: str = "local", api_url: str = "http://localhost:8000", api_key: str = "", - enable_ocr: bool = False, - enable_formula: bool = True, - enable_table: bool = True, - language: str = "en", - model_version: str = "pipeline", - page_ranges: str = "", + params: dict = None, ): self.file_path = file_path self.api_mode = api_mode.lower() self.api_url = api_url.rstrip("/") self.api_key = api_key - self.enable_ocr = enable_ocr - self.enable_formula = enable_formula - self.enable_table = enable_table - self.language = language - self.model_version = model_version - self.page_ranges = page_ranges + + # Use defaults when params is missing or not a dict (avoids .get on a non-mapping) + params = params if isinstance(params, dict) else {} + self.enable_ocr = params.get("enable_ocr", False) + self.enable_formula = params.get("enable_formula", True) + self.enable_table = params.get("enable_table", True) + self.language = params.get("language", "en") + self.model_version = params.get("model_version", "pipeline") + self.page_ranges = params.get("page_ranges", "") # Validate API mode if self.api_mode not in ["local", "cloud"]: diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index 6823c6294d..cb66e8926e 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -470,12 +470,7 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)): "MINERU_API_MODE": request.app.state.config.MINERU_API_MODE, "MINERU_API_URL": request.app.state.config.MINERU_API_URL, "MINERU_API_KEY": request.app.state.config.MINERU_API_KEY, - "MINERU_ENABLE_OCR": request.app.state.config.MINERU_ENABLE_OCR, - "MINERU_ENABLE_FORMULA": request.app.state.config.MINERU_ENABLE_FORMULA, - "MINERU_ENABLE_TABLE": request.app.state.config.MINERU_ENABLE_TABLE, - 
"MINERU_LANGUAGE": request.app.state.config.MINERU_LANGUAGE, - "MINERU_MODEL_VERSION": request.app.state.config.MINERU_MODEL_VERSION, - "MINERU_PAGE_RANGES": request.app.state.config.MINERU_PAGE_RANGES, + "MINERU_PARAMS": request.app.state.config.MINERU_PARAMS, # Reranking settings "RAG_RERANKING_MODEL": request.app.state.config.RAG_RERANKING_MODEL, "RAG_RERANKING_ENGINE": request.app.state.config.RAG_RERANKING_ENGINE, @@ -661,12 +656,7 @@ class ConfigForm(BaseModel): MINERU_API_MODE: Optional[str] = None MINERU_API_URL: Optional[str] = None MINERU_API_KEY: Optional[str] = None - MINERU_ENABLE_OCR: Optional[bool] = None - MINERU_ENABLE_FORMULA: Optional[bool] = None - MINERU_ENABLE_TABLE: Optional[bool] = None - MINERU_LANGUAGE: Optional[str] = None - MINERU_MODEL_VERSION: Optional[str] = None - MINERU_PAGE_RANGES: Optional[str] = None + MINERU_PARAMS: Optional[dict] = None # Reranking settings RAG_RERANKING_MODEL: Optional[str] = None @@ -923,35 +913,10 @@ async def update_rag_config( if form_data.MINERU_API_KEY is not None else request.app.state.config.MINERU_API_KEY ) - request.app.state.config.MINERU_ENABLE_OCR = ( - form_data.MINERU_ENABLE_OCR - if form_data.MINERU_ENABLE_OCR is not None - else request.app.state.config.MINERU_ENABLE_OCR - ) - request.app.state.config.MINERU_ENABLE_FORMULA = ( - form_data.MINERU_ENABLE_FORMULA - if form_data.MINERU_ENABLE_FORMULA is not None - else request.app.state.config.MINERU_ENABLE_FORMULA - ) - request.app.state.config.MINERU_ENABLE_TABLE = ( - form_data.MINERU_ENABLE_TABLE - if form_data.MINERU_ENABLE_TABLE is not None - else request.app.state.config.MINERU_ENABLE_TABLE - ) - request.app.state.config.MINERU_LANGUAGE = ( - form_data.MINERU_LANGUAGE - if form_data.MINERU_LANGUAGE is not None - else request.app.state.config.MINERU_LANGUAGE - ) - request.app.state.config.MINERU_MODEL_VERSION = ( - form_data.MINERU_MODEL_VERSION - if form_data.MINERU_MODEL_VERSION is not None - else 
request.app.state.config.MINERU_MODEL_VERSION - ) - request.app.state.config.MINERU_PAGE_RANGES = ( - form_data.MINERU_PAGE_RANGES - if form_data.MINERU_PAGE_RANGES is not None - else request.app.state.config.MINERU_PAGE_RANGES + request.app.state.config.MINERU_PARAMS = ( + form_data.MINERU_PARAMS + if form_data.MINERU_PARAMS is not None + else request.app.state.config.MINERU_PARAMS ) # Reranking settings @@ -1222,12 +1187,7 @@ async def update_rag_config( "MINERU_API_MODE": request.app.state.config.MINERU_API_MODE, "MINERU_API_URL": request.app.state.config.MINERU_API_URL, "MINERU_API_KEY": request.app.state.config.MINERU_API_KEY, - "MINERU_ENABLE_OCR": request.app.state.config.MINERU_ENABLE_OCR, - "MINERU_ENABLE_FORMULA": request.app.state.config.MINERU_ENABLE_FORMULA, - "MINERU_ENABLE_TABLE": request.app.state.config.MINERU_ENABLE_TABLE, - "MINERU_LANGUAGE": request.app.state.config.MINERU_LANGUAGE, - "MINERU_MODEL_VERSION": request.app.state.config.MINERU_MODEL_VERSION, - "MINERU_PAGE_RANGES": request.app.state.config.MINERU_PAGE_RANGES, + "MINERU_PARAMS": request.app.state.config.MINERU_PARAMS, # Reranking settings "RAG_RERANKING_MODEL": request.app.state.config.RAG_RERANKING_MODEL, "RAG_RERANKING_ENGINE": request.app.state.config.RAG_RERANKING_ENGINE, @@ -1641,12 +1601,7 @@ def process_file( MINERU_API_MODE=request.app.state.config.MINERU_API_MODE, MINERU_API_URL=request.app.state.config.MINERU_API_URL, MINERU_API_KEY=request.app.state.config.MINERU_API_KEY, - MINERU_ENABLE_OCR=request.app.state.config.MINERU_ENABLE_OCR, - MINERU_ENABLE_FORMULA=request.app.state.config.MINERU_ENABLE_FORMULA, - MINERU_ENABLE_TABLE=request.app.state.config.MINERU_ENABLE_TABLE, - MINERU_LANGUAGE=request.app.state.config.MINERU_LANGUAGE, - MINERU_MODEL_VERSION=request.app.state.config.MINERU_MODEL_VERSION, - MINERU_PAGE_RANGES=request.app.state.config.MINERU_PAGE_RANGES, + MINERU_PARAMS=request.app.state.config.MINERU_PARAMS, ) docs = loader.load( file.filename, 
file.meta.get("content_type"), file_path diff --git a/src/lib/components/admin/Settings/Documents.svelte b/src/lib/components/admin/Settings/Documents.svelte index 7e338d9d66..fbda44c4ba 100644 --- a/src/lib/components/admin/Settings/Documents.svelte +++ b/src/lib/components/admin/Settings/Documents.svelte @@ -791,106 +791,57 @@ - + +