From 4673e120c413d2e62a852e5c1d6cad8922b82300 Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Tue, 11 Nov 2025 00:30:11 -0500 Subject: [PATCH] refac/fix: mineru params breaking change --- .../open_webui/retrieval/loaders/mineru.py | 29 ++++--------------- 1 file changed, 5 insertions(+), 24 deletions(-) diff --git a/backend/open_webui/retrieval/loaders/mineru.py b/backend/open_webui/retrieval/loaders/mineru.py index 437f44ae6b..360af804c7 100644 --- a/backend/open_webui/retrieval/loaders/mineru.py +++ b/backend/open_webui/retrieval/loaders/mineru.py @@ -33,13 +33,14 @@ class MinerULoader: self.api_key = api_key # Parse params dict with defaults - params = params or {} + self.params = params or {} self.enable_ocr = params.get("enable_ocr", False) self.enable_formula = params.get("enable_formula", True) self.enable_table = params.get("enable_table", True) self.language = params.get("language", "en") self.model_version = params.get("model_version", "pipeline") - self.page_ranges = params.get("page_ranges", "") + + self.page_ranges = self.params.pop("page_ranges", "") # Validate API mode if self.api_mode not in ["local", "cloud"]: @@ -76,27 +77,10 @@ class MinerULoader: # Build form data for Local API form_data = { + **self.params, "return_md": "true", - "formula_enable": str(self.enable_formula).lower(), - "table_enable": str(self.enable_table).lower(), } - # Parse method based on OCR setting - if self.enable_ocr: - form_data["parse_method"] = "ocr" - else: - form_data["parse_method"] = "auto" - - # Language configuration (Local API uses lang_list array) - if self.language: - form_data["lang_list"] = self.language - - # Backend/model version (Local API uses "backend" parameter) - if self.model_version == "vlm": - form_data["backend"] = "vlm-vllm-engine" - else: - form_data["backend"] = "pipeline" - # Page ranges (Local API uses start_page_id and end_page_id) if self.page_ranges: # For simplicity, if page_ranges is specified, log a warning @@ -236,10 +220,7 @@ class MinerULoader: # Build request body request_body = { - "enable_formula": self.enable_formula, - "enable_table": self.enable_table, - "language": self.language, - "model_version": self.model_version, + **self.params, "files": [ { "name": filename,