refac/fix: mineru params

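Breaking change: the entries of `params` are now forwarded as-is into the MinerU request (local form data and cloud request body) instead of being mapped from the individual `enable_ocr` / `enable_formula` / `enable_table` / `language` / `model_version` settings.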
This commit is contained in:
Timothy Jaeryang Baek 2025-11-11 00:30:11 -05:00
parent 8e41fea2d9
commit 4673e120c4

View file

@ -33,13 +33,14 @@ class MinerULoader:
self.api_key = api_key self.api_key = api_key
# Parse params dict with defaults # Parse params dict with defaults
params = params or {} self.params = params or {}
self.enable_ocr = params.get("enable_ocr", False) self.enable_ocr = params.get("enable_ocr", False)
self.enable_formula = params.get("enable_formula", True) self.enable_formula = params.get("enable_formula", True)
self.enable_table = params.get("enable_table", True) self.enable_table = params.get("enable_table", True)
self.language = params.get("language", "en") self.language = params.get("language", "en")
self.model_version = params.get("model_version", "pipeline") self.model_version = params.get("model_version", "pipeline")
self.page_ranges = params.get("page_ranges", "")
self.page_ranges = self.params.pop("page_ranges", "")
# Validate API mode # Validate API mode
if self.api_mode not in ["local", "cloud"]: if self.api_mode not in ["local", "cloud"]:
@ -76,27 +77,10 @@ class MinerULoader:
# Build form data for Local API # Build form data for Local API
form_data = { form_data = {
**self.params,
"return_md": "true", "return_md": "true",
"formula_enable": str(self.enable_formula).lower(),
"table_enable": str(self.enable_table).lower(),
} }
# Parse method based on OCR setting
if self.enable_ocr:
form_data["parse_method"] = "ocr"
else:
form_data["parse_method"] = "auto"
# Language configuration (Local API uses lang_list array)
if self.language:
form_data["lang_list"] = self.language
# Backend/model version (Local API uses "backend" parameter)
if self.model_version == "vlm":
form_data["backend"] = "vlm-vllm-engine"
else:
form_data["backend"] = "pipeline"
# Page ranges (Local API uses start_page_id and end_page_id) # Page ranges (Local API uses start_page_id and end_page_id)
if self.page_ranges: if self.page_ranges:
# For simplicity, if page_ranges is specified, log a warning # For simplicity, if page_ranges is specified, log a warning
@ -236,10 +220,7 @@ class MinerULoader:
# Build request body # Build request body
request_body = { request_body = {
"enable_formula": self.enable_formula, **self.params,
"enable_table": self.enable_table,
"language": self.language,
"model_version": self.model_version,
"files": [ "files": [
{ {
"name": filename, "name": filename,