feat: update marker api

This commit is contained in:
Hisma 2025-07-22 20:49:28 -04:00
parent 8da08ad73a
commit f31cc07a9d
6 changed files with 174 additions and 95 deletions

View file

@ -2018,10 +2018,16 @@ DATALAB_MARKER_API_KEY = PersistentConfig(
os.environ.get("DATALAB_MARKER_API_KEY", ""), os.environ.get("DATALAB_MARKER_API_KEY", ""),
) )
DATALAB_MARKER_LANGS = PersistentConfig( DATALAB_MARKER_API_BASE_URL = PersistentConfig(
"DATALAB_MARKER_LANGS", "DATALAB_MARKER_API_BASE_URL",
"rag.datalab_marker_langs", "rag.datalab_marker_api_base_url",
os.environ.get("DATALAB_MARKER_LANGS", ""), os.environ.get("DATALAB_MARKER_API_BASE_URL", ""),
)
DATALAB_MARKER_ADDITIONAL_CONFIG = PersistentConfig(
"DATALAB_MARKER_ADDITIONAL_CONFIG",
"rag.datalab_marker_additional_config",
os.environ.get("DATALAB_MARKER_ADDITIONAL_CONFIG", ""),
) )
DATALAB_MARKER_USE_LLM = PersistentConfig( DATALAB_MARKER_USE_LLM = PersistentConfig(

View file

@ -227,7 +227,8 @@ from open_webui.config import (
CHUNK_SIZE, CHUNK_SIZE,
CONTENT_EXTRACTION_ENGINE, CONTENT_EXTRACTION_ENGINE,
DATALAB_MARKER_API_KEY, DATALAB_MARKER_API_KEY,
DATALAB_MARKER_LANGS, DATALAB_MARKER_API_BASE_URL,
DATALAB_MARKER_ADDITIONAL_CONFIG,
DATALAB_MARKER_SKIP_CACHE, DATALAB_MARKER_SKIP_CACHE,
DATALAB_MARKER_FORCE_OCR, DATALAB_MARKER_FORCE_OCR,
DATALAB_MARKER_PAGINATE, DATALAB_MARKER_PAGINATE,
@ -767,7 +768,8 @@ app.state.config.ENABLE_WEB_LOADER_SSL_VERIFICATION = ENABLE_WEB_LOADER_SSL_VERI
app.state.config.CONTENT_EXTRACTION_ENGINE = CONTENT_EXTRACTION_ENGINE app.state.config.CONTENT_EXTRACTION_ENGINE = CONTENT_EXTRACTION_ENGINE
app.state.config.DATALAB_MARKER_API_KEY = DATALAB_MARKER_API_KEY app.state.config.DATALAB_MARKER_API_KEY = DATALAB_MARKER_API_KEY
app.state.config.DATALAB_MARKER_LANGS = DATALAB_MARKER_LANGS app.state.config.DATALAB_MARKER_API_BASE_URL = DATALAB_MARKER_API_BASE_URL
app.state.config.DATALAB_MARKER_ADDITIONAL_CONFIG = DATALAB_MARKER_ADDITIONAL_CONFIG
app.state.config.DATALAB_MARKER_SKIP_CACHE = DATALAB_MARKER_SKIP_CACHE app.state.config.DATALAB_MARKER_SKIP_CACHE = DATALAB_MARKER_SKIP_CACHE
app.state.config.DATALAB_MARKER_FORCE_OCR = DATALAB_MARKER_FORCE_OCR app.state.config.DATALAB_MARKER_FORCE_OCR = DATALAB_MARKER_FORCE_OCR
app.state.config.DATALAB_MARKER_PAGINATE = DATALAB_MARKER_PAGINATE app.state.config.DATALAB_MARKER_PAGINATE = DATALAB_MARKER_PAGINATE

View file

@ -15,7 +15,8 @@ class DatalabMarkerLoader:
self, self,
file_path: str, file_path: str,
api_key: str, api_key: str,
langs: Optional[str] = None, api_base_url: str,
additional_config: Optional[str] = None,
use_llm: bool = False, use_llm: bool = False,
skip_cache: bool = False, skip_cache: bool = False,
force_ocr: bool = False, force_ocr: bool = False,
@ -26,7 +27,8 @@ class DatalabMarkerLoader:
): ):
self.file_path = file_path self.file_path = file_path
self.api_key = api_key self.api_key = api_key
self.langs = langs self.api_base_url = api_base_url
self.additional_config = additional_config
self.use_llm = use_llm self.use_llm = use_llm
self.skip_cache = skip_cache self.skip_cache = skip_cache
self.force_ocr = force_ocr self.force_ocr = force_ocr
@ -60,7 +62,7 @@ class DatalabMarkerLoader:
return mime_map.get(ext, "application/octet-stream") return mime_map.get(ext, "application/octet-stream")
def check_marker_request_status(self, request_id: str) -> dict: def check_marker_request_status(self, request_id: str) -> dict:
url = f"https://www.datalab.to/api/v1/marker/{request_id}" url = f"{self.api_base_url}/{request_id}"
headers = {"X-Api-Key": self.api_key} headers = {"X-Api-Key": self.api_key}
try: try:
response = requests.get(url, headers=headers) response = requests.get(url, headers=headers)
@ -81,13 +83,12 @@ class DatalabMarkerLoader:
) )
def load(self) -> List[Document]: def load(self) -> List[Document]:
url = "https://www.datalab.to/api/v1/marker" url = self.api_base_url
filename = os.path.basename(self.file_path) filename = os.path.basename(self.file_path)
mime_type = self._get_mime_type(filename) mime_type = self._get_mime_type(filename)
headers = {"X-Api-Key": self.api_key} headers = {"X-Api-Key": self.api_key}
form_data = { form_data = {
"langs": self.langs,
"use_llm": str(self.use_llm).lower(), "use_llm": str(self.use_llm).lower(),
"skip_cache": str(self.skip_cache).lower(), "skip_cache": str(self.skip_cache).lower(),
"force_ocr": str(self.force_ocr).lower(), "force_ocr": str(self.force_ocr).lower(),
@ -97,6 +98,9 @@ class DatalabMarkerLoader:
"output_format": self.output_format, "output_format": self.output_format,
} }
if self.additional_config and self.additional_config.strip():
form_data["additional_config"] = self.additional_config
log.info( log.info(
f"Datalab Marker POST request parameters: {{'filename': '{filename}', 'mime_type': '{mime_type}', **{form_data}}}" f"Datalab Marker POST request parameters: {{'filename': '{filename}', 'mime_type': '{mime_type}', **{form_data}}}"
) )
@ -133,74 +137,87 @@ class DatalabMarkerLoader:
check_url = result.get("request_check_url") check_url = result.get("request_check_url")
request_id = result.get("request_id") request_id = result.get("request_id")
if not check_url:
raise HTTPException(
status.HTTP_502_BAD_GATEWAY, detail="No request_check_url returned."
)
for _ in range(300): # Up to 10 minutes # Check if this is a direct response (self-hosted) or polling response (DataLab)
time.sleep(2) if check_url:
try: # DataLab polling pattern
poll_response = requests.get(check_url, headers=headers) for _ in range(300): # Up to 10 minutes
poll_response.raise_for_status() time.sleep(2)
poll_result = poll_response.json() try:
except (requests.HTTPError, ValueError) as e: poll_response = requests.get(check_url, headers=headers)
raw_body = poll_response.text poll_response.raise_for_status()
log.error(f"Polling error: {e}, response body: {raw_body}") poll_result = poll_response.json()
raise HTTPException( except (requests.HTTPError, ValueError) as e:
status.HTTP_502_BAD_GATEWAY, detail=f"Polling failed: {e}" raw_body = poll_response.text
) log.error(f"Polling error: {e}, response body: {raw_body}")
raise HTTPException(
status_val = poll_result.get("status") status.HTTP_502_BAD_GATEWAY, detail=f"Polling failed: {e}"
success_val = poll_result.get("success")
if status_val == "complete":
summary = {
k: poll_result.get(k)
for k in (
"status",
"output_format",
"success",
"error",
"page_count",
"total_cost",
) )
}
log.info(
f"Marker processing completed successfully: {json.dumps(summary, indent=2)}"
)
break
if status_val == "failed" or success_val is False: status_val = poll_result.get("status")
log.error( success_val = poll_result.get("success")
f"Marker poll failed full response: {json.dumps(poll_result, indent=2)}"
) if status_val == "complete":
error_msg = ( summary = {
poll_result.get("error") k: poll_result.get(k)
or "Marker returned failure without error message" for k in (
"status",
"output_format",
"success",
"error",
"page_count",
"total_cost",
)
}
log.info(
f"Marker processing completed successfully: {json.dumps(summary, indent=2)}"
)
break
if status_val == "failed" or success_val is False:
log.error(
f"Marker poll failed full response: {json.dumps(poll_result, indent=2)}"
)
error_msg = (
poll_result.get("error")
or "Marker returned failure without error message"
)
raise HTTPException(
status.HTTP_400_BAD_REQUEST,
detail=f"Marker processing failed: {error_msg}",
)
else:
raise HTTPException(
status.HTTP_504_GATEWAY_TIMEOUT, detail="Marker processing timed out"
) )
if not poll_result.get("success", False):
error_msg = poll_result.get("error") or "Unknown processing error"
raise HTTPException( raise HTTPException(
status.HTTP_400_BAD_REQUEST, status.HTTP_400_BAD_REQUEST,
detail=f"Marker processing failed: {error_msg}", detail=f"Final processing failed: {error_msg}",
) )
# DataLab format - content in format-specific fields
content_key = self.output_format.lower()
raw_content = poll_result.get(content_key)
final_result = poll_result
else: else:
raise HTTPException( # Self-hosted direct response - content in "output" field
status.HTTP_504_GATEWAY_TIMEOUT, detail="Marker processing timed out" if "output" in result:
) log.info("Self-hosted Marker returned direct response without polling")
raw_content = result.get("output")
final_result = result
else:
available_fields = list(result.keys()) if isinstance(result, dict) else "non-dict response"
raise HTTPException(
status.HTTP_502_BAD_GATEWAY,
detail=f"Custom Marker endpoint returned success but no 'output' field found. Available fields: {available_fields}. Expected either 'request_check_url' for polling or 'output' field for direct response."
)
if not poll_result.get("success", False): if self.output_format.lower() == "json":
error_msg = poll_result.get("error") or "Unknown processing error"
raise HTTPException(
status.HTTP_400_BAD_REQUEST,
detail=f"Final processing failed: {error_msg}",
)
content_key = self.output_format.lower()
raw_content = poll_result.get(content_key)
if content_key == "json":
full_text = json.dumps(raw_content, indent=2) full_text = json.dumps(raw_content, indent=2)
elif content_key in {"markdown", "html"}: elif self.output_format.lower() in {"markdown", "html"}:
full_text = str(raw_content).strip() full_text = str(raw_content).strip()
else: else:
raise HTTPException( raise HTTPException(
@ -211,14 +228,14 @@ class DatalabMarkerLoader:
if not full_text: if not full_text:
raise HTTPException( raise HTTPException(
status.HTTP_400_BAD_REQUEST, status.HTTP_400_BAD_REQUEST,
detail="Datalab Marker returned empty content", detail="Marker returned empty content",
) )
marker_output_dir = os.path.join("/app/backend/data/uploads", "marker_output") marker_output_dir = os.path.join("/app/backend/data/uploads", "marker_output")
os.makedirs(marker_output_dir, exist_ok=True) os.makedirs(marker_output_dir, exist_ok=True)
file_ext_map = {"markdown": "md", "json": "json", "html": "html"} file_ext_map = {"markdown": "md", "json": "json", "html": "html"}
file_ext = file_ext_map.get(content_key, "txt") file_ext = file_ext_map.get(self.output_format.lower(), "txt")
output_filename = f"{os.path.splitext(filename)[0]}.{file_ext}" output_filename = f"{os.path.splitext(filename)[0]}.{file_ext}"
output_path = os.path.join(marker_output_dir, output_filename) output_path = os.path.join(marker_output_dir, output_filename)
@ -231,13 +248,13 @@ class DatalabMarkerLoader:
metadata = { metadata = {
"source": filename, "source": filename,
"output_format": poll_result.get("output_format", self.output_format), "output_format": final_result.get("output_format", self.output_format),
"page_count": poll_result.get("page_count", 0), "page_count": final_result.get("page_count", 0),
"processed_with_llm": self.use_llm, "processed_with_llm": self.use_llm,
"request_id": request_id or "", "request_id": request_id or "",
} }
images = poll_result.get("images", {}) images = final_result.get("images", {})
if images: if images:
metadata["image_count"] = len(images) metadata["image_count"] = len(images)
metadata["images"] = json.dumps(list(images.keys())) metadata["images"] = json.dumps(list(images.keys()))

View file

@ -281,10 +281,15 @@ class Loader:
"tiff", "tiff",
] ]
): ):
api_base_url = self.kwargs.get("DATALAB_MARKER_API_BASE_URL", "")
if not api_base_url or api_base_url.strip() == "":
api_base_url = "https://www.datalab.to/api/v1/marker"
loader = DatalabMarkerLoader( loader = DatalabMarkerLoader(
file_path=file_path, file_path=file_path,
api_key=self.kwargs["DATALAB_MARKER_API_KEY"], api_key=self.kwargs["DATALAB_MARKER_API_KEY"],
langs=self.kwargs.get("DATALAB_MARKER_LANGS"), api_base_url=api_base_url,
additional_config=self.kwargs.get("DATALAB_MARKER_ADDITIONAL_CONFIG"),
use_llm=self.kwargs.get("DATALAB_MARKER_USE_LLM", False), use_llm=self.kwargs.get("DATALAB_MARKER_USE_LLM", False),
skip_cache=self.kwargs.get("DATALAB_MARKER_SKIP_CACHE", False), skip_cache=self.kwargs.get("DATALAB_MARKER_SKIP_CACHE", False),
force_ocr=self.kwargs.get("DATALAB_MARKER_FORCE_OCR", False), force_ocr=self.kwargs.get("DATALAB_MARKER_FORCE_OCR", False),

View file

@ -401,7 +401,8 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)):
"CONTENT_EXTRACTION_ENGINE": request.app.state.config.CONTENT_EXTRACTION_ENGINE, "CONTENT_EXTRACTION_ENGINE": request.app.state.config.CONTENT_EXTRACTION_ENGINE,
"PDF_EXTRACT_IMAGES": request.app.state.config.PDF_EXTRACT_IMAGES, "PDF_EXTRACT_IMAGES": request.app.state.config.PDF_EXTRACT_IMAGES,
"DATALAB_MARKER_API_KEY": request.app.state.config.DATALAB_MARKER_API_KEY, "DATALAB_MARKER_API_KEY": request.app.state.config.DATALAB_MARKER_API_KEY,
"DATALAB_MARKER_LANGS": request.app.state.config.DATALAB_MARKER_LANGS, "DATALAB_MARKER_API_BASE_URL": request.app.state.config.DATALAB_MARKER_API_BASE_URL,
"DATALAB_MARKER_ADDITIONAL_CONFIG": request.app.state.config.DATALAB_MARKER_ADDITIONAL_CONFIG,
"DATALAB_MARKER_SKIP_CACHE": request.app.state.config.DATALAB_MARKER_SKIP_CACHE, "DATALAB_MARKER_SKIP_CACHE": request.app.state.config.DATALAB_MARKER_SKIP_CACHE,
"DATALAB_MARKER_FORCE_OCR": request.app.state.config.DATALAB_MARKER_FORCE_OCR, "DATALAB_MARKER_FORCE_OCR": request.app.state.config.DATALAB_MARKER_FORCE_OCR,
"DATALAB_MARKER_PAGINATE": request.app.state.config.DATALAB_MARKER_PAGINATE, "DATALAB_MARKER_PAGINATE": request.app.state.config.DATALAB_MARKER_PAGINATE,
@ -566,7 +567,8 @@ class ConfigForm(BaseModel):
CONTENT_EXTRACTION_ENGINE: Optional[str] = None CONTENT_EXTRACTION_ENGINE: Optional[str] = None
PDF_EXTRACT_IMAGES: Optional[bool] = None PDF_EXTRACT_IMAGES: Optional[bool] = None
DATALAB_MARKER_API_KEY: Optional[str] = None DATALAB_MARKER_API_KEY: Optional[str] = None
DATALAB_MARKER_LANGS: Optional[str] = None DATALAB_MARKER_API_BASE_URL: Optional[str] = None
DATALAB_MARKER_ADDITIONAL_CONFIG: Optional[str] = None
DATALAB_MARKER_SKIP_CACHE: Optional[bool] = None DATALAB_MARKER_SKIP_CACHE: Optional[bool] = None
DATALAB_MARKER_FORCE_OCR: Optional[bool] = None DATALAB_MARKER_FORCE_OCR: Optional[bool] = None
DATALAB_MARKER_PAGINATE: Optional[bool] = None DATALAB_MARKER_PAGINATE: Optional[bool] = None
@ -683,10 +685,15 @@ async def update_rag_config(
if form_data.DATALAB_MARKER_API_KEY is not None if form_data.DATALAB_MARKER_API_KEY is not None
else request.app.state.config.DATALAB_MARKER_API_KEY else request.app.state.config.DATALAB_MARKER_API_KEY
) )
request.app.state.config.DATALAB_MARKER_LANGS = ( request.app.state.config.DATALAB_MARKER_API_BASE_URL = (
form_data.DATALAB_MARKER_LANGS form_data.DATALAB_MARKER_API_BASE_URL
if form_data.DATALAB_MARKER_LANGS is not None if form_data.DATALAB_MARKER_API_BASE_URL is not None
else request.app.state.config.DATALAB_MARKER_LANGS else request.app.state.config.DATALAB_MARKER_API_BASE_URL
)
request.app.state.config.DATALAB_MARKER_ADDITIONAL_CONFIG = (
form_data.DATALAB_MARKER_ADDITIONAL_CONFIG
if form_data.DATALAB_MARKER_ADDITIONAL_CONFIG is not None
else request.app.state.config.DATALAB_MARKER_ADDITIONAL_CONFIG
) )
request.app.state.config.DATALAB_MARKER_SKIP_CACHE = ( request.app.state.config.DATALAB_MARKER_SKIP_CACHE = (
form_data.DATALAB_MARKER_SKIP_CACHE form_data.DATALAB_MARKER_SKIP_CACHE
@ -1006,7 +1013,8 @@ async def update_rag_config(
"CONTENT_EXTRACTION_ENGINE": request.app.state.config.CONTENT_EXTRACTION_ENGINE, "CONTENT_EXTRACTION_ENGINE": request.app.state.config.CONTENT_EXTRACTION_ENGINE,
"PDF_EXTRACT_IMAGES": request.app.state.config.PDF_EXTRACT_IMAGES, "PDF_EXTRACT_IMAGES": request.app.state.config.PDF_EXTRACT_IMAGES,
"DATALAB_MARKER_API_KEY": request.app.state.config.DATALAB_MARKER_API_KEY, "DATALAB_MARKER_API_KEY": request.app.state.config.DATALAB_MARKER_API_KEY,
"DATALAB_MARKER_LANGS": request.app.state.config.DATALAB_MARKER_LANGS, "DATALAB_MARKER_API_BASE_URL": request.app.state.config.DATALAB_MARKER_API_BASE_URL,
"DATALAB_MARKER_ADDITIONAL_CONFIG": request.app.state.config.DATALAB_MARKER_ADDITIONAL_CONFIG,
"DATALAB_MARKER_SKIP_CACHE": request.app.state.config.DATALAB_MARKER_SKIP_CACHE, "DATALAB_MARKER_SKIP_CACHE": request.app.state.config.DATALAB_MARKER_SKIP_CACHE,
"DATALAB_MARKER_FORCE_OCR": request.app.state.config.DATALAB_MARKER_FORCE_OCR, "DATALAB_MARKER_FORCE_OCR": request.app.state.config.DATALAB_MARKER_FORCE_OCR,
"DATALAB_MARKER_PAGINATE": request.app.state.config.DATALAB_MARKER_PAGINATE, "DATALAB_MARKER_PAGINATE": request.app.state.config.DATALAB_MARKER_PAGINATE,
@ -1406,7 +1414,8 @@ def process_file(
loader = Loader( loader = Loader(
engine=request.app.state.config.CONTENT_EXTRACTION_ENGINE, engine=request.app.state.config.CONTENT_EXTRACTION_ENGINE,
DATALAB_MARKER_API_KEY=request.app.state.config.DATALAB_MARKER_API_KEY, DATALAB_MARKER_API_KEY=request.app.state.config.DATALAB_MARKER_API_KEY,
DATALAB_MARKER_LANGS=request.app.state.config.DATALAB_MARKER_LANGS, DATALAB_MARKER_API_BASE_URL=request.app.state.config.DATALAB_MARKER_API_BASE_URL,
DATALAB_MARKER_ADDITIONAL_CONFIG=request.app.state.config.DATALAB_MARKER_ADDITIONAL_CONFIG,
DATALAB_MARKER_SKIP_CACHE=request.app.state.config.DATALAB_MARKER_SKIP_CACHE, DATALAB_MARKER_SKIP_CACHE=request.app.state.config.DATALAB_MARKER_SKIP_CACHE,
DATALAB_MARKER_FORCE_OCR=request.app.state.config.DATALAB_MARKER_FORCE_OCR, DATALAB_MARKER_FORCE_OCR=request.app.state.config.DATALAB_MARKER_FORCE_OCR,
DATALAB_MARKER_PAGINATE=request.app.state.config.DATALAB_MARKER_PAGINATE, DATALAB_MARKER_PAGINATE=request.app.state.config.DATALAB_MARKER_PAGINATE,

View file

@ -170,6 +170,19 @@
return; return;
} }
if (
RAGConfig.CONTENT_EXTRACTION_ENGINE === 'datalab_marker' &&
RAGConfig.DATALAB_MARKER_ADDITIONAL_CONFIG &&
RAGConfig.DATALAB_MARKER_ADDITIONAL_CONFIG.trim() !== ''
) {
try {
JSON.parse(RAGConfig.DATALAB_MARKER_ADDITIONAL_CONFIG);
} catch (e) {
toast.error($i18n.t('Invalid JSON format in Additional Config'));
return;
}
}
if ( if (
RAGConfig.CONTENT_EXTRACTION_ENGINE === 'document_intelligence' && RAGConfig.CONTENT_EXTRACTION_ENGINE === 'document_intelligence' &&
(RAGConfig.DOCUMENT_INTELLIGENCE_ENDPOINT === '' || (RAGConfig.DOCUMENT_INTELLIGENCE_ENDPOINT === '' ||
@ -243,6 +256,11 @@
2 2
); );
// Set default API Base URL if empty
if (!config.DATALAB_MARKER_API_BASE_URL) {
config.DATALAB_MARKER_API_BASE_URL = 'https://www.datalab.to/api/v1/marker';
}
RAGConfig = config; RAGConfig = config;
}); });
</script> </script>
@ -337,6 +355,19 @@
</div> </div>
{:else if RAGConfig.CONTENT_EXTRACTION_ENGINE === 'datalab_marker'} {:else if RAGConfig.CONTENT_EXTRACTION_ENGINE === 'datalab_marker'}
<div class="my-0.5 flex gap-2 pr-2"> <div class="my-0.5 flex gap-2 pr-2">
<Tooltip
content={$i18n.t(
'API Base URL for Datalab Marker service. Defaults to: https://www.datalab.to/api/v1/marker'
)}
placement="top-start"
className="w-full"
>
<input
class="flex-1 w-full text-sm bg-transparent outline-hidden"
placeholder={$i18n.t('Enter Datalab Marker API Base URL')}
bind:value={RAGConfig.DATALAB_MARKER_API_BASE_URL}
/>
</Tooltip>
<SensitiveInput <SensitiveInput
placeholder={$i18n.t('Enter Datalab Marker API Key')} placeholder={$i18n.t('Enter Datalab Marker API Key')}
required={false} required={false}
@ -344,24 +375,33 @@
/> />
</div> </div>
<div class="flex justify-between w-full mt-2"> <div class="flex flex-col gap-2 mt-2">
<div class="text-xs font-medium"> <div class=" flex flex-col w-full justify-between">
{$i18n.t('Languages')} <div class=" mb-1 text-xs font-medium">
{$i18n.t('Additional Config')}
</div>
<div class="flex w-full items-center relative">
<Tooltip
content={$i18n.t(
'Additional configuration options for marker. This should be a JSON string with key-value pairs. For example, \'{"key": "value"}\'. Supported keys include: disable_links, keep_pageheader_in_output, keep_pagefooter_in_output, filter_blank_pages, drop_repeated_text, layout_coverage_threshold, merge_threshold, height_tolerance, gap_threshold, image_threshold, min_line_length, level_count, default_level'
)}
placement="top-start"
className="w-full"
>
<Textarea
bind:value={RAGConfig.DATALAB_MARKER_ADDITIONAL_CONFIG}
placeholder={$i18n.t('Enter JSON config (e.g., {"disable_links": true})')}
/>
</Tooltip>
</div>
</div> </div>
<input
class="text-sm bg-transparent outline-hidden"
type="text"
bind:value={RAGConfig.DATALAB_MARKER_LANGS}
placeholder={$i18n.t('e.g.) en,fr,de')}
/>
</div> </div>
<div class="flex justify-between w-full mt-2"> <div class="flex justify-between w-full mt-2">
<div class="self-center text-xs font-medium"> <div class="self-center text-xs font-medium">
<Tooltip <Tooltip
content={$i18n.t( content={$i18n.t(
'Significantly improves accuracy by using an LLM to enhance tables, forms, inline math, and layout detection. Will increase latency. Defaults to True.' 'Significantly improves accuracy by using an LLM to enhance tables, forms, inline math, and layout detection. Will increase latency. Defaults to False.'
)} )}
placement="top-start" placement="top-start"
> >