diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py
index d403f6acdd..de6abcda60 100644
--- a/backend/open_webui/config.py
+++ b/backend/open_webui/config.py
@@ -2067,6 +2067,12 @@ DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION = PersistentConfig(
== "true",
)
+DATALAB_MARKER_FORMAT_LINES = PersistentConfig(
+ "DATALAB_MARKER_FORMAT_LINES",
+ "rag.datalab_marker_format_lines",
+ os.environ.get("DATALAB_MARKER_FORMAT_LINES", "false").lower() == "true",
+)
+
DATALAB_MARKER_OUTPUT_FORMAT = PersistentConfig(
"DATALAB_MARKER_OUTPUT_FORMAT",
"rag.datalab_marker_output_format",
diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py
index 55e94dce35..68d5acc5a2 100644
--- a/backend/open_webui/main.py
+++ b/backend/open_webui/main.py
@@ -234,6 +234,7 @@ from open_webui.config import (
DATALAB_MARKER_PAGINATE,
DATALAB_MARKER_STRIP_EXISTING_OCR,
DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION,
+ DATALAB_MARKER_FORMAT_LINES,
DATALAB_MARKER_OUTPUT_FORMAT,
DATALAB_MARKER_USE_LLM,
EXTERNAL_DOCUMENT_LOADER_URL,
@@ -777,6 +778,7 @@ app.state.config.DATALAB_MARKER_STRIP_EXISTING_OCR = DATALAB_MARKER_STRIP_EXISTI
app.state.config.DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION = (
DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION
)
+app.state.config.DATALAB_MARKER_FORMAT_LINES = DATALAB_MARKER_FORMAT_LINES
app.state.config.DATALAB_MARKER_USE_LLM = DATALAB_MARKER_USE_LLM
app.state.config.DATALAB_MARKER_OUTPUT_FORMAT = DATALAB_MARKER_OUTPUT_FORMAT
app.state.config.EXTERNAL_DOCUMENT_LOADER_URL = EXTERNAL_DOCUMENT_LOADER_URL
diff --git a/backend/open_webui/retrieval/loaders/datalab_marker.py b/backend/open_webui/retrieval/loaders/datalab_marker.py
index 25534bbb68..87d7c226c7 100644
--- a/backend/open_webui/retrieval/loaders/datalab_marker.py
+++ b/backend/open_webui/retrieval/loaders/datalab_marker.py
@@ -23,6 +23,7 @@ class DatalabMarkerLoader:
paginate: bool = False,
strip_existing_ocr: bool = False,
disable_image_extraction: bool = False,
+ format_lines: bool = False,
output_format: str = None,
):
self.file_path = file_path
@@ -35,6 +36,7 @@ class DatalabMarkerLoader:
self.paginate = paginate
self.strip_existing_ocr = strip_existing_ocr
self.disable_image_extraction = disable_image_extraction
+ self.format_lines = format_lines
self.output_format = output_format
def _get_mime_type(self, filename: str) -> str:
@@ -95,6 +97,7 @@ class DatalabMarkerLoader:
"paginate": str(self.paginate).lower(),
"strip_existing_ocr": str(self.strip_existing_ocr).lower(),
"disable_image_extraction": str(self.disable_image_extraction).lower(),
+ "format_lines": str(self.format_lines).lower(),
"output_format": self.output_format,
}
diff --git a/backend/open_webui/retrieval/loaders/main.py b/backend/open_webui/retrieval/loaders/main.py
index dd34a70669..763b6ba650 100644
--- a/backend/open_webui/retrieval/loaders/main.py
+++ b/backend/open_webui/retrieval/loaders/main.py
@@ -300,6 +300,7 @@ class Loader:
disable_image_extraction=self.kwargs.get(
"DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION", False
),
+ format_lines=self.kwargs.get("DATALAB_MARKER_FORMAT_LINES", False),
output_format=self.kwargs.get(
"DATALAB_MARKER_OUTPUT_FORMAT", "markdown"
),
diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py
index 62f1782864..b920032acd 100644
--- a/backend/open_webui/routers/retrieval.py
+++ b/backend/open_webui/routers/retrieval.py
@@ -408,6 +408,7 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)):
"DATALAB_MARKER_PAGINATE": request.app.state.config.DATALAB_MARKER_PAGINATE,
"DATALAB_MARKER_STRIP_EXISTING_OCR": request.app.state.config.DATALAB_MARKER_STRIP_EXISTING_OCR,
"DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION": request.app.state.config.DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION,
+ "DATALAB_MARKER_FORMAT_LINES": request.app.state.config.DATALAB_MARKER_FORMAT_LINES,
"DATALAB_MARKER_USE_LLM": request.app.state.config.DATALAB_MARKER_USE_LLM,
"DATALAB_MARKER_OUTPUT_FORMAT": request.app.state.config.DATALAB_MARKER_OUTPUT_FORMAT,
"EXTERNAL_DOCUMENT_LOADER_URL": request.app.state.config.EXTERNAL_DOCUMENT_LOADER_URL,
@@ -574,6 +575,7 @@ class ConfigForm(BaseModel):
DATALAB_MARKER_PAGINATE: Optional[bool] = None
DATALAB_MARKER_STRIP_EXISTING_OCR: Optional[bool] = None
DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION: Optional[bool] = None
+ DATALAB_MARKER_FORMAT_LINES: Optional[bool] = None
DATALAB_MARKER_USE_LLM: Optional[bool] = None
DATALAB_MARKER_OUTPUT_FORMAT: Optional[str] = None
EXTERNAL_DOCUMENT_LOADER_URL: Optional[str] = None
@@ -720,6 +722,11 @@ async def update_rag_config(
if form_data.DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION is not None
else request.app.state.config.DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION
)
+ request.app.state.config.DATALAB_MARKER_FORMAT_LINES = (
+ form_data.DATALAB_MARKER_FORMAT_LINES
+ if form_data.DATALAB_MARKER_FORMAT_LINES is not None
+ else request.app.state.config.DATALAB_MARKER_FORMAT_LINES
+ )
request.app.state.config.DATALAB_MARKER_OUTPUT_FORMAT = (
form_data.DATALAB_MARKER_OUTPUT_FORMAT
if form_data.DATALAB_MARKER_OUTPUT_FORMAT is not None
@@ -1421,6 +1428,7 @@ def process_file(
DATALAB_MARKER_PAGINATE=request.app.state.config.DATALAB_MARKER_PAGINATE,
DATALAB_MARKER_STRIP_EXISTING_OCR=request.app.state.config.DATALAB_MARKER_STRIP_EXISTING_OCR,
DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION=request.app.state.config.DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION,
+ DATALAB_MARKER_FORMAT_LINES=request.app.state.config.DATALAB_MARKER_FORMAT_LINES,
DATALAB_MARKER_USE_LLM=request.app.state.config.DATALAB_MARKER_USE_LLM,
DATALAB_MARKER_OUTPUT_FORMAT=request.app.state.config.DATALAB_MARKER_OUTPUT_FORMAT,
EXTERNAL_DOCUMENT_LOADER_URL=request.app.state.config.EXTERNAL_DOCUMENT_LOADER_URL,
diff --git a/src/lib/components/admin/Settings/Documents.svelte b/src/lib/components/admin/Settings/Documents.svelte
index 6ef1f876bc..6860899e1b 100644
--- a/src/lib/components/admin/Settings/Documents.svelte
+++ b/src/lib/components/admin/Settings/Documents.svelte
@@ -485,6 +485,21 @@