mirror of
https://github.com/open-webui/open-webui.git
synced 2025-12-12 04:15:25 +00:00
add format_lines
This commit is contained in:
parent
f31cc07a9d
commit
a99e20cc3d
6 changed files with 35 additions and 0 deletions
|
|
@ -2067,6 +2067,12 @@ DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION = PersistentConfig(
|
||||||
== "true",
|
== "true",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
DATALAB_MARKER_FORMAT_LINES = PersistentConfig(
|
||||||
|
"DATALAB_MARKER_FORMAT_LINES",
|
||||||
|
"rag.datalab_marker_format_lines",
|
||||||
|
os.environ.get("DATALAB_MARKER_FORMAT_LINES", "false").lower() == "true",
|
||||||
|
)
|
||||||
|
|
||||||
DATALAB_MARKER_OUTPUT_FORMAT = PersistentConfig(
|
DATALAB_MARKER_OUTPUT_FORMAT = PersistentConfig(
|
||||||
"DATALAB_MARKER_OUTPUT_FORMAT",
|
"DATALAB_MARKER_OUTPUT_FORMAT",
|
||||||
"rag.datalab_marker_output_format",
|
"rag.datalab_marker_output_format",
|
||||||
|
|
|
||||||
|
|
@ -234,6 +234,7 @@ from open_webui.config import (
|
||||||
DATALAB_MARKER_PAGINATE,
|
DATALAB_MARKER_PAGINATE,
|
||||||
DATALAB_MARKER_STRIP_EXISTING_OCR,
|
DATALAB_MARKER_STRIP_EXISTING_OCR,
|
||||||
DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION,
|
DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION,
|
||||||
|
DATALAB_MARKER_FORMAT_LINES,
|
||||||
DATALAB_MARKER_OUTPUT_FORMAT,
|
DATALAB_MARKER_OUTPUT_FORMAT,
|
||||||
DATALAB_MARKER_USE_LLM,
|
DATALAB_MARKER_USE_LLM,
|
||||||
EXTERNAL_DOCUMENT_LOADER_URL,
|
EXTERNAL_DOCUMENT_LOADER_URL,
|
||||||
|
|
@ -777,6 +778,7 @@ app.state.config.DATALAB_MARKER_STRIP_EXISTING_OCR = DATALAB_MARKER_STRIP_EXISTI
|
||||||
app.state.config.DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION = (
|
app.state.config.DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION = (
|
||||||
DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION
|
DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION
|
||||||
)
|
)
|
||||||
|
app.state.config.DATALAB_MARKER_FORMAT_LINES = DATALAB_MARKER_FORMAT_LINES
|
||||||
app.state.config.DATALAB_MARKER_USE_LLM = DATALAB_MARKER_USE_LLM
|
app.state.config.DATALAB_MARKER_USE_LLM = DATALAB_MARKER_USE_LLM
|
||||||
app.state.config.DATALAB_MARKER_OUTPUT_FORMAT = DATALAB_MARKER_OUTPUT_FORMAT
|
app.state.config.DATALAB_MARKER_OUTPUT_FORMAT = DATALAB_MARKER_OUTPUT_FORMAT
|
||||||
app.state.config.EXTERNAL_DOCUMENT_LOADER_URL = EXTERNAL_DOCUMENT_LOADER_URL
|
app.state.config.EXTERNAL_DOCUMENT_LOADER_URL = EXTERNAL_DOCUMENT_LOADER_URL
|
||||||
|
|
|
||||||
|
|
@ -23,6 +23,7 @@ class DatalabMarkerLoader:
|
||||||
paginate: bool = False,
|
paginate: bool = False,
|
||||||
strip_existing_ocr: bool = False,
|
strip_existing_ocr: bool = False,
|
||||||
disable_image_extraction: bool = False,
|
disable_image_extraction: bool = False,
|
||||||
|
format_lines: bool = False,
|
||||||
output_format: str = None,
|
output_format: str = None,
|
||||||
):
|
):
|
||||||
self.file_path = file_path
|
self.file_path = file_path
|
||||||
|
|
@ -35,6 +36,7 @@ class DatalabMarkerLoader:
|
||||||
self.paginate = paginate
|
self.paginate = paginate
|
||||||
self.strip_existing_ocr = strip_existing_ocr
|
self.strip_existing_ocr = strip_existing_ocr
|
||||||
self.disable_image_extraction = disable_image_extraction
|
self.disable_image_extraction = disable_image_extraction
|
||||||
|
self.format_lines = format_lines
|
||||||
self.output_format = output_format
|
self.output_format = output_format
|
||||||
|
|
||||||
def _get_mime_type(self, filename: str) -> str:
|
def _get_mime_type(self, filename: str) -> str:
|
||||||
|
|
@ -95,6 +97,7 @@ class DatalabMarkerLoader:
|
||||||
"paginate": str(self.paginate).lower(),
|
"paginate": str(self.paginate).lower(),
|
||||||
"strip_existing_ocr": str(self.strip_existing_ocr).lower(),
|
"strip_existing_ocr": str(self.strip_existing_ocr).lower(),
|
||||||
"disable_image_extraction": str(self.disable_image_extraction).lower(),
|
"disable_image_extraction": str(self.disable_image_extraction).lower(),
|
||||||
|
"format_lines": str(self.format_lines).lower(),
|
||||||
"output_format": self.output_format,
|
"output_format": self.output_format,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -300,6 +300,7 @@ class Loader:
|
||||||
disable_image_extraction=self.kwargs.get(
|
disable_image_extraction=self.kwargs.get(
|
||||||
"DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION", False
|
"DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION", False
|
||||||
),
|
),
|
||||||
|
format_lines=self.kwargs.get("DATALAB_MARKER_FORMAT_LINES", False),
|
||||||
output_format=self.kwargs.get(
|
output_format=self.kwargs.get(
|
||||||
"DATALAB_MARKER_OUTPUT_FORMAT", "markdown"
|
"DATALAB_MARKER_OUTPUT_FORMAT", "markdown"
|
||||||
),
|
),
|
||||||
|
|
|
||||||
|
|
@ -408,6 +408,7 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)):
|
||||||
"DATALAB_MARKER_PAGINATE": request.app.state.config.DATALAB_MARKER_PAGINATE,
|
"DATALAB_MARKER_PAGINATE": request.app.state.config.DATALAB_MARKER_PAGINATE,
|
||||||
"DATALAB_MARKER_STRIP_EXISTING_OCR": request.app.state.config.DATALAB_MARKER_STRIP_EXISTING_OCR,
|
"DATALAB_MARKER_STRIP_EXISTING_OCR": request.app.state.config.DATALAB_MARKER_STRIP_EXISTING_OCR,
|
||||||
"DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION": request.app.state.config.DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION,
|
"DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION": request.app.state.config.DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION,
|
||||||
|
"DATALAB_MARKER_FORMAT_LINES": request.app.state.config.DATALAB_MARKER_FORMAT_LINES,
|
||||||
"DATALAB_MARKER_USE_LLM": request.app.state.config.DATALAB_MARKER_USE_LLM,
|
"DATALAB_MARKER_USE_LLM": request.app.state.config.DATALAB_MARKER_USE_LLM,
|
||||||
"DATALAB_MARKER_OUTPUT_FORMAT": request.app.state.config.DATALAB_MARKER_OUTPUT_FORMAT,
|
"DATALAB_MARKER_OUTPUT_FORMAT": request.app.state.config.DATALAB_MARKER_OUTPUT_FORMAT,
|
||||||
"EXTERNAL_DOCUMENT_LOADER_URL": request.app.state.config.EXTERNAL_DOCUMENT_LOADER_URL,
|
"EXTERNAL_DOCUMENT_LOADER_URL": request.app.state.config.EXTERNAL_DOCUMENT_LOADER_URL,
|
||||||
|
|
@ -574,6 +575,7 @@ class ConfigForm(BaseModel):
|
||||||
DATALAB_MARKER_PAGINATE: Optional[bool] = None
|
DATALAB_MARKER_PAGINATE: Optional[bool] = None
|
||||||
DATALAB_MARKER_STRIP_EXISTING_OCR: Optional[bool] = None
|
DATALAB_MARKER_STRIP_EXISTING_OCR: Optional[bool] = None
|
||||||
DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION: Optional[bool] = None
|
DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION: Optional[bool] = None
|
||||||
|
DATALAB_MARKER_FORMAT_LINES: Optional[bool] = None
|
||||||
DATALAB_MARKER_USE_LLM: Optional[bool] = None
|
DATALAB_MARKER_USE_LLM: Optional[bool] = None
|
||||||
DATALAB_MARKER_OUTPUT_FORMAT: Optional[str] = None
|
DATALAB_MARKER_OUTPUT_FORMAT: Optional[str] = None
|
||||||
EXTERNAL_DOCUMENT_LOADER_URL: Optional[str] = None
|
EXTERNAL_DOCUMENT_LOADER_URL: Optional[str] = None
|
||||||
|
|
@ -720,6 +722,11 @@ async def update_rag_config(
|
||||||
if form_data.DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION is not None
|
if form_data.DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION is not None
|
||||||
else request.app.state.config.DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION
|
else request.app.state.config.DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION
|
||||||
)
|
)
|
||||||
|
request.app.state.config.DATALAB_MARKER_FORMAT_LINES = (
|
||||||
|
form_data.DATALAB_MARKER_FORMAT_LINES
|
||||||
|
if form_data.DATALAB_MARKER_FORMAT_LINES is not None
|
||||||
|
else request.app.state.config.DATALAB_MARKER_FORMAT_LINES
|
||||||
|
)
|
||||||
request.app.state.config.DATALAB_MARKER_OUTPUT_FORMAT = (
|
request.app.state.config.DATALAB_MARKER_OUTPUT_FORMAT = (
|
||||||
form_data.DATALAB_MARKER_OUTPUT_FORMAT
|
form_data.DATALAB_MARKER_OUTPUT_FORMAT
|
||||||
if form_data.DATALAB_MARKER_OUTPUT_FORMAT is not None
|
if form_data.DATALAB_MARKER_OUTPUT_FORMAT is not None
|
||||||
|
|
@ -1421,6 +1428,7 @@ def process_file(
|
||||||
DATALAB_MARKER_PAGINATE=request.app.state.config.DATALAB_MARKER_PAGINATE,
|
DATALAB_MARKER_PAGINATE=request.app.state.config.DATALAB_MARKER_PAGINATE,
|
||||||
DATALAB_MARKER_STRIP_EXISTING_OCR=request.app.state.config.DATALAB_MARKER_STRIP_EXISTING_OCR,
|
DATALAB_MARKER_STRIP_EXISTING_OCR=request.app.state.config.DATALAB_MARKER_STRIP_EXISTING_OCR,
|
||||||
DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION=request.app.state.config.DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION,
|
DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION=request.app.state.config.DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION,
|
||||||
|
DATALAB_MARKER_FORMAT_LINES=request.app.state.config.DATALAB_MARKER_FORMAT_LINES,
|
||||||
DATALAB_MARKER_USE_LLM=request.app.state.config.DATALAB_MARKER_USE_LLM,
|
DATALAB_MARKER_USE_LLM=request.app.state.config.DATALAB_MARKER_USE_LLM,
|
||||||
DATALAB_MARKER_OUTPUT_FORMAT=request.app.state.config.DATALAB_MARKER_OUTPUT_FORMAT,
|
DATALAB_MARKER_OUTPUT_FORMAT=request.app.state.config.DATALAB_MARKER_OUTPUT_FORMAT,
|
||||||
EXTERNAL_DOCUMENT_LOADER_URL=request.app.state.config.EXTERNAL_DOCUMENT_LOADER_URL,
|
EXTERNAL_DOCUMENT_LOADER_URL=request.app.state.config.EXTERNAL_DOCUMENT_LOADER_URL,
|
||||||
|
|
|
||||||
|
|
@ -485,6 +485,21 @@
|
||||||
<Switch bind:state={RAGConfig.DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION} />
|
<Switch bind:state={RAGConfig.DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION} />
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
<div class="flex justify-between w-full mt-2">
|
||||||
|
<div class="self-center text-xs font-medium">
|
||||||
|
<Tooltip
|
||||||
|
content={$i18n.t(
|
||||||
|
'Format the lines in the output. Defaults to False. If set to True, the lines will be formatted to detect inline math and styles.'
|
||||||
|
)}
|
||||||
|
placement="top-start"
|
||||||
|
>
|
||||||
|
{$i18n.t('Format Lines')}
|
||||||
|
</Tooltip>
|
||||||
|
</div>
|
||||||
|
<div class="flex items-center">
|
||||||
|
<Switch bind:state={RAGConfig.DATALAB_MARKER_FORMAT_LINES} />
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
<div class="flex justify-between w-full mt-2">
|
<div class="flex justify-between w-full mt-2">
|
||||||
<div class="self-center text-xs font-medium">
|
<div class="self-center text-xs font-medium">
|
||||||
<Tooltip
|
<Tooltip
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue