mirror of
https://github.com/open-webui/open-webui.git
synced 2025-12-11 20:05:19 +00:00
add format_lines
This commit is contained in:
parent
f31cc07a9d
commit
a99e20cc3d
6 changed files with 35 additions and 0 deletions
|
|
@ -2067,6 +2067,12 @@ DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION = PersistentConfig(
|
|||
== "true",
|
||||
)
|
||||
|
||||
DATALAB_MARKER_FORMAT_LINES = PersistentConfig(
|
||||
"DATALAB_MARKER_FORMAT_LINES",
|
||||
"rag.datalab_marker_format_lines",
|
||||
os.environ.get("DATALAB_MARKER_FORMAT_LINES", "false").lower() == "true",
|
||||
)
|
||||
|
||||
DATALAB_MARKER_OUTPUT_FORMAT = PersistentConfig(
|
||||
"DATALAB_MARKER_OUTPUT_FORMAT",
|
||||
"rag.datalab_marker_output_format",
|
||||
|
|
|
|||
|
|
@ -234,6 +234,7 @@ from open_webui.config import (
|
|||
DATALAB_MARKER_PAGINATE,
|
||||
DATALAB_MARKER_STRIP_EXISTING_OCR,
|
||||
DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION,
|
||||
DATALAB_MARKER_FORMAT_LINES,
|
||||
DATALAB_MARKER_OUTPUT_FORMAT,
|
||||
DATALAB_MARKER_USE_LLM,
|
||||
EXTERNAL_DOCUMENT_LOADER_URL,
|
||||
|
|
@ -777,6 +778,7 @@ app.state.config.DATALAB_MARKER_STRIP_EXISTING_OCR = DATALAB_MARKER_STRIP_EXISTI
|
|||
app.state.config.DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION = (
|
||||
DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION
|
||||
)
|
||||
app.state.config.DATALAB_MARKER_FORMAT_LINES = DATALAB_MARKER_FORMAT_LINES
|
||||
app.state.config.DATALAB_MARKER_USE_LLM = DATALAB_MARKER_USE_LLM
|
||||
app.state.config.DATALAB_MARKER_OUTPUT_FORMAT = DATALAB_MARKER_OUTPUT_FORMAT
|
||||
app.state.config.EXTERNAL_DOCUMENT_LOADER_URL = EXTERNAL_DOCUMENT_LOADER_URL
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@ class DatalabMarkerLoader:
|
|||
paginate: bool = False,
|
||||
strip_existing_ocr: bool = False,
|
||||
disable_image_extraction: bool = False,
|
||||
format_lines: bool = False,
|
||||
output_format: str = None,
|
||||
):
|
||||
self.file_path = file_path
|
||||
|
|
@ -35,6 +36,7 @@ class DatalabMarkerLoader:
|
|||
self.paginate = paginate
|
||||
self.strip_existing_ocr = strip_existing_ocr
|
||||
self.disable_image_extraction = disable_image_extraction
|
||||
self.format_lines = format_lines
|
||||
self.output_format = output_format
|
||||
|
||||
def _get_mime_type(self, filename: str) -> str:
|
||||
|
|
@ -95,6 +97,7 @@ class DatalabMarkerLoader:
|
|||
"paginate": str(self.paginate).lower(),
|
||||
"strip_existing_ocr": str(self.strip_existing_ocr).lower(),
|
||||
"disable_image_extraction": str(self.disable_image_extraction).lower(),
|
||||
"format_lines": str(self.format_lines).lower(),
|
||||
"output_format": self.output_format,
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -300,6 +300,7 @@ class Loader:
|
|||
disable_image_extraction=self.kwargs.get(
|
||||
"DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION", False
|
||||
),
|
||||
format_lines=self.kwargs.get("DATALAB_MARKER_FORMAT_LINES", False),
|
||||
output_format=self.kwargs.get(
|
||||
"DATALAB_MARKER_OUTPUT_FORMAT", "markdown"
|
||||
),
|
||||
|
|
|
|||
|
|
@ -408,6 +408,7 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)):
|
|||
"DATALAB_MARKER_PAGINATE": request.app.state.config.DATALAB_MARKER_PAGINATE,
|
||||
"DATALAB_MARKER_STRIP_EXISTING_OCR": request.app.state.config.DATALAB_MARKER_STRIP_EXISTING_OCR,
|
||||
"DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION": request.app.state.config.DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION,
|
||||
"DATALAB_MARKER_FORMAT_LINES": request.app.state.config.DATALAB_MARKER_FORMAT_LINES,
|
||||
"DATALAB_MARKER_USE_LLM": request.app.state.config.DATALAB_MARKER_USE_LLM,
|
||||
"DATALAB_MARKER_OUTPUT_FORMAT": request.app.state.config.DATALAB_MARKER_OUTPUT_FORMAT,
|
||||
"EXTERNAL_DOCUMENT_LOADER_URL": request.app.state.config.EXTERNAL_DOCUMENT_LOADER_URL,
|
||||
|
|
@ -574,6 +575,7 @@ class ConfigForm(BaseModel):
|
|||
DATALAB_MARKER_PAGINATE: Optional[bool] = None
|
||||
DATALAB_MARKER_STRIP_EXISTING_OCR: Optional[bool] = None
|
||||
DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION: Optional[bool] = None
|
||||
DATALAB_MARKER_FORMAT_LINES: Optional[bool] = None
|
||||
DATALAB_MARKER_USE_LLM: Optional[bool] = None
|
||||
DATALAB_MARKER_OUTPUT_FORMAT: Optional[str] = None
|
||||
EXTERNAL_DOCUMENT_LOADER_URL: Optional[str] = None
|
||||
|
|
@ -720,6 +722,11 @@ async def update_rag_config(
|
|||
if form_data.DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION is not None
|
||||
else request.app.state.config.DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION
|
||||
)
|
||||
request.app.state.config.DATALAB_MARKER_FORMAT_LINES = (
|
||||
form_data.DATALAB_MARKER_FORMAT_LINES
|
||||
if form_data.DATALAB_MARKER_FORMAT_LINES is not None
|
||||
else request.app.state.config.DATALAB_MARKER_FORMAT_LINES
|
||||
)
|
||||
request.app.state.config.DATALAB_MARKER_OUTPUT_FORMAT = (
|
||||
form_data.DATALAB_MARKER_OUTPUT_FORMAT
|
||||
if form_data.DATALAB_MARKER_OUTPUT_FORMAT is not None
|
||||
|
|
@ -1421,6 +1428,7 @@ def process_file(
|
|||
DATALAB_MARKER_PAGINATE=request.app.state.config.DATALAB_MARKER_PAGINATE,
|
||||
DATALAB_MARKER_STRIP_EXISTING_OCR=request.app.state.config.DATALAB_MARKER_STRIP_EXISTING_OCR,
|
||||
DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION=request.app.state.config.DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION,
|
||||
DATALAB_MARKER_FORMAT_LINES=request.app.state.config.DATALAB_MARKER_FORMAT_LINES,
|
||||
DATALAB_MARKER_USE_LLM=request.app.state.config.DATALAB_MARKER_USE_LLM,
|
||||
DATALAB_MARKER_OUTPUT_FORMAT=request.app.state.config.DATALAB_MARKER_OUTPUT_FORMAT,
|
||||
EXTERNAL_DOCUMENT_LOADER_URL=request.app.state.config.EXTERNAL_DOCUMENT_LOADER_URL,
|
||||
|
|
|
|||
|
|
@ -485,6 +485,21 @@
|
|||
<Switch bind:state={RAGConfig.DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION} />
|
||||
</div>
|
||||
</div>
|
||||
<div class="flex justify-between w-full mt-2">
|
||||
<div class="self-center text-xs font-medium">
|
||||
<Tooltip
|
||||
content={$i18n.t(
|
||||
'Format the lines in the output. Defaults to False. If set to True, the lines will be formatted to detect inline math and styles.'
|
||||
)}
|
||||
placement="top-start"
|
||||
>
|
||||
{$i18n.t('Format Lines')}
|
||||
</Tooltip>
|
||||
</div>
|
||||
<div class="flex items-center">
|
||||
<Switch bind:state={RAGConfig.DATALAB_MARKER_FORMAT_LINES} />
|
||||
</div>
|
||||
</div>
|
||||
<div class="flex justify-between w-full mt-2">
|
||||
<div class="self-center text-xs font-medium">
|
||||
<Tooltip
|
||||
|
|
|
|||
Loading…
Reference in a new issue