diff --git a/backend/open_webui/retrieval/loaders/main.py b/backend/open_webui/retrieval/loaders/main.py
index a91496e8e8..e57323e1eb 100644
--- a/backend/open_webui/retrieval/loaders/main.py
+++ b/backend/open_webui/retrieval/loaders/main.py
@@ -226,7 +226,11 @@ class Loader:
 
     def _is_text_file(self, file_ext: str, file_content_type: str) -> bool:
         return file_ext in known_source_ext or (
-            file_content_type and file_content_type.find("text/") >= 0
+            # bool() keeps the declared bool return when the type is None or ""
+            bool(file_content_type)
+            and "text/" in file_content_type
+            # Avoid text/html files being detected as text
+            and "html" not in file_content_type
         )
 
     def _get_loader(self, filename: str, file_content_type: str, file_path: str):