fix: handle unicode filenames in external document loader

Files with special characters in their names (e.g., `ü.pdf`) caused failures because HTTP header values may only contain Latin-1 characters.
This change URL-encodes `X-Filename` before adding it to request headers, preventing failures when uploading or processing such files.

Fixes: #17000
This commit is contained in:
Athanasios Oikonomou 2025-08-28 22:19:25 +03:00 committed by Athanasios Oikonomou
parent 0ebe4f8f84
commit d735b036fe

View file

@ -1,6 +1,7 @@
import requests
import logging, os
from typing import Iterator, List, Union
from urllib.parse import quote
from langchain_core.document_loaders import BaseLoader
from langchain_core.documents import Document
@ -37,7 +38,7 @@ class ExternalDocumentLoader(BaseLoader):
headers["Authorization"] = f"Bearer {self.api_key}"
try:
headers["X-Filename"] = os.path.basename(self.file_path)
headers["X-Filename"] = quote(os.path.basename(self.file_path))
except:
pass