Merge pull request #13085 from ayan4m1/fix/tika-image-ocr

fix: pass extractInlineImages header to Tika if PDF_EXTRACT_IMAGES is true
This commit is contained in:
Tim Jaeryang Baek 2025-05-02 03:47:51 -07:00 committed by GitHub
commit 7d184c3a14
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -99,6 +99,9 @@ class TikaLoader:
else:
headers = {}
if self.kwargs.get("PDF_EXTRACT_IMAGES") == True:
headers['X-Tika-PDFextractInlineImages'] = 'true'
endpoint = self.url
if not endpoint.endswith("/"):
endpoint += "/"