From 720af637e606fe3f222bbf6b10cb4dfada762925 Mon Sep 17 00:00:00 2001
From: Seth Argyle <94506602+shargyle@users.noreply.github.com>
Date: Tue, 18 Nov 2025 22:19:10 -0700
Subject: [PATCH] =?UTF-8?q?fix:=20Use=20get=5Findex()=20instead=20of=20lis?=
 =?UTF-8?q?t=5Findexes()=20in=20has=5Fcollection()=20to=E2=80=A6=20(#19238?=
 =?UTF-8?q?)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix: Use get_index() instead of list_indexes() in has_collection() to handle pagination

Fixes #19233

Replace list_indexes() pagination scan with direct get_index() lookup in has_collection() method. The previous implementation only checked the first ~1,000 indexes due to unhandled pagination, causing RAG queries to fail for indexes beyond the first page.

Benefits:
- Handles buckets with any number of indexes (no pagination needed)
- ~8x faster (0.19s vs 1.53s in testing)
- Proper exception handling for ResourceNotFoundException
- Scales to millions of indexes

* Update s3vector.py

Unneeded exception handling removed to match original OWUI code
---
 .../retrieval/vector/dbs/s3vector.py | 24 ++++++++++---------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/backend/open_webui/retrieval/vector/dbs/s3vector.py b/backend/open_webui/retrieval/vector/dbs/s3vector.py
index 519ee5abad..bada99cde0 100644
--- a/backend/open_webui/retrieval/vector/dbs/s3vector.py
+++ b/backend/open_webui/retrieval/vector/dbs/s3vector.py
@@ -116,17 +116,19 @@ class S3VectorClient(VectorDBBase):
         return filtered_metadata
 
     def has_collection(self, collection_name: str) -> bool:
-        """
-        Check if a vector index (collection) exists in the S3 vector bucket.
-        """
-
-        try:
-            response = self.client.list_indexes(vectorBucketName=self.bucket_name)
-            indexes = response.get("indexes", [])
-            return any(idx.get("indexName") == collection_name for idx in indexes)
-        except Exception as e:
-            log.error(f"Error listing indexes: {e}")
-            return False
+        """
+        Check if a vector index exists using direct lookup.
+        This avoids pagination issues with list_indexes() and is significantly faster.
+        """
+        try:
+            self.client.get_index(
+                vectorBucketName=self.bucket_name,
+                indexName=collection_name
+            )
+            return True
+        except Exception as e:
+            log.error(f"Error checking if index '{collection_name}' exists: {e}")
+            return False
 
     def delete_collection(self, collection_name: str) -> None:
         """