fix: Use get_index() instead of list_indexes() in has_collection() to handle pagination (#19238)

* fix: Use get_index() instead of list_indexes() in has_collection() to handle pagination

Fixes #19233

  Replace list_indexes() pagination scan with direct get_index() lookup
  in has_collection() method. The previous implementation only checked
  the first ~1,000 indexes due to unhandled pagination, causing RAG
  queries to fail for indexes beyond the first page.
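
  For context, making the old approach correct would have required following list_indexes() page tokens instead of reading only the first response. The sketch below is illustrative only: the helper name is hypothetical, and it assumes the standard AWS nextToken paging convention for list_indexes(), which this PR does not rely on; the merged fix sidesteps the loop entirely by calling get_index() directly.

  # Hypothetical pagination-aware alternative (not the merged fix): walk every
  # page of list_indexes(). Assumes the s3vectors list_indexes API pages via
  # nextToken, as most AWS list_* APIs do.
  def has_collection_via_listing(client, bucket_name: str, collection_name: str) -> bool:
      next_token = None
      while True:
          kwargs = {"vectorBucketName": bucket_name}
          if next_token:
              kwargs["nextToken"] = next_token
          response = client.list_indexes(**kwargs)
          if any(
              idx.get("indexName") == collection_name
              for idx in response.get("indexes", [])
          ):
              return True
          next_token = response.get("nextToken")
          if not next_token:
              return False

  Even a correct paging loop still grows with the number of indexes in the bucket, which is why the direct get_index() lookup was chosen instead.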

  Benefits:
  - Handles buckets with any number of indexes (no pagination needed)
  - ~8x faster (0.19s vs 1.53s in testing)
  - Proper exception handling for ResourceNotFoundException (see the sketch after this list)
  - Scales to millions of indexes
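
  The third benefit above refers to distinguishing "index does not exist" from genuine API failures. A minimal sketch of what that could look like follows; it uses a hypothetical standalone helper (index_exists is not a name from the PR), and the exact not-found error code raised by the s3vectors service is assumed. The follow-up commit below ultimately dropped this distinction in favor of a generic except Exception, matching the original OWUI code.

  import logging

  from botocore.exceptions import ClientError

  log = logging.getLogger(__name__)

  # Hypothetical helper mirroring has_collection(): only a not-found error
  # means the index is missing; anything else is logged and treated as absent.
  def index_exists(client, bucket_name: str, collection_name: str) -> bool:
      try:
          client.get_index(vectorBucketName=bucket_name, indexName=collection_name)
          return True
      except ClientError as e:
          # Error code is an assumption; the PR description mentions
          # ResourceNotFoundException.
          code = e.response.get("Error", {}).get("Code", "")
          if code in ("ResourceNotFoundException", "NotFoundException"):
              return False
          log.error(f"Error checking if index '{collection_name}' exists: {e}")
          return False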

* Update s3vector.py

Unneeded exception handling removed to match the original OWUI code
Seth Argyle 2025-11-18 22:19:10 -07:00 committed by GitHub
parent 4386e5abb8
commit 720af637e6

@@ -116,17 +116,19 @@ class S3VectorClient(VectorDBBase):
         return filtered_metadata

     def has_collection(self, collection_name: str) -> bool:
-        """
-        Check if a vector index (collection) exists in the S3 vector bucket.
-        """
-        try:
-            response = self.client.list_indexes(vectorBucketName=self.bucket_name)
-            indexes = response.get("indexes", [])
-            return any(idx.get("indexName") == collection_name for idx in indexes)
-        except Exception as e:
-            log.error(f"Error listing indexes: {e}")
-            return False
+        """
+        Check if a vector index exists using direct lookup.
+        This avoids pagination issues with list_indexes() and is significantly faster.
+        """
+        try:
+            self.client.get_index(
+                vectorBucketName=self.bucket_name,
+                indexName=collection_name
+            )
+            return True
+        except Exception as e:
+            log.error(f"Error checking if index '{collection_name}' exists: {e}")
+            return False

     def delete_collection(self, collection_name: str) -> None:
         """