From 5c2e0e4bebdc2a5d8fdc6cf3436f1966e318cb18 Mon Sep 17 00:00:00 2001 From: guenhter Date: Sun, 29 Jun 2025 15:30:55 +0200 Subject: [PATCH] feat: add qdrant indices for metadata fields All field names which are part of a query should have an index for performance reasons. This is even enforced on some Qdrant clusters, such as those on qdrant.io, where queries using an unindexed field fail with an error. --- .../open_webui/retrieval/vector/dbs/qdrant.py | 19 +++++++++++++++++++ .../vector/dbs/qdrant_multitenancy.py | 19 +++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/backend/open_webui/retrieval/vector/dbs/qdrant.py b/backend/open_webui/retrieval/vector/dbs/qdrant.py index 54f42ef7a9..2276e713fc 100644 --- a/backend/open_webui/retrieval/vector/dbs/qdrant.py +++ b/backend/open_webui/retrieval/vector/dbs/qdrant.py @@ -87,6 +87,25 @@ class QdrantClient(VectorDBBase): ), ) + # Create payload indexes for efficient filtering + self.client.create_payload_index( + collection_name=collection_name_with_prefix, + field_name="metadata.hash", + field_schema=models.KeywordIndexParams( + type=models.KeywordIndexType.KEYWORD, + is_tenant=False, + on_disk=self.QDRANT_ON_DISK, + ), + ) + self.client.create_payload_index( + collection_name=collection_name_with_prefix, + field_name="metadata.file_id", + field_schema=models.KeywordIndexParams( + type=models.KeywordIndexType.KEYWORD, + is_tenant=False, + on_disk=self.QDRANT_ON_DISK, + ), + ) log.info(f"collection {collection_name_with_prefix} successfully created!") def _create_collection_if_not_exists(self, collection_name, dimension): diff --git a/backend/open_webui/retrieval/vector/dbs/qdrant_multitenancy.py b/backend/open_webui/retrieval/vector/dbs/qdrant_multitenancy.py index bc8f2ba6a8..8f065ca5c8 100644 --- a/backend/open_webui/retrieval/vector/dbs/qdrant_multitenancy.py +++ b/backend/open_webui/retrieval/vector/dbs/qdrant_multitenancy.py @@ -229,6 +229,25 @@ class QdrantClient(VectorDBBase): ), wait=True, ) + # Create 
payload indexes for efficient filtering on metadata.hash and metadata.file_id + self.client.create_payload_index( + collection_name=mt_collection_name, + field_name="metadata.hash", + field_schema=models.KeywordIndexParams( + type=models.KeywordIndexType.KEYWORD, + is_tenant=False, + on_disk=self.QDRANT_ON_DISK, + ), + ) + self.client.create_payload_index( + collection_name=mt_collection_name, + field_name="metadata.file_id", + field_schema=models.KeywordIndexParams( + type=models.KeywordIndexType.KEYWORD, + is_tenant=False, + on_disk=self.QDRANT_ON_DISK, + ), + ) log.info( f"Multi-tenant collection {mt_collection_name} created with dimension {dimension}!"