feat: add qdrant indices for metadata fields

All field names that are part of a query should
have an index for performance reasons. This is
even enforced on some Qdrant clusters, such as those
on qdrant.io, where queries using an unindexed field
fail with an error.
This commit is contained in:
guenhter 2025-06-29 15:30:55 +02:00
parent 7523613fbc
commit 5c2e0e4beb
2 changed files with 38 additions and 0 deletions

View file

@ -87,6 +87,25 @@ class QdrantClient(VectorDBBase):
),
)
# Create payload indexes for efficient filtering
self.client.create_payload_index(
collection_name=collection_name_with_prefix,
field_name="metadata.hash",
field_schema=models.KeywordIndexParams(
type=models.KeywordIndexType.KEYWORD,
is_tenant=False,
on_disk=self.QDRANT_ON_DISK,
),
)
self.client.create_payload_index(
collection_name=collection_name_with_prefix,
field_name="metadata.file_id",
field_schema=models.KeywordIndexParams(
type=models.KeywordIndexType.KEYWORD,
is_tenant=False,
on_disk=self.QDRANT_ON_DISK,
),
)
log.info(f"collection {collection_name_with_prefix} successfully created!")
def _create_collection_if_not_exists(self, collection_name, dimension):

View file

@ -229,6 +229,25 @@ class QdrantClient(VectorDBBase):
),
wait=True,
)
# Create payload indexes for efficient filtering on metadata.hash and metadata.file_id
self.client.create_payload_index(
collection_name=mt_collection_name,
field_name="metadata.hash",
field_schema=models.KeywordIndexParams(
type=models.KeywordIndexType.KEYWORD,
is_tenant=False,
on_disk=self.QDRANT_ON_DISK,
),
)
self.client.create_payload_index(
collection_name=mt_collection_name,
field_name="metadata.file_id",
field_schema=models.KeywordIndexParams(
type=models.KeywordIndexType.KEYWORD,
is_tenant=False,
on_disk=self.QDRANT_ON_DISK,
),
)
log.info(
f"Multi-tenant collection {mt_collection_name} created with dimension {dimension}!"