Mirror of https://github.com/open-webui/open-webui.git
Merge pull request #16741 from 0xThresh/s3vector-support
fix: batch S3 vectors in groups of 500 to comply with API limitations
Commit 7452b87877
1 changed file with 32 additions and 17 deletions
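The change replaces each single put_vectors call with a loop that submits the vectors in slices of at most 500, the per-request limit of the S3 Vector API noted in the diff below. A minimal sketch of that batching pattern, using a fake client in place of the real boto3 S3 Vectors client (FakeS3VectorsClient, put_in_batches, and the bucket/index names are illustrative, not part of the commit):

from typing import Any, Dict, List


class FakeS3VectorsClient:
    """Stand-in that records call sizes instead of talking to AWS (illustrative only)."""

    def __init__(self) -> None:
        self.calls: List[int] = []

    def put_vectors(self, vectorBucketName: str, indexName: str, vectors: List[Dict[str, Any]]) -> None:
        self.calls.append(len(vectors))


def put_in_batches(client: FakeS3VectorsClient, bucket: str, index: str,
                   vectors: List[Dict[str, Any]], batch_size: int = 500) -> None:
    # Slice the full list into chunks of at most batch_size vectors,
    # mirroring the loop introduced in the diff.
    for i in range(0, len(vectors), batch_size):
        batch = vectors[i:i + batch_size]
        client.put_vectors(vectorBucketName=bucket, indexName=index, vectors=batch)


client = FakeS3VectorsClient()
put_in_batches(client, "my-bucket", "my-index", [{"key": str(n)} for n in range(1200)])
print(client.calls)  # [500, 500, 200] -> three requests, none above the 500-vector limit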
@@ -197,13 +197,19 @@ class S3VectorClient(VectorDBBase):
                         "metadata": metadata,
                     }
                 )
-            # Insert vectors
-            self.client.put_vectors(
-                vectorBucketName=self.bucket_name,
-                indexName=collection_name,
-                vectors=vectors,
-            )
-            log.info(f"Inserted {len(vectors)} vectors into index '{collection_name}'.")
+            # Insert vectors in batches of 500 (S3 Vector API limit)
+            batch_size = 500
+            for i in range(0, len(vectors), batch_size):
+                batch = vectors[i:i + batch_size]
+                self.client.put_vectors(
+                    vectorBucketName=self.bucket_name,
+                    indexName=collection_name,
+                    vectors=batch,
+                )
+                log.info(f"Inserted batch {i//batch_size + 1}: {len(batch)} vectors into index '{collection_name}'.")
+
+            log.info(f"Completed insertion of {len(vectors)} vectors into index '{collection_name}'.")
         except Exception as e:
             log.error(f"Error inserting vectors: {e}")
             raise
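The same batching is applied to the upsert path in the second hunk. For reference, the batch number in the new log lines is computed from the slice offset; a quick, purely illustrative check of that arithmetic:

# Batch numbering as logged in the diff: offset 0 -> batch 1, offset 500 -> batch 2, ...
batch_size = 500
for i in range(0, 1234, batch_size):
    print(f"offset {i} -> batch {i // batch_size + 1}")
# offset 0 -> batch 1
# offset 500 -> batch 2
# offset 1000 -> batch 3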
@@ -258,16 +264,25 @@ class S3VectorClient(VectorDBBase):
                         "metadata": metadata,
                     }
                 )
-            # Upsert vectors (using put_vectors for upsert semantics)
-            log.info(
-                f"Upserting {len(vectors)} vectors. First vector sample: key={vectors[0]['key']}, data_type={type(vectors[0]['data']['float32'])}, data_len={len(vectors[0]['data']['float32'])}"
-            )
-            self.client.put_vectors(
-                vectorBucketName=self.bucket_name,
-                indexName=collection_name,
-                vectors=vectors,
-            )
-            log.info(f"Upserted {len(vectors)} vectors into index '{collection_name}'.")
+            # Upsert vectors in batches of 500 (S3 Vector API limit)
+            batch_size = 500
+            for i in range(0, len(vectors), batch_size):
+                batch = vectors[i:i + batch_size]
+                if i == 0:  # Log sample info for first batch only
+                    log.info(
+                        f"Upserting batch 1: {len(batch)} vectors. First vector sample: key={batch[0]['key']}, data_type={type(batch[0]['data']['float32'])}, data_len={len(batch[0]['data']['float32'])}"
+                    )
+                else:
+                    log.info(f"Upserting batch {i//batch_size + 1}: {len(batch)} vectors.")
+
+                self.client.put_vectors(
+                    vectorBucketName=self.bucket_name,
+                    indexName=collection_name,
+                    vectors=batch,
+                )
+
+            log.info(f"Completed upsert of {len(vectors)} vectors into index '{collection_name}'.")
         except Exception as e:
             log.error(f"Error upserting vectors: {e}")
             raise
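Both loops rely on the fact that Python list slicing past the end is safe, so the trailing partial batch needs no special handling; the upsert path additionally logs the vector sample only when i == 0, i.e. for the first batch, to avoid repeating the verbose message. A small, self-contained illustration of the slicing behaviour (not from the commit):

vectors = list(range(1100))
batch_size = 500
batches = [vectors[i:i + batch_size] for i in range(0, len(vectors), batch_size)]
print([len(b) for b in batches])  # [500, 500, 100] -- the trailing partial batch is kept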