Format python code

This commit is contained in:
Juan Calderon-Perez 2025-08-08 10:09:31 -04:00
parent 5d7e8c8e5f
commit d2f2d42e09
2 changed files with 274 additions and 219 deletions

View file

@ -120,7 +120,7 @@ class Oracle23aiClient(VectorDBBase):
increment=ORACLE_DB_POOL_INCREMENT, increment=ORACLE_DB_POOL_INCREMENT,
config_dir=ORACLE_WALLET_DIR, config_dir=ORACLE_WALLET_DIR,
wallet_location=ORACLE_WALLET_DIR, wallet_location=ORACLE_WALLET_DIR,
wallet_password=ORACLE_WALLET_PASSWORD wallet_password=ORACLE_WALLET_PASSWORD,
) )
log.info("Created ADB connection pool with wallet authentication.") log.info("Created ADB connection pool with wallet authentication.")
@ -136,7 +136,7 @@ class Oracle23aiClient(VectorDBBase):
dsn=ORACLE_DB_DSN, dsn=ORACLE_DB_DSN,
min=ORACLE_DB_POOL_MIN, min=ORACLE_DB_POOL_MIN,
max=ORACLE_DB_POOL_MAX, max=ORACLE_DB_POOL_MAX,
increment=ORACLE_DB_POOL_INCREMENT increment=ORACLE_DB_POOL_INCREMENT,
) )
log.info("Created DB connection pool with basic authentication.") log.info("Created DB connection pool with basic authentication.")
@ -154,11 +154,13 @@ class Oracle23aiClient(VectorDBBase):
connection.outputtypehandler = self._output_type_handler connection.outputtypehandler = self._output_type_handler
return connection return connection
except oracledb.DatabaseError as e: except oracledb.DatabaseError as e:
error_obj, = e.args (error_obj,) = e.args
log.exception(f"Connection attempt {attempt + 1} failed: {error_obj.message}") log.exception(
f"Connection attempt {attempt + 1} failed: {error_obj.message}"
)
if attempt < max_retries - 1: if attempt < max_retries - 1:
wait_time = 2 ** attempt wait_time = 2**attempt
log.info(f"Retrying in {wait_time} seconds...") log.info(f"Retrying in {wait_time} seconds...")
time.sleep(wait_time) time.sleep(wait_time)
else: else:
@ -171,6 +173,7 @@ class Oracle23aiClient(VectorDBBase):
Args: Args:
interval_seconds (int): Number of seconds between health checks interval_seconds (int): Number of seconds between health checks
""" """
def _monitor(): def _monitor():
while True: while True:
try: try:
@ -219,7 +222,9 @@ class Oracle23aiClient(VectorDBBase):
with connection.cursor() as cursor: with connection.cursor() as cursor:
cursor.execute("SELECT 1 FROM dual") cursor.execute("SELECT 1 FROM dual")
except Exception as e: except Exception as e:
log.exception(f"Connection check failed: {e}, attempting to reconnect pool...") log.exception(
f"Connection check failed: {e}, attempting to reconnect pool..."
)
self._reconnect_pool() self._reconnect_pool()
def _output_type_handler(self, cursor, metadata): def _output_type_handler(self, cursor, metadata):
@ -234,8 +239,9 @@ class Oracle23aiClient(VectorDBBase):
A variable with appropriate conversion for vector types A variable with appropriate conversion for vector types
""" """
if metadata.type_code is oracledb.DB_TYPE_VECTOR: if metadata.type_code is oracledb.DB_TYPE_VECTOR:
return cursor.var(metadata.type_code, arraysize=cursor.arraysize, return cursor.var(
outconverter=list) metadata.type_code, arraysize=cursor.arraysize, outconverter=list
)
def _initialize_database(self, connection) -> None: def _initialize_database(self, connection) -> None:
""" """
@ -252,7 +258,8 @@ class Oracle23aiClient(VectorDBBase):
with connection.cursor() as cursor: with connection.cursor() as cursor:
try: try:
log.info("Creating Table document_chunk") log.info("Creating Table document_chunk")
cursor.execute(""" cursor.execute(
"""
BEGIN BEGIN
EXECUTE IMMEDIATE ' EXECUTE IMMEDIATE '
CREATE TABLE IF NOT EXISTS document_chunk ( CREATE TABLE IF NOT EXISTS document_chunk (
@ -269,10 +276,12 @@ class Oracle23aiClient(VectorDBBase):
RAISE; RAISE;
END IF; END IF;
END; END;
""") """
)
log.info("Creating Index document_chunk_collection_name_idx") log.info("Creating Index document_chunk_collection_name_idx")
cursor.execute(""" cursor.execute(
"""
BEGIN BEGIN
EXECUTE IMMEDIATE ' EXECUTE IMMEDIATE '
CREATE INDEX IF NOT EXISTS document_chunk_collection_name_idx CREATE INDEX IF NOT EXISTS document_chunk_collection_name_idx
@ -284,10 +293,12 @@ class Oracle23aiClient(VectorDBBase):
RAISE; RAISE;
END IF; END IF;
END; END;
""") """
)
log.info("Creating VECTOR INDEX document_chunk_vector_ivf_idx") log.info("Creating VECTOR INDEX document_chunk_vector_ivf_idx")
cursor.execute(""" cursor.execute(
"""
BEGIN BEGIN
EXECUTE IMMEDIATE ' EXECUTE IMMEDIATE '
CREATE VECTOR INDEX IF NOT EXISTS document_chunk_vector_ivf_idx CREATE VECTOR INDEX IF NOT EXISTS document_chunk_vector_ivf_idx
@ -303,7 +314,8 @@ class Oracle23aiClient(VectorDBBase):
RAISE; RAISE;
END IF; END IF;
END; END;
""") """
)
connection.commit() connection.commit()
log.info("Database initialization completed successfully.") log.info("Database initialization completed successfully.")
@ -415,20 +427,25 @@ class Oracle23aiClient(VectorDBBase):
vector_blob = self._vector_to_blob(item["vector"]) vector_blob = self._vector_to_blob(item["vector"])
metadata_json = self._metadata_to_json(item["metadata"]) metadata_json = self._metadata_to_json(item["metadata"])
cursor.execute(""" cursor.execute(
"""
INSERT INTO document_chunk INSERT INTO document_chunk
(id, collection_name, text, vmetadata, vector) (id, collection_name, text, vmetadata, vector)
VALUES (:id, :collection_name, :text, :metadata, :vector) VALUES (:id, :collection_name, :text, :metadata, :vector)
""", { """,
'id': item["id"], {
'collection_name': collection_name, "id": item["id"],
'text': item["text"], "collection_name": collection_name,
'metadata': metadata_json, "text": item["text"],
'vector': vector_blob "metadata": metadata_json,
}) "vector": vector_blob,
},
)
connection.commit() connection.commit()
log.info(f"Successfully inserted {len(items)} items into collection '{collection_name}'.") log.info(
f"Successfully inserted {len(items)} items into collection '{collection_name}'."
)
except Exception as e: except Exception as e:
connection.rollback() connection.rollback()
@ -466,7 +483,8 @@ class Oracle23aiClient(VectorDBBase):
vector_blob = self._vector_to_blob(item["vector"]) vector_blob = self._vector_to_blob(item["vector"])
metadata_json = self._metadata_to_json(item["metadata"]) metadata_json = self._metadata_to_json(item["metadata"])
cursor.execute(""" cursor.execute(
"""
MERGE INTO document_chunk d MERGE INTO document_chunk d
USING (SELECT :merge_id as id FROM dual) s USING (SELECT :merge_id as id FROM dual) s
ON (d.id = s.id) ON (d.id = s.id)
@ -479,21 +497,25 @@ class Oracle23aiClient(VectorDBBase):
WHEN NOT MATCHED THEN WHEN NOT MATCHED THEN
INSERT (id, collection_name, text, vmetadata, vector) INSERT (id, collection_name, text, vmetadata, vector)
VALUES (:ins_id, :ins_collection_name, :ins_text, :ins_metadata, :ins_vector) VALUES (:ins_id, :ins_collection_name, :ins_text, :ins_metadata, :ins_vector)
""", { """,
'merge_id': item["id"], {
'upd_collection_name': collection_name, "merge_id": item["id"],
'upd_text': item["text"], "upd_collection_name": collection_name,
'upd_metadata': metadata_json, "upd_text": item["text"],
'upd_vector': vector_blob, "upd_metadata": metadata_json,
'ins_id': item["id"], "upd_vector": vector_blob,
'ins_collection_name': collection_name, "ins_id": item["id"],
'ins_text': item["text"], "ins_collection_name": collection_name,
'ins_metadata': metadata_json, "ins_text": item["text"],
'ins_vector': vector_blob "ins_metadata": metadata_json,
}) "ins_vector": vector_blob,
},
)
connection.commit() connection.commit()
log.info(f"Successfully upserted {len(items)} items into collection '{collection_name}'.") log.info(
f"Successfully upserted {len(items)} items into collection '{collection_name}'."
)
except Exception as e: except Exception as e:
connection.rollback() connection.rollback()
@ -501,10 +523,7 @@ class Oracle23aiClient(VectorDBBase):
raise raise
def search( def search(
self, self, collection_name: str, vectors: List[List[Union[float, int]]], limit: int
collection_name: str,
vectors: List[List[Union[float, int]]],
limit: int
) -> Optional[SearchResult]: ) -> Optional[SearchResult]:
""" """
Search for similar vectors in the database. Search for similar vectors in the database.
@ -528,7 +547,9 @@ class Oracle23aiClient(VectorDBBase):
... for i, (id, dist) in enumerate(zip(results.ids[0], results.distances[0])): ... for i, (id, dist) in enumerate(zip(results.ids[0], results.distances[0])):
... log.info(f"Match {i+1}: id={id}, distance={dist}") ... log.info(f"Match {i+1}: id={id}, distance={dist}")
""" """
log.info(f"Searching items from collection '{collection_name}' with limit {limit}.") log.info(
f"Searching items from collection '{collection_name}' with limit {limit}."
)
try: try:
if not vectors: if not vectors:
@ -547,7 +568,8 @@ class Oracle23aiClient(VectorDBBase):
for qid, vector in enumerate(vectors): for qid, vector in enumerate(vectors):
vector_blob = self._vector_to_blob(vector) vector_blob = self._vector_to_blob(vector)
cursor.execute(""" cursor.execute(
"""
SELECT dc.id, dc.text, SELECT dc.id, dc.text,
JSON_SERIALIZE(dc.vmetadata RETURNING VARCHAR2(4096)) as vmetadata, JSON_SERIALIZE(dc.vmetadata RETURNING VARCHAR2(4096)) as vmetadata,
VECTOR_DISTANCE(dc.vector, :query_vector, COSINE) as distance VECTOR_DISTANCE(dc.vector, :query_vector, COSINE) as distance
@ -555,29 +577,38 @@ class Oracle23aiClient(VectorDBBase):
WHERE dc.collection_name = :collection_name WHERE dc.collection_name = :collection_name
ORDER BY VECTOR_DISTANCE(dc.vector, :query_vector, COSINE) ORDER BY VECTOR_DISTANCE(dc.vector, :query_vector, COSINE)
FETCH APPROX FIRST :limit ROWS ONLY FETCH APPROX FIRST :limit ROWS ONLY
""", { """,
'query_vector': vector_blob, {
'collection_name': collection_name, "query_vector": vector_blob,
'limit': limit "collection_name": collection_name,
}) "limit": limit,
},
)
results = cursor.fetchall() results = cursor.fetchall()
for row in results: for row in results:
ids[qid].append(row[0]) ids[qid].append(row[0])
documents[qid].append(row[1].read() if isinstance(row[1], oracledb.LOB) else str(row[1])) documents[qid].append(
row[1].read()
if isinstance(row[1], oracledb.LOB)
else str(row[1])
)
# 🔧 FIXED: Parse JSON metadata properly # 🔧 FIXED: Parse JSON metadata properly
metadata_str = row[2].read() if isinstance(row[2], oracledb.LOB) else row[2] metadata_str = (
row[2].read()
if isinstance(row[2], oracledb.LOB)
else row[2]
)
metadatas[qid].append(self._json_to_metadata(metadata_str)) metadatas[qid].append(self._json_to_metadata(metadata_str))
distances[qid].append(float(row[3])) distances[qid].append(float(row[3]))
log.info(f"Search completed. Found {sum(len(ids[i]) for i in range(num_queries))} total results.") log.info(
f"Search completed. Found {sum(len(ids[i]) for i in range(num_queries))} total results."
)
return SearchResult( return SearchResult(
ids=ids, ids=ids, distances=distances, documents=documents, metadatas=metadatas
distances=distances,
documents=documents,
metadatas=metadatas
) )
except Exception as e: except Exception as e:
@ -585,10 +616,7 @@ class Oracle23aiClient(VectorDBBase):
return None return None
def query( def query(
self, self, collection_name: str, filter: Dict, limit: Optional[int] = None
collection_name: str,
filter: Dict,
limit: Optional[int] = None
) -> Optional[GetResult]: ) -> Optional[GetResult]:
""" """
Query items based on metadata filters. Query items based on metadata filters.
@ -621,7 +649,7 @@ class Oracle23aiClient(VectorDBBase):
WHERE collection_name = :collection_name WHERE collection_name = :collection_name
""" """
params = {'collection_name': collection_name} params = {"collection_name": collection_name}
for i, (key, value) in enumerate(filter.items()): for i, (key, value) in enumerate(filter.items()):
param_name = f"value_{i}" param_name = f"value_{i}"
@ -629,7 +657,7 @@ class Oracle23aiClient(VectorDBBase):
params[param_name] = str(value) params[param_name] = str(value)
query += " FETCH FIRST :limit ROWS ONLY" query += " FETCH FIRST :limit ROWS ONLY"
params['limit'] = limit params["limit"] = limit
with self.get_connection() as connection: with self.get_connection() as connection:
with connection.cursor() as cursor: with connection.cursor() as cursor:
@ -641,26 +669,31 @@ class Oracle23aiClient(VectorDBBase):
return None return None
ids = [[row[0] for row in results]] ids = [[row[0] for row in results]]
documents = [[row[1].read() if isinstance(row[1], oracledb.LOB) else str(row[1]) for row in results]] documents = [
[
row[1].read() if isinstance(row[1], oracledb.LOB) else str(row[1])
for row in results
]
]
# 🔧 FIXED: Parse JSON metadata properly # 🔧 FIXED: Parse JSON metadata properly
metadatas = [[self._json_to_metadata(row[2].read() if isinstance(row[2], oracledb.LOB) else row[2]) for row in results]] metadatas = [
[
self._json_to_metadata(
row[2].read() if isinstance(row[2], oracledb.LOB) else row[2]
)
for row in results
]
]
log.info(f"Query completed. Found {len(results)} results.") log.info(f"Query completed. Found {len(results)} results.")
return GetResult( return GetResult(ids=ids, documents=documents, metadatas=metadatas)
ids=ids,
documents=documents,
metadatas=metadatas
)
except Exception as e: except Exception as e:
log.exception(f"Error during query: {e}") log.exception(f"Error during query: {e}")
return None return None
def get( def get(self, collection_name: str) -> Optional[GetResult]:
self,
collection_name: str
) -> Optional[GetResult]:
""" """
Get all items in a collection. Get all items in a collection.
@ -679,22 +712,24 @@ class Oracle23aiClient(VectorDBBase):
>>> if results: >>> if results:
... print(f"Retrieved {len(results.ids[0])} documents from collection") ... print(f"Retrieved {len(results.ids[0])} documents from collection")
""" """
log.info(f"Getting items from collection '{collection_name}' with limit {limit}.") log.info(
f"Getting items from collection '{collection_name}' with limit {limit}."
)
try: try:
limit = limit or 1000 limit = limit or 1000
with self.get_connection() as connection: with self.get_connection() as connection:
with connection.cursor() as cursor: with connection.cursor() as cursor:
cursor.execute(""" cursor.execute(
"""
SELECT /*+ MONITOR */ id, text, JSON_SERIALIZE(vmetadata RETURNING VARCHAR2(4096)) as vmetadata SELECT /*+ MONITOR */ id, text, JSON_SERIALIZE(vmetadata RETURNING VARCHAR2(4096)) as vmetadata
FROM document_chunk FROM document_chunk
WHERE collection_name = :collection_name WHERE collection_name = :collection_name
FETCH FIRST :limit ROWS ONLY FETCH FIRST :limit ROWS ONLY
""", { """,
'collection_name': collection_name, {"collection_name": collection_name, "limit": limit},
'limit': limit )
})
results = cursor.fetchall() results = cursor.fetchall()
@ -703,15 +738,23 @@ class Oracle23aiClient(VectorDBBase):
return None return None
ids = [[row[0] for row in results]] ids = [[row[0] for row in results]]
documents = [[row[1].read() if isinstance(row[1], oracledb.LOB) else str(row[1]) for row in results]] documents = [
[
row[1].read() if isinstance(row[1], oracledb.LOB) else str(row[1])
for row in results
]
]
# 🔧 FIXED: Parse JSON metadata properly # 🔧 FIXED: Parse JSON metadata properly
metadatas = [[self._json_to_metadata(row[2].read() if isinstance(row[2], oracledb.LOB) else row[2]) for row in results]] metadatas = [
[
self._json_to_metadata(
row[2].read() if isinstance(row[2], oracledb.LOB) else row[2]
)
for row in results
]
]
return GetResult( return GetResult(ids=ids, documents=documents, metadatas=metadatas)
ids=ids,
documents=documents,
metadatas=metadatas
)
except Exception as e: except Exception as e:
log.exception(f"Error during get: {e}") log.exception(f"Error during get: {e}")
@ -746,15 +789,17 @@ class Oracle23aiClient(VectorDBBase):
log.info(f"Deleting items from collection '{collection_name}'.") log.info(f"Deleting items from collection '{collection_name}'.")
try: try:
query = "DELETE FROM document_chunk WHERE collection_name = :collection_name" query = (
params = {'collection_name': collection_name} "DELETE FROM document_chunk WHERE collection_name = :collection_name"
)
params = {"collection_name": collection_name}
if ids: if ids:
# 🔧 FIXED: Use proper parameterized query to prevent SQL injection # 🔧 FIXED: Use proper parameterized query to prevent SQL injection
placeholders = ','.join([f':id_{i}' for i in range(len(ids))]) placeholders = ",".join([f":id_{i}" for i in range(len(ids))])
query += f" AND id IN ({placeholders})" query += f" AND id IN ({placeholders})"
for i, id_val in enumerate(ids): for i, id_val in enumerate(ids):
params[f'id_{i}'] = id_val params[f"id_{i}"] = id_val
if filter: if filter:
for i, (key, value) in enumerate(filter.items()): for i, (key, value) in enumerate(filter.items()):
@ -796,7 +841,9 @@ class Oracle23aiClient(VectorDBBase):
deleted = cursor.rowcount deleted = cursor.rowcount
connection.commit() connection.commit()
log.info(f"Reset complete. Deleted {deleted} items from 'document_chunk' table.") log.info(
f"Reset complete. Deleted {deleted} items from 'document_chunk' table."
)
except Exception as e: except Exception as e:
log.exception(f"Error during reset: {e}") log.exception(f"Error during reset: {e}")
@ -814,7 +861,7 @@ class Oracle23aiClient(VectorDBBase):
>>> client.close() >>> client.close()
""" """
try: try:
if hasattr(self, 'pool') and self.pool: if hasattr(self, "pool") and self.pool:
self.pool.close() self.pool.close()
log.info("Oracle Vector Search connection pool closed.") log.info("Oracle Vector Search connection pool closed.")
except Exception as e: except Exception as e:
@ -840,12 +887,15 @@ class Oracle23aiClient(VectorDBBase):
try: try:
with self.get_connection() as connection: with self.get_connection() as connection:
with connection.cursor() as cursor: with connection.cursor() as cursor:
cursor.execute(""" cursor.execute(
"""
SELECT COUNT(*) SELECT COUNT(*)
FROM document_chunk FROM document_chunk
WHERE collection_name = :collection_name WHERE collection_name = :collection_name
FETCH FIRST 1 ROWS ONLY FETCH FIRST 1 ROWS ONLY
""", {'collection_name': collection_name}) """,
{"collection_name": collection_name},
)
count = cursor.fetchone()[0] count = cursor.fetchone()[0]
@ -873,15 +923,20 @@ class Oracle23aiClient(VectorDBBase):
try: try:
with self.get_connection() as connection: with self.get_connection() as connection:
with connection.cursor() as cursor: with connection.cursor() as cursor:
cursor.execute(""" cursor.execute(
"""
DELETE FROM document_chunk DELETE FROM document_chunk
WHERE collection_name = :collection_name WHERE collection_name = :collection_name
""", {'collection_name': collection_name}) """,
{"collection_name": collection_name},
)
deleted = cursor.rowcount deleted = cursor.rowcount
connection.commit() connection.commit()
log.info(f"Collection '{collection_name}' deleted. Removed {deleted} items.") log.info(
f"Collection '{collection_name}' deleted. Removed {deleted} items."
)
except Exception as e: except Exception as e:
log.exception(f"Error deleting collection '{collection_name}': {e}") log.exception(f"Error deleting collection '{collection_name}': {e}")