Format python code

This commit is contained in:
Juan Calderon-Perez 2025-08-08 10:09:31 -04:00
parent 5d7e8c8e5f
commit d2f2d42e09
2 changed files with 274 additions and 219 deletions

View file

@ -64,51 +64,51 @@ log.setLevel(SRC_LOG_LEVELS["RAG"])
class Oracle23aiClient(VectorDBBase): class Oracle23aiClient(VectorDBBase):
""" """
Oracle Vector Database Client for vector similarity search using Oracle Database 23ai. Oracle Vector Database Client for vector similarity search using Oracle Database 23ai.
This client provides an interface to store, retrieve, and search vector embeddings This client provides an interface to store, retrieve, and search vector embeddings
in an Oracle database. It uses connection pooling for efficient database access in an Oracle database. It uses connection pooling for efficient database access
and supports vector similarity search operations. and supports vector similarity search operations.
Attributes: Attributes:
pool: Connection pool for Oracle database connections pool: Connection pool for Oracle database connections
""" """
def __init__(self) -> None:
    """
    Initialize the Oracle23aiClient with a connection pool.

    Builds the pool through ``_create_adb_pool`` when ``ORACLE_DB_USE_WALLET``
    is truthy and through ``_create_dbcs_pool`` otherwise, then acquires one
    connection to log the server version and run ``_initialize_database``.

    Raises:
        Exception: Any error raised while creating the pool or initializing
            the schema is logged and re-raised unchanged. A pool that was
            already created is closed first so it is not leaked.
    """
    self.pool = None

    try:
        # Logged before the pool is built so the message matches what happens next.
        log.info(f"Creating Connection Pool [{ORACLE_DB_USER}:**@{ORACLE_DB_DSN}]")

        # Create the appropriate connection pool based on DB type
        if ORACLE_DB_USE_WALLET:
            self._create_adb_pool()
        else:  # DBCS
            self._create_dbcs_pool()

        with self.get_connection() as connection:
            log.info(f"Connection version: {connection.version}")
            self._initialize_database(connection)

        log.info("Oracle Vector Search initialization complete.")
    except Exception as e:
        log.exception(f"Error during Oracle Vector Search initialization: {e}")

        # Construction is about to fail, so nobody will ever call close() on
        # this object; release the pool here (best effort) instead of leaking it.
        if self.pool is not None:
            try:
                self.pool.close()
            except Exception as close_error:
                log.warning(f"Error closing existing pool: {close_error}")
            self.pool = None

        raise
def _create_adb_pool(self) -> None: def _create_adb_pool(self) -> None:
""" """
Create connection pool for Oracle Autonomous Database. Create connection pool for Oracle Autonomous Database.
Uses wallet-based authentication. Uses wallet-based authentication.
""" """
self.pool = oracledb.create_pool( self.pool = oracledb.create_pool(
@ -120,14 +120,14 @@ class Oracle23aiClient(VectorDBBase):
increment=ORACLE_DB_POOL_INCREMENT, increment=ORACLE_DB_POOL_INCREMENT,
config_dir=ORACLE_WALLET_DIR, config_dir=ORACLE_WALLET_DIR,
wallet_location=ORACLE_WALLET_DIR, wallet_location=ORACLE_WALLET_DIR,
wallet_password=ORACLE_WALLET_PASSWORD wallet_password=ORACLE_WALLET_PASSWORD,
) )
log.info("Created ADB connection pool with wallet authentication.") log.info("Created ADB connection pool with wallet authentication.")
def _create_dbcs_pool(self) -> None: def _create_dbcs_pool(self) -> None:
""" """
Create connection pool for Oracle Database Cloud Service. Create connection pool for Oracle Database Cloud Service.
Uses basic authentication without wallet. Uses basic authentication without wallet.
""" """
self.pool = oracledb.create_pool( self.pool = oracledb.create_pool(
@ -136,10 +136,10 @@ class Oracle23aiClient(VectorDBBase):
dsn=ORACLE_DB_DSN, dsn=ORACLE_DB_DSN,
min=ORACLE_DB_POOL_MIN, min=ORACLE_DB_POOL_MIN,
max=ORACLE_DB_POOL_MAX, max=ORACLE_DB_POOL_MAX,
increment=ORACLE_DB_POOL_INCREMENT increment=ORACLE_DB_POOL_INCREMENT,
) )
log.info("Created DB connection pool with basic authentication.") log.info("Created DB connection pool with basic authentication.")
def get_connection(self): def get_connection(self):
""" """
Acquire a connection from the connection pool with retry logic. Acquire a connection from the connection pool with retry logic.
@ -154,15 +154,17 @@ class Oracle23aiClient(VectorDBBase):
connection.outputtypehandler = self._output_type_handler connection.outputtypehandler = self._output_type_handler
return connection return connection
except oracledb.DatabaseError as e: except oracledb.DatabaseError as e:
error_obj, = e.args (error_obj,) = e.args
log.exception(f"Connection attempt {attempt + 1} failed: {error_obj.message}") log.exception(
f"Connection attempt {attempt + 1} failed: {error_obj.message}"
)
if attempt < max_retries - 1: if attempt < max_retries - 1:
wait_time = 2 ** attempt wait_time = 2**attempt
log.info(f"Retrying in {wait_time} seconds...") log.info(f"Retrying in {wait_time} seconds...")
time.sleep(wait_time) time.sleep(wait_time)
else: else:
raise raise
def start_health_monitor(self, interval_seconds: int = 60): def start_health_monitor(self, interval_seconds: int = 60):
""" """
@ -171,6 +173,7 @@ class Oracle23aiClient(VectorDBBase):
Args: Args:
interval_seconds (int): Number of seconds between health checks interval_seconds (int): Number of seconds between health checks
""" """
def _monitor(): def _monitor():
while True: while True:
try: try:
@ -191,20 +194,20 @@ class Oracle23aiClient(VectorDBBase):
""" """
try: try:
log.info("Attempting to reinitialize the Oracle connection pool...") log.info("Attempting to reinitialize the Oracle connection pool...")
# Close existing pool if it exists # Close existing pool if it exists
if self.pool: if self.pool:
try: try:
self.pool.close() self.pool.close()
except Exception as close_error: except Exception as close_error:
log.warning(f"Error closing existing pool: {close_error}") log.warning(f"Error closing existing pool: {close_error}")
# Re-create the appropriate connection pool based on DB type # Re-create the appropriate connection pool based on DB type
if ORACLE_DB_USE_WALLET: if ORACLE_DB_USE_WALLET:
self._create_adb_pool() self._create_adb_pool()
else: # DBCS else: # DBCS
self._create_dbcs_pool() self._create_dbcs_pool()
log.info("Connection pool reinitialized.") log.info("Connection pool reinitialized.")
except Exception as e: except Exception as e:
log.exception(f"Failed to reinitialize the connection pool: {e}") log.exception(f"Failed to reinitialize the connection pool: {e}")
@ -219,40 +222,44 @@ class Oracle23aiClient(VectorDBBase):
with connection.cursor() as cursor: with connection.cursor() as cursor:
cursor.execute("SELECT 1 FROM dual") cursor.execute("SELECT 1 FROM dual")
except Exception as e: except Exception as e:
log.exception(f"Connection check failed: {e}, attempting to reconnect pool...") log.exception(
f"Connection check failed: {e}, attempting to reconnect pool..."
)
self._reconnect_pool() self._reconnect_pool()
def _output_type_handler(self, cursor, metadata):
    """
    Handle Oracle vector type conversion.

    Assigned to ``connection.outputtypehandler`` when a connection is handed
    out, so it is consulted for the columns of queries run on that connection.

    Args:
        cursor: Oracle database cursor the column is fetched on
        metadata: Metadata for the column; only ``type_code`` is inspected

    Returns:
        A cursor variable of the column's own type whose ``outconverter`` is
        ``list`` when the column is a VECTOR, otherwise None.
    """
    if metadata.type_code is oracledb.DB_TYPE_VECTOR:
        return cursor.var(
            metadata.type_code, arraysize=cursor.arraysize, outconverter=list
        )

    # Explicit None: every non-VECTOR column gets no custom variable.
    return None
def _initialize_database(self, connection) -> None: def _initialize_database(self, connection) -> None:
""" """
Initialize database schema, tables and indexes. Initialize database schema, tables and indexes.
Creates the document_chunk table and necessary indexes if they don't exist. Creates the document_chunk table and necessary indexes if they don't exist.
Args: Args:
connection: Oracle database connection connection: Oracle database connection
Raises: Raises:
Exception: If schema initialization fails Exception: If schema initialization fails
""" """
with connection.cursor() as cursor: with connection.cursor() as cursor:
try: try:
log.info("Creating Table document_chunk") log.info("Creating Table document_chunk")
cursor.execute(""" cursor.execute(
"""
BEGIN BEGIN
EXECUTE IMMEDIATE ' EXECUTE IMMEDIATE '
CREATE TABLE IF NOT EXISTS document_chunk ( CREATE TABLE IF NOT EXISTS document_chunk (
@ -269,10 +276,12 @@ class Oracle23aiClient(VectorDBBase):
RAISE; RAISE;
END IF; END IF;
END; END;
""") """
)
log.info("Creating Index document_chunk_collection_name_idx") log.info("Creating Index document_chunk_collection_name_idx")
cursor.execute(""" cursor.execute(
"""
BEGIN BEGIN
EXECUTE IMMEDIATE ' EXECUTE IMMEDIATE '
CREATE INDEX IF NOT EXISTS document_chunk_collection_name_idx CREATE INDEX IF NOT EXISTS document_chunk_collection_name_idx
@ -284,10 +293,12 @@ class Oracle23aiClient(VectorDBBase):
RAISE; RAISE;
END IF; END IF;
END; END;
""") """
)
log.info("Creating VECTOR INDEX document_chunk_vector_ivf_idx") log.info("Creating VECTOR INDEX document_chunk_vector_ivf_idx")
cursor.execute(""" cursor.execute(
"""
BEGIN BEGIN
EXECUTE IMMEDIATE ' EXECUTE IMMEDIATE '
CREATE VECTOR INDEX IF NOT EXISTS document_chunk_vector_ivf_idx CREATE VECTOR INDEX IF NOT EXISTS document_chunk_vector_ivf_idx
@ -303,11 +314,12 @@ class Oracle23aiClient(VectorDBBase):
RAISE; RAISE;
END IF; END IF;
END; END;
""") """
)
connection.commit() connection.commit()
log.info("Database initialization completed successfully.") log.info("Database initialization completed successfully.")
except Exception as e: except Exception as e:
connection.rollback() connection.rollback()
log.exception(f"Error during database initialization: {e}") log.exception(f"Error during database initialization: {e}")
@ -316,7 +328,7 @@ class Oracle23aiClient(VectorDBBase):
def check_vector_length(self) -> None: def check_vector_length(self) -> None:
""" """
Check vector length compatibility (placeholder). Check vector length compatibility (placeholder).
This method would check if the configured vector length matches the database schema. This method would check if the configured vector length matches the database schema.
Currently implemented as a placeholder. Currently implemented as a placeholder.
""" """
@ -325,10 +337,10 @@ class Oracle23aiClient(VectorDBBase):
def _vector_to_blob(self, vector: List[float]) -> bytes: def _vector_to_blob(self, vector: List[float]) -> bytes:
""" """
Convert a vector to Oracle BLOB format. Convert a vector to Oracle BLOB format.
Args: Args:
vector (List[float]): The vector to convert vector (List[float]): The vector to convert
Returns: Returns:
bytes: The vector in Oracle BLOB format bytes: The vector in Oracle BLOB format
""" """
@ -337,25 +349,25 @@ class Oracle23aiClient(VectorDBBase):
def adjust_vector_length(self, vector: List[float]) -> List[float]:
    """
    Adjust vector to the expected length if needed.

    The current implementation performs no adjustment: the argument is handed
    back as-is (the very same list object, not a copy).

    Args:
        vector (List[float]): The vector to adjust

    Returns:
        List[float]: The unchanged input vector
    """
    return vector
def _decimal_handler(self, obj): def _decimal_handler(self, obj):
""" """
Handle Decimal objects for JSON serialization. Handle Decimal objects for JSON serialization.
Args: Args:
obj: Object to serialize obj: Object to serialize
Returns: Returns:
float: Converted decimal value float: Converted decimal value
Raises: Raises:
TypeError: If object is not JSON serializable TypeError: If object is not JSON serializable
""" """
@ -366,10 +378,10 @@ class Oracle23aiClient(VectorDBBase):
def _metadata_to_json(self, metadata: Dict) -> str: def _metadata_to_json(self, metadata: Dict) -> str:
""" """
Convert metadata dictionary to JSON string. Convert metadata dictionary to JSON string.
Args: Args:
metadata (Dict): Metadata dictionary metadata (Dict): Metadata dictionary
Returns: Returns:
str: JSON representation of metadata str: JSON representation of metadata
""" """
@ -378,10 +390,10 @@ class Oracle23aiClient(VectorDBBase):
def _json_to_metadata(self, json_str: str) -> Dict: def _json_to_metadata(self, json_str: str) -> Dict:
""" """
Convert JSON string to metadata dictionary. Convert JSON string to metadata dictionary.
Args: Args:
json_str (str): JSON string json_str (str): JSON string
Returns: Returns:
Dict: Metadata dictionary Dict: Metadata dictionary
""" """
@ -390,14 +402,14 @@ class Oracle23aiClient(VectorDBBase):
def insert(self, collection_name: str, items: List[VectorItem]) -> None: def insert(self, collection_name: str, items: List[VectorItem]) -> None:
""" """
Insert vector items into the database. Insert vector items into the database.
Args: Args:
collection_name (str): Name of the collection collection_name (str): Name of the collection
items (List[VectorItem]): List of vector items to insert items (List[VectorItem]): List of vector items to insert
Raises: Raises:
Exception: If insertion fails Exception: If insertion fails
Example: Example:
>>> client = Oracle23aiClient() >>> client = Oracle23aiClient()
>>> items = [ >>> items = [
@ -407,28 +419,33 @@ class Oracle23aiClient(VectorDBBase):
>>> client.insert("my_collection", items) >>> client.insert("my_collection", items)
""" """
log.info(f"Inserting {len(items)} items into collection '{collection_name}'.") log.info(f"Inserting {len(items)} items into collection '{collection_name}'.")
with self.get_connection() as connection: with self.get_connection() as connection:
try: try:
with connection.cursor() as cursor: with connection.cursor() as cursor:
for item in items: for item in items:
vector_blob = self._vector_to_blob(item["vector"]) vector_blob = self._vector_to_blob(item["vector"])
metadata_json = self._metadata_to_json(item["metadata"]) metadata_json = self._metadata_to_json(item["metadata"])
cursor.execute(""" cursor.execute(
"""
INSERT INTO document_chunk INSERT INTO document_chunk
(id, collection_name, text, vmetadata, vector) (id, collection_name, text, vmetadata, vector)
VALUES (:id, :collection_name, :text, :metadata, :vector) VALUES (:id, :collection_name, :text, :metadata, :vector)
""", { """,
'id': item["id"], {
'collection_name': collection_name, "id": item["id"],
'text': item["text"], "collection_name": collection_name,
'metadata': metadata_json, "text": item["text"],
'vector': vector_blob "metadata": metadata_json,
}) "vector": vector_blob,
},
)
connection.commit() connection.commit()
log.info(f"Successfully inserted {len(items)} items into collection '{collection_name}'.") log.info(
f"Successfully inserted {len(items)} items into collection '{collection_name}'."
)
except Exception as e: except Exception as e:
connection.rollback() connection.rollback()
@ -438,14 +455,14 @@ class Oracle23aiClient(VectorDBBase):
def upsert(self, collection_name: str, items: List[VectorItem]) -> None: def upsert(self, collection_name: str, items: List[VectorItem]) -> None:
""" """
Update or insert vector items into the database. Update or insert vector items into the database.
If an item with the same ID exists, it will be updated; If an item with the same ID exists, it will be updated;
otherwise, it will be inserted. otherwise, it will be inserted.
Args: Args:
collection_name (str): Name of the collection collection_name (str): Name of the collection
items (List[VectorItem]): List of vector items to upsert items (List[VectorItem]): List of vector items to upsert
Raises: Raises:
Exception: If upsert operation fails Exception: If upsert operation fails
@ -465,8 +482,9 @@ class Oracle23aiClient(VectorDBBase):
for item in items: for item in items:
vector_blob = self._vector_to_blob(item["vector"]) vector_blob = self._vector_to_blob(item["vector"])
metadata_json = self._metadata_to_json(item["metadata"]) metadata_json = self._metadata_to_json(item["metadata"])
cursor.execute(""" cursor.execute(
"""
MERGE INTO document_chunk d MERGE INTO document_chunk d
USING (SELECT :merge_id as id FROM dual) s USING (SELECT :merge_id as id FROM dual) s
ON (d.id = s.id) ON (d.id = s.id)
@ -479,21 +497,25 @@ class Oracle23aiClient(VectorDBBase):
WHEN NOT MATCHED THEN WHEN NOT MATCHED THEN
INSERT (id, collection_name, text, vmetadata, vector) INSERT (id, collection_name, text, vmetadata, vector)
VALUES (:ins_id, :ins_collection_name, :ins_text, :ins_metadata, :ins_vector) VALUES (:ins_id, :ins_collection_name, :ins_text, :ins_metadata, :ins_vector)
""", { """,
'merge_id': item["id"], {
'upd_collection_name': collection_name, "merge_id": item["id"],
'upd_text': item["text"], "upd_collection_name": collection_name,
'upd_metadata': metadata_json, "upd_text": item["text"],
'upd_vector': vector_blob, "upd_metadata": metadata_json,
'ins_id': item["id"], "upd_vector": vector_blob,
'ins_collection_name': collection_name, "ins_id": item["id"],
'ins_text': item["text"], "ins_collection_name": collection_name,
'ins_metadata': metadata_json, "ins_text": item["text"],
'ins_vector': vector_blob "ins_metadata": metadata_json,
}) "ins_vector": vector_blob,
},
)
connection.commit() connection.commit()
log.info(f"Successfully upserted {len(items)} items into collection '{collection_name}'.") log.info(
f"Successfully upserted {len(items)} items into collection '{collection_name}'."
)
except Exception as e: except Exception as e:
connection.rollback() connection.rollback()
@ -501,24 +523,21 @@ class Oracle23aiClient(VectorDBBase):
raise raise
def search( def search(
self, self, collection_name: str, vectors: List[List[Union[float, int]]], limit: int
collection_name: str,
vectors: List[List[Union[float, int]]],
limit: int
) -> Optional[SearchResult]: ) -> Optional[SearchResult]:
""" """
Search for similar vectors in the database. Search for similar vectors in the database.
Performs vector similarity search using cosine distance. Performs vector similarity search using cosine distance.
Args: Args:
collection_name (str): Name of the collection to search collection_name (str): Name of the collection to search
vectors (List[List[Union[float, int]]]): Query vectors to find similar items for vectors (List[List[Union[float, int]]]): Query vectors to find similar items for
limit (int): Maximum number of results to return per query limit (int): Maximum number of results to return per query
Returns: Returns:
Optional[SearchResult]: Search results containing ids, distances, documents, and metadata Optional[SearchResult]: Search results containing ids, distances, documents, and metadata
Example: Example:
>>> client = Oracle23aiClient() >>> client = Oracle23aiClient()
>>> query_vector = [0.1, 0.2, 0.3, ...] # Must match VECTOR_LENGTH >>> query_vector = [0.1, 0.2, 0.3, ...] # Must match VECTOR_LENGTH
@ -528,26 +547,29 @@ class Oracle23aiClient(VectorDBBase):
... for i, (id, dist) in enumerate(zip(results.ids[0], results.distances[0])): ... for i, (id, dist) in enumerate(zip(results.ids[0], results.distances[0])):
... log.info(f"Match {i+1}: id={id}, distance={dist}") ... log.info(f"Match {i+1}: id={id}, distance={dist}")
""" """
log.info(f"Searching items from collection '{collection_name}' with limit {limit}.") log.info(
f"Searching items from collection '{collection_name}' with limit {limit}."
)
try: try:
if not vectors: if not vectors:
log.warning("No vectors provided for search.") log.warning("No vectors provided for search.")
return None return None
num_queries = len(vectors) num_queries = len(vectors)
ids = [[] for _ in range(num_queries)] ids = [[] for _ in range(num_queries)]
distances = [[] for _ in range(num_queries)] distances = [[] for _ in range(num_queries)]
documents = [[] for _ in range(num_queries)] documents = [[] for _ in range(num_queries)]
metadatas = [[] for _ in range(num_queries)] metadatas = [[] for _ in range(num_queries)]
with self.get_connection() as connection: with self.get_connection() as connection:
with connection.cursor() as cursor: with connection.cursor() as cursor:
for qid, vector in enumerate(vectors): for qid, vector in enumerate(vectors):
vector_blob = self._vector_to_blob(vector) vector_blob = self._vector_to_blob(vector)
cursor.execute(""" cursor.execute(
"""
SELECT dc.id, dc.text, SELECT dc.id, dc.text,
JSON_SERIALIZE(dc.vmetadata RETURNING VARCHAR2(4096)) as vmetadata, JSON_SERIALIZE(dc.vmetadata RETURNING VARCHAR2(4096)) as vmetadata,
VECTOR_DISTANCE(dc.vector, :query_vector, COSINE) as distance VECTOR_DISTANCE(dc.vector, :query_vector, COSINE) as distance
@ -555,54 +577,60 @@ class Oracle23aiClient(VectorDBBase):
WHERE dc.collection_name = :collection_name WHERE dc.collection_name = :collection_name
ORDER BY VECTOR_DISTANCE(dc.vector, :query_vector, COSINE) ORDER BY VECTOR_DISTANCE(dc.vector, :query_vector, COSINE)
FETCH APPROX FIRST :limit ROWS ONLY FETCH APPROX FIRST :limit ROWS ONLY
""", { """,
'query_vector': vector_blob, {
'collection_name': collection_name, "query_vector": vector_blob,
'limit': limit "collection_name": collection_name,
}) "limit": limit,
},
)
results = cursor.fetchall() results = cursor.fetchall()
for row in results: for row in results:
ids[qid].append(row[0]) ids[qid].append(row[0])
documents[qid].append(row[1].read() if isinstance(row[1], oracledb.LOB) else str(row[1])) documents[qid].append(
row[1].read()
if isinstance(row[1], oracledb.LOB)
else str(row[1])
)
# 🔧 FIXED: Parse JSON metadata properly # 🔧 FIXED: Parse JSON metadata properly
metadata_str = row[2].read() if isinstance(row[2], oracledb.LOB) else row[2] metadata_str = (
row[2].read()
if isinstance(row[2], oracledb.LOB)
else row[2]
)
metadatas[qid].append(self._json_to_metadata(metadata_str)) metadatas[qid].append(self._json_to_metadata(metadata_str))
distances[qid].append(float(row[3])) distances[qid].append(float(row[3]))
log.info(f"Search completed. Found {sum(len(ids[i]) for i in range(num_queries))} total results.") log.info(
f"Search completed. Found {sum(len(ids[i]) for i in range(num_queries))} total results."
)
return SearchResult( return SearchResult(
ids=ids, ids=ids, distances=distances, documents=documents, metadatas=metadatas
distances=distances,
documents=documents,
metadatas=metadatas
) )
except Exception as e: except Exception as e:
log.exception(f"Error during search: {e}") log.exception(f"Error during search: {e}")
return None return None
def query(
    self, collection_name: str, filter: Dict, limit: Optional[int] = None
) -> Optional[GetResult]:
    """
    Query items based on metadata filters.

    Each ``filter`` entry becomes an equality test of
    ``JSON_VALUE(vmetadata, '$.<key>')`` against ``str(value)``; all tests are
    ANDed together with the collection-name condition.

    Args:
        collection_name (str): Name of the collection to query
        filter (Dict): Metadata key/value pairs that must all match. Keys must
            be plain identifiers (ASCII letters, digits, underscore, not
            starting with a digit) because they are embedded in the SQL text
            as a JSON path and cannot be passed as bind variables.
        limit (Optional[int]): Maximum number of results to return; a falsy
            value (None or 0) falls back to 100

    Returns:
        Optional[GetResult]: ids, documents and metadatas, each wrapped in a
        single outer list. None when no row matches or when any error occurs
        (errors, including a rejected filter key, are logged and not raised).

    Example:
        >>> client = Oracle23aiClient()
        >>> filter = {"source": "doc1", "category": "finance"}
        >>> results = client.query("my_collection", filter, limit=20)
        >>> if results:
        ...     print(f"Found {len(results.ids[0])} matching documents")
    """
    import re  # local import: only needed for filter-key validation

    log.info(f"Querying items from collection '{collection_name}' with filters.")

    try:
        limit = limit or 100

        query = """
            SELECT id, text, JSON_SERIALIZE(vmetadata RETURNING VARCHAR2(4096)) as vmetadata
            FROM document_chunk
            WHERE collection_name = :collection_name
        """
        params = {"collection_name": collection_name}

        for i, (key, value) in enumerate(filter.items()):
            # The key ends up inside the SQL string itself, so anything that
            # is not a bare identifier is rejected to rule out SQL injection.
            if not isinstance(key, str) or not re.fullmatch(
                r"[A-Za-z_][A-Za-z0-9_]*", key
            ):
                raise ValueError(f"Unsupported metadata filter key: {key!r}")

            param_name = f"value_{i}"
            query += f" AND JSON_VALUE(vmetadata, '$.{key}' RETURNING VARCHAR2(4096)) = :{param_name}"
            params[param_name] = str(value)

        query += " FETCH FIRST :limit ROWS ONLY"
        params["limit"] = limit

        with self.get_connection() as connection:
            with connection.cursor() as cursor:
                cursor.execute(query, params)
                results = cursor.fetchall()

            if not results:
                log.info("No results found for query.")
                return None

            # Rows are converted before the connection is released: LOB
            # values can only be read while their connection is still open.
            ids = [[row[0] for row in results]]
            documents = [
                [
                    row[1].read() if isinstance(row[1], oracledb.LOB) else str(row[1])
                    for row in results
                ]
            ]
            # Metadata arrives as serialized JSON text (possibly as a LOB)
            # and is parsed back into a dictionary.
            metadatas = [
                [
                    self._json_to_metadata(
                        row[2].read() if isinstance(row[2], oracledb.LOB) else row[2]
                    )
                    for row in results
                ]
            ]

        log.info(f"Query completed. Found {len(results)} results.")

        return GetResult(ids=ids, documents=documents, metadatas=metadatas)

    except Exception as e:
        log.exception(f"Error during query: {e}")
        return None
def get(
    self, collection_name: str, limit: Optional[int] = None
) -> Optional[GetResult]:
    """
    Get all items in a collection.

    Retrieves items from a specified collection up to the limit.

    Args:
        collection_name (str): Name of the collection to retrieve
        limit (Optional[int]): Maximum number of items to retrieve; a falsy
            value (None or 0) falls back to 1000. Optional, so existing
            ``get(collection_name)`` callers keep working.

    Returns:
        Optional[GetResult]: ids, documents and metadatas, each wrapped in a
        single outer list; None when the collection has no rows

    Example:
        >>> client = Oracle23aiClient()
        >>> results = client.get("my_collection", limit=50)
        >>> if results:
        ...     print(f"Retrieved {len(results.ids[0])} documents from collection")
    """
    # Resolve the default before the first use. Previously ``limit`` was read
    # here without ever being defined, so every call failed with
    # UnboundLocalError before reaching the database.
    limit = limit or 1000

    log.info(
        f"Getting items from collection '{collection_name}' with limit {limit}."
    )

    try:
        with self.get_connection() as connection:
            with connection.cursor() as cursor:
                cursor.execute(
                    """
                    SELECT /*+ MONITOR */ id, text, JSON_SERIALIZE(vmetadata RETURNING VARCHAR2(4096)) as vmetadata
                    FROM document_chunk
                    WHERE collection_name = :collection_name
                    FETCH FIRST :limit ROWS ONLY
                    """,
                    {"collection_name": collection_name, "limit": limit},
                )

                results = cursor.fetchall()

            if not results:
                log.info("No results found.")
                return None

            # Rows are converted before the connection is released: LOB
            # values can only be read while their connection is still open.
            ids = [[row[0] for row in results]]
            documents = [
                [
                    row[1].read() if isinstance(row[1], oracledb.LOB) else str(row[1])
                    for row in results
                ]
            ]
            # Metadata arrives as serialized JSON text (possibly as a LOB)
            # and is parsed back into a dictionary.
            metadatas = [
                [
                    self._json_to_metadata(
                        row[2].read() if isinstance(row[2], oracledb.LOB) else row[2]
                    )
                    for row in results
                ]
            ]

        return GetResult(ids=ids, documents=documents, metadatas=metadatas)

    except Exception as e:
        log.exception(f"Error during get: {e}")
@ -725,17 +768,17 @@ class Oracle23aiClient(VectorDBBase):
) -> None: ) -> None:
""" """
Delete items from the database. Delete items from the database.
Deletes items from a collection based on IDs or metadata filters. Deletes items from a collection based on IDs or metadata filters.
Args: Args:
collection_name (str): Name of the collection to delete from collection_name (str): Name of the collection to delete from
ids (Optional[List[str]]): Specific item IDs to delete ids (Optional[List[str]]): Specific item IDs to delete
filter (Optional[Dict[str, Any]]): Metadata filters for deletion filter (Optional[Dict[str, Any]]): Metadata filters for deletion
Raises: Raises:
Exception: If deletion fails Exception: If deletion fails
Example: Example:
>>> client = Oracle23aiClient() >>> client = Oracle23aiClient()
>>> # Delete specific items by ID >>> # Delete specific items by ID
@ -744,32 +787,34 @@ class Oracle23aiClient(VectorDBBase):
>>> client.delete("my_collection", filter={"source": "deprecated_source"}) >>> client.delete("my_collection", filter={"source": "deprecated_source"})
""" """
log.info(f"Deleting items from collection '{collection_name}'.") log.info(f"Deleting items from collection '{collection_name}'.")
try: try:
query = "DELETE FROM document_chunk WHERE collection_name = :collection_name" query = (
params = {'collection_name': collection_name} "DELETE FROM document_chunk WHERE collection_name = :collection_name"
)
params = {"collection_name": collection_name}
if ids: if ids:
# 🔧 FIXED: Use proper parameterized query to prevent SQL injection # 🔧 FIXED: Use proper parameterized query to prevent SQL injection
placeholders = ','.join([f':id_{i}' for i in range(len(ids))]) placeholders = ",".join([f":id_{i}" for i in range(len(ids))])
query += f" AND id IN ({placeholders})" query += f" AND id IN ({placeholders})"
for i, id_val in enumerate(ids): for i, id_val in enumerate(ids):
params[f'id_{i}'] = id_val params[f"id_{i}"] = id_val
if filter: if filter:
for i, (key, value) in enumerate(filter.items()): for i, (key, value) in enumerate(filter.items()):
param_name = f"value_{i}" param_name = f"value_{i}"
query += f" AND JSON_VALUE(vmetadata, '$.{key}' RETURNING VARCHAR2(4096)) = :{param_name}" query += f" AND JSON_VALUE(vmetadata, '$.{key}' RETURNING VARCHAR2(4096)) = :{param_name}"
params[param_name] = str(value) params[param_name] = str(value)
with self.get_connection() as connection: with self.get_connection() as connection:
with connection.cursor() as cursor: with connection.cursor() as cursor:
cursor.execute(query, params) cursor.execute(query, params)
deleted = cursor.rowcount deleted = cursor.rowcount
connection.commit() connection.commit()
log.info(f"Deleted {deleted} items from collection '{collection_name}'.") log.info(f"Deleted {deleted} items from collection '{collection_name}'.")
except Exception as e: except Exception as e:
log.exception(f"Error during delete: {e}") log.exception(f"Error during delete: {e}")
raise raise
@ -777,26 +822,28 @@ class Oracle23aiClient(VectorDBBase):
def reset(self) -> None: def reset(self) -> None:
""" """
Reset the database by deleting all items. Reset the database by deleting all items.
Deletes all items from the document_chunk table. Deletes all items from the document_chunk table.
Raises: Raises:
Exception: If reset fails Exception: If reset fails
Example: Example:
>>> client = Oracle23aiClient() >>> client = Oracle23aiClient()
>>> client.reset() # Warning: Removes all data! >>> client.reset() # Warning: Removes all data!
""" """
log.info("Resetting database - deleting all items.") log.info("Resetting database - deleting all items.")
try: try:
with self.get_connection() as connection: with self.get_connection() as connection:
with connection.cursor() as cursor: with connection.cursor() as cursor:
cursor.execute("DELETE FROM document_chunk") cursor.execute("DELETE FROM document_chunk")
deleted = cursor.rowcount deleted = cursor.rowcount
connection.commit() connection.commit()
log.info(f"Reset complete. Deleted {deleted} items from 'document_chunk' table.") log.info(
f"Reset complete. Deleted {deleted} items from 'document_chunk' table."
)
except Exception as e: except Exception as e:
log.exception(f"Error during reset: {e}") log.exception(f"Error during reset: {e}")
@ -805,16 +852,16 @@ class Oracle23aiClient(VectorDBBase):
def close(self) -> None: def close(self) -> None:
""" """
Close the database connection pool. Close the database connection pool.
Properly closes the connection pool and releases all resources. Properly closes the connection pool and releases all resources.
Example: Example:
>>> client = Oracle23aiClient() >>> client = Oracle23aiClient()
>>> # After finishing all operations >>> # After finishing all operations
>>> client.close() >>> client.close()
""" """
try: try:
if hasattr(self, 'pool') and self.pool: if hasattr(self, "pool") and self.pool:
self.pool.close() self.pool.close()
log.info("Oracle Vector Search connection pool closed.") log.info("Oracle Vector Search connection pool closed.")
except Exception as e: except Exception as e:
@ -823,13 +870,13 @@ class Oracle23aiClient(VectorDBBase):
def has_collection(self, collection_name: str) -> bool: def has_collection(self, collection_name: str) -> bool:
""" """
Check if a collection exists. Check if a collection exists.
Args: Args:
collection_name (str): Name of the collection to check collection_name (str): Name of the collection to check
Returns: Returns:
bool: True if the collection exists, False otherwise bool: True if the collection exists, False otherwise
Example: Example:
>>> client = Oracle23aiClient() >>> client = Oracle23aiClient()
>>> if client.has_collection("my_collection"): >>> if client.has_collection("my_collection"):
@ -840,17 +887,20 @@ class Oracle23aiClient(VectorDBBase):
try: try:
with self.get_connection() as connection: with self.get_connection() as connection:
with connection.cursor() as cursor: with connection.cursor() as cursor:
cursor.execute(""" cursor.execute(
"""
SELECT COUNT(*) SELECT COUNT(*)
FROM document_chunk FROM document_chunk
WHERE collection_name = :collection_name WHERE collection_name = :collection_name
FETCH FIRST 1 ROWS ONLY FETCH FIRST 1 ROWS ONLY
""", {'collection_name': collection_name}) """,
{"collection_name": collection_name},
)
count = cursor.fetchone()[0] count = cursor.fetchone()[0]
return count > 0 return count > 0
except Exception as e: except Exception as e:
log.exception(f"Error checking collection existence: {e}") log.exception(f"Error checking collection existence: {e}")
return False return False
@ -858,31 +908,36 @@ class Oracle23aiClient(VectorDBBase):
def delete_collection(self, collection_name: str) -> None:
    """
    Remove every stored chunk that belongs to one collection.

    Runs a single parameterized DELETE against the document_chunk table,
    matching rows on their collection_name column, then commits the
    transaction and logs how many rows were removed.

    Args:
        collection_name (str): Name of the collection to delete

    Raises:
        Exception: Any error raised while deleting is logged and re-raised

    Example:
        >>> client = Oracle23aiClient()
        >>> client.delete_collection("obsolete_collection")
    """
    log.info(f"Deleting collection '{collection_name}'.")

    try:
        # The collection name is passed as a bind variable, never
        # interpolated into the SQL text.
        bind_values = {"collection_name": collection_name}

        with self.get_connection() as connection:
            with connection.cursor() as cursor:
                cursor.execute(
                    """
                    DELETE FROM document_chunk
                    WHERE collection_name = :collection_name
                    """,
                    bind_values,
                )
                # rowcount must be read while the cursor is still open.
                removed_rows = cursor.rowcount
            connection.commit()

        log.info(
            f"Collection '{collection_name}' deleted. Removed {removed_rows} items."
        )
    except Exception as e:
        log.exception(f"Error deleting collection '{collection_name}': {e}")
        raise

View file

@ -402,11 +402,11 @@ def convert_openapi_to_tool_payload(openapi_spec):
"type": param_schema.get("type"), "type": param_schema.get("type"),
"description": description, "description": description,
} }
# Include items property for array types (required by OpenAI) # Include items property for array types (required by OpenAI)
if param_schema.get("type") == "array" and "items" in param_schema: if param_schema.get("type") == "array" and "items" in param_schema:
param_property["items"] = param_schema["items"] param_property["items"] = param_schema["items"]
tool["parameters"]["properties"][param_name] = param_property tool["parameters"]["properties"][param_name] = param_property
if param.get("required"): if param.get("required"):
tool["parameters"]["required"].append(param_name) tool["parameters"]["required"].append(param_name)