Add new feature: Oracle 23ai vector search

This commit is contained in:
Oracle Public Cloud User 2025-07-07 12:13:05 +00:00
parent b56dbb26be
commit 25e241ae41
7 changed files with 9311 additions and 2546 deletions

View file

@ -1855,12 +1855,27 @@ ORACLE_WALLET_DIR = os.environ.get("ORACLE_WALLET_DIR", None)
ORACLE_WALLET_PASSWORD = os.environ.get("ORACLE_WALLET_PASSWORD", None)
# Cast to int like the pool settings below, so the value has the same type
# whether it comes from the environment (str) or from the default (int).
ORACLE_VECTOR_LENGTH = int(os.environ.get("ORACLE_VECTOR_LENGTH", 768))
ORACLE_DB_POOL_MIN = int(os.environ.get("ORACLE_DB_POOL_MIN", 2))
ORACLE_DB_POOL_MAX = int(os.environ.get("ORACLE_DB_POOL_MAX", 10))
ORACLE_DB_POOL_INCREMENT = int(os.environ.get("ORACLE_DB_POOL_INCREMENT", 1))

# Startup diagnostics for the Oracle settings.
log.info(f"VECTOR_DB: {VECTOR_DB}")
log.info(f"ORACLE_DB_USE_WALLET: {ORACLE_DB_USE_WALLET}/type: {type(ORACLE_DB_USE_WALLET)}")
log.info(f"ORACLE_DB_USER: {ORACLE_DB_USER}/type: {type(ORACLE_DB_USER)}")
# Secrets must never be written to the log: report only whether they are set.
log.info(f"ORACLE_DB_PASSWORD: {'<set>' if ORACLE_DB_PASSWORD else '<not set>'}")
log.info(f"ORACLE_DB_DSN: {ORACLE_DB_DSN}/type: {type(ORACLE_DB_DSN)}")
log.info(f"ORACLE_WALLET_DIR: {ORACLE_WALLET_DIR}/type: {type(ORACLE_WALLET_DIR)}")
log.info(f"ORACLE_WALLET_PASSWORD: {'<set>' if ORACLE_WALLET_PASSWORD else '<not set>'}")
log.info(f"ORACLE_VECTOR_LENGTH: {ORACLE_VECTOR_LENGTH}")
log.info(f"ORACLE_DB_POOL_MIN: {ORACLE_DB_POOL_MIN}")
log.info(f"ORACLE_DB_POOL_MAX: {ORACLE_DB_POOL_MAX}")
log.info(f"ORACLE_DB_POOL_INCREMENT: {ORACLE_DB_POOL_INCREMENT}")
# Validate the Oracle settings only when Oracle 23ai is the selected vector
# store. The "missing value" checks are parenthesized because `and` binds
# tighter than `or`: without the parentheses an unset ORACLE_DB_PASSWORD or
# ORACLE_DB_DSN raises here even when a different VECTOR_DB is configured.
if VECTOR_DB == "oracle23ai" and (
    not ORACLE_DB_USER or not ORACLE_DB_PASSWORD or not ORACLE_DB_DSN
):
    raise ValueError(
        "Oracle23ai requires setting ORACLE_DB_USER, ORACLE_DB_PASSWORD, and ORACLE_DB_DSN."
    )
# Wallet settings are required only when wallet authentication is enabled.
if VECTOR_DB == "oracle23ai" and ORACLE_DB_USE_WALLET and (
    not ORACLE_WALLET_DIR or not ORACLE_WALLET_PASSWORD
):
    raise ValueError(
        "Oracle23ai requires setting ORACLE_WALLET_DIR and ORACLE_WALLET_PASSWORD when using wallet authentication."
    )

View file

@ -1,6 +1,32 @@
"""
# ORACLE23AI (Oracle23ai Vector Search) : env.examples
VECTOR_DB = "oracle23ai"
## DBCS or oracle 23ai free
ORACLE_DB_USE_WALLET = false
ORACLE_DB_USER = "DEMOUSER"
ORACLE_DB_PASSWORD = "Welcome123456"
ORACLE_DB_DSN = "localhost:1521/FREEPDB1"
## ADW or ATP
# ORACLE_DB_USE_WALLET = true
# ORACLE_DB_USER = "DEMOUSER"
# ORACLE_DB_PASSWORD = "Welcome123456"
# ORACLE_DB_DSN = "medium"
# ORACLE_DB_DSN = "(description= (retry_count=3)(retry_delay=3)(address=(protocol=tcps)(port=1522)(host=xx.oraclecloud.com))(connect_data=(service_name=yy.adb.oraclecloud.com))(security=(ssl_server_dn_match=no)))"
# ORACLE_WALLET_DIR = "/home/opc/adb_wallet"
# ORACLE_WALLET_PASSWORD = "Welcome1"
ORACLE_VECTOR_LENGTH = 768
ORACLE_DB_POOL_MIN = 2
ORACLE_DB_POOL_MAX = 10
ORACLE_DB_POOL_INCREMENT = 1
"""
from typing import Optional, List, Dict, Any
from decimal import Decimal
import logging
import os
import oracledb
@ -12,14 +38,27 @@ from open_webui.retrieval.vector.main import (
)
from open_webui.config import (
ORACLE_DB_USE_WALLET
ORACLE_DB_USE_WALLET,
ORACLE_DB_USER,
ORACLE_DB_PASSWORD,
ORACLE_DB_DSN,
ORACLE_WALLET_DIR,
ORACLE_WALLET_PASSWORD,
ORACLE_VECTOR_LENGTH,
ORACLE_DB_POOL_MIN,
ORACLE_DB_POOL_MAX,
ORACLE_DB_POOL_INCREMENT,
)
from open_webui.env import SRC_LOG_LEVELS
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["RAG"])
# ORACLE_DB_USE_WALLET = os.environ.get("ORACLE_DB_USE_WALLET", "DBCS")
# ORACLE_DB_USER = os.environ.get("ORACLE_DB_USER", "DEMOUSER")
# ORACLE_DB_PASSWORD = os.environ.get("ORACLE_DB_PASSWORD", "Welcome123456")
# ORACLE_DB_DSN = os.environ.get("ORACLE_DB_DSN", "medium")
# ORACLE_DB_DSN = os.environ.get("ORACLE_DB_DSN", "(description= (retry_count=3)(retry_delay=3)(address=(protocol=tcps)(port=1522)(host=xx.oraclecloud.com))(connect_data=(service_name=yy.adb.oraclecloud.com))(security=(ssl_server_dn_match=no)))")
class Oracle23aiClient(VectorDBBase):
"""
@ -40,48 +79,145 @@ class Oracle23aiClient(VectorDBBase):
Creates a connection pool with min=2 and max=10 connections, initializes
the database schema if needed, and sets up necessary tables and indexes.
Args:
db_type (str): Database type - "ADB" for Autonomous Database or "DBCS" for Database Cloud Service
Raises:
ValueError: If required configuration parameters are missing
Exception: If database initialization fails
"""
try:
if not ORACLE_DB_DSN:
raise ValueError("ORACLE_DB_DSN is required for Oracle Vector Search")
self.pool = oracledb.create_pool(
user=ORACLE_DB_USER,
password=ORACLE_DB_PASSWORD,
dsn=ORACLE_DB_DSN,
min=2,
max=10,
increment=1,
config_dir=ORACLE_WALLET_DIR,
wallet_location=ORACLE_WALLET_DIR,
wallet_password=ORACLE_WALLET_PASSWORD
)
# Create the appropriate connection pool based on DB type
if ORACLE_DB_USE_WALLET:
self._create_adb_pool()
else: # DBCS
self._create_dbcs_pool()
log.info(f" >>> Creating Connection Pool [{ORACLE_DB_USER}:**@{ORACLE_DB_DSN}]")
dsn = ORACLE_DB_DSN
log.info(f" >>> Creating Connection Pool [{ORACLE_DB_USER}:**@{dsn}]")
with self.get_connection() as connection:
log.info("Connection version:", connection.version)
self._initialize_database(connection)
print("Oracle Vector Search initialization complete.")
log.info("Oracle Vector Search initialization complete.")
except Exception as e:
print(f"Error during Oracle Vector Search initialization: {e}")
log.exception(f"Error during Oracle Vector Search initialization: {e}")
raise
def _create_adb_pool(self) -> None:
    """
    Build the wallet-authenticated connection pool and store it on self.pool.

    User, password, DSN and pool sizing come from the ORACLE_DB_* settings;
    ORACLE_WALLET_DIR is passed as both the config directory and the wallet
    location, together with ORACLE_WALLET_PASSWORD.
    """
    pool_options = {
        "user": ORACLE_DB_USER,
        "password": ORACLE_DB_PASSWORD,
        "dsn": ORACLE_DB_DSN,
        "min": ORACLE_DB_POOL_MIN,
        "max": ORACLE_DB_POOL_MAX,
        "increment": ORACLE_DB_POOL_INCREMENT,
        # The same directory serves as config dir and wallet location.
        "config_dir": ORACLE_WALLET_DIR,
        "wallet_location": ORACLE_WALLET_DIR,
        "wallet_password": ORACLE_WALLET_PASSWORD,
    }
    self.pool = oracledb.create_pool(**pool_options)
    log.info("Created ADB connection pool with wallet authentication.")
def _create_dbcs_pool(self) -> None:
    """
    Build the user/password connection pool (no wallet) and store it on
    self.pool.

    User, password, DSN and pool sizing come from the ORACLE_DB_* settings.
    """
    pool_options = {
        "user": ORACLE_DB_USER,
        "password": ORACLE_DB_PASSWORD,
        "dsn": ORACLE_DB_DSN,
        "min": ORACLE_DB_POOL_MIN,
        "max": ORACLE_DB_POOL_MAX,
        "increment": ORACLE_DB_POOL_INCREMENT,
    }
    self.pool = oracledb.create_pool(**pool_options)
    log.info("Created DB connection pool with basic authentication.")
def get_connection(self):
    """
    Acquire a connection from the connection pool with retry logic.

    Up to three attempts are made. After a failed attempt the method sleeps
    for 2 ** attempt seconds (1s, then 2s) before trying again; the error
    from the final attempt is re-raised to the caller.

    Returns:
        connection: A database connection with output type handler configured

    Raises:
        oracledb.DatabaseError: If every attempt to acquire a connection fails
    """
    # Function-scope import, as in the original retry code: `time` is only
    # needed for the back-off sleep.
    import time

    max_retries = 3
    for attempt in range(max_retries):
        try:
            connection = self.pool.acquire()
            connection.outputtypehandler = self._output_type_handler
            return connection
        except oracledb.DatabaseError as e:
            # Format the exception itself rather than unpacking `e.args`:
            # unpacking raises ValueError unless args has exactly one
            # element, which would hide the real database error.
            log.exception(f"Connection attempt {attempt + 1} failed: {e}")
            if attempt == max_retries - 1:
                raise
            wait_time = 2 ** attempt
            log.info(f"Retrying in {wait_time} seconds...")
            time.sleep(wait_time)
def start_health_monitor(self, interval_seconds: int = 60):
    """
    Start a background thread to periodically check the health of the connection pool.

    The thread is a daemon and loops forever: each cycle calls
    self.ensure_connection(), logs the outcome, then sleeps for
    interval_seconds. An exception raised by a check is logged and does not
    stop the loop.

    Args:
        interval_seconds (int): Number of seconds between health checks
    """
    # Function-scope imports keep this method self-contained: the module's
    # visible top-level imports include neither `threading` nor `time`
    # (NOTE(review): confirm against the full import block).
    import threading
    import time

    def _monitor():
        while True:
            try:
                log.info("[HealthCheck] Running periodic DB health check...")
                self.ensure_connection()
                log.info("[HealthCheck] Connection is healthy.")
            except Exception as e:
                log.exception(f"[HealthCheck] Connection health check failed: {e}")
            time.sleep(interval_seconds)

    thread = threading.Thread(target=_monitor, daemon=True)
    thread.start()
    log.info(f"Started DB health monitor every {interval_seconds} seconds.")
def _reconnect_pool(self):
    """
    Attempt to reinitialize the connection pool if it's been closed or broken.

    Takes no arguments: the pool type is chosen from ORACLE_DB_USE_WALLET
    (wallet-authenticated pool when truthy, basic-authentication pool
    otherwise). Once the new pool is in place, the pool it replaced is
    closed on a best-effort basis so that repeated reconnects do not
    accumulate abandoned pools.

    Raises:
        Exception: Re-raised (after logging) if the new pool cannot be created
    """
    # Remember the pool being replaced so it can be released afterwards.
    previous_pool = getattr(self, "pool", None)
    try:
        log.info("Attempting to reinitialize the Oracle connection pool...")
        # Re-create the appropriate connection pool based on DB type
        if ORACLE_DB_USE_WALLET:
            self._create_adb_pool()
        else:  # DBCS
            self._create_dbcs_pool()
        log.info("Connection pool reinitialized.")
    except Exception as e:
        log.exception(f"Failed to reinitialize the connection pool: {e}")
        raise

    # Best-effort cleanup: a failure to close the old (possibly already
    # broken) pool must not undo a successful reconnect.
    if previous_pool is not None and previous_pool is not self.pool:
        try:
            previous_pool.close(force=True)
        except Exception as e:
            log.warning(f"Could not close the previous connection pool: {e}")
def ensure_connection(self):
    """
    Probe the database with a trivial query and rebuild the pool on failure.

    A connection is taken via self.get_connection() and "SELECT 1 FROM dual"
    is executed on it. Any exception raised along the way is logged and
    followed by a call to self._reconnect_pool().
    """
    try:
        with self.get_connection() as conn, conn.cursor() as probe:
            probe.execute("SELECT 1 FROM dual")
    except Exception as err:
        log.exception(f"Connection check failed: {err}, attempting to reconnect pool...")
        self._reconnect_pool()
def _output_type_handler(self, cursor, metadata):
"""
Handle Oracle vector type conversion.
@ -97,6 +233,8 @@ class Oracle23aiClient(VectorDBBase):
return cursor.var(metadata.type_code, arraysize=cursor.arraysize,
outconverter=list)
# Rest of the Oracle23aiClient class remains unchanged...
def _initialize_database(self, connection) -> None:
"""
Initialize database schema, tables and indexes.
@ -109,8 +247,9 @@ class Oracle23aiClient(VectorDBBase):
Raises:
Exception: If schema initialization fails
"""
with connection.cursor() as cursor:
print(f" >>> Creating Table document_chunk")
log.info(f" >>> Creating Table document_chunk")
cursor.execute(f"""
BEGIN
EXECUTE IMMEDIATE '
@ -130,7 +269,7 @@ class Oracle23aiClient(VectorDBBase):
END;
""")
print(f" >>> Creating Table document_chunk_collection_name_idx")
log.info(f" >>> Creating Table document_chunk_collection_name_idx")
cursor.execute("""
BEGIN
EXECUTE IMMEDIATE '
@ -145,7 +284,7 @@ class Oracle23aiClient(VectorDBBase):
END;
""")
print(f" >>> Creating VECTOR INDEX document_chunk_vector_ivf_idx")
log.info(f" >>> Creating VECTOR INDEX document_chunk_vector_ivf_idx")
cursor.execute("""
BEGIN
EXECUTE IMMEDIATE '
@ -261,7 +400,7 @@ class Oracle23aiClient(VectorDBBase):
... ]
>>> client.insert("my_collection", items)
"""
print(f"Oracle23aiClient:Inserting {len(items)} items into collection '{collection_name}'.")
log.info(f"Oracle23aiClient:Inserting {len(items)} items into collection '{collection_name}'.")
with self.get_connection() as connection:
try:
with connection.cursor() as cursor:
@ -282,10 +421,10 @@ class Oracle23aiClient(VectorDBBase):
})
connection.commit()
print(f"Oracle23aiClient:Inserted {len(items)} items into collection '{collection_name}'.")
log.info(f"Oracle23aiClient:Inserted {len(items)} items into collection '{collection_name}'.")
except Exception as e:
connection.rollback()
print(f"Error during insert: {e}")
log.exception(f"Error during insert: {e}")
raise
def upsert(self, collection_name: str, items: List[VectorItem]) -> None:
@ -344,10 +483,10 @@ class Oracle23aiClient(VectorDBBase):
})
connection.commit()
print(f"Upserted {len(items)} items into collection '{collection_name}'.")
log.info(f"Upserted {len(items)} items into collection '{collection_name}'.")
except Exception as e:
connection.rollback()
print(f"Error during upsert: {e}")
log.exception(f"Error during upsert: {e}")
raise
def search(
@ -425,9 +564,9 @@ class Oracle23aiClient(VectorDBBase):
metadatas=metadatas
)
except Exception as e:
print(f"Error during search: {e}")
log.exception(f"Error during search: {e}")
import traceback
print(traceback.format_exc())
log.exception(traceback.format_exc())
return None
def query(
@ -494,9 +633,9 @@ class Oracle23aiClient(VectorDBBase):
metadatas=metadatas
)
except Exception as e:
print(f"Error during query: {e}")
log.exception(f"Error during query: {e}")
import traceback
print(traceback.format_exc())
log.exception(traceback.format_exc())
return None
def get(
@ -552,9 +691,9 @@ class Oracle23aiClient(VectorDBBase):
metadatas=metadatas
)
except Exception as e:
print(f"Error during get: {e}")
log.exception(f"Error during get: {e}")
import traceback
print(traceback.format_exc())
log.exception(traceback.format_exc())
return None
def delete(
@ -603,9 +742,9 @@ class Oracle23aiClient(VectorDBBase):
deleted = cursor.rowcount
connection.commit()
print(f"Deleted {deleted} items from collection '{collection_name}'.")
log.info(f"Deleted {deleted} items from collection '{collection_name}'.")
except Exception as e:
print(f"Error during delete: {e}")
log.exception(f"Error during delete: {e}")
raise
def reset(self) -> None:
@ -627,9 +766,9 @@ class Oracle23aiClient(VectorDBBase):
cursor.execute("DELETE FROM document_chunk")
deleted = cursor.rowcount
connection.commit()
print(f"Reset complete. Deleted {deleted} items from 'document_chunk' table.")
log.info(f"Reset complete. Deleted {deleted} items from 'document_chunk' table.")
except Exception as e:
print(f"Error during reset: {e}")
log.exception(f"Error during reset: {e}")
raise
def close(self) -> None:
@ -680,7 +819,7 @@ class Oracle23aiClient(VectorDBBase):
count = cursor.fetchone()[0]
return count > 0
except Exception as e:
print(f"Error checking collection existence: {e}")
log.exception(f"Error checking collection existence: {e}")
return False
def delete_collection(self, collection_name: str) -> None:
@ -697,4 +836,4 @@ class Oracle23aiClient(VectorDBBase):
>>> client.delete_collection("obsolete_collection")
"""
self.delete(collection_name)
print(f"Collection '{collection_name}' deleted.")
log.info(f"Collection '{collection_name}' deleted.")

22
package-lock.json generated
View file

@ -53,6 +53,7 @@
"jspdf": "^3.0.0",
"katex": "^0.16.22",
"kokoro-js": "^1.1.1",
"lowlight": "^3.3.0",
"marked": "^9.1.0",
"mermaid": "^11.6.0",
"paneforge": "^0.0.6",
@ -3608,7 +3609,6 @@
"resolved": "https://registry.npmjs.org/@types/hast/-/hast-3.0.4.tgz",
"integrity": "sha512-WPs+bbQw5aCj+x6laNGWLH3wviHtoCv/P3+otBhbOhJgG8qtpdAMlTCxLtsTWA7LH1Oh/bFCHsBn0TPS5m30EQ==",
"license": "MIT",
"peer": true,
"dependencies": {
"@types/unist": "*"
}
@ -3692,8 +3692,7 @@
"node_modules/@types/unist": {
"version": "2.0.10",
"resolved": "https://registry.npmjs.org/@types/unist/-/unist-2.0.10.tgz",
"integrity": "sha512-IfYcSBWE3hLpBg8+X2SEa8LVkJdJEkT2Ese2aaLs3ptGdVtABxndrMaxuFlQ1qdFf9Q5rDvDpxI3WwgvKFAsQA==",
"peer": true
"integrity": "sha512-IfYcSBWE3hLpBg8+X2SEa8LVkJdJEkT2Ese2aaLs3ptGdVtABxndrMaxuFlQ1qdFf9Q5rDvDpxI3WwgvKFAsQA=="
},
"node_modules/@types/yauzl": {
"version": "2.10.3",
@ -6055,7 +6054,6 @@
"resolved": "https://registry.npmjs.org/devlop/-/devlop-1.1.0.tgz",
"integrity": "sha512-RWmIqhcFf1lRYBvNmr7qTNuyCt/7/ns2jbpp1+PalgE/rDQcBT0fioSMUpJ93irlUhC5hrg4cYqe6U+0ImW0rA==",
"license": "MIT",
"peer": true,
"dependencies": {
"dequal": "^2.0.0"
},
@ -7336,9 +7334,10 @@
"dev": true
},
"node_modules/highlight.js": {
"version": "11.9.0",
"resolved": "https://registry.npmjs.org/highlight.js/-/highlight.js-11.9.0.tgz",
"integrity": "sha512-fJ7cW7fQGCYAkgv4CPfwFHrfd/cLS4Hau96JuJ+ZTOWhjnhoeN1ub1tFmALm/+lW5z4WCAuAV9bm05AP0mS6Gw==",
"version": "11.11.1",
"resolved": "https://registry.npmjs.org/highlight.js/-/highlight.js-11.11.1.tgz",
"integrity": "sha512-Xwwo44whKBVCYoliBQwaPvtd/2tYFkRQtXDWj1nackaV2JPXx3L0+Jvd8/qCJ2p+ML0/XVkJ2q+Mr+UVdpJK5w==",
"license": "BSD-3-Clause",
"engines": {
"node": ">=12.0.0"
}
@ -8569,15 +8568,14 @@
}
},
"node_modules/lowlight": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/lowlight/-/lowlight-3.1.0.tgz",
"integrity": "sha512-CEbNVoSikAxwDMDPjXlqlFYiZLkDJHwyGu/MfOsJnF3d7f3tds5J3z8s/l9TMXhzfsJCCJEAsD78842mwmg0PQ==",
"version": "3.3.0",
"resolved": "https://registry.npmjs.org/lowlight/-/lowlight-3.3.0.tgz",
"integrity": "sha512-0JNhgFoPvP6U6lE/UdVsSq99tn6DhjjpAj5MxG49ewd2mOBVtwWYIT8ClyABhq198aXXODMU6Ox8DrGy/CpTZQ==",
"license": "MIT",
"peer": true,
"dependencies": {
"@types/hast": "^3.0.0",
"devlop": "^1.0.0",
"highlight.js": "~11.9.0"
"highlight.js": "~11.11.0"
},
"funding": {
"type": "github",

View file

@ -97,6 +97,7 @@
"jspdf": "^3.0.0",
"katex": "^0.16.22",
"kokoro-js": "^1.1.1",
"lowlight": "^3.3.0",
"marked": "^9.1.0",
"mermaid": "^11.6.0",
"paneforge": "^0.0.6",

View file

@ -10,18 +10,15 @@ dependencies = [
"uvicorn[standard]==0.34.2",
"pydantic==2.10.6",
"python-multipart==0.0.20",
"python-socketio==5.13.0",
"python-jose==3.4.0",
"passlib[bcrypt]==1.7.4",
"requests==2.32.4",
"aiohttp==3.11.11",
"async-timeout",
"aiocache",
"aiofiles",
"starlette-compress==1.6.0",
"sqlalchemy==2.0.38",
"alembic==1.14.0",
"peewee==3.18.1",
@ -30,29 +27,21 @@ dependencies = [
"pgvector==0.4.0",
"PyMySQL==1.1.1",
"bcrypt==4.3.0",
"pymongo",
"redis",
"boto3==1.35.53",
"argon2-cffi==23.1.0",
"APScheduler==3.10.4",
"RestrictedPython==8.0",
"loguru==0.7.3",
"asgiref==3.8.1",
"openai",
"anthropic",
"google-genai==1.15.0",
"google-generativeai==0.8.5",
"tiktoken",
"langchain==0.3.24",
"langchain-community==0.3.23",
"fake-useragent==2.1.0",
"chromadb==0.6.3",
"pymilvus==2.5.0",
@ -61,13 +50,11 @@ dependencies = [
"playwright==1.49.1",
"elasticsearch==9.0.1",
"pinecone==6.0.2",
"transformers",
"sentence-transformers==4.1.0",
"accelerate",
"colbert-ai==0.2.21",
"einops==0.8.1",
"ftfy==6.2.3",
"pypdf==4.3.1",
"fpdf2==2.8.2",
@ -87,51 +74,36 @@ dependencies = [
"sentencepiece",
"soundfile==0.13.1",
"azure-ai-documentintelligence==1.0.2",
"pillow==11.2.1",
"opencv-python-headless==4.11.0.86",
"rapidocr-onnxruntime==1.4.4",
"rank-bm25==0.2.2",
"onnxruntime==1.20.1",
"faster-whisper==1.1.1",
"PyJWT[crypto]==2.10.1",
"authlib==1.4.1",
"black==25.1.0",
"langfuse==2.44.0",
"youtube-transcript-api==1.1.0",
"pytube==15.0.0",
"pydub",
"duckduckgo-search==8.0.2",
"google-api-python-client",
"google-auth-httplib2",
"google-auth-oauthlib",
"docker~=7.1.0",
"pytest~=8.3.2",
"pytest-docker~=3.1.1",
"googleapis-common-protos==1.63.2",
"google-cloud-storage==2.19.0",
"azure-identity==1.20.0",
"azure-storage-blob==12.24.1",
"ldap3==2.9.1",
"firecrawl-py==1.12.0",
"tencentcloud-sdk-python==3.0.1336",
"gcp-storage-emulator>=2024.8.3",
"moto[s3]>=5.0.26",
"oracledb>=3.2.0",
]
readme = "README.md"
requires-python = ">= 3.11, < 3.13.0a1"

4647
uv.lock

File diff suppressed because it is too large Load diff

6909
yarn.lock Normal file

File diff suppressed because it is too large Load diff