feat: Make VACUUM database optimization optional (#36)

Co-authored-by: Claude <noreply@anthropic.com>
Fix #1: Remove duplicate scan in preview mode
Fix #2: Cache stat() result in audio cleanup
This commit is contained in:
Classic298 2025-11-13 20:45:47 +01:00 committed by GitHub
parent a4ddb4b15b
commit 81c7617508
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -948,9 +948,16 @@ class PrunePreviewResult(BaseModel):
# Counting helper functions for dry-run preview # Counting helper functions for dry-run preview
def count_inactive_users( def count_inactive_users(
inactive_days: Optional[int], exempt_admin: bool, exempt_pending: bool inactive_days: Optional[int], exempt_admin: bool, exempt_pending: bool, all_users=None
) -> int: ) -> int:
"""Count users that would be deleted for inactivity.""" """Count users that would be deleted for inactivity.
Args:
inactive_days: Number of days of inactivity before deletion
exempt_admin: Whether to exempt admin users
exempt_pending: Whether to exempt pending users
all_users: Optional pre-fetched list of users to avoid duplicate queries
"""
if inactive_days is None: if inactive_days is None:
return 0 return 0
@ -958,7 +965,8 @@ def count_inactive_users(
count = 0 count = 0
try: try:
all_users = Users.get_users()["users"] if all_users is None:
all_users = Users.get_users()["users"]
for user in all_users: for user in all_users:
if exempt_admin and user.role == "admin": if exempt_admin and user.role == "admin":
continue continue
@ -1139,9 +1147,12 @@ def count_audio_cache_files(max_age_days: Optional[int]) -> int:
return count return count
def get_active_file_ids() -> Set[str]: def get_active_file_ids(knowledge_bases=None) -> Set[str]:
""" """
Get all file IDs that are actively referenced by knowledge bases, chats, folders, and messages. Get all file IDs that are actively referenced by knowledge bases, chats, folders, and messages.
Args:
knowledge_bases: Optional pre-fetched list of knowledge bases to avoid duplicate queries
""" """
active_file_ids = set() active_file_ids = set()
@ -1151,7 +1162,8 @@ def get_active_file_ids() -> Set[str]:
all_file_ids = {f.id for f in Files.get_files()} all_file_ids = {f.id for f in Files.get_files()}
log.debug(f"Preloaded {len(all_file_ids)} file IDs for validation") log.debug(f"Preloaded {len(all_file_ids)} file IDs for validation")
# Scan knowledge bases for file references # Scan knowledge bases for file references
knowledge_bases = Knowledges.get_knowledge_bases() if knowledge_bases is None:
knowledge_bases = Knowledges.get_knowledge_bases()
log.debug(f"Found {len(knowledge_bases)} knowledge bases") log.debug(f"Found {len(knowledge_bases)} knowledge bases")
for kb in knowledge_bases: for kb in knowledge_bases:
@ -1457,13 +1469,16 @@ async def prune_data(form_data: PruneDataForm, user=Depends(get_admin_user)):
log.info("Starting data pruning preview (dry run)") log.info("Starting data pruning preview (dry run)")
# Get counts for all enabled operations # Get counts for all enabled operations
active_file_ids = get_active_file_ids() # Fetch knowledge bases and users once to avoid duplicate queries
active_user_ids = {user.id for user in Users.get_users()["users"]} knowledge_bases = Knowledges.get_knowledge_bases()
all_users = Users.get_users()["users"]
active_user_ids = {user.id for user in all_users}
active_kb_ids = { active_kb_ids = {
kb.id kb.id
for kb in Knowledges.get_knowledge_bases() for kb in knowledge_bases
if kb.user_id in active_user_ids if kb.user_id in active_user_ids
} }
active_file_ids = get_active_file_ids(knowledge_bases)
orphaned_counts = count_orphaned_records(form_data, active_file_ids, active_user_ids) orphaned_counts = count_orphaned_records(form_data, active_file_ids, active_user_ids)
@ -1472,6 +1487,7 @@ async def prune_data(form_data: PruneDataForm, user=Depends(get_admin_user)):
form_data.delete_inactive_users_days, form_data.delete_inactive_users_days,
form_data.exempt_admin_users, form_data.exempt_admin_users,
form_data.exempt_pending_users, form_data.exempt_pending_users,
all_users,
), ),
old_chats=count_old_chats( old_chats=count_old_chats(
form_data.days, form_data.days,
@ -1570,7 +1586,7 @@ async def prune_data(form_data: PruneDataForm, user=Depends(get_admin_user)):
log.info(f"Found {len(active_kb_ids)} active knowledge bases") log.info(f"Found {len(active_kb_ids)} active knowledge bases")
active_file_ids = get_active_file_ids() active_file_ids = get_active_file_ids(knowledge_bases)
# Stage 3: Delete orphaned database records # Stage 3: Delete orphaned database records
log.info("Deleting orphaned database records") log.info("Deleting orphaned database records")