Make VACUUM optional and add prune-operation locking (#28)

PruneLock class
Vector cleanup error reporting
Lock acquisition/release
Optional VACUUM
Fixed folder deletion
This commit is contained in:
Classic298 2025-11-10 17:14:27 +01:00 committed by GitHub
parent d94492dc0e
commit 60d7ad22ee
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 448 additions and 257 deletions

View file

@ -5,6 +5,8 @@ import shutil
import json import json
import re import re
import sqlite3 import sqlite3
import uuid
from datetime import datetime, timedelta
from typing import Optional, Set, Union from typing import Optional, Set, Union
from pathlib import Path from pathlib import Path
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
@ -36,6 +38,80 @@ log.setLevel(SRC_LOG_LEVELS["MODELS"])
router = APIRouter() router = APIRouter()
class PruneLock:
    """
    Simple file-based locking mechanism to prevent concurrent prune operations.

    This uses a lock file with a timestamp to prevent multiple admins from
    running prune simultaneously, which could cause race conditions and data
    corruption.

    The lock file is created atomically (``O_CREAT | O_EXCL``), so two
    processes racing to acquire the lock can never both succeed — the loser
    gets ``FileExistsError`` and ``acquire()`` returns False.
    """

    LOCK_FILE = Path(CACHE_DIR) / ".prune.lock"
    LOCK_TIMEOUT = timedelta(hours=2)  # Safety timeout: locks older than this are stale

    @classmethod
    def acquire(cls) -> bool:
        """
        Try to acquire the lock. Returns True if acquired, False if already locked.

        If the lock file exists but is stale (older than LOCK_TIMEOUT) or
        corrupt, it is removed first; the lock file itself is then created
        atomically so concurrent callers cannot both win.
        """
        try:
            # Pre-check: clean up stale/corrupt locks so a crashed run does
            # not block pruning forever. This check is advisory only — the
            # atomic create below is what actually guarantees exclusivity.
            if cls.LOCK_FILE.exists():
                try:
                    with open(cls.LOCK_FILE, 'r') as f:
                        lock_data = json.load(f)

                    lock_time = datetime.fromisoformat(lock_data['timestamp'])
                    operation_id = lock_data.get('operation_id', 'unknown')

                    if datetime.utcnow() - lock_time > cls.LOCK_TIMEOUT:
                        # Holder probably crashed; reclaim the lock.
                        log.warning(f"Found stale lock from {lock_time} (operation {operation_id}), removing")
                        cls.LOCK_FILE.unlink()
                    else:
                        # Lock is still valid — another prune is running.
                        log.warning(f"Prune operation already in progress (started {lock_time}, operation {operation_id})")
                        return False
                except (json.JSONDecodeError, KeyError, ValueError, TypeError) as e:
                    # Corrupt/unreadable lock file (bad JSON, missing key, or a
                    # non-string timestamp raising TypeError) — remove it.
                    log.warning(f"Found corrupt lock file, removing: {e}")
                    cls.LOCK_FILE.unlink()

            operation_id = str(uuid.uuid4())[:8]
            lock_data = {
                # Naive UTC ISO timestamp, matching what fromisoformat() above expects.
                'timestamp': datetime.utcnow().isoformat(),
                'operation_id': operation_id,
                'pid': os.getpid()
            }

            # Ensure parent directory exists
            cls.LOCK_FILE.parent.mkdir(parents=True, exist_ok=True)

            # O_CREAT | O_EXCL makes creation atomic: if another process
            # created the file between our exists() check and now, this raises
            # FileExistsError instead of silently clobbering their lock.
            fd = os.open(cls.LOCK_FILE, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
            with os.fdopen(fd, 'w') as f:
                json.dump(lock_data, f)

            log.info(f"Acquired prune lock (operation {operation_id})")
            return True
        except FileExistsError:
            # Lost the creation race to a concurrent prune operation.
            log.warning("Prune lock was just acquired by another process")
            return False
        except Exception as e:
            log.error(f"Error acquiring prune lock: {e}")
            return False

    @classmethod
    def release(cls) -> None:
        """Release the lock by removing the lock file (best-effort; never raises)."""
        try:
            if cls.LOCK_FILE.exists():
                cls.LOCK_FILE.unlink()
                log.info("Released prune lock")
        except Exception as e:
            log.error(f"Error releasing prune lock: {e}")
class JSONFileIDExtractor: class JSONFileIDExtractor:
""" """
Utility for extracting and validating file IDs from JSON content. Utility for extracting and validating file IDs from JSON content.
@ -118,7 +194,7 @@ class VectorDatabaseCleaner(ABC):
@abstractmethod @abstractmethod
def cleanup_orphaned_collections( def cleanup_orphaned_collections(
self, active_file_ids: Set[str], active_kb_ids: Set[str] self, active_file_ids: Set[str], active_kb_ids: Set[str]
) -> int: ) -> tuple[int, Optional[str]]:
""" """
Actually delete orphaned vector collections. Actually delete orphaned vector collections.
@ -127,7 +203,9 @@ class VectorDatabaseCleaner(ABC):
active_kb_ids: Set of knowledge base IDs that are still active active_kb_ids: Set of knowledge base IDs that are still active
Returns: Returns:
Number of collections that were actually deleted Tuple of (deleted_count, error_message)
- deleted_count: Number of collections that were deleted
- error_message: None on success, error description on failure
""" """
pass pass
@ -193,10 +271,10 @@ class ChromaDatabaseCleaner(VectorDatabaseCleaner):
def cleanup_orphaned_collections( def cleanup_orphaned_collections(
self, active_file_ids: Set[str], active_kb_ids: Set[str] self, active_file_ids: Set[str], active_kb_ids: Set[str]
) -> int: ) -> tuple[int, Optional[str]]:
"""Actually delete orphaned ChromaDB collections and database records.""" """Actually delete orphaned ChromaDB collections and database records."""
if not self.chroma_db_path.exists(): if not self.chroma_db_path.exists():
return 0 return (0, None)
expected_collections = self._build_expected_collections( expected_collections = self._build_expected_collections(
active_file_ids, active_kb_ids active_file_ids, active_kb_ids
@ -204,12 +282,15 @@ class ChromaDatabaseCleaner(VectorDatabaseCleaner):
uuid_to_collection = self._get_collection_mappings() uuid_to_collection = self._get_collection_mappings()
deleted_count = 0 deleted_count = 0
errors = []
# First, clean up orphaned database records # First, clean up orphaned database records
try: try:
deleted_count += self._cleanup_orphaned_database_records() deleted_count += self._cleanup_orphaned_database_records()
except Exception as e: except Exception as e:
log.error(f"Error cleaning orphaned database records: {e}") error_msg = f"ChromaDB database cleanup failed: {e}"
log.error(error_msg)
errors.append(error_msg)
# Then clean up physical directories # Then clean up physical directories
try: try:
@ -244,12 +325,17 @@ class ChromaDatabaseCleaner(VectorDatabaseCleaner):
) )
except Exception as e: except Exception as e:
log.error(f"Error cleaning ChromaDB collections: {e}") error_msg = f"ChromaDB directory cleanup failed: {e}"
log.error(error_msg)
errors.append(error_msg)
if deleted_count > 0: if deleted_count > 0:
log.info(f"Deleted {deleted_count} orphaned ChromaDB collections") log.info(f"Deleted {deleted_count} orphaned ChromaDB collections")
return deleted_count # Return error if any critical failures occurred
if errors:
return (deleted_count, "; ".join(errors))
return (deleted_count, None)
def delete_collection(self, collection_name: str) -> bool: def delete_collection(self, collection_name: str) -> bool:
"""Delete a specific ChromaDB collection by name.""" """Delete a specific ChromaDB collection by name."""
@ -561,7 +647,7 @@ class PGVectorDatabaseCleaner(VectorDatabaseCleaner):
def cleanup_orphaned_collections( def cleanup_orphaned_collections(
self, active_file_ids: Set[str], active_kb_ids: Set[str] self, active_file_ids: Set[str], active_kb_ids: Set[str]
) -> int: ) -> tuple[int, Optional[str]]:
""" """
Delete orphaned PGVector collections using the existing client's delete method. Delete orphaned PGVector collections using the existing client's delete method.
@ -569,8 +655,9 @@ class PGVectorDatabaseCleaner(VectorDatabaseCleaner):
existing PGVector client's delete() method for each orphaned collection. existing PGVector client's delete() method for each orphaned collection.
""" """
if not self.session: if not self.session:
log.warning("PGVector session not available for cleanup") error_msg = "PGVector session not available for cleanup"
return 0 log.warning(error_msg)
return (0, error_msg)
try: try:
orphaned_collections = self._get_orphaned_collections( orphaned_collections = self._get_orphaned_collections(
@ -579,7 +666,7 @@ class PGVectorDatabaseCleaner(VectorDatabaseCleaner):
if not orphaned_collections: if not orphaned_collections:
log.debug("No orphaned PGVector collections found") log.debug("No orphaned PGVector collections found")
return 0 return (0, None)
deleted_count = 0 deleted_count = 0
log.info( log.info(
@ -616,13 +703,14 @@ class PGVectorDatabaseCleaner(VectorDatabaseCleaner):
f"Successfully deleted {deleted_count} orphaned PGVector collections" f"Successfully deleted {deleted_count} orphaned PGVector collections"
) )
return deleted_count return (deleted_count, None)
except Exception as e: except Exception as e:
if self.session: if self.session:
self.session.rollback() self.session.rollback()
log.error(f"Error cleaning orphaned PGVector collections: {e}") error_msg = f"PGVector cleanup failed: {e}"
return 0 log.error(error_msg)
return (0, error_msg)
def delete_collection(self, collection_name: str) -> bool: def delete_collection(self, collection_name: str) -> bool:
""" """
@ -706,9 +794,9 @@ class NoOpVectorDatabaseCleaner(VectorDatabaseCleaner):
def cleanup_orphaned_collections( def cleanup_orphaned_collections(
self, active_file_ids: Set[str], active_kb_ids: Set[str] self, active_file_ids: Set[str], active_kb_ids: Set[str]
) -> int: ) -> tuple[int, Optional[str]]:
"""No collections to cleanup for unsupported databases.""" """No collections to cleanup for unsupported databases."""
return 0 return (0, None)
def delete_collection(self, collection_name: str) -> bool: def delete_collection(self, collection_name: str) -> bool:
"""No collection to delete for unsupported databases.""" """No collection to delete for unsupported databases."""
@ -757,6 +845,7 @@ class PruneDataForm(BaseModel):
delete_inactive_users_days: Optional[int] = None delete_inactive_users_days: Optional[int] = None
exempt_admin_users: bool = True exempt_admin_users: bool = True
exempt_pending_users: bool = True exempt_pending_users: bool = True
run_vacuum: bool = False
dry_run: bool = True dry_run: bool = True
@ -1314,6 +1403,14 @@ async def prune_data(form_data: PruneDataForm, user=Depends(get_admin_user)):
return result return result
# Actual deletion logic (dry_run=False) # Actual deletion logic (dry_run=False)
# Acquire lock to prevent concurrent operations
if not PruneLock.acquire():
raise HTTPException(
status_code=status.HTTP_409_CONFLICT,
detail="A prune operation is already in progress. Please wait for it to complete."
)
try:
log.info("Starting data pruning process") log.info("Starting data pruning process")
# Stage 0: Delete inactive users (if enabled) # Stage 0: Delete inactive users (if enabled)
@ -1508,20 +1605,26 @@ async def prune_data(form_data: PruneDataForm, user=Depends(get_admin_user)):
cleanup_orphaned_uploads(final_active_file_ids) cleanup_orphaned_uploads(final_active_file_ids)
# Use modular vector database cleanup # Use modular vector database cleanup
vector_cleaner.cleanup_orphaned_collections( warnings = []
deleted_vector_count, vector_error = vector_cleaner.cleanup_orphaned_collections(
final_active_file_ids, final_active_kb_ids final_active_file_ids, final_active_kb_ids
) )
if vector_error:
warnings.append(f"Vector cleanup warning: {vector_error}")
log.warning(f"Vector cleanup completed with errors: {vector_error}")
# Stage 5: Audio cache cleanup # Stage 5: Audio cache cleanup
log.info("Cleaning audio cache") log.info("Cleaning audio cache")
cleanup_audio_cache(form_data.audio_cache_max_age_days) cleanup_audio_cache(form_data.audio_cache_max_age_days)
# Stage 6: Database optimization # Stage 6: Database optimization (optional)
log.info("Optimizing database") if form_data.run_vacuum:
log.info("Optimizing database with VACUUM (this may take a while and lock the database)")
try: try:
with get_db() as db: with get_db() as db:
db.execute(text("VACUUM")) db.execute(text("VACUUM"))
log.info("Vacuumed main database")
except Exception as e: except Exception as e:
log.error(f"Failed to vacuum main database: {e}") log.error(f"Failed to vacuum main database: {e}")
@ -1543,10 +1646,20 @@ async def prune_data(form_data: PruneDataForm, user=Depends(get_admin_user)):
log.info("Executed VACUUM ANALYZE on PostgreSQL database") log.info("Executed VACUUM ANALYZE on PostgreSQL database")
except Exception as e: except Exception as e:
log.error(f"Failed to vacuum PostgreSQL database: {e}") log.error(f"Failed to vacuum PostgreSQL database: {e}")
else:
log.info("Skipping VACUUM optimization (not enabled)")
# Log any warnings collected during pruning
if warnings:
log.warning(f"Data pruning completed with warnings: {'; '.join(warnings)}")
log.info("Data pruning completed successfully") log.info("Data pruning completed successfully")
return True return True
finally:
# Always release lock, even if operation fails
PruneLock.release()
except Exception as e: except Exception as e:
log.exception(f"Error during data pruning: {e}") log.exception(f"Error during data pruning: {e}")
raise HTTPException( raise HTTPException(

View file

@ -17,6 +17,7 @@ export const pruneData = async (
delete_inactive_users_days: number | null = null, delete_inactive_users_days: number | null = null,
exempt_admin_users: boolean = true, exempt_admin_users: boolean = true,
exempt_pending_users: boolean = true, exempt_pending_users: boolean = true,
run_vacuum: boolean = false,
dry_run: boolean // Removed default value to ensure explicit passing dry_run: boolean // Removed default value to ensure explicit passing
) => { ) => {
let error = null; let error = null;
@ -43,6 +44,7 @@ export const pruneData = async (
delete_inactive_users_days, delete_inactive_users_days,
exempt_admin_users, exempt_admin_users,
exempt_pending_users, exempt_pending_users,
run_vacuum,
dry_run dry_run
}) })
}) })

View file

@ -50,6 +50,7 @@
settings.delete_inactive_users_days, settings.delete_inactive_users_days,
settings.exempt_admin_users, settings.exempt_admin_users,
settings.exempt_pending_users, settings.exempt_pending_users,
settings.run_vacuum,
true // dry_run = true for preview true // dry_run = true for preview
).catch((error) => { ).catch((error) => {
toast.error(`${error}`); toast.error(`${error}`);
@ -83,6 +84,7 @@
lastPruneSettings.delete_inactive_users_days, lastPruneSettings.delete_inactive_users_days,
lastPruneSettings.exempt_admin_users, lastPruneSettings.exempt_admin_users,
lastPruneSettings.exempt_pending_users, lastPruneSettings.exempt_pending_users,
lastPruneSettings.run_vacuum,
false // dry_run = false for actual pruning false // dry_run = false for actual pruning
).catch((error) => { ).catch((error) => {
toast.error(`${error}`); toast.error(`${error}`);

View file

@ -32,6 +32,9 @@
let cleanupAudioCache = true; let cleanupAudioCache = true;
let audio_cache_max_age_days = 30; let audio_cache_max_age_days = 30;
// System/Database optimization
let run_vacuum = false;
let showDetailsExpanded = false; let showDetailsExpanded = false;
let activeDetailsTab = 'users'; let activeDetailsTab = 'users';
let activeSettingsTab = 'users'; let activeSettingsTab = 'users';
@ -55,7 +58,8 @@
audio_cache_max_age_days: cleanupAudioCache ? audio_cache_max_age_days : null, audio_cache_max_age_days: cleanupAudioCache ? audio_cache_max_age_days : null,
delete_inactive_users_days: deleteInactiveUsers ? delete_inactive_users_days : null, delete_inactive_users_days: deleteInactiveUsers ? delete_inactive_users_days : null,
exempt_admin_users, exempt_admin_users,
exempt_pending_users exempt_pending_users,
run_vacuum
}); });
show = false; show = false;
}; };
@ -96,7 +100,10 @@ curl -X POST "${window.location.origin}/api/v1/prune/" \\
"delete_orphaned_folders": ${delete_orphaned_folders}, "delete_orphaned_folders": ${delete_orphaned_folders},
// AUDIO CACHE CLEANUP (null = disabled) // AUDIO CACHE CLEANUP (null = disabled)
"audio_cache_max_age_days": ${cleanupAudioCache ? audio_cache_max_age_days : null} // TTS/STT files "audio_cache_max_age_days": ${cleanupAudioCache ? audio_cache_max_age_days : null}, // TTS/STT files
// DATABASE OPTIMIZATION (WARNING: Locks database during execution!)
"run_vacuum": ${run_vacuum} // Reclaim disk space - only enable during maintenance windows
}' }'
# API KEY vs JWT TOKEN: # API KEY vs JWT TOKEN:
@ -359,6 +366,12 @@ curl -X POST "${window.location.origin}/api/v1/prune/" \\
> >
{$i18n.t('Audio Cache')} {$i18n.t('Audio Cache')}
</button> </button>
<button
class="px-3 py-2 text-sm font-medium rounded-t transition-colors {activeSettingsTab === 'system' ? 'bg-blue-100 dark:bg-blue-800 text-blue-800 dark:text-blue-200' : 'text-blue-600 dark:text-blue-400 hover:text-blue-800 dark:hover:text-blue-200'}"
on:click={() => activeSettingsTab = 'system'}
>
{$i18n.t('System')}
</button>
</div> </div>
<!-- Settings Tab Content --> <!-- Settings Tab Content -->
@ -744,6 +757,67 @@ curl -X POST "${window.location.origin}/api/v1/prune/" \\
</div> </div>
{/if} {/if}
</div> </div>
{:else if activeSettingsTab === 'system'}
<!-- System/Database Optimization -->
<div class="space-y-4">
<div class="flex items-start py-2">
<div class="flex items-center">
<div class="mr-3">
<Switch bind:state={run_vacuum} />
</div>
<div class="flex-1">
<div class="flex items-center text-sm font-medium text-gray-900 dark:text-gray-100">
<span>{$i18n.t('Run VACUUM optimization')}</span>
<div class="relative group ml-2">
<svg class="h-4 w-4 text-gray-400 hover:text-gray-600 dark:hover:text-gray-300 cursor-help" fill="currentColor" viewBox="0 0 20 20">
<path fill-rule="evenodd" d="M18 10a8 8 0 11-16 0 8 8 0 0116 0zm-7-4a1 1 0 11-2 0 1 1 0 012 0zM9 9a1 1 0 000 2v3a1 1 0 001 1h1a1 1 0 100-2v-3a1 1 0 00-1-1H9z" clip-rule="evenodd" />
</svg>
<div class="absolute left-1/2 transform -translate-x-1/2 bottom-full mb-2 w-72 px-3 py-2 text-xs text-white bg-gray-900 dark:bg-gray-700 rounded-lg shadow-lg opacity-0 group-hover:opacity-100 transition-opacity duration-200 pointer-events-none z-10">
<div class="font-medium mb-1">{$i18n.t('Database Optimization Warning:')}</div>
<div class="space-y-1">
<p>{$i18n.t('VACUUM reclaims disk space by rebuilding the database file.')}</p>
<p class="text-yellow-300 dark:text-yellow-400 font-medium">{$i18n.t('⚠️ This may take a very long time on large databases and will LOCK the entire database during execution.')}</p>
<p>{$i18n.t('It is strongly recommended to NOT run this while users are actively using the platform.')}</p>
<p class="text-green-300 dark:text-green-400">{$i18n.t('💡 Best practice: Run during scheduled maintenance windows.')}</p>
</div>
<div class="absolute top-full left-1/2 transform -translate-x-1/2 border-4 border-transparent border-t-gray-900 dark:border-t-gray-700"></div>
</div>
</div>
</div>
<div class="text-xs text-gray-500 dark:text-gray-400">
{$i18n.t('Reclaim disk space after cleanup (locks database during operation)')}
</div>
</div>
</div>
</div>
<!-- VACUUM warning box -->
{#if run_vacuum}
<div class="ml-8 border-l-2 border-yellow-200 dark:border-yellow-700 pl-4">
<div class="bg-yellow-50 dark:bg-yellow-900/20 border border-yellow-200 dark:border-yellow-800 rounded-lg p-3">
<div class="flex">
<div class="flex-shrink-0">
<svg class="h-5 w-5 text-yellow-400" viewBox="0 0 20 20" fill="currentColor">
<path fill-rule="evenodd" d="M8.485 2.495c.673-1.167 2.357-1.167 3.03 0l6.28 10.875c.673 1.167-.17 2.625-1.516 2.625H3.72c-1.347 0-2.189-1.458-1.515-2.625L8.485 2.495zM10 5a.75.75 0 01.75.75v3.5a.75.75 0 01-1.5 0v-3.5A.75.75 0 0110 5zm0 9a1 1 0 100-2 1 1 0 000 2z" clip-rule="evenodd" />
</svg>
</div>
<div class="ml-3">
<h4 class="text-sm font-medium text-yellow-800 dark:text-yellow-200">
{$i18n.t('VACUUM Enabled - Important Considerations:')}
</h4>
<div class="mt-2 text-sm text-yellow-700 dark:text-yellow-300 space-y-1">
<p>{$i18n.t('Database will be locked during VACUUM - all users will experience errors')}</p>
<p>{$i18n.t('Operation duration depends on database size (can be 5-30+ minutes)')}</p>
<p>{$i18n.t('Recommended only during scheduled maintenance windows')}</p>
<p>{$i18n.t('Not required for routine cleanups - only when reclaiming disk space is critical')}</p>
</div>
</div>
</div>
</div>
</div>
{/if}
</div>
{/if} {/if}
</div> </div>
</div> </div>