Claude/vacuum optional 011 c uw61vf5 s rym bh cw u1 ls w (#28)

PruneLock class
Vector cleanup error reporting
Lock acquisition/release
Optional VACUUM
Fixed folder deletion
This commit is contained in:
Classic298 2025-11-10 17:14:27 +01:00 committed by GitHub
parent d94492dc0e
commit 60d7ad22ee
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 448 additions and 257 deletions

View file

@ -5,6 +5,8 @@ import shutil
import json
import re
import sqlite3
import uuid
from datetime import datetime, timedelta
from typing import Optional, Set, Union
from pathlib import Path
from abc import ABC, abstractmethod
@ -36,6 +38,80 @@ log.setLevel(SRC_LOG_LEVELS["MODELS"])
router = APIRouter()
class PruneLock:
    """
    Simple file-based locking mechanism to prevent concurrent prune operations.

    The lock is a small JSON file containing the acquisition timestamp
    (naive UTC, ISO-8601), a short random operation id and the PID of the
    process that created it. A lock older than ``LOCK_TIMEOUT`` is considered
    stale and is replaced.

    The lock file is created in exclusive mode, so when several requests race
    to create it only one of them succeeds. Taking over a stale or corrupt
    lock remains best-effort: the old file is removed first and the exclusive
    create then decides the winner.
    """

    LOCK_FILE = Path(CACHE_DIR) / ".prune.lock"
    LOCK_TIMEOUT = timedelta(hours=2)  # Safety timeout

    @classmethod
    def acquire(cls) -> bool:
        """
        Try to acquire the lock.

        Returns:
            True if this call created the lock file, False if a valid lock is
            already held by another operation or the lock could not be
            created for any other reason (the reason is logged).
        """
        try:
            # Inspect an existing lock, if any. Opening directly (instead of
            # exists() followed by open()) avoids failing when the holder
            # releases the lock between the two calls.
            try:
                with open(cls.LOCK_FILE, 'r') as f:
                    lock_data = json.load(f)

                lock_time = datetime.fromisoformat(lock_data['timestamp'])
                operation_id = lock_data.get('operation_id', 'unknown')

                # Timestamps are written with utcnow(), so compare against
                # the same naive-UTC clock.
                if datetime.utcnow() - lock_time > cls.LOCK_TIMEOUT:
                    log.warning(f"Found stale lock from {lock_time} (operation {operation_id}), removing")
                    cls.LOCK_FILE.unlink(missing_ok=True)
                else:
                    # Lock is still valid
                    log.warning(f"Prune operation already in progress (started {lock_time}, operation {operation_id})")
                    return False
            except FileNotFoundError:
                # No lock file: nothing to inspect, go on and create one.
                pass
            except (json.JSONDecodeError, KeyError, ValueError, TypeError) as e:
                # Corrupt lock file, remove it. TypeError covers JSON that
                # parses but is not an object (e.g. [] or null) or carries a
                # non-string timestamp; without it such a file would block
                # every future acquire until deleted by hand.
                log.warning(f"Found corrupt lock file, removing: {e}")
                cls.LOCK_FILE.unlink(missing_ok=True)

            # Create lock file
            operation_id = str(uuid.uuid4())[:8]
            lock_data = {
                'timestamp': datetime.utcnow().isoformat(),
                'operation_id': operation_id,
                'pid': os.getpid()
            }

            # Ensure parent directory exists
            cls.LOCK_FILE.parent.mkdir(parents=True, exist_ok=True)

            try:
                # 'x' = exclusive creation: fails if the file already exists,
                # so two concurrent callers cannot both take the lock.
                with open(cls.LOCK_FILE, 'x') as f:
                    json.dump(lock_data, f)
            except FileExistsError:
                log.warning("Prune operation already in progress (lock was acquired concurrently)")
                return False

            log.info(f"Acquired prune lock (operation {operation_id})")
            return True

        except Exception as e:
            log.error(f"Error acquiring prune lock: {e}")
            return False

    @classmethod
    def release(cls) -> None:
        """
        Release the lock by removing the lock file.

        A missing lock file is not an error; any other failure is logged and
        swallowed so that releasing never raises into the caller's cleanup
        path.
        """
        try:
            cls.LOCK_FILE.unlink()
        except FileNotFoundError:
            # Nothing to release (already removed or never acquired).
            pass
        except Exception as e:
            log.error(f"Error releasing prune lock: {e}")
        else:
            log.info("Released prune lock")
class JSONFileIDExtractor:
"""
Utility for extracting and validating file IDs from JSON content.
@ -118,7 +194,7 @@ class VectorDatabaseCleaner(ABC):
@abstractmethod
def cleanup_orphaned_collections(
self, active_file_ids: Set[str], active_kb_ids: Set[str]
) -> int:
) -> tuple[int, Optional[str]]:
"""
Actually delete orphaned vector collections.
@ -127,7 +203,9 @@ class VectorDatabaseCleaner(ABC):
active_kb_ids: Set of knowledge base IDs that are still active
Returns:
Number of collections that were actually deleted
Tuple of (deleted_count, error_message)
- deleted_count: Number of collections that were deleted
- error_message: None on success, error description on failure
"""
pass
@ -193,10 +271,10 @@ class ChromaDatabaseCleaner(VectorDatabaseCleaner):
def cleanup_orphaned_collections(
self, active_file_ids: Set[str], active_kb_ids: Set[str]
) -> int:
) -> tuple[int, Optional[str]]:
"""Actually delete orphaned ChromaDB collections and database records."""
if not self.chroma_db_path.exists():
return 0
return (0, None)
expected_collections = self._build_expected_collections(
active_file_ids, active_kb_ids
@ -204,12 +282,15 @@ class ChromaDatabaseCleaner(VectorDatabaseCleaner):
uuid_to_collection = self._get_collection_mappings()
deleted_count = 0
errors = []
# First, clean up orphaned database records
try:
deleted_count += self._cleanup_orphaned_database_records()
except Exception as e:
log.error(f"Error cleaning orphaned database records: {e}")
error_msg = f"ChromaDB database cleanup failed: {e}"
log.error(error_msg)
errors.append(error_msg)
# Then clean up physical directories
try:
@ -244,12 +325,17 @@ class ChromaDatabaseCleaner(VectorDatabaseCleaner):
)
except Exception as e:
log.error(f"Error cleaning ChromaDB collections: {e}")
error_msg = f"ChromaDB directory cleanup failed: {e}"
log.error(error_msg)
errors.append(error_msg)
if deleted_count > 0:
log.info(f"Deleted {deleted_count} orphaned ChromaDB collections")
return deleted_count
# Return error if any critical failures occurred
if errors:
return (deleted_count, "; ".join(errors))
return (deleted_count, None)
def delete_collection(self, collection_name: str) -> bool:
"""Delete a specific ChromaDB collection by name."""
@ -561,7 +647,7 @@ class PGVectorDatabaseCleaner(VectorDatabaseCleaner):
def cleanup_orphaned_collections(
self, active_file_ids: Set[str], active_kb_ids: Set[str]
) -> int:
) -> tuple[int, Optional[str]]:
"""
Delete orphaned PGVector collections using the existing client's delete method.
@ -569,8 +655,9 @@ class PGVectorDatabaseCleaner(VectorDatabaseCleaner):
existing PGVector client's delete() method for each orphaned collection.
"""
if not self.session:
log.warning("PGVector session not available for cleanup")
return 0
error_msg = "PGVector session not available for cleanup"
log.warning(error_msg)
return (0, error_msg)
try:
orphaned_collections = self._get_orphaned_collections(
@ -579,7 +666,7 @@ class PGVectorDatabaseCleaner(VectorDatabaseCleaner):
if not orphaned_collections:
log.debug("No orphaned PGVector collections found")
return 0
return (0, None)
deleted_count = 0
log.info(
@ -616,13 +703,14 @@ class PGVectorDatabaseCleaner(VectorDatabaseCleaner):
f"Successfully deleted {deleted_count} orphaned PGVector collections"
)
return deleted_count
return (deleted_count, None)
except Exception as e:
if self.session:
self.session.rollback()
log.error(f"Error cleaning orphaned PGVector collections: {e}")
return 0
error_msg = f"PGVector cleanup failed: {e}"
log.error(error_msg)
return (0, error_msg)
def delete_collection(self, collection_name: str) -> bool:
"""
@ -706,9 +794,9 @@ class NoOpVectorDatabaseCleaner(VectorDatabaseCleaner):
def cleanup_orphaned_collections(
self, active_file_ids: Set[str], active_kb_ids: Set[str]
) -> int:
) -> tuple[int, Optional[str]]:
"""No collections to cleanup for unsupported databases."""
return 0
return (0, None)
def delete_collection(self, collection_name: str) -> bool:
"""No collection to delete for unsupported databases."""
@ -757,6 +845,7 @@ class PruneDataForm(BaseModel):
delete_inactive_users_days: Optional[int] = None
exempt_admin_users: bool = True
exempt_pending_users: bool = True
run_vacuum: bool = False
dry_run: bool = True
@ -1314,6 +1403,14 @@ async def prune_data(form_data: PruneDataForm, user=Depends(get_admin_user)):
return result
# Actual deletion logic (dry_run=False)
# Acquire lock to prevent concurrent operations
if not PruneLock.acquire():
raise HTTPException(
status_code=status.HTTP_409_CONFLICT,
detail="A prune operation is already in progress. Please wait for it to complete."
)
try:
log.info("Starting data pruning process")
# Stage 0: Delete inactive users (if enabled)
@ -1508,20 +1605,26 @@ async def prune_data(form_data: PruneDataForm, user=Depends(get_admin_user)):
cleanup_orphaned_uploads(final_active_file_ids)
# Use modular vector database cleanup
vector_cleaner.cleanup_orphaned_collections(
warnings = []
deleted_vector_count, vector_error = vector_cleaner.cleanup_orphaned_collections(
final_active_file_ids, final_active_kb_ids
)
if vector_error:
warnings.append(f"Vector cleanup warning: {vector_error}")
log.warning(f"Vector cleanup completed with errors: {vector_error}")
# Stage 5: Audio cache cleanup
log.info("Cleaning audio cache")
cleanup_audio_cache(form_data.audio_cache_max_age_days)
# Stage 6: Database optimization
log.info("Optimizing database")
# Stage 6: Database optimization (optional)
if form_data.run_vacuum:
log.info("Optimizing database with VACUUM (this may take a while and lock the database)")
try:
with get_db() as db:
db.execute(text("VACUUM"))
log.info("Vacuumed main database")
except Exception as e:
log.error(f"Failed to vacuum main database: {e}")
@ -1543,10 +1646,20 @@ async def prune_data(form_data: PruneDataForm, user=Depends(get_admin_user)):
log.info("Executed VACUUM ANALYZE on PostgreSQL database")
except Exception as e:
log.error(f"Failed to vacuum PostgreSQL database: {e}")
else:
log.info("Skipping VACUUM optimization (not enabled)")
# Log any warnings collected during pruning
if warnings:
log.warning(f"Data pruning completed with warnings: {'; '.join(warnings)}")
log.info("Data pruning completed successfully")
return True
finally:
# Always release lock, even if operation fails
PruneLock.release()
except Exception as e:
log.exception(f"Error during data pruning: {e}")
raise HTTPException(

View file

@ -17,6 +17,7 @@ export const pruneData = async (
delete_inactive_users_days: number | null = null,
exempt_admin_users: boolean = true,
exempt_pending_users: boolean = true,
run_vacuum: boolean = false,
dry_run: boolean // Removed default value to ensure explicit passing
) => {
let error = null;
@ -43,6 +44,7 @@ export const pruneData = async (
delete_inactive_users_days,
exempt_admin_users,
exempt_pending_users,
run_vacuum,
dry_run
})
})

View file

@ -50,6 +50,7 @@
settings.delete_inactive_users_days,
settings.exempt_admin_users,
settings.exempt_pending_users,
settings.run_vacuum,
true // dry_run = true for preview
).catch((error) => {
toast.error(`${error}`);
@ -83,6 +84,7 @@
lastPruneSettings.delete_inactive_users_days,
lastPruneSettings.exempt_admin_users,
lastPruneSettings.exempt_pending_users,
lastPruneSettings.run_vacuum,
false // dry_run = false for actual pruning
).catch((error) => {
toast.error(`${error}`);

View file

@ -32,6 +32,9 @@
let cleanupAudioCache = true;
let audio_cache_max_age_days = 30;
// System/Database optimization
let run_vacuum = false;
let showDetailsExpanded = false;
let activeDetailsTab = 'users';
let activeSettingsTab = 'users';
@ -55,7 +58,8 @@
audio_cache_max_age_days: cleanupAudioCache ? audio_cache_max_age_days : null,
delete_inactive_users_days: deleteInactiveUsers ? delete_inactive_users_days : null,
exempt_admin_users,
exempt_pending_users
exempt_pending_users,
run_vacuum
});
show = false;
};
@ -96,7 +100,10 @@ curl -X POST "${window.location.origin}/api/v1/prune/" \\
"delete_orphaned_folders": ${delete_orphaned_folders},
// AUDIO CACHE CLEANUP (null = disabled)
"audio_cache_max_age_days": ${cleanupAudioCache ? audio_cache_max_age_days : null} // TTS/STT files
"audio_cache_max_age_days": ${cleanupAudioCache ? audio_cache_max_age_days : null}, // TTS/STT files
// DATABASE OPTIMIZATION (WARNING: Locks database during execution!)
"run_vacuum": ${run_vacuum} // Reclaim disk space - only enable during maintenance windows
}'
# API KEY vs JWT TOKEN:
@ -359,6 +366,12 @@ curl -X POST "${window.location.origin}/api/v1/prune/" \\
>
{$i18n.t('Audio Cache')}
</button>
<button
class="px-3 py-2 text-sm font-medium rounded-t transition-colors {activeSettingsTab === 'system' ? 'bg-blue-100 dark:bg-blue-800 text-blue-800 dark:text-blue-200' : 'text-blue-600 dark:text-blue-400 hover:text-blue-800 dark:hover:text-blue-200'}"
on:click={() => activeSettingsTab = 'system'}
>
{$i18n.t('System')}
</button>
</div>
<!-- Settings Tab Content -->
@ -744,6 +757,67 @@ curl -X POST "${window.location.origin}/api/v1/prune/" \\
</div>
{/if}
</div>
{:else if activeSettingsTab === 'system'}
<!-- System/Database Optimization -->
<div class="space-y-4">
<div class="flex items-start py-2">
<div class="flex items-center">
<div class="mr-3">
<Switch bind:state={run_vacuum} />
</div>
<div class="flex-1">
<div class="flex items-center text-sm font-medium text-gray-900 dark:text-gray-100">
<span>{$i18n.t('Run VACUUM optimization')}</span>
<div class="relative group ml-2">
<svg class="h-4 w-4 text-gray-400 hover:text-gray-600 dark:hover:text-gray-300 cursor-help" fill="currentColor" viewBox="0 0 20 20">
<path fill-rule="evenodd" d="M18 10a8 8 0 11-16 0 8 8 0 0116 0zm-7-4a1 1 0 11-2 0 1 1 0 012 0zM9 9a1 1 0 000 2v3a1 1 0 001 1h1a1 1 0 100-2v-3a1 1 0 00-1-1H9z" clip-rule="evenodd" />
</svg>
<div class="absolute left-1/2 transform -translate-x-1/2 bottom-full mb-2 w-72 px-3 py-2 text-xs text-white bg-gray-900 dark:bg-gray-700 rounded-lg shadow-lg opacity-0 group-hover:opacity-100 transition-opacity duration-200 pointer-events-none z-10">
<div class="font-medium mb-1">{$i18n.t('Database Optimization Warning:')}</div>
<div class="space-y-1">
<p>{$i18n.t('VACUUM reclaims disk space by rebuilding the database file.')}</p>
<p class="text-yellow-300 dark:text-yellow-400 font-medium">{$i18n.t('⚠️ This may take a very long time on large databases and will LOCK the entire database during execution.')}</p>
<p>{$i18n.t('It is strongly recommended to NOT run this while users are actively using the platform.')}</p>
<p class="text-green-300 dark:text-green-400">{$i18n.t('💡 Best practice: Run during scheduled maintenance windows.')}</p>
</div>
<div class="absolute top-full left-1/2 transform -translate-x-1/2 border-4 border-transparent border-t-gray-900 dark:border-t-gray-700"></div>
</div>
</div>
</div>
<div class="text-xs text-gray-500 dark:text-gray-400">
{$i18n.t('Reclaim disk space after cleanup (locks database during operation)')}
</div>
</div>
</div>
</div>
<!-- VACUUM warning box -->
{#if run_vacuum}
<div class="ml-8 border-l-2 border-yellow-200 dark:border-yellow-700 pl-4">
<div class="bg-yellow-50 dark:bg-yellow-900/20 border border-yellow-200 dark:border-yellow-800 rounded-lg p-3">
<div class="flex">
<div class="flex-shrink-0">
<svg class="h-5 w-5 text-yellow-400" viewBox="0 0 20 20" fill="currentColor">
<path fill-rule="evenodd" d="M8.485 2.495c.673-1.167 2.357-1.167 3.03 0l6.28 10.875c.673 1.167-.17 2.625-1.516 2.625H3.72c-1.347 0-2.189-1.458-1.515-2.625L8.485 2.495zM10 5a.75.75 0 01.75.75v3.5a.75.75 0 01-1.5 0v-3.5A.75.75 0 0110 5zm0 9a1 1 0 100-2 1 1 0 000 2z" clip-rule="evenodd" />
</svg>
</div>
<div class="ml-3">
<h4 class="text-sm font-medium text-yellow-800 dark:text-yellow-200">
{$i18n.t('VACUUM Enabled - Important Considerations:')}
</h4>
<div class="mt-2 text-sm text-yellow-700 dark:text-yellow-300 space-y-1">
<p>{$i18n.t('Database will be locked during VACUUM - all users will experience errors')}</p>
<p>{$i18n.t('Operation duration depends on database size (can be 5-30+ minutes)')}</p>
<p>{$i18n.t('Recommended only during scheduled maintenance windows')}</p>
<p>{$i18n.t('Not required for routine cleanups - only when reclaiming disk space is critical')}</p>
</div>
</div>
</div>
</div>
</div>
{/if}
</div>
{/if}
</div>
</div>