2024-08-27 22:10:27 +00:00
|
|
|
|
import logging
|
2024-06-18 18:36:55 +00:00
|
|
|
|
import os
|
|
|
|
|
|
import uuid
|
2025-05-23 20:36:30 +00:00
|
|
|
|
import json
|
2025-04-08 04:42:37 +00:00
|
|
|
|
from fnmatch import fnmatch
|
2024-08-27 22:10:27 +00:00
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
from typing import Optional
|
2024-12-16 07:08:51 +00:00
|
|
|
|
from urllib.parse import quote
|
2024-10-05 00:22:00 +00:00
|
|
|
|
|
2025-03-29 22:23:02 +00:00
|
|
|
|
from fastapi import (
|
|
|
|
|
|
APIRouter,
|
|
|
|
|
|
Depends,
|
|
|
|
|
|
File,
|
2025-05-23 20:36:30 +00:00
|
|
|
|
Form,
|
2025-03-29 22:23:02 +00:00
|
|
|
|
HTTPException,
|
|
|
|
|
|
Request,
|
|
|
|
|
|
UploadFile,
|
|
|
|
|
|
status,
|
|
|
|
|
|
Query,
|
|
|
|
|
|
)
|
2025-02-06 23:32:06 +00:00
|
|
|
|
from fastapi.responses import FileResponse, StreamingResponse
|
|
|
|
|
|
from open_webui.constants import ERROR_MESSAGES
|
|
|
|
|
|
from open_webui.env import SRC_LOG_LEVELS
|
2025-07-18 15:40:29 +00:00
|
|
|
|
from open_webui.retrieval.vector.factory import VECTOR_DB_CLIENT
|
2025-04-30 15:34:53 +00:00
|
|
|
|
|
|
|
|
|
|
from open_webui.models.users import Users
|
2024-12-10 08:54:13 +00:00
|
|
|
|
from open_webui.models.files import (
|
2024-10-21 06:38:26 +00:00
|
|
|
|
FileForm,
|
|
|
|
|
|
FileModel,
|
|
|
|
|
|
FileModelResponse,
|
|
|
|
|
|
Files,
|
|
|
|
|
|
)
|
2025-03-31 08:10:18 +00:00
|
|
|
|
from open_webui.models.knowledge import Knowledges
|
|
|
|
|
|
|
2025-03-03 20:03:21 +00:00
|
|
|
|
from open_webui.routers.knowledge import get_knowledge, get_knowledge_list
|
2025-02-06 23:32:06 +00:00
|
|
|
|
from open_webui.routers.retrieval import ProcessFileForm, process_file
|
2025-02-26 21:09:52 +00:00
|
|
|
|
from open_webui.routers.audio import transcribe
|
2025-02-06 23:32:06 +00:00
|
|
|
|
from open_webui.storage.provider import Storage
|
2024-12-09 00:01:56 +00:00
|
|
|
|
from open_webui.utils.auth import get_admin_user, get_verified_user
|
2025-02-06 23:32:06 +00:00
|
|
|
|
from pydantic import BaseModel
|
2024-06-18 18:36:55 +00:00
|
|
|
|
|
|
|
|
|
|
# Module logger; its level comes from the "MODELS" entry of SRC_LOG_LEVELS.
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["MODELS"])

# Router on which the file endpoints in this module are registered.
router = APIRouter()
|
|
|
|
|
|
|
2025-03-31 08:10:18 +00:00
|
|
|
|
|
2025-03-03 20:03:21 +00:00
|
|
|
|
############################
|
|
|
|
|
|
# Check if the current user has access to a file through any knowledge bases the user may be in.
|
|
|
|
|
|
############################
|
2025-03-31 08:10:18 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def has_access_to_file(
    file_id: Optional[str], access_type: str, user=Depends(get_verified_user)
) -> bool:
    """Tell whether `user` can reach a file through one of their knowledge bases.

    The file's `meta["collection_name"]` is treated as a knowledge base id and
    compared against the knowledge bases returned for the user with the
    requested `access_type`.

    Args:
        file_id: Id of the file to check.
        access_type: Access level forwarded to the knowledge base lookup
            (callers in this module pass "read" or "write").
        user: The user whose knowledge bases are consulted.

    Returns:
        True when one of the user's knowledge bases has the id stored in the
        file's metadata, False otherwise (including when the file has no
        metadata or no collection name).

    Raises:
        HTTPException: 404 when no file with `file_id` exists.
    """
    file = Files.get_file_by_id(file_id)
    log.debug(f"Checking if user has {access_type} access to file")

    if not file:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=ERROR_MESSAGES.NOT_FOUND,
        )

    collection_id = file.meta.get("collection_name") if file.meta else None
    if not collection_id:
        # Not attached to any knowledge base: nothing can grant access.
        return False

    accessible_bases = Knowledges.get_knowledge_bases_by_user_id(
        user.id, access_type
    )
    return any(base.id == collection_id for base in accessible_bases)
|
|
|
|
|
|
|
|
|
|
|
|
|
2024-06-18 18:36:55 +00:00
|
|
|
|
############################
|
|
|
|
|
|
# Upload File
|
|
|
|
|
|
############################
|
|
|
|
|
|
|
|
|
|
|
|
|
2024-10-26 19:56:37 +00:00
|
|
|
|
@router.post("/", response_model=FileModelResponse)
def upload_file(
    request: Request,
    file: UploadFile = File(...),
    metadata: Optional[dict | str] = Form(None),
    process: bool = Query(True),
    internal: bool = False,
    user=Depends(get_verified_user),
):
    """Store an uploaded file, record it, and optionally process its content.

    Args:
        request: Incoming request; `request.app.state.config` supplies
            ALLOWED_FILE_EXTENSIONS, STT_SUPPORTED_CONTENT_TYPES and
            CONTENT_EXTRACTION_ENGINE.
        file: The uploaded file.
        metadata: Extra metadata, either a dict or a JSON-encoded string; it
            is stored under `meta["data"]` and passed to `transcribe`.
        process: When true, the file is transcribed and/or processed after it
            has been stored.
        internal: When true, the allowed-extension check is skipped.
        user: The uploading user.

    Returns:
        The stored file record. If processing fails, the record is returned
        with an additional `error` field instead of failing the upload.

    Raises:
        HTTPException: 400 for malformed metadata, a disallowed file
            extension, or any other failure while storing the file.
    """
    log.info(f"file.content_type: {file.content_type}")

    if isinstance(metadata, str):
        try:
            metadata = json.loads(metadata)
        except json.JSONDecodeError:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=ERROR_MESSAGES.DEFAULT("Invalid metadata format"),
            )
    file_metadata = metadata if metadata else {}

    try:
        unsanitized_filename = file.filename
        # Keep only the final path component of the client-supplied name.
        filename = os.path.basename(unsanitized_filename)

        file_extension = os.path.splitext(filename)[1]
        # Remove the leading dot from the file extension
        file_extension = file_extension[1:] if file_extension else ""

        if (not internal) and request.app.state.config.ALLOWED_FILE_EXTENSIONS:
            # Blank entries are dropped and the cleaned list is written back
            # to the app config (existing behavior, kept as-is).
            # NOTE(review): a list holding only blank entries becomes empty
            # here and therefore rejects every upload - confirm that is
            # intended.
            request.app.state.config.ALLOWED_FILE_EXTENSIONS = [
                ext for ext in request.app.state.config.ALLOWED_FILE_EXTENSIONS if ext
            ]

            if file_extension not in request.app.state.config.ALLOWED_FILE_EXTENSIONS:
                raise HTTPException(
                    status_code=status.HTTP_400_BAD_REQUEST,
                    detail=ERROR_MESSAGES.DEFAULT(
                        f"File type {file_extension} is not allowed"
                    ),
                )

        # replace filename with uuid
        file_id = str(uuid.uuid4())
        name = filename
        filename = f"{file_id}_{filename}"
        tags = {
            "OpenWebUI-User-Email": user.email,
            "OpenWebUI-User-Id": user.id,
            "OpenWebUI-User-Name": user.name,
            "OpenWebUI-File-Id": file_id,
        }
        contents, file_path = Storage.upload_file(file.file, filename, tags)

        file_item = Files.insert_new_file(
            user.id,
            FileForm(
                **{
                    "id": file_id,
                    "filename": name,
                    "path": file_path,
                    "meta": {
                        "name": name,
                        "content_type": file.content_type,
                        "size": len(contents),
                        "data": file_metadata,
                    },
                }
            ),
        )
        if not file_item:
            # Without a stored record there is nothing to process or return;
            # fail now rather than dereferencing None in the handler below.
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=ERROR_MESSAGES.DEFAULT("Error uploading file"),
            )

        if process:
            try:
                if file.content_type:
                    configured_types = getattr(
                        request.app.state.config, "STT_SUPPORTED_CONTENT_TYPES", []
                    )
                    # Fall back to the default patterns when nothing usable
                    # (empty list or only blank strings) is configured.
                    if configured_types and any(
                        t.strip() for t in configured_types
                    ):
                        stt_patterns = configured_types
                    else:
                        stt_patterns = ["audio/*", "video/webm"]

                    if any(
                        fnmatch(file.content_type, pattern)
                        for pattern in stt_patterns
                    ):
                        file_path = Storage.get_file(file_path)
                        result = transcribe(request, file_path, file_metadata)

                        process_file(
                            request,
                            ProcessFileForm(
                                file_id=file_id, content=result.get("text", "")
                            ),
                            user=user,
                        )
                    elif (not file.content_type.startswith(("image/", "video/"))) or (
                        request.app.state.config.CONTENT_EXTRACTION_ENGINE == "external"
                    ):
                        process_file(
                            request, ProcessFileForm(file_id=file_id), user=user
                        )
                else:
                    log.info(
                        f"File type {file.content_type} is not provided, but trying to process anyway"
                    )
                    process_file(request, ProcessFileForm(file_id=file_id), user=user)

                # Reload so the response reflects what processing stored.
                file_item = Files.get_file_by_id(id=file_id)
            except Exception as e:
                # Processing is best-effort: the upload itself succeeded, so
                # the stored record is returned together with the error text.
                log.exception(e)
                log.error(f"Error processing file: {file_id}")
                file_item = FileModelResponse(
                    **{
                        **file_item.model_dump(),
                        "error": str(e.detail) if hasattr(e, "detail") else str(e),
                    }
                )

        if file_item:
            return file_item
        else:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=ERROR_MESSAGES.DEFAULT("Error uploading file"),
            )

    except HTTPException:
        # Keep deliberate HTTP errors (e.g. "File type ... is not allowed")
        # intact instead of replacing them with the generic message below.
        raise
    except Exception as e:
        log.exception(e)
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=ERROR_MESSAGES.DEFAULT("Error uploading file"),
        )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
############################
|
|
|
|
|
|
# List Files
|
|
|
|
|
|
############################
|
|
|
|
|
|
|
|
|
|
|
|
|
2024-10-21 06:38:26 +00:00
|
|
|
|
@router.get("/", response_model=list[FileModelResponse])
async def list_files(user=Depends(get_verified_user), content: bool = Query(True)):
    """List files: all files for an admin, otherwise only the caller's own.

    Args:
        user: The requesting user.
        content: When false, the "content" entry is stripped from each file's
            `data` before the list is returned.

    Returns:
        The list of file records.
    """
    if user.role == "admin":
        files = Files.get_files()
    else:
        files = Files.get_files_by_user_id(user.id)

    if not content:
        for file in files:
            # Guard against records whose `data` is unset; a membership test
            # on None would raise TypeError and fail the whole listing.
            if file.data:
                file.data.pop("content", None)

    return files
|
|
|
|
|
|
|
|
|
|
|
|
|
2025-04-08 04:42:37 +00:00
|
|
|
|
############################
|
|
|
|
|
|
# Search Files
|
|
|
|
|
|
############################
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.get("/search", response_model=list[FileModelResponse])
async def search_files(
    filename: str = Query(
        ...,
        description="Filename pattern to search for. Supports wildcards such as '*.txt'",
    ),
    content: bool = Query(True),
    user=Depends(get_verified_user),
):
    """
    Search for files by filename with support for wildcard patterns.

    Matching is case-insensitive: both the stored filename and the pattern
    are lower-cased before being compared with `fnmatch`. Admins search all
    files; other users search only their own.

    Args:
        filename: Wildcard pattern to match filenames against.
        content: When false, the "content" entry is stripped from each
            matching file's `data`.
        user: The requesting user.

    Returns:
        The list of matching file records.

    Raises:
        HTTPException: 404 when no file matches the pattern.
    """
    # Get files according to user role
    if user.role == "admin":
        files = Files.get_files()
    else:
        files = Files.get_files_by_user_id(user.id)

    # Get matching files; the pattern is lower-cased once, not per file.
    pattern = filename.lower()
    matching_files = [file for file in files if fnmatch(file.filename.lower(), pattern)]

    if not matching_files:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="No files found matching the pattern.",
        )

    if not content:
        for file in matching_files:
            # Guard against records whose `data` is unset; a membership test
            # on None would raise TypeError.
            if file.data:
                file.data.pop("content", None)

    return matching_files
|
|
|
|
|
|
|
|
|
|
|
|
|
2024-06-18 22:20:04 +00:00
|
|
|
|
############################
|
|
|
|
|
|
# Delete All Files
|
|
|
|
|
|
############################
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.delete("/all")
async def delete_all_files(user=Depends(get_admin_user)):
    """Delete every file record, then clear file storage and the vector DB.

    Returns:
        A confirmation message on success.

    Raises:
        HTTPException: 400 when the record deletion reports failure, or when
            clearing storage or resetting the vector database raises.
    """
    records_removed = Files.delete_all_files()
    if not records_removed:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=ERROR_MESSAGES.DEFAULT("Error deleting files"),
        )

    try:
        Storage.delete_all_files()
        VECTOR_DB_CLIENT.reset()
    except Exception as exc:
        log.exception(exc)
        log.error("Error deleting files")
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=ERROR_MESSAGES.DEFAULT("Error deleting files"),
        )

    return {"message": "All files deleted successfully"}
|
|
|
|
|
|
|
|
|
|
|
|
|
2024-06-18 18:36:55 +00:00
|
|
|
|
############################
|
|
|
|
|
|
# Get File By Id
|
|
|
|
|
|
############################
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.get("/{id}", response_model=Optional[FileModel])
async def get_file_by_id(id: str, user=Depends(get_verified_user)):
    """Return the file record with the given id.

    The caller must own the file, be an admin, or have "read" access through
    a knowledge base.

    Raises:
        HTTPException: 404 when the file does not exist or the caller is not
            allowed to see it.
    """
    file = Files.get_file_by_id(id)

    if not file:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=ERROR_MESSAGES.NOT_FOUND,
        )

    # Evaluated left to right; the knowledge base lookup only runs when the
    # caller is neither the owner nor an admin.
    may_read = (
        file.user_id == user.id
        or user.role == "admin"
        or has_access_to_file(id, "read", user)
    )
    if not may_read:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=ERROR_MESSAGES.NOT_FOUND,
        )

    return file
|
|
|
|
|
|
|
|
|
|
|
|
|
2024-10-04 07:23:14 +00:00
|
|
|
|
############################
|
|
|
|
|
|
# Get File Data Content By Id
|
|
|
|
|
|
############################
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.get("/{id}/data/content")
async def get_file_data_content_by_id(id: str, user=Depends(get_verified_user)):
    """Return the "content" entry of a file's `data` (empty string if absent).

    The caller must own the file, be an admin, or have "read" access through
    a knowledge base.

    Raises:
        HTTPException: 404 when the file does not exist or the caller is not
            allowed to see it.
    """
    file = Files.get_file_by_id(id)

    if not file:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=ERROR_MESSAGES.NOT_FOUND,
        )

    may_read = (
        file.user_id == user.id
        or user.role == "admin"
        or has_access_to_file(id, "read", user)
    )
    if not may_read:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=ERROR_MESSAGES.NOT_FOUND,
        )

    stored_text = file.data.get("content", "")
    return {"content": stored_text}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
############################
|
|
|
|
|
|
# Update File Data Content By Id
|
|
|
|
|
|
############################
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class ContentForm(BaseModel):
    """Request body for replacing a file's stored content."""

    # New content, forwarded to `process_file` by the update endpoint.
    content: str
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.post("/{id}/data/content/update")
async def update_file_data_content_by_id(
    request: Request, id: str, form_data: ContentForm, user=Depends(get_verified_user)
):
    """Re-process a file with new content and return the stored content.

    The caller must own the file, be an admin, or have "write" access through
    a knowledge base. A processing failure is logged but not raised; the
    response then carries the content of the record loaded before processing.

    Raises:
        HTTPException: 404 when the file does not exist or the caller is not
            allowed to modify it.
    """
    file = Files.get_file_by_id(id)

    if not file:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=ERROR_MESSAGES.NOT_FOUND,
        )

    may_write = (
        file.user_id == user.id
        or user.role == "admin"
        or has_access_to_file(id, "write", user)
    )
    if not may_write:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=ERROR_MESSAGES.NOT_FOUND,
        )

    try:
        update_form = ProcessFileForm(file_id=id, content=form_data.content)
        process_file(request, update_form, user=user)
        # Reload so the response reflects what processing stored.
        file = Files.get_file_by_id(id=id)
    except Exception as exc:
        # Best-effort: log and fall through with the previously loaded record.
        log.exception(exc)
        log.error(f"Error processing file: {file.id}")

    return {"content": file.data.get("content", "")}
|
|
|
|
|
|
|
|
|
|
|
|
|
2024-06-18 21:33:44 +00:00
|
|
|
|
############################
|
|
|
|
|
|
# Get File Content By Id
|
|
|
|
|
|
############################
|
|
|
|
|
|
|
|
|
|
|
|
|
2024-10-12 09:31:10 +00:00
|
|
|
|
@router.get("/{id}/content")
async def get_file_content_by_id(
    id: str, user=Depends(get_verified_user), attachment: bool = Query(False)
):
    """Serve a stored file, choosing the Content-Disposition header by type.

    The caller must own the file, be an admin, or have "read" access through
    a knowledge base.

    Args:
        id: Id of the file to serve.
        user: The requesting user.
        attachment: When true, the file is always sent as an attachment.
            Otherwise PDFs are sent inline, "text/plain" gets no
            Content-Disposition header, and everything else is an attachment.

    Returns:
        A `FileResponse` for the file.

    Raises:
        HTTPException: 404 when the record does not exist, the caller has no
            access, or the path returned by storage is not a file; 400 for
            any other failure while preparing the response.
    """
    file = Files.get_file_by_id(id)

    if not file:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=ERROR_MESSAGES.NOT_FOUND,
        )

    if not (
        file.user_id == user.id
        or user.role == "admin"
        or has_access_to_file(id, "read", user)
    ):
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=ERROR_MESSAGES.NOT_FOUND,
        )

    try:
        file_path = Path(Storage.get_file(file.path))

        # Check if the file already exists in the cache
        if not file_path.is_file():
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=ERROR_MESSAGES.NOT_FOUND,
            )

        # `meta` may be empty/unset, so fall back to an empty mapping.
        meta = file.meta or {}
        content_type = meta.get("content_type")

        # Handle Unicode filenames
        filename = meta.get("name", file.filename)
        encoded_filename = quote(filename)  # RFC5987 encoding

        headers = {}
        if attachment:
            headers["Content-Disposition"] = (
                f"attachment; filename*=UTF-8''{encoded_filename}"
            )
        elif content_type == "application/pdf" or filename.lower().endswith(".pdf"):
            headers["Content-Disposition"] = (
                f"inline; filename*=UTF-8''{encoded_filename}"
            )
            # Force the PDF media type even if it was detected by extension.
            content_type = "application/pdf"
        elif content_type != "text/plain":
            headers["Content-Disposition"] = (
                f"attachment; filename*=UTF-8''{encoded_filename}"
            )

        return FileResponse(file_path, headers=headers, media_type=content_type)
    except HTTPException:
        # Let the 404 above through instead of rewriting it as a 400.
        raise
    except Exception as e:
        log.exception(e)
        log.error("Error getting file content")
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=ERROR_MESSAGES.DEFAULT("Error getting file content"),
        )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.get("/{id}/content/html")
async def get_html_file_content_by_id(id: str, user=Depends(get_verified_user)):
    """Serve a stored file as-is, but only if it was uploaded by an admin.

    In addition to the uploader check, the caller must own the file, be an
    admin, or have "read" access through a knowledge base.

    Returns:
        A `FileResponse` for the file.

    Raises:
        HTTPException: 404 when the record does not exist, its uploader is
            missing or not an admin, the caller has no access, or the path
            returned by storage is not a file; 400 for any other failure
            while preparing the response.
    """
    file = Files.get_file_by_id(id)

    if not file:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=ERROR_MESSAGES.NOT_FOUND,
        )

    file_user = Users.get_user_by_id(file.user_id)
    # A missing uploader is treated like a non-admin uploader instead of
    # failing with an attribute error on None.
    if not file_user or file_user.role != "admin":
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=ERROR_MESSAGES.NOT_FOUND,
        )

    if not (
        file.user_id == user.id
        or user.role == "admin"
        or has_access_to_file(id, "read", user)
    ):
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=ERROR_MESSAGES.NOT_FOUND,
        )

    try:
        file_path = Path(Storage.get_file(file.path))

        # Check if the file already exists in the cache
        if not file_path.is_file():
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=ERROR_MESSAGES.NOT_FOUND,
            )

        log.info(f"file_path: {file_path}")
        return FileResponse(file_path)
    except HTTPException:
        # Let the 404 above through instead of rewriting it as a 400.
        raise
    except Exception as e:
        log.exception(e)
        log.error("Error getting file content")
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=ERROR_MESSAGES.DEFAULT("Error getting file content"),
        )
|
|
|
|
|
|
|
|
|
|
|
|
|
2024-10-12 09:31:10 +00:00
|
|
|
|
@router.get("/{id}/content/{file_name}")
async def get_file_content_by_id(id: str, user=Depends(get_verified_user)):
    """Serve a file as an attachment, or its stored text when it has no path.

    The caller must own the file, be an admin, or have "read" access through
    a knowledge base. The `{file_name}` path segment is not read by this
    handler.

    NOTE(review): this function shares its name with the `/{id}/content`
    handler defined earlier in this module.

    Returns:
        A `FileResponse` when the record has a path that resolves to a file;
        otherwise, when the record has no path, a `StreamingResponse` with
        the record's stored text as "text/plain".

    Raises:
        HTTPException: 404 when the record does not exist, the caller has no
            access, or the path returned by storage is not a file.
    """
    file = Files.get_file_by_id(id)

    if not file:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=ERROR_MESSAGES.NOT_FOUND,
        )

    if not (
        file.user_id == user.id
        or user.role == "admin"
        or has_access_to_file(id, "read", user)
    ):
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=ERROR_MESSAGES.NOT_FOUND,
        )

    file_path = file.path

    # Handle Unicode filenames; `meta` may be empty/unset.
    filename = (file.meta or {}).get("name", file.filename)
    encoded_filename = quote(filename)  # RFC5987 encoding
    headers = {
        "Content-Disposition": f"attachment; filename*=UTF-8''{encoded_filename}"
    }

    if file_path:
        file_path = Path(Storage.get_file(file_path))

        # Check if the file already exists in the cache
        if file_path.is_file():
            return FileResponse(file_path, headers=headers)

        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=ERROR_MESSAGES.NOT_FOUND,
        )

    # File path doesn't exist, return the content as .txt if possible.
    # The text is read from `file.data["content"]`, the same place the other
    # endpoints in this module read it from.
    file_content = (file.data or {}).get("content", "")

    # Create a generator that encodes the file content
    def generator():
        yield file_content.encode("utf-8")

    return StreamingResponse(
        generator(),
        media_type="text/plain",
        headers=headers,
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
2024-06-18 18:36:55 +00:00
|
|
|
|
############################
|
|
|
|
|
|
# Delete File By Id
|
|
|
|
|
|
############################
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.delete("/{id}")
async def delete_file_by_id(id: str, user=Depends(get_verified_user)):
    """Delete a file record, its stored file and its vector collection.

    The caller must own the file, be an admin, or have "write" access through
    a knowledge base. The record is deleted first; the stored file and the
    `file-{id}` vector collection are removed afterwards.

    Returns:
        A confirmation message on success.

    Raises:
        HTTPException: 404 when the file does not exist or the caller is not
            allowed to delete it; 400 when the record deletion reports
            failure or the storage/vector cleanup raises.
    """
    file = Files.get_file_by_id(id)

    if not file:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=ERROR_MESSAGES.NOT_FOUND,
        )

    may_write = (
        file.user_id == user.id
        or user.role == "admin"
        or has_access_to_file(id, "write", user)
    )
    if not may_write:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=ERROR_MESSAGES.NOT_FOUND,
        )

    record_removed = Files.delete_file_by_id(id)
    if not record_removed:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=ERROR_MESSAGES.DEFAULT("Error deleting file"),
        )

    try:
        Storage.delete_file(file.path)
        VECTOR_DB_CLIENT.delete(collection_name=f"file-{id}")
    except Exception as exc:
        log.exception(exc)
        log.error("Error deleting files")
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=ERROR_MESSAGES.DEFAULT("Error deleting files"),
        )

    return {"message": "File deleted successfully"}
|