perf: fix cache key generation for model list caching

- Replace the Request object with user.id in the cache key for get_all_models
- A new Request object is created for every HTTP request, so cache keys derived from it never match and the cache is never hit
- Cache keys are now built from user.id (with a shared fallback key when no user is given), so repeated calls can hit the cache
- Affects both the Ollama and the OpenAI model list endpoints

Signed-off-by: Sihyeon Jang <sihyeon.jang@navercorp.com>
This commit is contained in:
Sihyeon Jang 2025-09-03 05:17:41 +09:00
parent 22c4ef4fb0
commit 3ccbb46938
2 changed files with 2 additions and 2 deletions

View file

@@ -340,7 +340,7 @@ def merge_ollama_models_lists(model_lists):
return list(merged_models.values()) return list(merged_models.values())
@cached(ttl=MODELS_CACHE_TTL) @cached(ttl=MODELS_CACHE_TTL, key=lambda _, user: f"ollama_all_models_{user.id}" if user else "ollama_all_models")
async def get_all_models(request: Request, user: UserModel = None): async def get_all_models(request: Request, user: UserModel = None):
log.info("get_all_models()") log.info("get_all_models()")
if request.app.state.config.ENABLE_OLLAMA_API: if request.app.state.config.ENABLE_OLLAMA_API:

View file

@@ -401,7 +401,7 @@ async def get_filtered_models(models, user):
return filtered_models return filtered_models
@cached(ttl=MODELS_CACHE_TTL) @cached(ttl=MODELS_CACHE_TTL, key=lambda _, user: f"openai_all_models_{user.id}" if user else "openai_all_models")
async def get_all_models(request: Request, user: UserModel) -> dict[str, list]: async def get_all_models(request: Request, user: UserModel) -> dict[str, list]:
log.info("get_all_models()") log.info("get_all_models()")