Merge pull request #1954 from abhinav-1305/add-custom-inference

feat: Add support for Bedrock custom inference profiles via model_id
This commit is contained in:
Tal 2025-08-03 08:06:43 +03:00 committed by GitHub
commit a23b527101
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 28 additions and 0 deletions

View file

@ -250,6 +250,26 @@ model="bedrock/us.meta.llama4-scout-17b-instruct-v1:0"
fallback_models=["bedrock/us.meta.llama4-maverick-17b-instruct-v1:0"]
```
#### Custom Inference Profiles
To use a custom inference profile with Amazon Bedrock (for cost allocation tags and other configuration settings), add the `model_id` parameter to your configuration:
```toml
[config] # in configuration.toml
model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0"
fallback_models=["bedrock/anthropic.claude-3-haiku-20240307-v1:0"]
[aws]
AWS_ACCESS_KEY_ID="..."
AWS_SECRET_ACCESS_KEY="..."
AWS_REGION_NAME="..."
[litellm]
model_id = "your-custom-inference-profile-id"
```
The `model_id` parameter will be passed to all Bedrock completion calls, allowing you to use custom inference profiles for better cost allocation and reporting.
See [litellm](https://docs.litellm.ai/docs/providers/bedrock#usage) documentation for more information about the environment variables required for Amazon Bedrock.
### DeepSeek

View file

@ -352,6 +352,12 @@ class LiteLLMAIHandler(BaseAiHandler):
# Support for custom OpenAI body fields (e.g., Flex Processing)
kwargs = _process_litellm_extra_body(kwargs)
# Support for Bedrock custom inference profile via model_id
model_id = get_settings().get("litellm.model_id")
if model_id and 'bedrock/' in model:
kwargs["model_id"] = model_id
get_logger().info(f"Using Bedrock custom inference profile: {model_id}")
get_logger().debug("Prompts", artifact={"system": system, "user": user})
if get_settings().config.verbosity_level >= 2:

View file

@ -19,6 +19,7 @@ key = "" # Acquire through https://platform.openai.com
# OpenAI Flex Processing (optional, for cost savings)
# [litellm]
# extra_body='{"processing_mode": "flex"}'
# model_id = "" # Optional: Custom inference profile ID for Amazon Bedrock
[pinecone]
api_key = "..."

View file

@ -334,6 +334,7 @@ enable_callbacks = false
success_callback = []
failure_callback = []
service_callback = []
# model_id = "" # Optional: Custom inference profile ID for Amazon Bedrock
[pr_similar_issue]
skip_comments = false