mirror of
https://github.com/qodo-ai/pr-agent.git
synced 2025-12-12 02:45:18 +00:00
Merge pull request #1954 from abhinav-1305/add-custom-inference
feat: Add support for Bedrock custom inference profiles via model_id
This commit is contained in:
commit
a23b527101
4 changed files with 28 additions and 0 deletions
|
|
@ -250,6 +250,26 @@ model="bedrock/us.meta.llama4-scout-17b-instruct-v1:0"
|
|||
fallback_models=["bedrock/us.meta.llama4-maverick-17b-instruct-v1:0"]
|
||||
```
|
||||
|
||||
#### Custom Inference Profiles
|
||||
|
||||
To use a custom inference profile with Amazon Bedrock (e.g., for cost allocation tags or other configuration settings), add the `model_id` parameter to your configuration:
|
||||
|
||||
```toml
|
||||
[config] # in configuration.toml
|
||||
model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0"
|
||||
fallback_models=["bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0"]
|
||||
|
||||
[aws]
|
||||
AWS_ACCESS_KEY_ID="..."
|
||||
AWS_SECRET_ACCESS_KEY="..."
|
||||
AWS_REGION_NAME="..."
|
||||
|
||||
[litellm]
|
||||
model_id = "your-custom-inference-profile-id"
|
||||
```
|
||||
|
||||
The `model_id` parameter will be passed to all Bedrock completion calls, allowing you to use custom inference profiles for better cost allocation and reporting.
|
||||
|
||||
See the [litellm](https://docs.litellm.ai/docs/providers/bedrock#usage) documentation for more information about the environment variables required for Amazon Bedrock.
|
||||
|
||||
### DeepSeek
|
||||
|
|
|
|||
|
|
@ -352,6 +352,12 @@ class LiteLLMAIHandler(BaseAiHandler):
|
|||
# Support for custom OpenAI body fields (e.g., Flex Processing)
|
||||
kwargs = _process_litellm_extra_body(kwargs)
|
||||
|
||||
# Support for Bedrock custom inference profile via model_id
|
||||
model_id = get_settings().get("litellm.model_id")
|
||||
if model_id and 'bedrock/' in model:
|
||||
kwargs["model_id"] = model_id
|
||||
get_logger().info(f"Using Bedrock custom inference profile: {model_id}")
|
||||
|
||||
get_logger().debug("Prompts", artifact={"system": system, "user": user})
|
||||
|
||||
if get_settings().config.verbosity_level >= 2:
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ key = "" # Acquire through https://platform.openai.com
|
|||
# OpenAI Flex Processing (optional, for cost savings)
|
||||
# [litellm]
|
||||
# extra_body='{"processing_mode": "flex"}'
|
||||
# model_id = "" # Optional: Custom inference profile ID for Amazon Bedrock
|
||||
|
||||
[pinecone]
|
||||
api_key = "..."
|
||||
|
|
|
|||
|
|
@ -334,6 +334,7 @@ enable_callbacks = false
|
|||
success_callback = []
|
||||
failure_callback = []
|
||||
service_callback = []
|
||||
# model_id = "" # Optional: Custom inference profile ID for Amazon Bedrock
|
||||
|
||||
[pr_similar_issue]
|
||||
skip_comments = false
|
||||
|
|
|
|||
Loading…
Reference in a new issue