
Commit f993631

Add token limit and model override fields to ProviderConfig
Adds the following optional fields to ProviderConfig across all SDKs:

- modelId: well-known model ID for agent config + token limit lookup
- wireModel: model name sent to the provider API for inference
- maxPromptTokens: prompt token cap (triggers compaction)
- maxOutputTokens: response token cap

Both modelId and wireModel default to the session's configured model when unset.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 6a0e065 commit f993631

5 files changed

Lines changed: 123 additions & 0 deletions
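
Taken together, a minimal sketch of how the new fields might be set in the Python SDK (the field names come from this commit; the import path, model names, and token limits below are assumed for illustration):

from copilot.session import ProviderConfig  # import path assumed from this repo layout

# Hypothetical values: run a fine-tuned deployment while inheriting the
# agent configuration and default limits of a known base model.
provider: ProviderConfig = {
    "model_id": "gpt-4o",                # well-known base model for config/limit lookup
    "wire_model": "my-gpt-4o-finetune",  # name actually sent to the provider API
    "max_prompt_tokens": 120_000,        # compaction triggers above this prompt size
    "max_output_tokens": 4_096,          # cap on tokens generated per response
}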


dotnet/src/Types.cs

Lines changed: 37 additions & 0 deletions
@@ -1528,6 +1528,43 @@ public class ProviderConfig
     /// </summary>
     [JsonPropertyName("headers")]
     public IDictionary<string, string>? Headers { get; set; }
+
+    /// <summary>
+    /// Well-known model ID used to look up agent configuration (tools, prompts,
+    /// reasoning behavior) and default token limits from the capability catalog.
+    /// Useful for fine-tuned models that should inherit the configuration of a
+    /// known base model.
+    /// Defaults to the session's configured model (see <see cref="SessionConfig.Model"/>)
+    /// when not explicitly set.
+    /// </summary>
+    [JsonPropertyName("modelId")]
+    public string? ModelId { get; set; }
+
+    /// <summary>
+    /// Model identifier sent to the provider API for inference.
+    /// Use this when the name your provider knows (e.g. an Azure deployment name
+    /// or a custom fine-tune name) differs from the well-known model ID used for
+    /// configuration lookup.
+    /// Defaults to the session's configured model (see <see cref="SessionConfig.Model"/>)
+    /// when not explicitly set.
+    /// </summary>
+    [JsonPropertyName("wireModel")]
+    public string? WireModel { get; set; }
+
+    /// <summary>
+    /// Maximum number of tokens allowed in the prompt for a single LLM API request.
+    /// Used by the runtime to trigger conversation compaction before sending a request
+    /// when the prompt (system message, history, tool definitions, user message) exceeds this limit.
+    /// </summary>
+    [JsonPropertyName("maxPromptTokens")]
+    public int? MaxPromptTokens { get; set; }
+
+    /// <summary>
+    /// Maximum number of tokens the model can generate in a single response.
+    /// When hit, the model stops generating and returns a truncated response.
+    /// </summary>
+    [JsonPropertyName("maxOutputTokens")]
+    public int? MaxOutputTokens { get; set; }
 }

 /// <summary>

go/types.go

Lines changed: 23 additions & 0 deletions
@@ -859,6 +859,29 @@ type ProviderConfig struct {
 	Azure *AzureProviderOptions `json:"azure,omitempty"`
 	// Headers are custom HTTP headers included in outbound provider requests.
 	Headers map[string]string `json:"headers,omitempty"`
+	// ModelID is the well-known model ID used to look up agent configuration
+	// (tools, prompts, reasoning behavior) and default token limits from the
+	// capability catalog. Useful for fine-tuned models that should inherit the
+	// configuration of a known base model.
+	// Defaults to the session's configured model (SessionConfig.Model) when
+	// not explicitly set.
+	ModelID string `json:"modelId,omitempty"`
+	// WireModel is the model identifier sent to the provider API for inference.
+	// Use this when the name your provider knows (e.g. an Azure deployment name
+	// or a custom fine-tune name) differs from the well-known model ID used for
+	// configuration lookup.
+	// Defaults to the session's configured model (SessionConfig.Model) when
+	// not explicitly set.
+	WireModel string `json:"wireModel,omitempty"`
+	// MaxPromptTokens is the maximum number of tokens allowed in the prompt for
+	// a single LLM API request. Used by the runtime to trigger conversation
+	// compaction before sending a request when the prompt (system message,
+	// history, tool definitions, user message) exceeds this limit.
+	MaxPromptTokens int `json:"maxPromptTokens,omitempty"`
+	// MaxOutputTokens is the maximum number of tokens the model can generate in
+	// a single response. When hit, the model stops generating and returns a
+	// truncated response.
+	MaxOutputTokens int `json:"maxOutputTokens,omitempty"`
 }

 // AzureProviderOptions contains Azure-specific provider configuration

nodejs/src/types.ts

Lines changed: 33 additions & 0 deletions
@@ -1503,6 +1503,39 @@ export interface ProviderConfig {
    * Custom HTTP headers to include in outbound provider requests.
    */
   headers?: Record<string, string>;
+
+  /**
+   * Well-known model ID used to look up agent configuration (tools, prompts,
+   * reasoning behavior) and default token limits from the capability catalog.
+   * Useful for fine-tuned models that should inherit the configuration of a
+   * known base model.
+   * Defaults to the session's configured model (see {@link SessionConfig.model})
+   * when not explicitly set.
+   */
+  modelId?: string;
+
+  /**
+   * Model identifier sent to the provider API for inference.
+   * Use this when the name your provider knows (e.g. an Azure deployment name
+   * or a custom fine-tune name) differs from the well-known model ID used
+   * for configuration lookup.
+   * Defaults to the session's configured model (see {@link SessionConfig.model})
+   * when not explicitly set.
+   */
+  wireModel?: string;
+
+  /**
+   * Maximum number of tokens allowed in the prompt for a single LLM API request.
+   * Used by the runtime to trigger conversation compaction before sending a request
+   * when the prompt (system message, history, tool definitions, user message) exceeds this limit.
+   */
+  maxPromptTokens?: number;
+
+  /**
+   * Maximum number of tokens the model can generate in a single response.
+   * When hit, the model stops generating and returns a truncated response.
+   */
+  maxOutputTokens?: number;
 }

 /**

python/copilot/client.py

Lines changed: 8 additions & 0 deletions
@@ -2275,6 +2275,14 @@ def _convert_provider_to_wire_format(
         wire_provider["bearerToken"] = provider["bearer_token"]
     if "headers" in provider:
         wire_provider["headers"] = provider["headers"]
+    if "model_id" in provider:
+        wire_provider["modelId"] = provider["model_id"]
+    if "wire_model" in provider:
+        wire_provider["wireModel"] = provider["wire_model"]
+    if "max_prompt_tokens" in provider:
+        wire_provider["maxPromptTokens"] = provider["max_prompt_tokens"]
+    if "max_output_tokens" in provider:
+        wire_provider["maxOutputTokens"] = provider["max_output_tokens"]
     if "azure" in provider:
         azure = provider["azure"]
         wire_azure: dict[str, Any] = {}
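
The additions above only map the SDK's snake_case keys onto the camelCase wire names. As an illustration (not the SDK's actual code), the same mapping written as a table-driven helper:

# Illustrative sketch only: the snake_case -> camelCase mapping applied for
# the new fields, expressed as a lookup table.
_NEW_FIELD_WIRE_NAMES = {
    "model_id": "modelId",
    "wire_model": "wireModel",
    "max_prompt_tokens": "maxPromptTokens",
    "max_output_tokens": "maxOutputTokens",
}

def _copy_new_provider_fields(provider: dict, wire_provider: dict) -> None:
    """Copy the new model-override/token-limit fields when present (hypothetical helper)."""
    for sdk_key, wire_key in _NEW_FIELD_WIRE_NAMES.items():
        if sdk_key in provider:
            wire_provider[wire_key] = provider[sdk_key]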

python/copilot/session.py

Lines changed: 22 additions & 0 deletions
@@ -832,6 +832,28 @@ class ProviderConfig(TypedDict, total=False):
     bearer_token: str
     azure: AzureProviderOptions # Azure-specific options
     headers: dict[str, str]
+    # Well-known model ID used to look up agent configuration (tools, prompts,
+    # reasoning behavior) and default token limits from the capability catalog.
+    # Useful for fine-tuned models that should inherit the configuration of a
+    # known base model.
+    # Defaults to the session's configured model (SessionConfig.model) when
+    # not explicitly set.
+    model_id: str
+    # Model identifier sent to the provider API for inference. Use this when the
+    # name your provider knows (e.g. an Azure deployment name or a custom
+    # fine-tune name) differs from the well-known model ID used for
+    # configuration lookup.
+    # Defaults to the session's configured model (SessionConfig.model) when
+    # not explicitly set.
+    wire_model: str
+    # Maximum number of tokens allowed in the prompt for a single LLM API
+    # request. Used by the runtime to trigger conversation compaction before
+    # sending a request when the prompt (system message, history, tool
+    # definitions, user message) exceeds this limit.
+    max_prompt_tokens: int
+    # Maximum number of tokens the model can generate in a single response.
+    # When hit, the model stops generating and returns a truncated response.
+    max_output_tokens: int


 class SessionConfig(TypedDict, total=False):
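
Since the TypedDict is declared with total=False, a provider config can set only the fields it needs. A usage sketch (the deployment name and limit are hypothetical) where model_id is left unset and therefore falls back to the session's configured model:

# Hypothetical Azure-style setup: only the wire name and prompt cap are
# overridden; model_id defaults to the session's configured model.
provider = ProviderConfig(
    wire_model="prod-gpt4o-deployment",  # deployment name sent to the provider API
    max_prompt_tokens=100_000,           # compact conversation history above this size
)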

0 commit comments

Comments
 (0)