當您向 Messages API 發出請求時,Claude 的回應包含一個 stop_reason 欄位,該欄位指示模型停止生成回應的原因。理解這些值對於構建能夠適當處理不同回應類型的強大應用程式至關重要。
有關 API 回應中 stop_reason 的詳細信息,請參閱 Messages API 參考。
stop_reason 欄位是每個成功的 Messages API 回應的一部分。與指示請求處理失敗的錯誤不同,stop_reason 告訴您 Claude 為什麼成功完成了其回應生成。
{
"id": "msg_01234",
"type": "message",
"role": "assistant",
"content": [
{
"type": "text",
"text": "Here's the answer to your question..."
}
],
"stop_reason": "end_turn",
"stop_sequence": null,
"usage": {
"input_tokens": 100,
"output_tokens": 50
}
}最常見的停止原因。表示 Claude 自然完成了其回應。
if response.stop_reason == "end_turn":
# Process the complete response
print(response.content[0].text)有時 Claude 會返回空回應(恰好 2-3 個令牌,沒有內容),其中 stop_reason: "end_turn"。這通常發生在 Claude 認為助手回合已完成時,特別是在工具結果之後。
常見原因:
如何防止空回應:
# INCORRECT: Adding text immediately after tool_result
messages = [
{"role": "user", "content": "Calculate the sum of 1234 and 5678"},
{"role": "assistant", "content": [
{
"type": "tool_use",
"id": "toolu_123",
"name": "calculator",
"input": {"operation": "add", "a": 1234, "b": 5678}
}
]},
{"role": "user", "content": [
{
"type": "tool_result",
"tool_use_id": "toolu_123",
"content": "6912"
},
{
"type": "text",
"text": "Here's the result" # Don't add text after tool_result
}
]}
]
# CORRECT: Send tool results directly without additional text
messages = [
{"role": "user", "content": "Calculate the sum of 1234 and 5678"},
{"role": "assistant", "content": [
{
"type": "tool_use",
"id": "toolu_123",
"name": "calculator",
"input": {"operation": "add", "a": 1234, "b": 5678}
}
]},
{"role": "user", "content": [
{
"type": "tool_result",
"tool_use_id": "toolu_123",
"content": "6912"
}
]} # Just the tool_result, no additional text
]
# If you still get empty responses after fixing the above:
def handle_empty_response(client, messages):
response = client.messages.create(
model="claude-sonnet-4-20250514",
max_tokens=1024,
messages=messages
)
# Check if response is empty
if (response.stop_reason == "end_turn" and
not response.content):
# INCORRECT: Don't just retry with the empty response
# This won't work because Claude already decided it's done
# CORRECT: Add a continuation prompt in a NEW user message
messages.append({"role": "user", "content": "Please continue"})
response = client.messages.create(
model="claude-sonnet-4-20250514",
max_tokens=1024,
messages=messages
)
return response最佳實踐:
Claude 停止是因為達到了您在請求中指定的 max_tokens 限制。
# Request with limited tokens
response = client.messages.create(
model="claude-sonnet-4-5",
max_tokens=10,
messages=[{"role": "user", "content": "Explain quantum physics"}]
)
if response.stop_reason == "max_tokens":
# Response was truncated
print("Response was cut off at token limit")
# Consider making another request to continueClaude 遇到了您的自訂停止序列之一。
response = client.messages.create(
model="claude-sonnet-4-5",
max_tokens=1024,
stop_sequences=["END", "STOP"],
messages=[{"role": "user", "content": "Generate text until you say END"}]
)
if response.stop_reason == "stop_sequence":
print(f"Stopped at sequence: {response.stop_sequence}")Claude 正在調用工具並期望您執行它。
對於大多數工具使用實現,我們建議使用 tool runner,它會自動處理工具執行、結果格式化和對話管理。
response = client.messages.create(
model="claude-sonnet-4-5",
max_tokens=1024,
tools=[weather_tool],
messages=[{"role": "user", "content": "What's the weather?"}]
)
if response.stop_reason == "tool_use":
# Extract and execute the tool
for content in response.content:
if content.type == "tool_use":
result = execute_tool(content.name, content.input)
# Return result to Claude for final response與網路搜尋等伺服器工具一起使用,當 Claude 需要暫停長時間運行的操作時。
response = client.messages.create(
model="claude-sonnet-4-5",
max_tokens=1024,
tools=[{"type": "web_search_20250305", "name": "web_search"}],
messages=[{"role": "user", "content": "Search for latest AI news"}]
)
if response.stop_reason == "pause_turn":
# Continue the conversation
messages = [
{"role": "user", "content": original_query},
{"role": "assistant", "content": response.content}
]
continuation = client.messages.create(
model="claude-sonnet-4-5",
messages=messages,
tools=[{"type": "web_search_20250305", "name": "web_search"}]
)Claude 由於安全考慮而拒絕生成回應。
response = client.messages.create(
model="claude-sonnet-4-5",
max_tokens=1024,
messages=[{"role": "user", "content": "[Unsafe request]"}]
)
if response.stop_reason == "refusal":
# Claude declined to respond
print("Claude was unable to process this request")
# Consider rephrasing or modifying the request如果在使用 Claude Sonnet 4.5 或 Opus 4.1 時經常遇到 refusal 停止原因,您可以嘗試更新 API 呼叫以使用 Sonnet 4(claude-sonnet-4-20250514),它具有不同的使用限制。了解更多關於 理解 Sonnet 4.5 的 API 安全過濾器。
要了解有關 Claude Sonnet 4.5 的 API 安全過濾器觸發的拒絕的更多信息,請參閱 理解 Sonnet 4.5 的 API 安全過濾器。
Claude 停止是因為達到了模型的上下文窗口限制。這允許您請求最大可能的令牌,而無需知道確切的輸入大小。
# Request with maximum tokens to get as much as possible
response = client.messages.create(
model="claude-sonnet-4-5",
max_tokens=64000, # Model's maximum output tokens
messages=[{"role": "user", "content": "Large input that uses most of context window..."}]
)
if response.stop_reason == "model_context_window_exceeded":
# Response hit context window limit before max_tokens
print("Response reached model's context window limit")
# The response is still valid but was limited by context window此停止原因在 Sonnet 4.5 和更新的模型中默認可用。對於較早的模型,使用測試版標頭 model-context-window-exceeded-2025-08-26 來啟用此行為。
養成在回應處理邏輯中檢查 stop_reason 的習慣:
def handle_response(response):
if response.stop_reason == "tool_use":
return handle_tool_use(response)
elif response.stop_reason == "max_tokens":
return handle_truncation(response)
elif response.stop_reason == "model_context_window_exceeded":
return handle_context_limit(response)
elif response.stop_reason == "pause_turn":
return handle_pause(response)
elif response.stop_reason == "refusal":
return handle_refusal(response)
else:
# Handle end_turn and other cases
return response.content[0].text當回應因令牌限制或上下文窗口而被截斷時:
def handle_truncated_response(response):
if response.stop_reason in ["max_tokens", "model_context_window_exceeded"]:
# Option 1: Warn the user about the specific limit
if response.stop_reason == "max_tokens":
message = "[Response truncated due to max_tokens limit]"
else:
message = "[Response truncated due to context window limit]"
return f"{response.content[0].text}\n\n{message}"
# Option 2: Continue generation
messages = [
{"role": "user", "content": original_prompt},
{"role": "assistant", "content": response.content[0].text}
]
continuation = client.messages.create(
model="claude-sonnet-4-5",
max_tokens=1024,
messages=messages + [{"role": "user", "content": "Please continue"}]
)
return response.content[0].text + continuation.content[0].text對於可能暫停的伺服器工具:
def handle_paused_conversation(initial_response, max_retries=3):
response = initial_response
messages = [{"role": "user", "content": original_query}]
for attempt in range(max_retries):
if response.stop_reason != "pause_turn":
break
messages.append({"role": "assistant", "content": response.content})
response = client.messages.create(
model="claude-sonnet-4-5",
messages=messages,
tools=original_tools
)
return response區分 stop_reason 值和實際錯誤很重要:
try:
response = client.messages.create(...)
# Handle successful response with stop_reason
if response.stop_reason == "max_tokens":
print("Response was truncated")
except anthropic.APIError as e:
# Handle actual errors
if e.status_code == 429:
print("Rate limit exceeded")
elif e.status_code == 500:
print("Server error")使用串流時,stop_reason 是:
message_start 事件中為 nullmessage_delta 事件中提供with client.messages.stream(...) as stream:
for event in stream:
if event.type == "message_delta":
stop_reason = event.delta.stop_reason
if stop_reason:
print(f"Stream ended with: {stop_reason}")使用 tool runner 更簡單:下面的示例顯示手動工具處理。對於大多數用例,tool runner 會自動處理工具執行,代碼少得多。
def complete_tool_workflow(client, user_query, tools):
messages = [{"role": "user", "content": user_query}]
while True:
response = client.messages.create(
model="claude-sonnet-4-5",
messages=messages,
tools=tools
)
if response.stop_reason == "tool_use":
# Execute tools and continue
tool_results = execute_tools(response.content)
messages.append({"role": "assistant", "content": response.content})
messages.append({"role": "user", "content": tool_results})
else:
# Final response
return responsedef get_complete_response(client, prompt, max_attempts=3):
messages = [{"role": "user", "content": prompt}]
full_response = ""
for _ in range(max_attempts):
response = client.messages.create(
model="claude-sonnet-4-5",
messages=messages,
max_tokens=4096
)
full_response += response.content[0].text
if response.stop_reason != "max_tokens":
break
# Continue from where it left off
messages = [
{"role": "user", "content": prompt},
{"role": "assistant", "content": full_response},
{"role": "user", "content": "Please continue from where you left off."}
]
return full_response使用 model_context_window_exceeded 停止原因,您可以請求最大可能的令牌,而無需計算輸入大小:
def get_max_possible_tokens(client, prompt):
"""
Get as many tokens as possible within the model's context window
without needing to calculate input token count
"""
response = client.messages.create(
model="claude-sonnet-4-5",
messages=[{"role": "user", "content": prompt}],
max_tokens=64000 # Set to model's maximum output tokens
)
if response.stop_reason == "model_context_window_exceeded":
# Got the maximum possible tokens given input size
print(f"Generated {response.usage.output_tokens} tokens (context limit reached)")
elif response.stop_reason == "max_tokens":
# Got exactly the requested tokens
print(f"Generated {response.usage.output_tokens} tokens (max_tokens reached)")
else:
# Natural completion
print(f"Generated {response.usage.output_tokens} tokens (natural completion)")
return response.content[0].text通過適當處理 stop_reason 值,您可以構建更強大的應用程式,優雅地處理不同的回應場景並提供更好的用戶體驗。