tencent cloud

Deep Reasoning

Baixar
Modo Foco
Tamanho da Fonte
Última atualização: 2026-06-11 18:02:54

Feature Overview

The Deep Reasoning model supports reasoning before generating a final answer, improving the accuracy and explainability of complex tasks.

Use Cases

Complex code generation, code repair, and code refactoring.
Mathematical derivation, logical analysis, and multi-step decision-making.
Extraction and comprehensive summarization of complex information.
Tasks that require greater stability and fewer reasoning errors.

Enabling/Disabling Deep Reasoning

Control whether to enable the thinking mode using the thinking parameter.
Enable deep thinking: "thinking":{"type":"enabled"}
Disable deep thinking: "thinking":{"type":"disabled"}

Supported Models

Model Name
Model (API Parameter)
Default Value and Description
DeepSeek-V4-Flash
deepseek-v4-flash
enabled
DeepSeek-V4-Pro
deepseek-v4-pro
enabled
DeepSeek-V3.2
deepseek-v3.2
disabled
GLM-5.1
glm-5.1
enabled
GLM-5V-Turbo
glm-5v-turbo
enabled
GLM-5-Turbo
glm-5-turbo
enabled
GLM-5
glm-5
enabled
Kimi-K2.6
kimi-k2.6
enabled
Kimi-K2.5
kimi-k2.5
enabled
MiniMax-M2.7
minimax-m2.7
enabled, cannot be disabled.
MiniMax-M2.5
minimax-m2.5
enabled, cannot be disabled.

Example: Enabling Deep Reasoning

Note:
Please replace YOUR_API_KEY with the API Key you created.
cURL
Python
Node.js
Java
Go
# Send a non-streaming chat completion request with deep reasoning enabled.
# Each line ends with a single backslash so the shell treats it as one command.
curl -X POST 'https://tokenhub-intl.tencentcloudmaas.com/v1/chat/completions' \
  -H 'Content-Type: application/json' \
  -H 'Authorization: Bearer YOUR_API_KEY' \
  -d '{
    "model": "deepseek-v4-pro",
    "messages": [
      {"role": "user", "content": "Xiaoming had 5 apples, gave 2 to Xiaohong, bought 3 more, and finally, how many are left?"}
    ],
    "thinking": {"type": "enabled"},
    "stream": false
  }'
from openai import OpenAI

# Create a client that targets the OpenAI-compatible endpoint.
client = OpenAI(
    api_key="YOUR_API_KEY",
    base_url="https://tokenhub-intl.tencentcloudmaas.com/v1",
)

# The thinking switch is passed through extra_body so it is sent as a
# top-level field of the request body.
response = client.chat.completions.create(
    model="deepseek-v4-pro",
    messages=[
        {"role": "user", "content": "Xiaoming had 5 apples, gave 2 to Xiaohong, bought 3 more, and finally, how many are left?"}
    ],
    extra_body={"thinking": {"type": "enabled"}},
)

# The reasoning_content field is not directly declared by the OpenAI SDK, so
# read it with getattr and a default instead of plain attribute access.
msg = response.choices[0].message
reasoning = getattr(msg, "reasoning_content", None)
if reasoning:
    print("Thinking process:", reasoning)
print("Final answer:", msg.content)
import OpenAI from 'openai';

const client = new OpenAI({
  baseURL: 'https://tokenhub-intl.tencentcloudmaas.com/v1',
  apiKey: 'YOUR_API_KEY',
});

// Node.js SDK: pass the thinking field directly at the top level of the request.
// The request is cast to `any` where it is passed to the SDK, as in the call below.
const request = {
  model: 'deepseek-v4-pro',
  messages: [
    { role: 'user', content: 'Xiaoming had 5 apples, gave 2 to Xiaohong, bought 3 more, and finally, how many are left?' },
  ],
  thinking: { type: 'enabled' },
};
const response = await client.chat.completions.create(request as any);

// reasoning_content carries the thinking process; content carries the final answer.
const reply: any = response.choices[0].message;
if (reply.reasoning_content) {
  console.log('Thinking process:', reply.reasoning_content);
}
console.log('Final answer:', reply.content);
import okhttp3.*;
import com.google.gson.Gson;
import java.util.*;

/** Sends one chat completion request with deep reasoning enabled and prints the raw response body. */
public class ThinkingChat {
    public static void main(String[] args) throws Exception {
        // Assemble the request payload; Gson turns this map into the JSON body.
        Map<String, Object> payload = new HashMap<>();
        payload.put("model", "deepseek-v4-pro");
        payload.put("messages", List.of(
                Map.of("role", "user", "content", "Xiaoming had 5 apples, gave 2 to Xiaohong, bought 3 more, and finally, how many are left?")
        ));
        payload.put("thinking", Map.of("type", "enabled"));

        String json = new Gson().toJson(payload);
        RequestBody requestBody = RequestBody.create(json, MediaType.parse("application/json"));

        Request httpRequest = new Request.Builder()
                .url("https://tokenhub-intl.tencentcloudmaas.com/v1/chat/completions")
                .header("Authorization", "Bearer YOUR_API_KEY")
                .post(requestBody)
                .build();

        OkHttpClient httpClient = new OkHttpClient();
        try (Response httpResponse = httpClient.newCall(httpRequest).execute()) {
            // In the response body, message.reasoning_content is the thinking process
            // and message.content is the final answer.
            System.out.println(httpResponse.body().string());
        }
    }
}
package main

import (
"bytes"
"encoding/json"
"fmt"
"io"
"net/http"
)

// main sends one chat completion request with deep reasoning enabled and
// prints the raw JSON response body.
func main() {
	payload, err := json.Marshal(map[string]interface{}{
		"model": "deepseek-v4-pro",
		"messages": []map[string]string{
			// The trailing comma is required because the closing brace is on the next line.
			{"role": "user", "content": "Xiaoming had 5 apples, gave 2 to Xiaohong, bought 3 more, and finally, how many are left?"},
		},
		"thinking": map[string]string{"type": "enabled"},
	})
	if err != nil {
		panic(err)
	}

	req, err := http.NewRequest("POST",
		"https://tokenhub-intl.tencentcloudmaas.com/v1/chat/completions",
		bytes.NewBuffer(payload))
	if err != nil {
		panic(err)
	}
	req.Header.Set("Authorization", "Bearer YOUR_API_KEY")
	req.Header.Set("Content-Type", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		// resp is nil when the request fails, so stop before touching resp.Body.
		panic(err)
	}
	defer resp.Body.Close()

	data, err := io.ReadAll(resp.Body)
	if err != nil {
		panic(err)
	}
	// In the response body, the message.reasoning_content field represents the thinking process, and the message.content field represents the final answer.
	fmt.Println(string(data))
}

Example: Disabling Deep Reasoning

cURL
Python
Node.js
Java
Go
# Send a non-streaming chat completion request with deep reasoning disabled.
# Each line ends with a single backslash so the shell treats it as one command.
curl -X POST 'https://tokenhub-intl.tencentcloudmaas.com/v1/chat/completions' \
  -H 'Content-Type: application/json' \
  -H 'Authorization: Bearer YOUR_API_KEY' \
  -d '{
    "model": "deepseek-v4-flash",
    "messages": [
      {"role": "user", "content": "Xiaoming had 5 apples, gave 2 to Xiaohong, bought 3 more, and finally, how many are left?"}
    ],
    "thinking": {"type": "disabled"},
    "stream": false
  }'
from openai import OpenAI

# Client for the OpenAI-compatible endpoint.
client = OpenAI(
    base_url="https://tokenhub-intl.tencentcloudmaas.com/v1",
    api_key="YOUR_API_KEY",
)

# Switch the thinking mode off for this request via extra_body.
question = {"role": "user", "content": "Xiaoming had 5 apples, gave 2 to Xiaohong, bought 3 more, and finally, how many are left?"}
response = client.chat.completions.create(
    model="deepseek-v4-flash",
    messages=[question],
    extra_body={"thinking": {"type": "disabled"}},
)

# Print the message content, i.e. the final answer.
answer = response.choices[0].message.content
print(answer)
import OpenAI from 'openai';

const client = new OpenAI({
  baseURL: 'https://tokenhub-intl.tencentcloudmaas.com/v1',
  apiKey: 'YOUR_API_KEY',
});

// The thinking field is passed at the top level of the request.
// The request is cast to `any` where it is passed to the SDK, as in the call below.
const request = {
  model: 'deepseek-v4-flash',
  messages: [
    { role: 'user', content: 'Xiaoming had 5 apples, gave 2 to Xiaohong, bought 3 more, and finally, how many are left?' },
  ],
  thinking: { type: 'disabled' },
};
const response = await client.chat.completions.create(request as any);

// Print the message content, i.e. the final answer.
const answer = response.choices[0].message.content;
console.log(answer);
import okhttp3.*;
import com.google.gson.Gson;
import java.util.*;

/** Sends one chat completion request with deep reasoning disabled and prints the raw response body. */
public class DisableThinking {
    public static void main(String[] args) throws Exception {
        // Assemble the request payload; Gson turns this map into the JSON body.
        Map<String, Object> payload = new HashMap<>();
        payload.put("model", "deepseek-v4-flash");
        payload.put("messages", List.of(
                Map.of("role", "user", "content", "Xiaoming had 5 apples, gave 2 to Xiaohong, bought 3 more, and finally, how many are left?")
        ));
        payload.put("thinking", Map.of("type", "disabled"));

        String json = new Gson().toJson(payload);
        RequestBody requestBody = RequestBody.create(json, MediaType.parse("application/json"));

        Request httpRequest = new Request.Builder()
                .url("https://tokenhub-intl.tencentcloudmaas.com/v1/chat/completions")
                .header("Authorization", "Bearer YOUR_API_KEY")
                .post(requestBody)
                .build();

        OkHttpClient httpClient = new OkHttpClient();
        try (Response httpResponse = httpClient.newCall(httpRequest).execute()) {
            // Print the raw JSON response body.
            System.out.println(httpResponse.body().string());
        }
    }
}
package main

import (
"bytes"
"encoding/json"
"fmt"
"io"
"net/http"
)

// main sends one chat completion request with deep reasoning disabled and
// prints the raw JSON response body.
func main() {
	payload, err := json.Marshal(map[string]interface{}{
		"model": "deepseek-v4-flash",
		"messages": []map[string]string{
			// The trailing comma is required because the closing brace is on the next line.
			{"role": "user", "content": "Xiaoming had 5 apples, gave 2 to Xiaohong, bought 3 more, and finally, how many are left?"},
		},
		"thinking": map[string]string{"type": "disabled"},
	})
	if err != nil {
		panic(err)
	}

	req, err := http.NewRequest("POST",
		"https://tokenhub-intl.tencentcloudmaas.com/v1/chat/completions",
		bytes.NewBuffer(payload))
	if err != nil {
		panic(err)
	}
	req.Header.Set("Authorization", "Bearer YOUR_API_KEY")
	req.Header.Set("Content-Type", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		// resp is nil when the request fails, so stop before touching resp.Body.
		panic(err)
	}
	defer resp.Body.Close()

	data, err := io.ReadAll(resp.Body)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(data))
}

Reasoning Depth Configuration

Control the reasoning depth using the reasoning_effort parameter. This parameter constrains the level of reasoning intensity the model applies. Higher reasoning intensity typically leads to more comprehensive responses, but also results in higher latency and token consumption.
reasoning_effort Value
Description
low
Lightweight reasoning, with fewer inference steps, faster speed, suitable for simple tasks.
medium
Balanced mode, suitable for most daily, moderately complex tasks.
high
Deep reasoning, with the longest inference time and deepest thinking, suitable for high-difficulty mathematics, programming, or complex logical reasoning tasks, but with the highest latency and cost.

Supported Models

Model Name
Model (API Parameter)
Description
DeepSeek-V4-Flash
deepseek-v4-flash
Default high
DeepSeek-V4-Pro
deepseek-v4-pro
Default high
DeepSeek-V3.2
deepseek-v3.2
Default high

Example: Configuring Reasoning Depth

cURL
Python
Node.js
Java
Go
# Send a non-streaming chat completion request with an explicit reasoning depth.
# Each line ends with a single backslash so the shell treats it as one command.
curl -X POST 'https://tokenhub-intl.tencentcloudmaas.com/v1/chat/completions' \
  -H 'Content-Type: application/json' \
  -H 'Authorization: Bearer YOUR_API_KEY' \
  -d '{
    "model": "deepseek-v4-pro",
    "messages": [
      {"role": "user", "content": "Xiaoming had 5 apples, gave 2 to Xiaohong, bought 3 more, and finally, how many are left?"}
    ],
    "stream": false,
    "temperature": 0.9,
    "reasoning_effort": "high"
  }'
from openai import OpenAI

# Create a client that targets the OpenAI-compatible endpoint.
client = OpenAI(
    api_key="YOUR_API_KEY",
    base_url="https://tokenhub-intl.tencentcloudmaas.com/v1",
)

# reasoning_effort is passed through extra_body so it is sent as a top-level
# field of the request body.
response = client.chat.completions.create(
    model="deepseek-v4-pro",
    messages=[
        {"role": "user", "content": "Xiaoming had 5 apples, gave 2 to Xiaohong, bought 3 more, and finally, how many are left?"}
    ],
    temperature=0.9,
    extra_body={"reasoning_effort": "high"},
)

# Read reasoning_content with getattr and a default rather than plain
# attribute access, so a message without that field does not raise.
msg = response.choices[0].message
reasoning = getattr(msg, "reasoning_content", None)
if reasoning:
    print("Thinking process:", reasoning)
print("Final answer:", msg.content)
import OpenAI from 'openai';

const client = new OpenAI({
  baseURL: 'https://tokenhub-intl.tencentcloudmaas.com/v1',
  apiKey: 'YOUR_API_KEY',
});

// reasoning_effort is passed at the top level of the request.
// The request is cast to `any` where it is passed to the SDK, as in the call below.
const request = {
  model: 'deepseek-v4-pro',
  messages: [
    { role: 'user', content: 'Xiaoming had 5 apples, gave 2 to Xiaohong, bought 3 more, and finally, how many are left?' },
  ],
  temperature: 0.9,
  reasoning_effort: 'high',
};
const response = await client.chat.completions.create(request as any);

// reasoning_content carries the thinking process; content carries the final answer.
const reply: any = response.choices[0].message;
if (reply.reasoning_content) {
  console.log('Thinking process:', reply.reasoning_content);
}
console.log('Final answer:', reply.content);
import okhttp3.*;
import com.google.gson.Gson;
import java.util.*;

/** Sends one chat completion request with an explicit reasoning_effort and prints the raw response body. */
public class ReasoningEffortChat {
    public static void main(String[] args) throws Exception {
        // Assemble the request payload; Gson turns this map into the JSON body.
        Map<String, Object> payload = new HashMap<>();
        payload.put("model", "deepseek-v4-pro");
        payload.put("messages", List.of(
                Map.of("role", "user", "content", "Xiaoming had 5 apples, gave 2 to Xiaohong, bought 3 more, and finally, how many are left?")
        ));
        payload.put("temperature", 0.9);
        payload.put("reasoning_effort", "high");

        String json = new Gson().toJson(payload);
        RequestBody requestBody = RequestBody.create(json, MediaType.parse("application/json"));

        Request httpRequest = new Request.Builder()
                .url("https://tokenhub-intl.tencentcloudmaas.com/v1/chat/completions")
                .header("Authorization", "Bearer YOUR_API_KEY")
                .post(requestBody)
                .build();

        OkHttpClient httpClient = new OkHttpClient();
        try (Response httpResponse = httpClient.newCall(httpRequest).execute()) {
            // Print the raw JSON response body.
            System.out.println(httpResponse.body().string());
        }
    }
}
package main

import (
"bytes"
"encoding/json"
"fmt"
"io"
"net/http"
)

// main sends one chat completion request with an explicit reasoning_effort
// and prints the raw JSON response body.
func main() {
	payload, err := json.Marshal(map[string]interface{}{
		"model": "deepseek-v4-pro",
		"messages": []map[string]string{
			// The trailing comma is required because the closing brace is on the next line.
			{"role": "user", "content": "Xiaoming had 5 apples, gave 2 to Xiaohong, bought 3 more, and finally, how many are left?"},
		},
		"temperature":      0.9,
		"reasoning_effort": "high",
	})
	if err != nil {
		panic(err)
	}

	req, err := http.NewRequest("POST",
		"https://tokenhub-intl.tencentcloudmaas.com/v1/chat/completions",
		bytes.NewBuffer(payload))
	if err != nil {
		panic(err)
	}
	req.Header.Set("Authorization", "Bearer YOUR_API_KEY")
	req.Header.Set("Content-Type", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		// resp is nil when the request fails, so stop before touching resp.Body.
		panic(err)
	}
	defer resp.Body.Close()

	data, err := io.ReadAll(resp.Body)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(data))
}

Response Example

After thinking is enabled, the response includes a reasoning_content field that contains the thinking process:
{
"id": "c95dc87ecce440678c3bb08f5868fee6",
"object": "chat.completion",
"created": 1775146546,
"model": "deepseek-v4-pro",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "The answer is 6.",
"reasoning_content": "The user now needs to solve the problem of the change in Xiaoming's apple count, and must first clarify each step of the change. Initially, Xiaoming had 5 apples and gave 2 to Xiaohong, so 2 should be subtracted at this point, right? Then, he bought 3 more, so 3 should be added. Therefore, the calculation is 5 minus 2 plus 3. First, calculate 5-2=3, then 3+3=6."
},
"finish_reason": "stop"
}
],
"usage": {
"prompt_tokens": 22,
"completion_tokens": 264,
"total_tokens": 286
}
}
For tool calls in thinking mode, you must provide the historical reasoning_content in each request round to obtain the best results. For details, see Interleaved Thinking.

Ajuda e Suporte

Esta página foi útil?

comentários