glm-5.1 model to complete a basic conversation. Replace YOUR_API_KEY with the API key you created.

curl -X POST 'https://tokenhub-intl.tencentcloudmaas.com/v1/chat/completions' \
  -H 'Authorization: Bearer YOUR_API_KEY' \
  -H 'Content-Type: application/json' \
  -d '{
    "model": "glm-5.1",
    "messages": [{"role": "user", "content": "hello"}],
    "max_tokens": 1024
  }'
# Quick start: send a single-turn chat request to glm-5.1 through the
# OpenAI-compatible endpoint and print the reply text.
from openai import OpenAI

client = OpenAI(
    api_key="YOUR_API_KEY",  # placeholder: substitute your own API key
    base_url="https://tokenhub-intl.tencentcloudmaas.com/v1",
)

response = client.chat.completions.create(
    model="glm-5.1",
    messages=[{"role": "user", "content": "hello"}],
    max_tokens=1024,
)
print(response.choices[0].message.content)
import OpenAI from 'openai';

// OpenAI SDK client pointed at the TokenHub base URL.
const client = new OpenAI({
  apiKey: 'YOUR_API_KEY',
  baseURL: 'https://tokenhub-intl.tencentcloudmaas.com/v1',
});

// Single-turn request to glm-5.1, capped at 1024 output tokens.
const response = await client.chat.completions.create({
  model: 'glm-5.1',
  messages: [{ role: 'user', content: 'hello' }],
  max_tokens: 1024,
});

// Print the text of the first returned choice.
const firstChoice = response.choices[0];
console.log(firstChoice.message.content);
import okhttp3.*;
import com.google.gson.Gson;
import java.util.*;

/** Quick start: posts one user message to glm-5.1 and prints the raw JSON response body. */
public class GlmQuickStart {
    public static void main(String[] args) throws Exception {
        // Request payload: model, one user message, and an output-token cap.
        Map<String, Object> userMessage = Map.of("role", "user", "content", "hello");
        Map<String, Object> payload = new HashMap<>();
        payload.put("model", "glm-5.1");
        payload.put("messages", List.of(userMessage));
        payload.put("max_tokens", 1024);

        // Serialize the payload with Gson and wrap it as a JSON request body.
        String json = new Gson().toJson(payload);
        RequestBody requestBody = RequestBody.create(json, MediaType.parse("application/json"));

        Request request = new Request.Builder()
                .url("https://tokenhub-intl.tencentcloudmaas.com/v1/chat/completions")
                .header("Authorization", "Bearer YOUR_API_KEY")
                .post(requestBody)
                .build();

        // try-with-resources closes the response once the body has been printed.
        OkHttpClient httpClient = new OkHttpClient();
        try (Response response = httpClient.newCall(request).execute()) {
            System.out.println(response.body().string());
        }
    }
}
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"os"
)

// main runs the quick-start request and exits non-zero if any step fails.
func main() {
	if err := run(); err != nil {
		fmt.Fprintln(os.Stderr, "error:", err)
		os.Exit(1)
	}
}

// run posts a single-turn chat request to glm-5.1 and prints the raw JSON
// response body. Every error is returned instead of discarded, so a failed
// request can no longer cause a nil-pointer panic on resp.Body.
func run() error {
	body, err := json.Marshal(map[string]interface{}{
		"model":      "glm-5.1",
		"messages":   []map[string]string{{"role": "user", "content": "hello"}},
		"max_tokens": 1024,
	})
	if err != nil {
		return fmt.Errorf("encode request: %w", err)
	}

	req, err := http.NewRequest("POST",
		"https://tokenhub-intl.tencentcloudmaas.com/v1/chat/completions",
		bytes.NewBuffer(body))
	if err != nil {
		return fmt.Errorf("build request: %w", err)
	}
	req.Header.Set("Authorization", "Bearer YOUR_API_KEY")
	req.Header.Set("Content-Type", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return fmt.Errorf("send request: %w", err)
	}
	defer resp.Body.Close()

	data, err := io.ReadAll(resp.Body)
	if err != nil {
		return fmt.Errorf("read response: %w", err)
	}
	fmt.Println(string(data))
	return nil
}
reasoning_content field. If you do not need the reasoning process, see the reasoning mode section below to disable it.

model Parameter Value | Positioning | Context / Max Output | Multimodal | Recommended Scenario |
glm-5.2 | Flagship (latest) | 1M tokens / 128K tokens | Not supported. | Agent and coding, long-running tasks, full-stack development, code translation, research reproduction |
glm-5.1 | Flagship | 200K tokens / 128K tokens | Not supported. | General conversation, content creation, knowledge Q&A, complex reasoning |
glm-5 | Previous-generation flagship | 200K tokens / 128K tokens | Not supported. | Scenarios prioritizing stability |
glm-5-turbo | Agent-optimized | 200K tokens / 128K tokens | Not supported. | Tool calling, long-chain Agent tasks |
glm-5v-turbo | Multimodal | 200K tokens / 128K tokens | Supports images, videos, and files. | Image understanding, video analysis, document parsing |
thinking parameter, without switching the model parameter.

thinking field. This field is an object and contains a type attribute:

Field | Type | Value | Default Value | Description |
type | String | enabled / disabled | enabled | Controls whether thinking capability is enabled for the current request. |
# Chat request with thinking disabled: "thinking" sits at the top level of the request body.
curl -X POST 'https://tokenhub-intl.tencentcloudmaas.com/v1/chat/completions' \
  -H 'Authorization: Bearer YOUR_API_KEY' \
  -H 'Content-Type: application/json' \
  -d '{
    "model": "glm-5.1",
    "messages": [{"role": "user", "content": "hello"}],
    "thinking": {"type": "disabled"}
  }'
# Disable thinking for this request. The non-standard "thinking" field is
# sent through the SDK's extra_body pass-through.
thinking_off = {"thinking": {"type": "disabled"}}
response = client.chat.completions.create(
    model="glm-5.1",
    messages=[{"role": "user", "content": "hello"}],
    extra_body=thinking_off,
)
// Disable thinking for this request; the extension field is passed directly
// alongside the standard parameters.
const response = await client.chat.completions.create({
  model: 'glm-5.1',
  messages: [{ role: 'user', content: 'hello' }],
  // @ts-ignore - thinking is a GLM extension field
  thinking: { type: 'disabled' },
});
import okhttp3.*;
import com.google.gson.Gson;
import java.util.*;

/** Posts one user message to glm-5.1 with thinking disabled and prints the raw JSON response body. */
public class GlmThinkingDisabled {
    public static void main(String[] args) throws Exception {
        // Request payload; "thinking" is a top-level field of the request body.
        Map<String, Object> userMessage = Map.of("role", "user", "content", "hello");
        Map<String, Object> payload = new HashMap<>();
        payload.put("model", "glm-5.1");
        payload.put("messages", List.of(userMessage));
        payload.put("thinking", Map.of("type", "disabled"));

        // Serialize the payload with Gson and wrap it as a JSON request body.
        String json = new Gson().toJson(payload);
        RequestBody requestBody = RequestBody.create(json, MediaType.parse("application/json"));

        Request request = new Request.Builder()
                .url("https://tokenhub-intl.tencentcloudmaas.com/v1/chat/completions")
                .header("Authorization", "Bearer YOUR_API_KEY")
                .post(requestBody)
                .build();

        // try-with-resources closes the response once the body has been printed.
        OkHttpClient httpClient = new OkHttpClient();
        try (Response response = httpClient.newCall(request).execute()) {
            System.out.println(response.body().string());
        }
    }
}
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"os"
)

// main runs the request and exits non-zero if any step fails.
func main() {
	if err := run(); err != nil {
		fmt.Fprintln(os.Stderr, "error:", err)
		os.Exit(1)
	}
}

// run posts a single-turn chat request to glm-5.1 with thinking disabled and
// prints the raw JSON response body. Every error is returned instead of
// discarded, so a failed request can no longer cause a nil-pointer panic.
func run() error {
	body, err := json.Marshal(map[string]interface{}{
		"model":    "glm-5.1",
		"messages": []map[string]string{{"role": "user", "content": "hello"}},
		// "thinking" is placed at the top level of the request body.
		"thinking": map[string]string{"type": "disabled"},
	})
	if err != nil {
		return fmt.Errorf("encode request: %w", err)
	}

	req, err := http.NewRequest("POST",
		"https://tokenhub-intl.tencentcloudmaas.com/v1/chat/completions",
		bytes.NewBuffer(body))
	if err != nil {
		return fmt.Errorf("build request: %w", err)
	}
	req.Header.Set("Authorization", "Bearer YOUR_API_KEY")
	req.Header.Set("Content-Type", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return fmt.Errorf("send request: %w", err)
	}
	defer resp.Body.Close()

	data, err := io.ReadAll(resp.Body)
	if err != nil {
		return fmt.Errorf("read response: %w", err)
	}
	fmt.Println(string(data))
	return nil
}
thinking is not a standard OpenAI field. When using the OpenAI SDK, you must pass it through the SDK's mechanism for extra fields (use extra_body for Python, or pass the field directly for Node.js). For direct HTTP calls, place it at the top level of the request body.

reasoning_content field, which is at the same level as content:

{
  "choices": [{
    "message": {
      "role": "assistant",
      "reasoning_content": "Let me analyze this problem...",
      "content": "The final answer is..."
    }
  }]
}
Because reasoning_content is not a standard OpenAI field, you must access it via null-checking or reflection when using language-specific SDKs. For direct HTTP calls, simply read the response JSON.

message = response.choices[0].message
if hasattr(message, "reasoning_content") and message.reasoning_content:
    print("Thinking process:", message.reasoning_content)
print("Answer:", message.content)
// Print the thinking process (when present) followed by the final answer.
const message = response.choices[0].message;
// @ts-ignore - reasoning_content is a GLM extension field
if (message.reasoning_content) {
  console.log('Thinking process:', message.reasoning_content);
}
console.log('Answer:', message.content);
// After obtaining the response string from an HTTP call, parse the reasoning_content and content fields using Gson.import com.google.gson.JsonObject;import com.google.gson.JsonParser;String respBody = response.body().string();JsonObject json = JsonParser.parseString(respBody).getAsJsonObject();JsonObject message = json.getAsJsonArray("choices").get(0).getAsJsonObject().getAsJsonObject("message");if (message.has("reasoning_content") && !message.get("reasoning_content").isJsonNull()) {System.out.println("Thinking process: " + message.get("reasoning_content").getAsString());}System.out.println("Answer: " + message.get("content").getAsString());
type Message struct {Role string `json:"role"`Content string `json:"content"`ReasoningContent string `json:"reasoning_content,omitempty"`}type Choice struct {Index int `json:"index"`Message Message `json:"message"`}type ChatResponse struct {Choices []Choice `json:"choices"`}var result ChatResponsejson.Unmarshal(data, &result)if result.Choices[0].Message.ReasoningContent != "" {fmt.Println("Thinking process:", result.Choices[0].Message.ReasoningContent)}fmt.Println("Answer:", result.Choices[0].Message.Content)
messages for subsequent rounds, do not write back the reasoning_content. Only pass the content field as the assistant message.

stream=True). The reasoning content can be lengthy, and non-streaming calls are prone to triggering gateway timeouts.

reasoning_content is fully output before the content. Client processing logic: accumulate delta.reasoning_content to output the thinking process, then accumulate delta.content to output the final answer.

curl -N -X POST 'https://tokenhub-intl.tencentcloudmaas.com/v1/chat/completions' \
  -H 'Authorization: Bearer YOUR_API_KEY' \
  -H 'Content-Type: application/json' \
  -d '{
    "model": "glm-5.1",
    "messages": [{"role": "user", "content": "Explain quantum entanglement"}],
    "stream": true,
    "stream_options": {"include_usage": true},
    "thinking": {"type": "enabled"}
  }'
# Stream a completion with thinking enabled: print the reasoning text as it
# arrives, then a separator, then the answer text.
stream = client.chat.completions.create(
    model="glm-5.1",
    messages=[{"role": "user", "content": "Explain quantum entanglement"}],
    stream=True,
    stream_options={"include_usage": True},
    extra_body={"thinking": {"type": "enabled"}},
)

is_answering = False  # flips to True once the first answer token is seen
for chunk in stream:
    # Skip chunks that carry no choices.
    if not chunk.choices:
        continue
    delta = chunk.choices[0].delta
    # reasoning_content is an extension field, so guard with hasattr.
    if hasattr(delta, "reasoning_content") and delta.reasoning_content:
        print(delta.reasoning_content, end="", flush=True)
    if hasattr(delta, "content") and delta.content:
        if not is_answering:
            # Real newlines here; the doubled backslashes printed a literal "\n".
            print("\n--- Answer ---\n")
            is_answering = True
        print(delta.content, end="", flush=True)
// Stream a completion with thinking enabled: write the reasoning text as it
// arrives, then a separator, then the answer text.
const stream = await client.chat.completions.create({
  model: 'glm-5.1',
  messages: [{ role: 'user', content: 'Explain quantum entanglement' }],
  stream: true,
  stream_options: { include_usage: true },
  // @ts-ignore - thinking is a GLM extension field
  thinking: { type: 'enabled' },
});

let isAnswering = false; // becomes true once the first answer token is seen
for await (const chunk of stream) {
  // Skip chunks that carry no choices.
  if (!chunk.choices?.length) continue;
  const delta = chunk.choices[0].delta;
  // @ts-ignore - reasoning_content is a GLM extension field
  if (delta.reasoning_content) {
    process.stdout.write(delta.reasoning_content);
  }
  if (delta.content) {
    if (!isAnswering) {
      // Real newlines here; the doubled backslashes wrote a literal "\n".
      process.stdout.write('\n--- Answer ---\n');
      isAnswering = true;
    }
    process.stdout.write(delta.content);
  }
}
import okhttp3.*;
import com.google.gson.*;
import java.util.*;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

/**
 * Streams a glm-5.1 completion with thinking enabled and prints the reasoning
 * text followed by the answer text as the chunks arrive.
 */
public class GlmStream {
    public static void main(String[] args) throws Exception {
        Map<String, Object> body = new HashMap<>();
        body.put("model", "glm-5.1");
        body.put("messages", List.of(Map.of("role", "user", "content", "Explain quantum entanglement")));
        body.put("stream", true);
        body.put("stream_options", Map.of("include_usage", true));
        body.put("thinking", Map.of("type", "enabled"));

        Request request = new Request.Builder()
                .url("https://tokenhub-intl.tencentcloudmaas.com/v1/chat/completions")
                .header("Authorization", "Bearer YOUR_API_KEY")
                .post(RequestBody.create(new Gson().toJson(body), MediaType.parse("application/json")))
                .build();

        // Decode the stream as UTF-8 explicitly: the single-argument
        // InputStreamReader uses the platform charset, which garbles
        // non-ASCII output on JVMs whose default is not UTF-8.
        try (Response response = new OkHttpClient().newCall(request).execute();
             BufferedReader reader = new BufferedReader(
                     new InputStreamReader(response.body().byteStream(), StandardCharsets.UTF_8))) {
            String line;
            boolean isAnswering = false; // true once the first answer token is seen
            while ((line = reader.readLine()) != null) {
                // Only "data: " lines carry payloads; "[DONE]" ends the stream.
                if (!line.startsWith("data: ")) continue;
                String data = line.substring(6);
                if (data.equals("[DONE]")) break;

                JsonObject chunk = JsonParser.parseString(data).getAsJsonObject();
                JsonArray choices = chunk.getAsJsonArray("choices");
                // Skip chunks that carry no choices.
                if (choices == null || choices.size() == 0) continue;

                JsonObject delta = choices.get(0).getAsJsonObject().getAsJsonObject("delta");
                if (delta.has("reasoning_content") && !delta.get("reasoning_content").isJsonNull()) {
                    System.out.print(delta.get("reasoning_content").getAsString());
                }
                if (delta.has("content") && !delta.get("content").isJsonNull()) {
                    if (!isAnswering) {
                        // Real newline here; "\\n" printed a literal backslash-n.
                        System.out.println("\n--- Answer ---");
                        isAnswering = true;
                    }
                    System.out.print(delta.get("content").getAsString());
                }
            }
        }
    }
}
package main

import (
	"bufio"
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
	"os"
	"strings"
)

// StreamDelta is the incremental payload of one streamed choice.
// ReasoningContent maps the reasoning_content extension field.
type StreamDelta struct {
	Content          string `json:"content,omitempty"`
	ReasoningContent string `json:"reasoning_content,omitempty"`
}

// StreamChoice wraps the delta of one choice in a streamed chunk.
type StreamChoice struct {
	Delta StreamDelta `json:"delta"`
}

// StreamChunk is one "data: " event of the streamed response.
type StreamChunk struct {
	Choices []StreamChoice `json:"choices"`
}

// main runs the streaming request and exits non-zero if any step fails.
func main() {
	if err := run(); err != nil {
		fmt.Fprintln(os.Stderr, "error:", err)
		os.Exit(1)
	}
}

// run streams a glm-5.1 completion with thinking enabled and prints the
// reasoning text followed by the answer text as chunks arrive. Every error is
// returned instead of discarded, so a failed request can no longer cause a
// nil-pointer panic on resp.Body.
func run() error {
	body, err := json.Marshal(map[string]interface{}{
		"model":          "glm-5.1",
		"messages":       []map[string]string{{"role": "user", "content": "Explain quantum entanglement"}},
		"stream":         true,
		"stream_options": map[string]bool{"include_usage": true},
		"thinking":       map[string]string{"type": "enabled"},
	})
	if err != nil {
		return fmt.Errorf("encode request: %w", err)
	}

	req, err := http.NewRequest("POST",
		"https://tokenhub-intl.tencentcloudmaas.com/v1/chat/completions",
		bytes.NewBuffer(body))
	if err != nil {
		return fmt.Errorf("build request: %w", err)
	}
	req.Header.Set("Authorization", "Bearer YOUR_API_KEY")
	req.Header.Set("Content-Type", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return fmt.Errorf("send request: %w", err)
	}
	defer resp.Body.Close()

	isAnswering := false // true once the first answer token is seen
	scanner := bufio.NewScanner(resp.Body)
	for scanner.Scan() {
		line := scanner.Text()
		// Only "data: " lines carry payloads; "[DONE]" ends the stream.
		if !strings.HasPrefix(line, "data: ") {
			continue
		}
		data := strings.TrimPrefix(line, "data: ")
		if data == "[DONE]" {
			break
		}
		var chunk StreamChunk
		// Best effort: skip payloads that do not decode as a chunk.
		if err := json.Unmarshal([]byte(data), &chunk); err != nil {
			continue
		}
		if len(chunk.Choices) == 0 {
			continue
		}
		delta := chunk.Choices[0].Delta
		if delta.ReasoningContent != "" {
			fmt.Print(delta.ReasoningContent)
		}
		if delta.Content != "" {
			if !isAnswering {
				// Real newline here; "\\n" printed a literal backslash-n.
				fmt.Println("\n--- Answer ---")
				isAnswering = true
			}
			fmt.Print(delta.Content)
		}
	}
	// Surface read failures that would otherwise end the loop silently.
	if err := scanner.Err(); err != nil {
		return fmt.Errorf("read stream: %w", err)
	}
	return nil
}
glm-5.2 additionally provides the reasoning_effort parameter beyond the thinking field to control reasoning depth, enabling flexible trade-offs between effectiveness and latency. This parameter is only supported by glm-5.2. For other GLM models, continue using the thinking field (see Enable or Disable Thinking above). When this parameter is not passed, the default value is max.

Value | Description |
high | Enhances reasoning. Suitable for scenarios such as general reasoning and code generation. |
max | Deep reasoning (default value). Suitable for scenarios such as complex reasoning, long-running tasks, and deep code analysis. |
reasoning_effort is not a standard OpenAI field. When using the OpenAI SDK, you must pass it through the SDK's mechanism for extra fields (use extra_body for Python, or pass the field directly for Node.js). For direct HTTP calls, place it at the top level of the request body. An example is provided below:

curl --location -X POST 'https://tokenhub-intl.tencentcloudmaas.com/v1/chat/completions' \
  --header "Authorization: Bearer ${API_KEY}" \
  --header 'Content-Type: application/json' \
  --data-raw '{
    "model": "glm-5.2",
    "messages": [{"role": "user", "content": "Analyze the performance bottlenecks in the following code and provide optimization suggestions."}],
    "thinking": {"type": "enabled"},
    "reasoning_effort": "max",
    "temperature": 1.0,
    "max_tokens": 65536
  }'
# glm-5.2 request with thinking enabled and reasoning_effort set to "max".
# Both are non-standard fields, so they are sent through extra_body.
from openai import OpenAI

client = OpenAI(
    api_key="YOUR_API_KEY",
    base_url="https://tokenhub-intl.tencentcloudmaas.com/v1",
)

response = client.chat.completions.create(
    model="glm-5.2",
    messages=[
        {"role": "user", "content": "Prove that the square root of 2 is an irrational number."}
    ],
    extra_body={
        "thinking": {"type": "enabled"},
        "reasoning_effort": "max",
    },
    temperature=1.0,
    max_tokens=65536,
)
print(response.choices[0].message.content)
import OpenAI from 'openai';

const client = new OpenAI({
  apiKey: 'YOUR_API_KEY',
  baseURL: 'https://tokenhub-intl.tencentcloudmaas.com/v1',
});

// glm-5.2 request with thinking enabled and reasoning_effort set to 'max'.
const completion = await client.chat.completions.create({
  model: 'glm-5.2',
  messages: [
    { role: 'user', content: 'Analyze the complexity of this code and provide optimization suggestions.' },
  ],
  // @ts-ignore - thinking / reasoning_effort are GLM extension fields
  thinking: { type: 'enabled' },
  // @ts-ignore
  reasoning_effort: 'max',
  temperature: 1.0,
  max_tokens: 65536,
});
console.log(completion.choices[0].message.content);
thinking and reasoning_effort are used together: thinking controls whether the reasoning capability is enabled (enabled / disabled), while reasoning_effort further controls the reasoning depth on glm-5.2 (high / max). For deep reasoning scenarios, it is recommended to set both thinking: {"type": "enabled"} and reasoning_effort: "max".

tools / tool_choice). For general usage, see Language Model Invocation Overview.

tool_call.arguments are returned incrementally in multiple chunks. The client needs to accumulate and concatenate them based on tool_call.index:

completion = client.chat.completions.create(
    model="glm-5.1",
    messages=[{"role": "user", "content": "Query Shenzhen weather"}],
    tools=tools,
    stream=True,
    extra_body={"thinking": {"type": "disabled"}},
)

arg_buffer = ""
tool_name = ""
for chunk in completion:
    if not chunk.choices:
        continue
    delta = chunk.choices[0].delta
    if hasattr(delta, "tool_calls") and delta.tool_calls:
        for tc in delta.tool_calls:
            if tc.function and tc.function.name:
                tool_name = tc.function.name
            if tc.function and tc.function.arguments:
                arg_buffer += tc.function.arguments

print(f"Calling tool: {tool_name}")
print(f"Complete parameters: {arg_buffer}")  # Output: {"city": "Shenzhen"}
// Stream a tool-calling request and reassemble the tool name and its
// argument string from the incremental chunks.
const completion = await client.chat.completions.create({
  model: 'glm-5.1',
  messages: [{ role: 'user', content: 'Query Shenzhen weather' }],
  tools: tools,
  stream: true,
  // @ts-ignore - thinking is a GLM extension field
  thinking: { type: 'disabled' },
});

let argBuffer = '';
let toolName = '';
for await (const chunk of completion) {
  if (!chunk.choices?.length) continue;
  const delta = chunk.choices[0].delta;
  if (delta.tool_calls) {
    for (const tc of delta.tool_calls) {
      // Keep the latest name seen and append every argument fragment.
      if (tc.function?.name) toolName = tc.function.name;
      if (tc.function?.arguments) argBuffer += tc.function.arguments;
    }
  }
}

console.log(`Calling tool: ${toolName}`);
console.log(`Complete parameters: ${argBuffer}`); // Output: {"city": "Shenzhen"}
import okhttp3.*;import com.google.gson.*;import java.util.*;import java.io.BufferedReader;import java.io.InputStreamReader;// For the definition of tools, see the Function Calling section in the Invocation Overview.Map<String, Object> body = new HashMap<>();body.put("model", "glm-5.1");body.put("messages", List.of(Map.of("role", "user", "content", "Query Shenzhen weather")));body.put("tools", tools);body.put("stream", true);body.put("thinking", Map.of("type", "disabled"));Request request = new Request.Builder().url("https://tokenhub-intl.tencentcloudmaas.com/v1/chat/completions").header("Authorization", "Bearer YOUR_API_KEY").post(RequestBody.create(new Gson().toJson(body), MediaType.parse("application/json"))).build();StringBuilder argBuffer = new StringBuilder();String toolName = "";try (Response response = new OkHttpClient().newCall(request).execute();BufferedReader reader = new BufferedReader(new InputStreamReader(response.body().byteStream()))) {String line;while ((line = reader.readLine()) != null) {if (!line.startsWith("data: ")) continue;String data = line.substring(6);if (data.equals("[DONE]")) break;JsonObject chunk = JsonParser.parseString(data).getAsJsonObject();JsonArray choices = chunk.getAsJsonArray("choices");if (choices == null || choices.size() == 0) continue;JsonObject delta = choices.get(0).getAsJsonObject().getAsJsonObject("delta");if (delta.has("tool_calls") && delta.get("tool_calls").isJsonArray()) {for (JsonElement tcEl : delta.getAsJsonArray("tool_calls")) {JsonObject fn = tcEl.getAsJsonObject().getAsJsonObject("function");if (fn != null) {if (fn.has("name") && !fn.get("name").isJsonNull()) {toolName = fn.get("name").getAsString();}if (fn.has("arguments") && !fn.get("arguments").isJsonNull()) {argBuffer.append(fn.get("arguments").getAsString());}}}}}}System.out.println("Calling tool: " + toolName);System.out.println("Complete parameters: " + argBuffer);
package mainimport ("bufio""bytes""encoding/json""fmt""net/http""strings")type ToolCallFunction struct {Name string `json:"name,omitempty"`Arguments string `json:"arguments,omitempty"`}type ToolCall struct {Index int `json:"index"`Function ToolCallFunction `json:"function"`}type ToolStreamDelta struct {ToolCalls []ToolCall `json:"tool_calls,omitempty"`}type ToolStreamChoice struct {Delta ToolStreamDelta `json:"delta"`}type ToolStreamChunk struct {Choices []ToolStreamChoice `json:"choices"`}func main() {// For the definition of tools, see the Function Calling section in the Invocation Overview.body, _ := json.Marshal(map[string]interface{}{"model": "glm-5.1","messages": []map[string]string{{"role": "user", "content": "Query Shenzhen weather"}},"tools": tools,"stream": true,"thinking": map[string]string{"type": "disabled"},})req, _ := http.NewRequest("POST","https://tokenhub-intl.tencentcloudmaas.com/v1/chat/completions",bytes.NewBuffer(body))req.Header.Set("Authorization", "Bearer YOUR_API_KEY")req.Header.Set("Content-Type", "application/json")resp, _ := http.DefaultClient.Do(req)defer resp.Body.Close()var argBuffer strings.Buildervar toolName stringscanner := bufio.NewScanner(resp.Body)for scanner.Scan() {line := scanner.Text()if !strings.HasPrefix(line, "data: ") {continue}data := strings.TrimPrefix(line, "data: ")if data == "[DONE]" {break}var chunk ToolStreamChunkif err := json.Unmarshal([]byte(data), &chunk); err != nil {continue}if len(chunk.Choices) == 0 {continue}for _, tc := range chunk.Choices[0].Delta.ToolCalls {if tc.Function.Name != "" {toolName = tc.Function.Name}if tc.Function.Arguments != "" {argBuffer.WriteString(tc.Function.Arguments)}}}fmt.Printf("Calling tool: %s\\nComplete parameters: %s\\n", toolName, argBuffer.String())}
tool_stream parameter (Boolean type) to control the streaming chunk granularity of tool invocation parameters:

extra_body={"tool_stream": True, "thinking": {"type": "disabled"}}
glm-5v-turbo is the only model in the GLM series that supports multimodal input, accepting images, videos, and files as input and outputting text.

curl -X POST 'https://tokenhub-intl.tencentcloudmaas.com/v1/chat/completions' \
  -H 'Authorization: Bearer YOUR_API_KEY' \
  -H 'Content-Type: application/json' \
  -d '{
    "model": "glm-5v-turbo",
    "messages": [{
      "role": "user",
      "content": [
        {"type": "text", "text": "Please describe this picture"},
        {"type": "image_url", "image_url": {"url": "https://example.com/photo.png"}}
      ]
    }],
    "max_tokens": 1024,
    "thinking": {"type": "disabled"}
  }'
# Image understanding: one user message with a text part and an image_url
# part, sent to glm-5v-turbo with thinking disabled.
response = client.chat.completions.create(
    model="glm-5v-turbo",
    messages=[{
        "role": "user",
        "content": [
            {"type": "text", "text": "Please describe this picture"},
            {"type": "image_url", "image_url": {"url": "https://example.com/photo.png"}},
        ],
    }],
    max_tokens=1024,
    extra_body={"thinking": {"type": "disabled"}},
)
print(response.choices[0].message.content)
// Image understanding: one user message with a text part and an image_url
// part, sent to glm-5v-turbo with thinking disabled.
const response = await client.chat.completions.create({
  model: 'glm-5v-turbo',
  messages: [{
    role: 'user',
    content: [
      { type: 'text', text: 'Please describe this picture' },
      { type: 'image_url', image_url: { url: 'https://example.com/photo.png' } },
    ],
  }],
  max_tokens: 1024,
  // @ts-ignore - thinking is a GLM extension field
  thinking: { type: 'disabled' },
});
console.log(response.choices[0].message.content);
import okhttp3.*;
import com.google.gson.Gson;
import java.util.*;

/** Sends one text-plus-image message to glm-5v-turbo with thinking disabled and prints the raw JSON response body. */
public class GlmImageInput {
    public static void main(String[] args) throws Exception {
        // Message content: a text prompt followed by the image to describe.
        Map<String, Object> textPart = Map.of("type", "text", "text", "Please describe this picture");
        Map<String, Object> imagePart = Map.of(
                "type", "image_url",
                "image_url", Map.of("url", "https://example.com/photo.png"));
        List<Map<String, Object>> content = List.of(textPart, imagePart);

        Map<String, Object> userMessage = Map.of("role", "user", "content", content);
        Map<String, Object> payload = new HashMap<>();
        payload.put("model", "glm-5v-turbo");
        payload.put("messages", List.of(userMessage));
        payload.put("max_tokens", 1024);
        payload.put("thinking", Map.of("type", "disabled"));

        // Serialize the payload with Gson and wrap it as a JSON request body.
        String json = new Gson().toJson(payload);
        RequestBody requestBody = RequestBody.create(json, MediaType.parse("application/json"));

        Request request = new Request.Builder()
                .url("https://tokenhub-intl.tencentcloudmaas.com/v1/chat/completions")
                .header("Authorization", "Bearer YOUR_API_KEY")
                .post(requestBody)
                .build();

        // try-with-resources closes the response once the body has been printed.
        OkHttpClient httpClient = new OkHttpClient();
        try (Response response = httpClient.newCall(request).execute()) {
            System.out.println(response.body().string());
        }
    }
}
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"os"
)

// main runs the image request and exits non-zero if any step fails.
func main() {
	if err := run(); err != nil {
		fmt.Fprintln(os.Stderr, "error:", err)
		os.Exit(1)
	}
}

// run sends one text-plus-image message to glm-5v-turbo with thinking
// disabled and prints the raw JSON response body. Every error is returned
// instead of discarded, so a failed request can no longer cause a
// nil-pointer panic on resp.Body.
func run() error {
	body, err := json.Marshal(map[string]interface{}{
		"model": "glm-5v-turbo",
		"messages": []map[string]interface{}{
			{
				"role": "user",
				"content": []map[string]interface{}{
					{"type": "text", "text": "Please describe this picture"},
					{"type": "image_url", "image_url": map[string]string{
						"url": "https://example.com/photo.png",
					}},
				},
			},
		},
		"max_tokens": 1024,
		"thinking":   map[string]string{"type": "disabled"},
	})
	if err != nil {
		return fmt.Errorf("encode request: %w", err)
	}

	req, err := http.NewRequest("POST",
		"https://tokenhub-intl.tencentcloudmaas.com/v1/chat/completions",
		bytes.NewBuffer(body))
	if err != nil {
		return fmt.Errorf("build request: %w", err)
	}
	req.Header.Set("Authorization", "Bearer YOUR_API_KEY")
	req.Header.Set("Content-Type", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return fmt.Errorf("send request: %w", err)
	}
	defer resp.Body.Close()

	data, err := io.ReadAll(resp.Body)
	if err != nil {
		return fmt.Errorf("read response: %w", err)
	}
	fmt.Println(string(data))
	return nil
}
image_url.url:

import base64

with open("local.jpg", "rb") as f:
    b64 = base64.b64encode(f.read()).decode()

response = client.chat.completions.create(
    model="glm-5v-turbo",
    messages=[{
        "role": "user",
        "content": [
            {"type": "text", "text": "What is in the picture?"},
            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{b64}"}},
        ],
    }],
    max_tokens=1024,
    extra_body={"thinking": {"type": "disabled"}},
)
image_url field in the image example above with video_url:

curl -X POST 'https://tokenhub-intl.tencentcloudmaas.com/v1/chat/completions' \
  -H 'Authorization: Bearer YOUR_API_KEY' \
  -H 'Content-Type: application/json' \
  -d '{
    "model": "glm-5v-turbo",
    "messages": [{
      "role": "user",
      "content": [
        {"type": "text", "text": "Please summarize the content of this video"},
        {"type": "video_url", "video_url": {"url": "https://example.com/demo.mp4"}}
      ]
    }],
    "max_tokens": 2048,
    "thinking": {"type": "disabled"}
  }'
# Video understanding: one user message with a text part and a video_url
# part, sent to glm-5v-turbo with thinking disabled.
response = client.chat.completions.create(
    model="glm-5v-turbo",
    messages=[{
        "role": "user",
        "content": [
            {"type": "text", "text": "Please summarize the content of this video"},
            {"type": "video_url", "video_url": {"url": "https://example.com/demo.mp4"}},
        ],
    }],
    max_tokens=2048,
    extra_body={"thinking": {"type": "disabled"}},
)
print(response.choices[0].message.content)
// Video understanding: one user message with a text part and a video_url
// part, sent to glm-5v-turbo with thinking disabled.
const response = await client.chat.completions.create({
  model: 'glm-5v-turbo',
  messages: [{
    role: 'user',
    content: [
      { type: 'text', text: 'Please summarize the content of this video' },
      { type: 'video_url', video_url: { url: 'https://example.com/demo.mp4' } },
    ],
  }],
  max_tokens: 2048,
  // @ts-ignore - thinking is a GLM extension field
  thinking: { type: 'disabled' },
});
console.log(response.choices[0].message.content);
// Message content: a text prompt followed by the video to summarize.
Map<String, Object> textPart = Map.of("type", "text", "text", "Please summarize the content of this video");
Map<String, Object> videoPart = Map.of(
        "type", "video_url",
        "video_url", Map.of("url", "https://example.com/demo.mp4"));
List<Map<String, Object>> content = List.of(textPart, videoPart);

// Request payload for glm-5v-turbo with thinking disabled.
Map<String, Object> userMessage = Map.of("role", "user", "content", content);
Map<String, Object> body = new HashMap<>();
body.put("model", "glm-5v-turbo");
body.put("messages", List.of(userMessage));
body.put("max_tokens", 2048);
body.put("thinking", Map.of("type", "disabled"));
// The rest of the HTTP request logic is the same as in the image example.
body, _ := json.Marshal(map[string]interface{}{"model": "glm-5v-turbo","messages": []map[string]interface{}{{"role": "user","content": []map[string]interface{}{{"type": "text", "text": "Please summarize the content of this video"},{"type": "video_url", "video_url": map[string]string{"url": "https://example.com/demo.mp4",}},},},},"max_tokens": 2048,"thinking": map[string]string{"type": "disabled"},})// The rest of the HTTP request logic is the same as in the image example.
# File understanding: a text part plus a file_url part, sent to glm-5v-turbo with thinking disabled.
curl -X POST 'https://tokenhub-intl.tencentcloudmaas.com/v1/chat/completions' \
  -H 'Authorization: Bearer YOUR_API_KEY' \
  -H 'Content-Type: application/json' \
  -d '{
    "model": "glm-5v-turbo",
    "messages": [{
      "role": "user",
      "content": [
        {"type": "text", "text": "Please extract the key points of this document"},
        {"type": "file_url", "file_url": {"url": "https://example.com/report.pdf"}}
      ]
    }],
    "max_tokens": 4096,
    "thinking": {"type": "disabled"}
  }'
# File understanding: one user message with a text part and a file_url
# part, sent to glm-5v-turbo with thinking disabled.
response = client.chat.completions.create(
    model="glm-5v-turbo",
    messages=[{
        "role": "user",
        "content": [
            {"type": "text", "text": "Please extract the key points of this document"},
            {"type": "file_url", "file_url": {"url": "https://example.com/report.pdf"}},
        ],
    }],
    max_tokens=4096,
    extra_body={"thinking": {"type": "disabled"}},
)
print(response.choices[0].message.content)
// File understanding: one user message with a text part and a file_url
// part, sent to glm-5v-turbo with thinking disabled.
const response = await client.chat.completions.create({
  model: 'glm-5v-turbo',
  messages: [{
    role: 'user',
    content: [
      { type: 'text', text: 'Please extract the key points of this document' },
      { type: 'file_url', file_url: { url: 'https://example.com/report.pdf' } },
    ],
  }],
  max_tokens: 4096,
  // @ts-ignore - thinking is a GLM extension field
  thinking: { type: 'disabled' },
});
console.log(response.choices[0].message.content);
// Message content: a text prompt followed by the document to analyze.
Map<String, Object> textPart = Map.of("type", "text", "text", "Please extract the key points of this document");
Map<String, Object> filePart = Map.of(
        "type", "file_url",
        "file_url", Map.of("url", "https://example.com/report.pdf"));
List<Map<String, Object>> content = List.of(textPart, filePart);

// Request payload for glm-5v-turbo with thinking disabled.
Map<String, Object> userMessage = Map.of("role", "user", "content", content);
Map<String, Object> body = new HashMap<>();
body.put("model", "glm-5v-turbo");
body.put("messages", List.of(userMessage));
body.put("max_tokens", 4096);
body.put("thinking", Map.of("type", "disabled"));
// The rest of the HTTP request logic is the same as in the image example.
body, _ := json.Marshal(map[string]interface{}{"model": "glm-5v-turbo","messages": []map[string]interface{}{{"role": "user","content": []map[string]interface{}{{"type": "text", "text": "Please extract the key points of this document"},{"type": "file_url", "file_url": map[string]string{"url": "https://example.com/report.pdf",}},},},},"max_tokens": 4096,"thinking": map[string]string{"type": "disabled"},})// The rest of the HTTP request logic is the same as in the image example.
Restriction Item | Description |
Thinking mode enabled by default. | Enabled by default when the thinking parameter is not passed, and the response will contain the reasoning_content field. Explicitly disable it when not needed. |
Timeout risk for non-streaming calls | The output is longer in thinking mode. Use stream=True. |
Multimodal support limited to glm-5v-turbo. | The other GLM models (glm-5.2, glm-5.1, glm-5, glm-5-turbo) do not support image, video, or file input. |
Multimodal inputs cannot be mixed. | Only one type of image, video, or file can be uploaded in a single request. |
File input supports URL only. | file_url does not support Base64 or Data URI. |
Tool parameters returned incrementally | During streaming calls, tool_call.arguments are returned in multiple chunks and need to be concatenated by the client. |
Request body size limit | The body of a single request must not exceed 100 MB. |
Esta página foi útil?
Você também pode entrar em contato com a Equipe de vendas ou Enviar um tíquete em caso de ajuda.
comentários