import os
from openai import OpenAI

client = OpenAI(
    # If the environment variable is not set, replace the following line with: api_key="sk-xxx"
    # API keys for the Singapore/Virginia and Beijing regions are different. To get an API key, see https://www.alibabacloud.com/help/en/model-studio/get-api-key
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    # The following is the base_url for the Singapore region.
    base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
)
completion = client.chat.completions.create(
    # This example uses qwen-plus. You can change the model name as needed. For a list of models, see https://www.alibabacloud.com/help/en/model-studio/getting-started/models
    model="qwen-plus",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Who are you?"},
    ],
    # For Qwen3 models, use the enable_thinking parameter to control the thinking process (default is True for open source models and False for commercial models).
    # When using a Qwen3 open source model without streaming output, uncomment the following line to avoid errors.
    # extra_body={"enable_thinking": False},
)
print(completion.model_dump_json())
Java
Request example
// This code uses OpenAI SDK version 2.6.0
import com.openai.client.OpenAIClient;
import com.openai.client.okhttp.OpenAIOkHttpClient;
import com.openai.models.chat.completions.ChatCompletion;
import com.openai.models.chat.completions.ChatCompletionCreateParams;

public class Main {
    public static void main(String[] args) {
        OpenAIClient client = OpenAIOkHttpClient.builder()
                // API keys for the Singapore/Virginia and Beijing regions are different. To get an API key, see https://www.alibabacloud.com/help/en/model-studio/get-api-key
                .apiKey(System.getenv("DASHSCOPE_API_KEY"))
                // The following is the base_url for the Singapore region.
                .baseUrl("https://dashscope-intl.aliyuncs.com/compatible-mode/v1")
                .build();
        ChatCompletionCreateParams params = ChatCompletionCreateParams.builder()
                .addUserMessage("Who are you?")
                .model("qwen-plus")
                .build();
        try {
            ChatCompletion chatCompletion = client.chat().completions().create(params);
            System.out.println(chatCompletion);
        } catch (Exception e) {
            System.err.println("Error occurred: " + e.getMessage());
            e.printStackTrace();
        }
    }
}
Node.js
import OpenAI from "openai";
const openai = new OpenAI(
{
// If the environment variable is not set, replace the following line with: apiKey: "sk-xxx",
// API keys for the Singapore/Virginia and Beijing regions are different. To get an API key, see https://www.alibabacloud.com/help/en/model-studio/get-api-key
apiKey: process.env.DASHSCOPE_API_KEY,
// The following is the base_url for the Singapore region.
baseURL: "https://dashscope-intl.aliyuncs.com/compatible-mode/v1"
}
);
async function main() {
const completion = await openai.chat.completions.create({
model: "qwen-plus", //This example uses qwen-plus. You can change the model name as needed. For a list of models, see https://www.alibabacloud.com/help/en/model-studio/getting-started/models
messages: [
{ role: "system", content: "You are a helpful assistant." },
{ role: "user", content: "Who are you?" }
],
});
console.log(JSON.stringify(completion))
}
main();
Go
package main

import (
    "context"
    "os"

    "github.com/openai/openai-go"
    "github.com/openai/openai-go/option"
)

func main() {
    client := openai.NewClient(
        // API keys for the Singapore/Virginia and Beijing regions are different. To get an API key, see https://www.alibabacloud.com/help/en/model-studio/get-api-key
        option.WithAPIKey(os.Getenv("DASHSCOPE_API_KEY")), // defaults to os.LookupEnv("OPENAI_API_KEY")
        // The following is the base_url for the Singapore region.
        option.WithBaseURL("https://dashscope-intl.aliyuncs.com/compatible-mode/v1/"),
    )
    chatCompletion, err := client.Chat.Completions.New(
        context.TODO(), openai.ChatCompletionNewParams{
            Messages: openai.F(
                []openai.ChatCompletionMessageParamUnion{
                    openai.UserMessage("Who are you?"),
                },
            ),
            Model: openai.F("qwen-plus"),
        },
    )
    if err != nil {
        panic(err.Error())
    }
    println(chatCompletion.Choices[0].Message.Content)
}
C# (HTTP)
using System.Net.Http.Headers;
using System.Text;

class Program
{
    private static readonly HttpClient httpClient = new HttpClient();

    static async Task Main(string[] args)
    {
        // If the environment variable is not set, replace the following line with: string? apiKey = "sk-xxx";
        // API keys for the Singapore/Virginia and Beijing regions are different. To get an API key, see https://www.alibabacloud.com/help/en/model-studio/get-api-key
        string? apiKey = Environment.GetEnvironmentVariable("DASHSCOPE_API_KEY");
        if (string.IsNullOrEmpty(apiKey))
        {
            Console.WriteLine("API Key is not set. Make sure the 'DASHSCOPE_API_KEY' environment variable is set.");
            return;
        }

        // Set the request URL and content
        // The following is the base_url for the Singapore region.
        string url = "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/chat/completions";
        // This example uses qwen-plus. You can change the model name as needed. For a list of models, see https://www.alibabacloud.com/help/en/model-studio/getting-started/models
        string jsonContent = @"{
            ""model"": ""qwen-plus"",
            ""messages"": [
                {
                    ""role"": ""system"",
                    ""content"": ""You are a helpful assistant.""
                },
                {
                    ""role"": ""user"",
                    ""content"": ""Who are you?""
                }
            ]
        }";

        // Send the request and get the response
        string result = await SendPostRequestAsync(url, jsonContent, apiKey);
        // Print the result
        Console.WriteLine(result);
    }

    private static async Task<string> SendPostRequestAsync(string url, string jsonContent, string apiKey)
    {
        using (var content = new StringContent(jsonContent, Encoding.UTF8, "application/json"))
        {
            // Set the request headers
            httpClient.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Bearer", apiKey);
            httpClient.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json"));
            // Send the request and get the response
            HttpResponseMessage response = await httpClient.PostAsync(url, content);
            // Handle the response
            if (response.IsSuccessStatusCode)
            {
                return await response.Content.ReadAsStringAsync();
            }
            else
            {
                return $"Request failed: {response.StatusCode}";
            }
        }
    }
}
PHP (HTTP)
<?php
// Set the request URL
// The following is the base_url for the Singapore region.
$url = 'https://dashscope-intl.aliyuncs.com/compatible-mode/v1/chat/completions';
// If the environment variable is not set, replace the following line with: $apiKey = "sk-xxx";
// API keys for the Singapore/Virginia and Beijing regions are different. To get an API key, see https://www.alibabacloud.com/help/en/model-studio/get-api-key
$apiKey = getenv('DASHSCOPE_API_KEY');
// Set the request headers
$headers = [
    'Authorization: Bearer ' . $apiKey,
    'Content-Type: application/json'
];
// Set the request body
$data = [
    // This example uses qwen-plus. You can change the model name as needed. For a list of models, see https://www.alibabacloud.com/help/en/model-studio/getting-started/models
    "model" => "qwen-plus",
    "messages" => [
        [
            "role" => "system",
            "content" => "You are a helpful assistant."
        ],
        [
            "role" => "user",
            "content" => "Who are you?"
        ]
    ]
];
// Initialize a cURL session
$ch = curl_init();
// Set cURL options
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_POST, true);
curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($data));
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
// Execute the cURL session
$response = curl_exec($ch);
// Check for errors
if (curl_errno($ch)) {
    echo 'Curl error: ' . curl_error($ch);
}
// Close the cURL resource
curl_close($ch);
// Print the response
echo $response;
?>
curl
API keys for the Singapore/Virginia and Beijing regions are different. To get an API key, see https://www.alibabacloud.com/help/en/model-studio/get-api-key. If you use a model in the Beijing region, replace the URL with: https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions
curl -X POST https://dashscope-intl.aliyuncs.com/compatible-mode/v1/chat/completions \
-H "Authorization: Bearer $DASHSCOPE_API_KEY" \
-H "Content-Type: application/json" \
-d '{
    "model": "qwen-plus",
    "messages": [
        {
            "role": "system",
            "content": "You are a helpful assistant."
        },
        {
            "role": "user",
            "content": "Who are you?"
        }
    ]
}'
Python
import os
from openai import OpenAI

client = OpenAI(
    # If the environment variable is not set, replace the following line with: api_key="sk-xxx"
    # API keys for the Singapore/Virginia and Beijing regions are different. To get an API key, see https://www.alibabacloud.com/help/en/model-studio/get-api-key
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    # The following is the base_url for the Singapore/Virginia region. If you use a model in the Beijing region, replace the base_url with: https://dashscope.aliyuncs.com/compatible-mode/v1
    base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
)
completion = client.chat.completions.create(
    model="qwen-plus",  # This example uses qwen-plus. You can change the model name as needed. For a list of models, see https://www.alibabacloud.com/help/en/model-studio/getting-started/models
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Who are you?"},
    ],
    stream=True,
    stream_options={"include_usage": True},
)
for chunk in completion:
    print(chunk.model_dump_json())
Node.js
import OpenAI from "openai";
const openai = new OpenAI(
{
// API keys for the Singapore/Virginia and Beijing regions are different. To get an API key, see https://www.alibabacloud.com/help/en/model-studio/get-api-key
apiKey: process.env.DASHSCOPE_API_KEY,
// The following is the base_url for the Singapore/Virginia region. If you use a model in the Beijing region, replace the base_url with: https://dashscope.aliyuncs.com/compatible-mode/v1
baseURL: "https://dashscope-intl.aliyuncs.com/compatible-mode/v1"
}
);
async function main() {
const completion = await openai.chat.completions.create({
model: "qwen-plus", // This example uses qwen-plus. You can change the model name as needed. For a list of models, see https://www.alibabacloud.com/help/en/model-studio/getting-started/models
messages: [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Who are you?"}
],
stream: true,
});
for await (const chunk of completion) {
console.log(JSON.stringify(chunk));
}
}
main();
curl
API keys for the Singapore/Virginia and Beijing regions are different. To get an API key, see https://www.alibabacloud.com/help/en/model-studio/get-api-key
If you use a model in the Beijing region, replace the URL with: https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions
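The following is a minimal streaming sketch that mirrors the non-streaming curl example above, with stream and stream_options added; the response arrives as a series of server-sent data chunks.
curl -X POST https://dashscope-intl.aliyuncs.com/compatible-mode/v1/chat/completions \
-H "Authorization: Bearer $DASHSCOPE_API_KEY" \
-H "Content-Type: application/json" \
-d '{
    "model": "qwen-plus",
    "messages": [
        {"role": "user", "content": "Who are you?"}
    ],
    "stream": true,
    "stream_options": {"include_usage": true}
}'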
For more ways to use large language models to analyze images, see Visual understanding.
Python
import os
from openai import OpenAI

client = OpenAI(
    # If the environment variable is not set, replace the following line with: api_key="sk-xxx"
    # API keys for the Singapore/Virginia and Beijing regions are different. To get an API key, see https://www.alibabacloud.com/help/en/model-studio/get-api-key
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    # The following is the base_url for the Singapore/Virginia region. If you use a model in the Beijing region, replace the base_url with: https://dashscope.aliyuncs.com/compatible-mode/v1
    base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
)
completion = client.chat.completions.create(
    model="qwen-vl-plus",  # This example uses qwen-vl-plus. You can change the model name as needed. For a list of models, see https://www.alibabacloud.com/help/en/model-studio/models
    messages=[{
        "role": "user",
        "content": [
            {"type": "image_url",
             "image_url": {"url": "https://dashscope.oss-cn-beijing.aliyuncs.com/images/dog_and_girl.jpeg"}},
            {"type": "text", "text": "What is this?"},
        ],
    }],
)
print(completion.model_dump_json())
Node.js
import OpenAI from "openai";
const openai = new OpenAI(
{
// API keys for the Singapore/Virginia and Beijing regions are different. To get an API key, see https://www.alibabacloud.com/help/en/model-studio/get-api-key
apiKey: process.env.DASHSCOPE_API_KEY,
// The following is the base_url for the Singapore/Virginia region. If you use a model in the Beijing region, replace the base_url with: https://dashscope.aliyuncs.com/compatible-mode/v1
baseURL: "https://dashscope-intl.aliyuncs.com/compatible-mode/v1"
}
);
async function main() {
const response = await openai.chat.completions.create({
model: "qwen-vl-max", // This example uses qwen-vl-max. You can change the model name as needed. For a list of models, see https://www.alibabacloud.com/help/en/model-studio/models
messages: [{role: "user",content: [
{ type: "image_url",image_url: {"url": "https://dashscope.oss-cn-beijing.aliyuncs.com/images/dog_and_girl.jpeg"}},
{ type: "text", text: "What is this?" },
]}]
});
console.log(JSON.stringify(response));
}
main();
curl
API keys for the Singapore/Virginia and Beijing regions are different. To get an API key, see https://www.alibabacloud.com/help/en/model-studio/get-api-key
If you use a model in the Beijing region, replace the URL with: https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions
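A minimal sketch of the equivalent curl request, mirroring the Python and Node.js examples above (same model, image URL, and prompt):
curl -X POST https://dashscope-intl.aliyuncs.com/compatible-mode/v1/chat/completions \
-H "Authorization: Bearer $DASHSCOPE_API_KEY" \
-H "Content-Type: application/json" \
-d '{
    "model": "qwen-vl-plus",
    "messages": [{
        "role": "user",
        "content": [
            {"type": "image_url", "image_url": {"url": "https://dashscope.oss-cn-beijing.aliyuncs.com/images/dog_and_girl.jpeg"}},
            {"type": "text", "text": "What is this?"}
        ]
    }]
}'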
The following is an example of passing a list of images. For more usage information, such as passing a video file, see Visual understanding.
Python
import os
from openai import OpenAI

client = OpenAI(
    # If the environment variable is not set, replace the following line with: api_key="sk-xxx"
    # API keys for the Singapore/Virginia and Beijing regions are different. To get an API key, see https://www.alibabacloud.com/help/en/model-studio/get-api-key
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    # The following is the base_url for the Singapore/Virginia region. If you use a model in the Beijing region, replace the base_url with: https://dashscope.aliyuncs.com/compatible-mode/v1
    base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
)
completion = client.chat.completions.create(
    # This example uses qwen-vl-max. You can change the model name as needed. For a list of models, see https://www.alibabacloud.com/help/en/model-studio/models
    model="qwen-vl-max",
    messages=[{
        "role": "user",
        "content": [
            {
                "type": "video",
                "video": [
                    "https://img.alicdn.com/imgextra/i3/O1CN01K3SgGo1eqmlUgeE9b_!!6000000003923-0-tps-3840-2160.jpg",
                    "https://img.alicdn.com/imgextra/i4/O1CN01BjZvwg1Y23CF5qIRB_!!6000000003000-0-tps-3840-2160.jpg",
                    "https://img.alicdn.com/imgextra/i4/O1CN01Ib0clU27vTgBdbVLQ_!!6000000007859-0-tps-3840-2160.jpg",
                    "https://img.alicdn.com/imgextra/i1/O1CN01aygPLW1s3EXCdSN4X_!!6000000005710-0-tps-3840-2160.jpg"
                ]
            },
            {
                "type": "text",
                "text": "Describe the process in this video."
            }
        ]
    }]
)
print(completion.model_dump_json())
Node.js
// Make sure you have specified "type": "module" in package.json.
import OpenAI from "openai";

const openai = new OpenAI({
    // If the environment variable is not set, replace the following line with: apiKey: "sk-xxx",
    // API keys for the Singapore/Virginia and Beijing regions are different. To get an API key, see https://www.alibabacloud.com/help/en/model-studio/get-api-key
    apiKey: process.env.DASHSCOPE_API_KEY,
    // The following is the base_url for the Singapore/Virginia region. If you use a model in the Beijing region, replace the base_url with: https://dashscope.aliyuncs.com/compatible-mode/v1
    baseURL: "https://dashscope-intl.aliyuncs.com/compatible-mode/v1"
});

async function main() {
    const response = await openai.chat.completions.create({
        // This example uses qwen-vl-max. You can change the model name as needed. For a list of models, see https://www.alibabacloud.com/help/en/model-studio/models
        model: "qwen-vl-max",
        messages: [{
            role: "user",
            content: [
                {
                    type: "video",
                    video: [
                        "https://img.alicdn.com/imgextra/i3/O1CN01K3SgGo1eqmlUgeE9b_!!6000000003923-0-tps-3840-2160.jpg",
                        "https://img.alicdn.com/imgextra/i4/O1CN01BjZvwg1Y23CF5qIRB_!!6000000003000-0-tps-3840-2160.jpg",
                        "https://img.alicdn.com/imgextra/i4/O1CN01Ib0clU27vTgBdbVLQ_!!6000000007859-0-tps-3840-2160.jpg",
                        "https://img.alicdn.com/imgextra/i1/O1CN01aygPLW1s3EXCdSN4X_!!6000000005710-0-tps-3840-2160.jpg"
                    ]
                },
                {
                    type: "text",
                    text: "Describe the process in this video."
                }
            ]
        }]
    });
    console.log(JSON.stringify(response));
}

main();
curl
The API keys for the Singapore/Virginia and Beijing regions are different. To get an API key, see https://www.alibabacloud.com/help/en/model-studio/get-api-key. The following example uses the base_url for the Singapore region.
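A minimal sketch of the equivalent curl request, passing the same image list as the Python and Node.js examples above:
curl -X POST https://dashscope-intl.aliyuncs.com/compatible-mode/v1/chat/completions \
-H "Authorization: Bearer $DASHSCOPE_API_KEY" \
-H "Content-Type: application/json" \
-d '{
    "model": "qwen-vl-max",
    "messages": [{
        "role": "user",
        "content": [
            {"type": "video", "video": [
                "https://img.alicdn.com/imgextra/i3/O1CN01K3SgGo1eqmlUgeE9b_!!6000000003923-0-tps-3840-2160.jpg",
                "https://img.alicdn.com/imgextra/i4/O1CN01BjZvwg1Y23CF5qIRB_!!6000000003000-0-tps-3840-2160.jpg",
                "https://img.alicdn.com/imgextra/i4/O1CN01Ib0clU27vTgBdbVLQ_!!6000000007859-0-tps-3840-2160.jpg",
                "https://img.alicdn.com/imgextra/i1/O1CN01aygPLW1s3EXCdSN4X_!!6000000005710-0-tps-3840-2160.jpg"
            ]},
            {"type": "text", "text": "Describe the process in this video."}
        ]
    }]
}'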
For the complete Function calling workflow code, see Function Calling.
For Function calling code for Qwen3 (thinking mode) and QwQ models, see Deep thinking.
Python
import os
from openai import OpenAI

client = OpenAI(
    # If the environment variable is not set, replace the following line with: api_key="sk-xxx"
    # API keys for the Singapore/Virginia and Beijing regions are different. To get an API key, see https://www.alibabacloud.com/help/en/model-studio/get-api-key
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    # The following is the base_url for the Singapore/Virginia region. If you use a model in the Beijing region, replace the base_url with: https://dashscope.aliyuncs.com/compatible-mode/v1
    base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
)

tools = [
    # Tool 1: Get the current time
    {
        "type": "function",
        "function": {
            "name": "get_current_time",
            "description": "Useful when you want to know the current time.",
            "parameters": {},  # parameters is an empty dictionary because no input is needed to get the current time
        },
    },
    # Tool 2: Get the weather for a specified city
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Useful when you want to check the weather in a specific city.",
            "parameters": {
                "type": "object",
                "properties": {
                    # A location is required to check the weather, so the parameter is set to location
                    "location": {
                        "type": "string",
                        "description": "A city or district, such as Beijing, Hangzhou, or Yuhang District.",
                    }
                },
                "required": ["location"],
            },
        },
    },
]
messages = [{"role": "user", "content": "What is the weather like in Hangzhou?"}]
completion = client.chat.completions.create(
    model="qwen-plus",  # This example uses qwen-plus. You can change the model name as needed. For a list of models, see https://www.alibabacloud.com/help/en/model-studio/getting-started/models
    messages=messages,
    tools=tools,
)
print(completion.model_dump_json())
Node.js
import OpenAI from "openai";
const openai = new OpenAI(
{
// If the environment variable is not set, replace the following line with: apiKey: "sk-xxx",
// API keys for the Singapore/Virginia and Beijing regions are different. To get an API key, see https://www.alibabacloud.com/help/en/model-studio/get-api-key
apiKey: process.env.DASHSCOPE_API_KEY,
// The following is the base_url for the Singapore/Virginia region. If you use a model in the Beijing region, replace the base_url with: https://dashscope.aliyuncs.com/compatible-mode/v1
baseURL: "https://dashscope-intl.aliyuncs.com/compatible-mode/v1"
}
);
const messages = [{"role": "user", "content": "What is the weather like in Hangzhou?"}];
const tools = [
// Tool 1: Get the current time
{
"type": "function",
"function": {
"name": "get_current_time",
"description": "Useful when you want to know the current time.",
// parameters is empty because no input is needed to get the current time
"parameters": {}
}
},
// Tool 2: Get the weather for a specified city
{
"type": "function",
"function": {
"name": "get_current_weather",
"description": "Useful when you want to check the weather in a specific city.",
"parameters": {
"type": "object",
"properties": {
// A location is required to check the weather, so the parameter is set to location
"location": {
"type": "string",
"description": "A city or district, such as Beijing, Hangzhou, or Yuhang District."
}
},
"required": ["location"]
}
}
}
];
async function main() {
const response = await openai.chat.completions.create({
model: "qwen-plus", // This example uses qwen-plus. You can change the model name as needed. For a list of models, see https://www.alibabacloud.com/help/en/model-studio/getting-started/models
messages: messages,
tools: tools,
});
console.log(JSON.stringify(response));
}
main();
curl
The API keys for the Singapore/Virginia and Beijing regions are different. To get an API key, see https://www.alibabacloud.com/help/en/model-studio/get-api-key. The following example uses the base_url for the Singapore region.
curl -X POST https://dashscope-intl.aliyuncs.com/compatible-mode/v1/chat/completions \
-H "Authorization: Bearer $DASHSCOPE_API_KEY" \
-H "Content-Type: application/json" \
-d '{
    "model": "qwen-plus",
    "messages": [
        {
            "role": "system",
            "content": "You are a helpful assistant."
        },
        {
            "role": "user",
            "content": "What is the weather like in Hangzhou?"
        }
    ],
    "tools": [
        {
            "type": "function",
            "function": {
                "name": "get_current_time",
                "description": "Useful when you want to know the current time.",
                "parameters": {}
            }
        },
        {
            "type": "function",
            "function": {
                "name": "get_current_weather",
                "description": "Useful when you want to check the weather in a specific city.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "A city or district, such as Beijing, Hangzhou, or Yuhang District."
                        }
                    },
                    "required": ["location"]
                }
            }
        }
    ]
}'
Asynchronous invocation
import os
import asyncio
import platform
from openai import AsyncOpenAI

client = AsyncOpenAI(
    # If the environment variable is not set, replace the following line with: api_key="sk-xxx"
    # If you use a model in the China (Beijing) region, you need an API key for that region. Get it here: https://bailian.console.alibabacloud.com/?tab=model#/api-key
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    # The following is the base_url for the Singapore/Virginia region. If you use a model in the Beijing region, replace the base_url with: https://dashscope.aliyuncs.com/compatible-mode/v1
    base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
)

async def main():
    response = await client.chat.completions.create(
        messages=[{"role": "user", "content": "Who are you?"}],
        model="qwen-plus",  # This example uses qwen-plus. You can change the model name as needed. For a list of models, see https://www.alibabacloud.com/help/en/model-studio/getting-started/models
    )
    print(response.model_dump_json())

if platform.system() == "Windows":
    asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
asyncio.run(main())
model string(Required)
The name of the model to use.
Supported models include Qwen large language models (commercial and open source), Qwen-VL, Qwen-Coder, Qwen-Omni, and Qwen-Math.
Qwen-Audio does not support the OpenAI compatible protocol. It only supports the Qwen protocol.
For specific model names and billing information, see Models.
messages array(Required)
Conversation history for the model, in chronological order.
Message types
System Message object(Optional)
A system message that sets the role, tone, task objectives, or constraints for the large language model. Place it at the beginning of the messages array.
Do not set a System Message for the QwQ model. A System Message has no effect on the QVQ model.
Properties
content string(Required)
A system instruction that defines the model's role, behavior, response style, and task constraints.
role string(Required)
The role for a system message. The value is fixed as system.
User Message object(Required)
A user message that passes questions, instructions, or context to the model.
Properties
content string or array(Required)
The message content. This is a string for text-only input. It is an array for multi-modal input, such as images, or if explicit caching is enabled.
Properties for multi-modal models or when explicit caching is enabled
type string(Required)
Valid values:
text
Set to text for text input.
image_url
Set to image_url for image input.
input_audio
Set to input_audio for audio input.
video
Set to video when the video input is an image list.
video_url
Set to video_url for video file input.
Only some Qwen-VL models can accept video files as input. For more information, see Video understanding (Qwen-VL). The QVQ and Qwen-Omni models support direct video file input.
text string
The input text. This parameter is required when type is text.
image_url object
The input image information. This parameter is required when type is image_url.
Properties
url string(Required)
The URL or Base64 Data URL of the image. To pass a local file, see Visual understanding.
input_audio object
The input audio information. This parameter is required when type is input_audio.
fps float(Optional)
The frame rate information for video input. It informs the model of the time interval between adjacent frames. This helps the model better understand the temporal dynamics of the video. This function applies to both video file and image list inputs. It is suitable for scenarios such as event time localization or segment content summarization.
Supports the Qwen2.5-VL, Qwen3-VL, and QVQ models.
Example values are as follows:
For image list input: {"video":["https://xx1.jpg",...,"https://xxn.jpg"],"fps":2}
For video file input: {"video": "https://xx1.mp4","fps":2}
A larger fps value is suitable for high-speed motion scenarios, such as sports events or action movies. A smaller fps value is suitable for long videos or content with static scenes.
min_pixels integer(Optional)
Sets the minimum pixel threshold for an input image or video frame. If an input image or video frame has a pixel count less than min_pixels, it is scaled up until the total pixel count is greater than min_pixels.
For image input:
Applicable models: Qwen-VL, QVQ
The value range is as follows:
min_pixels value range
Qwen3-VL: The default and minimum value is 65536.
qwen-vl-max, qwen-vl-max-latest, qwen-vl-max-0813, qwen-vl-plus, qwen-vl-plus-latest, qwen-vl-plus-0815: The default and minimum value is 4096.
Other qwen-vl-plus models, other qwen-vl-max models, open source Qwen2.5-VL series, and QVQ series models: The default and minimum value is 3136.
Example value: {"type": "image_url","image_url": {"url":"https://xxxx.jpg"},"min_pixels": 65536}
For video file or image list input:
Applicable models: Qwen-VL, QVQ
The value range is as follows:
min_pixels value range
Qwen3-VL (including commercial and open source versions), qwen-vl-max, qwen-vl-max-latest, qwen-vl-max-0813, qwen-vl-plus, qwen-vl-plus-latest, qwen-vl-plus-0815: The default value is 65536. The minimum value is 4096.
Other qwen-vl-plus models, other qwen-vl-max models, open source Qwen2.5-VL series, and QVQ series models: The default value is 50176. The minimum value is 3136.
Example values:
For video file input: {"type": "video_url","video_url": {"url":"https://xxxx.mp4"},"min_pixels": 65536}
For image list input: {"type": "video","video": ["https://xx1.jpg",...,"https://xxn.jpg"],"min_pixels": 65536}
max_pixels integer(Optional)
Sets the maximum pixel threshold for an input image or video frame. If an input image or video has a pixel count within the [min_pixels, max_pixels] range, the model processes the original image. If the pixel count is greater than max_pixels, the image is scaled down until its total pixel count is less than max_pixels.
For image input:
Qwen3-VL: The default value is 2621440. The maximum value is 16777216.
qwen-vl-max, qwen-vl-max-latest, qwen-vl-max-0813, qwen-vl-plus, qwen-vl-plus-latest, qwen-vl-plus-0815: The default value is 1310720. The maximum value is 16777216.
Other qwen-vl-plus models, other qwen-vl-max models, open source Qwen2.5-VL series, and QVQ series models: The default value is 1003520. The maximum value is 12845056.
If vl_high_resolution_images is True:
Qwen3-VL, qwen-vl-max, qwen-vl-max-latest, qwen-vl-max-0813, qwen-vl-plus, qwen-vl-plus-latest, qwen-vl-plus-0815: max_pixels is invalid. The maximum pixel count for an input image is fixed at 16777216.
Other qwen-vl-plus models, other qwen-vl-max models, open source Qwen2.5-VL series, and QVQ series models: max_pixels is invalid. The maximum pixel count for an input image is fixed at 12845056.
Example value: {"type": "image_url","image_url": {"url":"https://xxxx.jpg"},"max_pixels": 8388608}
For video file or image list input:
Applicable models: Qwen-VL, QVQ
The value range is as follows:
max_pixels value range
qwen3-vl-plus series, qwen3-vl-flash series, qwen3-vl-235b-a22b-thinking, and qwen3-vl-235b-a22b-instruct: The default value is 655360. The maximum value is 2048000.
Other open source Qwen3-VL models, qwen-vl-max, qwen-vl-max-latest, qwen-vl-max-0813, qwen-vl-plus, qwen-vl-plus-latest, qwen-vl-plus-0815: The default value is 655360. The maximum value is 786432.
Other qwen-vl-plus models, other qwen-vl-max models, open source Qwen2.5-VL series, and QVQ series models: The default value is 501760. The maximum value is 602112.
Example values:
For video file input: {"type": "video_url","video_url": {"url":"https://xxxx.mp4"},"max_pixels": 655360}
For image list input: {"type": "video","video": ["https://xx1.jpg",...,"https://xxn.jpg"],"max_pixels": 655360}
total_pixels integer(Optional)
Limits the total number of pixels for all frames extracted from a video (pixels per frame × total number of frames). If the video's total pixel count exceeds this limit, the system scales down the video frames. However, it still ensures that the pixel value of any single frame remains within the [min_pixels, max_pixels] range. Applicable to Qwen-VL and QVQ.
For long videos with many extracted frames, lower this value to reduce token consumption and processing time. However, this may cause a loss of image detail.
Value range
qwen3-vl-plus series, qwen3-vl-flash series, qwen3-vl-235b-a22b-thinking, and qwen3-vl-235b-a22b-instruct: The default and minimum value is 134217728. This value corresponds to 131072 image tokens (1 image token per 32×32 pixels).
Other open source Qwen3-VL models, qwen-vl-max, qwen-vl-max-latest, qwen-vl-max-0813, qwen-vl-plus, qwen-vl-plus-latest, qwen-vl-plus-0815: The default and minimum value is 67108864. This value corresponds to 65536 image tokens (1 image token per 32×32 pixels).
Other qwen-vl-plus models, other qwen-vl-max models, open source Qwen2.5-VL series, and QVQ series models: The default and minimum value is 51380224. This value corresponds to 65536 image tokens (1 image token per 28×28 pixels).
Example values
For video file input: {"type": "video_url","video_url": {"url":"https://xxxx.mp4"},"total_pixels": 134217728}
For image list input: {"type": "video","video": ["https://xx1.jpg",...,"https://xxn.jpg"],"total_pixels": 134217728}
cache_control object(Optional)
Enables explicit caching. For more information, see Explicit cache.
Properties
type string(Required)
Only ephemeral is supported.
role string(Required)
The role for a user message. The value is fixed as user.
Assistant Message object(Optional)
The model's reply. This is typically sent back to the model as context in a multi-turn conversation.
Properties
content string(Optional)
The text content of the model's reply. If tool_calls is included, content can be empty. Otherwise, content is required.
role string(Required)
The role for an assistant message. The value is fixed as assistant.
partial boolean(Optional) Defaults to: false
Specifies whether to enable partial mode, in which the model continues generating from the prefix that you provide in content.
Supported models
qwen3-vl-plus series (non-thinking mode)
qwen3-vl-plus, qwen3-vl-plus-2025-09-23, and later snapshot models
qwen3-vl-flash series (non-thinking mode)
qwen3-vl-flash, qwen3-vl-flash-2025-10-15, and later snapshot models
qwen-vl-max series
qwen-vl-max, qwen-vl-max-latest, qwen-vl-max-2025-04-08, and later snapshot models
qwen-vl-plus series
qwen-vl-plus, qwen-vl-plus-latest, qwen-vl-plus-2025-01-25, and later snapshot models
Qwen Turbo series (non-thinking mode)
qwen-turbo, qwen-turbo-latest, qwen-turbo-2024-11-01, and later snapshot models
Qwen open source series
Qwen3 open source models (non-thinking mode), Qwen2.5 series text models, Qwen3-VL open source models (non-thinking mode)
tool_calls array(Optional)
The tool and input parameter information that is returned after a function call is initiated. It contains one or more objects and is obtained from the tool_calls field of the previous model response.
Properties
id string(Required)
The ID of the tool response.
type string(Required)
The tool type. Currently, only function is supported.
function object(Required)
The tool and input parameter information.
Properties
name string(Required)
The tool name.
arguments string(Required)
The input parameter information, in JSON string format.
index integer(Required)
The index of the current tool information in the tool_calls array.
Tool Message object(Optional)
The output information of the tool.
Properties
content string(Required)
The output content of the tool function. It must be a string. If the tool returns structured data, such as JSON, serialize it into a string.
role string(Required)
The value is fixed as tool.
tool_call_id string(Required)
The ID returned after a function call is initiated. Obtain it from completion.choices[0].message.tool_calls[$index].id. This ID marks the tool that corresponds to the Tool Message.
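For illustration, a minimal sketch of the messages array in the second round of a function calling workflow. The tool_call_id value is hypothetical here; in practice it comes from the tool_calls field of the previous response.
messages = [
    {"role": "user", "content": "What is the weather like in Hangzhou?"},
    # Assistant Message from the previous response, carrying the tool call
    {"role": "assistant", "content": "", "tool_calls": [{
        "id": "call_abc123",  # hypothetical ID returned by the model
        "type": "function",
        "function": {"name": "get_current_weather", "arguments": "{\"location\": \"Hangzhou\"}"},
        "index": 0,
    }]},
    # Tool Message that returns the tool's output to the model as a string
    {"role": "tool", "tool_call_id": "call_abc123", "content": "Sunny, 25°C"},
]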
stream boolean(Optional) Defaults to: false
Enables streaming output mode. For more information, see Streaming output.
Valid values:
false: The model returns the complete content at once after generation.
true: The output is generated and sent incrementally. A data block (chunk) is returned as soon as a part of the content is generated. You must read these chunks in real time to piece together the full reply.
Setting this parameter to true improves the user experience and reduces the risk of timeouts.
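As a sketch of reading the chunks in real time (assuming the client configured in the earlier examples), the incremental content fields can be concatenated into the full reply:
full_reply = ""
completion = client.chat.completions.create(
    model="qwen-plus",
    messages=[{"role": "user", "content": "Who are you?"}],
    stream=True,
)
for chunk in completion:
    # Guard against chunks with an empty choices array (e.g., a final usage chunk)
    if chunk.choices:
        full_reply += chunk.choices[0].delta.content or ""
print(full_reply)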
stream_options object(Optional)
Configuration options for streaming output. This parameter is effective only when stream is set to true.
Properties
include_usage boolean(Optional) Defaults to: false
Specifies whether to include token consumption information in the last data block of the response.
Valid values:
true
false
For streaming output, token consumption information can appear only in the last data block of the response.
modalities array(Optional) Defaults to: ["text"]
The modality of the output data. This parameter applies only to Qwen-Omni models. For more information, see Omni-modal.
Valid values:
["text","audio"]: Returns text and audio.
["text"]: Specifies a text-only output.
audio object(Optional)
The voice and format of the output audio. This parameter applies only to Qwen-Omni models, and the modalities parameter must be set to ["text","audio"]. For more information, see Omni-modal.
Properties
voice string(Required)
The voice of the output audio. For more information, see Voice list.
format string(Required)
The format of the output audio. Only wav is supported.
temperature float(Optional)
The sampling temperature. This value controls the diversity of the text that the model generates.
Higher values increase diversity; lower values make output more deterministic.
Value range: [0, 2)
Both `temperature` and `top_p` control the diversity of the generated text. Set only one of them. For more information, see Text generation overview.
Do not modify the default temperature value for QVQ models.
top_p float(Optional)
The probability threshold for nucleus sampling. It controls the diversity of the text that the model generates.
A higher `top_p` value results in more diverse text. A lower `top_p` value produces more deterministic text.
Value range: (0, 1.0]
Both `temperature` and `top_p` control the diversity of the generated text. Set only one of them. For more information, see Text generation overview.
Do not modify the default top_p value for QVQ models.
top_k integer(Optional)
The number of candidate tokens to sample from during generation. A larger value results in more random output, while a smaller value results in more deterministic output. If this parameter is set to null or a value greater than 100, the top_k strategy is disabled, and only the top_p strategy is effective. The value must be an integer that is greater than or equal to 0.
Default top_k values
QVQ series, qwen-vl-plus-2025-07-10, and qwen-vl-plus-2025-08-15: 10.
QwQ series: 40.
Other qwen-vl-plus series, models before qwen-vl-max-2025-08-13, qwen2.5-omni-7b: 1.
Qwen3-Omni-Flash series: 50.
All other models: 20.
This parameter is not a standard OpenAI parameter. When you make calls using the Python SDK, place this parameter in the extra_body object. The parameter is configured as follows: extra_body={"top_k":xxx}.
Do not modify the default top_k value for QVQ models.
presence_penalty float(Optional)
Controls how much the model avoids repeating content.
Value range: [-2.0, 2.0]. Positive values reduce repetition, and negative values increase it.
For scenarios that require diversity and creativity, such as creative writing or brainstorming, increase this value. For scenarios that require consistency and terminological accuracy, such as technical documents or formal text, decrease this value.
If the parameter value is positive, the model penalizes tokens that already exist in the text. The penalty amount does not depend on how many times the token appears. This reduces the chance of these tokens reappearing. As a result, content repetition decreases and word diversity increases.
Example
Prompt: Translate this sentence into English: "Esta película es buena. La trama es buena, la actuación es buena, la música es buena, y en general, toda la película es simplemente buena. Es realmente buena, de hecho. La trama es tan buena, y la actuación es tan buena, y la música es tan buena."
Parameter value 2.0: This movie is very good. The plot is great, the acting is great, the music is also very good, and overall, the whole movie is incredibly good. In fact, it is truly excellent. The plot is very exciting, the acting is outstanding, and the music is so beautiful.
Parameter value 0.0: This movie is good. The plot is good, the acting is good, the music is also good, and overall, the whole movie is very good. In fact, it is really great. The plot is very good, the acting is also very outstanding, and the music is also excellent.
Parameter value -2.0: This movie is very good. The plot is very good, the acting is very good, the music is also very good, and overall, the whole movie is very good. In fact, it is really great. The plot is very good, the acting is also very good, and the music is also very good.
When using the qwen-vl-plus-2025-01-25 model for text extraction, set presence_penalty to 1.5.
Do not modify the default presence_penalty value for QVQ models.
response_format object (Optional) Defaults to: {"type": "text"}
The format of the returned content. Valid values:
{"type": "text"}: Outputs a text reply.
{"type": "json_object"}: Outputs a standard-format JSON string.
{"type": "json_schema","json_schema": {...} }: Outputs a JSON string in a specified format.
If you specify {"type": "json_object"}, you must explicitly instruct the model to output JSON in the prompt, for example, "Please output in JSON format". Otherwise, an error occurs.
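A minimal sketch of json_object mode (client as configured in the earlier examples). Note that the prompt explicitly asks for JSON output, as required above:
completion = client.chat.completions.create(
    model="qwen-plus",
    messages=[
        {"role": "system", "content": "Please output in JSON format."},
        {"role": "user", "content": "List three primary colors."},
    ],
    response_format={"type": "json_object"},
)
print(completion.choices[0].message.content)  # a JSON string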
json_schema object
This field is required when `type` is `json_schema`. It defines the configuration for the structured output.
Properties
name string(Required)
A unique name for the schema. The name can contain only letters (case-insensitive), numbers, underscores (_), and hyphens (-). The name can be up to 64 characters long.
description string(Optional)
A description of the schema's purpose. This helps the model understand the semantic context of the output.
schema object(Optional)
An object that conforms to the JSON Schema standard. It defines the data structure for the model output.
To learn how to build a JSON Schema, see JSON Schema
strict boolean(Optional) Defaults to: false
Specifies whether the model must strictly follow all schema constraints.
true (Recommended)
The model strictly follows all constraints, such as field types, required items, and formats. This ensures 100% compliance of the output.
false (Not recommended)
The model loosely follows the schema. The output may not conform to the specification and can result in validation failure.
max_input_tokens integer(Optional)
The maximum allowed token length for the input. This parameter is currently supported only by the qwen-plus-2025-07-28 and qwen-plus-latest models.
qwen-plus-latest default value: 129,024
The default value may be adjusted to 1,000,000 in the future.
qwen-plus-2025-07-28 default value: 1,000,000
This parameter is not a standard OpenAI parameter. When you make calls using the Python SDK, place the parameter in the extra_body object. The configuration is as follows: extra_body={"max_input_tokens": xxx}.
max_tokens integer(Optional)
The maximum number of tokens in the response. Generation stops when this limit is reached, and the finish_reason field of the response is length.
The default and maximum values are the model's maximum output length. For more information, see Models.
This parameter is useful for controlling output length in scenarios such as generating summaries or keywords, and for reducing costs and shortening response times.
max_tokens does not limit the length of the chain-of-thought.
vl_high_resolution_images boolean(Optional) Defaults to: false
Specifies whether to increase the maximum pixel limit for input images. When enabled, the limit is the pixel count that corresponds to 16384 tokens. For more information, see Process high-resolution images.
When vl_high_resolution_images is true, a fixed-resolution strategy is used and the max_pixels setting is ignored. If an image exceeds this resolution, its total pixels are downscaled to the limit. Pixel limits differ by model:
QVQ series and other Qwen2.5-VL series models: 12845056 (each token corresponds to 28×28 pixels, that is, 16384×28×28)
When vl_high_resolution_images is false, the actual resolution is determined by both max_pixels and the default limit. If the image exceeds max_pixels, it is downscaled to max_pixels. The default pixel limit for each model is the default value of max_pixels.
This parameter is not a standard OpenAI parameter. When making calls with the Python SDK, place this parameter in the extra_body object. The configuration is as follows: extra_body={"vl_high_resolution_images": xxx}.
n integer(Optional) Defaults to: 1
Specifies the number of responses to generate, with a value in the range of 1-4. This is useful for scenarios that require multiple candidate responses, such as creative writing or ad copy.
enable_thinking boolean(Optional)
Specifies whether to enable the thinking process before the model generates its reply. This applies to Qwen3 models. The default is true for open source models and false for commercial models. For more information, see Deep thinking.
This parameter is not a standard OpenAI parameter. When you make a call using the Python SDK, place it in the extra_body object. It is configured as follows: extra_body={"enable_thinking": xxx}.
thinking_budget integer(Optional)
The maximum number of tokens for the thinking process. This applies to Qwen3-VL, and the commercial and open source versions of Qwen3 models. For more information, see Limit thinking length.
The default value is the model's maximum chain-of-thought length. For more information, see Models.
This parameter is not a standard OpenAI parameter. When you use the Python SDK, place this parameter in the extra_body object. Configure the parameter as follows: extra_body={"thinking_budget": xxx}.
enable_code_interpreter boolean(Optional) Defaults to: false
Specifies whether to enable the code interpreter feature. This parameter takes effect only when model is set to qwen3-max-preview and enable_thinking is set to true. For more information, see Code interpreter.
Valid values:
true
false
This parameter is not a standard OpenAI parameter. When you make calls using the Python SDK, place this parameter in the extra_body object. The configuration is as follows: extra_body={"enable_code_interpreter": xxx}.
seed integer(Optional)
A random number seed. This parameter ensures that results are reproducible for the same input and parameters. If you pass the same seed value in a call and other parameters remain unchanged, the model returns the same result as much as possible.
Value range: [0, 2^31-1].
logprobs boolean(Optional) Defaults to: false
Specifies whether to return the log probabilities of the output tokens. Valid values:
true
false
Content that is generated during the thinking phase (reasoning_content) does not return log probabilities.
Supported models
Snapshot models of the qwen-plus series (excluding the main model)
Snapshot models of the qwen-turbo series (excluding the main model)
Qwen3 open source models
top_logprobs integer(Optional) Defaults to: 0
Specifies the number of most likely candidate tokens to return at each generation step.
Value range: [0, 5]
This parameter is effective only when logprobs is set to true.
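A minimal request sketch (client as configured earlier). The model name is an assumption: any snapshot model from the supported list above works.
completion = client.chat.completions.create(
    model="qwen-plus-2025-07-28",  # assumption: a qwen-plus snapshot model that supports logprobs
    messages=[{"role": "user", "content": "Say hi."}],
    logprobs=True,
    top_logprobs=2,  # up to 2 candidates per position; effective only with logprobs=True
)
print(completion.choices[0].logprobs)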
stop string or array(Optional)
Specifies stop words. When a string or token_id that is specified in stop appears in the text generated by the model, generation stops immediately.
Pass sensitive words to control the model's output.
When stop is an array, do not use a token_id and a string as elements at the same time. For example, ["Hello",104307] is not a valid value.
tools array(Optional)
An array that contains one or more tool objects for the model to call in function calling. For more information, see Function Calling.
When `tools` is set and the model determines that a tool needs to be called, the response returns tool information in the `tool_calls` field.
Properties
type string(Required)
The tool type. Currently, only function is supported.
function object(Required)
Properties
name string(Required)
The tool name. Only letters, numbers, underscores (_), and hyphens (-) are allowed. The maximum length is 64 tokens.
description string(Required)
Helps the model understand when to use this tool.
parameters object(Optional) Defaults to: {}
A description of the tool's parameters, which must be a valid JSON Schema. For more information, see JSON Schema. If parameters is empty, the tool has no input parameters, such as a time query tool.
Specify parameters for more accurate tool calling.
tool_choice string or object(Optional) Defaults to: auto
The tool selection policy. Set this parameter to force a tool call for certain types of questions, such as always using a specific tool or disabling all tools.
Valid values:
auto: The model automatically selects a tool.
none: Disables tool calling.
{"type": "function", "function": {"name": "the_function_to_call"}}: Forces a call to the specified tool, where the_function_to_call is the name of the tool function.
Models in thinking mode do not support forcing a call to a specific tool.
parallel_tool_calls boolean(Optional) Defaults to: false
Specifies whether to enable parallel tool calling. For more information, see Parallel tool calling.
Valid values:
true
false
enable_search boolean(Optional) Defaults to: false
Specifies whether to enable web search. For more information, see Web search.
Valid values:
true: If web search is not triggered after being enabled, optimize the prompt or set the forced_search parameter in search_options to force a search.
false
Enabling the web search feature may increase token consumption.
This parameter is not a standard OpenAI parameter. When you make a call using the Python SDK, include this parameter in the extra_body object. Configure it as follows: extra_body={"enable_search": True}.
search_options object(Optional)
The web search strategy. For more information, see Web search.
Properties
forced_search boolean(Optional) Defaults to: false
Specifies whether to force web search. This parameter takes effect only when enable_search is true.
Valid values:
true
false: The model decides whether to perform a web search.
search_strategy string(Optional) Defaults to: turbo
The search strategy. This parameter takes effect only when enable_search is true.
Valid values:
turbo (default): Balances response speed and search effectiveness. This is suitable for most scenarios.
max: Uses a more comprehensive search strategy and can call multiple search engines to obtain more detailed results. However, the response time may be longer.
agent: Calls the web search tool and the large language model multiple times to perform multi-round information retrieval and content integration.
agent strategy is applicable only to qwen3-max and qwen3-max-2025-09-23.
agent strategy cannot be set with other web search strategies.
enable_search_extension boolean(Optional) Defaults to: false
Specifies whether to enable domain-specific search. This parameter takes effect only when enable_search is true.
Valid values:
true
false
This parameter is not a standard OpenAI parameter. When you make a call using the Python SDK, place it in the extra_body object. The configuration is as follows: extra_body={"search_options": xxx}.
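A sketch of combining enable_search and search_options through extra_body in the Python SDK, per the notes above (client as configured earlier; the prompt is illustrative):
completion = client.chat.completions.create(
    model="qwen-plus",
    messages=[{"role": "user", "content": "What happened in tech news today?"}],
    extra_body={
        "enable_search": True,
        "search_options": {"forced_search": True, "search_strategy": "turbo"},
    },
)
print(completion.choices[0].message.content)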
Chat response object (non-streaming output)
{
    "choices": [
        {
            "message": {
                "role": "assistant",
                "content": "I am a large-scale language model developed by Alibaba Cloud. My name is Qwen."
            },
            "finish_reason": "stop",
            "index": 0,
            "logprobs": null
        }
    ],
    "object": "chat.completion",
    "usage": {
        "prompt_tokens": 3019,
        "completion_tokens": 104,
        "total_tokens": 3123,
        "prompt_tokens_details": {
            "cached_tokens": 2048
        }
    },
    "created": 1735120033,
    "system_fingerprint": null,
    "model": "qwen-plus",
    "id": "chatcmpl-6ada9ed2-7f33-9de2-8bb0-78bd4035025a"
}
id string
The unique ID for this call.
choices array
An array of content that the model generated.
Properties
finish_reason string
Indicates the reason why the model stopped generating output.
The following are three scenarios:
stop: The model stopped naturally or because the stop input parameter was triggered.
length: The generation stopped because the output reached the maximum length.
tool_calls: The model stopped to call a tool.
index integer
The index of the current object in the choices array.
logprobs object
The probability information of the tokens in the model's output.
Properties
content array
An array of tokens and their log probabilities.
Properties
token string
The text of the current token.
bytes array
A list of the raw UTF-8 bytes of the current token. This list is used to accurately restore the output content, such as emojis or Chinese characters.
logprob float
The log probability of the current token. A return value of null indicates an extremely low probability.
top_logprobs array
The most likely candidate tokens for the current token's position. The number of candidates matches the top_logprobs request parameter. Each element contains:
Properties
token string
The candidate token text.
bytes array
A list of the raw UTF-8 bytes of the candidate token. This list is used to accurately restore the output content, such as emojis or Chinese characters.
logprob float
The log probability of this candidate token. A return value of null indicates an extremely low probability.
message object
The message generated by the model.
Properties
content string
The content of the model's response.
reasoning_content string
The content of the model's chain-of-thought.
refusal string
This parameter is always null.
role string
The role of the message. The value is always assistant.
audio object
This parameter is always null.
function_call object (to be deprecated)
This value is always null. See the tool_calls parameter.
tool_calls array
The tool and input parameter information that the model generates after a function call is initiated.
Properties
id string
The unique identifier for this tool response.
type string
The type of the tool. Currently, only function is supported.
function object
Information about the tool.
Properties
name string
The name of the tool.
arguments string
The input parameters, in a JSON-formatted string.
Model outputs are non-deterministic. The output parameters might not match the function signature. Validate the parameters before calling the function.
index integer
The index of the current tool in the tool_calls array.
created integer
The Unix timestamp in seconds when the request was created.
model string
The model used for the request.
object string
The value is always chat.completion.
service_tier string
This parameter is currently fixed as null.
system_fingerprint string
This parameter is currently fixed as null.
usage object
The token consumption information for this request.
Properties
completion_tokens integer
The number of tokens in the model's output.
prompt_tokens integer
The number of tokens in the input.
total_tokens integer
The total number of tokens consumed, which is the sum of prompt_tokens and completion_tokens.
completion_tokens_details object
A fine-grained breakdown of output tokens when you use a Qwen-VL model.
Properties
audio_tokens integer
This parameter is currently set to null.
reasoning_tokens integer
This parameter is currently set to null.
text_tokens integer
The number of text tokens in the output of a Qwen-VL model.
prompt_tokens_details object
A fine-grained breakdown of input tokens.
Properties
audio_tokens integer
This parameter is currently set to null.
cached_tokens integer
The number of tokens that hit the cache. For more information, see Context cache.
text_tokens integer
The number of text tokens in the input of a Qwen-VL model.
image_tokens integer
The number of image tokens in the input of a Qwen-VL model.
video_tokens integer
The number of tokens for the input video file or image list in a Qwen-VL model.
cache_creation_input_tokens integer
The number of tokens used to create the explicit cache.
cache_type string
When you use an explicit cache, the parameter value is ephemeral. Otherwise, this parameter does not exist.
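For reference, a sketch of reading the fields above from the Python SDK's response object (client as configured in the earlier examples):
completion = client.chat.completions.create(
    model="qwen-plus",
    messages=[{"role": "user", "content": "Who are you?"}],
)
print(completion.choices[0].message.content)  # the reply text
print(completion.choices[0].finish_reason)    # e.g., "stop"
print(completion.usage.total_tokens)          # prompt_tokens + completion_tokens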
Chat response chunk object (streaming output)
{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[{"delta":{"content":"","function_call":null,"refusal":null,"role":"assistant","tool_calls":null},"finish_reason":null,"index":0,"logprobs":null}],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":null}
{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[{"delta":{"content":"I am a ","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null}],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":null}
{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[{"delta":{"content":"large-scale ","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null}],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":null}
{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[{"delta":{"content":"language model ","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null}],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":null}
{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[{"delta":{"content":"from Alibaba ","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null}],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":null}
{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[{"delta":{"content":"Cloud. My name ","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null}],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":null}
{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[{"delta":{"content":"is Qwen","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null}],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":null}
{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[{"delta":{"content":".","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null}],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":null}
{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[{"delta":{"content":"","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":"stop","index":0,"logprobs":null}],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":null}
{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":{"completion_tokens":17,"prompt_tokens":22,"total_tokens":39,"completion_tokens_details":null,"prompt_tokens_details":{"audio_tokens":null,"cached_tokens":0}}}
id string
The unique ID for this call. Each chunk object has the same ID.
choices array
An array of content that is generated by the model. This array can contain one or more objects. If you set the include_usage parameter to true, choices is an empty array in the last chunk.
Properties
delta object
The incremental object of the request.
Properties
content string
The incremental message content.
reasoning_content string
The incremental chain-of-thought content.
function_call object
This value defaults to null. For more information, see the tool_calls parameter.
audio object
The response generated when you use a Qwen-Omni model.
Properties
data string
The incremental Base64-encoded audio data.
expires_at integer
The UNIX timestamp when the request was created.
refusal object
This parameter is always null.
role string
The role of the incremental message object. This property has a value only in the first chunk.
tool_calls array
The tool and input parameter information that the model generates after a function call.
Properties
index integer
The index of the current tool in the tool_calls array.
id string
The unique ID for this tool response.
function object
Information about the invoked tool.
Properties
arguments string
Incremental information about the input parameters. Concatenate the arguments from all chunks to get the complete input parameters.
Because large model responses have a degree of randomness, the output parameter information may not conform to the function signature. Validate the parameters before you call the function.
name string
The tool name. This property has a value only in the first chunk.
type string
The tool type. Currently, only function is supported.
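As a sketch, the incremental tool call fields can be accumulated across chunks like this (client, messages, and tools as in the function calling example above):
tool_name, tool_args = "", ""
completion = client.chat.completions.create(
    model="qwen-plus", messages=messages, tools=tools, stream=True
)
for chunk in completion:
    if chunk.choices and chunk.choices[0].delta.tool_calls:
        call = chunk.choices[0].delta.tool_calls[0]
        if call.function.name:       # the name arrives only in the first chunk
            tool_name = call.function.name
        if call.function.arguments:  # the arguments arrive incrementally
            tool_args += call.function.arguments
print(tool_name, tool_args)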
finish_reason string
The reason why the model stopped generating, in one of four scenarios:
null: The generation is not yet complete. This value is returned while streaming is in progress.
stop: The model stopped generating content naturally or the stop parameter was triggered.
length: The content generation stopped because the maximum output length was reached.
tool_calls: The model stopped to make a tool call.
index integer
The index of the current response in the choices array. If the input parameter n is greater than 1, use this parameter to concatenate the complete content for different responses.
logprobs object
The probability information of the current object.
Properties
content array
An array of tokens with log probability information.
Properties
token string
The current token.
bytes array
A list of the raw UTF-8 bytes of the current token. This is used to accurately restore the output content and is helpful when you handle emojis and Chinese characters.
logprob float
The log probability of the current token. A return value of null indicates an extremely low probability.
top_logprobs array
The most likely tokens at the current token position and their log probabilities. The number of elements is consistent with the top_logprobs input parameter.
Properties
token string
The current token.
bytes array
A list of the raw UTF-8 bytes of the current token. This is used to accurately restore the output content and is helpful when you handle emojis and Chinese characters.
logprob float
The log probability of the current token. A return value of null indicates an extremely low probability.
created integer
The timestamp when this request was created. Each chunk has the same timestamp.
model string
The model used for this request.
object string
The value is always chat.completion.chunk.
service_tier string
This parameter is currently fixed as null.
system_fingerprint string
This parameter is currently fixed as null.
usage object
The tokens consumed by this request. This is displayed in the last chunk only when include_usage is true.
Properties
completion_tokens integer
The number of tokens in the model's output.
prompt_tokens integer
The number of input tokens.
total_tokens integer
The total number of tokens, which is the sum of prompt_tokens and completion_tokens.
completion_tokens_details object
Detailed information about the output tokens.
Properties
audio_tokens integer
The number of audio tokens in the output of a Qwen-Omni model.
reasoning_tokens integer
The number of tokens in the thinking process.
text_tokens integer
The number of output text tokens.
prompt_tokens_details object
A fine-grained breakdown of input tokens.
Properties
audio_tokens integer
The number of tokens in the input audio. The number of audio tokens in a video file is also returned in this parameter.
text_tokens integer
The number of tokens in the input text.
video_tokens integer
The number of tokens for the input video, which can be an image list or a video file.
image_tokens integer
The number of tokens in the input image.
cached_tokens integer
The number of tokens that hit the cache. For more information, see Context cache.