
Alibaba Cloud Model Studio: Qwen API reference

Last Updated: Dec 15, 2025

This topic describes the input and output parameters of the Qwen API. It also provides call examples in popular programming languages, such as Python, for typical scenarios.

For an overview of the models, selection advice, and usage instructions, see Overview of text generation models.

You can call the Qwen API using the OpenAI-compatible protocol or the DashScope protocol.

OpenAI compatible

Singapore region

The base_url for SDK: https://dashscope-intl.aliyuncs.com/compatible-mode/v1

HTTP request endpoint: POST https://dashscope-intl.aliyuncs.com/compatible-mode/v1/chat/completions

China (Beijing) region

The base_url for SDK: https://dashscope.aliyuncs.com/compatible-mode/v1

HTTP request endpoint: POST https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions

First create an API key and export it as an environment variable. If you use the OpenAI SDK, install the SDK.

Request body

POST /chat/completions


Text input

Python

import os
from openai import OpenAI


client = OpenAI(
    # If the environment variable is not set, replace the following line with: api_key="sk-xxx"
    # API keys for the Singapore and Beijing regions are different. To get an API key, see https://www.alibabacloud.com/help/zh/model-studio/get-api-key
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    # The following is the base_url for the Singapore region. If you use a model in the Beijing region, replace the base_url with: https://dashscope.aliyuncs.com/compatible-mode/v1
    base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",  
)

completion = client.chat.completions.create(
    # This example uses qwen-plus. You can change the model name as needed. For a list of models, see https://www.alibabacloud.com/help/zh/model-studio/getting-started/models
    model="qwen-plus",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Who are you?"},
    ],
    # For Qwen3 models, use the enable_thinking parameter to control the thinking process (default is True for open source models, False for commercial models).
    # When using a Qwen3 open source model without streaming output, uncomment the following line to avoid errors.
    # extra_body={"enable_thinking": False},
)
print(completion.model_dump_json())

Java

Request example

// This code uses OpenAI SDK version 2.6.0
import com.openai.client.OpenAIClient;
import com.openai.client.okhttp.OpenAIOkHttpClient;
import com.openai.models.chat.completions.ChatCompletion;
import com.openai.models.chat.completions.ChatCompletionCreateParams;

public class Main {
    public static void main(String[] args) {
        OpenAIClient client = OpenAIOkHttpClient.builder()
                // API keys for the Singapore and Beijing regions are different. To get an API key, see https://www.alibabacloud.com/help/zh/model-studio/get-api-key
                .apiKey(System.getenv("DASHSCOPE_API_KEY"))
                // The following is the base_url for the Singapore region. If you use a model in the Beijing region, replace the base_url with: https://dashscope.aliyuncs.com/compatible-mode/v1
                .baseUrl("https://dashscope-intl.aliyuncs.com/compatible-mode/v1") 
                .build();

        ChatCompletionCreateParams params = ChatCompletionCreateParams.builder()
                .addUserMessage("Who are you?")
                .model("qwen-plus")
                .build();

        try {
            ChatCompletion chatCompletion = client.chat().completions().create(params);
            System.out.println(chatCompletion);
        } catch (Exception e) {
            System.err.println("Error occurred: " + e.getMessage());
            e.printStackTrace();
        }
    }
}

Node.js

import OpenAI from "openai";

const openai = new OpenAI(
    {
        // If the environment variable is not set, replace the following line with: apiKey: "sk-xxx",
        // API keys for the Singapore and Beijing regions are different. To get an API key, see https://www.alibabacloud.com/help/zh/model-studio/get-api-key
        apiKey: process.env.DASHSCOPE_API_KEY,
        // The following is the base_url for the Singapore region. If you use a model in the Beijing region, replace the base_url with: https://dashscope.aliyuncs.com/compatible-mode/v1
        baseURL: "https://dashscope-intl.aliyuncs.com/compatible-mode/v1" 
    }
);

async function main() {
    const completion = await openai.chat.completions.create({
        model: "qwen-plus",  //This example uses qwen-plus. You can change the model name as needed. For a list of models, see https://www.alibabacloud.com/help/zh/model-studio/getting-started/models
        messages: [
            { role: "system", content: "You are a helpful assistant." },
            { role: "user", content: "Who are you?" }
        ],
    });
    console.log(JSON.stringify(completion))
}

main();

Go

package main

import (
	"context"
	"os"

	"github.com/openai/openai-go"
	"github.com/openai/openai-go/option"
)

func main() {
	client := openai.NewClient(
	        // API keys for the Singapore and Beijing regions are different. To get an API key, see https://www.alibabacloud.com/help/zh/model-studio/get-api-key
		option.WithAPIKey(os.Getenv("DASHSCOPE_API_KEY")), // defaults to os.LookupEnv("OPENAI_API_KEY")
		// The following is the base_url for the Singapore region. If you use a model in the Beijing region, replace the base_url with: https://dashscope.aliyuncs.com/compatible-mode/v1/
		option.WithBaseURL("https://dashscope-intl.aliyuncs.com/compatible-mode/v1/"), 
	)
	chatCompletion, err := client.Chat.Completions.New(
		context.TODO(), openai.ChatCompletionNewParams{
			Messages: openai.F(
				[]openai.ChatCompletionMessageParamUnion{
					openai.UserMessage("Who are you?"),
				},
			),
			Model: openai.F("qwen-plus"),
		},
	)

	if err != nil {
		panic(err.Error())
	}

	println(chatCompletion.Choices[0].Message.Content)
}

C# (HTTP)

using System.Net.Http.Headers;
using System.Text;

class Program
{
    private static readonly HttpClient httpClient = new HttpClient();

    static async Task Main(string[] args)
    {
        // If the environment variable is not set, replace the following line with: string? apiKey = "sk-xxx";
        // API keys for the Singapore and Beijing regions are different. To get an API key, see https://www.alibabacloud.com/help/zh/model-studio/get-api-key
        string? apiKey = Environment.GetEnvironmentVariable("DASHSCOPE_API_KEY");

        if (string.IsNullOrEmpty(apiKey))
        {
            Console.WriteLine("API Key is not set. Make sure the 'DASHSCOPE_API_KEY' environment variable is set.");
            return;
        }

        // Set the request URL and content
        // The following is the base_url for the Singapore region. If you use a model in the Beijing region, replace the base_url with: https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions
        string url = "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/chat/completions";
        // This example uses qwen-plus. You can change the model name as needed. For a list of models, see https://www.alibabacloud.com/help/zh/model-studio/getting-started/models
        string jsonContent = @"{
            ""model"": ""qwen-plus"",
            ""messages"": [
                {
                    ""role"": ""system"",
                    ""content"": ""You are a helpful assistant.""
                },
                {
                    ""role"": ""user"", 
                    ""content"": ""Who are you?""
                }
            ]
        }";

        // Send the request and get the response
        string result = await SendPostRequestAsync(url, jsonContent, apiKey);

        // Print the result
        Console.WriteLine(result);
    }

    private static async Task<string> SendPostRequestAsync(string url, string jsonContent, string apiKey)
    {
        using (var content = new StringContent(jsonContent, Encoding.UTF8, "application/json"))
        {
            // Set the request headers
            httpClient.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Bearer", apiKey);
            httpClient.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json"));

            // Send the request and get the response
            HttpResponseMessage response = await httpClient.PostAsync(url, content);

            // Handle the response
            if (response.IsSuccessStatusCode)
            {
                return await response.Content.ReadAsStringAsync();
            }
            else
            {
                return $"Request failed: {response.StatusCode}";
            }
        }
    }
}

PHP (HTTP)

<?php
// Set the request URL
// The following is the base_url for the Singapore region. If you use a model in the Beijing region, replace the base_url with: https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions
$url = 'https://dashscope-intl.aliyuncs.com/compatible-mode/v1/chat/completions';
// If the environment variable is not set, replace the following line with: $apiKey = "sk-xxx";
// API keys for the Singapore and Beijing regions are different. To get an API key, see https://www.alibabacloud.com/help/zh/model-studio/get-api-key
$apiKey = getenv('DASHSCOPE_API_KEY');
// Set the request headers
$headers = [
    'Authorization: Bearer '.$apiKey,
    'Content-Type: application/json'
];
// Set the request body
$data = [
    // This example uses qwen-plus. You can change the model name as needed. For a list of models, see https://www.alibabacloud.com/help/zh/model-studio/getting-started/models
    "model" => "qwen-plus",
    "messages" => [
        [
            "role" => "system",
            "content" => "You are a helpful assistant."
        ],
        [
            "role" => "user",
            "content" => "Who are you?"
        ]
    ]
];
// Initialize a cURL session
$ch = curl_init();
// Set cURL options
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_POST, true);
curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($data));
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
// Execute the cURL session
$response = curl_exec($ch);
// Check for errors
if (curl_errno($ch)) {
    echo 'Curl error: ' . curl_error($ch);
}
// Close the cURL resource
curl_close($ch);
// Print the response
echo $response;
?>

curl

The API keys for the Singapore and Beijing regions are different. To obtain an API key, visit https://www.alibabacloud.com/help/en/model-studio/get-api-key. If you use a model in the Beijing region, replace the URL with `https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions`.
curl -X POST https://dashscope-intl.aliyuncs.com/compatible-mode/v1/chat/completions \
-H "Authorization: Bearer $DASHSCOPE_API_KEY" \
-H "Content-Type: application/json" \
-d '{
    "model": "qwen-plus",
    "messages": [
        {
            "role": "system",
            "content": "You are a helpful assistant."
        },
        {
            "role": "user", 
            "content": "Who are you?"
        }
    ]
}'

Streaming output

For more information, see Streaming.

Python

import os
from openai import OpenAI

client = OpenAI(
    # If the environment variable is not set, replace the following line with: api_key="sk-xxx"
    # API keys for the Singapore and Beijing regions are different. To get an API key, see https://www.alibabacloud.com/help/zh/model-studio/get-api-key
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    # The following is the base_url for the Singapore region. If you use a model in the Beijing region, replace the base_url with: https://dashscope.aliyuncs.com/compatible-mode/v1
    base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
)
completion = client.chat.completions.create(
    model="qwen-plus",  # This example uses qwen-plus. You can change the model name as needed. For a list of models, see https://www.alibabacloud.com/help/zh/model-studio/getting-started/models
    messages=[{'role': 'system', 'content': 'You are a helpful assistant.'},
                {'role': 'user', 'content': 'Who are you?'}],
    stream=True,
    stream_options={"include_usage": True}
    )
for chunk in completion:
    print(chunk.model_dump_json())

Node.js

import OpenAI from "openai";

const openai = new OpenAI(
    {
        // API keys for the Singapore and Beijing regions are different. To get an API key, see https://www.alibabacloud.com/help/zh/model-studio/get-api-key
        apiKey: process.env.DASHSCOPE_API_KEY,
        // The following is the base_url for the Singapore region. If you use a model in the Beijing region, replace the base_url with: https://dashscope.aliyuncs.com/compatible-mode/v1
        baseURL: "https://dashscope-intl.aliyuncs.com/compatible-mode/v1"
    }
);

async function main() {
    const completion = await openai.chat.completions.create({
        model: "qwen-plus", // This example uses qwen-plus. You can change the model name as needed. For a list of models, see https://www.alibabacloud.com/help/zh/model-studio/getting-started/models
        messages: [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Who are you?"}
        ],
        stream: true,
    });
    for await (const chunk of completion) {
        console.log(JSON.stringify(chunk));
    }
}

main();

curl

The API keys for the Singapore and Beijing regions are different. To obtain an API key, visit https://www.alibabacloud.com/help/en/model-studio/get-api-key. If you use a model in the Beijing region, replace the URL with `https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions`.
curl --location "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/chat/completions" \
--header "Authorization: Bearer $DASHSCOPE_API_KEY" \
--header "Content-Type: application/json" \
--data '{
    "model": "qwen-plus",
    "messages": [
        {
            "role": "system",
            "content": "You are a helpful assistant."
        },
        {
            "role": "user", 
            "content": "Who are you?"
        }
    ],
    "stream":true
}'

Image input

For more ways to use large language models to analyze images, see Visual understanding.

Python

import os
from openai import OpenAI

client = OpenAI(
    # If the environment variable is not set, replace the following line with: api_key="sk-xxx"
    # API keys for the Singapore and Beijing regions are different. To get an API key, see https://www.alibabacloud.com/help/zh/model-studio/get-api-key
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    # The following is the base_url for the Singapore region. If you use a model in the Beijing region, replace the base_url with: https://dashscope.aliyuncs.com/compatible-mode/v1
    base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
)
completion = client.chat.completions.create(
    model="qwen-vl-plus",  # This example uses qwen-vl-plus. You can change the model name as needed. For a list of models, see https://www.alibabacloud.com/help/zh/model-studio/models
    messages=[{"role": "user","content": [
            {"type": "image_url",
             "image_url": {"url": "https://dashscope.oss-cn-beijing.aliyuncs.com/images/dog_and_girl.jpeg"}},
            {"type": "text", "text": "What is this?"},
            ]}]
    )
print(completion.model_dump_json())

Node.js

import OpenAI from "openai";

const openai = new OpenAI(
    {
        // API keys for the Singapore and Beijing regions are different. To get an API key, see https://www.alibabacloud.com/help/zh/model-studio/get-api-key
        apiKey: process.env.DASHSCOPE_API_KEY,
         // The following is the base_url for the Singapore region. If you use a model in the Beijing region, replace the base_url with: https://dashscope.aliyuncs.com/compatible-mode/v1
        baseURL: "https://dashscope-intl.aliyuncs.com/compatible-mode/v1"
    }
);

async function main() {
    const response = await openai.chat.completions.create({
        model: "qwen-vl-max", // This example uses qwen-vl-max. You can change the model name as needed. For a list of models, see https://www.alibabacloud.com/help/zh/model-studio/models
        messages: [{role: "user",content: [
            { type: "image_url",image_url: {"url": "https://dashscope.oss-cn-beijing.aliyuncs.com/images/dog_and_girl.jpeg"}},
            { type: "text", text: "What is this?" },
        ]}]
    });
    console.log(JSON.stringify(response));
}

main();

curl

The API keys for the Singapore and Beijing regions are different. To obtain an API key, visit https://www.alibabacloud.com/help/en/model-studio/get-api-key. If you use a model in the Beijing region, replace the URL with `https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions`.
curl -X POST https://dashscope-intl.aliyuncs.com/compatible-mode/v1/chat/completions \
-H "Authorization: Bearer $DASHSCOPE_API_KEY" \
-H 'Content-Type: application/json' \
-d '{
  "model": "qwen-vl-plus",
  "messages": [{
      "role": "user",
      "content": [
       {"type": "image_url","image_url": {"url": "https://dashscope.oss-cn-beijing.aliyuncs.com/images/dog_and_girl.jpeg"}},
       {"type": "text","text": "What is this?"}
       ]}]
}'

Video input

The following is an example of passing a list of images. For more usage information, such as passing a video file, see Visual understanding.

Python

import os
from openai import OpenAI

client = OpenAI(
    # If the environment variable is not set, replace the following line with: api_key="sk-xxx"
    # API keys for the Singapore and Beijing regions are different. To get an API key, see https://www.alibabacloud.com/help/zh/model-studio/get-api-key
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    # The following is the base_url for the Singapore region. If you use a model in the Beijing region, replace the base_url with: https://dashscope.aliyuncs.com/compatible-mode/v1
    base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",  
)
completion = client.chat.completions.create(
    # This example uses qwen-vl-max. You can change the model name as needed. For a list of models, see https://www.alibabacloud.com/help/zh/model-studio/models
    model="qwen-vl-max",
    messages=[{
        "role": "user",
        "content": [
            {
                "type": "video",
                "video": [
                    "https://img.alicdn.com/imgextra/i3/O1CN01K3SgGo1eqmlUgeE9b_!!6000000003923-0-tps-3840-2160.jpg",
                    "https://img.alicdn.com/imgextra/i4/O1CN01BjZvwg1Y23CF5qIRB_!!6000000003000-0-tps-3840-2160.jpg",
                    "https://img.alicdn.com/imgextra/i4/O1CN01Ib0clU27vTgBdbVLQ_!!6000000007859-0-tps-3840-2160.jpg",
                    "https://img.alicdn.com/imgextra/i1/O1CN01aygPLW1s3EXCdSN4X_!!6000000005710-0-tps-3840-2160.jpg"]
            },
            {
                "type": "text",
                "text": "Describe the process in this video."
            }]}]
)
print(completion.model_dump_json())

Node.js

// Make sure you have specified "type": "module" in package.json.
import OpenAI from "openai"; 

const openai = new OpenAI({
    // If the environment variable is not set, replace the following line with: apiKey: "sk-xxx",
    // API keys for the Singapore and Beijing regions are different. To get an API key, see https://www.alibabacloud.com/help/zh/model-studio/get-api-key
    apiKey: process.env.DASHSCOPE_API_KEY, 
    // The following is the base_url for the Singapore region. If you use a model in the Beijing region, replace the base_url with: https://dashscope.aliyuncs.com/compatible-mode/v1
    baseURL: "https://dashscope-intl.aliyuncs.com/compatible-mode/v1"    
});

async function main() {
    const response = await openai.chat.completions.create({
        // This example uses qwen-vl-max. You can change the model name as needed. For a list of models, see https://www.alibabacloud.com/help/zh/model-studio/models 
        model: "qwen-vl-max",
        messages: [{
            role: "user",
            content: [
                {
                    type: "video",
                    video: [
                        "https://img.alicdn.com/imgextra/i3/O1CN01K3SgGo1eqmlUgeE9b_!!6000000003923-0-tps-3840-2160.jpg",
                        "https://img.alicdn.com/imgextra/i4/O1CN01BjZvwg1Y23CF5qIRB_!!6000000003000-0-tps-3840-2160.jpg",
                        "https://img.alicdn.com/imgextra/i4/O1CN01Ib0clU27vTgBdbVLQ_!!6000000007859-0-tps-3840-2160.jpg",
                        "https://img.alicdn.com/imgextra/i1/O1CN01aygPLW1s3EXCdSN4X_!!6000000005710-0-tps-3840-2160.jpg"
                    ]
                },
                {
                    type: "text",
                    text: "Describe the process in this video."
                }
        ]}]
    });
    console.log(JSON.stringify(response));
}

main();

curl

The API keys for the Singapore and Beijing regions are different. For more information, see Obtain and configure an API key. The `base_url` is for the Singapore region. If you use a model in the Beijing region, you must replace the `base_url` with `https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions`.
curl -X POST https://dashscope-intl.aliyuncs.com/compatible-mode/v1/chat/completions \
-H "Authorization: Bearer $DASHSCOPE_API_KEY" \
-H 'Content-Type: application/json' \
-d '{
    "model": "qwen-vl-max",
    "messages": [
        {
            "role": "user",
            "content": [
                {
                    "type": "video",
                    "video": [
                        "https://img.alicdn.com/imgextra/i3/O1CN01K3SgGo1eqmlUgeE9b_!!6000000003923-0-tps-3840-2160.jpg",
                        "https://img.alicdn.com/imgextra/i4/O1CN01BjZvwg1Y23CF5qIRB_!!6000000003000-0-tps-3840-2160.jpg",
                        "https://img.alicdn.com/imgextra/i4/O1CN01Ib0clU27vTgBdbVLQ_!!6000000007859-0-tps-3840-2160.jpg",
                        "https://img.alicdn.com/imgextra/i1/O1CN01aygPLW1s3EXCdSN4X_!!6000000005710-0-tps-3840-2160.jpg"
                    ]
                },
                {
                    "type": "text",
                    "text": "Describe the process in this video."
                }
            ]
        }
    ]
}'

Tool calling

For the complete Function calling workflow code, see Function calling.
For Function calling code for Qwen3 (thinking mode) and QwQ models, see Deep thinking.

Python

import os
from openai import OpenAI

client = OpenAI(
    # If the environment variable is not set, replace the following line with: api_key="sk-xxx"
    # API keys for the Singapore and Beijing regions are different. To get an API key, see https://www.alibabacloud.com/help/zh/model-studio/get-api-key
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    # The following is the base_url for the Singapore region. If you use a model in the Beijing region, replace the base_url with: https://dashscope.aliyuncs.com/compatible-mode/v1
    base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",  
)

tools = [
    # Tool 1: Get the current time
    {
        "type": "function",
        "function": {
            "name": "get_current_time",
            "description": "Useful when you want to know the current time.",
            "parameters": {}  # parameters is an empty dictionary because no input is needed to get the current time
        }
    },  
    # Tool 2: Get the weather for a specified city
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Useful when you want to check the weather in a specific city.",
            "parameters": {  
                "type": "object",
                "properties": {
                    # A location is required to check the weather, so the parameter is set to location
                    "location": {
                        "type": "string",
                        "description": "A city or district, such as Beijing, Hangzhou, or Yuhang District."
                    }
                },
                "required": ["location"]
            }
        }
    }
]
messages = [{"role": "user", "content": "What is the weather like in Hangzhou?"}]
completion = client.chat.completions.create(
    model="qwen-plus",  # This example uses qwen-plus. You can change the model name as needed. For a list of models, see https://www.alibabacloud.com/help/zh/model-studio/getting-started/models
    messages=messages,
    tools=tools
)

print(completion.model_dump_json())

Node.js

import OpenAI from "openai";

const openai = new OpenAI(
    {
        // If the environment variable is not set, replace the following line with: apiKey: "sk-xxx",
        // API keys for the Singapore and Beijing regions are different. To get an API key, see https://www.alibabacloud.com/help/zh/model-studio/get-api-key
        apiKey: process.env.DASHSCOPE_API_KEY,
        // The following is the base_url for the Singapore region. If you use a model in the Beijing region, replace the base_url with: https://dashscope.aliyuncs.com/compatible-mode/v1
        baseURL: "https://dashscope-intl.aliyuncs.com/compatible-mode/v1"
    }
);

const messages = [{"role": "user", "content": "What is the weather like in Hangzhou?"}];
const tools = [
// Tool 1: Get the current time
{
    "type": "function",
    "function": {
        "name": "get_current_time",
        "description": "Useful when you want to know the current time.",
        // parameters is empty because no input is needed to get the current time
        "parameters": {}  
    }
},  
// Tool 2: Get the weather for a specified city
{
    "type": "function",
    "function": {
        "name": "get_current_weather",
        "description": "Useful when you want to check the weather in a specific city.",
        "parameters": {  
            "type": "object",
            "properties": {
                // A location is required to check the weather, so the parameter is set to location
                "location": {
                    "type": "string",
                    "description": "A city or district, such as Beijing, Hangzhou, or Yuhang District."
                }
            },
            "required": ["location"]
        }
    }
}
];

async function main() {
    const response = await openai.chat.completions.create({
        model: "qwen-plus", // This example uses qwen-plus. You can change the model name as needed. For a list of models, see https://www.alibabacloud.com/help/zh/model-studio/getting-started/models
        messages: messages,
        tools: tools,
    });
    console.log(JSON.stringify(response));
}

main();

curl

The API keys for the Singapore and Beijing regions are different. For more information, see Obtain and configure an API key. The following `base_url` is for the Singapore region. If you use a model in the Beijing region, you must replace the `base_url` with `https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions`.
curl -X POST https://dashscope-intl.aliyuncs.com/compatible-mode/v1/chat/completions \
-H "Authorization: Bearer $DASHSCOPE_API_KEY" \
-H "Content-Type: application/json" \
-d '{
    "model": "qwen-plus",
    "messages": [
        {
            "role": "system",
            "content": "You are a helpful assistant."
        },
        {
            "role": "user", 
            "content": "What is the weather like in Hangzhou?"
        }
    ],
    "tools": [
    {
        "type": "function",
        "function": {
            "name": "get_current_time",
            "description": "Useful when you want to know the current time.",
            "parameters": {}
        }
    },
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Useful when you want to check the weather in a specific city.",
            "parameters": {
                "type": "object",
                "properties": {
                    "location":{
                        "type": "string",
                        "description": "A city or district, such as Beijing, Hangzhou, or Yuhang District."
                    }
                },
                "required": ["location"]
            }
        }
    }
  ]
}'

Asynchronous invocation

import os
import asyncio
from openai import AsyncOpenAI
import platform

client = AsyncOpenAI(
    # If the environment variable is not set, replace the following line with: api_key="sk-xxx"
    # If you use a model in the China (Beijing) region, you need to use an API KEY for that region. Get it here: https://bailian.console.alibabacloud.com/?tab=model#/api-key
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    # The following is the base_url for the Singapore region. If you use a model in the Beijing region, replace the base_url with: https://dashscope.aliyuncs.com/compatible-mode/v1
    base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
)

async def main():
    response = await client.chat.completions.create(
        messages=[{"role": "user", "content": "Who are you?"}],
        model="qwen-plus",  # This example uses qwen-plus. You can change the model name as needed. For a list of models, see https://www.alibabacloud.com/help/zh/model-studio/getting-started/models
    )
    print(response.model_dump_json())

if platform.system() == "Windows":
    asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
asyncio.run(main())

model string (Required)

The name of the model to use.

Supported models include Qwen large language models (commercial and open source), Qwen-VL, Qwen-Coder, Qwen-Omni, and Qwen-Math.

For specific model names and billing information, see Model list.

messages array (Required)

The context to pass to the large language model, arranged in conversational order.

Message types

System Message object (Optional)

A system message that is used to set the role, tone, task objectives, or constraints for the large language model. It is usually placed at the beginning of the messages array.

System Messages are not recommended for the QwQ model and have no effect on the QVQ model.

Properties

content string (Required)

A system instruction that defines the model's role, behavior, response style, and task constraints.

role string (Required)

The role for a system message. The value is fixed as system.

User Message object (Required)

A user message that is used to pass questions, instructions, or context to the model.

Properties

content string or array (Required)

The message content. This is a string for text-only input. It is an array for multimodal input, such as images, or if explicit caching is enabled.

Properties for multimodal models or explicit caching

type string (Required)

Valid values:

  • text

    Set to text for text input.

  • image_url

    Set to image_url for image input.

  • input_audio

    Set to input_audio for audio input.

  • video

    Set to video when the video input is a list of images.

  • video_url

    Set to video_url for video file input.

    Only some Qwen-VL models can accept video files as input. For more information, see Video understanding (Qwen-VL). The QVQ and Qwen-Omni models support direct video file input.

text string

The input text. This parameter is required when type is text.

image_url object

The input image information. This parameter is required when type is image_url.

Properties

url string (Required)

The URL or Base64-encoded Data URL of the image. To pass a local file, see Visual understanding.

input_audio object

The input audio information. This parameter is required when type is input_audio.

Properties

data string (Required)

The URL or Base64-encoded Data URL of the audio. To pass a local file, see Input Base64-encoded local files.

format string (Required)

The format of the input audio, such as mp3 or wav.

video array

The video information in the form of an image list. This parameter is required when type is video. For usage instructions, see Video understanding (Qwen-VL), Video understanding (QVQ), or Video understanding (Qwen-Omni).

Example value:

[
    "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20241108/xzsgiz/football1.jpg",
    "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20241108/tdescd/football2.jpg",
    "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20241108/zefdja/football3.jpg",
    "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20241108/aedbqh/football4.jpg"
]

video_url object

The input video file information. This parameter is required when type is video_url.

Qwen-VL can only understand the visual information in video files. Qwen-Omni can understand both the visual and audio information.

Properties

url string (Required)

The public URL or Base64-encoded Data URL of the video file. To input a local video file, see Input Base64-encoded local files.

min_pixels integer (Optional)

Sets the minimum pixel threshold for the input image. If an input image or video frame has fewer pixels than min_pixels, it is enlarged until its total pixels exceed min_pixels.

  • Applicable models: QVQ, Qwen-VL

  • Value range: As follows

    min_pixels value range

    • Qwen3-VL: The default and minimum value is 65536.

    • qwen-vl-max, qwen-vl-max-latest, qwen-vl-max-0813, qwen-vl-plus, qwen-vl-plus-latest, qwen-vl-plus-0815: The default and minimum value is 4096.

    • QVQ and other Qwen2.5-VL models: The default and minimum value is 3136.

  • Example value: {"type": "image_url","image_url": {"url":"https://xxxx.jpg"},"min_pixels": 65536}

max_pixels integer (Optional)

Sets the maximum pixel threshold for an input image or video frame. If the pixel count of an input image is within the [min_pixels, max_pixels] range, the model recognizes the original image. If the pixel count exceeds max_pixels, the image is downscaled until its total pixel count is below max_pixels.

  • Applicable models: QVQ, Qwen-VL

  • Value range: As follows

    max_pixels value range

    The value of `max_pixels` depends on whether the vl_high_resolution_images parameter is enabled.

    • When vl_high_resolution_images is False:

      • Qwen3-VL: Default value is 2621440, maximum value is 16777216

      • qwen-vl-max, qwen-vl-max-latest, qwen-vl-max-0813, qwen-vl-plus, qwen-vl-plus-latest, qwen-vl-plus-0815: Default value is 1310720, maximum value is 16777216.

      • QVQ and other Qwen2.5-VL models: Default value is 1003520, maximum value is 12845056.

    • When vl_high_resolution_images is True:

      • Qwen3-VL, qwen-vl-max, qwen-vl-max-latest, qwen-vl-max-0813, qwen-vl-plus, qwen-vl-plus-latest, qwen-vl-plus-0815: max_pixels is invalid. The maximum pixel count for the input image is fixed at 16777216.

      • QVQ and other Qwen2.5-VL models: max_pixels is invalid. The maximum pixel count for the input image is fixed at 12845056.

  • Example value: {"type": "image_url","image_url": {"url":"https://xxxx.jpg"},"max_pixels": 8388608}

cache_control object (Optional)

Enables explicit caching. For more information, see Explicit cache.

Properties

type string (Required)

Only ephemeral is supported.

role string (Required)

The role of a user message. The value is user.

Assistant Message object (Optional)

The model's reply. This is typically sent back to the model as context in a multi-turn conversation.

Properties

content string (Optional)

The text content of the model's reply. When tool_calls is included, content can be empty. Otherwise, content is required.

role string (Required)

The role of an assistant message is assistant.

partial boolean (Optional) Default value: false

Enables partial mode (prefix continuation). When the last message in the messages array is an assistant message with partial set to true, the model continues writing from that message's content instead of replying to it (see the sketch after the supported model list below).

Valid values:

  • true

  • false

Supported models

  • Qwen-Max series

    qwen3-max, qwen3-max-2025-09-23, qwen3-max-preview (non-thinking mode), qwen-max, qwen-max-latest, and snapshot models from qwen-max-2025-01-25 or later

  • Qwen-Plus series (non-thinking mode)

    qwen-plus, qwen-plus-latest, and snapshot models from qwen-plus-2025-01-25 or later

  • Qwen-Flash series (non-thinking mode)

    qwen-flash, and snapshot models from qwen-flash-2025-07-28 or later

  • Qwen-Coder series

    qwen3-coder-plus, qwen3-coder-flash, qwen3-coder-480b-a35b-instruct, qwen3-coder-30b-a3b-instruct

  • Qwen-VL series

    • qwen3-vl-plus series (non-thinking mode)

      qwen3-vl-plus, and snapshot models from qwen3-vl-plus-2025-09-23 or later

    • qwen3-vl-flash series (non-thinking mode)

      qwen3-vl-flash, and snapshot models from qwen3-vl-flash-2025-10-15 or later

    • qwen-vl-max series

      qwen-vl-max, qwen-vl-max-latest, and snapshot models from qwen-vl-max-2025-04-08 or later

    • qwen-vl-plus series

      qwen-vl-plus, qwen-vl-plus-latest, and snapshot models from qwen-vl-plus-2025-01-25 or later

  • Qwen-Turbo series (non-thinking mode)

    qwen-turbo, qwen-turbo-latest, and snapshot models from qwen-turbo-2024-11-01 or later

  • Qwen open-source series

    Qwen3 open-source models (non-thinking mode), Qwen2.5 series text models, Qwen3-VL open-source models (non-thinking mode)
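A minimal Python sketch of partial mode, assuming the prefix travels in a trailing assistant message with partial set to true, as described above:

import os
from openai import OpenAI

client = OpenAI(
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
)
completion = client.chat.completions.create(
    model="qwen-plus",
    messages=[
        {"role": "user", "content": "Write a four-line poem about the sea."},
        # The trailing assistant message carries the prefix. partial=True asks the
        # model to continue from this content instead of replying to it.
        {"role": "assistant", "content": "Waves rise and", "partial": True},
    ],
)
# The reply continues the prefix; it does not repeat it.
print(completion.choices[0].message.content)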

tool_calls array (Optional)

The tool and input parameter information that is returned after initiating Function calling. It contains one or more objects and is obtained from the tool_calls field of the previous model response.

Properties

id string (Required)

The ID of the tool response.

type string (Required)

The tool type. Currently, only function is supported.

function object (Required)

The tool and input parameter information.

Properties

name string (Required)

The tool name.

arguments string (Required)

The input parameter information, in JSON string format.

index integer (Required)

The index of the current tool information in the tool_calls array.

Tool Message object (Optional)

The output information of the tool.

Properties

content string (Required)

The output content of the tool function. It must be a string. If the tool returns structured data, such as JSON, it must be serialized into a string.

role string (Required)

The role of a tool message. The value is fixed as tool.

tool_call_id string (Required)

The ID returned after initiating Function calling, which is obtained from `completion.choices[0].message.tool_calls[$index].id`. This ID is used to associate the tool message with the corresponding tool.
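To illustrate how tool_call_id links a tool result back to the model's request, the following Python sketch shows the second half of a Function calling round trip. The tool output is a stub; a real implementation would dispatch on the function name and arguments.

import os
from openai import OpenAI

client = OpenAI(
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
)
tools = [{"type": "function", "function": {
    "name": "get_current_time",
    "description": "Useful when you want to know the current time.",
    "parameters": {},
}}]
messages = [{"role": "user", "content": "What time is it?"}]

first = client.chat.completions.create(model="qwen-plus", messages=messages, tools=tools)
assistant_msg = first.choices[0].message

if assistant_msg.tool_calls:
    # Echo the assistant message, including its tool_calls, back into the context.
    messages.append(assistant_msg)
    for call in assistant_msg.tool_calls:
        messages.append({
            "role": "tool",
            "tool_call_id": call.id,  # associates this output with the matching call
            "content": "2025-01-01 12:00:00",  # stubbed tool result; must be a string
        })
    second = client.chat.completions.create(model="qwen-plus", messages=messages, tools=tools)
    print(second.choices[0].message.content)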

stream boolean (Optional) Default value: false

Specifies whether to reply in streaming output mode. For more information, see Streaming.

Valid values:

  • false: The model returns the complete content at once after generation.

  • true: The output is generated and sent incrementally. A data block (chunk) is returned as soon as a part of the content is generated. You must read these chunks in real time to piece together the full reply.

We recommend that you set this parameter to true to improve the user experience and reduce the risk of timeouts.
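A minimal Python sketch of consuming a stream and piecing the chunks together. With include_usage enabled (see stream_options below), the final chunk carries usage information and an empty choices list.

import os
from openai import OpenAI

client = OpenAI(
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
)
stream = client.chat.completions.create(
    model="qwen-plus",
    messages=[{"role": "user", "content": "Who are you?"}],
    stream=True,
    stream_options={"include_usage": True},
)
full_reply = ""
for chunk in stream:
    if chunk.choices and chunk.choices[0].delta.content:
        full_reply += chunk.choices[0].delta.content  # accumulate incremental text
    elif chunk.usage:
        print("total tokens:", chunk.usage.total_tokens)  # usage-only final chunk
print(full_reply)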

stream_options object (Optional)

Configuration options for streaming output. This parameter is effective only when stream is set to true.

Properties

include_usage boolean (Optional) Default value: false

Specifies whether to include token consumption information in the last data block of the response.

Valid values:

  • true

  • false

For streaming output, token consumption information can appear only in the last data block of the response.

modalities array (Optional) Default value: ["text"]

The modality of the output data. This parameter applies only to Qwen-Omni models. For more information, see Omni-modal.

Valid values:

  • ["text","audio"]

  • ["text"]

audio object (Optional)

The voice and format of the output audio. This parameter applies only to Qwen-Omni models, and the modalities parameter must be set to ["text","audio"]. For more information, see Omni-modal.

Properties

voice string (Required)

The voice of the output audio. For more information, see Voice list.

format string (Required)

The format of the output audio. Only wav is supported.
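A minimal sketch of requesting audio output, assuming a Qwen-Omni model name of qwen-omni-turbo (check the Model list) and the Cherry voice. Qwen-Omni models return results in streaming mode only, and the audio arrives Base64-encoded in the stream.

import os
from openai import OpenAI

client = OpenAI(
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
)
completion = client.chat.completions.create(
    model="qwen-omni-turbo",  # assumed Omni model name; see the Model list
    messages=[{"role": "user", "content": "Who are you?"}],
    modalities=["text", "audio"],
    audio={"voice": "Cherry", "format": "wav"},  # voices are listed in Voice list
    stream=True,  # Qwen-Omni supports streaming output only
)
for chunk in completion:
    if chunk.choices:
        # Text deltas and Base64 audio deltas interleave in the stream.
        print(chunk.choices[0].delta)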

temperature float (Optional)

The sampling temperature, which controls the diversity of the text that is generated by the model.

A higher temperature results in more diverse text, while a lower temperature produces more deterministic text.

Value range: [0, 2)

Both `temperature` and `top_p` control the diversity of the generated text. We recommend that you set only one of them. For more information, see Overview of text generation models.

We do not recommend that you modify the default temperature value for QVQ models.

top_p float (Optional)

The probability threshold for nucleus sampling, which controls the diversity of the text that is generated by the model.

A higher `top_p` value results in more diverse text. A lower `top_p` value produces more deterministic text.

Value range: (0, 1.0]

Both `temperature` and `top_p` control the diversity of the generated text. We recommend that you set only one of them. For more information, see Overview of text generation models.

We do not recommend that you modify the default top_p value for QVQ models.

top_k integer (Optional)

The number of candidate tokens to sample from during generation. A larger value results in more random output, while a smaller value results in more deterministic output. If this parameter is set to null or a value greater than 100, the top_k strategy is disabled, and only the top_p strategy is effective. The value must be an integer that is greater than or equal to 0.

Default top_k values

QVQ series, qwen-vl-plus-2025-07-10, qwen-vl-plus-2025-08-15: 10;

QwQ series: 40;

Other qwen-vl-plus series, models before qwen-vl-max-2025-08-13, qwen2.5-omni-7b: 1;

Qwen3-Omni-Flash series: 50;

All other models: 20.

This parameter is not a standard OpenAI parameter. When you make calls using the Python SDK, place this parameter in the extra_body object. The parameter is configured as follows: `extra_body={"top_k":xxx}`.
We do not recommend that you modify the default top_k value for QVQ models.

presence_penalty float (Optional)

Controls the repetition of content in the text that is generated by the model.

The value range is [-2.0, 2.0]. Positive values reduce repetition, while negative values increase it.

For creative writing or brainstorming scenarios that require diversity, fun, or creativity, we recommend that you increase this value. For technical documents or formal texts that emphasize consistency and terminological accuracy, we recommend that you decrease this value.

Default presence_penalty values

qwen3-max-preview (thinking mode), Qwen3 (non-thinking mode), Qwen3-Instruct series, qwen3-0.6b/1.7b/4b (thinking mode), QVQ series, qwen-max, qwen-max-latest, qwen2.5-vl series, qwen-vl-max series, qwen-vl-plus, Qwen3-VL (non-thinking): 1.5;

qwen-vl-plus-latest, qwen-vl-plus-2025-08-15: 1.2

qwen-vl-plus-2025-01-25: 1.0;

qwen3-8b/14b/32b/30b-a3b/235b-a22b (thinking mode), qwen-plus/qwen-plus-latest/qwen-plus-2025-04-28 (thinking mode), qwen-turbo/qwen-turbo-latest/qwen-turbo-2025-04-28 (thinking mode): 0.5;

All other models: 0.0.

How it works

If the parameter value is positive, the model applies a penalty to tokens that already exist in the current text. The penalty is not related to the number of times the token appears. This reduces the chance of these tokens reappearing, which decreases content repetition and increases lexical diversity.

Example

Prompt: Translate this sentence into English: "Esta película es buena. La trama es buena, la actuación es buena, la música es buena, y en general, toda la película es simplemente buena. Es realmente buena, de hecho. La trama es tan buena, y la actuación es tan buena, y la música es tan buena."

Parameter value 2.0: This movie is very good. The plot is great, the acting is great, the music is also very good, and overall, the whole movie is incredibly good. In fact, it is truly excellent. The plot is very exciting, the acting is outstanding, and the music is so beautiful.

Parameter value 0.0: This movie is good. The plot is good, the acting is good, the music is also good, and overall, the whole movie is very good. In fact, it is really great. The plot is very good, the acting is also very outstanding, and the music is also excellent.

Parameter value -2.0: This movie is very good. The plot is very good, the acting is very good, the music is also very good, and overall, the whole movie is very good. In fact, it is really great. The plot is very good, the acting is also very good, and the music is also very good.

When you use the qwen-vl-plus-2025-01-25 model for text extraction, we recommend that you set `presence_penalty` to 1.5.
We do not recommend that you modify the default presence_penalty value for QVQ models.

response_format object (Optional) Default value: {"type": "text"}

The format of the returned content. Valid values:

  • {"type": "text"}: Outputs a text reply.

  • {"type": "json_object"}: Outputs a standard-format JSON string.

  • {"type": "json_schema","json_schema": {...} }: Outputs a JSON string in your specified format.

Reference: Structured output.
If you specify {"type": "json_object"}, you must explicitly instruct the model to output JSON in the prompt, for example, "Please output in JSON format". Otherwise, an error occurs.
For a list of supported models, see Supported models.

Properties

type string (Required)

The format of the returned content. Valid values:

  • text: Outputs a text reply.

  • json_object: Outputs a standard-format JSON string.

  • json_schema: Outputs a JSON string in your specified format.

json_schema object

Required when type is json_schema. Specifies the schema of the output.

Properties

name string (Required)

A unique name for the schema. The name can contain only letters (case-insensitive), numbers, underscores (_), and hyphens (-). The name can be up to 64 characters long.

description string (Optional)

A description of the schema's purpose, which helps the model understand the semantic context of the output.

schema object (Optional)

An object that follows the JSON Schema standard and defines the data structure for the model's output.

For more information about how to build a JSON Schema, see JSON Schema

strict boolean (Optional) Default value: false

Specifies whether the model must strictly follow all schema constraints.

  • true (Recommended)

    The model strictly follows all constraints, such as field types, required items, and formats. This ensures 100% compliance of the output.

  • false (Not recommended)

    The model loosely follows the schema. The output may not conform to the specification and can result in validation failure.
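Putting these properties together, a minimal json_schema request might look like the following sketch; the schema itself is illustrative.

import os
from openai import OpenAI

client = OpenAI(
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
)
completion = client.chat.completions.create(
    model="qwen-plus",
    messages=[{"role": "user", "content": "Extract the person: Alice is 30 years old."}],
    response_format={
        "type": "json_schema",
        "json_schema": {
            "name": "person",  # unique schema name
            "description": "A person mentioned in the text.",
            "schema": {  # standard JSON Schema describing the output structure
                "type": "object",
                "properties": {
                    "name": {"type": "string"},
                    "age": {"type": "integer"},
                },
                "required": ["name", "age"],
            },
            "strict": True,  # enforce all schema constraints
        },
    },
)
print(completion.choices[0].message.content)  # a JSON string matching the schema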

max_input_tokens integer (Optional)

The maximum allowed token length for the input. This parameter is currently supported only by the qwen-plus-2025-07-28 and qwen-plus-latest models.

  • qwen-plus-latest default value: 129,024

    The default value may be adjusted to 1,000,000 in the future.
  • qwen-plus-2025-07-28 default value: 1,000,000

This parameter is not a standard OpenAI parameter. When you make calls using the Python SDK, place the parameter in the extra_body object. The configuration is as follows: extra_body={"max_input_tokens": xxx}.

max_tokens integer (Optional)

Limits the maximum number of tokens in the model's output. If the generated content exceeds this value, generation stops prematurely, and the returned finish_reason is length.

The default and maximum values are the model's maximum output length. For more information, see Model list.

This parameter is useful for scenarios that require you to control the output length, such as generating summaries or keywords, or for reducing costs and response times.

When max_tokens is triggered, the `finish_reason` field of the response is length.

max_tokens does not limit the length of the chain-of-thought for thinking models.

vl_high_resolution_images boolean (Optional) Default value: false

Specifies whether to increase the maximum pixel limit for input images to the pixel value that corresponds to 16384 tokens. For more information, see Processing high-resolution images.

  • vl_high_resolution_images is true: A fixed-resolution strategy is used, and the max_pixels setting is ignored. If an image exceeds this resolution, its total pixels are downscaled to this limit.

    Click to view the pixel limits for each model

    When vl_high_resolution_images is True, different models have different pixel limits:

    • Qwen3-VL series, qwen-vl-max, qwen-vl-max-latest, qwen-vl-max-0813, qwen-vl-plus, qwen-vl-plus-latest, and qwen-vl-plus-0815: 16777216 (each token corresponds to 32*32 pixels, i.e., 16384*32*32)

    • QVQ series, other Qwen2.5-VL models: 12845056 (each token corresponds to 28*28 pixels, i.e., 16384*28*28)

  • vl_high_resolution_images is false: The actual resolution is determined by both max_pixels and the default limit, and the maximum of the two is used. If the image exceeds this pixel limit, it is downscaled.

    Click to view the default pixel limits for each model

    When vl_high_resolution_images is false, different models have different default pixel limits:

    • Qwen3-VL series: 2621440 (2560*32*32, meaning the default token limit is 2560)

    • qwen-vl-max, qwen-vl-max-latest, qwen-vl-max-0813, qwen-vl-plus, qwen-vl-plus-latest, and qwen-vl-plus-0815: 1310720 (1280*32*32, meaning the default token limit is 1280)

    • QVQ series, other Qwen2.5-VL models: 1003520 (1280*28*28, meaning the default token limit is 1280)

This parameter is not a standard OpenAI parameter. When you make calls with the Python SDK, place this parameter in the extra_body object. The configuration is as follows: `extra_body={"vl_high_resolution_images":xxx}`.

n integer (Optional) Default value: 1

Specifies the number of responses to generate, with a value in the range of 1-4. This is useful for scenarios that require multiple candidate responses, such as creative writing or ad copy.

Only qwen-plus and Qwen3 (non-thinking mode) models are supported.
If you pass the tools parameter, you must set n to 1.
Increasing `n` increases the consumption of output tokens but not input tokens.
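A minimal sketch of requesting multiple candidates; each reply arrives as its own element of choices:

import os
from openai import OpenAI

client = OpenAI(
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
)
completion = client.chat.completions.create(
    model="qwen-plus",
    messages=[{"role": "user", "content": "Write a one-line slogan for a coffee shop."}],
    n=2,  # two candidates; output tokens are billed per candidate
)
for choice in completion.choices:
    print(choice.index, choice.message.content)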

enable_thinking boolean (Optional)

Specifies whether to enable thinking mode for hybrid thinking models, which can either think before replying or reply directly. This parameter applies to Qwen3, Qwen3-Omni-Flash, and Qwen3-VL models. For more information, see Deep thinking.

Valid values:

  • true

    When enabled, the thinking content is returned in the reasoning_content field.
  • false

Default values for different models: Supported models

This parameter is not a standard OpenAI parameter. When you make a call using the Python SDK, place it in the extra_body object. It is configured as follows: extra_body={"enable_thinking": xxx}.
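A minimal sketch of enabling thinking mode over streaming output, assuming a hybrid model such as qwen-plus-latest; the thinking_budget setting (described below) is included for illustration.

import os
from openai import OpenAI

client = OpenAI(
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
)
stream = client.chat.completions.create(
    model="qwen-plus-latest",
    messages=[{"role": "user", "content": "Which is larger, 9.9 or 9.11?"}],
    stream=True,
    # Non-standard parameters travel in extra_body with the Python SDK.
    extra_body={"enable_thinking": True, "thinking_budget": 4000},
)
for chunk in stream:
    if not chunk.choices:
        continue
    delta = chunk.choices[0].delta
    if getattr(delta, "reasoning_content", None):
        print(delta.reasoning_content, end="")  # chain-of-thought
    elif delta.content:
        print(delta.content, end="")  # final reply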

thinking_budget integer (Optional)

The maximum number of tokens for the thinking process. This applies to Qwen3-VL, and the commercial and open source versions of Qwen3 models. For more information, see Limit thinking length.

The default value is the model's maximum chain-of-thought length. For more information, see Model list.

This parameter is not a standard OpenAI parameter. When you use the Python SDK, place this parameter in the extra_body object. Configure the parameter as follows: extra_body={"thinking_budget": xxx}.

enable_code_interpreter boolean (Optional) Default value: false

Specifies whether to enable the code interpreter feature. This takes effect only when model is qwen3-max-preview and enable_thinking is true. For more information, see Code interpreter.

Valid values:

  • true

  • false

This parameter is not a standard OpenAI parameter. When you make calls using the Python SDK, place this parameter in the extra_body object. The configuration is as follows: extra_body={"enable_code_interpreter": xxx}.

seed integer (Optional)

A random number seed that makes results reproducible for the same input and parameters. If you pass the same seed value in a call and all other parameters remain unchanged, the model makes a best-effort attempt to return the same result.

Value range: [0, 2³¹−1].

logprobs boolean (Optional) Default value: false

Specifies whether to return the log probabilities of the output tokens. Valid values:

  • true

  • false

Content that is generated during the thinking phase (reasoning_content) does not return log probabilities.

Supported models

  • Snapshot models of the qwen-plus series (excluding the main model)

  • Snapshot models of the qwen-turbo series (excluding the main model)

  • Qwen3 open source models

top_logprobs integer (Optional) Default value: 0

Specifies the number of most likely candidate tokens to return at each generation step.

Value range: [0, 5]

This parameter is effective only when logprobs is set to true.
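A minimal sketch of reading token probabilities, assuming a snapshot model from the supported list above:

import os
from openai import OpenAI

client = OpenAI(
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
)
completion = client.chat.completions.create(
    model="qwen-turbo-2025-04-28",  # assumed snapshot model; see Supported models above
    messages=[{"role": "user", "content": "Say hi."}],
    logprobs=True,
    top_logprobs=2,  # effective only because logprobs is true
)
# Each element pairs an output token with its log probability and candidates.
for item in completion.choices[0].logprobs.content:
    print(item.token, item.logprob, [c.token for c in item.top_logprobs])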

stop string or array (Optional)

Specifies stop words. When a string or token_id that is specified in stop appears in the text generated by the model, generation stops immediately.

You can pass sensitive words to control the model's output.

When `stop` is an array, you cannot use a token_id and a string as elements at the same time. For example, you cannot specify ["Hello",104307].

tools array (Optional)

An array that contains one or more tool objects for the model to call in Function calling. For more information, see Function calling.

When `tools` is set and the model determines that a tool needs to be called, the response returns tool information in the `tool_calls` field.

Properties

type string (Required)

The tool type. Currently, only function is supported.

function object (Required)

Properties

name string (Required)

The tool name. Only letters, numbers, underscores (_), and hyphens (-) are allowed. The maximum length is 64 tokens.

description string (Required)

A description of the tool that helps the model decide when and how to call it.

parameters object (Required)

A description of the tool's parameters, which must be a valid JSON Schema. For a description of JSON Schema, see JSON Schema. If the parameters parameter is empty, the tool has no input parameters, such as a time query tool.

tool_choice string or object (Optional) Default value: auto

The tool selection strategy. Set this parameter to control how tools are used for a given type of question, for example, to always call a specific tool or to disable all tools.

Valid values:

  • auto

    The large language model chooses the tool strategy independently.

  • none

    If you do not want to call any tools, you can set the tool_choice parameter to none.

  • {"type": "function", "function": {"name": "the_function_to_call"}}

    To force a call to a specific tool, you can set the tool_choice parameter to {"type": "function", "function": {"name": "the_function_to_call"}}, where the_function_to_call is the name of the specified tool function.

    Models in thinking mode do not support forcing a call to a specific tool.

parallel_tool_calls boolean (Optional) Default value: false

Specifies whether to enable parallel tool calling. For more information, see Parallel tool calling.

Valid values:

  • true

  • false

enable_search boolean (Optional) Default value: false

Specifies whether to enable web search. For more information, see Web search.

Valid values:

  • true

    If web search is not triggered after being enabled, you can optimize the prompt or set the forced_search parameter in search_options to enable forced search.
  • false

Enabling the web search feature may increase token consumption.
This parameter is not a standard OpenAI parameter. When you make a call using the Python SDK, include this parameter in the extra_body object. You can configure it as follows: extra_body={"enable_search": True}.

search_options object (Optional)

The web search strategy. For more information, see Web search.

Properties

forced_search boolean (Optional) Default value: false

Specifies whether to force web search. This is effective only when enable_search is true.

Valid values:

  • true

  • false: The model decides whether to use web search.

search_strategy string (Optional) Default value: turbo

The search volume strategy. This is effective only when enable_search is true.

Valid values:

  • turbo (Default): Balances response speed and search effectiveness. This is suitable for most scenarios.

  • max: Adopts a more comprehensive search strategy and calls multiple search engines to obtain more detailed results. However, the response time may be longer.

  • agent: Can call the web search tool and the large language model multiple times to achieve multi-round information retrieval and content integration.

    agent strategy is applicable only to qwen3-max and qwen3-max-2025-09-23.
    agent strategy cannot be set simultaneously with other web search strategies.

enable_search_extension boolean (Optional) Default value: false

Specifies whether to enable domain-specific search. This parameter takes effect only when enable_search is true.

Valid values:

  • true

  • false

This parameter is not a standard OpenAI parameter. When you make a call using the Python SDK, place it in the extra_body object. The configuration is as follows: extra_body={"search_options": xxx}.
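
Putting these parameters together, a sketch that passes both non-standard fields through extra_body, assuming the client object from the sketches above:

completion = client.chat.completions.create(
    model="qwen-plus",
    messages=[{"role": "user", "content": "What is in the news today?"}],
    # Non-standard parameters go in extra_body when you use the Python SDK.
    extra_body={
        "enable_search": True,
        "search_options": {
            "forced_search": True,       # force a web search for this request
            "search_strategy": "turbo",  # or "max"
        },
    },
)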

Chat response object (non-streaming output)

{
    "choices": [
        {
            "message": {
                "role": "assistant",
                "content": "I am a large-scale language model developed by Alibaba Cloud. My name is Qwen."
            },
            "finish_reason": "stop",
            "index": 0,
            "logprobs": null
        }
    ],
    "object": "chat.completion",
    "usage": {
        "prompt_tokens": 3019,
        "completion_tokens": 104,
        "total_tokens": 3123,
        "prompt_tokens_details": {
            "cached_tokens": 2048
        }
    },
    "created": 1735120033,
    "system_fingerprint": null,
    "model": "qwen-plus",
    "id": "chatcmpl-6ada9ed2-7f33-9de2-8bb0-78bd4035025a"
}

id string

The unique ID for this call.

choices array

An array of content that is generated by the model.

Properties

finish_reason string

Indicates why the model stopped generating.

There are three possible cases:

  • stop: The model output stops naturally or the stop parameter in the input is triggered.

  • length: The generation ends because the output length exceeds the limit.

  • tool_calls: A tool needs to be called.

index integer

The index of the current object in the choices array.

logprobs object

The token probability information of the model's output.

Properties

content array

An array that contains each token and its log probability.

Properties

token string

The text of the current token.

bytes array

A list of the raw UTF-8 bytes of the current token. This is used to accurately restore the output content, such as emojis or Chinese characters.

logprob float

The log probability of the current token. A return value of null indicates an extremely low probability.

top_logprobs array

The most likely candidate tokens at the current token position. The number of elements is consistent with the top_logprobs request parameter. Each element contains:

Properties

token string

The candidate token text.

bytes array

A list of the raw UTF-8 bytes of the current token. This is used to accurately restore the output content, such as emojis or Chinese characters.

logprob float

The log probability of this candidate token. A return value of null indicates an extremely low probability.

message object

The message that is output by the model.

Properties

content string

The content of the model's reply.

reasoning_content string

The content of the model's chain-of-thought.

refusal string

This parameter is currently fixed as null.

role string

The role of the message. The value is fixed as assistant.

audio object

This parameter is currently fixed as null.

function_call (to be deprecated) object

This value is fixed as null. See the tool_calls parameter.

tool_calls array

The tool and input parameter information that is generated by the model after initiating Function calling.

Properties

id string

The unique ID for this tool response.

type string

The tool type. Currently, only function is supported.

function object

The tool information.

Properties

name string

The tool name.

arguments string

The input parameter information, in JSON string format.

Because of the randomness of large language model responses, the output parameter information may not conform to the function signature. You must validate the parameters before you call the function, as in the sketch after this list.

index integer

The index of the current tool in the tool_calls array.
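
Because arguments is a JSON string that may not match the function signature, here is a minimal validation sketch before dispatch. It assumes a non-streaming completion object from the OpenAI SDK; get_current_weather is a hypothetical helper behind the tool definition.

import json

tool_call = completion.choices[0].message.tool_calls[0]
try:
    args = json.loads(tool_call.function.arguments)
except json.JSONDecodeError:
    args = None

# Check the parsed arguments against the expected signature before calling.
if args is not None and isinstance(args.get("location"), str):
    result = get_current_weather(args["location"])  # hypothetical helper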

created integer

The Unix timestamp in seconds when the request was created.

model string

The model that is used for this request.

object string

The value is always chat.completion.

service_tier string

This parameter is currently fixed as null.

system_fingerprint string

This parameter is currently fixed as null.

usage object

The token consumption information for this request.

Properties

completion_tokens integer

The number of tokens in the model's output.

prompt_tokens integer

The number of tokens in the input.

total_tokens integer

The total number of tokens that are consumed, which is the sum of prompt_tokens and completion_tokens.

completion_tokens_details object

A fine-grained breakdown of output tokens when you use a Qwen-VL model.

Properties

audio_tokens integer

This parameter is currently fixed as null.

reasoning_tokens integer

This parameter is currently fixed as null.

text_tokens integer

The number of text tokens in the output of a Qwen-VL model.

prompt_tokens_details object

A fine-grained breakdown of input tokens.

Properties

audio_tokens integer

This parameter is currently fixed as null.

cached_tokens integer

The number of tokens that hit the cache. For more information, see context cache.

text_tokens integer

The number of text tokens in the input of a Qwen-VL model.

image_tokens integer

The number of image tokens in the input of a Qwen-VL model.

video_tokens integer

The number of tokens for the input video file or image list in a Qwen-VL model.

cache_creation object

Information about the creation of an explicit cache.

Properties

ephemeral_5m_input_tokens integer

The number of tokens that are used to create the explicit cache.

cache_creation_input_tokens integer

The number of tokens that are used to create the explicit cache.

cache_type string

When you use an explicit cache, the parameter value is ephemeral. Otherwise, this parameter does not exist.
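
To show how these fields are read in practice, a short sketch against a completion object returned by the OpenAI Python SDK, assuming a non-streaming call like the ones earlier in this topic:

choice = completion.choices[0]
print(choice.finish_reason)    # "stop", "length", or "tool_calls"
print(choice.message.content)  # the reply text

usage = completion.usage
print(usage.prompt_tokens, usage.completion_tokens, usage.total_tokens)
if usage.prompt_tokens_details:
    print(usage.prompt_tokens_details.cached_tokens)  # tokens that hit the cache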

Chat response chunk object (streaming output)

{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[{"delta":{"content":"","function_call":null,"refusal":null,"role":"assistant","tool_calls":null},"finish_reason":null,"index":0,"logprobs":null}],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":null}
{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[{"delta":{"content":"I am","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null}],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":null}
{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[{"delta":{"content":"from","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null}],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":null}
{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[{"delta":{"content":"Alibaba","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null}],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":null}
{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[{"delta":{"content":"Cloud's large-scale","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null}],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":null}
{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[{"delta":{"content":"language model. My name","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null}],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":null}
{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[{"delta":{"content":"is Qwen","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null}],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":null}
{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[{"delta":{"content":"."},"finish_reason":null,"index":0,"logprobs":null}],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":null}
{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[{"delta":{"content":"","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":"stop","index":0,"logprobs":null}],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":null}
{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":{"completion_tokens":17,"prompt_tokens":22,"total_tokens":39,"completion_tokens_details":null,"prompt_tokens_details":{"audio_tokens":null,"cached_tokens":0}}}

id string

The unique ID for this call. Each chunk object has the same ID.

choices array

An array of content that is generated by the model, which can contain one or more objects. If the include_usage parameter is set to true, choices is an empty array in the last chunk.

Properties

delta object

The incremental object of the request.

Properties

content string

The incremental message content.

reasoning_content string

The incremental chain-of-thought content.

function_call object

This value defaults to null. See the tool_calls parameter.

audio object

The reply that is generated when you use the Qwen-Omni model.

Properties

data string

The incremental Base64-encoded audio data.

expires_at integer

The Unix timestamp when the audio data expires.

refusal object

This parameter is currently fixed as null.

role string

The role of the incremental message object. It has a value only in the first chunk.

tool_calls array

The tool and input parameter information that is generated by the model after initiating Function calling.

Properties

index integer

The index of the current tool in the tool_calls array.

id string

The unique ID for this tool response.

function object

The information of the called tool.

Properties

arguments string

The incremental input parameter information. The complete input parameter is the concatenation of the arguments from all chunks; see the reassembly sketch below.

Because of the randomness of large language model responses, the output parameter information may not conform to the function signature. You must validate the parameters before you call the function.

name string

The tool name. It has a value only in the first chunk.

type string

The tool type. Currently, only function is supported.
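
Because name arrives only in the first chunk and arguments arrives as fragments, here is a minimal sketch that reassembles tool calls from a stream. It assumes stream is a streaming completion created with tools and stream=True through the OpenAI SDK.

tool_calls = {}
for chunk in stream:
    if not chunk.choices:
        continue  # the final usage-only chunk has an empty choices array
    for tc in chunk.choices[0].delta.tool_calls or []:
        entry = tool_calls.setdefault(tc.index, {"name": "", "arguments": ""})
        if tc.function.name:        # present only in the first chunk
            entry["name"] = tc.function.name
        if tc.function.arguments:   # concatenate the incremental fragments
            entry["arguments"] += tc.function.arguments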

finish_reason string

Indicates why the model stopped generating. There are four possible cases:

  • stop: The model output stops naturally or the stop parameter in the input is triggered.

  • null: Generation is not finished.

  • length: The generation ends because the output length exceeds the limit.

  • tool_calls: A tool needs to be called.

index integer

The index of the current response in the choices array. When the input parameter `n` is greater than 1, you must use this parameter to concatenate the complete content for different responses.

logprobs object

The probability information of the current object.

Properties

content array

An array of tokens with log probability information.

Properties

token string

The current token.

bytes array

A list of the raw UTF-8 bytes of the current token. This is used to accurately restore the output content, which is helpful when you handle emojis and Chinese characters.

logprob float

The log probability of the current token. A return value of null indicates an extremely low probability.

top_logprobs array

The most likely tokens at the current token position and their log probabilities. The number of elements is consistent with the top_logprobs input parameter.

Properties

token string

The current token.

bytes array

A list of the raw UTF-8 bytes of the current token. This is used to accurately restore the output content, which is helpful when you handle emojis and Chinese characters.

logprob float

The log probability of the current token. A return value of null indicates an extremely low probability.

created integer

The timestamp when this request was created. Each chunk has the same timestamp.

model string

The model that is used for this request.

object string

The value is always chat.completion.chunk.

service_tier string

This parameter is currently fixed as null.

system_fingerprint string

This parameter is currently fixed as null.

usage object

The tokens that are consumed by this request. This is displayed in the last chunk only when include_usage is true.

Properties

completion_tokens integer

The number of tokens in the model's output.

prompt_tokens integer

The number of input tokens.

total_tokens integer

The total number of tokens, which is the sum of prompt_tokens and completion_tokens.

completion_tokens_details object

Detailed information about the output tokens.

Properties

audio_tokens integer

The number of audio tokens in the output of a Qwen-Omni model.

reasoning_tokens integer

The number of tokens in the thinking process.

text_tokens integer

The number of output text tokens.

prompt_tokens_details object

A fine-grained breakdown of input tokens.

Properties

audio_tokens integer

The number of tokens in the input audio.

The number of audio tokens in a video file is returned in this parameter.

text_tokens integer

The number of tokens in the input text.

video_tokens integer

The number of tokens for the input video, which can be an image list or a video file.

image_tokens integer

The number of tokens in the input image.

cached_tokens integer

The number of tokens that hit the cache. For more information, see Context cache.

cache_creation object

Information about the creation of an explicit cache.

Properties

ephemeral_5m_input_tokens integer

The number of tokens that are used to create the explicit cache.

cache_creation_input_tokens integer

The number of tokens that are used to create the explicit cache.

cache_type string

The cache type. The value is fixed as ephemeral.
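
To tie the chunk fields together, a minimal streaming sketch with the OpenAI Python SDK, assuming the OpenAI-compatible client setup shown earlier. In that SDK, include_usage is requested through stream_options.

stream = client.chat.completions.create(
    model="qwen-plus",
    messages=[{"role": "user", "content": "Who are you?"}],
    stream=True,
    stream_options={"include_usage": True},  # usage arrives in the last chunk
)

full_content = ""
for chunk in stream:
    if chunk.choices:
        full_content += chunk.choices[0].delta.content or ""
    elif chunk.usage:
        # The last chunk has an empty choices array and carries usage.
        print(chunk.usage.total_tokens)
print(full_content)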

DashScope

Singapore region

HTTP request endpoints:

  • Qwen large language model: POST https://dashscope-intl.aliyuncs.com/api/v1/services/aigc/text-generation/generation

  • Qwen-VL model: POST https://dashscope-intl.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation

For SDK calls, the base_url is:

Python code

dashscope.base_http_api_url = 'https://dashscope-intl.aliyuncs.com/api/v1'

Java code

  • Method 1:

    import com.alibaba.dashscope.protocol.Protocol;
    Generation gen = new Generation(Protocol.HTTP.getValue(), "https://dashscope-intl.aliyuncs.com/api/v1");
  • Method 2:

    import com.alibaba.dashscope.utils.Constants;
    Constants.baseHttpApiUrl="https://dashscope-intl.aliyuncs.com/api/v1";

China (Beijing) region

HTTP request endpoints:

  • Qwen large language model: POST https://dashscope.aliyuncs.com/api/v1/services/aigc/text-generation/generation

  • Qwen-VL model: POST https://dashscope.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation

No base_url configuration is required for SDK calls.

First create an API key and export the API key as an environment variable. If you use the DashScope SDK, install the DashScope SDK.

Request body

Text input

Python

import os
import dashscope

dashscope.base_http_api_url = 'https://dashscope-intl.aliyuncs.com/api/v1'
# The preceding base_url is for the Singapore region. If you use a model in the Beijing region, replace the base_url with https://dashscope.aliyuncs.com/api/v1.
messages = [
    {'role': 'system', 'content': 'You are a helpful assistant.'},
    {'role': 'user', 'content': 'Who are you?'}
]
response = dashscope.Generation.call(
    # If the environment variable is not configured, replace the following line with api_key="sk-xxx" using your Model Studio API key.
    # The API keys for the Singapore and Beijing regions are different. To obtain an API key, see https://www.alibabacloud.com/help/en/model-studio/get-api-key.
    api_key=os.getenv('DASHSCOPE_API_KEY'),
    model="qwen-plus", # This example uses qwen-plus. You can replace the model name as needed. For a list of models, see https://www.alibabacloud.com/help/en/model-studio/getting-started/models.
    messages=messages,
    result_format='message'
    )
print(response)

Java

// We recommend using DashScope SDK version 2.12.0 or later.
import java.util.Arrays;
import java.lang.System;
import com.alibaba.dashscope.aigc.generation.Generation;
import com.alibaba.dashscope.aigc.generation.GenerationParam;
import com.alibaba.dashscope.aigc.generation.GenerationResult;
import com.alibaba.dashscope.common.Message;
import com.alibaba.dashscope.common.Role;
import com.alibaba.dashscope.exception.ApiException;
import com.alibaba.dashscope.exception.InputRequiredException;
import com.alibaba.dashscope.exception.NoApiKeyException;
import com.alibaba.dashscope.utils.JsonUtils;
import com.alibaba.dashscope.protocol.Protocol;

public class Main {
    public static GenerationResult callWithMessage() throws ApiException, NoApiKeyException, InputRequiredException {
        Generation gen = new Generation(Protocol.HTTP.getValue(), "https://dashscope-intl.aliyuncs.com/api/v1");
        // The preceding base_url is for the Singapore region. If you use a model in the Beijing region, replace the base_url with https://dashscope.aliyuncs.com/api/v1.
        Message systemMsg = Message.builder()
                .role(Role.SYSTEM.getValue())
                .content("You are a helpful assistant.")
                .build();
        Message userMsg = Message.builder()
                .role(Role.USER.getValue())
                .content("Who are you?")
                .build();
        GenerationParam param = GenerationParam.builder()
                // If the environment variable is not configured, replace the following line with .apiKey("sk-xxx") using your Model Studio API key.
                // The API keys for the Singapore and Beijing regions are different. To obtain an API key, see https://www.alibabacloud.com/help/en/model-studio/get-api-key.
                .apiKey(System.getenv("DASHSCOPE_API_KEY"))
                // This example uses qwen-plus. You can replace the model name as needed. For a list of models, see https://www.alibabacloud.com/help/en/model-studio/getting-started/models.
                .model("qwen-plus")
                .messages(Arrays.asList(systemMsg, userMsg))
                .resultFormat(GenerationParam.ResultFormat.MESSAGE)
                .build();
        return gen.call(param);
    }
    public static void main(String[] args) {
        try {
            GenerationResult result = callWithMessage();
            System.out.println(JsonUtils.toJson(result));
        } catch (ApiException | NoApiKeyException | InputRequiredException e) {
            // Use a logging framework to record exception information.
            System.err.println("An error occurred while calling the generation service: " + e.getMessage());
        }
        System.exit(0);
    }
}

PHP (HTTP)

<?php
// The following URL is for the Singapore region. If you use a model in the Beijing region, replace the URL with https://dashscope.aliyuncs.com/api/v1/services/aigc/text-generation/generation.
$url = "https://dashscope-intl.aliyuncs.com/api/v1/services/aigc/text-generation/generation";
// The API keys for the Singapore and Beijing regions are different. To obtain an API key, see https://www.alibabacloud.com/help/en/model-studio/get-api-key.
$apiKey = getenv('DASHSCOPE_API_KEY');

$data = [
    // This example uses qwen-plus. You can replace the model name as needed. For a list of models, see https://www.alibabacloud.com/help/en/model-studio/getting-started/models.
    "model" => "qwen-plus",
    "input" => [
        "messages" => [
            [
                "role" => "system",
                "content" => "You are a helpful assistant."
            ],
            [
                "role" => "user",
                "content" => "Who are you?"
            ]
        ]
    ],
    "parameters" => [
        "result_format" => "message"
    ]
];

$jsonData = json_encode($data);

$ch = curl_init($url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_POST, true);
curl_setopt($ch, CURLOPT_POSTFIELDS, $jsonData);
curl_setopt($ch, CURLOPT_HTTPHEADER, [
    "Authorization: Bearer $apiKey",
    "Content-Type: application/json"
]);

$response = curl_exec($ch);
$httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);

if ($httpCode == 200) {
    echo "Response: " . $response;
} else {
    echo "Error: " . $httpCode . " - " . $response;
}

curl_close($ch);
?>

Node.js (HTTP)

DashScope does not provide an SDK for the Node.js environment. To make calls using the OpenAI Node.js SDK, see the OpenAI section of this topic.

import fetch from 'node-fetch';
// The API keys for the Singapore and Beijing regions are different. To obtain an API key, see https://www.alibabacloud.com/help/en/model-studio/get-api-key.
const apiKey = process.env.DASHSCOPE_API_KEY;

const data = {
    model: "qwen-plus", // This example uses qwen-plus. You can replace the model name as needed. For a list of models, see https://www.alibabacloud.com/help/en/model-studio/getting-started/models.
    input: {
        messages: [
            {
                role: "system",
                content: "You are a helpful assistant."
            },
            {
                role: "user",
                content: "Who are you?"
            }
        ]
    },
    parameters: {
        result_format: "message"
    }
};

fetch('https://dashscope-intl.aliyuncs.com/api/v1/services/aigc/text-generation/generation', {
// The preceding URL is for the Singapore region. If you use a model in the Beijing region, replace the URL with https://dashscope.aliyuncs.com/api/v1/services/aigc/text-generation/generation.
    method: 'POST',
    headers: {
        'Authorization': `Bearer ${apiKey}`,
        'Content-Type': 'application/json'
    },
    body: JSON.stringify(data)
})
.then(response => response.json())
.then(data => {
    console.log(JSON.stringify(data));
})
.catch(error => {
    console.error('Error:', error);
});

C# (HTTP)

using System.Net.Http.Headers;
using System.Text;

class Program
{
    private static readonly HttpClient httpClient = new HttpClient();

    static async Task Main(string[] args)
    {
        // If the environment variable is not configured, replace the following line with string? apiKey = "sk-xxx"; using your Model Studio API key.
        // The API keys for the Singapore and Beijing regions are different. To obtain an API key, see https://www.alibabacloud.com/help/en/model-studio/get-api-key.
        string? apiKey = Environment.GetEnvironmentVariable("DASHSCOPE_API_KEY");

        if (string.IsNullOrEmpty(apiKey))
        {
            Console.WriteLine("API key not set. Make sure the 'DASHSCOPE_API_KEY' environment variable is set.");
            return;
        }

        // Set the request URL and content.
        // The following base_url is for the Singapore region. If you use a model in the Beijing region, replace the base_url with https://dashscope.aliyuncs.com/api/v1/services/aigc/text-generation/generation.
        string url = "https://dashscope-intl.aliyuncs.com/api/v1/services/aigc/text-generation/generation";
        // This example uses qwen-plus. You can replace the model name as needed. For a list of models, see https://www.alibabacloud.com/help/en/model-studio/getting-started/models.
        string jsonContent = @"{
            ""model"": ""qwen-plus"", 
            ""input"": {
                ""messages"": [
                    {
                        ""role"": ""system"",
                        ""content"": ""You are a helpful assistant.""
                    },
                    {
                        ""role"": ""user"",
                        ""content"": ""Who are you?""
                    }
                ]
            },
            ""parameters"": {
                ""result_format"": ""message""
            }
        }";

        // Send the request and get the response.
        string result = await SendPostRequestAsync(url, jsonContent, apiKey);

        // Print the result.
        Console.WriteLine(result);
    }

    private static async Task<string> SendPostRequestAsync(string url, string jsonContent, string apiKey)
    {
        using (var content = new StringContent(jsonContent, Encoding.UTF8, "application/json"))
        {
            // Set the request header.
            httpClient.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Bearer", apiKey);
            httpClient.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json"));

            // Send the request and get the response.
            HttpResponseMessage response = await httpClient.PostAsync(url, content);

            // Process the response.
            if (response.IsSuccessStatusCode)
            {
                return await response.Content.ReadAsStringAsync();
            }
            else
            {
                return $"Request failed: {response.StatusCode}";
            }
        }
    }
}

Go (HTTP)

DashScope does not provide an SDK for Go. To make calls using the OpenAI Go SDK, see the OpenAI-Go section of this topic.

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"log"
	"net/http"
	"os"
)

type Message struct {
	Role    string `json:"role"`
	Content string `json:"content"`
}

type Input struct {
	Messages []Message `json:"messages"`
}

type Parameters struct {
	ResultFormat string `json:"result_format"`
}

type RequestBody struct {
	Model      string     `json:"model"`
	Input      Input      `json:"input"`
	Parameters Parameters `json:"parameters"`
}

func main() {
	// Create an HTTP client.
	client := &http.Client{}

	// Build the request body.
	requestBody := RequestBody{
		// This example uses qwen-plus. You can replace the model name as needed. For a list of models, see https://www.alibabacloud.com/help/en/model-studio/getting-started/models.
		Model: "qwen-plus",
		Input: Input{
			Messages: []Message{
				{
					Role:    "system",
					Content: "You are a helpful assistant.",
				},
				{
					Role:    "user",
					Content: "Who are you?",
				},
			},
		},
		Parameters: Parameters{
			ResultFormat: "message",
		},
	}

	jsonData, err := json.Marshal(requestBody)
	if err != nil {
		log.Fatal(err)
	}

	// Create a POST request.
	// The following base_url is for the Singapore region. If you use a model in the Beijing region, replace the base_url with https://dashscope.aliyuncs.com/api/v1/services/aigc/text-generation/generation.
	req, err := http.NewRequest("POST", "https://dashscope-intl.aliyuncs.com/api/v1/services/aigc/text-generation/generation", bytes.NewBuffer(jsonData))
	if err != nil {
		log.Fatal(err)
	}

	// Set the request header.
	// If the environment variable is not configured, replace the following line with apiKey := "sk-xxx" using your Model Studio API key.
	// The API keys for the Singapore and Beijing regions are different. To obtain an API key, see https://www.alibabacloud.com/help/en/model-studio/get-api-key.
	apiKey := os.Getenv("DASHSCOPE_API_KEY")
	req.Header.Set("Authorization", "Bearer "+apiKey)
	req.Header.Set("Content-Type", "application/json")

	// Send the request.
	resp, err := client.Do(req)
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	// Read the response body.
	bodyText, err := io.ReadAll(resp.Body)
	if err != nil {
		log.Fatal(err)
	}

	// Print the response content.
	fmt.Printf("%s\n", bodyText)
}

curl

The API keys for the Singapore and Beijing regions are different. For more information, see Obtain and configure an API key.
The following URL is for the Singapore region. If you use a model in the Beijing region, replace the URL with https://dashscope.aliyuncs.com/api/v1/services/aigc/text-generation/generation.
curl --location "https://dashscope-intl.aliyuncs.com/api/v1/services/aigc/text-generation/generation" \
--header "Authorization: Bearer $DASHSCOPE_API_KEY" \
--header "Content-Type: application/json" \
--data '{
    "model": "qwen-plus",
    "input":{
        "messages":[      
            {
                "role": "system",
                "content": "You are a helpful assistant."
            },
            {
                "role": "user",
                "content": "Who are you?"
            }
        ]
    },
    "parameters": {
        "result_format": "message"
    }
}'

Streaming output

For more information, see Streaming.

Text generation models

Python

import os
import dashscope

# The following base_url is for the Singapore region. If you use a model in the Beijing region, replace the base_url with https://dashscope.aliyuncs.com/api/v1.
dashscope.base_http_api_url = 'https://dashscope-intl.aliyuncs.com/api/v1'
messages = [
    {'role': 'system', 'content': 'You are a helpful assistant.'},
    {'role': 'user','content': 'Who are you?'}
]
responses = dashscope.Generation.call(
    # If the environment variable is not configured, replace the following line with api_key="sk-xxx" using your Model Studio API key.
    # The API keys for the Singapore and Beijing regions are different. To obtain an API key, see https://www.alibabacloud.com/help/en/model-studio/get-api-key.
    api_key=os.getenv('DASHSCOPE_API_KEY'),
    # This example uses qwen-plus. You can replace the model name as needed. For a list of models, see https://www.alibabacloud.com/help/en/model-studio/getting-started/models.
    model="qwen-plus",
    messages=messages,
    result_format='message',
    stream=True,
    incremental_output=True
    )
for response in responses:
    print(response)  

Java

import java.util.Arrays;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.alibaba.dashscope.aigc.generation.Generation;
import com.alibaba.dashscope.aigc.generation.GenerationParam;
import com.alibaba.dashscope.aigc.generation.GenerationResult;
import com.alibaba.dashscope.common.Message;
import com.alibaba.dashscope.common.Role;
import com.alibaba.dashscope.exception.ApiException;
import com.alibaba.dashscope.exception.InputRequiredException;
import com.alibaba.dashscope.exception.NoApiKeyException;
import com.alibaba.dashscope.utils.JsonUtils;
import io.reactivex.Flowable;
import java.lang.System;
import com.alibaba.dashscope.protocol.Protocol;

public class Main {
    private static final Logger logger = LoggerFactory.getLogger(Main.class);
    private static void handleGenerationResult(GenerationResult message) {
        System.out.println(JsonUtils.toJson(message));
    }
    public static void streamCallWithMessage(Generation gen, Message userMsg)
            throws NoApiKeyException, ApiException, InputRequiredException {
        GenerationParam param = buildGenerationParam(userMsg);
        Flowable<GenerationResult> result = gen.streamCall(param);
        result.blockingForEach(message -> handleGenerationResult(message));
    }
    private static GenerationParam buildGenerationParam(Message userMsg) {
        return GenerationParam.builder()
                // If the environment variable is not configured, replace the following line with .apiKey("sk-xxx") using your Model Studio API key.
                // The API keys for the Singapore and Beijing regions are different. To obtain an API key, see https://www.alibabacloud.com/help/en/model-studio/get-api-key.
                .apiKey(System.getenv("DASHSCOPE_API_KEY"))
                // This example uses qwen-plus. You can replace the model name as needed. For a list of models, see https://www.alibabacloud.com/help/en/model-studio/getting-started/models.
                .model("qwen-plus")
                .messages(Arrays.asList(userMsg))
                .resultFormat(GenerationParam.ResultFormat.MESSAGE)
                .incrementalOutput(true)
                .build();
    }
    public static void main(String[] args) {
        try {
            // The following URL is for the Singapore region. If you use a model in the Beijing region, replace the URL with https://dashscope.aliyuncs.com/api/v1.
            Generation gen = new Generation(Protocol.HTTP.getValue(), "https://dashscope-intl.aliyuncs.com/api/v1");
            Message userMsg = Message.builder().role(Role.USER.getValue()).content("Who are you?").build();
            streamCallWithMessage(gen, userMsg);
        } catch (ApiException | NoApiKeyException | InputRequiredException  e) {
            logger.error("An exception occurred: {}", e.getMessage());
        }
        System.exit(0);
    }
}

curl

# ======= Important =======
# The following base_url is for the Singapore region. If you use a model in the Beijing region, replace the base_url with https://dashscope.aliyuncs.com/api/v1/services/aigc/text-generation/generation.
# === Delete this comment before execution ====

curl --location "https://dashscope-intl.aliyuncs.com/api/v1/services/aigc/text-generation/generation" \
--header "Authorization: Bearer $DASHSCOPE_API_KEY" \
--header "Content-Type: application/json" \
--header "X-DashScope-SSE: enable" \
--data '{
    "model": "qwen-plus",
    "input":{
        "messages":[      
            {
                "role": "system",
                "content": "You are a helpful assistant."
            },
            {
                "role": "user",
                "content": "Who are you?"
            }
        ]
    },
    "parameters": {
        "result_format": "message",
        "incremental_output":true
    }
}'

Multimodal models

Python

import os
from dashscope import MultiModalConversation
import dashscope
# The following base_url is for the Singapore region. If you use a model in the Beijing region, replace the base_url with https://dashscope.aliyuncs.com/api/v1.
dashscope.base_http_api_url = 'https://dashscope-intl.aliyuncs.com/api/v1'

messages = [
    {
        "role": "user",
        "content": [
            {"image": "https://dashscope.oss-cn-beijing.aliyuncs.com/images/dog_and_girl.jpeg"},
            {"text": "What is depicted in the image?"}
        ]
    }
]

responses = MultiModalConversation.call(
    # The API keys for the Singapore and Beijing regions are different. To obtain an API key, see https://www.alibabacloud.com/help/en/model-studio/get-api-key.
    # If the environment variable is not configured, replace the following line with api_key="sk-xxx", using your Model Studio API key.
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    model='qwen3-vl-plus',  #  You can replace the model with another multimodal model and modify the messages accordingly.
    messages=messages,
    stream=True,
    incremental_output=True)
    
full_content = ""
print("Streaming output content:")
for response in responses:
    if response["output"]["choices"][0]["message"].content:
        print(response.output.choices[0].message.content[0]['text'])
        full_content += response.output.choices[0].message.content[0]['text']
print(f"Full content: {full_content}")

Java

import java.util.Arrays;
import java.util.Collections;
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversation;
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversationParam;
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversationResult;
import com.alibaba.dashscope.common.MultiModalMessage;
import com.alibaba.dashscope.common.Role;
import com.alibaba.dashscope.exception.ApiException;
import com.alibaba.dashscope.exception.NoApiKeyException;
import com.alibaba.dashscope.exception.UploadFileException;
import io.reactivex.Flowable;
import com.alibaba.dashscope.utils.Constants;

public class Main {
    static {
        // The following base_url is for the Singapore region. If you use a model in the Beijing region, replace the base_url with https://dashscope.aliyuncs.com/api/v1.
        Constants.baseHttpApiUrl="https://dashscope-intl.aliyuncs.com/api/v1";
    }
    public static void streamCall()
            throws ApiException, NoApiKeyException, UploadFileException {
        MultiModalConversation conv = new MultiModalConversation();
        // must create mutable map.
        MultiModalMessage userMessage = MultiModalMessage.builder().role(Role.USER.getValue())
                .content(Arrays.asList(Collections.singletonMap("image", "https://dashscope.oss-cn-beijing.aliyuncs.com/images/dog_and_girl.jpeg"),
                        Collections.singletonMap("text", "What is depicted in the image?"))).build();
        MultiModalConversationParam param = MultiModalConversationParam.builder()
                // The API keys for the Singapore and Beijing regions are different. To obtain an API key, see https://www.alibabacloud.com/help/en/model-studio/get-api-key.
                // If the environment variable is not configured, replace the following line with .apiKey("sk-xxx") using your Model Studio API key.
                .apiKey(System.getenv("DASHSCOPE_API_KEY"))
                .model("qwen3-vl-plus")  //  You can replace the model with another multimodal model and modify the messages accordingly.
                .messages(Arrays.asList(userMessage))
                .incrementalOutput(true)
                .build();
        Flowable<MultiModalConversationResult> result = conv.streamCall(param);
        result.blockingForEach(item -> {
            try {
                var content = item.getOutput().getChoices().get(0).getMessage().getContent();
                // Check that the content exists and is not empty.
                if (content != null && !content.isEmpty()) {
                    System.out.println(content.get(0).get("text"));
                }
            } catch (Exception e){
                System.exit(0);
            }
        });
    }

    public static void main(String[] args) {
        try {
            streamCall();
        } catch (ApiException | NoApiKeyException | UploadFileException e) {
            System.out.println(e.getMessage());
        }
        System.exit(0);
    }
}

curl

# ======= Important =======
# The API keys for the Singapore and Beijing regions are different. To obtain an API key, see https://www.alibabacloud.com/help/en/model-studio/get-api-key.
# The following base_url is for the Singapore region. If you use a model in the Beijing region, replace the base_url with https://dashscope.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation.
# === Delete this comment before execution ===

curl -X POST https://dashscope-intl.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation \
-H "Authorization: Bearer $DASHSCOPE_API_KEY" \
-H 'Content-Type: application/json' \
-H 'X-DashScope-SSE: enable' \
-d '{
    "model": "qwen3-vl-plus",
    "input":{
        "messages":[
            {
                "role": "user",
                "content": [
                    {"image": "https://dashscope.oss-cn-beijing.aliyuncs.com/images/dog_and_girl.jpeg"},
                    {"text": "What is depicted in the image?"}
                ]
            }
        ]
    },
    "parameters": {
        "incremental_output": true
    }
}'

Image input

For more information about how to use large models to analyze images, see Visual understanding.

Python

import os
import dashscope

# The following base_url is for the Singapore region. If you use a model in the Beijing region, replace the base_url with https://dashscope.aliyuncs.com/api/v1.
dashscope.base_http_api_url = 'https://dashscope-intl.aliyuncs.com/api/v1'  
messages = [
    {
        "role": "user",
        "content": [
            {"image": "https://dashscope.oss-cn-beijing.aliyuncs.com/images/dog_and_girl.jpeg"},
            {"image": "https://dashscope.oss-cn-beijing.aliyuncs.com/images/tiger.png"},
            {"image": "https://dashscope.oss-cn-beijing.aliyuncs.com/images/rabbit.png"},
            {"text": "What are these?"}
        ]
    }
]
response = dashscope.MultiModalConversation.call(
    # The API keys for the Singapore and Beijing regions are different. To obtain an API key, see https://www.alibabacloud.com/help/en/model-studio/get-api-key.
    # If the environment variable is not configured, replace the following line with api_key="sk-xxx" using your Model Studio API key.
    api_key=os.getenv('DASHSCOPE_API_KEY'),
    # This example uses qwen-vl-max. You can replace the model name as needed. For a list of models, see https://www.alibabacloud.com/help/en/model-studio/getting-started/models.
    model='qwen-vl-max',
    messages=messages
    )
print(response)

Java

// Copyright (c) Alibaba, Inc. and its affiliates.

import java.util.Arrays;
import java.util.Collections;
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversation;
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversationParam;
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversationResult;
import com.alibaba.dashscope.common.MultiModalMessage;
import com.alibaba.dashscope.common.Role;
import com.alibaba.dashscope.exception.ApiException;
import com.alibaba.dashscope.exception.NoApiKeyException;
import com.alibaba.dashscope.exception.UploadFileException;
import com.alibaba.dashscope.utils.JsonUtils;
import com.alibaba.dashscope.utils.Constants;
public class Main {
    static {
        Constants.baseHttpApiUrl="https://dashscope-intl.aliyuncs.com/api/v1";  // This base_url is for the Singapore region. If you use a model in the Beijing region, replace it with https://dashscope.aliyuncs.com/api/v1.
    }
    public static void simpleMultiModalConversationCall()
            throws ApiException, NoApiKeyException, UploadFileException {
        MultiModalConversation conv = new MultiModalConversation();
        MultiModalMessage userMessage = MultiModalMessage.builder().role(Role.USER.getValue())
                .content(Arrays.asList(
                        Collections.singletonMap("image", "https://dashscope.oss-cn-beijing.aliyuncs.com/images/dog_and_girl.jpeg"),
                        Collections.singletonMap("image", "https://dashscope.oss-cn-beijing.aliyuncs.com/images/tiger.png"),
                        Collections.singletonMap("image", "https://dashscope.oss-cn-beijing.aliyuncs.com/images/rabbit.png"),
                        Collections.singletonMap("text", "What are these?"))).build();
        MultiModalConversationParam param = MultiModalConversationParam.builder()
                // If the environment variable is not configured, replace the following line with .apiKey("sk-xxx") using your Model Studio API key.
                // The API keys for the Singapore and Beijing regions are different. To obtain an API key, see https://www.alibabacloud.com/help/en/model-studio/get-api-key.
                .apiKey(System.getenv("DASHSCOPE_API_KEY"))
                // This example uses qwen-vl-plus. You can replace the model name as needed. For a list of models, see https://www.alibabacloud.com/help/en/model-studio/getting-started/models.
                .model("qwen-vl-plus")
                .message(userMessage)
                .build();
        MultiModalConversationResult result = conv.call(param);
        System.out.println(JsonUtils.toJson(result));
    }

    public static void main(String[] args) {
        try {
            simpleMultiModalConversationCall();
        } catch (ApiException | NoApiKeyException | UploadFileException e) {
            System.out.println(e.getMessage());
        }
        System.exit(0);
    }
}

curl

The API keys for the Singapore and Beijing regions are different. For more information, see Obtain and configure an API key.
The following URL is for the Singapore region. If you use a model in the Beijing region, replace the URL with https://dashscope.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation.
curl --location 'https://dashscope-intl.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation' \
--header "Authorization: Bearer $DASHSCOPE_API_KEY" \
--header 'Content-Type: application/json' \
--data '{
    "model": "qwen-vl-plus",
    "input":{
        "messages":[
            {
                "role": "user",
                "content": [
                    {"image": "https://dashscope.oss-cn-beijing.aliyuncs.com/images/dog_and_girl.jpeg"},
                    {"image": "https://dashscope.oss-cn-beijing.aliyuncs.com/images/tiger.png"},
                    {"image": "https://dashscope.oss-cn-beijing.aliyuncs.com/images/rabbit.png"},
                    {"text": "What are these?"}
                ]
            }
        ]
    }
}'

Video input

The following code provides an example of how to pass video frames. For more information about other usage methods, such as passing a video file, see Visual understanding.

Python

import os
# DashScope SDK version 1.20.10 or later is required.
import dashscope
# The following base_url is for the Singapore region. If you use a model in the Beijing region, replace the base_url with https://dashscope.aliyuncs.com/api/v1.
dashscope.base_http_api_url = 'https://dashscope-intl.aliyuncs.com/api/v1'
messages = [{"role": "user",
             "content": [
                  # If the model is in the Qwen2.5-VL series and an image list is passed, you can set the fps parameter. This parameter indicates that the image list is extracted from the original video at an interval of 1/fps seconds.
                 {"video":["https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20241108/xzsgiz/football1.jpg",
                           "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20241108/tdescd/football2.jpg",
                           "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20241108/zefdja/football3.jpg",
                           "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20241108/aedbqh/football4.jpg"],
                   "fps":2},
                 {"text": "Describe the specific process in this video"}]}]
response = dashscope.MultiModalConversation.call(
    # If the environment variable is not configured, replace the following line with api_key="sk-xxx" using your Model Studio API key.
    # The API keys for the Singapore and Beijing regions are different. To obtain an API key, see https://www.alibabacloud.com/help/en/model-studio/get-api-key.
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    model='qwen2.5-vl-72b-instruct',  # This example uses qwen2.5-vl-72b-instruct. You can replace the model name as needed. For a list of models, see https://www.alibabacloud.com/help/en/model-studio/models.
    messages=messages
)
print(response["output"]["choices"][0]["message"].content[0]["text"])

Java

// DashScope SDK version 2.18.3 or later is required.
import java.util.Arrays;
import java.util.Collections;
import java.util.Map;

import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversation;
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversationParam;
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversationResult;
import com.alibaba.dashscope.common.MultiModalMessage;
import com.alibaba.dashscope.common.Role;
import com.alibaba.dashscope.exception.ApiException;
import com.alibaba.dashscope.exception.NoApiKeyException;
import com.alibaba.dashscope.exception.UploadFileException;
import com.alibaba.dashscope.utils.Constants;

public class Main {
    static {
         // The following base_url is for the Singapore region. If you use a model in the Beijing region, replace the base_url with https://dashscope.aliyuncs.com/api/v1.
        Constants.baseHttpApiUrl="https://dashscope-intl.aliyuncs.com/api/v1";
    }
    private static final String MODEL_NAME = "qwen2.5-vl-72b-instruct"; // This example uses qwen2.5-vl-72b-instruct. You can replace the model name as needed. For a list of models, see https://www.alibabacloud.com/help/en/model-studio/models.
    public static void videoImageListSample() throws ApiException, NoApiKeyException, UploadFileException {
        MultiModalConversation conv = new MultiModalConversation();
        MultiModalMessage systemMessage = MultiModalMessage.builder()
                .role(Role.SYSTEM.getValue())
                .content(Arrays.asList(Collections.singletonMap("text", "You are a helpful assistant.")))
                .build();
        //  If the model is in the Qwen2.5-VL series and an image list is passed, you can set the fps parameter. This parameter indicates that the image list is extracted from the original video at an interval of 1/fps seconds.
        Map<String, Object> params = Map.of(
                "video", Arrays.asList("https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20241108/xzsgiz/football1.jpg",
                        "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20241108/tdescd/football2.jpg",
                        "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20241108/zefdja/football3.jpg",
                        "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20241108/aedbqh/football4.jpg"),
                "fps",2);
        MultiModalMessage userMessage = MultiModalMessage.builder()
                .role(Role.USER.getValue())
                .content(Arrays.asList(
                        params,
                        Collections.singletonMap("text", "Describe the specific process in this video")))
                .build();
        MultiModalConversationParam param = MultiModalConversationParam.builder()
                // If the environment variable is not configured, replace the following line with .apiKey("sk-xxx") using your Model Studio API key.
                // The API keys for the Singapore and Beijing regions are different. To obtain an API key, see https://www.alibabacloud.com/help/en/model-studio/get-api-key.
                .apiKey(System.getenv("DASHSCOPE_API_KEY"))
                .model(MODEL_NAME)
                .messages(Arrays.asList(systemMessage, userMessage)).build();
        MultiModalConversationResult result = conv.call(param);
        System.out.print(result.getOutput().getChoices().get(0).getMessage().getContent().get(0).get("text"));
    }
    public static void main(String[] args) {
        try {
            videoImageListSample();
        } catch (ApiException | NoApiKeyException | UploadFileException e) {
            System.out.println(e.getMessage());
        }
        System.exit(0);
    }
}

curl

The API keys for the Singapore and Beijing regions are different. For more information, see Obtain and configure an API key.
The following URL is for the Singapore region. If you use a model in the Beijing region, replace the URL with https://dashscope.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation.
curl -X POST https://dashscope-intl.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation \
-H "Authorization: Bearer $DASHSCOPE_API_KEY" \
-H 'Content-Type: application/json' \
-d '{
  "model": "qwen2.5-vl-72b-instruct",
  "input": {
    "messages": [
      {
        "role": "user",
        "content": [
          {
            "video": [
              "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20241108/xzsgiz/football1.jpg",
              "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20241108/tdescd/football2.jpg",
              "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20241108/zefdja/football3.jpg",
              "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20241108/aedbqh/football4.jpg"
            ],
            "fps":2
                 
          },
          {
            "text": "Describe the specific process in this video"
          }
        ]
      }
    ]
  }
}'

Tool calling

For the complete code for the Function calling flow, see Overview of text generation models.

Python

import os
import dashscope
# The following base_url is for the Singapore region. If you use a model in the Beijing region, replace the base_url with https://dashscope.aliyuncs.com/api/v1.
dashscope.base_http_api_url = 'https://dashscope-intl.aliyuncs.com/api/v1'
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_time",
            "description": "Useful for when you want to know the current time.",
            "parameters": {}
        }
    },
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Useful for when you want to query the weather in a specific city.",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "A city or district, such as Beijing, Hangzhou, or Yuhang."
                    }
                }
            },
            "required": [
                "location"
            ]
        }
    }
]
messages = [{"role": "user", "content": "What is the weather like in Hangzhou?"}]
response = dashscope.Generation.call(
    # If the environment variable is not configured, replace the following line with api_key="sk-xxx" using your Model Studio API key.
    # The API keys for the Singapore and Beijing regions are different. To obtain an API key, see https://www.alibabacloud.com/help/en/model-studio/get-api-key.
    api_key=os.getenv('DASHSCOPE_API_KEY'),
    # This example uses qwen-plus. You can replace the model name as needed. For a list of models, see https://www.alibabacloud.com/help/en/model-studio/getting-started/models.
    model='qwen-plus',
    messages=messages,
    tools=tools,
    result_format='message'
)
print(response)

Java

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import com.alibaba.dashscope.aigc.conversation.ConversationParam.ResultFormat;
import com.alibaba.dashscope.aigc.generation.Generation;
import com.alibaba.dashscope.aigc.generation.GenerationParam;
import com.alibaba.dashscope.aigc.generation.GenerationResult;
import com.alibaba.dashscope.common.Message;
import com.alibaba.dashscope.common.Role;
import com.alibaba.dashscope.exception.ApiException;
import com.alibaba.dashscope.exception.InputRequiredException;
import com.alibaba.dashscope.exception.NoApiKeyException;
import com.alibaba.dashscope.tools.FunctionDefinition;
import com.alibaba.dashscope.tools.ToolFunction;
import com.alibaba.dashscope.utils.JsonUtils;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.github.victools.jsonschema.generator.Option;
import com.github.victools.jsonschema.generator.OptionPreset;
import com.github.victools.jsonschema.generator.SchemaGenerator;
import com.github.victools.jsonschema.generator.SchemaGeneratorConfig;
import com.github.victools.jsonschema.generator.SchemaGeneratorConfigBuilder;
import com.github.victools.jsonschema.generator.SchemaVersion;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import com.alibaba.dashscope.protocol.Protocol;

public class Main {
    public class GetWeatherTool {
        private String location;
        public GetWeatherTool(String location) {
            this.location = location;
        }
        public String call() {
            return location + " is sunny today";
        }
    }
    public class GetTimeTool {
        public GetTimeTool() {
        }
        public String call() {
            LocalDateTime now = LocalDateTime.now();
            DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
            String currentTime = "Current time: " + now.format(formatter) + ".";
            return currentTime;
        }
    }
    public static void SelectTool()
            throws NoApiKeyException, ApiException, InputRequiredException {
        SchemaGeneratorConfigBuilder configBuilder =
                new SchemaGeneratorConfigBuilder(SchemaVersion.DRAFT_2020_12, OptionPreset.PLAIN_JSON);
        SchemaGeneratorConfig config = configBuilder.with(Option.EXTRA_OPEN_API_FORMAT_VALUES)
                .without(Option.FLATTENED_ENUMS_FROM_TOSTRING).build();
        SchemaGenerator generator = new SchemaGenerator(config);
        ObjectNode jsonSchema_weather = generator.generateSchema(GetWeatherTool.class);
        ObjectNode jsonSchema_time = generator.generateSchema(GetTimeTool.class);
        FunctionDefinition fdWeather = FunctionDefinition.builder().name("get_current_weather").description("Get the weather for a specified area")
                .parameters(JsonUtils.parseString(jsonSchema_weather.toString()).getAsJsonObject()).build();
        FunctionDefinition fdTime = FunctionDefinition.builder().name("get_current_time").description("Get the current time")
                .parameters(JsonUtils.parseString(jsonSchema_time.toString()).getAsJsonObject()).build();
        Message systemMsg = Message.builder().role(Role.SYSTEM.getValue())
                .content("You are a helpful assistant. When asked a question, use tools wherever possible.")
                .build();
        Message userMsg = Message.builder().role(Role.USER.getValue()).content("Weather in Hangzhou").build();
        List<Message> messages = new ArrayList<>();
        messages.addAll(Arrays.asList(systemMsg, userMsg));
        GenerationParam param = GenerationParam.builder()
                // The API keys for the Singapore and Beijing regions are different. To obtain an API key, see https://www.alibabacloud.com/help/en/model-studio/get-api-key.
                .apiKey(System.getenv("DASHSCOPE_API_KEY"))
                // This example uses qwen-plus. You can replace the model name as needed. For a list of models, see https://www.alibabacloud.com/help/en/model-studio/getting-started/models.
                .model("qwen-plus")
                .messages(messages)
                .resultFormat(ResultFormat.MESSAGE)
                .tools(Arrays.asList(
                        ToolFunction.builder().function(fdWeather).build(),
                        ToolFunction.builder().function(fdTime).build()))
                .build();
        Generation gen = new Generation(Protocol.HTTP.getValue(), "https://dashscope-intl.aliyuncs.com/api/v1");
        // The preceding base_url is for the Singapore region. If you use a model in the Beijing region, replace the base_url with https://dashscope.aliyuncs.com/api/v1.
        GenerationResult result = gen.call(param);
        System.out.println(JsonUtils.toJson(result));
    }
    public static void main(String[] args) {
        try {
            SelectTool();
        } catch (ApiException | NoApiKeyException | InputRequiredException e) {
            System.out.println(String.format("Exception %s", e.getMessage()));
        }
        System.exit(0);
    }
}

curl

The API keys for the Singapore and Beijing regions are different. For more information, see Obtain and configure an API key.
The following URL is for the Singapore region. If you use a model in the Beijing region, replace the URL with https://dashscope.aliyuncs.com/api/v1/services/aigc/text-generation/generation.
curl --location "https://dashscope-intl.aliyuncs.com/api/v1/services/aigc/text-generation/generation" \
--header "Authorization: Bearer $DASHSCOPE_API_KEY" \
--header "Content-Type: application/json" \
--data '{
    "model": "qwen-plus",
    "input": {
        "messages": [{
            "role": "user",
            "content": "What is the weather like in Hangzhou?"
        }]
    },
    "parameters": {
        "result_format": "message",
        "tools": [{
            "type": "function",
            "function": {
                "name": "get_current_time",
                "description": "Useful for when you want to know the current time.",
                "parameters": {}
            }
        },{
            "type": "function",
            "function": {
                "name": "get_current_weather",
                "description": "Useful for when you want to query the weather in a specific city.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "A city or district, such as Beijing, Hangzhou, or Yuhang."
                        }
                    },
                    "required": ["location"]
                }
            }
        }]
    }
}'
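The preceding examples cover only the first round of Function calling: the model replies with tool_calls instead of a final answer. To finish the loop, run the tool locally, append both the assistant message and a tool message to the conversation, and call the model again. The following Python sketch continues the Python example above; get_current_weather is a stand-in implementation, and the field access paths follow the chat response object described later in this topic.

# A minimal sketch of the second round of Function calling, continuing the Python example above.
import json

assistant_message = response.output.choices[0].message
# Append the assistant message that carries tool_calls to the conversation.
messages.append(assistant_message)

def get_current_weather(arguments: str) -> str:
    # Stand-in implementation; a real application would query a weather service.
    location = json.loads(arguments).get("location", "unknown")
    return f"{location} is sunny today"

for tool_call in assistant_message.tool_calls:
    if tool_call["function"]["name"] == "get_current_weather":
        tool_output = get_current_weather(tool_call["function"]["arguments"])
        # Return the tool output to the model through a tool message.
        messages.append({
            "role": "tool",
            "content": tool_output,
            "tool_call_id": tool_call["id"],
        })

second_response = dashscope.Generation.call(
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    model="qwen-plus",
    messages=messages,
    tools=tools,
    result_format="message",
)
print(second_response.output.choices[0].message.content)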

Asynchronous invocation

# Your DashScope Python SDK version must be 1.19.0 or later.
import asyncio
import platform
import os
import dashscope
from dashscope.aigc.generation import AioGeneration

dashscope.base_http_api_url = 'https://dashscope-intl.aliyuncs.com/api/v1'
# The preceding base_url is for the Singapore region. If you use a model in the Beijing region, replace the base_url with https://dashscope.aliyuncs.com/api/v1.
async def main():
    response = await AioGeneration.call(
        # If the environment variable is not configured, replace the following line with api_key="sk-xxx" using your Model Studio API key.
        # The API keys for the Singapore and Beijing regions are different. To obtain an API key, see https://www.alibabacloud.com/help/en/model-studio/get-api-key.
        api_key=os.getenv('DASHSCOPE_API_KEY'),
        # This example uses qwen-plus. You can replace the model name as needed. For a list of models, see https://www.alibabacloud.com/help/en/model-studio/getting-started/models.
        model="qwen-plus",
        messages=[{"role": "user", "content": "Who are you?"}],
        result_format="message",
    )
    print(response)

if platform.system() == "Windows":
    asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
asyncio.run(main())

Document understanding

Python

import os
import dashscope

# Currently, only the Beijing region supports calling the qwen-long-latest model.
dashscope.base_http_api_url = 'https://dashscope.aliyuncs.com/api/v1'
messages = [
        {'role': 'system', 'content': 'You are a helpful assistant.'},
        # Replace '{FILE_ID}' with the file-id used in your actual conversation scenario.
        {'role': 'system', 'content': f'fileid://{FILE_ID}'},
        {'role': 'user', 'content': 'What is this article about?'}]
response = dashscope.Generation.call(
    # If the environment variable is not configured, replace the following line with api_key="sk-xxx" using your Model Studio API key.
    api_key=os.getenv('DASHSCOPE_API_KEY'),
    model="qwen-long-latest",
    messages=messages,
    result_format='message'
)
print(response)

Java

// Currently, only the Beijing region supports calling the qwen-long-latest model.
import java.util.Arrays;
import com.alibaba.dashscope.aigc.conversation.ConversationParam.ResultFormat;
import com.alibaba.dashscope.aigc.generation.Generation;
import com.alibaba.dashscope.aigc.generation.GenerationParam;
import com.alibaba.dashscope.aigc.generation.GenerationResult;
import com.alibaba.dashscope.common.Message;
import com.alibaba.dashscope.common.Role;
import com.alibaba.dashscope.exception.ApiException;
import com.alibaba.dashscope.exception.InputRequiredException;
import com.alibaba.dashscope.exception.NoApiKeyException;
import com.alibaba.dashscope.protocol.Protocol;
import com.alibaba.dashscope.utils.JsonUtils;

public class Main {
    public static void main(String[] args) {
        try {
            Message systemMsg = Message.builder().role(Role.SYSTEM.getValue()).content("You are a helpful assistant.").build();
            // Replace {FILE_ID} with the file-id used in your actual conversation scenario.
            Message fileMsg = Message.builder().role(Role.SYSTEM.getValue()).content("fileid://{FILE_ID}").build();
            Message userMsg = Message.builder().role(Role.USER.getValue()).content("What is this article about?").build();
            GenerationParam param = GenerationParam.builder()
                    // If the environment variable is not configured, replace the following line with .apiKey("sk-xxx") using your Model Studio API key.
                    .apiKey(System.getenv("DASHSCOPE_API_KEY"))
                    .model("qwen-long-latest")
                    .messages(Arrays.asList(systemMsg, fileMsg, userMsg))
                    .resultFormat(ResultFormat.MESSAGE)
                    .build();
            Generation gen = new Generation(Protocol.HTTP.getValue(), "https://dashscope.aliyuncs.com/api/v1");
            GenerationResult result = gen.call(param);
            System.out.println(JsonUtils.toJson(result));
        } catch (ApiException | NoApiKeyException | InputRequiredException e) {
            System.out.println(String.format("Exception %s", e.getMessage()));
        }
    }
}

curl

Currently, you can call document understanding models only in the Beijing region.
Replace {FILE_ID} with the file ID for your conversation scenario.
curl --location "https://dashscope.aliyuncs.com/api/v1/services/aigc/text-generation/generation" \
--header "Authorization: Bearer $DASHSCOPE_API_KEY" \
--header "Content-Type: application/json" \
--data '{
    "model": "qwen-long-latest",
    "input":{
        "messages":[      
            {
                "role": "system",
                "content": "You are a helpful assistant."
            },
            {
                "role": "system",
                "content": "fileid://{FILE_ID}"
            },
            {
                "role": "user",
                "content": "What is this article about?"
            }
        ]
    },
    "parameters": {
        "result_format": "message"
    }
}'
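The {FILE_ID} placeholder in the preceding examples comes from a file that you upload in advance. The following is a minimal sketch of obtaining a file-id; it assumes the OpenAI-compatible Files API with purpose="file-extract" and a local file named example.pdf.

import os
from pathlib import Path
from openai import OpenAI

client = OpenAI(
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    # Document understanding is currently available only in the Beijing region.
    base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
)
# purpose="file-extract" marks the file for document understanding.
file_object = client.files.create(file=Path("example.pdf"), purpose="file-extract")
print(file_object.id)  # Use this value in place of {FILE_ID}, passed as fileid://<id>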

model string (Required)

The name of the model.

Supported models include Qwen large language models (commercial and open source), Qwen-VL, and Qwen-Coder.

For specific model names and billing information, see Model list.

messages array (Required)

The context to pass to the large language model, arranged in conversational order.

When you make a call using HTTP, place messages in the input object.

Message types

System Message object (Optional)

A system message that sets the role, tone, task objectives, or constraints for the large language model. This message is usually placed at the beginning of the messages array.

We do not recommend setting a System Message for QwQ models. Setting a System Message for QVQ models has no effect.

Properties

content string (Required)

The content of the message.

role string (Required)

The role for a system message. The value is fixed as system.

User Message object (Required)

A user message that passes questions, instructions, or context to the model.

Properties

content string or array (Required)

The content of the message. The value is a string if the input is only text. The value is an array if the input includes multimodal data, such as images, or if explicit caching is enabled.

Properties

text string (Required)

The input text.

image string (Optional)

Specifies the image file for image understanding. You can pass the image in one of the following three ways:

  • A public URL of the image.

  • The Base64 encoding of the image, in the format data:image/<format>;base64,<data>.

  • The absolute path of a local file.

Applicable models: Qwen-VL, QVQ

Example value: {"image":"https://xxxx.jpeg"}

video array or string (Optional)

The video that is passed to the Qwen-VL model or QVQ model.

  • If you pass an image list, the value is an array.

  • If you pass a video file, the value is a string.

For information about how to pass a local file, see Local files (Qwen-VL) or Local files (QVQ).

Example values:

  • Image list: {"video":["https://xx1.jpg",...,"https://xxn.jpg"]}

  • Video file: {"video":"https://xxx.mp4"}

fps float (Optional)

The number of frames to extract per second. The value must be in the range of (0.1, 10). The default value is 2.0.

This parameter has two functions:

  • When you input a video file, this parameter controls the frame extraction frequency: one frame is extracted every 1/fps seconds.

    Applicable to the Qwen-VL model and QVQ model.
  • This parameter informs the model of the time interval between adjacent frames, which helps the model better understand the temporal dynamics of the video. This function applies to both video file and image list inputs and is suitable for scenarios such as event time localization or segment content summarization.

    Supports the Qwen2.5-VL and Qwen3-VL models, and the QVQ model.

Example values are as follows:

  • Image list input: {"video":["https://xx1.jpg",...,"https://xxn.jpg"], "fps":2}

  • Video file input: {"video": "https://xx1.mp4", "fps":2}

A larger fps value is suitable for high-speed motion scenarios, such as sports events or action movies. A smaller fps value is suitable for long videos or content with static scenes.

The OpenAI compatible protocol does not support this parameter. For video files, one frame is extracted every 0.5 seconds by default. For image lists, the frames are assumed to be extracted from a video at the same interval.

min_pixels integer (Optional)

Sets the minimum pixel threshold for an input image or video frame. If an input image or video frame has fewer pixels than the min_pixels value, the image or frame is enlarged until its total number of pixels exceeds the min_pixels value.

  • Image input:

    • Applicable models: QVQ and Qwen-VL models are supported.

    • Value range:

      • Qwen3-VL: The default and minimum value is 65536.

      • qwen-vl-max, qwen-vl-max-latest, qwen-vl-max-0813, qwen-vl-plus, qwen-vl-plus-latest, qwen-vl-plus-0815: The default and minimum value is 4096.

      • QVQ and other Qwen2.5-VL models: The default and minimum value is 3136.

    • Example value: {"image":"https://xxxx.jpg", "min_pixels": 65536}

  • For video file or image list input:

    • Applicable models: This parameter is supported only for the qwen3-vl-plus and qwen3-vl-plus-2025-09-23 models.

    • Value range: The default value is 65536. The minimum value is 4096.

    • Example values:

      • When you input a video file: {"video":"https://xxxx.mp4","min_pixels": 65536}

      • When you input an image list: {"video":["https://xx1.jpg",...,"https://xxn.jpg"], "min_pixels": 65536}

The OpenAI compatible protocol supports this parameter only when you pass images.

max_pixels integer (Optional)

Sets the maximum pixel threshold for an input image or video frame. If the number of pixels in an input image is within the [min_pixels, max_pixels] range, the model recognizes the original image. If the number of pixels in the input image exceeds the max_pixels value, the image is downscaled until its total number of pixels is less than the max_pixels value.

  • Image input:

    • Applicable models: QVQ and Qwen-VL models are supported.

    • Value range:

      The value of max_pixels depends on whether the vl_high_resolution_images parameter is enabled.

      • If vl_high_resolution_images is set to False:

        • Qwen3-VL: The default value is 2621440. The maximum value is 16777216.

        • qwen-vl-max, qwen-vl-max-latest, qwen-vl-max-0813, qwen-vl-plus, qwen-vl-plus-latest, qwen-vl-plus-0815: The default value is 1310720. The maximum value is 16777216.

        • QVQ and other Qwen2.5-VL models: The default value is 1003520. The maximum value is 12845056.

      • If vl_high_resolution_images is set to True:

        • Qwen3-VL, qwen-vl-max, qwen-vl-max-latest, qwen-vl-max-0813, qwen-vl-plus, qwen-vl-plus-latest, qwen-vl-plus-0815: The max_pixels parameter is invalid. The maximum number of pixels for the input image is fixed at 16777216.

        • QVQ and other Qwen2.5-VL models: The max_pixels parameter is invalid. The maximum number of pixels for the input image is fixed at 12845056.

    • Example value: {"image":"https://xxxx.jpg", "max_pixels": 8388608}

  • For video file or image list input:

    • Applicable models: This parameter is supported only for the qwen3-vl-plus and qwen3-vl-plus-2025-09-23 models.

    • Value range: The default value is 655360. The maximum value is 2048000.

    • Example values:

      • When you input a video file: {"video":"https://xxxx.mp4","max_pixels": 655360}

      • When you input an image list: {"video":["https://xx1.jpg",...,"https://xxn.jpg"], "max_pixels": 655360}

The OpenAI compatible protocol supports this parameter only when you pass images.

total_pixels integer (Optional)

Limits the total number of pixels of all frames that are extracted from a video. The total number of pixels is calculated using the following formula: Pixels of a single frame × Total number of frames.

If the total number of pixels in the video exceeds this limit, the system scales down the video frames. However, the system ensures that the number of pixels in a single frame is within the [min_pixels, max_pixels] range.

  • Applicable models: This parameter is supported only for the qwen3-vl-plus and qwen3-vl-plus-2025-09-23 models.

  • Value description: The default and minimum values are both 134217728. This corresponds to 131072 image tokens. One image token is equivalent to 32 × 32 pixels.

  • Example values:

    • When you input a video file: {"video":"https://xxxx.mp4","total_pixels": 134217728}

    • When you input an image list: {"video":["https://xx1.jpg",...,"https://xxn.jpg"], "total_pixels": 134217728}

For long videos from which many frames are extracted, you can reduce this value to decrease token consumption and processing time. However, this may result in a loss of image detail.

cache_control object (Optional)

This parameter enables explicit caching. It is supported only by models that support explicit caching. A usage sketch follows the User Message properties below.

Properties

type string(Required)

The value is fixed as ephemeral.

role string (Required)

The role for a user message. The value is fixed as user.
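Putting the preceding properties together, a user message whose content is an array might look like the following minimal sketch. The text values are placeholders, and the cache_control usage assumes a model that supports explicit caching.

user_message = {
    "role": "user",
    "content": [
        # A long, reusable context marked for explicit caching (placeholder text).
        {"text": "<a long document to reuse across requests>",
         "cache_control": {"type": "ephemeral"}},
        # The actual question for this request.
        {"text": "Summarize the document above."},
    ],
}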

Assistant Message object (Optional)

The model's response to the user's message.

Properties

content string (Optional)

The content of the message. This parameter is optional only when the tool_calls parameter is specified in the assistant message.

role string (Required)

The value is fixed as assistant.

partial boolean (Optional)

Specifies whether to enable partial mode, in which the model continues generating from a provided text prefix. For more information, see Partial mode. A minimal sketch follows the supported model list below.

Supported models

  • Qwen-Max series

    qwen3-max, qwen3-max-2025-09-23, qwen3-max-preview (non-thinking mode), qwen-max, qwen-max-latest, and snapshot models from qwen-max-2025-01-25 or later

  • Qwen-Plus series (non-thinking mode)

    qwen-plus, qwen-plus-latest, and snapshot models from qwen-plus-2025-01-25 or later

  • Qwen-Flash series (non-thinking mode)

    qwen-flash, and snapshot models from qwen-flash-2025-07-28 or later

  • Qwen-Coder series

    qwen3-coder-plus, qwen3-coder-flash, qwen3-coder-480b-a35b-instruct, qwen3-coder-30b-a3b-instruct

  • Qwen-VL series

    • qwen3-vl-plus series (non-thinking mode)

      qwen3-vl-plus, and snapshot models from qwen3-vl-plus-2025-09-23 or later

    • qwen3-vl-flash series (non-thinking mode)

      qwen3-vl-flash, and snapshot models from qwen3-vl-flash-2025-10-15 or later

    • qwen-vl-max series

      qwen-vl-max, qwen-vl-max-latest, and snapshot models from qwen-vl-max-2025-04-08 or later

    • qwen-vl-plus series

      qwen-vl-plus, qwen-vl-plus-latest, and snapshot models from qwen-vl-plus-2025-01-25 or later

  • Qwen-Turbo series (non-thinking mode)

    qwen-turbo, qwen-turbo-latest, and snapshot models from qwen-turbo-2024-11-01 or later

  • Qwen open-source series

    Qwen3 open-source models (non-thinking mode), Qwen2.5 series text models, Qwen3-VL open-source models (non-thinking mode)
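The following is a minimal sketch of partial mode with the DashScope Python SDK, assuming a model from the preceding list. The final message is an assistant message whose content is the prefix to continue and whose partial field is true.

import os
import dashscope

dashscope.base_http_api_url = 'https://dashscope-intl.aliyuncs.com/api/v1'
messages = [
    {"role": "user", "content": "Write a four-line poem about the sea."},
    # The model continues from this prefix instead of starting a new reply.
    {"role": "assistant", "content": "Waves of silver", "partial": True},
]
response = dashscope.Generation.call(
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    model="qwen-plus",
    messages=messages,
    result_format="message",
)
print(response.output.choices[0].message.content)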

tool_calls array (Optional)

The tool and input parameter information that is returned after you initiate Function calling. This parameter contains one or more objects and is obtained from the tool_calls field of the previous model response.

Properties

id string

The ID of the tool response.

type string

The tool type. Currently, only function is supported.

function object

The tool and input parameter information.

Properties

name string

The tool name.

arguments string

The input parameter information, in JSON string format.

index integer

The index of the current tool information in the tool_calls array.

Tool Message object (Optional)

The output information of the tool.

Properties

content string (Required)

The output content of the tool function. The value must be a string.

role string (Required)

The value is fixed as tool.

tool_call_id string (Optional)

The ID that is returned after you initiate Function calling. You can obtain the ID from response.output.choices[0].message.tool_calls[$index]["id"]. This parameter is used to associate the Tool Message with the corresponding tool.

temperature float (Optional)

The sampling temperature, which controls the diversity of the text that is generated by the model.

A higher temperature results in more diverse text. A lower temperature produces more deterministic text.

The value must be in the range of [0, 2).

When you make a call using HTTP, place temperature in the parameters object.
We do not recommend that you modify the default temperature value for QVQ models.

top_p float (Optional)

The probability threshold for nucleus sampling. This parameter controls the diversity of the text that is generated by the model.

A higher top_p value results in more diverse text. A lower top_p value produces more deterministic text.

The value must be in the range of (0, 1.0].

Default top_p values

Qwen3 (non-thinking mode), Qwen3-Instruct series, Qwen3-Coder series, qwen-max series, qwen-plus series (non-thinking mode), qwen-flash series (non-thinking mode), qwen-turbo series (non-thinking mode), Qwen open source series, qwen-vl-max-2025-08-13, and Qwen3-VL (non-thinking mode): 0.8

qwen-vl-plus series, qwen-vl-max, qwen-vl-max-latest, qwen-vl-max-2025-04-08, qwen2.5-vl-3b-instruct, qwen2.5-vl-7b-instruct, qwen2.5-vl-32b-instruct, and qwen2.5-vl-72b-instruct: 0.001

QVQ series, qwen-vl-plus-2025-07-10, qwen-vl-plus-2025-08-15 : 0.5

qwen3-max-preview (thinking mode), Qwen3-Omni-Flash series: 1.0

Qwen3 (thinking mode), Qwen3-VL (thinking mode), Qwen3-Thinking, QwQ series, and Qwen3-Omni-Captioner: 0.95

In the Java SDK, this parameter is named topP. When you make a call using HTTP, place top_p in the parameters object.
We do not recommend that you modify the default top_p value for QVQ models.

top_k integer (Optional)

The size of the candidate set for sampling during generation. For example, a value of 50 indicates that only the 50 highest-scoring tokens in a single generation are used as the candidate set for random sampling. A larger value results in higher randomness, and a smaller value results in higher determinism. A value of None or a value greater than 100 disables the top_k strategy. In this case, only the top_p strategy is effective.

The value must be greater than or equal to 0.

Default top_k values

QVQ series, qwen-vl-plus-2025-07-10, and qwen-vl-plus-2025-08-15: 10

QwQ series: 40

Other qwen-vl-plus series, models before qwen-vl-max-2025-08-13, qwen2.5-omni-7b: 1

Qwen3-Omni-Flash series: 50

All other models: 20

In the Java SDK, this parameter is named topK. When you make a call using HTTP, place top_k in the parameters object.
We do not recommend that you modify the default top_k value for QVQ models.

enable_thinking boolean (Optional)

When you use a hybrid thinking model, this parameter specifies whether to enable thinking mode. It applies to Qwen3 and Qwen3-VL models. For more information, see Deep thinking.

Valid values:

  • true

    If this parameter is enabled, the thinking content is returned in the reasoning_content field.
  • false

For the default value of each model, see Supported models.

In the Java SDK, this parameter is named enableThinking. When you make a call using HTTP, place enable_thinking in the parameters object.
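The following is a minimal sketch of thinking mode with the DashScope Python SDK, assuming a hybrid thinking model. Because thinking mode supports only streaming output, the call also sets stream and incremental_output. The thinking content arrives in reasoning_content and the reply in content.

import os
import dashscope

dashscope.base_http_api_url = 'https://dashscope-intl.aliyuncs.com/api/v1'
responses = dashscope.Generation.call(
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    model="qwen-plus",  # a hybrid thinking model
    messages=[{"role": "user", "content": "Why is the sky blue?"}],
    result_format="message",
    enable_thinking=True,
    stream=True,
    incremental_output=True,
)
for chunk in responses:
    message = chunk.output.choices[0].message
    # reasoning_content holds the thinking process; content holds the final reply.
    print(getattr(message, "reasoning_content", "") or "", end="")
    print(message.content or "", end="")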

thinking_budget integer (Optional)

The maximum length of the thinking process. This applies to Qwen3-VL, and the commercial and open source versions of Qwen3 models. For more information, see Limit thinking length.

The default value is the model's maximum chain-of-thought length. For more information, see Model list.

In the Java SDK, this parameter is named thinkingBudget. When you make a call using HTTP, place thinking_budget in the parameters object.

enable_code_interpreter boolean (Optional) Default value: false

Specifies whether to enable the code interpreter feature. This parameter is applicable only to qwen3-max-preview in thinking mode. For more information, see Code interpreter.

Valid values:

  • true

  • false

This parameter is not supported by the Java SDK. When you make a call using HTTP, place enable_code_interpreter in the parameters object.

repetition_penalty float (Optional)

The penalty for repeating consecutive sequences during model generation. A higher repetition_penalty value can reduce the repetition of the generated text. A value of 1.0 indicates no penalty. The value must be greater than 0.

In the Java SDK, this parameter is named repetitionPenalty. When you make a call using HTTP, place repetition_penalty in the parameters object.
If you use the qwen-vl-plus-2025-01-25 model for text extraction, we recommend that you set repetition_penalty to 1.0.
We do not recommend that you modify the default repetition_penalty value for QVQ models.

presence_penalty float (Optional)

Controls the repetition of content in the text that is generated by the model.

The value range is [-2.0, 2.0]. Positive values reduce repetition, while negative values increase it.

For creative writing or brainstorming scenarios that require diversity, fun, or creativity, we recommend that you increase this value. For technical documents or formal texts that emphasize consistency and terminological accuracy, we recommend that you decrease this value.

Default presence_penalty values

qwen3-max-preview (thinking mode), Qwen3 (non-thinking mode), Qwen3-Instruct series, qwen3-0.6b/1.7b/4b (thinking mode), QVQ series, qwen-max, qwen-max-latest, qwen2.5-vl series, qwen-vl-max series, qwen-vl-plus, Qwen3-VL (non-thinking): 1.5;

qwen-vl-plus-latest, qwen-vl-plus-2025-08-15: 1.2

qwen-vl-plus-2025-01-25: 1.0;

qwen3-8b/14b/32b/30b-a3b/235b-a22b (thinking mode), qwen-plus/qwen-plus-latest/qwen-plus-2025-04-28 (thinking mode), qwen-turbo/qwen-turbo-latest/qwen-turbo-2025-04-28 (thinking mode): 0.5;

All other models: 0.0.

How it works

If the parameter value is positive, the model applies a penalty to tokens that already exist in the current text. The penalty is not related to the number of times the token appears. This reduces the chance of these tokens reappearing, which decreases content repetition and increases lexical diversity.

Example

Prompt: Translate this sentence into English: "Esta película es buena. La trama es buena, la actuación es buena, la música es buena, y en general, toda la película es simplemente buena. Es realmente buena, de hecho. La trama es tan buena, y la actuación es tan buena, y la música es tan buena."

Parameter value 2.0: This movie is very good. The plot is great, the acting is great, the music is also very good, and overall, the whole movie is incredibly good. In fact, it is truly excellent. The plot is very exciting, the acting is outstanding, and the music is so beautiful.

Parameter value 0.0: This movie is good. The plot is good, the acting is good, the music is also good, and overall, the whole movie is very good. In fact, it is really great. The plot is very good, the acting is also very outstanding, and the music is also excellent.

Parameter value -2.0: This movie is very good. The plot is very good, the acting is very good, the music is also very good, and overall, the whole movie is very good. In fact, it is really great. The plot is very good, the acting is also very good, and the music is also very good.

When you use the qwen-vl-plus-2025-01-25 model for text extraction, we recommend that you set `presence_penalty` to 1.5.
We do not recommend that you modify the default presence_penalty value for QVQ models.
The Java SDK does not support this parameter. When you make a call using HTTP, place presence_penalty in the parameters object.

vl_high_resolution_images boolean (Optional) Default value: false

Specifies whether to increase the maximum pixel limit for input images to the pixel value that corresponds to 16384 tokens. For more information, see Processing high-resolution images.

  • vl_high_resolution_images: true: A fixed-resolution strategy is used, and the max_pixels setting is ignored. If an image exceeds this resolution, its total pixels are downscaled to this limit.

    When vl_high_resolution_images is true, different models have different pixel limits:

    • Qwen3-VL series, qwen-vl-max, qwen-vl-max-latest, qwen-vl-max-0813, qwen-vl-plus, qwen-vl-plus-latest, and qwen-vl-plus-0815: 16777216 (each token corresponds to 32*32 pixels, i.e., 16384*32*32)

    • QVQ series, other Qwen2.5-VL models: 12845056 (each token corresponds to 28*28 pixels, i.e., 16384*28*28)

  • vl_high_resolution_images is false: The actual resolution is determined by both max_pixels and the default limit, and the maximum of the two is used. If the image exceeds this pixel limit, it is downscaled.

    When vl_high_resolution_images is false, different models have different default pixel limits:

    • Qwen3-VL series: 2621440 (2560*32*32, meaning the default token limit is 2560)

    • qwen-vl-max, qwen-vl-max-latest, qwen-vl-max-0813, qwen-vl-plus, qwen-vl-plus-latest, and qwen-vl-plus-0815: 1310720 (1280*32*32, meaning the default token limit is 1280)

    • QVQ series, other Qwen2.5-VL models: 1003520 (1280*28*28, meaning the default token limit is 1280)

In the Java SDK, this parameter is named vlHighResolutionImages. The minimum required version is 2.20.8. When you make a call using HTTP, place vl_high_resolution_images in the parameters object.
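The following is a minimal sketch of enabling this parameter with the DashScope Python SDK. It assumes the MultiModalConversation interface that Qwen-VL models use and a placeholder image URL.

import os
import dashscope

dashscope.base_http_api_url = 'https://dashscope-intl.aliyuncs.com/api/v1'
messages = [{
    "role": "user",
    "content": [
        {"image": "https://example.com/large-image.jpg"},  # placeholder URL
        {"text": "Describe this image in detail."},
    ],
}]
response = dashscope.MultiModalConversation.call(
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    model="qwen-vl-max",
    messages=messages,
    # Raise the input pixel limit to the resolution that corresponds to 16384 tokens.
    vl_high_resolution_images=True,
)
print(response)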

vl_enable_image_hw_output boolean (Optional) Default value: false

Specifies whether to return the dimensions of the scaled image. The model scales the input image. If this parameter is set to true, the model returns the height and width of the scaled image. If streaming output is enabled, this information is returned in the last data packet (chunk). This parameter is supported by the Qwen-VL model.

In the Java SDK, this parameter is named vlEnableImageHwOutput. The minimum Java SDK version is 2.20.8. When you make a call using HTTP, place vl_enable_image_hw_output in the parameters object.

max_input_tokens integer (Optional)

The maximum allowed token length for the input. This parameter is currently supported only by the qwen-plus-2025-07-28 and qwen-plus-latest models.

  • qwen-plus-latest default value: 129,024

    The default value may be adjusted to 1,000,000 in the future.
  • qwen-plus-2025-07-28 default value: 1,000,000

The Java SDK does not currently support this parameter. When you make a call using HTTP, place max_input_tokens in the parameters object.

max_tokens integer (Optional)

Limits the maximum number of tokens in the model's output. If the generated content exceeds this value, generation stops prematurely, and the returned finish_reason is length.

The default and maximum values are the model's maximum output length. For more information, see Model list.

This parameter is useful for scenarios that require you to control the output length, such as generating summaries or keywords, or for reducing costs and response times.


max_tokens does not limit the length of the chain-of-thought for thinking models.
In the Java SDK, this parameter is named maxTokens. For Qwen-VL models, it is named maxLength in the Java SDK, but versions after 2.18.4 also support setting it as maxTokens. When you make a call using HTTP, place max_tokens in the parameters object.

seed integer (Optional)

A random number seed. This parameter ensures that results are reproducible for the same input and parameters. If you pass the same seed value in a call and other parameters remain unchanged, the model returns the same result as much as possible.

Value range: [0, 2^31 - 1].

When you make a call using HTTP, place seed in the parameters object.

stream boolean (Optional) Default value: false

Specifies whether to return the response in streaming output mode. Parameter values:

  • false: The model returns the complete result at once after the generation is complete.

  • true: The output is generated and sent incrementally. This means that a packet is returned as soon as a part of the content is generated.

This parameter is supported only by the Python SDK. To implement streaming output using the Java SDK, you can call the streamCall interface. To implement streaming output using HTTP, you must specify X-DashScope-SSE as enable in the request header.
The commercial edition of Qwen3 (in thinking mode), the open source edition of Qwen3, QwQ, and QVQ support only streaming output.

incremental_output boolean (Optional) Default value: false (The default for Qwen3-Max, Qwen3-VL, Qwen3 open source edition, QwQ, and QVQ models is true)

Specifies whether to enable incremental output in streaming output mode. We recommend that you set this parameter to true.

Parameter values:

  • false: Each output is the entire sequence that is generated. The last output is the complete result.

    I
    I like
    I like apple
    I like apple.
  • true (recommended): The output is incremental. This means that subsequent output does not include previously generated content. You must read these fragments in real time to obtain the complete result.

    I
    like
    apple
    .
In the Java SDK, this parameter is named incrementalOutput. When you make a call using HTTP, place incremental_output in the parameters object.
QwQ models and Qwen3 models in thinking mode support only the true value. Because the default value for Qwen3 commercial models is false, you must manually set this parameter to true when you use the thinking mode.
Qwen3 open source models do not support the false value.
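The following is a minimal sketch that combines stream and incremental_output with the DashScope Python SDK. Each chunk carries only the newly generated fragment, so the fragments are concatenated to obtain the complete reply.

import os
import dashscope

dashscope.base_http_api_url = 'https://dashscope-intl.aliyuncs.com/api/v1'
responses = dashscope.Generation.call(
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    model="qwen-plus",
    messages=[{"role": "user", "content": "Introduce yourself in one sentence."}],
    result_format="message",
    stream=True,
    incremental_output=True,  # each chunk contains only the new fragment
)
full_reply = ""
for chunk in responses:
    fragment = chunk.output.choices[0].message.content
    full_reply += fragment
    print(fragment, end="", flush=True)
print()  # full_reply now holds the complete result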

response_format object (Optional) Default value: {"type": "text"}

The format of the returned content. Valid values:

  • {"type": "text"}: Outputs a text reply.

  • {"type": "json_object"}: Outputs a standard-format JSON string.

  • {"type": "json_schema","json_schema": {...} }: Outputs a JSON string in your specified format.

Reference: Structured output.
For a list of supported models, see Supported models.
If you specify {"type": "json_object"}, you must explicitly instruct the model to output JSON in the prompt, for example, "Please output in JSON format". Otherwise, an error occurs.
In the Java SDK, this parameter is responseFormat. When making an HTTP call, place response_format in the parameters object.

Properties

type string (Required)

The format of the returned content. Valid values:

  • text: Outputs a text reply.

  • json_object: Outputs a standard-format JSON string.

  • json_schema: Outputs a JSON string in your specified format.

json_schema object

Required when type is json_schema. Specifies the schema of the output.

Properties

name string (Required)

A unique name for the schema. The name can contain only letters (case-insensitive), numbers, underscores (_), and hyphens (-). The name can be up to 64 characters long.

description string (Optional)

A description of the schema's purpose, which helps the model understand the semantic context of the output.

schema object (Optional)

An object that follows the JSON Schema standard and defines the data structure for the model's output.

For more information about how to build a JSON Schema, see JSON Schema

strict boolean (Optional) Default value: false

Specifies whether the model must strictly follow all schema constraints.

  • true (Recommended)

    The model strictly follows all constraints, such as field types, required items, and formats. This ensures 100% compliance of the output.

  • false (Not recommended)

    The model loosely follows the schema. The output may not conform to the specification and can result in validation failure.
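The following is a minimal sketch of JSON-mode output with the DashScope Python SDK. With {"type": "json_object"}, the prompt itself must also ask for JSON output.

import json
import os
import dashscope

dashscope.base_http_api_url = 'https://dashscope-intl.aliyuncs.com/api/v1'
response = dashscope.Generation.call(
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    model="qwen-plus",
    # The prompt must explicitly ask for JSON when using json_object.
    messages=[{"role": "user", "content": "Extract the name and age from: Alice is 30. Please output in JSON format."}],
    result_format="message",
    response_format={"type": "json_object"},
)
data = json.loads(response.output.choices[0].message.content)
print(data)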

result_format string (Optional) Default value: text (The default for Qwen3-Max, Qwen3-VL, QwQ models, and Qwen3 open source models (except qwen3-next-80b-a3b-instruct) is message)

The format of the returned data. We recommend that you set this parameter to message to facilitate multi-turn conversation.

The platform will unify the default value to message in the future.
In the Java SDK, this parameter is named resultFormat. When you make a call using HTTP, place result_format in the parameters object.
For Qwen-VL, QVQ models, setting this parameter to text has no effect.
Qwen3-Max, Qwen3-VL, and Qwen3 models in thinking mode can only be set to message. Because the default value for Qwen3 commercial models is text, you must set this parameter to message.
If you use the Java SDK to call a Qwen3 open source model and pass text, the response is still returned in the message format.

logprobs boolean (Optional) Default value: false

Specifies whether to return the log probabilities of the output tokens. Valid values:

  • true

  • false

The following models are supported:

  • Snapshot models of the qwen-plus series (excluding the main model)

  • Snapshot models of the qwen-turbo series (excluding the main model)

  • Qwen3 open source models

When you make a call using HTTP, place logprobs in the parameters object.

top_logprobs integer (Optional) Default value: 0

Specifies the number of most likely candidate tokens to return at each generation step.

Value range: [0, 5]

This parameter is effective only when logprobs is set to true.

In the Java SDK, the parameter is topLogprobs. For HTTP calls, specify top_logprobs in the parameters object.
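The following is a minimal sketch of requesting log probabilities with the DashScope Python SDK. The snapshot model name is an assumption; use any model from the supported list above. The returned structure follows the logprobs field of the chat response object described later in this topic, although depending on the SDK version it may be attribute- or dictionary-accessible.

import os
import dashscope

dashscope.base_http_api_url = 'https://dashscope-intl.aliyuncs.com/api/v1'
response = dashscope.Generation.call(
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    model="qwen-plus-2025-04-28",  # assumed snapshot model that supports logprobs
    messages=[{"role": "user", "content": "Say hello."}],
    result_format="message",
    logprobs=True,
    top_logprobs=2,
)
# Each element pairs a generated token with its log probability.
for item in response.output.choices[0].logprobs["content"]:
    print(item["token"], item["logprob"])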

n integer (Optional) Default value: 1

The number of responses to generate. The value must be in the range of 1 to 4. For scenarios that require you to generate multiple responses, such as creative writing or ad copy, you can set a larger n value.

Currently, this parameter is supported only by the qwen-plus and Qwen3 (non-thinking mode) models. The value is fixed at 1 when the tools parameter is passed.
Setting a larger n value does not increase input token consumption, but it increases output token consumption.
When you make a call using HTTP, place n in the parameters object.

stop string or array (Optional)

Specifies stop words. When a string or token_id that is specified in stop appears in the text generated by the model, generation stops immediately.

You can pass sensitive words to control the model's output.

When `stop` is an array, you cannot use a token_id and a string as elements at the same time. For example, you cannot specify ["Hello",104307].
When you make a call using HTTP, place stop in the parameters object.

tools array (Optional)

An array that contains one or more tool objects for the model to call in Function calling. For more information, see Function calling.

When you use the tools parameter, you must set the result_format parameter to message.

You must set the tools parameter when you initiate Function calling or submit tool execution results.

Properties

type string (Required)

The tool type. Currently, only function is supported.

function object (Required)

Properties

name string (Required)

The name of the tool function. The name must consist of letters and numbers. It can also include underscores (_) and hyphens (-). The maximum length is 64 characters.

description string (Required)

A description of the tool function. This helps the model decide when and how to call the tool function.

parameters object (Required)

A description of the tool's parameters. The value must be a valid JSON Schema. For a description of JSON Schema, see this link. If the parameters parameter is empty, the function has no input parameters.

When you make a call using HTTP, place tools in the parameters object. This parameter is temporarily not supported for the qwen-vl series models.

tool_choice string or object (Optional) Default value: auto

The tool selection strategy. To force a tool call for a specific type of question, for example, to always use a specific tool or disable all tools, you can set this parameter.

  • auto

    The large language model independently chooses the tool strategy.

  • none

    If you want to temporarily disable tool calling in a specific request, you can set the tool_choice parameter to none.

  • {"type": "function", "function": {"name": "the_function_to_call"}}

    To force a call to a specific tool, you can set the tool_choice parameter to {"type": "function", "function": {"name": "the_function_to_call"}}, where the_function_to_call is the name of the specified tool function.

    Models in thinking mode do not support forcing a call to a specific tool.
In the Java SDK, this parameter is named toolChoice. When you make a call using HTTP, place tool_choice in the parameters object.
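For example, the following minimal sketch forces a call to the get_current_weather tool from the Function calling example earlier in this topic; tools is the array defined there.

import os
import dashscope

dashscope.base_http_api_url = 'https://dashscope-intl.aliyuncs.com/api/v1'
response = dashscope.Generation.call(
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    model="qwen-plus",
    messages=[{"role": "user", "content": "What is the weather like in Hangzhou?"}],
    tools=tools,  # the tools array from the Function calling example above
    # Force a call to get_current_weather instead of letting the model choose.
    tool_choice={"type": "function", "function": {"name": "get_current_weather"}},
    result_format="message",
)
print(response.output.choices[0].message.tool_calls)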

parallel_tool_calls boolean (Optional) Default value: false

Specifies whether to enable parallel tool calling.

Valid values:

  • true

  • false

For more information about parallel tool calling, see Parallel tool calling.

In the Java SDK, this parameter is named parallelToolCalls. When you make a call using HTTP, place parallel_tool_calls in the parameters object.

Chat response object (same format for streaming and non-streaming output)

{
  "status_code": 200,
  "request_id": "902fee3b-f7f0-9a8c-96a1-6b4ea25af114",
  "code": "",
  "message": "",
  "output": {
    "text": null,
    "finish_reason": null,
    "choices": [
      {
        "finish_reason": "stop",
        "message": {
          "role": "assistant",
          "content": "I am a large-scale language model developed by Alibaba Cloud. My name is Qwen."
        }
      }
    ]
  },
  "usage": {
    "input_tokens": 22,
    "output_tokens": 17,
    "total_tokens": 39
  }
}

status_code integer

The status code of the request. A value of 200 indicates that the request was successful. Otherwise, the request failed.

The Java SDK does not return this parameter. If the call fails, an exception is thrown that contains the content of the status_code and message parameters.

request_id string

The unique identifier for this call.

The Java SDK returns this parameter as requestId

code string

The error code. This parameter is empty when the call is successful.

Only the Python SDK returns this parameter.

output object

The result of the call.

Properties

text string

The reply generated by the model. This field contains the reply content when the result_format input parameter is set to text.

finish_reason string

This parameter is not empty when the result_format input parameter is set to text.

There are four scenarios:

  • null: Generation is in progress.

  • stop: The model's output ends naturally or triggers a stop condition in the input parameters.

  • length: The generation ends because the output exceeded the maximum length.

  • tool_calls: A tool call occurs.

choices array

Information about the model's output. The choices parameter is returned when result_format is set to message.

Properties

finish_reason string

The four cases are as follows:

  • null: Generation is in progress.

  • stop: The model's output ends naturally or triggers a stop condition in the input parameters.

  • length: The generation ends because the output exceeded the maximum length.

  • tool_calls: A tool call occurs.

message object

The message object from the model.

Properties

role string

The role of the output message. The value is fixed as assistant.

content string or array

The content of the output message. The value is an array when you use Qwen-VL or Qwen-Audio series models. Otherwise, the value is a string.

If you initiate Function calling, this value is empty.

Properties

text string

The content of the output message when you use Qwen-VL or Qwen-Audio series models.

image_hw array

If the vl_enable_image_hw_output parameter is enabled for a Qwen-VL series model, the following cases apply:

  • Image input: Returns the height and width of the image in pixels.

  • Video input: Returns an empty array.

reasoning_content string

The deep thinking content.

tool_calls array

If the model needs to call a tool, the `tool_calls` parameter is generated.

Properties

function object

The name of the called tool and its input parameters.

Properties

name string

The name of the called tool.

arguments string

The parameters to be passed to the tool, in the format of a JSON string.

Because large model responses are random, the output JSON string may not always match your function's requirements. We recommend that you validate the parameters before you pass them to the function.

index integer

The index of the current tool_calls object in the `tool_calls` array.

id string

The ID of this tool response.

type string

The type of the tool. The value is fixed as function.

logprobs object

The probability information for the current `choices` object.

Properties

content array

An array of tokens with log probability information.

Properties

token string

The current token.

bytes array

A list of the raw UTF-8 bytes for the current token. This is used to accurately restore the output content and is helpful when you handle emojis and Chinese characters.

logprob float

The log probability of the current token. A `null` return value indicates an extremely low probability.

top_logprobs array

The most likely tokens at the current token position and their log probabilities. The number of elements is the same as the value of the top_logprobs input parameter.

Properties

token string

The current token.

bytes array

A list of the raw UTF-8 bytes for the current token. This is used to accurately restore the output content and is helpful when you handle emojis and Chinese characters.

logprob float

The log probability of the current token. A `null` return value indicates an extremely low probability.

usage map

Information about the tokens used in this chat request.

Properties

input_tokens integer

The number of tokens in the user input.

output_tokens integer

The number of tokens in the model output.

input_tokens_details object

Details about the number of tokens in the input when you use the Qwen-VL model or QVQ model.

Properties

text_tokens integer

When you use the Qwen-VL model or QVQ model, this parameter indicates the number of tokens in the input text.

image_tokens integer

The number of tokens in the input image.

video_tokens integer

The number of tokens in the input video file or image list.

total_tokens integer

This field is returned when the input is plain text. The value is the sum of input_tokens and output_tokens.

image_tokens integer

This field is returned when the input includes an image. The value is the number of tokens in the user's input image.

video_tokens integer

This field is returned when the input includes a video. The value is the number of tokens in the user's input video.

audio_tokens integer

This field is returned when the input includes audio. The value is the number of tokens in the user's input audio.

output_tokens_details object

Details about the number of tokens in the output.

Properties

text_tokens integer

The number of tokens in the output text.

reasoning_tokens integer

The number of tokens in the Qwen3 model's thinking process.

prompt_tokens_details object

A fine-grained breakdown of input tokens.

Properties

cached_tokens integer

The number of tokens that hit the cache. For more information about the context cache, see Context cache.

cache_creation object

Information about the creation of an explicit cache.

Properties

ephemeral_5m_input_tokens integer

The number of tokens used to create an explicit cache with a 5-minute validity period.

cache_creation_input_tokens integer

The number of tokens used to create an explicit cache.

cache_type string

When you use an explicit cache, the value of this parameter is ephemeral. Otherwise, this parameter is not returned.

Error codes

If the model call returns an error message, see Error messages for a solution.