CosyVoice
重要 cosyvoice-v3.5-plus 和 cosyvoice-v3.5-flash 模型目前仅在北京地域可用,且专门用于声音设计和声音复刻场景(无系统音色)。在使用它们进行语音合成之前,请先参见CosyVoice声音复刻/设计API创建目标音色。创建完成后,只需将代码中的 voice 字段更新为您的音色 ID,并将 model 字段指定为对应模型,即可正常运行。
使用系统音色进行语音合成

以下示例演示如何使用系统音色(参见音色列表)进行语音合成。

将合成音频保存为文件

Python
# coding=utf-8
import os
import dashscope
from dashscope.audio.tts_v2 import *

# --- Credentials ---
# API keys differ between the Singapore and Beijing regions.
# How to obtain one: https://www.alibabacloud.com/help/zh/model-studio/get-api-key
# Without an environment variable, replace the next line with: dashscope.api_key = "sk-xxx"
dashscope.api_key = os.environ.get('DASHSCOPE_API_KEY')

# --- Endpoint ---
# Singapore-region URL; for Beijing-region models use:
#   wss://dashscope.aliyuncs.com/api-ws/v1/inference
dashscope.base_websocket_api_url = 'wss://dashscope-intl.aliyuncs.com/api-ws/v1/inference'

# --- Model & voice ---
# Each model generation requires voices of the matching generation:
#   cosyvoice-v3-flash / cosyvoice-v3-plus -> voices such as "longanyang"
#   cosyvoice-v2                           -> voices such as "longxiaochun_v2"
# Voices differ in language support (Japanese, Korean, ...); see the CosyVoice voice list.
MODEL_NAME = "cosyvoice-v3-flash"
VOICE_NAME = "longanyang"

# Build the synthesizer with the chosen model and voice.
tts = SpeechSynthesizer(model=MODEL_NAME, voice=VOICE_NAME)

# Send the text and receive the binary audio in one blocking call.
audio_bytes = tts.call("今天天气怎么样?")

# The first request also establishes the WebSocket connection, so the
# first-package delay includes the connection setup time.
print('[Metric] requestId为:{},首包延迟为:{}毫秒'.format(
    tts.get_last_request_id(),
    tts.get_first_package_delay()))

# Persist the synthesized audio locally.
with open('output.mp3', 'wb') as f:
    f.write(audio_bytes)
Java
import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesisParam;
import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesizer;
import com.alibaba.dashscope.utils.Constants;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
public class Main {
    // Model selection.
    // Each model generation requires voices of the matching generation:
    //   cosyvoice-v3-flash / cosyvoice-v3-plus -> voices such as "longanyang"
    //   cosyvoice-v2                           -> voices such as "longxiaochun_v2"
    // Voices differ in language support; see the CosyVoice voice list.
    private static String model = "cosyvoice-v3-flash";
    // Voice selection.
    private static String voice = "longanyang";

    /**
     * Synthesizes a fixed sentence with the configured model/voice and
     * saves the returned binary audio to "output.mp3".
     */
    public static void streamAudioDataToSpeaker() {
        // Request parameters.
        SpeechSynthesisParam param =
            SpeechSynthesisParam.builder()
                // API keys differ between the Singapore and Beijing regions.
                // https://www.alibabacloud.com/help/zh/model-studio/get-api-key
                // Without the environment variable, replace the next line with: .apiKey("sk-xxx")
                .apiKey(System.getenv("DASHSCOPE_API_KEY"))
                .model(model) // model
                .voice(voice) // voice
                .build();
        // Synchronous mode: callback disabled (second argument is null).
        SpeechSynthesizer synthesizer = new SpeechSynthesizer(param, null);
        ByteBuffer audio = null;
        try {
            // Blocks until the full audio is returned.
            audio = synthesizer.call("今天天气怎么样?");
        } catch (Exception e) {
            throw new RuntimeException(e);
        } finally {
            // Always close the WebSocket connection when the task ends.
            synthesizer.getDuplexApi().close(1000, "bye");
        }
        if (audio != null) {
            // Save the audio data to the local file "output.mp3".
            File file = new File("output.mp3");
            // The first request also establishes the WebSocket connection, so the
            // first-package delay includes connection setup time.
            // FIX: added the missing "," separator so the log matches the Python sample.
            System.out.println(
                "[Metric] requestId为:"
                    + synthesizer.getLastRequestId()
                    + ",首包延迟(毫秒)为:"
                    + synthesizer.getFirstPackageDelay());
            try (FileOutputStream fos = new FileOutputStream(file)) {
                // FIX: copy exactly the readable bytes instead of ByteBuffer.array(),
                // which exposes the whole backing array (possibly larger than the
                // payload) and throws for buffers without an accessible array.
                byte[] audioBytes = new byte[audio.remaining()];
                audio.get(audioBytes);
                fos.write(audioBytes);
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    }

    public static void main(String[] args) {
        // Singapore-region URL; for Beijing-region models use:
        //   wss://dashscope.aliyuncs.com/api-ws/v1/inference
        Constants.baseWebsocketApiUrl = "wss://dashscope-intl.aliyuncs.com/api-ws/v1/inference";
        streamAudioDataToSpeaker();
        System.exit(0);
    }
}
将LLM生成的文本实时转成语音并通过扬声器播放

以下代码展示通过本地设备播放千问大语言模型(qwen-turbo)实时返回的文本内容。

Python
运行Python示例前,需要通过pip安装第三方音频播放库。
# coding=utf-8
# Installation instructions for pyaudio:
# APPLE Mac OS X
# brew install portaudio
# pip install pyaudio
# Debian/Ubuntu
# sudo apt-get install python-pyaudio python3-pyaudio
# or
# pip install pyaudio
# CentOS
# sudo yum install -y portaudio portaudio-devel && pip install pyaudio
# Microsoft Windows
# python -m pip install pyaudio
import os
import pyaudio
import dashscope
from dashscope.audio.tts_v2 import *
from http import HTTPStatus
from dashscope import Generation
# API keys differ between the Singapore and Beijing regions.
# Get an API key: https://www.alibabacloud.com/help/zh/model-studio/get-api-key
# Without an environment variable, replace the next line with: dashscope.api_key = "sk-xxx"
dashscope.api_key = os.environ.get('DASHSCOPE_API_KEY')
# Singapore-region URL; for Beijing-region models use: wss://dashscope.aliyuncs.com/api-ws/v1/inference
dashscope.base_websocket_api_url='wss://dashscope-intl.aliyuncs.com/api-ws/v1/inference'
# Each model generation requires voices of the matching generation:
#   cosyvoice-v3-flash / cosyvoice-v3-plus -> voices such as "longanyang"
#   cosyvoice-v2                           -> voices such as "longxiaochun_v2"
# Voices differ in language support (Japanese, Korean, ...); see the CosyVoice voice list.
model = "cosyvoice-v3-flash"
voice = "longanyang"
class Callback(ResultCallback):
    """Streams synthesized PCM audio to the local speaker via PyAudio."""

    _player = None  # pyaudio.PyAudio instance, created when the websocket opens
    _stream = None  # output audio stream, created when the websocket opens

    def on_open(self):
        print("websocket is open.")
        self._player = pyaudio.PyAudio()
        # 22050 Hz / mono / 16-bit must match the synthesizer's audio format.
        self._stream = self._player.open(
            format=pyaudio.paInt16, channels=1, rate=22050, output=True
        )

    def on_complete(self):
        print("speech synthesis task complete successfully.")

    def on_error(self, message: str):
        print(f"speech synthesis task failed, {message}")

    def on_close(self):
        print("websocket is closed.")
        # Stop and release the player.
        # FIX: guard against the connection closing before on_open ever ran,
        # which would otherwise raise AttributeError on the None attributes.
        if self._stream is not None:
            self._stream.stop_stream()
            self._stream.close()
        if self._player is not None:
            self._player.terminate()

    def on_event(self, message):
        # FIX: corrected the "synthsis" -> "synthesis" typo in the log message.
        print(f"recv speech synthesis message {message}")

    def on_data(self, data: bytes) -> None:
        print("audio result length:", len(data))
        self._stream.write(data)
def synthesizer_with_llm():
    """Streams qwen-turbo output into the TTS engine and plays it as it arrives."""
    callback = Callback()
    tts = SpeechSynthesizer(
        model=model,
        voice=voice,
        format=AudioFormat.PCM_22050HZ_MONO_16BIT,
        callback=callback,
    )
    messages = [{"role": "user", "content": "请介绍一下你自己"}]
    responses = Generation.call(
        model="qwen-turbo",
        messages=messages,
        result_format="message",  # return results in 'message' format
        stream=True,              # stream the LLM output
        incremental_output=True,  # deliver only incremental text chunks
    )
    for response in responses:
        # Guard clause: report errors and move on to the next chunk.
        if response.status_code != HTTPStatus.OK:
            print(
                "Request id: %s, Status code: %s, error code: %s, error message: %s"
                % (
                    response.request_id,
                    response.status_code,
                    response.code,
                    response.message,
                )
            )
            continue
        chunk = response.output.choices[0]["message"]["content"]
        print(chunk, end="")
        # Forward each text chunk to the synthesizer as it arrives.
        tts.streaming_call(chunk)
    # Signal that no more text will be sent.
    tts.streaming_complete()
    print('requestId: ', tts.get_last_request_id())


if __name__ == "__main__":
    synthesizer_with_llm()
使用声音复刻音色进行语音合成 声音复刻与语音合成是紧密关联的两个独立步骤,遵循“先创建,后使用”的流程: 准备录音文件 将符合声音复刻:输入音频格式的音频文件上传至公网可访问的位置,如阿里云对象存储OSS,并确保URL可公开访问。 创建音色 调用创建音色接口。此步骤必须指定target_model/targetModel,声明创建的音色将由哪个语音合成模型驱动。 若已有创建好的音色(调用查询音色列表接口查看),可跳过这一步直接进行下一步。 使用音色进行语音合成 使用创建音色接口创建音色成功后,系统会返回一个voice_id/voiceID: 该 voice_id/voiceID 可直接作为语音合成接口或各语言 SDK 中的 voice 参数使用,用于后续的文本转语音。 支持多种调用形态,包括非流式、单向流式以及双向流式合成。 合成时指定的语音合成模型必须与创建音色时的 target_model/targetModel 保持一致,否则合成会失败。
示例代码:
import os
import os
import time
import dashscope
from dashscope.audio.tts_v2 import VoiceEnrollmentService, SpeechSynthesizer

# 1. Environment setup
# Configure the API key via an environment variable (recommended).
# Keys differ between the Singapore and Beijing regions:
# https://www.alibabacloud.com/help/zh/model-studio/get-api-key
# Without the env var, replace the next line with: dashscope.api_key = "sk-xxx"
dashscope.api_key = os.getenv("DASHSCOPE_API_KEY")
if not dashscope.api_key:
    raise ValueError("DASHSCOPE_API_KEY environment variable not set.")
# Singapore-region WebSocket URL; Beijing: wss://dashscope.aliyuncs.com/api-ws/v1/inference
dashscope.base_websocket_api_url = 'wss://dashscope-intl.aliyuncs.com/api-ws/v1/inference'
# Singapore-region HTTP URL; Beijing: https://dashscope.aliyuncs.com/api/v1
dashscope.base_http_api_url = 'https://dashscope-intl.aliyuncs.com/api/v1'

# 2. Enrollment parameters
TARGET_MODEL = "cosyvoice-v3.5-plus"
# Prefix for the new voice: digits and lowercase letters only, fewer than 10 chars.
VOICE_PREFIX = "myvoice"
# Publicly accessible audio URL (sample shown; replace with your own).
AUDIO_URL = "https://dashscope.oss-cn-beijing.aliyuncs.com/samples/audio/cosyvoice/cosyvoice-zeroshot-sample.wav"

# 3. Create the voice (asynchronous server-side task)
print("--- Step 1: Creating voice enrollment ---")
service = VoiceEnrollmentService()
try:
    voice_id = service.create_voice(
        target_model=TARGET_MODEL,
        prefix=VOICE_PREFIX,
        url=AUDIO_URL
    )
    print(f"Voice enrollment submitted successfully. Request ID: {service.get_last_request_id()}")
    print(f"Generated Voice ID: {voice_id}")
except Exception as e:
    print(f"Error during voice creation: {e}")
    raise e

# 4. Poll until the voice is deployed
print("\n--- Step 2: Polling for voice status ---")
max_attempts = 30
poll_interval = 10  # seconds
for attempt in range(max_attempts):
    try:
        voice_info = service.query_voice(voice_id=voice_id)
        status = voice_info.get("status")
    except Exception as e:
        # Transient query failure: report, wait, and retry.
        print(f"Error during status polling: {e}")
        time.sleep(poll_interval)
        continue
    print(f"Attempt {attempt + 1}/{max_attempts}: Voice status is '{status}'")
    if status == "OK":
        print("Voice is ready for synthesis.")
        break
    # FIX: the failure check now lives OUTSIDE the try block. Previously the
    # RuntimeError raised for "UNDEPLOYED" was caught by the polling
    # except-handler, so a failed voice kept polling instead of aborting.
    if status == "UNDEPLOYED":
        print(f"Voice processing failed with status: {status}. Please check audio quality or contact support.")
        raise RuntimeError(f"Voice processing failed with status: {status}")
    # Intermediate states such as "DEPLOYING": keep waiting.
    time.sleep(poll_interval)
else:
    print("Polling timed out. The voice is not ready after several attempts.")
    raise RuntimeError("Polling timed out. The voice is not ready after several attempts.")

# 5. Synthesize speech with the cloned voice.
# The synthesis model MUST match the target_model used at enrollment time.
print("\n--- Step 3: Synthesizing speech with the new voice ---")
try:
    synthesizer = SpeechSynthesizer(model=TARGET_MODEL, voice=voice_id)
    text_to_synthesize = "恭喜,已成功复刻并合成了属于自己的声音!"
    # call() returns the binary audio data.
    audio_data = synthesizer.call(text_to_synthesize)
    print(f"Speech synthesis successful. Request ID: {synthesizer.get_last_request_id()}")
    # 6. Save the audio file
    output_file = "my_custom_voice_output.mp3"
    with open(output_file, "wb") as f:
        f.write(audio_data)
    print(f"Audio saved to {output_file}")
except Exception as e:
    print(f"Error during speech synthesis: {e}")
使用声音设计音色进行语音合成 声音设计与语音合成是紧密关联的两个独立步骤,遵循“先创建,后使用”的流程: 准备声音设计所需的声音描述与试听文本。 调用创建音色接口,创建一个专属音色,获取音色名和预览音频。 此步骤必须指定target_model,声明创建的音色将由哪个语音合成模型驱动 试听获取预览音频来判断是否符合预期;若符合要求,继续下一步,否则,重新设计。 若已有创建好的音色(调用查询音色列表接口查看),可跳过这一步直接进行下一步。 使用音色进行语音合成 使用创建音色接口创建音色成功后,系统会返回一个voice_id/voiceID: 该 voice_id/voiceID 可直接作为语音合成接口或各语言 SDK 中的 voice 参数使用,用于后续的文本转语音。 支持多种调用形态,包括非流式、单向流式以及双向流式合成。 合成时指定的语音合成模型必须与创建音色时的 target_model/targetModel 保持一致,否则合成会失败。
示例代码:

生成专属音色并试听效果,若对效果满意,进行下一步;否则重新生成。

Python
import requests
import base64
import os
def create_voice_and_play():
    """Create a designed voice via the TTS customization HTTP API.

    Sends a voice-design request (model "voice-enrollment"), decodes the
    Base64-encoded preview audio from the response, and saves it to a
    local WAV file.

    Returns:
        (voice_id, audio_bytes, filename) on success,
        (None, None, None) on any failure.
    """
    # API keys differ between the Singapore and Beijing regions.
    # https://www.alibabacloud.com/help/zh/model-studio/get-api-key
    # Without the env var, replace the next line with: api_key = "sk-xxx"
    api_key = os.getenv("DASHSCOPE_API_KEY")
    if not api_key:
        print("错误: 未找到DASHSCOPE_API_KEY环境变量,请先设置API Key")
        return None, None, None
    # Request headers and body.
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }
    data = {
        "model": "voice-enrollment",
        "input": {
            "action": "create_voice",
            "target_model": "cosyvoice-v3.5-plus",
            "voice_prompt": "A composed middle-aged male announcer with a deep, rich and magnetic voice, a steady speaking speed and clear articulation, is suitable for news broadcasting or documentary commentary.",
            "preview_text": "Dear listeners, hello everyone. Welcome to the evening news.",
            "prefix": "announcer"
        },
        "parameters": {
            "sample_rate": 24000,
            "response_format": "wav"
        }
    }
    # Singapore-region URL; Beijing: https://dashscope.aliyuncs.com/api/v1/services/audio/tts/customization
    url = "https://dashscope-intl.aliyuncs.com/api/v1/services/audio/tts/customization"
    try:
        response = requests.post(
            url,
            headers=headers,
            json=data,
            timeout=60  # avoid hanging forever on network issues
        )
        if response.status_code == 200:
            result = response.json()
            # Extract the generated voice ID.
            voice_id = result["output"]["voice_id"]
            print(f"音色ID: {voice_id}")
            # Decode the Base64-encoded preview audio.
            base64_audio = result["output"]["preview_audio"]["data"]
            audio_bytes = base64.b64decode(base64_audio)
            # Persist the preview audio locally.
            filename = f"{voice_id}_preview.wav"
            with open(filename, 'wb') as f:
                f.write(audio_bytes)
            # FIX: this message previously printed a literal placeholder instead
            # of the actual file name (the Java sample prints the filename here).
            print(f"音频已保存到本地文件: {filename}")
            print(f"文件路径: {os.path.abspath(filename)}")
            return voice_id, audio_bytes, filename
        else:
            print(f"请求失败,状态码: {response.status_code}")
            print(f"响应内容: {response.text}")
            return None, None, None
    except requests.exceptions.RequestException as e:
        print(f"网络请求发生错误: {e}")
        return None, None, None
    except KeyError as e:
        print(f"响应数据格式错误,缺少必要的字段: {e}")
        print(f"响应内容: {response.text if 'response' in locals() else 'No response'}")
        return None, None, None
    except Exception as e:
        print(f"发生未知错误: {e}")
        return None, None, None
if __name__ == "__main__":
    print("开始创建语音...")
    # Run the voice-design flow and unpack its three-element result.
    result = create_voice_and_play()
    voice_id, audio_data, saved_filename = result
    if not voice_id:
        print("\n音色创建失败")
    else:
        print(f"\n成功创建音色 '{voice_id}'")
        print(f"音频文件已保存: '{saved_filename}'")
        print(f"文件大小: {os.path.getsize(saved_filename)} 字节")
Java

需要导入Gson依赖,若是使用Maven或者Gradle,添加依赖方式如下:

Maven
在pom.xml中添加如下内容:
<!-- https://mvnrepository.com/artifact/com.google.code.gson/gson -->
<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
<version>2.13.1</version>
</dependency>
Gradle
在build.gradle中添加如下内容:
// https://mvnrepository.com/artifact/com.google.code.gson/gson
implementation("com.google.code.gson:gson:2.13.1")
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.Base64;
public class Main {
    public static void main(String[] args) {
        Main example = new Main();
        example.createVoice();
    }

    /**
     * Calls the voice-enrollment customization API to design a voice,
     * then decodes the Base64 preview audio from the response and saves
     * it to a local WAV file.
     */
    public void createVoice() {
        // API keys differ between the Singapore and Beijing regions.
        // https://www.alibabacloud.com/help/zh/model-studio/get-api-key
        // Without the environment variable, replace the next line with: String apiKey = "sk-xxx"
        String apiKey = System.getenv("DASHSCOPE_API_KEY");
        // JSON request body.
        String jsonBody = "{\n" +
            "    \"model\": \"voice-enrollment\",\n" +
            "    \"input\": {\n" +
            "        \"action\": \"create_voice\",\n" +
            "        \"target_model\": \"cosyvoice-v3.5-plus\",\n" +
            "        \"voice_prompt\": \"A composed middle-aged male announcer with a deep, rich and magnetic voice, a steady speaking speed and clear articulation, is suitable for news broadcasting or documentary commentary.\",\n" +
            "        \"preview_text\": \"Dear listeners, hello everyone. Welcome to the evening news.\",\n" +
            "        \"prefix\": \"announcer\"\n" +
            "    },\n" +
            "    \"parameters\": {\n" +
            "        \"sample_rate\": 24000,\n" +
            "        \"response_format\": \"wav\"\n" +
            "    }\n" +
            "}";
        HttpURLConnection connection = null;
        try {
            // Singapore-region URL; for Beijing-region models use:
            //   https://dashscope.aliyuncs.com/api/v1/services/audio/tts/customization
            URL url = new URL("https://dashscope-intl.aliyuncs.com/api/v1/services/audio/tts/customization");
            connection = (HttpURLConnection) url.openConnection();
            // Configure method and headers.
            connection.setRequestMethod("POST");
            connection.setRequestProperty("Authorization", "Bearer " + apiKey);
            connection.setRequestProperty("Content-Type", "application/json");
            connection.setDoOutput(true);
            connection.setDoInput(true);
            // Send the request body.
            try (OutputStream os = connection.getOutputStream()) {
                byte[] input = jsonBody.getBytes("UTF-8");
                os.write(input, 0, input.length);
                os.flush();
            }
            // Read the response.
            int responseCode = connection.getResponseCode();
            if (responseCode == HttpURLConnection.HTTP_OK) {
                StringBuilder response = new StringBuilder();
                try (BufferedReader br = new BufferedReader(
                        new InputStreamReader(connection.getInputStream(), "UTF-8"))) {
                    String responseLine;
                    while ((responseLine = br.readLine()) != null) {
                        response.append(responseLine.trim());
                    }
                }
                // Parse the JSON response.
                JsonObject jsonResponse = JsonParser.parseString(response.toString()).getAsJsonObject();
                JsonObject outputObj = jsonResponse.getAsJsonObject("output");
                JsonObject previewAudioObj = outputObj.getAsJsonObject("preview_audio");
                // Generated voice ID.
                String voiceId = outputObj.get("voice_id").getAsString();
                System.out.println("音色ID: " + voiceId);
                // Decode the Base64-encoded preview audio.
                String base64Audio = previewAudioObj.get("data").getAsString();
                byte[] audioBytes = Base64.getDecoder().decode(base64Audio);
                // Persist the preview audio locally.
                String filename = voiceId + "_preview.wav";
                saveAudioToFile(audioBytes, filename);
                System.out.println("音频已保存到本地文件: " + filename);
            } else {
                // FIX: getErrorStream() may return null (e.g. no error body);
                // guard before wrapping it to avoid a NullPointerException.
                StringBuilder errorResponse = new StringBuilder();
                InputStream errorStream = connection.getErrorStream();
                if (errorStream != null) {
                    try (BufferedReader br = new BufferedReader(
                            new InputStreamReader(errorStream, "UTF-8"))) {
                        String responseLine;
                        while ((responseLine = br.readLine()) != null) {
                            errorResponse.append(responseLine.trim());
                        }
                    }
                }
                System.out.println("请求失败,状态码: " + responseCode);
                System.out.println("错误响应: " + errorResponse.toString());
            }
        } catch (Exception e) {
            System.err.println("请求发生错误: " + e.getMessage());
            e.printStackTrace();
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
        }
    }

    /** Writes the audio bytes to the given file and logs the absolute path. */
    private void saveAudioToFile(byte[] audioBytes, String filename) {
        try {
            File file = new File(filename);
            try (FileOutputStream fos = new FileOutputStream(file)) {
                fos.write(audioBytes);
            }
            System.out.println("音频已保存到: " + file.getAbsolutePath());
        } catch (IOException e) {
            System.err.println("保存音频文件时发生错误: " + e.getMessage());
            e.printStackTrace();
        }
    }
}
使用上一步生成的专属音色进行语音合成。这里参考了非流式调用示例代码,将voice参数替换为声音设计生成的专属音色进行语音合成。

关键原则:声音设计时使用的模型 (target_model) 必须与后续进行语音合成时使用的模型 (model) 保持一致,否则会导致合成失败。

Python
# coding=utf-8
import dashscope
from dashscope.audio.tts_v2 import *
import os

# --- Credentials ---
# API keys differ between the Singapore and Beijing regions.
# https://www.alibabacloud.com/help/zh/model-studio/get-api-key
# Without an environment variable, replace the next line with: dashscope.api_key = "sk-xxx"
dashscope.api_key = os.environ.get('DASHSCOPE_API_KEY')

# --- Endpoint ---
# Singapore-region URL; for Beijing-region models use:
#   wss://dashscope.aliyuncs.com/api-ws/v1/inference
dashscope.base_websocket_api_url = 'wss://dashscope-intl.aliyuncs.com/api-ws/v1/inference'

# Voice design and speech synthesis must use the SAME model.
DESIGN_MODEL = "cosyvoice-v3.5-plus"
# Replace with the custom voice ID produced by the voice-design step.
DESIGN_VOICE = "your_voice"

# Build the synthesizer with the chosen model and designed voice.
tts = SpeechSynthesizer(model=DESIGN_MODEL, voice=DESIGN_VOICE)

# Send the text and receive the binary audio in one blocking call.
audio_data = tts.call("今天天气怎么样?")

# The first request also establishes the WebSocket connection, so the
# first-package delay includes the connection setup time.
print('[Metric] requestId为:{},首包延迟为:{}毫秒'.format(
    tts.get_last_request_id(),
    tts.get_first_package_delay()))

# Persist the synthesized audio locally.
with open('output.mp3', 'wb') as f:
    f.write(audio_data)
Java
import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesisParam;
import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesizer;
import com.alibaba.dashscope.utils.Constants;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
public class Main {
    // Voice design and speech synthesis must use the SAME model.
    private static String model = "cosyvoice-v3.5-plus";
    // Replace with the custom voice ID produced by the voice-design step.
    private static String voice = "your_voice_id";

    /**
     * Synthesizes a fixed sentence with the designed voice and saves the
     * returned binary audio to "output.mp3".
     */
    public static void streamAudioDataToSpeaker() {
        // Request parameters.
        SpeechSynthesisParam param =
            SpeechSynthesisParam.builder()
                // API keys differ between the Singapore and Beijing regions.
                // https://www.alibabacloud.com/help/zh/model-studio/get-api-key
                // Without the environment variable, replace the next line with: .apiKey("sk-xxx")
                .apiKey(System.getenv("DASHSCOPE_API_KEY"))
                .model(model) // model
                .voice(voice) // voice
                .build();
        // Synchronous mode: callback disabled (second argument is null).
        SpeechSynthesizer synthesizer = new SpeechSynthesizer(param, null);
        ByteBuffer audio = null;
        try {
            // Blocks until the full audio is returned.
            audio = synthesizer.call("今天天气怎么样?");
        } catch (Exception e) {
            throw new RuntimeException(e);
        } finally {
            // Always close the WebSocket connection when the task ends.
            synthesizer.getDuplexApi().close(1000, "bye");
        }
        if (audio != null) {
            // Save the audio data to the local file "output.mp3".
            File file = new File("output.mp3");
            // The first request also establishes the WebSocket connection, so the
            // first-package delay includes connection setup time.
            // FIX: added the missing "," separator so the log matches the Python sample.
            System.out.println(
                "[Metric] requestId为:"
                    + synthesizer.getLastRequestId()
                    + ",首包延迟(毫秒)为:"
                    + synthesizer.getFirstPackageDelay());
            try (FileOutputStream fos = new FileOutputStream(file)) {
                // FIX: copy exactly the readable bytes instead of ByteBuffer.array(),
                // which exposes the whole backing array (possibly larger than the
                // payload) and throws for buffers without an accessible array.
                byte[] audioBytes = new byte[audio.remaining()];
                audio.get(audioBytes);
                fos.write(audioBytes);
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    }

    public static void main(String[] args) {
        // Singapore-region URL; for Beijing-region models use:
        //   wss://dashscope.aliyuncs.com/api-ws/v1/inference
        Constants.baseWebsocketApiUrl = "wss://dashscope-intl.aliyuncs.com/api-ws/v1/inference";
        streamAudioDataToSpeaker();
        System.exit(0);
    }
}
|