Alibaba Cloud Model Studio: Embedding

Last Updated: Feb 06, 2026

Embedding models convert data such as text, images, and videos into numerical vectors. These vectors are used for downstream tasks such as semantic search, recommendation, clustering, classification, and anomaly detection.

Preparations

You must obtain an API key and set it as an environment variable. If you call the models through the OpenAI SDK or DashScope SDK, you must also install the corresponding SDK.
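
The code examples in this topic read the API key from the DASHSCOPE_API_KEY environment variable. The following is a minimal, optional pre-flight check (not part of the official setup steps) that you can run before trying the examples:

import os

# Optional check: the examples in this topic expect the API key in the
# DASHSCOPE_API_KEY environment variable.
if not os.getenv("DASHSCOPE_API_KEY"):
    raise RuntimeError("DASHSCOPE_API_KEY is not set. Export it or pass api_key explicitly in each call.")
print("DASHSCOPE_API_KEY is set.")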

Get embeddings

Text embedding

When you call the API, specify the text to embed and the model name in the request.

OpenAI compatible interface

Python

import os
from openai import OpenAI

input_text = "The quality of the clothes is excellent"

client = OpenAI(
    api_key=os.getenv("DASHSCOPE_API_KEY"),  # If you have not configured the environment variable, replace it with your API key here
    # The following is the URL for the Singapore region. If you use a model in the China (Beijing) region, replace the URL with: https://dashscope.aliyuncs.com/compatible-mode/v1
    base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
)

completion = client.embeddings.create(
    model="text-embedding-v4",
    input=input_text
)

print(completion.model_dump_json())

Node.js

const OpenAI = require("openai");

// Initialize the OpenAI client
const openai = new OpenAI({
    // Make sure you have correctly set the DASHSCOPE_API_KEY environment variable
    apiKey: process.env.DASHSCOPE_API_KEY, // If you have not configured the environment variable, replace it with your API key here
    // The following is the URL for the Singapore region. If you use a model in the China (Beijing) region, replace the URL with: https://dashscope.aliyuncs.com/compatible-mode/v1
    baseURL: 'https://dashscope-intl.aliyuncs.com/compatible-mode/v1'
});

async function getEmbedding() {
    try {
        const inputTexts = "The quality of the clothes is excellent";
        const completion = await openai.embeddings.create({
            model: "text-embedding-v4",
            input: inputTexts,
            dimensions: 1024 // Specify the embedding dimensions (This parameter is supported only by text-embedding-v3 and text-embedding-v4)
        });

        console.log(JSON.stringify(completion, null, 2));
    } catch (error) {
        console.error('Error:', error);
    }
}

getEmbedding();

curl

curl --location 'https://dashscope-intl.aliyuncs.com/compatible-mode/v1/embeddings' \
--header "Authorization: Bearer $DASHSCOPE_API_KEY" \
--header 'Content-Type: application/json' \
--data '{
    "model": "text-embedding-v4",
    "input": "The quality of the clothes is excellent"
}'

DashScope

Python

import dashscope
from http import HTTPStatus

# If you use a model in the China (Beijing) region, replace the base_url with: https://dashscope.aliyuncs.com/api/v1
dashscope.base_http_api_url = 'https://dashscope-intl.aliyuncs.com/api/v1'

input_text = "The quality of the clothes is excellent"
resp = dashscope.TextEmbedding.call(
    model="text-embedding-v4",
    input=input_text,
)

if resp.status_code == HTTPStatus.OK:
    print(resp)

Java

import com.alibaba.dashscope.embeddings.TextEmbedding;
import com.alibaba.dashscope.embeddings.TextEmbeddingParam;
import com.alibaba.dashscope.embeddings.TextEmbeddingResult;
import com.alibaba.dashscope.exception.NoApiKeyException;
import com.alibaba.dashscope.utils.Constants;

import java.util.Collections;
public class Main {
    static {
        Constants.baseHttpApiUrl="https://dashscope-intl.aliyuncs.com/api/v1";
        // For the China (Beijing) region, replace it with: https://dashscope.aliyuncs.com/api/v1
    }
    public static void main(String[] args) {
        String inputTexts = "The quality of the clothes is excellent";
        try {
            // Build the request parameters
            TextEmbeddingParam param = TextEmbeddingParam
                    .builder()
                    .model("text-embedding-v4")
                    // Input text
                    .texts(Collections.singleton(inputTexts))
                    .build();

            // Create a model instance and call it
            TextEmbedding textEmbedding = new TextEmbedding();
            TextEmbeddingResult result = textEmbedding.call(param);

            // Print the result
            System.out.println(result);

        } catch (NoApiKeyException e) {
            // Catch and handle the exception for an unset API key
            System.err.println("An exception occurred when calling the API: " + e.getMessage());
            System.err.println("Check if your API key is configured correctly.");
            e.printStackTrace();
        }
    }
}

curl

# If you use a model in the China (Beijing) region, replace the URL with: https://dashscope.aliyuncs.com/api/v1/services/embeddings/text-embedding/text-embedding
curl --location 'https://dashscope-intl.aliyuncs.com/api/v1/services/embeddings/text-embedding/text-embedding' \
--header "Authorization: Bearer $DASHSCOPE_API_KEY" \
--header 'Content-Type: application/json' \
--data '{
    "model": "text-embedding-v4",
    "input": {
        "texts": [
        "The quality of the clothes is excellent"
        ]
    }
}'

Independent multimodal embeddings

The independent multimodal embedding feature is available only through the DashScope SDK or API. It is not supported by the OpenAI compatible interface or the console. This feature generates separate, independent vectors for content of different modalities, such as text, images, and videos. It is suitable for scenarios where you need to process each content type individually.

import dashscope
import json
import os
from http import HTTPStatus

dashscope.base_http_api_url = 'https://dashscope-intl.aliyuncs.com/api/v1'
# The preceding is the base URL for the Singapore region. If you use a model in the China (Beijing) region, replace the base_url with: https://dashscope.aliyuncs.com/api/v1

# The input can be a video
# video = "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20250107/lbcemt/new+video.mp4"
# input = [{'video': video}]
# Or an image
image = "https://dashscope.oss-cn-beijing.aliyuncs.com/images/256_1.png"
input = [{'image': image}]
resp = dashscope.MultiModalEmbedding.call(
    # If you have not configured the environment variable, replace the following line with your Model Studio API key: api_key="sk-xxx",
    # API keys for the Singapore and China (Beijing) regions are different. To obtain an API key, see https://www.alibabacloud.com/help/zh/model-studio/get-api-key
    api_key=os.getenv('DASHSCOPE_API_KEY'),
    model="tongyi-embedding-vision-plus",
    input=input
)

print(json.dumps(resp.output, indent=4))

Fused multimodal embeddings

The fused multimodal embedding feature is available only through the DashScope SDK or API. It is not supported by the OpenAI compatible interface or the console. This feature combines content from different modalities, such as text, images, and videos, into a single fused vector. It is suitable for scenarios such as text-to-image search, image-to-image search, text-to-video search, and cross-modal retrieval.

qwen3-vl-embedding supports both fused and independent embedding generation: when text, an image, and a video are placed in the same input object, a single fused vector is generated; when they are provided as separate elements, an independent vector is generated for each. qwen2.5-vl-embedding supports only fused embeddings, not independent embeddings.

import dashscope
import json
import os
from http import HTTPStatus

# Fused multimodal embedding: Combines text, images, and videos into a single fused vector
# Suitable for scenarios such as cross-modal retrieval and image search
text = "This is a test text for generating a fused multimodal embedding"
image = "https://dashscope.oss-cn-beijing.aliyuncs.com/images/256_1.png"
video = "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20250107/lbcemt/new+video.mp4"

# The input includes text, an image, and a video. The model will combine them into a single fused vector.
input_data = [
    {
        "text": text,
        "image": image,
        "video": video
    }
]

# Use qwen3-vl-embedding to generate a fused vector
resp = dashscope.MultiModalEmbedding.call(
    # If you have not configured the environment variable, replace the following line with your Model Studio API key: api_key="sk-xxx",
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    model="qwen3-vl-embedding",
    input=input_data,
    # Optional parameter: Specify the embedding dimensions (Supports 2560, 2048, 1536, 1024, 768, 512, 256. Default: 2560)
    # parameters={"dimension": 1024}
)

print(json.dumps(resp.output, indent=4))

Model selection

The appropriate model depends on your input data type and application scenario.

  • For plain text or code: Use text-embedding-v4, the most powerful model currently available. It supports advanced features such as instructions and sparse vectors, and covers most text processing scenarios.

  • For multimodal content:

    • Fused multimodal embeddings: To represent single-modal or mixed-modal inputs as a fused vector for scenarios such as cross-modal retrieval and image search, use qwen3-vl-embedding. For example, you can input an image of a shirt with the text "find a similar style that looks more youthful", and the model fuses the image and the text instruction into a single vector for understanding (see the sketch after this list).

    • Independent embeddings: To generate an independent vector for each input, such as an image and its corresponding text title, choose tongyi-embedding-vision-plus, tongyi-embedding-vision-flash, or the general-purpose multimodal model multimodal-embedding-v1. Each part of the input (image, text) receives its own vector.
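
The following is a minimal sketch of the shirt scenario described above, using the fused call pattern shown earlier in this topic. The image URL is a placeholder that you must replace with your own accessible image; it is not from the official examples.

import dashscope
import json
import os

# Singapore region base URL. For the China (Beijing) region, use: https://dashscope.aliyuncs.com/api/v1
dashscope.base_http_api_url = 'https://dashscope-intl.aliyuncs.com/api/v1'

# Placeholder image URL: replace it with a real, accessible image of the shirt
shirt_image = "https://example.com/shirt.png"
instruction_text = "find a similar style that looks more youthful"

# Text and image placed in the same object are fused into a single vector
resp = dashscope.MultiModalEmbedding.call(
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    model="qwen3-vl-embedding",
    input=[{"text": instruction_text, "image": shirt_image}]
)

print(json.dumps(resp.output, indent=4))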

The following table provides detailed specifications for all available embedding models.

Text embedding

Singapore

| Model | Embedding dimensions | Batch size | Max tokens per text (Note) | Price (per 1M input tokens) | Supported languages | Free quota (Note) |
| --- | --- | --- | --- | --- | --- | --- |
| text-embedding-v4 (part of the Qwen3-Embedding series) | 2,048, 1,536, 1,024 (default), 768, 512, 256, 128, 64 | 10 | 8,192 | $0.07 | 100+ major languages, including Chinese, English, Spanish, French, Portuguese, Indonesian, Japanese, Korean, German, and Russian | 1 million tokens (valid for 90 days after activating Model Studio) |
| text-embedding-v3 | 1,024 (default), 768, 512 | 10 | 8,192 | $0.07 | 50+ major languages, including Chinese, English, Spanish, French, Portuguese, Indonesian, Japanese, Korean, German, and Russian | 500,000 tokens (valid for 90 days after activating Model Studio) |

Beijing

| Model | Embedding dimensions | Batch size | Max tokens per text (Note) | Price (per 1M input tokens) | Supported languages |
| --- | --- | --- | --- | --- | --- |
| text-embedding-v4 (part of the Qwen3-Embedding series) | 2,048, 1,536, 1,024 (default), 768, 512, 256, 128, 64 | 10 | 8,192 | $0.072 | 100+ major languages, including Chinese, English, Spanish, French, Portuguese, Indonesian, Japanese, Korean, and German, as well as multiple programming languages |

Note

Batch size is the maximum number of texts that can be processed in a single API call. For example, the batch size for text-embedding-v4 is 10. This means you can pass up to 10 texts for embedding in a single request, and each text must not exceed 8,192 tokens. This limit applies to:

  • String array input: The array can contain a maximum of 10 elements.

  • File input: The text file can contain a maximum of 10 lines of text.
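
If you need to embed more than 10 texts with text-embedding-v4, split them into chunks that respect the batch size. The following is a minimal sketch, assuming each individual text already fits within the 8,192-token limit:

import dashscope

# If you use a model in the China (Beijing) region, replace the base_url with: https://dashscope.aliyuncs.com/api/v1
dashscope.base_http_api_url = 'https://dashscope-intl.aliyuncs.com/api/v1'

BATCH_SIZE = 10  # batch size limit for text-embedding-v4


def embed_in_batches(texts):
    """Embed a list of texts of any length in batches of at most 10."""
    embeddings = []
    for start in range(0, len(texts), BATCH_SIZE):
        batch = texts[start:start + BATCH_SIZE]
        resp = dashscope.TextEmbedding.call(
            model="text-embedding-v4",
            input=batch
        )
        embeddings.extend(item['embedding'] for item in resp.output['embeddings'])
    return embeddings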

Multimodal embedding

The model generates continuous vectors based on user inputs, which can be text, images, or videos. This is suitable for tasks such as video classification, image classification, image-text retrieval, text/image-to-image search, and text/image-to-video search.

The interface supports uploading a single text segment, a single image, or a single video file. It also allows combinations of different types (such as text + image). Some models support multiple inputs of the same type (such as multiple images). For more information, see the restriction details for the specific model.
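
For example, the following minimal sketch submits one text segment and one image in a single request to tongyi-embedding-vision-plus, which returns an independent vector for each element. The sample text and image URL are reused from the examples earlier in this topic.

import dashscope
import json
import os

# Singapore region base URL. For the China (Beijing) region, use: https://dashscope.aliyuncs.com/api/v1
dashscope.base_http_api_url = 'https://dashscope-intl.aliyuncs.com/api/v1'

# One text element and one image element: each receives its own independent vector
input_data = [
    {'text': "The quality of the clothes is excellent"},
    {'image': "https://dashscope.oss-cn-beijing.aliyuncs.com/images/256_1.png"}
]

resp = dashscope.MultiModalEmbedding.call(
    api_key=os.getenv('DASHSCOPE_API_KEY'),
    model="tongyi-embedding-vision-plus",
    input=input_data
)

print(json.dumps(resp.output, indent=4))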

Singapore

| Model | Embedding dimensions | Text length limit | Image size limit | Video size limit | Price (per 1M input tokens) | Free quota (Note) |
| --- | --- | --- | --- | --- | --- | --- |
| tongyi-embedding-vision-plus | 1152, 1024, 512, 256, 128, 64 | 1,024 tokens | A single file cannot be larger than 3 MB | Video file size up to 10 MB | Image/Video: $0.09; Text: $0.09 | 1 million tokens (valid for 90 days after activating Model Studio) |
| tongyi-embedding-vision-flash | 768, 512, 256, 128, 64 | 1,024 tokens | A single file cannot be larger than 3 MB | Video file size up to 10 MB | Image/Video: $0.03; Text: $0.09 | 1 million tokens (valid for 90 days after activating Model Studio) |

Beijing

| Model | Embedding dimensions | Text length limit | Image size limit | Video size limit | Price (per 1M input tokens) |
| --- | --- | --- | --- | --- | --- |
| qwen3-vl-embedding | 2560, 2048, 1536, 1024, 768, 512, 256 | 32,000 tokens | Max 1 image, up to 5 MB | Video file size up to 50 MB | Image/Video: $0.258; Text: $0.1 |
| multimodal-embedding-v1 | 1024 | 512 tokens | Max 8 images, up to 3 MB each | Video file size up to 10 MB | Free trial |

Input restrictions:

Fused multimodal embedding models

| Model | Text | Image | Video | Max elements per request |
| --- | --- | --- | --- | --- |
| qwen3-vl-embedding | Supports 33 major languages, including Chinese, English, Japanese, Korean, French, and German | JPEG, PNG, WEBP, BMP, TIFF, ICO, DIB, ICNS, SGI (URL or Base64) | MP4, AVI, MOV (URL only) | Total content elements per request must not exceed 20; images, text, and videos share this limit |

Independent multimodal embedding models

| Model | Text | Image | Video | Max elements per request |
| --- | --- | --- | --- | --- |
| tongyi-embedding-vision-plus | Chinese and English | JPG, PNG, BMP (URL or Base64) | MP4, MPEG, AVI, MOV, MPG, WEBM, FLV, MKV (URL only) | No limit on the number of content elements; the total token count must not exceed the token limit |
| tongyi-embedding-vision-flash | Chinese and English | JPG, PNG, BMP (URL or Base64) | MP4, MPEG, AVI, MOV, MPG, WEBM, FLV, MKV (URL only) | No limit on the number of content elements; the total token count must not exceed the token limit |
| multimodal-embedding-v1 | Chinese and English | JPG, PNG, BMP (URL or Base64) | MP4, MPEG, AVI, MOV, MPG, WEBM, FLV, MKV (URL only) | Total content elements per request must not exceed 20: max 1 image, 1 video, and 20 text entries, sharing the total limit |

Core features

Switch embedding dimensions

text-embedding-v4, text-embedding-v3, tongyi-embedding-vision-plus, tongyi-embedding-vision-flash, and qwen3-vl-embedding support custom embedding dimensions. Higher dimensions retain richer semantic information but increase storage and computation costs.

  • General scenarios (Recommended): 1024 dimensions offer the best balance between performance and cost, suitable for most semantic retrieval tasks.

  • High-precision scenarios: For domains requiring high precision, choose 1536 or 2048 dimensions. This provides a certain level of precision improvement but significantly increases storage and computation overhead.

  • Resource-constrained scenarios: In cost-sensitive scenarios, choose 768 or lower dimensions. This significantly reduces resource consumption but results in some loss of semantic information.

OpenAI compatible interface

import os
from openai import OpenAI

client = OpenAI(
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    # The following is the URL for the Singapore region. If you use a model in the China (Beijing) region, replace the URL with: https://dashscope.aliyuncs.com/compatible-mode/v1
    base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
)

resp = client.embeddings.create(
    model="text-embedding-v4",
    input=["I like it and will buy from here again"],
    # Set the embedding dimensions to 256
    dimensions=256
)
print(f"Embedding dimensions: {len(resp.data[0].embedding)}")

DashScope

import dashscope

# If you use a model in the China (Beijing) region, replace the base_url with: https://dashscope.aliyuncs.com/api/v1
dashscope.base_http_api_url = 'https://dashscope-intl.aliyuncs.com/api/v1'

resp = dashscope.TextEmbedding.call(
    model="text-embedding-v4",
    input=["I like it and will buy from here again"],
    # Set the embedding dimensions to 256
    dimension=256
)

print(f"Embedding dimensions: {len(resp.output['embeddings'][0]['embedding'])}")

Distinguish between query and document text (text_type)

This parameter is currently available only through the DashScope SDK and API.

In search-related tasks, queries and documents play different roles, and embedding them differently produces better results. The text_type parameter is designed for this purpose:

  • text_type: 'query': Use for query text entered by the user. The model generates a "title-like" vector that is more directional and optimized for "asking" and "finding".

  • text_type: 'document' (default): Use for document text stored in the database. The model generates a "body-like" vector that contains more comprehensive information and is optimized for being retrieved.

When using short text to match long text, distinguish between query and document. For tasks where all texts have the same role, such as clustering or classification, you do not need to set this parameter.
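
The following is a minimal sketch of applying text_type on both sides of a retrieval task; the query and document strings are illustrative only:

import dashscope

# If you use a model in the China (Beijing) region, replace the base_url with: https://dashscope.aliyuncs.com/api/v1
dashscope.base_http_api_url = 'https://dashscope-intl.aliyuncs.com/api/v1'

# Embed the user query with text_type='query'
query_resp = dashscope.TextEmbedding.call(
    model="text-embedding-v4",
    input="How do I return an item?",
    text_type="query"
)

# Embed the stored documents with text_type='document' (the default)
doc_resp = dashscope.TextEmbedding.call(
    model="text-embedding-v4",
    input=[
        "Returns are accepted within 30 days of delivery.",
        "Shipping usually takes 3 to 5 business days."
    ],
    text_type="document"
)

print(len(query_resp.output['embeddings'][0]['embedding']))
print(len(doc_resp.output['embeddings']))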

Use instructions to improve performance (instruct)

This parameter is currently available only through the DashScope SDK and API.

Provide a clear English instruction to guide text-embedding-v4 to optimize vector quality for a specific retrieval scenario, which can effectively improve precision. When you use this feature, set the text_type parameter to query.

import dashscope

# If you use a model in the China (Beijing) region, replace the base_url with: https://dashscope.aliyuncs.com/api/v1
dashscope.base_http_api_url = 'https://dashscope-intl.aliyuncs.com/api/v1'

# Scenario: When building query vectors for a search engine, add an instruction to optimize the vector quality for retrieval.
resp = dashscope.TextEmbedding.call(
    model="text-embedding-v4",
    input="Research papers on machine learning",
    text_type="query",
    instruct="Given a research paper query, retrieve relevant research paper"
)

print(resp)

Dense and sparse vectors

This parameter is currently available only through the DashScope SDK and API.

text-embedding-v4 and text-embedding-v3 support three types of vector output to meet the needs of different retrieval strategies.

| Vector type (output_type) | Core advantages | Main drawbacks | Typical application scenarios |
| --- | --- | --- | --- |
| dense | Deep semantic understanding; can identify synonyms and context, leading to more relevant retrieval results. | Higher computational and storage costs; cannot guarantee exact keyword matching. | Semantic search, AI chat, content recommendation. |
| sparse | High computational efficiency; focuses on exact keyword matching and fast filtering. | Sacrifices semantic understanding; cannot handle synonyms or context. | Log retrieval, product SKU search, precise information filtering. |
| dense&sparse | Combines semantics and keywords for the best search results; the API call overhead is the same as the single-vector mode. | Larger storage requirements; more complex system architecture and retrieval logic. | High-quality, production-grade hybrid search engines. |
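
The following is a minimal sketch of requesting both vector types in one call with the DashScope SDK. The output_type values come from the table above; the exact structure of the sparse part of the response may vary by SDK version, so the sketch simply prints the fields of the first returned embedding for inspection.

import dashscope

# If you use a model in the China (Beijing) region, replace the base_url with: https://dashscope.aliyuncs.com/api/v1
dashscope.base_http_api_url = 'https://dashscope-intl.aliyuncs.com/api/v1'

resp = dashscope.TextEmbedding.call(
    model="text-embedding-v4",
    input="hybrid search combines keywords and semantics",
    # Request dense and sparse vectors in a single call
    output_type="dense&sparse"
)

# Inspect the returned fields; in addition to the dense 'embedding',
# a sparse representation is included when output_type requests it.
print(list(resp.output['embeddings'][0].keys()))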

Use examples

The following code is for demonstration purposes only. In a production environment, pre-compute embedding vectors and store them in a vector database. During retrieval, you only need to compute the query vector.

Semantic search

Achieve precise semantic matching by calculating the vector similarity between a query and documents.

import dashscope
import numpy as np
from dashscope import TextEmbedding

# If you use a model in the China (Beijing) region, replace the base_url with: https://dashscope.aliyuncs.com/api/v1
dashscope.base_http_api_url = 'https://dashscope-intl.aliyuncs.com/api/v1'

def cosine_similarity(a, b):
    """Calculate cosine similarity"""
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

def semantic_search(query, documents, top_k=5):
    """Perform semantic search"""
    # Generate the query vector
    query_resp = TextEmbedding.call(
        model="text-embedding-v4",
        input=query,
        dimension=1024
    )
    query_embedding = query_resp.output['embeddings'][0]['embedding']

    # Generate the document vectors
    doc_resp = TextEmbedding.call(
        model="text-embedding-v4",
        input=documents,
        dimension=1024
    )

    # Calculate similarities
    similarities = []
    for i, doc_emb in enumerate(doc_resp.output['embeddings']):
        similarity = cosine_similarity(query_embedding, doc_emb['embedding'])
        similarities.append((i, similarity))

    # Sort and return the top_k results
    similarities.sort(key=lambda x: x[1], reverse=True)
    return [(documents[i], sim) for i, sim in similarities[:top_k]]

# Example usage
documents = [
    "Artificial intelligence is a branch of computer science",
    "Machine learning is an important method for achieving artificial intelligence",
    "Deep learning is a subfield of machine learning"
]
query = "What is AI?"
results = semantic_search(query, documents, top_k=2)
for doc, sim in results:
    print(f"Similarity: {sim:.3f}, Document: {doc}")

Recommendation system

Discover user interests and recommend similar items by analyzing vectors from their historical behavior.

import dashscope
import numpy as np
from dashscope import TextEmbedding

# If you use a model in the China (Beijing) region, replace the base_url with: https://dashscope.aliyuncs.com/api/v1
dashscope.base_http_api_url = 'https://dashscope-intl.aliyuncs.com/api/v1'

def cosine_similarity(a, b):
    """Calculate cosine similarity"""
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))


def build_recommendation_system(user_history, all_items, top_k=10):
    """Build a recommendation system"""
    # Generate user history vectors
    history_resp = TextEmbedding.call(
        model="text-embedding-v4",
        input=user_history,
        dimension=1024
    )

    # Calculate the user preference vector (by averaging)
    user_embedding = np.mean([
        emb['embedding'] for emb in history_resp.output['embeddings']
    ], axis=0)

    # Generate all item vectors
    items_resp = TextEmbedding.call(
        model="text-embedding-v4",
        input=all_items,
        dimension=1024
    )

    # Calculate recommendation scores
    recommendations = []
    for i, item_emb in enumerate(items_resp.output['embeddings']):
        score = cosine_similarity(user_embedding, item_emb['embedding'])
        recommendations.append((all_items[i], score))

    # Sort and return the recommendation results
    recommendations.sort(key=lambda x: x[1], reverse=True)
    return recommendations[:top_k]

# Example usage
user_history = ["Science Fiction", "Action", "Suspense"]
all_movies = ["Future World", "Space Adventure", "Ancient War", "Romantic Journey", "Superhero"]
recommendations = build_recommendation_system(user_history, all_movies)
for movie, score in recommendations:
    print(f"Recommendation Score: {score:.3f}, Movie: {movie}")

Text clustering

Automatically group similar texts by analyzing the distances between their vectors.

# scikit-learn is required: pip install scikit-learn
import dashscope
import numpy as np
from sklearn.cluster import KMeans

# If you use a model in the China (Beijing) region, replace the base_url with: https://dashscope.aliyuncs.com/api/v1
dashscope.base_http_api_url = 'https://dashscope-intl.aliyuncs.com/api/v1'

def cluster_texts(texts, n_clusters=2):
    """Cluster a set of texts"""
    # 1. Get the vectors for all texts
    resp = dashscope.TextEmbedding.call(
        model="text-embedding-v4",
        input=texts,
        dimension=1024
    )
    embeddings = np.array([item['embedding'] for item in resp.output['embeddings']])

    # 2. Use the KMeans algorithm for clustering
    kmeans = KMeans(n_clusters=n_clusters, random_state=0, n_init='auto').fit(embeddings)

    # 3. Organize and return the results
    clusters = {i: [] for i in range(n_clusters)}
    for i, label in enumerate(kmeans.labels_):
        clusters[label].append(texts[i])
    return clusters


# Example usage
documents_to_cluster = [
    "Mobile phone company A releases a new phone",
    "Search engine company B launches a new system",
    "World Cup final: Argentina vs. France",
    "China wins another gold medal at the Olympics",
    "A company releases its latest AI chip",
    "European Cup match report"
]
clusters = cluster_texts(documents_to_cluster, n_clusters=2)
for cluster_id, docs in clusters.items():
    print(f"--- Cluster {cluster_id} ---")
    for doc in docs:
        print(f"- {doc}")

Text classification

Recognize and classify new categories without pre-labeled examples by calculating the vector similarity between the input text and predefined labels.

import dashscope
import numpy as np

# If you use a model in the China (Beijing) region, replace the base_url with: https://dashscope.aliyuncs.com/api/v1
dashscope.base_http_api_url = 'https://dashscope-intl.aliyuncs.com/api/v1'

def cosine_similarity(a, b):
    """Calculate cosine similarity"""
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))


def classify_text_zero_shot(text, labels):
    """Perform zero-shot text classification"""
    # 1. Get the vectors for the input text and all labels
    resp = dashscope.TextEmbedding.call(
        model="text-embedding-v4",
        input=[text] + labels,
        dimension=1024
    )
    embeddings = resp.output['embeddings']
    text_embedding = embeddings[0]['embedding']
    label_embeddings = [emb['embedding'] for emb in embeddings[1:]]

    # 2. Calculate the similarity with each label
    scores = [cosine_similarity(text_embedding, label_emb) for label_emb in label_embeddings]

    # 3. Return the label with the highest similarity
    best_match_index = np.argmax(scores)
    return labels[best_match_index], scores[best_match_index]


# Example usage
text_to_classify = "The fabric of this dress is comfortable and the style is nice"
possible_labels = ["Digital Products", "Apparel & Accessories", "Food & Beverage", "Home & Living"]

label, score = classify_text_zero_shot(text_to_classify, possible_labels)
print(f"Input text: '{text_to_classify}'")
print(f"Best matching category: '{label}' (Similarity: {score:.3f})")

Anomaly detection

Identify anomalous data that differs significantly from normal patterns by calculating the vector similarity between a text vector and the center of normal sample vectors.

The threshold in the example code is for demonstration purposes only. In real-world business scenarios, the appropriate similarity threshold depends on your data content and distribution; there is no universally valid value. Calibrate it on your own dataset.

import dashscope
import numpy as np

# If you use a model in the China (Beijing) region, replace the base_url with: https://dashscope.aliyuncs.com/api/v1
dashscope.base_http_api_url = 'https://dashscope-intl.aliyuncs.com/api/v1'

def cosine_similarity(a, b):
    """Calculate cosine similarity"""
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))


def detect_anomaly(new_comment, normal_comments, threshold=0.6):
    # 1. Vectorize all normal comments and the new comment
    all_texts = normal_comments + [new_comment]
    resp = dashscope.TextEmbedding.call(
        model="text-embedding-v4",
        input=all_texts,
        dimension=1024
    )
    embeddings = [item['embedding'] for item in resp.output['embeddings']]

    # 2. Calculate the center vector (average value) of the normal comments
    normal_embeddings = np.array(embeddings[:-1])
    normal_center_vector = np.mean(normal_embeddings, axis=0)

    # 3. Calculate the similarity between the new comment and the center vector
    new_comment_embedding = np.array(embeddings[-1])
    similarity = cosine_similarity(new_comment_embedding, normal_center_vector)

    # 4. Determine if it is an anomaly
    is_anomaly = similarity < threshold
    return is_anomaly, similarity


# Example usage
normal_user_comments = [
    "Today's meeting was productive",
    "The project is progressing smoothly",
    "The new version will be released next week",
    "User feedback is positive"
]

test_comments = {
    "Normal comment": "The feature works as expected",
    "Anomaly - meaningless garbled text": "asdfghjkl zxcvbnm"
}

print("--- Anomaly Detection Example ---")
for desc, comment in test_comments.items():
    is_anomaly, score = detect_anomaly(comment, normal_user_comments)
    result = "Yes" if is_anomaly else "No"
    print(f"Comment: '{comment}'")
    print(f"Is anomaly: {result} (Similarity to normal samples: {score:.3f})\n")

API reference

Error codes

If a call fails, see Error messages for troubleshooting.

Rate limiting

See Rate limits.

Model performance (MTEB/CMTEB)

Evaluation benchmarks

  • MTEB: Massive Text Embedding Benchmark, a comprehensive evaluation of general-purpose capabilities for tasks such as classification, clustering, and retrieval.

  • CMTEB: Chinese Massive Text Embedding Benchmark, an evaluation specifically for Chinese text.

  • Scores range from 0 to 100. A higher value indicates better performance.

| Model | MTEB | MTEB (Retrieval task) | CMTEB | CMTEB (Retrieval task) |
| --- | --- | --- | --- | --- |
| text-embedding-v3 (512 dimensions) | 62.11 | 54.30 | 66.81 | 71.88 |
| text-embedding-v3 (768 dimensions) | 62.43 | 54.74 | 67.90 | 72.29 |
| text-embedding-v3 (1024 dimensions) | 63.39 | 55.41 | 68.92 | 73.23 |
| text-embedding-v4 (512 dimensions) | 64.73 | 56.34 | 68.79 | 73.33 |
| text-embedding-v4 (1024 dimensions) | 68.36 | 59.30 | 70.14 | 73.98 |
| text-embedding-v4 (2048 dimensions) | 71.58 | 61.97 | 71.99 | 75.01 |