向量資料是向量Bucket的核心資源,由三個部分組成:
Key(主鍵):向量的唯一識別碼
Data(向量資料):高維數值數組
Metadata(中繼資料):Key-Value結構,用於儲存向量的附加屬性資訊(如類別、來源、時間戳記等),可用於查詢時的後置過濾。
每個向量都儲存在特定的向量索引中,繼承索引的維度、資料類型和距離度量配置。
寫入向量資料
將向量資料及其中繼資料上傳到指定的向量索引中。
單個向量索引表最多可儲存 20 億行向量資料。
寫入向量資料的 API (PutVectors)單批次最大寫入 500 條,每秒最多支援 5 次請求(即 QPS 上限為 5)。
控制台
在向量Bucket頁面,單擊已建立的向量Bucket。
在剛建立的索引行,單擊查看資料,單擊向量資料插入。
配置向量資料,可以同時添加多條向量資料:
主鍵值:為向量設定唯一識別碼。
向量資料:輸入向量數值數組,格式為用逗號分隔的數字,如:0.1, 0.2, 0.3, 0.4, 0.5。向量維度需要和選擇的 Embedding 模型的維度一致。
中繼資料:可添加中繼資料資訊,如類別、標題、時間戳記等。總大小最大支援 40KB。
支援添加的中繼資料類型為 String。
單行向量最多支援添加共 10 個可過濾中繼資料和不可過濾中繼資料欄位。
不可過濾中繼資料key的大小是 1 ~ 63字元。
單個可過濾中繼資料最大支援 2KB。
根據可過濾中繼資料進行標量後過濾時,單次過濾指令中的可過濾中繼資料長度最大為 64KB,單次過濾指令中的可過濾中繼資料數量最多為 1024 個,過濾指令最多支援 8 層,且支援過濾內容為空白。
單擊確定完成資料插入。
SDK
Python
import argparse
import alibabacloud_oss_v2 as oss
import alibabacloud_oss_v2.vectors as oss_vectors
# Command-line arguments accepted by this sample.
parser = argparse.ArgumentParser(description="vector put vectors sample")
parser.add_argument('--region', help='The region in which the bucket is located.', required=True)
parser.add_argument('--bucket', help='The name of the bucket.', required=True)
parser.add_argument('--endpoint', help='The domain names that other services can use to access OSS')
parser.add_argument('--index_name', help='The name of the vector index.', required=True)
parser.add_argument('--account_id', help='The account id.', required=True)


def main():
    """Write two sample 128-element float32 vectors into the given vector index."""
    args = parser.parse_args()

    # Loading credentials values from the environment variables
    credentials_provider = oss.credentials.EnvironmentVariableCredentialsProvider()

    # Using the SDK's default configuration
    cfg = oss.config.load_default()
    cfg.credentials_provider = credentials_provider
    cfg.region = args.region
    cfg.account_id = args.account_id
    # The endpoint is optional; only override it when supplied.
    if args.endpoint is not None:
        cfg.endpoint = args.endpoint

    vector_client = oss_vectors.Client(cfg)

    # Each entry carries the vector key, its data and its metadata.
    vectors = [
        {
            "data": {"float32": [0.1] * 128},
            "key": "key1",
            "metadata": {"metadata1": "value1", "metadata2": "value2"}
        },
        {
            "data": {"float32": [0.2] * 128},
            "key": "key2",
            "metadata": {"metadata3": "value3", "metadata4": "value4"}
        }
    ]

    result = vector_client.put_vectors(oss_vectors.models.PutVectorsRequest(
        bucket=args.bucket,
        index_name=args.index_name,
        vectors=vectors,
    ))

    print(f'status code: {result.status_code},'
          f' request id: {result.request_id},'
    )
if __name__ == "__main__":
    main()
Go
package main
import (
"context"
"flag"
"log"
"github.com/aliyun/alibabacloud-oss-go-sdk-v2/oss"
"github.com/aliyun/alibabacloud-oss-go-sdk-v2/oss/credentials"
"github.com/aliyun/alibabacloud-oss-go-sdk-v2/oss/vectors"
)
// Command-line flag values; populated by the flags registered in init.
var (
	region     string
	bucketName string
	accountId  string
)

// init registers the command-line flags of this sample.
func init() {
	flag.StringVar(&region, "region", "", "The region in which the vector bucket is located.")
	flag.StringVar(&bucketName, "bucket", "", "The name of the vector bucket.")
	flag.StringVar(&accountId, "account-id", "", "The id of vector account.")
}
func main() {
flag.Parse()
if len(bucketName) == 0 || len(region) == 0 || len(accountId) == 0 {
flag.PrintDefaults()
log.Fatalf("invalid parameters")
}
cfg := oss.LoadDefaultConfig().
WithCredentialsProvider(credentials.NewEnvironmentVariableCredentialsProvider()).
WithRegion(region).WithAccountId(accountId)
client := vectors.NewVectorsClient(cfg)
request := &vectors.PutVectorsRequest{
Bucket: oss.Ptr(bucketName),
IndexName: oss.Ptr("exampleIndex"),
Vectors: [ ]map[string]any{
{
"key": "vector1",
"data": map[string]any{
"float32": [ ]float32{1.2, 2.5, 3},
},
"metadata": map[string]any{
"Key1": "value2",
"Key2": [ ]string{"1", "2", "3"},
},
},
},
}
result, err := client.PutVectors(context.TODO(), request)
if err != nil {
log.Fatalf("failed to put vectors %v", err)
}
log.Printf("put vectors result:%#v\n", result)
}ossutil
在名為 examplebucket 的向量Bucket中,向名為 index 的向量索引添加一個向量,該向量的資料為 1,向量主鍵為 vector1,中繼資料為 {"Key1": "32"}。
使用JSON設定檔,vectors.json內容如下:
[ { "data": { "float32": [1] }, "key": "vector1", "metadata": { "Key1": "32" } } ]
命令樣本:
ossutil vectors-api put-vectors --bucket examplebucket --index-name index --vectors file://vectors.json
使用JSON配置參數:
ossutil vectors-api put-vectors --bucket examplebucket --index-name index --vectors "[{\"data\":{\"float32\":[1]},\"key\":\"vector1\",\"metadata\":{\"Key1\":\"32\"}}]"
API
調用PutVectors介面以寫入向量資料。
執行向量檢索
使用語義內容、中繼資料等條件,執行向量檢索操作,快速定位目標資料。具備亞秒級檢索效能,召回率為 90% 左右。
控制台
當前使用控制台進行向量資料查詢僅支援單行向量相似檢索,多次迴圈檢索請使用 API 或 SDK。
在向量Bucket頁面,單擊已建立的向量Bucket。
在剛建立的索引行,單擊查看資料,單擊向量資料查詢。
配置檢索參數:
向量資料:輸入查詢向量資料,格式與上傳時相同,如:0.15, 0.25, 0.35, 0.45, 0.55
可過濾中繼資料:通過中繼資料進行結果過濾,如類別、時間範圍等。
TopK(返回數量):設定返回最相似結果的數量,預設範圍:1~100。
返回相似距離:選擇是否返回相似性距離值
返回中繼資料:選擇是否返回向量的中繼資料資訊
單擊確定執行查詢
系統返回按相似性排序的向量列表。
SDK
Python
import argparse
import alibabacloud_oss_v2 as oss
import alibabacloud_oss_v2.vectors as oss_vectors
# Command-line arguments accepted by this sample.
parser = argparse.ArgumentParser(description="vector query vectors sample")
parser.add_argument('--region', help='The region in which the bucket is located.', required=True)
parser.add_argument('--bucket', help='The name of the bucket.', required=True)
parser.add_argument('--endpoint', help='The domain names that other services can use to access OSS')
parser.add_argument('--index_name', help='The name of the vector index.', required=True)
parser.add_argument('--account_id', help='The account id.', required=True)


def main():
    """Run a filtered top-10 similarity query against the given vector index."""
    args = parser.parse_args()

    # Loading credentials values from the environment variables
    credentials_provider = oss.credentials.EnvironmentVariableCredentialsProvider()

    # Using the SDK's default configuration
    cfg = oss.config.load_default()
    cfg.credentials_provider = credentials_provider
    cfg.region = args.region
    cfg.account_id = args.account_id
    # The endpoint is optional; only override it when supplied.
    if args.endpoint is not None:
        cfg.endpoint = args.endpoint

    vector_client = oss_vectors.Client(cfg)

    # Metadata filter: "type" must not be one of the listed values.
    query_filter = {
        "$and": [{
            "type": {
                "$nin": ["comedy", "documentary"]
            }
        }]
    }

    query_vector = {"float32": [0.1] * 128}

    result = vector_client.query_vectors(oss_vectors.models.QueryVectorsRequest(
        bucket=args.bucket,
        index_name=args.index_name,
        filter=query_filter,
        query_vector=query_vector,
        return_distance=True,
        return_metadata=True,
        top_k=10
    ))

    print(f'status code: {result.status_code},'
          f' request id: {result.request_id},'
    )

    # Print each returned vector, if any.
    if result.vectors:
        for vector in result.vectors:
            print(f'vector: {vector}')
if __name__ == "__main__":
    main()
Go
package main
import (
"context"
"flag"
"log"
"github.com/aliyun/alibabacloud-oss-go-sdk-v2/oss"
"github.com/aliyun/alibabacloud-oss-go-sdk-v2/oss/credentials"
"github.com/aliyun/alibabacloud-oss-go-sdk-v2/oss/vectors"
)
// Command-line flag values; populated by the flags registered in init.
var (
	region     string
	bucketName string
	accountId  string
	indexName  string
)

// init registers the command-line flags of this sample.
func init() {
	flag.StringVar(&region, "region", "", "The region in which the vector bucket is located.")
	flag.StringVar(&bucketName, "bucket", "", "The name of the vector bucket.")
	flag.StringVar(&accountId, "account-id", "", "The id of vector account.")
	flag.StringVar(&indexName, "index", "", "The name of vector index.")
}
func main() {
flag.Parse()
if len(bucketName) == 0 {
flag.PrintDefaults()
log.Fatalf("invalid parameters, bucket name required")
}
if len(region) == 0 {
flag.PrintDefaults()
log.Fatalf("invalid parameters, region required")
}
if len(accountId) == 0 {
flag.PrintDefaults()
log.Fatalf("invalid parameters, accounId required")
}
if len(indexName) == 0 {
flag.PrintDefaults()
log.Fatalf("invalid parameters, index required")
}
cfg := oss.LoadDefaultConfig().
WithCredentialsProvider(credentials.NewEnvironmentVariableCredentialsProvider()).
WithRegion(region).WithAccountId(accountId)
client := vectors.NewVectorsClient(cfg)
request := &vectors.QueryVectorsRequest{
Bucket: oss.Ptr(bucketName),
IndexName: oss.Ptr(indexName),
Filter: map[string]any{
"$and": []map[string]any{
{
"type": map[string]any{
"$in": []string{"comedy", "documentary"},
},
},
},
},
QueryVector: map[string]any{
"float32": []float32{float32(32)},
},
ReturnMetadata: oss.Ptr(true),
ReturnDistance: oss.Ptr(true),
TopK: oss.Ptr(10),
}
result, err := client.QueryVectors(context.TODO(), request)
if err != nil {
log.Fatalf("failed to query vectors %v", err)
}
log.Printf("query vectors result:%#v\n", result)
}ossutil
在名為 examplebucket 的向量Bucket中名為 index 的向量索引裡,查詢 type 為 comedy 或 documentary 的向量中與查詢向量最相似的前 10 條。
ossutil vectors-api query-vectors --bucket examplebucket --index-name index --filter "{\"$and\":[{\"type\":{\"$in\":[\"comedy\",\"documentary\"]}}]}" --query-vector "{\"float32\":[32]}" --top-k 10
API
調用QueryVectors介面以進行向量相似性檢索。
擷取向量資料
控制台
在向量Bucket頁面,單擊已建立的向量Bucket,進入索引列表頁面,單擊索引名稱,可以進入向量資料頁面查看向量資訊。
SDK
Python
import argparse
import alibabacloud_oss_v2 as oss
import alibabacloud_oss_v2.vectors as oss_vectors
# Command-line arguments accepted by this sample.
parser = argparse.ArgumentParser(description="vector get vectors sample")
parser.add_argument('--region', help='The region in which the bucket is located.', required=True)
parser.add_argument('--bucket', help='The name of the bucket.', required=True)
parser.add_argument('--endpoint', help='The domain names that other services can use to access OSS')
parser.add_argument('--index_name', help='The name of the vector index.', required=True)
parser.add_argument('--account_id', help='The account id.', required=True)


def main():
    """Fetch the vectors with keys 'key1' and 'key2', including data and metadata."""
    args = parser.parse_args()

    # Loading credentials values from the environment variables
    credentials_provider = oss.credentials.EnvironmentVariableCredentialsProvider()

    # Using the SDK's default configuration
    cfg = oss.config.load_default()
    cfg.credentials_provider = credentials_provider
    cfg.region = args.region
    cfg.account_id = args.account_id
    # The endpoint is optional; only override it when supplied.
    if args.endpoint is not None:
        cfg.endpoint = args.endpoint

    vector_client = oss_vectors.Client(cfg)

    keys = ['key1', 'key2']

    result = vector_client.get_vectors(oss_vectors.models.GetVectorsRequest(
        bucket=args.bucket,
        index_name=args.index_name,
        keys=keys,
        return_data=True,
        return_metadata=True
    ))

    print(f'status code: {result.status_code},'
          f' request id: {result.request_id},'
    )

    # Print each returned vector, if any.
    if result.vectors:
        for vector in result.vectors:
            print(f'vector id: {vector}')
if __name__ == "__main__":
    main()
Go
package main
import (
"context"
"flag"
"log"
"github.com/aliyun/alibabacloud-oss-go-sdk-v2/oss"
"github.com/aliyun/alibabacloud-oss-go-sdk-v2/oss/credentials"
"github.com/aliyun/alibabacloud-oss-go-sdk-v2/oss/vectors"
)
// Command-line flag values; populated by the flags registered in init.
var (
	region     string
	bucketName string
	accountId  string
)

// init registers the command-line flags of this sample.
func init() {
	flag.StringVar(&region, "region", "", "The region in which the vector bucket is located.")
	flag.StringVar(&bucketName, "bucket", "", "The name of the vector bucket.")
	flag.StringVar(&accountId, "account-id", "", "The id of vector account.")
}
func main() {
flag.Parse()
if len(bucketName) == 0 {
flag.PrintDefaults()
log.Fatalf("invalid parameters, bucket name required")
}
if len(region) == 0 {
flag.PrintDefaults()
log.Fatalf("invalid parameters, region required")
}
if len(accountId) == 0 {
flag.PrintDefaults()
log.Fatalf("invalid parameters, accounId required")
}
cfg := oss.LoadDefaultConfig().
WithCredentialsProvider(credentials.NewEnvironmentVariableCredentialsProvider()).
WithRegion(region).WithAccountId(accountId)
client := vectors.NewVectorsClient(cfg)
request := &vectors.GetVectorsRequest{
Bucket: oss.Ptr(bucketName),
IndexName: oss.Ptr("index"),
Keys: []string{"key1", "key2", "key3"},
ReturnData: oss.Ptr(true),
ReturnMetadata: oss.Ptr(false),
}
result, err := client.GetVectors(context.TODO(), request)
if err != nil {
log.Fatalf("failed to get vectors %v", err)
}
log.Printf("get vectors result:%#v\n", result)
}ossutil
擷取名為 examplebucket 的向量Bucket中索引名為index,主鍵為key和key1的向量屬性。
ossutil vectors-api get-vectors --bucket examplebucket --index-name index --keys key,key1
API
調用GetVectors介面以擷取指定的向量資料。
列舉向量資料
控制台
在向量Bucket頁面,單擊已建立的向量Bucket,進入索引列表頁面,單擊索引名稱。
SDK
Python
import argparse
import alibabacloud_oss_v2 as oss
import alibabacloud_oss_v2.vectors as oss_vectors
# Command-line arguments accepted by this sample.
parser = argparse.ArgumentParser(description="list vectors sample")
parser.add_argument('--region', help='The region in which the bucket is located.', required=True)
parser.add_argument('--endpoint', help='The domain names that other services can use to access OSS')
parser.add_argument('--account_id', help='The account id.', required=True)
parser.add_argument('--bucket', help='The name of the bucket.', required=True)
parser.add_argument('--index_name', help='The name of the vector index.', required=True)


def main():
    """Page through the given vector index and print every vector returned."""
    args = parser.parse_args()

    # Loading credentials values from the environment variables
    credentials_provider = oss.credentials.EnvironmentVariableCredentialsProvider()

    # Using the SDK's default configuration
    cfg = oss.config.load_default()
    cfg.credentials_provider = credentials_provider
    cfg.region = args.region
    cfg.account_id = args.account_id
    # The endpoint is optional; only override it when supplied.
    if args.endpoint is not None:
        cfg.endpoint = args.endpoint

    client = oss_vectors.Client(cfg)

    # Create the Paginator for the ListVectors operation
    paginator = client.list_vectors_paginator()

    # Create request with bucket and index name
    request = oss_vectors.models.ListVectorsRequest(
        bucket=args.bucket,
        index_name=args.index_name
    )

    # Iterate through the vectors pages
    for page in paginator.iter_page(request):
        for o in page.vectors:
            print(f'Vector: {o}')
if __name__ == "__main__":
    main()
Go
package main
import (
"context"
"flag"
"log"
"github.com/aliyun/alibabacloud-oss-go-sdk-v2/oss"
"github.com/aliyun/alibabacloud-oss-go-sdk-v2/oss/credentials"
"github.com/aliyun/alibabacloud-oss-go-sdk-v2/oss/vectors"
)
// Command-line flag values; populated by the flags registered in init.
var (
	region     string
	bucketName string
	accountId  string
	indexName  string
)

// init registers the command-line flags of this sample.
func init() {
	flag.StringVar(&region, "region", "", "The region in which the vector bucket is located.")
	flag.StringVar(&bucketName, "bucket", "", "The name of the vector bucket.")
	flag.StringVar(&accountId, "account-id", "", "The id of vector account.")
	flag.StringVar(&indexName, "index", "", "The name of vector index.")
}
func main() {
flag.Parse()
if len(region) == 0 {
flag.PrintDefaults()
log.Fatalf("invalid parameters, region required")
}
if len(bucketName) == 0 {
flag.PrintDefaults()
log.Fatalf("invalid parameters, bucket name required")
}
if len(accountId) == 0 {
flag.PrintDefaults()
log.Fatalf("invalid parameters, accounId required")
}
if len(indexName) == 0 {
flag.PrintDefaults()
log.Fatalf("invalid parameters, index required")
}
cfg := oss.LoadDefaultConfig().
WithCredentialsProvider(credentials.NewEnvironmentVariableCredentialsProvider()).
WithRegion(region).WithAccountId(accountId)
client := vectors.NewVectorsClient(cfg)
request := &vectors.ListVectorsRequest{
Bucket: oss.Ptr(bucketName),
IndexName: oss.Ptr(indexName),
ReturnMetadata: oss.Ptr(true),
ReturnData: oss.Ptr(false),
}
p := client.NewListVectorsPaginator(request)
var i int
log.Println("Vectors:")
for p.HasNext() {
i++
page, err := p.NextPage(context.TODO())
if err != nil {
log.Fatalf("failed to get page %v, %v", i, err)
}
for _, v := range page.Vectors {
log.Printf("vector:%v\n", v)
}
}
}ossutil
列舉名為 examplebucket 的向量Bucket中索引名為index下的所有向量。
ossutil vectors-api list-vectors --bucket examplebucket --index-name index
API
調用ListVectors介面以列舉向量索引中的所有向量資料。
刪除向量資料
支援大量刪除向量資料。刪除操作無法復原,請謹慎操作,確保已備份重要資料。
控制台
在向量Bucket頁面,單擊已建立的向量Bucket,進入索引列表頁面,單擊索引名稱,進入向量資料頁面查看向量資訊,選擇需要刪除的向量資料完成刪除操作。
SDK
Python
import argparse
import alibabacloud_oss_v2 as oss
import alibabacloud_oss_v2.vectors as oss_vectors
# Command-line arguments accepted by this sample.
parser = argparse.ArgumentParser(description="vector delete vectors sample")
parser.add_argument('--region', help='The region in which the bucket is located.', required=True)
parser.add_argument('--bucket', help='The name of the bucket.', required=True)
parser.add_argument('--endpoint', help='The domain names that other services can use to access OSS')
parser.add_argument('--index_name', help='The name of the vector index.', required=True)
parser.add_argument('--account_id', help='The account id.', required=True)


def main():
    """Delete the vectors with keys 'key1', 'key2' and 'key3' from the given index."""
    args = parser.parse_args()

    # Loading credentials values from the environment variables
    credentials_provider = oss.credentials.EnvironmentVariableCredentialsProvider()

    # Using the SDK's default configuration
    cfg = oss.config.load_default()
    cfg.credentials_provider = credentials_provider
    cfg.region = args.region
    cfg.account_id = args.account_id
    # The endpoint is optional; only override it when supplied.
    if args.endpoint is not None:
        cfg.endpoint = args.endpoint

    vector_client = oss_vectors.Client(cfg)

    keys = ['key1', 'key2', 'key3']

    result = vector_client.delete_vectors(oss_vectors.models.DeleteVectorsRequest(
        bucket=args.bucket,
        index_name=args.index_name,
        keys=keys,
    ))

    print(f'status code: {result.status_code},'
          f' request id: {result.request_id},'
    )
if __name__ == "__main__":
    main()
Go
package main
import (
"context"
"flag"
"log"
"github.com/aliyun/alibabacloud-oss-go-sdk-v2/oss"
"github.com/aliyun/alibabacloud-oss-go-sdk-v2/oss/credentials"
"github.com/aliyun/alibabacloud-oss-go-sdk-v2/oss/vectors"
)
// Command-line flag values; populated by the flags registered in init.
var (
	region     string
	bucketName string
	accountId  string
)

// init registers the command-line flags of this sample.
func init() {
	// Define the command-line parameters.
	flag.StringVar(&region, "region", "", "The region in which the vector bucket is located.")
	flag.StringVar(&bucketName, "bucket", "", "The name of the vector bucket.")
	flag.StringVar(&accountId, "account-id", "", "The id of vector account.")
}
func main() {
// 解析命令列參數
flag.Parse()
// 驗證必需參數
if len(bucketName) == 0 {
flag.PrintDefaults()
log.Fatalf("invalid parameters, bucket name required")
}
if len(region) == 0 {
flag.PrintDefaults()
log.Fatalf("invalid parameters, region required")
}
if len(accountId) == 0 {
flag.PrintDefaults()
log.Fatalf("invalid parameters, accountId required")
}
// 建立配置,設定憑證提供者、地區和帳號ID
cfg := oss.LoadDefaultConfig().
WithCredentialsProvider(credentials.NewEnvironmentVariableCredentialsProvider()).
WithRegion(region).
WithAccountId(accountId)
// 建立向量儲存用戶端
client := vectors.NewVectorsClient(cfg)
// 構造刪除向量資料的請求
request := &vectors.DeleteVectorsRequest{
Bucket: oss.Ptr(bucketName),
IndexName: oss.Ptr("index"),
Keys: [ ]string{
"key1", "key2", // 要刪除的向量鍵列表
},
}
// 執行刪除向量資料的操作
result, err := client.DeleteVectors(context.TODO(), request)
if err != nil {
log.Fatalf("failed to delete vectors %v", err)
}
// 輸出操作結果
log.Printf("delete vectors result:%#v\n", result)
}ossutil
刪除向量儲存空間examplebucket中向量索引名稱為index,向量主鍵為key、key1的向量。
ossutil vectors-api delete-vectors --bucket examplebucket --index-name index --keys key,key1
API
調用DeleteVectors介面以刪除向量資料。