全部产品
Search
文档中心

Tablestore:Menulis data vektor

更新时间:Jan 18, 2026

Tabel data mendukung penulisan data vektor dalam format string dan biner. Format string lebih mudah dibaca dan menyederhanakan troubleshooting, sedangkan format biner mengurangi biaya penyimpanan.

Prasyarat

Anda telah mengonversi konten, seperti gambar, video, dan teks, menjadi data vektor menggunakan Large Language Model (LLM). Untuk informasi selengkapnya, lihat Generate vectors.

Format biner

Menyimpan data vektor dalam format biner membutuhkan ruang disk yang lebih kecil dan mengurangi biaya penyimpanan. Untuk skenario yang sensitif terhadap biaya dengan dimensi vektor tinggi, tulis data vektor dalam format biner.

Saat menulis data vektor dalam format biner, Anda harus mengonversi vektor tersebut ke data biner menggunakan Tablestore SDK atau tool.

Penting
  • Meskipun ditulis dalam format biner, vektor tetap bertipe Float32.

  • Vektor disimpan di tabel data dalam format biner dan juga dibaca sebagai data biner. Untuk meningkatkan keterbacaan, Anda dapat menggunakan kelas utilitas untuk mengonversinya ke format string.

Mengonversi ke biner menggunakan Tablestore SDK

Catatan

Mulai dari Java SDK versi 5.17.6 dan Python SDK versi 6.2.1, Tablestore mendukung konversi biner data vektor menggunakan kelas utilitas VectorUtils.

import com.alicloud.openservices.tablestore.SyncClient;
import com.alicloud.openservices.tablestore.model.*;
import com.alicloud.openservices.tablestore.model.search.vector.VectorUtils;

import java.util.Random;
import java.util.UUID;


/**
 * Builds a float array of the requested size filled with pseudo-random values.
 *
 * @param length number of elements in the returned array
 * @param random generator that supplies one {@code nextFloat()} value per element
 * @return a new array whose elements were drawn from {@code random} in index order
 */
private static float[] generateRandomFloats(int length, Random random) {
    final float[] values = new float[length];
    int index = 0;
    while (index < values.length) {
        values[index++] = random.nextFloat();
    }
    return values;
}

/**
 * Writes 1,000 sample rows in 10 BatchWriteRow requests of 100 rows each, storing the vector
 * column in binary format.
 *
 * <p>Every row gets a random UUID as {@code PK_1}, plus the attribute columns
 * {@code field_string}, {@code field_long}, {@code field_text} and {@code field_vector}. After
 * each request the overall result is printed, followed by the primary key and error of every
 * failed row.
 *
 * @param tableStoreClient client used to send the batch requests
 * @throws Exception declared so that any failure raised while sending a batch reaches the caller
 */
private static void batchWriteRow(SyncClient tableStoreClient) throws Exception {
    Random random = new Random();
    // Your business data. The value is the same for every row, so it is created once.
    String text = "A string for full-text search. An embedding vector is generated from this field and written to the field_vector field below for vector semantic similarity search";
    // Write 1,000 rows of data in batches of 100 rows.
    for (int i = 0; i < 10; i++) {
        BatchWriteRowRequest batchWriteRowRequest = new BatchWriteRowRequest();
        for (int j = 0; j < 100; j++) {
            // Stand-in for the embedding of the business data: 1,024 random components.
            float[] vector = generateRandomFloats(1024, random);
            RowPutChange rowPutChange = new RowPutChange("TABLE_NAME");
            // Set the primary key.
            rowPutChange.setPrimaryKey(PrimaryKeyBuilder.createPrimaryKeyBuilder().addPrimaryKeyColumn("PK_1", PrimaryKeyValue.fromString(UUID.randomUUID().toString())).build());
            // Set attribute columns. field_string must carry a string value, not a long,
            // so that the stored type agrees with the column's name and intended use.
            rowPutChange.addColumn("field_string", ColumnValue.fromString(String.valueOf(i)));
            rowPutChange.addColumn("field_long", ColumnValue.fromLong(i * 100 + j));
            rowPutChange.addColumn("field_text", ColumnValue.fromString(text));
            // Write vector data in binary format.
            rowPutChange.addColumn("field_vector", ColumnValue.fromBinary(VectorUtils.toBytes(vector)));

            batchWriteRowRequest.addRowChange(rowPutChange);
        }
        BatchWriteRowResponse batchWriteRowResponse = tableStoreClient.batchWriteRow(batchWriteRowRequest);
        System.out.println("Batch write successful: " + batchWriteRowResponse.isAllSucceed());
        if (!batchWriteRowResponse.isAllSucceed()) {
            // Report each failed row together with the reason returned for it.
            for (BatchWriteRowResponse.RowResult rowResult : batchWriteRowResponse.getFailedRows()) {
                System.out.println("Failed row: " + batchWriteRowRequest.getRowChange(rowResult.getTableName(), rowResult.getIndex()).getPrimaryKey());
                System.out.println("Failure reason: " + rowResult.getError());
            }
        }
    }
}
import time
import tablestore.utils
from tablestore import *

def batch_write_vector(rows_count):
    """Write ``rows_count`` sample rows, each with a binary vector column, in one batch request.

    Every row uses the integer primary key ``PK1`` and the attribute columns
    ``field_string``, ``field_long``, ``field_text`` and ``field_vector``. The
    vector ``[0.1, 0.2, 0.3, 0.4]`` is converted to binary with
    ``VectorUtils.floats_to_bytes`` before it is written.

    The function relies on the module-level names ``client`` and ``table_name``.
    After the request it prints whether all rows succeeded and the error of each
    failed put, then sleeps 60 seconds before returning.

    :param rows_count: number of rows to generate and write.
    """
    print('Begin prepare data: %d' % rows_count)
    batch_write_row_reqs = BatchWriteRowRequest()
    put_row_items = []
    for i in range(rows_count):
        pk = [('PK1', i)]

        cols = [('field_string', 'key%03d' % i),
                ('field_long', i),
                ('field_text', 'some text'),
                ('field_vector', tablestore.utils.VectorUtils.floats_to_bytes([0.1, 0.2, 0.3, 0.4]))]

        put_row_item = PutRowItem(Row(pk, cols), Condition(RowExistenceExpectation.IGNORE))
        put_row_items.append(put_row_item)
    batch_write_row_reqs.add(TableInBatchWriteRowItem(table_name, put_row_items))

    # Keep the response: a batch request can succeed as a whole while individual rows fail.
    result = client.batch_write_row(batch_write_row_reqs)
    print('Batch write successful: %s' % result.is_all_succeed())
    if not result.is_all_succeed():
        _, failed_items = result.get_put()
        for item in failed_items:
            print('Failure reason: %s, %s' % (item.error_code, item.error_message))

    print('End prepare data.')
    print('Wait for data sync to search index.')
    time.sleep(60)

Mengonversi ke biner menggunakan tool

/**
 * Converts vectors between {@code float[]} and their binary representation.
 *
 * <p>Each float occupies 4 bytes and is written in little-endian byte order; both directions use
 * the same order, so a round trip reproduces the input.
 */
public class VectorUtils {
    /** Byte order shared by the encoder and the decoder. */
    private static final ByteOrder ORDER = ByteOrder.LITTLE_ENDIAN;

    /**
     * Converts a float[] array to binary format.
     *
     * @param vector The vector to convert.
     * @return The data in binary format, 4 bytes per element.
     * @throws ClientException if {@code vector} is null or empty.
     */
    public static byte[] toBytes(float[] vector) {
        if (vector == null || vector.length == 0) {
            throw new ClientException("vector is null or empty");
        }
        ByteBuffer buffer = ByteBuffer.allocate(vector.length * Float.BYTES);
        buffer.order(ORDER);
        // The float view inherits the byte order set above and writes into the same backing array.
        buffer.asFloatBuffer().put(vector);
        return buffer.array();
    }

    /**
     * Converts data from binary format back to a float[] array.
     *
     * @param bytes The data in binary format.
     * @return The decoded vector, one element per 4 bytes of input.
     * @throws ClientException if {@code bytes} is null, empty, or its length is not a multiple of 4.
     */
    public static float[] toFloats(byte[] bytes) {
        // Reject null explicitly, as toBytes does, instead of failing with a NullPointerException.
        if (bytes == null) {
            throw new ClientException("bytes is null");
        }
        int length = bytes.length / Float.BYTES;
        if (bytes.length % Float.BYTES != 0 || length == 0) {
            throw new ClientException("bytes length is not multiple of 4(SIZE_OF_FLOAT32) or length is 0");
        }
        ByteBuffer buffer = ByteBuffer.wrap(bytes);
        buffer.order(ORDER);
        float[] vector = new float[length];
        buffer.asFloatBuffer().get(vector);
        return vector;
    }
}
// Float32ToBytes encodes vector as a byte slice, 4 bytes per element, writing the
// bit pattern of each float32 in little-endian order. It returns an error when
// vector has no elements (a nil slice included).
func Float32ToBytes(vector []float32) ([]byte, error) {
	count := len(vector)
	if count == 0 {
		return nil, errors.New("vector is null or empty")
	}

	const width = 4
	out := make([]byte, count*width)
	offset := 0
	for _, value := range vector {
		binary.LittleEndian.PutUint32(out[offset:offset+width], math.Float32bits(value))
		offset += width
	}
	return out, nil
}

// ToFloat32 decodes data, read as consecutive little-endian 4-byte values, into a
// []float32. It returns an error when data is nil, empty, or its length is not a
// multiple of 4.
func ToFloat32(data []byte) ([]float32, error) {
	switch {
	case data == nil:
		return nil, errors.New("bytes is null")
	case len(data) == 0 || len(data)%4 != 0:
		return nil, errors.New("bytes length is not multiple of 4(SIZE_OF_FLOAT32) or length is 0")
	}

	floats := make([]float32, len(data)/4)
	// Decode every element with a single read into the pre-sized slice.
	if err := binary.Read(bytes.NewReader(data), binary.LittleEndian, floats); err != nil {
		return nil, err
	}
	return floats, nil
}
class VectorUtils:
    """Convert vectors between Python number sequences and little-endian Float32 binary data."""

    @staticmethod
    def floats_to_bytes(floats):
        """Pack a list or tuple of numbers into a bytearray of little-endian 4-byte floats.

        Integer components (for example ``[1, 2, 3, 4]``) are accepted and packed as
        floats; ``bool`` values are rejected even though ``bool`` subclasses ``int``.

        :param floats: non-empty list or tuple of ``float``/``int`` values.
        :return: ``bytearray`` holding 4 bytes per element.
        :raises TypeError: if the input is not a list/tuple of numbers.
        :raises ValueError: if the input is empty.
        """
        if not isinstance(floats, (list, tuple)) or not all(
                isinstance(f, (int, float)) and not isinstance(f, bool) for f in floats):
            raise TypeError("Input must be a list/tuple of floats")
        if len(floats) == 0:
            raise ValueError("vector is empty")
        return bytearray(struct.pack('<%df' % len(floats), *floats))

    @staticmethod
    def bytes_to_floats(byte_data):
        """Unpack little-endian 4-byte floats back into a list of Python floats.

        Both ``bytearray`` and immutable ``bytes`` inputs are accepted.

        :param byte_data: binary data whose length is a non-zero multiple of 4.
        :return: list of decoded float values.
        :raises TypeError: if the input is neither ``bytes`` nor ``bytearray``.
        :raises ValueError: if the length is 0 or not a multiple of 4.
        """
        if not isinstance(byte_data, (bytes, bytearray)):
            raise TypeError("Input must be a bytes or bytearray object")
        num_floats = len(byte_data) // 4
        if len(byte_data) % 4 != 0 or num_floats == 0:
            raise ValueError("bytes length is not multiple of 4(SIZE_OF_FLOAT32) or length is 0")
        return list(struct.unpack('<%df' % num_floats, byte_data))

Memverifikasi konversi

Bagian ini menggunakan Java SDK untuk menunjukkan cara memverifikasi konversi antara data vektor dan data biner. Konversi berhasil jika array bilangan titik mengambang yang dikonversi identik dengan array aslinya.

/**
 * Round-trip check for {@code VectorUtils}: converts a sample vector to binary data and back,
 * prints both results, and reports whether the decoded array equals the original.
 */
public class VectorUtilsTest {
    public static void main(String[] args) {
        float[] vector = new float[] { 1, 2, 3, 4 };
        byte[] bytes = VectorUtils.toBytes(vector);
        System.out.println("Converted binary data: " + Arrays.toString(bytes));
        float[] newVector = VectorUtils.toFloats(bytes);
        System.out.println("Converted floating-point number array: " + Arrays.toString(newVector));
        // The conversion is successful only if the decoded array is identical to the input,
        // so compare the two arrays instead of relying on a visual check of the output.
        System.out.println("Round-trip conversion matches the original: " + Arrays.equals(vector, newVector));
    }
}

Format string

Menyimpan vektor dalam format string membutuhkan ruang disk yang lebih besar tetapi lebih mudah dibaca. Saat menulis data vektor dalam format string, konversikan array Float32 menjadi string JSON, seperti [0.1,0.2,0.3,0.4].

Bagian ini menggunakan Java SDK untuk menunjukkan cara menulis data vektor dalam format string.

/**
 * Writes 1,000 sample rows in 10 BatchWriteRow requests of 100 rows each, storing the vector
 * column in string format.
 *
 * <p>Every row gets a random UUID as {@code PK_1}, plus the attribute columns
 * {@code field_string}, {@code field_long}, {@code field_text} and {@code field_vector}. After
 * each request the overall result is printed, followed by the primary key and error of every
 * failed row.
 *
 * @param tableStoreClient client used to send the batch requests
 * @throws Exception declared so that any failure raised while sending a batch reaches the caller
 */
private static void batchWriteRow(SyncClient tableStoreClient) throws Exception {
    // Your business data. The value is the same for every row, so it is created once.
    String text = "A string for full-text search. An embedding vector is generated from this field and written to the field_vector field below for vector semantic similarity search";
    // Placeholder for the vector generated from the business data, already serialized
    // as a string of a float32 array.
    String vector = "[1, 2, 3, 4]";
    // Write 1,000 rows of data in batches of 100 rows.
    for (int i = 0; i < 10; i++) {
        BatchWriteRowRequest batchWriteRowRequest = new BatchWriteRowRequest();
        for (int j = 0; j < 100; j++) {
            RowPutChange rowPutChange = new RowPutChange("TABLE_NAME");
            // Set the primary key.
            rowPutChange.setPrimaryKey(PrimaryKeyBuilder.createPrimaryKeyBuilder().addPrimaryKeyColumn("PK_1", PrimaryKeyValue.fromString(UUID.randomUUID().toString())).build());
            // Set attribute columns. field_string must carry a string value, not a long,
            // so that the stored type agrees with the column's name and intended use.
            rowPutChange.addColumn("field_string", ColumnValue.fromString(String.valueOf(i)));
            rowPutChange.addColumn("field_long", ColumnValue.fromLong(i * 100 + j));
            rowPutChange.addColumn("field_text", ColumnValue.fromString(text));
            // The vector format is a string of a float32 array, for example, [1, 5.1, 4.7, 0.08 ].
            rowPutChange.addColumn("field_vector", ColumnValue.fromString(vector));

            batchWriteRowRequest.addRowChange(rowPutChange);
        }
        BatchWriteRowResponse batchWriteRowResponse = tableStoreClient.batchWriteRow(batchWriteRowRequest);
        System.out.println("Batch write successful: " + batchWriteRowResponse.isAllSucceed());
        if (!batchWriteRowResponse.isAllSucceed()) {
            // Report each failed row together with the reason returned for it.
            for (BatchWriteRowResponse.RowResult rowResult : batchWriteRowResponse.getFailedRows()) {
                System.out.println("Failed row: " + batchWriteRowRequest.getRowChange(rowResult.getTableName(), rowResult.getIndex()).getPrimaryKey());
                System.out.println("Failure reason: " + rowResult.getError());
            }
        }
    }
}