This topic describes how to parse the retrieval results of OpenSearch Retrieval Engine Edition into the Protobuf and FlatBuffers binary formats.
Protobuf format
Maven dependencies
<properties>
<grpc.version>1.6.1</grpc.version>
<protobuf.version>3.21.5</protobuf.version>
</properties>
<dependency>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId>
<version>${protobuf.version}</version>
</dependency>
<dependency>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java-util</artifactId>
<version>${protobuf.version}</version>
</dependency>
<dependency>
<groupId>io.grpc</groupId>
<artifactId>grpc-netty</artifactId>
<version>${grpc.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>io.grpc</groupId>
<artifactId>grpc-protobuf</artifactId>
<version>${grpc.version}</version>
<scope>provided</scope>
</dependency>

Generate a Protobuf file
After you add dependencies by using Maven, you need to generate a Protobuf file to parse the retrieval result of the Retrieval Engine Edition instance into the Protobuf format. To generate a Protobuf file, perform the following steps:
Install Protobuf. We recommend that you install Protobuf V3.21.5. You can run the protoc --version command to view the version information.
Define a description file that uses the .proto extension in the project. You can modify the description file based on your business requirements. The following sample code provides an example:
Ha3ResultProto.proto file
// Ha3ResultProto.proto
// proto2 schema for the binary body returned by an OpenSearch Retrieval
// Engine Edition instance when the query sets format:protobuf.
syntax = "proto2";
package com.searchengine.example.demo.protobuf;
// Enables arena allocation in generated C++ code; has no effect on Java output.
option cc_enable_arenas = true;
// Generic multi-value attribute: one key, with values carried in whichever
// typed list matches the field's data type.
message PBAttrKVPair {
optional string key = 1;
repeated int64 int64Value = 2;
repeated double doubleValue = 3;
repeated bytes bytesValue = 4;
}
// Distinguishes index attribute values from runtime variable values.
enum ValueType {
ATTRIBUTE_TYPE = 0;
VARIABLE_VALUE_TYPE = 1;
}
// Column of int64 values for one attribute across all matched documents.
// offset: presumably marks per-document boundaries for multi-value data —
// confirm against the engine documentation.
message PBInt64Attribute {
optional string key = 1;
optional ValueType type = 2;
repeated int64 int64Value = 3;
repeated uint32 offset = 4;
}
// Column of double values; layout mirrors PBInt64Attribute.
message PBDoubleAttribute {
optional string key = 1;
optional ValueType type = 2;
repeated double doubleValue = 3;
repeated uint32 offset = 4;
}
// Column of raw byte values; layout mirrors PBInt64Attribute.
message PBBytesAttribute {
optional string key = 1;
optional ValueType type = 2;
repeated bytes bytesValue = 3;
repeated uint32 offset = 4;
}
// Metadata describing one sort expression.
// NOTE(review): "SortExprssionMeta" is a misspelling of "SortExpressionMeta";
// kept as-is because renaming would change the generated Java class name.
message SortExprssionMeta {
optional bool sortFlag = 1;
optional string sortExprName = 2;
}
// Flattened sort values: dimensionCount values per document, plus one
// SortExprssionMeta entry per sort dimension.
message PBSortValues {
optional uint32 dimensionCount = 1;
repeated double sortValues = 2;
repeated SortExprssionMeta sortExprMetas = 3;
}
// Simple string-key / bytes-value pair.
message PBKVPair {
optional string key = 1;
optional bytes value = 2;
}
// Top-level envelope for one query result.
message PBResult
{
optional uint64 totalTime = 1;
optional PBHits hits = 2;
repeated PBAggregateResults aggResults = 3;
repeated PBErrorResult errorResults = 4;
optional bytes tracer = 5;
optional bool fromCache = 6;
optional PBMatchDocs matchDocs = 7;
repeated PBMetaMap metaMap = 8;
}
// Column-oriented variant of the hit list: parallel arrays indexed by
// document position.
message PBMatchDocs
{
optional uint32 numMatchDocs = 1;
optional uint32 totalMatchDocs = 2;
repeated string clusterNames = 3;
repeated uint32 clusterIds = 4;
repeated uint32 hashids = 5;
repeated uint32 docids = 6;
repeated int32 fullIndexVersions = 7;
repeated int32 indexVersions = 8;
repeated uint64 pkHighers = 9;
repeated uint64 pkLowers = 10;
repeated uint32 searcherIps = 11;
repeated bytes tracers = 12;
repeated PBInt64Attribute int64AttrValues = 13;
repeated PBDoubleAttribute doubleAttrValues = 14;
repeated PBBytesAttribute bytesAttrValues = 15;
optional PBSortValues sortValues = 16;
}
// Row-oriented hit list with per-query metadata.
message PBHits
{
optional uint32 numhits = 1;
optional uint32 totalHits = 2;
repeated PBHit hit = 3;
repeated PBMetaHitMap metaHitMap = 4;
optional double coveredPercent = 5;
repeated SortExprssionMeta sortExprMetas = 6;
}
// One matched document; the primary key is split into high/low 64-bit halves
// (pkHigher/pkLower), with the raw form in rawPk.
message PBHit {
optional string clusterName = 1;
optional uint32 hashid = 2;
optional uint32 docid = 3;
optional int32 fullIndexVersion = 4;
optional int32 indexVersion = 5;
optional uint64 pkHigher = 6;
optional uint64 pkLower = 7;
repeated PBAttrKVPair attributes = 8;
repeated PBAttrKVPair variableValues = 9;
repeated PBKVPair summary = 10;
repeated PBKVPair property = 11;
repeated string sortValues = 12;
optional bytes tracer = 13;
optional uint32 searcherIp = 14;
optional string rawPk = 15;
optional bytes summaryBytes = 16;
}
// Named group of key/value metadata attached to the hit list.
message PBMetaHitMap
{
optional string metaHitKey = 1;
repeated PBKVPair metaHitValue = 2;
}
// Aggregation results keyed by the aggregation expression.
message PBAggregateResults
{
optional string aggregateKey = 1;
repeated PBAggregateValue aggregateValue = 2;
}
// One aggregation bucket: group value plus per-function result pairs.
message PBAggregateValue
{
optional string groupValue = 1;
repeated PBKVPair funNameResultPair = 2;
}
// Error reported by a single partition/searcher.
message PBErrorResult
{
optional string partitionId = 1;
optional string hostName = 2;
optional uint32 errorCode = 3;
optional string errorDescription = 4;
}
// Named group of result-level key/value metadata.
message PBMetaMap
{
optional string metaKey = 1;
repeated PBKVPair metaValue = 2;
}
Run the protoc --java_out=./ Ha3ResultProto.proto command in the directory of the Protobuf description file.
The path to store the generated .java file can be specified by using the package statement. Example: package com.aliyun.demo.protobuf.
After the command is run, a .java file is automatically generated in the specified package. In the preceding example, the Ha3ResultProto.java file is generated in the com.aliyun.demo.protobuf package. You can directly reference this file when you parse the retrieval results of the Retrieval Engine Edition instance by using the SDK.
import com.aliyun.ha3engine.Client;
import com.aliyun.ha3engine.models.*;
import com.aliyun.tea.TeaException;
import com.aliyun.demo.protobuf.Ha3ResultProto;
import org.junit.Before;
import org.junit.Test;
import java.nio.ByteBuffer;
import java.util.*;
public class DataFormatService {
    /**
     * Retrieval Engine Edition client used to issue query requests.
     */
    private Client client;

    /**
     * Builds the client before each test. All connection settings below are
     * placeholders; copy the real values from the Instance Details page.
     */
    @Before
    public void clientInit() throws Exception {
        Config config = new Config();
        // The API endpoint of the instance (Instance Details page, API Endpoint section).
        config.setEndpoint("");
        // The instance name from the upper-left corner of the Instance Details page, e.g. ha-cn-i7*****605.
        config.setInstanceId("");
        // The username from the API Endpoint section of the Instance Details page.
        config.setAccessUserName("");
        // The password; it can be changed in the API Endpoint section of the Instance Details page.
        config.setAccessPassWord("");
        // The HTTP proxy used when calling API operations over the Internet.
        config.setHttpProxy("");
        client = new Client(config);
    }

    /**
     * Issues a Retrieval Engine Edition query whose response body is returned
     * as raw bytes, then decodes those bytes into a Protobuf PBResult message
     * generated from Ha3ResultProto.proto.
     */
    @Test
    public void protobufFormat() throws Exception {
        try {
            // format:protobuf in the config clause asks the engine for a Protobuf-encoded body.
            SearchQuery rawQuery = new SearchQuery();
            rawQuery.setQuery("query=id:8148508889615505646&&config=start:0,hit:100,format:protobuf&&cluster=general");
            SearchRequestModel requestModel = new SearchRequestModel();
            requestModel.setQuery(rawQuery);
            // SearchBytes returns the response body as byte[] rather than String.
            SearchBytesResponseModel responseModel = client.SearchBytes(requestModel);
            byte[] body = responseModel.getBody();
            System.out.println("Query results by using the query string in Retrieval Engine Edition:\n" + Arrays.toString(body));
            // Convert the data to the Protobuf format.
            Ha3ResultProto.PBResult pbResult = Ha3ResultProto.PBResult.parseFrom(body);
            System.out.println("Output result in the Protobuf format:\n" + pbResult);
        } catch (TeaException e) {
            System.out.println(e.getCode());
            System.out.println(e.getMessage());
            Map<String, Object> errorData = e.getData();
            System.out.println(com.aliyun.teautil.Common.toJSONString(errorData));
        }
    }
}

Usage notes
The Protobuf format is applicable only to queries in Retrieval Engine Edition. To specify the Protobuf format, you must set the format parameter to protobuf. If you want to specify the JSON format, you can set the format parameter to json.
The Retrieval Engine Edition client provides two query methods: Search and SearchBytes. The Search method returns the response body in the String format, and the SearchBytes method returns the response body in the byte[] format. Therefore, the SearchBytes query method can be used only in aliyun-sdk-ha3engine 1.3.2.
To convert data to the Protobuf format, you must use aliyun-sdk-ha3engine 1.3.2.
FlatBuffers format
Maven dependencies
<properties>
<flatbuffers.java.version>2.0.7</flatbuffers.java.version>
</properties>
<dependency>
<groupId>com.google.flatbuffers</groupId>
<artifactId>flatbuffers-java</artifactId>
<version>${flatbuffers.java.version}</version>
</dependency>

Generate a FlatBuffers file
After you add dependencies by using Maven, you need to generate a FlatBuffers file to parse the retrieval result of the Retrieval Engine Edition instance into the FlatBuffers format. To generate a FlatBuffers file, perform the following steps:
Install FlatBuffers. We recommend that you install FlatBuffers V2.0.7. You can run the flatc --version command to view the version information.
Define a description file that uses the .fbs extension in the project. You can modify the description file based on your business requirements. The following sample code provides two examples:
SqlResult.fbs file
// SqlResult.fbs — root FlatBuffers schema for an SQL query result returned
// with format:flatbuffers.
include "TwoDimTable.fbs";
namespace com.searchengine.example.demo.protobuf;
// Error details reported for a failed (or partially failed) query.
table SqlErrorResult {
partitionId: string (id:0);
hostName: string (id:1);
errorCode: uint (id:2);
errorDescription: string (id:3);
}
// Top-level result: processing time, row count, optional error information,
// the column-based result table, and diagnostic search info.
table SqlResult {
processTime: double (id:0);
rowCount: uint32 (id:1);
errorResult: SqlErrorResult (id:2);
sqlTable: TwoDimTable (id:3);
searchInfo: string (id:4);
}
root_type SqlResult;TwoDimTable.fbs file
// TwoDimTable.fbs — column-oriented two-dimensional result table shared by
// SqlResult.fbs. Each column stores all values of one field.
namespace com.searchengine.example.demo.protobuf;
// multi value: wrappers for a single multi-value cell of each primitive type.
table MultiInt8 { value: [byte]; }
table MultiInt16 { value: [short]; }
table MultiInt32 { value: [int]; }
table MultiInt64 { value: [long]; }
table MultiUInt8 { value: [ubyte]; }
table MultiUInt16 { value: [ushort]; }
table MultiUInt32 { value: [uint]; }
table MultiUInt64 { value: [ulong]; }
table MultiFloat { value: [float]; }
table MultiDouble { value: [double]; }
table MultiString { value: [string]; }
// column base storage: one entry per row for a single-value field.
table Int8Column { value: [byte]; }
table Int16Column { value: [short]; }
table Int32Column { value: [int]; }
table Int64Column { value: [long]; }
table UInt8Column { value: [ubyte]; }
table UInt16Column { value: [ushort]; }
table UInt32Column { value: [uint]; }
table UInt64Column { value: [ulong]; }
table FloatColumn { value: [float]; }
table DoubleColumn { value: [double]; }
table StringColumn { value: [string]; }
// Multi-value columns: one Multi* entry per row.
table MultiInt8Column { value: [MultiInt8]; }
table MultiUInt8Column { value: [MultiUInt8]; }
table MultiInt16Column { value: [MultiInt16]; }
table MultiUInt16Column { value: [MultiUInt16]; }
table MultiInt32Column { value: [MultiInt32]; }
table MultiUInt32Column { value: [MultiUInt32]; }
table MultiInt64Column { value: [MultiInt64]; }
table MultiUInt64Column { value: [MultiUInt64]; }
table MultiFloatColumn { value: [MultiFloat]; }
table MultiDoubleColumn { value: [MultiDouble]; }
table MultiStringColumn { value: [MultiString]; }
// column type: tagged union over every supported column representation.
union ColumnType {
Int8Column,
Int16Column,
Int32Column,
Int64Column,
UInt8Column,
UInt16Column,
UInt32Column,
UInt64Column,
FloatColumn,
DoubleColumn,
StringColumn,
MultiInt8Column,
MultiInt16Column,
MultiInt32Column,
MultiInt64Column,
MultiUInt8Column,
MultiUInt16Column,
MultiUInt32Column,
MultiUInt64Column,
MultiFloatColumn,
MultiDoubleColumn,
MultiStringColumn,
}
// One named column; the concrete value type is resolved via the union tag.
table Column {
name: string;
value: ColumnType;
}
// The full table: row count plus one Column per returned field.
table TwoDimTable {
rowCount: uint (id:0);
columns: [Column] (id:1);
}Run the
flatc --java_out=./ SqlResult.fbs command in the directory of the FlatBuffers description file.
The path to store the generated .java files can be specified by using the namespace statement. Example: namespace com.aliyun.demo.flatbuffers.
After the command is run, the .java files are automatically generated in the specified package. In this example, one or more .java files are generated in the com.aliyun.demo.flatbuffers package. You can directly reference these files when you parse the retrieval results of the Retrieval Engine Edition instance by using the SDK.
import com.aliyun.ha3engine.Client;
import com.aliyun.ha3engine.models.*;
import com.aliyun.tea.TeaException;
import com.aliyun.demo.flatbuffers.Int64Column;
import com.aliyun.demo.flatbuffers.SqlResult;
import org.junit.Before;
import org.junit.Test;
import java.nio.ByteBuffer;
import java.util.*;
public class DataFormatService {
/**
* The Retrieval Engine Edition client. Query operations are supported.
*/
private Client client;
@Before
public void clientInit() throws Exception {
/*
Initialize the Retrieval Engine Edition client.
*/
Config config = new Config();
// The API endpoint of the instance. You can view the API endpoint in the API Endpoint section of the Instance Details page.
config.setEndpoint("");
// The name of the instance. You can view the name in the upper-left corner of the Instance Details page. Example: ha-cn-i7*****605.
config.setInstanceId("");
// The username. You can view the username in the API Endpoint section of the Instance Details page.
config.setAccessUserName("");
// The password. You can modify the password in the API Endpoint section of the Instance Details page.
config.setAccessPassWord("");
// The HTTP proxy that you specify for calling API operations over the Internet.
config.setHttpProxy("");
client = new Client(config);
}
@Test
public void flatBuffersFormat() throws Exception {
try {
/*
Example: Use an SQL query string to query data.
*/
SearchRequestModel sqlQueryRRequestModel = new SearchRequestModel();
SearchQuery SqlRawQuery = new SearchQuery();
SqlRawQuery.setSql("query=select * from indexTableName&&kvpair=trace:INFO;format:flatbuffers");
sqlQueryRRequestModel.setQuery(SqlRawQuery);
SearchBytesResponseModel sqlSearchBytesResponseModel = client.SearchBytes(sqlQueryRRequestModel);
System.out.println("Query results by using the SQL query string:\n" + Arrays.toString(sqlSearchBytesResponseModel.getBody()));
// Convert the data to the FlatBuffers format.
SqlResult sqlResult = SqlResult.getRootAsSqlResult(ByteBuffer.wrap(sqlSearchBytesResponseModel.getBody()));
/*
Specify the field for which you want to obtain the corresponding column. For example, the returned data contains three fields: id, content, and url. The column that corresponds to the specified field is returned based on the field data type.
Obtain the column that corresponds to the id field. The data type of the id field is INT64. Therefore, the column of the INT64 type is used.
If the type of the returned field is String, the column of the String type is used. The columns corresponding to other returned fields may be deduced by analogy.
*/
Int64Column int64Column = (Int64Column) sqlResult.sqlTable().columns(0).value(new Int64Column());
// Obtain the name of the field. For example, if the fields id, content, and url are returned in sequence, the returned field name is id.
String name = sqlResult.sqlTable().columns(0).name();
System.out.println("Field name=" + name);
// Obtain the number of data records corresponding to the field.
int total = int64Column.valueLength();
System.out.println(name + "Number of data records=" + total);
// Traverse data.
if (total != 0) {
for (int i = 0; i < total; i++) {
// Obtain the value of the field. For example, if N data records correspond to the id field, the Nth data record is obtained.
long value = int64Column.value(i);
System.out.println(Name + "The" + (i +1)th + "data record=" + value);
}
}
} catch (TeaException e) {
System.out.println(e.getCode());
System.out.println(e.getMessage());
Map<String, Object> abc = e.getData();
System.out.println(com.aliyun.teautil.Common.toJSONString(abc));
}
}
}Usage notes
The FlatBuffers format is applicable only to SQL queries. To specify the FlatBuffers format, you must set the format parameter to flatbuffers. If you want to specify the JSON format, you can set the format parameter to json.
To convert data to the FlatBuffers format, you must use aliyun-sdk-ha3engine 1.3.2.