This topic describes how to use the SDK for Python to connect to a Retrieval Engine Edition instance and query its data.
Sample code
# -*- coding: utf-8 -*-
from alibabacloud_ha3engine import models, client
from alibabacloud_tea_util import models as util_models
from Tea.exceptions import TeaException, RetryError
def search():
    """Run four sample queries against a Retrieval Engine Edition instance.

    Builds an SDK client from the hard-coded sample connection settings and
    then issues, in order:

    1. a Havenask query given as a raw query string,
    2. a Havenask query assembled from clause objects,
    3. an SQL query given as a raw string,
    4. an SQL query assembled from an ``SQLQuery`` object.

    Each response is printed. All four requests share one ``try`` block, so
    the first ``TeaException`` or ``RetryError`` is printed and the remaining
    examples are skipped.

    Returns:
        None.
    """
    # Sample connection settings; replace them with the values of your own
    # instance before running.
    config = models.Config(
        endpoint="ha-cn-7mz2ougaw02.ha.aliyuncs.com",
        instance_id="ha-cn-7mz2ougaw02",
        protocol="http",
        access_user_name="user",
        access_pass_word="111111",
    )

    # If a request takes an extended period of time to complete, these options
    # can be configured to increase the wait time. Unit: millisecond.
    # They can be used in the search_with_options method. None of the search()
    # calls below receive them; the object is built here for illustration only.
    runtime = util_models.RuntimeOptions(
        connect_timeout=5000,
        read_timeout=10000,
        autoretry=False,
        ignore_ssl=False,
        max_idle_conns=50,
    )

    # Initialize the OpenSearch Retrieval Engine Edition V3.0 client.
    ha3_client = client.Client(config)
    headers = {}

    try:
        # =====================================================
        # Example 1: Perform a Havenask query by using a query string.
        # =====================================================
        # "<pk>" is a placeholder; substitute a real primary key value.
        query_str = "config=hit:4,format:json,fetch_summary_type:pk,qrs_chain:search&&query=id:<pk>&&cluster=general"
        ha_string_query = models.SearchQuery(query=query_str)
        # Optional. The request method. Only the GET and POST methods are
        # supported. Default value: GET.
        ha_string_request = models.SearchRequestModel(headers=headers, query=ha_string_query, method='POST')
        ha_string_response = ha3_client.search(ha_string_request)
        print(ha_string_response)

        # =====================================================
        # Example 2: Perform a Havenask query by using a constructed query.
        # =====================================================
        # Aggregate clause. Add one or more aggregate objects to the list.
        aggregate_clauses = [
            models.HaQueryAggregateClause(
                group_key="cate_id",  # Field to be aggregated.
                agg_fun="count()",  # Aggregation function.
                range="0~10",  # Aggregation range.
                max_group="5",  # Maximum number of groups that can be returned.
                agg_filter="cate_id=1",  # Aggregation filter.
                agg_sampler_thres_hold="5",  # Threshold for sampled aggregation.
                agg_sampler_step="5",  # Step size for sampling.
            )
        ]

        # Config clause: paging and the data format of the returned results.
        custom_config = {
            "no_summary": "yes",
            "qrs_chain": "search",
        }
        config_clause = models.HaQueryconfigClause(
            # Position from which to start returning results.
            start="1",
            # Number of documents to return for the query.
            hit="10",
            # Format of the returned results. Formats such as XML, JSON, and
            # Protobuf are supported.
            format="JSON",
            # Additional custom parameters of the config clause.
            custom_config=custom_config,
        )

        # Sort clause.
        sort_clauses = [
            models.HaQuerySortClause(
                # Field to sort by.
                sort_key="id",
                # Sort order. The plus sign (+) specifies ascending order and
                # the minus sign (-) specifies descending order.
                sort_order="+",
            )
        ]

        # In Havenask queries, key-value pairs are of the DICT type. See the
        # development guide for the collection of supported keys.
        ha_kvpairs = {"uniqfield": "cate_id"}

        # Distinct clause. Add one or more distinct objects to the list; the
        # dist_key of each object must be unique.
        distinct_clauses = [
            models.HaQueryDistinctClause(
                # Name of the field to be extracted.
                dist_key="cate_id",
                # Number of documents extracted each time.
                dist_count="1",
                # Number of extraction times.
                dist_times="1",
                # Whether to retain the remaining documents after extraction.
                reserved="false",
                # Filter condition that selects the documents to be extracted.
                dist_filter="cate_id<=3",
                # Whether to subtract the number of discarded documents from
                # totalHits when reserved is set to false.
                update_total_hit="false",
                # Threshold value for distinct extraction.
                grade="1.2",
            )
        ]

        # Custom query clause.
        custom_query = {"searcher_cache": "use:no"}

        ha_query = models.HaQuery(
            # Query clause that contains the index and the query content.
            query="id:8148508889615505646",
            # Name of the cluster to be requested.
            cluster="general",
            # Parameters of the config clause.
            config=config_clause,
            # Filter condition for the query. One or more conditions can be
            # specified; join multiple conditions with AND or OR.
            # Supported operators: equal to (=), greater than (>), less than
            # (<), less than or equal to (<=), greater than or equal to (>=),
            # and not equal to (!=).
            filter="id>100 AND id<=1000",
            # Parameters of the aggregate clause.
            aggregate=aggregate_clauses,
            # Parameters of the kvpairs clause.
            kvpairs=ha_kvpairs,
            # Parameters of the sort clause.
            sort=sort_clauses,
            # Parameters of the distinct clause.
            distinct=distinct_clauses,
            # Parameters of the custom query clause.
            custom_query=custom_query,
        )
        ha_struct_query = models.SearchQuery(query=ha3_client.build_ha_search_query(ha_query))
        ha_struct_request = models.SearchRequestModel(headers, ha_struct_query)
        # The request method. Only the GET and POST methods are supported.
        # Default value: GET. If the length of the query exceeds 30 KB, use
        # the POST method.
        # NOTE(review): Example 1 passes method='POST' to SearchRequestModel,
        # whereas this call passes it to search(); confirm which form the
        # installed SDK version accepts.
        ha_struct_response = ha3_client.search(query=ha_struct_request, method='POST')
        print(ha_struct_response)

        # =====================================================
        # Example 3: Perform an SQL query by using a query string.
        # =====================================================
        # "<indexTableName>" is a placeholder; substitute a real table name.
        sql_str = "select * from <indexTableName>&&kvpair=trace:INFO;formatType:json"
        sql_string_query = models.SearchQuery(sql=sql_str)
        sql_string_request = models.SearchRequestModel(headers, sql_string_query)
        # Optional. The request method. Only the GET and POST methods are
        # supported. Default value: GET. If the length of the query exceeds
        # 30 KB, use the POST method.
        sql_string_response = ha3_client.search(query=sql_string_request, method='POST')
        print(sql_string_response)

        # =====================================================
        # Example 4: Perform an SQL query by using a constructed query.
        # =====================================================
        sql_kvpairs = {
            "trace": "INFO",
            "formatType": "full_json",
        }
        sql_query = models.SQLQuery(
            query="select * from odps",
            kvpairs=sql_kvpairs,
        )
        sql_struct_query = models.SearchQuery(sql=ha3_client.build_sqlsearch_query(sql_query))
        sql_struct_request = models.SearchRequestModel(headers, sql_struct_query)
        # The request method. Only the GET and POST methods are supported.
        # Default value: GET. If the length of the query exceeds 30 KB, use
        # the POST method.
        sql_struct_response = ha3_client.search(query=sql_struct_request, method='POST')
        print(sql_struct_response)
    except TeaException as e:
        print(f"send request with TeaException : {e}")
    except RetryError as e:
        print(f"send request with Connection Exception : {e}")