All Products
Search
Document Center

OpenSearch: Document search demo

Last Updated: Feb 27, 2024

This topic describes how to use the SDK to access a Retrieval Engine Edition instance and query its data.

Sample code

# -*- coding: utf-8 -*-


from alibabacloud_ha3engine import models, client
from alibabacloud_tea_util import models as util_models
from Tea.exceptions import TeaException, RetryError
def _run_ha_string_query(ha3_client, headers: dict) -> None:
    """Example 1: send a Havenask query written as a raw query string and print the response.

    Args:
        ha3_client: The engine client created in ``search()``.
        headers: Extra request headers placed on the request model.
    """
    # `<pk>` appears to be a placeholder for a primary-key value; substitute a real one.
    query_str = "config=hit:4,format:json,fetch_summary_type:pk,qrs_chain:search&&query=id:<pk>&&cluster=general"
    search_query = models.SearchQuery(query=query_str)
    # Optional. The request method. Only the GET and POST methods are supported. Default value: GET.
    # NOTE(review): this example sets `method` on the request model, while the other
    # examples pass `method` to `search()` itself. Confirm which form the installed
    # SDK version expects.
    request = models.SearchRequestModel(headers=headers, query=search_query, method='POST')
    response = ha3_client.search(request)
    print(response)


def _run_ha_structured_query(ha3_client, headers: dict) -> None:
    """Example 2: build a Havenask query from clause objects, send it, and print the response.

    Args:
        ha3_client: The engine client created in ``search()``; also used to
            serialize the constructed query via ``build_ha_search_query``.
        headers: Extra request headers placed on the request model.
    """
    # Aggregate clause. One or more aggregate objects may be supplied as a list.
    aggregate_clause = models.HaQueryAggregateClause(
        group_key="cate_id",  # Field to aggregate on.
        agg_fun="count()",  # Aggregation function.
        range="0~10",  # Aggregation range.
        max_group="5",  # Maximum number of groups that can be returned.
        agg_filter="cate_id=1",  # Aggregation filter.
        agg_sampler_thres_hold="5",  # Threshold value for sampled aggregation.
        agg_sampler_step="5",  # Step size for sampling.
    )
    aggregate_clauses = [aggregate_clause]

    # Config clause: paging and the data format of returned results.
    config_clause = models.HaQueryconfigClause(
        # Position from which to start returning results.
        start="1",
        # Number of results to return.
        # NOTE(review): the upstream comment said "pages"; confirm against the
        # config-clause documentation.
        hit="10",
        # Format of returned results. Formats such as XML, JSON, and Protobuf are supported.
        format="JSON",
        # Additional custom config parameters.
        custom_config={"no_summary": "yes", "qrs_chain": "search"},
    )

    # Sort clause.
    sort_clause = models.HaQuerySortClause(
        # Field to sort by.
        sort_key="id",
        # Sort order. The plus sign (+) specifies ascending order; the minus sign (-) descending.
        sort_order="+",
    )
    sort_clauses = [sort_clause]

    # In Havenask queries, key-value pairs are passed as a dict. See the development
    # guide for the supported keys.
    kvpairs = {"uniqfield": "cate_id"}

    # Distinct clause.
    distinct_clause = models.HaQueryDistinctClause(
        # Name of the field to extract on.
        dist_key="cate_id",
        # Number of documents extracted each time.
        dist_count="1",
        # Number of extraction rounds.
        dist_times="1",
        # Whether to retain the remaining documents after extraction.
        reserved="false",
        # Filter condition selecting the documents to be extracted.
        dist_filter="cate_id<=3",
        # Whether to subtract the number of discarded documents from totalHits
        # when `reserved` is set to false.
        update_total_hit="false",
        # Threshold value for distinct extraction.
        grade="1.2",
    )
    # One or more distinct objects may be supplied. The value of the dist parameter must be unique.
    distinct_clauses = [distinct_clause]

    # Custom query clause.
    custom_query = {"searcher_cache": "use:no"}

    ha_query = models.HaQuery(
        # Query clause containing the index and the query content.
        query="id:8148508889615505646",
        # Name of the cluster to be requested.
        cluster="general",
        config=config_clause,
        # Filter condition. Multiple conditions can be joined with AND or OR.
        # Supported operators: =, >, <, <=, >=, !=.
        filter="id>100 AND id<=1000",
        aggregate=aggregate_clauses,
        kvpairs=kvpairs,
        sort=sort_clauses,
        distinct=distinct_clauses,
        custom_query=custom_query,
    )

    search_query = models.SearchQuery(query=ha3_client.build_ha_search_query(ha_query))
    request = models.SearchRequestModel(headers, search_query)
    # The request method. Only the GET and POST methods are supported. Default value: GET.
    # If the length of the query exceeds 30 KB, use the POST method.
    response = ha3_client.search(query=request, method='POST')
    print(response)


def _run_sql_string_query(ha3_client, headers: dict) -> None:
    """Example 3: send an SQL query written as a raw string and print the response.

    Args:
        ha3_client: The engine client created in ``search()``.
        headers: Extra request headers placed on the request model.
    """
    # `<indexTableName>` appears to be a placeholder for the index table name; substitute a real one.
    sql_str = "select * from <indexTableName>&&kvpair=trace:INFO;formatType:json"
    search_query = models.SearchQuery(sql=sql_str)
    request = models.SearchRequestModel(headers, search_query)
    # Optional. The request method. Only the GET and POST methods are supported. Default value: GET.
    # If the length of the query exceeds 30 KB, use the POST method.
    response = ha3_client.search(query=request, method='POST')
    print(response)


def _run_sql_structured_query(ha3_client, headers: dict) -> None:
    """Example 4: build an SQL query from an ``SQLQuery`` object, send it, and print the response.

    Args:
        ha3_client: The engine client created in ``search()``; also used to
            serialize the constructed query via ``build_sqlsearch_query``.
        headers: Extra request headers placed on the request model.
    """
    sql_query = models.SQLQuery(
        query="select * from odps",
        kvpairs={"trace": "INFO", "formatType": "full_json"},
    )

    search_query = models.SearchQuery(sql=ha3_client.build_sqlsearch_query(sql_query))
    request = models.SearchRequestModel(headers, search_query)
    # The request method. Only the GET and POST methods are supported. Default value: GET.
    # If the length of the query exceeds 30 KB, use the POST method.
    response = ha3_client.search(query=request, method='POST')
    print(response)


def search() -> None:
    """Run four sample queries against a Retrieval Engine Edition instance and print each response.

    The examples run in order: Havenask query string, constructed Havenask
    query, SQL query string, constructed SQL query. They share one ``try``
    block, so the first failing request skips the remaining examples.

    ``TeaException`` and ``RetryError`` raised while running the examples are
    caught and printed; any other exception propagates to the caller.
    """
    # Sample endpoint and credentials: replace them with your own instance values
    # and avoid hard-coding real credentials in source files.
    config = models.Config(
        endpoint="ha-cn-7mz2ougaw02.ha.aliyuncs.com",
        instance_id="ha-cn-7mz2ougaw02",
        protocol="http",
        access_user_name="user",
        access_pass_word="111111",
    )

    # If a request takes an extended period of time to complete, these options can be
    # used to increase the wait time. Unit: millisecond.
    # They can be used with the search_with_options method; none of the calls
    # below pass `runtime`, so it is shown here for reference only.
    runtime = util_models.RuntimeOptions(
        connect_timeout=5000,
        read_timeout=10000,
        autoretry=False,
        ignore_ssl=False,
        max_idle_conns=50,
    )

    # Initialize the OpenSearch Retrieval Engine Edition V3.0 client.
    ha3_client = client.Client(config)

    # The same (initially empty) headers dict is shared by all four requests.
    headers = {}

    try:
        _run_ha_string_query(ha3_client, headers)
        _run_ha_structured_query(ha3_client, headers)
        _run_sql_string_query(ha3_client, headers)
        _run_sql_structured_query(ha3_client, headers)
    except TeaException as e:
        print(f"send request with TeaException : {e}")
    except RetryError as e:
        print(f"send request with Connection Exception  : {e}")