This topic describes aggregation operations in search index-based queries.

Aggregation types

  • Group by field values
    • Definition: Group query results based on field values. Same values are grouped together. The value of each group and the number of corresponding values are returned. Errors may occur when the number of values in a group is too large.
    • Parameters
      Parameter Description
      Name The unique name specified for the aggregation. You can find the required aggregation results based on the name.
      FieldName The name of the field to be aggregated. Only fields of the keyword, long, double, and bool types are supported.
      Size The number of returned groups.
      GroupBySorters Add sorting rules for items in a group. By default, group items are sorted in descending order. When you set more than one sorting rule, items are sorted in sequence based on the order in which the rules were added. Supported parameters are as follows.
      • Sort by parameter keys in alphabetical order.
      • Sort by parameter keys in reverse alphabetical order.
      • Sort by the number of rows in ascending order.
      • Sort by the number of rows in descending order.
      • Sort by the values of sub-aggregations in ascending order.
      • Sort by the values of sub-aggregations in descending order.
      SubAggregation and SubGroupBy Add sub-aggregations. Sub-aggregations perform additional aggregation operations based on the content of the grouping. Example:
      • Scenario

        Query the number of products in each category and the maximum and minimum product prices in each category.

      • Method

        First group query results by product categories, and then add two sub-aggregations to obtain the maximum and minimum product prices in each category.

      • Result
        • Fruits: 5. The maximum price is CNY 15, and the minimum price is CNY 3.
        • Toiletries: 10. The maximum price is CNY 98, and the minimum price is CNY 1.
        • Electronic devices: 3. The maximum price is CNY 8,699, and the minimum price is CNY 2,300.
        • Other products: 15. The maximum price is CNY 1,000, and the minimum price is CNY 80.
    • Example
      /**
       * Query the number of products and the maximum and minimum product prices in each category.
       * Example of returned results: Fruits: 5. The maximum price is CNY 15, and the minimum price is CNY 3. Toiletries: 10. The maximum price is CNY 98, and the minimum price is CNY 1. Electronic devices: 3. The maximum price is CNY 8,699, and the minimum price is CNY 2,300. Other products: 15. The maximum price is CNY 1,000,
       and the minimum price is CNY 80.
       */
      func GroupByField(client *tablestore.TableStoreClient, tableName string, indexName string) {
          searchRequest := &tablestore.SearchRequest{}
      
          searchRequest.
              SetTableName(tableName).    // Set the table name.
              SetIndexName(indexName).    // Set the search index name.
              SetSearchQuery(search.NewSearchQuery().
                  SetQuery(&search.MatchAllQuery{}).    // Specify to match all rows.
                  SetLimit(0).
                  GroupBy(search.NewGroupByField("group1", "column_type").    // Group by values for a field in each category.
                      SubAggregation(search.NewMinAggregation("min_price", "column_price")).    // The minimum product price in each category.
                      SubAggregation(search.NewMaxAggregation("max_price", "column_price"))))   // The maximum product price in each category.
      
          searchResponse, err := client.Search(searchRequest)
          if err ! = nil {
              fmt.Printf("%#v", err)
              return
          }
          groupByResults := searchResponse.GroupByResults    // Obtain the aggregation results.
          group, err := groupByResults.GroupByField("group1")
          if err ! = nil {
              fmt.Printf("%#v", err)
              return
          }
      
          for _, item := range group.Items {    // Traverse the returned groups.
              // The value of each group and the number of rows in each group.
              fmt.Println("\tkey: ", item.Key, ", rowCount: ", item.RowCount)    // Display the number of rows in each group.
      
              // The minimum price.
              minPrice, _ := item.SubAggregations.Min("min_price")
              if minPrice.HasValue() {
                  fmt.Println("\t\tmin_price: ", minPrice.Value)
              }
      
              // The maximum price.
              maxPrice, _ := item.SubAggregations.Max("max_price")
              if maxPrice.HasValue() {
                  fmt.Println("\t\tmax_price: ", maxPrice.Value)
              }
          }
      }
  • Group by range
    • Definition: Group query results based on value ranges of a field. Field values that fall within a certain range are grouped together. The number of items in each range is returned. For example, group the sales volume of a product by the ranges [0, 1000), [1000, 5000), and [5000, ∞) to obtain the sales volume of each range.
    • Parameters
      Parameter Description
      Name The unique name specified for the aggregation. You can find the required aggregation results based on the name.
      FieldName The name of the field to be aggregated. Only fields of the long and double types are supported.
      Range(fromInclusive float64, toExclusive float64) Add ranges for grouping. The range can start from the minimum NegInf and end at the maximum Inf.
      SubAggregation and SubGroupBy Add sub-aggregations. Sub-aggregations perform additional aggregation operations based on the content of the grouping. For example, after you group query results by sales volume and then by province, you can obtain which province has the largest proportion in a certain range of sales volume. You must add a GroupByField sub-aggregation in the GroupByRange aggregation to execute this query.
    • Example
      /**
       * Group sales volume by the ranges [0, 1000), [1000, 5000), and [5000, ∞) to obtain the sales volume in each range.
       */
      func GroupByRange(client *tablestore.TableStoreClient, tableName string, indexName string) {
          searchRequest := &tablestore.SearchRequest{}
      
          searchRequest.
              SetTableName(tableName).    // Set the table name.
              SetIndexName(indexName).    // Set the search index name.
              SetSearchQuery(search.NewSearchQuery().
                  SetQuery(&search.MatchAllQuery{}).    // Specify to match all rows.
                  SetLimit(0).
                  GroupBy(search.NewGroupByRange("group1", "column_number").
                      Range(search.NegInf, 1000).
                      Range(1000, 5000).
                      Range(5000, search.Inf)))
      
          searchResponse, err := client.Search(searchRequest)
          if err ! = nil {
              fmt.Printf("%#v", err)
              return
          }
          groupByResults := searchResponse.GroupByResults    // Obtain the aggregation results.
          group, err := groupByResults.GroupByRange("group1")
          if err ! = nil {
              fmt.Printf("%#v", err)
              return
          }
          for _, item := range group.Items {    // Traverse the returned groups.
              fmt.Println("\t[", item.From, ", ", item.To, "), rowCount: ", item.RowCount)    // Display the number of rows in each group.
          }
      }
  • Group by geographical distance
    • Definition: Group query results based on their geographical distances to a central point. Distances that fall within a certain range are grouped together. The number of items in each range is returned. Group people based on their geographical distances to a Wanda Plaza to obtain the number of people in each distance range. Group the distances by the ranges [0, 1000), [1000, 5000), and [5000, ∞). The unit is meters.
    • Parameters
      Parameter Description
      Name The unique name specified for the aggregation. You can find the required aggregation results based on the name.
      FieldName The name of the field to be aggregated. Only fields of the geo_point type are supported.
      CenterPoint(latitude float64, longitude float64) latitude indicates the latitude of the central point, and longitude indicates the longitude of the central point.
      Range(fromInclusive float64, toExclusive float64) Add ranges for grouping. The unit is meters. The range can start from the minimum NegInf and end at the maximum Inf.
      SubAggregation and SubGroupBy Add sub-aggregations. Sub-aggregations perform additional aggregation operations based on the content of the grouping.
    • Example
      
      /**
       * Group people based on their geographical distances to a Wanda Plaza to obtain the number of people in each distance range. Group the distances by the ranges [0, 1000), [1000, 5000), and [5000, ∞). The unit is meters.
       */
      func GroupByGeoDistance(client *tablestore.TableStoreClient, tableName string, indexName string) {
          searchRequest := &tablestore.SearchRequest{}
      
          searchRequest.
              SetTableName(tableName).    // Set the table name.
              SetIndexName(indexName).    // Set the search index name.
              SetSearchQuery(search.NewSearchQuery().
                  SetQuery(&search.MatchAllQuery{}).    // Specify to match all rows.
                  SetLimit(0).
                  GroupBy(search.NewGroupByGeoDistance("group1", "Col_GeoPoint", search.GeoPoint{Lat: 30.137817, Lon:120.08681}).
                      Range(search.NegInf, 1000).
                      Range(1000, 5000).
                      Range(5000, search.Inf)))
      
          searchResponse, err := client.Search(searchRequest)
          if err ! = nil {
              fmt.Printf("%#v", err)
              return
          }
          groupByResults := searchResponse.GroupByResults    // Obtain the aggregation results.
          group, err := groupByResults.GroupByGeoDistance("group1")
          if err ! = nil {
              fmt.Printf("%#v", err)
              return
          }
          for _, item := range group.Items {    // Traverse the returned groups.
              fmt.Println("\t[", item.From, ", ", item.To, "), rowCount: ", item.RowCount)    // Display the number of rows in each group.
          }
      }
  • Group by filter
    • Definition: Filter the query results and group them together to obtain the number of items matching each filter. Results are returned in the order in which the filters were added. For example, you can add the following three filters to obtain the number of items matching each filter: the sales volume exceeds 100, the production place is Zhejiang, and the description contains Hangzhou.
    • Parameters
      Parameter Description
      Name The unique name specified for the aggregation. You can find the required aggregation results based on the name.
      Query The added filters for a query.
      SubAggregation and SubGroupBy Add sub-aggregations. Sub-aggregations perform additional aggregation operations based on the content of the grouping.
    • Example
      /**
       * Filter the query results. For example, add the following three filters to obtain the number of items matching each filter: the sales volume exceeds 100, the production place is Zhejiang, and the description contains Hangzhou.
       */
      func GroupByFilter(client *tablestore.TableStoreClient, tableName string, indexName string) {
          searchRequest := &tablestore.SearchRequest{}
      
          searchRequest.
              SetTableName(tableName).    // Set the table name.
              SetIndexName(indexName).    // Set the search index name.
              SetSearchQuery(search.NewSearchQuery().
                  SetQuery(&search.MatchAllQuery{}).    // Specify to match all rows.
                  SetLimit(0).
                  GroupBy(search.NewGroupByFilter("group1").
                      Query(&search.RangeQuery{
                          FieldName: "number",
                          From: 100,
                          IncludeLower: true}).
                      Query(&search.TermQuery{
                          FieldName: "place",
                          Term:      "Zhejiang",
                       }).
                      Query(&search.MatchQuery{
                          FieldName: "description",
                          Text: "Hangzhou",
                      })))
      
          searchResponse, err := client.Search(searchRequest)
          if err ! = nil {
              fmt.Printf("%#v", err)
              return
          }
          groupByResults := searchResponse.GroupByResults    // Obtain the aggregation results.
          group, err := groupByResults.GroupByFilter("group1")
          if err ! = nil {
              fmt.Printf("%#v", err)
              return
          }
          for _, item := range group.Items {    // Traverse the returned groups.
              fmt.Println("\trowCount: ", item.RowCount)    // Display the number of rows in each group.
          }
      }

Multiple aggregations

A request supports multiple aggregations.

Note Implementing multiple complicated aggregations at the same time may cause a long response time.

Example

func MultipleAggregationsAndGroupBysSample(client *tablestore.TableStoreClient, tableName string, indexName string) {
    searchRequest := &tablestore.SearchRequest{}

    searchRequest.
        SetTableName(tableName).    // Set the table name.
        SetIndexName(indexName).    // Set the search index name.
        SetSearchQuery(search.NewSearchQuery().
            SetQuery(&search.MatchAllQuery{}).    // Specify to match all rows.
            SetLimit(0).
            Aggregation(search.NewAvgAggregation("agg1", "Col_Long")).                // Calculate the average value for the Col_Long field.
            Aggregation(search.NewDistinctCountAggregation("agg2", "Col_Long")).    // Calculate the number of values for the Col_Long field.
            Aggregation(search.NewMaxAggregation("agg3", "Col_Long")).                // Calculate the maximum value for the Col_Long field.
            GroupBy(search.NewGroupByField("group1", "Col_Keyword").    // Perform the GroupByField operation for the Col_Keyword field.
                GroupBySorters([]search.GroupBySorter{}).    // Specify the order in which the groups are returned.
                Size(2).                                // Return only the first two groups.
                SubAggregation(search.NewAvgAggregation("sub_agg1", "Col_Long")).    // Perform sub-aggregations for each group.
                SubGroupBy(search.NewGroupByField("sub_group1", "Col_Keyword2"))).    // Perform SubGroupBy operations for each group 
            GroupBy(search.NewGroupByRange("group2", "Col_Long").        // Perform the GroupByRange operation for the Col_Long field.
                Range(search.NegInf, 3).            // The first group contains rows whose values in the Col_Long field are within the (-∞, 3) range.
                Range(3, 5).            // The second group contains rows whose values in the Col_Long field are within the [3, 5) range.
                Range(5, search.Inf)))            // The third group contains rows whose values in the Col_Long field are within the [5, +∞) range.

    // Specify to return all columns.
    searchResponse, err := client.Search(searchRequest)
    if err ! = nil {
        fmt.Printf("%#v", err)
        return
    }

    aggResults := searchResponse.AggregationResults     // Obtain the aggregation results.
    //agg1
    agg1, err := aggResults.Avg("agg1")        // Obtain the average of agg1.
    if err ! = nil {
        panic(err)
    }
    if agg1.HasValue() {                            // The aggregation agg1 has values.
        fmt.Println("(avg) agg1: ", agg1.Value)    // Display the average value for the Col_Long field.
    } else {
        fmt.Println("(avg) agg1: no value")        // No rows have any values for the Col_Long field.
    }

    //agg2
    agg2, err := aggResults.DistinctCount("agg2")    // Obtain the DistinctCount of agg2.
    if err ! = nil {
        panic(err)
    }
    fmt.Println("(distinct) agg2: ", agg2.Value)        // Display the number of values for the Col_Long field.

    //agg3
    agg3, err := aggResults.Max("agg3")        // Obtain the Max of agg3.
    if err ! = nil {
        panic(err)
    }
    if agg3.HasValue() {
        fmt.Println("(max) agg3: ", agg3.Value)    // Display the maximum value for the Col_Long field.
    } else {
        fmt.Println("(max) agg3: no value")        // No rows have any values for the Col_Long field.
    }

    groupByResults := searchResponse.GroupByResults    // Obtain the aggregation results.
    //group1
    group1, err := groupByResults.GroupByField("group1")   // Obtain the GroupBy result of group1, which is of the GroupByField type.
    if err ! = nil {
        panic(err)
    }
    fmt.Println("group1: ")
    for _, item := range group1.Items {    // Traverse the returned groups.
        //item
        fmt.Println("\tkey: ", item.Key, ", rowCount: ", item.RowCount)    // Display the number of rows in each group.

        //sub agg
        subAgg1, err := item.SubAggregations.Avg("sub_agg1")   // Obtain the sub-aggregation result of sub_agg1.
        if err ! = nil {
            panic(err)
        }
        if subAgg1.HasValue() {    // If sub_agg1 has obtained the average value for the Col_Long field, the HasValue() parameter is true.
            fmt.Println("\t\tsub_agg1: ", subAgg1.Value)    // Display the average value for the Col_Long field obtained through sub-aggregation.
        }

        //sub group by
        subGroup1, err := item.SubGroupBys.GroupByField("sub_group1")    // Obtain the SubGroupBy result of sub_group1.
        if err ! = nil {
            panic(err)
        }
        fmt.Println("\t\tsub_group1")
        for _, subItem := range subGroup1.Items {     //Traverse the result of sub_group1.
            fmt.Println("\t\t\tkey: ", subItem.Key, ", rowCount: ", subItem.RowCount)    // Display the number of rows in each group.
            tablestore.Assert(subItem.SubAggregations.Empty(), "")
            tablestore.Assert(subItem.SubGroupBys.Empty(), "")
        }
    }

    //group2
    group2, err := groupByResults.GroupByRange("group2")    // Obtain the GroupBy result of group2, which is of the GroupByRange type.
    if err ! = nil {
        panic(err)
    }
    fmt.Println("group2: ")
    for _, item := range group2.Items {    //Traverse the returned groups.
        fmt.Println("\t[", item.From, ", ", item.To, "), rowCount: ", item.RowCount)    // Display the number of rows in each group.
    }
}

Nesting

GroupBy type aggregations support nesting. You can add sub-aggregations and GroupBy type sub-aggregations to a GroupBy type aggregation. GroupBy type aggregations can contain endless levels for nesting. However, to ensure the performance and reduce the complexity of aggregations, you are only allowed to set a certain number of levels for nesting.

Nested aggregations are used to perform additional aggregation operations within a group. A two-level nested aggregation is used as an example:
  • GroupBy + SubGroupBy: Group items by province and then by city to obtain data for each city in each province.
  • GroupBy + SubAggregation: Group items by province to obtain the maximum value of an indicator for each province.

Example

/**
 * Nested aggregations example: Two aggregations and one GroupByRange are added to the outermost level of GroupByField.
 */
func NestedSample(client *tablestore.TableStoreClient, tableName string, indexName string) {
    searchRequest := &tablestore.SearchRequest{}

    searchRequest.
        SetTableName(tableName).    // Set the table name.
        SetIndexName(indexName).    // Set the search index name.
        SetSearchQuery(search.NewSearchQuery().
            SetQuery(&search.MatchAllQuery{}).    // Specify to match all rows.
            SetLimit(0).
            GroupBy(search.NewGroupByField("group1", "field1").
                SubAggregation(search.NewMinAggregation("sub_agg1", "sub_field1")).
                SubAggregation(search.NewMaxAggregation("sub_agg2", "sub_field2")).
                SubGroupBy(search.NewGroupByRange("sub_group1", "sub_field3").
                    Range(search.NegInf, 3).
                    Range(3, 5).
                    Range(5, search.Inf))))

    searchResponse, err := client.Search(searchRequest)
    if err ! = nil {
        fmt.Printf("%#v", err)
        return
    }
    groupByResults := searchResponse.GroupByResults    // Obtain the aggregation results.
    group, err := groupByResults.GroupByField("group1")
    if err ! = nil {
        fmt.Printf("%#v", err)
        return
    }

    for _, item := range group.Items {    // Traverse the returned groups.
        // The value of each group and the number of rows in each group.
        fmt.Println("\tkey: ", item.Key, ", rowCount: ", item.RowCount)    // Display the number of rows in each group.

        //sub_agg1
        subAgg1, _ := item.SubAggregations.Min("sub_agg1")
        if subAgg1.HasValue() {
            fmt.Println("\t\tsub_agg1: ", subAgg1.Value)
        }

        //sub_agg2
        subAgg2, _ := item.SubAggregations.Max("sub_agg2")
        if subAgg2.HasValue() {
            fmt.Println("\t\tsub_agg2: ", subAgg2.Value)
        }

        //sub_group1
        subGroup, _ := item.SubGroupBys.GroupByRange("sub_group1")
        for _, item := range subGroup.Items {
            fmt.Println("\t\t[", item.From, ", ", item.To, "), rowCount: ", item.RowCount)
        }
    }
}