This test is performed to provide a comparative performance analysis of TPC-H queries on Object Storage Service (OSS) between an open source self-managed Presto cluster and a virtual cluster (VC) in the serverless Presto engine of Data Lake Analytics (DLA). The analysis results provide insight into the serverless Presto engine of DLA.

  1. Prepare TPC-H test data. For more information about how to generate TPC-H test data, see TPC official documentation.

    After the TPC-H test data is generated, you must convert the format of the test data into Parquet or ORC and upload the data to Object Storage Service (OSS).

  2. Create tables in the open source self-managed Presto cluster and the VC in the serverless Presto engine of DLA. Configuration example:
    In the open source self-managed Presto cluster, you can create tables in a Presto or Hive database. If you want to create tables in a Presto database, you must modify configurations. By default, you create tables in a Hive database. The following sample statements are used to create tables in a Hive database:
    
    CREATE SCHEMA if not exists tpch_1000x_orc
    location  'oss://path/to/tpch/tpch_1000x_orc/'
    ;
    
    CREATE EXTERNAL TABLE tpch_1000x_orc.lineitem (
        l_orderkey bigint,
        l_partkey bigint,
        l_suppkey bigint,
        l_linenumber int,
        l_quantity double,
        l_extendedprice double,
        l_discount double,
        l_tax double,
        l_returnflag string,
        l_linestatus string,
        l_shipdate string,
        l_commitdate string,
        l_receiptdate string,
        l_shipinstruct string,
        l_shipmode string,
        l_comment string
    )
    STORED AS ORC
    LOCATION 'oss://path/to/tpch/tpch_1000x_orc/lineitem_orc/'
    ;
    
    CREATE EXTERNAL TABLE tpch_1000x_orc.customer (
        c_custkey bigint,
        c_name string,
        c_address string,
        c_nationkey int,
        c_phone string,
        c_acctbal double,
        c_mktsegment string,
        c_comment string
    )
    STORED AS ORC
    LOCATION 'oss://path/to/tpch/tpch_1000x_orc/customer_orc/'
    ;
    
    CREATE EXTERNAL TABLE tpch_1000x_orc.nation (
        n_nationkey int,
        n_name string,
        n_regionkey int,
        n_comment string
    )
    STORED AS ORC
    LOCATION 'oss://path/to/tpch/tpch_1000x_orc/nation_orc/'
    ;
    
    CREATE EXTERNAL TABLE tpch_1000x_orc.orders (
        o_orderkey bigint,
        o_custkey bigint,
        o_orderstatus string,
        o_totalprice double,
        o_orderdate string,
        o_orderpriority string,
        o_clerk string,
        o_shippriority int,
        o_comment string
    )
    STORED AS ORC
    LOCATION 'oss://path/to/tpch/tpch_1000x_orc/orders_orc/'
    ;
    
    CREATE EXTERNAL TABLE tpch_1000x_orc.part (
        p_partkey bigint,
        p_name string,
        p_mfgr string,
        p_brand string,
        p_type string,
        p_size int,
        p_container string,
        p_retailprice double,
        p_comment string
    )
    STORED AS ORC
    LOCATION 'oss://path/to/tpch/tpch_1000x_orc/part_orc/'
    ;
    
    CREATE EXTERNAL TABLE tpch_1000x_orc.partsupp (
        ps_partkey bigint,
        ps_suppkey bigint,
        ps_availqty int,
        ps_supplycost double,
        ps_comment string
    )
    STORED AS ORC
    LOCATION 'oss://path/to/tpch/tpch_1000x_orc/partsupp_orc/'
    ;
    
    CREATE EXTERNAL TABLE tpch_1000x_orc.region (
        r_regionkey int,
        r_name string,
        r_comment string
    )
    STORED AS ORC
    LOCATION 'oss://path/to/tpch/tpch_1000x_orc/region_orc/'
    ;
    
    CREATE EXTERNAL TABLE tpch_1000x_orc.supplier (
        s_suppkey bigint,
        s_name string,
        s_address string,
        s_nationkey int,
        s_phone string,
        s_acctbal double,
        s_comment string
    )
    STORED AS ORC
    LOCATION 'oss://path/to/tpch/tpch_1000x_orc/supplier_orc/'
    ;
    
    CREATE SCHEMA if not exists tpch_1000x_parquet
    location  'oss://path/to/tpch/tpch_1000x_parquet/'
    ;
    
    CREATE EXTERNAL TABLE tpch_1000x_parquet.lineitem (
        l_orderkey bigint,
        l_partkey bigint,
        l_suppkey bigint,
        l_linenumber int,
        l_quantity double,
        l_extendedprice double,
        l_discount double,
        l_tax double,
        l_returnflag string,
        l_linestatus string,
        l_shipdate string,
        l_commitdate string,
        l_receiptdate string,
        l_shipinstruct string,
        l_shipmode string,
        l_comment string
    )
    STORED AS PARQUET
    LOCATION 'oss://path/to/tpch/tpch_1000x_parquet/lineitem_parquet/'
    ;
    
    CREATE EXTERNAL TABLE tpch_1000x_parquet.customer (
        c_custkey bigint,
        c_name string,
        c_address string,
        c_nationkey int,
        c_phone string,
        c_acctbal double,
        c_mktsegment string,
        c_comment string
    )
    STORED AS PARQUET
    LOCATION 'oss://path/to/tpch/tpch_1000x_parquet/customer_parquet/'
    ;
    
    CREATE EXTERNAL TABLE tpch_1000x_parquet.nation (
        n_nationkey int,
        n_name string,
        n_regionkey int,
        n_comment string
    )
    STORED AS PARQUET
    LOCATION 'oss://path/to/tpch/tpch_1000x_parquet/nation_parquet/'
    ;
    
    CREATE EXTERNAL TABLE tpch_1000x_parquet.orders (
        o_orderkey bigint,
        o_custkey bigint,
        o_orderstatus string,
        o_totalprice double,
        o_orderdate string,
        o_orderpriority string,
        o_clerk string,
        o_shippriority int,
        o_comment string
    )
    STORED AS PARQUET
    LOCATION 'oss://path/to/tpch/tpch_1000x_parquet/orders_parquet/'
    ;
    
    CREATE EXTERNAL TABLE tpch_1000x_parquet.part (
        p_partkey bigint,
        p_name string,
        p_mfgr string,
        p_brand string,
        p_type string,
        p_size int,
        p_container string,
        p_retailprice double,
        p_comment string
    )
    STORED AS PARQUET
    LOCATION 'oss://path/to/tpch/tpch_1000x_parquet/part_parquet/'
    ;
    
    CREATE EXTERNAL TABLE tpch_1000x_parquet.partsupp (
        ps_partkey bigint,
        ps_suppkey bigint,
        ps_availqty int,
        ps_supplycost double,
        ps_comment string
    )
    STORED AS PARQUET
    LOCATION 'oss://path/to/tpch/tpch_1000x_parquet/partsupp_parquet/'
    ;
    
    CREATE EXTERNAL TABLE tpch_1000x_parquet.region (
        r_regionkey int,
        r_name string,
        r_comment string
    )
    STORED AS PARQUET
    LOCATION 'oss://path/to/tpch/tpch_1000x_parquet/region_parquet/'
    ;
    
    CREATE EXTERNAL TABLE tpch_1000x_parquet.supplier (
        s_suppkey bigint,
        s_name string,
        s_address string,
        s_nationkey int,
        s_phone string,
        s_acctbal double,
        s_comment string
    )
    STORED AS PARQUET
    LOCATION 'oss://path/to/tpch/tpch_1000x_parquet/supplier_parquet/'
    ;
    The following sample statements are used to create tables in the serverless Presto engine of DLA:
    create external table if not exists `tpch_1000x_orc`.`customer` (
        `c_custkey` bigint,
        `c_name` string,
        `c_address` string,
        `c_nationkey` int,
        `c_phone` string,
        `c_acctbal` double,
        `c_mktsegment` string,
        `c_comment` string
    )
    STORED AS `ORC`
    LOCATION 'oss://path/to/tpch/tpch_1000x_orc/customer_orc/';
    
    CREATE EXTERNAL TABLE if not exists `tpch_1000x_orc`.`lineitem` (
        `l_orderkey` bigint,
        `l_partkey` bigint,
        `l_suppkey` bigint,
        `l_linenumber` int,
        `l_quantity` double,
        `l_extendedprice` double,
        `l_discount` double,
        `l_tax` double,
        `l_returnflag` string,
        `l_linestatus` string,
        `l_shipdate` string,
        `l_commitdate` string,
        `l_receiptdate` string,
        `l_shipinstruct` string,
        `l_shipmode` string,
        `l_comment` string
    )
    STORED AS `ORC`
    LOCATION 'oss://path/to/tpch/tpch_1000x_orc/lineitem_orc/';
    
    create external table if not exists `tpch_1000x_orc`.`nation` (
        `n_nationkey` int,
        `n_name` string,
        `n_regionkey` int,
        `n_comment` string
    )
    STORED AS `ORC`
    LOCATION 'oss://path/to/tpch/tpch_1000x_orc/nation_orc/';
    
    create external table if not exists `tpch_1000x_orc`.`orders` (
        `o_orderkey` bigint,
        `o_custkey` bigint,
        `o_orderstatus` string,
        `o_totalprice` double,
        `o_orderdate` string,
        `o_orderpriority` string,
        `o_clerk` string,
        `o_shippriority` int,
        `o_comment` string
    )
    STORED AS `ORC`
    LOCATION 'oss://path/to/tpch/tpch_1000x_orc/orders_orc/';
    
    create external table if not exists `tpch_1000x_orc`.`part` (
        `p_partkey` bigint,
        `p_name` string,
        `p_mfgr` string,
        `p_brand` string,
        `p_type` string,
        `p_size` int,
        `p_container` string,
        `p_retailprice` double,
        `p_comment` string
    )
    STORED AS `ORC`
    LOCATION 'oss://path/to/tpch/tpch_1000x_orc/part_orc/';
    
    create external table if not exists `tpch_1000x_orc`.`partsupp` (
        `ps_partkey` bigint,
        `ps_suppkey` bigint,
        `ps_availqty` int,
        `ps_supplycost` double,
        `ps_comment` string
    )
    STORED AS `ORC`
    LOCATION 'oss://path/to/tpch/tpch_1000x_orc/partsupp_orc/';
    
    create external table if not exists `tpch_1000x_orc`.`region` (
        `r_regionkey` int,
        `r_name` string,
        `r_comment` string
    )
    STORED AS `ORC`
    LOCATION 'oss://path/to/tpch/tpch_1000x_orc/region_orc/';
    
    create external table if not exists `tpch_1000x_orc`.`supplier` (
        `s_suppkey` bigint,
        `s_name` string,
        `s_address` string,
        `s_nationkey` int,
        `s_phone` string,
        `s_acctbal` double,
        `s_comment` string
    )
    STORED AS `ORC`
    LOCATION 'oss://path/to/tpch/tpch_1000x_orc/supplier_orc/';
    
    create database if not exists`tpch_1000x_parquet`
    WITH DBPROPERTIES (
        catalog = 'oss',
        location = 'oss://path/to/tpch/tpch_1000x_parquet/'
    );
    
    create external table if not exists `tpch_1000x_parquet`.`customer` (
        `c_custkey` bigint,
        `c_name` string,
        `c_address` string,
        `c_nationkey` int,
        `c_phone` string,
        `c_acctbal` double,
        `c_mktsegment` string,
        `c_comment` string
    )
    STORED AS `PARQUET`
    LOCATION 'oss://path/to/tpch/tpch_1000x_parquet/customer_parquet/';
    
    CREATE EXTERNAL TABLE if not exists `tpch_1000x_parquet`.`lineitem` (
        `l_orderkey` bigint,
        `l_partkey` bigint,
        `l_suppkey` bigint,
        `l_linenumber` int,
        `l_quantity` double,
        `l_extendedprice` double,
        `l_discount` double,
        `l_tax` double,
        `l_returnflag` string,
        `l_linestatus` string,
        `l_shipdate` string,
        `l_commitdate` string,
        `l_receiptdate` string,
        `l_shipinstruct` string,
        `l_shipmode` string,
        `l_comment` string
    )
    STORED AS `PARQUET`
    LOCATION 'oss://path/to/tpch/tpch_1000x_parquet/lineitem_parquet/';
    
    create external table if not exists `tpch_1000x_parquet`.`nation` (
        `n_nationkey` int,
        `n_name` string,
        `n_regionkey` int,
        `n_comment` string
    )
    STORED AS `PARQUET`
    LOCATION 'oss://path/to/tpch/tpch_1000x_parquet/nation_parquet/';
    
    create external table if not exists `tpch_1000x_parquet`.`orders` (
        `o_orderkey` bigint,
        `o_custkey` bigint,
        `o_orderstatus` string,
        `o_totalprice` double,
        `o_orderdate` string,
        `o_orderpriority` string,
        `o_clerk` string,
        `o_shippriority` int,
        `o_comment` string
    )
    STORED AS `PARQUET`
    LOCATION 'oss://path/to/tpch/tpch_1000x_parquet/orders_parquet/';
    
    create external table if not exists `tpch_1000x_parquet`.`part` (
        `p_partkey` bigint,
        `p_name` string,
        `p_mfgr` string,
        `p_brand` string,
        `p_type` string,
        `p_size` int,
        `p_container` string,
        `p_retailprice` double,
        `p_comment` string
    )
    STORED AS `PARQUET`
    LOCATION 'oss://path/to/tpch/tpch_1000x_parquet/part_parquet/';
    
    create external table if not exists `tpch_1000x_parquet`.`partsupp` (
        `ps_partkey` bigint,
        `ps_suppkey` bigint,
        `ps_availqty` int,
        `ps_supplycost` double,
        `ps_comment` string
    )
    STORED AS `PARQUET`
    LOCATION 'oss://path/to/tpch/tpch_1000x_parquet/partsupp_parquet/';
    
    create external table if not exists `tpch_1000x_parquet`.`region` (
        `r_regionkey` int,
        `r_name` string,
        `r_comment` string
    )
    STORED AS `PARQUET`
    LOCATION 'oss://path/to/tpch/tpch_1000x_parquet/region_parquet/';
    
    create external table if not exists `tpch_1000x_parquet`.`supplier` (
        `s_suppkey` bigint,
        `s_name` string,
        `s_address` string,
        `s_nationkey` int,
        `s_phone` string,
        `s_acctbal` double,
        `s_comment` string
    )
    STORED AS `PARQUET`
    LOCATION 'oss://path/to/tpch/tpch_1000x_parquet/supplier_parquet/';
  3. Perform TPC-H queries on the open source self-managed Presto cluster and VC in the serverless Presto engine of DLA, and record the execution time of each query.
    Note
    • Queries cannot be performed on the open source self-managed Presto cluster and VC in the serverless Presto engine of DLA at the same time. This aims to avoid inaccurate test results caused by OSS bandwidth preemption.
    • To perform TPC-H queries on the VC in the serverless Presto engine of DLA, you must add the following hint to the statement that you executed. This hint is used to specify the VC where queries are performed.
      /*+cluster=your-cu-name*/