This topic describes information about the performance test scenario of Apache Spark.

Create external tables

The following external tables in text format are created for the performance test on Apache Spark:

  • The customer table
    create external table customer(
      c_custkey integer, 
      c_name varchar(25), 
      c_address varchar(40), 
      c_nationkey integer, 
      c_phone char(15), 
      c_acctbal decimal(15, 2), 
      c_mktsegment char(10), 
      c_comment varchar(117)
    ) row format delimited fields terminated by '|' location '/.../customer' TBLPROPERTIES(
      'serialization.null.format' = '', 
      'serialization.encoding' = 'latin1'
    );
  • The lineitem table
    create external table lineitem(
      l_orderkey integer, 
      l_partkey integer, 
      l_suppkey integer, 
      l_linenumber integer, 
      l_quantity decimal(15, 2), 
      l_extendedprice decimal(15, 2), 
      l_discount decimal(15, 2), 
      l_tax decimal(15, 2), 
      l_returnflag char(1), 
      l_linestatus char(1), 
      l_shipdate date, 
      l_commitdate date, 
      l_receiptdate date, 
      l_shipinstruct char(25), 
      l_shipmode char(10), 
      l_comment varchar(44)
    ) row format delimited fields terminated by '|' location '/.../lineitem' TBLPROPERTIES(
      'serialization.null.format' = '', 
      'serialization.encoding' = 'latin1'
    );
  • The nation table
    create external table nation(
      n_nationkey integer, 
      n_name char(25), 
      n_regionkey integer, 
      n_comment varchar(152)
    ) row format delimited fields terminated by '|' location '/.../nation' TBLPROPERTIES(
      'serialization.null.format' = '', 
      'serialization.encoding' = 'latin1'
    );
  • The orders table
    create external table orders(
      o_orderkey integer, 
      o_custkey integer, 
      o_orderstatus char(1), 
      o_totalprice decimal(15, 2), 
      o_orderdate date, 
      o_orderpriority char(15), 
      o_clerk char(15), 
      o_shippriority integer, 
      o_comment varchar(79)
    ) row format delimited fields terminated by '|' location '/.../orders' TBLPROPERTIES(
      'serialization.null.format' = '', 
      'serialization.encoding' = 'latin1'
    );
  • The part table
    create external table part(
      p_partkey integer, 
      p_name varchar(55), 
      p_mfgr char(25), 
      p_brand char(10), 
      p_type varchar(25), 
      p_size integer, 
      p_container char(10), 
      p_retailprice decimal(15, 2), 
      p_comment varchar(23)
    ) row format delimited fields terminated by '|' location '/.../part' TBLPROPERTIES(
      'serialization.null.format' = '', 
      'serialization.encoding' = 'latin1'
    );
  • The partsupp table
    create external table partsupp(
      ps_partkey integer, 
      ps_suppkey integer, 
      ps_availqty integer, 
      ps_supplycost decimal(15, 2), 
      ps_comment varchar(199)
    ) row format delimited fields terminated by '|' location '/.../partsupp' TBLPROPERTIES(
      'serialization.null.format' = '', 
      'serialization.encoding' = 'latin1'
    );
  • The region table
    create external table region(
      r_regionkey integer, 
      r_name char(25), 
      r_comment varchar(152)
    ) row format delimited fields terminated by '|' location '/.../region/' TBLPROPERTIES(
      'serialization.null.format' = '', 
      'serialization.encoding' = 'latin1'
    );
  • The supplier table
    create external table supplier(
      s_suppkey integer, 
      s_name char(25), 
      s_address varchar(40), 
      s_nationkey integer, 
      s_phone char(15), 
      s_acctbal decimal(15, 2), 
      s_comment varchar(101)
    ) row format delimited fields terminated by '|' location '/.../supplier/' TBLPROPERTIES(
      'serialization.null.format' = '', 
      'serialization.encoding' = 'latin1'
    );

Create tables

The following internal tables are created for the performance test on Apache Spark:

  • The customer table
    create table customer(
            c_custkey     integer,
            c_name        varchar(25),
            c_address     varchar(40),
            c_nationkey   integer,
            c_phone       char(15),
            c_acctbal     decimal(15,2),
            c_mktsegment  char(10),
            c_comment     varchar(117)
    )
    stored as parquet
    TBLPROPERTIES("parquet.compression"="SNAPPY");
  • The lineitem table
    create table lineitem
    (
        l_orderkey    integer,
        l_partkey     integer,
        l_suppkey     integer,
        l_linenumber  integer,
        l_quantity    decimal(15,2),
        l_extendedprice  decimal(15,2),
        l_discount    decimal(15,2),
        l_tax         decimal(15,2),
        l_returnflag  char(1),
        l_linestatus  char(1),
        l_shipdate    date,
        l_commitdate  date,
        l_receiptdate date,
        l_shipinstruct char(25),
        l_shipmode     char(10),
        l_comment      varchar(44)
    )
    stored as parquet
    TBLPROPERTIES("parquet.compression"="SNAPPY");
  • The nation table
    create table nation(
            n_nationkey  integer,
            n_name       char(25),
            n_regionkey  integer,
            n_comment    varchar(152)
    )
    stored as parquet
    TBLPROPERTIES("parquet.compression"="SNAPPY");
  • The orders table
    create table orders(
        o_orderkey       integer,
            o_custkey        integer,
            o_orderstatus    char(1),
            o_totalprice     decimal(15,2),
            o_orderdate      date,
            o_orderpriority  char(15),
            o_clerk          char(15),
            o_shippriority   integer,
            o_comment        varchar(79)
    )
    stored as parquet
    TBLPROPERTIES("parquet.compression"="SNAPPY");
  • The part table
    create table part(
            p_partkey     integer,
            p_name        varchar(55),
            p_mfgr        char(25),
            p_brand       char(10),
            p_type        varchar(25),
            p_size        integer,
            p_container   char(10),
            p_retailprice decimal(15,2),
            p_comment     varchar(23)
    )
    stored as parquet
    TBLPROPERTIES("parquet.compression"="SNAPPY");
  • The partsupp table
    create table partsupp(
        ps_partkey     integer,
            ps_suppkey     integer,
            ps_availqty    integer,
            ps_supplycost  decimal(15,2),
            ps_comment     varchar(199)
    )
    stored as parquet
    TBLPROPERTIES("parquet.compression"="SNAPPY");
  • The region table
    create table region(
        r_regionkey  integer,
        r_name       char(25),
        r_comment    varchar(152)
    )
    stored as parquet
    TBLPROPERTIES("parquet.compression"="SNAPPY");
  • The supplier table
    create table supplier(
        s_suppkey     integer,
        s_name        char(25),
        s_address     varchar(40),
        s_nationkey   integer,
        s_phone       char(15),
        s_acctbal     decimal(15,2),
        s_comment     varchar(101)
    )
    stored as parquet
    TBLPROPERTIES("parquet.compression"="SNAPPY");