Ysera
Assistant Engineer
Assistant Engineer
  • UID634
  • Fans0
  • Follows0
  • Posts44
Reads:2469Replies:0

[MySQL]New in MySQL 8.0 - a taste of the new data dictionary

Created#
More Posted time:Oct 20, 2016 17:25 PM
It is widely known that since MySQL adopts the architecture with a uniform server layer + different underlying engine plug-ins, an frm file is created at the server layer for every table to save metadata information related with the table definition. However, some engines (such as InnoDB) can also store metadata. As a result, metadata redundancy arises. In addition, because of the separated management on the server layer and engine layer, it is hard to achieve crash-safe during DDL operations, let alone the transaction-based DDL.
To solve these problems (especially the unavailability of atomic DDL), the FRM file and other metadata files (frm, par, trn, trg, isl, db.opt) at the server layer have been canceled since MySQL 8.0. All metadata is stored using the InnoDB engine. Some other system tables such as the permission table also switched to the InnoDB engine.
I am also a beginner in this field, so I will not dig deep into the topic in this article. Because of the many changes involved, I will expand upon the changes one by one later.
All the tests and code in this article are based on MySQL8.0.0. This is the first developer version for the Version 8.0, so it may be subject to subsequent changes.
Test
First, we create a new database and two tables under the database to start our testing.
mysql> CREATE DATABASE sbtest;
Query OK, 1 row affected (0.00 sec)

mysql> USE sbtest
Database changed
mysql> CREATE TABLE t1 (a int primary key);
Query OK, 0 rows affected (0.00 sec)

mysql> CREATE TABLE t2 (a int primary key, b int);
Query OK, 0 rows affected (0.00 sec)

$ls -lh /u01/my80/data/sbtest
total 256K
-rw-r----- 1 yinfeng.zwx users 128K Oct  5 19:44 t1.ibd
-rw-r----- 1 yinfeng.zwx users 128K Oct  5 19:44 t2.ibd

$ls  /u01/my80/data/sbtest_9.SDI
/u01/my80/data/sbtest_9.SDI

$cat /u01/my80/data/sbtest_9.SDI
{
    "sdi_version": 1,
    "dd_version": 1,
    "dd_object_type": "Schema",
    "dd_object": {
        "name": "sbtest",
        "default_collation_id": 33,
        "created": 0,
        "last_altered": 0
    }
}


We can see that there is only the ibd file in the database directory, with no frm files. In the data directory, an SDI file is generated to describe the information of this sbtest database.
Now let's see how to create a MyISAM engine table:
mysql> create database my;
Query OK, 1 row affected (0.00 sec)

mysql> use my
Database changed
mysql> create table t1 (a int, b varchar(320)) engine=myisam;
Query OK, 0 rows affected (0.00 sec)

$ls my/
t1_435.SDI  t1.MYD  t1.MYI

{
    "sdi_version": 1,
    "dd_version": 1,
    "dd_object_type": "Table",
    "dd_object": {
        "name": "t1",
        "mysql_version_id": 80000,
        "created": 20161005201935,
        "last_altered": 20161005201935,
        "options": "avg_row_length=0;key_block_size=0;keys_disabled=0;pack_record=1;stats_auto_recalc=0;stats_sample_pages=0;",
        "columns": [
            {
                "name": "a",
                "type": 4,
                "is_nullable": true,
                "is_zerofill": false,
                "is_unsigned": false,
                "is_auto_increment": false,
                "is_virtual": false,
                "hidden": false,
                "ordinal_position": 1,
                "char_length": 11,
                "numeric_precision": 10,
                "numeric_scale": 0,
                "datetime_precision": 0,
                "has_no_default": false,
                "default_value_null": true,
                "default_value": "",
                "default_option": "",
                "update_option": "",
                "comment": "",
                "generation_expression": "",
                "generation_expression_utf8": "",
                "options": "interval_count=0;",
                "se_private_data": "",
                "column_key": 1,
                "column_type_utf8": "int(11)",
                "elements": [],
                "collation_id": 33
            },
            {
                "name": "b",
                "type": 16,
                "is_nullable": true,
                "is_zerofill": false,
                "is_unsigned": false,
                "is_auto_increment": false,
                "is_virtual": false,
                "hidden": false,
                "ordinal_position": 2,
                "char_length": 960,
                "numeric_precision": 0,
                "numeric_scale": 0,
                "datetime_precision": 0,
                "has_no_default": false,
                "default_value_null": true,
                "default_value": "",
                "default_option": "",
                "update_option": "",
                "comment": "",
                "generation_expression": "",
                "generation_expression_utf8": "",
                "options": "interval_count=0;",
                "se_private_data": "",
                "column_key": 1,
                "column_type_utf8": "varchar(320)",
                "elements": [],
                "collation_id": 33
            }
        ],
        "schema_ref": "my",
        "hidden": false,
        "se_private_id": 18446744073709551615,
        "engine": "MyISAM",
        "comment": "",
        "se_private_data": "",
        "row_format": 2,
        "partition_type": 0,
        "partition_expression": "",
        "default_partitioning": 0,
        "subpartition_type": 0,
        "subpartition_expression": "",
        "default_subpartitioning": 0,
        "indexes": [],
        "foreign_keys": [],
        "partitions": [],
        "collation_id": 33
    }
}


Here we created a MyISAM table t1, and an SDI file is created accordingly. In the file, detailed information about the table is recorded in the JSON format. According to the descriptions in the official documents, the file is there because of a function that has not been fully realized yet.
New definition of information schema
Some new IS tables use the View for re-designing, mainly including the tables below:
CHARACTER_SETS
COLLATIONS
COLLATION_CHARACTER_SET_APPLICABILITY
COLUMNS
KEY_COLUMN_USAGE
SCHEMATA
STATISTICS
TABLES
TABLE_CONSTRAINTS
VIEWS

#例如SCHEMATA

mysql> show create table information_schema.schemata\G
*************************** 1. row ***************************
                View: SCHEMATA
         Create View: CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`localhost` SQL SECURITY DEFINER VIEW `information_schema`.`SCHEMATA` AS select `cat`.`name` AS `CATALOG_NAME`,`sch`.`name` AS `SCHEMA_NAME`,`cs`.`name` AS `DEFAULT_CHARACTER_SET_NAME`,`col`.`name` AS `DEFAULT_COLLATION_NAME`,NULL AS `SQL_PATH` from (((`mysql`.`schemata` `sch` join `mysql`.`catalogs` `cat` on((`cat`.`id` = `sch`.`catalog_id`))) join `mysql`.`collations` `col` on((`sch`.`default_collation_id` = `col`.`id`))) join `mysql`.`character_sets` `cs` on((`col`.`character_set_id` = `cs`.`id`))) where can_access_database(`sch`.`name`)
character_set_client: utf8
collation_connection: utf8_general_ci
1 row in set (0.01 sec)


That is to say, although the DD system table is hidden and invisible, you can still obtain most of its information through the View. This method has actually greatly accelerated the query speed to the IS table. After it is switched to query the physical table, it is not required to create a temporary table for queries to every IS table (The operations of temporary tables include frm file creation at the server layer, and obtaining data or global data requiring lock protection at the engine layer). In addition, the optimizer can also choose a better execution plan for the query to the IS table (For example, using the index on the system table for the query).
The official team conducted a test regarding this and the results showed that the query performance to the IS table was greatly improved. Visit the official blog:
MySQL 8.0: Improvements to Information_schema
MySQL 8.0: Scaling and Performance of INFORMATION_SCHEMA
New options: information_schema_stats: CACHED | LATEST

The metadata information of the current table is cached in the statistics and tables tables to speed up the query performance to the IS table. You can either directly read the cached data in the memory using the information_schema_stats parameter, or get the latest data from the storage engine, the latter of which is obviously slower.


But under the is database, we can see two types of tables: TABLES and TABLES_DYNAMIC, as well as STATISTICS and STATISTICS_DYNAMIC. When LATEST is configured, it will read data from the **_DYNAMIC table.


This option also influences the operations of SHOW TABLES and other statements.
Data Dictionary Cache
After the structure of the data dictionary changes significantly, the corresponding cache for the data dictionary in memory also changes.
mysql> show variables like '%defin%';
+---------------------------------+-------+
| Variable_name                   | Value |
+---------------------------------+-------+
| schema_definition_cache         | 256   |
| stored_program_definition_cache | 256   |
| table_definition_cache          | 1400  |
| tablespace_definition_cache     | 256   |
+---------------------------------+-------+
4 rows in set (0.00 sec)


tablespace_definition_cache: The size of the tablespace cache, in which the definition of tablespace is stored. A tablespace may contain multiple tables.
table_definition_cache
stored_program_definition_cache: Stored procedure & function definition cache.
schema_definition_cache: Schema definition cache.
The hardcoded character string cache:
character set definition cache partition: Stores character set definition objects and has a hardcoded object limit of 256.
collation definition cache partition: Stores collation definition objects and has a hardcoded object limit of 256.


Changes in the system table
• Tables related to permissions are switched to the InnoDB engine
// including: user, db, tables_priv, columns_priv, procs_priv and proxies_priv
// Official Blog Introduction
• The func table is switched to the InnoDB transaction table
// Based on this change, the operations on function (such as CREATE FUNCTION or DROP FUNCTION, or user-defined UDF) may cause an implicit commit.
• The routine and event tables under the MySQL database are not used any more. The information is stored in the new DD table and is invisible under the MySQL database.
• Foreign key system table
// Two invisible system tables, namely foreign_keys and foreign_key_column_usage, are used to store foreign key information
// Because these two system tables are not visible, you need to obtain the foreign key information through the REFERENTIAL_CONSTRAINTS and KEY_COLUMN_USAGE tables under the IS database
// Introduction compatibility: the foreign key name cannot exceed 64 characters (allowed in previous versions)
Source code preview
Let's return to the source code directory. A large number of new code files are introduced to manage the new DD from the server layer. A series of uniform APIs are mainly defined and the code is stored in the sql/dd directory, and the function and class definition are in the namespace dd.
Different classes and inheritance relationships for different metadata are defined:
namespace dd {
    Weak_object
        Entity_object  
            Dictionary_object
                Tablespace
                Schema
                Event
                Routine
                    Function
                    Procedure
                Charset
                Collation
                Abstract_table
                    Table
                Spatial_reference_system
                Index_stat
                    View
                Table_stat
            Partition
            Trigger
            Index
            Foreign_key
            Parameter
            Column
        Partition_index
        Partition_value
        View_routine
        View_table
        Tablespace_file
        Foreign_key_element
        Index_element
        Column_type_element
        Parameter_type_element
    Object_table
        Dictionary_object_table
    Object_type
    Object_table_definition  

}


Data dictionary cache management class:
dd::cache {
    dd::cache::Dictionary_client
    Object_registry
    Element_map
    Multi_map_base
        Local_multi_map
        Shared_multi_map

    Cache_element
    Free_list
    Shared_dictionary_cache

    Storage_adapter
}


MySQL database stores the system tables. Through the show tables command, we can only see 37 tables, but from the disk, we can see there are far more than 37 ibd files in the mysql directory, which means that some of the system tables are not visible to the user. These tables are also used to manage the core data dictionary information, and the reason for the invisibility is to avoid inappropriate operations by users. (Of course, this routine may be changed in the future.) Access interfaces to these tables are defined in the sql/dd/impl/tables/ directory. These hidden tables include:
$grep 'std::string s_table_name' sql/dd/impl/tables/* | awk '{ print $4}'
s_table_name("catalogs");
s_table_name("character_sets");
s_table_name("collations");
s_table_name("columns");
s_table_name("column_type_elements");
s_table_name("events");
s_table_name("foreign_key_column_usage");
s_table_name("foreign_keys");
s_table_name("index_column_usage");
s_table_name("indexes");
s_table_name("index_partitions");
s_table_name("index_stats");
s_table_name("parameters");
s_table_name("parameter_type_elements");
s_table_name("routines");
s_table_name("schemata");
s_table_name("st_spatial_reference_systems");
s_table_name("table_partitions");
s_table_name("table_partition_values");
s_table_name("tables");
s_table_name("tablespace_files");
s_table_name("tablespaces");
s_table_name("table_stats");
s_table_name("triggers");
s_table_name("version");
s_table_name("view_routine_usage");
s_table_name("view_table_usage");


Let's look at some common operations of a table to understand how some code is called.
(Due to the huge changes to the New DD code, with dozens of related worklogs, I started from code learning through testing + code debugging as the first step, so the records may be a little in disorder.)
Database-level operations
1. Create a database
mysql> create database db1;
Query OK, 1 row affected (2.87 sec)

mysql> create database db2;
Query OK, 1 row affected (3.05 sec)


Entry function: mysql_create_db
-- Create the database directory
-- Build binlog and write to the file
-- Call the DD API: dd::create_schema
* Build the object dd::Schema
* Store to the mysql.schemata table of the data dictionary. Related stack:

    ```
    dd::create_schema
    |--> dd::cache::Dictionary_client::store<dd::Schema>
        |--> dd::cache::Storage_adapter::store<dd::Schema>
            |--> dd::Weak_object_impl::store
                |--> dd::Raw_new_record::insert


    Note: The schemata table is invisible to users.

        mysql> desc schemata;
        ERROR 3554 (HY000): Access to system table 'mysql.schemata' is rejected.

    ```

* Create and store the information of the current database to the SDI file. The SDI file name takes the database name as the prefix and the stack is as follows:

    ```
    dd::create_schema
    |--> dd::store_sdi
        |--> dd::sdi_file::store
            |--> write_sdi_file

    ```

* If it succeeds, commit the code; otherwise, perform the rollback


1. Modify database
mysql> alter database db1 default charset gbk;
Query OK, 1 row affected (2 min 17.54 sec)


Entry function: mysql_alter_db
-- Call DD API: dd::alter_schema
* Update data dictionary information. Related stacks:

```
dd::alter_schema
|--> dd::cache::Dictionary_client::update<dd::Schema>
    |--> dd::cache::Dictionary_client::store<dd::Schema>
        |--> dd::cache::Storage_adapter::store<dd::Schema>
            |--> dd::Weak_object_impl::store
                |--> dd::Raw_record::update
```

* Update the SDI file. Related stack:

```
dd::alter_schema
|--> dd::Sdi_updater::operator()
    |--> dd::update_sdi
        |--> dd::sdi_file::store
            |--> write_sdi_file
```

* But it is strange that the file is quickly deleted after the update. Why? (Version 8.0.0)
It seems that the sequence number of the SDI file didn't increase, thus the file was quickly deleted. The true objective is to create a new file to write new data, and then delete the old SDI file.
ref: http://bugs.mysql.com/bug.php?id=83281


-- Write binlog
1. show databases
mysql> show databases; +--------------------+
| Database           |
+--------------------+
| db1                |
| db2                |
| information_schema |
| mysql              |
| performance_schema |
| sys                |
+--------------------+
6 rows in set (1.40 sec)


When you execute the command, you actually convert it to a standard query in SQL. The stack is as follows:
dispatch_command
|-->mysql_parse
    |-->parse_sql
        |-->MYSQLparse
            |--> dd::info_schema::build_show_databases_query


The SQL statement after conversion is similar to:
SELECT SCHEMA_NAME as `Database`,
          FROM information_schema.schemata;


Because it is read directly from the system table, this means that a folder created in the data directory will not be treated as a new database directory.
1. Delete database
mysql> drop database db2;
Query OK, 0 rows affected (1 min 1.86 sec)


-- Delete related files
-- Delete records in the mysql/schemata system table


mysql_rm_db
|--> dd::drop_schema
    |--> dd::cache::Dictionary_client::drop<dd::Schema>
        |-->dd::cache::Storage_adapter::drop<dd::Schema>
            |--> dd::Weak_object_impl::drop
                |--> dd::Raw_record::drop
                    |--> handler::ha_delete_row


Table-level operations
1. Create table
mysql> create table t1 (a int primary key, b int, c int, key(b));
Query OK, 0 rows affected (7 min 12.29 sec)


Entry function:
mysql_create_table_no_lock
|--> create_table_impl
    |--> rea_create_table


-- Insert a new record in dd first (dd::create_table --> dd::create_dd_user_table)
// Initialize the dd::Table object according to the table creation statement, including the table column definition, various attributes and options, and index definition
// Store the record to the system table
```
dd::create_dd_user_table
|--> dd::cache::Dictionary_client::storedd::Table
|-->dd::cache::Storage_adapter::storedd::Table
|-->dd::Weak_object_impl::store
// Insert the record to the mysql/tables system table
// Then insert the record to other system tables, such as “mysql/columns”,
            |-->dd::Table_impl::store_children
                |--> dd::Abstract_table_impl::store_children      // mysql/columns
                    |--> dd::Collection<dd::Column*>::store_items
                        |--> Weak_object_impl::store
            |-->dd::Collection<dd::Index*>::store_items           // mysql/indexes
                |--> dd::Weak_object_impl::store
                    |-->dd::Index_impl::store_children
                        |--> dd::Collection<dd::Index_element*>::store_items    // mysql/index_column_usage
```


-- Then create the engine file
1. Open table
-- Restart the instance and re-open the table. If you load the table definition to memory for the first time, you need to access the system table to get the table definition:
open_and_process_table
|-->open_table
    |-->get_table_share_with_discover
        |-->get_table_share
            |-->open_table_def
                // First check whether schema exists, and load it to the memory cache from the mysql/schemata system table
                |-->dd::schema_exists  
                    |--> dd::cache::Dictionary_client::acquire<dd::Schema>
                        |-->dd::cache::Dictionary_client::acquire<dd::Item_name_key, dd::Schema>
                            |-->dd::cache::Shared_dictionary_cache::get<dd::Item_name_key, dd::Schema>
                                |-->dd::cache::Shared_dictionary_cache::get_uncached<dd::Item_name_key, dd::Schema>
                                    |-->dd::cache::Storage_adapter::get<dd::Item_name_key, dd::Schema>
                                        |-->dd::Raw_table::find_record
                // Then obtain the table definition and load it from the mysql/tables system table
                |-->dd::abstract_table_type
                    |-->dd::cache::Dictionary_client::acquire<dd::Abstract_table>
                        |-->dd::cache::Dictionary_client::acquire<dd::Item_name_key, dd::Abstract_table>
                            |-->dd::cache::Shared_dictionary_cache::get<dd::Item_name_key, dd::Abstract_table>
                                |-->dd::cache::Shared_dictionary_cache::get_uncached<dd::Item_name_key, dd::Abstract_table>
                                    |-->dd::cache::Storage_adapter::get<dd::Item_name_key, dd::Abstract_table>
                                        |--> dd::Raw_table::find_record
                                        // Obtain the attribute information in the table
                                        |-->Dictionary_object_table_impl::restore_object_from_record
                                            |-->dd::Table_impl::restore_children
                                                |-->dd::Abstract_table_impl::restore_children
                                                    // Obtain the column information from the mysql/columns system table
                                                    |-->dd::Collection<dd::Column*>::restore_items<dd::Abstract_table_impl>
                                                    // Obtain the index information from the mysql/indexs system table
                                                    |-->dd::Collection<dd::Index*>::restore_items<dd::Table_impl>
                                                    //Obtain the index information from “mysql/index_column_usage”
                                                    |-->dd::Collection<dd::Index_element*>::restore_items<dd::Index_impl>
                                                    // Obtain the foreign key information from “mysql/foreign_keys”
                                                    |-->dd::Collection<dd::Foreign_key*>::restore_items<dd::Table_impl>
                                                    // Obtain the partition information from “mysql/table_partitions”
                                                    |-->dd::Collection<dd::Partition*>::restore_items<dd::Table_impl>
                                                    //Obtain the trigger information from “mysql/triggers”
                                                    |-->dd::Collection<dd::Trigger*>::restore_items<dd::Table_impl>
Guest