库表字符集不一致导致的全表扫描问题

背景：

当数据库的建库字符集和表不一样时，在库下针对表创建存储过程可能导致全表扫描

如下例：

drop database if exists xx1;
drop database if exists xx2;
create database xx1 character set utf8;
create database xx2 character set gbk;

然后分别在xx1 和 xx2下执行：

CREATE TABLE t1 ( `col1` varchar(10) NOT NULL, `col2` bigint(11) NOT NULL DEFAULT '0', PRIMARY KEY (`col1`)) ENGINE=InnoDB DEFAULT CHARSET=gbk;
insert into t1 values ('ac',2),('del',3),('sp',5),('tr',12);
DELIMITER //
CREATE PROCEDURE proc1(IN isn CHAR(20))
BEGIN
     START TRANSACTION;
     SELECT col2 FROM t1 WHERE col1= isn FOR UPDATE;
     COMMIT;
END //
DELIMITER ;

在5.5版本中通过show processlist可以看到SQL进行了转换(简单的锁住行记录，执行存储过程，再show processlist)：

use xx1;

call proc1(‘tr’); 参数isn会被展开成：NAME_CONST(‘isn’,_utf8 0x7472 COLLATE ‘utf8_general_ci’)

use xx2;

call proc1(‘tr’); 参数isn会被展开成：NAME_CONST(‘isn’,_gbk 0x7472 COLLATE ‘gbk_chinese_ci’)

在5.6版本里使用ROW模式时，不会展开sql，但内部采取的是同样的策略。

由于name_const中引用的的字符集和表定义不同，因此xx1.proc1()每次调用都会产生两次全表扫描。但奇怪的是，如果单独执行上述SQL（而不是调用存储过程），就不会全表扫描。

还好MySQL5.6开始有了optimizer_trace这个利器，打开optimizer trace就一目了然了，对比单独执行SQL和执行存储过程：

call xx1.proc1(‘tr’)

| SELECT col2 FROM t1 WHERE col1= NAME_CONST(‘isn’,_utf8’tr’ COLLATE ‘utf8_general_ci’) FOR UPDATE | {
“steps”: [
{
“join_preparation”: {
“select#”: 1,
“steps”: [
{
“expanded_query”: “/* select#1 */ select `t1`.`col2` AS `col2` from `t1` where (convert(`t1`.`col1` using utf8) = isn@0)”
}
]
}
},
{
“join_optimization”: {
“select#”: 1,
“steps”: [
{
“condition_processing”: {
“condition”: “WHERE”,
“original_condition”: “(convert(`t1`.`col1` using utf8) = isn@0)”,
“steps”: [
{
“transformation”: “equality_propagation”,
“resulting_condition”: “(convert(`t1`.`col1` using utf8) = isn@0)”
},
{
“transformation”: “constant_propagation”,
“resulting_condition”: “(convert(`t1`.`col1` using utf8) = isn@0)”
},
{
“transformation”: “trivial_condition_removal”,
“resulting_condition”: “(convert(`t1`.`col1` using utf8) = isn@0)”
}
]
}
},
{
“table_dependencies”: [
{
“table”: “`t1`”,
“row_may_be_null”: false,
“map_bit”: 0,
“depends_on_map_bits”: [
]
}
]
},
{
“ref_optimizer_key_uses”: [
]
},
{
“rows_estimation”: [
{
“table”: “`t1`”,
“table_scan”: {
“rows”: 4,
“cost”: 1
}
}
]
},
{
“considered_execution_plans”: [
{
“plan_prefix”: [
],
“table”: “`t1`”,
“best_access_path”: {
“considered_access_paths”: [
{
“access_type”: “scan”,
“rows”: 4,
“cost”: 1.8,
“chosen”: true
}
]
},
“cost_for_plan”: 1.8,
“rows_for_plan”: 4,
“chosen”: true
}
]
},
{
“attaching_conditions_to_tables”: {
“original_condition”: “(convert(`t1`.`col1` using utf8) = isn@0)”,
“attached_conditions_computation”: [
],
“attached_conditions_summary”: [
{
“table”: “`t1`”,
“attached”: “(convert(`t1`.`col1` using utf8) = isn@0)”
}
]
}
},
{
“refine_plan”: [
{
“table”: “`t1`”,
“access_type”: “table_scan”
}
]
}
]
}
},
{
“join_execution”: {
“select#”: 1,
“steps”: [
]
}
}
]
}

call xx2.proc1(‘tr’)

SELECT col2 FROM t1 WHERE col1= NAME_CONST(‘isn’,_gbk’tr’ COLLATE ‘gbk_chinese_ci’) FOR UPDATE | {
“steps”: [
{
“join_preparation”: {
“select#”: 1,
“steps”: [
{
“expanded_query”: “/* select#1 */ select `t1`.`col2` AS `col2` from `t1` where (`t1`.`col1` = isn@0)”
}
]
}
},
{
“join_optimization”: {
“select#”: 1,
“steps”: [
{
“condition_processing”: {
“condition”: “WHERE”,
“original_condition”: “(`t1`.`col1` = isn@0)”,
“steps”: [
{
“transformation”: “equality_propagation”,
“resulting_condition”: “multiple equal(isn@0, `t1`.`col1`)”
},
{
“transformation”: “constant_propagation”,
“resulting_condition”: “multiple equal(isn@0, `t1`.`col1`)”
},
{
“transformation”: “trivial_condition_removal”,
“resulting_condition”: “multiple equal(isn@0, `t1`.`col1`)”
}
]
}
},
{
“table_dependencies”: [
{
“table”: “`t1`”,
“row_may_be_null”: false,
“map_bit”: 0,
“depends_on_map_bits”: [
]
}
]
},
{
“ref_optimizer_key_uses”: [
{
“table”: “`t1`”,
“field”: “col1″,
“equals”: “isn@0″,
“null_rejecting”: false
}
]
},
{
“rows_estimation”: [
{
“table”: “`t1`”,
“rows”: 1,
“cost”: 1,
“table_type”: “const”,
“empty”: false
}
]
},
{
“condition_on_constant_tables”: “1”,
“condition_value”: true
},
{
“attaching_conditions_to_tables”: {
“original_condition”: “1”,
“attached_conditions_computation”: [
],
“attached_conditions_summary”: [
]
}
},
{
“refine_plan”: [
]
}
]
}
},
{
“join_execution”: {
“select#”: 1,
“steps”: [
]
}
}
]
}

use xx1;

SELECT col2 FROM t1 WHERE col1=

NAME_CONST

(‘isn’,_utf8’tr’ COLLATE ‘utf8_general_ci’)

FOR UPDATE

SELECT col2 FROM t1 WHERE col1= NAME_CONST(‘isn’,_utf8’tr’ COLLATE ‘utf8_general_ci’) FOR UPDATE | {

“steps”: [

{

“join_preparation”: {

“select#”: 1,

“steps”: [

{

“expanded_query”: “/* select#1 */ select `t1`.`col2` AS `col2` from `t1` where (`t1`.`col1` = convert(NAME_CONST(‘isn’,(_utf8’tr’ collate utf8_general_ci)) using gbk))”

}

]

}

{

“join_optimization”: {

“select#”: 1,

“steps”: [

{

“condition_processing”: {

“condition”: “WHERE”,

“original_condition”: “(`t1`.`col1` = convert(NAME_CONST(‘isn’,(_utf8’tr’ collate utf8_general_ci)) using gbk))”,

“steps”: [

{

“transformation”: “equality_propagation”,

“resulting_condition”: “multiple equal(convert(NAME_CONST(‘isn’,(_utf8’tr’ collate utf8_general_ci)) using gbk), `t1`.`col1`)”

{

“transformation”: “constant_propagation”,

“resulting_condition”: “multiple equal(convert(NAME_CONST(‘isn’,(_utf8’tr’ collate utf8_general_ci)) using gbk), `t1`.`col1`)”

{

“transformation”: “trivial_condition_removal”,

“resulting_condition”: “multiple equal(convert(NAME_CONST(‘isn’,(_utf8’tr’ collate utf8_general_ci)) using gbk), `t1`.`col1`)”

}

]

}

{

“table_dependencies”: [

{

“table”: “`t1`”,

“row_may_be_null”: false,

“map_bit”: 0,

“depends_on_map_bits”: [

]

}

]

{

“ref_optimizer_key_uses”: [

{

“table”: “`t1`”,

“field”: “col1″,

“equals”: “convert(NAME_CONST(‘isn’,(_utf8’tr’ collate utf8_general_ci)) using gbk)”,

“null_rejecting”: false

}

]

{

“rows_estimation”: [

{

“table”: “`t1`”,

“rows”: 1,

“cost”: 1,

“table_type”: “const”,

“empty”: false

}

]

{

“condition_on_constant_tables”: “1”,

“condition_value”: true

{

“attaching_conditions_to_tables”: {

“original_condition”: “1”,

“attached_conditions_computation”: [

“attached_conditions_summary”: [

]

}

{

“refine_plan”: [

]

}

]

}

{

“join_execution”: {

“select#”: 1,

“steps”: [

]

}

]
}

注意这expanded_query一项，对于这种方式的查询，需要读取每一条记录，进行字符集转换再比较：

expanded_query”: “/* select#1 */ select `t1`.`col2` AS `col2` from `t1` where (convert(`t1`.`col1` using utf8) = isn@0)

由于本人对优化器模块了解不深，也不确定是否是Bug，先Report到了一个小bug到Bug list，ref:http://bugs.mysql.com/bug.php?id=76002

Updated @2015/2/25, 存储过程的输入列需要显式的定义输入字符集，否则其输入列的字符集将继承自库的字符集。参考bug中的回复。

时间： 2024-11-01 00:41:10

库表字符集不一致导致的全表扫描问题

库表字符集不一致导致的全表扫描问题的相关文章

oracle-数据库与项目字符集不一致，中文全是乱码

大幅提升MySQL中InnoDB的全表扫描速度的方法_Mysql

一条全表扫描sql语句的分析

mysql修改表、字段、库的字符集

SQL SERVER中关于OR会导致索引扫描或全表扫描的浅析

SQL中WHERE变量IS NULL条件导致全表扫描问题的解决方法_MsSql

MongoDB Primary 为何持续出现 oplog 全表扫描？

PostgreSQL 全表全字段模糊查询的毫秒级高效实现 - 搜索引擎也颤抖了

高水位线和全表扫描