# 第08章_索引的创建与设计原则
# 1. 索引的声明与使用
# 01-索引的创建
# 第1种:CREATE TABLE
# 隐式的方式创建索引。在声明有主键约束、唯一性约束、外键约束的字段上,会自动的添加相关的索引
CREATE DATABASE dbtest2;
USE dbtest2;
CREATE TABLE dept(
dept_id INT PRIMARY KEY,
dept_name VARCHAR(20)
);
CREATE TABLE emp(
emp_id INT PRIMARY KEY AUTO_INCREMENT,
emp_name VARCHAR(20) UNIQUE,
dept_id INT,
CONSTRAINT emp_dept_id_fk FOREIGN KEY (dept_id) REFERENCES dept(dept_id)
);
# 显式的方式创建:
# ①创建普通的索引
CREATE TABLE book(
book_id INT,
book_name VARCHAR(100),
authors VARCHAR(100),
info VARCHAR(100) ,
comment VARCHAR(100),
year_publication YEAR,
#声明索引
INDEX idx_bname(book_name)
);
# 通过命令查看索引
# 方式1:
SHOW CREATE TABLE book;
# 方式2:
SHOW INDEX FROM book;
# 性能分析工具:EXPLAIN
EXPLAIN SELECT * FROM book
WHERE book_name = 'mysql高级';
# ② 创建唯一索引
# 声明有唯一索引的字段,在添加数据时,要保证唯一性,但是可以添加null
CREATE TABLE book1(
book_id INT,
book_name VARCHAR(100),
authors VARCHAR(100),
info VARCHAR(100) ,
comment VARCHAR(100),
year_publication YEAR,
#声明索引
UNIQUE INDEX uk_idx_cmt(comment)
);
SHOW INDEX FROM book1;
INSERT INTO book1(book_id,book_name,comment)
VALUES(1,'Mysql高级','适合有数据库开发经验的人员学习');
INSERT INTO book1(book_id,book_name,comment)
VALUES(2,'Mysql高级',NULL);
SELECT * FROM book1;
# ③ 主键索引
# 通过定义主键约束的方式定义主键索引
CREATE TABLE book2(
book_id INT PRIMARY KEY,
book_name VARCHAR(100),
authors VARCHAR(100),
info VARCHAR(100) ,
comment VARCHAR(100),
year_publication YEAR
);
SHOW INDEX FROM book2;
#通过删除主键约束的方式删除主键索引
ALTER TABLE book2
DROP PRIMARY KEY;
# ④ 创建单列索引
CREATE TABLE book3(
book_id INT,
book_name VARCHAR(100),
authors VARCHAR(100),
info VARCHAR(100) ,
comment VARCHAR(100),
year_publication YEAR,
#声明索引
UNIQUE INDEX idx_bname(book_name)
);
SHOW INDEX FROM book3;
# ⑤ 创建联合索引
CREATE TABLE book4(
book_id INT,
book_name VARCHAR(100),
authors VARCHAR(100),
info VARCHAR(100) ,
comment VARCHAR(100),
year_publication YEAR,
#声明索引
INDEX mul_bid_bname_info(book_id,book_name,info)
);
SHOW INDEX FROM book4;
# 分析
EXPLAIN SELECT * FROM book4 WHERE book_id = 1001 AND book_name = 'mysql';
EXPLAIN SELECT * FROM book4 WHERE book_name = 'mysql';
# ⑥ 创建全文索引
CREATE TABLE test4(
id INT NOT NULL,
name CHAR(30) NOT NULL,
age INT NOT NULL,
info VARCHAR(255),
FULLTEXT INDEX futxt_idx_info(info(50))
);
SHOW INDEX FROM test4;
# 第2种:表已经创建成功
# ① ALTER TABLE ... ADD ...
CREATE TABLE book5(
book_id INT,
book_name VARCHAR(100),
authors VARCHAR(100),
info VARCHAR(100) ,
comment VARCHAR(100),
year_publication YEAR
);
SHOW INDEX FROM book5;
ALTER TABLE book5 ADD INDEX idx_cmt(comment);
ALTER TABLE book5 ADD UNIQUE uk_idx_bname(book_name);
ALTER TABLE book5 ADD INDEX mul_bid_bname_info(book_id,book_name,info);
# ② CREATE INDEX ... ON ...
CREATE TABLE book6(
book_id INT,
book_name VARCHAR(100),
authors VARCHAR(100),
info VARCHAR(100) ,
comment VARCHAR(100),
year_publication YEAR
);
SHOW INDEX FROM book6;
CREATE INDEX idx_cmt ON book6(comment);
CREATE UNIQUE INDEX uk_idx_bname ON book6(book_name);
CREATE INDEX mul_bid_bname_info ON book6(book_id,book_name,info);
# 02-索引的删除
SHOW INDEX FROM book5;
# 方式1:ALTER TABLE ... DROP INDEX ...
ALTER TABLE book5
DROP INDEX idx_cmt;
# 方式2:DROP INDEX ... ON ...
DROP INDEX uk_idx_bname ON book5;
# 测试:删除联合索引中的相关字段,索引的变化
ALTER TABLE book5
DROP COLUMN book_name;
ALTER TABLE book5
DROP COLUMN book_id;
ALTER TABLE book5
DROP COLUMN info;
# 2. MySQL8.0索引新特性
# 03-MySQL8.0新特性
# 1. 支持降序索引
CREATE TABLE ts1(
a INT,
b INT,
INDEX idx_a_b(a ASC,b DESC)
);
SHOW CREATE TABLE ts1;
DELIMITER //
CREATE PROCEDURE ts_insert()
BEGIN
DECLARE i INT DEFAULT 1;
WHILE i < 800
DO
INSERT INTO ts1 SELECT RAND()*80000,RAND()*80000;
SET i = i + 1;
END WHILE;
COMMIT;
END //
DELIMITER ;
#调用
CALL ts_insert();
SELECT COUNT(*) FROM ts1;
# 优化测试
EXPLAIN SELECT * FROM ts1 ORDER BY a,b DESC LIMIT 5;
# 不推荐
EXPLAIN SELECT * FROM ts1 ORDER BY a DESC,b DESC LIMIT 5;
# 2. 隐藏索引
# ① 创建表时 隐藏索引
CREATE TABLE book7(
book_id INT,
book_name VARCHAR(100),
authors VARCHAR(100),
info VARCHAR(100) ,
comment VARCHAR(100),
year_publication YEAR,
#创建不可见的索引
INDEX idx_cmt(comment) INVISIBLE
);
SHOW INDEX FROM book7;
EXPLAIN SELECT * FROM book7 WHERE comment = 'mysql....';
# ② 创建表以后
ALTER TABLE book7
ADD UNIQUE INDEX uk_idx_bname(book_name) INVISIBLE;
CREATE INDEX idx_year_pub ON book7(year_publication);
EXPLAIN SELECT * FROM book7 WHERE year_publication = '2022';
#修改索引的可见性
ALTER TABLE book7 ALTER INDEX idx_year_pub INVISIBLE; #可见 ---> 不可见
ALTER TABLE book7 ALTER INDEX idx_cmt VISIBLE; #不可见 ---> 可见
#了解:使隐藏索引对查询优化器可见
SELECT @@optimizer_switch;
SET SESSION optimizer_switch = "use_invisible_indexes=on";
EXPLAIN SELECT * FROM book7 WHERE year_publication = '2022';
# 3. 索引的设计原则
# 04-索引的设计原则
# 3.1 数据的准备
# 第1步:创建数据库、创建表
CREATE DATABASE atguigudb1;
USE atguigudb1;
#1.创建学生表和课程表
CREATE TABLE `student_info` (
`id` INT(11) AUTO_INCREMENT,
`student_id` INT NOT NULL,
`name` VARCHAR(20) DEFAULT NULL,
`course_id` INT NOT NULL,
`class_id` INT(11) DEFAULT NULL,
`create_time` DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (`id`)
) ENGINE=INNODB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;
CREATE TABLE `course` (
`id` INT(11) NOT NULL AUTO_INCREMENT,
`course_id` INT NOT NULL,
`course_name` VARCHAR(40) DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=INNODB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;
# 第2步:创建模拟数据必需的存储函数
#函数1:创建随机产生字符串函数
DELIMITER //
CREATE FUNCTION rand_string(n INT)
RETURNS VARCHAR(255) #该函数会返回一个字符串
BEGIN
DECLARE chars_str VARCHAR(100) DEFAULT
'abcdefghijklmnopqrstuvwxyzABCDEFJHIJKLMNOPQRSTUVWXYZ';
DECLARE return_str VARCHAR(255) DEFAULT '';
DECLARE i INT DEFAULT 0;
WHILE i < n DO
SET return_str =CONCAT(return_str,SUBSTRING(chars_str,FLOOR(1+RAND()*52),1));
SET i = i + 1;
END WHILE;
RETURN return_str;
END //
DELIMITER ;
SELECT @@log_bin_trust_function_creators;
SET GLOBAL log_bin_trust_function_creators = 1;
#函数2:创建随机数函数
DELIMITER //
CREATE FUNCTION rand_num (from_num INT ,to_num INT)
RETURNS INT(11)
BEGIN
DECLARE i INT DEFAULT 0;
SET i = FLOOR(from_num +RAND()*(to_num - from_num+1)) ;
RETURN i;
END //
DELIMITER ;
# 第3步:创建插入模拟数据的存储过程
# 存储过程1:创建插入课程表存储过程
DELIMITER //
CREATE PROCEDURE insert_course( max_num INT )
BEGIN
DECLARE i INT DEFAULT 0;
SET autocommit = 0; #设置手动提交事务
REPEAT #循环
SET i = i + 1; #赋值
INSERT INTO course (course_id, course_name ) VALUES
(rand_num(10000,10100),rand_string(6));
UNTIL i = max_num
END REPEAT;
COMMIT; #提交事务
END //
DELIMITER ;
# 存储过程2:创建插入学生信息表存储过程
DELIMITER //
CREATE PROCEDURE insert_stu( max_num INT )
BEGIN
DECLARE i INT DEFAULT 0;
SET autocommit = 0; #设置手动提交事务
REPEAT #循环
SET i = i + 1; #赋值
INSERT INTO student_info (course_id, class_id ,student_id ,NAME ) VALUES
(rand_num(10000,10100),rand_num(10000,10200),rand_num(1,200000),rand_string(6));
UNTIL i = max_num
END REPEAT;
COMMIT; #提交事务
END //
DELIMITER ;
# 第4步:调用存储过程
CALL insert_course(100);
SELECT COUNT(*) FROM course;
CALL insert_stu(1000000);
SELECT COUNT(*) FROM student_info;
# 3.2 哪些情况适合创建索引
# ① 字段的数值有唯一性的限制
# ② 频繁作为 WHERE 查询条件的字段
# 查看当前student_info表中的索引
SHOW INDEX FROM student_info;
#student_id字段上没有索引的:
SELECT course_id,class_id,name,create_time,student_id
FROM student_info
WHERE student_id = 123110; #295 ms
#给student_id字段添加索引
ALTER TABLE student_info
ADD INDEX idx_sid(student_id);
#student_id字段上有索引的:
SELECT course_id,class_id,name,create_time,student_id
FROM student_info
WHERE student_id = 123110; #29 ms
# ③ 经常 GROUP BY 和 ORDER BY 的列
# student_id字段上有索引的:
SELECT student_id,COUNT(*) AS num
FROM student_info
GROUP BY student_id LIMIT 100; #44 ms
#删除idx_sid索引
DROP INDEX idx_sid ON student_info;
# student_id字段上没有索引的:
SELECT student_id,COUNT(*) AS num
FROM student_info
GROUP BY student_id LIMIT 100; #370 ms
#再测试:
SHOW INDEX FROM student_info;
#添加单列索引
ALTER TABLE student_info
ADD INDEX idx_sid(student_id);
ALTER TABLE student_info
ADD INDEX idx_cre_time(create_time);
EXPLAIN SELECT student_id,COUNT(*) AS num FROM student_info
GROUP BY student_id
ORDER BY create_time DESC
LIMIT 100; #6 s 50 ms
#修改sql_mod
SELECT @@sql_mode;
# ONLY_FULL_GROUP_BY,STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_ENGINE_SUBSTITUTION
SET @@sql_mode = 'STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_ENGINE_SUBSTITUTION'
# 添加联合索引
ALTER TABLE student_info
ADD INDEX idx_sid_cre_time(student_id,create_time DESC);
EXPLAIN SELECT student_id,COUNT(*) AS num FROM student_info
GROUP BY student_id
ORDER BY create_time DESC
LIMIT 100; #370 ms
#再进一步:
ALTER TABLE student_info
ADD INDEX idx_cre_time_sid(create_time DESC,student_id);
DROP INDEX idx_sid_cre_time ON student_info;
EXPLAIN SELECT student_id,COUNT(*) AS num FROM student_info
GROUP BY student_id
ORDER BY create_time DESC
LIMIT 100; #2 s 915 ms
# ④ UPDATE、DELETE 的 WHERE 条件列
SHOW INDEX FROM student_info;
UPDATE student_info SET student_id = 10002
WHERE NAME = 'akldjkwfafaewe231ds'; #740 ms
#添加索引
ALTER TABLE student_info
ADD INDEX idx_name(name);
UPDATE student_info SET student_id = 10002
WHERE NAME = 'akldjkwfafaewe231ds'; #4 ms
# ⑤ DISTINCT 字段需要创建索引
# ⑥ 多表 JOIN 连接操作时,创建索引注意事项
# 首先, 连接表的数量尽量不要超过 3 张 ,因为每增加一张表就相当于增加了一次嵌套的循环,数量级增长会非常快,严重影响查询的效率。
# 其次, 对 WHERE 条件创建索引 ,因为 WHERE 才是对数据条件的过滤。如果在数据量非常大的情况下,没有 WHERE 条件过滤是非常可怕的。
# 最后, 对用于连接的字段创建索引 ,并且该字段在多张表中的 类型必须一致 。比如 course_id 在student_info 表和 course 表中都为 int(11) 类型,而不能一个为 int 另一个为 varchar 类型。
EXPLAIN SELECT s.course_id,name,s.student_id,c.course_name
FROM student_info s JOIN course c
ON s.course_id = c.course_id
WHERE name = '462eed7ac6e791292a79'; #23 ms
DROP INDEX idx_name ON student_info;
EXPLAIN SELECT s.course_id,name,s.student_id,c.course_name
FROM student_info s JOIN course c
ON s.course_id = c.course_id
WHERE name = '462eed7ac6e791292a79'; #251 ms
# ⑦ 使用列的类型小的创建索引
# ⑧ 使用字符串前缀创建索引
# 创建一张商户表,因为地址字段比较长,在地址字段上建立前缀索引
# CREATE TABLE shop(address VARCHAR(120) NOT NULL);
# ALTER TABLE shop ADD INDEX (address(12));
# 问题是,截取多少呢?截取得多了,达不到节省索引存储空间的目的;截取得少了,重复内容太多,
# 字段的散列度(选择性)会降低。怎么计算不同的长度的选择性呢?
# 先看一下字段在全部数据中的选择度:
# SELECT COUNT(DISTINCT address) / COUNT(*) FROM shop;
SELECT COUNT(DISTINCT name) / COUNT(*) FROM student_info;
# 通过不同长度去计算,与全表的选择性对比:
# 公式:
# COUNT(DISTINCT LEFT(列名, 索引长度)) / COUNT(*)
# 例如:
# SELECT COUNT(DISTINCT LEFT(name,10)) / COUNT(*) AS sub10, -- 截取前10个字符的选择度
# COUNT(DISTINCT LEFT(name,15)) / COUNT(*) AS sub11, -- 截取前15个字符的选择度
# COUNT(DISTINCT LEFT(name,20)) / COUNT(*) AS sub12, -- 截取前20个字符的选择度
# COUNT(DISTINCT LEFT(name,25)) / COUNT(*) AS sub13 -- 截取前25个字符的选择度
# FROM shop;
SELECT COUNT(DISTINCT LEFT(name,2)) / COUNT(*) AS sub10,
COUNT(DISTINCT LEFT(name,3)) / COUNT(*) AS sub11,
COUNT(DISTINCT LEFT(name,4)) / COUNT(*) AS sub12,
COUNT(DISTINCT LEFT(name,5)) / COUNT(*) AS sub13
FROM student_info;
# ⑨ 区分度高(散列性高)的列适合作为索引
# 公式:
# SELECT COUNT(DISTINCT a) / COUNT(*) FROM t1;
# ⑩ 使用最频繁的列放到联合索引的左侧
SELECT *
FROM student_info
WHERE student_id = 10013 AND course_id = 100;
# ⒒在多个字段都要创建索引的情况下,联合索引优于单值索引
# 3.3 限制索引的数目
# 在实际工作中,我们也需要注意平衡,索引的数目不是越多越好,我们需要限制每张表上的索引数量,
# 建议单张表索引数量不超过6个。
#3.4 哪些情况不适合创建索引
# ① 在where中(包括GROUP BY、ORDER BY)使用不到的字段,不要设置索引
# ② 数据量小的表最好不要使用索引
# ③ 有大量重复数据的列上不要建立索引
# 结论:当数据重复度大,比如 高于 10% 的时候,也不需要对这个字段使用索引。
# ④ 避免对经常更新的表创建过多的索引
# ⑤ 不建议用无序的值作为索引
# ⑥ 删除不再使用或者很少使用的索引
# ⑦ 不要定义冗余或重复的索引
暂无评论