多表查询

链表查询¶

数据准备¶

-- 建表
-- 在硬盘上 department和employee就是两张物理表
create table department(
    id int,
    name varchar(20) 
);

create table employee(
  id int primary key auto_increment,
  name varchar(20),
    sex enum('male','female') not null default 'male',
    age int,
    dep_id int -- 两表关系不是一定需要建立foreign key外键的 不让两者强耦合 具有更高的扩展性
);

-- 插入数据
insert into department values
    (200,'技术'),
    (201,'人力资源'),
    (202,'销售'),
    (203,'运营');

insert into employee(name,sex,age,dep_id) values
    ('egon','male',18,200),
    ('alex','female',48,201),
    ('wupeiqi','male',38,201),
    ('yuanhao','female',28,202),
    ('liwenzhou','male',18,200),
    ('jingliyang','female',18,204); -- 无效数据 没有id为204的部门存在

交叉连接¶

交叉连接: 将两表合在一起生成笛卡尔积.

假如A表中的数据为m行，B表中的数据有n行，那么A和B做笛卡尔积，结果为m*n行
select * from employee,department;

内连接¶

内连接: 只保留有对应关系的记录... (只连接匹配的行筛选出笛卡尔积中我们需要的记录)

用where关键字可以完成想要的拼接效果(不建议.) where应该用于筛选数据. '术有专攻'.
但它等同于下方用inner join的sql语句~
select * from employee,department where employee.dep_id = department.id; (是内连接哦！！)

用inner join关键字！！！

select * from employee inner join department on employee.dep_id = department.id;
-- 联表结果如下:
-- (203,'运营') ('jingliyang','female',18,204) 它们两都没有对应的记录
+----+-----------+--------+------+--------+------+--------------+
| id | name      | sex    | age  | dep_id | id   | name         |
+----+-----------+--------+------+--------+------+--------------+
|  1 | egon      | male   |   18 |    200 |  200 | 技术   |
|  2 | alex      | female |   48 |    201 |  201 | 人力资源 |
|  3 | wupeiqi   | male   |   38 |    201 |  201 | 人力资源 |
|  4 | yuanhao   | female |   28 |    202 |  202 | 销售   |
|  5 | liwenzhou | male   |   18 |    200 |  200 | 技术 |
+----+-----------+--------+------+--------+------+--------------+

外连接¶

外连接: 若想让联表保留两表中没有对应关系的记录就用外连接

左连接: 在内连接的基础上优先显示左表全部记录 left join

select * from employee left join department on employee.dep_id = department.id;
-- 以左表为准,即找出所有员工信息,当然包括没有部门的员工
-- 本质就是：在内连接的基础上增加左边有右边没有的结果
+----+------------+--------+------+--------+------+--------------+
| id | name       | sex    | age  | dep_id | id   | name         |
+----+------------+--------+------+--------+------+--------------+
|  1 | egon       | male   |   18 |    200 |  200 | 技术  |
|  2 | alex       | female |   48 |    201 |  201 | 人力资源    |
|  3 | wupeiqi    | male   |   38 |    201 |  201 | 人力资源  |
|  4 | yuanhao    | female |   28 |    202 |  202 | 销售 |
|  5 | liwenzhou  | male   |   18 |    200 |  200 | 技术 |
|  6 | jingliyang | female |   18 |    204 | NULL | NULL    |
+----+------------+--------+------+--------+------+--------------+

右连接: 在内连接的基础上优先显示右表全部记录 right join

select * from employee right join department on employee.dep_id = department.id;
-- 以右表为准,即找出所有部门信息,包括没有员工的部门
-- 本质就是：在内连接的基础上增加右边有左边没有的结果
+------+-----------+--------+------+--------+------+--------------+
| id   | name      | sex    | age  | dep_id | id   | name         |
+------+-----------+--------+------+--------+------+--------------+
|    5 | liwenzhou | male   |   18 |    200 |  200 | 技术 |
|    1 | egon      | male   |   18 |    200 |  200 | 技术 |
|    3 | wupeiqi   | male   |   38 |    201 |  201 | 人力资源   |
|    2 | alex      | female |   48 |    201 |  201 | 人力资源   |
|    4 | yuanhao   | female |   28 |    202 |  202 | 销售 |
| NULL | NULL      | NULL   | NULL |   NULL |  203 | 运营 |
+------+-----------+--------+------+--------+------+--------------+

全外连接: 显示左右两个表全部记录
在内连接的基础上增加左边有右边没有的和右边有左边没有的结果

-- 注意: mysql不支持全外连接 full JOIN
-- 可以用union上下连表间接实现 
-- 上下表的字段完全一致的话 union会自动去重 (若使用union all则不会去重)
mysql> select * from employee left join department on employee.dep_id = department.id
    -> union
    -> select * from employee right join department on employee.dep_id = department.id;
+------+------------+--------+------+--------+------+--------------+
| id   | name       | sex    | age  | dep_id | id   | name         |
+------+------------+--------+------+--------+------+--------------+
|    1 | egon       | male   |   18 |    200 |  200 | 技术    |
|    2 | alex       | female |   48 |    201 |  201 | 人力资源  |
|    3 | wupeiqi    | male   |   38 |    201 |  201 | 人力资源  |
|    4 | yuanhao    | female |   28 |    202 |  202 | 销售 |
|    5 | liwenzhou  | male   |   18 |    200 |  200 | 技术 |
|    6 | jingliyang | female |   18 |    204 | NULL | NULL  |
| NULL | NULL       | NULL   | NULL |   NULL |  203 | 运营 |
+------+------------+--------+------+--------+------+--------------+

思维扩展: 两个及以上数量的物理表连接、单表自己连接自己、也可以跟虚拟表进行连接.
虚拟表 -- 通过select语句查询展示出来的结果表它不是存在于硬盘上的物理表 (select有点类似于print)

将连表操作后产生的虚拟表看作成一个单表进行单表查询操作！！

查询技术部门所有员工的名字

select employee.name from employee
inner join department on employee.dep_id = department.id
where department.name = '技术';

查询每个部门的员工的平均年龄

select department.name,avg(age) from employee
inner join department on employee.dep_id = department.id
group by department.name;

子查询¶

子查询是将一个查询语句嵌套在另一个查询语句中;
内层查询语句的查询结果,可以为外层查询语句提供查询条件.

子查询中可以包含：IN、NOT IN、ANY、ALL、EXISTS 和 NOT EXISTS等关键字
还可以包含比较运算符：= 、 !=、> 、<等

用括号括起来就是一个结果集啦！！

in¶

not in无法处理有null值的结果集
在sql中,in会走索引,not in不会..

-- 查询技术部和销售部的员工名字
select name from employee 
    where dep_id in
        (select id from department where name = '技术' or name = '销售');

-- 查询平均年龄在25岁以上的部门名
select id,name from department 
    where id in
        (select dep_id from employee group by dep_id having avg(age) > 25);

-- 查看不足1人的部门名(子查询得到的是有人的部门id)
select name from department where id not in (select distinct dep_id from employee);
-- 注意: not in无法处理null的值,即子查询的结果集中如果存在null值,not in将无法处理
-- 假设 employee的某一行dep_id字段的数据值为null 那么上方的这条查询结果为空 解决如下:
select name from department where id not in 
        (select distinct dep_id from employee where dep_id is not null);

any¶

any 任何一个
any 必须和其他的比较运算符共同使用, 并且比较运算符在any 关键字之前

any比in强大的地方在于 any可以 >、< 的比较

select name from employee 
    where dep_id in
        (select id from department where name = '技术' or name = '销售');

-- 等同于

-- 注意一点 any('此结果集必须是一条查询语句得来的')
-- any(1,2,3)报语法错！  但是 in(1,2,3)能正常运行.
select name from employee 
    where dep_id =
        any(select id from department where name = '技术' or name = '销售');

-----  ----- -----  -----  ------

-- 只要 dep_id 大于 结果集中的任何一个值就成立
select name from employee 
    where dep_id >
        any(select id from department where name = '技术' or name = '销售');

# 案例: 查询年龄大于任一部门平均年龄的员工信息.(大于结果集中最小的)
-- 也可以麻烦一点 group by+order by+limit实现
select * from employee 
    where age > 
        any (select avg(age) from employee group by dep_id);

all¶

all同any类似，只不过all表示的是所有，any表示任一

# 案例: 查询年龄大于所有部门平均年龄的员工信息.(大于结果集中最大的)
select * from employee 
    where age > 
        all (select avg(age) from employee group by dep_id);

比较运算符¶

比较的是一个值

-- 查询大于所有人平均年龄的员工姓名与年龄
select name,age from employee where age > (select avg(age) from employee);

exists¶

in any all 都是内层语句查询完了拿到一个结果再执行外层语句
exists不同它类似于一个for循环嵌套

exists返回一个真假值True或False

-- 查询有员工的部门
/*
exists的执行原理为：
1、依次执行外部查询：即select * from department
2、然后为外部查询返回的每一行分别执行一次子查询：
    即(select * from employee where employee.dep_id = department.id)
3、子查询如果返回行，则exists条件成立，条件成立则输出外部查询取出的那条记录
*/
select * from department
where exists(
    select * from employee where employee.dep_id = department.id
);

/* 伪代码！！
for line in deaprtment:
    flag = False
    for x in employee: -- 内层循环完
        if employee.dep_id = department.id:
            flag = True -- 不管有几个成立 只要有就行
    if flag == True:
    print(line)
*/

题外话(有待商榷): exists与in; not exists与not in

/* exists与in */
-- in()的子查询会先产生结果集,再结合前面exists的执行原理,in 的执行效率是快于exists的.

/* not exists与not in */
-- not exists查询的效率远远高与not in查询的效率

练习！！！¶

查询每个部门最新入职的那位员工

应用场景扩展:
某网站在全国各个市都有站点,每个站点都有一条数据,想取每个省下最新的那一条市的网站质量信息

/* A表 跟 通过A表查询出来的虚拟表 进行关联 */

-- 每个部门员工的最大入职时间.
SELECT
    post,
    max( hire_date ) AS max_date 
FROM
    emp 
GROUP BY
    post;

-- 这样连表后 每个员工的信息最后面都跟着本部门最大的入职时间
SELECT
  *
FROM
  emp
INNER JOIN 
    (SELECT post, max(hire_date) FROM emp GROUP BY post) AS t2 -- 给虚拟(临时)表起别名！！很重要
ON emp.post = t2.post

-- 员工的入职时间等于本部门最大的入职时间即可
SELECT
  *
FROM
  emp AS t1 
INNER JOIN 
    -- as max_date 给使用聚合函数的字段起了别名 便于后面的where条件
    (SELECT post, max(hire_date) as max_date FROM emp GROUP BY post) AS t2
ON t1.post = t2.post
where t1.hire_date = t2.max_date;