简 介
01
dbgen 使 用
下载地址:https://www.tpc.org/tpch/
# makefile.suite 的更改参数如下:
CC = gcc
# Current values for DATABASE are: INFORMIX, DB2, TDAT (Teradata)
# SQLSERVER, SYBASE, ORACLE, VECTORWISE
# Current values for MACHINE are: ATT, DOS, HP, IBM, ICL, MVS,
# SGI, SUN, U2200, VMS, LINUX, WIN32
# Current values for WORKLOAD are: TPCH
# 程序给定参数没有postgresql ,修改tpcd.h 添加POSTGRESQL脚本
DATABASE = POSTGRESQL
MACHINE = LINUX
WORKLOAD = TPCH
/* 修改tpcd.h VECTORWISE */
#ifdef POSTGRESQL
#define GEN_QUERY_PLAN "EXPLAIN"
#define START_TRAN "BEGIN TRANSACTION"
#define END_TRAN "COMMIT;"
#define SET_OUTPUT ""
#define SET_ROWCOUNT "LIMIT %d\n"
#define SET_DBASE ""
#endif
}
// #ifdef EOL_HANDLING
if (sep)
// #endif /* EOL_HANDLING */
fprintf(target, "%c", SEPARATOR);
return(0);
}
# 保存更改,在dbgen目录下执行
make -f makefile.suite
# 执行成功后在dbgen目录下生成dbgen和qgen文件
# 拷贝需要的文件
cp dbgen qgen dists.dss dss.ddl ~/dbgen
cp -r queries ~/dbgen
cd ~/dbgen
chmod +x dbgen qgen
# 生成50G数据
[root@localhost dbgen]# ./dbgen -vf -s 50
TPC-H Population Generator (Version 3.0.0)
Copyright Transaction Processing Performance Council 1994 - 2010
Generating data for suppliers table/
Preloading text ... 100%
done.
Generating data for customers tabledone.
Generating data for orders/lineitem tablesdone.
Generating data for part/partsupplier tablesdone.
Generating data for nation tabledone.
Generating data for region tabledone.
# 执行成功后会在dbgen目录下生成八个.tbl文件,可通过下列命令查看(在dbgen目录下)
[root@localhost dbgen]# ll *.tbl
-rw-r--r--. 1 root root 1221138997 2月 23 14:20 customer.tbl
-rw-r--r--. 1 root root 39232656196 2月 23 14:20 lineitem.tbl
-rw-r--r--. 1 root root 2199 2月 23 14:20 nation.tbl
-rw-r--r--. 1 root root 8777127362 2月 23 14:20 orders.tbl
-rw-r--r--. 1 root root 6037819784 2月 23 14:20 partsupp.tbl
-rw-r--r--. 1 root root 1211051905 2月 23 14:20 part.tbl
-rw-r--r--. 1 root root 384 2月 23 14:20 region.tbl
-rw-r--r--. 1 root root 70871820 2月 23 14:20 supplier.tbl
cd ~/dbgen
vim gen_query.sh
# 生成查询sql
for i in {1..22}
do
name="d$i.sql"
echo $name
./qgen -d queries/$i > queries/$name
done
chmod +x gen_query.sh
# 执行命令生成查询
[root@localhost dbgen]# ./gen_query.sh
d1.sql
d2.sql
d3.sql
d4.sql
d5.sql
d6.sql
d7.sql
d8.sql
d9.sql
d10.sql
d11.sql
d12.sql
d13.sql
d14.sql
d15.sql
d16.sql
d17.sql
d18.sql
d19.sql
d20.sql
d21.sql
d22.sql
02
数 据 入 库
create user tpch password 'Hello@123' valid until 'infinity';
create database tpch owner tpch;
-- 删除表(新库不用执行)
drop table if exists customer cascade;
drop table if exists lineitem cascade;
drop table if exists nation cascade;
drop table if exists orders cascade;
drop table if exists part cascade;
drop table if exists partsupp cascade;
drop table if exists region cascade;
drop table if exists supplier cascade;
-- 找到之前cp到 ~/dbgen下的dds.ddl文件执行里边的建表语句
psql tpch tpch < ~/dbgen/dss.ddl
-- 或者直接执行语句:
create table nation(
n_nationkey integer not null,
n_name char(25) not null,
n_regionkey integer not null,
n_comment varchar(152));
create table region(
r_regionkey integer not null,
r_name char(25) not null,
r_comment varchar(152));
create table part(
p_partkey integer not null,
p_name varchar(55) not null,
p_mfgr char(25) not null,
p_brand char(10) not null,
p_type varchar(25) not null,
p_size integer not null,
p_container char(10) not null,
p_retailprice decimal(15,2) not null,
p_comment varchar(23) not null );
create table supplier(
s_suppkey integer not null,
s_name char(25) not null,
s_address varchar(40) not null,
s_nationkey integer not null,
s_phone char(15) not null,
s_acctbal decimal(15,2) not null,
s_comment varchar(101) not null);
create table partsupp(
ps_partkey integer not null,
ps_suppkey integer not null,
ps_availqty integer not null,
ps_supplycost decimal(15,2) not null,
ps_comment varchar(199) not null );
create table customer(
c_custkey integer not null,
c_name varchar(25) not null,
c_address varchar(40) not null,
c_nationkey integer not null,
c_phone char(15) not null,
c_acctbal decimal(15,2) not null,
c_mktsegment char(10) not null,
c_comment varchar(117) not null);
create table orders(
o_orderkey bigint not null,
o_custkey integer not null,
o_orderstatus char(1) not null,
o_totalprice decimal(15,2) not null,
o_orderdate date not null,
o_orderpriority char(15) not null,
o_clerk char(15) not null,
o_shippriority integer not null,
o_comment varchar(79) not null);
create table lineitem(
l_orderkey bigint not null,
l_partkey integer not null,
l_suppkey integer not null,
l_linenumber integer not null,
l_quantity decimal(15,2) not null,
l_extendedprice decimal(15,2) not null,
l_discount decimal(15,2) not null,
l_tax decimal(15,2) not null,
l_returnflag char(1) not null,
l_linestatus char(1) not null,
l_shipdate date not null,
l_commitdate date not null,
l_receiptdate date not null,
l_shipinstruct char(25) not null,
l_shipmode char(10) not null,
l_comment varchar(44) not null);
\copy region from '~/dbgen/region.tbl' with delimiter as '|';
\copy nation from '~/dbgen/nation.tbl' with delimiter as '|';
\copy part from '~/dbgen/part.tbl' with delimiter as '|';
\copy supplier from '~/dbgen/supplier.tbl' with delimiter as '|';
\copy customer from '~/dbgen/customer.tbl' with delimiter as '|';
\copy lineitem from '~/dbgen/lineitem.tbl' with delimiter as '|';
\copy partsupp from '~/dbgen/partsupp.tbl' with delimiter as '|';
\copy orders from '~/dbgen/orders.tbl' with delimiter as '|';
alter table region add primary key (r_regionkey);
alter table nation add primary key (n_nationkey);
alter table nation add foreign key (n_regionkey) references region;
alter table part add primary key (p_partkey);
alter table supplier add primary key (s_suppkey);
alter table supplier add foreign key (s_nationkey) references nation;
alter table partsupp add primary key (ps_partkey,ps_suppkey);
alter table customer add primary key (c_custkey);
alter table customer add foreign key (c_nationkey) references nation;
alter table lineitem add primary key (l_orderkey,l_linenumber);
alter table orders add primary key (o_orderkey);
alter table partsupp add foreign key (ps_suppkey) references supplier;
alter table partsupp add foreign key (ps_partkey) references part;
alter table orders add foreign key (o_custkey) references customer;
alter table lineitem add foreign key (l_orderkey) references orders;
alter table lineitem add foreign key (l_partkey,l_suppkey) references partsupp;
-- 全库vacuum
vacuum (analyze,freeze,verbose) ;
03
查 询 SQL
-- 获取之前生成的sql语句进行查询,内容如下:
-- Q1
select
l_returnflag,
l_linestatus,
sum(l_quantity) as sum_qty,
sum(l_extendedprice) as sum_base_price,
sum(l_extendedprice*(1-l_discount)) as sum_disc_price,
sum(l_extendedprice*(1-l_discount)*(1+l_tax)) as sum_charge,
avg(l_quantity) as avg_qty,
avg(l_extendedprice) as avg_price,
avg(l_discount) as avg_disc,
count(*) as count_order
from lineitem
where l_shipdate <= date'1998-12-01' - interval '90' day
group by l_returnflag, l_linestatus
order by l_returnflag, l_linestatus;
-- Q2
select
s_acctbal, s_name,
n_name, p_partkey, p_mfgr,
s_address, s_phone, s_comment
from part, supplier, partsupp, nation, region
where p_partkey = ps_partkey
and s_suppkey = ps_suppkey
and p_size = 15
and p_type like '%BRASS'
and s_nationkey = n_nationkey
and n_regionkey = r_regionkey
and r_name = 'EUROPE'
and ps_supplycost =
( select min(ps_supplycost)
from partsupp, supplier, nation, region
where p_partkey = ps_partkey
and s_suppkey = ps_suppkey
and s_nationkey = n_nationkey
and n_regionkey = r_regionkey
and r_name = 'EUROPE' )
order by s_acctbal desc, n_name, s_name, p_partkey limit 100;
-- Q3
select
l_orderkey,
sum(l_extendedprice*(1-l_discount)) as revenue,
o_orderdate,
o_shippriority
from customer, orders, lineitem
where c_mktsegment = 'BUILDING'
and c_custkey = o_custkey
and l_orderkey = o_orderkey
and o_orderdate < date'1995-03-15'
and l_shipdate > date'1995-03-15'
group by l_orderkey, o_orderdate, o_shippriority
order by revenue desc, o_orderdate limit 10;
-- Q4
select
o_orderpriority,
count(*) as order_count
from orders
where o_orderdate >= date'1993-07-01'
and o_orderdate < date '1993-07-01' + interval '3' month
and exists
(select *
from lineitem
where l_orderkey = o_orderkey
and l_commitdate < l_receiptdate )
group by o_orderpriority
order by o_orderpriority;
-- Q5
select
n_name,
sum(l_extendedprice * (1 - l_discount)) as revenue
from customer, orders, lineitem, supplier, nation, region
where c_custkey = o_custkey
and l_orderkey = o_orderkey
and l_suppkey = s_suppkey
and c_nationkey = s_nationkey
and s_nationkey = n_nationkey
and n_regionkey = r_regionkey
and r_name = 'ASIA'
and o_orderdate >= date'1994-01-01'
and o_orderdate < date '1994-01-01' + interval '1' year
group by n_name
order by revenue desc;
-- Q6
select
sum(l_extendedprice*l_discount) as revenue
from lineitem
where l_shipdate >= '1994-01-01'
and l_shipdate < date '1994-01-01' + interval '1' year
and l_discount between 0.06 - 0.01
and 0.06 + 0.01
and l_quantity < 24;
-- Q7
select
supp_nation,
cust_nation,
l_year,
sum(volume) as revenue
from ( select n1.n_name as supp_nation,
n2.n_name as cust_nation,
extract(year from l_shipdate) as l_year,
l_extendedprice * (1 - l_discount) as volume
from supplier, lineitem, orders, customer, nation n1, nation n2
where s_suppkey = l_suppkey
and o_orderkey = l_orderkey
and c_custkey = o_custkey
and s_nationkey = n1.n_nationkey
and c_nationkey = n2.n_nationkey
and ( (n1.n_name = 'FRANCE' and n2.n_name = 'GERMANY') or (n1.n_name = 'GERMANY' and n2.n_name = 'FRANCE') )
and l_shipdate between date'1995-01-01' and date'1996-12-31' ) as shipping
group by supp_nation, cust_nation, l_year
order by supp_nation, cust_nation, l_year;
-- Q8
select
o_year,
sum(case when nation = 'BRAZIL' then volume else 0 end) / sum(volume) as mkt_share
from ( select extract(year from o_orderdate) as o_year,
l_extendedprice * (1-l_discount) as volume,
n2.n_name as nation
from part, supplier, lineitem, orders, customer, nation n1, nation n2, region
where p_partkey = l_partkey
and s_suppkey = l_suppkey
and l_orderkey = o_orderkey
and o_custkey = c_custkey
and c_nationkey = n1.n_nationkey
and n1.n_regionkey = r_regionkey
and r_name = 'AMERICA'
and s_nationkey = n2.n_nationkey
and o_orderdate between date '1995-01-01'
and date '1996-12-31'
and p_type = 'ECONOMY ANODIZED STEEL' ) as all_nations
group by o_year
order by o_year;
-- Q9
select
nation,
o_year,
sum(amount) as sum_profit
from ( select n_name as nation,
extract(year from o_orderdate) as o_year,
l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount
from part, supplier, lineitem, partsupp, orders, nation
where s_suppkey = l_suppkey
and ps_suppkey = l_suppkey
and ps_partkey = l_partkey
and p_partkey = l_partkey
and o_orderkey = l_orderkey
and s_nationkey = n_nationkey
and p_name like '%green%' ) as profit
group by nation, o_year
order by nation, o_year desc;
-- Q10
select
c_custkey,
c_name,
sum(l_extendedprice * (1 - l_discount)) as revenue,
c_acctbal,
n_name,
c_address, c_phone,
c_comment
from customer, orders, lineitem, nation
where c_custkey = o_custkey
and l_orderkey = o_orderkey
and o_orderdate >= date'1993-10-01'
and o_orderdate < date'1993-10-01' + interval '3' month
and l_returnflag = 'R'
and c_nationkey = n_nationkey
group by c_custkey, c_name, c_acctbal, c_phone, n_name, c_address, c_comment
order by revenue desc limit 20;
-- Q11
select
ps_partkey,
sum(ps_supplycost * ps_availqty) as valuess
from partsupp, supplier, nation
where ps_suppkey = s_suppkey
and s_nationkey = n_nationkey
and n_name = 'GERMANY'
group by ps_partkey
having sum(ps_supplycost * ps_availqty) > ( select sum(ps_supplycost * ps_availqty) * 0.0001
from partsupp, supplier, nation
where ps_suppkey = s_suppkey
and s_nationkey = n_nationkey
and n_name = 'GERMANY' )
order by valuess desc;
-- Q12
select
l_shipmode,
sum(case when o_orderpriority ='1-URGENT' or o_orderpriority ='2-HIGH' then 1 else 0 end) as high_line_count,
sum(case when o_orderpriority <> '1-URGENT' and o_orderpriority <> '2-HIGH' then 1 else 0 end) as low_line_count
from orders, lineitem
where o_orderkey = l_orderkey
and l_shipmode in ('MAIL', 'SHIP')
and l_commitdate < l_receiptdate
and l_shipdate < l_commitdate
and l_receiptdate >= date '1994-01-01'
and l_receiptdate < date '1994-01-01' + interval '1' year
group by l_shipmode
order by l_shipmode;
--Q13
select
c_count,
count(*) as custdist
from ( select c_custkey,
count(o_orderkey)
from customer left outer join orders on c_custkey = o_custkey
and o_comment not like '%special%requests%'
group by c_custkey )as c_orders (c_custkey, c_count)
group by c_count
order by custdist desc, c_count desc;
-- Q14
select
100.00 * sum(case when p_type like 'PROMO%'
then l_extendedprice*(1-l_discount)
else 0 end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue
from lineitem, part
where l_partkey = p_partkey
and l_shipdate >= date '1995-09-01'
and l_shipdate < date'1995-09-01' + interval '1' month;
-- Q15
create view revenue (supplier_no, total_revenue) as
select l_suppkey, sum(l_extendedprice * (1 - l_discount)) from lineitem
where l_shipdate >= date '1996-01-01' and l_shipdate < date '1996-01-01' + interval '3' month group by l_suppkey;
select
s_suppkey,
s_name,
s_address,
s_phone,
total_revenue
from supplier, revenue
where s_suppkey = supplier_no
and total_revenue =
( select max(total_revenue) from revenue ) order by s_suppkey;
drop view revenue;
-- Q16
select
p_brand,
p_type,
p_size,
count(distinct ps_suppkey) as supplier_cnt
from partsupp, part
where p_partkey = ps_partkey
and p_brand <> 'Brand#45'
and p_type not like 'MEDIUM POLISHED%'
and p_size in (49, 14, 23, 45, 19, 3, 36, 9)
and ps_suppkey not in ( select s_suppkey
from supplier
where s_comment like '%Customer%Complaints%' )
group by p_brand, p_type, p_size
order by supplier_cnt desc, p_brand, p_type, p_size;
-- Q17
select
sum(l_extendedprice) / 7.0 as avg_yearly
from lineitem, part
where p_partkey = l_partkey
and p_brand = 'Brand#23'
and p_container = 'MED BOX'
and l_quantity < ( select 0.2 * avg(l_quantity) from lineitem where l_partkey = p_partkey );
-- Q18
select
c_name,
c_custkey,
o_orderkey,
o_orderdate,
o_totalprice,
sum(l_quantity)
from customer, orders, lineitem
where o_orderkey in ( select l_orderkey
from lineitem
group by l_orderkey
having sum(l_quantity) > 300 )
and c_custkey = o_custkey
and o_orderkey = l_orderkey
group by c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice
order by o_totalprice desc, o_orderdate limit 100;
-- Q19
select
sum(l_extendedprice * (1 - l_discount) ) as revenue
from lineitem, part
where ( p_partkey = l_partkey
and p_brand = 'Brand#12'
and p_container in ( 'SM CASE', 'SM BOX', 'SM PACK', 'SM PKG')
and l_quantity >= 1
and l_quantity <= 1 + 10
and p_size between 1 and 5
and l_shipmode in ('AIR', 'AIR REG')
and l_shipinstruct = 'DELIVER IN PERSON' )
or ( p_partkey = l_partkey and p_brand = 'Brand#23'
and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK')
and l_quantity >= 10
and l_quantity <= 10 + 10
and p_size between 1 and 10
and l_shipmode in ('AIR', 'AIR REG')
and l_shipinstruct = 'DELIVER IN PERSON' )
or ( p_partkey = l_partkey
and p_brand = 'Brand#34'
and p_container in ( 'LG CASE', 'LG BOX', 'LG PACK', 'LG PKG')
and l_quantity >= 20
and l_quantity <= 20 + 10
and p_size between 1 and 15
and l_shipmode in ('AIR', 'AIR REG')
and l_shipinstruct = 'DELIVER IN PERSON' );
-- Q20
select
s_name,
s_address
from supplier, nation
where s_suppkey in ( select ps_suppkey from partsupp
where ps_partkey in ( select p_partkey from part where p_name like 'forest%' )
and ps_availqty > ( select 0.5 * sum(l_quantity) from lineitem where l_partkey = ps_partkey
and l_suppkey = ps_suppkey
and l_shipdate >= date '1994-01-01'
and l_shipdate < date '1994-01-01' + interval '1' year )
)
and s_nationkey = n_nationkey
and n_name = 'CANADA'
order by s_name;
-- Q21
select
s_name,
count(*) as numwait
from supplier, lineitem l1, orders, nation
where s_suppkey = l1.l_suppkey and o_orderkey = l1.l_orderkey
and o_orderstatus = 'F'
and l1.l_receiptdate > l1.l_commitdate
and exists ( select * from lineitem l2
where l2.l_orderkey = l1.l_orderkey
and l2.l_suppkey <> l1.l_suppkey )
and not exists ( select * from lineitem l3
where l3.l_orderkey = l1.l_orderkey
and l3.l_suppkey <> l1.l_suppkey
and l3.l_receiptdate > l3.l_commitdate )
and s_nationkey = n_nationkey
and n_name = 'SAUDI ARABIA'
group by s_name
order by numwait desc, s_name limit 100;
-- Q22
select
cntrycode,
count(*) as numcust,
sum(c_acctbal) as totacctbal
from ( select substring(c_phone from 1 for 2) as cntrycode, c_acctbal
from customer
where substring(c_phone from 1 for 2) in ('13','31','23','29','30','18','17')
and c_acctbal > ( select avg(c_acctbal) from customer where c_acctbal > 0.00
and substring (c_phone from 1 for 2) in ('13','31','23','29','30','18','17')
)
and not exists ( select * from orders where o_custkey = c_custkey )
) as custsale
group by cntrycode
order by cntrycode;
如果您发现该资源为电子书等存在侵权的资源或对该资源描述不正确等,可点击“私信”按钮向作者进行反馈;如作者无回复可进行平台仲裁,我们会在第一时间进行处理!
加入交流群
请使用微信扫一扫!