华为云用户手册

  • 创建TPC-H目标表 连接DWS数据库后执行以下命令创建目标表。 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198 CREATE TABLE REGION( R_REGIONKEY INT NOT NULL , R_NAME VARCHAR(25) NOT NULL , R_COMMENT VARCHAR(152)) WITH (orientation=column, colversion=2.0, enable_hstore=true, enable_hstore_opt=true,bucketnums=16384)DISTRIBUTE BY replication; CREATE TABLE NATION( N_NATIONKEY INT NOT NULL , N_NAME VARCHAR(25) NOT NULL , N_REGIONKEY INT NOT NULL , N_COMMENT VARCHAR(152)) WITH (orientation=column, colversion=2.0, enable_hstore=true, enable_hstore_opt=true,bucketnums=16384)DISTRIBUTE BY replication; CREATE TABLE SUPPLIER( S_SUPPKEY BIGINT NOT NULL , S_NAME VARCHAR(25) NOT NULL , S_ADDRESS VARCHAR(40) NOT NULL , S_NATIONKEY INT NOT NULL , S_PHONE VARCHAR(15) NOT NULL , S_ACCTBAL DECIMAL(15,2) NOT NULL , S_COMMENT VARCHAR(101) NOT NULL) WITH (orientation=column, colversion=2.0,enable_hstore=true,enable_hstore_opt=true,bucketnums=16384,secondary_part_column='s_suppkey',secondary_part_num=72)DISTRIBUTE BY hash(S_SUPPKEY)PARTITION BY RANGE(S_NATIONKEY)(PARTITION S_NATIONKEY_1 VALUES LESS THAN(1),PARTITION S_NATIONKEY_2 VALUES LESS THAN(2),PARTITION S_NATIONKEY_3 VALUES LESS THAN(3),PARTITION S_NATIONKEY_4 VALUES LESS THAN(4),PARTITION S_NATIONKEY_5 VALUES LESS THAN(5),PARTITION S_NATIONKEY_6 VALUES LESS THAN(6),PARTITION S_NATIONKEY_7 VALUES LESS THAN(7),PARTITION S_NATIONKEY_8 VALUES LESS THAN(8),PARTITION S_NATIONKEY_9 VALUES LESS THAN(9),PARTITION S_NATIONKEY_10 VALUES LESS THAN(10),PARTITION S_NATIONKEY_11 VALUES LESS THAN(11),PARTITION S_NATIONKEY_12 VALUES LESS THAN(12),PARTITION S_NATIONKEY_13 VALUES LESS THAN(13),PARTITION S_NATIONKEY_14 VALUES LESS THAN(14),PARTITION S_NATIONKEY_15 VALUES LESS THAN(15),PARTITION S_NATIONKEY_16 VALUES LESS THAN(16),PARTITION S_NATIONKEY_17 VALUES LESS THAN(17),PARTITION S_NATIONKEY_18 VALUES LESS THAN(18),PARTITION S_NATIONKEY_19 VALUES LESS THAN(19),PARTITION S_NATIONKEY_20 VALUES LESS THAN(20),PARTITION S_NATIONKEY_21 VALUES LESS THAN(21),PARTITION S_NATIONKEY_22 VALUES LESS THAN(22),PARTITION S_NATIONKEY_23 VALUES LESS THAN(23),PARTITION S_NATIONKEY_24 VALUES LESS THAN(24),PARTITION S_NATIONKEY_25 VALUES LESS THAN(25)); CREATE TABLE CUSTOMER( C_CUSTKEY BIGINT NOT NULL , C_NAME VARCHAR(25) NOT NULL , C_ADDRESS VARCHAR(40) NOT NULL , C_NATIONKEY INT NOT NULL , C_PHONE VARCHAR(15) NOT NULL , C_ACCTBAL DECIMAL(15,2) NOT NULL , C_MKTSEGMENT VARCHAR(10) NOT NULL , C_COMMENT VARCHAR(117) NOT NULL) WITH (orientation=column, colversion=2.0,enable_hstore=true,enable_hstore_opt=true,bucketnums=16384,secondary_part_column='c_custkey',secondary_part_num=72)DISTRIBUTE BY hash(C_CUSTKEY)PARTITION BY RANGE(C_NATIONKEY)(PARTITION C_NATIONKEY_1 VALUES LESS THAN(1),PARTITION C_NATIONKEY_2 VALUES LESS THAN(2),PARTITION C_NATIONKEY_3 VALUES LESS THAN(3),PARTITION C_NATIONKEY_4 VALUES LESS THAN(4),PARTITION C_NATIONKEY_5 VALUES LESS THAN(5),PARTITION C_NATIONKEY_6 VALUES LESS THAN(6),PARTITION C_NATIONKEY_7 VALUES LESS THAN(7),PARTITION C_NATIONKEY_8 VALUES LESS THAN(8),PARTITION C_NATIONKEY_9 VALUES LESS THAN(9),PARTITION C_NATIONKEY_10 VALUES LESS THAN(10),PARTITION C_NATIONKEY_11 VALUES LESS THAN(11),PARTITION C_NATIONKEY_12 VALUES LESS THAN(12),PARTITION C_NATIONKEY_13 VALUES LESS THAN(13),PARTITION C_NATIONKEY_14 VALUES LESS THAN(14),PARTITION C_NATIONKEY_15 VALUES LESS THAN(15),PARTITION C_NATIONKEY_16 VALUES LESS THAN(16),PARTITION C_NATIONKEY_17 VALUES LESS THAN(17),PARTITION C_NATIONKEY_18 VALUES LESS THAN(18),PARTITION C_NATIONKEY_19 VALUES LESS THAN(19),PARTITION C_NATIONKEY_20 VALUES LESS THAN(20),PARTITION C_NATIONKEY_21 VALUES LESS THAN(21),PARTITION C_NATIONKEY_22 VALUES LESS THAN(22),PARTITION C_NATIONKEY_23 VALUES LESS THAN(23),PARTITION C_NATIONKEY_24 VALUES LESS THAN(24),PARTITION C_NATIONKEY_25 VALUES LESS THAN(25)); CREATE TABLE PART( P_PARTKEY BIGINT NOT NULL , P_NAME VARCHAR(55) NOT NULL , P_MFGR VARCHAR(25) NOT NULL , P_BRAND VARCHAR(10) NOT NULL , P_TYPE VARCHAR(25) NOT NULL , P_SIZE BIGINT NOT NULL , P_CONTAINER VARCHAR(10) NOT NULL , P_RETAILPRICE DECIMAL(15,2) NOT NULL , P_COMMENT VARCHAR(23) NOT NULL ) WITH (orientation=column, colversion=2.0,enable_hstore=true,enable_hstore_opt=true,bucketnums=16384,secondary_part_column='p_partkey',secondary_part_num=72)DISTRIBUTE BY hash(P_PARTKEY)PARTITION BY RANGE(P_SIZE)(PARTITION P_SIZE_1 VALUES LESS THAN(11),PARTITION P_SIZE_2 VALUES LESS THAN(21),PARTITION P_SIZE_3 VALUES LESS THAN(31),PARTITION P_SIZE_4 VALUES LESS THAN(41),PARTITION P_SIZE_5 VALUES LESS THAN(51)); CREATE TABLE PARTSUPP( PS_PARTKEY BIGINT NOT NULL , PS_SUPPKEY BIGINT NOT NULL , PS_AVAILQTY BIGINT NOT NULL , PS_SUPPLYCOST DECIMAL(15,2) NOT NULL , PS_COMMENT VARCHAR(199) NOT NULL) WITH (orientation=column, colversion=2.0,enable_hstore=true,enable_hstore_opt=true,bucketnums=16384,secondary_part_column='ps_partkey',secondary_part_num=72)DISTRIBUTE BY hash(PS_PARTKEY)PARTITION BY RANGE(PS_AVAILQTY)(PARTITION PS_AVAILQTY_1 VALUES LESS THAN(1000),PARTITION PS_AVAILQTY_2 VALUES LESS THAN(2000),PARTITION PS_AVAILQTY_3 VALUES LESS THAN(3000),PARTITION PS_AVAILQTY_4 VALUES LESS THAN(4000),PARTITION PS_AVAILQTY_5 VALUES LESS THAN(5000),PARTITION PS_AVAILQTY_6 VALUES LESS THAN(6000),PARTITION PS_AVAILQTY_7 VALUES LESS THAN(7000),PARTITION PS_AVAILQTY_8 VALUES LESS THAN(8000),PARTITION PS_AVAILQTY_9 VALUES LESS THAN(9000),PARTITION PS_AVAILQTY_10 VALUES LESS THAN(10000)) ; CREATE TABLE ORDERS( O_ORDERKEY BIGINT NOT NULL , O_CUSTKEY BIGINT NOT NULL , O_ORDERSTATUS VARCHAR(1) NOT NULL , O_TOTALPRICE DECIMAL(15,2) NOT NULL , O_ORDERDATE DATE NOT NULL , O_ORDERPRIORITY VARCHAR(15) NOT NULL , O_CLERK VARCHAR(15) NOT NULL , O_SHIPPRIORITY BIGINT NOT NULL , O_COMMENT VARCHAR(79) NOT NULL) WITH (orientation=column, colversion=2.0,enable_hstore=true,enable_hstore_opt=true,bucketnums=16384,secondary_part_column='o_orderkey',secondary_part_num=72)DISTRIBUTE BY hash(O_ORDERKEY)PARTITION BY RANGE(O_ORDERDATE)(PARTITION O_ORDERDATE_1 VALUES LESS THAN('1993-01-01 00:00:00'),PARTITION O_ORDERDATE_2 VALUES LESS THAN('1994-01-01 00:00:00'),PARTITION O_ORDERDATE_3 VALUES LESS THAN('1995-01-01 00:00:00'),PARTITION O_ORDERDATE_4 VALUES LESS THAN('1996-01-01 00:00:00'),PARTITION O_ORDERDATE_5 VALUES LESS THAN('1997-01-01 00:00:00'),PARTITION O_ORDERDATE_6 VALUES LESS THAN('1998-01-01 00:00:00'),PARTITION O_ORDERDATE_7 VALUES LESS THAN('1999-01-01 00:00:00')) ; CREATE TABLE LINEITEM( L_ORDERKEY BIGINT NOT NULL , L_PARTKEY BIGINT NOT NULL , L_SUPPKEY BIGINT NOT NULL , L_LINENUMBER BIGINT NOT NULL , L_QUANTITY DECIMAL(15,2) NOT NULL , L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL , L_DISCOUNT DECIMAL(15,2) NOT NULL , L_TAX DECIMAL(15,2) NOT NULL , L_RETURNFLAG VARCHAR(1) NOT NULL , L_LINESTATUS VARCHAR(1) NOT NULL , L_SHIPDATE DATE NOT NULL , L_COMMITDATE DATE NOT NULL , L_RECEIPTDATE DATE NOT NULL , L_SHIPINSTRUCT VARCHAR(25) NOT NULL , L_SHIPMODE VARCHAR(10) NOT NULL , L_COMMENT VARCHAR(44) NOT NULL) WITH (orientation=column, colversion=2.0,enable_hstore=true,enable_hstore_opt=true,bucketnums=16384,secondary_part_column='l_orderkey',secondary_part_num=72)DISTRIBUTE BY hash(L_ORDERKEY)PARTITION BY RANGE(L_SHIPDATE)(PARTITION L_SHIPDATE_1 VALUES LESS THAN('1993-01-01 00:00:00'),PARTITION L_SHIPDATE_2 VALUES LESS THAN('1994-01-01 00:00:00'),PARTITION L_SHIPDATE_3 VALUES LESS THAN('1995-01-01 00:00:00'),PARTITION L_SHIPDATE_4 VALUES LESS THAN('1996-01-01 00:00:00'),PARTITION L_SHIPDATE_5 VALUES LESS THAN('1997-01-01 00:00:00'),PARTITION L_SHIPDATE_6 VALUES LESS THAN('1998-01-01 00:00:00'),PARTITION L_SHIPDATE_7 VALUES LESS THAN('1999-01-01 00:00:00')) ;
  • Q4.1 1 2 3 4 5 6 7 8 910 SELECT div(LO_ORDERDATE,10000) AS YEAR,C_NATION,SUM(LO_REVENUE - LO_SUPPLYCOST) AS profitFROM lineorder_flatWHEREC_REGION = 'AMERICA'AND S_REGION = 'AMERICA'AND P_MFGR IN ('MFGR#1', 'MFGR#2')GROUP BY YEAR, C_NATIONORDER BY YEAR ASC, C_NATION ASC;
  • SQL20 1 2 3 4 5 6 7 8 9101112131415161718192021222324252627 select i_item_id ,i_item_desc ,i_category ,i_class ,i_current_price ,sum(cs_ext_sales_price) as itemrevenue ,sum(cs_ext_sales_price)*100/sum(sum(cs_ext_sales_price)) over (partition by i_class) as revenueratio fromcatalog_sales ,item ,date_dim where cs_item_sk = i_item_sk and i_category in ('Sports', 'Shoes', 'Women') and cs_sold_date_sk = d_date_sk and d_date between cast('2001-03-21' as date) and (cast('2001-03-21' as date) + 30) group by i_item_id ,i_item_desc ,i_category ,i_class ,i_current_price order by i_category ,i_class ,i_item_id ,i_item_desc ,revenueratiolimit 100;
  • SQL18 1 2 3 4 5 6 7 8 910111213141516171819202122232425262728293031 select i_item_id, ca_country, ca_state, ca_county, avg( cast(cs_quantity as decimal(12,2))) agg1, avg( cast(cs_list_price as decimal(12,2))) agg2, avg( cast(cs_coupon_amt as decimal(12,2))) agg3, avg( cast(cs_sales_price as decimal(12,2))) agg4, avg( cast(cs_net_profit as decimal(12,2))) agg5, avg( cast(c_birth_year as decimal(12,2))) agg6, avg( cast(cd1.cd_dep_count as decimal(12,2))) agg7 from catalog_sales, customer_demographics cd1, customer_demographics cd2, customer, customer_address, date_dim, item where cs_sold_date_sk = d_date_sk and cs_item_sk = i_item_sk and cs_bill_cdemo_sk = cd1.cd_demo_sk and cs_bill_customer_sk = c_customer_sk and cd1.cd_gender = 'M' and cd1.cd_education_status = 'Primary' and c_current_cdemo_sk = cd2.cd_demo_sk and c_current_addr_sk = ca_address_sk and c_birth_month in (10,1,8,7,3,5) and d_year = 1998 and ca_state in ('NE','OK','NC' ,'CO','ID','AR','MO') group by rollup (i_item_id, ca_country, ca_state, ca_county) order by ca_country, ca_state, ca_county,i_item_id limit 100;
  • SQL17 1 2 3 4 5 6 7 8 9101112131415161718192021222324252627282930313233343536373839404142 select i_item_id ,i_item_desc ,s_state ,count(ss_quantity) as store_sales_quantitycount ,avg(ss_quantity) as store_sales_quantityave ,stddev_samp(ss_quantity) as store_sales_quantitystdev ,stddev_samp(ss_quantity)/avg(ss_quantity) as store_sales_quantitycov ,count(sr_return_quantity) as store_returns_quantitycount ,avg(sr_return_quantity) as store_returns_quantityave ,stddev_samp(sr_return_quantity) as store_returns_quantitystdev ,stddev_samp(sr_return_quantity)/avg(sr_return_quantity) as store_returns_quantitycov ,count(cs_quantity) as catalog_sales_quantitycount ,avg(cs_quantity) as catalog_sales_quantityave ,stddev_samp(cs_quantity) as catalog_sales_quantitystdev ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitycov from store_sales ,store_returns ,catalog_sales ,date_dim d1 ,date_dim d2 ,date_dim d3 ,store ,item where d1.d_quarter_name = '2000Q1' and d1.d_date_sk = ss_sold_date_sk and i_item_sk = ss_item_sk and s_store_sk = ss_store_sk and ss_customer_sk = sr_customer_sk and ss_item_sk = sr_item_sk and ss_ticket_number = sr_ticket_number and sr_returned_date_sk = d2.d_date_sk and d2.d_quarter_name in ('2000Q1','2000Q2','2000Q3') and sr_customer_sk = cs_bill_customer_sk and sr_item_sk = cs_item_sk and cs_sold_date_sk = d3.d_date_sk and d3.d_quarter_name in ('2000Q1','2000Q2','2000Q3') group by i_item_id ,i_item_desc ,s_state order by i_item_id ,i_item_desc ,s_statelimit 100;
  • SQL7 1 2 3 4 5 6 7 8 9101112131415161718 select i_item_id, avg(ss_quantity) agg1, avg(ss_list_price) agg2, avg(ss_coupon_amt) agg3, avg(ss_sales_price) agg4 from store_sales, customer_demographics, date_dim, item, promotion where ss_sold_date_sk = d_date_sk and ss_item_sk = i_item_sk and ss_cdemo_sk = cd_demo_sk and ss_promo_sk = p_promo_sk and cd_gender = 'M' and cd_marital_status = 'U' and cd_education_status = 'College' and (p_channel_email = 'N' or p_channel_event = 'N') and d_year = 1999 group by i_item_id order by i_item_id limit 100;
  • SQL13 1 2 3 4 5 6 7 8 910111213141516171819202122232425262728293031323334353637383940414243444546474849 select avg(ss_quantity) ,avg(ss_ext_sales_price) ,avg(ss_ext_wholesale_cost) ,sum(ss_ext_wholesale_cost) from store_sales ,store ,customer_demographics ,household_demographics ,customer_address ,date_dim where s_store_sk = ss_store_sk and ss_sold_date_sk = d_date_sk and d_year = 2001 and((ss_hdemo_sk=hd_demo_sk and cd_demo_sk = ss_cdemo_sk and cd_marital_status = 'U' and cd_education_status = '4 yr Degree' and ss_sales_price between 100.00 and 150.00 and hd_dep_count = 3 )or (ss_hdemo_sk=hd_demo_sk and cd_demo_sk = ss_cdemo_sk and cd_marital_status = 'D' and cd_education_status = '2 yr Degree' and ss_sales_price between 50.00 and 100.00 and hd_dep_count = 1 ) or (ss_hdemo_sk=hd_demo_sk and cd_demo_sk = ss_cdemo_sk and cd_marital_status = 'S' and cd_education_status = 'Advanced Degree' and ss_sales_price between 150.00 and 200.00 and hd_dep_count = 1 )) and((ss_addr_sk = ca_address_sk and ca_country = 'United States' and ca_state in ('IL', 'WI', 'TN') and ss_net_profit between 100 and 200 ) or (ss_addr_sk = ca_address_sk and ca_country = 'United States' and ca_state in ('MO', 'OK', 'WA') and ss_net_profit between 150 and 300 ) or (ss_addr_sk = ca_address_sk and ca_country = 'United States' and ca_state in ('NE', 'VA', 'GA') and ss_net_profit between 50 and 250 ));
  • SQL10 1 2 3 4 5 6 7 8 91011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556 select cd_gender, cd_marital_status, cd_education_status, count(*) cnt1, cd_purchase_estimate, count(*) cnt2, cd_credit_rating, count(*) cnt3, cd_dep_count, count(*) cnt4, cd_dep_employed_count, count(*) cnt5, cd_dep_college_count, count(*) cnt6 from customer c,customer_address ca,customer_demographics where c.c_current_addr_sk = ca.ca_address_sk and ca_county in ('Clark County','Richardson County','Tom Green County','Sullivan County','Cass County') and cd_demo_sk = c.c_current_cdemo_sk and exists (select * from store_sales,date_dim where c.c_customer_sk = ss_customer_sk and ss_sold_date_sk = d_date_sk and d_year = 2000 and d_moy between 1 and 1+3) and (exists (select * from web_sales,date_dim where c.c_customer_sk = ws_bill_customer_sk and ws_sold_date_sk = d_date_sk and d_year = 2000 and d_moy between 1 ANd 1+3) or exists (select * from catalog_sales,date_dim where c.c_customer_sk = cs_ship_customer_sk and cs_sold_date_sk = d_date_sk and d_year = 2000 and d_moy between 1 and 1+3)) group by cd_gender, cd_marital_status, cd_education_status, cd_purchase_estimate, cd_credit_rating, cd_dep_count, cd_dep_employed_count, cd_dep_college_count order by cd_gender, cd_marital_status, cd_education_status, cd_purchase_estimate, cd_credit_rating, cd_dep_count, cd_dep_employed_count, cd_dep_college_countlimit 100;
  • SQL12 1 2 3 4 5 6 7 8 910111213141516171819202122232425262728293031 select i_item_id ,i_item_desc ,i_category ,i_class ,i_current_price ,sum(ws_ext_sales_price) as itemrevenue ,sum(ws_ext_sales_price)*100/sum(sum(ws_ext_sales_price)) over (partition by i_class) as revenueratiofromweb_sales,item ,date_dimwhere ws_item_sk = i_item_sk and i_category in ('Music', 'Shoes', 'Children')and ws_sold_date_sk = d_date_skand d_date between cast('2000-05-14' as date) and (cast('2000-05-14' as date) + 30 )group by i_item_id ,i_item_desc ,i_category ,i_class ,i_current_priceorder by i_category ,i_class ,i_item_id ,i_item_desc ,revenueratiolimit 100;
  • SQL5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99100101102103104105106107108109110111112113114115116117118119120121122123124125 with ssr as (select s_store_id, sum(sales_price) as sales, sum(profit) as profit, sum(return_amt) as returns, sum(net_loss) as profit_loss from ( select ss_store_sk as store_sk, ss_sold_date_sk as date_sk, ss_ext_sales_price as sales_price, ss_net_profit as profit, cast(0 as decimal(7,2)) as return_amt, cast(0 as decimal(7,2)) as net_loss from store_sales union all select sr_store_sk as store_sk, sr_returned_date_sk as date_sk, cast(0 as decimal(7,2)) as sales_price, cast(0 as decimal(7,2)) as profit, sr_return_amt as return_amt, sr_net_loss as net_loss from store_returns ) salesreturns, date_dim, store where date_sk = d_date_sk and d_date between cast('2002-08-05' as date) and (cast('2002-08-05' as date) + 14 ) and store_sk = s_store_sk group by s_store_id) , csr as (select cp_catalog_page_id, sum(sales_price) as sales, sum(profit) as profit, sum(return_amt) as returns, sum(net_loss) as profit_loss from ( select cs_catalog_page_sk as page_sk, cs_sold_date_sk as date_sk, cs_ext_sales_price as sales_price, cs_net_profit as profit, cast(0 as decimal(7,2)) as return_amt, cast(0 as decimal(7,2)) as net_loss from catalog_sales union all select cr_catalog_page_sk as page_sk, cr_returned_date_sk as date_sk, cast(0 as decimal(7,2)) as sales_price, cast(0 as decimal(7,2)) as profit, cr_return_amount as return_amt, cr_net_loss as net_loss from catalog_returns ) salesreturns, date_dim, catalog_page where date_sk = d_date_sk and d_date between cast('2002-08-05' as date) and (cast('2002-08-05' as date) + 14 ) and page_sk = cp_catalog_page_sk group by cp_catalog_page_id) , wsr as (select web_site_id, sum(sales_price) as sales, sum(profit) as profit, sum(return_amt) as returns, sum(net_loss) as profit_loss from ( select ws_web_site_sk as wsr_web_site_sk, ws_sold_date_sk as date_sk, ws_ext_sales_price as sales_price, ws_net_profit as profit, cast(0 as decimal(7,2)) as return_amt, cast(0 as decimal(7,2)) as net_loss from web_sales union all select ws_web_site_sk as wsr_web_site_sk, wr_returned_date_sk as date_sk, cast(0 as decimal(7,2)) as sales_price, cast(0 as decimal(7,2)) as profit, wr_return_amt as return_amt, wr_net_loss as net_loss from web_returns left outer join web_sales on ( wr_item_sk = ws_item_sk and wr_order_number = ws_order_number) ) salesreturns, date_dim, web_site where date_sk = d_date_sk and d_date between cast('2002-08-05' as date) and (cast('2002-08-05' as date) + 14 ) and wsr_web_site_sk = web_site_sk group by web_site_id) select channel , id , sum(sales) as sales , sum(returns) as returns , sum(profit) as profit from (select 'store channel' as channel , 'store' || s_store_id as id , sales , returns , (profit - profit_loss) as profit from ssr union all select 'catalog channel' as channel , 'catalog_page' || cp_catalog_page_id as id , sales , returns , (profit - profit_loss) as profit from csr union all select 'web channel' as channel , 'web_site' || web_site_id as id , sales , returns , (profit - profit_loss) as profit from wsr ) x group by rollup (channel, id) order by channel ,id limit 100;
  • SQL3 1 2 3 4 5 6 7 8 9101112131415161718 select dt.d_year ,item.i_brand_id brand_id ,item.i_brand brand ,sum(ss_ext_sales_price) sum_agg from date_dim dt ,store_sales ,item where dt.d_date_sk = store_sales.ss_sold_date_sk and store_sales.ss_item_sk = item.i_item_sk and item.i_manufact_id = 125 and dt.d_moy=11 group by dt.d_year ,item.i_brand ,item.i_brand_id order by dt.d_year ,sum_agg desc ,brand_id limit 100;
  • SQL2 1 2 3 4 5 6 7 8 9101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657 with wscs as (select sold_date_sk ,sales_price from (select ws_sold_date_sk sold_date_sk ,ws_ext_sales_price sales_price from web_sales union all select cs_sold_date_sk sold_date_sk ,cs_ext_sales_price sales_price from catalog_sales)), wswscs as (select d_week_seq, sum(case when (d_day_name='Sunday') then sales_price else null end) sun_sales, sum(case when (d_day_name='Monday') then sales_price else null end) mon_sales, sum(case when (d_day_name='Tuesday') then sales_price else null end) tue_sales, sum(case when (d_day_name='Wednesday') then sales_price else null end) wed_sales, sum(case when (d_day_name='Thursday') then sales_price else null end) thu_sales, sum(case when (d_day_name='Friday') then sales_price else null end) fri_sales, sum(case when (d_day_name='Saturday') then sales_price else null end) sat_sales from wscs ,date_dim where d_date_sk = sold_date_sk group by d_week_seq) select d_week_seq1 ,round(sun_sales1/sun_sales2,2) ,round(mon_sales1/mon_sales2,2) ,round(tue_sales1/tue_sales2,2) ,round(wed_sales1/wed_sales2,2) ,round(thu_sales1/thu_sales2,2) ,round(fri_sales1/fri_sales2,2) ,round(sat_sales1/sat_sales2,2) from (select wswscs.d_week_seq d_week_seq1 ,sun_sales sun_sales1 ,mon_sales mon_sales1 ,tue_sales tue_sales1 ,wed_sales wed_sales1 ,thu_sales thu_sales1 ,fri_sales fri_sales1 ,sat_sales sat_sales1 from wswscs,date_dim where date_dim.d_week_seq = wswscs.d_week_seq and d_year = 1999) y, (select wswscs.d_week_seq d_week_seq2 ,sun_sales sun_sales2 ,mon_sales mon_sales2 ,tue_sales tue_sales2 ,wed_sales wed_sales2 ,thu_sales thu_sales2 ,fri_sales fri_sales2 ,sat_sales sat_sales2 from wswscs ,date_dim where date_dim.d_week_seq = wswscs.d_week_seq and d_year = 1999+1) z where d_week_seq1=d_week_seq2-53 order by d_week_seq1;
  • 命令生成方法 TPC-DS标准99个SQL查询语句可用如下方法生成: 准备工作。生成TPC-DS查询语句前需要修改query_templates目录下的文件: 登录测试过程申请的E CS ,进入/data1/script/tpcds-kit/DSGen-software-code-3.2.0rc1/query_templates目录: 1 cd /data1/script/tpcds-kit/DSGen-software-code-3.2.0rc1/query_templates 新建文件hwdws.tpl,内容为: 12345 define __LIMITA = "";define __LIMITB = "";define __LIMITC = "limit %d";define _BEGIN = "-- begin query " + [_QUERY] + " in stream " + [_STREAM] + " using template " + [_TEMPLATE];define _END = "-- end query " + [_QUERY] + " in stream " + [_STREAM] + " using template " + [_TEMPLATE]; 因TPC-DS工具中SQL语句生成模板有语法错误,需修改query77.tpl,将135行的‘, coalesce(returns, 0) returns’改为‘, coalesce(returns, 0) as returns’。 执行以下命令生成查询语句: 12 cd /data1/script/tpcds-kit/DSGen-software-code-3.2.0rc1/tools./dsqgen -input ../query_templates/templates.lst -directory ../query_templates/ -scale 1000 -dialect hwdws 执行后会生成query_0.sql文件,里面放着99个标准SQL语句,需要手动去切分成99个文件。 生成的标准查询中如下日期函数语法在 GaussDB (DWS)暂不支持,需要手动进行修改: 1 2 3 4 5 6 7 8 910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667 Q5: and (cast('2001-08-19' as date) + 14 days)修改为and (cast('2001-08-19' as date) + 14)Q12:and (cast('1999-02-28' as date) + 30 days)修改为and (cast('1999-02-28' as date) + 30)Q16:(cast('1999-4-01' as date) + 60 days)修改为(cast('1999-4-01' as date) + 60)Q20:and (cast('1998-05-05' as date) + 30 days)修改为and (cast('1998-05-05' as date) + 30)Q21:and d_date between (cast ('2000-05-19' as date) - 30 days)修改为and d_date between (cast ('2000-05-19' as date) - 30)and (cast ('2000-05-19' as date) + 30 days)修改为and (cast ('2000-05-19' as date) + 30)Q32:(cast('1999-02-22' as date) + 90 days)修改为(cast('1999-02-22' as date) + 90)Q37:and d_date between cast('1998-04-29' as date) and (cast('1998-04-29' as date) + 60 days)修改为and d_date between cast('1998-04-29' as date) and (cast('1998-04-29' as date) + 60)Q40:and d_date between (cast ('2002-05-10' as date) - 30 days)修改为and d_date between (cast ('2002-05-10' as date) - 30)and (cast ('2002-05-10' as date) + 30 days)修改为and (cast ('2002-05-10' as date) + 30)Q77:and (cast('1999-08-29' as date) + 30 days)修改为and (cast('1999-08-29' as date) + 30)Q80:and (cast('2002-08-04' as date) + 30 days)修改为and (cast('2002-08-04' as date) + 30)Q82:and d_date between cast('1998-01-18' as date) and (cast('1998-01-18' as date) + 60 days)修改为and d_date between cast('1998-01-18' as date) and (cast('1998-01-18' as date) + 60)Q92:(cast('2001-01-26' as date) + 90 days)修改为(cast('2001-01-26' as date) + 90)Q94:(cast('1999-5-01' as date) + 60 days)修改为(cast('1999-5-01' as date) + 60)Q95:(cast('1999-4-01' as date) + 60 days)修改为(cast('1999-4-01' as date) + 60)Q98:and (cast('2002-04-01' as date) + 30 days)修改为and (cast('2002-04-01' as date) + 30)
  • 导入SSB数据 执行以下命令导入数据。 1 2 3 4 5 6 7 8 910111213141516171819202122232425262728293031323334353637383940414243444546474849 INSERT INTO customer SELECT * FROM customer_load;INSERT INTO supplier SELECT * FROM supplier_load;INSERT INTO part SELECT * FROM part_load;INSERT INTO lineorder SELECT * FROM lineorder_load; INSERT INTO lineorder_flatSELECT l.LO_ORDERKEY AS LO_ORDERKEY, l.LO_LINENUMBER AS LO_LINENUMBER, l.LO_CUSTKEY AS LO_CUSTKEY, l.LO_PARTKEY AS LO_PARTKEY, l.LO_SUPPKEY AS LO_SUPPKEY, l.LO_ORDERDATE AS LO_ORDERDATE, l.LO_ORDERPRIORITY AS LO_ORDERPRIORITY, l.LO_SHIPPRIORITY AS LO_SHIPPRIORITY, l.LO_QUANTITY AS LO_QUANTITY, l.LO_EXTENDEDPRICE AS LO_EXTENDEDPRICE, l.LO_ORDTOTALPRICE AS LO_ORDTOTALPRICE, l.LO_DISCOUNT AS LO_DISCOUNT, l.LO_REVENUE AS LO_REVENUE, l.LO_SUPPLYCOST AS LO_SUPPLYCOST, l.LO_TAX AS LO_TAX, l.LO_COMMITDATE AS LO_COMMITDATE, l.LO_SHIPMODE AS LO_SHIPMODE, c.C_NAME AS C_NAME, c.C_ADDRESS AS C_ADDRESS, c.C_CITY AS C_CITY, c.C_NATION AS C_NATION, c.C_REGION AS C_REGION, c.C_PHONE AS C_PHONE, c.C_MKTSEGMENT AS C_MKTSEGMENT, s.S_NAME AS S_NAME, s.S_ADDRESS AS S_ADDRESS, s.S_CITY AS S_CITY, s.S_NATION AS S_NATION, s.S_REGION AS S_REGION, s.S_PHONE AS S_PHONE, p.P_NAME AS P_NAME, p.P_MFGR AS P_MFGR, p.P_CATEGORY AS P_CATEGORY, p.P_BRAND AS P_BRAND, p.P_COLOR AS P_COLOR, p.P_TYPE AS P_TYPE, p.P_SIZE AS P_SIZE, p.P_CONTAINER AS P_CONTAINERFROM lineorder AS lINNER JOIN customer AS c ON c.C_CUSTKEY = l.LO_CUSTKEYINNER JOIN supplier AS s ON s.S_SUPPKEY = l.LO_SUPPKEYINNER JOIN part AS p ON p.P_PARTKEY = l.LO_PARTKEY;
  • 创建SSB数据集的GDS外表 连接DWS数据库后执行以下SQL语句。 以下每个外表的“gsfs://192.168.0.90:500x/xxx | gsfs://192.168.0.90:500x/xxx”中的IP地址和端口,请替换成安装和启动GDS中的对应的GDS的监听IP和端口。如启动两个GDS,则使用“|”区分。如果启动多个GDS,需要将所有GDS的监听IP和端口配置到外表中。 1 2 3 4 5 6 7 8 9101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384 DROP FOREIGN TABLE IF EXISTS customer_load;CREATE FOREIGN TABLE customer_load( C_CUSTKEY BIGINT NOT NULL , C_NAME VARCHAR(25) NOT NULL , C_ADDRESS VARCHAR(40) NOT NULL , C_CITY VARCHAR(25) NOT NULL , C_NATION VARCHAR(25) NOT NULL , C_REGION VARCHAR(25) NOT NULL , C_PHONE VARCHAR(15) NOT NULL , C_MKTSEGMENT VARCHAR(10) NOT NULL)SERVER gsmpp_serverOPTIONS(location 'gsfs://192.168.0.90:5004/customer.tbl*',format 'text',delimiter '|',encoding 'utf8',mode 'Normal'); DROP FOREIGN TABLE IF EXISTS supplier_load;CREATE FOREIGN TABLE supplier_load( S_SUPPKEY BIGINT NOT NULL , S_NAME VARCHAR(25) NOT NULL , S_ADDRESS VARCHAR(40) NOT NULL , S_CITY VARCHAR(25) NOT NULL , S_NATION VARCHAR(25) NOT NULL , S_REGION VARCHAR(25) NOT NULL , S_PHONE VARCHAR(15) NOT NULL)SERVER gsmpp_serverOPTIONS(location 'gsfs://192.168.0.90:5004/supplier.tbl*',format 'text',delimiter '|',encoding 'utf8',mode 'Normal'); DROP FOREIGN TABLE IF EXISTS part_load;CREATE FOREIGN TABLE part_load( P_PARTKEY BIGINT NOT NULL , P_NAME VARCHAR(55) NOT NULL , P_MFGR VARCHAR(25) NOT NULL , P_CATEGORY VARCHAR(25) NOT NULL , P_BRAND VARCHAR(10) NOT NULL , P_COLOR VARCHAR(20) NOT NULL , P_TYPE VARCHAR(25) NOT NULL , P_SIZE BIGINT NOT NULL , P_CONTAINER VARCHAR(10) NOT NULL)SERVER gsmpp_serverOPTIONS(location 'gsfs://192.168.0.90:5004/part.tbl*',format 'text',delimiter '|',encoding 'utf8',mode 'Normal'); DROP FOREIGN TABLE IF EXISTS lineorder_load;CREATE FOREIGN TABLE lineorder_load( LO_ORDERKEY BIGINT NOT NULL, LO_LINENUMBER BIGINT NOT NULL, LO_CUSTKEY BIGINT NOT NULL, LO_PARTKEY BIGINT NOT NULL, LO_SUPPKEY BIGINT NOT NULL, LO_ORDERDATE DATE NOT NULL, LO_ORDERPRIORITY VARCHAR(15) NOT NULL, LO_SHIPPRIORITY BIGINT NOT NULL, LO_QUANTITY BIGINT NOT NULL, LO_EXTENDEDPRICE BIGINT NOT NULL, LO_ORDTOTALPRICE BIGINT NOT NULL, LO_DISCOUNT BIGINT NOT NULL, LO_REVENUE BIGINT NOT NULL, LO_SUPPLYCOST BIGINT NOT NULL, LO_TAX BIGINT NOT NULL, LO_COMMITDATE DATE NOT NULL, LO_SHIPMODE VARCHAR(10) NOT NULL)SERVER gsmpp_serverOPTIONS(location 'gsfs://192.168.0.90:5004/lineorder.tbl*',format 'text',delimiter '|',encoding 'utf8',mode 'Normal');
  • 创建SSB目标表 连接DWS数据库后执行以下SQL语句。 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168 CREATE TABLE CUSTOMER(C_CUSTKEY BIGINT NOT NULL,C_NAME VARCHAR(25) NOT NULL,C_ADDRESS VARCHAR(40) NOT NULL,C_CITY VARCHAR(25) NOT NULL,C_NATION VARCHAR(25) NOT NULL,C_REGION VARCHAR(25) NOT NULL,C_PHONE VARCHAR(15) NOT NULL,C_MKTSEGMENT VARCHAR(10) NOT NULL)WITH (orientation=column, colversion=2.0,enable_hstore=true,enable_hstore_opt=true)DISTRIBUTE BY hash(C_CUSTKEY) ;CREATE TABLE SUPPLIER(S_SUPPKEY BIGINT NOT NULL, S_NAME VARCHAR(25) NOT NULL, S_ADDRESS VARCHAR(40) NOT NULL, S_CITY VARCHAR(25) NOT NULL, S_NATION VARCHAR(25) NOT NULL, S_REGION VARCHAR(25) NOT NULL, S_PHONE VARCHAR(15) NOT NULL)WITH (orientation=column, colversion=2.0,enable_hstore=true,enable_hstore_opt=true)DISTRIBUTE BY hash(S_SUPPKEY);CREATE TABLE PART(P_PARTKEY BIGINT NOT NULL, P_NAME VARCHAR(55) NOT NULL, P_MFGR VARCHAR(25) NOT NULL, P_CATEGORY VARCHAR(25) NOT NULL, P_BRAND VARCHAR(10) NOT NULL, P_COLOR VARCHAR(20) NOT NULL, P_TYPE VARCHAR(25) NOT NULL, P_SIZE BIGINT NOT NULL, P_CONTAINER VARCHAR(10) NOT NULL)WITH (orientation=column, colversion=2.0,enable_hstore=true,enable_hstore_opt=true)DISTRIBUTE BY hash(P_PARTKEY);CREATE TABLE lineorder(LO_ORDERKEY BIGINT NOT NULL,LO_LINENUMBER BIGINT NOT NULL,LO_CUSTKEY BIGINT NOT NULL,LO_PARTKEY BIGINT NOT NULL,LO_SUPPKEY BIGINT NOT NULL,LO_ORDERDATE DATE NOT NULL,LO_ORDERPRIORITY VARCHAR(15) NOT NULL,LO_SHIPPRIORITY BIGINT NOT NULL,LO_QUANTITY BIGINT NOT NULL,LO_EXTENDEDPRICE BIGINT NOT NULL,LO_ORDTOTALPRICE BIGINT NOT NULL,LO_DISCOUNT BIGINT NOT NULL,LO_REVENUE BIGINT NOT NULL,LO_SUPPLYCOST BIGINT NOT NULL,LO_TAX BIGINT NOT NULL,LO_COMMITDATE DATE NOT NULL,LO_SHIPMODE VARCHAR(10) NOT NULL)WITH (orientation=column, colversion=2.0,enable_hstore=true,enable_hstore_opt=true)DISTRIBUTE BY hash(LO_ORDERKEY)PARTITION BY RANGE(LO_ORDERDATE)(PARTITION LO_ORDERDATE_1 VALUES LESS THAN('1992-04-01 00:00:00'),PARTITION LO_ORDERDATE_2 VALUES LESS THAN('1992-07-01 00:00:00'),PARTITION LO_ORDERDATE_3 VALUES LESS THAN('1992-10-01 00:00:00'),PARTITION LO_ORDERDATE_4 VALUES LESS THAN('1993-01-01 00:00:00'),PARTITION LO_ORDERDATE_5 VALUES LESS THAN('1993-04-01 00:00:00'),PARTITION LO_ORDERDATE_6 VALUES LESS THAN('1993-07-01 00:00:00'),PARTITION LO_ORDERDATE_7 VALUES LESS THAN('1993-10-01 00:00:00'),PARTITION LO_ORDERDATE_8 VALUES LESS THAN('1994-01-01 00:00:00'),PARTITION LO_ORDERDATE_9 VALUES LESS THAN('1994-04-01 00:00:00'),PARTITION LO_ORDERDATE_10 VALUES LESS THAN('1994-07-01 00:00:00'),PARTITION LO_ORDERDATE_11 VALUES LESS THAN('1994-10-01 00:00:00'),PARTITION LO_ORDERDATE_12 VALUES LESS THAN('1995-01-01 00:00:00'),PARTITION LO_ORDERDATE_13 VALUES LESS THAN('1995-04-01 00:00:00'),PARTITION LO_ORDERDATE_14 VALUES LESS THAN('1995-07-01 00:00:00'),PARTITION LO_ORDERDATE_15 VALUES LESS THAN('1995-10-01 00:00:00'),PARTITION LO_ORDERDATE_16 VALUES LESS THAN('1996-01-01 00:00:00'),PARTITION LO_ORDERDATE_17 VALUES LESS THAN('1996-04-01 00:00:00'),PARTITION LO_ORDERDATE_18 VALUES LESS THAN('1996-07-01 00:00:00'),PARTITION LO_ORDERDATE_19 VALUES LESS THAN('1996-10-01 00:00:00'),PARTITION LO_ORDERDATE_20 VALUES LESS THAN('1997-01-01 00:00:00'),PARTITION LO_ORDERDATE_21 VALUES LESS THAN('1997-04-01 00:00:00'),PARTITION LO_ORDERDATE_22 VALUES LESS THAN('1997-07-01 00:00:00'),PARTITION LO_ORDERDATE_23 VALUES LESS THAN('1997-10-01 00:00:00'),PARTITION LO_ORDERDATE_24 VALUES LESS THAN('1998-01-01 00:00:00'),PARTITION LO_ORDERDATE_25 VALUES LESS THAN('1998-04-01 00:00:00'),PARTITION LO_ORDERDATE_26 VALUES LESS THAN('1998-07-01 00:00:00'),PARTITION LO_ORDERDATE_27 VALUES LESS THAN('1998-10-01 00:00:00'),PARTITION LO_ORDERDATE_28 VALUES LESS THAN('1999-01-01 00:00:00'));SET enable_hstoreopt_auto_bitmap=true;CREATE TABLE lineorder_flat(LO_ORDERKEY BIGINT NOT NULL,LO_LINENUMBER BIGINT NOT NULL,LO_CUSTKEY BIGINT NOT NULL,LO_PARTKEY BIGINT NOT NULL,LO_SUPPKEY BIGINT NOT NULL,LO_ORDERDATE DATE NOT NULL,LO_ORDERPRIORITY VARCHAR(15) NOT NULL,LO_SHIPPRIORITY BIGINT NOT NULL,LO_QUANTITY BIGINT NOT NULL,LO_EXTENDEDPRICE BIGINT NOT NULL,LO_ORDTOTALPRICE BIGINT NOT NULL,LO_DISCOUNT BIGINT NOT NULL,LO_REVENUE BIGINT NOT NULL,LO_SUPPLYCOST BIGINT NOT NULL,LO_TAX BIGINT NOT NULL,LO_COMMITDATE DATE NOT NULL,LO_SHIPMODE VARCHAR(10) NOT NULL,C_NAME VARCHAR(25) NOT NULL, C_ADDRESS VARCHAR(40) NOT NULL, C_CITY VARCHAR(25) NOT NULL, C_NATION VARCHAR(25) NOT NULL, C_REGION VARCHAR(25) NOT NULL, C_PHONE VARCHAR(15) NOT NULL, C_MKTSEGMENT VARCHAR(10) NOT NULL, S_NAME VARCHAR(25) NOT NULL, S_ADDRESS VARCHAR(40) NOT NULL, S_CITY VARCHAR(25) NOT NULL, S_NATION VARCHAR(25) NOT NULL, S_REGION VARCHAR(25) NOT NULL, S_PHONE VARCHAR(15) NOT NULL, P_NAME VARCHAR(55) NOT NULL, P_MFGR VARCHAR(25) NOT NULL, P_CATEGORY VARCHAR(25) NOT NULL, P_BRAND VARCHAR(10) NOT NULL, P_COLOR VARCHAR(20) NOT NULL, P_TYPE VARCHAR(25) NOT NULL, P_SIZE BIGINT NOT NULL, P_CONTAINER VARCHAR(10) NOT NULL, Partial Cluster Key(s_region,s_nation,s_city)) WITH (orientation=column, colversion=2.0,enable_hstore=true,enable_hstore_opt=true,secondary_part_column='p_mfgr', secondary_part_num=8)DISTRIBUTE BY hash(LO_ORDERKEY)PARTITION BY RANGE(LO_ORDERDATE)(PARTITION LO_ORDERDATE_1 VALUES LESS THAN('1992-04-01 00:00:00'),PARTITION LO_ORDERDATE_2 VALUES LESS THAN('1992-07-01 00:00:00'),PARTITION LO_ORDERDATE_3 VALUES LESS THAN('1992-10-01 00:00:00'),PARTITION LO_ORDERDATE_4 VALUES LESS THAN('1993-01-01 00:00:00'),PARTITION LO_ORDERDATE_5 VALUES LESS THAN('1993-04-01 00:00:00'),PARTITION LO_ORDERDATE_6 VALUES LESS THAN('1993-07-01 00:00:00'),PARTITION LO_ORDERDATE_7 VALUES LESS THAN('1993-10-01 00:00:00'),PARTITION LO_ORDERDATE_8 VALUES LESS THAN('1994-01-01 00:00:00'),PARTITION LO_ORDERDATE_9 VALUES LESS THAN('1994-04-01 00:00:00'),PARTITION LO_ORDERDATE_10 VALUES LESS THAN('1994-07-01 00:00:00'),PARTITION LO_ORDERDATE_11 VALUES LESS THAN('1994-10-01 00:00:00'),PARTITION LO_ORDERDATE_12 VALUES LESS THAN('1995-01-01 00:00:00'),PARTITION LO_ORDERDATE_13 VALUES LESS THAN('1995-04-01 00:00:00'),PARTITION LO_ORDERDATE_14 VALUES LESS THAN('1995-07-01 00:00:00'),PARTITION LO_ORDERDATE_15 VALUES LESS THAN('1995-10-01 00:00:00'),PARTITION LO_ORDERDATE_16 VALUES LESS THAN('1996-01-01 00:00:00'),PARTITION LO_ORDERDATE_17 VALUES LESS THAN('1996-04-01 00:00:00'),PARTITION LO_ORDERDATE_18 VALUES LESS THAN('1996-07-01 00:00:00'),PARTITION LO_ORDERDATE_19 VALUES LESS THAN('1996-10-01 00:00:00'),PARTITION LO_ORDERDATE_20 VALUES LESS THAN('1997-01-01 00:00:00'),PARTITION LO_ORDERDATE_21 VALUES LESS THAN('1997-04-01 00:00:00'),PARTITION LO_ORDERDATE_22 VALUES LESS THAN('1997-07-01 00:00:00'),PARTITION LO_ORDERDATE_23 VALUES LESS THAN('1997-10-01 00:00:00'),PARTITION LO_ORDERDATE_24 VALUES LESS THAN('1998-01-01 00:00:00'),PARTITION LO_ORDERDATE_25 VALUES LESS THAN('1998-04-01 00:00:00'),PARTITION LO_ORDERDATE_26 VALUES LESS THAN('1998-07-01 00:00:00'),PARTITION LO_ORDERDATE_27 VALUES LESS THAN('1998-10-01 00:00:00'),PARTITION LO_ORDERDATE_28 VALUES LESS THAN('1999-01-01 00:00:00')) ;SET enable_hstoreopt_auto_bitmap=false;
  • API接口返回的中文字符为乱码,如何解决? 当API接口返回的中文字符出现乱码时,通常是因为字符编码格式不匹配。 DLI 接口返回的结果编码格式为“UTF-8”,在调用接口获取返回结果时需要对返回的信息编码转换为“UTF-8”。 例如,参考如下实现对返回的response.content内容做编码格式转换,确保返回的中文格式不会乱码。 print(response.content.decode("utf-8")) 父主题: DLI API类
  • 问题现象 Flink SQL作业创建表时,表名使用EL表达式,运行作业时报如下错误: DLI.0005: AnalysisException: t_user_message_input_#{date_format(date_sub(current_date(), 1), 'yyyymmddhhmmss')} is not a valid name for tables/databases. Valid names only contain alphabet characters, numbers and _.
  • 解决方案 需要将SQL中表名的“#”字符改成“$”即可。DLI中使用EL表达式的格式为:${expr}。 修改前: t_user_message_input_#{date_format(date_sub(current_date(), 1), 'yyyymmddhhmmss')} 修改后: t_user_message_input_${date_format(date_sub(current_date(), 1), 'yyyymmddhhmmss')} 修改后,Flink SQL作业能够正确解析表名,并根据EL表达式动态生成表名。
  • 检查是否为DLI授权了DLI Datasource Connections Agency Access权限 您可以从连接日志判断是否是由于权限不足导致的连接失败。 如图4、图5所示,无法获取对端的子网ID、路由ID,因此跨源连接失败。 解决措施:请在服务授权添加DLI Datasource Connections Agency Access授权。 了解DLI更新委托权限。 图4 查看连接日志-2 图5 查看连接日志-3 图6 DLI服务授权
  • 排查思路 以下排查思路根据原因的出现概率进行排序,建议您从高频率原因往低频率原因排查,从而帮助您快速找到问题的原因。 如果解决完某个可能原因仍未解决问题,请继续排查其他可能原因。 检查是否在 域名 或者IP后添加了端口 检查是否连接的是对端VPC和子网 检查队列的网段是否与数据源网段是否重合 检查是否为DLI授权了DLI Datasource Connections Agency Access权限 检查对端安全组是否放通队列的网段 检查增强型跨源连接对应的对等连接的路由信息 检查VPC网络是否设置了ACL规则限制了网络访问
  • 检查队列的网段是否与数据源网段是否重合 绑定跨源的DLI队列网段和数据源网段不能重合。 您可以从连接日志判断是否是队列与数据源网段冲突。 如图3所示,即当前队列A网段与其他队列B网段冲突,且队列B已经建立了与数据源C的增强型跨源连接。因此提示队列A与数据源C的网段冲突,无法建立新的增强型跨源连接。 解决措施:修改队列网段或重建队列。 建议创建队列时就规划好网段划分,否则冲突后只能修改队列网段或重建队列。 图3 查看连接日志-1
  • 解决方案 登录 IAM 控制台页面,单击“用户”,在搜索框中选择“用户名”,输入运行作业的IAM用户名。 单击查询到用户名,查看该用户对应的用户组。 单击“用户组”,输入查询到的用户组查询,单击用户组名称,在“授权记录”中查看当前用户的权限。 确认当前用户所属用户组下的权限是否包含OBS写入的权限,比如“OBS OperateAccess”。如果没有OBS写入权限,则给对应的用户组进行授权。 授权完成后,等待5到10分钟等待权限生效。再次运行失败的Flink SQL作业,查看作业运行状态。
  • 问题现象 客户创建增强型跨源连接后,在队列管理测试网络连通性,网络不通,单击对应的跨源连接查看详情,发现绑定队列失败,报错信息如下: Failed to get subnet 86ddcf50-233a-449d-9811-cfef2f603213. Response code : 404, message : {"code":"VPC.0202","message":"Query resource by id 86ddcf50-233a-449d-9811-cfef2f603213 fail.the subnet could not be found."}
  • 排查方案 在OBS管理控制台找到DLI作业桶。 查看所选桶的桶策略。 DLI Flink作业所需要使用的桶授权信息如下,其中domainId和userId分别为DLI的账号和子账号,bucketName为用户桶名,timeStamp为策略创建时的时间戳。 { "Statement": [ { "Effect": "Allow", "Principal": { "ID": [ "domain/domainId:user/userId" ] }, "Action": [ "GetObject", "GetObjectVersion", "PutObject", "DeleteObject", "DeleteObjectVersion", "ListMultipartUploadParts", "AbortMultipartUpload", "GetObjectAcl", "GetObjectVersionAcl" ], "Resource": [ "bucketName/*" ], "Sid": "未命名的桶策略-Timestamp-0" }, { "Effect": "Allow", "Principal": { "ID": [ "domain/domainId:user/userId " ] }, "Action": [ "HeadBucket", "ListBucket", "ListBucketVersions", "ListBucketMultipartUploads", "GetBucketAcl", "GetBucketLocation", "GetBucketLogging", "GetLifecycleConfiguration" ], "Resource": [ " bucketName " ], "Sid": "未命名的桶策略-Timestamp-1" } ]} 在管理控制台检查以下权限相关内容,查看策略名称是否与2一致。 效力:允许 授权资源:按需授权桶和对象。 授权操作:与2中Action一致 常用检查项: 检查是否配置了所有账号的某些拒绝操作,且这些操作是上述DLI所需要的授权操作。 检查是否对DLI的被授权用户配置了某些拒绝操作,且这些操作是上述DLI所需要的授权操作。
  • 怎样配置DLI队列与数据源的网络连通? 配置DLI队列与内网数据源的网络连通 DLI在创建运行作业需要连接外部其他数据源,如:DLI连接 MRS 、RDS、 CSS 、Kafka、DWS时,需要打通DLI和外部数据源之间的网络。 DLI提供的增强型跨源连接功能,底层采用对等连接的方式打通与目的数据源的vpc网络,通过点对点的方式实现数据互通。 图1 增强型跨源连接配置流程 配置DLI队列与公网网络连通 通过配置SNAT规则,添加到公网的路由信息,可以实现队列到和公网的网络打通。 图2 配置DLI队列访问公网流程 父主题: 增强型跨源连接类
  • SQL作业执行完成后,修改表名导致datasize不正确怎么办? 在执行SQL作业后立即修改表名,可能会导致表的数据大小结果不正确。 这是因为DLI在执行SQL作业时,会对表进行元数据更新,如果在作业执行完成前修改了表名,会和作业的元数据更新过程冲突,从而影响对数据大小的判断。 为了避免这种情况,建议在SQL作业执行完成后,等待5分钟后再修改表名。确保系统有足够的时间更新表的元数据,避免因修改表名而导致的数据大小统计不准确的问题。 父主题: DLI数据库和表类
  • 操作步骤 在管理控制台检查对应SQL作业详情中的“结果条数”是否正确。 检查发现读取的数据量是正确的。 图1 检查读取的数据量 确认客户验证数据量的方式是否正确。客户验证的方式如下: 通过OBS下载数据文件。 通过文本编辑器打开数据文件,发现数据量缺失。 根据该验证方式,初步定位是因为文件数据量较大,文本编辑器无法全部读取。 通过执行查询语句,查询OBS数据进一步进行确认,查询结果确认数据量正确。 因此,该问题为验证方式不正确造成。
  • 解决方案 在SQL语句中配置发送失败重试:connector.properties.retries=5 create table kafka_sink( car_type string , car_name string , primary key (union_id) not enforced) with ( "connector.type" = "upsert-kafka", "connector.version" = "0.11", "connector.properties.bootstrap.servers" = "xxxx:9092", "connector.topic" = "kafka_car_topic ", "connector.sink.ignore-retraction" = "true", "connector.properties.retries" = "5", "format.type" = "json");
  • 怎样获取DLI作业样例(Demo) 为了方便用户更好地使用DLI,DLI服务提供了供作业开发的Demo样例,您可以通过DLI样例代码获取。 该样例代码的目录内容介绍如下: dli-flink-demo:开发Flink作业时的样例代码参考。例如,样例代码实现读取Kafka源表数据写入到HDFS、DWS、Hive等结果表中的功能。 dli-spark-demo:开发Spark作业时的样例代码参考。具体如下: “dli-spark-demo-obs”读取和写入OBS数据。 “dli-spark-demo-redis”读取和写入Redis数据。 dli-pyspark-demo:使用Python语言开发Spark作业。例如,样例包中的样例代码实现创建Redis表的功能。 父主题: DLI产品咨询类
共100000条