[KYUUBI #598] [TEST][TPCDS] Introduce schema check for queries-9

<!--
Thanks for sending a pull request!

Here are some tips for you:
  1. If this is your first time, please read our contributor guidelines: https://kyuubi.readthedocs.io/en/latest/community/contributions.html
  2. If the PR is related to an issue in https://github.com/NetEase/kyuubi/issues, add '[KYUUBI #XXXX]' in your PR title, e.g., '[KYUUBI #XXXX] Your PR title ...'.
  3. If the PR is unfinished, add '[WIP]' in your PR title, e.g., '[WIP][KYUUBI #XXXX] Your PR title ...'.
-->

### _Why are the changes needed?_
<!--
Please clarify why the changes are needed. For instance,
  1. If you add a feature, you can talk about the use case of it.
  2. If you fix a bug, you can clarify why it is a bug.
-->
Introduce schema checks for the TPC-DS queries numbered in the nineties (q90–q99).

### _How was this patch tested?_
- [ ] Add some test cases that check the changes thoroughly including negative and positive cases if possible

- [ ] Add screenshots for manual tests if appropriate

- [x] [Run test](https://kyuubi.readthedocs.io/en/latest/tools/testing.html#running-tests) locally before making a pull request

Closes #598 from turboFei/tpcds_9.

Closes #598

fef260d [fwang12] [TEST][TPCDS] Introduce schema check for queries-9

Authored-by: fwang12 <fwang12@ebay.com>
Signed-off-by: ulysses-you <ulyssesyou18@gmail.com>
This commit is contained in:
fwang12 2021-04-25 09:33:11 +08:00 committed by ulysses-you
parent e1fa5c62ed
commit 540de12764
20 changed files with 405 additions and 0 deletions

View File

@ -0,0 +1 @@
struct<am_pm_ratio:decimal>

View File

@ -0,0 +1,36 @@
--
-- Licensed to the Apache Software Foundation (ASF) under one or more
-- contributor license agreements. See the NOTICE file distributed with
-- this work for additional information regarding copyright ownership.
-- The ASF licenses this file to You under the Apache License, Version 2.0
-- (the "License"); you may not use this file except in compliance with
-- the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
--
-- Ratio of two web_sales row counts that differ only in the t_hour window:
-- hours 8..9 (amc) versus hours 19..20 (pmc). Both counts are restricted to
-- hd_dep_count = 6 and wp_char_count between 5000 and 5200.
WITH am_sales AS (
    SELECT count(*) AS amc
    FROM web_sales
    INNER JOIN household_demographics
        ON ws_ship_hdemo_sk = household_demographics.hd_demo_sk
    INNER JOIN time_dim
        ON ws_sold_time_sk = time_dim.t_time_sk
    INNER JOIN web_page
        ON ws_web_page_sk = web_page.wp_web_page_sk
    WHERE time_dim.t_hour BETWEEN 8 AND 8 + 1
        AND household_demographics.hd_dep_count = 6
        AND web_page.wp_char_count BETWEEN 5000 AND 5200
),
pm_sales AS (
    SELECT count(*) AS pmc
    FROM web_sales
    INNER JOIN household_demographics
        ON ws_ship_hdemo_sk = household_demographics.hd_demo_sk
    INNER JOIN time_dim
        ON ws_sold_time_sk = time_dim.t_time_sk
    INNER JOIN web_page
        ON ws_web_page_sk = web_page.wp_web_page_sk
    WHERE time_dim.t_hour BETWEEN 19 AND 19 + 1
        AND household_demographics.hd_dep_count = 6
        AND web_page.wp_char_count BETWEEN 5000 AND 5200
)
-- Each CTE yields exactly one row, so the cross join yields one row as well.
SELECT
    cast(am_sales.amc AS DECIMAL(15, 4)) / cast(pm_sales.pmc AS DECIMAL(15, 4)) AS am_pm_ratio
FROM am_sales
CROSS JOIN pm_sales
ORDER BY am_pm_ratio
LIMIT 100

View File

@ -0,0 +1 @@
struct<Call_Center:string,Call_Center_Name:string,Manager:string,Returns_Loss:decimal>

View File

@ -0,0 +1,40 @@
--
-- Licensed to the Apache Software Foundation (ASF) under one or more
-- contributor license agreements. See the NOTICE file distributed with
-- this work for additional information regarding copyright ownership.
-- The ASF licenses this file to You under the Apache License, Version 2.0
-- (the "License"); you may not use this file except in compliance with
-- the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
--
-- Sum of cr_net_loss per call center (id, name, manager) for catalog returns
-- with d_year = 1998 and d_moy = 11, limited to returning customers that match
-- the marital/education, buy-potential and GMT-offset filters below.
-- The grouping also includes cd_marital_status and cd_education_status, so a
-- call center can appear on more than one output row.
SELECT
    cc_call_center_id AS Call_Center,
    cc_name AS Call_Center_Name,
    cc_manager AS Manager,
    sum(cr_net_loss) AS Returns_Loss
FROM catalog_returns
INNER JOIN call_center
    ON cr_call_center_sk = cc_call_center_sk
INNER JOIN date_dim
    ON cr_returned_date_sk = d_date_sk
INNER JOIN customer
    ON cr_returning_customer_sk = c_customer_sk
INNER JOIN customer_address
    ON ca_address_sk = c_current_addr_sk
INNER JOIN customer_demographics
    ON cd_demo_sk = c_current_cdemo_sk
INNER JOIN household_demographics
    ON hd_demo_sk = c_current_hdemo_sk
WHERE d_year = 1998
    AND d_moy = 11
    AND (
        (cd_marital_status = 'M' AND cd_education_status = 'Unknown')
        OR (cd_marital_status = 'W' AND cd_education_status = 'Advanced Degree')
    )
    AND hd_buy_potential LIKE 'Unknown%'
    AND ca_gmt_offset = -7
GROUP BY
    cc_call_center_id,
    cc_name,
    cc_manager,
    cd_marital_status,
    cd_education_status
ORDER BY sum(cr_net_loss) DESC

View File

@ -0,0 +1 @@
struct<Excess Discount Amount :decimal>

View File

@ -0,0 +1,33 @@
--
-- Licensed to the Apache Software Foundation (ASF) under one or more
-- contributor license agreements. See the NOTICE file distributed with
-- this work for additional information regarding copyright ownership.
-- The ASF licenses this file to You under the Apache License, Version 2.0
-- (the "License"); you may not use this file except in compliance with
-- the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
--
-- Total ws_ext_discount_amt over web sales of items with i_manufact_id = 350,
-- sold in the 90-day window starting 2000-01-27, counting only sales whose
-- discount exceeds 1.3 times the average discount of the same item in that
-- same window.
-- The output alias deliberately keeps its trailing space.
SELECT
    sum(ws_ext_discount_amt) AS `Excess Discount Amount `
FROM web_sales
INNER JOIN item
    ON i_item_sk = ws_item_sk
INNER JOIN date_dim
    ON d_date_sk = ws_sold_date_sk
WHERE i_manufact_id = 350
    AND d_date BETWEEN '2000-01-27' AND (cast('2000-01-27' AS DATE) + INTERVAL 90 days)
    AND ws_ext_discount_amt > (
        -- Correlated on i_item_sk from the outer query; the unqualified ws_* and
        -- d_* columns in here resolve to this subquery's own tables.
        SELECT 1.3 * avg(ws_ext_discount_amt)
        FROM web_sales
        INNER JOIN date_dim
            ON d_date_sk = ws_sold_date_sk
        WHERE ws_item_sk = i_item_sk
            AND d_date BETWEEN '2000-01-27' AND (cast('2000-01-27' AS DATE) + INTERVAL 90 days)
    )
ORDER BY sum(ws_ext_discount_amt)
LIMIT 100

View File

@ -0,0 +1 @@
struct<ss_customer_sk:int,sumsales:decimal>

View File

@ -0,0 +1,36 @@
--
-- Licensed to the Apache Software Foundation (ASF) under one or more
-- contributor license agreements. See the NOTICE file distributed with
-- this work for additional information regarding copyright ownership.
-- The ASF licenses this file to You under the Apache License, Version 2.0
-- (the "License"); you may not use this file except in compliance with
-- the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
--
-- Per-customer total of "actual sales" for store sales whose matching return
-- carries the reason described as 'reason 28'. Actual sales is the sales price
-- times the quantity sold, reduced by the returned quantity when one is recorded.
WITH adjusted_sales AS (
    SELECT
        ss_item_sk,
        ss_ticket_number,
        ss_customer_sk,
        CASE
            WHEN sr_return_quantity IS NULL
                THEN (ss_quantity * ss_sales_price)
            ELSE (ss_quantity - sr_return_quantity) * ss_sales_price
        END AS act_sales
    FROM store_sales
    LEFT OUTER JOIN store_returns
        ON (sr_item_sk = ss_item_sk AND sr_ticket_number = ss_ticket_number)
    -- The equality on sr_reason_sk can never hold for a sale without a matching
    -- return (sr_reason_sk is NULL there), so such sales are dropped here.
    INNER JOIN reason
        ON sr_reason_sk = r_reason_sk
    WHERE r_reason_desc = 'reason 28'
)
SELECT
    ss_customer_sk,
    sum(act_sales) AS sumsales
FROM adjusted_sales
GROUP BY ss_customer_sk
ORDER BY sumsales, ss_customer_sk
LIMIT 100

View File

@ -0,0 +1 @@
struct<order count :bigint,total shipping cost :decimal,total net profit :decimal>

View File

@ -0,0 +1,40 @@
--
-- Licensed to the Apache Software Foundation (ASF) under one or more
-- contributor license agreements. See the NOTICE file distributed with
-- this work for additional information regarding copyright ownership.
-- The ASF licenses this file to You under the Apache License, Version 2.0
-- (the "License"); you may not use this file except in compliance with
-- the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
--
-- Distinct order count, total shipping cost and total net profit for web sales
-- shipped in the 60-day window starting 1999-02-01 to addresses with
-- ca_state = 'IL', from sites with web_company_name = 'pri', where the order
--   * has another line from a different warehouse (EXISTS), and
--   * has no row in web_returns (NOT EXISTS).
-- The output aliases deliberately keep their trailing spaces.
SELECT
    count(DISTINCT ws_order_number) AS `order count `,
    sum(ws_ext_ship_cost) AS `total shipping cost `,
    sum(ws_net_profit) AS `total net profit `
FROM web_sales ws1
INNER JOIN date_dim
    ON ws1.ws_ship_date_sk = d_date_sk
INNER JOIN customer_address
    ON ws1.ws_ship_addr_sk = ca_address_sk
INNER JOIN web_site
    ON ws1.ws_web_site_sk = web_site_sk
WHERE d_date BETWEEN '1999-02-01' AND (CAST('1999-02-01' AS DATE) + INTERVAL 60 days)
    AND ca_state = 'IL'
    AND web_company_name = 'pri'
    AND EXISTS (
        SELECT *
        FROM web_sales ws2
        WHERE ws1.ws_order_number = ws2.ws_order_number
            AND ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk
    )
    AND NOT EXISTS (
        SELECT *
        FROM web_returns wr1
        WHERE ws1.ws_order_number = wr1.wr_order_number
    )
ORDER BY count(DISTINCT ws_order_number)
LIMIT 100

View File

@ -0,0 +1 @@
struct<order count :bigint,total shipping cost :decimal,total net profit :decimal>

View File

@ -0,0 +1,46 @@
--
-- Licensed to the Apache Software Foundation (ASF) under one or more
-- contributor license agreements. See the NOTICE file distributed with
-- this work for additional information regarding copyright ownership.
-- The ASF licenses this file to You under the Apache License, Version 2.0
-- (the "License"); you may not use this file except in compliance with
-- the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
--
-- ws_wh: one row per pair of web_sales lines that share an order number but
-- have different warehouse keys.
WITH ws_wh AS (
    SELECT
        ws1.ws_order_number,
        ws1.ws_warehouse_sk AS wh1,
        ws2.ws_warehouse_sk AS wh2
    FROM web_sales ws1
    INNER JOIN web_sales ws2
        ON ws1.ws_order_number = ws2.ws_order_number
    WHERE ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk
)
-- Distinct order count, total shipping cost and total net profit for web sales
-- shipped in the 60-day window starting 1999-02-01 to addresses with
-- ca_state = 'IL', from sites with web_company_name = 'pri', where the order
-- number appears in ws_wh and also among the web_returns rows that match ws_wh.
-- The output aliases deliberately keep their trailing spaces.
SELECT
    count(DISTINCT ws_order_number) AS `order count `,
    sum(ws_ext_ship_cost) AS `total shipping cost `,
    sum(ws_net_profit) AS `total net profit `
FROM web_sales ws1
INNER JOIN date_dim
    ON ws1.ws_ship_date_sk = d_date_sk
INNER JOIN customer_address
    ON ws1.ws_ship_addr_sk = ca_address_sk
INNER JOIN web_site
    ON ws1.ws_web_site_sk = web_site_sk
WHERE d_date BETWEEN '1999-02-01' AND (CAST('1999-02-01' AS DATE) + INTERVAL 60 DAY)
    AND ca_state = 'IL'
    AND web_company_name = 'pri'
    AND ws1.ws_order_number IN (
        SELECT ws_order_number
        FROM ws_wh
    )
    AND ws1.ws_order_number IN (
        SELECT wr_order_number
        FROM web_returns
        INNER JOIN ws_wh
            ON wr_order_number = ws_wh.ws_order_number
    )
ORDER BY count(DISTINCT ws_order_number)
LIMIT 100

View File

@ -0,0 +1 @@
struct<count(1):bigint>

View File

@ -0,0 +1,28 @@
--
-- Licensed to the Apache Software Foundation (ASF) under one or more
-- contributor license agreements. See the NOTICE file distributed with
-- this work for additional information regarding copyright ownership.
-- The ASF licenses this file to You under the Apache License, Version 2.0
-- (the "License"); you may not use this file except in compliance with
-- the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
--
-- Number of store_sales rows with t_hour = 20 and t_minute >= 30, for
-- households with hd_dep_count = 7, at stores named 'ese'.
-- The count is intentionally left unaliased; the result column name comes
-- from the engine.
SELECT count(*)
FROM store_sales
INNER JOIN household_demographics
    ON ss_hdemo_sk = household_demographics.hd_demo_sk
INNER JOIN time_dim
    ON ss_sold_time_sk = time_dim.t_time_sk
INNER JOIN store
    ON ss_store_sk = s_store_sk
WHERE time_dim.t_hour = 20
    AND time_dim.t_minute >= 30
    AND household_demographics.hd_dep_count = 7
    AND store.s_store_name = 'ese'
ORDER BY count(*)
LIMIT 100

View File

@ -0,0 +1 @@
struct<store_only:bigint,catalog_only:bigint,store_and_catalog:bigint>

View File

@ -0,0 +1,47 @@
--
-- Licensed to the Apache Software Foundation (ASF) under one or more
-- contributor license agreements. See the NOTICE file distributed with
-- this work for additional information regarding copyright ownership.
-- The ASF licenses this file to You under the Apache License, Version 2.0
-- (the "License"); you may not use this file except in compliance with
-- the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
--
-- ssci: distinct (customer, item) pairs from store_sales with d_month_seq in
-- 1200 .. 1200 + 11.
WITH ssci AS (
    SELECT
        ss_customer_sk AS customer_sk,
        ss_item_sk AS item_sk
    FROM store_sales
    INNER JOIN date_dim
        ON ss_sold_date_sk = d_date_sk
    WHERE d_month_seq BETWEEN 1200 AND 1200 + 11
    GROUP BY
        ss_customer_sk,
        ss_item_sk
),
-- csci: the same pairs taken from catalog_sales, keyed on the billing customer.
csci AS (
    SELECT
        cs_bill_customer_sk AS customer_sk,
        cs_item_sk AS item_sk
    FROM catalog_sales
    INNER JOIN date_dim
        ON cs_sold_date_sk = d_date_sk
    WHERE d_month_seq BETWEEN 1200 AND 1200 + 11
    GROUP BY
        cs_bill_customer_sk,
        cs_item_sk
)
-- After the full outer join, a NULL customer_sk on one side marks a pair found
-- only on the other side (or a pair whose customer key is itself NULL).
SELECT
    sum(
        CASE
            WHEN ssci.customer_sk IS NOT NULL AND csci.customer_sk IS NULL THEN 1
            ELSE 0
        END
    ) AS store_only,
    sum(
        CASE
            WHEN ssci.customer_sk IS NULL AND csci.customer_sk IS NOT NULL THEN 1
            ELSE 0
        END
    ) AS catalog_only,
    sum(
        CASE
            WHEN ssci.customer_sk IS NOT NULL AND csci.customer_sk IS NOT NULL THEN 1
            ELSE 0
        END
    ) AS store_and_catalog
FROM ssci
FULL OUTER JOIN csci
    ON (
        ssci.customer_sk = csci.customer_sk
        AND ssci.item_sk = csci.item_sk
    )
LIMIT 100

View File

@ -0,0 +1 @@
struct<i_item_desc:string,i_category:string,i_class:string,i_current_price:decimal,itemrevenue:decimal,revenueratio:decimal>

View File

@ -0,0 +1,38 @@
--
-- Licensed to the Apache Software Foundation (ASF) under one or more
-- contributor license agreements. See the NOTICE file distributed with
-- this work for additional information regarding copyright ownership.
-- The ASF licenses this file to You under the Apache License, Version 2.0
-- (the "License"); you may not use this file except in compliance with
-- the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
--
-- Revenue per item (sum of ss_ext_sales_price) for the categories 'Sports',
-- 'Books' and 'Home', sold in the 30-day window starting 1999-02-22, plus each
-- item's percentage share of the total revenue of its i_class.
-- i_item_id is used for grouping and ordering but is not part of the output.
SELECT
    i_item_desc,
    i_category,
    i_class,
    i_current_price,
    sum(ss_ext_sales_price) AS itemrevenue,
    -- The inner sum is the per-group revenue; the windowed sum totals those
    -- group revenues across the i_class partition.
    sum(ss_ext_sales_price) * 100
        / sum(sum(ss_ext_sales_price)) OVER (PARTITION BY i_class) AS revenueratio
FROM store_sales
INNER JOIN item
    ON ss_item_sk = i_item_sk
INNER JOIN date_dim
    ON ss_sold_date_sk = d_date_sk
WHERE i_category IN ('Sports', 'Books', 'Home')
    AND d_date BETWEEN cast('1999-02-22' AS DATE)
        AND (cast('1999-02-22' AS DATE) + INTERVAL 30 days)
GROUP BY
    i_item_id,
    i_item_desc,
    i_category,
    i_class,
    i_current_price
ORDER BY
    i_category,
    i_class,
    i_item_id,
    i_item_desc,
    revenueratio

View File

@ -0,0 +1 @@
struct<substr(w_warehouse_name, 1, 20):string,sm_type:string,cc_name:string,30 days :bigint,31 - 60 days :bigint,61 - 90 days :bigint,91 - 120 days :bigint,>120 days :bigint>

View File

@ -0,0 +1,51 @@
--
-- Licensed to the Apache Software Foundation (ASF) under one or more
-- contributor license agreements. See the NOTICE file distributed with
-- this work for additional information regarding copyright ownership.
-- The ASF licenses this file to You under the Apache License, Version 2.0
-- (the "License"); you may not use this file except in compliance with
-- the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
--
-- Catalog sales shipped with d_month_seq in 1200 .. 1200 + 11, counted per
-- (first 20 characters of warehouse name, ship mode type, call center name)
-- and bucketed by cs_ship_date_sk - cs_sold_date_sk.
-- NOTE(review): the bucket labels suggest that key difference is a number of
-- days -- confirm against the date_dim key definition.
-- The first column is intentionally unaliased, and the bucket aliases keep
-- their trailing spaces.
SELECT
    substr(w_warehouse_name, 1, 20),
    sm_type,
    cc_name,
    sum(
        CASE
            WHEN (cs_ship_date_sk - cs_sold_date_sk <= 30) THEN 1
            ELSE 0
        END
    ) AS `30 days `,
    sum(
        CASE
            WHEN (cs_ship_date_sk - cs_sold_date_sk > 30)
                AND (cs_ship_date_sk - cs_sold_date_sk <= 60) THEN 1
            ELSE 0
        END
    ) AS `31 - 60 days `,
    sum(
        CASE
            WHEN (cs_ship_date_sk - cs_sold_date_sk > 60)
                AND (cs_ship_date_sk - cs_sold_date_sk <= 90) THEN 1
            ELSE 0
        END
    ) AS `61 - 90 days `,
    sum(
        CASE
            WHEN (cs_ship_date_sk - cs_sold_date_sk > 90)
                AND (cs_ship_date_sk - cs_sold_date_sk <= 120) THEN 1
            ELSE 0
        END
    ) AS `91 - 120 days `,
    sum(
        CASE
            WHEN (cs_ship_date_sk - cs_sold_date_sk > 120) THEN 1
            ELSE 0
        END
    ) AS `>120 days `
FROM catalog_sales
INNER JOIN warehouse
    ON cs_warehouse_sk = w_warehouse_sk
INNER JOIN ship_mode
    ON cs_ship_mode_sk = sm_ship_mode_sk
INNER JOIN call_center
    ON cs_call_center_sk = cc_call_center_sk
INNER JOIN date_dim
    ON cs_ship_date_sk = d_date_sk
WHERE d_month_seq BETWEEN 1200 AND 1200 + 11
GROUP BY
    substr(w_warehouse_name, 1, 20),
    sm_type,
    cc_name
ORDER BY
    substr(w_warehouse_name, 1, 20),
    sm_type,
    cc_name
LIMIT 100