diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_1.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_1.q.out index aedc7fc43de6..f8dfb22e5fa1 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_1.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_1.q.out @@ -111,9 +111,9 @@ Stage-0 Stage-1 Map 1 llap File Output Operator [FS_10] - Select Operator [SEL_9] (rows=30 width=520) + Select Operator [SEL_9] (rows=3 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_45] (rows=30 width=336) + Map Join Operator [MAPJOIN_45] (rows=3 width=336) BucketMapJoin:true,Conds:SEL_2._col0, _col1=RS_7._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Map 2 [CUSTOM_EDGE] llap MULTICAST [RS_7] @@ -175,19 +175,19 @@ Stage-0 Stage-1 Map 1 llap File Output Operator [FS_14] - Select Operator [SEL_13] (rows=10 width=520) + Select Operator [SEL_13] (rows=3 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_49] (rows=10 width=336) + Map Join Operator [MAPJOIN_49] (rows=3 width=336) BucketMapJoin:true,Conds:SEL_2._col0, _col1=RS_11._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Reducer 3 [CUSTOM_EDGE] llap MULTICAST [RS_11] PartitionCols:_col1 - Group By Operator [GBY_8] (rows=1 width=168) + Group By Operator [GBY_8] (rows=3 width=168) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Map 2 [SIMPLE_EDGE] llap SHUFFLE [RS_7] PartitionCols:_col0, _col1 - Group By Operator [GBY_6] (rows=1 width=168) + Group By Operator [GBY_6] (rows=3 width=168) Output:["_col0","_col1"],keys:date_col, decimal_col Select Operator [SEL_5] (rows=3 width=168) Output:["date_col","decimal_col"] @@ -245,9 +245,9 @@ Stage-0 Stage-1 Map 1 llap File Output Operator [FS_10] - Select Operator [SEL_9] (rows=30 width=520) + Select Operator [SEL_9] 
(rows=3 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_45] (rows=30 width=336) + Map Join Operator [MAPJOIN_45] (rows=3 width=336) Conds:SEL_2._col0, _col1=RS_7._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Map 2 [BROADCAST_EDGE] llap BROADCAST [RS_7] @@ -309,19 +309,19 @@ Stage-0 Stage-1 Map 1 llap File Output Operator [FS_14] - Select Operator [SEL_13] (rows=10 width=520) + Select Operator [SEL_13] (rows=3 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_49] (rows=10 width=336) + Map Join Operator [MAPJOIN_49] (rows=3 width=336) Conds:SEL_2._col0, _col1=RS_11._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Reducer 3 [BROADCAST_EDGE] llap BROADCAST [RS_11] PartitionCols:_col0, _col1 - Group By Operator [GBY_8] (rows=1 width=168) + Group By Operator [GBY_8] (rows=3 width=168) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Map 2 [SIMPLE_EDGE] llap SHUFFLE [RS_7] PartitionCols:_col0, _col1 - Group By Operator [GBY_6] (rows=1 width=168) + Group By Operator [GBY_6] (rows=3 width=168) Output:["_col0","_col1"],keys:date_col, decimal_col Select Operator [SEL_5] (rows=3 width=168) Output:["date_col","decimal_col"] @@ -379,9 +379,9 @@ Stage-0 Stage-1 Map 1 vectorized, llap File Output Operator [FS_54] - Select Operator [SEL_53] (rows=30 width=520) + Select Operator [SEL_53] (rows=3 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_52] (rows=30 width=336) + Map Join Operator [MAPJOIN_52] (rows=3 width=336) BucketMapJoin:true,Conds:SEL_51._col0, _col1=RS_49._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Map 2 [CUSTOM_EDGE] vectorized, llap MULTICAST [RS_49] @@ -443,19 +443,19 @@ Stage-0 Stage-1 Map 1 vectorized, llap File Output Operator [FS_61] - Select Operator [SEL_60] (rows=10 width=520) + Select Operator [SEL_60] (rows=3 width=520) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_59] (rows=10 width=336) + Map Join Operator [MAPJOIN_59] (rows=3 width=336) BucketMapJoin:true,Conds:SEL_58._col0, _col1=RS_56._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Reducer 3 [CUSTOM_EDGE] vectorized, llap MULTICAST [RS_56] PartitionCols:_col1 - Group By Operator [GBY_55] (rows=1 width=168) + Group By Operator [GBY_55] (rows=3 width=168) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Map 2 [SIMPLE_EDGE] vectorized, llap SHUFFLE [RS_54] PartitionCols:_col0, _col1 - Group By Operator [GBY_53] (rows=1 width=168) + Group By Operator [GBY_53] (rows=3 width=168) Output:["_col0","_col1"],keys:date_col, decimal_col Select Operator [SEL_52] (rows=3 width=168) Output:["date_col","decimal_col"] @@ -513,9 +513,9 @@ Stage-0 Stage-1 Map 1 vectorized, llap File Output Operator [FS_54] - Select Operator [SEL_53] (rows=30 width=520) + Select Operator [SEL_53] (rows=3 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_52] (rows=30 width=336) + Map Join Operator [MAPJOIN_52] (rows=3 width=336) Conds:SEL_51._col0, _col1=RS_49._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Map 2 [BROADCAST_EDGE] vectorized, llap BROADCAST [RS_49] @@ -577,19 +577,19 @@ Stage-0 Stage-1 Map 1 vectorized, llap File Output Operator [FS_61] - Select Operator [SEL_60] (rows=10 width=520) + Select Operator [SEL_60] (rows=3 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_59] (rows=10 width=336) + Map Join Operator [MAPJOIN_59] (rows=3 width=336) Conds:SEL_58._col0, _col1=RS_56._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Reducer 3 [BROADCAST_EDGE] vectorized, llap BROADCAST [RS_56] PartitionCols:_col0, _col1 - Group By Operator [GBY_55] (rows=1 width=168) + Group By Operator [GBY_55] (rows=3 width=168) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Map 2 
[SIMPLE_EDGE] vectorized, llap SHUFFLE [RS_54] PartitionCols:_col0, _col1 - Group By Operator [GBY_53] (rows=1 width=168) + Group By Operator [GBY_53] (rows=3 width=168) Output:["_col0","_col1"],keys:date_col, decimal_col Select Operator [SEL_52] (rows=3 width=168) Output:["date_col","decimal_col"] diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java index c530633fbf1c..07d616602c90 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -832,6 +832,7 @@ public static ColStatistics getColStatistics(ColumnStatisticsObj cso, String col cs.setNumNulls(csd.getBinaryStats().getNumNulls()); } else if (colTypeLowerCase.equals(serdeConstants.TIMESTAMP_TYPE_NAME)) { cs.setAvgColLen(JavaDataModel.get().lengthOfTimestamp()); + cs.setCountDistint(csd.getTimestampStats().getNumDVs()); cs.setNumNulls(csd.getTimestampStats().getNumNulls()); Long lowVal = (csd.getTimestampStats().getLowValue() != null) ? csd.getTimestampStats().getLowValue() .getSecondsSinceEpoch() : null; @@ -862,6 +863,7 @@ public static ColStatistics getColStatistics(ColumnStatisticsObj cso, String col cs.setHistogram(csd.getDecimalStats().getHistogram()); } else if (colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) { cs.setAvgColLen(JavaDataModel.get().lengthOfDate()); + cs.setCountDistint(csd.getDateStats().getNumDVs()); cs.setNumNulls(csd.getDateStats().getNumNulls()); Long lowVal = (csd.getDateStats().getLowValue() != null) ? 
csd.getDateStats().getLowValue() .getDaysSinceEpoch() : null; @@ -2087,8 +2089,16 @@ private static List extractNDVGroupingColumns(List colStats for (ColStatistics cs : colStats) { if (cs != null) { long ndv = cs.getCountDistint(); - if (cs.getNumNulls() > 0) { - ndv = StatsUtils.safeAdd(ndv, 1); + + if (ndv == 0L) { + // Typically, ndv == 0 means "NDV unknown", and no safe GROUPBY adjustments are possible + // However, there is a special exception for "constant NULL" columns. They are intentionally generated + // with NDV values of 0 and numNulls == numRows, while their actual NDV is 1 + if (cs.getNumNulls() >= parentStats.getNumRows()) { + ndv = 1L; + } + } else if (cs.getNumNulls() > 0L) { + ndv = StatsUtils.safeAdd(ndv, 1L); } ndvValues.add(ndv); } else { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/PessimisticStatCombiner.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/PessimisticStatCombiner.java index dde2019eadf7..7b61bc460158 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/PessimisticStatCombiner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/PessimisticStatCombiner.java @@ -41,9 +41,15 @@ public void add(ColStatistics stat) { if (stat.getAvgColLen() > result.getAvgColLen()) { result.setAvgColLen(stat.getAvgColLen()); } - if (stat.getCountDistint() > result.getCountDistint()) { - result.setCountDistint(stat.getCountDistint()); - } + + // NDVs can only be accurately combined if full information about columns, query branches and + // their relationships is available. Without that info, there is only one "truly conservative" + // value of NDV which is 0, which means that the NDV is unknown. It forces the optimizer + // to make the most conservative decisions possible, which is the exact goal of + // PessimisticStatCombiner.
It does inflate statistics in multiple cases, but at the same time it + // also ensures that the query execution does not "blow up" due to too optimistic stats estimates + result.setCountDistint(0L); + if (stat.getNumNulls() > result.getNumNulls()) { result.setNumNulls(stat.getNumNulls()); } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUtils.java b/ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUtils.java index 5701fc40581e..8a3dd1cea14e 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUtils.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUtils.java @@ -25,6 +25,8 @@ import java.lang.reflect.Field; import java.lang.reflect.Modifier; +import java.util.Collections; +import java.util.List; import java.util.Set; import java.util.stream.Stream; @@ -32,10 +34,15 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.metastore.api.Date; +import org.apache.hadoop.hive.metastore.api.DateColumnStatsData; import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; +import org.apache.hadoop.hive.metastore.api.Timestamp; +import org.apache.hadoop.hive.metastore.api.TimestampColumnStatsData; import org.apache.hadoop.hive.ql.plan.ColStatistics; import org.apache.hadoop.hive.ql.plan.ColStatistics.Range; +import org.apache.hadoop.hive.ql.plan.Statistics; import org.apache.hadoop.hive.serde.serdeConstants; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; @@ -244,4 +251,74 @@ static Stream floatingPointStatisticsTestData() { ); } + @Test + void testGetColStatisticsTimestampType() { + ColumnStatisticsObj cso = new ColumnStatisticsObj(); + cso.setColName("ts_col"); + cso.setColType(serdeConstants.TIMESTAMP_TYPE_NAME); + + TimestampColumnStatsData tsStats = new
TimestampColumnStatsData(); + tsStats.setNumDVs(35); + tsStats.setNumNulls(5); + tsStats.setLowValue(new Timestamp(1000)); + tsStats.setHighValue(new Timestamp(2000)); + + ColumnStatisticsData data = new ColumnStatisticsData(); + data.setTimestampStats(tsStats); + cso.setStatsData(data); + + ColStatistics cs = StatsUtils.getColStatistics(cso, "ts_col"); + + assertNotNull(cs, "ColStatistics should not be null"); + assertEquals(35, cs.getCountDistint(), "TIMESTAMP NumDVs should be extracted from metastore stats"); + assertEquals(5, cs.getNumNulls(), "NumNulls mismatch"); + } + + @Test + void testGetColStatisticsDateType() { + ColumnStatisticsObj cso = new ColumnStatisticsObj(); + cso.setColName("date_col"); + cso.setColType(serdeConstants.DATE_TYPE_NAME); + + DateColumnStatsData dateStats = new DateColumnStatsData(); + dateStats.setNumDVs(42); + dateStats.setNumNulls(3); + dateStats.setLowValue(new Date(18000)); + dateStats.setHighValue(new Date(19000)); + + ColumnStatisticsData data = new ColumnStatisticsData(); + data.setDateStats(dateStats); + cso.setStatsData(data); + + ColStatistics cs = StatsUtils.getColStatistics(cso, "date_col"); + + assertNotNull(cs, "ColStatistics should not be null"); + assertEquals(42, cs.getCountDistint(), "DATE NumDVs should be extracted from metastore stats"); + assertEquals(3, cs.getNumNulls(), "NumNulls mismatch"); + } + + private ColStatistics createColStats(String name, long ndv, long numNulls) { + ColStatistics cs = new ColStatistics(name, "string"); + cs.setCountDistint(ndv); + cs.setNumNulls(numNulls); + return cs; + } + + private Statistics createParentStats(long numRows) { + Statistics stats = new Statistics(numRows, 0, 0, 0); + stats.setColumnStatsState(Statistics.State.COMPLETE); + return stats; + } + + @Test + void testComputeNDVGroupingColumnsPartialStats() { + ColStatistics cs = createColStats("partial_stats_col", 0, 100); + Statistics parentStats = createParentStats(1000); + List colStats = Collections.singletonList(cs); 
+ + long ndv = StatsUtils.computeNDVGroupingColumns(colStats, parentStats, false); + + assertEquals(0, ndv, "Partial stats (ndv=0, numNulls result = combiner.getResult(); + assertTrue(result.isPresent()); + ColStatistics combined = result.get(); + + assertEquals("col1", combined.getColumnName()); + assertEquals("int", combined.getColumnType()); + assertEquals(100, combined.getCountDistint()); + assertEquals(10, combined.getNumNulls()); + assertEquals(5.0, combined.getAvgColLen()); + assertNull(combined.getRange()); + assertTrue(combined.isEstimated()); + } + + @Test + void testCombineTakesMaxOfAvgColLen() { + ColStatistics stat1 = createStat("col1", "string", 50, 5, 10.0); + ColStatistics stat2 = createStat("col2", "string", 30, 3, 20.0); + + PessimisticStatCombiner combiner = new PessimisticStatCombiner(); + combiner.add(stat1); + combiner.add(stat2); + + ColStatistics combined = combiner.getResult().get(); + assertEquals(20.0, combined.getAvgColLen()); + } + + @Test + void testCombineTakesMaxOfNumNulls() { + ColStatistics stat1 = createStat("col1", "int", 50, 100, 4.0); + ColStatistics stat2 = createStat("col2", "int", 30, 200, 4.0); + + PessimisticStatCombiner combiner = new PessimisticStatCombiner(); + combiner.add(stat1); + combiner.add(stat2); + + ColStatistics combined = combiner.getResult().get(); + assertEquals(200, combined.getNumNulls()); + } + + @Test + void testCombineSetsCountDistinctToZero() { + ColStatistics stat1 = createStat("col1", "int", 100, 10, 4.0); + ColStatistics stat2 = createStat("col2", "int", 200, 20, 4.0); + + PessimisticStatCombiner combiner = new PessimisticStatCombiner(); + combiner.add(stat1); + combiner.add(stat2); + + ColStatistics combined = combiner.getResult().get(); + assertEquals(0, combined.getCountDistint()); + } + + @Test + void testCombineTakesMaxOfNumTruesAndNumFalses() { + ColStatistics stat1 = createStat("col1", "boolean", 2, 5, 1.0); + stat1.setNumTrues(100); + stat1.setNumFalses(50); + + ColStatistics stat2 = 
createStat("col2", "boolean", 2, 10, 1.0); + stat2.setNumTrues(50); + stat2.setNumFalses(150); + + PessimisticStatCombiner combiner = new PessimisticStatCombiner(); + combiner.add(stat1); + combiner.add(stat2); + + ColStatistics combined = combiner.getResult().get(); + assertEquals(100, combined.getNumTrues()); + assertEquals(150, combined.getNumFalses()); + } + + @Test + void testCombinePropagatesFilteredColumnFlag() { + ColStatistics stat1 = createStat("col1", "int", 50, 5, 4.0); + ColStatistics stat2 = createStat("col2", "int", 30, 3, 4.0); + stat2.setFilterColumn(); + + PessimisticStatCombiner combiner = new PessimisticStatCombiner(); + combiner.add(stat1); + combiner.add(stat2); + + ColStatistics combined = combiner.getResult().get(); + assertTrue(combined.isFilteredColumn()); + } + + @Test + void testCombineMultipleStats() { + ColStatistics stat1 = createStat("col1", "bigint", 1000, 50, 8.0); + ColStatistics stat2 = createStat("col2", "bigint", 500, 100, 8.0); + ColStatistics stat3 = createStat("col3", "bigint", 2000, 25, 8.0); + + PessimisticStatCombiner combiner = new PessimisticStatCombiner(); + combiner.add(stat1); + combiner.add(stat2); + combiner.add(stat3); + + ColStatistics combined = combiner.getResult().get(); + assertEquals(0, combined.getCountDistint()); + assertEquals(100, combined.getNumNulls()); + assertEquals(8.0, combined.getAvgColLen()); + } + + @Test + void testCombineSameColumnTwice() { + ColStatistics stat = createStat("col1", "int", 100, 10, 4.0); + + PessimisticStatCombiner combiner = new PessimisticStatCombiner(); + combiner.add(stat); + combiner.add(stat); + + ColStatistics combined = combiner.getResult().get(); + assertEquals(0, combined.getCountDistint()); + assertEquals(10, combined.getNumNulls()); + assertEquals(4.0, combined.getAvgColLen()); + } + + private ColStatistics createStat(String name, String type, long ndv, long numNulls, double avgColLen) { + ColStatistics stat = new ColStatistics(name, type); + 
stat.setCountDistint(ndv); + stat.setNumNulls(numNulls); + stat.setAvgColLen(avgColLen); + return stat; + } +} diff --git a/ql/src/test/queries/clientpositive/pessimistic_stat_combiner_ndv.q b/ql/src/test/queries/clientpositive/pessimistic_stat_combiner_ndv.q new file mode 100644 index 000000000000..dc3cc690c977 --- /dev/null +++ b/ql/src/test/queries/clientpositive/pessimistic_stat_combiner_ndv.q @@ -0,0 +1,77 @@ +CREATE TABLE t1 (cat INT, val BIGINT, data STRING); +ALTER TABLE t1 UPDATE STATISTICS SET('numRows'='1000000','rawDataSize'='100000000'); +ALTER TABLE t1 UPDATE STATISTICS FOR COLUMN cat SET('numDVs'='100','numNulls'='0'); + +-- Test 1: IF should result in NDV of 2 +EXPLAIN +SELECT x, COUNT(*) +FROM (SELECT IF(cat > 50, 'A', 'B') x FROM t1) sub +GROUP BY x; + +-- Test 2: CASE WHEN should result in NDV of 3 +EXPLAIN +SELECT x, COUNT(*) +FROM ( + SELECT CASE WHEN cat < 30 THEN 'X' WHEN cat < 60 THEN 'Y' ELSE 'Z' END x + FROM t1 +) sub +GROUP BY x; + +-- Test 3: CASE col WHEN val should result in NDV of 4 +EXPLAIN +SELECT x, COUNT(*) +FROM ( + SELECT CASE cat WHEN 1 THEN 'A' WHEN 2 THEN 'B' WHEN 3 THEN 'C' ELSE 'D' END x + FROM t1 +) sub +GROUP BY x; + +-- Test 4: MapJoin NO longer chosen due to NDV=1 causing tiny size estimate +CREATE TABLE t2 (key STRING, v1 STRING); + +ALTER TABLE t1 UPDATE STATISTICS FOR COLUMN val SET('numDVs'='1000000','numNulls'='0'); +ALTER TABLE t1 UPDATE STATISTICS FOR COLUMN data SET('numDVs'='5000000','numNulls'='0','avgColLen'='500.0','maxColLen'='600'); +ALTER TABLE t2 UPDATE STATISTICS SET('numRows'='1000000','rawDataSize'='100000000'); +ALTER TABLE t2 UPDATE STATISTICS FOR COLUMN key SET('numDVs'='1000000','numNulls'='0','avgColLen'='50.0','maxColLen'='100'); +ALTER TABLE t2 UPDATE STATISTICS FOR COLUMN v1 SET('numDVs'='1000000','numNulls'='0','avgColLen'='50.0','maxColLen'='100'); +SET hive.auto.convert.join=true; +SET hive.auto.convert.join.noconditionaltask.size=1000; + +EXPLAIN +SELECT a.k, a.total, a.sample, b.v1 +FROM 
( + SELECT + k, + SUM(val) as total, + MAX(data) as sample + FROM ( + SELECT + CASE + WHEN cat BETWEEN 0 AND 4 THEN 'K00' + WHEN cat BETWEEN 5 AND 9 THEN 'K01' + WHEN cat BETWEEN 10 AND 14 THEN 'K02' + WHEN cat BETWEEN 15 AND 19 THEN 'K03' + WHEN cat BETWEEN 20 AND 24 THEN 'K04' + WHEN cat BETWEEN 25 AND 29 THEN 'K05' + WHEN cat BETWEEN 30 AND 34 THEN 'K06' + WHEN cat BETWEEN 35 AND 39 THEN 'K07' + WHEN cat BETWEEN 40 AND 44 THEN 'K08' + WHEN cat BETWEEN 45 AND 49 THEN 'K09' + WHEN cat BETWEEN 50 AND 54 THEN 'K10' + WHEN cat BETWEEN 55 AND 59 THEN 'K11' + WHEN cat BETWEEN 60 AND 64 THEN 'K12' + WHEN cat BETWEEN 65 AND 69 THEN 'K13' + WHEN cat BETWEEN 70 AND 74 THEN 'K14' + WHEN cat BETWEEN 75 AND 79 THEN 'K15' + WHEN cat BETWEEN 80 AND 84 THEN 'K16' + WHEN cat BETWEEN 85 AND 89 THEN 'K17' + WHEN cat BETWEEN 90 AND 94 THEN 'K18' + ELSE 'K19' + END as k, + val, + data + FROM t1 + ) s + GROUP BY k +) a +JOIN t2 b ON a.k = b.key; diff --git a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez3.q.out b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez3.q.out index 270f527e890f..56b18f1c9766 100644 --- a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez3.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez3.q.out @@ -151,19 +151,19 @@ STAGE PLANS: input vertices: 1 Map 2 Position of Big Table: 0 - Statistics: Num rows: 30 Data size: 10080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE BucketMapJoin: true Select Operator expressions: _col0 (type: date), 'pipeline' (type: string), _col1 (type: decimal(38,0)), _col2 (type: date), 'pipeline' (type: string), _col3 (type: decimal(38,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 30 Data size: 15600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator 
bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 30 Data size: 15600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -380,19 +380,19 @@ STAGE PLANS: input vertices: 1 Reducer 3 Position of Big Table: 0 - Statistics: Num rows: 10 Data size: 3360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE BucketMapJoin: true Select Operator expressions: _col0 (type: date), 'pipeline' (type: string), _col1 (type: decimal(38,0)), _col2 (type: date), 'pipeline' (type: string), _col3 (type: decimal(38,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 10 Data size: 5200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 10 Data size: 5200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -467,10 +467,10 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: date_col (type: date), decimal_col (type: decimal(38,0)) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output 
Operator bucketingVersion: 2 key expressions: _col0 (type: date), _col1 (type: decimal(38,0)) @@ -478,7 +478,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: date), _col1 (type: decimal(38,0)) - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 auto parallelism: false Execution mode: llap @@ -533,7 +533,7 @@ STAGE PLANS: keys: KEY._col0 (type: date), KEY._col1 (type: decimal(38,0)) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: date), _col1 (type: decimal(38,0)) @@ -541,7 +541,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col1 (type: decimal(38,0)) - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 auto parallelism: false @@ -628,18 +628,18 @@ STAGE PLANS: input vertices: 1 Map 2 Position of Big Table: 0 - Statistics: Num rows: 30 Data size: 10080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: date), 'pipeline' (type: string), _col1 (type: decimal(38,0)), _col2 (type: date), 'pipeline' (type: string), _col3 (type: decimal(38,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 30 Data size: 15600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### 
NumFilesPerFileSink: 1 - Statistics: Num rows: 30 Data size: 15600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -848,7 +848,7 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Estimated key counts: Reducer 3 => 1 + Estimated key counts: Reducer 3 => 3 keys: 0 _col0 (type: date), _col1 (type: decimal(38,0)) 1 _col0 (type: date), _col1 (type: decimal(38,0)) @@ -856,18 +856,18 @@ STAGE PLANS: input vertices: 1 Reducer 3 Position of Big Table: 0 - Statistics: Num rows: 10 Data size: 3360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: date), 'pipeline' (type: string), _col1 (type: decimal(38,0)), _col2 (type: date), 'pipeline' (type: string), _col3 (type: decimal(38,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 10 Data size: 5200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 10 Data size: 5200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -942,10 +942,10 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: date_col (type: date), decimal_col (type: decimal(38,0)) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data 
size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: date), _col1 (type: decimal(38,0)) @@ -953,7 +953,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: date), _col1 (type: decimal(38,0)) - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 auto parallelism: true Execution mode: llap @@ -1008,7 +1008,7 @@ STAGE PLANS: keys: KEY._col0 (type: date), KEY._col1 (type: decimal(38,0)) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: date), _col1 (type: decimal(38,0)) @@ -1016,7 +1016,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: date), _col1 (type: decimal(38,0)) - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 auto parallelism: true @@ -1103,19 +1103,19 @@ STAGE PLANS: input vertices: 1 Map 2 Position of Big Table: 0 - Statistics: Num rows: 30 Data size: 10080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE BucketMapJoin: true Select Operator expressions: _col0 (type: date), 'pipeline' (type: string), _col1 (type: decimal(38,0)), _col2 (type: date), 'pipeline' (type: string), _col3 (type: decimal(38,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 30 Data size: 15600 Basic stats: COMPLETE Column stats: COMPLETE + 
Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 30 Data size: 15600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1332,19 +1332,19 @@ STAGE PLANS: input vertices: 1 Reducer 3 Position of Big Table: 0 - Statistics: Num rows: 10 Data size: 3360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE BucketMapJoin: true Select Operator expressions: _col0 (type: date), 'pipeline' (type: string), _col1 (type: decimal(38,0)), _col2 (type: date), 'pipeline' (type: string), _col3 (type: decimal(38,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 10 Data size: 5200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 10 Data size: 5200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1419,10 +1419,10 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: date_col (type: date), decimal_col (type: decimal(38,0)) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: 
COMPLETE + Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: date), _col1 (type: decimal(38,0)) @@ -1430,7 +1430,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: date), _col1 (type: decimal(38,0)) - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 auto parallelism: false Execution mode: vectorized, llap @@ -1485,7 +1485,7 @@ STAGE PLANS: keys: KEY._col0 (type: date), KEY._col1 (type: decimal(38,0)) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: date), _col1 (type: decimal(38,0)) @@ -1493,7 +1493,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col1 (type: decimal(38,0)) - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 auto parallelism: false @@ -1580,18 +1580,18 @@ STAGE PLANS: input vertices: 1 Map 2 Position of Big Table: 0 - Statistics: Num rows: 30 Data size: 10080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: date), 'pipeline' (type: string), _col1 (type: decimal(38,0)), _col2 (type: date), 'pipeline' (type: string), _col3 (type: decimal(38,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 30 Data size: 15600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column 
stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 30 Data size: 15600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1800,7 +1800,7 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Estimated key counts: Reducer 3 => 1 + Estimated key counts: Reducer 3 => 3 keys: 0 _col0 (type: date), _col1 (type: decimal(38,0)) 1 _col0 (type: date), _col1 (type: decimal(38,0)) @@ -1808,18 +1808,18 @@ STAGE PLANS: input vertices: 1 Reducer 3 Position of Big Table: 0 - Statistics: Num rows: 10 Data size: 3360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: date), 'pipeline' (type: string), _col1 (type: decimal(38,0)), _col2 (type: date), 'pipeline' (type: string), _col3 (type: decimal(38,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 10 Data size: 5200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 10 Data size: 5200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1894,10 +1894,10 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: date_col (type: date), decimal_col (type: 
decimal(38,0)) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: date), _col1 (type: decimal(38,0)) @@ -1905,7 +1905,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: date), _col1 (type: decimal(38,0)) - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 auto parallelism: true Execution mode: vectorized, llap @@ -1960,7 +1960,7 @@ STAGE PLANS: keys: KEY._col0 (type: date), KEY._col1 (type: decimal(38,0)) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: date), _col1 (type: decimal(38,0)) @@ -1968,7 +1968,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: date), _col1 (type: decimal(38,0)) - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 auto parallelism: true diff --git a/ql/src/test/results/clientpositive/llap/infer_bucket_sort_dyn_part.q.out b/ql/src/test/results/clientpositive/llap/infer_bucket_sort_dyn_part.q.out index 995733564a08..b07fc4ca6103 100644 --- a/ql/src/test/results/clientpositive/llap/infer_bucket_sort_dyn_part.q.out +++ b/ql/src/test/results/clientpositive/llap/infer_bucket_sort_dyn_part.q.out @@ -492,13 +492,13 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: 
_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 158 Data size: 103016 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 158 Data size: 103016 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Reducer 3 Execution mode: vectorized, llap @@ -508,14 +508,14 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 79 Data size: 40764 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 79 
Data size: 56248 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 79 Data size: 56248 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/innerjoin1.q.out b/ql/src/test/results/clientpositive/llap/innerjoin1.q.out index 8e9dbf9b583d..075e9e8985ba 100644 --- a/ql/src/test/results/clientpositive/llap/innerjoin1.q.out +++ b/ql/src/test/results/clientpositive/llap/innerjoin1.q.out @@ -142,14 +142,14 @@ STAGE PLANS: 0 _col2 (type: string) 1 _col2 (type: string) outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 2 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -249,14 +249,14 @@ STAGE PLANS: 0 _col2 (type: string) 1 _col2 (type: string) outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 2 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 1360 Basic stats: COMPLETE Column stats: 
COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/list_bucket_dml_6.q.out b/ql/src/test/results/clientpositive/llap/list_bucket_dml_6.q.out index dbcf49b202e7..a312142af7c1 100644 --- a/ql/src/test/results/clientpositive/llap/list_bucket_dml_6.q.out +++ b/ql/src/test/results/clientpositive/llap/list_bucket_dml_6.q.out @@ -96,7 +96,7 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -104,7 +104,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) 
auto parallelism: true @@ -199,18 +199,18 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 129000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -461,7 +461,7 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator 
bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -469,7 +469,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) auto parallelism: true @@ -564,18 +564,18 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 129000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 712 
Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git a/ql/src/test/results/clientpositive/llap/list_bucket_dml_7.q.out b/ql/src/test/results/clientpositive/llap/list_bucket_dml_7.q.out index ad7051398156..9a960de8085f 100644 --- a/ql/src/test/results/clientpositive/llap/list_bucket_dml_7.q.out +++ b/ql/src/test/results/clientpositive/llap/list_bucket_dml_7.q.out @@ -96,7 +96,7 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -104,7 +104,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) auto parallelism: true @@ -199,18 +199,18 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 129000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'STRING' (type: 
string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -461,7 +461,7 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -469,7 +469,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col2 (type: int), _col3 (type: 
struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) auto parallelism: true @@ -564,18 +564,18 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 129000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git a/ql/src/test/results/clientpositive/llap/list_bucket_dml_8.q.out b/ql/src/test/results/clientpositive/llap/list_bucket_dml_8.q.out index 148303926d66..979ef4f18835 100644 --- 
a/ql/src/test/results/clientpositive/llap/list_bucket_dml_8.q.out +++ b/ql/src/test/results/clientpositive/llap/list_bucket_dml_8.q.out @@ -96,7 +96,7 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -104,7 +104,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) auto parallelism: true @@ -199,18 +199,18 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 129000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: 
string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git a/ql/src/test/results/clientpositive/llap/mapjoin_date.q.out b/ql/src/test/results/clientpositive/llap/mapjoin_date.q.out index c5dfc75a5f30..c426f13591b6 100644 --- a/ql/src/test/results/clientpositive/llap/mapjoin_date.q.out +++ b/ql/src/test/results/clientpositive/llap/mapjoin_date.q.out @@ -45,7 +45,7 @@ STAGE PLANS: TableScan alias: p1 filterExpr: birthdate is not null (type: boolean) - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_25_container, bigKeyColName:birthdate, smallTablePos:1, keyRatio:0.0 + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_25_container, bigKeyColName:birthdate, smallTablePos:1, keyRatio:1.0 Statistics: Num rows: 2 Data size: 296 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -85,13 +85,13 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 2 - Statistics: Num rows: 4 Data size: 1184 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 592 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 4 Data size: 1184 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num 
rows: 2 Data size: 592 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_8.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_8.q.out index caec28516823..b8ec571b2e0d 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_8.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_8.q.out @@ -335,7 +335,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: my_date (type: date), my_id2 (type: bigint), environment (type: string) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: COMPLETE diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_9.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_9.q.out index becfffca3e67..b7d946404c30 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_9.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_9.q.out @@ -112,7 +112,7 @@ STAGE PLANS: Group By Operator aggregations: sum(_col0) keys: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: string), _col4 (type: timestamp) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: COMPLETE @@ -232,7 +232,7 @@ STAGE PLANS: Group By Operator aggregations: sum(_col0) keys: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: string), _col4 (type: timestamp) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2, 
_col3, _col4 Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: COMPLETE @@ -349,7 +349,7 @@ STAGE PLANS: Group By Operator aggregations: sum(_col4) keys: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: string), _col3 (type: timestamp) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: COMPLETE diff --git a/ql/src/test/results/clientpositive/llap/merge_dynamic_partition4.q.out b/ql/src/test/results/clientpositive/llap/merge_dynamic_partition4.q.out index 2c9c9015c173..13b1ace4f633 100644 --- a/ql/src/test/results/clientpositive/llap/merge_dynamic_partition4.q.out +++ b/ql/src/test/results/clientpositive/llap/merge_dynamic_partition4.q.out @@ -180,13 +180,13 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Execution mode: llap LLAP IO: no inputs @@ -198,14 +198,14 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - 
Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 129000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/merge_dynamic_partition5.q.out b/ql/src/test/results/clientpositive/llap/merge_dynamic_partition5.q.out index 5b1e537b938a..e27223e6f3d4 100644 --- a/ql/src/test/results/clientpositive/llap/merge_dynamic_partition5.q.out +++ b/ql/src/test/results/clientpositive/llap/merge_dynamic_partition5.q.out @@ -156,13 +156,13 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 201468 Basic 
stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 201468 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Execution mode: llap LLAP IO: no inputs @@ -174,14 +174,14 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 154 Data size: 79464 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 154 Data size: 109648 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 154 
Data size: 109648 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/parquet_vectorization_11.q.out b/ql/src/test/results/clientpositive/llap/parquet_vectorization_11.q.out index 23c7304f52e2..96e5976e66de 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_vectorization_11.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_vectorization_11.q.out @@ -49,7 +49,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesparquet - filterExpr: ((ctimestamp1 is null and (cstring1 like '%a')) or (cstring2 = cstring1)) (type: boolean) + filterExpr: ((cstring2 = cstring1) or (ctimestamp1 is null and (cstring1 like '%a'))) (type: boolean) Statistics: Num rows: 12288 Data size: 2256914 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -57,8 +57,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: SelectColumnIsNull(col 8:timestamp), FilterStringColLikeStringScalar(col 6:string, pattern %a)), FilterStringGroupColEqualStringGroupColumn(col 7:string, col 6:string)) - predicate: ((ctimestamp1 is null and (cstring1 like '%a')) or (cstring2 = cstring1)) (type: boolean) + predicateExpression: FilterExprOrExpr(children: FilterStringGroupColEqualStringGroupColumn(col 7:string, col 6:string), FilterExprAndExpr(children: SelectColumnIsNull(col 8:timestamp), FilterStringColLikeStringScalar(col 6:string, pattern %a))) + predicate: ((cstring2 = cstring1) or (ctimestamp1 is null and (cstring1 like '%a'))) (type: boolean) Statistics: Num rows: 7701 Data size: 1414500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cstring1 (type: string), cboolean1 (type: boolean), cdouble (type: double), ctimestamp1 (type: 
timestamp), (-3728 * UDFToInteger(csmallint)) (type: int), (cdouble - 9763215.5639D) (type: double), (- cdouble) (type: double), ((- cdouble) + 6981.0D) (type: double), (cdouble * -5638.15D) (type: double) diff --git a/ql/src/test/results/clientpositive/llap/parquet_vectorization_12.q.out b/ql/src/test/results/clientpositive/llap/parquet_vectorization_12.q.out index 1ba06844f9dd..02fde23144f2 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_vectorization_12.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_vectorization_12.q.out @@ -87,7 +87,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesparquet - filterExpr: (((cstring1 like '%a') or ((cboolean2 <= 1) and (cbigint >= UDFToLong(csmallint)))) and ((cboolean1 >= cboolean2) or (UDFToShort(ctinyint) <> csmallint)) and ctimestamp1 is null) (type: boolean) + filterExpr: (ctimestamp1 is null and ((cstring1 like '%a') or ((cboolean2 <= 1) and (cbigint >= UDFToLong(csmallint)))) and ((cboolean1 >= cboolean2) or (UDFToShort(ctinyint) <> csmallint))) (type: boolean) Statistics: Num rows: 12288 Data size: 1522994 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -95,8 +95,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 6:string, pattern %a), FilterExprAndExpr(children: FilterLongColLessEqualLongScalar(col 11:boolean, val 1), FilterLongColGreaterEqualLongColumn(col 3:bigint, col 1:bigint)(children: col 1:smallint))), FilterExprOrExpr(children: FilterLongColGreaterEqualLongColumn(col 10:boolean, col 11:boolean), FilterLongColNotEqualLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint)), SelectColumnIsNull(col 8:timestamp)) - predicate: (((cstring1 like '%a') or ((cboolean2 <= 1) and (cbigint >= UDFToLong(csmallint)))) and ((cboolean1 >= cboolean2) or (UDFToShort(ctinyint) <> csmallint)) and 
ctimestamp1 is null) (type: boolean) + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNull(col 8:timestamp), FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 6:string, pattern %a), FilterExprAndExpr(children: FilterLongColLessEqualLongScalar(col 11:boolean, val 1), FilterLongColGreaterEqualLongColumn(col 3:bigint, col 1:bigint)(children: col 1:smallint))), FilterExprOrExpr(children: FilterLongColGreaterEqualLongColumn(col 10:boolean, col 11:boolean), FilterLongColNotEqualLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint))) + predicate: (ctimestamp1 is null and ((cstring1 like '%a') or ((cboolean2 <= 1) and (cbigint >= UDFToLong(csmallint)))) and ((cboolean1 >= cboolean2) or (UDFToShort(ctinyint) <> csmallint))) (type: boolean) Statistics: Num rows: 1903 Data size: 236052 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cbigint (type: bigint), cboolean1 (type: boolean), cstring1 (type: string), cdouble (type: double), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double), (cdouble * cdouble) (type: double) diff --git a/ql/src/test/results/clientpositive/llap/parquet_vectorization_15.q.out b/ql/src/test/results/clientpositive/llap/parquet_vectorization_15.q.out index d7d3f4919183..5979bc2dbb9a 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_vectorization_15.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_vectorization_15.q.out @@ -119,7 +119,7 @@ STAGE PLANS: minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 - Statistics: Num rows: 6144 Data size: 1216372 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2432638 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), 
_col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) null sort order: zzzzzzz @@ -129,7 +129,7 @@ STAGE PLANS: className: VectorReduceSinkMultiKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 6144 Data size: 1216372 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2432638 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col7 (type: double), _col8 (type: double), _col9 (type: bigint), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: bigint), _col14 (type: double), _col15 (type: double), _col16 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs (cache only) @@ -154,16 +154,16 @@ STAGE PLANS: keys: KEY._col0 (type: float), KEY._col1 (type: boolean), KEY._col2 (type: double), KEY._col3 (type: string), KEY._col4 (type: tinyint), KEY._col5 (type: int), KEY._col6 (type: timestamp) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 - Statistics: Num rows: 6144 Data size: 1216372 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2432638 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp), power(((_col7 - ((_col8 * _col8) / _col9)) / if((_col9 = 1L), null, (_col9 - 1))), 0.5) (type: double), (-26.28 - CAST( _col5 AS decimal(10,0))) (type: decimal(13,2)), _col10 (type: double), (_col2 * 79.553D) (type: double), (33.0 % _col0) (type: float), power(((_col11 - ((_col12 * _col12) / 
_col13)) / if((_col13 = 1L), null, (_col13 - 1))), 0.5) (type: double), ((_col11 - ((_col12 * _col12) / _col13)) / _col13) (type: double), (-23.0D % _col2) (type: double), (- _col4) (type: tinyint), ((_col14 - ((_col15 * _col15) / _col16)) / if((_col16 = 1L), null, (_col16 - 1))) (type: double), (UDFToFloat(_col5) - _col0) (type: float), (-23 % UDFToInteger(_col4)) (type: int), (- (-26.28 - CAST( _col5 AS decimal(10,0)))) (type: decimal(13,2)), power(((_col14 - ((_col15 * _col15) / _col16)) / _col16), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 6144 Data size: 2592628 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 5185150 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) null sort order: zzzzzzz sort order: +++++++ - Statistics: Num rows: 6144 Data size: 2592628 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 5185150 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col7 (type: double), _col8 (type: decimal(13,2)), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: tinyint), _col16 (type: double), _col17 (type: float), _col18 (type: int), _col19 (type: decimal(13,2)), _col20 (type: double) Reducer 3 Execution mode: llap @@ -175,10 +175,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: float), KEY.reducesinkkey1 (type: boolean), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: tinyint), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey6 (type: 
timestamp), VALUE._col0 (type: double), VALUE._col1 (type: decimal(13,2)), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: tinyint), VALUE._col9 (type: double), VALUE._col10 (type: float), VALUE._col11 (type: int), VALUE._col12 (type: decimal(13,2)), VALUE._col13 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 6144 Data size: 2592628 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 5185150 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6144 Data size: 2592628 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 5185150 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/parquet_vectorization_16.q.out b/ql/src/test/results/clientpositive/llap/parquet_vectorization_16.q.out index eeab9c89af72..b3c24ec4c133 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_vectorization_16.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_vectorization_16.q.out @@ -96,7 +96,7 @@ STAGE PLANS: minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 5979 Data size: 825318 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 848064 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) null sort order: zzz @@ -106,7 +106,7 @@ STAGE PLANS: className: 
VectorReduceSinkMultiKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 5979 Data size: 825318 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 848064 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: double) Execution mode: vectorized, llap LLAP IO: all inputs (cache only) @@ -141,7 +141,7 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: double), KEY._col2 (type: timestamp) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 5979 Data size: 825318 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 848064 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp), (_col1 - 9763215.5639D) (type: double), (- (_col1 - 9763215.5639D)) (type: double), _col3 (type: bigint), power((greatest(0,(_col4 - ((_col5 * _col5) / _col3))) / if((_col3 = 1L), null, (_col3 - 1))), 0.5) (type: double), (- power((greatest(0,(_col4 - ((_col5 * _col5) / _col3))) / if((_col3 = 1L), null, (_col3 - 1))), 0.5)) (type: double), (power((greatest(0,(_col4 - ((_col5 * _col5) / _col3))) / if((_col3 = 1L), null, (_col3 - 1))), 0.5) * UDFToDouble(_col3)) (type: double), _col6 (type: double), (9763215.5639D / _col1) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), power((greatest(0,(_col4 - ((_col5 * _col5) / _col3))) / if((_col3 = 1L), null, (_col3 - 1))), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 @@ -150,13 +150,13 @@ STAGE 
PLANS: native: true projectedOutputColumnNums: [0, 1, 2, 7, 9, 3, 18, 28, 39, 6, 40, 42, 51] selectExpressions: DoubleColSubtractDoubleScalar(col 1:double, val 9763215.5639) -> 7:double, DoubleColUnaryMinus(col 8:double)(children: DoubleColSubtractDoubleScalar(col 1:double, val 9763215.5639) -> 8:double) -> 9:double, FuncPowerDoubleToDouble(col 17:double)(children: DoubleColDivideLongColumn(col 13:double, col 16:bigint)(children: VectorUDFAdaptor(greatest(0,(_col4 - ((_col5 * _col5) / _col3))))(children: DoubleColSubtractDoubleColumn(col 4:double, col 11:double)(children: DoubleColDivideLongColumn(col 10:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 10:double) -> 11:double) -> 12:double) -> 13:double, IfExprNullCondExpr(col 14:boolean, null, col 15:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 14:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 15:bigint) -> 16:bigint) -> 17:double) -> 18:double, DoubleColUnaryMinus(col 27:double)(children: FuncPowerDoubleToDouble(col 26:double)(children: DoubleColDivideLongColumn(col 22:double, col 25:bigint)(children: VectorUDFAdaptor(greatest(0,(_col4 - ((_col5 * _col5) / _col3))))(children: DoubleColSubtractDoubleColumn(col 4:double, col 20:double)(children: DoubleColDivideLongColumn(col 19:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 19:double) -> 20:double) -> 21:double) -> 22:double, IfExprNullCondExpr(col 23:boolean, null, col 24:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 23:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 24:bigint) -> 25:bigint) -> 26:double) -> 27:double) -> 28:double, DoubleColMultiplyDoubleColumn(col 37:double, col 38:double)(children: FuncPowerDoubleToDouble(col 36:double)(children: DoubleColDivideLongColumn(col 32:double, col 35:bigint)(children: VectorUDFAdaptor(greatest(0,(_col4 - ((_col5 * _col5) / _col3))))(children: 
DoubleColSubtractDoubleColumn(col 4:double, col 30:double)(children: DoubleColDivideLongColumn(col 29:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 29:double) -> 30:double) -> 31:double) -> 32:double, IfExprNullCondExpr(col 33:boolean, null, col 34:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 33:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 34:bigint) -> 35:bigint) -> 36:double) -> 37:double, CastLongToDouble(col 3:bigint) -> 38:double) -> 39:double, DoubleScalarDivideDoubleColumn(val 9763215.5639, col 1:double) -> 40:double, DecimalColDivideDecimalScalar(col 41:decimal(19,0), val -1.389)(children: CastLongToDecimal(col 3:bigint) -> 41:decimal(19,0)) -> 42:decimal(28,6), FuncPowerDoubleToDouble(col 50:double)(children: DoubleColDivideLongColumn(col 46:double, col 49:bigint)(children: VectorUDFAdaptor(greatest(0,(_col4 - ((_col5 * _col5) / _col3))))(children: DoubleColSubtractDoubleColumn(col 4:double, col 44:double)(children: DoubleColDivideLongColumn(col 43:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 43:double) -> 44:double) -> 45:double) -> 46:double, IfExprNullCondExpr(col 47:boolean, null, col 48:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 47:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 48:bigint) -> 49:bigint) -> 50:double) -> 51:double - Statistics: Num rows: 5979 Data size: 1734126 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 1781952 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 5979 Data size: 1734126 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 1781952 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/parquet_vectorization_3.q.out b/ql/src/test/results/clientpositive/llap/parquet_vectorization_3.q.out index d75a945ac003..25e7197cedb8 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_vectorization_3.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_vectorization_3.q.out @@ -73,7 +73,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesparquet - filterExpr: (((UDFToFloat(cint) <= cfloat) and (CAST( cbigint AS decimal(22,3)) <> 79.553) and (UDFToDouble(ctimestamp2) = -29071.0D)) or ((UDFToDouble(cbigint) > cdouble) and (CAST( csmallint AS decimal(8,3)) >= 79.553) and (ctimestamp1 > ctimestamp2))) (type: boolean) + filterExpr: (((UDFToDouble(cbigint) > cdouble) and (CAST( csmallint AS decimal(8,3)) >= 79.553) and (ctimestamp1 > ctimestamp2)) or ((UDFToFloat(cint) <= cfloat) and (CAST( cbigint AS decimal(22,3)) <> 79.553) and (UDFToDouble(ctimestamp2) = -29071.0D))) (type: boolean) Statistics: Num rows: 12288 Data size: 1027540 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -81,8 +81,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 14:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 14:float), FilterDecimalColNotEqualDecimalScalar(col 15:decimal(22,3), val 79.553)(children: CastLongToDecimal(col 3:bigint) -> 15:decimal(22,3)), FilterDoubleColEqualDoubleScalar(col 16:double, val -29071.0)(children: CastTimestampToDouble(col 9:timestamp) -> 16:double)), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 17:double, col 5:double)(children: CastLongToDouble(col 3:bigint) -> 17:double), FilterDecimal64ColGreaterEqualDecimal64Scalar(col 18:decimal(8,3)/DECIMAL_64, val 79553)(children: 
CastLongToDecimal64(col 1:smallint) -> 18:decimal(8,3)/DECIMAL_64), FilterTimestampColGreaterTimestampColumn(col 8:timestamp, col 9:timestamp))) - predicate: (((UDFToFloat(cint) <= cfloat) and (CAST( cbigint AS decimal(22,3)) <> 79.553) and (UDFToDouble(ctimestamp2) = -29071.0D)) or ((UDFToDouble(cbigint) > cdouble) and (CAST( csmallint AS decimal(8,3)) >= 79.553) and (ctimestamp1 > ctimestamp2))) (type: boolean) + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 14:double, col 5:double)(children: CastLongToDouble(col 3:bigint) -> 14:double), FilterDecimal64ColGreaterEqualDecimal64Scalar(col 15:decimal(8,3)/DECIMAL_64, val 79553)(children: CastLongToDecimal64(col 1:smallint) -> 15:decimal(8,3)/DECIMAL_64), FilterTimestampColGreaterTimestampColumn(col 8:timestamp, col 9:timestamp)), FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 16:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 16:float), FilterDecimalColNotEqualDecimalScalar(col 17:decimal(22,3), val 79.553)(children: CastLongToDecimal(col 3:bigint) -> 17:decimal(22,3)), FilterDoubleColEqualDoubleScalar(col 18:double, val -29071.0)(children: CastTimestampToDouble(col 9:timestamp) -> 18:double))) + predicate: (((UDFToDouble(cbigint) > cdouble) and (CAST( csmallint AS decimal(8,3)) >= 79.553) and (ctimestamp1 > ctimestamp2)) or ((UDFToFloat(cint) <= cfloat) and (CAST( cbigint AS decimal(22,3)) <> 79.553) and (UDFToDouble(ctimestamp2) = -29071.0D))) (type: boolean) Statistics: Num rows: 2503 Data size: 209380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cfloat (type: float), cint (type: int), UDFToDouble(csmallint) (type: double), (UDFToDouble(csmallint) * UDFToDouble(csmallint)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) 
(type: double), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double) diff --git a/ql/src/test/results/clientpositive/llap/parquet_vectorization_9.q.out b/ql/src/test/results/clientpositive/llap/parquet_vectorization_9.q.out index eeab9c89af72..b3c24ec4c133 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_vectorization_9.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_vectorization_9.q.out @@ -96,7 +96,7 @@ STAGE PLANS: minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 5979 Data size: 825318 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 848064 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) null sort order: zzz @@ -106,7 +106,7 @@ STAGE PLANS: className: VectorReduceSinkMultiKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 5979 Data size: 825318 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 848064 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: double) Execution mode: vectorized, llap LLAP IO: all inputs (cache only) @@ -141,7 +141,7 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: double), KEY._col2 (type: timestamp) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 5979 Data size: 825318 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 848064 Basic stats: COMPLETE Column stats: COMPLETE Select Operator 
expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp), (_col1 - 9763215.5639D) (type: double), (- (_col1 - 9763215.5639D)) (type: double), _col3 (type: bigint), power((greatest(0,(_col4 - ((_col5 * _col5) / _col3))) / if((_col3 = 1L), null, (_col3 - 1))), 0.5) (type: double), (- power((greatest(0,(_col4 - ((_col5 * _col5) / _col3))) / if((_col3 = 1L), null, (_col3 - 1))), 0.5)) (type: double), (power((greatest(0,(_col4 - ((_col5 * _col5) / _col3))) / if((_col3 = 1L), null, (_col3 - 1))), 0.5) * UDFToDouble(_col3)) (type: double), _col6 (type: double), (9763215.5639D / _col1) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), power((greatest(0,(_col4 - ((_col5 * _col5) / _col3))) / if((_col3 = 1L), null, (_col3 - 1))), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 @@ -150,13 +150,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 1, 2, 7, 9, 3, 18, 28, 39, 6, 40, 42, 51] selectExpressions: DoubleColSubtractDoubleScalar(col 1:double, val 9763215.5639) -> 7:double, DoubleColUnaryMinus(col 8:double)(children: DoubleColSubtractDoubleScalar(col 1:double, val 9763215.5639) -> 8:double) -> 9:double, FuncPowerDoubleToDouble(col 17:double)(children: DoubleColDivideLongColumn(col 13:double, col 16:bigint)(children: VectorUDFAdaptor(greatest(0,(_col4 - ((_col5 * _col5) / _col3))))(children: DoubleColSubtractDoubleColumn(col 4:double, col 11:double)(children: DoubleColDivideLongColumn(col 10:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 10:double) -> 11:double) -> 12:double) -> 13:double, IfExprNullCondExpr(col 14:boolean, null, col 15:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 14:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 15:bigint) -> 16:bigint) -> 17:double) -> 18:double, DoubleColUnaryMinus(col 27:double)(children: 
FuncPowerDoubleToDouble(col 26:double)(children: DoubleColDivideLongColumn(col 22:double, col 25:bigint)(children: VectorUDFAdaptor(greatest(0,(_col4 - ((_col5 * _col5) / _col3))))(children: DoubleColSubtractDoubleColumn(col 4:double, col 20:double)(children: DoubleColDivideLongColumn(col 19:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 19:double) -> 20:double) -> 21:double) -> 22:double, IfExprNullCondExpr(col 23:boolean, null, col 24:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 23:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 24:bigint) -> 25:bigint) -> 26:double) -> 27:double) -> 28:double, DoubleColMultiplyDoubleColumn(col 37:double, col 38:double)(children: FuncPowerDoubleToDouble(col 36:double)(children: DoubleColDivideLongColumn(col 32:double, col 35:bigint)(children: VectorUDFAdaptor(greatest(0,(_col4 - ((_col5 * _col5) / _col3))))(children: DoubleColSubtractDoubleColumn(col 4:double, col 30:double)(children: DoubleColDivideLongColumn(col 29:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 29:double) -> 30:double) -> 31:double) -> 32:double, IfExprNullCondExpr(col 33:boolean, null, col 34:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 33:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 34:bigint) -> 35:bigint) -> 36:double) -> 37:double, CastLongToDouble(col 3:bigint) -> 38:double) -> 39:double, DoubleScalarDivideDoubleColumn(val 9763215.5639, col 1:double) -> 40:double, DecimalColDivideDecimalScalar(col 41:decimal(19,0), val -1.389)(children: CastLongToDecimal(col 3:bigint) -> 41:decimal(19,0)) -> 42:decimal(28,6), FuncPowerDoubleToDouble(col 50:double)(children: DoubleColDivideLongColumn(col 46:double, col 49:bigint)(children: VectorUDFAdaptor(greatest(0,(_col4 - ((_col5 * _col5) / _col3))))(children: DoubleColSubtractDoubleColumn(col 4:double, col 44:double)(children: DoubleColDivideLongColumn(col 
43:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 43:double) -> 44:double) -> 45:double) -> 46:double, IfExprNullCondExpr(col 47:boolean, null, col 48:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 47:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 48:bigint) -> 49:bigint) -> 50:double) -> 51:double - Statistics: Num rows: 5979 Data size: 1734126 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 1781952 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 5979 Data size: 1734126 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 1781952 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/pessimistic_stat_combiner_ndv.q.out b/ql/src/test/results/clientpositive/llap/pessimistic_stat_combiner_ndv.q.out new file mode 100644 index 000000000000..b23255417f92 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/pessimistic_stat_combiner_ndv.q.out @@ -0,0 +1,495 @@ +PREHOOK: query: CREATE TABLE t1 (cat INT, val BIGINT, data STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t1 +POSTHOOK: query: CREATE TABLE t1 (cat INT, val BIGINT, data STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t1 +PREHOOK: query: ALTER TABLE t1 UPDATE STATISTICS SET('numRows'='1000000','rawDataSize'='100000000') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@t1 +PREHOOK: Output: default@t1 +POSTHOOK: query: ALTER TABLE t1 UPDATE STATISTICS SET('numRows'='1000000','rawDataSize'='100000000') +POSTHOOK: type: 
ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@t1 +PREHOOK: query: ALTER TABLE t1 UPDATE STATISTICS FOR COLUMN cat SET('numDVs'='100','numNulls'='0') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@t1 +PREHOOK: Output: default@t1 +POSTHOOK: query: ALTER TABLE t1 UPDATE STATISTICS FOR COLUMN cat SET('numDVs'='100','numNulls'='0') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@t1 +PREHOOK: query: EXPLAIN +SELECT x, COUNT(*) +FROM (SELECT IF(cat > 50, 'A', 'B') x FROM t1) sub +GROUP BY x +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT x, COUNT(*) +FROM (SELECT IF(cat > 50, 'A', 'B') x FROM t1) sub +GROUP BY x +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1000000 Data size: 4000000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: if((cat > 50), 'A', 'B') (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000000 Data size: 4000000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500000 Data size: 46500000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500000 Data size: 46500000 Basic stats: COMPLETE Column stats: COMPLETE + value 
expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250000 Data size: 23250000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250000 Data size: 23250000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT x, COUNT(*) +FROM ( + SELECT CASE WHEN cat < 30 THEN 'X' WHEN cat < 60 THEN 'Y' ELSE 'Z' END x + FROM t1 +) sub +GROUP BY x +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT x, COUNT(*) +FROM ( + SELECT CASE WHEN cat < 30 THEN 'X' WHEN cat < 60 THEN 'Y' ELSE 'Z' END x + FROM t1 +) sub +GROUP BY x +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1000000 Data size: 4000000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: CASE WHEN ((cat < 30)) THEN ('X') WHEN ((cat < 60)) THEN ('Y') ELSE ('Z') END (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000000 Data size: 4000000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + 
aggregations: count() + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500000 Data size: 46500000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500000 Data size: 46500000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250000 Data size: 23250000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250000 Data size: 23250000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT x, COUNT(*) +FROM ( + SELECT CASE cat WHEN 1 THEN 'A' WHEN 2 THEN 'B' WHEN 3 THEN 'C' ELSE 'D' END x + FROM t1 +) sub +GROUP BY x +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT x, COUNT(*) +FROM ( + SELECT CASE cat WHEN 1 THEN 'A' WHEN 2 THEN 'B' WHEN 3 THEN 'C' ELSE 'D' END x + FROM t1 +) sub +GROUP BY x +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- 
Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1000000 Data size: 4000000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: CASE WHEN ((cat = 1)) THEN ('A') WHEN ((cat = 2)) THEN ('B') WHEN ((cat = 3)) THEN ('C') ELSE ('D') END (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000000 Data size: 4000000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500000 Data size: 46500000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500000 Data size: 46500000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250000 Data size: 23250000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250000 Data size: 23250000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: CREATE TABLE t2 (key STRING, v1 STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t2 +POSTHOOK: query: CREATE 
TABLE t2 (key STRING, v1 STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t2 +PREHOOK: query: ALTER TABLE t1 UPDATE STATISTICS FOR COLUMN val SET('numDVs'='1000000','numNulls'='0') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@t1 +PREHOOK: Output: default@t1 +POSTHOOK: query: ALTER TABLE t1 UPDATE STATISTICS FOR COLUMN val SET('numDVs'='1000000','numNulls'='0') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@t1 +PREHOOK: query: ALTER TABLE t1 UPDATE STATISTICS FOR COLUMN data SET('numDVs'='5000000','numNulls'='0','avgColLen'='500.0','maxColLen'='600') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@t1 +PREHOOK: Output: default@t1 +POSTHOOK: query: ALTER TABLE t1 UPDATE STATISTICS FOR COLUMN data SET('numDVs'='5000000','numNulls'='0','avgColLen'='500.0','maxColLen'='600') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@t1 +PREHOOK: query: ALTER TABLE t2 UPDATE STATISTICS SET('numRows'='1000000','rawDataSize'='100000000') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@t2 +PREHOOK: Output: default@t2 +POSTHOOK: query: ALTER TABLE t2 UPDATE STATISTICS SET('numRows'='1000000','rawDataSize'='100000000') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@t2 +POSTHOOK: Output: default@t2 +PREHOOK: query: ALTER TABLE t2 UPDATE STATISTICS FOR COLUMN key SET('numDVs'='1000000','numNulls'='0','avgColLen'='50.0','maxColLen'='100') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@t2 +PREHOOK: Output: default@t2 +POSTHOOK: query: ALTER TABLE t2 UPDATE STATISTICS FOR COLUMN key SET('numDVs'='1000000','numNulls'='0','avgColLen'='50.0','maxColLen'='100') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@t2 +POSTHOOK: Output: default@t2 +PREHOOK: query: ALTER TABLE t2 UPDATE STATISTICS FOR COLUMN v1 
SET('numDVs'='1000000','numNulls'='0','avgColLen'='50.0','maxColLen'='100') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@t2 +PREHOOK: Output: default@t2 +POSTHOOK: query: ALTER TABLE t2 UPDATE STATISTICS FOR COLUMN v1 SET('numDVs'='1000000','numNulls'='0','avgColLen'='50.0','maxColLen'='100') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@t2 +POSTHOOK: Output: default@t2 +PREHOOK: query: EXPLAIN +SELECT a.k, a.total, a.sample, b.v1 +FROM ( + SELECT + k, + SUM(val) as total, + MAX(data) as sample + FROM ( + SELECT + CASE + WHEN cat BETWEEN 0 AND 4 THEN 'K00' + WHEN cat BETWEEN 5 AND 9 THEN 'K01' + WHEN cat BETWEEN 10 AND 14 THEN 'K02' + WHEN cat BETWEEN 15 AND 19 THEN 'K03' + WHEN cat BETWEEN 20 AND 24 THEN 'K04' + WHEN cat BETWEEN 25 AND 29 THEN 'K05' + WHEN cat BETWEEN 30 AND 34 THEN 'K06' + WHEN cat BETWEEN 35 AND 39 THEN 'K07' + WHEN cat BETWEEN 40 AND 44 THEN 'K08' + WHEN cat BETWEEN 45 AND 49 THEN 'K09' + WHEN cat BETWEEN 50 AND 54 THEN 'K10' + WHEN cat BETWEEN 55 AND 59 THEN 'K11' + WHEN cat BETWEEN 60 AND 64 THEN 'K12' + WHEN cat BETWEEN 65 AND 69 THEN 'K13' + WHEN cat BETWEEN 70 AND 74 THEN 'K14' + WHEN cat BETWEEN 75 AND 79 THEN 'K15' + WHEN cat BETWEEN 80 AND 84 THEN 'K16' + WHEN cat BETWEEN 85 AND 89 THEN 'K17' + WHEN cat BETWEEN 90 AND 94 THEN 'K18' + ELSE 'K19' + END as k, + val, + data + FROM t1 + ) s + GROUP BY k +) a +JOIN t2 b ON a.k = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT a.k, a.total, a.sample, b.v1 +FROM ( + SELECT + k, + SUM(val) as total, + MAX(data) as sample + FROM ( + SELECT + CASE + WHEN cat BETWEEN 0 AND 4 THEN 'K00' + WHEN cat BETWEEN 5 AND 9 THEN 'K01' + WHEN cat BETWEEN 10 AND 14 THEN 'K02' + WHEN cat BETWEEN 15 AND 19 THEN 'K03' + WHEN cat BETWEEN 20 AND 24 THEN 'K04' + WHEN cat BETWEEN 25 AND 29 THEN 'K05' + WHEN cat BETWEEN 30 AND 34 THEN 'K06' + WHEN cat BETWEEN 35 AND 
39 THEN 'K07' + WHEN cat BETWEEN 40 AND 44 THEN 'K08' + WHEN cat BETWEEN 45 AND 49 THEN 'K09' + WHEN cat BETWEEN 50 AND 54 THEN 'K10' + WHEN cat BETWEEN 55 AND 59 THEN 'K11' + WHEN cat BETWEEN 60 AND 64 THEN 'K12' + WHEN cat BETWEEN 65 AND 69 THEN 'K13' + WHEN cat BETWEEN 70 AND 74 THEN 'K14' + WHEN cat BETWEEN 75 AND 79 THEN 'K15' + WHEN cat BETWEEN 80 AND 84 THEN 'K16' + WHEN cat BETWEEN 85 AND 89 THEN 'K17' + WHEN cat BETWEEN 90 AND 94 THEN 'K18' + ELSE 'K19' + END as k, + val, + data + FROM t1 + ) s + GROUP BY k +) a +JOIN t2 b ON a.k = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 4 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1000000 Data size: 596000000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: CASE WHEN (cat BETWEEN 0 AND 4) THEN ('K00') WHEN (cat BETWEEN 5 AND 9) THEN ('K01') WHEN (cat BETWEEN 10 AND 14) THEN ('K02') WHEN (cat BETWEEN 15 AND 19) THEN ('K03') WHEN (cat BETWEEN 20 AND 24) THEN ('K04') WHEN (cat BETWEEN 25 AND 29) THEN ('K05') WHEN (cat BETWEEN 30 AND 34) THEN ('K06') WHEN (cat BETWEEN 35 AND 39) THEN ('K07') WHEN (cat BETWEEN 40 AND 44) THEN ('K08') WHEN (cat BETWEEN 45 AND 49) THEN ('K09') WHEN (cat BETWEEN 50 AND 54) THEN ('K10') WHEN (cat BETWEEN 55 AND 59) THEN ('K11') WHEN (cat BETWEEN 60 AND 64) THEN ('K12') WHEN (cat BETWEEN 65 AND 69) THEN ('K13') WHEN (cat BETWEEN 70 AND 74) THEN ('K14') WHEN (cat BETWEEN 75 AND 79) THEN ('K15') WHEN (cat BETWEEN 80 AND 84) THEN ('K16') WHEN (cat BETWEEN 85 AND 89) THEN ('K17') WHEN (cat BETWEEN 90 AND 94) THEN ('K18') ELSE ('K19') END (type: string), 
val (type: bigint), data (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000000 Data size: 596000000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col1), max(_col2) + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500000 Data size: 139500000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500000 Data size: 139500000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint), _col2 (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: b + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 1000000 Data size: 268000000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1000000 Data size: 268000000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), v1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1000000 Data size: 268000000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1000000 Data size: 268000000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), max(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250000 
Data size: 69750000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250000 Data size: 69750000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint), _col2 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col4 + Statistics: Num rows: 250000 Data size: 103250000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250000 Data size: 103250000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250000 Data size: 103250000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/llap/scratch_col_reused_by_child.q.out b/ql/src/test/results/clientpositive/llap/scratch_col_reused_by_child.q.out index a769d77d2c05..a6965593f68c 100644 --- a/ql/src/test/results/clientpositive/llap/scratch_col_reused_by_child.q.out +++ b/ql/src/test/results/clientpositive/llap/scratch_col_reused_by_child.q.out @@ -187,7 +187,7 @@ STAGE PLANS: vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: date) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 
56 Basic stats: COMPLETE Column stats: COMPLETE diff --git a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out index fa79cd87dd13..6801074f4e1e 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out @@ -1533,16 +1533,16 @@ STAGE PLANS: Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string) - minReductionHashAggr: 0.4 + minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 105 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 105 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -1555,11 +1555,11 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col2 - Statistics: Num rows: 166 Data size: 15106 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 167 Data size: 14537 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 166 Data size: 15106 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 167 Data size: 14537 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col2 (type: boolean) Reducer 3 Execution mode: llap @@ -1571,21 +1571,21 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col2, _col3, _col4 - Statistics: Num rows: 166 Data size: 17762 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 167 
Data size: 17209 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col3 (type: bigint), _col4 (type: bigint), _col2 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col4 - Statistics: Num rows: 166 Data size: 17762 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 167 Data size: 17209 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((_col1 = 0L) or (_col4 is null and (_col2 >= _col1) and _col0 is not null)) (type: boolean) - Statistics: Num rows: 166 Data size: 17762 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 167 Data size: 17209 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 167 Data size: 14529 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 167 Data size: 14529 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1610,17 +1610,17 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 105 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), true (type: boolean) outputColumnNames: _col0, _col1 - Statistics: Num rows: 105 Data size: 507 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition 
columns: _col0 (type: string) - Statistics: Num rows: 105 Data size: 507 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: boolean) Stage: Stage-0 diff --git a/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_4.q.out b/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_4.q.out index 75cb43899a15..0f09eb27649e 100644 --- a/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_4.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_4.q.out @@ -70,7 +70,7 @@ POSTHOOK: query: ALTER TABLE table_b UPDATE STATISTICS FOR COLUMN product_sk SET POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS POSTHOOK: Input: default@table_b POSTHOOK: Output: default@table_b -Warning: Map Join MAPJOIN[31][bigTable=?] in task 'Map 1' is a cross product +Warning: Map Join MAPJOIN[31][bigTable=?] in task 'Map 2' is a cross product PREHOOK: query: EXPLAIN SELECT TC.CONST_DATE, TB.PRODUCT_SK FROM TABLE_A TA @@ -104,8 +104,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) - Reducer 4 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -116,31 +115,68 @@ STAGE PLANS: Statistics: Num rows: 100000000 Data size: 15400000000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((start_date = DATE'2023-11-27') and product_id is not null) (type: boolean) - Statistics: Num rows: 50000000 Data size: 7700000000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 200000 Data size: 30800000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: product_id (type: int), product_sk (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 50000000 Data size: 4900000000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: 
Num rows: 200000 Data size: 19600000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 200000 Data size: 19600000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 2 + Map Operator Tree: + TableScan + alias: ta + filterExpr: ((start_date = DATE'2023-11-27') and product_id is not null) (type: boolean) + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_30_container, bigKeyColName:product_id, smallTablePos:0, keyRatio:4.0E-4 + Statistics: Num rows: 200000000 Data size: 12000000000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((start_date = DATE'2023-11-27') and product_id is not null) (type: boolean) + Statistics: Num rows: 80000 Data size: 4800000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: product_id (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 80000 Data size: 320000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 - 1 - outputColumnNames: _col0, _col1 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 input vertices: - 1 Map 2 - Statistics: Num rows: 50000000 Data size: 4900000000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 50000000 Data size: 4900000000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) + 0 Map 1 + Statistics: Num rows: 80000 Data size: 7520000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col1 + input vertices: + 1 Map 3 + 
Statistics: Num rows: 80000 Data size: 7520000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: DATE'2023-11-27' (type: date), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 80000 Data size: 12000000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 80000 Data size: 12000000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs - Map 2 + Map 3 Map Operator Tree: TableScan alias: _dummy_table @@ -154,52 +190,6 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: ta - filterExpr: ((start_date = DATE'2023-11-27') and product_id is not null) (type: boolean) - Statistics: Num rows: 200000000 Data size: 12000000000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((start_date = DATE'2023-11-27') and product_id is not null) (type: boolean) - Statistics: Num rows: 100000000 Data size: 6000000000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: product_id (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 100000000 Data size: 400000000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 100000000 Data size: 400000000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 4 - Execution mode: vectorized, llap - Reduce Operator Tree: - Map Join Operator - condition map: - Inner 
Join 0 to 1 - keys: - 0 KEY.reducesinkkey0 (type: int) - 1 KEY.reducesinkkey0 (type: int) - outputColumnNames: _col1 - input vertices: - 0 Map 1 - Statistics: Num rows: 16666666666 Data size: 1566666666604 Basic stats: COMPLETE Column stats: COMPLETE - DynamicPartitionHashJoin: true - Select Operator - expressions: DATE'2023-11-27' (type: date), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 16666666666 Data size: 2499999999900 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 16666666666 Data size: 2499999999900 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out b/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out index 673b962e0e25..d45764c3fbec 100644 --- a/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out @@ -520,7 +520,7 @@ STAGE PLANS: Map Join Operator condition map: Left Outer Join 0 to 1 - Estimated key counts: Map 4 => 90170 + Estimated key counts: Map 4 => 180340 keys: 0 _col1 (type: bigint), _col0 (type: bigint) 1 _col0 (type: bigint), _col2 (type: bigint) @@ -671,11 +671,11 @@ STAGE PLANS: Filter Operator isSamplingPred: false predicate: ((idp_data_date = DATE'2017-12-28') and finplan_detail_object_id is not null and l3_snapshot_number is not null) (type: boolean) - Statistics: Num rows: 90170 Data size: 7213600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 180340 Data size: 14427200 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: l3_snapshot_number (type: bigint), plan_key (type: 
bigint), finplan_detail_object_id (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 90170 Data size: 2164080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 180340 Data size: 4328160 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: bigint), _col2 (type: bigint) @@ -683,7 +683,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: bigint), _col2 (type: bigint) - Statistics: Num rows: 90170 Data size: 2164080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 180340 Data size: 4328160 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 value expressions: _col1 (type: bigint) auto parallelism: true @@ -978,7 +978,7 @@ STAGE PLANS: Map Join Operator condition map: Left Outer Join 0 to 1 - Estimated key counts: Map 4 => 90170 + Estimated key counts: Map 4 => 180340 keys: 0 _col1 (type: bigint), _col0 (type: bigint) 1 _col0 (type: bigint), _col2 (type: bigint) @@ -1130,11 +1130,11 @@ STAGE PLANS: Filter Operator isSamplingPred: false predicate: ((idp_data_date = DATE'2017-12-28') and finplan_detail_object_id is not null and l3_snapshot_number is not null) (type: boolean) - Statistics: Num rows: 90170 Data size: 7213600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 180340 Data size: 14427200 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: l3_snapshot_number (type: bigint), plan_key (type: bigint), finplan_detail_object_id (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 90170 Data size: 2164080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 180340 Data size: 4328160 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: bigint), _col2 (type: bigint) @@ -1142,7 +1142,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: 
_col0 (type: bigint), _col2 (type: bigint) - Statistics: Num rows: 90170 Data size: 2164080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 180340 Data size: 4328160 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 value expressions: _col1 (type: bigint) auto parallelism: true diff --git a/ql/src/test/results/clientpositive/llap/tpch18.q.out b/ql/src/test/results/clientpositive/llap/tpch18.q.out index 6dce589e7ebd..de0c9991b3db 100644 --- a/ql/src/test/results/clientpositive/llap/tpch18.q.out +++ b/ql/src/test/results/clientpositive/llap/tpch18.q.out @@ -116,16 +116,15 @@ HiveSortLimit(sort0=[$4], sort1=[$3], dir0=[DESC], dir1=[ASC], fetch=[100]) HiveProject(o_orderkey=[$0], o_totalprice=[$2], o_orderdate=[$3], c_custkey=[$5], c_name=[$6], $f8=[*($4, $7)]) HiveJoin(condition=[=($5, $1)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(o_orderkey=[$0], o_custkey=[$1], o_totalprice=[$2], o_orderdate=[$3], count=[$4]) - HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(o_orderkey=[$0], o_custkey=[$1], o_totalprice=[$2], o_orderdate=[$3], count=[$4]) - HiveAggregate(group=[{0, 1, 2, 3}], count=[count()]) - HiveProject(o_orderkey=[$0], o_custkey=[$1], o_totalprice=[$3], o_orderdate=[$4]) - HiveTableScan(table=[[tpch_0_001, orders]], table:alias=[orders]) - HiveProject($f0=[$0]) - HiveFilter(condition=[>($1, 3E2)]) - HiveAggregate(group=[{0}], agg#0=[sum($4)]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[tpch_0_001, lineitem]], table:alias=[lineitem]) + HiveAggregate(group=[{0, 1, 2, 3}], count=[count()]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(o_orderkey=[$0], o_custkey=[$1], o_totalprice=[$3], o_orderdate=[$4]) + HiveTableScan(table=[[tpch_0_001, orders]], table:alias=[orders]) + HiveProject($f0=[$0]) + HiveFilter(condition=[>($1, 3E2)]) + HiveAggregate(group=[{0}], agg#0=[sum($4)]) + 
HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[tpch_0_001, lineitem]], table:alias=[lineitem]) HiveProject(c_custkey=[$0], c_name=[$1], count=[$2]) HiveAggregate(group=[{0, 1}], count=[count()]) HiveProject(c_custkey=[$0], c_name=[$1]) diff --git a/ql/src/test/results/clientpositive/llap/vector_between_in.q.out b/ql/src/test/results/clientpositive/llap/vector_between_in.q.out index ca80131cfdd1..23e8a82b7a2e 100644 --- a/ql/src/test/results/clientpositive/llap/vector_between_in.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_between_in.q.out @@ -62,7 +62,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColumnInList(col 3:date, values [-171, -67]) predicate: (cdate) IN (DATE'1969-07-14', DATE'1969-10-26') (type: boolean) - Statistics: Num rows: 6145 Data size: 169680 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 28 Data size: 840 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cdate (type: date) outputColumnNames: _col0 @@ -70,7 +70,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [3] - Statistics: Num rows: 6145 Data size: 169680 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 28 Data size: 840 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: date) null sort order: z @@ -79,7 +79,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 6145 Data size: 169680 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 28 Data size: 840 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -107,13 +107,13 @@ 
STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 6145 Data size: 169680 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 28 Data size: 840 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 6145 Data size: 169680 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 28 Data size: 840 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1174,10 +1174,10 @@ STAGE PLANS: vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: boolean) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.92749614 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 891 Data size: 10692 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean) null sort order: z @@ -1187,7 +1187,7 @@ STAGE PLANS: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 891 Data size: 10692 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -1222,7 +1222,7 @@ STAGE PLANS: keys: KEY._col0 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 3072 Data size: 36864 Basic stats: COMPLETE Column 
stats: COMPLETE + Statistics: Num rows: 891 Data size: 10692 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean) null sort order: z @@ -1231,7 +1231,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 3072 Data size: 36864 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 891 Data size: 10692 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: vectorized, llap @@ -1249,13 +1249,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 3072 Data size: 36864 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 891 Data size: 10692 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 3072 Data size: 36864 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 891 Data size: 10692 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_date_1.q.out b/ql/src/test/results/clientpositive/llap/vector_date_1.q.out index 334d56eecd80..3e62266aa51e 100644 --- a/ql/src/test/results/clientpositive/llap/vector_date_1.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_date_1.q.out @@ -978,7 +978,7 @@ STAGE PLANS: native: true predicateExpression: FilterDateColEqualDateScalar(col 0:date, val 11323) predicate: (dt1 = DATE'2001-01-01') (type: 
boolean) - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: DATE'2001-01-01' (type: date), dt2 (type: date) outputColumnNames: _col0, _col1 @@ -987,13 +987,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4, 1] selectExpressions: ConstantVectorExpression(val 11323) -> 4:date - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_identity_reuse.q.out b/ql/src/test/results/clientpositive/llap/vector_identity_reuse.q.out index 273a92b28dec..58aa422777d5 100644 --- a/ql/src/test/results/clientpositive/llap/vector_identity_reuse.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_identity_reuse.q.out @@ -198,14 +198,14 @@ STAGE PLANS: outputColumnNames: _col1, _col3, _col4, _col5 input vertices: 1 Map 3 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator Filter Vectorization: className: VectorFilterOperator native: true predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 1:int, val 5), FilterLongColEqualLongScalar(col 7:int, val 10), FilterLongColEqualLongScalar(col 7:bigint, val 571)(children: col 7:int)) predicate: ((_col1 = 5) or (_col5 = 10) or 
(UDFToLong(_col5) = 571L)) (type: boolean) - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col3 (type: int), _col5 (type: int), if(_col3 is not null, _col3, UDFToInteger(_col4)) (type: int) outputColumnNames: _col0, _col1, _col2 @@ -214,7 +214,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [5, 7, 9] selectExpressions: IfExprColumnCondExpr(col 8:boolean, col 5:intcol 6:smallint)(children: IsNotNull(col 5:int) -> 8:boolean, col 5:int, col 6:smallint) -> 9:int - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -233,12 +233,12 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 input vertices: 1 Map 4 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: - keys: _col0 (type: int) null sort order: z - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE top n: 100 Top N Key Vectorization: className: VectorTopNKeyOperator @@ -251,7 +251,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [5, 9, 7] - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z @@ -262,7 +262,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT 
columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: 9:int, 7:int - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs @@ -406,13 +406,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2] - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 100 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), 922 (type: int), _col1 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 @@ -421,13 +421,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 3, 1, 2] selectExpressions: ConstantVectorExpression(val 922) -> 3:int - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out b/ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out index 
90351996b6e1..2bbf69ab5033 100644 --- a/ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out @@ -240,7 +240,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 input vertices: 1 Map 2 - Statistics: Num rows: 29831 Data size: 5966200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 890 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col2 (type: string), _col1 (type: interval_day_time) outputColumnNames: _col0, _col1, _col2 @@ -248,13 +248,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [8, 8, 17] - Statistics: Num rows: 29831 Data size: 5966200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 890 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 29831 Data size: 5966200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 890 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out b/ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out index b4a15de44345..db657028a293 100644 --- a/ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out @@ -415,13 +415,13 @@ POSTHOOK: Input: default@small_alltypesorc_a_n1 }, { "name": "ctimestamp1", - "ndv": 0, + "ndv": 10, "minValue": -28810, "maxValue": -28789 }, { "name": "ctimestamp2", - "ndv": 0, + "ndv": 6, "minValue": -28812, "maxValue": -28786 }, @@ -618,13 +618,13 @@ POSTHOOK: Input: 
default@small_alltypesorc_a_n1 }, { "name": "ctimestamp1", - "ndv": 0, + "ndv": 10, "minValue": -28810, "maxValue": -28789 }, { "name": "ctimestamp2", - "ndv": 0, + "ndv": 6, "minValue": -28812, "maxValue": -28786 }, @@ -863,13 +863,13 @@ POSTHOOK: Input: default@small_alltypesorc_a_n1 }, { "name": "ctimestamp1", - "ndv": 0, + "ndv": 10, "minValue": -28810, "maxValue": -28789 }, { "name": "ctimestamp2", - "ndv": 0, + "ndv": 6, "minValue": -28812, "maxValue": -28786 }, @@ -1784,13 +1784,13 @@ POSTHOOK: Input: default@small_alltypesorc_a_n1 }, { "name": "ctimestamp1", - "ndv": 0, + "ndv": 10, "minValue": -28810, "maxValue": -28789 }, { "name": "ctimestamp2", - "ndv": 0, + "ndv": 6, "minValue": -28812, "maxValue": -28786 }, @@ -1987,13 +1987,13 @@ POSTHOOK: Input: default@small_alltypesorc_a_n1 }, { "name": "ctimestamp1", - "ndv": 0, + "ndv": 10, "minValue": -28810, "maxValue": -28789 }, { "name": "ctimestamp2", - "ndv": 0, + "ndv": 6, "minValue": -28812, "maxValue": -28786 }, @@ -2232,13 +2232,13 @@ POSTHOOK: Input: default@small_alltypesorc_a_n1 }, { "name": "ctimestamp1", - "ndv": 0, + "ndv": 10, "minValue": -28810, "maxValue": -28789 }, { "name": "ctimestamp2", - "ndv": 0, + "ndv": 6, "minValue": -28812, "maxValue": -28786 }, @@ -3153,13 +3153,13 @@ POSTHOOK: Input: default@small_alltypesorc_a_n1 }, { "name": "ctimestamp1", - "ndv": 0, + "ndv": 10, "minValue": -28810, "maxValue": -28789 }, { "name": "ctimestamp2", - "ndv": 0, + "ndv": 6, "minValue": -28812, "maxValue": -28786 }, @@ -3366,13 +3366,13 @@ POSTHOOK: Input: default@small_alltypesorc_a_n1 }, { "name": "ctimestamp1", - "ndv": 0, + "ndv": 10, "minValue": -28810, "maxValue": -28789 }, { "name": "ctimestamp2", - "ndv": 0, + "ndv": 6, "minValue": -28812, "maxValue": -28786 }, @@ -3664,13 +3664,13 @@ POSTHOOK: Input: default@small_alltypesorc_a_n1 }, { "name": "ctimestamp1", - "ndv": 0, + "ndv": 10, "minValue": -28810, "maxValue": -28789 }, { "name": "ctimestamp2", - "ndv": 0, + "ndv": 6, "minValue": -28812, 
"maxValue": -28786 }, diff --git a/ql/src/test/results/clientpositive/llap/vector_outer_join4.q.out b/ql/src/test/results/clientpositive/llap/vector_outer_join4.q.out index e3fe66a06b3d..d7abcecbd954 100644 --- a/ql/src/test/results/clientpositive/llap/vector_outer_join4.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_outer_join4.q.out @@ -429,13 +429,13 @@ POSTHOOK: Input: default@small_alltypesorc_b }, { "name": "ctimestamp1", - "ndv": 0, + "ndv": 14, "minValue": -28813, "maxValue": -28788 }, { "name": "ctimestamp2", - "ndv": 0, + "ndv": 17, "minValue": -28816, "maxValue": -28785 }, @@ -682,13 +682,13 @@ POSTHOOK: Input: default@small_alltypesorc_b }, { "name": "ctimestamp1", - "ndv": 0, + "ndv": 14, "minValue": -28813, "maxValue": -28788 }, { "name": "ctimestamp2", - "ndv": 0, + "ndv": 17, "minValue": -28816, "maxValue": -28785 }, @@ -1690,13 +1690,13 @@ POSTHOOK: Input: default@small_alltypesorc_b }, { "name": "ctimestamp1", - "ndv": 0, + "ndv": 14, "minValue": -28813, "maxValue": -28788 }, { "name": "ctimestamp2", - "ndv": 0, + "ndv": 17, "minValue": -28816, "maxValue": -28785 }, @@ -1888,13 +1888,13 @@ POSTHOOK: Input: default@small_alltypesorc_b }, { "name": "ctimestamp1", - "ndv": 0, + "ndv": 14, "minValue": -28813, "maxValue": -28788 }, { "name": "ctimestamp2", - "ndv": 0, + "ndv": 17, "minValue": -28816, "maxValue": -28785 }, @@ -2901,13 +2901,13 @@ POSTHOOK: Input: default@small_alltypesorc_b }, { "name": "ctimestamp1", - "ndv": 0, + "ndv": 14, "minValue": -28813, "maxValue": -28788 }, { "name": "ctimestamp2", - "ndv": 0, + "ndv": 17, "minValue": -28816, "maxValue": -28785 }, @@ -3104,13 +3104,13 @@ POSTHOOK: Input: default@small_alltypesorc_b }, { "name": "ctimestamp1", - "ndv": 0, + "ndv": 14, "minValue": -28813, "maxValue": -28788 }, { "name": "ctimestamp2", - "ndv": 0, + "ndv": 17, "minValue": -28816, "maxValue": -28785 }, @@ -3349,13 +3349,13 @@ POSTHOOK: Input: default@small_alltypesorc_b }, { "name": "ctimestamp1", - "ndv": 0, + "ndv": 
14, "minValue": -28813, "maxValue": -28788 }, { "name": "ctimestamp2", - "ndv": 0, + "ndv": 17, "minValue": -28816, "maxValue": -28785 }, diff --git a/ql/src/test/results/clientpositive/llap/vector_outer_join_constants.q.out b/ql/src/test/results/clientpositive/llap/vector_outer_join_constants.q.out index dd94e53a68bd..9ea03dbdcca3 100644 --- a/ql/src/test/results/clientpositive/llap/vector_outer_join_constants.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_outer_join_constants.q.out @@ -184,7 +184,7 @@ POSTHOOK: type: ANALYZE_TABLE POSTHOOK: Input: default@lday POSTHOOK: Output: default@lday #### A masked pattern was here #### -Warning: Shuffle Join MERGEJOIN[79][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[79][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 5' is a cross product PREHOOK: query: EXPLAIN VECTORIZATION DETAIL select * from (select item1.S_ID S_ID, @@ -272,48 +272,112 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 5 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Map 7 (BROADCAST_EDGE) - Map 6 <- Map 7 (BROADCAST_EDGE) + Map 3 <- Map 6 (BROADCAST_EDGE), Map 7 (BROADCAST_EDGE) + Map 7 <- Map 1 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Reducer 2 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Map 1 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE), Reducer 8 (CUSTOM_SIMPLE_EDGE) + Reducer 8 <- Map 7 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: od1 - filterExpr: (o_date is not null and id is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + alias: item1 + filterExpr: ((s_id = 22) and id is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE 
TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:id:int, 1:o_date:timestamp, 2:ROW__ID:struct, 3:ROW__IS__DELETED:boolean] + vectorizationSchemaColumns: [0:id:int, 1:s_id:int, 2:name:string, 3:ROW__ID:struct, 4:ROW__IS__DELETED:boolean] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 1:timestamp), SelectColumnIsNotNull(col 0:int)) - predicate: (o_date is not null and id is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 1:int, val 22), SelectColumnIsNotNull(col 0:int)) + predicate: ((s_id = 22) and id is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: id (type: int), o_date (type: timestamp) + expressions: id (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: 
VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1] + dataColumns: id:int, s_id:int, name:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 3 + Map Operator Tree: + TableScan + alias: lday2 + filterExpr: (ly_date is not null and d_date is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:d_date:timestamp, 1:ly_date:timestamp, 2:ROW__ID:struct, 3:ROW__IS__DELETED:boolean] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 1:timestamp), SelectColumnIsNotNull(col 0:timestamp)) + predicate: (ly_date is not null and d_date is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: d_date (type: timestamp), ly_date (type: timestamp) outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 2 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE 
+ Statistics: Num rows: 2 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: timestamp) + 1 _col0 (type: timestamp) Map Join Vectorization: - bigTableKeyColumns: 0:int + bigTableKeyColumns: 0:timestamp bigTableRetainColumnNums: [1] bigTableValueColumns: 1:timestamp - className: VectorMapJoinInnerBigOnlyLongOperator + className: VectorMapJoinInnerBigOnlyMultiKeyOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true nonOuterSmallTableKeyMapping: [] @@ -321,14 +385,14 @@ STAGE PLANS: hashTableImplementationType: OPTIMIZED outputColumnNames: _col1 input vertices: - 1 Map 5 + 1 Map 6 Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col1 (type: timestamp) - 1 _col0 (type: timestamp) + 1 _col1 (type: timestamp) Map Join Vectorization: bigTableKeyColumns: 1:timestamp bigTableRetainColumnNums: [] @@ -364,47 +428,6 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: timestamp) - 1 _col1 (type: timestamp) - Map Join Vectorization: - bigTableKeyColumns: 1:timestamp - bigTableRetainColumnNums: [] - className: VectorMapJoinInnerBigOnlyMultiKeyOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS 
true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nonOuterSmallTableKeyMapping: [] - hashTableImplementationType: OPTIMIZED - input vertices: - 0 Map 6 - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: ConstantVectorExpression(val 1) -> 5:boolean - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: true (type: boolean) - minReductionHashAggr: 0.75 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: boolean) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: boolean) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumns: 0:boolean - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -419,44 +442,55 @@ STAGE PLANS: rowBatchContext: dataColumnCount: 2 includeColumns: [0, 1] - dataColumns: id:int, o_date:timestamp + dataColumns: d_date:timestamp, ly_date:timestamp partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, bigint] - Map 5 + scratchColumnTypeNames: [bigint] + Map 6 Map Operator Tree: TableScan - alias: item1 - filterExpr: ((s_id = 22) and id is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE 
Column stats: COMPLETE + alias: ytday2 + filterExpr: ((d_date = TIMESTAMP'2008-04-30 00:00:00') and ytd_date is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:id:int, 1:s_id:int, 2:name:string, 3:ROW__ID:struct, 4:ROW__IS__DELETED:boolean] + vectorizationSchemaColumns: [0:d_date:timestamp, 1:ytd_date:timestamp, 2:ROW__ID:struct, 3:ROW__IS__DELETED:boolean] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 1:int, val 22), SelectColumnIsNotNull(col 0:int)) - predicate: ((s_id = 22) and id is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + predicateExpression: FilterExprAndExpr(children: FilterTimestampColEqualTimestampScalar(col 0:timestamp, val 2008-04-30 00:00:00), SelectColumnIsNotNull(col 1:timestamp)) + predicate: ((d_date = TIMESTAMP'2008-04-30 00:00:00') and ytd_date is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: id (type: int) + expressions: ytd_date (type: timestamp) outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + projectedOutputColumnNums: [1] + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col0 (type: timestamp) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col0 (type: timestamp) Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumns: 0:int + className: 
VectorReduceSinkMultiKeyOperator + keyColumns: 1:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 1:timestamp + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -469,46 +503,46 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true rowBatchContext: - dataColumnCount: 3 + dataColumnCount: 2 includeColumns: [0, 1] - dataColumns: id:int, s_id:int, name:string + dataColumns: d_date:timestamp, ytd_date:timestamp partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 6 + Map 7 Map Operator Tree: TableScan - alias: lday2 - filterExpr: (ly_date is not null and d_date is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + alias: od2 + filterExpr: (o_date is not null and id is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:d_date:timestamp, 1:ly_date:timestamp, 2:ROW__ID:struct, 
3:ROW__IS__DELETED:boolean] + vectorizationSchemaColumns: [0:id:int, 1:o_date:timestamp, 2:ROW__ID:struct, 3:ROW__IS__DELETED:boolean] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 1:timestamp), SelectColumnIsNotNull(col 0:timestamp)) - predicate: (ly_date is not null and d_date is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 1:timestamp), SelectColumnIsNotNull(col 0:int)) + predicate: (o_date is not null and id is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: d_date (type: timestamp), ly_date (type: timestamp) + expressions: id (type: int), o_date (type: timestamp) outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 2 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: timestamp) - 1 _col0 (type: timestamp) + 0 _col0 (type: int) + 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumns: 0:timestamp + bigTableKeyColumns: 0:int bigTableRetainColumnNums: [1] bigTableValueColumns: 1:timestamp - className: VectorMapJoinInnerBigOnlyMultiKeyOperator + className: VectorMapJoinInnerBigOnlyLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true nonOuterSmallTableKeyMapping: [] @@ -516,7 
+550,7 @@ STAGE PLANS: hashTableImplementationType: OPTIMIZED outputColumnNames: _col1 input vertices: - 1 Map 7 + 1 Map 1 Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: timestamp) @@ -529,6 +563,67 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: timestamp) + 1 _col0 (type: timestamp) + Map Join Vectorization: + bigTableKeyColumns: 1:timestamp + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int + className: VectorMapJoinInnerBigOnlyMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0 + input vertices: + 1 Map 6 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [] + className: VectorMapJoinInnerBigOnlyLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes 
IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + hashTableImplementationType: OPTIMIZED + input vertices: + 1 Reducer 2 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: ConstantVectorExpression(val 1) -> 4:boolean + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: true (type: boolean) + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: boolean) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:boolean + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -537,79 +632,50 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: true + allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 2 includeColumns: [0, 1] - dataColumns: d_date:timestamp, ly_date:timestamp + dataColumns: id:int, o_date:timestamp partitionColumnCount: 0 - scratchColumnTypeNames: [] - Map 7 - Map Operator Tree: - TableScan - alias: ytday2 - filterExpr: ((d_date = TIMESTAMP'2008-04-30 00:00:00') and ytd_date is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 160 Basic 
stats: COMPLETE Column stats: COMPLETE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:d_date:timestamp, 1:ytd_date:timestamp, 2:ROW__ID:struct, 3:ROW__IS__DELETED:boolean] - Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: FilterTimestampColEqualTimestampScalar(col 0:timestamp, val 2008-04-30 00:00:00), SelectColumnIsNotNull(col 1:timestamp)) - predicate: ((d_date = TIMESTAMP'2008-04-30 00:00:00') and ytd_date is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ytd_date (type: timestamp) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [1] - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: timestamp) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: timestamp) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - keyColumns: 1:timestamp - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: timestamp) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: timestamp) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - keyColumns: 1:timestamp - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + scratchColumnTypeNames: [bigint] + Reducer 2 Execution mode: vectorized, llap - LLAP IO: all inputs - Map Vectorization: + Reduce Vectorization: enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true + reduceColumnNullOrder: z + reduceColumnSortOrder: + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: d_date:timestamp, ytd_date:timestamp + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int partitionColumnCount: 0 scratchColumnTypeNames: [] - Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -656,7 +722,7 @@ STAGE PLANS: valueColumns: 1:int, 2:timestamp Statistics: Num rows: 1 Data size: 44 
Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: timestamp) - Reducer 3 + Reducer 5 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -680,7 +746,7 @@ STAGE PLANS: MergeJoin Vectorization: enabled: false enableConditionsNotMet: Vectorizing MergeJoin Supported IS false - Reducer 4 + Reducer 8 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -734,7 +800,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[79][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[79][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 5' is a cross product PREHOOK: query: select * from (select item1.S_ID S_ID, ytday1.D_DATE D_DATE diff --git a/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out b/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out index ff87190d0caf..80c87d730585 100644 --- a/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out @@ -485,10 +485,10 @@ STAGE PLANS: vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: fl_date (type: date) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.91240877 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 68 Data size: 4352 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: date) null sort order: z @@ -498,7 +498,7 @@ STAGE PLANS: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 68 Data size: 4352 Basic 
stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -533,13 +533,13 @@ STAGE PLANS: keys: KEY._col0 (type: date) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 34 Data size: 2176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 34 Data size: 2176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3154,10 +3154,10 @@ STAGE PLANS: vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: fl_date (type: date) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.91240877 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 68 Data size: 4352 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: date) null sort order: z @@ -3167,7 +3167,7 @@ STAGE PLANS: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 68 Data size: 4352 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, 
llap LLAP IO: all inputs (cache only) @@ -3202,13 +3202,13 @@ STAGE PLANS: keys: KEY._col0 (type: date) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 34 Data size: 2176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 34 Data size: 2176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4170,7 +4170,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: flights_tiny_parquet_partitioned_date - Statistics: Num rows: 137 Data size: 12198 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 137 Data size: 12195 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -4180,7 +4180,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [5] - Statistics: Num rows: 137 Data size: 12198 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 137 Data size: 12195 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() Group By Vectorization: @@ -5232,7 +5232,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: flights_tiny_parquet_partitioned_timestamp - Statistics: Num rows: 137 Data size: 9911 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 137 Data size: 9908 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -5242,7 +5242,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [5] - Statistics: Num rows: 137 Data size: 9911 Basic stats: COMPLETE Column stats: 
COMPLETE + Statistics: Num rows: 137 Data size: 9908 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() Group By Vectorization: diff --git a/ql/src/test/results/clientpositive/llap/vector_ptf_bounded_start.q.out b/ql/src/test/results/clientpositive/llap/vector_ptf_bounded_start.q.out index 87a54a46fe3c..58ab800f08f3 100644 --- a/ql/src/test/results/clientpositive/llap/vector_ptf_bounded_start.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_ptf_bounded_start.q.out @@ -3437,7 +3437,7 @@ STAGE PLANS: native: true predicateExpression: FilterTimestampColEqualTimestampScalar(col 3:timestamp, val 1970-01-03 00:00:00) predicate: (p_timestamp = TIMESTAMP'1970-01-03 00:00:00') (type: boolean) - Statistics: Num rows: 20 Data size: 6264 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 2528 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: TIMESTAMP'1970-01-03 00:00:00' (type: timestamp) null sort order: a @@ -3450,7 +3450,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: 0:string, 1:string, 2:date, 5:double, 10:int - Statistics: Num rows: 20 Data size: 6264 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 2528 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_mfgr (type: string), p_name (type: string), p_date (type: date), p_retailprice (type: double), rowindex (type: int) Execution mode: vectorized, llap LLAP IO: all inputs @@ -3492,7 +3492,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [1, 2, 3, 4, 5] - Statistics: Num rows: 20 Data size: 5584 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 2248 Basic 
stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -3532,7 +3532,7 @@ STAGE PLANS: outputTypes: [bigint, double, string, string, date, double, int] partitionExpressions: [ConstantVectorExpression(val 1970-01-03 00:00:00) -> 8:timestamp] streamingColumns: [] - Statistics: Num rows: 20 Data size: 5584 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 2248 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), TIMESTAMP'1970-01-03 00:00:00' (type: timestamp), _col10 (type: int), _col2 (type: date), _col5 (type: double), count_window_0 (type: bigint), sum_window_1 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 @@ -3541,13 +3541,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [1, 2, 10, 5, 3, 4, 6, 7] selectExpressions: ConstantVectorExpression(val 1970-01-03 00:00:00) -> 10:timestamp - Statistics: Num rows: 20 Data size: 6464 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 2576 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 20 Data size: 6464 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 2576 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4162,7 +4162,7 @@ STAGE PLANS: native: true predicateExpression: FilterDateColEqualDateScalar(col 2:date, val 2) predicate: (p_date = DATE'1970-01-03') (type: boolean) - Statistics: Num rows: 20 Data size: 6264 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 2528 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: DATE'1970-01-03' (type: 
date) null sort order: a @@ -4175,7 +4175,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: 0:string, 1:string, 3:timestamp, 5:double, 10:int - Statistics: Num rows: 20 Data size: 6264 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 2528 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_mfgr (type: string), p_name (type: string), p_timestamp (type: timestamp), p_retailprice (type: double), rowindex (type: int) Execution mode: vectorized, llap LLAP IO: all inputs @@ -4217,7 +4217,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [1, 2, 3, 4, 5] - Statistics: Num rows: 20 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -4257,7 +4257,7 @@ STAGE PLANS: outputTypes: [bigint, double, string, string, timestamp, double, int] partitionExpressions: [ConstantVectorExpression(val 2) -> 8:date] streamingColumns: [] - Statistics: Num rows: 20 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: timestamp), _col10 (type: int), DATE'1970-01-03' (type: date), _col5 (type: double), count_window_0 (type: bigint), sum_window_1 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 @@ -4266,13 +4266,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [1, 2, 3, 5, 10, 4, 6, 7] selectExpressions: ConstantVectorExpression(val 2) -> 10:date - Statistics: Num rows: 20 Data size: 6576 Basic stats: 
COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 2624 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 20 Data size: 6576 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 2624 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_string_concat.q.out b/ql/src/test/results/clientpositive/llap/vector_string_concat.q.out index 8db9490d5ae4..f0a2ef011458 100644 --- a/ql/src/test/results/clientpositive/llap/vector_string_concat.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_string_concat.q.out @@ -383,10 +383,10 @@ STAGE PLANS: vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1853 Data size: 340952 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z @@ -396,7 +396,7 @@ STAGE PLANS: className: VectorReduceSinkStringOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1853 Data size: 340952 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -428,7 +428,7 @@ STAGE PLANS: keys: KEY._col0 
(type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1853 Data size: 340952 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z @@ -437,7 +437,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1853 Data size: 340952 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -454,7 +454,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1853 Data size: 340952 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 50 Limit Vectorization: diff --git a/ql/src/test/results/clientpositive/llap/vectorization_11.q.out b/ql/src/test/results/clientpositive/llap/vectorization_11.q.out index 6b9bafb538ab..42634ee640b0 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_11.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_11.q.out @@ -49,7 +49,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - filterExpr: ((ctimestamp1 is null and (cstring1 like '%a')) or (cstring2 = cstring1)) (type: boolean) + filterExpr: ((cstring2 = cstring1) or (ctimestamp1 is null and (cstring1 like '%a'))) (type: boolean) Statistics: Num rows: 12288 Data size: 2256914 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -58,8 +58,8 @@ 
STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: SelectColumnIsNull(col 8:timestamp), FilterStringColLikeStringScalar(col 6:string, pattern %a)), FilterStringGroupColEqualStringGroupColumn(col 7:string, col 6:string)) - predicate: ((ctimestamp1 is null and (cstring1 like '%a')) or (cstring2 = cstring1)) (type: boolean) + predicateExpression: FilterExprOrExpr(children: FilterStringGroupColEqualStringGroupColumn(col 7:string, col 6:string), FilterExprAndExpr(children: SelectColumnIsNull(col 8:timestamp), FilterStringColLikeStringScalar(col 6:string, pattern %a))) + predicate: ((cstring2 = cstring1) or (ctimestamp1 is null and (cstring1 like '%a'))) (type: boolean) Statistics: Num rows: 7701 Data size: 1414500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cstring1 (type: string), cboolean1 (type: boolean), cdouble (type: double), ctimestamp1 (type: timestamp), (-3728 * UDFToInteger(csmallint)) (type: int), (cdouble - 9763215.5639D) (type: double), (- cdouble) (type: double), ((- cdouble) + 6981.0D) (type: double), (cdouble * -5638.15D) (type: double) diff --git a/ql/src/test/results/clientpositive/llap/vectorization_12.q.out b/ql/src/test/results/clientpositive/llap/vectorization_12.q.out index dc3212593078..f68d74709d90 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_12.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_12.q.out @@ -87,7 +87,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - filterExpr: (((cstring1 like '%a') or ((cboolean2 <= 1) and (cbigint >= UDFToLong(csmallint)))) and ((cboolean1 >= cboolean2) or (UDFToShort(ctinyint) <> csmallint)) and ctimestamp1 is null) (type: boolean) + filterExpr: (ctimestamp1 is null and ((cstring1 like '%a') or ((cboolean2 <= 1) and (cbigint >= UDFToLong(csmallint)))) and ((cboolean1 >= cboolean2) or (UDFToShort(ctinyint) <> 
csmallint))) (type: boolean) Statistics: Num rows: 12288 Data size: 1522994 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -96,8 +96,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 6:string, pattern %a), FilterExprAndExpr(children: FilterLongColLessEqualLongScalar(col 11:boolean, val 1), FilterLongColGreaterEqualLongColumn(col 3:bigint, col 1:bigint)(children: col 1:smallint))), FilterExprOrExpr(children: FilterLongColGreaterEqualLongColumn(col 10:boolean, col 11:boolean), FilterLongColNotEqualLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint)), SelectColumnIsNull(col 8:timestamp)) - predicate: (((cstring1 like '%a') or ((cboolean2 <= 1) and (cbigint >= UDFToLong(csmallint)))) and ((cboolean1 >= cboolean2) or (UDFToShort(ctinyint) <> csmallint)) and ctimestamp1 is null) (type: boolean) + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNull(col 8:timestamp), FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 6:string, pattern %a), FilterExprAndExpr(children: FilterLongColLessEqualLongScalar(col 11:boolean, val 1), FilterLongColGreaterEqualLongColumn(col 3:bigint, col 1:bigint)(children: col 1:smallint))), FilterExprOrExpr(children: FilterLongColGreaterEqualLongColumn(col 10:boolean, col 11:boolean), FilterLongColNotEqualLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint))) + predicate: (ctimestamp1 is null and ((cstring1 like '%a') or ((cboolean2 <= 1) and (cbigint >= UDFToLong(csmallint)))) and ((cboolean1 >= cboolean2) or (UDFToShort(ctinyint) <> csmallint))) (type: boolean) Statistics: Num rows: 1903 Data size: 236052 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cbigint (type: bigint), cboolean1 (type: boolean), cstring1 (type: string), cdouble (type: double), UDFToDouble(cbigint) (type: 
double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double), (cdouble * cdouble) (type: double) diff --git a/ql/src/test/results/clientpositive/llap/vectorization_15.q.out b/ql/src/test/results/clientpositive/llap/vectorization_15.q.out index 6732aba7edd2..ee3b51fb93fc 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_15.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_15.q.out @@ -120,7 +120,7 @@ STAGE PLANS: minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 - Statistics: Num rows: 6144 Data size: 1216372 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2432638 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) null sort order: zzzzzzz @@ -132,7 +132,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: 7:double, 8:double, 9:bigint, 10:double, 11:double, 12:double, 13:bigint, 14:double, 15:double, 16:bigint - Statistics: Num rows: 6144 Data size: 1216372 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2432638 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col7 (type: double), _col8 (type: double), _col9 (type: bigint), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: bigint), _col14 (type: double), _col15 (type: double), _col16 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -163,16 +163,16 @@ STAGE PLANS: keys: KEY._col0 (type: 
float), KEY._col1 (type: boolean), KEY._col2 (type: double), KEY._col3 (type: string), KEY._col4 (type: tinyint), KEY._col5 (type: int), KEY._col6 (type: timestamp) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 - Statistics: Num rows: 6144 Data size: 1216372 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2432638 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp), power(((_col7 - ((_col8 * _col8) / _col9)) / if((_col9 = 1L), null, (_col9 - 1))), 0.5) (type: double), (-26.28 - CAST( _col5 AS decimal(10,0))) (type: decimal(13,2)), _col10 (type: double), (_col2 * 79.553D) (type: double), (33.0 % _col0) (type: float), power(((_col11 - ((_col12 * _col12) / _col13)) / if((_col13 = 1L), null, (_col13 - 1))), 0.5) (type: double), ((_col11 - ((_col12 * _col12) / _col13)) / _col13) (type: double), (-23.0D % _col2) (type: double), (- _col4) (type: tinyint), ((_col14 - ((_col15 * _col15) / _col16)) / if((_col16 = 1L), null, (_col16 - 1))) (type: double), (UDFToFloat(_col5) - _col0) (type: float), (-23 % UDFToInteger(_col4)) (type: int), (- (-26.28 - CAST( _col5 AS decimal(10,0)))) (type: decimal(13,2)), power(((_col14 - ((_col15 * _col15) / _col16)) / _col16), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 6144 Data size: 2592628 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 5185150 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 
(type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) null sort order: zzzzzzz sort order: +++++++ - Statistics: Num rows: 6144 Data size: 2592628 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 5185150 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col7 (type: double), _col8 (type: decimal(13,2)), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: tinyint), _col16 (type: double), _col17 (type: float), _col18 (type: int), _col19 (type: decimal(13,2)), _col20 (type: double) Reducer 3 Execution mode: llap @@ -184,10 +184,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: float), KEY.reducesinkkey1 (type: boolean), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: tinyint), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey6 (type: timestamp), VALUE._col0 (type: double), VALUE._col1 (type: decimal(13,2)), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: tinyint), VALUE._col9 (type: double), VALUE._col10 (type: float), VALUE._col11 (type: int), VALUE._col12 (type: decimal(13,2)), VALUE._col13 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 6144 Data size: 2592628 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 5185150 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6144 Data size: 2592628 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 5185150 Basic stats: COMPLETE Column stats: COMPLETE 
table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vectorization_16.q.out b/ql/src/test/results/clientpositive/llap/vectorization_16.q.out index 7e8cb81144fc..670bf936bfbd 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_16.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_16.q.out @@ -97,7 +97,7 @@ STAGE PLANS: minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 5979 Data size: 825318 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 848064 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) null sort order: zzz @@ -109,7 +109,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: 3:bigint, 4:double, 5:double, 6:double - Statistics: Num rows: 5979 Data size: 825318 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 848064 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: double) Execution mode: vectorized, llap LLAP IO: all inputs @@ -157,7 +157,7 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: double), KEY._col2 (type: timestamp) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 5979 Data size: 825318 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 848064 Basic stats: COMPLETE Column stats: COMPLETE Select 
Operator expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp), (_col1 - 9763215.5639D) (type: double), (- (_col1 - 9763215.5639D)) (type: double), _col3 (type: bigint), power((greatest(0,(_col4 - ((_col5 * _col5) / _col3))) / if((_col3 = 1L), null, (_col3 - 1))), 0.5) (type: double), (- power((greatest(0,(_col4 - ((_col5 * _col5) / _col3))) / if((_col3 = 1L), null, (_col3 - 1))), 0.5)) (type: double), (power((greatest(0,(_col4 - ((_col5 * _col5) / _col3))) / if((_col3 = 1L), null, (_col3 - 1))), 0.5) * UDFToDouble(_col3)) (type: double), _col6 (type: double), (9763215.5639D / _col1) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), power((greatest(0,(_col4 - ((_col5 * _col5) / _col3))) / if((_col3 = 1L), null, (_col3 - 1))), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 @@ -166,13 +166,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 1, 2, 7, 9, 3, 18, 28, 39, 6, 40, 42, 51] selectExpressions: DoubleColSubtractDoubleScalar(col 1:double, val 9763215.5639) -> 7:double, DoubleColUnaryMinus(col 8:double)(children: DoubleColSubtractDoubleScalar(col 1:double, val 9763215.5639) -> 8:double) -> 9:double, FuncPowerDoubleToDouble(col 17:double)(children: DoubleColDivideLongColumn(col 13:double, col 16:bigint)(children: VectorUDFAdaptor(greatest(0,(_col4 - ((_col5 * _col5) / _col3))))(children: DoubleColSubtractDoubleColumn(col 4:double, col 11:double)(children: DoubleColDivideLongColumn(col 10:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 10:double) -> 11:double) -> 12:double) -> 13:double, IfExprNullCondExpr(col 14:boolean, null, col 15:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 14:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 15:bigint) -> 16:bigint) -> 17:double) -> 18:double, DoubleColUnaryMinus(col 27:double)(children: 
FuncPowerDoubleToDouble(col 26:double)(children: DoubleColDivideLongColumn(col 22:double, col 25:bigint)(children: VectorUDFAdaptor(greatest(0,(_col4 - ((_col5 * _col5) / _col3))))(children: DoubleColSubtractDoubleColumn(col 4:double, col 20:double)(children: DoubleColDivideLongColumn(col 19:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 19:double) -> 20:double) -> 21:double) -> 22:double, IfExprNullCondExpr(col 23:boolean, null, col 24:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 23:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 24:bigint) -> 25:bigint) -> 26:double) -> 27:double) -> 28:double, DoubleColMultiplyDoubleColumn(col 37:double, col 38:double)(children: FuncPowerDoubleToDouble(col 36:double)(children: DoubleColDivideLongColumn(col 32:double, col 35:bigint)(children: VectorUDFAdaptor(greatest(0,(_col4 - ((_col5 * _col5) / _col3))))(children: DoubleColSubtractDoubleColumn(col 4:double, col 30:double)(children: DoubleColDivideLongColumn(col 29:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 29:double) -> 30:double) -> 31:double) -> 32:double, IfExprNullCondExpr(col 33:boolean, null, col 34:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 33:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 34:bigint) -> 35:bigint) -> 36:double) -> 37:double, CastLongToDouble(col 3:bigint) -> 38:double) -> 39:double, DoubleScalarDivideDoubleColumn(val 9763215.5639, col 1:double) -> 40:double, DecimalColDivideDecimalScalar(col 41:decimal(19,0), val -1.389)(children: CastLongToDecimal(col 3:bigint) -> 41:decimal(19,0)) -> 42:decimal(28,6), FuncPowerDoubleToDouble(col 50:double)(children: DoubleColDivideLongColumn(col 46:double, col 49:bigint)(children: VectorUDFAdaptor(greatest(0,(_col4 - ((_col5 * _col5) / _col3))))(children: DoubleColSubtractDoubleColumn(col 4:double, col 44:double)(children: DoubleColDivideLongColumn(col 
43:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 43:double) -> 44:double) -> 45:double) -> 46:double, IfExprNullCondExpr(col 47:boolean, null, col 48:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 47:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 48:bigint) -> 49:bigint) -> 50:double) -> 51:double - Statistics: Num rows: 5979 Data size: 1734126 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 1781952 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 5979 Data size: 1734126 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 1781952 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vectorization_3.q.out b/ql/src/test/results/clientpositive/llap/vectorization_3.q.out index 445a06ac0f35..48cf350e1e31 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_3.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_3.q.out @@ -73,7 +73,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - filterExpr: (((UDFToFloat(cint) <= cfloat) and (CAST( cbigint AS decimal(22,3)) <> 79.553) and (UDFToDouble(ctimestamp2) = -29071.0D)) or ((UDFToDouble(cbigint) > cdouble) and (CAST( csmallint AS decimal(8,3)) >= 79.553) and (ctimestamp1 > ctimestamp2))) (type: boolean) + filterExpr: (((UDFToDouble(cbigint) > cdouble) and (CAST( csmallint AS decimal(8,3)) >= 79.553) and (ctimestamp1 > ctimestamp2)) or ((UDFToFloat(cint) <= cfloat) and (CAST( cbigint AS decimal(22,3)) <> 79.553) and (UDFToDouble(ctimestamp2) = -29071.0D))) (type: boolean) Statistics: Num rows: 12288 Data size: 1027540 Basic 
stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -82,8 +82,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 14:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 14:float), FilterDecimalColNotEqualDecimalScalar(col 15:decimal(22,3), val 79.553)(children: CastLongToDecimal(col 3:bigint) -> 15:decimal(22,3)), FilterDoubleColEqualDoubleScalar(col 16:double, val -29071.0)(children: CastTimestampToDouble(col 9:timestamp) -> 16:double)), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 17:double, col 5:double)(children: CastLongToDouble(col 3:bigint) -> 17:double), FilterDecimal64ColGreaterEqualDecimal64Scalar(col 18:decimal(8,3)/DECIMAL_64, val 79553)(children: CastLongToDecimal64(col 1:smallint) -> 18:decimal(8,3)/DECIMAL_64), FilterTimestampColGreaterTimestampColumn(col 8:timestamp, col 9:timestamp))) - predicate: (((UDFToFloat(cint) <= cfloat) and (CAST( cbigint AS decimal(22,3)) <> 79.553) and (UDFToDouble(ctimestamp2) = -29071.0D)) or ((UDFToDouble(cbigint) > cdouble) and (CAST( csmallint AS decimal(8,3)) >= 79.553) and (ctimestamp1 > ctimestamp2))) (type: boolean) + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 14:double, col 5:double)(children: CastLongToDouble(col 3:bigint) -> 14:double), FilterDecimal64ColGreaterEqualDecimal64Scalar(col 15:decimal(8,3)/DECIMAL_64, val 79553)(children: CastLongToDecimal64(col 1:smallint) -> 15:decimal(8,3)/DECIMAL_64), FilterTimestampColGreaterTimestampColumn(col 8:timestamp, col 9:timestamp)), FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 16:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 16:float), FilterDecimalColNotEqualDecimalScalar(col 17:decimal(22,3), val 79.553)(children: 
CastLongToDecimal(col 3:bigint) -> 17:decimal(22,3)), FilterDoubleColEqualDoubleScalar(col 18:double, val -29071.0)(children: CastTimestampToDouble(col 9:timestamp) -> 18:double))) + predicate: (((UDFToDouble(cbigint) > cdouble) and (CAST( csmallint AS decimal(8,3)) >= 79.553) and (ctimestamp1 > ctimestamp2)) or ((UDFToFloat(cint) <= cfloat) and (CAST( cbigint AS decimal(22,3)) <> 79.553) and (UDFToDouble(ctimestamp2) = -29071.0D))) (type: boolean) Statistics: Num rows: 2503 Data size: 209380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cfloat (type: float), cint (type: int), UDFToDouble(csmallint) (type: double), (UDFToDouble(csmallint) * UDFToDouble(csmallint)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double) @@ -133,7 +133,7 @@ STAGE PLANS: includeColumns: [0, 1, 2, 3, 4, 5, 8, 9] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [double, decimal(22,3), double, double, decimal(8,3)/DECIMAL_64, double, double, double, double, double, double, double, double, double, double, double, double, double] + scratchColumnTypeNames: [double, decimal(8,3)/DECIMAL_64, double, decimal(22,3), double, double, double, double, double, double, double, double, double, double, double, double, double, double] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: diff --git a/ql/src/test/results/clientpositive/llap/vectorization_9.q.out b/ql/src/test/results/clientpositive/llap/vectorization_9.q.out index 7e8cb81144fc..670bf936bfbd 100644 --- 
a/ql/src/test/results/clientpositive/llap/vectorization_9.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_9.q.out @@ -97,7 +97,7 @@ STAGE PLANS: minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 5979 Data size: 825318 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 848064 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) null sort order: zzz @@ -109,7 +109,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: 3:bigint, 4:double, 5:double, 6:double - Statistics: Num rows: 5979 Data size: 825318 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 848064 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: double) Execution mode: vectorized, llap LLAP IO: all inputs @@ -157,7 +157,7 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: double), KEY._col2 (type: timestamp) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 5979 Data size: 825318 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 848064 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp), (_col1 - 9763215.5639D) (type: double), (- (_col1 - 9763215.5639D)) (type: double), _col3 (type: bigint), power((greatest(0,(_col4 - ((_col5 * _col5) / _col3))) / if((_col3 = 1L), null, (_col3 - 1))), 0.5) (type: double), (- 
power((greatest(0,(_col4 - ((_col5 * _col5) / _col3))) / if((_col3 = 1L), null, (_col3 - 1))), 0.5)) (type: double), (power((greatest(0,(_col4 - ((_col5 * _col5) / _col3))) / if((_col3 = 1L), null, (_col3 - 1))), 0.5) * UDFToDouble(_col3)) (type: double), _col6 (type: double), (9763215.5639D / _col1) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), power((greatest(0,(_col4 - ((_col5 * _col5) / _col3))) / if((_col3 = 1L), null, (_col3 - 1))), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 @@ -166,13 +166,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 1, 2, 7, 9, 3, 18, 28, 39, 6, 40, 42, 51] selectExpressions: DoubleColSubtractDoubleScalar(col 1:double, val 9763215.5639) -> 7:double, DoubleColUnaryMinus(col 8:double)(children: DoubleColSubtractDoubleScalar(col 1:double, val 9763215.5639) -> 8:double) -> 9:double, FuncPowerDoubleToDouble(col 17:double)(children: DoubleColDivideLongColumn(col 13:double, col 16:bigint)(children: VectorUDFAdaptor(greatest(0,(_col4 - ((_col5 * _col5) / _col3))))(children: DoubleColSubtractDoubleColumn(col 4:double, col 11:double)(children: DoubleColDivideLongColumn(col 10:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 10:double) -> 11:double) -> 12:double) -> 13:double, IfExprNullCondExpr(col 14:boolean, null, col 15:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 14:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 15:bigint) -> 16:bigint) -> 17:double) -> 18:double, DoubleColUnaryMinus(col 27:double)(children: FuncPowerDoubleToDouble(col 26:double)(children: DoubleColDivideLongColumn(col 22:double, col 25:bigint)(children: VectorUDFAdaptor(greatest(0,(_col4 - ((_col5 * _col5) / _col3))))(children: DoubleColSubtractDoubleColumn(col 4:double, col 20:double)(children: DoubleColDivideLongColumn(col 19:double, col 3:bigint)(children: 
DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 19:double) -> 20:double) -> 21:double) -> 22:double, IfExprNullCondExpr(col 23:boolean, null, col 24:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 23:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 24:bigint) -> 25:bigint) -> 26:double) -> 27:double) -> 28:double, DoubleColMultiplyDoubleColumn(col 37:double, col 38:double)(children: FuncPowerDoubleToDouble(col 36:double)(children: DoubleColDivideLongColumn(col 32:double, col 35:bigint)(children: VectorUDFAdaptor(greatest(0,(_col4 - ((_col5 * _col5) / _col3))))(children: DoubleColSubtractDoubleColumn(col 4:double, col 30:double)(children: DoubleColDivideLongColumn(col 29:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 29:double) -> 30:double) -> 31:double) -> 32:double, IfExprNullCondExpr(col 33:boolean, null, col 34:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 33:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 34:bigint) -> 35:bigint) -> 36:double) -> 37:double, CastLongToDouble(col 3:bigint) -> 38:double) -> 39:double, DoubleScalarDivideDoubleColumn(val 9763215.5639, col 1:double) -> 40:double, DecimalColDivideDecimalScalar(col 41:decimal(19,0), val -1.389)(children: CastLongToDecimal(col 3:bigint) -> 41:decimal(19,0)) -> 42:decimal(28,6), FuncPowerDoubleToDouble(col 50:double)(children: DoubleColDivideLongColumn(col 46:double, col 49:bigint)(children: VectorUDFAdaptor(greatest(0,(_col4 - ((_col5 * _col5) / _col3))))(children: DoubleColSubtractDoubleColumn(col 4:double, col 44:double)(children: DoubleColDivideLongColumn(col 43:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 43:double) -> 44:double) -> 45:double) -> 46:double, IfExprNullCondExpr(col 47:boolean, null, col 48:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 47:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 
48:bigint) -> 49:bigint) -> 50:double) -> 51:double - Statistics: Num rows: 5979 Data size: 1734126 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 1781952 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 5979 Data size: 1734126 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 1781952 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out index da82903d7963..418538ff44de 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out @@ -635,7 +635,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - filterExpr: ((ctimestamp1 = ctimestamp2) or ((UDFToLong(csmallint) <= cbigint) and (cboolean2 = 1)) or ((cstring2 > 'a') and cboolean1 is not null and ctimestamp2 is not null) or (cfloat = 762.0) or (cstring1 = 'ss')) (type: boolean) + filterExpr: (((UDFToLong(csmallint) <= cbigint) and (cboolean2 = 1)) or (ctimestamp1 = ctimestamp2) or ((cstring2 > 'a') and cboolean1 is not null and ctimestamp2 is not null) or (cfloat = 762.0) or (cstring1 = 'ss')) (type: boolean) Statistics: Num rows: 12288 Data size: 2844090 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -643,8 +643,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprOrExpr(children: FilterTimestampColEqualTimestampColumn(col 8:timestamp, col 9:timestamp), FilterExprAndExpr(children: 
FilterLongColLessEqualLongColumn(col 1:bigint, col 3:bigint)(children: col 1:smallint), FilterLongColEqualLongScalar(col 11:boolean, val 1)), FilterExprAndExpr(children: FilterStringGroupColGreaterStringScalar(col 7:string, val a), SelectColumnIsNotNull(col 10:boolean), SelectColumnIsNotNull(col 9:timestamp)), FilterDoubleColEqualDoubleScalar(col 4:float, val 762.0), FilterStringGroupColEqualStringScalar(col 6:string, val ss)) - predicate: ((ctimestamp1 = ctimestamp2) or ((UDFToLong(csmallint) <= cbigint) and (cboolean2 = 1)) or ((cstring2 > 'a') and cboolean1 is not null and ctimestamp2 is not null) or (cfloat = 762.0) or (cstring1 = 'ss')) (type: boolean) + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongColLessEqualLongColumn(col 1:bigint, col 3:bigint)(children: col 1:smallint), FilterLongColEqualLongScalar(col 11:boolean, val 1)), FilterTimestampColEqualTimestampColumn(col 8:timestamp, col 9:timestamp), FilterExprAndExpr(children: FilterStringGroupColGreaterStringScalar(col 7:string, val a), SelectColumnIsNotNull(col 10:boolean), SelectColumnIsNotNull(col 9:timestamp)), FilterDoubleColEqualDoubleScalar(col 4:float, val 762.0), FilterStringGroupColEqualStringScalar(col 6:string, val ss)) + predicate: (((UDFToLong(csmallint) <= cbigint) and (cboolean2 = 1)) or (ctimestamp1 = ctimestamp2) or ((cstring2 > 'a') and cboolean1 is not null and ctimestamp2 is not null) or (cfloat = 762.0) or (cstring1 = 'ss')) (type: boolean) Statistics: Num rows: 10571 Data size: 2446670 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), cint (type: int), cdouble (type: double), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double), UDFToDouble(csmallint) (type: double), (UDFToDouble(csmallint) * UDFToDouble(csmallint)) (type: double), (cdouble * cdouble) (type: double) @@ -2949,10 +2949,10 @@ STAGE PLANS: vectorProcessingMode: HASH 
projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] keys: _col0 (type: timestamp), _col1 (type: string) - minReductionHashAggr: 0.5133463 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22 - Statistics: Num rows: 5980 Data size: 1579124 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 3244642 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: timestamp), _col1 (type: string) null sort order: zz @@ -2962,7 +2962,7 @@ STAGE PLANS: className: VectorReduceSinkMultiKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 5980 Data size: 1579124 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 3244642 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: double), _col3 (type: double), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: tinyint), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), _col15 (type: bigint), _col16 (type: bigint), _col17 (type: double), _col18 (type: bigint), _col19 (type: double), _col20 (type: double), _col21 (type: double), _col22 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -2997,7 +2997,7 @@ STAGE PLANS: keys: KEY._col0 (type: timestamp), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, 
_col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22 - Statistics: Num rows: 5980 Data size: 1579124 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 3244642 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: timestamp), _col1 (type: string), power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5) (type: double), (UDFToDouble(_col5) / _col6) (type: double), _col7 (type: bigint), _col8 (type: tinyint), ((_col9 - ((_col10 * _col10) / _col11)) / if((_col11 = 1L), null, (_col11 - 1))) (type: double), ((_col12 - ((_col13 * _col13) / _col14)) / _col14) (type: double), (UDFToDouble(_col15) / _col16) (type: double), ((_col12 - ((_col13 * _col13) / _col14)) / if((_col14 = 1L), null, (_col14 - 1))) (type: double), (_col17 / _col18) (type: double), _col19 (type: double), ((_col9 - ((_col10 * _col10) / _col11)) / _col11) (type: double), power(((_col20 - ((_col21 * _col21) / _col22)) / _col22), 0.5) (type: double), _col15 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 @@ -3006,12 +3006,12 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 1, 27, 29, 7, 8, 36, 40, 42, 49, 50, 19, 54, 59, 15] selectExpressions: FuncPowerDoubleToDouble(col 26:double)(children: DoubleColDivideLongColumn(col 25:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 24:double)(children: DoubleColDivideLongColumn(col 23:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 23:double) -> 24:double) -> 25:double) -> 26:double) -> 27:double, DoubleColDivideLongColumn(col 28:double, col 6:bigint)(children: CastLongToDouble(col 5:bigint) -> 28:double) -> 29:double, DoubleColDivideLongColumn(col 32:double, col 35:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 31:double)(children: 
DoubleColDivideLongColumn(col 30:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 30:double) -> 31:double) -> 32:double, IfExprNullCondExpr(col 33:boolean, null, col 34:bigint)(children: LongColEqualLongScalar(col 11:bigint, val 1) -> 33:boolean, LongColSubtractLongScalar(col 11:bigint, val 1) -> 34:bigint) -> 35:bigint) -> 36:double, DoubleColDivideLongColumn(col 39:double, col 14:bigint)(children: DoubleColSubtractDoubleColumn(col 12:double, col 38:double)(children: DoubleColDivideLongColumn(col 37:double, col 14:bigint)(children: DoubleColMultiplyDoubleColumn(col 13:double, col 13:double) -> 37:double) -> 38:double) -> 39:double) -> 40:double, DoubleColDivideLongColumn(col 41:double, col 16:bigint)(children: CastLongToDouble(col 15:bigint) -> 41:double) -> 42:double, DoubleColDivideLongColumn(col 45:double, col 48:bigint)(children: DoubleColSubtractDoubleColumn(col 12:double, col 44:double)(children: DoubleColDivideLongColumn(col 43:double, col 14:bigint)(children: DoubleColMultiplyDoubleColumn(col 13:double, col 13:double) -> 43:double) -> 44:double) -> 45:double, IfExprNullCondExpr(col 46:boolean, null, col 47:bigint)(children: LongColEqualLongScalar(col 14:bigint, val 1) -> 46:boolean, LongColSubtractLongScalar(col 14:bigint, val 1) -> 47:bigint) -> 48:bigint) -> 49:double, DoubleColDivideLongColumn(col 17:double, col 18:bigint) -> 50:double, DoubleColDivideLongColumn(col 53:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 52:double)(children: DoubleColDivideLongColumn(col 51:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 51:double) -> 52:double) -> 53:double) -> 54:double, FuncPowerDoubleToDouble(col 58:double)(children: DoubleColDivideLongColumn(col 57:double, col 22:bigint)(children: DoubleColSubtractDoubleColumn(col 20:double, col 56:double)(children: DoubleColDivideLongColumn(col 55:double, col 22:bigint)(children: 
DoubleColMultiplyDoubleColumn(col 21:double, col 21:double) -> 55:double) -> 56:double) -> 57:double) -> 58:double) -> 59:double - Statistics: Num rows: 5980 Data size: 1196404 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2458210 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: +++++++++++++++++++++++++++++++++++++++ keys: _col0 (type: timestamp), _col1 (type: string), _col2 (type: double), (_col2 * 10.175D) (type: double), (- _col2) (type: double), _col3 (type: double), (- _col2) (type: double), (-26.28D - _col2) (type: double), _col4 (type: bigint), (- _col4) (type: bigint), ((-26.28D - _col2) * (- _col2)) (type: double), _col5 (type: tinyint), (((-26.28D - _col2) * (- _col2)) * UDFToDouble((- _col4))) (type: double), (- (_col2 * 10.175D)) (type: double), _col6 (type: double), (_col6 + (((-26.28D - _col2) * (- _col2)) * UDFToDouble((- _col4)))) (type: double), _col2 (type: double), (UDFToDouble((- _col4)) / _col2) (type: double), _col7 (type: double), (10.175D / _col3) (type: double), _col8 (type: double), _col9 (type: double), ((_col6 + (((-26.28D - _col2) * (- _col2)) * UDFToDouble((- _col4)))) - (((-26.28D - _col2) * (- _col2)) * UDFToDouble((- _col4)))) (type: double), (_col2 * 10.175D) (type: double), _col10 (type: double), (((_col6 + (((-26.28D - _col2) * (- _col2)) * UDFToDouble((- _col4)))) - (((-26.28D - _col2) * (- _col2)) * UDFToDouble((- _col4)))) * 10.175D) (type: double), (10.175D % (10.175D / _col3)) (type: double), (- _col5) (type: tinyint), _col11 (type: double), _col12 (type: double), (- ((-26.28D - _col2) * (- _col2))) (type: double), ((- _col2) % _col10) (type: double), (-26.28 / CAST( (- _col5) AS decimal(3,0))) (type: decimal(8,6)), _col13 (type: double), _col14 (type: bigint), ((_col6 + (((-26.28D - _col2) * (- _col2)) * UDFToDouble((- _col4)))) / _col7) (type: double), _col4 (type: bigint), _col4 (type: bigint), ((_col6 + (((-26.28D - _col2) * (- _col2)) * UDFToDouble((- 
_col4)))) % -26.28D) (type: double) null sort order: zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz - Statistics: Num rows: 5980 Data size: 1196404 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2458210 Basic stats: COMPLETE Column stats: COMPLETE top n: 50 Top N Key Vectorization: className: VectorTopNKeyOperator @@ -3025,7 +3025,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 1, 27, 23, 24, 29, 25, 26, 7, 35, 31, 8, 30, 32, 36, 28, 27, 38, 40, 37, 42, 49, 41, 39, 50, 43, 45, 48, 19, 54, 44, 52, 145, 59, 15, 53, 7, 7, 55] selectExpressions: DoubleColMultiplyDoubleScalar(col 27:double, val 10.175) -> 23:double, DoubleColUnaryMinus(col 27:double) -> 24:double, DoubleColUnaryMinus(col 27:double) -> 25:double, DoubleScalarSubtractDoubleColumn(val -26.28, col 27:double) -> 26:double, LongColUnaryMinus(col 7:bigint) -> 35:bigint, DoubleColMultiplyDoubleColumn(col 28:double, col 30:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 27:double) -> 28:double, DoubleColUnaryMinus(col 27:double) -> 30:double) -> 31:double, DoubleColMultiplyDoubleColumn(col 32:double, col 28:double)(children: DoubleColMultiplyDoubleColumn(col 28:double, col 30:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 27:double) -> 28:double, DoubleColUnaryMinus(col 27:double) -> 30:double) -> 32:double, CastLongToDouble(col 48:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 48:bigint) -> 28:double) -> 30:double, DoubleColUnaryMinus(col 28:double)(children: DoubleColMultiplyDoubleScalar(col 27:double, val 10.175) -> 28:double) -> 32:double, DoubleColAddDoubleColumn(col 36:double, col 37:double)(children: DoubleColMultiplyDoubleColumn(col 38:double, col 28:double)(children: DoubleColMultiplyDoubleColumn(col 28:double, col 37:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 27:double) -> 28:double, DoubleColUnaryMinus(col 27:double) -> 37:double) -> 38:double, CastLongToDouble(col 48:bigint)(children: 
LongColUnaryMinus(col 7:bigint) -> 48:bigint) -> 28:double) -> 37:double) -> 28:double, DoubleColDivideDoubleColumn(col 37:double, col 27:double)(children: CastLongToDouble(col 48:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 48:bigint) -> 37:double) -> 38:double, DoubleScalarDivideDoubleColumn(val 10.175, col 29:double) -> 37:double, DoubleColSubtractDoubleColumn(col 39:double, col 43:double)(children: DoubleColAddDoubleColumn(col 36:double, col 41:double)(children: DoubleColMultiplyDoubleColumn(col 43:double, col 39:double)(children: DoubleColMultiplyDoubleColumn(col 39:double, col 41:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 27:double) -> 39:double, DoubleColUnaryMinus(col 27:double) -> 41:double) -> 43:double, CastLongToDouble(col 48:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 48:bigint) -> 39:double) -> 41:double) -> 39:double, DoubleColMultiplyDoubleColumn(col 44:double, col 41:double)(children: DoubleColMultiplyDoubleColumn(col 41:double, col 43:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 27:double) -> 41:double, DoubleColUnaryMinus(col 27:double) -> 43:double) -> 44:double, CastLongToDouble(col 48:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 48:bigint) -> 41:double) -> 43:double) -> 41:double, DoubleColMultiplyDoubleScalar(col 27:double, val 10.175) -> 39:double, DoubleColMultiplyDoubleScalar(col 44:double, val 10.175)(children: DoubleColSubtractDoubleColumn(col 43:double, col 45:double)(children: DoubleColAddDoubleColumn(col 36:double, col 44:double)(children: DoubleColMultiplyDoubleColumn(col 45:double, col 43:double)(children: DoubleColMultiplyDoubleColumn(col 43:double, col 44:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 27:double) -> 43:double, DoubleColUnaryMinus(col 27:double) -> 44:double) -> 45:double, CastLongToDouble(col 48:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 48:bigint) -> 43:double) -> 44:double) -> 43:double, 
DoubleColMultiplyDoubleColumn(col 51:double, col 44:double)(children: DoubleColMultiplyDoubleColumn(col 44:double, col 45:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 27:double) -> 44:double, DoubleColUnaryMinus(col 27:double) -> 45:double) -> 51:double, CastLongToDouble(col 48:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 48:bigint) -> 44:double) -> 45:double) -> 44:double) -> 43:double, DoubleScalarModuloDoubleColumn(val 10.175, col 44:double)(children: DoubleScalarDivideDoubleColumn(val 10.175, col 29:double) -> 44:double) -> 45:double, LongColUnaryMinus(col 8:tinyint) -> 48:tinyint, DoubleColUnaryMinus(col 52:double)(children: DoubleColMultiplyDoubleColumn(col 44:double, col 51:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 27:double) -> 44:double, DoubleColUnaryMinus(col 27:double) -> 51:double) -> 52:double) -> 44:double, DoubleColModuloDoubleColumn(col 51:double, col 50:double)(children: DoubleColUnaryMinus(col 27:double) -> 51:double) -> 52:double, DecimalScalarDivideDecimalColumn(val -26.28, col 127:decimal(3,0))(children: CastLongToDecimal(col 71:tinyint)(children: LongColUnaryMinus(col 8:tinyint) -> 71:tinyint) -> 127:decimal(3,0)) -> 145:decimal(8,6), DoubleColDivideDoubleColumn(col 51:double, col 40:double)(children: DoubleColAddDoubleColumn(col 36:double, col 53:double)(children: DoubleColMultiplyDoubleColumn(col 55:double, col 51:double)(children: DoubleColMultiplyDoubleColumn(col 51:double, col 53:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 27:double) -> 51:double, DoubleColUnaryMinus(col 27:double) -> 53:double) -> 55:double, CastLongToDouble(col 71:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 71:bigint) -> 51:double) -> 53:double) -> 51:double) -> 53:double, DoubleColModuloDoubleScalar(col 51:double, val -26.28)(children: DoubleColAddDoubleColumn(col 36:double, col 55:double)(children: DoubleColMultiplyDoubleColumn(col 56:double, col 51:double)(children: 
DoubleColMultiplyDoubleColumn(col 51:double, col 55:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 27:double) -> 51:double, DoubleColUnaryMinus(col 27:double) -> 55:double) -> 56:double, CastLongToDouble(col 71:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 71:bigint) -> 51:double) -> 55:double) -> 51:double) -> 55:double - Statistics: Num rows: 5980 Data size: 2739514 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 5628990 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: timestamp), _col1 (type: string), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: double), _col11 (type: tinyint), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double), _col22 (type: double), _col23 (type: double), _col24 (type: double), _col25 (type: double), _col26 (type: double), _col27 (type: tinyint), _col28 (type: double), _col29 (type: double), _col30 (type: double), _col31 (type: double), _col32 (type: decimal(8,6)), _col33 (type: double), _col34 (type: bigint), _col35 (type: double), _col36 (type: bigint), _col37 (type: bigint), _col38 (type: double) null sort order: zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz @@ -3034,7 +3034,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 5980 Data size: 2739514 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data 
size: 5628990 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -3051,7 +3051,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15, 2, 17, 18, 19, 20, 21, 22, 3, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 8, 8, 38] - Statistics: Num rows: 5980 Data size: 2739514 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 5628990 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 50 Limit Vectorization: diff --git a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out index 262c0184faf0..5bbdded348a2 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out @@ -761,10 +761,10 @@ STAGE PLANS: keys: 0 _col0 (type: date) 1 _col0 (type: date) - Statistics: Num rows: 2000 Data size: 16000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 21 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.95238096 mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -910,10 +910,10 @@ STAGE PLANS: keys: 0 _col0 (type: timestamp) 1 _col0 (type: timestamp) - Statistics: Num rows: 2000 Data size: 16000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 21 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.95238096 mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE diff 
--git a/ql/src/test/results/clientpositive/llap/vectorized_stats.q.out b/ql/src/test/results/clientpositive/llap/vectorized_stats.q.out index af0c461861f3..63f97eeca17c 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_stats.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_stats.q.out @@ -1207,13 +1207,13 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 36 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: timestamp) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: timestamp) - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 36 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -1223,10 +1223,10 @@ STAGE PLANS: keys: KEY._col0 (type: timestamp) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 36 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 36 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1826,16 +1826,16 @@ STAGE PLANS: Statistics: Num rows: 100 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: val1 (type: date) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 50 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 95 Data 
size: 5320 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: date) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: date) - Statistics: Num rows: 50 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 95 Data size: 5320 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -1845,10 +1845,10 @@ STAGE PLANS: keys: KEY._col0 (type: date) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 95 Data size: 5320 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 25 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 95 Data size: 5320 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out b/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out index 8f722cdd70b1..3e1d638d5947 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out @@ -290,7 +290,7 @@ STAGE PLANS: native: true predicateExpression: FilterTimestampColumnInList(col 0:timestamp, values [0001-01-02 16:00:00.0, 0002-02-03 16:00:00.0]) predicate: (ts) IN (TIMESTAMP'0001-01-01 00:00:00', TIMESTAMP'0002-02-02 00:00:00') (type: boolean) - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ts (type: timestamp) outputColumnNames: _col0 @@ -298,13 +298,13 @@ STAGE PLANS: className: VectorSelectOperator 
native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query12.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query12.q.out index ac4f8ced88a8..813daa134207 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query12.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query12.q.out @@ -2,16 +2,16 @@ CBO PLAN: HiveProject(i_item_desc=[$0], i_category=[$1], i_class=[$2], i_current_price=[$3], itemrevenue=[$4], revenueratio=[$5]) HiveSortLimit(sort0=[$1], sort1=[$2], sort2=[$6], sort3=[$0], sort4=[$5], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC], fetch=[100]) HiveProject(i_item_desc=[$1], i_category=[$4], i_class=[$3], i_current_price=[$2], itemrevenue=[$5], revenueratio=[/(*($5, 100:DECIMAL(10, 0)), sum($5) OVER (PARTITION BY $3 ORDER BY $3 NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING))], (tok_table_or_col i_item_id)=[$0]) - HiveAggregate(group=[{4, 5, 6, 7, 8}], agg#0=[sum($1)]) - HiveJoin(condition=[=($2, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveAggregate(group=[{5, 6, 7, 8, 9}], agg#0=[sum($1)]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + 
HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ws_item_sk=[$2], ws_ext_sales_price=[$22], ws_sold_date_sk=[$33]) HiveFilter(condition=[IS NOT NULL($33)]) HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5], i_class=[$10], i_category=[$12]) - HiveFilter(condition=[IN($12, _UTF-16LE'Books', _UTF-16LE'Jewelry', _UTF-16LE'Sports')]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-01-12 00:00:00:TIMESTAMP(9), 2001-02-11 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-01-12 00:00:00:TIMESTAMP(9), 2001-02-11 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5], i_class=[$10], i_category=[$12]) + HiveFilter(condition=[IN($12, _UTF-16LE'Books', _UTF-16LE'Jewelry', _UTF-16LE'Sports')]) + HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query16.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query16.q.out index a6346d4138e8..4e9966445da8 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query16.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query16.q.out @@ -3,22 +3,22 @@ HiveProject(order count=[$0], total shipping cost=[$1], total net profit=[$2]) HiveAggregate(group=[{}], agg#0=[count(DISTINCT $4)], agg#1=[sum($5)], agg#2=[sum($6)]) HiveAntiJoin(condition=[=($4, $14)], joinType=[anti]) HiveSemiJoin(condition=[AND(=($4, $14), <>($3, $13))], joinType=[semi]) - HiveProject(cs_ship_date_sk=[$0], 
cs_ship_addr_sk=[$1], cs_call_center_sk=[$2], cs_warehouse_sk=[$3], cs_order_number=[$4], cs_ext_ship_cost=[$5], cs_net_profit=[$6], d_date_sk=[$11], d_date=[$12], ca_address_sk=[$7], ca_state=[$8], cc_call_center_sk=[$9], cc_county=[$10]) - HiveJoin(condition=[=($0, $11)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_ship_date_sk=[$2], cs_ship_addr_sk=[$3], cs_call_center_sk=[$4], cs_warehouse_sk=[$5], cs_order_number=[$6], cs_ext_ship_cost=[$7], cs_net_profit=[$8], d_date_sk=[$9], d_date=[$10], ca_address_sk=[$0], ca_state=[$1], cc_call_center_sk=[$11], cc_county=[$12]) + HiveJoin(condition=[=($4, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_state=[CAST(_UTF-16LE'NY'):CHAR(2) CHARACTER SET "UTF-16LE"]) + HiveFilter(condition=[=($8, _UTF-16LE'NY')]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cs_ship_date_sk=[$1], cs_ship_addr_sk=[$9], cs_call_center_sk=[$10], cs_warehouse_sk=[$13], cs_order_number=[$16], cs_ext_ship_cost=[$27], cs_net_profit=[$32]) HiveFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($1), IS NOT NULL($10))]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[cs1]) - HiveProject(ca_address_sk=[$0], ca_state=[CAST(_UTF-16LE'NY'):CHAR(2) CHARACTER SET "UTF-16LE"]) - HiveFilter(condition=[=($8, _UTF-16LE'NY')]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveProject(cc_call_center_sk=[$0], cc_county=[$25]) - HiveFilter(condition=[IN($25, _UTF-16LE'Daviess County':VARCHAR(30) CHARACTER SET 
"UTF-16LE", _UTF-16LE'Franklin Parish':VARCHAR(30) CHARACTER SET "UTF-16LE", _UTF-16LE'Huron County':VARCHAR(30) CHARACTER SET "UTF-16LE", _UTF-16LE'Levy County':VARCHAR(30) CHARACTER SET "UTF-16LE", _UTF-16LE'Ziebach County':VARCHAR(30) CHARACTER SET "UTF-16LE")]) - HiveTableScan(table=[[default, call_center]], table:alias=[call_center]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-04-01 00:00:00:TIMESTAMP(9), 2001-05-31 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-04-01 00:00:00:TIMESTAMP(9), 2001-05-31 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cc_call_center_sk=[$0], cc_county=[$25]) + HiveFilter(condition=[IN($25, _UTF-16LE'Daviess County':VARCHAR(30) CHARACTER SET "UTF-16LE", _UTF-16LE'Franklin Parish':VARCHAR(30) CHARACTER SET "UTF-16LE", _UTF-16LE'Huron County':VARCHAR(30) CHARACTER SET "UTF-16LE", _UTF-16LE'Levy County':VARCHAR(30) CHARACTER SET "UTF-16LE", _UTF-16LE'Ziebach County':VARCHAR(30) CHARACTER SET "UTF-16LE")]) + HiveTableScan(table=[[default, call_center]], table:alias=[call_center]) HiveProject(cs_warehouse_sk=[$13], cs_order_number=[$16]) HiveFilter(condition=[IS NOT NULL($13)]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[cs2]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query20.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query20.q.out index 455d9e57dd05..c96f9bdb6b8e 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query20.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query20.q.out @@ -2,16 +2,16 @@ CBO PLAN: HiveProject(i_item_desc=[$0], i_category=[$1], i_class=[$2], i_current_price=[$3], itemrevenue=[$4], revenueratio=[$5]) 
HiveSortLimit(sort0=[$1], sort1=[$2], sort2=[$6], sort3=[$0], sort4=[$5], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC], fetch=[100]) HiveProject(i_item_desc=[$1], i_category=[$4], i_class=[$3], i_current_price=[$2], itemrevenue=[$5], revenueratio=[/(*($5, 100:DECIMAL(10, 0)), sum($5) OVER (PARTITION BY $3 ORDER BY $3 NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING))], (tok_table_or_col i_item_id)=[$0]) - HiveAggregate(group=[{4, 5, 6, 7, 8}], agg#0=[sum($1)]) - HiveJoin(condition=[=($2, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveAggregate(group=[{5, 6, 7, 8, 9}], agg#0=[sum($1)]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cs_item_sk=[$14], cs_ext_sales_price=[$22], cs_sold_date_sk=[$33]) HiveFilter(condition=[IS NOT NULL($33)]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5], i_class=[$10], i_category=[$12]) - HiveFilter(condition=[IN($12, _UTF-16LE'Books', _UTF-16LE'Jewelry', _UTF-16LE'Sports')]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-01-12 00:00:00:TIMESTAMP(9), 2001-02-11 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-01-12 00:00:00:TIMESTAMP(9), 2001-02-11 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5], i_class=[$10], i_category=[$12]) + 
HiveFilter(condition=[IN($12, _UTF-16LE'Books', _UTF-16LE'Jewelry', _UTF-16LE'Sports')]) + HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query21.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query21.q.out index 7c1d04844a5f..a6d2b3e4911c 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query21.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query21.q.out @@ -3,18 +3,18 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(x.w_warehouse_name=[$0], x.i_item_id=[$1], x.inv_before=[$2], x.inv_after=[$3]) HiveFilter(condition=[AND(CASE(>($2, 0), <=(6.66667E-1, /(CAST($3):DOUBLE, CAST($2):DOUBLE)), false), CASE(>($2, 0), <=(/(CAST($3):DOUBLE, CAST($2):DOUBLE), 1.5E0), false))]) HiveAggregate(group=[{0, 1}], agg#0=[sum($2)], agg#1=[sum($3)]) - HiveProject($f0=[$10], $f1=[$5], $f2=[CASE($7, $3, 0)], $f3=[CASE($8, $3, 0)]) - HiveJoin(condition=[=($2, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($4, $1)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3]) - HiveTableScan(table=[[default, inventory]], table:alias=[inventory]) - HiveProject(i_item_sk=[$0], i_item_id=[$1]) - HiveFilter(condition=[BETWEEN(false, $5, 0.99:DECIMAL(3, 2), 1.49:DECIMAL(3, 2))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(d_date_sk=[$0], EXPR$0=[<($2, 1998-04-08)], EXPR$1=[>=($2, 1998-04-08)]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-09 00:00:00:TIMESTAMP(9), 1998-05-08 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$1], $f1=[$10], $f2=[CASE($7, $5, 0)], 
$f3=[CASE($8, $5, 0)]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(w_warehouse_sk=[$0], w_warehouse_name=[$2]) HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse]) + HiveJoin(condition=[=($7, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3]) + HiveTableScan(table=[[default, inventory]], table:alias=[inventory]) + HiveProject(d_date_sk=[$0], EXPR$0=[<($2, 1998-04-08)], EXPR$1=[>=($2, 1998-04-08)]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-09 00:00:00:TIMESTAMP(9), 1998-05-08 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveFilter(condition=[BETWEEN(false, $5, 0.99:DECIMAL(3, 2), 1.49:DECIMAL(3, 2))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query23.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query23.q.out index fb6e9289cc12..a30ba66757bf 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query23.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query23.q.out @@ -39,8 +39,8 @@ HiveProject(d_date_sk=[$0], d_year=[CAST(1999):INTEGER], d_moy=[CAST(1):INTEGER] HiveFilter(condition=[AND(=($6, 1999), =($8, 1))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) -Warning: Map Join MAPJOIN[316][bigTable=?] in task 'Reducer 15' is a cross product -Warning: Map Join MAPJOIN[318][bigTable=?] in task 'Reducer 14' is a cross product +Warning: Map Join MAPJOIN[316][bigTable=?] in task 'Reducer 17' is a cross product +Warning: Map Join MAPJOIN[318][bigTable=?] 
in task 'Reducer 16' is a cross product CBO PLAN: HiveProject(_c0=[$0]) HiveAggregate(group=[{}], agg#0=[sum($0)]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query32.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query32.q.out index f158d7fea5d7..710837eecbe2 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query32.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query32.q.out @@ -6,18 +6,18 @@ HiveProject(d_date_sk=[$0]) CBO PLAN: HiveProject(excess discount amount=[$0]) HiveAggregate(group=[{}], agg#0=[sum($1)]) - HiveJoin(condition=[AND(=($6, $3), >($1, $5))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($4, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($6, $4), >($1, $5))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $2)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cs_item_sk=[$14], cs_ext_discount_amt=[$21], cs_sold_date_sk=[$33]) HiveFilter(condition=[AND(IS NOT NULL($21), IS NOT NULL($33))]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(i_item_sk=[$0]) - HiveFilter(condition=[=($13, 269)]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-18 00:00:00:TIMESTAMP(9), 1998-06-16 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-18 00:00:00:TIMESTAMP(9), 1998-06-16 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], 
table:alias=[date_dim]) + HiveProject(i_item_sk=[$0]) + HiveFilter(condition=[=($13, 269)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(_o__c0=[*(1.3:DECIMAL(2, 1), CAST(/($1, $2)):DECIMAL(11, 6))], cs_item_sk=[$0]) HiveFilter(condition=[IS NOT NULL(CAST(/($1, $2)):DECIMAL(11, 6))]) HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[count($1)]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query37.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query37.q.out index cdac1bab8139..e2ad185eef6f 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query37.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query37.q.out @@ -1,14 +1,11 @@ CBO PLAN: HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveProject(i_item_id=[$0], i_item_desc=[$1], i_current_price=[$2]) - HiveAggregate(group=[{5, 6, 7}]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveAggregate(group=[{4, 5, 6}]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cs_item_sk=[$14]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-06-02 00:00:00:TIMESTAMP(9), 2001-08-01 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(inv_date_sk=[$0], inv_item_sk=[$1]) HiveFilter(condition=[BETWEEN(false, $3, 100, 500)]) @@ -16,4 +13,7 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5]) 
HiveFilter(condition=[AND(IN($13, 678, 849, 918, 964), BETWEEN(false, $5, 22:DECIMAL(12, 2), 52:DECIMAL(12, 2)))]) HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-06-02 00:00:00:TIMESTAMP(9), 2001-08-01 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query40.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query40.q.out index 6ddf309c5979..582967b597c1 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query40.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query40.q.out @@ -2,22 +2,22 @@ CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(w_state=[$0], i_item_id=[$1], sales_before=[$2], sales_after=[$3]) HiveAggregate(group=[{0, 1}], agg#0=[sum($2)], agg#1=[sum($3)]) - HiveProject($f0=[$14], $f1=[$9], $f2=[CASE($11, -($3, CASE(IS NOT NULL($7), $7, 0:DECIMAL(12, 2))), 0:DECIMAL(13, 2))], $f3=[CASE($12, -($3, CASE(IS NOT NULL($7), $7, 0:DECIMAL(12, 2))), 0:DECIMAL(13, 2))]) + HiveProject($f0=[$14], $f1=[$12], $f2=[CASE($9, -($3, CASE(IS NOT NULL($7), $7, 0:DECIMAL(12, 2))), 0:DECIMAL(13, 2))], $f3=[CASE($10, -($3, CASE(IS NOT NULL($7), $7, 0:DECIMAL(12, 2))), 0:DECIMAL(13, 2))]) HiveJoin(condition=[=($0, $13)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($4, $10)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($8, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($11, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($4, $8)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[AND(=($1, $5), =($2, $6))], joinType=[left], algorithm=[none], cost=[not available]) 
HiveProject(cs_warehouse_sk=[$13], cs_item_sk=[$14], cs_order_number=[$16], cs_sales_price=[$20], cs_sold_date_sk=[$33]) HiveFilter(condition=[AND(IS NOT NULL($13), IS NOT NULL($33))]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) HiveProject(cr_item_sk=[$1], cr_order_number=[$15], cr_refunded_cash=[$22]) HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) - HiveProject(i_item_sk=[$0], i_item_id=[$1]) - HiveFilter(condition=[BETWEEN(false, $5, 0.99:DECIMAL(3, 2), 1.49:DECIMAL(3, 2))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(d_date_sk=[$0], EXPR$0=[<($2, 1998-04-08)], EXPR$1=[>=($2, 1998-04-08)]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-09 00:00:00:TIMESTAMP(9), 1998-05-08 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0], EXPR$0=[<($2, 1998-04-08)], EXPR$1=[>=($2, 1998-04-08)]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-09 00:00:00:TIMESTAMP(9), 1998-05-08 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveFilter(condition=[BETWEEN(false, $5, 0.99:DECIMAL(3, 2), 1.49:DECIMAL(3, 2))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(w_warehouse_sk=[$0], w_state=[$10]) HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query5.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query5.q.out index 4fc7d0ba8c0d..c57a4bf13c24 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query5.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query5.q.out @@ -10,9 +10,9 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(channel=[$0], id=[$1], sales=[$2], 
returns=[$3], profit=[$4]) HiveUnion(all=[true]) HiveProject(channel=[_UTF-16LE'store channel':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], id=[||(_UTF-16LE'store':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", $0)], sales=[$1], returns=[$3], profit=[-($2, $4)]) - HiveAggregate(group=[{7}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)], agg#3=[sum($5)]) - HiveJoin(condition=[=($1, $8)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveAggregate(group=[{8}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)], agg#3=[sum($5)]) + HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $6)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(store_sk=[$0], date_sk=[$1], sales_price=[$2], profit=[$3], return_amt=[$4], net_loss=[$5]) HiveUnion(all=[true]) HiveProject(store_sk=[$6], date_sk=[$22], sales_price=[$14], profit=[$21], return_amt=[CAST(0:DECIMAL(7, 2)):DECIMAL(7, 2)], net_loss=[CAST(0:DECIMAL(7, 2)):DECIMAL(7, 2)]) @@ -21,11 +21,11 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(store_sk=[$6], date_sk=[$19], sales_price=[CAST(0:DECIMAL(7, 2)):DECIMAL(7, 2)], profit=[CAST(0:DECIMAL(7, 2)):DECIMAL(7, 2)], return_amt=[$10], net_loss=[$18]) HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($19))]) HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveProject(s_store_sk=[$0], s_store_id=[$1]) - HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-08-18 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 
00:00:00:TIMESTAMP(9), 1998-08-18 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0], s_store_id=[$1]) + HiveTableScan(table=[[default, store]], table:alias=[store]) HiveProject(channel=[_UTF-16LE'catalog channel':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], id=[||(_UTF-16LE'catalog_page':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", $0)], sales=[$1], returns=[$3], profit=[-($2, $4)]) HiveAggregate(group=[{8}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)], agg#3=[sum($5)]) HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -44,9 +44,9 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(cp_catalog_page_sk=[$0], cp_catalog_page_id=[$1]) HiveTableScan(table=[[default, catalog_page]], table:alias=[catalog_page]) HiveProject(channel=[_UTF-16LE'web channel':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], id=[||(_UTF-16LE'web_site':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", $0)], sales=[$1], returns=[$3], profit=[-($2, $4)]) - HiveAggregate(group=[{7}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)], agg#3=[sum($5)]) - HiveJoin(condition=[=($1, $8)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveAggregate(group=[{8}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)], agg#3=[sum($5)]) + HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $6)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(wsr_web_site_sk=[$0], date_sk=[$1], sales_price=[$2], profit=[$3], return_amt=[$4], net_loss=[$5]) HiveUnion(all=[true]) HiveProject(wsr_web_site_sk=[$12], date_sk=[$33], sales_price=[$22], profit=[$32], return_amt=[CAST(0:DECIMAL(7, 2)):DECIMAL(7, 2)], net_loss=[CAST(0:DECIMAL(7, 2)):DECIMAL(7, 2)]) @@ -60,9 +60,9 @@ 
HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(wr_item_sk=[$1], wr_order_number=[$12], wr_return_amt=[$14], wr_net_loss=[$22], wr_returned_date_sk=[$23]) HiveFilter(condition=[IS NOT NULL($23)]) HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) - HiveProject(web_site_sk=[$0], web_site_id=[$1]) - HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-08-18 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-08-18 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(web_site_sk=[$0], web_site_id=[$1]) + HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query58.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query58.q.out index a321f8b20706..de415dace201 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query58.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query58.q.out @@ -1,24 +1,25 @@ CTE Suggestion: HiveProject(d_date=[$0]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date=[$2], d_week_seq=[$4]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveFilter(condition=[sq_count_check($0)]) 
HiveAggregate(group=[{}], cnt=[COUNT()]) HiveFilter(condition=[=($2, 1998-02-19)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) CTE Suggestion: HiveProject(d_date_sk=[$0], d_date=[$2]) HiveFilter(condition=[IS NOT NULL($2)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) -Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 7' is a cross product +Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Reducer 7' is a cross product +Warning: Map Join MAPJOIN[380][bigTable=?] in task 'Reducer 8' is a cross product CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(ss_items.item_id=[$4], ss_item_rev=[$7], ss_dev=[*(/(/($7, +(+($7, $5), $1)), 3:DECIMAL(10, 0)), 100:DECIMAL(10, 0))], cs_item_rev=[$5], cs_dev=[*(/(/($5, +(+($7, $5), $1)), 3:DECIMAL(10, 0)), 100:DECIMAL(10, 0))], ws_item_rev=[$1], ws_dev=[*(/(/($1, +(+($7, $5), $1)), 3:DECIMAL(10, 0)), 100:DECIMAL(10, 0))], average=[/(+(+($7, $5), $1), 3:DECIMAL(10, 0))]) @@ -36,19 +37,19 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_date=[$0]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date=[$2], d_week_seq=[$4]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) - 
HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(cnt=[$0]) HiveFilter(condition=[sq_count_check($0)]) HiveProject(cnt=[$0]) HiveAggregate(group=[{}], cnt=[COUNT()]) HiveFilter(condition=[=($2, 1998-02-19)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveJoin(condition=[AND(=($2, $0), BETWEEN(false, $3, *(0.9:DECIMAL(1, 1), $1), *(1.1:DECIMAL(2, 1), $1)), BETWEEN(false, $1, *(0.9:DECIMAL(1, 1), $3), *(1.1:DECIMAL(2, 1), $3)))], joinType=[inner], algorithm=[none], cost=[not available]) @@ -65,19 +66,19 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_date=[$0]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date=[$2], d_week_seq=[$4]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(cnt=[$0]) HiveFilter(condition=[sq_count_check($0)]) HiveProject(cnt=[$0]) HiveAggregate(group=[{}], cnt=[COUNT()]) HiveFilter(condition=[=($2, 1998-02-19)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(=($2, 
1998-02-19), IS NOT NULL($4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(i_item_id=[$0], $f1=[$1]) @@ -93,19 +94,19 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_date=[$0]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date=[$2], d_week_seq=[$4]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(cnt=[$0]) HiveFilter(condition=[sq_count_check($0)]) HiveProject(cnt=[$0]) HiveAggregate(group=[{}], cnt=[COUNT()]) HiveFilter(condition=[=($2, 1998-02-19)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query80.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query80.q.out index 159a27a6ab07..5f68efc24bc6 100644 --- 
a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query80.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query80.q.out @@ -23,72 +23,72 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[sum($2)], agg#2=[sum($3)]) HiveProject($f0=[$15], $f1=[$4], $f2=[CASE(IS NOT NULL($9), $9, 0:DECIMAL(12, 2))], $f3=[-($5, CASE(IS NOT NULL($10), $10, 0:DECIMAL(12, 2)))]) HiveJoin(condition=[=($1, $14)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($6, $13)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $12)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $13)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $12)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($6, $11)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[AND(=($0, $7), =($3, $8))], joinType=[left], algorithm=[none], cost=[not available]) HiveProject(ss_item_sk=[$1], ss_store_sk=[$6], ss_promo_sk=[$7], ss_ticket_number=[$8], ss_ext_sales_price=[$14], ss_net_profit=[$21], ss_sold_date_sk=[$22]) HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($7), IS NOT NULL($22))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(sr_item_sk=[$1], sr_ticket_number=[$8], sr_return_amt=[$10], sr_net_loss=[$18]) HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveProject(i_item_sk=[$0]) - HiveFilter(condition=[>($5, 50:DECIMAL(2, 0))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(p_promo_sk=[$0]) - HiveFilter(condition=[=($11, _UTF-16LE'N')]) - HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) - 
HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-09-03 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-09-03 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0]) + HiveFilter(condition=[>($5, 50:DECIMAL(2, 0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(p_promo_sk=[$0]) + HiveFilter(condition=[=($11, _UTF-16LE'N')]) + HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) HiveProject(s_store_sk=[$0], s_store_id=[$1]) HiveTableScan(table=[[default, store]], table:alias=[store]) HiveProject(channel=[_UTF-16LE'catalog channel':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], id=[||(_UTF-16LE'catalog_page':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", $0)], sales=[$1], returns=[$2], profit=[$3]) HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[sum($2)], agg#2=[sum($3)]) HiveProject($f0=[$15], $f1=[$4], $f2=[CASE(IS NOT NULL($9), $9, 0:DECIMAL(12, 2))], $f3=[-($5, CASE(IS NOT NULL($10), $10, 0:DECIMAL(12, 2)))]) HiveJoin(condition=[=($0, $14)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($6, $13)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $12)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $13)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $12)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($6, $11)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[AND(=($1, $7), =($3, $8))], 
joinType=[left], algorithm=[none], cost=[not available]) HiveProject(cs_catalog_page_sk=[$11], cs_item_sk=[$14], cs_promo_sk=[$15], cs_order_number=[$16], cs_ext_sales_price=[$22], cs_net_profit=[$32], cs_sold_date_sk=[$33]) HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($15), IS NOT NULL($33))]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) HiveProject(cr_item_sk=[$1], cr_order_number=[$15], cr_return_amount=[$17], cr_net_loss=[$25]) HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) - HiveProject(i_item_sk=[$0]) - HiveFilter(condition=[>($5, 50:DECIMAL(2, 0))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(p_promo_sk=[$0]) - HiveFilter(condition=[=($11, _UTF-16LE'N')]) - HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-09-03 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-09-03 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0]) + HiveFilter(condition=[>($5, 50:DECIMAL(2, 0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(p_promo_sk=[$0]) + HiveFilter(condition=[=($11, _UTF-16LE'N')]) + HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) HiveProject(cp_catalog_page_sk=[$0], cp_catalog_page_id=[$1]) HiveTableScan(table=[[default, catalog_page]], table:alias=[catalog_page]) HiveProject(channel=[_UTF-16LE'web channel':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], id=[||(_UTF-16LE'web_site':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", $0)], sales=[$1], returns=[$2], profit=[$3]) HiveAggregate(group=[{0}], agg#0=[sum($1)], 
agg#1=[sum($2)], agg#2=[sum($3)]) HiveProject($f0=[$15], $f1=[$4], $f2=[CASE(IS NOT NULL($9), $9, 0:DECIMAL(12, 2))], $f3=[-($5, CASE(IS NOT NULL($10), $10, 0:DECIMAL(12, 2)))]) HiveJoin(condition=[=($1, $14)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($6, $13)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $12)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $13)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $12)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($6, $11)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[AND(=($0, $7), =($3, $8))], joinType=[left], algorithm=[none], cost=[not available]) HiveProject(ws_item_sk=[$2], ws_web_site_sk=[$12], ws_promo_sk=[$15], ws_order_number=[$16], ws_ext_sales_price=[$22], ws_net_profit=[$32], ws_sold_date_sk=[$33]) HiveFilter(condition=[AND(IS NOT NULL($12), IS NOT NULL($15), IS NOT NULL($33))]) HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) HiveProject(wr_item_sk=[$1], wr_order_number=[$12], wr_return_amt=[$14], wr_net_loss=[$22]) HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) - HiveProject(i_item_sk=[$0]) - HiveFilter(condition=[>($5, 50:DECIMAL(2, 0))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(p_promo_sk=[$0]) - HiveFilter(condition=[=($11, _UTF-16LE'N')]) - HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-09-03 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0]) + 
HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-09-03 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0]) + HiveFilter(condition=[>($5, 50:DECIMAL(2, 0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(p_promo_sk=[$0]) + HiveFilter(condition=[=($11, _UTF-16LE'N')]) + HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) HiveProject(web_site_sk=[$0], web_site_id=[$1]) HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query82.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query82.q.out index 9f9be64c934f..44172d451daa 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query82.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query82.q.out @@ -1,14 +1,11 @@ CBO PLAN: HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveProject(i_item_id=[$0], i_item_desc=[$1], i_current_price=[$2]) - HiveAggregate(group=[{5, 6, 7}]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveAggregate(group=[{4, 5, 6}]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ss_item_sk=[$1]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2002-05-30 00:00:00:TIMESTAMP(9), 2002-07-29 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(inv_date_sk=[$0], 
inv_item_sk=[$1]) HiveFilter(condition=[BETWEEN(false, $3, 100, 500)]) @@ -16,4 +13,7 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5]) HiveFilter(condition=[AND(IN($13, 129, 437, 663, 727), BETWEEN(false, $5, 30:DECIMAL(12, 2), 60:DECIMAL(12, 2)))]) HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2002-05-30 00:00:00:TIMESTAMP(9), 2002-07-29 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query92.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query92.q.out index a5167a68b8a1..90adfb99205a 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query92.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query92.q.out @@ -5,27 +5,27 @@ HiveProject(d_date_sk=[$0]) CBO PLAN: HiveProject(excess discount amount=[$0]) - HiveAggregate(group=[{}], agg#0=[sum($2)]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-18 00:00:00:TIMESTAMP(9), 1998-06-16 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveJoin(condition=[AND(=($5, $3), >($1, $4))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveAggregate(group=[{}], agg#0=[sum($1)]) + HiveJoin(condition=[AND(=($6, $4), >($1, $5))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $2)], joinType=[inner], algorithm=[none], cost=[not available]) 
HiveProject(ws_item_sk=[$2], ws_ext_discount_amt=[$21], ws_sold_date_sk=[$33]) HiveFilter(condition=[AND(IS NOT NULL($21), IS NOT NULL($33))]) HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(i_item_sk=[$0]) - HiveFilter(condition=[=($13, 269)]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(_o__c0=[*(1.3:DECIMAL(2, 1), CAST(/($1, $2)):DECIMAL(11, 6))], ws_item_sk=[$0]) - HiveFilter(condition=[IS NOT NULL(CAST(/($1, $2)):DECIMAL(11, 6))]) - HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[count($1)]) - HiveJoin(condition=[=($3, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_item_sk=[$2], ws_ext_discount_amt=[$21], ws_sold_date_sk=[$33]) - HiveFilter(condition=[IS NOT NULL($33)]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-18 00:00:00:TIMESTAMP(9), 1998-06-16 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-18 00:00:00:TIMESTAMP(9), 1998-06-16 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0]) + HiveFilter(condition=[=($13, 269)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(_o__c0=[*(1.3:DECIMAL(2, 1), CAST(/($1, $2)):DECIMAL(11, 6))], ws_item_sk=[$0]) + HiveFilter(condition=[IS NOT NULL(CAST(/($1, $2)):DECIMAL(11, 6))]) + HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[count($1)]) + HiveJoin(condition=[=($3, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_item_sk=[$2], ws_ext_discount_amt=[$21], ws_sold_date_sk=[$33]) + HiveFilter(condition=[IS NOT NULL($33)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + 
HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-18 00:00:00:TIMESTAMP(9), 1998-06-16 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query94.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query94.q.out index 982bd647bb78..5f14c7b74791 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query94.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query94.q.out @@ -3,22 +3,22 @@ HiveProject(order count=[$0], total shipping cost=[$1], total net profit=[$2]) HiveAggregate(group=[{}], agg#0=[count(DISTINCT $4)], agg#1=[sum($5)], agg#2=[sum($6)]) HiveAntiJoin(condition=[=($4, $14)], joinType=[anti]) HiveSemiJoin(condition=[AND(=($4, $14), <>($3, $13))], joinType=[semi]) - HiveProject(ws_ship_date_sk=[$0], ws_ship_addr_sk=[$1], ws_web_site_sk=[$2], ws_warehouse_sk=[$3], ws_order_number=[$4], ws_ext_ship_cost=[$5], ws_net_profit=[$6], d_date_sk=[$11], d_date=[$12], ca_address_sk=[$7], ca_state=[$8], web_site_sk=[$9], web_company_name=[$10]) - HiveJoin(condition=[=($0, $11)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_ship_date_sk=[$2], ws_ship_addr_sk=[$3], ws_web_site_sk=[$4], ws_warehouse_sk=[$5], ws_order_number=[$6], ws_ext_ship_cost=[$7], ws_net_profit=[$8], d_date_sk=[$9], d_date=[$10], ca_address_sk=[$0], ca_state=[$1], web_site_sk=[$11], web_company_name=[$12]) + HiveJoin(condition=[=($4, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_state=[CAST(_UTF-16LE'TX'):CHAR(2) CHARACTER SET "UTF-16LE"]) + 
HiveFilter(condition=[=($8, _UTF-16LE'TX')]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ws_ship_date_sk=[$1], ws_ship_addr_sk=[$10], ws_web_site_sk=[$12], ws_warehouse_sk=[$14], ws_order_number=[$16], ws_ext_ship_cost=[$27], ws_net_profit=[$32]) HiveFilter(condition=[AND(IS NOT NULL($10), IS NOT NULL($12), IS NOT NULL($1))]) HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) - HiveProject(ca_address_sk=[$0], ca_state=[CAST(_UTF-16LE'TX'):CHAR(2) CHARACTER SET "UTF-16LE"]) - HiveFilter(condition=[=($8, _UTF-16LE'TX')]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveProject(web_site_sk=[$0], web_company_name=[CAST(_UTF-16LE'pri '):CHAR(50) CHARACTER SET "UTF-16LE"]) - HiveFilter(condition=[=($14, _UTF-16LE'pri ')]) - HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1999-05-01 00:00:00:TIMESTAMP(9), 1999-06-30 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1999-05-01 00:00:00:TIMESTAMP(9), 1999-06-30 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(web_site_sk=[$0], web_company_name=[CAST(_UTF-16LE'pri '):CHAR(50) CHARACTER SET "UTF-16LE"]) + HiveFilter(condition=[=($14, _UTF-16LE'pri ')]) + HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) HiveProject(ws_warehouse_sk=[$14], ws_order_number=[$16]) HiveFilter(condition=[IS NOT NULL($14)]) HiveTableScan(table=[[default, web_sales]], table:alias=[ws2]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query95.q.out 
b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query95.q.out index 9d39c369316e..57eaa4112026 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query95.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query95.q.out @@ -3,22 +3,22 @@ HiveProject(order count=[$0], total shipping cost=[$1], total net profit=[$2]) HiveAggregate(group=[{}], agg#0=[count(DISTINCT $3)], agg#1=[sum($4)], agg#2=[sum($5)]) HiveSemiJoin(condition=[=($3, $12)], joinType=[semi]) HiveSemiJoin(condition=[=($3, $12)], joinType=[semi]) - HiveProject(ws_ship_date_sk=[$0], ws_ship_addr_sk=[$1], ws_web_site_sk=[$2], ws_order_number=[$3], ws_ext_ship_cost=[$4], ws_net_profit=[$5], d_date_sk=[$10], d_date=[$11], ca_address_sk=[$6], ca_state=[$7], web_site_sk=[$8], web_company_name=[$9]) - HiveJoin(condition=[=($0, $10)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $8)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_ship_date_sk=[$2], ws_ship_addr_sk=[$3], ws_web_site_sk=[$4], ws_order_number=[$5], ws_ext_ship_cost=[$6], ws_net_profit=[$7], d_date_sk=[$8], d_date=[$9], ca_address_sk=[$0], ca_state=[$1], web_site_sk=[$10], web_company_name=[$11]) + HiveJoin(condition=[=($4, $10)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_state=[CAST(_UTF-16LE'TX'):CHAR(2) CHARACTER SET "UTF-16LE"]) + HiveFilter(condition=[=($8, _UTF-16LE'TX')]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ws_ship_date_sk=[$1], ws_ship_addr_sk=[$10], ws_web_site_sk=[$12], ws_order_number=[$16], ws_ext_ship_cost=[$27], ws_net_profit=[$32]) 
HiveFilter(condition=[AND(IS NOT NULL($10), IS NOT NULL($12), IS NOT NULL($1))]) HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) - HiveProject(ca_address_sk=[$0], ca_state=[CAST(_UTF-16LE'TX'):CHAR(2) CHARACTER SET "UTF-16LE"]) - HiveFilter(condition=[=($8, _UTF-16LE'TX')]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveProject(web_site_sk=[$0], web_company_name=[CAST(_UTF-16LE'pri '):CHAR(50) CHARACTER SET "UTF-16LE"]) - HiveFilter(condition=[=($14, _UTF-16LE'pri ')]) - HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1999-05-01 00:00:00:TIMESTAMP(9), 1999-06-30 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1999-05-01 00:00:00:TIMESTAMP(9), 1999-06-30 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(web_site_sk=[$0], web_company_name=[CAST(_UTF-16LE'pri '):CHAR(50) CHARACTER SET "UTF-16LE"]) + HiveFilter(condition=[=($14, _UTF-16LE'pri ')]) + HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) HiveProject(ws_order_number=[$1]) HiveJoin(condition=[AND(=($1, $3), <>($0, $2))], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ws_warehouse_sk=[$14], ws_order_number=[$16]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query98.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query98.q.out index 680a11e2bde1..e6db70d26a68 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query98.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query98.q.out @@ -2,16 +2,16 @@ CBO PLAN: HiveProject(i_item_desc=[$0], i_category=[$1], i_class=[$2], i_current_price=[$3], itemrevenue=[$4], 
revenueratio=[$5]) HiveSortLimit(sort0=[$1], sort1=[$2], sort2=[$6], sort3=[$0], sort4=[$5], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC]) HiveProject(i_item_desc=[$1], i_category=[$4], i_class=[$3], i_current_price=[$2], itemrevenue=[$5], revenueratio=[/(*($5, 100:DECIMAL(10, 0)), sum($5) OVER (PARTITION BY $3 ORDER BY $3 NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING))], (tok_table_or_col i_item_id)=[$0]) - HiveAggregate(group=[{4, 5, 6, 7, 8}], agg#0=[sum($1)]) - HiveJoin(condition=[=($2, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveAggregate(group=[{5, 6, 7, 8, 9}], agg#0=[sum($1)]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ss_item_sk=[$1], ss_ext_sales_price=[$14], ss_sold_date_sk=[$22]) HiveFilter(condition=[IS NOT NULL($22)]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5], i_class=[$10], i_category=[$12]) - HiveFilter(condition=[IN($12, _UTF-16LE'Books', _UTF-16LE'Jewelry', _UTF-16LE'Sports')]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-01-12 00:00:00:TIMESTAMP(9), 2001-02-11 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-01-12 00:00:00:TIMESTAMP(9), 2001-02-11 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5], i_class=[$10], i_category=[$12]) + 
HiveFilter(condition=[IN($12, _UTF-16LE'Books', _UTF-16LE'Jewelry', _UTF-16LE'Sports')]) + HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query12.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query12.q.out index ac4f8ced88a8..813daa134207 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query12.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query12.q.out @@ -2,16 +2,16 @@ CBO PLAN: HiveProject(i_item_desc=[$0], i_category=[$1], i_class=[$2], i_current_price=[$3], itemrevenue=[$4], revenueratio=[$5]) HiveSortLimit(sort0=[$1], sort1=[$2], sort2=[$6], sort3=[$0], sort4=[$5], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC], fetch=[100]) HiveProject(i_item_desc=[$1], i_category=[$4], i_class=[$3], i_current_price=[$2], itemrevenue=[$5], revenueratio=[/(*($5, 100:DECIMAL(10, 0)), sum($5) OVER (PARTITION BY $3 ORDER BY $3 NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING))], (tok_table_or_col i_item_id)=[$0]) - HiveAggregate(group=[{4, 5, 6, 7, 8}], agg#0=[sum($1)]) - HiveJoin(condition=[=($2, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveAggregate(group=[{5, 6, 7, 8, 9}], agg#0=[sum($1)]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ws_item_sk=[$2], ws_ext_sales_price=[$22], ws_sold_date_sk=[$33]) HiveFilter(condition=[IS NOT NULL($33)]) HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5], i_class=[$10], i_category=[$12]) - HiveFilter(condition=[IN($12, _UTF-16LE'Books', _UTF-16LE'Jewelry', _UTF-16LE'Sports')]) - 
HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-01-12 00:00:00:TIMESTAMP(9), 2001-02-11 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-01-12 00:00:00:TIMESTAMP(9), 2001-02-11 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5], i_class=[$10], i_category=[$12]) + HiveFilter(condition=[IN($12, _UTF-16LE'Books', _UTF-16LE'Jewelry', _UTF-16LE'Sports')]) + HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query16.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query16.q.out index a6346d4138e8..4e9966445da8 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query16.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query16.q.out @@ -3,22 +3,22 @@ HiveProject(order count=[$0], total shipping cost=[$1], total net profit=[$2]) HiveAggregate(group=[{}], agg#0=[count(DISTINCT $4)], agg#1=[sum($5)], agg#2=[sum($6)]) HiveAntiJoin(condition=[=($4, $14)], joinType=[anti]) HiveSemiJoin(condition=[AND(=($4, $14), <>($3, $13))], joinType=[semi]) - HiveProject(cs_ship_date_sk=[$0], cs_ship_addr_sk=[$1], cs_call_center_sk=[$2], cs_warehouse_sk=[$3], cs_order_number=[$4], cs_ext_ship_cost=[$5], cs_net_profit=[$6], d_date_sk=[$11], d_date=[$12], ca_address_sk=[$7], ca_state=[$8], cc_call_center_sk=[$9], cc_county=[$10]) - HiveJoin(condition=[=($0, $11)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $7)], joinType=[inner], algorithm=[none], cost=[not 
available]) + HiveProject(cs_ship_date_sk=[$2], cs_ship_addr_sk=[$3], cs_call_center_sk=[$4], cs_warehouse_sk=[$5], cs_order_number=[$6], cs_ext_ship_cost=[$7], cs_net_profit=[$8], d_date_sk=[$9], d_date=[$10], ca_address_sk=[$0], ca_state=[$1], cc_call_center_sk=[$11], cc_county=[$12]) + HiveJoin(condition=[=($4, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_state=[CAST(_UTF-16LE'NY'):CHAR(2) CHARACTER SET "UTF-16LE"]) + HiveFilter(condition=[=($8, _UTF-16LE'NY')]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cs_ship_date_sk=[$1], cs_ship_addr_sk=[$9], cs_call_center_sk=[$10], cs_warehouse_sk=[$13], cs_order_number=[$16], cs_ext_ship_cost=[$27], cs_net_profit=[$32]) HiveFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($1), IS NOT NULL($10))]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[cs1]) - HiveProject(ca_address_sk=[$0], ca_state=[CAST(_UTF-16LE'NY'):CHAR(2) CHARACTER SET "UTF-16LE"]) - HiveFilter(condition=[=($8, _UTF-16LE'NY')]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveProject(cc_call_center_sk=[$0], cc_county=[$25]) - HiveFilter(condition=[IN($25, _UTF-16LE'Daviess County':VARCHAR(30) CHARACTER SET "UTF-16LE", _UTF-16LE'Franklin Parish':VARCHAR(30) CHARACTER SET "UTF-16LE", _UTF-16LE'Huron County':VARCHAR(30) CHARACTER SET "UTF-16LE", _UTF-16LE'Levy County':VARCHAR(30) CHARACTER SET "UTF-16LE", _UTF-16LE'Ziebach County':VARCHAR(30) CHARACTER SET "UTF-16LE")]) - HiveTableScan(table=[[default, call_center]], table:alias=[call_center]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-04-01 00:00:00:TIMESTAMP(9), 2001-05-31 
00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-04-01 00:00:00:TIMESTAMP(9), 2001-05-31 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cc_call_center_sk=[$0], cc_county=[$25]) + HiveFilter(condition=[IN($25, _UTF-16LE'Daviess County':VARCHAR(30) CHARACTER SET "UTF-16LE", _UTF-16LE'Franklin Parish':VARCHAR(30) CHARACTER SET "UTF-16LE", _UTF-16LE'Huron County':VARCHAR(30) CHARACTER SET "UTF-16LE", _UTF-16LE'Levy County':VARCHAR(30) CHARACTER SET "UTF-16LE", _UTF-16LE'Ziebach County':VARCHAR(30) CHARACTER SET "UTF-16LE")]) + HiveTableScan(table=[[default, call_center]], table:alias=[call_center]) HiveProject(cs_warehouse_sk=[$13], cs_order_number=[$16]) HiveFilter(condition=[IS NOT NULL($13)]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[cs2]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query20.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query20.q.out index 455d9e57dd05..c96f9bdb6b8e 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query20.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query20.q.out @@ -2,16 +2,16 @@ CBO PLAN: HiveProject(i_item_desc=[$0], i_category=[$1], i_class=[$2], i_current_price=[$3], itemrevenue=[$4], revenueratio=[$5]) HiveSortLimit(sort0=[$1], sort1=[$2], sort2=[$6], sort3=[$0], sort4=[$5], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC], fetch=[100]) HiveProject(i_item_desc=[$1], i_category=[$4], i_class=[$3], i_current_price=[$2], itemrevenue=[$5], revenueratio=[/(*($5, 100:DECIMAL(10, 0)), sum($5) OVER (PARTITION BY $3 ORDER BY $3 NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING))], (tok_table_or_col i_item_id)=[$0]) - HiveAggregate(group=[{4, 5, 6, 7, 8}], agg#0=[sum($1)]) - 
HiveJoin(condition=[=($2, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveAggregate(group=[{5, 6, 7, 8, 9}], agg#0=[sum($1)]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cs_item_sk=[$14], cs_ext_sales_price=[$22], cs_sold_date_sk=[$33]) HiveFilter(condition=[IS NOT NULL($33)]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5], i_class=[$10], i_category=[$12]) - HiveFilter(condition=[IN($12, _UTF-16LE'Books', _UTF-16LE'Jewelry', _UTF-16LE'Sports')]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-01-12 00:00:00:TIMESTAMP(9), 2001-02-11 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-01-12 00:00:00:TIMESTAMP(9), 2001-02-11 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5], i_class=[$10], i_category=[$12]) + HiveFilter(condition=[IN($12, _UTF-16LE'Books', _UTF-16LE'Jewelry', _UTF-16LE'Sports')]) + HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query21.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query21.q.out index 7c1d04844a5f..a6d2b3e4911c 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query21.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query21.q.out @@ -3,18 +3,18 @@ 
HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(x.w_warehouse_name=[$0], x.i_item_id=[$1], x.inv_before=[$2], x.inv_after=[$3]) HiveFilter(condition=[AND(CASE(>($2, 0), <=(6.66667E-1, /(CAST($3):DOUBLE, CAST($2):DOUBLE)), false), CASE(>($2, 0), <=(/(CAST($3):DOUBLE, CAST($2):DOUBLE), 1.5E0), false))]) HiveAggregate(group=[{0, 1}], agg#0=[sum($2)], agg#1=[sum($3)]) - HiveProject($f0=[$10], $f1=[$5], $f2=[CASE($7, $3, 0)], $f3=[CASE($8, $3, 0)]) - HiveJoin(condition=[=($2, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($4, $1)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3]) - HiveTableScan(table=[[default, inventory]], table:alias=[inventory]) - HiveProject(i_item_sk=[$0], i_item_id=[$1]) - HiveFilter(condition=[BETWEEN(false, $5, 0.99:DECIMAL(3, 2), 1.49:DECIMAL(3, 2))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(d_date_sk=[$0], EXPR$0=[<($2, 1998-04-08)], EXPR$1=[>=($2, 1998-04-08)]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-09 00:00:00:TIMESTAMP(9), 1998-05-08 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$1], $f1=[$10], $f2=[CASE($7, $5, 0)], $f3=[CASE($8, $5, 0)]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(w_warehouse_sk=[$0], w_warehouse_name=[$2]) HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse]) + HiveJoin(condition=[=($7, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3]) + 
HiveTableScan(table=[[default, inventory]], table:alias=[inventory]) + HiveProject(d_date_sk=[$0], EXPR$0=[<($2, 1998-04-08)], EXPR$1=[>=($2, 1998-04-08)]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-09 00:00:00:TIMESTAMP(9), 1998-05-08 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveFilter(condition=[BETWEEN(false, $5, 0.99:DECIMAL(3, 2), 1.49:DECIMAL(3, 2))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query32.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query32.q.out index 8ccb8381e302..4017680294ad 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query32.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query32.q.out @@ -1,18 +1,18 @@ CBO PLAN: HiveProject(excess discount amount=[$0]) HiveAggregate(group=[{}], agg#0=[sum($1)]) - HiveJoin(condition=[AND(=($6, $3), >($1, $5))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($4, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($6, $4), >($1, $5))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $2)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cs_item_sk=[$14], cs_ext_discount_amt=[$21], cs_sold_date_sk=[$33]) HiveFilter(condition=[AND(IS NOT NULL($21), IS NOT NULL($33))]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(i_item_sk=[$0]) - HiveFilter(condition=[=($13, 269)]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(d_date_sk=[$0]) - 
HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-18 00:00:00:TIMESTAMP(9), 1998-06-16 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-18 00:00:00:TIMESTAMP(9), 1998-06-16 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0]) + HiveFilter(condition=[=($13, 269)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(_o__c0=[*(1.3:DECIMAL(2, 1), CAST(/($1, $2)):DECIMAL(11, 6))], cs_item_sk=[$0]) HiveFilter(condition=[IS NOT NULL(CAST(/($1, $2)):DECIMAL(11, 6))]) HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[count($1)]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query37.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query37.q.out index cdac1bab8139..e2ad185eef6f 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query37.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query37.q.out @@ -1,14 +1,11 @@ CBO PLAN: HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveProject(i_item_id=[$0], i_item_desc=[$1], i_current_price=[$2]) - HiveAggregate(group=[{5, 6, 7}]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveAggregate(group=[{4, 5, 6}]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cs_item_sk=[$14]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-06-02 00:00:00:TIMESTAMP(9), 2001-08-01 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveJoin(condition=[=($6, 
$0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(inv_date_sk=[$0], inv_item_sk=[$1]) HiveFilter(condition=[BETWEEN(false, $3, 100, 500)]) @@ -16,4 +13,7 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5]) HiveFilter(condition=[AND(IN($13, 678, 849, 918, 964), BETWEEN(false, $5, 22:DECIMAL(12, 2), 52:DECIMAL(12, 2)))]) HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-06-02 00:00:00:TIMESTAMP(9), 2001-08-01 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query40.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query40.q.out index 6ddf309c5979..582967b597c1 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query40.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query40.q.out @@ -2,22 +2,22 @@ CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(w_state=[$0], i_item_id=[$1], sales_before=[$2], sales_after=[$3]) HiveAggregate(group=[{0, 1}], agg#0=[sum($2)], agg#1=[sum($3)]) - HiveProject($f0=[$14], $f1=[$9], $f2=[CASE($11, -($3, CASE(IS NOT NULL($7), $7, 0:DECIMAL(12, 2))), 0:DECIMAL(13, 2))], $f3=[CASE($12, -($3, CASE(IS NOT NULL($7), $7, 0:DECIMAL(12, 2))), 0:DECIMAL(13, 2))]) + HiveProject($f0=[$14], $f1=[$12], $f2=[CASE($9, -($3, CASE(IS NOT NULL($7), $7, 0:DECIMAL(12, 2))), 0:DECIMAL(13, 2))], $f3=[CASE($10, -($3, CASE(IS NOT NULL($7), $7, 0:DECIMAL(12, 2))), 0:DECIMAL(13, 2))]) HiveJoin(condition=[=($0, $13)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($4, $10)], joinType=[inner], algorithm=[none], 
cost=[not available]) - HiveJoin(condition=[=($8, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($11, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($4, $8)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[AND(=($1, $5), =($2, $6))], joinType=[left], algorithm=[none], cost=[not available]) HiveProject(cs_warehouse_sk=[$13], cs_item_sk=[$14], cs_order_number=[$16], cs_sales_price=[$20], cs_sold_date_sk=[$33]) HiveFilter(condition=[AND(IS NOT NULL($13), IS NOT NULL($33))]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) HiveProject(cr_item_sk=[$1], cr_order_number=[$15], cr_refunded_cash=[$22]) HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) - HiveProject(i_item_sk=[$0], i_item_id=[$1]) - HiveFilter(condition=[BETWEEN(false, $5, 0.99:DECIMAL(3, 2), 1.49:DECIMAL(3, 2))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(d_date_sk=[$0], EXPR$0=[<($2, 1998-04-08)], EXPR$1=[>=($2, 1998-04-08)]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-09 00:00:00:TIMESTAMP(9), 1998-05-08 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0], EXPR$0=[<($2, 1998-04-08)], EXPR$1=[>=($2, 1998-04-08)]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-09 00:00:00:TIMESTAMP(9), 1998-05-08 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveFilter(condition=[BETWEEN(false, $5, 0.99:DECIMAL(3, 2), 1.49:DECIMAL(3, 2))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(w_warehouse_sk=[$0], w_state=[$10]) HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query5.q.out 
b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query5.q.out index 3c0a718cb8fb..647a8170af6d 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query5.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query5.q.out @@ -5,9 +5,9 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(channel=[$0], id=[$1], sales=[$2], returns=[$3], profit=[$4]) HiveUnion(all=[true]) HiveProject(channel=[_UTF-16LE'store channel':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], id=[||(_UTF-16LE'store':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", $0)], sales=[$1], returns=[$3], profit=[-($2, $4)]) - HiveAggregate(group=[{7}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)], agg#3=[sum($5)]) - HiveJoin(condition=[=($1, $8)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveAggregate(group=[{8}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)], agg#3=[sum($5)]) + HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $6)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(store_sk=[$0], date_sk=[$1], sales_price=[$2], profit=[$3], return_amt=[$4], net_loss=[$5]) HiveUnion(all=[true]) HiveProject(store_sk=[$6], date_sk=[$22], sales_price=[$14], profit=[$21], return_amt=[CAST(0:DECIMAL(7, 2)):DECIMAL(7, 2)], net_loss=[CAST(0:DECIMAL(7, 2)):DECIMAL(7, 2)]) @@ -16,15 +16,15 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(store_sk=[$6], date_sk=[$19], sales_price=[CAST(0:DECIMAL(7, 2)):DECIMAL(7, 2)], profit=[CAST(0:DECIMAL(7, 2)):DECIMAL(7, 2)], return_amt=[$10], net_loss=[$18]) HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($19))]) HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveProject(s_store_sk=[$0], s_store_id=[$1]) - 
HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-08-18 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-08-18 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0], s_store_id=[$1]) + HiveTableScan(table=[[default, store]], table:alias=[store]) HiveProject(channel=[_UTF-16LE'catalog channel':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], id=[||(_UTF-16LE'catalog_page':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", $0)], sales=[$1], returns=[$3], profit=[-($2, $4)]) - HiveAggregate(group=[{7}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)], agg#3=[sum($5)]) - HiveJoin(condition=[=($1, $8)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveAggregate(group=[{8}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)], agg#3=[sum($5)]) + HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $6)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(page_sk=[$0], date_sk=[$1], sales_price=[$2], profit=[$3], return_amt=[$4], net_loss=[$5]) HiveUnion(all=[true]) HiveProject(page_sk=[$11], date_sk=[$33], sales_price=[$22], profit=[$32], return_amt=[CAST(0:DECIMAL(7, 2)):DECIMAL(7, 2)], net_loss=[CAST(0:DECIMAL(7, 2)):DECIMAL(7, 2)]) @@ -33,15 +33,15 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(page_sk=[$11], date_sk=[$26], sales_price=[CAST(0:DECIMAL(7, 2)):DECIMAL(7, 2)], profit=[CAST(0:DECIMAL(7, 2)):DECIMAL(7, 2)], return_amt=[$17], net_loss=[$25]) 
HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($26))]) HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) - HiveProject(cp_catalog_page_sk=[$0], cp_catalog_page_id=[$1]) - HiveTableScan(table=[[default, catalog_page]], table:alias=[catalog_page]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-08-18 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-08-18 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cp_catalog_page_sk=[$0], cp_catalog_page_id=[$1]) + HiveTableScan(table=[[default, catalog_page]], table:alias=[catalog_page]) HiveProject(channel=[_UTF-16LE'web channel':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], id=[||(_UTF-16LE'web_site':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", $0)], sales=[$1], returns=[$3], profit=[-($2, $4)]) - HiveAggregate(group=[{7}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)], agg#3=[sum($5)]) - HiveJoin(condition=[=($1, $8)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveAggregate(group=[{8}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)], agg#3=[sum($5)]) + HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $6)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(wsr_web_site_sk=[$0], date_sk=[$1], sales_price=[$2], profit=[$3], return_amt=[$4], net_loss=[$5]) HiveUnion(all=[true]) HiveProject(wsr_web_site_sk=[$12], date_sk=[$33], sales_price=[$22], profit=[$32], return_amt=[CAST(0:DECIMAL(7, 2)):DECIMAL(7, 2)], net_loss=[CAST(0:DECIMAL(7, 2)):DECIMAL(7, 2)]) @@ -55,9 +55,9 @@ 
HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(wr_item_sk=[$1], wr_order_number=[$12], wr_return_amt=[$14], wr_net_loss=[$22], wr_returned_date_sk=[$23]) HiveFilter(condition=[IS NOT NULL($23)]) HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) - HiveProject(web_site_sk=[$0], web_site_id=[$1]) - HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-08-18 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-08-18 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(web_site_sk=[$0], web_site_id=[$1]) + HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query58.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query58.q.out index 9664e4762b89..de98d243fa41 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query58.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query58.q.out @@ -1,4 +1,5 @@ -Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 5' is a cross product +Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Reducer 5' is a cross product +Warning: Map Join MAPJOIN[380][bigTable=?] 
in task 'Reducer 6' is a cross product CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(ss_items.item_id=[$4], ss_item_rev=[$7], ss_dev=[*(/(/($7, +(+($7, $5), $1)), 3:DECIMAL(10, 0)), 100:DECIMAL(10, 0))], cs_item_rev=[$5], cs_dev=[*(/(/($5, +(+($7, $5), $1)), 3:DECIMAL(10, 0)), 100:DECIMAL(10, 0))], ws_item_rev=[$1], ws_dev=[*(/(/($1, +(+($7, $5), $1)), 3:DECIMAL(10, 0)), 100:DECIMAL(10, 0))], average=[/(+(+($7, $5), $1), 3:DECIMAL(10, 0))]) @@ -16,19 +17,19 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_date=[$0]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date=[$2], d_week_seq=[$4]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(cnt=[$0]) HiveFilter(condition=[sq_count_check($0)]) HiveProject(cnt=[$0]) HiveAggregate(group=[{}], cnt=[COUNT()]) HiveFilter(condition=[=($2, 1998-02-19)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveJoin(condition=[AND(=($2, $0), BETWEEN(false, $3, *(0.9:DECIMAL(1, 1), $1), *(1.1:DECIMAL(2, 1), $1)), 
BETWEEN(false, $1, *(0.9:DECIMAL(1, 1), $3), *(1.1:DECIMAL(2, 1), $3)))], joinType=[inner], algorithm=[none], cost=[not available]) @@ -45,19 +46,19 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_date=[$0]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date=[$2], d_week_seq=[$4]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(cnt=[$0]) HiveFilter(condition=[sq_count_check($0)]) HiveProject(cnt=[$0]) HiveAggregate(group=[{}], cnt=[COUNT()]) HiveFilter(condition=[=($2, 1998-02-19)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(i_item_id=[$0], $f1=[$1]) @@ -73,19 +74,19 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_date=[$0]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], 
table:alias=[date_dim]) HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date=[$2], d_week_seq=[$4]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(cnt=[$0]) HiveFilter(condition=[sq_count_check($0)]) HiveProject(cnt=[$0]) HiveAggregate(group=[{}], cnt=[COUNT()]) HiveFilter(condition=[=($2, 1998-02-19)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query80.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query80.q.out index a518625aad5e..2fea65b34ef5 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query80.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query80.q.out @@ -8,72 +8,72 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[sum($2)], agg#2=[sum($3)]) HiveProject($f0=[$15], $f1=[$4], $f2=[CASE(IS NOT NULL($9), $9, 0:DECIMAL(12, 2))], $f3=[-($5, CASE(IS NOT NULL($10), $10, 0:DECIMAL(12, 2)))]) HiveJoin(condition=[=($1, $14)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($6, $13)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $12)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + 
HiveJoin(condition=[=($2, $13)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $12)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($6, $11)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[AND(=($0, $7), =($3, $8))], joinType=[left], algorithm=[none], cost=[not available]) HiveProject(ss_item_sk=[$1], ss_store_sk=[$6], ss_promo_sk=[$7], ss_ticket_number=[$8], ss_ext_sales_price=[$14], ss_net_profit=[$21], ss_sold_date_sk=[$22]) HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($7), IS NOT NULL($22))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(sr_item_sk=[$1], sr_ticket_number=[$8], sr_return_amt=[$10], sr_net_loss=[$18]) HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveProject(i_item_sk=[$0]) - HiveFilter(condition=[>($5, 50:DECIMAL(2, 0))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(p_promo_sk=[$0]) - HiveFilter(condition=[=($11, _UTF-16LE'N')]) - HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-09-03 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-09-03 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0]) + HiveFilter(condition=[>($5, 50:DECIMAL(2, 0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(p_promo_sk=[$0]) + HiveFilter(condition=[=($11, _UTF-16LE'N')]) + HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) HiveProject(s_store_sk=[$0], s_store_id=[$1]) HiveTableScan(table=[[default, store]], 
table:alias=[store]) HiveProject(channel=[_UTF-16LE'catalog channel':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], id=[||(_UTF-16LE'catalog_page':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", $0)], sales=[$1], returns=[$2], profit=[$3]) HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[sum($2)], agg#2=[sum($3)]) HiveProject($f0=[$15], $f1=[$4], $f2=[CASE(IS NOT NULL($9), $9, 0:DECIMAL(12, 2))], $f3=[-($5, CASE(IS NOT NULL($10), $10, 0:DECIMAL(12, 2)))]) HiveJoin(condition=[=($0, $14)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($6, $13)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $12)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $13)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $12)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($6, $11)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[AND(=($1, $7), =($3, $8))], joinType=[left], algorithm=[none], cost=[not available]) HiveProject(cs_catalog_page_sk=[$11], cs_item_sk=[$14], cs_promo_sk=[$15], cs_order_number=[$16], cs_ext_sales_price=[$22], cs_net_profit=[$32], cs_sold_date_sk=[$33]) HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($15), IS NOT NULL($33))]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) HiveProject(cr_item_sk=[$1], cr_order_number=[$15], cr_return_amount=[$17], cr_net_loss=[$25]) HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) - HiveProject(i_item_sk=[$0]) - HiveFilter(condition=[>($5, 50:DECIMAL(2, 0))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(p_promo_sk=[$0]) - HiveFilter(condition=[=($11, _UTF-16LE'N')]) - HiveTableScan(table=[[default, promotion]], 
table:alias=[promotion]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-09-03 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-09-03 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0]) + HiveFilter(condition=[>($5, 50:DECIMAL(2, 0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(p_promo_sk=[$0]) + HiveFilter(condition=[=($11, _UTF-16LE'N')]) + HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) HiveProject(cp_catalog_page_sk=[$0], cp_catalog_page_id=[$1]) HiveTableScan(table=[[default, catalog_page]], table:alias=[catalog_page]) HiveProject(channel=[_UTF-16LE'web channel':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], id=[||(_UTF-16LE'web_site':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", $0)], sales=[$1], returns=[$2], profit=[$3]) HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[sum($2)], agg#2=[sum($3)]) HiveProject($f0=[$15], $f1=[$4], $f2=[CASE(IS NOT NULL($9), $9, 0:DECIMAL(12, 2))], $f3=[-($5, CASE(IS NOT NULL($10), $10, 0:DECIMAL(12, 2)))]) HiveJoin(condition=[=($1, $14)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($6, $13)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $12)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $13)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $12)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($6, $11)], joinType=[inner], algorithm=[none], cost=[not available]) 
HiveJoin(condition=[AND(=($0, $7), =($3, $8))], joinType=[left], algorithm=[none], cost=[not available]) HiveProject(ws_item_sk=[$2], ws_web_site_sk=[$12], ws_promo_sk=[$15], ws_order_number=[$16], ws_ext_sales_price=[$22], ws_net_profit=[$32], ws_sold_date_sk=[$33]) HiveFilter(condition=[AND(IS NOT NULL($12), IS NOT NULL($15), IS NOT NULL($33))]) HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) HiveProject(wr_item_sk=[$1], wr_order_number=[$12], wr_return_amt=[$14], wr_net_loss=[$22]) HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) - HiveProject(i_item_sk=[$0]) - HiveFilter(condition=[>($5, 50:DECIMAL(2, 0))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(p_promo_sk=[$0]) - HiveFilter(condition=[=($11, _UTF-16LE'N')]) - HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-09-03 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-09-03 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0]) + HiveFilter(condition=[>($5, 50:DECIMAL(2, 0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(p_promo_sk=[$0]) + HiveFilter(condition=[=($11, _UTF-16LE'N')]) + HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) HiveProject(web_site_sk=[$0], web_site_id=[$1]) HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query82.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query82.q.out index 9f9be64c934f..44172d451daa 100644 --- 
a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query82.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query82.q.out @@ -1,14 +1,11 @@ CBO PLAN: HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveProject(i_item_id=[$0], i_item_desc=[$1], i_current_price=[$2]) - HiveAggregate(group=[{5, 6, 7}]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveAggregate(group=[{4, 5, 6}]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ss_item_sk=[$1]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2002-05-30 00:00:00:TIMESTAMP(9), 2002-07-29 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(inv_date_sk=[$0], inv_item_sk=[$1]) HiveFilter(condition=[BETWEEN(false, $3, 100, 500)]) @@ -16,4 +13,7 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5]) HiveFilter(condition=[AND(IN($13, 129, 437, 663, 727), BETWEEN(false, $5, 30:DECIMAL(12, 2), 60:DECIMAL(12, 2)))]) HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2002-05-30 00:00:00:TIMESTAMP(9), 2002-07-29 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query92.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query92.q.out index 
29e3cfdc3ff4..5e5810049a88 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query92.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query92.q.out @@ -1,26 +1,26 @@ CBO PLAN: HiveProject(excess discount amount=[$0]) - HiveAggregate(group=[{}], agg#0=[sum($2)]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-18 00:00:00:TIMESTAMP(9), 1998-06-16 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveJoin(condition=[AND(=($5, $3), >($1, $4))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveAggregate(group=[{}], agg#0=[sum($1)]) + HiveJoin(condition=[AND(=($6, $4), >($1, $5))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $2)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ws_item_sk=[$2], ws_ext_discount_amt=[$21], ws_sold_date_sk=[$33]) HiveFilter(condition=[AND(IS NOT NULL($21), IS NOT NULL($33))]) HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(i_item_sk=[$0]) - HiveFilter(condition=[=($13, 269)]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(_o__c0=[*(1.3:DECIMAL(2, 1), CAST(/($1, $2)):DECIMAL(11, 6))], ws_item_sk=[$0]) - HiveFilter(condition=[IS NOT NULL(CAST(/($1, $2)):DECIMAL(11, 6))]) - HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[count($1)]) - HiveJoin(condition=[=($3, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_item_sk=[$2], ws_ext_discount_amt=[$21], ws_sold_date_sk=[$33]) - HiveFilter(condition=[IS NOT NULL($33)]) - HiveTableScan(table=[[default, web_sales]], 
table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-18 00:00:00:TIMESTAMP(9), 1998-06-16 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-18 00:00:00:TIMESTAMP(9), 1998-06-16 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0]) + HiveFilter(condition=[=($13, 269)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(_o__c0=[*(1.3:DECIMAL(2, 1), CAST(/($1, $2)):DECIMAL(11, 6))], ws_item_sk=[$0]) + HiveFilter(condition=[IS NOT NULL(CAST(/($1, $2)):DECIMAL(11, 6))]) + HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[count($1)]) + HiveJoin(condition=[=($3, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_item_sk=[$2], ws_ext_discount_amt=[$21], ws_sold_date_sk=[$33]) + HiveFilter(condition=[IS NOT NULL($33)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-18 00:00:00:TIMESTAMP(9), 1998-06-16 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query94.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query94.q.out index 982bd647bb78..5f14c7b74791 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query94.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query94.q.out @@ -3,22 +3,22 @@ HiveProject(order count=[$0], total shipping cost=[$1], total net profit=[$2]) HiveAggregate(group=[{}], agg#0=[count(DISTINCT $4)], agg#1=[sum($5)], agg#2=[sum($6)]) HiveAntiJoin(condition=[=($4, $14)], joinType=[anti]) HiveSemiJoin(condition=[AND(=($4, $14), 
<>($3, $13))], joinType=[semi]) - HiveProject(ws_ship_date_sk=[$0], ws_ship_addr_sk=[$1], ws_web_site_sk=[$2], ws_warehouse_sk=[$3], ws_order_number=[$4], ws_ext_ship_cost=[$5], ws_net_profit=[$6], d_date_sk=[$11], d_date=[$12], ca_address_sk=[$7], ca_state=[$8], web_site_sk=[$9], web_company_name=[$10]) - HiveJoin(condition=[=($0, $11)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_ship_date_sk=[$2], ws_ship_addr_sk=[$3], ws_web_site_sk=[$4], ws_warehouse_sk=[$5], ws_order_number=[$6], ws_ext_ship_cost=[$7], ws_net_profit=[$8], d_date_sk=[$9], d_date=[$10], ca_address_sk=[$0], ca_state=[$1], web_site_sk=[$11], web_company_name=[$12]) + HiveJoin(condition=[=($4, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_state=[CAST(_UTF-16LE'TX'):CHAR(2) CHARACTER SET "UTF-16LE"]) + HiveFilter(condition=[=($8, _UTF-16LE'TX')]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ws_ship_date_sk=[$1], ws_ship_addr_sk=[$10], ws_web_site_sk=[$12], ws_warehouse_sk=[$14], ws_order_number=[$16], ws_ext_ship_cost=[$27], ws_net_profit=[$32]) HiveFilter(condition=[AND(IS NOT NULL($10), IS NOT NULL($12), IS NOT NULL($1))]) HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) - HiveProject(ca_address_sk=[$0], ca_state=[CAST(_UTF-16LE'TX'):CHAR(2) CHARACTER SET "UTF-16LE"]) - HiveFilter(condition=[=($8, _UTF-16LE'TX')]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveProject(web_site_sk=[$0], web_company_name=[CAST(_UTF-16LE'pri '):CHAR(50) 
CHARACTER SET "UTF-16LE"]) - HiveFilter(condition=[=($14, _UTF-16LE'pri ')]) - HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1999-05-01 00:00:00:TIMESTAMP(9), 1999-06-30 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1999-05-01 00:00:00:TIMESTAMP(9), 1999-06-30 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(web_site_sk=[$0], web_company_name=[CAST(_UTF-16LE'pri '):CHAR(50) CHARACTER SET "UTF-16LE"]) + HiveFilter(condition=[=($14, _UTF-16LE'pri ')]) + HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) HiveProject(ws_warehouse_sk=[$14], ws_order_number=[$16]) HiveFilter(condition=[IS NOT NULL($14)]) HiveTableScan(table=[[default, web_sales]], table:alias=[ws2]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query95.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query95.q.out index 9d39c369316e..57eaa4112026 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query95.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query95.q.out @@ -3,22 +3,22 @@ HiveProject(order count=[$0], total shipping cost=[$1], total net profit=[$2]) HiveAggregate(group=[{}], agg#0=[count(DISTINCT $3)], agg#1=[sum($4)], agg#2=[sum($5)]) HiveSemiJoin(condition=[=($3, $12)], joinType=[semi]) HiveSemiJoin(condition=[=($3, $12)], joinType=[semi]) - HiveProject(ws_ship_date_sk=[$0], ws_ship_addr_sk=[$1], ws_web_site_sk=[$2], ws_order_number=[$3], ws_ext_ship_cost=[$4], ws_net_profit=[$5], d_date_sk=[$10], d_date=[$11], ca_address_sk=[$6], ca_state=[$7], web_site_sk=[$8], web_company_name=[$9]) - HiveJoin(condition=[=($0, $10)], joinType=[inner], algorithm=[none], 
cost=[not available]) - HiveJoin(condition=[=($2, $8)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_ship_date_sk=[$2], ws_ship_addr_sk=[$3], ws_web_site_sk=[$4], ws_order_number=[$5], ws_ext_ship_cost=[$6], ws_net_profit=[$7], d_date_sk=[$8], d_date=[$9], ca_address_sk=[$0], ca_state=[$1], web_site_sk=[$10], web_company_name=[$11]) + HiveJoin(condition=[=($4, $10)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_state=[CAST(_UTF-16LE'TX'):CHAR(2) CHARACTER SET "UTF-16LE"]) + HiveFilter(condition=[=($8, _UTF-16LE'TX')]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ws_ship_date_sk=[$1], ws_ship_addr_sk=[$10], ws_web_site_sk=[$12], ws_order_number=[$16], ws_ext_ship_cost=[$27], ws_net_profit=[$32]) HiveFilter(condition=[AND(IS NOT NULL($10), IS NOT NULL($12), IS NOT NULL($1))]) HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) - HiveProject(ca_address_sk=[$0], ca_state=[CAST(_UTF-16LE'TX'):CHAR(2) CHARACTER SET "UTF-16LE"]) - HiveFilter(condition=[=($8, _UTF-16LE'TX')]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveProject(web_site_sk=[$0], web_company_name=[CAST(_UTF-16LE'pri '):CHAR(50) CHARACTER SET "UTF-16LE"]) - HiveFilter(condition=[=($14, _UTF-16LE'pri ')]) - HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1999-05-01 00:00:00:TIMESTAMP(9), 1999-06-30 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0], 
d_date=[$2]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1999-05-01 00:00:00:TIMESTAMP(9), 1999-06-30 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(web_site_sk=[$0], web_company_name=[CAST(_UTF-16LE'pri '):CHAR(50) CHARACTER SET "UTF-16LE"]) + HiveFilter(condition=[=($14, _UTF-16LE'pri ')]) + HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) HiveProject(ws_order_number=[$1]) HiveJoin(condition=[AND(=($1, $3), <>($0, $2))], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ws_warehouse_sk=[$14], ws_order_number=[$16]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query98.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query98.q.out index 680a11e2bde1..e6db70d26a68 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query98.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query98.q.out @@ -2,16 +2,16 @@ CBO PLAN: HiveProject(i_item_desc=[$0], i_category=[$1], i_class=[$2], i_current_price=[$3], itemrevenue=[$4], revenueratio=[$5]) HiveSortLimit(sort0=[$1], sort1=[$2], sort2=[$6], sort3=[$0], sort4=[$5], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC]) HiveProject(i_item_desc=[$1], i_category=[$4], i_class=[$3], i_current_price=[$2], itemrevenue=[$5], revenueratio=[/(*($5, 100:DECIMAL(10, 0)), sum($5) OVER (PARTITION BY $3 ORDER BY $3 NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING))], (tok_table_or_col i_item_id)=[$0]) - HiveAggregate(group=[{4, 5, 6, 7, 8}], agg#0=[sum($1)]) - HiveJoin(condition=[=($2, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveAggregate(group=[{5, 6, 7, 8, 9}], agg#0=[sum($1)]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $3)], 
joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ss_item_sk=[$1], ss_ext_sales_price=[$14], ss_sold_date_sk=[$22]) HiveFilter(condition=[IS NOT NULL($22)]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5], i_class=[$10], i_category=[$12]) - HiveFilter(condition=[IN($12, _UTF-16LE'Books', _UTF-16LE'Jewelry', _UTF-16LE'Sports')]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-01-12 00:00:00:TIMESTAMP(9), 2001-02-11 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-01-12 00:00:00:TIMESTAMP(9), 2001-02-11 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5], i_class=[$10], i_category=[$12]) + HiveFilter(condition=[IN($12, _UTF-16LE'Books', _UTF-16LE'Jewelry', _UTF-16LE'Sports')]) + HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query12.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query12.q.out index 8d5eeb1dcade..973a5e460ab8 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query12.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query12.q.out @@ -17,7 +17,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: web_sales - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_56_container, bigKeyColName:ws_item_sk, smallTablePos:1, keyRatio:0.2727272808816537 + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_52_container, bigKeyColName:ws_item_sk, smallTablePos:1, keyRatio:0.030300956815565664 Statistics: Num rows: 21594638446 Data size: 2763811113552 
Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ws_item_sk (type: bigint), ws_ext_sales_price (type: decimal(7,2)), ws_sold_date_sk (type: bigint) @@ -27,26 +27,26 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: bigint) + 0 _col2 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col1, _col2, _col4, _col5, _col6, _col7, _col8 + outputColumnNames: _col0, _col1 input vertices: 1 Map 5 - Statistics: Num rows: 5889447025 Data size: 4110531380410 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2399240019 Data size: 287606194744 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: bigint) + 0 _col0 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col1, _col4, _col5, _col6, _col7, _col8 + outputColumnNames: _col1, _col5, _col6, _col7, _col8, _col9 input vertices: 1 Map 6 Statistics: Num rows: 654338207 Data size: 451190719790 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) - keys: _col8 (type: char(50)), _col7 (type: char(50)), _col4 (type: string), _col5 (type: varchar(200)), _col6 (type: decimal(7,2)) - minReductionHashAggr: 0.98058045 + keys: _col9 (type: char(50)), _col8 (type: char(50)), _col5 (type: string), _col6 (type: varchar(200)), _col7 (type: decimal(7,2)) + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 126000 Data size: 86940000 Basic stats: COMPLETE Column stats: COMPLETE @@ -60,28 +60,6 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 5 - Map Operator Tree: - TableScan - alias: item - filterExpr: (i_category) IN ('Books ', 'Jewelry ', 'Sports ') (type: boolean) - Statistics: Num rows: 462000 Data size: 270601408 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (i_category) IN ('Books ', 'Jewelry ', 'Sports ') (type: boolean) - 
Statistics: Num rows: 126000 Data size: 73800496 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: i_item_sk (type: bigint), i_item_id (type: string), i_item_desc (type: varchar(200)), i_current_price (type: decimal(7,2)), i_class (type: char(50)), i_category (type: char(50)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 126000 Data size: 73800496 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 126000 Data size: 73800496 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col2 (type: varchar(200)), _col3 (type: decimal(7,2)), _col4 (type: char(50)), _col5 (type: char(50)) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map 6 Map Operator Tree: TableScan alias: date_dim @@ -118,6 +96,28 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: may be used (ACID table) + Map 6 + Map Operator Tree: + TableScan + alias: item + filterExpr: (i_category) IN ('Books ', 'Jewelry ', 'Sports ') (type: boolean) + Statistics: Num rows: 462000 Data size: 270601408 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (i_category) IN ('Books ', 'Jewelry ', 'Sports ') (type: boolean) + Statistics: Num rows: 126000 Data size: 73800496 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: i_item_sk (type: bigint), i_item_id (type: string), i_item_desc (type: varchar(200)), i_current_price (type: decimal(7,2)), i_class (type: char(50)), i_category (type: char(50)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 126000 Data size: 73800496 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: 
+ + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 126000 Data size: 73800496 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: varchar(200)), _col3 (type: decimal(7,2)), _col4 (type: char(50)), _col5 (type: char(50)) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) Reducer 2 Execution mode: vectorized, llap Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query16.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query16.q.out index 9cd4986a6e30..0fe6be75d613 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query16.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query16.q.out @@ -23,7 +23,7 @@ STAGE PLANS: TableScan alias: cs1 filterExpr: (cs_ship_addr_sk is not null and cs_ship_date_sk is not null and cs_call_center_sk is not null) (type: boolean) - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_119_container, bigKeyColName:cs_call_center_sk, smallTablePos:1, keyRatio:1.8509578697501366E-10 + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_120_container, bigKeyColName:cs_call_center_sk, smallTablePos:1, keyRatio:4.13026255875998E-4 Statistics: Num rows: 43220864887 Data size: 11379157992136 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (cs_ship_addr_sk is not null and cs_ship_date_sk is not null and cs_call_center_sk is not null) (type: boolean) @@ -36,27 +36,27 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: bigint) + 0 _col0 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6 + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6 input vertices: 1 Map 8 - Statistics: Num rows: 803365808 Data size: 176672786488 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4730608045 Data size: 1182045115232 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator 
condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: bigint) + 0 _col1 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col0, _col3, _col4, _col5, _col6 + outputColumnNames: _col2, _col3, _col4, _col5, _col6 input vertices: 1 Map 9 - Statistics: Num rows: 160673164 Data size: 19280779808 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 89256757 Data size: 10710810968 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: bigint) + 0 _col2 (type: bigint) 1 _col0 (type: bigint) outputColumnNames: _col3, _col4, _col5, _col6 input vertices: @@ -89,22 +89,22 @@ STAGE PLANS: Map 10 Map Operator Tree: TableScan - alias: date_dim - filterExpr: CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-04-01 00:00:00' AND TIMESTAMP'2001-05-31 00:00:00' (type: boolean) - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + alias: call_center + filterExpr: (cc_county) IN ('Daviess County', 'Franklin Parish', 'Huron County', 'Levy County', 'Ziebach County') (type: boolean) + Statistics: Num rows: 60 Data size: 6360 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-04-01 00:00:00' AND TIMESTAMP'2001-05-31 00:00:00' (type: boolean) - Statistics: Num rows: 8116 Data size: 519424 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (cc_county) IN ('Daviess County', 'Franklin Parish', 'Huron County', 'Levy County', 'Ziebach County') (type: boolean) + Statistics: Num rows: 12 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: d_date_sk (type: bigint) + expressions: cc_call_center_sk (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: 
_col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 11 @@ -165,43 +165,43 @@ STAGE PLANS: Map 8 Map Operator Tree: TableScan - alias: customer_address - filterExpr: (ca_state = 'NY') (type: boolean) - Statistics: Num rows: 40000000 Data size: 3760000000 Basic stats: COMPLETE Column stats: COMPLETE + alias: date_dim + filterExpr: CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-04-01 00:00:00' AND TIMESTAMP'2001-05-31 00:00:00' (type: boolean) + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (ca_state = 'NY') (type: boolean) - Statistics: Num rows: 754717 Data size: 70943398 Basic stats: COMPLETE Column stats: COMPLETE + predicate: CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-04-01 00:00:00' AND TIMESTAMP'2001-05-31 00:00:00' (type: boolean) + Statistics: Num rows: 8116 Data size: 519424 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: ca_address_sk (type: bigint) + expressions: d_date_sk (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 754717 Data size: 6037736 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 754717 Data size: 6037736 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 9 Map Operator Tree: TableScan - alias: call_center - 
filterExpr: (cc_county) IN ('Daviess County', 'Franklin Parish', 'Huron County', 'Levy County', 'Ziebach County') (type: boolean) - Statistics: Num rows: 60 Data size: 6360 Basic stats: COMPLETE Column stats: COMPLETE + alias: customer_address + filterExpr: (ca_state = 'NY') (type: boolean) + Statistics: Num rows: 40000000 Data size: 3760000000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (cc_county) IN ('Daviess County', 'Franklin Parish', 'Huron County', 'Levy County', 'Ziebach County') (type: boolean) - Statistics: Num rows: 12 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (ca_state = 'NY') (type: boolean) + Statistics: Num rows: 754717 Data size: 70943398 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: cc_call_center_sk (type: bigint) + expressions: ca_address_sk (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 754717 Data size: 6037736 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 754717 Data size: 6037736 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Reducer 2 diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query20.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query20.q.out index e7dac6389994..2c85d5ee0f7b 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query20.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query20.q.out @@ -17,7 +17,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: catalog_sales - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_56_container, bigKeyColName:cs_item_sk, 
smallTablePos:1, keyRatio:0.2727272808721824 + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_52_container, bigKeyColName:cs_item_sk, smallTablePos:1, keyRatio:0.030300956805910575 Statistics: Num rows: 43005109025 Data size: 5492607208208 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cs_item_sk (type: bigint), cs_ext_sales_price (type: decimal(7,2)), cs_sold_date_sk (type: bigint) @@ -27,26 +27,26 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: bigint) + 0 _col2 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col1, _col2, _col4, _col5, _col6, _col7, _col8 + outputColumnNames: _col0, _col1 input vertices: 1 Map 5 - Statistics: Num rows: 11728666448 Data size: 8174562398208 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4778018342 Data size: 561315454048 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: bigint) + 0 _col0 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col1, _col4, _col5, _col6, _col7, _col8 + outputColumnNames: _col1, _col5, _col6, _col7, _col8, _col9 input vertices: 1 Map 6 Statistics: Num rows: 1303095951 Data size: 887089423694 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) - keys: _col8 (type: char(50)), _col7 (type: char(50)), _col4 (type: string), _col5 (type: varchar(200)), _col6 (type: decimal(7,2)) - minReductionHashAggr: 0.99 + keys: _col9 (type: char(50)), _col8 (type: char(50)), _col5 (type: string), _col6 (type: varchar(200)), _col7 (type: decimal(7,2)) + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 126000 Data size: 86940000 Basic stats: COMPLETE Column stats: COMPLETE @@ -60,28 +60,6 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 5 - Map Operator Tree: - TableScan - alias: item - filterExpr: (i_category) IN ('Books ', 
'Jewelry ', 'Sports ') (type: boolean) - Statistics: Num rows: 462000 Data size: 270601408 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (i_category) IN ('Books ', 'Jewelry ', 'Sports ') (type: boolean) - Statistics: Num rows: 126000 Data size: 73800496 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: i_item_sk (type: bigint), i_item_id (type: string), i_item_desc (type: varchar(200)), i_current_price (type: decimal(7,2)), i_class (type: char(50)), i_category (type: char(50)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 126000 Data size: 73800496 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 126000 Data size: 73800496 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col2 (type: varchar(200)), _col3 (type: decimal(7,2)), _col4 (type: char(50)), _col5 (type: char(50)) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map 6 Map Operator Tree: TableScan alias: date_dim @@ -118,6 +96,28 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: may be used (ACID table) + Map 6 + Map Operator Tree: + TableScan + alias: item + filterExpr: (i_category) IN ('Books ', 'Jewelry ', 'Sports ') (type: boolean) + Statistics: Num rows: 462000 Data size: 270601408 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (i_category) IN ('Books ', 'Jewelry ', 'Sports ') (type: boolean) + Statistics: Num rows: 126000 Data size: 73800496 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: i_item_sk (type: bigint), i_item_id (type: string), i_item_desc (type: varchar(200)), i_current_price (type: decimal(7,2)), i_class (type: char(50)), i_category (type: char(50)) + outputColumnNames: 
_col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 126000 Data size: 73800496 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 126000 Data size: 73800496 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: varchar(200)), _col3 (type: decimal(7,2)), _col4 (type: char(50)), _col5 (type: char(50)) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) Reducer 2 Execution mode: vectorized, llap Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query21.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query21.q.out index b8ecf93fa13f..2220fbc9ec45 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query21.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query21.q.out @@ -16,7 +16,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: inventory - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_74_container, bigKeyColName:inv_item_sk, smallTablePos:1, keyRatio:2.457218293744475E-9 + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_75_container, bigKeyColName:inv_item_sk, smallTablePos:1, keyRatio:0.0015429438826629121 Statistics: Num rows: 1627857000 Data size: 45254407088 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: inv_date_sk (type: bigint), inv_item_sk (type: bigint), inv_warehouse_sk (type: bigint), inv_quantity_on_hand (type: int) @@ -26,94 +26,94 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: bigint) + 0 _col0 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col0, _col2, _col3, _col5 + outputColumnNames: _col1, _col2, _col3, _col5, _col6 input vertices: 1 Map 4 - Statistics: Num rows: 22606776 Data size: 2622386020 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: 
Num rows: 180860619 Data size: 4738508420 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: bigint) + 0 _col1 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col2, _col3, _col5, _col7, _col8 + outputColumnNames: _col2, _col3, _col5, _col6, _col8 input vertices: 1 Map 5 - Statistics: Num rows: 2511693 Data size: 291356392 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2511692 Data size: 291356276 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col2 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col3, _col5, _col7, _col8, _col10 + outputColumnNames: _col3, _col5, _col6, _col8, _col10 input vertices: 1 Map 6 - Statistics: Num rows: 2511693 Data size: 522432148 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2511692 Data size: 522431940 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col10 (type: varchar(20)), _col5 (type: string), if(_col7, _col3, 0) (type: int), if(_col8, _col3, 0) (type: int) + expressions: _col10 (type: varchar(20)), _col8 (type: string), if(_col5, _col3, 0) (type: int), if(_col6, _col3, 0) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2511693 Data size: 522432148 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2511692 Data size: 522431940 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col2), sum(_col3) keys: _col0 (type: varchar(20)), _col1 (type: string) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.9867269 mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 57753 Data size: 12474648 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 519696 Data size: 112254336 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: varchar(20)), _col1 
(type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: varchar(20)), _col1 (type: string) - Statistics: Num rows: 57753 Data size: 12474648 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 519696 Data size: 112254336 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 4 Map Operator Tree: TableScan - alias: item - filterExpr: i_current_price BETWEEN 0.99 AND 1.49 (type: boolean) - Statistics: Num rows: 462000 Data size: 101509408 Basic stats: COMPLETE Column stats: COMPLETE + alias: date_dim + filterExpr: CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-09 00:00:00' AND TIMESTAMP'1998-05-08 00:00:00' (type: boolean) + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: i_current_price BETWEEN 0.99 AND 1.49 (type: boolean) - Statistics: Num rows: 6416 Data size: 1409840 Basic stats: COMPLETE Column stats: COMPLETE + predicate: CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-09 00:00:00' AND TIMESTAMP'1998-05-08 00:00:00' (type: boolean) + Statistics: Num rows: 8116 Data size: 519424 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: i_item_sk (type: bigint), i_item_id (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6416 Data size: 692928 Basic stats: COMPLETE Column stats: COMPLETE + expressions: d_date_sk (type: bigint), (d_date < DATE'1998-04-08') (type: boolean), (d_date >= DATE'1998-04-08') (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 8116 Data size: 129856 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 6416 Data size: 692928 Basic stats: COMPLETE Column 
stats: COMPLETE - value expressions: _col1 (type: string) + Statistics: Num rows: 8116 Data size: 129856 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: boolean), _col2 (type: boolean) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 5 Map Operator Tree: TableScan - alias: date_dim - filterExpr: CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-09 00:00:00' AND TIMESTAMP'1998-05-08 00:00:00' (type: boolean) - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + alias: item + filterExpr: i_current_price BETWEEN 0.99 AND 1.49 (type: boolean) + Statistics: Num rows: 462000 Data size: 101509408 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-09 00:00:00' AND TIMESTAMP'1998-05-08 00:00:00' (type: boolean) - Statistics: Num rows: 8116 Data size: 519424 Basic stats: COMPLETE Column stats: COMPLETE + predicate: i_current_price BETWEEN 0.99 AND 1.49 (type: boolean) + Statistics: Num rows: 6416 Data size: 1409840 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: d_date_sk (type: bigint), (d_date < DATE'1998-04-08') (type: boolean), (d_date >= DATE'1998-04-08') (type: boolean) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8116 Data size: 129856 Basic stats: COMPLETE Column stats: COMPLETE + expressions: i_item_sk (type: bigint), i_item_id (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6416 Data size: 692928 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 8116 Data size: 129856 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: boolean), _col2 (type: boolean) + Statistics: Num rows: 6416 Data size: 692928 Basic stats: COMPLETE Column stats: 
COMPLETE + value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 6 @@ -142,21 +142,21 @@ STAGE PLANS: keys: KEY._col0 (type: varchar(20)), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 19251 Data size: 4158216 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 173232 Data size: 37418112 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (if((_col2 > 0L), (0.666667D <= (UDFToDouble(_col3) / UDFToDouble(_col2))), false) and if((_col2 > 0L), ((UDFToDouble(_col3) / UDFToDouble(_col2)) <= 1.5D), false)) (type: boolean) - Statistics: Num rows: 4812 Data size: 1039392 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 43308 Data size: 9354528 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: ++ keys: _col0 (type: varchar(20)), _col1 (type: string) null sort order: zz - Statistics: Num rows: 4812 Data size: 1039392 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 43308 Data size: 9354528 Basic stats: COMPLETE Column stats: COMPLETE top n: 100 Reduce Output Operator key expressions: _col0 (type: varchar(20)), _col1 (type: string) null sort order: zz sort order: ++ - Statistics: Num rows: 4812 Data size: 1039392 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 43308 Data size: 9354528 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint), _col3 (type: bigint) Reducer 3 Execution mode: vectorized, llap @@ -164,7 +164,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: varchar(20)), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 4812 Data size: 1039392 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 43308 Data size: 9354528 Basic stats: COMPLETE 
Column stats: COMPLETE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 21600 Basic stats: COMPLETE Column stats: COMPLETE diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query23.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query23.q.out index 1e33338c615a..bc67018f6574 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query23.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query23.q.out @@ -358,13 +358,13 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 33166566982 Data size: 2387992822704 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 66333133964 Data size: 4775985645408 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint), _col1 (type: date) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: bigint), _col1 (type: date) - Statistics: Num rows: 33166566982 Data size: 2387992822704 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 66333133964 Data size: 4775985645408 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) @@ -396,11 +396,11 @@ STAGE PLANS: keys: KEY._col0 (type: bigint), KEY._col1 (type: date) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 16583283491 Data size: 1193996411352 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 545240156 Data size: 39257291232 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint), _col2 (type: bigint) outputColumnNames: _col0, _col2 - Statistics: Num rows: 16583283491 Data size: 265332535856 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 545240156 Data size: 8723842496 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition 
map: Inner Join 0 to 1 @@ -410,36 +410,36 @@ STAGE PLANS: outputColumnNames: _col2, _col3 input vertices: 1 Map 5 - Statistics: Num rows: 16583283491 Data size: 265332535856 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 545240156 Data size: 8723842496 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col3 (type: bigint), _col2 (type: bigint) outputColumnNames: _col0, _col2 - Statistics: Num rows: 16583283491 Data size: 265332535856 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 545240156 Data size: 8723842496 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (_col2 > 4L) (type: boolean) - Statistics: Num rows: 5527761163 Data size: 88444178608 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 181746718 Data size: 2907947488 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 5527761163 Data size: 44222089304 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 181746718 Data size: 1453973744 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: bigint) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 64255141 Data size: 514041128 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2228502 Data size: 17828016 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 64255141 Data size: 514041128 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2228502 Data size: 17828016 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 64255141 Data 
size: 514041128 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2228502 Data size: 17828016 Basic stats: COMPLETE Column stats: COMPLETE Reducer 13 Execution mode: vectorized, llap Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query32.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query32.q.out index 5423fb1d2107..1b634445238c 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query32.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query32.q.out @@ -8,7 +8,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Map 5 <- Map 4 (BROADCAST_EDGE), Reducer 2 (BROADCAST_EDGE) + Map 5 <- Map 3 (BROADCAST_EDGE), Reducer 2 (BROADCAST_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) Reducer 6 <- Map 1 (BROADCAST_EDGE), Map 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) @@ -19,7 +19,7 @@ STAGE PLANS: TableScan alias: catalog_sales filterExpr: cs_ext_discount_amt is not null (type: boolean) - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_91_container, bigKeyColName:cs_item_sk, smallTablePos:1, keyRatio:0.0010104727318500269 + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_87_container, bigKeyColName:cs_item_sk, smallTablePos:1, keyRatio:1.1226707964380053E-4 Statistics: Num rows: 43005109025 Data size: 5492699040592 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: cs_ext_discount_amt is not null (type: boolean) @@ -32,35 +32,35 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: bigint) + 0 _col2 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col1, _col2, _col3 + outputColumnNames: _col0, _col1 input vertices: 1 Map 3 - Statistics: Num rows: 43455490 Data size: 695287952 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4766159119 Data size: 560013852168 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator 
condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: bigint) + 0 _col0 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col1, _col3 + outputColumnNames: _col1, _col4 input vertices: 1 Map 4 Statistics: Num rows: 4828058 Data size: 38624576 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col3 (type: bigint) + key expressions: _col4 (type: bigint) null sort order: z sort order: + - Map-reduce partition columns: _col3 (type: bigint) + Map-reduce partition columns: _col4 (type: bigint) Statistics: Num rows: 4828058 Data size: 38624576 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(7,2)) Select Operator - expressions: _col3 (type: bigint) - outputColumnNames: _col3 + expressions: _col4 (type: bigint) + outputColumnNames: _col4 Statistics: Num rows: 4828058 Data size: 38624464 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: min(_col3), max(_col3), bloom_filter(_col3, expectedEntries=1000000) + aggregations: min(_col4), max(_col4), bloom_filter(_col4, expectedEntries=1000000) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 @@ -73,27 +73,6 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 3 - Map Operator Tree: - TableScan - alias: item - filterExpr: (i_manufact_id = 269) (type: boolean) - Statistics: Num rows: 462000 Data size: 5539396 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (i_manufact_id = 269) (type: boolean) - Statistics: Num rows: 468 Data size: 5616 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: i_item_sk (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 468 Data size: 3744 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 468 
Data size: 3744 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map 4 Map Operator Tree: TableScan alias: date_dim @@ -134,6 +113,43 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: bigint) + minReductionHashAggr: 0.4 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Dynamic Partitioning Event Operator + Target column: cs_sold_date_sk (bigint) + Target Input: catalog_sales + Partition key expr: cs_sold_date_sk + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Target Vertex: Map 5 + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 4 + Map Operator Tree: + TableScan + alias: item + filterExpr: (i_manufact_id = 269) (type: boolean) + Statistics: Num rows: 462000 Data size: 5539396 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (i_manufact_id = 269) (type: boolean) + Statistics: Num rows: 468 Data size: 5616 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: i_item_sk (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 468 Data size: 3744 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 468 Data size: 3744 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 5 @@ -157,7 +173,7 @@ STAGE PLANS: 1 _col0 (type: bigint) 
outputColumnNames: _col0, _col1 input vertices: - 1 Map 4 + 1 Map 3 Statistics: Num rows: 4778018342 Data size: 561407286432 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1), count(_col1) @@ -208,7 +224,7 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: bigint) + 0 _col4 (type: bigint) 1 _col1 (type: bigint) outputColumnNames: _col1, _col5 input vertices: diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query38.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query38.q.out index 9a35919c5a20..a56673a3e5b2 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query38.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query38.q.out @@ -251,13 +251,13 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2122508751 Data size: 500912065236 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4245017503 Data size: 1001824130708 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) - Statistics: Num rows: 2122508751 Data size: 500912065236 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4245017503 Data size: 1001824130708 Basic stats: COMPLETE Column stats: COMPLETE Reducer 13 Execution mode: vectorized, llap Reduce Operator Tree: @@ -265,30 +265,30 @@ STAGE PLANS: keys: KEY._col0 (type: char(30)), KEY._col1 (type: char(20)), KEY._col2 (type: date) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1061254375 Data size: 250456032500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4245017503 Data size: 1001824130708 Basic stats: COMPLETE Column stats: COMPLETE Select Operator 
expressions: _col1 (type: char(20)), _col0 (type: char(30)), _col2 (type: date) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1061254375 Data size: 250456032500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4245017503 Data size: 1001824130708 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col1 (type: char(30)), _col0 (type: char(20)), _col2 (type: date) mode: complete outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 265313593 Data size: 64736516692 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4245017503 Data size: 1035784270732 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col3) keys: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 889341640 Data size: 216999360160 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 22105871055 Data size: 5393832537420 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) - Statistics: Num rows: 889341640 Data size: 216999360160 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 22105871055 Data size: 5393832537420 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint) Reducer 2 Execution mode: vectorized, llap @@ -309,13 +309,13 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7919716636 Data size: 1869053126096 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 15839433273 Data size: 3738106252428 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator 
key expressions: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) - Statistics: Num rows: 7919716636 Data size: 1869053126096 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 15839433273 Data size: 3738106252428 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: @@ -323,30 +323,30 @@ STAGE PLANS: keys: KEY._col0 (type: char(30)), KEY._col1 (type: char(20)), KEY._col2 (type: date) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3959858318 Data size: 934526563048 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9486371806 Data size: 2238783746216 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: char(20)), _col0 (type: char(30)), _col2 (type: date) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3959858318 Data size: 934526563048 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9486371806 Data size: 2238783746216 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col1 (type: char(30)), _col0 (type: char(20)), _col2 (type: date) mode: complete outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 989964579 Data size: 241551357276 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9486371806 Data size: 2314674720664 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col3) keys: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 889341640 Data size: 216999360160 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 22105871055 Data size: 5393832537420 Basic stats: COMPLETE 
Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) - Statistics: Num rows: 889341640 Data size: 216999360160 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 22105871055 Data size: 5393832537420 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint) Reducer 5 Execution mode: vectorized, llap @@ -356,11 +356,11 @@ STAGE PLANS: keys: KEY._col0 (type: char(30)), KEY._col1 (type: char(20)), KEY._col2 (type: date) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 444670820 Data size: 108499680080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9486371806 Data size: 2314674720664 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col3 (type: bigint) outputColumnNames: _col3 - Statistics: Num rows: 444670820 Data size: 3557366560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9486371806 Data size: 75890974448 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (_col3 = 3L) (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -411,13 +411,13 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4187240873 Data size: 988188846028 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8374481746 Data size: 1976377692056 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) - Statistics: Num rows: 4187240873 Data size: 988188846028 Basic stats: 
COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8374481746 Data size: 1976377692056 Basic stats: COMPLETE Column stats: COMPLETE Reducer 9 Execution mode: vectorized, llap Reduce Operator Tree: @@ -425,30 +425,30 @@ STAGE PLANS: keys: KEY._col0 (type: char(30)), KEY._col1 (type: char(20)), KEY._col2 (type: date) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2093620436 Data size: 494094422896 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8374481746 Data size: 1976377692056 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: char(20)), _col0 (type: char(30)), _col2 (type: date) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2093620436 Data size: 494094422896 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8374481746 Data size: 1976377692056 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col1 (type: char(30)), _col0 (type: char(20)), _col2 (type: date) mode: complete outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 523405109 Data size: 127710846596 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8374481746 Data size: 2043373546024 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col3) keys: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 889341640 Data size: 216999360160 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 22105871055 Data size: 5393832537420 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) - Statistics: Num rows: 
889341640 Data size: 216999360160 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 22105871055 Data size: 5393832537420 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint) Union 4 Vertex: Union 4 diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query40.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query40.q.out index 005a3f992fd5..a30c659f78b2 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query40.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query40.q.out @@ -7,22 +7,22 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 7 (BROADCAST_EDGE) - Map 5 <- Reducer 7 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 5 (CUSTOM_SIMPLE_EDGE), Map 6 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE), Map 9 (BROADCAST_EDGE) + Map 1 <- Reducer 8 (BROADCAST_EDGE) + Map 5 <- Reducer 8 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 5 (CUSTOM_SIMPLE_EDGE), Map 6 (BROADCAST_EDGE), Map 7 (BROADCAST_EDGE), Map 9 (BROADCAST_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) - Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) + Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: catalog_sales - filterExpr: (cs_warehouse_sk is not null and cs_item_sk BETWEEN DynamicValue(RS_17_item_i_item_sk_min) AND DynamicValue(RS_17_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_17_item_i_item_sk_bloom_filter))) (type: boolean) + filterExpr: (cs_warehouse_sk is not null and cs_item_sk BETWEEN DynamicValue(RS_20_item_i_item_sk_min) AND DynamicValue(RS_20_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_20_item_i_item_sk_bloom_filter))) (type: boolean) Statistics: Num rows: 43005109025 Data size: 6179957594616 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: 
(cs_warehouse_sk is not null and cs_item_sk BETWEEN DynamicValue(RS_17_item_i_item_sk_min) AND DynamicValue(RS_17_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_17_item_i_item_sk_bloom_filter))) (type: boolean) + predicate: (cs_warehouse_sk is not null and cs_item_sk BETWEEN DynamicValue(RS_20_item_i_item_sk_min) AND DynamicValue(RS_20_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_20_item_i_item_sk_bloom_filter))) (type: boolean) Statistics: Num rows: 42897418825 Data size: 6164482203784 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cs_warehouse_sk (type: bigint), cs_item_sk (type: bigint), cs_order_number (type: bigint), cs_sales_price (type: decimal(7,2)), cs_sold_date_sk (type: bigint) @@ -41,10 +41,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: catalog_returns - filterExpr: (cr_item_sk BETWEEN DynamicValue(RS_17_item_i_item_sk_min) AND DynamicValue(RS_17_item_i_item_sk_max) and in_bloom_filter(cr_item_sk, DynamicValue(RS_17_item_i_item_sk_bloom_filter))) (type: boolean) + filterExpr: (cr_item_sk BETWEEN DynamicValue(RS_20_item_i_item_sk_min) AND DynamicValue(RS_20_item_i_item_sk_max) and in_bloom_filter(cr_item_sk, DynamicValue(RS_20_item_i_item_sk_bloom_filter))) (type: boolean) Statistics: Num rows: 4320980099 Data size: 543456366240 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (cr_item_sk BETWEEN DynamicValue(RS_17_item_i_item_sk_min) AND DynamicValue(RS_17_item_i_item_sk_max) and in_bloom_filter(cr_item_sk, DynamicValue(RS_17_item_i_item_sk_bloom_filter))) (type: boolean) + predicate: (cr_item_sk BETWEEN DynamicValue(RS_20_item_i_item_sk_min) AND DynamicValue(RS_20_item_i_item_sk_max) and in_bloom_filter(cr_item_sk, DynamicValue(RS_20_item_i_item_sk_bloom_filter))) (type: boolean) Statistics: Num rows: 4320980099 Data size: 543456366240 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cr_item_sk (type: bigint), cr_order_number 
(type: bigint), cr_refunded_cash (type: decimal(7,2)) @@ -60,43 +60,6 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 6 - Map Operator Tree: - TableScan - alias: item - filterExpr: i_current_price BETWEEN 0.99 AND 1.49 (type: boolean) - Statistics: Num rows: 462000 Data size: 101509408 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: i_current_price BETWEEN 0.99 AND 1.49 (type: boolean) - Statistics: Num rows: 6416 Data size: 1409840 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: i_item_sk (type: bigint), i_item_id (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6416 Data size: 692928 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 6416 Data size: 692928 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 6416 Data size: 51328 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map 8 Map Operator Tree: TableScan alias: date_dim @@ -134,6 +97,43 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: may be used (ACID table) + Map 7 + Map Operator Tree: + 
TableScan + alias: item + filterExpr: i_current_price BETWEEN 0.99 AND 1.49 (type: boolean) + Statistics: Num rows: 462000 Data size: 101509408 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: i_current_price BETWEEN 0.99 AND 1.49 (type: boolean) + Statistics: Num rows: 6416 Data size: 1409840 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: i_item_sk (type: bigint), i_item_id (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6416 Data size: 692928 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 6416 Data size: 692928 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 6416 Data size: 51328 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) Map 9 Map Operator Tree: TableScan @@ -170,21 +170,21 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: bigint) + 0 _col4 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col0, _col3, _col4, _col7, _col9 + outputColumnNames: _col0, _col1, _col3, _col7, _col9, _col10 input vertices: 1 Map 6 - Statistics: Num rows: 947588639 Data size: 203304102788 
Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7580978039 Data size: 1018266906400 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col4 (type: bigint) + 0 _col1 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col0, _col3, _col7, _col9, _col11, _col12 + outputColumnNames: _col0, _col3, _col7, _col9, _col10, _col12 input vertices: - 1 Map 8 + 1 Map 7 Statistics: Num rows: 105280419 Data size: 11370285484 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -192,18 +192,18 @@ STAGE PLANS: keys: 0 _col0 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col3, _col7, _col9, _col11, _col12, _col14 + outputColumnNames: _col3, _col7, _col9, _col10, _col12, _col14 input vertices: 1 Map 9 Statistics: Num rows: 105280419 Data size: 20424401510 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: ++ - keys: _col14 (type: char(2)), _col9 (type: string) + keys: _col14 (type: char(2)), _col12 (type: string) null sort order: zz Statistics: Num rows: 105280419 Data size: 20424401510 Basic stats: COMPLETE Column stats: COMPLETE top n: 100 Select Operator - expressions: _col14 (type: char(2)), _col9 (type: string), if(_col11, (_col3 - if(_col7 is not null, _col7, 0)), 0) (type: decimal(8,2)), if(_col12, (_col3 - if(_col7 is not null, _col7, 0)), 0) (type: decimal(8,2)) + expressions: _col14 (type: char(2)), _col12 (type: string), if(_col9, (_col3 - if(_col7 is not null, _col7, 0)), 0) (type: decimal(8,2)), if(_col10, (_col3 - if(_col7 is not null, _col7, 0)), 0) (type: decimal(8,2)) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 105280419 Data size: 20424401510 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -212,13 +212,13 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 684480 Data size: 280636800 Basic stats: COMPLETE 
Column stats: COMPLETE + Statistics: Num rows: 6159360 Data size: 2525337600 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: char(2)), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: char(2)), _col1 (type: string) - Statistics: Num rows: 684480 Data size: 280636800 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6159360 Data size: 2525337600 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(18,2)), _col3 (type: decimal(18,2)) Reducer 3 Execution mode: vectorized, llap @@ -228,12 +228,12 @@ STAGE PLANS: keys: KEY._col0 (type: char(2)), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8556 Data size: 3507960 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 76992 Data size: 31566720 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: char(2)), _col1 (type: string) null sort order: zz sort order: ++ - Statistics: Num rows: 8556 Data size: 3507960 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 76992 Data size: 31566720 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(18,2)), _col3 (type: decimal(18,2)) Reducer 4 Execution mode: vectorized, llap @@ -241,7 +241,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: char(2)), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: decimal(18,2)), VALUE._col1 (type: decimal(18,2)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8556 Data size: 3507960 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 76992 Data size: 31566720 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 41000 Basic stats: COMPLETE Column stats: COMPLETE @@ -252,7 +252,7 @@ STAGE PLANS: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 + Reducer 8 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query5.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query5.q.out index adbdacef39ef..ea66858b591b 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query5.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query5.q.out @@ -7,11 +7,11 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 21 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE), Union 2 (CONTAINS) - Map 12 <- Map 13 (BROADCAST_EDGE), Map 21 (BROADCAST_EDGE), Union 10 (CONTAINS) + Map 1 <- Map 20 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE), Union 2 (CONTAINS) + Map 12 <- Map 13 (BROADCAST_EDGE), Map 20 (BROADCAST_EDGE), Union 10 (CONTAINS) Map 14 <- Map 20 (BROADCAST_EDGE), Map 21 (BROADCAST_EDGE), Union 15 (CONTAINS) - Map 7 <- Map 21 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE), Union 2 (CONTAINS) - Map 9 <- Map 13 (BROADCAST_EDGE), Map 21 (BROADCAST_EDGE), Union 10 (CONTAINS) + Map 7 <- Map 20 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE), Union 2 (CONTAINS) + Map 9 <- Map 13 (BROADCAST_EDGE), Map 20 (BROADCAST_EDGE), Union 10 (CONTAINS) Reducer 11 <- Union 10 (SIMPLE_EDGE), Union 4 (CONTAINS) Reducer 16 <- Union 15 (SIMPLE_EDGE), Union 4 (CONTAINS) Reducer 18 <- Map 17 (CUSTOM_SIMPLE_EDGE), Map 19 (CUSTOM_SIMPLE_EDGE), Map 20 (BROADCAST_EDGE), Map 21 (BROADCAST_EDGE), Union 15 (CONTAINS) @@ -25,7 +25,7 @@ STAGE PLANS: TableScan alias: store_sales filterExpr: ss_store_sk is not null (type: boolean) - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_231_container, bigKeyColName:ss_store_sk, smallTablePos:1, keyRatio:1.0756178660512734 + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_232_container, 
bigKeyColName:ss_store_sk, smallTablePos:1, keyRatio:0.11950491485837746 Statistics: Num rows: 82510879939 Data size: 19351122693824 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ss_store_sk is not null (type: boolean) @@ -38,35 +38,35 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: bigint) + 0 _col1 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col7 + outputColumnNames: _col0, _col2, _col3, _col4, _col5 input vertices: - 1 Map 8 - Statistics: Num rows: 88750176606 Data size: 48460575985864 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 20 + Statistics: Num rows: 9860455682 Data size: 3580319207704 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: bigint) + 0 _col0 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col2, _col3, _col4, _col5, _col7 + outputColumnNames: _col2, _col3, _col4, _col5, _col8 input vertices: - 1 Map 21 + 1 Map 8 Statistics: Num rows: 9860455682 Data size: 4519007506664 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col2), sum(_col4), sum(_col3), sum(_col5) - keys: _col7 (type: string) + keys: _col8 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1729994 Data size: 948036712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 15516987 Data size: 8503308876 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1729994 Data size: 948036712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 15516987 Data size: 8503308876 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), _col3 (type: 
decimal(17,2)), _col4 (type: decimal(17,2)) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) @@ -87,35 +87,35 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: bigint) + 0 _col1 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col7 + outputColumnNames: _col0, _col2, _col3, _col4, _col5 input vertices: - 1 Map 13 - Statistics: Num rows: 47131652878 Data size: 26162171562344 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 20 + Statistics: Num rows: 5236491827 Data size: 2342411162624 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: bigint) + 0 _col0 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col2, _col3, _col4, _col5, _col7 + outputColumnNames: _col2, _col3, _col4, _col5, _col8 input vertices: - 1 Map 21 + 1 Map 13 Statistics: Num rows: 5236491827 Data size: 2826570083372 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col2), sum(_col4), sum(_col3), sum(_col5) - keys: _col7 (type: string) + keys: _col8 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 56303158 Data size: 30854130584 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 506728422 Data size: 277687175256 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 56303158 Data size: 30854130584 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 506728422 Data size: 277687175256 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) @@ -149,7 
+149,7 @@ STAGE PLANS: TableScan alias: web_sales filterExpr: ws_web_site_sk is not null (type: boolean) - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_235_container, bigKeyColName:ws_web_site_sk, smallTablePos:1, keyRatio:2.1458761134564632 + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_236_container, bigKeyColName:ws_web_site_sk, smallTablePos:1, keyRatio:0.23841435728939733 Statistics: Num rows: 21594638446 Data size: 5182388988880 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ws_web_site_sk is not null (type: boolean) @@ -162,35 +162,35 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: bigint) + 0 _col1 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col7 + outputColumnNames: _col0, _col2, _col3, _col4, _col5 input vertices: 1 Map 20 - Statistics: Num rows: 46339418820 Data size: 25753225754752 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5148471846 Data size: 2336125623824 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: bigint) + 0 _col0 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col2, _col3, _col4, _col5, _col7 + outputColumnNames: _col2, _col3, _col4, _col5, _col8 input vertices: 1 Map 21 Statistics: Num rows: 5148471846 Data size: 2809871462440 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col2), sum(_col4), sum(_col3), sum(_col5) - keys: _col7 (type: string) + keys: _col8 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 54885 Data size: 30076980 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 461034 Data size: 252646632 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - 
Statistics: Num rows: 54885 Data size: 30076980 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 461034 Data size: 252646632 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) @@ -235,31 +235,6 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 20 - Map Operator Tree: - TableScan - alias: web_site - Statistics: Num rows: 84 Data size: 9072 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: web_site_sk (type: bigint), web_site_id (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 9072 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 84 Data size: 9072 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 84 Data size: 9072 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map 21 Map Operator Tree: TableScan alias: date_dim @@ -406,6 +381,31 @@ STAGE PLANS: Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) + Map 21 + Map Operator Tree: + TableScan + alias: web_site + Statistics: Num rows: 84 Data size: 9072 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: web_site_sk (type: bigint), web_site_id (type: string) + outputColumnNames: _col0, 
_col1 + Statistics: Num rows: 84 Data size: 9072 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 84 Data size: 9072 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 84 Data size: 9072 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) Map 7 Map Operator Tree: TableScan @@ -423,35 +423,35 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: bigint) + 0 _col1 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col7 + outputColumnNames: _col0, _col2, _col3, _col4, _col5 input vertices: - 1 Map 8 - Statistics: Num rows: 88750176606 Data size: 48460575985864 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 20 + Statistics: Num rows: 9860455682 Data size: 3580319207704 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: bigint) + 0 _col0 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col2, _col3, _col4, _col5, _col7 + outputColumnNames: _col2, _col3, _col4, _col5, _col8 input vertices: - 1 Map 21 + 1 Map 8 Statistics: Num rows: 9860455682 Data size: 4519007506664 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col2), sum(_col4), sum(_col3), sum(_col5) - keys: _col7 (type: string) + keys: _col8 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1729994 Data size: 948036712 Basic stats: COMPLETE Column stats: COMPLETE + 
Statistics: Num rows: 15516987 Data size: 8503308876 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1729994 Data size: 948036712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 15516987 Data size: 8503308876 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) @@ -485,7 +485,7 @@ STAGE PLANS: TableScan alias: catalog_sales filterExpr: cs_catalog_page_sk is not null (type: boolean) - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_233_container, bigKeyColName:cs_catalog_page_sk, smallTablePos:1, keyRatio:1.0959547352990346 + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_234_container, bigKeyColName:cs_catalog_page_sk, smallTablePos:1, keyRatio:0.12176441231565975 Statistics: Num rows: 43005109025 Data size: 10308315074584 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: cs_catalog_page_sk is not null (type: boolean) @@ -498,35 +498,35 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: bigint) + 0 _col1 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col7 + outputColumnNames: _col0, _col2, _col3, _col4, _col5 input vertices: - 1 Map 13 - Statistics: Num rows: 47131652878 Data size: 26162171562344 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 20 + Statistics: Num rows: 5236491827 Data size: 2342411162624 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: bigint) + 0 _col0 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col2, _col3, _col4, _col5, _col7 + outputColumnNames: _col2, _col3, _col4, _col5, _col8 input vertices: 
- 1 Map 21 + 1 Map 13 Statistics: Num rows: 5236491827 Data size: 2826570083372 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col2), sum(_col4), sum(_col3), sum(_col5) - keys: _col7 (type: string) + keys: _col8 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 56303158 Data size: 30854130584 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 506728422 Data size: 277687175256 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 56303158 Data size: 30854130584 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 506728422 Data size: 277687175256 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) @@ -538,16 +538,16 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 5099 Data size: 2794252 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 45891 Data size: 25148268 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'catalog channel' (type: string), concat('catalog_page', _col0) (type: string), _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), (_col3 - _col4) (type: decimal(18,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 5099 Data size: 3156281 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 45891 Data size: 28406529 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: ++ keys: _col0 (type: string), _col1 (type: string) null sort order: zz - Statistics: Num 
rows: 5202 Data size: 3219822 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 46812 Data size: 28974702 Basic stats: COMPLETE Column stats: COMPLETE top n: 100 Group By Operator aggregations: sum(_col2), sum(_col3), sum(_col4) @@ -556,13 +556,13 @@ STAGE PLANS: minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 7803 Data size: 4892481 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 70218 Data size: 44026686 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - Statistics: Num rows: 7803 Data size: 4892481 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 70218 Data size: 44026686 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: decimal(27,2)), _col4 (type: decimal(27,2)), _col5 (type: decimal(28,2)) Reducer 16 Execution mode: vectorized, llap @@ -572,16 +572,16 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 5 Data size: 2740 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 42 Data size: 23016 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'web channel' (type: string), concat('web_site', _col0) (type: string), _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), (_col3 - _col4) (type: decimal(18,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 5 Data size: 3075 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 42 Data size: 25830 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: ++ keys: _col0 (type: string), _col1 (type: string) null sort order: zz - Statistics: 
Num rows: 5202 Data size: 3219822 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 46812 Data size: 28974702 Basic stats: COMPLETE Column stats: COMPLETE top n: 100 Group By Operator aggregations: sum(_col2), sum(_col3), sum(_col4) @@ -590,13 +590,13 @@ STAGE PLANS: minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 7803 Data size: 4892481 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 70218 Data size: 44026686 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - Statistics: Num rows: 7803 Data size: 4892481 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 70218 Data size: 44026686 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: decimal(27,2)), _col4 (type: decimal(27,2)), _col5 (type: decimal(28,2)) Reducer 18 Execution mode: vectorized, llap @@ -620,35 +620,35 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: bigint) + 0 _col1 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col7 + outputColumnNames: _col0, _col2, _col3, _col4, _col5 input vertices: 1 Map 20 - Statistics: Num rows: 46339418820 Data size: 25753225754752 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5148471846 Data size: 2336125623824 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: bigint) + 0 _col0 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col2, _col3, _col4, _col5, _col7 + outputColumnNames: _col2, _col3, _col4, _col5, _col8 input vertices: 1 Map 21 Statistics: Num rows: 5148471846 Data size: 2809871462440 Basic stats: COMPLETE Column 
stats: COMPLETE Group By Operator aggregations: sum(_col2), sum(_col4), sum(_col3), sum(_col5) - keys: _col7 (type: string) + keys: _col8 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 54885 Data size: 30076980 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 461034 Data size: 252646632 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 54885 Data size: 30076980 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 461034 Data size: 252646632 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) Reducer 3 Execution mode: vectorized, llap @@ -658,16 +658,16 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 98 Data size: 53704 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 879 Data size: 481692 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'store channel' (type: string), concat('store', _col0) (type: string), _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), (_col3 - _col4) (type: decimal(18,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 98 Data size: 60466 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 879 Data size: 542343 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: ++ keys: _col0 (type: string), _col1 (type: string) null sort order: zz - Statistics: Num rows: 5202 Data size: 3219822 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 46812 Data size: 28974702 Basic stats: COMPLETE Column stats: COMPLETE top n: 100 
Group By Operator aggregations: sum(_col2), sum(_col3), sum(_col4) @@ -676,13 +676,13 @@ STAGE PLANS: minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 7803 Data size: 4892481 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 70218 Data size: 44026686 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - Statistics: Num rows: 7803 Data size: 4892481 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 70218 Data size: 44026686 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: decimal(27,2)), _col4 (type: decimal(27,2)), _col5 (type: decimal(28,2)) Reducer 5 Execution mode: vectorized, llap @@ -692,17 +692,17 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) mode: mergepartial outputColumnNames: _col0, _col1, _col3, _col4, _col5 - Statistics: Num rows: 7803 Data size: 4892481 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 70218 Data size: 44026686 Basic stats: COMPLETE Column stats: COMPLETE pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: decimal(27,2)), _col4 (type: decimal(27,2)), _col5 (type: decimal(28,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 7803 Data size: 4830057 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 70218 Data size: 43464942 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ - Statistics: Num rows: 7803 Data size: 4830057 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 
70218 Data size: 43464942 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(27,2)), _col3 (type: decimal(27,2)), _col4 (type: decimal(28,2)) Reducer 6 Execution mode: vectorized, llap @@ -710,7 +710,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: decimal(27,2)), VALUE._col1 (type: decimal(27,2)), VALUE._col2 (type: decimal(28,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 7803 Data size: 4830057 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 70218 Data size: 43464942 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 61900 Basic stats: COMPLETE Column stats: COMPLETE diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query51.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query51.q.out index 7c68da53ee81..5091dfb25a49 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query51.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query51.q.out @@ -42,13 +42,13 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8110898127 Data size: 1427518070352 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16221796254 Data size: 2855036140704 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint), _col1 (type: date) null sort order: az sort order: ++ Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 8110898127 Data size: 1427518070352 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16221796254 Data size: 2855036140704 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(17,2)) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) @@ -78,13 +78,13 @@ STAGE PLANS: 
minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2122773538 Data size: 373608142688 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4245547076 Data size: 747216285376 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint), _col1 (type: date) null sort order: az sort order: ++ Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 2122773538 Data size: 373608142688 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4245547076 Data size: 747216285376 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(17,2)) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) @@ -157,7 +157,7 @@ STAGE PLANS: keys: KEY._col0 (type: bigint), KEY._col1 (type: date) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4055449063 Data size: 713759035088 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32608329 Data size: 5739065904 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint), _col1 (type: date), _col2 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2 @@ -180,17 +180,17 @@ STAGE PLANS: name: sum window function: GenericUDAFSumHiveDecimal window frame: ROWS PRECEDING(MAX)~CURRENT - Statistics: Num rows: 4055449063 Data size: 713759035088 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32608329 Data size: 5739065904 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint), _col1 (type: date), sum_window_0 (type: decimal(27,2)) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4055449063 Data size: 713759035088 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32608329 Data size: 5739065904 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: 
bigint), _col1 (type: date) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: bigint), _col1 (type: date) - Statistics: Num rows: 4055449063 Data size: 713759035088 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32608329 Data size: 5739065904 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(27,2)) Reducer 3 Execution mode: llap @@ -202,13 +202,13 @@ STAGE PLANS: 0 _col0 (type: bigint), _col1 (type: date) 1 _col0 (type: bigint), _col1 (type: date) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 47389106998950 Data size: 16680965663630400 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11706390111 Data size: 4120649319072 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: CASE WHEN (_col3 is not null) THEN (_col3) ELSE (_col0) END (type: bigint), CASE WHEN (_col4 is not null) THEN (_col4) ELSE (_col1) END (type: date) null sort order: az sort order: ++ Map-reduce partition columns: CASE WHEN (_col3 is not null) THEN (_col3) ELSE (_col0) END (type: bigint) - Statistics: Num rows: 47389106998950 Data size: 16680965663630400 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11706390111 Data size: 4120649319072 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: date), _col2 (type: decimal(27,2)), _col3 (type: bigint), _col4 (type: date), _col5 (type: decimal(27,2)) Reducer 4 Execution mode: vectorized, llap @@ -216,7 +216,7 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: bigint), VALUE._col1 (type: date), VALUE._col2 (type: decimal(27,2)), VALUE._col3 (type: bigint), VALUE._col4 (type: date), VALUE._col5 (type: decimal(27,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 47389106998950 Data size: 16680965663630400 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num 
rows: 11706390111 Data size: 4120649319072 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -242,25 +242,25 @@ STAGE PLANS: name: max window function: GenericUDAFMaxEvaluator window frame: ROWS PRECEDING(MAX)~CURRENT - Statistics: Num rows: 47389106998950 Data size: 16680965663630400 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11706390111 Data size: 4120649319072 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (max_window_0 > max_window_1) (type: boolean) - Statistics: Num rows: 15796368999650 Data size: 5560321887876800 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3902130037 Data size: 1373549773024 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: ++ keys: if(_col3 is not null, _col3, _col0) (type: bigint), if(_col4 is not null, _col4, _col1) (type: date) null sort order: zz - Statistics: Num rows: 15796368999650 Data size: 5560321887876800 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3902130037 Data size: 1373549773024 Basic stats: COMPLETE Column stats: COMPLETE top n: 100 Select Operator expressions: if(_col3 is not null, _col3, _col0) (type: bigint), if(_col4 is not null, _col4, _col1) (type: date), _col5 (type: decimal(27,2)), _col2 (type: decimal(27,2)), max_window_0 (type: decimal(27,2)), max_window_1 (type: decimal(27,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 15796368999650 Data size: 8087740927820800 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3902130037 Data size: 1997890578944 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint), _col1 (type: date) null sort order: zz sort order: ++ - Statistics: Num rows: 15796368999650 Data size: 8087740927820800 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3902130037 Data size: 1997890578944 Basic 
stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(27,2)), _col3 (type: decimal(27,2)), _col4 (type: decimal(27,2)), _col5 (type: decimal(27,2)) Reducer 5 Execution mode: vectorized, llap @@ -268,7 +268,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey1 (type: date), VALUE._col0 (type: decimal(27,2)), VALUE._col1 (type: decimal(27,2)), VALUE._col2 (type: decimal(27,2)), VALUE._col3 (type: decimal(27,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 15796368999650 Data size: 8087740927820800 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3902130037 Data size: 1997890578944 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 51200 Basic stats: COMPLETE Column stats: COMPLETE @@ -287,7 +287,7 @@ STAGE PLANS: keys: KEY._col0 (type: bigint), KEY._col1 (type: date) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1061386769 Data size: 186804071344 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32608329 Data size: 5739065904 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint), _col1 (type: date), _col2 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2 @@ -310,17 +310,17 @@ STAGE PLANS: name: sum window function: GenericUDAFSumHiveDecimal window frame: ROWS PRECEDING(MAX)~CURRENT - Statistics: Num rows: 1061386769 Data size: 186804071344 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32608329 Data size: 5739065904 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint), _col1 (type: date), sum_window_0 (type: decimal(27,2)) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1061386769 Data size: 186804071344 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32608329 Data size: 
5739065904 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint), _col1 (type: date) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: bigint), _col1 (type: date) - Statistics: Num rows: 1061386769 Data size: 186804071344 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32608329 Data size: 5739065904 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(27,2)) Stage: Stage-0 diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query58.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query58.q.out index 0091d3885c34..31dc5e2d3eca 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query58.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query58.q.out @@ -1,4 +1,5 @@ -Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 5' is a cross product +Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Reducer 5' is a cross product +Warning: Map Join MAPJOIN[380][bigTable=?] 
in task 'Reducer 6' is a cross product STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -8,15 +9,21 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 11 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) - Map 5 <- Map 3 (BROADCAST_EDGE), Reducer 4 (BROADCAST_EDGE) - Map 6 <- Map 11 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) - Map 9 <- Map 11 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) - Reducer 10 <- Map 9 (SIMPLE_EDGE) + Map 1 <- Map 17 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE) + Map 10 <- Map 13 (BROADCAST_EDGE), Map 17 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) + Map 13 <- Reducer 6 (BROADCAST_EDGE) + Map 15 <- Map 13 (BROADCAST_EDGE), Map 17 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) + Map 3 <- Reducer 14 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) + Map 8 <- Reducer 5 (BROADCAST_EDGE) + Reducer 11 <- Map 10 (SIMPLE_EDGE), Reducer 16 (BROADCAST_EDGE), Reducer 2 (BROADCAST_EDGE) + Reducer 12 <- Reducer 11 (SIMPLE_EDGE) + Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) + Reducer 16 <- Map 15 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) - Reducer 7 <- Map 6 (SIMPLE_EDGE), Reducer 10 (BROADCAST_EDGE), Reducer 2 (BROADCAST_EDGE) - Reducer 8 <- Reducer 7 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE), Reducer 7 (BROADCAST_EDGE) + Reducer 6 <- Map 4 (CUSTOM_SIMPLE_EDGE), Map 8 (BROADCAST_EDGE) + Reducer 7 <- Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -46,8 +53,8 @@ STAGE PLANS: 1 _col0 (type: date) outputColumnNames: _col0, _col1 input vertices: - 1 Map 5 - Statistics: Num rows: 43005109025 Data size: 5148566336008 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 8 + Statistics: Num rows: 3532295 Data size: 28258472 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner 
Join 0 to 1 @@ -56,25 +63,197 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col1, _col6 input vertices: - 1 Map 11 - Statistics: Num rows: 43005109025 Data size: 9105036366308 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 17 + Statistics: Num rows: 3532295 Data size: 353229612 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) keys: _col6 (type: string) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.92992544 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 8803686108 Data size: 1866381454896 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 495048 Data size: 104950176 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 8803686108 Data size: 1866381454896 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 495048 Data size: 104950176 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(17,2)) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 11 + Map 10 + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 82510879939 Data size: 10343396725952 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ss_item_sk (type: bigint), ss_ext_sales_price (type: decimal(7,2)), ss_sold_date_sk (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 82510879939 Data size: 10343396725952 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col4 + input vertices: + 1 Map 3 + Statistics: Num rows: 82510879939 Data size: 14303918963024 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + 
keys: + 0 _col4 (type: date) + 1 _col0 (type: date) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 13 + Statistics: Num rows: 6777167 Data size: 54217448 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col1, _col6 + input vertices: + 1 Map 17 + Statistics: Num rows: 6777167 Data size: 677716812 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col1) + keys: _col6 (type: string) + minReductionHashAggr: 0.9634768 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 742572 Data size: 157425264 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 742572 Data size: 157425264 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(17,2)) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 13 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: (d_week_seq is not null and d_date is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (d_week_seq is not null and d_date is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: d_date (type: date), d_week_seq (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col2 + input vertices: + 0 Reducer 6 + Statistics: Num rows: 6 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + 
Select Operator + expressions: _col2 (type: date) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: date) + minReductionHashAggr: 0.4 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: date) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: date) + Statistics: Num rows: 6 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: date) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) + minReductionHashAggr: 0.8333333 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: binary) + Reduce Output Operator + key expressions: _col0 (type: date) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: date) + Statistics: Num rows: 6 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 15 + Map Operator Tree: + TableScan + alias: web_sales + Statistics: Num rows: 21594638446 Data size: 2763811113552 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ws_item_sk (type: bigint), ws_ext_sales_price (type: decimal(7,2)), ws_sold_date_sk (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 21594638446 Data size: 2763811113552 Basic stats: 
COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col4 + input vertices: + 1 Map 3 + Statistics: Num rows: 21594638446 Data size: 3800353758960 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col4 (type: date) + 1 _col0 (type: date) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 13 + Statistics: Num rows: 1773711 Data size: 14189800 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col1, _col6 + input vertices: + 1 Map 17 + Statistics: Num rows: 1773711 Data size: 177371212 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col1) + keys: _col6 (type: string) + minReductionHashAggr: 0.86044854 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 247524 Data size: 52475088 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 247524 Data size: 52475088 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(17,2)) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 17 Map Operator Tree: TableScan alias: item @@ -110,10 +289,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: date_dim - filterExpr: (d_date is not null or ((d_date = DATE'1998-02-19') and d_week_seq is not null) or (d_date = DATE'1998-02-19')) (type: boolean) + filterExpr: (d_date is not null and ((d_date BETWEEN DynamicValue(RS_36_date_dim_d_date_min) AND DynamicValue(RS_36_date_dim_d_date_max) and in_bloom_filter(d_date, DynamicValue(RS_36_date_dim_d_date_bloom_filter))) or (d_date 
BETWEEN DynamicValue(RS_82_date_dim_d_date_min) AND DynamicValue(RS_82_date_dim_d_date_max) and in_bloom_filter(d_date, DynamicValue(RS_82_date_dim_d_date_bloom_filter))))) (type: boolean) Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: d_date is not null (type: boolean) + predicate: (d_date is not null and d_date BETWEEN DynamicValue(RS_36_date_dim_d_date_min) AND DynamicValue(RS_36_date_dim_d_date_max) and in_bloom_filter(d_date, DynamicValue(RS_36_date_dim_d_date_bloom_filter))) (type: boolean) Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: d_date_sk (type: bigint), d_date (type: date) @@ -142,6 +321,13 @@ STAGE PLANS: Partition key expr: cs_sold_date_sk Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE Target Vertex: Map 1 + Filter Operator + predicate: (d_date is not null and d_date BETWEEN DynamicValue(RS_82_date_dim_d_date_min) AND DynamicValue(RS_82_date_dim_d_date_max) and in_bloom_filter(d_date, DynamicValue(RS_82_date_dim_d_date_bloom_filter))) (type: boolean) + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: d_date_sk (type: bigint), d_date (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z @@ -164,7 +350,7 @@ STAGE PLANS: Target Input: store_sales Partition key expr: ss_sold_date_sk Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 6 + Target Vertex: Map 10 Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z @@ -187,28 +373,23 @@ STAGE PLANS: Target Input: web_sales Partition key expr: ws_sold_date_sk Statistics: Num rows: 67850 Data 
size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 9 - Filter Operator - predicate: ((d_date = DATE'1998-02-19') and d_week_seq is not null) (type: boolean) - Statistics: Num rows: 36524 Data size: 2191440 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: d_week_seq (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 36524 Data size: 146096 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 36524 Data size: 146096 Basic stats: COMPLETE Column stats: COMPLETE + Target Vertex: Map 15 + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 4 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: ((d_date = DATE'1998-02-19') or ((d_date = DATE'1998-02-19') and d_week_seq is not null)) (type: boolean) + Statistics: Num rows: 73049 Data size: 4090744 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (d_date = DATE'1998-02-19') (type: boolean) - Statistics: Num rows: 36524 Data size: 2045344 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 36524 Data size: 2045344 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -217,13 +398,30 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic 
stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Filter Operator + predicate: ((d_date = DATE'1998-02-19') and d_week_seq is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: d_week_seq (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 5 + Map 8 Map Operator Tree: TableScan alias: date_dim - filterExpr: (d_week_seq is not null and d_date is not null) (type: boolean) + filterExpr: ((d_week_seq is not null and d_date is not null) or ((d_date = DATE'1998-02-19') and d_week_seq is not null)) (type: boolean) Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (d_week_seq is not null and d_date is not null) (type: boolean) @@ -236,159 +434,139 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 - 1 - outputColumnNames: _col0, _col1 + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col2 input vertices: - 1 Reducer 4 - Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) + 0 Reducer 5 + Statistics: Num rows: 6 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: date) outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 236172 Data size: 13225632 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: date) 
- minReductionHashAggr: 0.99 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 73049 Data size: 4090744 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: date) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: date) - Statistics: Num rows: 73049 Data size: 4090744 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: date) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: date) - Statistics: Num rows: 73049 Data size: 4090744 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: date) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: date) - Statistics: Num rows: 73049 Data size: 4090744 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: date) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) + minReductionHashAggr: 0.8333333 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: binary) + Filter Operator + predicate: ((d_date = DATE'1998-02-19') and d_week_seq is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: d_week_seq (type: int) 
+ outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 6 - Map Operator Tree: - TableScan - alias: store_sales - Statistics: Num rows: 82510879939 Data size: 10343396725952 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ss_item_sk (type: bigint), ss_ext_sales_price (type: decimal(7,2)), ss_sold_date_sk (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 82510879939 Data size: 10343396725952 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col0, _col1, _col4 - input vertices: - 1 Map 3 - Statistics: Num rows: 82510879939 Data size: 14303918963024 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col4 (type: date) - 1 _col0 (type: date) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 5 - Statistics: Num rows: 82510879939 Data size: 9683309686440 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col1, _col6 - input vertices: - 1 Map 11 - Statistics: Num rows: 82510879939 Data size: 17274310640828 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col1) - keys: _col6 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 16702424472 Data size: 3540913988064 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null 
sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 16702424472 Data size: 3540913988064 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: decimal(17,2)) + Reducer 11 Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map 9 - Map Operator Tree: - TableScan - alias: web_sales - Statistics: Num rows: 21594638446 Data size: 2763811113552 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ws_item_sk (type: bigint), ws_ext_sales_price (type: decimal(7,2)), ws_sold_date_sk (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 21594638446 Data size: 2763811113552 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 247524 Data size: 52475088 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 0 Reducer 2 + Statistics: Num rows: 247524 Data size: 80197776 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col1 BETWEEN (0.9 * _col3) AND (1.1 * _col3) and _col3 BETWEEN (0.9 * _col1) AND (1.1 * _col1)) (type: boolean) + Statistics: Num rows: 3055 Data size: 989820 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col0, _col1, _col4 + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3, _col5, _col6, _col7 input vertices: - 1 Map 3 - Statistics: Num rows: 21594638446 Data size: 3800353758960 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 
- keys: - 0 _col4 (type: date) - 1 _col0 (type: date) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 5 - Statistics: Num rows: 21594638446 Data size: 2591054005984 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col1, _col6 - input vertices: - 1 Map 11 - Statistics: Num rows: 21594638446 Data size: 4577760743016 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col1) - keys: _col6 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4426224168 Data size: 938359523616 Basic stats: COMPLETE Column stats: COMPLETE + 1 Reducer 16 + Statistics: Num rows: 3055 Data size: 2016300 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col5 BETWEEN (0.9 * _col1) AND (1.1 * _col1) and _col5 BETWEEN (0.9 * _col3) AND (1.1 * _col3) and _col1 BETWEEN _col6 AND _col7 and _col3 BETWEEN _col6 AND _col7) (type: boolean) + Statistics: Num rows: 1 Data size: 660 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: ++ + keys: _col0 (type: string), _col3 (type: decimal(17,2)) + null sort order: zz + Statistics: Num rows: 1 Data size: 660 Basic stats: COMPLETE Column stats: COMPLETE + top n: 100 + Select Operator + expressions: _col0 (type: string), _col3 (type: decimal(17,2)), (((_col3 / ((_col3 + _col1) + _col5)) / 3) * 100) (type: decimal(38,17)), _col1 (type: decimal(17,2)), (((_col1 / ((_col3 + _col1) + _col5)) / 3) * 100) (type: decimal(38,17)), _col5 (type: decimal(17,2)), (((_col5 / ((_col3 + _col1) + _col5)) / 3) * 100) (type: decimal(38,17)), (((_col3 + _col1) + _col5) / 3) (type: decimal(23,6)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key 
expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 4426224168 Data size: 938359523616 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: decimal(17,2)) + key expressions: _col0 (type: string), _col1 (type: decimal(17,2)) + null sort order: zz + sort order: ++ + Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: decimal(38,17)), _col3 (type: decimal(17,2)), _col4 (type: decimal(38,17)), _col5 (type: decimal(17,2)), _col6 (type: decimal(38,17)), _col7 (type: decimal(23,6)) + Reducer 12 Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Reducer 10 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: decimal(17,2)), VALUE._col0 (type: decimal(38,17)), VALUE._col1 (type: decimal(17,2)), VALUE._col2 (type: decimal(38,17)), VALUE._col3 (type: decimal(17,2)), VALUE._col4 (type: decimal(38,17)), VALUE._col5 (type: decimal(23,6)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 100 + Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 14 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1000000) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num 
rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: binary) + Reducer 16 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -424,7 +602,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 247524 Data size: 52475088 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(17,2)) - Reducer 4 + Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -437,78 +615,75 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 7 + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col1 + input vertices: + 1 Reducer 7 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) + aggregations: count(VALUE._col0) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 247524 Data size: 52475088 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, 
_col1, _col3 - input vertices: - 0 Reducer 2 - Statistics: Num rows: 247524 Data size: 80197776 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col1 BETWEEN (0.9 * _col3) AND (1.1 * _col3) and _col3 BETWEEN (0.9 * _col1) AND (1.1 * _col1)) (type: boolean) - Statistics: Num rows: 3055 Data size: 989820 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: sq_count_check(_col0) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3, _col5, _col6, _col7 + 0 + 1 + outputColumnNames: _col1 input vertices: - 1 Reducer 10 - Statistics: Num rows: 3055 Data size: 2016300 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col5 BETWEEN (0.9 * _col1) AND (1.1 * _col1) and _col5 BETWEEN (0.9 * _col3) AND (1.1 * _col3) and _col1 BETWEEN _col6 AND _col7 and _col3 BETWEEN _col6 AND _col7) (type: boolean) - Statistics: Num rows: 1 Data size: 660 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: ++ - keys: _col0 (type: string), _col3 (type: decimal(17,2)) - null sort order: zz - Statistics: Num rows: 1 Data size: 660 Basic stats: COMPLETE Column stats: COMPLETE - top n: 100 - Select Operator - expressions: _col0 (type: string), _col3 (type: decimal(17,2)), (((_col3 / ((_col3 + _col1) + _col5)) / 3) * 100) (type: decimal(38,17)), _col1 (type: decimal(17,2)), (((_col1 / ((_col3 + _col1) + _col5)) / 3) * 100) (type: decimal(38,17)), _col5 (type: decimal(17,2)), (((_col5 / ((_col3 + _col1) + _col5)) / 3) * 100) (type: decimal(38,17)), (((_col3 + _col1) + _col5) / 3) (type: 
decimal(23,6)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: decimal(17,2)) - null sort order: zz - sort order: ++ - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: decimal(38,17)), _col3 (type: decimal(17,2)), _col4 (type: decimal(38,17)), _col5 (type: decimal(17,2)), _col6 (type: decimal(38,17)), _col7 (type: decimal(23,6)) - Reducer 8 + 1 Map 8 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: decimal(17,2)), VALUE._col0 (type: decimal(38,17)), VALUE._col1 (type: decimal(17,2)), VALUE._col2 (type: decimal(38,17)), VALUE._col3 (type: decimal(17,2)), VALUE._col4 (type: decimal(38,17)), VALUE._col5 (type: decimal(23,6)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 100 - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + expressions: VALUE._col0 (type: int) + outputColumnNames: 
_col0 + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Reducer 9 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1000000) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: binary) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query80.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query80.q.out index 86ce0891d9b0..b50f09d6e5a1 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query80.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query80.q.out @@ -7,36 +7,36 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 16 (BROADCAST_EDGE), Reducer 20 (BROADCAST_EDGE) - Map 10 <- Reducer 15 (BROADCAST_EDGE), Reducer 19 (BROADCAST_EDGE) - Map 13 <- Reducer 15 (BROADCAST_EDGE) - Map 23 <- Reducer 17 (BROADCAST_EDGE), Reducer 21 (BROADCAST_EDGE) - Map 26 <- Reducer 17 (BROADCAST_EDGE) - Map 7 <- Reducer 16 (BROADCAST_EDGE) - Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE), Map 13 (CUSTOM_SIMPLE_EDGE), Map 14 (BROADCAST_EDGE), Map 18 (BROADCAST_EDGE), Map 22 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE) - Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Union 4 (CONTAINS) - Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE) - Reducer 16 <- Map 14 (CUSTOM_SIMPLE_EDGE) - Reducer 17 <- Map 14 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Reducer 21 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) + Map 13 <- Reducer 10 (BROADCAST_EDGE), 
Reducer 19 (BROADCAST_EDGE) + Map 16 <- Reducer 19 (BROADCAST_EDGE) + Map 23 <- Reducer 11 (BROADCAST_EDGE), Reducer 20 (BROADCAST_EDGE) + Map 26 <- Reducer 20 (BROADCAST_EDGE) + Map 7 <- Reducer 21 (BROADCAST_EDGE) + Reducer 10 <- Map 8 (CUSTOM_SIMPLE_EDGE) + Reducer 11 <- Map 8 (CUSTOM_SIMPLE_EDGE) + Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE), Map 16 (CUSTOM_SIMPLE_EDGE), Map 17 (BROADCAST_EDGE), Map 18 (BROADCAST_EDGE), Map 22 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE) + Reducer 15 <- Reducer 14 (SIMPLE_EDGE), Union 4 (CONTAINS) Reducer 19 <- Map 18 (CUSTOM_SIMPLE_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 14 (BROADCAST_EDGE), Map 18 (BROADCAST_EDGE), Map 7 (CUSTOM_SIMPLE_EDGE), Map 8 (BROADCAST_EDGE), Map 9 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 12 (BROADCAST_EDGE), Map 17 (BROADCAST_EDGE), Map 18 (BROADCAST_EDGE), Map 7 (CUSTOM_SIMPLE_EDGE), Map 8 (BROADCAST_EDGE) Reducer 20 <- Map 18 (CUSTOM_SIMPLE_EDGE) Reducer 21 <- Map 18 (CUSTOM_SIMPLE_EDGE) - Reducer 24 <- Map 14 (BROADCAST_EDGE), Map 18 (BROADCAST_EDGE), Map 23 (CUSTOM_SIMPLE_EDGE), Map 26 (CUSTOM_SIMPLE_EDGE), Map 27 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE) + Reducer 24 <- Map 17 (BROADCAST_EDGE), Map 18 (BROADCAST_EDGE), Map 23 (CUSTOM_SIMPLE_EDGE), Map 26 (CUSTOM_SIMPLE_EDGE), Map 27 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE) Reducer 25 <- Reducer 24 (SIMPLE_EDGE), Union 4 (CONTAINS) Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Union 4 (CONTAINS) Reducer 5 <- Union 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: store_sales - filterExpr: (ss_store_sk is not null and ss_promo_sk is not null and ss_item_sk BETWEEN DynamicValue(RS_20_item_i_item_sk_min) AND DynamicValue(RS_20_item_i_item_sk_max) and ss_promo_sk BETWEEN DynamicValue(RS_23_promotion_p_promo_sk_min) AND DynamicValue(RS_23_promotion_p_promo_sk_max) and in_bloom_filter(ss_item_sk, 
DynamicValue(RS_20_item_i_item_sk_bloom_filter)) and in_bloom_filter(ss_promo_sk, DynamicValue(RS_23_promotion_p_promo_sk_bloom_filter))) (type: boolean) + filterExpr: (ss_store_sk is not null and ss_promo_sk is not null and ss_item_sk BETWEEN DynamicValue(RS_23_item_i_item_sk_min) AND DynamicValue(RS_23_item_i_item_sk_max) and ss_promo_sk BETWEEN DynamicValue(RS_26_promotion_p_promo_sk_min) AND DynamicValue(RS_26_promotion_p_promo_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_23_item_i_item_sk_bloom_filter)) and in_bloom_filter(ss_promo_sk, DynamicValue(RS_26_promotion_p_promo_sk_bloom_filter))) (type: boolean) Statistics: Num rows: 82510879939 Data size: 21315868812296 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (ss_store_sk is not null and ss_promo_sk is not null and ss_promo_sk BETWEEN DynamicValue(RS_23_promotion_p_promo_sk_min) AND DynamicValue(RS_23_promotion_p_promo_sk_max) and ss_item_sk BETWEEN DynamicValue(RS_20_item_i_item_sk_min) AND DynamicValue(RS_20_item_i_item_sk_max) and in_bloom_filter(ss_promo_sk, DynamicValue(RS_23_promotion_p_promo_sk_bloom_filter)) and in_bloom_filter(ss_item_sk, DynamicValue(RS_20_item_i_item_sk_bloom_filter))) (type: boolean) + predicate: (ss_store_sk is not null and ss_promo_sk is not null and ss_promo_sk BETWEEN DynamicValue(RS_26_promotion_p_promo_sk_min) AND DynamicValue(RS_26_promotion_p_promo_sk_max) and ss_item_sk BETWEEN DynamicValue(RS_23_item_i_item_sk_min) AND DynamicValue(RS_23_item_i_item_sk_max) and in_bloom_filter(ss_promo_sk, DynamicValue(RS_26_promotion_p_promo_sk_bloom_filter)) and in_bloom_filter(ss_item_sk, DynamicValue(RS_23_item_i_item_sk_bloom_filter))) (type: boolean) Statistics: Num rows: 78675502838 Data size: 20325037116048 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ss_item_sk (type: bigint), ss_store_sk (type: bigint), ss_promo_sk (type: bigint), ss_ticket_number (type: bigint), ss_ext_sales_price (type: decimal(7,2)), 
ss_net_profit (type: decimal(7,2)), ss_sold_date_sk (type: bigint) @@ -51,14 +51,32 @@ STAGE PLANS: value expressions: _col1 (type: bigint), _col2 (type: bigint), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)), _col6 (type: bigint) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 10 + Map 12 + Map Operator Tree: + TableScan + alias: store + Statistics: Num rows: 1704 Data size: 184032 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: s_store_sk (type: bigint), s_store_id (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1704 Data size: 184032 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1704 Data size: 184032 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 13 Map Operator Tree: TableScan alias: catalog_sales - filterExpr: (cs_catalog_page_sk is not null and cs_promo_sk is not null and cs_item_sk BETWEEN DynamicValue(RS_57_item_i_item_sk_min) AND DynamicValue(RS_57_item_i_item_sk_max) and cs_promo_sk BETWEEN DynamicValue(RS_60_promotion_p_promo_sk_min) AND DynamicValue(RS_60_promotion_p_promo_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_57_item_i_item_sk_bloom_filter)) and in_bloom_filter(cs_promo_sk, DynamicValue(RS_60_promotion_p_promo_sk_bloom_filter))) (type: boolean) + filterExpr: (cs_catalog_page_sk is not null and cs_promo_sk is not null and cs_item_sk BETWEEN DynamicValue(RS_60_item_i_item_sk_min) AND DynamicValue(RS_60_item_i_item_sk_max) and cs_promo_sk BETWEEN DynamicValue(RS_63_promotion_p_promo_sk_min) AND DynamicValue(RS_63_promotion_p_promo_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_60_item_i_item_sk_bloom_filter)) and in_bloom_filter(cs_promo_sk, 
DynamicValue(RS_63_promotion_p_promo_sk_bloom_filter))) (type: boolean) Statistics: Num rows: 43005109025 Data size: 11339575410520 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (cs_catalog_page_sk is not null and cs_promo_sk is not null and cs_promo_sk BETWEEN DynamicValue(RS_60_promotion_p_promo_sk_min) AND DynamicValue(RS_60_promotion_p_promo_sk_max) and cs_item_sk BETWEEN DynamicValue(RS_57_item_i_item_sk_min) AND DynamicValue(RS_57_item_i_item_sk_max) and in_bloom_filter(cs_promo_sk, DynamicValue(RS_60_promotion_p_promo_sk_bloom_filter)) and in_bloom_filter(cs_item_sk, DynamicValue(RS_57_item_i_item_sk_bloom_filter))) (type: boolean) + predicate: (cs_catalog_page_sk is not null and cs_promo_sk is not null and cs_promo_sk BETWEEN DynamicValue(RS_63_promotion_p_promo_sk_min) AND DynamicValue(RS_63_promotion_p_promo_sk_max) and cs_item_sk BETWEEN DynamicValue(RS_60_item_i_item_sk_min) AND DynamicValue(RS_60_item_i_item_sk_max) and in_bloom_filter(cs_promo_sk, DynamicValue(RS_63_promotion_p_promo_sk_bloom_filter)) and in_bloom_filter(cs_item_sk, DynamicValue(RS_60_item_i_item_sk_bloom_filter))) (type: boolean) Statistics: Num rows: 42789551679 Data size: 11282737308320 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cs_catalog_page_sk (type: bigint), cs_item_sk (type: bigint), cs_promo_sk (type: bigint), cs_order_number (type: bigint), cs_ext_sales_price (type: decimal(7,2)), cs_net_profit (type: decimal(7,2)), cs_sold_date_sk (type: bigint) @@ -73,14 +91,14 @@ STAGE PLANS: value expressions: _col0 (type: bigint), _col2 (type: bigint), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)), _col6 (type: bigint) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 13 + Map 16 Map Operator Tree: TableScan alias: catalog_returns - filterExpr: (cr_item_sk BETWEEN DynamicValue(RS_57_item_i_item_sk_min) AND DynamicValue(RS_57_item_i_item_sk_max) and in_bloom_filter(cr_item_sk, 
DynamicValue(RS_57_item_i_item_sk_bloom_filter))) (type: boolean) + filterExpr: (cr_item_sk BETWEEN DynamicValue(RS_60_item_i_item_sk_min) AND DynamicValue(RS_60_item_i_item_sk_max) and in_bloom_filter(cr_item_sk, DynamicValue(RS_60_item_i_item_sk_bloom_filter))) (type: boolean) Statistics: Num rows: 4320980099 Data size: 1017653227728 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (cr_item_sk BETWEEN DynamicValue(RS_57_item_i_item_sk_min) AND DynamicValue(RS_57_item_i_item_sk_max) and in_bloom_filter(cr_item_sk, DynamicValue(RS_57_item_i_item_sk_bloom_filter))) (type: boolean) + predicate: (cr_item_sk BETWEEN DynamicValue(RS_60_item_i_item_sk_min) AND DynamicValue(RS_60_item_i_item_sk_max) and in_bloom_filter(cr_item_sk, DynamicValue(RS_60_item_i_item_sk_bloom_filter))) (type: boolean) Statistics: Num rows: 4320980099 Data size: 1017653227728 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cr_item_sk (type: bigint), cr_order_number (type: bigint), cr_return_amount (type: decimal(7,2)), cr_net_loss (type: decimal(7,2)) @@ -95,107 +113,110 @@ STAGE PLANS: value expressions: _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 14 + Map 17 Map Operator Tree: TableScan - alias: item - filterExpr: (i_current_price > 50) (type: boolean) - Statistics: Num rows: 462000 Data size: 55309408 Basic stats: COMPLETE Column stats: COMPLETE + alias: date_dim + filterExpr: CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-08-04 00:00:00' AND TIMESTAMP'1998-09-03 00:00:00' (type: boolean) + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (i_current_price > 50) (type: boolean) - Statistics: Num rows: 231185 Data size: 27676904 Basic stats: COMPLETE Column stats: COMPLETE + predicate: CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-08-04 00:00:00' AND TIMESTAMP'1998-09-03 00:00:00' 
(type: boolean) + Statistics: Num rows: 8116 Data size: 519424 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: i_item_sk (type: bigint) + expressions: d_date_sk (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 231185 Data size: 1849480 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 231185 Data size: 1849480 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 231185 Data size: 1849480 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) - minReductionHashAggr: 0.99 + keys: _col0 (type: bigint) + minReductionHashAggr: 0.4 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) + outputColumnNames: _col0 + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Dynamic Partitioning Event Operator + Target column: cs_sold_date_sk (bigint) + Target Input: catalog_sales + Partition key expr: cs_sold_date_sk + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Target Vertex: Map 13 Reduce Output Operator key expressions: _col0 (type: 
bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 231185 Data size: 1849480 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 231185 Data size: 1849480 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) - minReductionHashAggr: 0.99 + keys: _col0 (type: bigint) + minReductionHashAggr: 0.4 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) + outputColumnNames: _col0 + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Dynamic Partitioning Event Operator + Target column: ss_sold_date_sk (bigint) + Target Input: store_sales + Partition key expr: ss_sold_date_sk + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Target Vertex: Map 1 Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 231185 Data size: 1849480 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 231185 Data size: 1849480 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 
8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) - minReductionHashAggr: 0.99 + keys: _col0 (type: bigint) + minReductionHashAggr: 0.4 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) + outputColumnNames: _col0 + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Dynamic Partitioning Event Operator + Target column: ws_sold_date_sk (bigint) + Target Input: web_sales + Partition key expr: ws_sold_date_sk + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Target Vertex: Map 23 Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 18 Map Operator Tree: TableScan - alias: promotion - filterExpr: (p_channel_tv = 'N') (type: boolean) - Statistics: Num rows: 2300 Data size: 213900 Basic stats: COMPLETE Column stats: COMPLETE + alias: item + filterExpr: (i_current_price > 50) (type: boolean) + Statistics: Num rows: 462000 Data size: 55309408 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (p_channel_tv = 'N') (type: boolean) - Statistics: Num rows: 1150 Data size: 106950 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (i_current_price > 50) (type: boolean) + Statistics: Num rows: 231185 Data size: 27676904 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: p_promo_sk (type: bigint) + expressions: i_item_sk (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 1150 Data size: 9200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 231185 Data size: 1849480 
Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 1150 Data size: 9200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 231185 Data size: 1849480 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 1150 Data size: 9200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 231185 Data size: 1849480 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) minReductionHashAggr: 0.99 @@ -212,11 +233,11 @@ STAGE PLANS: null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 1150 Data size: 9200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 231185 Data size: 1849480 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 1150 Data size: 9200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 231185 Data size: 1849480 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) minReductionHashAggr: 0.99 @@ -233,11 +254,11 @@ STAGE PLANS: null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 1150 Data size: 9200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 231185 Data size: 1849480 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 1150 Data size: 9200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 231185 Data size: 1849480 Basic stats: COMPLETE 
Column stats: COMPLETE Group By Operator aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) minReductionHashAggr: 0.99 @@ -273,10 +294,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: web_sales - filterExpr: (ws_web_site_sk is not null and ws_promo_sk is not null and ws_item_sk BETWEEN DynamicValue(RS_95_item_i_item_sk_min) AND DynamicValue(RS_95_item_i_item_sk_max) and ws_promo_sk BETWEEN DynamicValue(RS_98_promotion_p_promo_sk_min) AND DynamicValue(RS_98_promotion_p_promo_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_95_item_i_item_sk_bloom_filter)) and in_bloom_filter(ws_promo_sk, DynamicValue(RS_98_promotion_p_promo_sk_bloom_filter))) (type: boolean) + filterExpr: (ws_web_site_sk is not null and ws_promo_sk is not null and ws_item_sk BETWEEN DynamicValue(RS_98_item_i_item_sk_min) AND DynamicValue(RS_98_item_i_item_sk_max) and ws_promo_sk BETWEEN DynamicValue(RS_101_promotion_p_promo_sk_min) AND DynamicValue(RS_101_promotion_p_promo_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_98_item_i_item_sk_bloom_filter)) and in_bloom_filter(ws_promo_sk, DynamicValue(RS_101_promotion_p_promo_sk_bloom_filter))) (type: boolean) Statistics: Num rows: 21594638446 Data size: 5700638697608 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (ws_web_site_sk is not null and ws_promo_sk is not null and ws_promo_sk BETWEEN DynamicValue(RS_98_promotion_p_promo_sk_min) AND DynamicValue(RS_98_promotion_p_promo_sk_max) and ws_item_sk BETWEEN DynamicValue(RS_95_item_i_item_sk_min) AND DynamicValue(RS_95_item_i_item_sk_max) and in_bloom_filter(ws_promo_sk, DynamicValue(RS_98_promotion_p_promo_sk_bloom_filter)) and in_bloom_filter(ws_item_sk, DynamicValue(RS_95_item_i_item_sk_bloom_filter))) (type: boolean) + predicate: (ws_web_site_sk is not null and ws_promo_sk is not null and ws_promo_sk BETWEEN DynamicValue(RS_101_promotion_p_promo_sk_min) AND DynamicValue(RS_101_promotion_p_promo_sk_max) and ws_item_sk 
BETWEEN DynamicValue(RS_98_item_i_item_sk_min) AND DynamicValue(RS_98_item_i_item_sk_max) and in_bloom_filter(ws_promo_sk, DynamicValue(RS_101_promotion_p_promo_sk_bloom_filter)) and in_bloom_filter(ws_item_sk, DynamicValue(RS_98_item_i_item_sk_bloom_filter))) (type: boolean) Statistics: Num rows: 21589233207 Data size: 5699211801048 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ws_item_sk (type: bigint), ws_web_site_sk (type: bigint), ws_promo_sk (type: bigint), ws_order_number (type: bigint), ws_ext_sales_price (type: decimal(7,2)), ws_net_profit (type: decimal(7,2)), ws_sold_date_sk (type: bigint) @@ -295,10 +316,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: web_returns - filterExpr: (wr_item_sk BETWEEN DynamicValue(RS_95_item_i_item_sk_min) AND DynamicValue(RS_95_item_i_item_sk_max) and in_bloom_filter(wr_item_sk, DynamicValue(RS_95_item_i_item_sk_bloom_filter))) (type: boolean) + filterExpr: (wr_item_sk BETWEEN DynamicValue(RS_98_item_i_item_sk_min) AND DynamicValue(RS_98_item_i_item_sk_max) and in_bloom_filter(wr_item_sk, DynamicValue(RS_98_item_i_item_sk_bloom_filter))) (type: boolean) Statistics: Num rows: 2160007345 Data size: 496628694560 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (wr_item_sk BETWEEN DynamicValue(RS_95_item_i_item_sk_min) AND DynamicValue(RS_95_item_i_item_sk_max) and in_bloom_filter(wr_item_sk, DynamicValue(RS_95_item_i_item_sk_bloom_filter))) (type: boolean) + predicate: (wr_item_sk BETWEEN DynamicValue(RS_98_item_i_item_sk_min) AND DynamicValue(RS_98_item_i_item_sk_max) and in_bloom_filter(wr_item_sk, DynamicValue(RS_98_item_i_item_sk_bloom_filter))) (type: boolean) Statistics: Num rows: 2160007345 Data size: 496628694560 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: wr_item_sk (type: bigint), wr_order_number (type: bigint), wr_return_amt (type: decimal(7,2)), wr_net_loss (type: decimal(7,2)) @@ -335,10 +356,10 @@ STAGE PLANS: Map Operator 
Tree: TableScan alias: store_returns - filterExpr: (sr_item_sk BETWEEN DynamicValue(RS_20_item_i_item_sk_min) AND DynamicValue(RS_20_item_i_item_sk_max) and in_bloom_filter(sr_item_sk, DynamicValue(RS_20_item_i_item_sk_bloom_filter))) (type: boolean) + filterExpr: (sr_item_sk BETWEEN DynamicValue(RS_23_item_i_item_sk_min) AND DynamicValue(RS_23_item_i_item_sk_max) and in_bloom_filter(sr_item_sk, DynamicValue(RS_23_item_i_item_sk_bloom_filter))) (type: boolean) Statistics: Num rows: 8634166995 Data size: 2004678961248 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (sr_item_sk BETWEEN DynamicValue(RS_20_item_i_item_sk_min) AND DynamicValue(RS_20_item_i_item_sk_max) and in_bloom_filter(sr_item_sk, DynamicValue(RS_20_item_i_item_sk_bloom_filter))) (type: boolean) + predicate: (sr_item_sk BETWEEN DynamicValue(RS_23_item_i_item_sk_min) AND DynamicValue(RS_23_item_i_item_sk_max) and in_bloom_filter(sr_item_sk, DynamicValue(RS_23_item_i_item_sk_bloom_filter))) (type: boolean) Statistics: Num rows: 8634166995 Data size: 2004678961248 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: sr_item_sk (type: bigint), sr_ticket_number (type: bigint), sr_return_amt (type: decimal(7,2)), sr_net_loss (type: decimal(7,2)) @@ -356,103 +377,108 @@ STAGE PLANS: Map 8 Map Operator Tree: TableScan - alias: date_dim - filterExpr: CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-08-04 00:00:00' AND TIMESTAMP'1998-09-03 00:00:00' (type: boolean) - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + alias: promotion + filterExpr: (p_channel_tv = 'N') (type: boolean) + Statistics: Num rows: 2300 Data size: 213900 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-08-04 00:00:00' AND TIMESTAMP'1998-09-03 00:00:00' (type: boolean) - Statistics: Num rows: 8116 Data size: 519424 Basic stats: COMPLETE Column stats: COMPLETE + predicate: 
(p_channel_tv = 'N') (type: boolean) + Statistics: Num rows: 1150 Data size: 106950 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: d_date_sk (type: bigint) + expressions: p_promo_sk (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1150 Data size: 9200 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1150 Data size: 9200 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1150 Data size: 9200 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: _col0 (type: bigint) - minReductionHashAggr: 0.4 + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) + minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE - Dynamic Partitioning Event Operator - Target column: ss_sold_date_sk (bigint) - Target Input: store_sales - Partition key expr: ss_sold_date_sk - Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) Reduce Output Operator key expressions: _col0 
(type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1150 Data size: 9200 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1150 Data size: 9200 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: _col0 (type: bigint) - minReductionHashAggr: 0.4 + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) + minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE - Dynamic Partitioning Event Operator - Target column: ws_sold_date_sk (bigint) - Target Input: web_sales - Partition key expr: ws_sold_date_sk - Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 23 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1150 Data size: 9200 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1150 Data 
size: 9200 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: _col0 (type: bigint) - minReductionHashAggr: 0.4 + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) + minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE - Dynamic Partitioning Event Operator - Target column: cs_sold_date_sk (bigint) - Target Input: catalog_sales - Partition key expr: cs_sold_date_sk - Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 10 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 9 - Map Operator Tree: - TableScan - alias: store - Statistics: Num rows: 1704 Data size: 184032 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: s_store_sk (type: bigint), s_store_id (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1704 Data size: 184032 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 1704 Data size: 184032 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) + Reducer 10 Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1000000) + mode: final + outputColumnNames: _col0, _col1, _col2 + 
Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) Reducer 11 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1000000) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) + Reducer 14 Execution mode: vectorized, llap Reduce Operator Tree: Map Join Operator @@ -463,34 +489,34 @@ STAGE PLANS: 1 KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey1 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6, _col9, _col10 input vertices: - 1 Map 13 + 1 Map 16 Statistics: Num rows: 68128960197 Data size: 26694756517832 Basic stats: COMPLETE Column stats: COMPLETE DynamicPartitionHashJoin: true Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: bigint) + 0 _col6 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col0, _col2, _col4, _col5, _col6, _col9, _col10 + outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col9, _col10 input vertices: - 1 Map 14 - Statistics: Num rows: 34091760570 Data size: 10084166612312 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 17 + Statistics: Num rows: 7569366263 Data size: 1863498498512 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: bigint) + 0 _col1 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col0, 
_col4, _col5, _col6, _col9, _col10 + outputColumnNames: _col0, _col2, _col4, _col5, _col9, _col10 input vertices: 1 Map 18 - Statistics: Num rows: 17045880285 Data size: 4078164892288 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3787714088 Data size: 895347046408 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col6 (type: bigint) + 0 _col2 (type: bigint) 1 _col0 (type: bigint) outputColumnNames: _col0, _col4, _col5, _col9, _col10 input vertices: @@ -524,7 +550,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 107889741 Data size: 47039927076 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), _col3 (type: decimal(18,2)) - Reducer 12 + Reducer 15 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -558,43 +584,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Statistics: Num rows: 70218 Data size: 44026686 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: decimal(27,2)), _col4 (type: decimal(27,2)), _col5 (type: decimal(28,2)) - Reducer 15 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1000000) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: 
bigint), _col1 (type: bigint), _col2 (type: binary) - Reducer 16 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1000000) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) - Reducer 17 + Reducer 19 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -612,19 +602,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) - Reducer 19 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1000000) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) Reducer 2 Execution mode: vectorized, llap Reduce Operator Tree: @@ -643,27 +620,27 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: bigint) + 0 _col6 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col1, _col2, _col4, _col5, 
_col6, _col9, _col10 + outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col9, _col10 input vertices: - 1 Map 14 - Statistics: Num rows: 62864387256 Data size: 22152162793776 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 17 + Statistics: Num rows: 13957729495 Data size: 3016221281800 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: bigint) + 0 _col0 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col1, _col4, _col5, _col6, _col9, _col10 + outputColumnNames: _col1, _col2, _col4, _col5, _col9, _col10 input vertices: 1 Map 18 - Statistics: Num rows: 31432193628 Data size: 7113224649584 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6984453758 Data size: 1230973268960 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col6 (type: bigint) + 0 _col2 (type: bigint) 1 _col0 (type: bigint) outputColumnNames: _col1, _col4, _col5, _col9, _col10 input vertices: @@ -677,7 +654,7 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col4, _col5, _col9, _col10, _col15 input vertices: - 1 Map 9 + 1 Map 12 Statistics: Num rows: 3492226879 Data size: 715790771852 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col15 (type: string), _col4 (type: decimal(7,2)), if(_col9 is not null, _col9, 0) (type: decimal(7,2)), (_col5 - if(_col10 is not null, _col10, 0)) (type: decimal(8,2)) @@ -710,6 +687,11 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) Reducer 21 Execution mode: vectorized, llap Reduce Operator Tree: @@ 
-723,6 +705,11 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) Reducer 24 Execution mode: vectorized, llap Reduce Operator Tree: @@ -741,27 +728,27 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: bigint) + 0 _col6 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col1, _col2, _col4, _col5, _col6, _col9, _col10 + outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col9, _col10 input vertices: - 1 Map 14 - Statistics: Num rows: 16830120307 Data size: 6191043150168 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 17 + Statistics: Num rows: 3736778117 Data size: 926375207640 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: bigint) + 0 _col0 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col1, _col4, _col5, _col6, _col9, _col10 + outputColumnNames: _col1, _col2, _col4, _col5, _col9, _col10 input vertices: 1 Map 18 - Statistics: Num rows: 8415060154 Data size: 2151835885288 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1869885355 Data size: 448426719824 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col6 (type: bigint) + 0 _col2 (type: bigint) 1 _col0 (type: bigint) outputColumnNames: _col1, _col4, _col5, _col9, _col10 input vertices: @@ -900,6 +887,19 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 9 + Execution mode: 
vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1000000) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) Union 4 Vertex: Union 4 diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query83.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query83.q.out index 39b248473790..c25dea04819d 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query83.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query83.q.out @@ -7,17 +7,19 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE) - Map 12 <- Map 4 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) - Map 3 <- Map 8 (BROADCAST_EDGE) - Map 5 <- Map 4 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE), Reducer 10 (BROADCAST_EDGE) - Map 8 <- Map 11 (BROADCAST_EDGE) - Reducer 10 <- Map 8 (SIMPLE_EDGE) - Reducer 13 <- Map 12 (SIMPLE_EDGE) + Map 1 <- Map 15 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) + Map 13 <- Map 15 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE), Map 9 (BROADCAST_EDGE) + Map 15 <- Reducer 10 (BROADCAST_EDGE), Reducer 4 (BROADCAST_EDGE) + Map 3 <- Map 9 (BROADCAST_EDGE) + Map 6 <- Map 15 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE), Map 9 (BROADCAST_EDGE) + Map 9 <- Map 11 (BROADCAST_EDGE), Reducer 12 (BROADCAST_EDGE) + Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) + Reducer 12 <- Map 11 (SIMPLE_EDGE) + Reducer 14 <- Map 13 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE), Reducer 13 
(BROADCAST_EDGE), Reducer 2 (BROADCAST_EDGE) - Reducer 7 <- Reducer 6 (SIMPLE_EDGE) - Reducer 9 <- Map 8 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE), Reducer 14 (BROADCAST_EDGE), Reducer 2 (BROADCAST_EDGE) + Reducer 8 <- Reducer 7 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -37,7 +39,7 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col0, _col1, _col4 input vertices: - 1 Map 8 + 1 Map 15 Statistics: Num rows: 4320980099 Data size: 293480294712 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -48,7 +50,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 input vertices: 1 Map 3 - Statistics: Num rows: 4320980099 Data size: 51505409168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1183036 Data size: 9464292 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -57,21 +59,21 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col1, _col6 input vertices: - 1 Map 4 - Statistics: Num rows: 4320980099 Data size: 449035578276 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 5 + Statistics: Num rows: 1183036 Data size: 118303604 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) keys: _col6 (type: string) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.7907722 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 434404620 Data size: 46915698960 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 247524 Data size: 26732592 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 434404620 Data size: 46915698960 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 247524 Data size: 26732592 Basic stats: COMPLETE Column stats: COMPLETE value 
expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) @@ -83,26 +85,32 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((d_date) IN (DATE'1998-01-02', DATE'1998-10-15', DATE'1998-11-10') and d_week_seq is not null) (type: boolean) - Statistics: Num rows: 36525 Data size: 2191500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: d_week_seq (type: int) outputColumnNames: _col0 - Statistics: Num rows: 36525 Data size: 146100 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: int) - minReductionHashAggr: 0.690705 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 11297 Data size: 45188 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11297 Data size: 45188 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 12 + Map 13 Map Operator Tree: TableScan alias: web_returns @@ -119,7 +127,7 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col0, _col1, _col4 input vertices: - 1 Reducer 9 + 1 Map 15 Statistics: Num rows: 2062802370 Data size: 140076140668 Basic 
stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -129,8 +137,8 @@ STAGE PLANS: 1 _col0 (type: date) outputColumnNames: _col0, _col1 input vertices: - 1 Map 8 - Statistics: Num rows: 2062802370 Data size: 24559207948 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 9 + Statistics: Num rows: 564772 Data size: 4518180 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -139,30 +147,121 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col1, _col6 input vertices: - 1 Map 4 - Statistics: Num rows: 2062802370 Data size: 214337025988 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 5 + Statistics: Num rows: 564772 Data size: 56477204 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) keys: _col6 (type: string) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.5617275 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 207425112 Data size: 22401912096 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 247524 Data size: 26732592 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 207425112 Data size: 22401912096 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 247524 Data size: 26732592 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) + Map 15 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: (d_date is not null and ((d_date BETWEEN DynamicValue(RS_98_date_dim_d_date_min) AND DynamicValue(RS_98_date_dim_d_date_max) and in_bloom_filter(d_date, DynamicValue(RS_98_date_dim_d_date_bloom_filter))) or (d_date BETWEEN DynamicValue(RS_26_date_dim_d_date_min) AND DynamicValue(RS_26_date_dim_d_date_max) and 
in_bloom_filter(d_date, DynamicValue(RS_26_date_dim_d_date_bloom_filter))))) (type: boolean) + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (d_date is not null and d_date BETWEEN DynamicValue(RS_98_date_dim_d_date_min) AND DynamicValue(RS_98_date_dim_d_date_max) and in_bloom_filter(d_date, DynamicValue(RS_98_date_dim_d_date_bloom_filter))) (type: boolean) + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: d_date_sk (type: bigint), d_date (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: date) + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 73049 Data size: 584392 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: bigint) + minReductionHashAggr: 0.4 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE + Dynamic Partitioning Event Operator + Target column: wr_returned_date_sk (bigint) + Target Input: web_returns + Partition key expr: wr_returned_date_sk + Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE + Target Vertex: Map 13 + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: date) + Select Operator + 
expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 73049 Data size: 584392 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: bigint) + minReductionHashAggr: 0.4 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE + Dynamic Partitioning Event Operator + Target column: sr_returned_date_sk (bigint) + Target Input: store_returns + Partition key expr: sr_returned_date_sk + Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE + Target Vertex: Map 6 + Filter Operator + predicate: (d_date is not null and d_date BETWEEN DynamicValue(RS_26_date_dim_d_date_min) AND DynamicValue(RS_26_date_dim_d_date_max) and in_bloom_filter(d_date, DynamicValue(RS_26_date_dim_d_date_bloom_filter))) (type: boolean) + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: d_date_sk (type: bigint), d_date (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: date) + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 73049 Data size: 584392 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: bigint) + minReductionHashAggr: 0.4 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE + Dynamic Partitioning Event Operator + Target column: cr_returned_date_sk (bigint) + Target Input: catalog_returns + Partition key expr: 
cr_returned_date_sk + Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE + Target Vertex: Map 1 + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) Map 3 Map Operator Tree: TableScan alias: date_dim filterExpr: (d_week_seq is not null and d_date is not null) (type: boolean) - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_322_container, bigKeyColName:d_week_seq, smallTablePos:1, keyRatio:0.0 + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_322_container, bigKeyColName:d_week_seq, smallTablePos:1, keyRatio:2.7378882667798324E-4 Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (d_week_seq is not null and d_date is not null) (type: boolean) @@ -179,23 +278,38 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0 input vertices: - 1 Map 8 - Statistics: Num rows: 73049 Data size: 4090744 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 9 + Statistics: Num rows: 19 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: date) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 36524 Data size: 2045344 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: date) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: date) - Statistics: Num rows: 36524 Data size: 2045344 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: date) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) + 
minReductionHashAggr: 0.95 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 4 + Map 5 Map Operator Tree: TableScan alias: item @@ -227,7 +341,7 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 5 + Map 6 Map Operator Tree: TableScan alias: store_returns @@ -244,7 +358,7 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col0, _col1, _col4 input vertices: - 1 Reducer 10 + 1 Map 15 Statistics: Num rows: 8332595709 Data size: 566008907392 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -254,8 +368,8 @@ STAGE PLANS: 1 _col0 (type: date) outputColumnNames: _col0, _col1 input vertices: - 1 Map 8 - Statistics: Num rows: 8332595709 Data size: 99383547688 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 9 + Statistics: Num rows: 2281371 Data size: 18250972 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -264,29 +378,29 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col1, _col6 input vertices: - 1 Map 4 - Statistics: Num rows: 8332595709 Data size: 865982352916 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 5 + Statistics: Num rows: 2281371 Data size: 228137104 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) keys: _col6 (type: string) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.8915021 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 837373692 Data size: 90436358736 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 
247524 Data size: 26732592 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 837373692 Data size: 90436358736 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 247524 Data size: 26732592 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 8 + Map 9 Map Operator Tree: TableScan alias: date_dim - filterExpr: ((d_week_seq is not null and d_date is not null) or ((d_date) IN (DATE'1998-01-02', DATE'1998-10-15', DATE'1998-11-10') and d_week_seq is not null) or d_date is not null) (type: boolean) + filterExpr: ((d_week_seq is not null and d_date is not null) or ((d_date) IN (DATE'1998-01-02', DATE'1998-10-15', DATE'1998-11-10') and d_week_seq is not null)) (type: boolean) Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (d_week_seq is not null and d_date is not null) (type: boolean) @@ -303,137 +417,104 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0 input vertices: - 1 Map 11 - Statistics: Num rows: 73049 Data size: 4090744 Basic stats: COMPLETE Column stats: COMPLETE + 1 Reducer 12 + Statistics: Num rows: 19 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: date) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 36524 Data size: 2045344 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: date) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: date) - Statistics: Num rows: 36524 Data size: 2045344 Basic stats: COMPLETE Column stats: 
COMPLETE + Statistics: Num rows: 20 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 11 + Statistics: Num rows: 19 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: date) + minReductionHashAggr: 0.4 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: date) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: date) - Statistics: Num rows: 36524 Data size: 2045344 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: date) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) + minReductionHashAggr: 0.95 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: binary) Filter Operator predicate: ((d_date) IN (DATE'1998-01-02', DATE'1998-10-15', DATE'1998-11-10') and d_week_seq is not null) (type: boolean) - Statistics: Num rows: 36525 Data size: 2191500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: d_week_seq (type: int) outputColumnNames: _col0 - Statistics: Num rows: 36525 Data size: 146100 Basic 
stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: int) - minReductionHashAggr: 0.690705 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 11297 Data size: 45188 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11297 Data size: 45188 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: d_date is not null (type: boolean) - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: d_date_sk (type: bigint), d_date (type: date) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: date) - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 73049 Data size: 584392 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: bigint) - minReductionHashAggr: 0.4 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Dynamic Partitioning Event Operator - Target column: wr_returned_date_sk (bigint) - Target Input: web_returns - Partition key expr: wr_returned_date_sk - Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 12 - Reduce Output 
Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: date) - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 73049 Data size: 584392 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: bigint) - minReductionHashAggr: 0.4 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Dynamic Partitioning Event Operator - Target column: cr_returned_date_sk (bigint) - Target Input: catalog_returns - Partition key expr: cr_returned_date_sk - Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 1 - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: date) - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 73049 Data size: 584392 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: bigint) - minReductionHashAggr: 0.4 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Dynamic Partitioning Event Operator - Target column: sr_returned_date_sk (bigint) - Target Input: store_returns - Partition key expr: sr_returned_date_sk - Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 5 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP 
IO: may be used (ACID table) Reducer 10 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1000000) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: binary) + Reducer 12 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: date) - outputColumnNames: _col0, _col1 + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 Reduce Output Operator - key expressions: _col0 (type: bigint) + key expressions: _col0 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: date) - Reducer 13 + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 14 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -469,7 +550,20 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 247524 Data size: 26732592 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) - Reducer 6 + Reducer 4 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1000000) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output 
Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: binary) + Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -496,7 +590,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col3, _col5, _col6 input vertices: - 1 Reducer 13 + 1 Reducer 14 Statistics: Num rows: 247524 Data size: 32673168 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: ++ @@ -514,7 +608,7 @@ STAGE PLANS: sort order: ++ Statistics: Num rows: 247524 Data size: 64356240 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: double), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: decimal(25,6)) - Reducer 7 + Reducer 8 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator @@ -531,19 +625,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 9 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: date) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: date) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query87.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query87.q.out index 334084e78ceb..ad5904db6f25 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query87.q.out +++ 
b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query87.q.out @@ -252,13 +252,13 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4187240873 Data size: 988188846028 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8374481746 Data size: 1976377692056 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) - Statistics: Num rows: 4187240873 Data size: 988188846028 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8374481746 Data size: 1976377692056 Basic stats: COMPLETE Column stats: COMPLETE Reducer 11 Execution mode: vectorized, llap Reduce Operator Tree: @@ -266,38 +266,38 @@ STAGE PLANS: keys: KEY._col0 (type: char(30)), KEY._col1 (type: char(20)), KEY._col2 (type: date) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2093620436 Data size: 494094422896 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8374481746 Data size: 1976377692056 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: char(20)), _col0 (type: char(30)), _col2 (type: date) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2093620436 Data size: 494094422896 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8374481746 Data size: 1976377692056 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col1 (type: char(30)), _col0 (type: char(20)), _col2 (type: date) mode: complete outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 523405109 Data size: 127710846596 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8374481746 Data size: 2043373546024 Basic stats: COMPLETE 
Column stats: COMPLETE Select Operator expressions: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date), 1L (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 523405109 Data size: 131898087468 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8374481746 Data size: 2110369399992 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date), _col4 (type: bigint), (_col3 * _col4) (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1513369688 Data size: 381369161376 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17860853552 Data size: 4500935095104 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col3), sum(_col4) keys: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 756684844 Data size: 190684580688 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17860853552 Data size: 4500935095104 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) - Statistics: Num rows: 756684844 Data size: 190684580688 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17860853552 Data size: 4500935095104 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: bigint) Reducer 14 Execution mode: vectorized, llap @@ -318,13 +318,13 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2122508751 Data size: 500912065236 Basic 
stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4245017503 Data size: 1001824130708 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) - Statistics: Num rows: 2122508751 Data size: 500912065236 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4245017503 Data size: 1001824130708 Basic stats: COMPLETE Column stats: COMPLETE Reducer 15 Execution mode: vectorized, llap Reduce Operator Tree: @@ -332,38 +332,38 @@ STAGE PLANS: keys: KEY._col0 (type: char(30)), KEY._col1 (type: char(20)), KEY._col2 (type: date) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1061254375 Data size: 250456032500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4245017503 Data size: 1001824130708 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: char(20)), _col0 (type: char(30)), _col2 (type: date) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1061254375 Data size: 250456032500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4245017503 Data size: 1001824130708 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col1 (type: char(30)), _col0 (type: char(20)), _col2 (type: date) mode: complete outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 265313593 Data size: 64736516692 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4245017503 Data size: 1035784270732 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date), 1L (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 265313593 
Data size: 66859025436 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4245017503 Data size: 1069744410756 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date), _col4 (type: bigint), (_col3 * _col4) (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 281077860 Data size: 70831620720 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5826079470 Data size: 1468172026440 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col3), sum(_col4) keys: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 140538930 Data size: 35415810360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5826079470 Data size: 1468172026440 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) - Statistics: Num rows: 140538930 Data size: 35415810360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5826079470 Data size: 1468172026440 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: bigint) Reducer 2 Execution mode: vectorized, llap @@ -384,13 +384,13 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7919716636 Data size: 1869053126096 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 15839433273 Data size: 3738106252428 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) null 
sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) - Statistics: Num rows: 7919716636 Data size: 1869053126096 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 15839433273 Data size: 3738106252428 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: @@ -398,38 +398,38 @@ STAGE PLANS: keys: KEY._col0 (type: char(30)), KEY._col1 (type: char(20)), KEY._col2 (type: date) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3959858318 Data size: 934526563048 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9486371806 Data size: 2238783746216 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: char(20)), _col0 (type: char(30)), _col2 (type: date) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3959858318 Data size: 934526563048 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9486371806 Data size: 2238783746216 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col1 (type: char(30)), _col0 (type: char(20)), _col2 (type: date) mode: complete outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 989964579 Data size: 241551357276 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9486371806 Data size: 2314674720664 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date), 2L (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 989964579 Data size: 249471073908 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9486371806 Data size: 2390565695112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: char(30)), _col1 (type: char(20)), _col2 
(type: date), _col4 (type: bigint), (_col3 * _col4) (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1513369688 Data size: 381369161376 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17860853552 Data size: 4500935095104 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col3), sum(_col4) keys: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 756684844 Data size: 190684580688 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17860853552 Data size: 4500935095104 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) - Statistics: Num rows: 756684844 Data size: 190684580688 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17860853552 Data size: 4500935095104 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: bigint) Reducer 5 Execution mode: vectorized, llap @@ -439,41 +439,41 @@ STAGE PLANS: keys: KEY._col0 (type: char(30)), KEY._col1 (type: char(20)), KEY._col2 (type: date) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 378342422 Data size: 95342290344 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9486371806 Data size: 2390565695112 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((_col3 > 0L) and ((_col3 * 2L) = _col4)) (type: boolean) - Statistics: Num rows: 63057070 Data size: 15890381640 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1581061967 Data size: 398427615684 Basic stats: COMPLETE Column stats: 
COMPLETE Select Operator expressions: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 63057070 Data size: 15890381640 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1581061967 Data size: 398427615684 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) mode: complete outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 15764267 Data size: 3846481148 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1581061967 Data size: 385779119948 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date), 2L (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 15764267 Data size: 3972595284 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1581061967 Data size: 398427615684 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date), _col4 (type: bigint), (_col3 * _col4) (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 281077860 Data size: 70831620720 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5826079470 Data size: 1468172026440 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col3), sum(_col4) keys: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 140538930 Data size: 35415810360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5826079470 Data size: 1468172026440 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: 
_col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) - Statistics: Num rows: 140538930 Data size: 35415810360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5826079470 Data size: 1468172026440 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: bigint) Reducer 7 Execution mode: vectorized, llap @@ -483,16 +483,16 @@ STAGE PLANS: keys: KEY._col0 (type: char(30)), KEY._col1 (type: char(20)), KEY._col2 (type: date) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 70269465 Data size: 17707905180 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5826079470 Data size: 1468172026440 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col3 (type: bigint), _col4 (type: bigint) outputColumnNames: _col3, _col4 - Statistics: Num rows: 70269465 Data size: 1124311440 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5826079470 Data size: 93217271520 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((_col3 > 0L) and ((_col3 * 2L) = _col4)) (type: boolean) - Statistics: Num rows: 11711577 Data size: 187385232 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 971013245 Data size: 15536211920 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 11711577 Data size: 187385232 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 971013245 Data size: 15536211920 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() minReductionHashAggr: 0.99 diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query92.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query92.q.out index c3abe9358aec..1e025671aef0 100644 --- 
a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query92.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query92.q.out @@ -7,18 +7,19 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 5 (BROADCAST_EDGE), Map 7 (BROADCAST_EDGE), Reducer 6 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Map 1 (SIMPLE_EDGE), Map 7 (BROADCAST_EDGE), Reducer 2 (BROADCAST_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Map 5 <- Map 3 (BROADCAST_EDGE), Reducer 2 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 1 (BROADCAST_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: web_sales - filterExpr: (ws_item_sk BETWEEN DynamicValue(RS_25_item_i_item_sk_min) AND DynamicValue(RS_25_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_25_item_i_item_sk_bloom_filter))) (type: boolean) + filterExpr: ws_ext_discount_amt is not null (type: boolean) + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_87_container, bigKeyColName:ws_item_sk, smallTablePos:1, keyRatio:1.1253233093375219E-4 Statistics: Num rows: 21594638446 Data size: 2763810784048 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ws_ext_discount_amt is not null (type: boolean) @@ -27,30 +28,6 @@ STAGE PLANS: expressions: ws_item_sk (type: bigint), ws_ext_discount_amt (type: decimal(7,2)), ws_sold_date_sk (type: bigint) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 21591933650 Data size: 2763464608128 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col1, _col2, _col3 - input vertices: - 1 Map 5 - Statistics: Num rows: 21872348 Data size: 
2496761472 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col3 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col3 (type: bigint) - Statistics: Num rows: 21872348 Data size: 2496761472 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: decimal(7,2)), _col2 (type: bigint) - Filter Operator - predicate: (ws_item_sk BETWEEN DynamicValue(RS_25_item_i_item_sk_min) AND DynamicValue(RS_25_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_25_item_i_item_sk_bloom_filter))) (type: boolean) - Statistics: Num rows: 21594638446 Data size: 2763810784048 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ws_item_sk (type: bigint), ws_ext_discount_amt (type: decimal(7,2)), ws_sold_date_sk (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 21594638446 Data size: 2763810784048 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -59,73 +36,77 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col0, _col1 input vertices: - 1 Map 7 - Statistics: Num rows: 2399240019 Data size: 287605865240 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col1), count(_col1) - keys: _col0 (type: bigint) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 57694920 Data size: 7384949760 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 3 + Statistics: Num rows: 2398939507 Data size: 287569841768 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col1, _col4 + input vertices: + 1 Map 4 + Statistics: Num rows: 2430095 Data size: 19440872 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: bigint) + key 
expressions: _col4 (type: bigint) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 57694920 Data size: 7384949760 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: decimal(17,2)), _col2 (type: bigint) + Map-reduce partition columns: _col4 (type: bigint) + Statistics: Num rows: 2430095 Data size: 19440872 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(7,2)) + Select Operator + expressions: _col4 (type: bigint) + outputColumnNames: _col4 + Statistics: Num rows: 2430095 Data size: 19440760 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col4), max(_col4), bloom_filter(_col4, expectedEntries=1000000) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 5 + Map 3 Map Operator Tree: TableScan - alias: item - filterExpr: (i_manufact_id = 269) (type: boolean) - Statistics: Num rows: 462000 Data size: 5539396 Basic stats: COMPLETE Column stats: COMPLETE + alias: date_dim + filterExpr: CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-18 00:00:00' AND TIMESTAMP'1998-06-16 00:00:00' (type: boolean) + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (i_manufact_id = 269) (type: boolean) - Statistics: Num rows: 468 Data size: 5616 Basic stats: COMPLETE Column stats: COMPLETE + predicate: CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-18 00:00:00' AND TIMESTAMP'1998-06-16 00:00:00' (type: boolean) + Statistics: Num rows: 
8116 Data size: 519424 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: i_item_sk (type: bigint) + expressions: d_date_sk (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 468 Data size: 3744 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 468 Data size: 3744 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 468 Data size: 3744 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) - minReductionHashAggr: 0.99 + keys: _col0 (type: bigint) + minReductionHashAggr: 0.4 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map 7 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-18 00:00:00' AND TIMESTAMP'1998-06-16 00:00:00' (type: boolean) - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-18 00:00:00' AND TIMESTAMP'1998-06-16 00:00:00' (type: 
boolean) - Statistics: Num rows: 8116 Data size: 519424 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: d_date_sk (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0 + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Dynamic Partitioning Event Operator + Target column: ws_sold_date_sk (bigint) + Target Input: web_sales + Partition key expr: ws_sold_date_sk + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Target Vertex: Map 1 Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z @@ -147,29 +128,83 @@ STAGE PLANS: Target Input: web_sales Partition key expr: ws_sold_date_sk Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 1 + Target Vertex: Map 5 + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 4 + Map Operator Tree: + TableScan + alias: item + filterExpr: (i_manufact_id = 269) (type: boolean) + Statistics: Num rows: 462000 Data size: 5539396 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (i_manufact_id = 269) (type: boolean) + Statistics: Num rows: 468 Data size: 5616 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: i_item_sk (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 468 Data size: 3744 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 468 Data size: 3744 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 5 + Map Operator Tree: + 
TableScan + alias: web_sales + filterExpr: (ws_item_sk BETWEEN DynamicValue(RS_30_item_i_item_sk_min) AND DynamicValue(RS_30_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_30_item_i_item_sk_bloom_filter))) (type: boolean) + Statistics: Num rows: 21594638446 Data size: 2763810784048 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (ws_item_sk BETWEEN DynamicValue(RS_30_item_i_item_sk_min) AND DynamicValue(RS_30_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_30_item_i_item_sk_bloom_filter))) (type: boolean) + Statistics: Num rows: 21594638446 Data size: 2763810784048 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ws_item_sk (type: bigint), ws_ext_discount_amt (type: decimal(7,2)), ws_sold_date_sk (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 21594638446 Data size: 2763810784048 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 2399240019 Data size: 287605865240 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col1), count(_col1) + keys: _col0 (type: bigint) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 57694920 Data size: 7384949760 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 57694920 Data size: 7384949760 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(17,2)), _col2 (type: bigint) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Reducer 2 Execution mode: vectorized, llap Reduce Operator Tree: - Select Operator - 
expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col1 (type: decimal(7,2)), VALUE._col2 (type: bigint) - outputColumnNames: _col3, _col1, _col2 + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1000000) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col3 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col3 (type: bigint) - Statistics: Num rows: 21872348 Data size: 2496761472 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: decimal(7,2)), _col2 (type: bigint) - Reducer 3 + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) + Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -189,25 +224,19 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: bigint) + 0 _col4 (type: bigint) 1 _col1 (type: bigint) - outputColumnNames: _col1, _col2, _col4 + outputColumnNames: _col1, _col5 input vertices: - 0 Reducer 2 - Statistics: Num rows: 51330 Data size: 6159712 Basic stats: COMPLETE Column stats: COMPLETE + 0 Map 1 + Statistics: Num rows: 51330 Data size: 5749072 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (_col1 > _col4) (type: boolean) - Statistics: Num rows: 17110 Data size: 2053312 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: bigint) - 1 _col0 (type: bigint) + predicate: (_col1 > _col5) (type: boolean) + Statistics: Num rows: 17110 Data size: 1916432 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: decimal(7,2)) outputColumnNames: _col1 - input 
vertices: - 1 Map 7 - Statistics: Num rows: 17110 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17110 Data size: 1916432 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) minReductionHashAggr: 0.99 @@ -219,7 +248,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(17,2)) - Reducer 4 + Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -234,19 +263,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1000000) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query94.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query94.q.out index 6ca2d294e5f3..acc0f2da6bfd 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query94.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query94.q.out @@ -23,7 +23,7 @@ STAGE PLANS: TableScan alias: ws1 filterExpr: (ws_ship_addr_sk is not null and ws_web_site_sk is not null and ws_ship_date_sk is not null) (type: boolean) - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_119_container, bigKeyColName:ws_web_site_sk, smallTablePos:1, 
keyRatio:2.7777730824410645E-10 + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_120_container, bigKeyColName:ws_web_site_sk, smallTablePos:1, keyRatio:2.9924592936258674E-4 Statistics: Num rows: 21600036511 Data size: 5701632353848 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (ws_ship_addr_sk is not null and ws_web_site_sk is not null and ws_ship_date_sk is not null) (type: boolean) @@ -36,27 +36,27 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: bigint) + 0 _col0 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6 + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6 input vertices: 1 Map 8 - Statistics: Num rows: 407242361 Data size: 103520524440 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2398040806 Data size: 613164879160 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: bigint) + 0 _col1 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col0, _col3, _col4, _col5, _col6 + outputColumnNames: _col2, _col3, _col4, _col5, _col6 input vertices: 1 Map 9 - Statistics: Num rows: 58177483 Data size: 13737647176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 45246054 Data size: 10530636632 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: bigint) + 0 _col2 (type: bigint) 1 _col0 (type: bigint) outputColumnNames: _col3, _col4, _col5, _col6 input vertices: @@ -89,22 +89,22 @@ STAGE PLANS: Map 10 Map Operator Tree: TableScan - alias: date_dim - filterExpr: CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' (type: boolean) - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + alias: web_site + filterExpr: (web_company_name = 'pri ') (type: boolean) + Statistics: Num rows: 84 Data size: 
8064 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' (type: boolean) - Statistics: Num rows: 8116 Data size: 519424 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (web_company_name = 'pri ') (type: boolean) + Statistics: Num rows: 12 Data size: 1152 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: d_date_sk (type: bigint) + expressions: web_site_sk (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 11 @@ -165,43 +165,43 @@ STAGE PLANS: Map 8 Map Operator Tree: TableScan - alias: customer_address - filterExpr: (ca_state = 'TX') (type: boolean) - Statistics: Num rows: 40000000 Data size: 3760000000 Basic stats: COMPLETE Column stats: COMPLETE + alias: date_dim + filterExpr: CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' (type: boolean) + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (ca_state = 'TX') (type: boolean) - Statistics: Num rows: 754717 Data size: 70943398 Basic stats: COMPLETE Column stats: COMPLETE + predicate: CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' (type: boolean) + Statistics: Num rows: 8116 Data size: 519424 Basic stats: COMPLETE Column stats: COMPLETE 
Select Operator - expressions: ca_address_sk (type: bigint) + expressions: d_date_sk (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 754717 Data size: 6037736 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 754717 Data size: 6037736 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 9 Map Operator Tree: TableScan - alias: web_site - filterExpr: (web_company_name = 'pri ') (type: boolean) - Statistics: Num rows: 84 Data size: 8064 Basic stats: COMPLETE Column stats: COMPLETE + alias: customer_address + filterExpr: (ca_state = 'TX') (type: boolean) + Statistics: Num rows: 40000000 Data size: 3760000000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (web_company_name = 'pri ') (type: boolean) - Statistics: Num rows: 12 Data size: 1152 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (ca_state = 'TX') (type: boolean) + Statistics: Num rows: 754717 Data size: 70943398 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: web_site_sk (type: bigint) + expressions: ca_address_sk (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 754717 Data size: 6037736 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 754717 Data size: 6037736 Basic stats: COMPLETE 
Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Reducer 2 diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query95.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query95.q.out index 3a966e9f29c8..f3568baa028f 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query95.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query95.q.out @@ -28,7 +28,7 @@ STAGE PLANS: TableScan alias: ws1 filterExpr: (ws_ship_addr_sk is not null and ws_web_site_sk is not null and ws_ship_date_sk is not null) (type: boolean) - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_210_container, bigKeyColName:ws_web_site_sk, smallTablePos:1, keyRatio:2.7777730824410645E-10 + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_211_container, bigKeyColName:ws_web_site_sk, smallTablePos:1, keyRatio:2.9924592936258674E-4 Statistics: Num rows: 21600036511 Data size: 5528875272680 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (ws_ship_addr_sk is not null and ws_web_site_sk is not null and ws_ship_date_sk is not null) (type: boolean) @@ -41,27 +41,27 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: bigint) + 0 _col0 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col0, _col2, _col3, _col4, _col5 + outputColumnNames: _col1, _col2, _col3, _col4, _col5 input vertices: 1 Map 8 - Statistics: Num rows: 407242361 Data size: 100305764080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2398040806 Data size: 594023731240 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: bigint) + 0 _col1 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col0, _col3, _col4, _col5 + outputColumnNames: _col2, _col3, _col4, _col5 input vertices: 1 Map 9 - Statistics: Num rows: 58177483 Data size: 13315405840 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 
45246054 Data size: 10211846728 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: bigint) + 0 _col2 (type: bigint) 1 _col0 (type: bigint) outputColumnNames: _col3, _col4, _col5 input vertices: @@ -94,22 +94,22 @@ STAGE PLANS: Map 10 Map Operator Tree: TableScan - alias: date_dim - filterExpr: CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' (type: boolean) - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + alias: web_site + filterExpr: (web_company_name = 'pri ') (type: boolean) + Statistics: Num rows: 84 Data size: 8064 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' (type: boolean) - Statistics: Num rows: 8116 Data size: 519424 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (web_company_name = 'pri ') (type: boolean) + Statistics: Num rows: 12 Data size: 1152 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: d_date_sk (type: bigint) + expressions: web_site_sk (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 11 @@ -194,43 +194,43 @@ STAGE PLANS: Map 8 Map Operator Tree: TableScan - alias: customer_address - filterExpr: (ca_state = 'TX') (type: boolean) - Statistics: Num rows: 40000000 Data 
size: 3760000000 Basic stats: COMPLETE Column stats: COMPLETE + alias: date_dim + filterExpr: CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' (type: boolean) + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (ca_state = 'TX') (type: boolean) - Statistics: Num rows: 754717 Data size: 70943398 Basic stats: COMPLETE Column stats: COMPLETE + predicate: CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' (type: boolean) + Statistics: Num rows: 8116 Data size: 519424 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: ca_address_sk (type: bigint) + expressions: d_date_sk (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 754717 Data size: 6037736 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 754717 Data size: 6037736 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 9 Map Operator Tree: TableScan - alias: web_site - filterExpr: (web_company_name = 'pri ') (type: boolean) - Statistics: Num rows: 84 Data size: 8064 Basic stats: COMPLETE Column stats: COMPLETE + alias: customer_address + filterExpr: (ca_state = 'TX') (type: boolean) + Statistics: Num rows: 40000000 Data size: 3760000000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (web_company_name = 'pri ') (type: boolean) - Statistics: Num rows: 12 Data size: 1152 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (ca_state = 'TX') (type: boolean) + Statistics: Num rows: 
754717 Data size: 70943398 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: web_site_sk (type: bigint) + expressions: ca_address_sk (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 754717 Data size: 6037736 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 754717 Data size: 6037736 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Reducer 12 diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query98.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query98.q.out index c8fc334ead4c..6034e697996d 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query98.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query98.q.out @@ -17,7 +17,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: store_sales - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_54_container, bigKeyColName:ss_item_sk, smallTablePos:1, keyRatio:0.2727272808584318 + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_50_container, bigKeyColName:ss_item_sk, smallTablePos:1, keyRatio:0.030300956793193314 Statistics: Num rows: 82510879939 Data size: 10343396725952 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ss_item_sk (type: bigint), ss_ext_sales_price (type: decimal(7,2)), ss_sold_date_sk (type: bigint) @@ -27,26 +27,26 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: bigint) + 0 _col2 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col1, _col2, _col4, _col5, _col6, _col7, _col8 + outputColumnNames: _col0, _col1 input vertices: 1 Map 5 - Statistics: Num rows: 
22502967927 Data size: 15489075671302 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9167247954 Data size: 882073848240 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: bigint) + 0 _col0 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col1, _col4, _col5, _col6, _col7, _col8 + outputColumnNames: _col1, _col5, _col6, _col7, _col8, _col9 input vertices: 1 Map 6 Statistics: Num rows: 2500158608 Data size: 1507113497776 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) - keys: _col8 (type: char(50)), _col7 (type: char(50)), _col4 (type: string), _col5 (type: varchar(200)), _col6 (type: decimal(7,2)) - minReductionHashAggr: 0.99 + keys: _col9 (type: char(50)), _col8 (type: char(50)), _col5 (type: string), _col6 (type: varchar(200)), _col7 (type: decimal(7,2)) + minReductionHashAggr: 0.6650032 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 126000 Data size: 86940000 Basic stats: COMPLETE Column stats: COMPLETE @@ -60,28 +60,6 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 5 - Map Operator Tree: - TableScan - alias: item - filterExpr: (i_category) IN ('Books ', 'Jewelry ', 'Sports ') (type: boolean) - Statistics: Num rows: 462000 Data size: 270601408 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (i_category) IN ('Books ', 'Jewelry ', 'Sports ') (type: boolean) - Statistics: Num rows: 126000 Data size: 73800496 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: i_item_sk (type: bigint), i_item_id (type: string), i_item_desc (type: varchar(200)), i_current_price (type: decimal(7,2)), i_class (type: char(50)), i_category (type: char(50)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 126000 Data size: 73800496 Basic stats: COMPLETE Column stats: COMPLETE - 
Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 126000 Data size: 73800496 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col2 (type: varchar(200)), _col3 (type: decimal(7,2)), _col4 (type: char(50)), _col5 (type: char(50)) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map 6 Map Operator Tree: TableScan alias: date_dim @@ -118,6 +96,28 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: may be used (ACID table) + Map 6 + Map Operator Tree: + TableScan + alias: item + filterExpr: (i_category) IN ('Books ', 'Jewelry ', 'Sports ') (type: boolean) + Statistics: Num rows: 462000 Data size: 270601408 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (i_category) IN ('Books ', 'Jewelry ', 'Sports ') (type: boolean) + Statistics: Num rows: 126000 Data size: 73800496 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: i_item_sk (type: bigint), i_item_id (type: string), i_item_desc (type: varchar(200)), i_current_price (type: decimal(7,2)), i_class (type: char(50)), i_category (type: char(50)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 126000 Data size: 73800496 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 126000 Data size: 73800496 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: varchar(200)), _col3 (type: decimal(7,2)), _col4 (type: char(50)), _col5 (type: char(50)) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) Reducer 2 Execution mode: vectorized, llap Reduce Operator Tree: