Skip to content

Commit

Permalink
Check for correlation variables in project when constructing aggregat…
Browse files Browse the repository at this point in the history
…e in SqlToRelConverter.createAggImpl
  • Loading branch information
ian.bertolacci authored and mihaibudiu committed Sep 18, 2024
1 parent 963f898 commit 7481b85
Show file tree
Hide file tree
Showing 4 changed files with 187 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3580,11 +3580,25 @@ private void createAggImpl(Blackboard bb,
final RelNode inputRel = bb.root();

// Project the expressions required by agg and having.
bb.setRoot(
relBuilder.push(inputRel)
.projectNamed(preExprs.leftList(), preExprs.rightList(), false)
.build(),
false);
RelNode intermediateProject = relBuilder.push(inputRel)
.projectNamed(preExprs.leftList(), preExprs.rightList(), false)
.build();
final RelNode r2;
// deal with correlation
final CorrelationUse p = getCorrelationUse(bb, intermediateProject);
if (p != null) {
assert p.r instanceof Project;
// correlation variables have been normalized in p.r, we should use expressions
// in p.r instead of the original exprs
Project project1 = (Project) p.r;
r2 = relBuilder.push(bb.root())
.projectNamed(project1.getProjects(), project1.getRowType().getFieldNames(),
true, ImmutableSet.of(p.id))
.build();
} else {
r2 = intermediateProject;
}
bb.setRoot(r2, false);
bb.mapRootRelToFieldProjection.put(bb.root(), r.groupExprProjection);

// REVIEW jvs 31-Oct-2007: doesn't the declaration of
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3770,6 +3770,46 @@ void checkCorrelatedMapSubQuery(boolean expand) {
sql(sql).withExpand(false).withDecorrelate(false).ok();
}

/** Test case for
 * <a href="https://issues.apache.org/jira/browse/CALCITE-6554">[CALCITE-6554]
 * Nested correlated sub-query in aggregation does not have inner correlation variable bound
 * to inner projection</a>.
 *
 * <p>One level of nesting: the argument of the aggregate in the scalar
 * sub-query over {@code e2} contains a further scalar sub-query over
 * {@code e3} that is correlated with {@code e2}; the {@code e2} sub-query is
 * in turn correlated with the outer query over {@code e1}. Sub-query
 * expansion and decorrelation are both switched off. */
@Test void testCorrelationInProjectionWith1xNestedCorrelatedProjection() {
  final String query = "select e1.empno,\n"
      + " (select sum(e2.sal +\n"
      + " (select sum(e3.sal) from emp e3 where e3.mgr = e2.empno)\n"
      + " ) from emp e2 where e2.mgr = e1.empno)\n"
      + "from emp e1";
  sql(query)
      .withExpand(false)
      .withDecorrelate(false)
      .ok();
}

/** Test case for
 * <a href="https://issues.apache.org/jira/browse/CALCITE-6554">[CALCITE-6554]
 * Nested correlated sub-query in aggregation does not have inner correlation variable bound
 * to inner projection</a>.
 *
 * <p>Two levels of nesting: each aggregate argument contains a scalar
 * sub-query correlated with the query that directly encloses it
 * ({@code e4} with {@code e3}, {@code e3} with {@code e2}, and {@code e2}
 * with {@code e1}). Sub-query expansion and decorrelation are both switched
 * off. */
@Test void testCorrelationInProjectionWith2xNestedCorrelatedProjection() {
  final String query = "select e1.empno,\n"
      + " (select sum(e2.sal +\n"
      + " (select sum(e3.sal + (select sum(e4.sal) from emp e4 where e4.mgr = e3.empno)\n"
      + " ) from emp e3 where e3.mgr = e2.empno)\n"
      + " ) from emp e2 where e2.mgr = e1.empno)\n"
      + "from emp e1";
  sql(query)
      .withExpand(false)
      .withDecorrelate(false)
      .ok();
}

/** Test case for
 * <a href="https://issues.apache.org/jira/browse/CALCITE-6554">[CALCITE-6554]
 * Nested correlated sub-query in aggregation does not have inner correlation variable bound
 * to inner projection</a>.
 *
 * <p>Variant in which the sub-query inside the aggregate argument (over
 * {@code e3}) is not correlated with the query that directly encloses it
 * ({@code e2}); both the {@code e3} and the {@code e2} sub-queries reference
 * only the outermost query over {@code e1}. Sub-query expansion and
 * decorrelation are both switched off. */
@Test void testCorrelationInProjectionWithCorrelatedProjectionWithNestedNonCorrelatedSubquery() {
  final String query = "select e1.empno,\n"
      + " (select sum(e2.sal +\n"
      + " (select sum(e3.sal) from emp e3 where e3.mgr = e1.empno)\n"
      + " ) from emp e2 where e2.mgr = e1.empno)\n"
      + "from emp e1";
  sql(query)
      .withExpand(false)
      .withDecorrelate(false)
      .ok();
}

@Test void testCustomColumnResolving() {
final String sql = "select k0 from struct.t";
sql(sql).ok();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1128,6 +1128,62 @@ where exists (select * from emp
and emp.deptno in (dept.deptno, dept.deptno))]]>
</Resource>
</TestCase>
<TestCase name="testCorrelationInProjectionWith1xNestedCorrelatedProjection">
<Resource name="sql">
<![CDATA[select e1.empno,
(select sum(e2.sal +
(select sum(e3.sal) from emp e3 where e3.mgr = e2.empno)
) from emp e2 where e2.mgr = e1.empno)
from emp e1]]>
</Resource>
<Resource name="plan">
<![CDATA[
LogicalProject(variablesSet=[[$cor0]], EMPNO=[$0], EXPR$1=[$SCALAR_QUERY({
LogicalAggregate(group=[{}], EXPR$0=[SUM($0)])
LogicalProject(variablesSet=[[$cor1]], $f0=[+($5, $SCALAR_QUERY({
LogicalAggregate(group=[{}], EXPR$0=[SUM($0)])
LogicalProject(SAL=[$5])
LogicalFilter(condition=[=($3, $cor1.EMPNO)])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
}))])
LogicalFilter(condition=[=($3, $cor0.EMPNO)])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
})])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
</Resource>
</TestCase>
<TestCase name="testCorrelationInProjectionWith2xNestedCorrelatedProjection">
<Resource name="sql">
<![CDATA[select e1.empno,
(select sum(e2.sal +
(select sum(e3.sal + (select sum(e4.sal) from emp e4 where e4.mgr = e3.empno)
) from emp e3 where e3.mgr = e2.empno)
) from emp e2 where e2.mgr = e1.empno)
from emp e1]]>
</Resource>
<Resource name="plan">
<![CDATA[
LogicalProject(variablesSet=[[$cor0]], EMPNO=[$0], EXPR$1=[$SCALAR_QUERY({
LogicalAggregate(group=[{}], EXPR$0=[SUM($0)])
LogicalProject(variablesSet=[[$cor1]], $f0=[+($5, $SCALAR_QUERY({
LogicalAggregate(group=[{}], EXPR$0=[SUM($0)])
LogicalProject(variablesSet=[[$cor2]], $f0=[+($5, $SCALAR_QUERY({
LogicalAggregate(group=[{}], EXPR$0=[SUM($0)])
LogicalProject(SAL=[$5])
LogicalFilter(condition=[=($3, $cor2.EMPNO)])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
}))])
LogicalFilter(condition=[=($3, $cor1.EMPNO)])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
}))])
LogicalFilter(condition=[=($3, $cor0.EMPNO)])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
})])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
</Resource>
</TestCase>
<TestCase name="testCorrelationInProjectionWithCorrelatedProjection">
<Resource name="sql">
<![CDATA[select cardinality(arr) from (
Expand All @@ -1142,6 +1198,31 @@ LogicalProject(DEPTNO=[$cor0.DEPTNO])
}))])
LogicalProject(DEPTNO=[$7], ENAME=[$1])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
</Resource>
</TestCase>
<TestCase name="testCorrelationInProjectionWithCorrelatedProjectionWithNestedNonCorrelatedSubquery">
<Resource name="sql">
<![CDATA[select e1.empno,
(select sum(e2.sal +
(select sum(e3.sal) from emp e3 where e3.mgr = e1.empno)
) from emp e2 where e2.mgr = e1.empno)
from emp e1]]>
</Resource>
<Resource name="plan">
<![CDATA[
LogicalProject(variablesSet=[[$cor1]], EMPNO=[$0], EXPR$1=[$SCALAR_QUERY({
LogicalAggregate(group=[{}], EXPR$0=[SUM($0)])
LogicalProject($f0=[+($5, $SCALAR_QUERY({
LogicalAggregate(group=[{}], EXPR$0=[SUM($0)])
LogicalProject(SAL=[$5])
LogicalFilter(condition=[=($3, $cor1.EMPNO)])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
}))])
LogicalFilter(condition=[=($3, $cor1.EMPNO)])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
})])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
</Resource>
</TestCase>
Expand Down
47 changes: 47 additions & 0 deletions core/src/test/resources/sql/sub-query.iq
Original file line number Diff line number Diff line change
Expand Up @@ -668,6 +668,53 @@ FROM (SELECT 1 AS a) AS t;

!ok

# [CALCITE-6554] Nested correlated sub-query in aggregation does not have
# inner correlation variable bound to inner projection.
# For each employee ("bosses"), sums over its direct reports ("subord") the
# report's salary plus the total salary of that report's own direct reports
# ("subord2"), using 0 when a report has no reports of its own.
SELECT bosses.ename,
  (SELECT SUM(subord.sal
      + COALESCE(
          (SELECT SUM(subord2.sal)
           FROM "scott".emp AS subord2
           WHERE subord2.mgr = subord.empno),
          0))
   FROM "scott".emp AS subord
   WHERE subord.mgr = bosses.empno) AS deep2sal
FROM "scott".emp AS bosses;
+--------+----------+
| ENAME | DEEP2SAL |
+--------+----------+
| ADAMS | |
| ALLEN | |
| BLAKE | 6550.00 |
| CLARK | 1300.00 |
| FORD | 800.00 |
| JAMES | |
| JONES | 7900.00 |
| KING | 22125.00 |
| MARTIN | |
| MILLER | |
| SCOTT | 1100.00 |
| SMITH | |
| TURNER | |
| WARD | |
+--------+----------+
(14 rows)

!ok

EnumerableCalc(expr#0..3=[{inputs}], ENAME=[$t1], DEEP2SAL=[$t3])
EnumerableMergeJoin(condition=[=($0, $2)], joinType=[left])
EnumerableCalc(expr#0..7=[{inputs}], proj#0..1=[{exprs}])
EnumerableTableScan(table=[[scott, EMP]])
EnumerableSort(sort0=[$0], dir0=[ASC])
EnumerableAggregate(group=[{0}], EXPR$0=[SUM($1)])
EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NOT NULL($t4)], expr#6=[0.00:DECIMAL(19, 2)], expr#7=[CASE($t5, $t4, $t6)], expr#8=[+($t2, $t7)], MGR9=[$t1], $f0=[$t8])
EnumerableMergeJoin(condition=[=($0, $3)], joinType=[left])
EnumerableCalc(expr#0..7=[{inputs}], expr#8=[IS NOT NULL($t3)], EMPNO=[$t0], MGR=[$t3], SAL=[$t5], $condition=[$t8])
EnumerableTableScan(table=[[scott, EMP]])
EnumerableSort(sort0=[$0], dir0=[ASC])
EnumerableAggregate(group=[{3}], EXPR$0=[SUM($5)])
EnumerableCalc(expr#0..7=[{inputs}], expr#8=[IS NOT NULL($t3)], proj#0..7=[{exprs}], $condition=[$t8])
EnumerableTableScan(table=[[scott, EMP]])
!plan

# [CALCITE-1494] Inefficient plan for correlated sub-queries
# Plan must have only one scan each of emp and dept.
select sal
Expand Down

0 comments on commit 7481b85

Please sign in to comment.