[query] no sharing in utilities#15520
Conversation
5e516f2 to
db2a965
Compare
967d1bc to
f4aea9c
Compare
| if tuple_field_type not in [hl.tint32, hl.tint64]: | ||
| raise TypeError(f"Argument {i} of reshape needs to be an integer, got {tuple_field_type}.") | ||
| shape_ir = hl.or_missing(hl.is_defined(shape), hl.tuple([hl.int64(i) for i in shape]))._ir | ||
| shape_ir = hl.bind(lambda s: hl.or_missing(hl.is_defined(s), hl.tuple([hl.int64(i) for i in s])), shape)._ir |
There was a problem hiding this comment.
While not recomputing shape is desirable, not including this line lead to the following failure:
E Java stack trace:
E java.lang.RuntimeException: requiredness mismatch: EC=true / Analysis=false
E SNDArrayPointer(PCNDArray[+PInt64,4])
E
E %1 = EncodedLiteral [Tuple[Int32,Int32,Int32,Int32]]
E !2 = GetTupleElement(%1) [0]
E !3 = Cast(!2) [Int64]
E !4 = GetTupleElement(%1) [1]
E !5 = Cast(!4) [Int64]
E !6 = GetTupleElement(%1) [2]
E !7 = Cast(!6) [Int64]
E !8 = GetTupleElement(%1) [3]
E !9 = Cast(!8) [Int64]
E !10 = MakeTuple(!3, !5, !7, !9) [(0 1 2 3)]
E NDArrayReshape(#undefined_ref, !10) [524]
E
E at is.hail.expr.ir.Emit.emitI(Emit.scala:3369)
E at is.hail.expr.ir.Emit.emitInNewBuilder$1(Emit.scala:1011)
E at is.hail.expr.ir.Emit.$anonfun$emitI$41(Emit.scala:1218)
E at is.hail.expr.ir.EmitCode$.fromI(Emit.scala:553)
E at is.hail.expr.ir.Emit.$anonfun$emitI$40(Emit.scala:1218)
E at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286)
E at scala.collection.Iterator.foreach(Iterator.scala:943)
E at scala.collection.Iterator.foreach$(Iterator.scala:943)
E at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
E at scala.collection.IterableLike.foreach(IterableLike.scala:74)
E at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
E at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
E at scala.collection.TraversableLike.map(TraversableLike.scala:286)
E at scala.collection.TraversableLike.map$(TraversableLike.scala:279)
E at scala.collection.AbstractTraversable.map(Traversable.scala:108)
E at is.hail.expr.ir.Emit.emitI(Emit.scala:1217)
E at is.hail.expr.ir.Emit.$anonfun$emitSplitMethod$1(Emit.scala:731)
E at is.hail.expr.ir.Emit.$anonfun$emitSplitMethod$1$adapted(Emit.scala:729)
E at is.hail.expr.ir.EmitCodeBuilder$.scoped(EmitCodeBuilder.scala:21)
E at is.hail.expr.ir.EmitCodeBuilder$.scopedVoid(EmitCodeBuilder.scala:31)
E at is.hail.expr.ir.EmitMethodBuilder.voidWithBuilder(EmitClassBuilder.scala:1263)
E at is.hail.expr.ir.Emit.emitSplitMethod(Emit.scala:729)
E at is.hail.expr.ir.Emit.emitInSeparateMethod(Emit.scala:754)
E at is.hail.expr.ir.Emit.emitI(Emit.scala:988)
E at is.hail.expr.ir.Emit.emitInNewBuilder$1(Emit.scala:1011)
E at is.hail.expr.ir.Emit.$anonfun$emitI$41(Emit.scala:1218)
E at is.hail.expr.ir.EmitCode$.fromI(Emit.scala:553)
E at is.hail.expr.ir.Emit.$anonfun$emitI$40(Emit.scala:1218)
E at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286)
E at scala.collection.Iterator.foreach(Iterator.scala:943)
E at scala.collection.Iterator.foreach$(Iterator.scala:943)
E at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
E at scala.collection.IterableLike.foreach(IterableLike.scala:74)
E at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
E at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
E at scala.collection.TraversableLike.map(TraversableLike.scala:286)
E at scala.collection.TraversableLike.map$(TraversableLike.scala:279)
E at scala.collection.AbstractTraversable.map(Traversable.scala:108)
E at is.hail.expr.ir.Emit.emitI(Emit.scala:1217)
E at is.hail.expr.ir.Emit.emitI$2(Emit.scala:1001)
E at is.hail.expr.ir.Emit.emitI(Emit.scala:1118)
E at is.hail.expr.ir.Emit.emitInNewBuilder$1(Emit.scala:1011)
E at is.hail.expr.ir.Emit.$anonfun$emitI$41(Emit.scala:1218)
E at is.hail.expr.ir.EmitCode$.fromI(Emit.scala:553)
E at is.hail.expr.ir.Emit.$anonfun$emitI$40(Emit.scala:1218)
E at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286)
E at scala.collection.Iterator.foreach(Iterator.scala:943)
E at scala.collection.Iterator.foreach$(Iterator.scala:943)
E at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
E at scala.collection.IterableLike.foreach(IterableLike.scala:74)
E at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
E at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
E at scala.collection.TraversableLike.map(TraversableLike.scala:286)
E at scala.collection.TraversableLike.map$(TraversableLike.scala:279)
E at scala.collection.AbstractTraversable.map(Traversable.scala:108)
E at is.hail.expr.ir.Emit.emitI(Emit.scala:1217)
E at is.hail.expr.ir.Emit$.$anonfun$apply$6(Emit.scala:137)
E at is.hail.expr.ir.EmitCodeBuilder$.scoped(EmitCodeBuilder.scala:21)
E at is.hail.expr.ir.EmitCodeBuilder$.scopedCode(EmitCodeBuilder.scala:26)
E at is.hail.expr.ir.EmitMethodBuilder.emitWithBuilder(EmitClassBuilder.scala:1260)
E at is.hail.expr.ir.WrappedEmitMethodBuilder.emitWithBuilder(EmitClassBuilder.scala:1311)
E at is.hail.expr.ir.WrappedEmitMethodBuilder.emitWithBuilder$(EmitClassBuilder.scala:1311)
E at is.hail.expr.ir.EmitFunctionBuilder.emitWithBuilder(EmitClassBuilder.scala:1328)
E at is.hail.expr.ir.Emit$.$anonfun$apply$1(Emit.scala:132)
E at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:99)
E at is.hail.backend.ExecuteContext.time(ExecuteContext.scala:167)
E at is.hail.expr.ir.Emit$.apply(Emit.scala:111)
E at is.hail.expr.ir.CompileOps.$anonfun$Impl$5(Compile.scala:133)
E at scala.collection.mutable.MapLike.getOrElseUpdate(MapLike.scala:209)
E at scala.collection.mutable.MapLike.getOrElseUpdate$(MapLike.scala:206)
E at scala.collection.mutable.AbstractMap.getOrElseUpdate(Map.scala:85)
E at is.hail.expr.ir.CompileOps.$anonfun$Impl$1(Compile.scala:108)
E at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:99)
E at is.hail.backend.ExecuteContext.time(ExecuteContext.scala:167)
E at is.hail.expr.ir.CompileOps.Impl(Compile.scala:90)
E at is.hail.expr.ir.CompileOps.Compile(Compile.scala:46)
E at is.hail.expr.ir.CompileOps.Compile$(Compile.scala:38)
E at is.hail.expr.ir.package$.Compile(package.scala:34)
E at is.hail.expr.ir.CompileAndEvaluate$.$anonfun$_apply$1(CompileAndEvaluate.scala:72)
E at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:99)
E at is.hail.backend.ExecuteContext.time(ExecuteContext.scala:167)
E at is.hail.expr.ir.CompileAndEvaluate$._apply(CompileAndEvaluate.scala:49)
E at is.hail.backend.spark.SparkBackend.$anonfun$execute$1(SparkBackend.scala:363)
E at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:99)
E at is.hail.backend.ExecuteContext.time(ExecuteContext.scala:167)
E at is.hail.backend.spark.SparkBackend.execute(SparkBackend.scala:348)
E at is.hail.backend.driver.BackendRpc.$anonfun$runRpc$2(BackendRpc.scala:99)
E at is.hail.backend.driver.BackendRpc.withRegisterSerializedFns(BackendRpc.scala:173)
E at is.hail.backend.driver.BackendRpc.$anonfun$runRpc$1(BackendRpc.scala:97)
E at is.hail.backend.ExecuteContext$.$anonfun$scoped$3(ExecuteContext.scala:94)
E at is.hail.utils.package$.using(package.scala:492)
E at is.hail.backend.ExecuteContext$.$anonfun$scoped$2(ExecuteContext.scala:94)
E at is.hail.utils.package$.using(package.scala:492)
E at is.hail.annotations.RegionPool.scopedRegion(RegionPool.scala:169)
E at is.hail.backend.ExecuteContext$.$anonfun$scoped$1(ExecuteContext.scala:77)
E at is.hail.utils.package$.using(package.scala:492)
E at is.hail.annotations.RegionPool$.scoped(RegionPool.scala:16)
E at is.hail.backend.ExecuteContext$.scoped(ExecuteContext.scala:76)
E at is.hail.backend.driver.Py4JQueryDriver.$anonfun$withExecuteContext$1(Py4JQueryDriver.scala:354)
E at is.hail.utils.ExecutionTimer$.time(ExecutionTimer.scala:16)
E at is.hail.backend.driver.Py4JQueryDriver.is$hail$backend$driver$Py4JQueryDriver$$withExecuteContext(Py4JQueryDriver.scala:336)
E at is.hail.backend.driver.Py4JQueryDriver$$anon$1$Context$.scoped(Py4JQueryDriver.scala:444)
E at is.hail.backend.driver.Py4JQueryDriver$$anon$1$Context$.scoped(Py4JQueryDriver.scala:442)
E at is.hail.backend.driver.BackendRpc.runRpc(BackendRpc.scala:83)
E at is.hail.backend.driver.BackendRpc.runRpc$(BackendRpc.scala:79)
E at is.hail.backend.driver.Py4JQueryDriver$$anon$1.runRpc(Py4JQueryDriver.scala:393)
E at is.hail.backend.driver.Py4JQueryDriver$$anon$1.$anonfun$new$1(Py4JQueryDriver.scala:452)
E at jdk.httpserver/com.sun.net.httpserver.Filter$Chain.doFilter(Filter.java:77)
E at jdk.httpserver/sun.net.httpserver.AuthFilter.doFilter(AuthFilter.java:82)
E at jdk.httpserver/com.sun.net.httpserver.Filter$Chain.doFilter(Filter.java:80)
E at jdk.httpserver/sun.net.httpserver.ServerImpl$Exchange$LinkHandler.handle(ServerImpl.java:848)
E at jdk.httpserver/com.sun.net.httpserver.Filter$Chain.doFilter(Filter.java:77)
E at jdk.httpserver/sun.net.httpserver.ServerImpl$Exchange.run(ServerImpl.java:817)
E at jdk.httpserver/sun.net.httpserver.ServerImpl$DefaultExecutor.execute(ServerImpl.java:201)
E at jdk.httpserver/sun.net.httpserver.ServerImpl$Dispatcher.handle(ServerImpl.java:560)
E at jdk.httpserver/sun.net.httpserver.ServerImpl$Dispatcher.run(ServerImpl.java:525)
E at java.base/java.lang.Thread.run(Thread.java:829)
There was a problem hiding this comment.
Why does this duplicate work? if anything it should do less work as we are evaluating/binding shape before getting all its elements.
f4aea9c to
a34828c
Compare
| value.isInstanceOf[Ref] || | ||
| value.isInstanceOf[In] || | ||
| (IsConstant(value) && !value.isInstanceOf[Str]) || | ||
| value.isInstanceOf[Atom] || |
There was a problem hiding this comment.
Inlining Constants can lead to significant IR bloat from Literals and EncodedLiteral.
We don't have Simplifyrules that match on either literal type and so I'm unaware of any reason to inline them.
Note that we pay special attention to literals in Emit to ensure we don't duplicate them in generated code. I'm not sure that this is required anymore, but I'll leave that for another change.
There was a problem hiding this comment.
Pull request overview
This PR advances the “noSharing in utilities” effort in the query/IR pipeline to better preserve TreeIR invariants across lowerings, reducing unwanted duplication and simplifying binding environments.
Changes:
- Adjusts
ForwardLetsto forward onlyAtoms or single-use expressions (and logs eliminated unused bindings), and tweaks relational let forwarding traversal. - Updates IR definitions/utilities via
ir-gen(notably:Stris no longer anAtom;MakeStreambecomes typed) and adds aMakeStream.singlehelper. - Simplifies
Blockbinding environment construction and updates a Python NDArray reshape path to avoid duplicating evaluation of tuple shapes.
Reviewed changes
Copilot reviewed 7 out of 7 changed files in this pull request and generated 1 comment.
Show a summary per file
| File | Description |
|---|---|
hail/python/hail/expr/expressions/typed_expressions.py |
Uses hl.bind to avoid repeated evaluation of tuple shape when building reshape IR. |
hail/hail/src/is/hail/expr/ir/NormalizeNames.scala |
Improves free-variable diagnostic message (now includes ref type). |
hail/hail/src/is/hail/expr/ir/IR.scala |
Adds MakeStream.single helper in the companion extension. |
hail/hail/src/is/hail/expr/ir/ForwardRelationalLets.scala |
Switches to preOrder traversal when counting relational ref uses. |
hail/hail/src/is/hail/expr/ir/ForwardLets.scala |
Updates forwarding policy (Atoms always, otherwise only single-use) and logs eliminated unused bindings. |
hail/hail/src/is/hail/expr/ir/Binds.scala |
Refactors/simplifies Block child binding environment construction. |
hail/hail/ir-gen/src/Main.scala |
Stops treating Str as an Atom and marks MakeStream as typed in generated IR. |
Comments suppressed due to low confidence (1)
hail/hail/src/is/hail/expr/ir/NormalizeNames.scala:81
typonRefcan benull(e.g., Parser constructsRef(id, null)), so interpolating${typ._toPretty}can throw aNullPointerExceptionwhile trying to raise the intended "free variable" error. Guard against a null type here so the diagnostic is reliable.
case Ref(name, typ) =>
val newName = env.eval.lookupOption(name).getOrElse {
if (!freeVariables.contains(name)) throw new RuntimeException(
s"found free variable in normalize: $name: ${typ._toPretty}; ${env.pretty(x => x.str)}"
)
else name
}
done(Ref(newName, typ))
💡 Add Copilot custom instructions for smarter, more guided reviews. Learn how to get started.
| def shouldForward(value: IR, refs: Set[RefEquality[BaseRef]], base: Block, scope: Scope) | ||
| : Boolean = | ||
| IsPure(value) && ( | ||
| value.isInstanceOf[Ref] || | ||
| value.isInstanceOf[In] || | ||
| (IsConstant(value) && !value.isInstanceOf[Str]) || | ||
| value.isInstanceOf[Atom] || | ||
| refs.isEmpty || | ||
| (refs.size == 1 && | ||
| nestingDepth.lookupRef(refs.head) == nestingDepth.lookupBinding(base, scope) && |
Partially resolves #13250
Succeeds #15500
This change is one in a series that aims to preserve the TreeIR invariant throught the various lowerings, culminating in the removal of noSharing.
In this change:
ForwardLets
Atoms or expressions with a single use only.Str is no longer an atom to prevent code-bloat
Binds for Block is slightly simplified.
This change does not affect the broad-managed batch service in gcp.