From 4b741a1a3ecb11017a19b290cce90e8b62fee464 Mon Sep 17 00:00:00 2001 From: Rob Reeves Date: Thu, 25 Jun 2026 00:35:47 +0000 Subject: [PATCH 1/6] [AURON #2360] Honor config alt keys when reading from SQLConf Auron's ConfigOption alt keys (declared via addAltKey) were silently ignored: getFromSpark only consulted alt keys via ConfigEntry.findEntry (always null for Auron's unregistered options) and then synthesized a ConfigEntryWithDefaultFunction with an empty alternatives list, so only the primary key was ever read from SQLConf. As a result, e.g. setting spark.auron.enable (alt of spark.auron.enabled) had no effect. Pass the spark-prefixed alt keys as the synthesized entry's alternatives so ConfigEntry#readString reads primary +: alternatives, with the primary key taking precedence. Also add a test asserting alt keys are honored. Fixing this makes the test harness's spark.auron.enable=false baseline actually fall back to vanilla Spark, which exposed that acosh(0.0) yields NaN with a different (implementation-defined) bit pattern in each engine; QueryTest compares doubles via Double.doubleToRawLongBits, so update the acosh test to assert NaN-ness for the out-of-domain input rather than exact equality. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../org/apache/auron/AuronFunctionSuite.scala | 13 +++++++++++- .../org/apache/auron/AuronQuerySuite.scala | 21 +++++++++++++++++++ .../SparkAuronConfiguration.java | 15 ++++++++----- 3 files changed, 43 insertions(+), 6 deletions(-) diff --git a/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronFunctionSuite.scala b/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronFunctionSuite.scala index 9f8825087..7dfc7b020 100644 --- a/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronFunctionSuite.scala +++ b/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronFunctionSuite.scala @@ -550,9 +550,20 @@ class AuronFunctionSuite extends AuronQueryTest with BaseAuronSQLSuite { test("acosh null propagation") { withTable("t1") { sql("create table t1(c1 double) using parquet") - sql("insert into t1 values(null), (0.0), (1.0), (2.0)") + sql("insert into t1 values(null), (1.0), (2.0)") + // null propagates to null; in-domain values must match vanilla Spark exactly. checkSparkAnswerAndOperator("select acosh(c1) from t1") } + // Out-of-domain input (acosh is defined on [1, inf)) yields NaN. The IEEE-754 NaN + // bit pattern (sign/payload) is implementation-defined: vanilla Spark and the native + // engine emit different NaN encodings, and QueryTest compares doubles via + // Double.doubleToRawLongBits. So assert NaN-ness here rather than exact equality. 
+ withTable("t2") { + sql("create table t2(c1 double) using parquet") + sql("insert into t2 values(0.0)") + val result = sql("select acosh(c1) from t2").collect() + assert(result.length == 1 && java.lang.Double.isNaN(result(0).getDouble(0))) + } } test("test function least") { diff --git a/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronQuerySuite.scala b/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronQuerySuite.scala index 629f9d601..14340e15b 100644 --- a/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronQuerySuite.scala +++ b/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronQuerySuite.scala @@ -25,6 +25,27 @@ import org.apache.auron.util.AuronTestUtils class AuronQuerySuite extends AuronQueryTest with BaseAuronSQLSuite with AuronSQLTestHelper { import testImplicits._ + test("config alt keys are honored") { + // AURON_ENABLED has primary key "spark.auron.enabled" and alt key "spark.auron.enable". + // Setting either key must take effect (primary takes precedence over alt). + withSQLConf("spark.auron.enabled" -> "false") { + assert(!SparkAuronConfiguration.AURON_ENABLED.get()) + } + withSQLConf("spark.auron.enable" -> "false") { + assert(!SparkAuronConfiguration.AURON_ENABLED.get()) + } + withSQLConf("spark.auron.enabled" -> "true") { + assert(SparkAuronConfiguration.AURON_ENABLED.get()) + } + withSQLConf("spark.auron.enable" -> "true") { + assert(SparkAuronConfiguration.AURON_ENABLED.get()) + } + // Primary key wins when both are set to conflicting values. 
+ withSQLConf("spark.auron.enabled" -> "true", "spark.auron.enable" -> "false") { + assert(SparkAuronConfiguration.AURON_ENABLED.get()) + } + } + test("test partition path has url encoded character") { withTable("t1") { sql( diff --git a/spark-extension/src/main/java/org/apache/auron/spark/configuration/SparkAuronConfiguration.java b/spark-extension/src/main/java/org/apache/auron/spark/configuration/SparkAuronConfiguration.java index 2f1fcc2b3..6ee2a44e6 100644 --- a/spark-extension/src/main/java/org/apache/auron/spark/configuration/SparkAuronConfiguration.java +++ b/spark-extension/src/main/java/org/apache/auron/spark/configuration/SparkAuronConfiguration.java @@ -27,7 +27,7 @@ import org.apache.spark.internal.config.ConfigEntryWithDefaultFunction; import org.apache.spark.sql.internal.SQLConf; import scala.Option; -import scala.collection.immutable.List$; +import scala.collection.mutable.ListBuffer; /** * Spark configuration proxy for Auron. @@ -561,21 +561,26 @@ private <T> T getFromSpark( synchronized (SparkAuronConfiguration.class) { String sparkConfKey = key.startsWith(SPARK_PREFIX) ? key : SPARK_PREFIX + key; + ListBuffer<String> sparkConfAltKeys = new ListBuffer<>(); + configEntry = ConfigEntry.findEntry(sparkConfKey); for (String altKey : altKeys) { String sparkConfAltKey = altKey.startsWith(SPARK_PREFIX) ? altKey : SPARK_PREFIX + altKey; - if (configEntry != null) { - break; + sparkConfAltKeys.$plus$eq(sparkConfAltKey); + if (configEntry == null) { + configEntry = ConfigEntry.findEntry(sparkConfAltKey); } - configEntry = ConfigEntry.findEntry(sparkConfAltKey); } if (configEntry == null) { + // Auron's own options are not registered in Spark's ConfigEntry registry, so the + // alt keys must be passed as the synthesized entry's alternatives list. Otherwise + // ConfigEntry#readString only reads the primary key and the alt keys are ignored. 
+ configEntry = new ConfigEntryWithDefaultFunction<>( sparkConfKey, Option.empty(), "", - List$.MODULE$.empty(), + sparkConfAltKeys.toList(), defaultValueSupplier::get, val -> valueConverter(val, valueClass), String::valueOf, From b606195e818b7520d1230991d481009ac7fefcae Mon Sep 17 00:00:00 2001 From: Rob Reeves Date: Thu, 25 Jun 2026 16:33:07 +0000 Subject: [PATCH 2/6] Remove explanatory comment on synthesized ConfigEntry alternatives Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../auron/spark/configuration/SparkAuronConfiguration.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/spark-extension/src/main/java/org/apache/auron/spark/configuration/SparkAuronConfiguration.java b/spark-extension/src/main/java/org/apache/auron/spark/configuration/SparkAuronConfiguration.java index 6ee2a44e6..f298e883a 100644 --- a/spark-extension/src/main/java/org/apache/auron/spark/configuration/SparkAuronConfiguration.java +++ b/spark-extension/src/main/java/org/apache/auron/spark/configuration/SparkAuronConfiguration.java @@ -573,9 +573,6 @@ private <T> T getFromSpark( } if (configEntry == null) { - // Auron's own options are not registered in Spark's ConfigEntry registry, so the - // alt keys must be passed as the synthesized entry's alternatives list. Otherwise - // ConfigEntry#readString only reads the primary key and the alt keys are ignored. configEntry = new ConfigEntryWithDefaultFunction<>( sparkConfKey, Option.empty(), "", From c6616fc7d044aa0ffaad3b20ba58be2e6f2dc308 Mon Sep 17 00:00:00 2001 From: Rob Reeves Date: Thu, 25 Jun 2026 16:51:18 +0000 Subject: [PATCH 3/6] Make checkSparkAnswerAndOperator tolerant of NaN bit-pattern differences QueryTest compares doubles via Double.doubleToRawLongBits, which is bit-exact. Vanilla Spark and the native engine can produce semantically equal NaNs with different (implementation-defined) bit patterns, so the comparison would spuriously fail. Canonicalize NaN on both sides before comparing. 
This lets the acosh null propagation test keep its original single-query form covering the out-of-domain (NaN) input. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../org/apache/auron/AuronFunctionSuite.scala | 13 +----------- .../org/apache/spark/sql/AuronQueryTest.scala | 20 ++++++++++++++++++- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronFunctionSuite.scala b/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronFunctionSuite.scala index 7dfc7b020..9f8825087 100644 --- a/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronFunctionSuite.scala +++ b/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronFunctionSuite.scala @@ -550,20 +550,9 @@ class AuronFunctionSuite extends AuronQueryTest with BaseAuronSQLSuite { test("acosh null propagation") { withTable("t1") { sql("create table t1(c1 double) using parquet") - sql("insert into t1 values(null), (1.0), (2.0)") - // null propagates to null; in-domain values must match vanilla Spark exactly. + sql("insert into t1 values(null), (0.0), (1.0), (2.0)") checkSparkAnswerAndOperator("select acosh(c1) from t1") } - // Out-of-domain input (acosh is defined on [1, inf)) yields NaN. The IEEE-754 NaN - // bit pattern (sign/payload) is implementation-defined: vanilla Spark and the native - // engine emit different NaN encodings, and QueryTest compares doubles via - // Double.doubleToRawLongBits. So assert NaN-ness here rather than exact equality. 
- withTable("t2") { - sql("create table t2(c1 double) using parquet") - sql("insert into t2 values(0.0)") - val result = sql("select acosh(c1) from t2").collect() - assert(result.length == 1 && java.lang.Double.isNaN(result(0).getDouble(0))) - } } test("test function least") { diff --git a/spark-extension-shims-spark/src/test/scala/org/apache/spark/sql/AuronQueryTest.scala b/spark-extension-shims-spark/src/test/scala/org/apache/spark/sql/AuronQueryTest.scala index 678faea82..ff85c2a73 100644 --- a/spark-extension-shims-spark/src/test/scala/org/apache/spark/sql/AuronQueryTest.scala +++ b/spark-extension-shims-spark/src/test/scala/org/apache/spark/sql/AuronQueryTest.scala @@ -60,7 +60,25 @@ abstract class AuronQueryTest } val dfAuron = dataframe() - checkAnswer(dfAuron, expected) + + // Canonicalize NaN before comparing. The IEEE-754 NaN bit pattern (sign/payload) is + // implementation-defined, so semantically-equal NaNs produced by vanilla Spark and the + // native engine may differ in their raw bits; QueryTest compares doubles via + // Double.doubleToRawLongBits, which would otherwise flag them as mismatches. 
+ def canonicalizeNaN(rows: Seq[Row]): Seq[Row] = rows.map { row => + Row.fromSeq(row.toSeq.map { + case d: Double if d.isNaN => Double.NaN + case f: Float if f.isNaN => Float.NaN + case other => other + }) + } + + QueryTest + .sameRows(canonicalizeNaN(expected), canonicalizeNaN(dfAuron.collect())) + .foreach(msg => fail(s""" + |Results do not match for query: + |${dfAuron.queryExecution} + |$msg""".stripMargin)) if (requireNative) { val plan = stripAQEPlan(dfAuron.queryExecution.executedPlan) From 19b842680898d33120452a65d50d2e525c091bfc Mon Sep 17 00:00:00 2001 From: Rob Reeves Date: Thu, 25 Jun 2026 16:59:15 +0000 Subject: [PATCH 4/6] Reword NaN canonicalization comment in plainer language Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../scala/org/apache/spark/sql/AuronQueryTest.scala | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/spark-extension-shims-spark/src/test/scala/org/apache/spark/sql/AuronQueryTest.scala b/spark-extension-shims-spark/src/test/scala/org/apache/spark/sql/AuronQueryTest.scala index ff85c2a73..c3073837e 100644 --- a/spark-extension-shims-spark/src/test/scala/org/apache/spark/sql/AuronQueryTest.scala +++ b/spark-extension-shims-spark/src/test/scala/org/apache/spark/sql/AuronQueryTest.scala @@ -61,10 +61,12 @@ abstract class AuronQueryTest val dfAuron = dataframe() - // Canonicalize NaN before comparing. The IEEE-754 NaN bit pattern (sign/payload) is - // implementation-defined, so semantically-equal NaNs produced by vanilla Spark and the - // native engine may differ in their raw bits; QueryTest compares doubles via - // Double.doubleToRawLongBits, which would otherwise flag them as mismatches. + // Canonicalize NaN before comparing. There are many valid bit patterns for NaN, and + // vanilla Spark and the native engine don't always pick the same one (e.g. they differ + // only in the NaN sign bit). 
Both results are still NaN, but QueryTest compares doubles + // by raw bits (Double.doubleToRawLongBits), so it would wrongly report a mismatch. + // Replacing every NaN with the canonical NaN makes equal-but-differently-encoded NaNs + // compare equal. def canonicalizeNaN(rows: Seq[Row]): Seq[Row] = rows.map { row => Row.fromSeq(row.toSeq.map { case d: Double if d.isNaN => Double.NaN From 65ddb6c84e5bf8585c11fa806019d93932d8db16 Mon Sep 17 00:00:00 2001 From: Rob Reeves Date: Thu, 25 Jun 2026 17:45:28 +0000 Subject: [PATCH 5/6] Handle acosh NaN bit difference in the test instead of the shared checker Revert checkSparkAnswerAndOperator to plain checkAnswer and instead handle the NaN encoding difference locally in the acosh test. acosh of an out-of-domain input yields NaN, which vanilla Spark and the native engine may encode with different bits; checkAnswer/QueryTest compares doubles by raw bits. Split the test so in-domain/null values are compared numerically, and out-of-domain inputs are compared via the natively-supported isnan (a boolean) so no raw NaN bits are compared. This keeps the shared checker unchanged and avoids relaxing NaN comparison for all callers. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../org/apache/auron/AuronFunctionSuite.scala | 10 ++++++++- .../org/apache/spark/sql/AuronQueryTest.scala | 22 +------------------ 2 files changed, 10 insertions(+), 22 deletions(-) diff --git a/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronFunctionSuite.scala b/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronFunctionSuite.scala index 9f8825087..eb4b86b3a 100644 --- a/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronFunctionSuite.scala +++ b/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronFunctionSuite.scala @@ -550,9 +550,17 @@ class AuronFunctionSuite extends AuronQueryTest with BaseAuronSQLSuite { test("acosh null propagation") { withTable("t1") { sql("create table t1(c1 double) using parquet") - sql("insert into t1 values(null), (0.0), (1.0), (2.0)") + sql("insert into t1 values(null), (1.0), (2.0)") checkSparkAnswerAndOperator("select acosh(c1) from t1") } + withTable("t2") { + sql("create table t2(c1 double) using parquet") + sql("insert into t2 values(0.0), (-1.0)") + // acosh is defined on [1, inf), so out-of-domain inputs yield NaN. Vanilla Spark and the + // native engine may encode that NaN with different bits (checkSparkAnswerAndOperator + // compares doubles by raw bits), so compare NaN-ness via the natively-supported isnan. 
+ checkSparkAnswerAndOperator("select isnan(acosh(c1)) from t2") + } } test("test function least") { diff --git a/spark-extension-shims-spark/src/test/scala/org/apache/spark/sql/AuronQueryTest.scala b/spark-extension-shims-spark/src/test/scala/org/apache/spark/sql/AuronQueryTest.scala index c3073837e..678faea82 100644 --- a/spark-extension-shims-spark/src/test/scala/org/apache/spark/sql/AuronQueryTest.scala +++ b/spark-extension-shims-spark/src/test/scala/org/apache/spark/sql/AuronQueryTest.scala @@ -60,27 +60,7 @@ abstract class AuronQueryTest } val dfAuron = dataframe() - - // Canonicalize NaN before comparing. There are many valid bit patterns for NaN, and - // vanilla Spark and the native engine don't always pick the same one (e.g. they differ - // only in the NaN sign bit). Both results are still NaN, but QueryTest compares doubles - // by raw bits (Double.doubleToRawLongBits), so it would wrongly report a mismatch. - // Replacing every NaN with the canonical NaN makes equal-but-differently-encoded NaNs - // compare equal. - def canonicalizeNaN(rows: Seq[Row]): Seq[Row] = rows.map { row => - Row.fromSeq(row.toSeq.map { - case d: Double if d.isNaN => Double.NaN - case f: Float if f.isNaN => Float.NaN - case other => other - }) - } - - QueryTest - .sameRows(canonicalizeNaN(expected), canonicalizeNaN(dfAuron.collect())) - .foreach(msg => fail(s""" - |Results do not match for query: - |${dfAuron.queryExecution} - |$msg""".stripMargin)) + checkAnswer(dfAuron, expected) if (requireNative) { val plan = stripAQEPlan(dfAuron.queryExecution.executedPlan) From b82e3d54d2c49f91f51f463d389cc3b0978a0eba Mon Sep 17 00:00:00 2001 From: Rob Reeves Date: Fri, 26 Jun 2026 20:58:33 +0000 Subject: [PATCH 6/6] Disable ANSI mode in UnaryMinus test to avoid overflow divergence With the config alt-key fix, checkSparkAnswerAndOperator's baseline now actually runs vanilla Spark. 
On Spark 4.x ANSI mode is enabled by default, so negating Int.MinValue throws ARITHMETIC_OVERFLOW in vanilla Spark while the native engine wraps, causing the comparison to diverge (and the test to fail only on the spark-4.0/4.1 CI jobs). Run the test with spark.sql.ansi.enabled=false so both engines wrap consistently while still exercising the Int.MinValue boundary. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../org/apache/auron/AuronExpressionSuite.scala | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronExpressionSuite.scala b/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronExpressionSuite.scala index 303a0d366..b0006d363 100644 --- a/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronExpressionSuite.scala +++ b/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronExpressionSuite.scala @@ -33,11 +33,16 @@ class AuronExpressionSuite extends AuronQueryTest with BaseAuronSQLSuite { } test("UnaryMinus") { - withTable("t1") { - sql("create table t1(col1 int) using parquet") - sql( - "insert into t1 values(1), (2), (3), (3), (-1), (0), (null), (2147483647), (-2147483648)") - checkSparkAnswerAndOperator("SELECT negative(col1), -(col1) FROM t1") + // Negating Int.MinValue overflows. Under ANSI mode (default in Spark 4.x) vanilla Spark + // throws while the native engine wraps, so the comparison diverges. Disable ANSI so both + // engines wrap consistently and the boundary value can still be exercised. + withSQLConf("spark.sql.ansi.enabled" -> "false") { + withTable("t1") { + sql("create table t1(col1 int) using parquet") + sql( + "insert into t1 values(1), (2), (3), (3), (-1), (0), (null), (2147483647), (-2147483648)") + checkSparkAnswerAndOperator("SELECT negative(col1), -(col1) FROM t1") + } } } }