Here is Spark 2.4 code using unionAll
import org.apache.spark.sql.{DataFrame, Dataset}
/** Sample input for the rewrite rule: every call below uses `unionAll`.
  *
  * The calls deliberately cover several syntactic shapes (plain method call,
  * chained calls, and an infix call inside a placeholder lambda passed to
  * `reduce`), on both `DataFrame` and `Dataset[String]` receivers.
  */
object UnionRewrite {
def inSource(
df1: DataFrame,
df2: DataFrame,
df3: DataFrame,
ds1: Dataset[String],
ds2: Dataset[String]
): Unit = {
// Single method call on a DataFrame.
val res1 = df1.unionAll(df2)
// Chained method calls.
val res2 = df1.unionAll(df2).unionAll(df3)
// Infix call inside a placeholder lambda given to `reduce`.
val res3 = Seq(df1, df2, df3).reduce(_ unionAll _)
// Same two shapes again, on typed Datasets.
val res4 = ds1.unionAll(ds2)
val res5 = Seq(ds1, ds2).reduce(_ unionAll _)
}
}
In Spark 3.+, `unionAll` is deprecated.
Here is the equivalent code using `union`:
import org.apache.spark.sql.{DataFrame, Dataset}
/** Expected output of the rewrite rule: the same calls, with `union` in place
  * of `unionAll`.
  *
  * The syntactic shape of each call (plain method call, chained calls, infix
  * call inside a placeholder lambda passed to `reduce`) is kept unchanged; only
  * the method name differs.
  */
object UnionRewrite {
def inSource(
df1: DataFrame,
df2: DataFrame,
df3: DataFrame,
ds1: Dataset[String],
ds2: Dataset[String]
): Unit = {
// Single method call on a DataFrame.
val res1 = df1.union(df2)
// Chained method calls.
val res2 = df1.union(df2).union(df3)
// Infix call inside a placeholder lambda given to `reduce`.
val res3 = Seq(df1, df2, df3).reduce(_ union _)
// Same two shapes again, on typed Datasets.
val res4 = ds1.union(ds2)
val res5 = Seq(ds1, ds2).reduce(_ union _)
}
}
The question is: how can I write a Scalafix rule (using quasiquotes) that replaces `unionAll` with `union`?
I have already implemented the rule without quasiquotes, and it works:
/** Renames calls to deprecated methods to their configured replacements.
  *
  * Two call shapes are rewritten:
  *
  *  1. an ordinary method call, `receiver.deprecated(args)`;
  *  2. an infix call inside a placeholder lambda passed to `reduce`,
  *     e.g. `xs.reduce(_ deprecated _)`.
  *
  * In both cases only the method-name token is replaced; the receiver and
  * the arguments are left untouched.
  *
  * The match is purely by name: no symbol or type information from `doc` is
  * consulted, so any method that merely shares a configured name is renamed too.
  *
  * `config.deprecatedMethod` is used as a lookup from deprecated name to
  * replacement name (presumably a `Map[String, String]` -- confirm against
  * the rule's configuration class).
  *
  * @param doc the semantic document whose tree is scanned
  * @return the combined patch of all renames found in the document
  */
override def fix(implicit doc: SemanticDocument): Patch =
  doc.tree.collect {
    // Shape 1: receiver.deprecated(args)
    case Term.Apply(Term.Select(_, deprecated @ Term.Name(name)), _)
        if config.deprecatedMethod.contains(name) =>
      Patch.replaceTree(deprecated, config.deprecatedMethod(name))

    // Shape 2: xs.reduce(_ deprecated _)
    // The method name is matched exactly. The earlier guard
    // `"reduce".contains(name)` was a substring test, so it also accepted
    // methods named e.g. "red", "duce" or "e".
    case Term.Apply(
          Term.Select(_, Term.Name("reduce")),
          List(
            Term.AnonymousFunction(
              Term.ApplyInfix(_, deprecated @ Term.Name(name), _, _)
            )
          )
        ) if config.deprecatedMethod.contains(name) =>
      Patch.replaceTree(deprecated, config.deprecatedMethod(name))
  }.asPatch
Try the rule
It transforms
into
https://scalacenter.github.io/scalafix/docs/developers/setup.html
https://scalameta.org/docs/trees/quasiquotes.html
https://scalameta.org/docs/semanticdb/guide.html
Your
Ver: 1
implementation erroneously transformed `val unionAll = 42`
into `val union = 42`.
Sadly,
<: Dataset[_]
can't be checked for the infix application, since SemanticDB seems not to have type information in this case (underscore `_`
in a lambda). This seems to be a SemanticDB limitation. If you really needed the subtype check in this case, then maybe you would need a compiler plugin.
Update. We can use multiple rules: first, apply a rule replacing underscore lambdas with parameter lambdas,
then re-compile the code (new
.semanticdb
files will be generated), apply the second rule replacing `unionAll`
with `union`
(if the types correspond), then apply the third rule replacing parameter lambdas back with underscore lambdas.
The 1st and 3rd rules can be syntactic.