Wednesday, March 25, 2020

Spark Submit with Jar File Creation

build.sbt

name := "SparkNowTest"
version := "0.1"
scalaVersion := "2.12.9"
// https://mvnrepository.com/artifact/org.apache.spark/spark-core
libraryDependencies += "org.apache.spark" %% "spark-sql" % "2.4.2" % "provided"
import sbtassembly.MergeStrategy
assemblyMergeStrategy in assembly := {
  // Discard duplicate package-info classes that appear in multiple Hadoop jars
  case PathList("org", "apache", "hadoop", "yarn", "factories", "package-info.class") => MergeStrategy.discard
  case PathList("org", "apache", "hadoop", "yarn", "provider", "package-info.class") => MergeStrategy.discard
  case PathList("org", "apache", "hadoop", "util", "provider", "package-info.class") => MergeStrategy.discard
  // Keep the first copy of Spark's unused stub class
  case PathList("org", "apache", "spark", "unused", "UnusedStubClass.class") => MergeStrategy.first
  // Keep the first SLF4J binding found on the classpath
  case PathList("org", "slf4j", "impl", xs @ _*) => MergeStrategy.first
  // Everything else falls through to the default strategy
  case x =>
    val oldStrategy = (assemblyMergeStrategy in assembly).value
    oldStrategy(x)
}
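The spark-sql dependency is marked "provided" so that Spark itself is left out of the fat jar; spark-submit supplies those classes at runtime. The merge strategy tells sbt-assembly how to resolve files that appear in more than one dependency jar, discarding the duplicate package-info classes and keeping the first copy of the other listed conflicts.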
plugins.sbt

addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.5")
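Build the Jar:

With the plugin in place, build the fat jar with the assembly task; by default sbt-assembly writes it to target/scala-2.12/ as <name>-assembly-<version>.jar, which matches the path used in the spark-submit command below:

sbt assembly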

Application Run:

spark-submit --class "com.nagaraju.ReadJSON" --master local[*] target/scala-2.12/SparkNowTest-assembly-0.1.jar
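The application source itself is not shown above; here is a minimal sketch of what the com.nagaraju.ReadJSON entry point might look like (the input path "people.json" is a placeholder):

package com.nagaraju

import org.apache.spark.sql.SparkSession

object ReadJSON {
  def main(args: Array[String]): Unit = {
    // spark-submit supplies the master URL and the Spark classes at runtime
    val spark = SparkSession.builder()
      .appName("ReadJSON")
      .getOrCreate()

    // Read a JSON file into a DataFrame (placeholder path)
    val df = spark.read.json("people.json")
    df.printSchema()
    df.show()

    spark.stop()
  }
}

With --master local[*] the job runs in-process using all available cores; the same jar can be submitted unchanged to a YARN or standalone cluster by changing the --master value.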
