build.sbt (23 additions & 22 deletions)

@@ -116,29 +116,20 @@ lazy val root = (project in file("."))

     Compile / unmanagedSourceDirectories += (Compile / baseDirectory).value / "src" / "main" / s"scala-spark-$sparkMajorVer",
 
-    // Assembly settings
-    assembly / test := {}, // No tests in assembly
-    assemblyPackageScala / assembleArtifact := false,
-    assembly / assemblyMergeStrategy := {
-      case PathList("META-INF", xs @ _*) => MergeStrategy.discard
-      case x if x.endsWith("module-info.class") => MergeStrategy.discard
-      case x =>
-        val oldStrategy = (assembly / assemblyMergeStrategy).value
-        oldStrategy(x)
-    },
Comment on lines -119 to -128 (Contributor Author):

I removed this because I don't think there's any need to run assembly on the root project. Unless you want to keep the ability to manually build a fat JAR?
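
For reference, if a manually built root fat JAR is ever wanted again, the removed block could be restored as-is. A minimal sketch, assuming the sbt-assembly plugin is still enabled for the root project:

```scala
// Hypothetical re-enablement of a manual fat-JAR build on root; these settings
// mirror the ones removed above. With them in place, `sbt assembly` works again.
assembly / test := {}, // don't run tests as part of assembly
assemblyPackageScala / assembleArtifact := false, // keep scala-library out of the JAR
assembly / assemblyMergeStrategy := {
  case PathList("META-INF", xs @ _*) => MergeStrategy.discard // drop META-INF entries
  case x if x.endsWith("module-info.class") => MergeStrategy.discard // drop JPMS descriptors
  case x =>
    // fall back to the previously configured strategy for everything else
    val oldStrategy = (assembly / assemblyMergeStrategy).value
    oldStrategy(x)
},
```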

     Test / packageBin / publishArtifact := false,
     Test / packageDoc / publishArtifact := false,
     Test / packageSrc / publishArtifact := false,
     Compile / packageBin / publishArtifact := true,
     Compile / packageDoc / publishArtifact := true,
     Compile / packageSrc / publishArtifact := true)

-lazy val connect = (project in file("graphframes-connect"))
+// Dedicated project for creating the shaded JAR that doesn't get published
+lazy val connectAssembly = (project in file("graphframes-connect"))
   .dependsOn(root)
   .settings(
-    name := s"graphframes-connect-assembly",
-    moduleName := s"graphframes-connect-spark${sparkMajorVer}",
     commonSetting,
+    name := s"graphframes-connect",
+    moduleName := s"${name.value}-spark${sparkMajorVer}",
     Compile / unmanagedSourceDirectories += (Compile / baseDirectory).value / "src" / "main" / s"scala-spark-$sparkMajorVer",
     Compile / PB.targets := Seq(PB.gens.java -> (Compile / sourceManaged).value),
     Compile / PB.includePaths ++= Seq(file("src/main/protobuf")),
@@ -147,8 +138,8 @@ lazy val connect = (project in file("graphframes-connect"))
"org.apache.spark" %% "spark-connect" % sparkVer % "provided" cross CrossVersion.for3Use2_13),

// Assembly and shading
assembly / assemblyJarName := s"${moduleName.value}_${(scalaBinaryVersion).value}-${version.value}.jar",
assembly / test := {},
assemblyPackageScala / assembleArtifact := false,
assembly / assemblyShadeRules := Seq(
ShadeRule.rename("com.google.protobuf.**" -> protobufShadingPattern).inAll),
assembly / assemblyMergeStrategy := {
@@ -157,18 +148,28 @@ lazy val connect = (project in file("graphframes-connect"))
       case x if x.endsWith("module-info.class") => MergeStrategy.discard
       case x => MergeStrategy.first
     },
-    assembly / assemblyExcludedJars := (Compile / fullClasspath).value.filter { className =>
-      className.data
-        .getName()
-        .contains("scala-library-") || className.data
-        .getName()
-        .contains("slf4j-api-")
+    assembly / assemblyExcludedJars := {
+      val cp = (assembly / fullClasspath).value
+      val allowedPrefixes = Set("protobuf-java")
+      cp.filter { f =>
+        !allowedPrefixes.exists(prefix => f.data.getName.startsWith(prefix))
+      }
     },
-    publish / skip := false,
+    publish / skip := true,
+    Compile / packageBin := assembly.value,
     Test / packageBin / publishArtifact := false,
     Test / packageDoc / publishArtifact := false,
     Test / packageSrc / publishArtifact := false,
-    Compile / packageBin / publishArtifact := true,
+    Compile / packageBin / publishArtifact := false,
     Compile / packageDoc / publishArtifact := false,
     Compile / packageSrc / publishArtifact := false)
 
+// Publish the shaded JAR with the correct dependencies in the POM
+lazy val connect = project
+  .dependsOn(root)
+  .settings(
+    commonSetting,
+    name := s"graphframes-connect",
+    moduleName := s"${name.value}-spark${sparkMajorVer}",
+    Compile / packageBin := (connectAssembly / Compile / assembly).value
+  )
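
This two-project split is a common sbt pattern for publishing shaded artifacts: connectAssembly builds the shaded JAR but is never published, while connect owns the published POM, which is generated from its own ordinary dependency graph, and swaps the shaded JAR in as its packageBin output. A minimal self-contained sketch of the pattern, with hypothetical project names (impl, shaded, published), not this build's actual ones:

```scala
// Sketch of the publish-a-shaded-JAR pattern (hypothetical names).
lazy val impl = project // the real code lives here

lazy val shaded = project // produces the shaded fat JAR; never published
  .dependsOn(impl)
  .settings(
    publish / skip := true,
    assembly / assemblyShadeRules := Seq(
      ShadeRule.rename("com.google.protobuf.**" -> "org.example.shaded.protobuf.@1").inAll))

lazy val published = project // thin module that owns the published POM
  .dependsOn(impl)
  .settings(
    // Substitute the shaded fat JAR for this module's own packageBin output,
    // so publishing `published` uploads the shaded JAR under its coordinates.
    Compile / packageBin := (shaded / Compile / assembly).value)
```

Because the POM comes from the thin module rather than the assembly project, downstream consumers see the normal unshaded dependency list while receiving the shaded bytes.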
python/dev/build_jar.py (2 additions & 2 deletions)

@@ -16,9 +16,9 @@ def build(spark_versions: Sequence[str] = ["3.5.5"]):
         sbt_executable,
         f"-Dspark.version={spark_version}",
         "clean",
-        "assembly",
+        "package",
         "connect/clean",
-        "connect/assembly"
+        "connect/package"
     ]
     sbt_build = subprocess.Popen(
         sbt_build_command,
python/tests/conftest.py (2 additions & 2 deletions)

@@ -36,7 +36,7 @@ def get_gf_jar_locations() -> Tuple[str, str]:
     core_jar: Optional[str] = None
     connect_jar: Optional[str] = None
 
-    for pp in core_dir.glob("graphframes-assembly-*.jar"):
+    for pp in core_dir.glob(f"graphframes-spark{spark_major_version}*.jar"):
         assert isinstance(pp, pathlib.PosixPath)  # type checking
         core_jar = str(pp.absolute())
 
@@ -45,7 +45,7 @@ def get_gf_jar_locations() -> Tuple[str, str]:
f"Failed to find graphframes jar for Spark {spark_major_version} in {core_dir}"
)

for pp in connect_dir.glob("graphframes-connect-assembly-*.jar"):
for pp in connect_dir.glob(f"graphframes-connect-spark{spark_major_version}*.jar"):
assert isinstance(pp, pathlib.PosixPath) # type checking
connect_jar = str(pp.absolute())

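The updated globs line up with sbt's default artifact naming, which is roughly {moduleName}_{scalaBinaryVersion}-{version}.jar; with moduleName set from s"${name.value}-spark${sparkMajorVer}", the packaged JARs now start with graphframes-spark3 and graphframes-connect-spark3 instead of the old *-assembly-* names. A small illustration with assumed example values (not the build's actual version):

```scala
// Illustration of sbt's default JAR naming: {moduleName}_{scalaBinVersion}-{version}.jar
val moduleName = "graphframes-connect-spark3"
val scalaBinVersion = "2.12"
val version = "0.9.0" // hypothetical version number
val jarName = s"${moduleName}_$scalaBinVersion-$version.jar"
// => graphframes-connect-spark3_2.12-0.9.0.jar, which the test glob
//    f"graphframes-connect-spark{spark_major_version}*.jar" matches
assert(jarName == "graphframes-connect-spark3_2.12-0.9.0.jar")
```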