diff --git a/.github/workflows/deps.yml b/.github/workflows/deps.yml index 8a2c008f2..239d2af0c 100644 --- a/.github/workflows/deps.yml +++ b/.github/workflows/deps.yml @@ -90,6 +90,7 @@ jobs: - 'flink-1.19' - 'flink-1.20' - 'mr' + - 'tez' steps: - uses: actions/checkout@v4 - name: Setup JDK 8 diff --git a/README.md b/README.md index a3949e4c4..5305181fc 100644 --- a/README.md +++ b/README.md @@ -378,6 +378,25 @@ Meanwhile, configure the following settings in YARN and MapReduce config. **Note**: `MRAppMasterWithCeleborn` supports setting `mapreduce.celeborn.master.endpoints` via environment variable `CELEBORN_MASTER_ENDPOINTS`. Meanwhile, `MRAppMasterWithCeleborn` disables `yarn.app.mapreduce.am.job.recovery.enable` and sets `mapreduce.job.reduce.slowstart.completedmaps` to 1 by default. + +### Deploy Tez client +Copy `$CELEBORN_HOME/tez/*.jar` into `mapreduce.application.classpath` and `yarn.application.classpath`. +Meanwhile, configure the following settings in the hive-site config, and append `org.apache.tez.dag.app.CelebornDagAppMaster` to your `tez.am.launch.cmd-opts` in `tez-site.xml`: +```properties +tez.celeborn.master.endpoints <master-1-host>:9097 +``` +**Note**: `CelebornDagAppMaster` supports setting `tez.celeborn.master.endpoints` via environment variable `CELEBORN_MASTER_ENDPOINTS`. +Meanwhile, `CelebornDagAppMaster` will override the following configs in `tez-site.xml`. + +| config item | override value | +|----------------------------------------------------------|----------------| +| tez.shuffle-vertex-manager.min-src-fraction | 1.0f | +| tez.shuffle-vertex-manager.max-src-fraction | 1.0f | +| tez.runtime.transfer.data-via-events.enabled | false | +| tez.runtime.transfer.data-via-events.support.in-mem.file | false | +| tez.runtime.pipelined-shuffle.enabled | false | +| tez.am.node-unhealthy-reschedule-tasks | false | + ### Best Practice If you want to set up a production-ready Celeborn cluster, your cluster should have at least 3 masters and at least 4 workers. 
Masters and works can be deployed on the same node but should not deploy multiple masters or workers on the same node. diff --git a/build/make-distribution.sh b/build/make-distribution.sh index 1c9bbe6e5..d49eab487 100755 --- a/build/make-distribution.sh +++ b/build/make-distribution.sh @@ -243,11 +243,29 @@ function build_mr_client { "${BUILD_COMMAND[@]}" - ## flink spark client jars + ## build mr client jars mkdir -p "$DIST_DIR/mr" cp "$PROJECT_DIR"/client-mr/mr-shaded/target/celeborn-client-mr-shaded_${SCALA_VERSION}-$VERSION.jar "$DIST_DIR/mr/" } +function build_tez_client { + VERSION=$("$MVN" help:evaluate -Dexpression=project.version $@ 2>/dev/null \ + | grep -v "INFO" \ + | grep -v "WARNING" \ + | tail -n 1) + BUILD_COMMAND=("$MVN" clean package $MVN_DIST_OPT -pl :celeborn-client-tez-shaded_${SCALA_VERSION} -am $@) + + # Actually build the jar + echo -e "\nBuilding with..." + echo -e "\$ ${BUILD_COMMAND[@]}\n" + + "${BUILD_COMMAND[@]}" + + ## build tez client jars + mkdir -p "$DIST_DIR/tez" + cp "$PROJECT_DIR"/client-tez/tez-shaded/target/celeborn-client-tez-shaded_${SCALA_VERSION}-$VERSION.jar "$DIST_DIR/tez/" +} + ######################### # sbt functions # @@ -332,10 +350,11 @@ if [ "$SBT_ENABLED" == "true" ]; then sbt_build_client -Pflink-1.19 sbt_build_client -Pflink-1.20 sbt_build_client -Pmr + sbt_build_client -Ptez else echo "build client with $@" ENGINE_COUNT=0 - ENGINES=("spark" "flink" "mr") + ENGINES=("spark" "flink" "mr" "tez") for single_engine in ${ENGINES[@]} do echo $single_engine @@ -367,12 +386,13 @@ else build_flink_client -Pflink-1.19 build_flink_client -Pflink-1.20 build_mr_client -Pmr + build_tez_client -Ptez else ## build release package on demand build_service $@ echo "build client with $@" ENGINE_COUNT=0 - ENGINES=("spark" "flink" "mr") + ENGINES=("spark" "flink" "mr" "tez") for single_engine in ${ENGINES[@]} do echo $single_engine @@ -395,6 +415,9 @@ else elif [[ $@ == *"mr"* ]]; then echo "build mr clients" build_mr_client $@ + 
elif [[ $@ == *"tez"* ]]; then + echo "build tez clients" + build_tez_client $@ fi fi fi diff --git a/client-tez/tez-shaded/pom.xml b/client-tez/tez-shaded/pom.xml new file mode 100644 index 000000000..e192d3bdb --- /dev/null +++ b/client-tez/tez-shaded/pom.xml @@ -0,0 +1,163 @@ + + + + 4.0.0 + + org.apache.celeborn + celeborn-parent_${scala.binary.version} + ${project.version} + ../../pom.xml + + + celeborn-client-tez-shaded_${scala.binary.version} + jar + Celeborn Client shaded for Tez + + + + org.apache.celeborn + celeborn-client-tez_${scala.binary.version} + ${project.version} + + + + + + + org.apache.maven.plugins + maven-shade-plugin + + + + com.google.protobuf + ${shading.prefix}.com.google.protobuf + + + com.google.common + ${shading.prefix}.com.google.common + + com.google.common.util.concurrent.* + + + + io.netty + ${shading.prefix}.io.netty + + + org.apache.commons + ${shading.prefix}.org.apache.commons + + + org.roaringbitmap + ${shading.prefix}.org.roaringbitmap + + + org.roaringbitmap + ${shading.prefix}.org.roaringbitmap + + + io.dropwizard.metrics + ${shading.prefix}.io.dropwizard.metrics + + + com.codahale.metrics + ${shading.prefix}.com.codahale.metrics + + + com.github.luben + ${shading.prefix}.com.github.luben + + + + + org.apache.celeborn:* + com.google.protobuf:protobuf-java + com.google.guava:guava + com.google.guava:failureaccess + io.netty:* + org.apache.commons:commons-lang3 + org.roaringbitmap:RoaringBitmap + org.scala-lang:scala-library + org.scala-lang:scala-reflect + org.lz4:lz4-java + io.dropwizard.metrics:metrics-core + com.codahale.metrics:metrics-core + com.github.luben:zstd-jni + + + + + *:* + + **/*.proto + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + **/log4j.properties + META-INF/LICENSE.txt + META-INF/NOTICE.txt + LICENSE.txt + NOTICE.txt + + + + + + + + + + org.apache.maven.plugins + maven-antrun-plugin + ${maven.plugin.antrun.version} + + + rename-native-library + + run + + package + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + diff --git a/client-tez/tez-shaded/src/main/resources/META-INF/LICENSE b/client-tez/tez-shaded/src/main/resources/META-INF/LICENSE new file mode 100644 index 000000000..ec665dcc8 --- /dev/null +++ b/client-tez/tez-shaded/src/main/resources/META-INF/LICENSE @@ -0,0 +1,253 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ + +------------------------------------------------------------------------------------ +This project bundles the following dependencies under the Apache License 2.0 (http://www.apache.org/licenses/LICENSE-2.0.txt): + + +Apache License 2.0 +-------------------------------------- + +com.google.guava:failureaccess +com.google.guava:guava +io.netty:netty +io.netty:netty-all +io.netty:netty-buffer +io.netty:netty-codec +io.netty:netty-codec-dns +io.netty:netty-codec-haproxy +io.netty:netty-codec-http +io.netty:netty-codec-http2 +io.netty:netty-codec-memcache +io.netty:netty-codec-mqtt +io.netty:netty-codec-redis +io.netty:netty-codec-smtp +io.netty:netty-codec-socks +io.netty:netty-codec-stomp +io.netty:netty-codec-xml +io.netty:netty-common +io.netty:netty-handler +io.netty:netty-handler-proxy +io.netty:netty-resolver +io.netty:netty-resolver-dns +io.netty:netty-transport +io.netty:netty-transport-classes-epoll +io.netty:netty-transport-classes-kqueue +io.netty:netty-transport-native-epoll +io.netty:netty-transport-native-kqueue +io.netty:netty-transport-native-unix-common +io.netty:netty-transport-rxtx +io.netty:netty-transport-sctp +io.netty:netty-transport-udt +org.apache.commons:commons-lang3 +org.lz4:lz4-java +org.roaringbitmap:RoaringBitmap +org.scala-lang:scala-library + + +BSD +------------ +See licenses/LICENSE-protobuf.txt for details. +com.google.protobuf:protobuf-java +See licenses/LICENSE-zstd-jni.txt for details. +com.github.luben:zstd-jni diff --git a/client-tez/tez-shaded/src/main/resources/META-INF/NOTICE b/client-tez/tez-shaded/src/main/resources/META-INF/NOTICE new file mode 100644 index 000000000..ffad42af1 --- /dev/null +++ b/client-tez/tez-shaded/src/main/resources/META-INF/NOTICE @@ -0,0 +1,62 @@ + +Apache Celeborn +Copyright 2022-2024 The Apache Software Foundation. + +This product includes software developed at +The Apache Software Foundation (https://www.apache.org/). 
+ +Apache Spark +Copyright 2014 and onwards The Apache Software Foundation + +Apache Kyuubi +Copyright 2021-2023 The Apache Software Foundation + +Apache Iceberg +Copyright 2017-2022 The Apache Software Foundation + +Apache Parquet MR +Copyright 2014-2024 The Apache Software Foundation + +Apache Tez +Copyright 2014-2024 The Apache Software Foundation + +This project includes code from Kite, developed at Cloudera, Inc. with +the following copyright notice: + +| Copyright 2013 Cloudera Inc. +| +| Licensed under the Apache License, Version 2.0 (the "License"); +| you may not use this file except in compliance with the License. +| You may obtain a copy of the License at +| +| http://www.apache.org/licenses/LICENSE-2.0 +| +| Unless required by applicable law or agreed to in writing, software +| distributed under the License is distributed on an "AS IS" BASIS, +| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +| See the License for the specific language governing permissions and +| limitations under the License. + +============================================================================= += NOTICE file corresponding to section 4d of the Apache License Version 2.0 = +============================================================================= + +Apache Commons Lang +Copyright 2001-2021 The Apache Software Foundation + +Scala +Copyright (c) 2002-2023 EPFL +Copyright (c) 2011-2023 Lightbend, Inc. + +Scala includes software developed at +LAMP/EPFL (https://lamp.epfl.ch/) and +Lightbend, Inc. (https://www.lightbend.com/). + +Licensed under the Apache License, Version 2.0 (the "License"). +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+ +This software includes projects with other licenses -- see `doc/LICENSE.md`. diff --git a/client-tez/tez-shaded/src/main/resources/META-INF/licenses/LICENSE-protobuf.txt b/client-tez/tez-shaded/src/main/resources/META-INF/licenses/LICENSE-protobuf.txt new file mode 100644 index 000000000..b4350ec83 --- /dev/null +++ b/client-tez/tez-shaded/src/main/resources/META-INF/licenses/LICENSE-protobuf.txt @@ -0,0 +1,42 @@ +This license applies to all parts of Protocol Buffers except the following: + + - Atomicops support for generic gcc, located in + src/google/protobuf/stubs/atomicops_internals_generic_gcc.h. + This file is copyrighted by Red Hat Inc. + + - Atomicops support for AIX/POWER, located in + src/google/protobuf/stubs/atomicops_internals_aix.h. + This file is copyrighted by Bloomberg Finance LP. + +Copyright 2014, Google Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Code generated by the Protocol Buffer compiler is owned by the owner +of the input file used when generating it. This code is not +standalone and requires a support library to be linked with it. This +support library is itself covered by the above license. \ No newline at end of file diff --git a/client-tez/tez-shaded/src/main/resources/META-INF/licenses/LICENSE-zstd-jni.txt b/client-tez/tez-shaded/src/main/resources/META-INF/licenses/LICENSE-zstd-jni.txt new file mode 100644 index 000000000..7bdccb6a9 --- /dev/null +++ b/client-tez/tez-shaded/src/main/resources/META-INF/licenses/LICENSE-zstd-jni.txt @@ -0,0 +1,26 @@ +Zstd-jni: JNI bindings to Zstd Library + +Copyright (c) 2015-present, Luben Karavelov/ All rights reserved. + +BSD License + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, this + list of conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/dev/dependencies.sh b/dev/dependencies.sh index 9dd8930ce..f0bd888ac 100755 --- a/dev/dependencies.sh +++ b/dev/dependencies.sh @@ -204,6 +204,10 @@ case "$MODULE" in MVN_MODULES="client-mr/mr" SBT_PROJECT="celeborn-client-mr" ;; + "tez") + MVN_MODULES="client-tez/tez" + SBT_PROJECT="celeborn-client-tez" + ;; *) MODULE="server" MVN_MODULES="worker,master,cli" diff --git a/dev/deps/dependencies-client-tez b/dev/deps/dependencies-client-tez new file mode 100644 index 000000000..bf503d114 --- /dev/null +++ b/dev/deps/dependencies-client-tez @@ -0,0 +1,178 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +RoaringBitmap/1.0.6//RoaringBitmap-1.0.6.jar +accessors-smart/2.4.2//accessors-smart-2.4.2.jar +activation/1.1//activation-1.1.jar +asm/8.0.1//asm-8.0.1.jar +async-http-client-netty-utils/2.12.3//async-http-client-netty-utils-2.12.3.jar +async-http-client/2.12.3//async-http-client-2.12.3.jar +audience-annotations/0.5.0//audience-annotations-0.5.0.jar +avro/1.7.7//avro-1.7.7.jar +commons-beanutils/1.9.4//commons-beanutils-1.9.4.jar +commons-cli/1.2//commons-cli-1.2.jar +commons-codec/1.11//commons-codec-1.11.jar +commons-collections/3.2.2//commons-collections-3.2.2.jar +commons-collections4/4.1//commons-collections4-4.1.jar +commons-compress/1.21//commons-compress-1.21.jar +commons-configuration2/2.8.0//commons-configuration2-2.8.0.jar +commons-crypto/1.0.0//commons-crypto-1.0.0.jar +commons-io/2.17.0//commons-io-2.17.0.jar +commons-lang/2.6//commons-lang-2.6.jar +commons-lang3/3.17.0//commons-lang3-3.17.0.jar +commons-logging/1.1.3//commons-logging-1.1.3.jar +commons-math3/3.1.1//commons-math3-3.1.1.jar +commons-net/3.9.0//commons-net-3.9.0.jar +commons-text/1.10.0//commons-text-1.10.0.jar +curator-client/5.2.0//curator-client-5.2.0.jar +curator-framework/4.2.0//curator-framework-4.2.0.jar +curator-recipes/5.2.0//curator-recipes-5.2.0.jar +dnsjava/2.1.7//dnsjava-2.1.7.jar +failureaccess/1.0.2//failureaccess-1.0.2.jar +gson/2.9.0//gson-2.9.0.jar +guava/33.1.0-jre//guava-33.1.0-jre.jar +hadoop-annotations/3.3.6//hadoop-annotations-3.3.6.jar +hadoop-auth/3.3.1//hadoop-auth-3.3.1.jar +hadoop-client-api/3.3.6//hadoop-client-api-3.3.6.jar 
+hadoop-client-runtime/3.3.6//hadoop-client-runtime-3.3.6.jar +hadoop-common/3.3.6//hadoop-common-3.3.6.jar +hadoop-hdfs-client/3.3.1//hadoop-hdfs-client-3.3.1.jar +hadoop-shaded-guava/1.1.1//hadoop-shaded-guava-1.1.1.jar +hadoop-shaded-protobuf_3_7/1.1.1//hadoop-shaded-protobuf_3_7-1.1.1.jar +hadoop-shim/0.10.2//hadoop-shim-0.10.2.jar +jackson-annotations/2.15.3//jackson-annotations-2.15.3.jar +jackson-core-asl/1.9.2//jackson-core-asl-1.9.2.jar +jackson-core/2.15.3//jackson-core-2.15.3.jar +jackson-databind/2.15.3//jackson-databind-2.15.3.jar +jackson-jaxrs/1.9.2//jackson-jaxrs-1.9.2.jar +jackson-mapper-asl/1.9.2//jackson-mapper-asl-1.9.2.jar +jackson-module-scala_2.12/2.15.3//jackson-module-scala_2.12-2.15.3.jar +jackson-xc/1.9.2//jackson-xc-1.9.2.jar +jakarta.activation-api/1.2.1//jakarta.activation-api-1.2.1.jar +jakarta.activation/1.2.2//jakarta.activation-1.2.2.jar +javax.servlet-api/3.1.0//javax.servlet-api-3.1.0.jar +jaxb-api/2.2.2//jaxb-api-2.2.2.jar +jaxb-impl/2.2.3-1//jaxb-impl-2.2.3-1.jar +jcip-annotations/1.0-1//jcip-annotations-1.0-1.jar +jcl-over-slf4j/1.7.36//jcl-over-slf4j-1.7.36.jar +jersey-client/1.19//jersey-client-1.19.jar +jersey-core/1.19.4//jersey-core-1.19.4.jar +jersey-json/1.19//jersey-json-1.19.jar +jersey-json/1.20//jersey-json-1.20.jar +jersey-server/1.19.4//jersey-server-1.19.4.jar +jersey-servlet/1.19.4//jersey-servlet-1.19.4.jar +jettison/1.3.4//jettison-1.3.4.jar +jetty-http/9.4.51.v20230217//jetty-http-9.4.51.v20230217.jar +jetty-io/9.4.51.v20230217//jetty-io-9.4.51.v20230217.jar +jetty-security/9.4.51.v20230217//jetty-security-9.4.51.v20230217.jar +jetty-server/9.4.51.v20230217//jetty-server-9.4.51.v20230217.jar +jetty-servlet/9.4.51.v20230217//jetty-servlet-9.4.51.v20230217.jar +jetty-util-ajax/9.4.51.v20230217//jetty-util-ajax-9.4.51.v20230217.jar +jetty-util/9.4.51.v20230217//jetty-util-9.4.51.v20230217.jar +jetty-webapp/9.4.51.v20230217//jetty-webapp-9.4.51.v20230217.jar 
+jetty-xml/9.4.51.v20230217//jetty-xml-9.4.51.v20230217.jar +jsch/0.1.55//jsch-0.1.55.jar +json-smart/2.4.2//json-smart-2.4.2.jar +jsp-api/2.1//jsp-api-2.1.jar +jsr305/1.3.9//jsr305-1.3.9.jar +jsr311-api/1.1.1//jsr311-api-1.1.1.jar +jul-to-slf4j/1.7.36//jul-to-slf4j-1.7.36.jar +kerb-admin/1.0.1//kerb-admin-1.0.1.jar +kerb-client/1.0.1//kerb-client-1.0.1.jar +kerb-common/1.0.1//kerb-common-1.0.1.jar +kerb-core/1.0.1//kerb-core-1.0.1.jar +kerb-crypto/1.0.1//kerb-crypto-1.0.1.jar +kerb-identity/1.0.1//kerb-identity-1.0.1.jar +kerb-server/1.0.1//kerb-server-1.0.1.jar +kerb-simplekdc/1.0.1//kerb-simplekdc-1.0.1.jar +kerb-util/1.0.1//kerb-util-1.0.1.jar +kerby-asn1/1.0.1//kerby-asn1-1.0.1.jar +kerby-config/1.0.1//kerby-config-1.0.1.jar +kerby-pkix/1.0.1//kerby-pkix-1.0.1.jar +kerby-util/1.0.1//kerby-util-1.0.1.jar +kerby-xdr/1.0.1//kerby-xdr-1.0.1.jar +leveldbjni-all/1.8//leveldbjni-all-1.8.jar +log4j/1.2.17//log4j-1.2.17.jar +lz4-java/1.8.0//lz4-java-1.8.0.jar +maven-jdk-tools-wrapper/0.1//maven-jdk-tools-wrapper-0.1.jar +metrics-core/4.2.25//metrics-core-4.2.25.jar +metrics-graphite/4.2.25//metrics-graphite-4.2.25.jar +metrics-jvm/4.2.25//metrics-jvm-4.2.25.jar +netty-all/4.1.115.Final//netty-all-4.1.115.Final.jar +netty-buffer/4.1.115.Final//netty-buffer-4.1.115.Final.jar +netty-codec-dns/4.1.115.Final//netty-codec-dns-4.1.115.Final.jar +netty-codec-haproxy/4.1.115.Final//netty-codec-haproxy-4.1.115.Final.jar +netty-codec-http/4.1.115.Final//netty-codec-http-4.1.115.Final.jar +netty-codec-http2/4.1.115.Final//netty-codec-http2-4.1.115.Final.jar +netty-codec-memcache/4.1.115.Final//netty-codec-memcache-4.1.115.Final.jar +netty-codec-mqtt/4.1.115.Final//netty-codec-mqtt-4.1.115.Final.jar +netty-codec-redis/4.1.115.Final//netty-codec-redis-4.1.115.Final.jar +netty-codec-smtp/4.1.115.Final//netty-codec-smtp-4.1.115.Final.jar +netty-codec-socks/4.1.115.Final//netty-codec-socks-4.1.115.Final.jar +netty-codec-stomp/4.1.115.Final//netty-codec-stomp-4.1.115.Final.jar 
+netty-codec-xml/4.1.115.Final//netty-codec-xml-4.1.115.Final.jar +netty-codec/4.1.115.Final//netty-codec-4.1.115.Final.jar +netty-common/4.1.115.Final//netty-common-4.1.115.Final.jar +netty-handler-proxy/4.1.115.Final//netty-handler-proxy-4.1.115.Final.jar +netty-handler/4.1.115.Final//netty-handler-4.1.115.Final.jar +netty-reactive-streams/2.0.4//netty-reactive-streams-2.0.4.jar +netty-resolver-dns-classes-macos/4.1.115.Final//netty-resolver-dns-classes-macos-4.1.115.Final.jar +netty-resolver-dns-native-macos/4.1.115.Final/osx-aarch_64/netty-resolver-dns-native-macos-4.1.115.Final-osx-aarch_64.jar +netty-resolver-dns-native-macos/4.1.115.Final/osx-x86_64/netty-resolver-dns-native-macos-4.1.115.Final-osx-x86_64.jar +netty-resolver-dns/4.1.115.Final//netty-resolver-dns-4.1.115.Final.jar +netty-resolver/4.1.115.Final//netty-resolver-4.1.115.Final.jar +netty-transport-classes-epoll/4.1.115.Final//netty-transport-classes-epoll-4.1.115.Final.jar +netty-transport-classes-kqueue/4.1.115.Final//netty-transport-classes-kqueue-4.1.115.Final.jar +netty-transport-native-epoll/4.1.115.Final/linux-aarch_64/netty-transport-native-epoll-4.1.115.Final-linux-aarch_64.jar +netty-transport-native-epoll/4.1.115.Final/linux-riscv64/netty-transport-native-epoll-4.1.115.Final-linux-riscv64.jar +netty-transport-native-epoll/4.1.115.Final/linux-x86_64/netty-transport-native-epoll-4.1.115.Final-linux-x86_64.jar +netty-transport-native-epoll/4.1.63.Final//netty-transport-native-epoll-4.1.63.Final.jar +netty-transport-native-kqueue/4.1.115.Final/osx-aarch_64/netty-transport-native-kqueue-4.1.115.Final-osx-aarch_64.jar +netty-transport-native-kqueue/4.1.115.Final/osx-x86_64/netty-transport-native-kqueue-4.1.115.Final-osx-x86_64.jar +netty-transport-native-unix-common/4.1.115.Final//netty-transport-native-unix-common-4.1.115.Final.jar +netty-transport-rxtx/4.1.115.Final//netty-transport-rxtx-4.1.115.Final.jar +netty-transport-sctp/4.1.115.Final//netty-transport-sctp-4.1.115.Final.jar 
+netty-transport-udt/4.1.115.Final//netty-transport-udt-4.1.115.Final.jar +netty-transport/4.1.115.Final//netty-transport-4.1.115.Final.jar +nimbus-jose-jwt/9.8.1//nimbus-jose-jwt-9.8.1.jar +okhttp/2.7.5//okhttp-2.7.5.jar +okio/1.6.0//okio-1.6.0.jar +paranamer/2.8//paranamer-2.8.jar +protobuf-java/3.25.5//protobuf-java-3.25.5.jar +re2j/1.1//re2j-1.1.jar +reactive-streams/1.0.3//reactive-streams-1.0.3.jar +reload4j/1.2.22//reload4j-1.2.22.jar +scala-library/2.12.18//scala-library-2.12.18.jar +scala-reflect/2.12.18//scala-reflect-2.12.18.jar +slf4j-api/1.7.36//slf4j-api-1.7.36.jar +slf4j-log4j12/1.7.30//slf4j-log4j12-1.7.30.jar +slf4j-reload4j/1.7.36//slf4j-reload4j-1.7.36.jar +snakeyaml/2.2//snakeyaml-2.2.jar +snappy-java/1.1.10.5//snappy-java-1.1.10.5.jar +stax-api/1.0-2//stax-api-1.0-2.jar +stax-api/1.0.1//stax-api-1.0.1.jar +stax2-api/4.2.1//stax2-api-4.2.1.jar +tez-api/0.10.2//tez-api-0.10.2.jar +tez-common/0.10.2//tez-common-0.10.2.jar +tez-dag/0.10.2//tez-dag-0.10.2.jar +tez-runtime-internals/0.10.2//tez-runtime-internals-0.10.2.jar +tez-runtime-library/0.10.2//tez-runtime-library-0.10.2.jar +token-provider/1.0.1//token-provider-1.0.1.jar +woodstox-core/5.4.0//woodstox-core-5.4.0.jar +zookeeper-jute/3.6.3//zookeeper-jute-3.6.3.jar +zookeeper/3.6.3//zookeeper-3.6.3.jar +zstd-jni/1.5.2-1//zstd-jni-1.5.2-1.jar diff --git a/pom.xml b/pom.xml index aee50505b..645090e96 100644 --- a/pom.xml +++ b/pom.xml @@ -1822,10 +1822,11 @@ tez client-tez/tez + client-tez/tez-shaded tests/tez-it - 0.10.3 + 0.10.2 diff --git a/project/CelebornBuild.scala b/project/CelebornBuild.scala index 43892bd1f..80c45084e 100644 --- a/project/CelebornBuild.scala +++ b/project/CelebornBuild.scala @@ -91,6 +91,9 @@ object Dependencies { val protocVersion = "3.25.5" val protoVersion = "3.25.5" + // Tez + val tezVersion = "0.10.2" + val apLoader = "me.bechberger" % "ap-loader-all" % apLoaderVersion val commonsCompress = "org.apache.commons" % "commons-compress" % commonsCompressVersion val 
commonsCrypto = "org.apache.commons" % "commons-crypto" % commonsCryptoVersion excludeAll(
@@ -214,6 +217,47 @@ object Dependencies {
   val bouncycastleBcprovJdk18on = "org.bouncycastle" % "bcprov-jdk18on" % bouncycastleVersion % "test"
   val bouncycastleBcpkixJdk18on = "org.bouncycastle" % "bcpkix-jdk18on" % bouncycastleVersion % "test"
 
+  // Tez support: Tez artifacts pinned to tezVersion (plus hadoop-common at hadoopVersion), each with the listed modules excluded.
+  val tezCommon = "org.apache.tez" % "tez-common" % tezVersion excludeAll(
+    ExclusionRule("org.apache.hadoop", "hadoop-annotations"),
+    ExclusionRule("org.apache.hadoop", "hadoop-yarn-api"),
+    ExclusionRule("org.apache.hadoop", "hadoop-yarn-common")
+  )
+  val tezRuntimeLibrary = "org.apache.tez" % "tez-runtime-library" % tezVersion excludeAll(
+    ExclusionRule("org.apache.hadoop", "hadoop-annotations"),
+    ExclusionRule("org.apache.hadoop", "hadoop-yarn-api"),
+    ExclusionRule("org.apache.hadoop", "hadoop-yarn-common")
+  )
+  val tezRuntimeInternals = "org.apache.tez" % "tez-runtime-internals" % tezVersion excludeAll(
+    ExclusionRule("org.apache.hadoop", "hadoop-annotations"),
+    ExclusionRule("org.apache.hadoop", "hadoop-yarn-api"),
+    ExclusionRule("org.apache.hadoop", "hadoop-yarn-common"),
+    ExclusionRule("org.apache.hadoop", "hadoop-yarn-client"),
+    ExclusionRule("org.apache.hadoop", "hadoop-yarn-server-common"),
+    ExclusionRule("org.apache.hadoop", "hadoop-yarn-server-web-proxy")
+  )
+  val tezDag = "org.apache.tez" % "tez-dag" % tezVersion excludeAll( // same exclusion set as tezRuntimeInternals
+    ExclusionRule("org.apache.hadoop", "hadoop-annotations"),
+    ExclusionRule("org.apache.hadoop", "hadoop-yarn-api"),
+    ExclusionRule("org.apache.hadoop", "hadoop-yarn-common"),
+    ExclusionRule("org.apache.hadoop", "hadoop-yarn-client"),
+    ExclusionRule("org.apache.hadoop", "hadoop-yarn-server-common"),
+    ExclusionRule("org.apache.hadoop", "hadoop-yarn-server-web-proxy")
+  )
+  val tezApi = "org.apache.tez" % "tez-api" % tezVersion excludeAll(
+    ExclusionRule("org.apache.hadoop", "hadoop-annotations"),
+    ExclusionRule("org.apache.hadoop", "hadoop-yarn-api"),
+    ExclusionRule("org.apache.hadoop", "hadoop-yarn-common"),
+    ExclusionRule("org.apache.hadoop", "hadoop-auth"),
+    ExclusionRule("org.apache.hadoop", "hadoop-hdfs"),
+    ExclusionRule("org.apache.hadoop", "hadoop-yarn-client")
+  )
+  val hadoopCommon = "org.apache.hadoop" % "hadoop-common" % hadoopVersion excludeAll( // hadoop-common minus jersey-json, httpclient and slf4j-log4j12
+    ExclusionRule("com.sun.jersey", "jersey-json"),
+    ExclusionRule("org.apache.httpcomponents", "httpclient"),
+    ExclusionRule("org.slf4j", "slf4j-log4j12")
+  )
+
   val picocli = "info.picocli" % "picocli" % picocliVersion
 }
 
@@ -376,7 +420,12 @@ object CelebornBuild extends sbt.internal.BuildDef {
       CelebornWorker.worker,
       CelebornMaster.master,
       CelebornCli.cli
-    ) ++ maybeSparkClientModules ++ maybeFlinkClientModules ++ maybeMRClientModules ++ maybeWebModules ++ maybeCelebornMPUModule
+    ) ++ maybeSparkClientModules ++
+      maybeFlinkClientModules ++
+      maybeMRClientModules ++
+      maybeWebModules ++
+      maybeCelebornMPUModule ++
+      maybeTezClientModules // empty unless Utils.tezClientProjects is defined
   }
 
   // ThisBuild / parallelExecution := false
@@ -452,6 +501,15 @@ object Utils {
 
   lazy val maybeMRClientModules: Seq[Project] = mrClientProjects.map(_.modules).getOrElse(Seq.empty)
 
+  val TEZ_VERSION = profiles.filter(_.startsWith("tez")).headOption // first entry of `profiles` starting with "tez", if any
+
+  lazy val tezClientProjects = TEZ_VERSION match { // Tez projects are enabled only when that entry is exactly "tez"
+    case Some("tez") => Some(TezClientProjects)
+    case _ => None
+  }
+
+  lazy val maybeTezClientModules: Seq[Project] = tezClientProjects.map(_.modules).getOrElse(Seq.empty) // Tez modules, or Seq.empty when not enabled
+
   val WEB_VERSION = profiles.filter(_.startsWith("web")).headOption
 
   lazy val webProjects = WEB_VERSION match {
@@ -1594,3 +1652,156 @@ object WebProjects {
     Seq(web)
   }
 }
+
+////////////////////////////////////////////////////////
+//                     Tez Client                     //
+////////////////////////////////////////////////////////
+object TezClientProjects { // sbt projects for the Tez client: client, integration tests, test-only aggregate and shaded jar
+
+  def tezClient: Project = { // Tez client module rooted at client-tez/tez; depends on the common and client modules
+    Project("celeborn-client-tez", file("client-tez/tez"))
+      .dependsOn(CelebornCommon.common, CelebornClient.client)
+      .settings(
+        commonSettings,
+        libraryDependencies ++= Seq(
+          Dependencies.tezCommon,
+          Dependencies.tezRuntimeLibrary,
+          Dependencies.tezRuntimeInternals,
+          Dependencies.tezDag,
+          Dependencies.tezApi,
+          Dependencies.hadoopCommon,
+          Dependencies.slf4jApi,
+        ) ++ commonUnitTestDependencies,
+        dependencyOverrides += Dependencies.commonsCompress // override commons-compress with the module declared in Dependencies
+      )
+  }
+
+  def tezIt: Project = { // integration-test module rooted at tests/tez-it
+    Project("celeborn-tez-it", file("tests/tez-it"))
+      // ref: https://www.scala-sbt.org/1.x/docs/Multi-Project.html#Classpath+dependencies ("test->test;compile->compile" exposes both configurations of each upstream project)
+      .dependsOn(CelebornCommon.common % "test->test;compile->compile")
+      .dependsOn(CelebornClient.client % "test->test;compile->compile")
+      .dependsOn(CelebornMaster.master % "test->test;compile->compile")
+      .dependsOn(CelebornWorker.worker % "test->test;compile->compile")
+      .dependsOn(tezClient % "test->test;compile->compile")
+      .settings(
+        commonSettings,
+        copyDepsSettings,
+        libraryDependencies ++= Seq( // no module-specific libraries; only the shared unit-test dependencies are added
+        ) ++ commonUnitTestDependencies
+      )
+  }
+
+  def tezClientShade: Project = { // assembly (shaded) jar built on top of tezClient, rooted at client-tez/tez-shaded
+    Project("celeborn-client-tez-shaded", file("client-tez/tez-shaded"))
+      .dependsOn(tezClient)
+      .disablePlugins(AddMetaInfLicenseFiles)
+      .settings(
+        commonSettings,
+        releaseSettings,
+
+        // align final shaded jar name with maven: <moduleName>_<scalaBinaryVersion>-<version>.<extension>
+        (assembly / assemblyJarName) := {
+          val extension = artifact.value.extension
+          s"${moduleName.value}_${scalaBinaryVersion.value}-${version.value}.$extension"
+        },
+
+        (assembly / test) := {}, // the test task scoped to assembly is a no-op
+
+        (assembly / logLevel) := Level.Info,
+
+        // include `scala-library` in the assembly.
+        (assembly / assemblyPackageScala / assembleArtifact) := true,
+
+        (assembly / assemblyExcludedJars) := { // exclude every classpath jar whose file name does NOT start with one of the prefixes below
+          val cp = (assembly / fullClasspath).value
+          cp filter { v =>
+            val name = v.data.getName
+            !(name.startsWith("celeborn-") ||
+              name.startsWith("protobuf-java-") ||
+              name.startsWith("guava-") ||
+              name.startsWith("failureaccess-") ||
+              name.startsWith("netty-") ||
+              name.startsWith("commons-lang3-") ||
+              name.startsWith("RoaringBitmap-") ||
+              name.startsWith("lz4-java-") ||
+              name.startsWith("zstd-jni-") ||
+              name.startsWith("metrics-core-") ||
+              name.startsWith("scala-library-"))
+          }
+        },
+
+        (assembly / assemblyShadeRules) := Seq( // relocate the listed third-party packages under org.apache.celeborn.shaded
+          ShadeRule.rename("com.google.protobuf.**" -> "org.apache.celeborn.shaded.com.google.protobuf.@1").inAll,
+          ShadeRule.rename("com.google.common.**" -> "org.apache.celeborn.shaded.com.google.common.@1").inAll,
+          ShadeRule.rename("io.netty.**" -> "org.apache.celeborn.shaded.io.netty.@1").inAll,
+          ShadeRule.rename("org.apache.commons.**" -> "org.apache.celeborn.shaded.org.apache.commons.@1").inAll,
+          ShadeRule.rename("org.roaringbitmap.**" -> "org.apache.celeborn.shaded.org.roaringbitmap.@1").inAll,
+          ShadeRule.rename("io.dropwizard.metrics.**" -> "org.apache.celeborn.shaded.io.dropwizard.metrics.@1").inAll,
+          ShadeRule.rename("com.codahale.metrics.**" -> "org.apache.celeborn.shaded.com.codahale.metrics.@1").inAll,
+          ShadeRule.rename("com.github.luben.**" -> "org.apache.celeborn.shaded.com.github.luben.@1").inAll,
+        ),
+
+        (assembly / assemblyMergeStrategy) := { // cases are tried top to bottom; the first matching path pattern decides
+          case m if m.toLowerCase(Locale.ROOT).endsWith("manifest.mf") => MergeStrategy.discard
+          // For netty-3.x.y.Final.jar
+          case m if m.startsWith("META-INF/license/") => MergeStrategy.discard
+          // the LicenseAndNoticeMergeStrategy always picks the license/notice file from the current project
+          case m@("META-INF/LICENSE" | "META-INF/NOTICE") => CustomMergeStrategy("LicenseAndNoticeMergeStrategy") { conflicts =>
+            val entry = conflicts.head
+            val projectLicenseFile = (Compile / resourceDirectory).value / entry.target
+            val stream = () => new java.io.BufferedInputStream(new java.io.FileInputStream(projectLicenseFile))
+            Right(Vector(JarEntry(entry.target, stream)))
+          }
+          case PathList(ps@_*) if Assembly.isLicenseFile(ps.last) => MergeStrategy.discard
+          // Drop all proto files that are not needed as artifacts of the build.
+          case m if m.toLowerCase(Locale.ROOT).endsWith(".proto") => MergeStrategy.discard
+          case m if m.toLowerCase(Locale.ROOT).startsWith("meta-inf/native-image") => MergeStrategy.discard
+          // Drop netty jnilib
+          case m if m.toLowerCase(Locale.ROOT).endsWith(".jnilib") => MergeStrategy.discard
+          // rename netty native lib so the file name carries the org_apache_celeborn_shaded_ prefix
+          case "META-INF/native/libnetty_transport_native_epoll_x86_64.so" => CustomMergeStrategy.rename(_ => "META-INF/native/liborg_apache_celeborn_shaded_netty_transport_native_epoll_x86_64.so")
+          case "META-INF/native/libnetty_transport_native_epoll_aarch_64.so" => CustomMergeStrategy.rename(_ => "META-INF/native/liborg_apache_celeborn_shaded_netty_transport_native_epoll_aarch_64.so")
+          case _ => MergeStrategy.first
+        },
+
+        Compile / packageBin := assembly.value, // packageBin yields the assembly jar
+        pomPostProcess := removeDependenciesTransformer // NOTE(review): presumably strips dependencies from the generated POM; confirm in removeDependenciesTransformer
+      )
+  }
+
+  def modules: Seq[Project] = { // every project contributed when the Tez client is enabled
+    Seq(tezClient, tezIt, tezGroup, tezClientShade)
+  }
+
+  // for test only, don't use this group for any other projects (aggregates tezClient and tezIt)
+  lazy val tezGroup = (project withId "celeborn-tez-group").aggregate(tezClient, tezIt)
+
+  val copyDeps = TaskKey[Unit]("copyDeps", "Copies needed dependencies to the build directory.")
+  val destPath = (Compile / crossTarget) { // NOTE(review): the directory name "mapreduce_lib" looks carried over from the MR client; confirm the Tez ITs expect it
+    _ / "mapreduce_lib"
+  }
+
+  lazy val copyDepsSettings = Seq( // defines copyDeps and makes Test / compile depend on it
+    copyDeps := {
+      val dest = destPath.value
+      if (!dest.isDirectory() && !dest.mkdirs()) {
+        throw new java.io.IOException("Failed to create jars directory.")
+      }
+
+      (Compile / dependencyClasspath).value.map(_.data)
+        .filter { jar => jar.isFile() } // only regular files are copied
+        .foreach { jar =>
+          val destJar = new File(dest, jar.getName())
+          if (destJar.isFile()) { // remove an existing copy first: Files.copy is called without REPLACE_EXISTING
+            destJar.delete()
+          }
+          Files.copy(jar.toPath(), destJar.toPath())
+        }
+    },
+    (Test / compile) := {
+      copyDeps.value
+      (Test / compile).value
+    }
+  )
+}
diff --git a/tests/tez-it/pom.xml b/tests/tez-it/pom.xml
index d44dbf1dd..6e206794c 100644
--- a/tests/tez-it/pom.xml
+++ b/tests/tez-it/pom.xml
@@ -94,7 +94,7 @@
 org.apache.celeborn
- celeborn-client-tez_${scala.binary.version}
+ celeborn-client-tez-shaded_${scala.binary.version}
 ${project.version}
 test