From 3a41db360b1b21c5b4017eceece2d3b5bd0ffda6 Mon Sep 17 00:00:00 2001 From: Mridul Muralidharan Date: Mon, 25 Sep 2023 19:52:09 +0800 Subject: [PATCH] [CELEBORN-1006] Add support for Apache Hadoop 2.x in Celeborn build Add support for Apache Hadoop 2.x in Celeborn build Developers only need to specify their `hadoop.version`, and the build will pick the right profile internally based on the version to add the relevant dependencies. [hadoop-client-api](https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-client-api) and [hadoop-client-runtime](https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-client-runtime) were introduced in Hadoop 3.x, while Hadoop 2.x had [hadoop-client](https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-client). Celeborn depends on the former, and so requires Hadoop 3.x to build. Apache Spark dropped support for Hadoop 2.x only in the recent v3.5 ([SPARK-42452](https://issues.apache.org/jira/browse/SPARK-42452)). Given this, we have cases where deployments on supported platforms, such as Spark 3.4 and older running on Hadoop 2.x, need to pull in Hadoop 3.x just for Celeborn. This PR uses `hadoop-client` when `hadoop.version` is specified as 2.x, and preserves existing behavior when `hadoop.version` is 3.x. Note: while using `hadoop-client` in 3.x is an option, the Hadoop community recommends relying on `hadoop-client-api`/`hadoop-client-runtime`, so this change uses those artifacts wherever possible. 
Adds support for using 2.x for hadoop.version Three combinations were tested: * Default, without overriding hadoop.version Dependencies: ``` $ build/mvn dependency:list 2>&1 | grep hadoop | sort | uniq [INFO] org.apache.hadoop:hadoop-client-api:jar:3.2.4:compile [INFO] org.apache.hadoop:hadoop-client-runtime:jar:3.2.4:compile ``` Will update this section again based on test suite results (which are ongoing) * Setting hadoop.version to newer 3.3.0 explicitly Dependencies: ``` $ ARGS="-Pspark-3.1 -Dhadoop.version=3.3.0" ; build/mvn dependency:list $ARGS 2>&1 | grep hadoop | sort | uniq [INFO] org.apache.hadoop:hadoop-client-api:jar:3.3.0:compile [INFO] org.apache.hadoop:hadoop-client-runtime:jar:3.3.0:compile ``` * Setting hadoop.version to older 2.10.0 Dependencies: ``` $ ARGS="-Pspark-3.1 -Dhadoop.version=2.10.0" ; build/mvn dependency:list $ARGS 2>&1 | grep hadoop | grep compile | sort | uniq [INFO] org.apache.hadoop:hadoop-auth:jar:2.10.0:compile -- module hadoop.auth (auto) [INFO] org.apache.hadoop:hadoop-client:jar:2.10.0:compile -- module hadoop.client (auto) [INFO] org.apache.hadoop:hadoop-common:jar:2.10.0:compile -- module hadoop.common (auto) [INFO] org.apache.hadoop:hadoop-hdfs-client:jar:2.10.0:compile -- module hadoop.hdfs.client (auto) [INFO] org.apache.hadoop:hadoop-mapreduce-client-app:jar:2.10.0:compile -- module hadoop.mapreduce.client.app (auto) [INFO] org.apache.hadoop:hadoop-mapreduce-client-common:jar:2.10.0:compile -- module hadoop.mapreduce.client.common (auto) [INFO] org.apache.hadoop:hadoop-mapreduce-client-core:jar:2.10.0:compile -- module hadoop.mapreduce.client.core (auto) [INFO] org.apache.hadoop:hadoop-mapreduce-client-jobclient:jar:2.10.0:compile [INFO] org.apache.hadoop:hadoop-mapreduce-client-shuffle:jar:2.10.0:compile -- module hadoop.mapreduce.client.shuffle (auto) [INFO] org.apache.hadoop:hadoop-yarn-api:jar:2.10.0:compile -- module hadoop.yarn.api (auto) [INFO] org.apache.hadoop:hadoop-yarn-common:jar:2.10.0:compile -- module 
hadoop.yarn.common (auto) ``` For each of the case above, build/test passes for each of the `ARGS`. Closes #1936 from mridulm/main. Authored-by: Mridul Muralidharan Signed-off-by: zky.zhoukeyong --- .mvn/extensions.xml | 25 ++++++++++++++++ LICENSE-binary | 11 +++++++ client-mr/mr/pom.xml | 48 +++++++++++++++++++++++------- common/pom.xml | 48 +++++++++++++++++++++++------- master/pom.xml | 47 ++++++++++++++++++++++------- pom.xml | 71 +++++++++++++++++++++++++++++++++++++------- 6 files changed, 209 insertions(+), 41 deletions(-) create mode 100644 .mvn/extensions.xml diff --git a/.mvn/extensions.xml b/.mvn/extensions.xml new file mode 100644 index 000000000..3bfbd1dc2 --- /dev/null +++ b/.mvn/extensions.xml @@ -0,0 +1,25 @@ + + + + + fish.payara.maven.extensions + regex-profile-activator + 0.5 + + diff --git a/LICENSE-binary b/LICENSE-binary index 6971f6b64..7be2901c1 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -243,8 +243,19 @@ io.netty:netty-transport-sctp io.netty:netty-transport-udt org.apache.commons:commons-crypto org.apache.commons:commons-lang3 +org.apache.hadoop:hadoop-auth +org.apache.hadoop:hadoop-client org.apache.hadoop:hadoop-client-api org.apache.hadoop:hadoop-client-runtime +org.apache.hadoop:hadoop-common +org.apache.hadoop:hadoop-hdfs-client +org.apache.hadoop:hadoop-mapreduce-client-app +org.apache.hadoop:hadoop-mapreduce-client-common +org.apache.hadoop:hadoop-mapreduce-client-core +org.apache.hadoop:hadoop-mapreduce-client-jobclient +org.apache.hadoop:hadoop-mapreduce-client-shuffle +org.apache.hadoop:hadoop-yarn-api +org.apache.hadoop:hadoop-yarn-common org.apache.htrace:htrace-core4 org.apache.logging.log4j:log4j-1.2-api org.apache.logging.log4j:log4j-api diff --git a/client-mr/mr/pom.xml b/client-mr/mr/pom.xml index 402ffa507..8b16614cb 100644 --- a/client-mr/mr/pom.xml +++ b/client-mr/mr/pom.xml @@ -39,20 +39,48 @@ celeborn-client_${scala.binary.version} ${project.version} - - org.apache.hadoop - hadoop-client-api - 
${hadoop.version} - - - org.apache.hadoop - hadoop-client-runtime - ${hadoop.version} - org.apache.hadoop hadoop-mapreduce-client-app ${hadoop.version} + + + + hadoop-3 + + + hadoop-3-deps + + + + + org.apache.hadoop + hadoop-client-api + ${hadoop.version} + + + org.apache.hadoop + hadoop-client-runtime + ${hadoop.version} + + + + + hadoop-2 + + + hadoop-2-deps + + + + + org.apache.hadoop + hadoop-client + ${hadoop.version} + + + + diff --git a/common/pom.xml b/common/pom.xml index b88552ac6..7af0e9b9e 100644 --- a/common/pom.xml +++ b/common/pom.xml @@ -107,17 +107,6 @@ scala-reflect - - org.apache.hadoop - hadoop-client-api - ${hadoop.version} - - - org.apache.hadoop - hadoop-client-runtime - ${hadoop.version} - - org.roaringbitmap RoaringBitmap @@ -156,4 +145,41 @@ + + + hadoop-3 + + + hadoop-3-deps + + + + + org.apache.hadoop + hadoop-client-api + ${hadoop.version} + + + org.apache.hadoop + hadoop-client-runtime + ${hadoop.version} + + + + + hadoop-2 + + + hadoop-2-deps + + + + + org.apache.hadoop + hadoop-client + ${hadoop.version} + + + + diff --git a/master/pom.xml b/master/pom.xml index 8facb5585..a5260a1ce 100644 --- a/master/pom.xml +++ b/master/pom.xml @@ -82,16 +82,6 @@ org.apache.logging.log4j log4j-1.2-api - - org.apache.hadoop - hadoop-client-api - ${hadoop.version} - - - org.apache.hadoop - hadoop-client-runtime - ${hadoop.version} - org.mockito @@ -119,4 +109,41 @@ + + + hadoop-3 + + + hadoop-3-deps + + + + + org.apache.hadoop + hadoop-client-api + ${hadoop.version} + + + org.apache.hadoop + hadoop-client-runtime + ${hadoop.version} + + + + + hadoop-2 + + + hadoop-2-deps + + + + + org.apache.hadoop + hadoop-client + ${hadoop.version} + + + + diff --git a/pom.xml b/pom.xml index 287376cf9..31efbcf12 100644 --- a/pom.xml +++ b/pom.xml @@ -380,16 +380,6 @@ snakeyaml ${snakeyaml.version} - - org.apache.hadoop - hadoop-client-api - ${hadoop.version} - - - org.apache.hadoop - hadoop-client-runtime - ${hadoop.version} - org.apache.hadoop 
hadoop-mapreduce-client-app @@ -921,6 +911,67 @@ + + hadoop-3 + + + hadoop.version + /^3\..*$/ + + + + true + + + + org.apache.hadoop + hadoop-client-api + ${hadoop.version} + + + org.apache.hadoop + hadoop-client-runtime + ${hadoop.version} + + + + + hadoop-2 + + + hadoop.version + /^2\..*$/ + + + + true + + + + org.apache.hadoop + hadoop-client + ${hadoop.version} + + + org.apache.hadoop + hadoop-annotations + + + org.apache.hadoop + hadoop-yarn-client + + + org.apache.hadoop + hadoop-yarn-registry + + + org.apache.hadoop + hadoop-yarn-server-common + + + + + spark-2.4