[INFRA]Update scripts and templates for new name. (#724)
This commit is contained in:
parent
f2a234f870
commit
59474c2f11
94
README.md
94
README.md
@ -31,7 +31,7 @@ LifecycleManager maintains metadata of each shuffle and runs within the Spark dr
|
||||
6. Workers merge data and replicate it to their peers.
|
||||
7. Workers flush to disk periodically.
|
||||
8. Mapper tasks accomplish and trigger MapperEnd event.
|
||||
9. When all mapper tasks complete, workers commit files.
|
||||
9. When all mapper tasks are complete, workers commit files.
|
||||
10. Reducers ask for file locations.
|
||||
11. Reducers read shuffle data.
|
||||
|
||||
@ -75,17 +75,17 @@ For example, if you are running Spark 2.4, you must compile Celeborn client with
|
||||
Celeborn supports HA mode deployment.
|
||||
|
||||
### Deploy Celeborn
|
||||
1. Unzip the package to $RSS_HOME
|
||||
2. Modify environment variables in $RSS_HOME/conf/rss-env.sh
|
||||
1. Unzip the package to $CELEBORN_HOME
|
||||
2. Modify environment variables in $CELEBORN_HOME/conf/celeborn-env.sh
|
||||
|
||||
EXAMPLE:
|
||||
```properties
|
||||
#!/usr/bin/env bash
|
||||
RSS_MASTER_MEMORY=4g
|
||||
RSS_WORKER_MEMORY=2g
|
||||
RSS_WORKER_OFFHEAP_MEMORY=4g
|
||||
CELEBORN_MASTER_MEMORY=4g
|
||||
CELEBORN_WORKER_MEMORY=2g
|
||||
CELEBORN_WORKER_OFFHEAP_MEMORY=4g
|
||||
```
|
||||
3. Modify configurations in $RSS_HOME/conf/rss-defaults.conf
|
||||
3. Modify configurations in $CELEBORN_HOME/conf/rss-defaults.conf
|
||||
|
||||
EXAMPLE: single master cluster
|
||||
```properties
|
||||
@ -123,56 +123,32 @@ rss.ha.port.dev-cluster.node3 9872
|
||||
```
|
||||
4. Copy Celeborn and configurations to all nodes
|
||||
5. Start Celeborn master
|
||||
`$RSS_HOME/sbin/start-master.sh`
|
||||
`$CELEBORN_HOME/sbin/start-master.sh`
|
||||
6. Start Celeborn worker
|
||||
For single master cluster : `$RSS_HOME/sbin/start-worker.sh rss://masterhost:port`
|
||||
For HA cluster :`$RSS_HOME/sbin/start-worker.sh`
|
||||
For single master cluster : `$CELEBORN_HOME/sbin/start-worker.sh rss://masterhost:port`
|
||||
For HA cluster :`$CELEBORN_HOME/sbin/start-worker.sh`
|
||||
7. If Celeborn starts successfully, the output of the Master's log should look like this:
|
||||
```angular2html
|
||||
21/12/21 20:06:18,964 INFO [main] Dispatcher: Dispatcher numThreads: 64
|
||||
21/12/21 20:06:18,994 INFO [main] TransportClientFactory: mode NIO threads 8
|
||||
21/12/21 20:06:19,113 WARN [main] ServerBootstrap: Unknown channel option 'TCP_NODELAY' for channel '[id: 0x8a9442f6]'
|
||||
21/12/21 20:06:19,129 INFO [main] Utils: Successfully started service 'MasterSys' on port 9097.
|
||||
21/12/21 20:06:19,150 INFO [main] HttpServer: HttpServer started on port 7001.
|
||||
21/12/21 20:06:21,615 INFO [netty-rpc-connection-0] TransportClientFactory: Successfully created connection to /172.16.159.100:40115 after 4 ms (0 ms spent in bootstraps)
|
||||
21/12/21 20:06:21,661 INFO [dispatcher-event-loop-9] Master: Registered worker
|
||||
Host: 172.16.159.100
|
||||
RpcPort: 40115
|
||||
PushPort: 35489
|
||||
FetchPort: 35689
|
||||
TotalSlots: 4096
|
||||
SlotsUsed: 0
|
||||
SlotsAvailable: 4096
|
||||
22/10/08 19:29:11,805 INFO [main] Dispatcher: Dispatcher numThreads: 64
|
||||
22/10/08 19:29:11,875 INFO [main] TransportClientFactory: mode NIO threads 64
|
||||
22/10/08 19:29:12,057 INFO [main] Utils: Successfully started service 'MasterSys' on port 9097.
|
||||
22/10/08 19:29:12,113 INFO [main] Master: Metrics system enabled.
|
||||
22/10/08 19:29:12,125 INFO [main] HttpServer: master: HttpServer started on port 9098.
|
||||
22/10/08 19:29:12,126 INFO [main] Master: Master started.
|
||||
22/10/08 19:29:57,842 INFO [dispatcher-event-loop-19] Master: Registered worker
|
||||
Host: 192.168.15.140
|
||||
RpcPort: 37359
|
||||
PushPort: 38303
|
||||
FetchPort: 37569
|
||||
ReplicatePort: 37093
|
||||
SlotsUsed: 0()
|
||||
LastHeartbeat: 0
|
||||
WorkerRef: NettyRpcEndpointRef(ess://WorkerEndpoint@172.16.159.100:40115)
|
||||
.
|
||||
21/12/21 20:06:23,785 INFO [netty-rpc-connection-1] TransportClientFactory: Successfully created connection to /172.16.159.98:39151 after 1 ms (0 ms spent in bootstraps)
|
||||
21/12/21 20:06:23,817 INFO [dispatcher-event-loop-17] Master: Registered worker
|
||||
Host: 172.16.159.98
|
||||
RpcPort: 39151
|
||||
PushPort: 40193
|
||||
FetchPort: 37455
|
||||
TotalSlots: 4096
|
||||
SlotsUsed: 0
|
||||
SlotsAvailable: 4096
|
||||
LastHeartbeat: 0
|
||||
WorkerRef: NettyRpcEndpointRef(ess://WorkerEndpoint@172.16.159.98:39151)
|
||||
.
|
||||
21/12/21 20:06:25,948 INFO [netty-rpc-connection-2] TransportClientFactory: Successfully created connection to /172.16.159.99:41955 after 1 ms (0 ms spent in bootstraps)
|
||||
21/12/21 20:06:26,009 INFO [dispatcher-event-loop-25] Master: Registered worker
|
||||
Host: 172.16.159.99
|
||||
RpcPort: 41955
|
||||
PushPort: 37587
|
||||
FetchPort: 46865
|
||||
TotalSlots: 4096
|
||||
SlotsUsed: 0
|
||||
SlotsAvailable: 4096
|
||||
LastHeartbeat: 0
|
||||
WorkerRef: NettyRpcEndpointRef(ess://WorkerEndpoint@172.16.159.99:41955)
|
||||
Disks: {/mnt/disk1=DiskInfo(maxSlots: 6679, committed shuffles 0 shuffleAllocations: Map(), mountPoint: /mnt/disk1, usableSpace: 448284381184, avgFlushTime: 0, activeSlots: 0) status: HEALTHY dirs , /mnt/disk3=DiskInfo(maxSlots: 6716, committed shuffles 0 shuffleAllocations: Map(), mountPoint: /mnt/disk3, usableSpace: 450755608576, avgFlushTime: 0, activeSlots: 0) status: HEALTHY dirs , /mnt/disk2=DiskInfo(maxSlots: 6713, committed shuffles 0 shuffleAllocations: Map(), mountPoint: /mnt/disk2, usableSpace: 450532900864, avgFlushTime: 0, activeSlots: 0) status: HEALTHY dirs , /mnt/disk4=DiskInfo(maxSlots: 6712, committed shuffles 0 shuffleAllocations: Map(), mountPoint: /mnt/disk4, usableSpace: 450456805376, avgFlushTime: 0, activeSlots: 0) status: HEALTHY dirs }
|
||||
WorkerRef: null
|
||||
```
|
||||
|
||||
### Deploy Spark client
|
||||
Copy $RSS_HOME/spark/*.jar to $SPARK_HOME/jars/
|
||||
Copy $CELEBORN_HOME/spark/*.jar to $SPARK_HOME/jars/
|
||||
|
||||
### Spark Configuration
|
||||
To use Celeborn, the following Spark configurations should be added.
|
||||
@ -197,15 +173,15 @@ spark.rss.push.data.replicate true
|
||||
# we recommend setting localShuffleReader to false to get better performance of Celeborn
|
||||
spark.sql.adaptive.localShuffleReader.enabled false
|
||||
|
||||
# we recommend enable aqe support to gain better performance
|
||||
# we recommend enabling aqe support to gain better performance
|
||||
spark.sql.adaptive.enabled true
|
||||
spark.sql.adaptive.skewJoin.enabled true
|
||||
spark.sql.adaptive.skewJoin.enabled true
|
||||
```
|
||||
|
||||
### Best Practice
|
||||
If you want to set up a production-ready Celeborn cluster, your cluster should have at least 3 masters and at least 4 workers.
|
||||
Masters and works can be deployed on the same node, but should not deploy multiple masters or workers on the same node.
|
||||
See more detail in [CONFIGURATIONS](CONFIGURATION_GUIDE.md)
|
||||
Masters and workers can be deployed on the same node, but multiple masters or multiple workers should not be deployed on the same node.
|
||||
See more detail in [CONFIGURATIONS](docs/configuration.md)
|
||||
|
||||
### Support Spark Dynamic Allocation
|
||||
We provide a patch to enable users to use Spark with both dynamic allocation and Remote Shuffle Service.
|
||||
@ -213,18 +189,20 @@ For Spark2.x check [Spark2 Patch](assets/spark-patch/RSS_RDA_spark2.patch).
|
||||
For Spark3.x check [Spark3 Patch](assets/spark-patch/RSS_RDA_spark3.patch).
|
||||
|
||||
### Metrics
|
||||
Celeborn have various metrics. [METRICS](METRICS.md)
|
||||
Celeborn has various metrics. [METRICS](METRICS.md)
|
||||
|
||||
## Contribution
|
||||
This is an active open-source project. We are always open to developers who want to use the system or contribute to it.
|
||||
This is an active open-source project. We are always open to developers who want to use the system or contribute to it.
|
||||
See more detail in [Contributing](CONTRIBUTING.md).
|
||||
|
||||
## NOTICE
|
||||
If you need to fully restart an Celeborn cluster in HA mode, you must clean ratis meta storage first because ratis meta will store expired states of the last running cluster.
|
||||
If you need to fully restart a Celeborn cluster in HA mode,
|
||||
you must clean ratis meta storage first
|
||||
because ratis meta will store expired states of the last running cluster.
|
||||
|
||||
Here are some instructions:
|
||||
1. Stop all workers.
|
||||
2. Stop all masters.
|
||||
3. Clean all master`s ratis meta storage directory(rss.ha.storage.dir).
|
||||
3. Clean every master's ratis meta storage directory (rss.ha.storage.dir).
|
||||
4. Start all masters.
|
||||
5. Start all workers.
|
||||
@ -16,19 +16,19 @@
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
if [ -z "${RSS_HOME}" ]; then
|
||||
export RSS_HOME="$(cd "`dirname "$0"`"/..; pwd)"
|
||||
if [ -z "${CELEBORN_HOME}" ]; then
|
||||
export CELEBORN_HOME="$(cd "`dirname "$0"`"/..; pwd)"
|
||||
fi
|
||||
|
||||
export RSS_CONF_DIR="${RSS_CONF_DIR:-"${RSS_HOME}/conf"}"
|
||||
export CELEBORN_CONF_DIR="${CELEBORN_CONF_DIR:-"${CELEBORN_HOME}/conf"}"
|
||||
|
||||
if [ -z "$RSS_ENV_LOADED" ]; then
|
||||
export RSS_ENV_LOADED=1
|
||||
if [ -z "$CELEBORN_ENV_LOADED" ]; then
|
||||
export CELEBORN_ENV_LOADED=1
|
||||
|
||||
if [ -f "${RSS_CONF_DIR}/rss-env.sh" ]; then
|
||||
if [ -f "${CELEBORN_CONF_DIR}/celeborn-env.sh" ]; then
|
||||
# Promote all variable declarations to environment (exported) variables
|
||||
set -a
|
||||
. "${RSS_CONF_DIR}/rss-env.sh"
|
||||
. "${CELEBORN_CONF_DIR}/celeborn-env.sh"
|
||||
set +a
|
||||
fi
|
||||
fi
|
||||
@ -45,7 +45,7 @@ else
|
||||
fi
|
||||
fi
|
||||
|
||||
# Find RSS jars.
|
||||
# Find CELEBORN jars.
|
||||
|
||||
for i in "$@"
|
||||
do
|
||||
@ -58,36 +58,36 @@ do
|
||||
done
|
||||
|
||||
if [ "${LAUNCH_CLASS}" == "org.apache.celeborn.service.deploy.master.Master" ] ; then
|
||||
if [ -d "${RSS_HOME}/master-jars" ]; then
|
||||
RSS_JARS_DIR="${RSS_HOME}/master-jars"
|
||||
if [ -d "${CELEBORN_HOME}/master-jars" ]; then
|
||||
CELEBORN_JARS_DIR="${CELEBORN_HOME}/master-jars"
|
||||
else
|
||||
RSS_JARS_DIR="${RSS_HOME}/master/target"
|
||||
CELEBORN_JARS_DIR="${CELEBORN_HOME}/master/target"
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ "${LAUNCH_CLASS}" == "org.apache.celeborn.service.deploy.worker.Worker" ] ; then
|
||||
if [ -d "${RSS_HOME}/worker-jars" ]; then
|
||||
RSS_JARS_DIR="${RSS_HOME}/worker-jars"
|
||||
if [ -d "${CELEBORN_HOME}/worker-jars" ]; then
|
||||
CELEBORN_JARS_DIR="${CELEBORN_HOME}/worker-jars"
|
||||
else
|
||||
RSS_JARS_DIR="${RSS_HOME}/worker/target"
|
||||
CELEBORN_JARS_DIR="${CELEBORN_HOME}/worker/target"
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ ! -d "$RSS_JARS_DIR" ]; then
|
||||
echo "Failed to find RSS jars directory ($RSS_JARS_DIR)." 1>&2
|
||||
echo "You need to build RSS with the target \"package\" before running this program." 1>&2
|
||||
if [ ! -d "$CELEBORN_JARS_DIR" ]; then
|
||||
echo "Failed to find CELEBORN jars directory ($CELEBORN_JARS_DIR)." 1>&2
|
||||
echo "You need to build CELEBORN with the target \"package\" before running this program." 1>&2
|
||||
exit 1
|
||||
else
|
||||
RSS_CLASSPATH="$RSS_CONF_DIR:$HADOOP_CONF_DIR:$RSS_JARS_DIR/*"
|
||||
CELEBORN_CLASSPATH="$CELEBORN_CONF_DIR:$HADOOP_CONF_DIR:$CELEBORN_JARS_DIR/*"
|
||||
fi
|
||||
|
||||
# Turn off posix mode since it does not allow process substitution
|
||||
set +o posix
|
||||
CMD=()
|
||||
CMD+=("$RUNNER")
|
||||
CMD=(${CMD[@]} "$RSS_JAVA_OPTS")
|
||||
CMD=(${CMD[@]} "$CELEBORN_JAVA_OPTS")
|
||||
CMD+=("-cp")
|
||||
CMD+=("$RSS_CLASSPATH")
|
||||
CMD+=("$CELEBORN_CLASSPATH")
|
||||
CMD=(${CMD[@]} "$@")
|
||||
|
||||
COUNT=${#CMD[@]}
|
||||
34
conf/celeborn-env.sh.template
Executable file
34
conf/celeborn-env.sh.template
Executable file
@ -0,0 +1,34 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
# - CELEBORN_MASTER_MEMORY, to set how much total memory of master (e.g. 1000m, 2g)
|
||||
# - CELEBORN_WORKER_MEMORY, to set how much total memory of workers (e.g. 1000m, 2g)
|
||||
# - CELEBORN_WORKER_OFFHEAP_MEMORY, to set how much total off-heap memory of workers (e.g. 1000m, 2g)
|
||||
# - CELEBORN_MASTER_JAVA_OPTS
|
||||
# - CELEBORN_WORKER_JAVA_OPTS
|
||||
# - CELEBORN_PID_DIR
|
||||
# - CELEBORN_LOG_DIR
|
||||
|
||||
# Example:
|
||||
# CELEBORN_MASTER_MEMORY=2g
|
||||
# CELEBORN_WORKER_MEMORY=2g
|
||||
# CELEBORN_WORKER_OFFHEAP_MEMORY=4g
|
||||
# CELEBORN_WORKER_JAVA_OPTS="-XX:-PrintGC -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -Xloggc:gc-worker.out -Dio.netty.leakDetectionLevel=advanced"
|
||||
# CELEBORN_MASTER_JAVA_OPTS="-XX:-PrintGC -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -Xloggc:gc-master.out -Dio.netty.leakDetectionLevel=advanced"
|
||||
# CELEBORN_PID_DIR="$CELEBORN_HOME/pids"
|
||||
# CELEBORN_LOG_DIR="$CELEBORN_HOME/logs"
|
||||
@ -1,34 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
# - RSS_MASTER_MEMORY, to set how much total memory of rss master (e.g. 1000m, 2g)
|
||||
# - RSS_WORKER_MEMORY, to set how much total memory of rss workers (e.g. 1000m, 2g)
|
||||
# - RSS_WORKER_OFFHEAP_MEMORY, to set how much total off-heap memory of rss workers (e.g. 1000m, 2g)
|
||||
# - RSS_MASTER_JAVA_OPTS
|
||||
# - RSS_WORKER_JAVA_OPTS
|
||||
# - RSS_PID_DIR
|
||||
# - RSS_LOG_DIR
|
||||
|
||||
# Example:
|
||||
# RSS_MASTER_MEMORY=2g
|
||||
# RSS_WORKER_MEMORY=2g
|
||||
# RSS_WORKER_OFFHEAP_MEMORY=4g
|
||||
# RSS_WORKER_JAVA_OPTS="-XX:-PrintGC -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -Xloggc:gc-worker.out -Dio.netty.leakDetectionLevel=advanced"
|
||||
# RSS_MASTER_JAVA_OPTS="-XX:-PrintGC -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -Xloggc:gc-master.out -Dio.netty.leakDetectionLevel=advanced"
|
||||
# RSS_PID_DIR="$RSS_HOME/pids"
|
||||
# RSS_LOG_DIR="$RSS_HOME/logs"
|
||||
@ -18,24 +18,24 @@
|
||||
|
||||
unset HADOOP_CONF_DIR
|
||||
|
||||
# included in all the rss scripts with source command
|
||||
# included in all the celeborn scripts with source command
|
||||
# should not be executable directly
|
||||
# also should not be passed any arguments, since we need original $*
|
||||
|
||||
# symlink and absolute path should rely on RSS_HOME to resolve
|
||||
if [ -z "${RSS_HOME}" ]; then
|
||||
export RSS_HOME="$(cd "`dirname "$0"`"/..; pwd)"
|
||||
# symlink and absolute path should rely on CELEBORN_HOME to resolve
|
||||
if [ -z "${CELEBORN_HOME}" ]; then
|
||||
export CELEBORN_HOME="$(cd "`dirname "$0"`"/..; pwd)"
|
||||
fi
|
||||
|
||||
export RSS_CONF_DIR="${RSS_CONF_DIR:-"${RSS_HOME}/conf"}"
|
||||
export CELEBORN_CONF_DIR="${CELEBORN_CONF_DIR:-"${CELEBORN_HOME}/conf"}"
|
||||
|
||||
if [ -z "$RSS_ENV_LOADED" ]; then
|
||||
export RSS_ENV_LOADED=1
|
||||
if [ -z "$CELEBORN_ENV_LOADED" ]; then
|
||||
export CELEBORN_ENV_LOADED=1
|
||||
|
||||
if [ -f "${RSS_CONF_DIR}/rss-env.sh" ]; then
|
||||
if [ -f "${CELEBORN_CONF_DIR}/celeborn-env.sh" ]; then
|
||||
# Promote all variable declarations to environment (exported) variables
|
||||
set -a
|
||||
. "${RSS_CONF_DIR}/rss-env.sh"
|
||||
. "${CELEBORN_CONF_DIR}/celeborn-env.sh"
|
||||
set +a
|
||||
fi
|
||||
fi
|
||||
@ -16,19 +16,19 @@
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
# Runs a RSS command as a daemon.
|
||||
# Runs a celeborn command as a daemon.
|
||||
#
|
||||
# Environment Variables
|
||||
#
|
||||
# RSS_CONF_DIR Alternate conf dir. Default is ${RSS_HOME}/conf.
|
||||
# RSS_LOG_DIR Where log files are stored. ${RSS_HOME}/logs by default.
|
||||
# RSS_PID_DIR The pid files are stored. /tmp by default.
|
||||
# RSS_IDENT_STRING A string representing this instance of rss. $USER by default
|
||||
# RSS_NICENESS The scheduling priority for daemons. Defaults to 0.
|
||||
# RSS_NO_DAEMONIZE If set, will run the proposed command in the foreground. It will not output a PID file.
|
||||
# CELEBORN_CONF_DIR Alternate conf dir. Default is ${CELEBORN_HOME}/conf.
|
||||
# CELEBORN_LOG_DIR Where log files are stored. ${CELEBORN_HOME}/logs by default.
|
||||
# CELEBORN_PID_DIR Where the pid files are stored. ${CELEBORN_HOME}/pids by default.
|
||||
# CELEBORN_IDENT_STRING A string representing this instance of celeborn. $USER by default
|
||||
# CELEBORN_NICENESS The scheduling priority for daemons. Defaults to 0.
|
||||
# CELEBORN_NO_DAEMONIZE If set, will run the proposed command in the foreground. It will not output a PID file.
|
||||
##
|
||||
|
||||
usage="Usage: rss-daemon.sh [--config <conf-dir>] (start|stop|status) <rss-command> <rss-instance-number> <args...>"
|
||||
usage="Usage: celeborn-daemon.sh [--config <conf-dir>] (start|stop|status) <celeborn-command> <celeborn-instance-number> <args...>"
|
||||
|
||||
# if no args specified, show usage
|
||||
if [ $# -le 1 ]; then
|
||||
@ -36,11 +36,11 @@ if [ $# -le 1 ]; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ -z "${RSS_HOME}" ]; then
|
||||
export RSS_HOME="$(cd "`dirname "$0"`"/..; pwd)"
|
||||
if [ -z "${CELEBORN_HOME}" ]; then
|
||||
export CELEBORN_HOME="$(cd "`dirname "$0"`"/..; pwd)"
|
||||
fi
|
||||
|
||||
. "${RSS_HOME}/sbin/rss-config.sh"
|
||||
. "${CELEBORN_HOME}/sbin/celeborn-config.sh"
|
||||
|
||||
# get arguments
|
||||
|
||||
@ -57,7 +57,7 @@ then
|
||||
echo $usage
|
||||
exit 1
|
||||
else
|
||||
export RSS_CONF_DIR="$conf_dir"
|
||||
export CELEBORN_CONF_DIR="$conf_dir"
|
||||
fi
|
||||
shift
|
||||
fi
|
||||
@ -69,7 +69,7 @@ shift
|
||||
instance=$1
|
||||
shift
|
||||
|
||||
rss_rotate_log ()
|
||||
celeborn_rotate_log ()
|
||||
{
|
||||
log=$1;
|
||||
num=5;
|
||||
@ -86,40 +86,40 @@ rss_rotate_log ()
|
||||
fi
|
||||
}
|
||||
|
||||
if [ "$RSS_IDENT_STRING" = "" ]; then
|
||||
export RSS_IDENT_STRING="$USER"
|
||||
if [ "$CELEBORN_IDENT_STRING" = "" ]; then
|
||||
export CELEBORN_IDENT_STRING="$USER"
|
||||
fi
|
||||
|
||||
export RSS_PRINT_LAUNCH_COMMAND="1"
|
||||
export CELEBORN_PRINT_LAUNCH_COMMAND="1"
|
||||
|
||||
# get log directory
|
||||
if [ "$RSS_LOG_DIR" = "" ]; then
|
||||
export RSS_LOG_DIR="${RSS_HOME}/logs"
|
||||
if [ "$CELEBORN_LOG_DIR" = "" ]; then
|
||||
export CELEBORN_LOG_DIR="${CELEBORN_HOME}/logs"
|
||||
fi
|
||||
mkdir -p "$RSS_LOG_DIR"
|
||||
touch "$RSS_LOG_DIR"/.rss_test > /dev/null 2>&1
|
||||
mkdir -p "$CELEBORN_LOG_DIR"
|
||||
touch "$CELEBORN_LOG_DIR"/.celeborn_test > /dev/null 2>&1
|
||||
TEST_LOG_DIR=$?
|
||||
if [ "${TEST_LOG_DIR}" = "0" ]; then
|
||||
rm -f "$RSS_LOG_DIR"/.rss_test
|
||||
rm -f "$CELEBORN_LOG_DIR"/.celeborn_test
|
||||
else
|
||||
chown "$RSS_IDENT_STRING" "$RSS_LOG_DIR"
|
||||
chown "$CELEBORN_IDENT_STRING" "$CELEBORN_LOG_DIR"
|
||||
fi
|
||||
|
||||
if [ "$RSS_PID_DIR" = "" ]; then
|
||||
RSS_PID_DIR="${RSS_HOME}/pids"
|
||||
if [ "$CELEBORN_PID_DIR" = "" ]; then
|
||||
CELEBORN_PID_DIR="${CELEBORN_HOME}/pids"
|
||||
fi
|
||||
|
||||
# some variables
|
||||
log="$RSS_LOG_DIR/rss-$RSS_IDENT_STRING-$command-$instance-$HOSTNAME.out"
|
||||
pid="$RSS_PID_DIR/rss-$RSS_IDENT_STRING-$command-$instance.pid"
|
||||
log="$CELEBORN_LOG_DIR/celeborn-$CELEBORN_IDENT_STRING-$command-$instance-$HOSTNAME.out"
|
||||
pid="$CELEBORN_PID_DIR/celeborn-$CELEBORN_IDENT_STRING-$command-$instance.pid"
|
||||
|
||||
# Set default scheduling priority
|
||||
if [ "$RSS_NICENESS" = "" ]; then
|
||||
export RSS_NICENESS=0
|
||||
if [ "$CELEBORN_NICENESS" = "" ]; then
|
||||
export CELEBORN_NICENESS=0
|
||||
fi
|
||||
|
||||
execute_command() {
|
||||
if [ -z ${RSS_NO_DAEMONIZE+set} ]; then
|
||||
if [ -z ${CELEBORN_NO_DAEMONIZE+set} ]; then
|
||||
nohup -- "$@" >> $log 2>&1 < /dev/null &
|
||||
newpid="$!"
|
||||
|
||||
@ -150,7 +150,7 @@ run_command() {
|
||||
mode="$1"
|
||||
shift
|
||||
|
||||
mkdir -p "$RSS_PID_DIR"
|
||||
mkdir -p "$CELEBORN_PID_DIR"
|
||||
|
||||
if [ -f "$pid" ]; then
|
||||
TARGET_ID="$(cat "$pid")"
|
||||
@ -160,12 +160,12 @@ run_command() {
|
||||
fi
|
||||
fi
|
||||
|
||||
rss_rotate_log "$log"
|
||||
celeborn_rotate_log "$log"
|
||||
echo "starting $command, logging to $log"
|
||||
|
||||
case "$mode" in
|
||||
(class)
|
||||
execute_command nice -n "$RSS_NICENESS" "${RSS_HOME}"/bin/rss-class "$command" "$@"
|
||||
execute_command nice -n "$CELEBORN_NICENESS" "${CELEBORN_HOME}"/bin/celeborn-class "$command" "$@"
|
||||
;;
|
||||
|
||||
(*)
|
||||
@ -205,7 +205,7 @@ case $option in
|
||||
echo "stopping $command"
|
||||
kill "$TARGET_ID" && rm -f "$pid"
|
||||
wait_time=0
|
||||
# keep same with `rss.worker.shutdown.timeout`
|
||||
# keep same with `celeborn.worker.shutdown.timeout`
|
||||
wait_timeout=600
|
||||
while [[ $(ps -p "$TARGET_ID" -o comm=) != "" && $wait_time -lt $wait_timeout ]];
|
||||
do
|
||||
@ -16,14 +16,14 @@
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
# Restart the rss worker on the machine this script is executed on.
|
||||
# Restart the celeborn worker on the machine this script is executed on.
|
||||
|
||||
if [ -z "${RSS_HOME}" ]; then
|
||||
export RSS_HOME="$(cd "`dirname "$0"`"/..; pwd)"
|
||||
if [ -z "${CELEBORN_HOME}" ]; then
|
||||
export CELEBORN_HOME="$(cd "`dirname "$0"`"/..; pwd)"
|
||||
fi
|
||||
|
||||
if [ "$WORKER_INSTANCE" = "" ]; then
|
||||
WORKER_INSTANCE=1
|
||||
fi
|
||||
|
||||
"${RSS_HOME}/sbin/rss-daemon.sh" restart org.apache.celeborn.service.deploy.worker.Worker "$WORKER_INSTANCE"
|
||||
"${CELEBORN_HOME}/sbin/celeborn-daemon.sh" restart org.apache.celeborn.service.deploy.worker.Worker "$WORKER_INSTANCE"
|
||||
@ -16,48 +16,48 @@
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
# Starts the rss master and workers on the machine this script is executed on.
|
||||
# Starts the celeborn masters and workers on the hosts listed in ${CELEBORN_CONF_DIR}/hosts (localhost if that file is absent).
|
||||
|
||||
if [ -z "${RSS_HOME}" ]; then
|
||||
export RSS_HOME="$(cd "`dirname "$0"`"/..; pwd)"
|
||||
if [ -z "${CELEBORN_HOME}" ]; then
|
||||
export CELEBORN_HOME="$(cd "`dirname "$0"`"/..; pwd)"
|
||||
fi
|
||||
|
||||
. "${RSS_HOME}/sbin/rss-config.sh"
|
||||
. "${CELEBORN_HOME}/sbin/celeborn-config.sh"
|
||||
|
||||
if [ -f "${RSS_CONF_DIR}/hosts" ]; then
|
||||
HOST_LIST=$(awk '/\[/{prefix=$0; next} $1{print prefix,$0}' "${RSS_CONF_DIR}/hosts")
|
||||
if [ -f "${CELEBORN_CONF_DIR}/hosts" ]; then
|
||||
HOST_LIST=$(awk '/\[/{prefix=$0; next} $1{print prefix,$0}' "${CELEBORN_CONF_DIR}/hosts")
|
||||
else
|
||||
HOST_LIST="[master] localhost\n[worker] localhost"
|
||||
fi
|
||||
|
||||
# By default disable strict host key checking
|
||||
if [ "$RSS_SSH_OPTS" = "" ]; then
|
||||
RSS_SSH_OPTS="-o StrictHostKeyChecking=no"
|
||||
if [ "$CELEBORN_SSH_OPTS" = "" ]; then
|
||||
CELEBORN_SSH_OPTS="-o StrictHostKeyChecking=no"
|
||||
fi
|
||||
|
||||
# start masters
|
||||
for host in `echo "$HOST_LIST" | sed "s/#.*$//;/^$/d" | grep '\[master\]' | awk '{print $NF}'`
|
||||
do
|
||||
if [ -n "${RSS_SSH_FOREGROUND}" ]; then
|
||||
ssh $RSS_SSH_OPTS "$host" "${RSS_HOME}/sbin/start-master.sh"
|
||||
if [ -n "${CELEBORN_SSH_FOREGROUND}" ]; then
|
||||
ssh $CELEBORN_SSH_OPTS "$host" "${CELEBORN_HOME}/sbin/start-master.sh"
|
||||
else
|
||||
ssh $RSS_SSH_OPTS "$host" "${RSS_HOME}/sbin/start-master.sh" &
|
||||
ssh $CELEBORN_SSH_OPTS "$host" "${CELEBORN_HOME}/sbin/start-master.sh" &
|
||||
fi
|
||||
if [ "$RSS_SLEEP" != "" ]; then
|
||||
sleep $RSS_SLEEP
|
||||
if [ "$CELEBORN_SLEEP" != "" ]; then
|
||||
sleep $CELEBORN_SLEEP
|
||||
fi
|
||||
done
|
||||
|
||||
# start workers
|
||||
for host in `echo "$HOST_LIST" | sed "s/#.*$//;/^$/d" | grep '\[worker\]' | awk '{print $NF}'`
|
||||
do
|
||||
if [ -n "${RSS_SSH_FOREGROUND}" ]; then
|
||||
ssh $RSS_SSH_OPTS "$host" "${RSS_HOME}/sbin/start-worker.sh"
|
||||
if [ -n "${CELEBORN_SSH_FOREGROUND}" ]; then
|
||||
ssh $CELEBORN_SSH_OPTS "$host" "${CELEBORN_HOME}/sbin/start-worker.sh"
|
||||
else
|
||||
ssh $RSS_SSH_OPTS "$host" "${RSS_HOME}/sbin/start-worker.sh" &
|
||||
ssh $CELEBORN_SSH_OPTS "$host" "${CELEBORN_HOME}/sbin/start-worker.sh" &
|
||||
fi
|
||||
if [ "$RSS_SLEEP" != "" ]; then
|
||||
sleep $RSS_SLEEP
|
||||
if [ "$CELEBORN_SLEEP" != "" ]; then
|
||||
sleep $CELEBORN_SLEEP
|
||||
fi
|
||||
done
|
||||
|
||||
|
||||
@ -16,18 +16,18 @@
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
# Starts the rss master on the machine this script is executed on.
|
||||
# Starts the celeborn master on the machine this script is executed on.
|
||||
|
||||
if [ -z "${RSS_HOME}" ]; then
|
||||
export RSS_HOME="$(cd "`dirname "$0"`"/..; pwd)"
|
||||
if [ -z "${CELEBORN_HOME}" ]; then
|
||||
export CELEBORN_HOME="$(cd "`dirname "$0"`"/..; pwd)"
|
||||
fi
|
||||
|
||||
. "${RSS_HOME}/sbin/rss-config.sh"
|
||||
. "${CELEBORN_HOME}/sbin/celeborn-config.sh"
|
||||
|
||||
if [ "$RSS_MASTER_MEMORY" = "" ]; then
|
||||
RSS_MASTER_MEMORY="1g"
|
||||
if [ "$CELEBORN_MASTER_MEMORY" = "" ]; then
|
||||
CELEBORN_MASTER_MEMORY="1g"
|
||||
fi
|
||||
|
||||
export RSS_JAVA_OPTS="-Xmx$RSS_MASTER_MEMORY $RSS_MASTER_JAVA_OPTS"
|
||||
export CELEBORN_JAVA_OPTS="-Xmx$CELEBORN_MASTER_MEMORY $CELEBORN_MASTER_JAVA_OPTS"
|
||||
|
||||
"${RSS_HOME}/sbin/rss-daemon.sh" start org.apache.celeborn.service.deploy.master.Master 1 "$@"
|
||||
"${CELEBORN_HOME}/sbin/celeborn-daemon.sh" start org.apache.celeborn.service.deploy.master.Master 1 "$@"
|
||||
@ -16,26 +16,26 @@
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
# Starts the rss worker on the machine this script is executed on.
|
||||
# Starts the celeborn worker on the machine this script is executed on.
|
||||
|
||||
if [ -z "${RSS_HOME}" ]; then
|
||||
export RSS_HOME="$(cd "`dirname "$0"`"/..; pwd)"
|
||||
if [ -z "${CELEBORN_HOME}" ]; then
|
||||
export CELEBORN_HOME="$(cd "`dirname "$0"`"/..; pwd)"
|
||||
fi
|
||||
|
||||
. "${RSS_HOME}/sbin/rss-config.sh"
|
||||
. "${CELEBORN_HOME}/sbin/celeborn-config.sh"
|
||||
|
||||
if [ "$RSS_WORKER_MEMORY" = "" ]; then
|
||||
RSS_WORKER_MEMORY="1g"
|
||||
if [ "$CELEBORN_WORKER_MEMORY" = "" ]; then
|
||||
CELEBORN_WORKER_MEMORY="1g"
|
||||
fi
|
||||
|
||||
if [ "$RSS_WORKER_OFFHEAP_MEMORY" = "" ]; then
|
||||
RSS_WORKER_OFFHEAP_MEMORY="1g"
|
||||
if [ "$CELEBORN_WORKER_OFFHEAP_MEMORY" = "" ]; then
|
||||
CELEBORN_WORKER_OFFHEAP_MEMORY="1g"
|
||||
fi
|
||||
|
||||
export RSS_JAVA_OPTS="-Xmx$RSS_WORKER_MEMORY -XX:MaxDirectMemorySize=$RSS_WORKER_OFFHEAP_MEMORY $RSS_WORKER_JAVA_OPTS"
|
||||
export CELEBORN_JAVA_OPTS="-Xmx$CELEBORN_WORKER_MEMORY -XX:MaxDirectMemorySize=$CELEBORN_WORKER_OFFHEAP_MEMORY $CELEBORN_WORKER_JAVA_OPTS"
|
||||
|
||||
if [ "$WORKER_INSTANCE" = "" ]; then
|
||||
WORKER_INSTANCE=1
|
||||
fi
|
||||
|
||||
"${RSS_HOME}/sbin/rss-daemon.sh" start org.apache.celeborn.service.deploy.worker.Worker "$WORKER_INSTANCE" "$@"
|
||||
"${CELEBORN_HOME}/sbin/celeborn-daemon.sh" start org.apache.celeborn.service.deploy.worker.Worker "$WORKER_INSTANCE" "$@"
|
||||
@ -16,48 +16,48 @@
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
# Starts the rss master and workers on the machine this script is executed on.
|
||||
# Stops the celeborn masters and workers on the hosts listed in ${CELEBORN_CONF_DIR}/hosts (localhost if that file is absent).
|
||||
|
||||
if [ -z "${RSS_HOME}" ]; then
|
||||
export RSS_HOME="$(cd "`dirname "$0"`"/..; pwd)"
|
||||
if [ -z "${CELEBORN_HOME}" ]; then
|
||||
export CELEBORN_HOME="$(cd "`dirname "$0"`"/..; pwd)"
|
||||
fi
|
||||
|
||||
. "${RSS_HOME}/sbin/rss-config.sh"
|
||||
. "${CELEBORN_HOME}/sbin/celeborn-config.sh"
|
||||
|
||||
if [ -f "${RSS_CONF_DIR}/hosts" ]; then
|
||||
HOST_LIST=$(awk '/\[/{prefix=$0; next} $1{print prefix,$0}' "${RSS_CONF_DIR}/hosts")
|
||||
if [ -f "${CELEBORN_CONF_DIR}/hosts" ]; then
|
||||
HOST_LIST=$(awk '/\[/{prefix=$0; next} $1{print prefix,$0}' "${CELEBORN_CONF_DIR}/hosts")
|
||||
else
|
||||
HOST_LIST="[master] localhost\n[worker] localhost"
|
||||
fi
|
||||
|
||||
# By default disable strict host key checking
|
||||
if [ "$RSS_SSH_OPTS" = "" ]; then
|
||||
RSS_SSH_OPTS="-o StrictHostKeyChecking=no"
|
||||
if [ "$CELEBORN_SSH_OPTS" = "" ]; then
|
||||
CELEBORN_SSH_OPTS="-o StrictHostKeyChecking=no"
|
||||
fi
|
||||
|
||||
# stop masters
|
||||
for host in `echo "$HOST_LIST" | sed "s/#.*$//;/^$/d" | grep '\[master\]' | awk '{print $NF}'`
|
||||
do
|
||||
if [ -n "${RSS_SSH_FOREGROUND}" ]; then
|
||||
ssh $RSS_SSH_OPTS "$host" "${RSS_HOME}/sbin/stop-master.sh"
|
||||
if [ -n "${CELEBORN_SSH_FOREGROUND}" ]; then
|
||||
ssh $CELEBORN_SSH_OPTS "$host" "${CELEBORN_HOME}/sbin/stop-master.sh"
|
||||
else
|
||||
ssh $RSS_SSH_OPTS "$host" "${RSS_HOME}/sbin/stop-master.sh" &
|
||||
ssh $CELEBORN_SSH_OPTS "$host" "${CELEBORN_HOME}/sbin/stop-master.sh" &
|
||||
fi
|
||||
if [ "$RSS_SLEEP" != "" ]; then
|
||||
sleep $RSS_SLEEP
|
||||
if [ "$CELEBORN_SLEEP" != "" ]; then
|
||||
sleep $CELEBORN_SLEEP
|
||||
fi
|
||||
done
|
||||
|
||||
# stop workers
|
||||
for host in `echo "$HOST_LIST"| sed "s/#.*$//;/^$/d" | grep '\[worker\]' | awk '{print $NF}'`
|
||||
do
|
||||
if [ -n "${RSS_SSH_FOREGROUND}" ]; then
|
||||
ssh $RSS_SSH_OPTS "$host" "${RSS_HOME}/sbin/stop-worker.sh"
|
||||
if [ -n "${CELEBORN_SSH_FOREGROUND}" ]; then
|
||||
ssh $CELEBORN_SSH_OPTS "$host" "${CELEBORN_HOME}/sbin/stop-worker.sh"
|
||||
else
|
||||
ssh $RSS_SSH_OPTS "$host" "${RSS_HOME}/sbin/stop-worker.sh" &
|
||||
ssh $CELEBORN_SSH_OPTS "$host" "${CELEBORN_HOME}/sbin/stop-worker.sh" &
|
||||
fi
|
||||
if [ "$RSS_SLEEP" != "" ]; then
|
||||
sleep $RSS_SLEEP
|
||||
if [ "$CELEBORN_SLEEP" != "" ]; then
|
||||
sleep $CELEBORN_SLEEP
|
||||
fi
|
||||
done
|
||||
|
||||
|
||||
@ -16,10 +16,10 @@
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
# Stops the rss master on the machine this script is executed on.
|
||||
# Stops the celeborn master on the machine this script is executed on.
|
||||
|
||||
if [ -z "${RSS_HOME}" ]; then
|
||||
export RSS_HOME="$(cd "`dirname "$0"`"/..; pwd)"
|
||||
if [ -z "${CELEBORN_HOME}" ]; then
|
||||
export CELEBORN_HOME="$(cd "`dirname "$0"`"/..; pwd)"
|
||||
fi
|
||||
|
||||
"${RSS_HOME}/sbin/rss-daemon.sh" stop org.apache.celeborn.service.deploy.master.Master 1
|
||||
"${CELEBORN_HOME}/sbin/celeborn-daemon.sh" stop org.apache.celeborn.service.deploy.master.Master 1
|
||||
@ -16,14 +16,14 @@
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
# Stops the rss worker on the machine this script is executed on.
|
||||
# Stops the celeborn worker on the machine this script is executed on.
|
||||
|
||||
if [ -z "${RSS_HOME}" ]; then
|
||||
export RSS_HOME="$(cd "`dirname "$0"`"/..; pwd)"
|
||||
if [ -z "${CELEBORN_HOME}" ]; then
|
||||
export CELEBORN_HOME="$(cd "`dirname "$0"`"/..; pwd)"
|
||||
fi
|
||||
|
||||
if [ "$WORKER_INSTANCE" = "" ]; then
|
||||
WORKER_INSTANCE=1
|
||||
fi
|
||||
|
||||
"${RSS_HOME}/sbin/rss-daemon.sh" stop org.apache.celeborn.service.deploy.worker.Worker "$WORKER_INSTANCE"
|
||||
"${CELEBORN_HOME}/sbin/celeborn-daemon.sh" stop org.apache.celeborn.service.deploy.worker.Worker "$WORKER_INSTANCE"
|
||||
Loading…
Reference in New Issue
Block a user