[CELEBORN-1368] Log celeborn config for debugging purposes
### What changes were proposed in this pull request? Log celeborn config for debugging purposes. ### Why are the changes needed? Help with debugging ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? tested the patch internally. Closes #2442 from akpatnam25/CELEBORN-1368. Authored-by: Aravind Patnam <apatnam@linkedin.com> Signed-off-by: mingji <fengmingxiao.fmx@alibaba-inc.com>
This commit is contained in:
parent
6ce8b8894d
commit
f04ebccd4d
@ -1281,6 +1281,10 @@ class CelebornConf(loadDefaults: Boolean) extends Cloneable with Logging with Se
|
||||
// Rack Resolver //
|
||||
// //////////////////////////////////////////////////////
|
||||
def rackResolverRefreshInterval = get(RACKRESOLVER_REFRESH_INTERVAL)
|
||||
|
||||
def logCelebornConfEnabled = get(LOG_CELEBORN_CONF_ENABLED)
|
||||
|
||||
def secretRedactionPattern = get(SECRET_REDACTION_PATTERN)
|
||||
}
|
||||
|
||||
object CelebornConf extends Logging {
|
||||
@ -4924,4 +4928,22 @@ object CelebornConf extends Logging {
|
||||
s"Invalid maxEncryptedBlockSize, must be a position number upto ${Int.MaxValue}")
|
||||
.createWithDefaultString("64k")
|
||||
|
||||
val SECRET_REDACTION_PATTERN =
|
||||
buildConf("celeborn.redaction.regex")
|
||||
.categories("master", "worker")
|
||||
.doc("Regex to decide which Celeborn configuration properties and environment variables in " +
|
||||
"master and worker environments contain sensitive information. When this regex matches " +
|
||||
"a property key or value, the value is redacted from the logging.")
|
||||
.version("0.5.0")
|
||||
.regexConf
|
||||
.createWithDefault("(?i)secret|password|token|access[.]key".r)
|
||||
|
||||
val LOG_CELEBORN_CONF_ENABLED: ConfigEntry[Boolean] =
|
||||
buildConf("celeborn.logConf.enabled")
|
||||
.categories("master", "worker")
|
||||
.version("0.5.0")
|
||||
.doc("When `true`, log the CelebornConf for debugging purposes.")
|
||||
.booleanConf
|
||||
.createWithDefault(false)
|
||||
|
||||
}
|
||||
|
||||
@ -34,6 +34,7 @@ import scala.io.Source
|
||||
import scala.reflect.ClassTag
|
||||
import scala.util.{Random => ScalaRandom, Try}
|
||||
import scala.util.control.{ControlThrowable, NonFatal}
|
||||
import scala.util.matching.Regex
|
||||
|
||||
import com.google.protobuf.{ByteString, GeneratedMessageV3}
|
||||
import io.netty.channel.unix.Errors.NativeIoException
|
||||
@ -1101,4 +1102,44 @@ object Utils extends Logging {
|
||||
val host = components.dropRight(portsNum).mkString(":")
|
||||
Array(host) ++ portsArr
|
||||
}
|
||||
|
||||
private val REDACTION_REPLACEMENT_TEXT = "*********(redacted)"
|
||||
|
||||
/**
|
||||
* Redact the sensitive values in the given map. If a map key matches the redaction pattern then
|
||||
* its value is replaced with a dummy text.
|
||||
*/
|
||||
def redact(conf: CelebornConf, kvs: Seq[(String, String)]): Seq[(String, String)] = {
|
||||
val redactionPattern = conf.secretRedactionPattern
|
||||
redact(redactionPattern, kvs)
|
||||
}
|
||||
|
||||
private def redact[K, V](redactionPattern: Regex, kvs: Seq[(K, V)]): Seq[(K, V)] = {
|
||||
// If the sensitive information regex matches with either the key or the value, redact the value
|
||||
// While the original intent was to only redact the value if the key matched with the regex,
|
||||
// we've found that especially in verbose mode, the value of the property may contain sensitive
|
||||
// information like so:
|
||||
//
|
||||
// celeborn.dynamicConfig.store.db.hikari.password=secret_password ...
|
||||
//
|
||||
// And, in such cases, simply searching for the sensitive information regex in the key name is
|
||||
// not sufficient. The values themselves have to be searched as well and redacted if matched.
|
||||
// This does mean we may be accounting more false positives - for example, if the value of an
|
||||
// arbitrary property contained the term 'password', we may redact the value from the UI and
|
||||
// logs. In order to work around it, user would have to make the celeborn.redaction.regex property
|
||||
// more specific.
|
||||
kvs.map {
|
||||
case (key: String, value: String) =>
|
||||
redactionPattern.findFirstIn(key)
|
||||
.orElse(redactionPattern.findFirstIn(value))
|
||||
.map { _ => (key, REDACTION_REPLACEMENT_TEXT) }
|
||||
.getOrElse((key, value))
|
||||
case (key, value: String) =>
|
||||
redactionPattern.findFirstIn(value)
|
||||
.map { _ => (key, REDACTION_REPLACEMENT_TEXT) }
|
||||
.getOrElse((key, value))
|
||||
case (key, value) =>
|
||||
(key, value)
|
||||
}.asInstanceOf[Seq[(K, V)]]
|
||||
}
|
||||
}
|
||||
|
||||
@ -33,6 +33,7 @@ license: |
|
||||
| celeborn.dynamicConfig.store.db.hikari.username | | false | The username of db store backend. | 0.5.0 | |
|
||||
| celeborn.dynamicConfig.store.fs.path | <undefined> | false | The path of dynamic config file for fs store backend. The file format should be yaml. The default path is `${CELEBORN_CONF_DIR}/dynamicConfig.yaml`. | 0.5.0 | |
|
||||
| celeborn.internal.port.enabled | false | false | Whether to create a internal port on Masters/Workers for inter-Masters/Workers communication. This is beneficial when SASL authentication is enforced for all interactions between clients and Celeborn Services, but the services can exchange messages without being subject to SASL authentication. | 0.5.0 | |
|
||||
| celeborn.logConf.enabled | false | false | When `true`, log the CelebornConf for debugging purposes. | 0.5.0 | |
|
||||
| celeborn.master.estimatedPartitionSize.initialSize | 64mb | false | Initial partition size for estimation, it will change according to runtime stats. | 0.3.0 | celeborn.shuffle.initialEstimatedPartitionSize |
|
||||
| celeborn.master.estimatedPartitionSize.maxSize | <undefined> | false | Max partition size for estimation. Default value should be celeborn.worker.shuffle.partitionSplit.max * 2. | 0.4.1 | |
|
||||
| celeborn.master.estimatedPartitionSize.minSize | 8mb | false | Ignore partition size smaller than this configuration of partition size for estimation. | 0.3.0 | celeborn.shuffle.minPartitionSizeToEstimate |
|
||||
@ -60,6 +61,7 @@ license: |
|
||||
| celeborn.master.userResourceConsumption.update.interval | 30s | false | Time length for a window about compute user resource consumption. | 0.3.0 | |
|
||||
| celeborn.master.workerUnavailableInfo.expireTimeout | 1800s | false | Worker unavailable info would be cleared when the retention period is expired | 0.3.1 | |
|
||||
| celeborn.quota.enabled | true | false | When Master side sets to true, the master will enable to check the quota via QuotaManager. When Client side sets to true, LifecycleManager will request Master side to check whether the current user has enough quota before registration of shuffle. Fallback to the default shuffle service of Spark when Master side checks that there is no enough quota for current user. | 0.2.0 | |
|
||||
| celeborn.redaction.regex | (?i)secret|password|token|access[.]key | false | Regex to decide which Celeborn configuration properties and environment variables in master and worker environments contain sensitive information. When this regex matches a property key or value, the value is redacted from the logging. | 0.5.0 | |
|
||||
| celeborn.storage.availableTypes | HDD | false | Enabled storages. Available options: MEMORY,HDD,SSD,HDFS. Note: HDD and SSD would be treated as identical. | 0.3.0 | celeborn.storage.activeTypes |
|
||||
| celeborn.storage.hdfs.dir | <undefined> | false | HDFS base directory for Celeborn to store shuffle data. | 0.2.0 | |
|
||||
| celeborn.storage.hdfs.kerberos.keytab | <undefined> | false | Kerberos keytab file path for HDFS storage connection. | 0.3.2 | |
|
||||
|
||||
@ -33,9 +33,11 @@ license: |
|
||||
| celeborn.dynamicConfig.store.db.hikari.username | | false | The username of db store backend. | 0.5.0 | |
|
||||
| celeborn.dynamicConfig.store.fs.path | <undefined> | false | The path of dynamic config file for fs store backend. The file format should be yaml. The default path is `${CELEBORN_CONF_DIR}/dynamicConfig.yaml`. | 0.5.0 | |
|
||||
| celeborn.internal.port.enabled | false | false | Whether to create a internal port on Masters/Workers for inter-Masters/Workers communication. This is beneficial when SASL authentication is enforced for all interactions between clients and Celeborn Services, but the services can exchange messages without being subject to SASL authentication. | 0.5.0 | |
|
||||
| celeborn.logConf.enabled | false | false | When `true`, log the CelebornConf for debugging purposes. | 0.5.0 | |
|
||||
| celeborn.master.endpoints | <localhost>:9097 | false | Endpoints of master nodes for celeborn client to connect, allowed pattern is: `<host1>:<port1>[,<host2>:<port2>]*`, e.g. `clb1:9097,clb2:9098,clb3:9099`. If the port is omitted, 9097 will be used. | 0.2.0 | |
|
||||
| celeborn.master.estimatedPartitionSize.minSize | 8mb | false | Ignore partition size smaller than this configuration of partition size for estimation. | 0.3.0 | celeborn.shuffle.minPartitionSizeToEstimate |
|
||||
| celeborn.master.internal.endpoints | <localhost>:8097 | false | Endpoints of master nodes just for celeborn workers to connect, allowed pattern is: `<host1>:<port1>[,<host2>:<port2>]*`, e.g. `clb1:8097,clb2:8097,clb3:8097`. If the port is omitted, 8097 will be used. | 0.5.0 | |
|
||||
| celeborn.redaction.regex | (?i)secret|password|token|access[.]key | false | Regex to decide which Celeborn configuration properties and environment variables in master and worker environments contain sensitive information. When this regex matches a property key or value, the value is redacted from the logging. | 0.5.0 | |
|
||||
| celeborn.shuffle.chunk.size | 8m | false | Max chunk size of reducer's merged shuffle data. For example, if a reducer's shuffle data is 128M and the data will need 16 fetch chunk requests to fetch. | 0.2.0 | |
|
||||
| celeborn.storage.availableTypes | HDD | false | Enabled storages. Available options: MEMORY,HDD,SSD,HDFS. Note: HDD and SSD would be treated as identical. | 0.3.0 | celeborn.storage.activeTypes |
|
||||
| celeborn.storage.hdfs.dir | <undefined> | false | HDFS base directory for Celeborn to store shuffle data. | 0.2.0 | |
|
||||
|
||||
@ -82,6 +82,10 @@ private[celeborn] class Master(
|
||||
// Send ApplicationMeta to workers
|
||||
private var sendApplicationMetaExecutor: ExecutorService = _
|
||||
|
||||
if (conf.logCelebornConfEnabled) {
|
||||
logInfo(getConf)
|
||||
}
|
||||
|
||||
override val rpcEnv: RpcEnv =
|
||||
if (!authEnabled) {
|
||||
RpcEnv.create(
|
||||
|
||||
@ -23,6 +23,7 @@ import scala.collection.JavaConverters._
|
||||
|
||||
import org.apache.celeborn.common.CelebornConf
|
||||
import org.apache.celeborn.common.internal.Logging
|
||||
import org.apache.celeborn.common.util.Utils
|
||||
import org.apache.celeborn.server.common.http.HttpServer
|
||||
import org.apache.celeborn.server.common.http.api.ApiRootResource
|
||||
import org.apache.celeborn.server.common.service.config.ConfigLevel
|
||||
@ -35,8 +36,9 @@ abstract class HttpService extends Service with Logging {
|
||||
val sb = new StringBuilder
|
||||
sb.append("=========================== Configuration ============================\n")
|
||||
if (conf.getAll.nonEmpty) {
|
||||
val maxKeyLength = conf.getAll.toMap.keys.map(_.length).max
|
||||
conf.getAll.sortBy(_._1).foreach { case (key, value) =>
|
||||
val redactedConf = Utils.redact(conf, conf.getAll)
|
||||
val maxKeyLength = redactedConf.toMap.keys.map(_.length).max
|
||||
redactedConf.sortBy(_._1).foreach { case (key, value) =>
|
||||
sb.append(config(key, value, maxKeyLength))
|
||||
}
|
||||
}
|
||||
|
||||
@ -86,6 +86,11 @@ private[celeborn] class Worker(
|
||||
val workerStatusManager = new WorkerStatusManager(conf)
|
||||
private val authEnabled = conf.authEnabled
|
||||
private val secretRegistry = new WorkerSecretRegistryImpl(conf.workerApplicationRegistryCacheSize)
|
||||
|
||||
if (conf.logCelebornConfEnabled) {
|
||||
logInfo(getConf)
|
||||
}
|
||||
|
||||
val rpcEnv: RpcEnv =
|
||||
if (!authEnabled) {
|
||||
RpcEnv.create(
|
||||
|
||||
Loading…
Reference in New Issue
Block a user