diff --git a/METRICS.md b/METRICS.md index 09532d088..f6fed40e9 100644 --- a/METRICS.md +++ b/METRICS.md @@ -180,9 +180,11 @@ Here is an example of Grafana dashboard importing. | jvm_thread_blocked_count | JVM | The current number of threads having blocked state. | | jvm_thread_deadlock_count | JVM | The current number of threads having deadlock state. | | jvm_thread_new_count | JVM | The current number of threads having new state. | +| jvm_thread_peak_count | JVM | The peak number of live threads since the start of the JVM or since the peak was last reset. | | jvm_thread_runnable_count | JVM | The current number of threads having runnable state. | | jvm_thread_terminated_count | JVM | The current number of threads having terminated state. | | jvm_thread_timed_waiting_count | JVM | The current number of threads having timed_waiting state. | +| jvm_thread_total_started_count | JVM | The total number of threads created and started since the start of the JVM. | | jvm_thread_waiting_count | JVM | The current number of threads having waiting state. | | jvm_classloader_loaded | JVM | The total number of classes loaded since the start of the JVM. | | jvm_classloader_unloaded | JVM | The total number of classes unloaded since the start of the JVM. 
| diff --git a/assets/grafana/celeborn-jvm-dashboard.json b/assets/grafana/celeborn-jvm-dashboard.json index 4c20e5a0a..399832c3d 100644 --- a/assets/grafana/celeborn-jvm-dashboard.json +++ b/assets/grafana/celeborn-jvm-dashboard.json @@ -1245,6 +1245,42 @@ "legendFormat": "daemon_${baseLegend}", "range": true, "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "metrics_jvm_thread_peak_count_Value{instance=~\"${instance}\"}", + "hide": false, + "legendFormat": "peak_${baseLegend}", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "metrics_jvm_thread_total_started_count_Value{instance=~\"${instance}\"}", + "hide": false, + "legendFormat": "total_started_${baseLegend}", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "metrics_jvm_thread_deadlock_count_Value{instance=~\"${instance}\"}", + "hide": false, + "legendFormat": "deadlock_${baseLegend}", + "range": true, + "refId": "F" } ], "title": "Thread Counts", @@ -1400,18 +1436,6 @@ "legendFormat": "terminated_${baseLegend}", "range": true, "refId": "F" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "metrics_jvm_thread_deadlock_count_Value{instance=~\"${instance}\"}", - "hide": false, - "legendFormat": "deadlock_${baseLegend}", - "range": true, - "refId": "G" } ], "title": "Thread States", diff --git a/dev/deps/dependencies-client-flink-1.14 b/dev/deps/dependencies-client-flink-1.14 index 0380d321b..9c2fc4476 100644 --- a/dev/deps/dependencies-client-flink-1.14 +++ b/dev/deps/dependencies-client-flink-1.14 @@ -34,9 +34,9 @@ jul-to-slf4j/1.7.36//jul-to-slf4j-1.7.36.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar lz4-java/1.8.0//lz4-java-1.8.0.jar 
maven-jdk-tools-wrapper/0.1//maven-jdk-tools-wrapper-0.1.jar -metrics-core/3.2.6//metrics-core-3.2.6.jar -metrics-graphite/3.2.6//metrics-graphite-3.2.6.jar -metrics-jvm/3.2.6//metrics-jvm-3.2.6.jar +metrics-core/4.2.25//metrics-core-4.2.25.jar +metrics-graphite/4.2.25//metrics-graphite-4.2.25.jar +metrics-jvm/4.2.25//metrics-jvm-4.2.25.jar netty-all/4.1.109.Final//netty-all-4.1.109.Final.jar netty-buffer/4.1.109.Final//netty-buffer-4.1.109.Final.jar netty-codec-dns/4.1.109.Final//netty-codec-dns-4.1.109.Final.jar diff --git a/dev/deps/dependencies-client-flink-1.15 b/dev/deps/dependencies-client-flink-1.15 index 0380d321b..9c2fc4476 100644 --- a/dev/deps/dependencies-client-flink-1.15 +++ b/dev/deps/dependencies-client-flink-1.15 @@ -34,9 +34,9 @@ jul-to-slf4j/1.7.36//jul-to-slf4j-1.7.36.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar lz4-java/1.8.0//lz4-java-1.8.0.jar maven-jdk-tools-wrapper/0.1//maven-jdk-tools-wrapper-0.1.jar -metrics-core/3.2.6//metrics-core-3.2.6.jar -metrics-graphite/3.2.6//metrics-graphite-3.2.6.jar -metrics-jvm/3.2.6//metrics-jvm-3.2.6.jar +metrics-core/4.2.25//metrics-core-4.2.25.jar +metrics-graphite/4.2.25//metrics-graphite-4.2.25.jar +metrics-jvm/4.2.25//metrics-jvm-4.2.25.jar netty-all/4.1.109.Final//netty-all-4.1.109.Final.jar netty-buffer/4.1.109.Final//netty-buffer-4.1.109.Final.jar netty-codec-dns/4.1.109.Final//netty-codec-dns-4.1.109.Final.jar diff --git a/dev/deps/dependencies-client-flink-1.17 b/dev/deps/dependencies-client-flink-1.17 index 0380d321b..9c2fc4476 100644 --- a/dev/deps/dependencies-client-flink-1.17 +++ b/dev/deps/dependencies-client-flink-1.17 @@ -34,9 +34,9 @@ jul-to-slf4j/1.7.36//jul-to-slf4j-1.7.36.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar lz4-java/1.8.0//lz4-java-1.8.0.jar maven-jdk-tools-wrapper/0.1//maven-jdk-tools-wrapper-0.1.jar -metrics-core/3.2.6//metrics-core-3.2.6.jar -metrics-graphite/3.2.6//metrics-graphite-3.2.6.jar -metrics-jvm/3.2.6//metrics-jvm-3.2.6.jar 
+metrics-core/4.2.25//metrics-core-4.2.25.jar +metrics-graphite/4.2.25//metrics-graphite-4.2.25.jar +metrics-jvm/4.2.25//metrics-jvm-4.2.25.jar netty-all/4.1.109.Final//netty-all-4.1.109.Final.jar netty-buffer/4.1.109.Final//netty-buffer-4.1.109.Final.jar netty-codec-dns/4.1.109.Final//netty-codec-dns-4.1.109.Final.jar diff --git a/dev/deps/dependencies-client-flink-1.18 b/dev/deps/dependencies-client-flink-1.18 index 0380d321b..9c2fc4476 100644 --- a/dev/deps/dependencies-client-flink-1.18 +++ b/dev/deps/dependencies-client-flink-1.18 @@ -34,9 +34,9 @@ jul-to-slf4j/1.7.36//jul-to-slf4j-1.7.36.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar lz4-java/1.8.0//lz4-java-1.8.0.jar maven-jdk-tools-wrapper/0.1//maven-jdk-tools-wrapper-0.1.jar -metrics-core/3.2.6//metrics-core-3.2.6.jar -metrics-graphite/3.2.6//metrics-graphite-3.2.6.jar -metrics-jvm/3.2.6//metrics-jvm-3.2.6.jar +metrics-core/4.2.25//metrics-core-4.2.25.jar +metrics-graphite/4.2.25//metrics-graphite-4.2.25.jar +metrics-jvm/4.2.25//metrics-jvm-4.2.25.jar netty-all/4.1.109.Final//netty-all-4.1.109.Final.jar netty-buffer/4.1.109.Final//netty-buffer-4.1.109.Final.jar netty-codec-dns/4.1.109.Final//netty-codec-dns-4.1.109.Final.jar diff --git a/dev/deps/dependencies-client-flink-1.19 b/dev/deps/dependencies-client-flink-1.19 index 0380d321b..9c2fc4476 100644 --- a/dev/deps/dependencies-client-flink-1.19 +++ b/dev/deps/dependencies-client-flink-1.19 @@ -34,9 +34,9 @@ jul-to-slf4j/1.7.36//jul-to-slf4j-1.7.36.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar lz4-java/1.8.0//lz4-java-1.8.0.jar maven-jdk-tools-wrapper/0.1//maven-jdk-tools-wrapper-0.1.jar -metrics-core/3.2.6//metrics-core-3.2.6.jar -metrics-graphite/3.2.6//metrics-graphite-3.2.6.jar -metrics-jvm/3.2.6//metrics-jvm-3.2.6.jar +metrics-core/4.2.25//metrics-core-4.2.25.jar +metrics-graphite/4.2.25//metrics-graphite-4.2.25.jar +metrics-jvm/4.2.25//metrics-jvm-4.2.25.jar netty-all/4.1.109.Final//netty-all-4.1.109.Final.jar 
netty-buffer/4.1.109.Final//netty-buffer-4.1.109.Final.jar netty-codec-dns/4.1.109.Final//netty-codec-dns-4.1.109.Final.jar diff --git a/dev/deps/dependencies-client-mr b/dev/deps/dependencies-client-mr index 4a6aa7d7a..82dc2cb11 100644 --- a/dev/deps/dependencies-client-mr +++ b/dev/deps/dependencies-client-mr @@ -136,9 +136,9 @@ kotlin-stdlib/1.4.10//kotlin-stdlib-1.4.10.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar lz4-java/1.8.0//lz4-java-1.8.0.jar maven-jdk-tools-wrapper/0.1//maven-jdk-tools-wrapper-0.1.jar -metrics-core/3.2.6//metrics-core-3.2.6.jar -metrics-graphite/3.2.6//metrics-graphite-3.2.6.jar -metrics-jvm/3.2.6//metrics-jvm-3.2.6.jar +metrics-core/4.2.25//metrics-core-4.2.25.jar +metrics-graphite/4.2.25//metrics-graphite-4.2.25.jar +metrics-jvm/4.2.25//metrics-jvm-4.2.25.jar mssql-jdbc/6.2.1.jre7//mssql-jdbc-6.2.1.jre7.jar netty-all/4.1.109.Final//netty-all-4.1.109.Final.jar netty-buffer/4.1.109.Final//netty-buffer-4.1.109.Final.jar diff --git a/dev/deps/dependencies-client-spark-2.4 b/dev/deps/dependencies-client-spark-2.4 index bb5e0f9f2..72cd055c0 100644 --- a/dev/deps/dependencies-client-spark-2.4 +++ b/dev/deps/dependencies-client-spark-2.4 @@ -34,9 +34,9 @@ jul-to-slf4j/1.7.36//jul-to-slf4j-1.7.36.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar lz4-java/1.4.0//lz4-java-1.4.0.jar maven-jdk-tools-wrapper/0.1//maven-jdk-tools-wrapper-0.1.jar -metrics-core/3.2.6//metrics-core-3.2.6.jar -metrics-graphite/3.2.6//metrics-graphite-3.2.6.jar -metrics-jvm/3.2.6//metrics-jvm-3.2.6.jar +metrics-core/4.2.25//metrics-core-4.2.25.jar +metrics-graphite/4.2.25//metrics-graphite-4.2.25.jar +metrics-jvm/4.2.25//metrics-jvm-4.2.25.jar netty-all/4.1.109.Final//netty-all-4.1.109.Final.jar netty-buffer/4.1.109.Final//netty-buffer-4.1.109.Final.jar netty-codec-dns/4.1.109.Final//netty-codec-dns-4.1.109.Final.jar diff --git a/dev/deps/dependencies-client-spark-3.0 b/dev/deps/dependencies-client-spark-3.0 index fffc6e468..edd9f4264 100644 --- 
a/dev/deps/dependencies-client-spark-3.0 +++ b/dev/deps/dependencies-client-spark-3.0 @@ -34,9 +34,9 @@ jul-to-slf4j/1.7.36//jul-to-slf4j-1.7.36.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar lz4-java/1.7.1//lz4-java-1.7.1.jar maven-jdk-tools-wrapper/0.1//maven-jdk-tools-wrapper-0.1.jar -metrics-core/3.2.6//metrics-core-3.2.6.jar -metrics-graphite/3.2.6//metrics-graphite-3.2.6.jar -metrics-jvm/3.2.6//metrics-jvm-3.2.6.jar +metrics-core/4.2.25//metrics-core-4.2.25.jar +metrics-graphite/4.2.25//metrics-graphite-4.2.25.jar +metrics-jvm/4.2.25//metrics-jvm-4.2.25.jar netty-all/4.1.109.Final//netty-all-4.1.109.Final.jar netty-buffer/4.1.109.Final//netty-buffer-4.1.109.Final.jar netty-codec-dns/4.1.109.Final//netty-codec-dns-4.1.109.Final.jar diff --git a/dev/deps/dependencies-client-spark-3.1 b/dev/deps/dependencies-client-spark-3.1 index da095542c..9372233f7 100644 --- a/dev/deps/dependencies-client-spark-3.1 +++ b/dev/deps/dependencies-client-spark-3.1 @@ -34,9 +34,9 @@ jul-to-slf4j/1.7.36//jul-to-slf4j-1.7.36.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar lz4-java/1.7.1//lz4-java-1.7.1.jar maven-jdk-tools-wrapper/0.1//maven-jdk-tools-wrapper-0.1.jar -metrics-core/3.2.6//metrics-core-3.2.6.jar -metrics-graphite/3.2.6//metrics-graphite-3.2.6.jar -metrics-jvm/3.2.6//metrics-jvm-3.2.6.jar +metrics-core/4.2.25//metrics-core-4.2.25.jar +metrics-graphite/4.2.25//metrics-graphite-4.2.25.jar +metrics-jvm/4.2.25//metrics-jvm-4.2.25.jar netty-all/4.1.109.Final//netty-all-4.1.109.Final.jar netty-buffer/4.1.109.Final//netty-buffer-4.1.109.Final.jar netty-codec-dns/4.1.109.Final//netty-codec-dns-4.1.109.Final.jar diff --git a/dev/deps/dependencies-client-spark-3.2 b/dev/deps/dependencies-client-spark-3.2 index 76f5b0211..3913a6fc1 100644 --- a/dev/deps/dependencies-client-spark-3.2 +++ b/dev/deps/dependencies-client-spark-3.2 @@ -34,9 +34,9 @@ jul-to-slf4j/1.7.36//jul-to-slf4j-1.7.36.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar lz4-java/1.7.1//lz4-java-1.7.1.jar 
maven-jdk-tools-wrapper/0.1//maven-jdk-tools-wrapper-0.1.jar -metrics-core/3.2.6//metrics-core-3.2.6.jar -metrics-graphite/3.2.6//metrics-graphite-3.2.6.jar -metrics-jvm/3.2.6//metrics-jvm-3.2.6.jar +metrics-core/4.2.25//metrics-core-4.2.25.jar +metrics-graphite/4.2.25//metrics-graphite-4.2.25.jar +metrics-jvm/4.2.25//metrics-jvm-4.2.25.jar netty-all/4.1.109.Final//netty-all-4.1.109.Final.jar netty-buffer/4.1.109.Final//netty-buffer-4.1.109.Final.jar netty-codec-dns/4.1.109.Final//netty-codec-dns-4.1.109.Final.jar diff --git a/dev/deps/dependencies-client-spark-3.3 b/dev/deps/dependencies-client-spark-3.3 index 8102a540f..945c35808 100644 --- a/dev/deps/dependencies-client-spark-3.3 +++ b/dev/deps/dependencies-client-spark-3.3 @@ -34,9 +34,9 @@ jul-to-slf4j/1.7.36//jul-to-slf4j-1.7.36.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar lz4-java/1.8.0//lz4-java-1.8.0.jar maven-jdk-tools-wrapper/0.1//maven-jdk-tools-wrapper-0.1.jar -metrics-core/3.2.6//metrics-core-3.2.6.jar -metrics-graphite/3.2.6//metrics-graphite-3.2.6.jar -metrics-jvm/3.2.6//metrics-jvm-3.2.6.jar +metrics-core/4.2.25//metrics-core-4.2.25.jar +metrics-graphite/4.2.25//metrics-graphite-4.2.25.jar +metrics-jvm/4.2.25//metrics-jvm-4.2.25.jar netty-all/4.1.109.Final//netty-all-4.1.109.Final.jar netty-buffer/4.1.109.Final//netty-buffer-4.1.109.Final.jar netty-codec-dns/4.1.109.Final//netty-codec-dns-4.1.109.Final.jar diff --git a/dev/deps/dependencies-client-spark-3.4 b/dev/deps/dependencies-client-spark-3.4 index 96996db77..6652a37cb 100644 --- a/dev/deps/dependencies-client-spark-3.4 +++ b/dev/deps/dependencies-client-spark-3.4 @@ -34,9 +34,9 @@ jul-to-slf4j/1.7.36//jul-to-slf4j-1.7.36.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar lz4-java/1.8.0//lz4-java-1.8.0.jar maven-jdk-tools-wrapper/0.1//maven-jdk-tools-wrapper-0.1.jar -metrics-core/3.2.6//metrics-core-3.2.6.jar -metrics-graphite/3.2.6//metrics-graphite-3.2.6.jar -metrics-jvm/3.2.6//metrics-jvm-3.2.6.jar 
+metrics-core/4.2.25//metrics-core-4.2.25.jar +metrics-graphite/4.2.25//metrics-graphite-4.2.25.jar +metrics-jvm/4.2.25//metrics-jvm-4.2.25.jar netty-all/4.1.109.Final//netty-all-4.1.109.Final.jar netty-buffer/4.1.109.Final//netty-buffer-4.1.109.Final.jar netty-codec-dns/4.1.109.Final//netty-codec-dns-4.1.109.Final.jar diff --git a/dev/deps/dependencies-client-spark-3.5 b/dev/deps/dependencies-client-spark-3.5 index 371e6479a..a2cffa8a4 100644 --- a/dev/deps/dependencies-client-spark-3.5 +++ b/dev/deps/dependencies-client-spark-3.5 @@ -34,9 +34,9 @@ jul-to-slf4j/1.7.36//jul-to-slf4j-1.7.36.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar lz4-java/1.8.0//lz4-java-1.8.0.jar maven-jdk-tools-wrapper/0.1//maven-jdk-tools-wrapper-0.1.jar -metrics-core/3.2.6//metrics-core-3.2.6.jar -metrics-graphite/3.2.6//metrics-graphite-3.2.6.jar -metrics-jvm/3.2.6//metrics-jvm-3.2.6.jar +metrics-core/4.2.25//metrics-core-4.2.25.jar +metrics-graphite/4.2.25//metrics-graphite-4.2.25.jar +metrics-jvm/4.2.25//metrics-jvm-4.2.25.jar netty-all/4.1.109.Final//netty-all-4.1.109.Final.jar netty-buffer/4.1.109.Final//netty-buffer-4.1.109.Final.jar netty-codec-dns/4.1.109.Final//netty-codec-dns-4.1.109.Final.jar diff --git a/dev/deps/dependencies-server b/dev/deps/dependencies-server index ca9a26f94..d3f491677 100644 --- a/dev/deps/dependencies-server +++ b/dev/deps/dependencies-server @@ -75,9 +75,9 @@ log4j-core/2.17.2//log4j-core-2.17.2.jar log4j-slf4j-impl/2.17.2//log4j-slf4j-impl-2.17.2.jar lz4-java/1.8.0//lz4-java-1.8.0.jar maven-jdk-tools-wrapper/0.1//maven-jdk-tools-wrapper-0.1.jar -metrics-core/3.2.6//metrics-core-3.2.6.jar -metrics-graphite/3.2.6//metrics-graphite-3.2.6.jar -metrics-jvm/3.2.6//metrics-jvm-3.2.6.jar +metrics-core/4.2.25//metrics-core-4.2.25.jar +metrics-graphite/4.2.25//metrics-graphite-4.2.25.jar +metrics-jvm/4.2.25//metrics-jvm-4.2.25.jar mimepull/1.9.15//mimepull-1.9.15.jar mybatis/3.5.15//mybatis-3.5.15.jar netty-all/4.1.109.Final//netty-all-4.1.109.Final.jar diff 
--git a/docs/monitoring.md b/docs/monitoring.md index e9a1f3f56..cc3badbb4 100644 --- a/docs/monitoring.md +++ b/docs/monitoring.md @@ -24,7 +24,7 @@ There are two ways to monitor Celeborn cluster: Prometheus metrics and REST API. ## Metrics Celeborn has a configurable metrics system based on the -[Dropwizard Metrics Library](http://metrics.dropwizard.io/4.2.0). +[Dropwizard Metrics Library](https://metrics.dropwizard.io/4.2.0). This allows users to report Celeborn metrics to a variety of sinks including HTTP, JMX, CSV files and prometheus servlet. The metrics are generated by sources embedded in the Celeborn code base. They provide instrumentation for specific activities and Celeborn components. diff --git a/pom.xml b/pom.xml index 6923ff85f..7d9374d00 100644 --- a/pom.xml +++ b/pom.xml @@ -70,7 +70,11 @@ 3.3.6 - 3.2.6 + + 4.2.25 3.12.0 2.13.0 1.0.0 @@ -236,6 +240,12 @@ io.dropwizard.metrics metrics-graphite ${codahale.metrics.version} + + + com.rabbitmq + amqp-client + + io.dropwizard.metrics diff --git a/project/CelebornBuild.scala b/project/CelebornBuild.scala index 0ce59ddf6..9b0f794bf 100644 --- a/project/CelebornBuild.scala +++ b/project/CelebornBuild.scala @@ -53,7 +53,7 @@ object Dependencies { val leveldbJniVersion = "1.8" val log4j2Version = "2.17.2" val jdkToolsVersion = "0.1" - val metricsVersion = "3.2.6" + val metricsVersion = "4.2.25" val mockitoVersion = "4.11.0" val nettyVersion = "4.1.109.Final" val ratisVersion = "3.0.1" @@ -107,7 +107,8 @@ object Dependencies { ExclusionRule("log4j", "log4j"), ExclusionRule("org.slf4j", "slf4j-log4j12")) val ioDropwizardMetricsCore = "io.dropwizard.metrics" % "metrics-core" % metricsVersion - val ioDropwizardMetricsGraphite = "io.dropwizard.metrics" % "metrics-graphite" % metricsVersion + val ioDropwizardMetricsGraphite = "io.dropwizard.metrics" % "metrics-graphite" % metricsVersion excludeAll ( + ExclusionRule("com.rabbitmq", "amqp-client")) val ioDropwizardMetricsJvm = "io.dropwizard.metrics" % "metrics-jvm" 
% metricsVersion val ioNetty = "io.netty" % "netty-all" % nettyVersion excludeAll( ExclusionRule("io.netty", "netty-handler-ssl-ocsp"))