[KYUUBI #7072] Expose metrics of engine startup permit state
### Why are the changes needed? The metrics `kyuubi_operation_state_LaunchEngine_*` cannot reflect the state of the Semaphore once the maximum engine startup limit has been configured through `kyuubi.server.limit.engine.startup`. This change adds metrics that expose the relevant permit state. ### How was this patch tested? ### Was this patch authored or co-authored using generative AI tooling? Closes #7072 from LennonChin/engine_startup_metrics. Closes #7072 d6bf3696a [Lennon Chin] Expose metrics of engine startup permit status Authored-by: Lennon Chin <i@coderap.com> Signed-off-by: Cheng Pan <chengpan@apache.org>
This commit is contained in:
parent
bcaff5a3f1
commit
cad5a392f3
@ -65,6 +65,9 @@ These metrics include:
|
||||
| `kyuubi.engine.timeout` | | counter | 1.2.0 | <div style='width: 150pt;word-wrap: break-word;white-space: normal'> cumulative timeout engines</div> |
|
||||
| `kyuubi.engine.failed` | `${user}` | counter | 1.2.0 | <div style='width: 150pt;word-wrap: break-word;white-space: normal'> cumulative explicitly failed engine count for a `${user}`</div> |
|
||||
| `kyuubi.engine.failed` | `${errorType}` | counter | 1.2.0 | <div style='width: 150pt;word-wrap: break-word;white-space: normal'> cumulative explicitly failed engine count for a particular `${errorType}`, e.g. `ClassNotFoundException`</div> |
|
||||
| `kyuubi.engine.startup.permit.limit.total` | | meter | 1.11.0 | <div style='width: 150pt;word-wrap: break-word;white-space: normal'> permit limit for concurrently starting engines, as configured by `kyuubi.server.limit.engine.startup` </div> |
|
||||
| `kyuubi.engine.startup.permit.available` | | gauge | 1.11.0 | <div style='width: 150pt;word-wrap: break-word;white-space: normal'> number of permits currently available for starting engines concurrently </div> |
|
||||
| `kyuubi.engine.startup.permit.waiting` | | gauge | 1.11.0 | <div style='width: 150pt;word-wrap: break-word;white-space: normal'> number of engine startups that are waiting to acquire a permit </div> |
|
||||
| `kyuubi.backend_service.open_session` | | timer | 1.5.0 | <div style='width: 150pt;word-wrap: break-word;white-space: normal'> kyuubi backend service `openSession` method execution time and rate </div> |
|
||||
| `kyuubi.backend_service.close_session` | | timer | 1.5.0 | <div style='width: 150pt;word-wrap: break-word;white-space: normal'> kyuubi backend service `closeSession` method execution time and rate </div> |
|
||||
| `kyuubi.backend_service.get_info` | | timer | 1.5.0 | <div style='width: 150pt;word-wrap: break-word;white-space: normal'> kyuubi backend service `getInfo` method execution time and rate </div> |
|
||||
|
||||
@ -823,6 +823,123 @@
|
||||
"x": 16,
|
||||
"y": 7
|
||||
},
|
||||
"id": 104,
|
||||
"maxPerRow": 2,
|
||||
"options": {
|
||||
"alertThreshold": true,
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"repeatDirection": "h",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "${DS_PROMETHEUS}",
|
||||
"editorMode": "code",
|
||||
"expr": " kyuubi_engine_startup_permit_limit_total{$baseFilter,instance=~\"$instance\"}",
|
||||
"hide": false,
|
||||
"legendFormat": "${baseLegend}-limit",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": "${DS_PROMETHEUS}",
|
||||
"editorMode": "code",
|
||||
"expr": " kyuubi_engine_startup_permit_waiting{$baseFilter,instance=~\"$instance\"}",
|
||||
"hide": false,
|
||||
"legendFormat": "${baseLegend}-waiting",
|
||||
"range": true,
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"datasource": "${DS_PROMETHEUS}",
|
||||
"editorMode": "code",
|
||||
"expr": " kyuubi_engine_startup_permit_available{$baseFilter,instance=~\"$instance\"}",
|
||||
"hide": false,
|
||||
"legendFormat": "${baseLegend}-available",
|
||||
"range": true,
|
||||
"refId": "C"
|
||||
}
|
||||
],
|
||||
"title": "Engine startup permit",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"description": "",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"links": [],
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "ms"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 8,
|
||||
"x": 0,
|
||||
"y": 13
|
||||
},
|
||||
"id": 75,
|
||||
"maxPerRow": 2,
|
||||
"options": {
|
||||
@ -918,7 +1035,7 @@
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 8,
|
||||
"x": 0,
|
||||
"x": 8,
|
||||
"y": 13
|
||||
},
|
||||
"id": 77,
|
||||
@ -1025,7 +1142,7 @@
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 8,
|
||||
"x": 8,
|
||||
"x": 16,
|
||||
"y": 13
|
||||
},
|
||||
"id": 79,
|
||||
@ -1130,10 +1247,10 @@
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"h": 6,
|
||||
"w": 8,
|
||||
"x": 16,
|
||||
"y": 13
|
||||
"x": 0,
|
||||
"y": 20
|
||||
},
|
||||
"id": 80,
|
||||
"maxPerRow": 2,
|
||||
@ -1236,7 +1353,7 @@
|
||||
"gridPos": {
|
||||
"h": 6,
|
||||
"w": 8,
|
||||
"x": 0,
|
||||
"x": 8,
|
||||
"y": 20
|
||||
},
|
||||
"id": 34,
|
||||
@ -1335,7 +1452,7 @@
|
||||
"gridPos": {
|
||||
"h": 6,
|
||||
"w": 8,
|
||||
"x": 8,
|
||||
"x": 16,
|
||||
"y": 20
|
||||
},
|
||||
"id": 71,
|
||||
@ -1430,8 +1547,8 @@
|
||||
"gridPos": {
|
||||
"h": 6,
|
||||
"w": 8,
|
||||
"x": 16,
|
||||
"y": 20
|
||||
"x": 0,
|
||||
"y": 26
|
||||
},
|
||||
"id": 76,
|
||||
"maxPerRow": 2,
|
||||
@ -1478,7 +1595,7 @@
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 26
|
||||
"y": 32
|
||||
},
|
||||
"id": 88,
|
||||
"panels": [],
|
||||
@ -1549,7 +1666,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 27
|
||||
"y": 33
|
||||
},
|
||||
"id": 89,
|
||||
"maxPerRow": 2,
|
||||
@ -1646,7 +1763,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 27
|
||||
"y": 33
|
||||
},
|
||||
"id": 92,
|
||||
"maxPerRow": 2,
|
||||
@ -1743,7 +1860,7 @@
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 35
|
||||
"y": 41
|
||||
},
|
||||
"id": 90,
|
||||
"maxPerRow": 2,
|
||||
@ -1849,7 +1966,7 @@
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 35
|
||||
"y": 41
|
||||
},
|
||||
"id": 91,
|
||||
"maxPerRow": 2,
|
||||
@ -1899,7 +2016,7 @@
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 42
|
||||
"y": 48
|
||||
},
|
||||
"id": 93,
|
||||
"panels": [],
|
||||
@ -1970,7 +2087,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 43
|
||||
"y": 49
|
||||
},
|
||||
"id": 94,
|
||||
"maxPerRow": 2,
|
||||
@ -2076,7 +2193,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 43
|
||||
"y": 49
|
||||
},
|
||||
"id": 99,
|
||||
"maxPerRow": 2,
|
||||
@ -2173,7 +2290,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 51
|
||||
"y": 57
|
||||
},
|
||||
"id": 98,
|
||||
"maxPerRow": 2,
|
||||
@ -2271,7 +2388,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 51
|
||||
"y": 57
|
||||
},
|
||||
"id": 97,
|
||||
"maxPerRow": 2,
|
||||
@ -2321,7 +2438,7 @@
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 59
|
||||
"y": 65
|
||||
},
|
||||
"id": 68,
|
||||
"panels": [],
|
||||
@ -2392,7 +2509,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 60
|
||||
"y": 66
|
||||
},
|
||||
"id": 100,
|
||||
"options": {
|
||||
@ -2511,7 +2628,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 60
|
||||
"y": 66
|
||||
},
|
||||
"id": 101,
|
||||
"options": {
|
||||
@ -2601,7 +2718,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 68
|
||||
"y": 74
|
||||
},
|
||||
"id": 49,
|
||||
"options": {
|
||||
@ -2653,7 +2770,7 @@
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 68
|
||||
"y": 82
|
||||
},
|
||||
"id": 60,
|
||||
"panels": [
|
||||
|
||||
@ -60,6 +60,11 @@ object MetricsConstants {
|
||||
final val ENGINE_TIMEOUT: String = ENGINE + "timeout"
|
||||
final val ENGINE_TOTAL: String = ENGINE + "total"
|
||||
|
||||
final private val ENGINE_STARTUP_PERMIT: String = ENGINE + "startup.permit."
|
||||
final val ENGINE_STARTUP_PERMIT_LIMIT: String = ENGINE_STARTUP_PERMIT + "limit"
|
||||
final val ENGINE_STARTUP_PERMIT_AVAILABLE: String = ENGINE_STARTUP_PERMIT + "available"
|
||||
final val ENGINE_STARTUP_PERMIT_WAITING: String = ENGINE_STARTUP_PERMIT + "waiting"
|
||||
|
||||
final private val OPERATION = KYUUBI + "operation."
|
||||
final val OPERATION_OPEN: String = OPERATION + "opened"
|
||||
final val OPERATION_FAIL: String = OPERATION + "failed"
|
||||
|
||||
@ -300,6 +300,14 @@ class KyuubiSessionManager private (name: String) extends SessionManager(name) {
|
||||
ms.registerGauge(EXEC_POOL_ALIVE, getExecPoolSize, 0)
|
||||
ms.registerGauge(EXEC_POOL_ACTIVE, getActiveCount, 0)
|
||||
ms.registerGauge(EXEC_POOL_WORK_QUEUE_SIZE, getWorkQueueSize, 0)
|
||||
this.engineStartupProcessSemaphore.foreach { semaphore =>
|
||||
ms.markMeter(ENGINE_STARTUP_PERMIT_LIMIT, semaphore.availablePermits)
|
||||
ms.registerGauge(
|
||||
ENGINE_STARTUP_PERMIT_AVAILABLE,
|
||||
semaphore.availablePermits,
|
||||
semaphore.availablePermits)
|
||||
ms.registerGauge(ENGINE_STARTUP_PERMIT_WAITING, semaphore.getQueueLength, 0)
|
||||
}
|
||||
}
|
||||
super.start()
|
||||
startEngineAliveChecker()
|
||||
|
||||
Loading…
Reference in New Issue
Block a user