diff --git a/install/helm/kgateway-dashboards/README.md b/install/helm/kgateway-dashboards/README.md index 5f82fab..1c6e739 100644 --- a/install/helm/kgateway-dashboards/README.md +++ b/install/helm/kgateway-dashboards/README.md @@ -10,5 +10,6 @@ which can be used for monitoring control-plane and data-plane metrics. # Dashboards -- [Envoy](dashboards/envoy.json): A basic dashboard for monitoring data-plane metrics. +- [Envoy](dashboards/envoy.json): A basic dashboard for monitoring Envoy data-plane metrics. +- [Agentgateway](dashboards/agentgateway.json): A basic dashboard for monitoring Agentgateway data-plane metrics. - [Kgateway Operations](dashboards/kgateway.json): A basic dashboard for monitoring control-plane metrics. diff --git a/install/helm/kgateway-dashboards/dashboards/agentgateway.json b/install/helm/kgateway-dashboards/dashboards/agentgateway.json new file mode 100644 index 0000000..23ec08f --- /dev/null +++ b/install/helm/kgateway-dashboards/dashboards/agentgateway.json @@ -0,0 +1,776 @@ +{ + "annotations": {}, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 0, + "panels": [], + "title": "Overview", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "gradientMode": "opacity", + "showPoints": "never" + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 1 + }, + "interval": "5s", + "options": { + "legend": { + "calcs": [ + "last", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "asc" + } + }, + "repeatDirection": "h", + "targets": [ + { + "expr": "sum(label_replace(sum by (pod,namespace) 
(container_memory_working_set_bytes{image=\"\",namespace=~\"$namespace\"}), \"nsnameexists\", \"yes\", \"namespace\", \".*\") / on(pod, namespace, nsnameexists) label_replace(label_join(agentgateway_build_info, \"nsname\", \"/\", \"namespace\", \"gateway_networking_k8s_io_gateway_name\"), \"nsnameexists\", \"yes\", \"nsname\", \"$gateway\")) without (nsnameexists)", + "legendFormat": "{{namespace}}/{{pod}}", + "refId": "" + } + ], + "title": "Memory", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "gradientMode": "opacity", + "showPoints": "never" + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 1 + }, + "interval": "5s", + "options": { + "legend": { + "calcs": [ + "last", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "asc" + } + }, + "repeatDirection": "h", + "targets": [ + { + "expr": "sum by (pod,namespace) (sum(label_replace(irate(container_cpu_usage_seconds_total{image=\"\",namespace=~\"$namespace\"}[$__rate_interval]), \"nsnameexists\", \"yes\", \"namespace\", \".*\") / on(pod, namespace, nsnameexists) label_replace(label_join(agentgateway_build_info, \"nsname\", \"/\", \"namespace\", \"gateway_networking_k8s_io_gateway_name\"), \"nsnameexists\", \"yes\", \"nsname\", \"$gateway\")) without (nsnameexists))", + "legendFormat": "{{namespace}}/{{pod}}", + "refId": "" + } + ], + "title": "CPU", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 11 + }, + "id": 0, + "panels": [], + "title": "Requests", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + 
"defaults": { + "custom": { + "fillOpacity": 10, + "gradientMode": "opacity", + "showPoints": "never" + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 12 + }, + "interval": "5s", + "options": { + "legend": { + "calcs": [ + "last", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "asc" + } + }, + "repeatDirection": "h", + "targets": [ + { + "expr": "sum by (pod,namespace) (rate(agentgateway_requests_total{namespace=~\"$namespace\",gateway=~\"$gateway\"}[$__rate_interval]))", + "legendFormat": "{{namespace}}/{{pod}}", + "refId": "" + } + ], + "title": "Requests (by Pod)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "gradientMode": "opacity", + "showPoints": "never" + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 12 + }, + "interval": "5s", + "options": { + "legend": { + "calcs": [ + "last", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "asc" + } + }, + "repeatDirection": "h", + "targets": [ + { + "expr": "sum by (gateway) (rate(agentgateway_requests_total{namespace=~\"$namespace\",gateway=~\"$gateway\"}[$__rate_interval]))", + "legendFormat": "{{gateway}}", + "refId": "" + } + ], + "title": "Requests (by Gateway)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "gradientMode": "opacity", + "showPoints": "never" + }, + "unit": "reqps" + 
}, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 22 + }, + "interval": "5s", + "options": { + "legend": { + "calcs": [ + "last", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "asc" + } + }, + "repeatDirection": "h", + "targets": [ + { + "expr": "sum by (gateway,status) (rate(agentgateway_requests_total{namespace=~\"$namespace\",gateway=~\"$gateway\"}[$__rate_interval]))", + "legendFormat": "{{gateway}}: {{status}}", + "refId": "" + } + ], + "title": "Requests (by Status)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "gradientMode": "opacity", + "showPoints": "never" + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 22 + }, + "interval": "5s", + "options": { + "legend": { + "calcs": [ + "last", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "asc" + } + }, + "repeatDirection": "h", + "targets": [ + { + "expr": "sum by (gateway,reason) (rate(agentgateway_requests_total{namespace=~\"$namespace\",gateway=~\"$gateway\"}[$__rate_interval]))", + "legendFormat": "{{gateway}}: {{reason}}", + "refId": "" + } + ], + "title": "Requests (by Reason)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 32 + }, + "id": 0, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "gradientMode": "opacity", + "showPoints": "never" + } + }, + 
"overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 33 + }, + "interval": "5s", + "options": { + "legend": { + "calcs": [ + "last", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "asc" + } + }, + "repeatDirection": "h", + "targets": [ + { + "expr": "sum by (gen_ai_token_type,gen_ai_request_model,gateway) (rate(agentgateway_gen_ai_client_token_usage_sum{namespace=~\"$namespace\",gateway=~\"$gateway\"}[$__rate_interval]))", + "legendFormat": "{{gateway}}: {{gen_ai_request_model}} ({{gen_ai_token_type}})", + "refId": "" + } + ], + "title": "Token Consumption", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "gradientMode": "opacity", + "showPoints": "never" + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 33 + }, + "interval": "5s", + "options": { + "legend": { + "calcs": [ + "last", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "asc" + } + }, + "repeatDirection": "h", + "targets": [ + { + "expr": "histogram_quantile(0.5, sum by (le,gateway,gen_ai_request_model) (rate(agentgateway_gen_ai_server_time_to_first_token_bucket{namespace=~\"$namespace\",gateway=~\"$gateway\"}[$__rate_interval])))", + "legendFormat": "{{gateway}}: {{gen_ai_request_model}}", + "refId": "" + } + ], + "title": "Time To First Token", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "gradientMode": "opacity", 
+ "showPoints": "never" + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 43 + }, + "interval": "5s", + "options": { + "legend": { + "calcs": [ + "last", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "asc" + } + }, + "repeatDirection": "h", + "targets": [ + { + "expr": "histogram_quantile(0.5, sum by (le,gateway,gen_ai_request_model) (rate(agentgateway_gen_ai_server_request_duration_bucket{namespace=~\"$namespace\",gateway=~\"$gateway\"}[$__rate_interval])))", + "legendFormat": "{{gateway}}: {{gen_ai_request_model}}", + "refId": "" + } + ], + "title": "Request Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "gradientMode": "opacity", + "showPoints": "never" + }, + "unit": "tps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 43 + }, + "interval": "5s", + "options": { + "legend": { + "calcs": [ + "last", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "asc" + } + }, + "repeatDirection": "h", + "targets": [ + { + "expr": "1 / histogram_quantile(0.5, sum by (le,gateway,gen_ai_request_model) (rate(agentgateway_gen_ai_server_time_per_output_token_bucket{namespace=~\"$namespace\",gateway=~\"$gateway\"}[$__rate_interval])))", + "legendFormat": "{{gateway}}: {{gen_ai_request_model}}", + "refId": "" + } + ], + "title": "Tokens Per Second", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "title": "LLM", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 53 + 
}, + "id": 0, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "gradientMode": "opacity", + "showPoints": "never" + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 54 + }, + "interval": "5s", + "options": { + "legend": { + "calcs": [ + "last", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "asc" + } + }, + "repeatDirection": "h", + "targets": [ + { + "expr": "sum by (gateway,method) (rate(agentgateway_mcp_requests_total{namespace=~\"$namespace\",gateway=~\"$gateway\"}[$__rate_interval]))", + "legendFormat": "{{gateway}}: {{method}}", + "refId": "" + } + ], + "title": "MCP Calls (by Method)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "gradientMode": "opacity", + "showPoints": "never" + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 54 + }, + "interval": "5s", + "options": { + "legend": { + "calcs": [ + "last", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "asc" + } + }, + "repeatDirection": "h", + "targets": [ + { + "expr": "sum by (gateway,server,resource) (rate(agentgateway_mcp_requests_total{namespace=~\"$namespace\",gateway=~\"$gateway\",method=\"tools/call\"}[$__rate_interval]))", + "legendFormat": "{{gateway}}: {{server}}/{{resource}}", + "refId": "" + } + ], + "title": "Tool Calls (by Tool)", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "title": "MCP", + 
"type": "row" + } + ], + "refresh": "15s", + "schemaVersion": 36, + "style": "dark", + "templating": { + "list": [ + { + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "hide": 0, + "id": "00000000-0000-0000-0000-000000000000", + "includeAll": false, + "multi": false, + "name": "datasource", + "query": "prometheus", + "skipUrlSync": false, + "type": "datasource" + }, + { + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "hide": 0, + "id": "00000000-0000-0000-0000-000000000000", + "includeAll": true, + "label": "Namespace", + "multi": true, + "name": "namespace", + "query": "label_values(agentgateway_build_info,namespace)", + "refresh": 2, + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "hide": 0, + "id": "00000000-0000-0000-0000-000000000000", + "includeAll": true, + "label": "Gateway", + "multi": true, + "name": "gateway", + "query": "query_result(label_join(agentgateway_build_info{namespace=~\"$namespace\"}, \"text\", \"/\", \"namespace\", \"gateway_networking_k8s_io_gateway_name\"))", + "refresh": 2, + "regex": "/.*text=\"([^\"]*).*/", + "skipUrlSync": false, + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-30m", + "to": "now" + }, + "timezone": "browser", + "title": "Agentgateway", + "uid": "agentgateway" +} \ No newline at end of file