|
449 | 449 | "mappings": [ |
450 | 450 | { |
451 | 451 | "options": { |
452 | | - "False": { |
| 452 | + "Healthy": { |
453 | 453 | "color": "green", |
454 | 454 | "index": 0, |
455 | 455 | "text": "Healthy" |
456 | 456 | }, |
457 | | - "True": { |
| 457 | + "PARTITIONED": { |
458 | 458 | "color": "red", |
459 | 459 | "index": 1, |
460 | 460 | "text": "PARTITIONED" |
|
485 | 485 | "value": 150 |
486 | 486 | } |
487 | 487 | ] |
| 488 | + }, |
| 489 | + { |
| 490 | + "matcher": { |
| 491 | + "id": "byName", |
| 492 | + "options": "Status" |
| 493 | + }, |
| 494 | + "properties": [ |
| 495 | + { |
| 496 | + "id": "custom.cellOptions", |
| 497 | + "value": { |
| 498 | + "mode": "basic", |
| 499 | + "type": "color-background" |
| 500 | + } |
| 501 | + } |
| 502 | + ] |
488 | 503 | } |
489 | 504 | ] |
490 | 505 | }, |
|
512 | 527 | "type": "prometheus", |
513 | 528 | "uid": "${datasource}" |
514 | 529 | }, |
515 | | - "expr": "topk(100, node_doctor_monitor_conditions_total{node=~\"$node\", condition_type=\"NetworkPartitioned\"}) by (node, status)", |
| 530 | + "expr": "label_replace(increase(node_doctor_monitor_conditions_total{node=~\"$node\", condition_type=\"NetworkPartitioned\", status=\"False\"}[5m]) > 0, \"partition_status\", \"Healthy\", \"\", \"\") or label_replace(increase(node_doctor_monitor_conditions_total{node=~\"$node\", condition_type=\"NetworkPartitioned\", status=\"True\"}[5m]) > 0, \"partition_status\", \"PARTITIONED\", \"\", \"\")", |
516 | 531 | "format": "table", |
517 | 532 | "instant": true, |
518 | | - "legendFormat": "{{node}} - {{status}}", |
| 533 | + "legendFormat": "__auto", |
519 | 534 | "refId": "A" |
520 | 535 | } |
521 | 536 | ], |
522 | 537 | "title": "Network Partition Status by Node", |
523 | 538 | "transformations": [ |
524 | | - { |
525 | | - "id": "groupBy", |
526 | | - "options": { |
527 | | - "fields": { |
528 | | - "Value": { |
529 | | - "aggregations": ["lastNotNull"], |
530 | | - "operation": "aggregate" |
531 | | - }, |
532 | | - "node": { |
533 | | - "aggregations": [], |
534 | | - "operation": "groupby" |
535 | | - }, |
536 | | - "status": { |
537 | | - "aggregations": [], |
538 | | - "operation": "groupby" |
539 | | - } |
540 | | - } |
541 | | - } |
542 | | - }, |
543 | 539 | { |
544 | 540 | "id": "organize", |
545 | 541 | "options": { |
546 | 542 | "excludeByName": { |
547 | | - "Value (lastNotNull)": true |
| 543 | + "Time": true, |
| 544 | + "Value": true, |
| 545 | + "__name__": true, |
| 546 | + "cluster": true, |
| 547 | + "condition_type": true, |
| 548 | + "container": true, |
| 549 | + "endpoint": true, |
| 550 | + "instance": true, |
| 551 | + "job": true, |
| 552 | + "namespace": true, |
| 553 | + "pod": true, |
| 554 | + "service": true, |
| 555 | + "status": true |
| 556 | + }, |
| 557 | + "indexByName": { |
| 558 | + "node": 0, |
| 559 | + "partition_status": 1 |
548 | 560 | }, |
549 | | - "indexByName": {}, |
550 | 561 | "renameByName": { |
551 | 562 | "node": "Node", |
552 | | - "status": "Partitioned" |
| 563 | + "partition_status": "Status" |
553 | 564 | } |
554 | 565 | } |
555 | 566 | } |
|
576 | 587 | "mappings": [ |
577 | 588 | { |
578 | 589 | "options": { |
579 | | - "False": { |
| 590 | + "UNHEALTHY": { |
580 | 591 | "color": "red", |
581 | 592 | "index": 1, |
582 | 593 | "text": "UNHEALTHY" |
583 | 594 | }, |
584 | | - "True": { |
| 595 | + "Healthy": { |
585 | 596 | "color": "green", |
586 | 597 | "index": 0, |
587 | 598 | "text": "Healthy" |
|
600 | 611 | ] |
601 | 612 | } |
602 | 613 | }, |
603 | | - "overrides": [] |
| 614 | + "overrides": [ |
| 615 | + { |
| 616 | + "matcher": { |
| 617 | + "id": "byName", |
| 618 | + "options": "Node" |
| 619 | + }, |
| 620 | + "properties": [ |
| 621 | + { |
| 622 | + "id": "custom.width", |
| 623 | + "value": 150 |
| 624 | + } |
| 625 | + ] |
| 626 | + }, |
| 627 | + { |
| 628 | + "matcher": { |
| 629 | + "id": "byName", |
| 630 | + "options": "CNI Status" |
| 631 | + }, |
| 632 | + "properties": [ |
| 633 | + { |
| 634 | + "id": "custom.cellOptions", |
| 635 | + "value": { |
| 636 | + "mode": "basic", |
| 637 | + "type": "color-background" |
| 638 | + } |
| 639 | + } |
| 640 | + ] |
| 641 | + } |
| 642 | + ] |
604 | 643 | }, |
605 | 644 | "gridPos": { |
606 | 645 | "h": 8, |
|
626 | 665 | "type": "prometheus", |
627 | 666 | "uid": "${datasource}" |
628 | 667 | }, |
629 | | - "expr": "topk(100, node_doctor_monitor_conditions_total{node=~\"$node\", condition_type=\"CNIHealthy\"}) by (node, status)", |
| 668 | + "expr": "label_replace(increase(node_doctor_monitor_conditions_total{node=~\"$node\", condition_type=\"CNIHealthy\", status=\"True\"}[5m]) > 0, \"health_status\", \"Healthy\", \"\", \"\") or label_replace(increase(node_doctor_monitor_conditions_total{node=~\"$node\", condition_type=\"CNIHealthy\", status=\"False\"}[5m]) > 0, \"health_status\", \"UNHEALTHY\", \"\", \"\")", |
630 | 669 | "format": "table", |
631 | 670 | "instant": true, |
632 | | - "legendFormat": "{{node}} - {{status}}", |
| 671 | + "legendFormat": "__auto", |
633 | 672 | "refId": "A" |
634 | 673 | } |
635 | 674 | ], |
636 | 675 | "title": "CNI Health Status by Node", |
637 | 676 | "transformations": [ |
638 | | - { |
639 | | - "id": "groupBy", |
640 | | - "options": { |
641 | | - "fields": { |
642 | | - "Value": { |
643 | | - "aggregations": ["lastNotNull"], |
644 | | - "operation": "aggregate" |
645 | | - }, |
646 | | - "node": { |
647 | | - "aggregations": [], |
648 | | - "operation": "groupby" |
649 | | - }, |
650 | | - "status": { |
651 | | - "aggregations": [], |
652 | | - "operation": "groupby" |
653 | | - } |
654 | | - } |
655 | | - } |
656 | | - }, |
657 | 677 | { |
658 | 678 | "id": "organize", |
659 | 679 | "options": { |
660 | 680 | "excludeByName": { |
661 | | - "Value (lastNotNull)": true |
| 681 | + "Time": true, |
| 682 | + "Value": true, |
| 683 | + "__name__": true, |
| 684 | + "cluster": true, |
| 685 | + "condition_type": true, |
| 686 | + "container": true, |
| 687 | + "endpoint": true, |
| 688 | + "instance": true, |
| 689 | + "job": true, |
| 690 | + "namespace": true, |
| 691 | + "pod": true, |
| 692 | + "service": true, |
| 693 | + "status": true |
| 694 | + }, |
| 695 | + "indexByName": { |
| 696 | + "node": 0, |
| 697 | + "health_status": 1 |
662 | 698 | }, |
663 | | - "indexByName": {}, |
664 | 699 | "renameByName": { |
665 | 700 | "node": "Node", |
666 | | - "status": "CNI Healthy" |
| 701 | + "health_status": "CNI Status" |
667 | 702 | } |
668 | 703 | } |
669 | 704 | } |
|
1003 | 1038 | "type": "prometheus", |
1004 | 1039 | "uid": "${datasource}" |
1005 | 1040 | }, |
1006 | | - "expr": "sum(node_doctor_monitor_peers_total{node=~\"$node\"})", |
1007 | | - "legendFormat": "Total Peers", |
| 1041 | + "expr": "avg(node_doctor_monitor_peers_total{node=~\"$node\"})", |
| 1042 | + "legendFormat": "Avg Peers per Node", |
1008 | 1043 | "refId": "A" |
1009 | 1044 | } |
1010 | 1045 | ], |
1011 | | - "title": "Total Peers", |
| 1046 | + "title": "Avg Peers per Node", |
1012 | 1047 | "type": "stat" |
1013 | 1048 | }, |
1014 | 1049 | { |
|
1061 | 1096 | "type": "prometheus", |
1062 | 1097 | "uid": "${datasource}" |
1063 | 1098 | }, |
1064 | | - "expr": "sum(node_doctor_monitor_peers_reachable_total{node=~\"$node\"})", |
1065 | | - "legendFormat": "Reachable Peers", |
| 1099 | + "expr": "avg(node_doctor_monitor_peers_reachable_total{node=~\"$node\"})", |
| 1100 | + "legendFormat": "Avg Reachable per Node", |
1066 | 1101 | "refId": "A" |
1067 | 1102 | } |
1068 | 1103 | ], |
1069 | | - "title": "Reachable Peers", |
| 1104 | + "title": "Avg Reachable per Node", |
1070 | 1105 | "type": "stat" |
1071 | 1106 | }, |
1072 | 1107 | { |
|
2272 | 2307 | "type": "prometheus", |
2273 | 2308 | "uid": "${datasource}" |
2274 | 2309 | }, |
2275 | | - "expr": "sum by (condition_type) (node_doctor_monitor_conditions_total{node=~\"$node\", condition_type=~\"NetworkPartitioned|NetworkDegraded|CNIHealthy|CNIConfigValid|CNIInterfacesHealthy\", status=\"True\"})", |
2276 | | - "legendFormat": "{{condition_type}}", |
| 2310 | + "expr": "sum by (condition_type) (rate(node_doctor_monitor_conditions_total{node=~\"$node\", condition_type=~\"NetworkPartitioned|NetworkDegraded\", status=\"True\"}[5m])) * 60", |
| 2311 | + "legendFormat": "{{condition_type}} (problems)", |
2277 | 2312 | "refId": "A" |
| 2313 | + }, |
| 2314 | + { |
| 2315 | + "datasource": { |
| 2316 | + "type": "prometheus", |
| 2317 | + "uid": "${datasource}" |
| 2318 | + }, |
| 2319 | + "expr": "sum by (condition_type) (rate(node_doctor_monitor_conditions_total{node=~\"$node\", condition_type=~\"CNIHealthy|CNIConfigValid|CNIInterfacesHealthy\", status=\"False\"}[5m])) * 60", |
| 2320 | + "legendFormat": "{{condition_type}} (unhealthy)", |
| 2321 | + "refId": "B" |
2278 | 2322 | } |
2279 | 2323 | ], |
2280 | | - "title": "Network & CNI Conditions (True)", |
| 2324 | + "title": "Network Issue Activity (rate/min)", |
2281 | 2325 | "type": "timeseries" |
2282 | 2326 | }, |
2283 | 2327 | { |
|
0 commit comments