Skip to content

Commit aecc1cb

Browse files
authored
Fixed Flow Execution Requests Dashboard so that Max Duration is calcu… (#2)
* Fixed Flow Execution Requests Dashboard so that Max Duration is calculated correctly and modified Errored Request Graph to display Mean Duration for Errored Requests only * Modified Flow Executions Dashboard to filter errors correctly using the new RequestStatus label added in the Promtail config instead of the regex search for the text "error", to avoid returning results that were not errors but contained the word error in the log file. Amended the Loki config file to increase the max gRPC message sizes that can be sent and received, to reduce the likelihood of errors when querying from Grafana.
1 parent aec2f25 commit aecc1cb

File tree

3 files changed

+53
-21
lines changed

3 files changed

+53
-21
lines changed

Grafana/Grafana/Dashboards/Flow Execution Requests.json

Lines changed: 48 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@
7474
"fiscalYearStartMonth": 0,
7575
"graphTooltip": 0,
7676
"id": null,
77-
"iteration": 1653401831290,
77+
"iteration": 1657624501089,
7878
"links": [],
7979
"liveNow": false,
8080
"panels": [
@@ -216,7 +216,7 @@
216216
"type": "loki",
217217
"uid": "${DS_LOKI}"
218218
},
219-
"expr": "sum(count_over_time({job=\"flows\", Type=\"Cortex\", Api=\"Cortex.ServiceFabric.Service.ApiGateway.Controllers.ExecutionsController.ExecuteFlowSync\", Tenant=~\"$Tenant\", System=~\"$System\", Node=~\"$Node\", PackageName=~\"$PackageName\", FlowName=~\"$FlowName\", StatusCode=~\"$StatusCode\", Result=~\"$Result\", InitiatorIpAddress=~\"$Initiator\"} |= \"Error\" [$__range]))",
219+
"expr": "sum(count_over_time({job=\"flows\", RequestStatus=\"Error\", Type=\"Cortex\", Api=\"Cortex.ServiceFabric.Service.ApiGateway.Controllers.ExecutionsController.ExecuteFlowSync\", Tenant=~\"$Tenant\", System=~\"$System\", Node=~\"$Node\", PackageName=~\"$PackageName\", FlowName=~\"$FlowName\", StatusCode=~\"$StatusCode\", Result=~\"$Result\", InitiatorIpAddress=~\"$Initiator\"} [$__range]))",
220220
"hide": false,
221221
"legendFormat": "Total Errored Requests",
222222
"queryType": "instant",
@@ -739,10 +739,20 @@
739739
"type": "loki",
740740
"uid": "${DS_LOKI}"
741741
},
742-
"expr": "sum(count_over_time({job=\"flows\", Type=\"Cortex\", Api=\"Cortex.ServiceFabric.Service.ApiGateway.Controllers.ExecutionsController.ExecuteFlowSync\", Tenant=~\"$Tenant\", System=~\"$System\", Node=~\"$Node\", PackageName=~\"$PackageName\", FlowName=~\"$FlowName\", StatusCode=~\"$StatusCode\", Result=~\"$Result\", InitiatorIpAddress=~\"$Initiator\"} |= \"Error\" [$Interval]))",
742+
"expr": "sum(count_over_time({job=\"flows\", Type=\"Cortex\", RequestStatus=\"Error\", Api=\"Cortex.ServiceFabric.Service.ApiGateway.Controllers.ExecutionsController.ExecuteFlowSync\", Tenant=~\"$Tenant\", System=~\"$System\", Node=~\"$Node\", PackageName=~\"$PackageName\", FlowName=~\"$FlowName\", StatusCode=~\"$StatusCode\", Result=~\"$Result\", InitiatorIpAddress=~\"$Initiator\"} [$Interval]))",
743743
"hide": false,
744744
"legendFormat": "Errored Requests",
745745
"refId": "E"
746+
},
747+
{
748+
"datasource": {
749+
"type": "loki",
750+
"uid": "${DS_LOKI}"
751+
},
752+
"expr": "avg by (Type) (avg_over_time({job=\"flows\", Type=\"Cortex\", RequestStatus=\"Error\", Api=\"Cortex.ServiceFabric.Service.ApiGateway.Controllers.ExecutionsController.ExecuteFlowSync\", Tenant=~\"$Tenant\", System=~\"$System\", Node=~\"$Node\", PackageName=~\"$PackageName\", FlowName=~\"$FlowName\", StatusCode=~\"$StatusCode\", Result=~\"$Result\", InitiatorIpAddress=~\"$Initiator\"} | json | unwrap Event_Duration_InMs [$Interval]))",
753+
"hide": false,
754+
"legendFormat": "Errored Requests Mean Duration (ms)",
755+
"refId": "F"
746756
}
747757
],
748758
"title": "Requests",
@@ -770,7 +780,9 @@
770780
"Average Duration in ms": true,
771781
"Average duration in ms": true,
772782
"Average duration in seconds": false,
783+
"Errored Request Mean Duration (ms)": true,
773784
"Errored Requests": true,
785+
"Errored Requests Mean Duration (ms)": true,
774786
"Max Duration (ms)": true,
775787
"Max duration in ms": true,
776788
"Mean Duration (ms)": true,
@@ -817,7 +829,8 @@
817829
"mode": "absolute",
818830
"steps": [
819831
{
820-
"color": "green"
832+
"color": "green",
833+
"value": null
821834
},
822835
{
823836
"color": "red",
@@ -977,7 +990,7 @@
977990
"type": "loki",
978991
"uid": "${DS_LOKI}"
979992
},
980-
"expr": "sum by (FlowName) (count_over_time({job=\"flows\", Type=\"Cortex\", Api=\"Cortex.ServiceFabric.Service.ApiGateway.Controllers.ExecutionsController.ExecuteFlowSync\", Tenant=~\"$Tenant\", System=~\"$System\", Node=~\"$Node\", PackageName=~\"$PackageName\", FlowName=~\"$FlowName\", StatusCode=~\"$StatusCode\", Result=~\"$Result\", InitiatorIpAddress=~\"$Initiator\"} |= \"Error\" | json [$__range]))",
993+
"expr": "sum by (FlowName) (count_over_time({job=\"flows\", Type=\"Cortex\", RequestStatus=\"Error\", Api=\"Cortex.ServiceFabric.Service.ApiGateway.Controllers.ExecutionsController.ExecuteFlowSync\", Tenant=~\"$Tenant\", System=~\"$System\", Node=~\"$Node\", PackageName=~\"$PackageName\", FlowName=~\"$FlowName\", StatusCode=~\"$StatusCode\", Result=~\"$Result\", InitiatorIpAddress=~\"$Initiator\"} | json [$__range]))",
981994
"hide": false,
982995
"legendFormat": "Error Count",
983996
"queryType": "instant",
@@ -1010,7 +1023,7 @@
10101023
"type": "loki",
10111024
"uid": "${DS_LOKI}"
10121025
},
1013-
"expr": "max by (FlowName) (min_over_time({job=\"flows\", Type=\"Cortex\", Api=\"Cortex.ServiceFabric.Service.ApiGateway.Controllers.ExecutionsController.ExecuteFlowSync\", Tenant=~\"$Tenant\", System=~\"$System\", Node=~\"$Node\", PackageName=~\"$PackageName\", FlowName=~\"$FlowName\", StatusCode=~\"$StatusCode\", Result=~\"$Result\", InitiatorIpAddress=~\"$Initiator\"} | json | unwrap Event_Duration_InMs [$__range]))",
1026+
"expr": "max by (FlowName) (max_over_time({job=\"flows\", Type=\"Cortex\", Api=\"Cortex.ServiceFabric.Service.ApiGateway.Controllers.ExecutionsController.ExecuteFlowSync\", Tenant=~\"$Tenant\", System=~\"$System\", Node=~\"$Node\", PackageName=~\"$PackageName\", FlowName=~\"$FlowName\", StatusCode=~\"$StatusCode\", Result=~\"$Result\", InitiatorIpAddress=~\"$Initiator\"} | json | unwrap Event_Duration_InMs [$__range]))",
10141027
"hide": false,
10151028
"legendFormat": "Max Duration (ms)",
10161029
"queryType": "instant",
@@ -1172,7 +1185,8 @@
11721185
"mode": "absolute",
11731186
"steps": [
11741187
{
1175-
"color": "green"
1188+
"color": "green",
1189+
"value": null
11761190
},
11771191
{
11781192
"color": "red",
@@ -1478,7 +1492,8 @@
14781492
"mode": "absolute",
14791493
"steps": [
14801494
{
1481-
"color": "green"
1495+
"color": "green",
1496+
"value": null
14821497
}
14831498
]
14841499
},
@@ -1593,12 +1608,18 @@
15931608
],
15941609
"title": "Errored Requests",
15951610
"transformations": [
1611+
{
1612+
"id": "filterByRefId",
1613+
"options": {
1614+
"include": "E|F"
1615+
}
1616+
},
15961617
{
15971618
"id": "calculateField",
15981619
"options": {
1599-
"alias": "Mean Duration (s)",
1620+
"alias": "Errored Requests Mean Duration (s)",
16001621
"binary": {
1601-
"left": "Mean Duration (ms)",
1622+
"left": "Errored Requests Mean Duration (ms)",
16021623
"operator": "/",
16031624
"reducer": "sum",
16041625
"right": "1000"
@@ -1614,6 +1635,8 @@
16141635
"options": {
16151636
"excludeByName": {
16161637
"Average duration in ms": true,
1638+
"Errored Request Mean Duration (ms)": true,
1639+
"Errored Requests Mean Duration (ms)": true,
16171640
"Max Duration (ms)": true,
16181641
"Mean Duration (ms)": true,
16191642
"Min Duration (ms)": true,
@@ -1661,7 +1684,8 @@
16611684
"mode": "absolute",
16621685
"steps": [
16631686
{
1664-
"color": "green"
1687+
"color": "green",
1688+
"value": null
16651689
},
16661690
{
16671691
"color": "red",
@@ -1711,7 +1735,7 @@
17111735
"type": "loki",
17121736
"uid": "${DS_LOKI}"
17131737
},
1714-
"expr": "topk(10, sum by (FlowName, StatusCode, Result) (count_over_time({job=\"flows\", Type=\"Cortex\", Api=\"Cortex.ServiceFabric.Service.ApiGateway.Controllers.ExecutionsController.ExecuteFlowSync\", Tenant=~\"$Tenant\", System=~\"$System\", Node=~\"$Node\", PackageName=~\"$PackageName\", FlowName=~\"$FlowName\", StatusCode=~\"$StatusCode\", Result=~\"$Result\", InitiatorIpAddress=~\"$Initiator\"} |= \"Error\" | json [$__range])))",
1738+
"expr": "topk(10, sum by (FlowName, StatusCode, Result) (count_over_time({job=\"flows\", Type=\"Cortex\", RequestStatus=\"Error\", Api=\"Cortex.ServiceFabric.Service.ApiGateway.Controllers.ExecutionsController.ExecuteFlowSync\", Tenant=~\"$Tenant\", System=~\"$System\", Node=~\"$Node\", PackageName=~\"$PackageName\", FlowName=~\"$FlowName\", StatusCode=~\"$StatusCode\", Result=~\"$Result\", InitiatorIpAddress=~\"$Initiator\"} | json [$__range])))",
17151739
"hide": false,
17161740
"legendFormat": "Top10 Errored Requests",
17171741
"queryType": "instant",
@@ -1722,7 +1746,7 @@
17221746
"type": "loki",
17231747
"uid": "${DS_LOKI}"
17241748
},
1725-
"expr": "avg by (FlowName, StatusCode, Result) (avg_over_time({job=\"flows\", Type=\"Cortex\", Api=\"Cortex.ServiceFabric.Service.ApiGateway.Controllers.ExecutionsController.ExecuteFlowSync\", Tenant=~\"$Tenant\", System=~\"$System\", Node=~\"$Node\", PackageName=~\"$PackageName\", FlowName=~\"$FlowName\", StatusCode=~\"$StatusCode\", Result=~\"$Result\", InitiatorIpAddress=~\"$Initiator\"} |= \"Error\" | json | unwrap Event_Duration_InMs [$__range]))",
1749+
"expr": "avg by (FlowName, StatusCode, Result) (avg_over_time({job=\"flows\", Type=\"Cortex\", RequestStatus=\"Error\", Api=\"Cortex.ServiceFabric.Service.ApiGateway.Controllers.ExecutionsController.ExecuteFlowSync\", Tenant=~\"$Tenant\", System=~\"$System\", Node=~\"$Node\", PackageName=~\"$PackageName\", FlowName=~\"$FlowName\", StatusCode=~\"$StatusCode\", Result=~\"$Result\", InitiatorIpAddress=~\"$Initiator\"} | json | unwrap Event_Duration_InMs [$__range]))",
17261750
"hide": false,
17271751
"legendFormat": "Mean Duration (ms)",
17281752
"queryType": "instant",
@@ -1733,7 +1757,7 @@
17331757
"type": "loki",
17341758
"uid": "${DS_LOKI}"
17351759
},
1736-
"expr": "min by (FlowName, StatusCode, Result) (min_over_time({job=\"flows\", Type=\"Cortex\", Api=\"Cortex.ServiceFabric.Service.ApiGateway.Controllers.ExecutionsController.ExecuteFlowSync\", Tenant=~\"$Tenant\", System=~\"$System\", Node=~\"$Node\", PackageName=~\"$PackageName\", FlowName=~\"$FlowName\", StatusCode=~\"$StatusCode\", Result=~\"$Result\", InitiatorIpAddress=~\"$Initiator\"} |= \"Error\" | json | unwrap Event_Duration_InMs [$__range]))",
1760+
"expr": "min by (FlowName, StatusCode, Result) (min_over_time({job=\"flows\", Type=\"Cortex\", RequestStatus=\"Error\", Api=\"Cortex.ServiceFabric.Service.ApiGateway.Controllers.ExecutionsController.ExecuteFlowSync\", Tenant=~\"$Tenant\", System=~\"$System\", Node=~\"$Node\", PackageName=~\"$PackageName\", FlowName=~\"$FlowName\", StatusCode=~\"$StatusCode\", Result=~\"$Result\", InitiatorIpAddress=~\"$Initiator\"} | json | unwrap Event_Duration_InMs [$__range]))",
17371761
"hide": false,
17381762
"legendFormat": "Min Duration (ms)",
17391763
"queryType": "instant",
@@ -1744,7 +1768,7 @@
17441768
"type": "loki",
17451769
"uid": "${DS_LOKI}"
17461770
},
1747-
"expr": "max by (FlowName, StatusCode, Result) (max_over_time({job=\"flows\", Type=\"Cortex\", Api=\"Cortex.ServiceFabric.Service.ApiGateway.Controllers.ExecutionsController.ExecuteFlowSync\", Tenant=~\"$Tenant\", System=~\"$System\", Node=~\"$Node\", PackageName=~\"$PackageName\", FlowName=~\"$FlowName\", StatusCode=~\"$StatusCode\", Result=~\"$Result\", InitiatorIpAddress=~\"$Initiator\"} |= \"Error\" | json | unwrap Event_Duration_InMs [$__range]))",
1771+
"expr": "max by (FlowName, StatusCode, Result) (max_over_time({job=\"flows\", Type=\"Cortex\", RequestStatus=\"Error\", Api=\"Cortex.ServiceFabric.Service.ApiGateway.Controllers.ExecutionsController.ExecuteFlowSync\", Tenant=~\"$Tenant\", System=~\"$System\", Node=~\"$Node\", PackageName=~\"$PackageName\", FlowName=~\"$FlowName\", StatusCode=~\"$StatusCode\", Result=~\"$Result\", InitiatorIpAddress=~\"$Initiator\"} | json | unwrap Event_Duration_InMs [$__range]))",
17481772
"hide": false,
17491773
"legendFormat": "Max Duration (ms)",
17501774
"queryType": "instant",
@@ -1950,7 +1974,8 @@
19501974
"mode": "absolute",
19511975
"steps": [
19521976
{
1953-
"color": "green"
1977+
"color": "green",
1978+
"value": null
19541979
},
19551980
{
19561981
"color": "red",
@@ -2108,6 +2133,7 @@
21082133
"excludeByName": {
21092134
"Average duration in ms": true,
21102135
"Errored Requests": true,
2136+
"Errored Requests Mean Duration (ms)": true,
21112137
"Flow executions": true,
21122138
"Max Duration (ms)": true,
21132139
"Max duration in ms": true,
@@ -2156,7 +2182,8 @@
21562182
"mode": "absolute",
21572183
"steps": [
21582184
{
2159-
"color": "green"
2185+
"color": "green",
2186+
"value": null
21602187
},
21612188
{
21622189
"color": "red",
@@ -2429,7 +2456,8 @@
24292456
"mode": "absolute",
24302457
"steps": [
24312458
{
2432-
"color": "green"
2459+
"color": "green",
2460+
"value": null
24332461
},
24342462
{
24352463
"color": "red",
@@ -2952,13 +2980,13 @@
29522980
]
29532981
},
29542982
"time": {
2955-
"from": "now-6h",
2983+
"from": "now-3h",
29562984
"to": "now"
29572985
},
29582986
"timepicker": {},
29592987
"timezone": "",
29602988
"title": "Flow Execution Requests",
29612989
"uid": "o2QeoKQ7z",
2962-
"version": 40,
2990+
"version": 57,
29632991
"weekStart": ""
29642992
}

Grafana/Loki/Config/loki-local-config.yaml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@ server:
44
http_listen_port: 3100
55
http_server_read_timeout: 10m # To avoid Grafana timeouts when frontend parallelism is lowered.
66
http_server_write_timeout: 10m # To avoid Grafana timeouts when frontend parallelism is lowered.
7-
7+
grpc_server_max_recv_msg_size: 16777216 # To avoid grpc ResourceExhausted error
8+
grpc_server_max_send_msg_size: 16777216 # To avoid grpc ResourceExhausted error
9+
810
common:
911
path_prefix: /tmp/loki
1012
storage:

Grafana/Promtail/Config/promtail-local-config.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ scrape_configs:
3131
PackageName: Event.Details.Request.Inputs.packageName
3232
FlowName: Event.Details.Request.Inputs.flowName
3333
InitiatorIpAddress: Event.Details.Request.Initiator.IpAddress
34+
RequestStatus: '"@l"'
3435
- labels:
3536
Type:
3637
Api:
@@ -42,6 +43,7 @@ scrape_configs:
4243
PackageName:
4344
FlowName:
4445
InitiatorIpAddress:
46+
RequestStatus:
4547
- match:
4648
selector: '{job="flows"}|~ "\"Outputs\":.?null"'
4749
stages:

0 commit comments

Comments
 (0)