fix: add the doc and fix util runtime bug

coderi421 · coderi421 · commit d682eff7498b · 2023-05-29T16:12:34.000+08:00
diff --git a/gmicro/core/metric/doc.go b/gmicro/core/metric/doc.go
@@ -0,0 +1,60 @@
+package metric
+
+/*
+监控
+	1. 业务监控(上层概念 - 领导层)：
+		需求方：老板、运营
+		开发方： 大数据库 ，都会访问业务库，大数据库会从同步库， 宽表
+		QPS、DAU日活、访问状态(http code)、业务接口(登录、注册、聊天、上传、留言、搜索、投诉)、 产品转换率、充值额度
+	2. 系统监控
+		需求方： 运维
+		开发方： 运维
+		操作系统相关： cpu使用率、内存使用、磁盘使用率、磁盘空间（非常常见）、TCP(上W的连接)，流量
+		组件： mysql、redis、kafka
+
+	3. 日志监控
+		需求方：运维、开发
+		开发方：开发
+		两种日志：业务日志（大数据， 普通日志）、 系统日志（操作系统日志、mysql组件日志、kafka的日志）
+		监控中的重头戏，一般我们都会单独针对日志设计日志管理系统， ELK日志系统， loki
+
+	4. 网络监控：
+		需求方：机房管理
+		开发方：服务器管理
+		IDC 交换机、路由器、防火墙、负载均衡、服务器、机柜、电源、UPS、空调、网络设备、机房环境监控，
+		网络：内部网络（物理内网，虚拟内网（VPN））监控
+
+	5. 程序监控：
+		需求方：开发
+		开发方：开发
+		比如产生了 500 ErrUserNotFound
+		一般要运维和开发人员配合，开发人员在程序中提供监控接口，运维人员通过接口获取监控数据
+
+	prometheus的数据格式： metrics
+	metrics是一种对采样数据的总称
+
+	Gauges
+	最简单的度量指标，只是一个简单的返回值，或者叫瞬时状态，我们想要知道一个队列中的个数
+	比如：当前的内存使用率、当前的CPU使用率、当前的磁盘使用率、当前的磁盘空间、当前的TCP连接数、当前的流量、当前的QPS、当前的DAU、当前的访问状态、当前的业务接口、当前的产品转换率、当前的充值额度、当前的业务日志、当前的系统日志、当前的网络设备、当前的服务器、当前的机柜、当前的电源、当前的UPS、当前的空调、当前的网络设备、当前的机房环境监控、当前的程序监控
+	随着时间的推移， 这个值是不断变化的， 这个值有可能增加，有可能减少
+
+	Counter
+	是计数器， 这个值是从0开始累积，在理想状态下，这个值不可能减少
+	非理想状态下：如果我的服务器重启，同时这个数是放在内存中的，那么这个值会重新从0开始累积
+
+	gauges和counter是最主要的类型 70%
+
+	Histograms
+	http_res_time 表示http请求的响应时间
+	nginx
+
+	如果我要统计一天的所有访问的平均耗时
+	如果我们统计下来平均耗时是50ms 但是， 现在中午有一段时间系统卡住了， 1W个请求 平均耗时是在5s，
+	但是由于我们每天的访问量很大， 1000W访问量，这个5s耗时的请求就被平均掉了
+	越早发现越好， 有可能是程序的bug，也有可能是系统的bug
+
+	50ms以内有多少请求， 50-200ms有多少请求 200ms-500ms有多少请求 500ms-1s有多少请求 1s-5s有多少请求 5s以上有多少请求
+	分布图
+
+
+*/
diff --git a/pkg/common/util/runtime/runtime.go b/pkg/common/util/runtime/runtime.go
@@ -17,6 +17,7 @@ limitations under the License.
 package runtime
 
 import (
+	"errors"
 	"fmt"
 	"net/http"
 	"runtime"
@@ -85,15 +86,29 @@ func logPanic(r interface{}) {
 // should be packaged up into a testable and reusable object.
 var ErrorHandlers = []func(error){
 	logError,
-	(&rudimentaryErrorBackoff{
-		lastErrorTime: time.Now(),
-		// 1ms was the number folks were able to stomach as a global rate limit.
-		// If you need to log errors more than 1000 times a second you
-		// should probably consider fixing your code instead. :)
-		minPeriod: time.Millisecond,
-	}).OnError,
+	// The backoff is built once, when ErrorHandlers is initialized, so that its
+	// lastErrorTime persists across calls. Constructing it inside the handler
+	// would hand every error a brand-new instance with no shared state.
+	func() func(error) {
+		backoff := &rudimentaryErrorBackoff{
+			lastErrorTime: time.Now(),
+			// 1ms was the number folks were able to stomach as a global rate limit.
+			// If you need to log errors more than 1000 times a second you
+			// should probably consider fixing your code instead. :)
+			minPeriod: time.Millisecond,
+		}
+		return func(err error) {
+			// Forward the error that was actually reported; the fixed
+			// placeholder is only substituted when err is nil.
+			if err == nil {
+				err = errors.New("error occurred ErrorHandlers")
+			}
+			backoff.OnError(err)
+		}
+	}(),
 }
 
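+// Backup: the original form, which builds a single rudimentaryErrorBackoff
+// and registers its OnError method value directly.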
+//var ErrorHandlers = []func(error){
+//	logError,
+//	(&rudimentaryErrorBackoff{
+//		lastErrorTime: time.Now(),
+//		// 1ms was the number folks were able to stomach as a global rate limit.
+//		// If you need to log errors more than 1000 times a second you
+//		// should probably consider fixing your code instead. :)
+//		minPeriod: time.Millisecond,
+//	}).OnError,
+//}
+
 // HandlerError is a method to invoke when a non-user facing piece of code cannot
 // return an error and needs to indicate it has been ignored. Invoking this method
 // is preferable to logging the error - the default behavior is to log but the