
feat: add metrics (#3624)

MarkJoyMa · 1 year ago · parent commit c05e03bb5a

+ 21 - 11
core/metric/histogram.go

@@ -8,18 +8,21 @@ import (
 type (
 	// A HistogramVecOpts is a histogram vector options.
 	HistogramVecOpts struct {
-		Namespace string
-		Subsystem string
-		Name      string
-		Help      string
-		Labels    []string
-		Buckets   []float64
+		Namespace   string
+		Subsystem   string
+		Name        string
+		Help        string
+		Labels      []string
+		Buckets     []float64
+		ConstLabels map[string]string
 	}
 
 	// A HistogramVec interface represents a histogram vector.
 	HistogramVec interface {
 		// Observe adds observation v to labels.
 		Observe(v int64, labels ...string)
+		// ObserveFloat allows observing float64 values.
+		ObserveFloat(v float64, labels ...string)
 		close() bool
 	}
 
@@ -35,11 +38,12 @@ func NewHistogramVec(cfg *HistogramVecOpts) HistogramVec {
 	}
 
 	vec := prom.NewHistogramVec(prom.HistogramOpts{
-		Namespace: cfg.Namespace,
-		Subsystem: cfg.Subsystem,
-		Name:      cfg.Name,
-		Help:      cfg.Help,
-		Buckets:   cfg.Buckets,
+		Namespace:   cfg.Namespace,
+		Subsystem:   cfg.Subsystem,
+		Name:        cfg.Name,
+		Help:        cfg.Help,
+		Buckets:     cfg.Buckets,
+		ConstLabels: cfg.ConstLabels,
 	}, cfg.Labels)
 	prom.MustRegister(vec)
 	hv := &promHistogramVec{
@@ -58,6 +62,12 @@ func (hv *promHistogramVec) Observe(v int64, labels ...string) {
 	})
 }
 
+func (hv *promHistogramVec) ObserveFloat(v float64, labels ...string) {
+	update(func() {
+		hv.histogram.WithLabelValues(labels...).Observe(v)
+	})
+}
+
 func (hv *promHistogramVec) close() bool {
 	return prom.Unregister(hv.histogram)
 }
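
For reference, a minimal sketch of the two additions in use together; every name and value below is illustrative, not part of this commit:

	// assumes: import "github.com/zeromicro/go-zero/core/metric"
	latency := metric.NewHistogramVec(&metric.HistogramVecOpts{
		Namespace:   "api",
		Subsystem:   "requests",
		Name:        "duration_ms",
		Help:        "api requests duration(ms).",
		Labels:      []string{"path"},
		Buckets:     []float64{0.25, 0.5, 1, 5, 10, 100, 1000},
		ConstLabels: map[string]string{"service": "order"}, // stamped onto every series
	})
	latency.ObserveFloat(0.37, "/ping") // fractional milliseconds survive
	latency.Observe(12, "/users")       // the existing int64 path is unchanged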

+ 8 - 7
core/metric/histogram_test.go

@@ -14,7 +14,7 @@ func TestNewHistogramVec(t *testing.T) {
 		Help:    "rpc server requests duration(ms).",
 		Buckets: []float64{1, 2, 3},
 	})
-	defer histogramVec.close()
+	defer histogramVec.(*promHistogramVec).close()
 	histogramVecNil := NewHistogramVec(nil)
 	assert.NotNil(t, histogramVec)
 	assert.Nil(t, histogramVecNil)
@@ -28,9 +28,10 @@ func TestHistogramObserve(t *testing.T) {
 		Buckets: []float64{1, 2, 3},
 		Labels:  []string{"method"},
 	})
-	defer histogramVec.close()
+	defer histogramVec.(*promHistogramVec).close()
 	hv, _ := histogramVec.(*promHistogramVec)
 	hv.Observe(2, "/Users")
+	hv.ObserveFloat(1.1, "/Users")
 
 	metadata := `
 		# HELP counts rpc server requests duration(ms).
@@ -38,11 +39,11 @@ func TestHistogramObserve(t *testing.T) {
 `
 	val := `
 		counts_bucket{method="/Users",le="1"} 0
-		counts_bucket{method="/Users",le="2"} 1
-		counts_bucket{method="/Users",le="3"} 1
-		counts_bucket{method="/Users",le="+Inf"} 1
-		counts_sum{method="/Users"} 2
-        counts_count{method="/Users"} 1
+		counts_bucket{method="/Users",le="2"} 2
+		counts_bucket{method="/Users",le="3"} 2
+		counts_bucket{method="/Users",le="+Inf"} 2
+		counts_sum{method="/Users"} 3.1
+        counts_count{method="/Users"} 2
 `
 
 	err := testutil.CollectAndCompare(hv.histogram, strings.NewReader(metadata+val))

+ 2 - 1
core/stores/redis/hook.go

@@ -54,9 +54,10 @@ func (h hook) AfterProcess(ctx context.Context, cmd red.Cmder) error {
 	duration := timex.Since(start)
 	if duration > slowThreshold.Load() {
 		logDuration(ctx, []red.Cmder{cmd}, duration)
+		metricSlowCount.Inc(cmd.Name())
 	}
 
-	metricReqDur.Observe(duration.Milliseconds(), cmd.Name())
+	metricReqDur.ObserveFloat(float64(duration)/float64(time.Millisecond), cmd.Name())
 	if msg := formatError(err); len(msg) > 0 {
 		metricReqErr.Inc(cmd.Name(), msg)
 	}
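
The move from Observe to ObserveFloat pairs with the finer buckets below: time.Duration.Milliseconds() returns a truncated int64, so any call faster than 1ms used to be recorded as 0. A standalone sketch of the difference:

	package main

	import (
		"fmt"
		"time"
	)

	func main() {
		d := 750 * time.Microsecond
		fmt.Println(d.Milliseconds())                       // 0: integer truncation hides sub-ms calls
		fmt.Println(float64(d) / float64(time.Millisecond)) // 0.75: resolvable by the new 0.25/0.5/1 buckets
	}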

+ 168 - 2
core/stores/redis/metrics.go

@@ -1,6 +1,12 @@
 package redis
 
-import "github.com/zeromicro/go-zero/core/metric"
+import (
+	"sync"
+
+	red "github.com/go-redis/redis/v8"
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/zeromicro/go-zero/core/metric"
+)
 
 const namespace = "redis_client"
 
@@ -11,7 +17,7 @@ var (
 		Name:      "duration_ms",
 		Help:      "redis client requests duration(ms).",
 		Labels:    []string{"command"},
-		Buckets:   []float64{5, 10, 25, 50, 100, 250, 500, 1000, 2500},
+		Buckets:   []float64{0.25, 0.5, 1, 1.5, 2, 3, 5, 10, 25, 50, 100, 250, 500, 1000, 2000, 5000, 10000, 15000},
 	})
 	metricReqErr = metric.NewCounterVec(&metric.CounterVecOpts{
 		Namespace: namespace,
@@ -20,4 +26,164 @@ var (
 		Help:      "redis client requests error count.",
 		Labels:    []string{"command", "error"},
 	})
+	metricSlowCount = metric.NewCounterVec(&metric.CounterVecOpts{
+		Namespace: namespace,
+		Subsystem: "requests",
+		Name:      "slow_total",
+		Help:      "redis client requests slow count.",
+		Labels:    []string{"command"},
+	})
+
+	connLabels = []string{"key", "client_type"}
+
+	connCollector = newCollector()
+
+	_ prometheus.Collector = (*collector)(nil)
+)
+
+type (
+	statGetter struct {
+		clientType string
+		key        string
+		poolSize   int
+		poolStats  func() *red.PoolStats
+	}
+
+	// collector collects statistics from a redis client.
+	// It implements the prometheus.Collector interface.
+	collector struct {
+		hitDesc     *prometheus.Desc
+		missDesc    *prometheus.Desc
+		timeoutDesc *prometheus.Desc
+		totalDesc   *prometheus.Desc
+		idleDesc    *prometheus.Desc
+		staleDesc   *prometheus.Desc
+		maxDesc     *prometheus.Desc
+
+		clients []*statGetter
+		lock    sync.Mutex
+	}
 )
+
+func newCollector() *collector {
+	c := &collector{
+		hitDesc: prometheus.NewDesc(
+			prometheus.BuildFQName(namespace, "", "pool_hit_total"),
+			"Number of times a connection was found in the pool",
+			connLabels, nil,
+		),
+		missDesc: prometheus.NewDesc(
+			prometheus.BuildFQName(namespace, "", "pool_miss_total"),
+			"Number of times a connection was not found in the pool",
+			connLabels, nil,
+		),
+		timeoutDesc: prometheus.NewDesc(
+			prometheus.BuildFQName(namespace, "", "pool_timeout_total"),
+			"Number of times a timeout occurred when looking for a connection in the pool",
+			connLabels, nil,
+		),
+		totalDesc: prometheus.NewDesc(
+			prometheus.BuildFQName(namespace, "", "pool_conn_total_current"),
+			"Current number of connections in the pool",
+			connLabels, nil,
+		),
+		idleDesc: prometheus.NewDesc(
+			prometheus.BuildFQName(namespace, "", "pool_conn_idle_current"),
+			"Current number of idle connections in the pool",
+			connLabels, nil,
+		),
+		staleDesc: prometheus.NewDesc(
+			prometheus.BuildFQName(namespace, "", "pool_conn_stale_total"),
+			"Number of times a connection was removed from the pool because it was stale",
+			connLabels, nil,
+		),
+		maxDesc: prometheus.NewDesc(
+			prometheus.BuildFQName(namespace, "", "pool_conn_max"),
+			"Max number of connections in the pool",
+			connLabels, nil,
+		),
+	}
+
+	prometheus.MustRegister(c)
+
+	return c
+}
+
+// Describe implements the prometheus.Collector interface.
+func (s *collector) Describe(descs chan<- *prometheus.Desc) {
+	descs <- s.hitDesc
+	descs <- s.missDesc
+	descs <- s.timeoutDesc
+	descs <- s.totalDesc
+	descs <- s.idleDesc
+	descs <- s.staleDesc
+	descs <- s.maxDesc
+}
+
+// Collect implements the prometheus.Collector interface.
+func (s *collector) Collect(metrics chan<- prometheus.Metric) {
+	s.lock.Lock()
+	defer s.lock.Unlock()
+
+	for _, client := range s.clients {
+		key, clientType := client.key, client.clientType
+		stats := client.poolStats()
+
+		metrics <- prometheus.MustNewConstMetric(
+			s.hitDesc,
+			prometheus.CounterValue,
+			float64(stats.Hits),
+			key,
+			clientType,
+		)
+		metrics <- prometheus.MustNewConstMetric(
+			s.missDesc,
+			prometheus.CounterValue,
+			float64(stats.Misses),
+			key,
+			clientType,
+		)
+		metrics <- prometheus.MustNewConstMetric(
+			s.timeoutDesc,
+			prometheus.CounterValue,
+			float64(stats.Timeouts),
+			key,
+			clientType,
+		)
+		metrics <- prometheus.MustNewConstMetric(
+			s.totalDesc,
+			prometheus.GaugeValue,
+			float64(stats.TotalConns),
+			key,
+			clientType,
+		)
+		metrics <- prometheus.MustNewConstMetric(
+			s.idleDesc,
+			prometheus.GaugeValue,
+			float64(stats.IdleConns),
+			key,
+			clientType,
+		)
+		metrics <- prometheus.MustNewConstMetric(
+			s.staleDesc,
+			prometheus.CounterValue,
+			float64(stats.StaleConns),
+			key,
+			clientType,
+		)
+		metrics <- prometheus.MustNewConstMetric(
+			s.maxDesc,
+			prometheus.CounterValue,
+			float64(client.poolSize),
+			key,
+			clientType,
+		)
+	}
+}
+
+func (s *collector) registerClient(client *statGetter) {
+	s.lock.Lock()
+	defer s.lock.Unlock()
+
+	s.clients = append(s.clients, client)
+}
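
For orientation, a sketch of how a client hooks into this collector from inside the package (connCollector and statGetter are unexported); the address and pool size are illustrative, and the real registrations added to the client managers below have the same shape:

	// assumes: import red "github.com/go-redis/redis/v8"
	rdb := red.NewClient(&red.Options{Addr: "127.0.0.1:6379"})
	connCollector.registerClient(&statGetter{
		clientType: "node",
		key:        "127.0.0.1:6379",
		poolSize:   10,
		poolStats:  rdb.PoolStats, // invoked lazily on every Prometheus scrape
	})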

+ 130 - 0
core/stores/redis/metrics_test.go

@@ -0,0 +1,130 @@
+package redis
+
+import (
+	"io"
+	"net/http"
+	"strings"
+	"testing"
+	"time"
+
+	red "github.com/go-redis/redis/v8"
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/client_golang/prometheus/testutil"
+	"github.com/stretchr/testify/assert"
+	"github.com/zeromicro/go-zero/core/conf"
+	"github.com/zeromicro/go-zero/internal/devserver"
+)
+
+func TestRedisMetric(t *testing.T) {
+	cfg := devserver.Config{}
+	_ = conf.FillDefault(&cfg)
+	server := devserver.NewServer(cfg)
+	server.StartAsync()
+	time.Sleep(time.Second)
+
+	metricReqDur.Observe(8, "test-cmd")
+	metricReqErr.Inc("test-cmd", "internal-error")
+	metricSlowCount.Inc("test-cmd")
+
+	url := "http://127.0.0.1:6060/metrics"
+	resp, err := http.Get(url)
+	assert.Nil(t, err)
+	defer resp.Body.Close()
+	s, err := io.ReadAll(resp.Body)
+	assert.Nil(t, err)
+	content := string(s)
+	assert.Contains(t, content, "redis_client_requests_duration_ms_sum{command=\"test-cmd\"} 8\n")
+	assert.Contains(t, content, "redis_client_requests_duration_ms_count{command=\"test-cmd\"} 1\n")
+	assert.Contains(t, content, "redis_client_requests_error_total{command=\"test-cmd\",error=\"internal-error\"} 1\n")
+	assert.Contains(t, content, "redis_client_requests_slow_total{command=\"test-cmd\"} 1\n")
+}
+
+func Test_newCollector(t *testing.T) {
+	prometheus.Unregister(connCollector)
+	c := newCollector()
+	c.registerClient(&statGetter{
+		clientType: "node",
+		key:        "test1",
+		poolSize:   10,
+		poolStats: func() *red.PoolStats {
+			return &red.PoolStats{
+				Hits:       10000,
+				Misses:     10,
+				Timeouts:   5,
+				TotalConns: 100,
+				IdleConns:  20,
+				StaleConns: 1,
+			}
+		},
+	})
+	c.registerClient(&statGetter{
+		clientType: "node",
+		key:        "test2",
+		poolSize:   11,
+		poolStats: func() *red.PoolStats {
+			return &red.PoolStats{
+				Hits:       10001,
+				Misses:     11,
+				Timeouts:   6,
+				TotalConns: 101,
+				IdleConns:  21,
+				StaleConns: 2,
+			}
+		},
+	})
+	c.registerClient(&statGetter{
+		clientType: "cluster",
+		key:        "test3",
+		poolSize:   5,
+		poolStats: func() *red.PoolStats {
+			return &red.PoolStats{
+				Hits:       20000,
+				Misses:     20,
+				Timeouts:   10,
+				TotalConns: 200,
+				IdleConns:  40,
+				StaleConns: 2,
+			}
+		},
+	})
+	val := `
+		# HELP redis_client_pool_conn_idle_current Current number of idle connections in the pool
+		# TYPE redis_client_pool_conn_idle_current gauge
+		redis_client_pool_conn_idle_current{client_type="cluster",key="test3"} 40
+		redis_client_pool_conn_idle_current{client_type="node",key="test1"} 20
+		redis_client_pool_conn_idle_current{client_type="node",key="test2"} 21
+	 	# HELP redis_client_pool_conn_max Max number of connections in the pool
+	 	# TYPE redis_client_pool_conn_max counter
+		redis_client_pool_conn_max{client_type="cluster",key="test3"} 5
+	 	redis_client_pool_conn_max{client_type="node",key="test1"} 10
+		redis_client_pool_conn_max{client_type="node",key="test2"} 11
+	 	# HELP redis_client_pool_conn_stale_total Number of times a connection was removed from the pool because it was stale
+	 	# TYPE redis_client_pool_conn_stale_total counter
+		redis_client_pool_conn_stale_total{client_type="cluster",key="test3"} 2
+	 	redis_client_pool_conn_stale_total{client_type="node",key="test1"} 1
+		redis_client_pool_conn_stale_total{client_type="node",key="test2"} 2
+	 	# HELP redis_client_pool_conn_total_current Current number of connections in the pool
+	 	# TYPE redis_client_pool_conn_total_current gauge
+		redis_client_pool_conn_total_current{client_type="cluster",key="test3"} 200
+	 	redis_client_pool_conn_total_current{client_type="node",key="test1"} 100
+		redis_client_pool_conn_total_current{client_type="node",key="test2"} 101
+	 	# HELP redis_client_pool_hit_total Number of times a connection was found in the pool
+	 	# TYPE redis_client_pool_hit_total counter
+		redis_client_pool_hit_total{client_type="cluster",key="test3"} 20000
+	 	redis_client_pool_hit_total{client_type="node",key="test1"} 10000
+		redis_client_pool_hit_total{client_type="node",key="test2"} 10001
+	 	# HELP redis_client_pool_miss_total Number of times a connection was not found in the pool
+	 	# TYPE redis_client_pool_miss_total counter
+		redis_client_pool_miss_total{client_type="cluster",key="test3"} 20
+	 	redis_client_pool_miss_total{client_type="node",key="test1"} 10
+		redis_client_pool_miss_total{client_type="node",key="test2"} 11
+	 	# HELP redis_client_pool_timeout_total Number of times a timeout occurred when looking for a connection in the pool
+	 	# TYPE redis_client_pool_timeout_total counter
+		redis_client_pool_timeout_total{client_type="cluster",key="test3"} 10
+	 	redis_client_pool_timeout_total{client_type="node",key="test1"} 5
+		redis_client_pool_timeout_total{client_type="node",key="test2"} 6
+`
+
+	err := testutil.CollectAndCompare(c, strings.NewReader(val))
+	assert.NoError(t, err)
+}

+ 15 - 1
core/stores/redis/redisclientmanager.go

@@ -3,6 +3,7 @@ package redis
 import (
 	"crypto/tls"
 	"io"
+	"runtime"
 
 	red "github.com/go-redis/redis/v8"
 	"github.com/zeromicro/go-zero/core/syncx"
@@ -14,7 +15,11 @@ const (
 	idleConns       = 8
 )
 
-var clientManager = syncx.NewResourceManager()
+var (
+	clientManager = syncx.NewResourceManager()
+	// nodePoolSize is default pool size for node type of redis.
+	nodePoolSize = 10 * runtime.GOMAXPROCS(0)
+)
 
 func getClient(r *Redis) (*red.Client, error) {
 	val, err := clientManager.GetResource(r.Addr, func() (io.Closer, error) {
@@ -37,6 +42,15 @@ func getClient(r *Redis) (*red.Client, error) {
 			store.AddHook(hook)
 		}
 
+		connCollector.registerClient(&statGetter{
+			clientType: NodeType,
+			key:        r.Addr,
+			poolSize:   nodePoolSize,
+			poolStats: func() *red.PoolStats {
+				return store.PoolStats()
+			},
+		})
+
 		return store, nil
 	})
 	if err != nil {

+ 15 - 1
core/stores/redis/redisclustermanager.go

@@ -3,6 +3,7 @@ package redis
 import (
 	"crypto/tls"
 	"io"
+	"runtime"
 	"strings"
 
 	red "github.com/go-redis/redis/v8"
@@ -11,7 +12,11 @@ import (
 
 const addrSep = ","
 
-var clusterManager = syncx.NewResourceManager()
+var (
+	clusterManager = syncx.NewResourceManager()
+	// clusterPoolSize is default pool size for cluster type of redis.
+	clusterPoolSize = 5 * runtime.GOMAXPROCS(0)
+)
 
 func getCluster(r *Redis) (*red.ClusterClient, error) {
 	val, err := clusterManager.GetResource(r.Addr, func() (io.Closer, error) {
@@ -33,6 +38,15 @@ func getCluster(r *Redis) (*red.ClusterClient, error) {
 			store.AddHook(hook)
 		}
 
+		connCollector.registerClient(&statGetter{
+			clientType: ClusterType,
+			key:        r.Addr,
+			poolSize:   clusterPoolSize,
+			poolStats: func() *red.PoolStats {
+				return store.PoolStats()
+			},
+		})
+
 		return store, nil
 	})
 	if err != nil {
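
The hard-coded pool sizes are needed because go-redis does not report the configured pool size back from a client. The 10x and 5x GOMAXPROCS values appear to mirror go-redis v8's zero-value PoolSize defaults for node and cluster clients (an assumption tied to the pinned go-redis version), so pool_conn_max should match reality unless callers override PoolSize:

	package main

	import (
		"fmt"
		"runtime"
	)

	func main() {
		// assumption: these mirror go-redis v8's defaults when PoolSize is left at zero
		fmt.Println("node pool size:   ", 10*runtime.GOMAXPROCS(0))
		fmt.Println("cluster pool size:", 5*runtime.GOMAXPROCS(0)) // per cluster node
	}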

+ 143 - 3
core/stores/sqlx/metrics.go

@@ -1,8 +1,14 @@
 package sqlx
 
-import "github.com/zeromicro/go-zero/core/metric"
+import (
+	"database/sql"
+	"sync"
 
-const namespace = "sql_client"
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/zeromicro/go-zero/core/metric"
+)
+
+const namespace = "mysql_client"
 
 var (
 	metricReqDur = metric.NewHistogramVec(&metric.HistogramVecOpts{
@@ -11,7 +17,7 @@ var (
 		Name:      "duration_ms",
 		Help:      "mysql client requests duration(ms).",
 		Labels:    []string{"command"},
-		Buckets:   []float64{5, 10, 25, 50, 100, 250, 500, 1000, 2500},
+		Buckets:   []float64{0.25, 0.5, 1, 1.5, 2, 3, 5, 10, 25, 50, 100, 250, 500, 1000, 2000, 5000, 10000, 15000},
 	})
 	metricReqErr = metric.NewCounterVec(&metric.CounterVecOpts{
 		Namespace: namespace,
@@ -20,4 +26,138 @@ var (
 		Help:      "mysql client requests error count.",
 		Labels:    []string{"command", "error"},
 	})
+	metricSlowCount = metric.NewCounterVec(&metric.CounterVecOpts{
+		Namespace: namespace,
+		Subsystem: "requests",
+		Name:      "slow_total",
+		Help:      "mysql client requests slow count.",
+		Labels:    []string{"command"},
+	})
+
+	connLabels = []string{"db_name", "hash"}
+
+	connCollector = newCollector()
+
+	_ prometheus.Collector = (*collector)(nil)
 )
+
+type (
+	statGetter struct {
+		dbName    string
+		hash      string
+		poolStats func() sql.DBStats
+	}
+
+	// collector collects statistics from a sql client.
+	// It implements the prometheus.Collector interface.
+	collector struct {
+		maxOpenConnections *prometheus.Desc
+
+		openConnections  *prometheus.Desc
+		inUseConnections *prometheus.Desc
+		idleConnections  *prometheus.Desc
+
+		waitCount         *prometheus.Desc
+		waitDuration      *prometheus.Desc
+		maxIdleClosed     *prometheus.Desc
+		maxIdleTimeClosed *prometheus.Desc
+		maxLifetimeClosed *prometheus.Desc
+
+		clients []*statGetter
+		lock    sync.Mutex
+	}
+)
+
+func newCollector() *collector {
+	c := &collector{
+		maxOpenConnections: prometheus.NewDesc(
+			prometheus.BuildFQName(namespace, "", "max_open_connections"),
+			"Maximum number of open connections to the database.",
+			connLabels, nil,
+		),
+		openConnections: prometheus.NewDesc(
+			prometheus.BuildFQName(namespace, "", "open_connections"),
+			"The number of established connections both in use and idle.",
+			connLabels, nil,
+		),
+		inUseConnections: prometheus.NewDesc(
+			prometheus.BuildFQName(namespace, "", "in_use_connections"),
+			"The number of connections currently in use.",
+			connLabels, nil,
+		),
+		idleConnections: prometheus.NewDesc(
+			prometheus.BuildFQName(namespace, "", "idle_connections"),
+			"The number of idle connections.",
+			connLabels, nil,
+		),
+		waitCount: prometheus.NewDesc(
+			prometheus.BuildFQName(namespace, "", "wait_count_total"),
+			"The total number of connections waited for.",
+			connLabels, nil,
+		),
+		waitDuration: prometheus.NewDesc(
+			prometheus.BuildFQName(namespace, "", "wait_duration_seconds_total"),
+			"The total time blocked waiting for a new connection.",
+			connLabels, nil,
+		),
+		maxIdleClosed: prometheus.NewDesc(
+			prometheus.BuildFQName(namespace, "", "max_idle_closed_total"),
+			"The total number of connections closed due to SetMaxIdleConns.",
+			connLabels, nil,
+		),
+		maxIdleTimeClosed: prometheus.NewDesc(
+			prometheus.BuildFQName(namespace, "", "max_idle_time_closed_total"),
+			"The total number of connections closed due to SetConnMaxIdleTime.",
+			connLabels, nil,
+		),
+		maxLifetimeClosed: prometheus.NewDesc(
+			prometheus.BuildFQName(namespace, "", "max_lifetime_closed_total"),
+			"The total number of connections closed due to SetConnMaxLifetime.",
+			connLabels, nil,
+		),
+	}
+
+	prometheus.MustRegister(c)
+
+	return c
+}
+
+// Describe implements the prometheus.Collector interface.
+func (c *collector) Describe(ch chan<- *prometheus.Desc) {
+	ch <- c.maxOpenConnections
+	ch <- c.openConnections
+	ch <- c.inUseConnections
+	ch <- c.idleConnections
+	ch <- c.waitCount
+	ch <- c.waitDuration
+	ch <- c.maxIdleClosed
+	ch <- c.maxLifetimeClosed
+	ch <- c.maxIdleTimeClosed
+}
+
+// Collect implements the prometheus.Collector interface.
+func (c *collector) Collect(ch chan<- prometheus.Metric) {
+	c.lock.Lock()
+	defer c.lock.Unlock()
+
+	for _, client := range c.clients {
+		dbName, hash := client.dbName, client.hash
+		stats := client.poolStats()
+		ch <- prometheus.MustNewConstMetric(c.maxOpenConnections, prometheus.GaugeValue, float64(stats.MaxOpenConnections), dbName, hash)
+		ch <- prometheus.MustNewConstMetric(c.openConnections, prometheus.GaugeValue, float64(stats.OpenConnections), dbName, hash)
+		ch <- prometheus.MustNewConstMetric(c.inUseConnections, prometheus.GaugeValue, float64(stats.InUse), dbName, hash)
+		ch <- prometheus.MustNewConstMetric(c.idleConnections, prometheus.GaugeValue, float64(stats.Idle), dbName, hash)
+		ch <- prometheus.MustNewConstMetric(c.waitCount, prometheus.CounterValue, float64(stats.WaitCount), dbName, hash)
+		ch <- prometheus.MustNewConstMetric(c.waitDuration, prometheus.CounterValue, stats.WaitDuration.Seconds(), dbName, hash)
+		ch <- prometheus.MustNewConstMetric(c.maxIdleClosed, prometheus.CounterValue, float64(stats.MaxIdleClosed), dbName, hash)
+		ch <- prometheus.MustNewConstMetric(c.maxLifetimeClosed, prometheus.CounterValue, float64(stats.MaxLifetimeClosed), dbName, hash)
+		ch <- prometheus.MustNewConstMetric(c.maxIdleTimeClosed, prometheus.CounterValue, float64(stats.MaxIdleTimeClosed), dbName, hash)
+	}
+}
+
+func (c *collector) registerClient(client *statGetter) {
+	c.lock.Lock()
+	defer c.lock.Unlock()
+
+	c.clients = append(c.clients, client)
+}
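
Registration mirrors the redis side. A sketch of wiring a *sql.DB into this collector from inside the package; the dbName and hash values are illustrative (e.g. a name parsed from the DSN plus a hash that distinguishes pools sharing it):

	// assumes: import "database/sql" and a registered mysql driver
	db, err := sql.Open("mysql", dsn)
	if err != nil {
		return err
	}
	connCollector.registerClient(&statGetter{
		dbName:    "orders",
		hash:      "a1b2c3",  // illustrative pool identifier
		poolStats: db.Stats,  // sql.DB.Stats is cheap and safe to call per scrape
	})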

+ 147 - 0
core/stores/sqlx/metrics_test.go

@@ -0,0 +1,147 @@
+package sqlx
+
+import (
+	"database/sql"
+	"io"
+	"net/http"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/client_golang/prometheus/testutil"
+	"github.com/stretchr/testify/assert"
+	"github.com/zeromicro/go-zero/core/conf"
+	"github.com/zeromicro/go-zero/internal/devserver"
+)
+
+func TestSqlxMetric(t *testing.T) {
+	cfg := devserver.Config{}
+	_ = conf.FillDefault(&cfg)
+	cfg.Port = 6480
+	server := devserver.NewServer(cfg)
+	server.StartAsync()
+	time.Sleep(time.Second)
+
+	metricReqDur.Observe(8, "test-cmd")
+	metricReqErr.Inc("test-cmd", "internal-error")
+	metricSlowCount.Inc("test-cmd")
+
+	url := "http://127.0.0.1:6480/metrics"
+	resp, err := http.Get(url)
+	assert.Nil(t, err)
+	defer resp.Body.Close()
+	s, err := io.ReadAll(resp.Body)
+	assert.Nil(t, err)
+	content := string(s)
+	assert.Contains(t, content, "mysql_client_requests_duration_ms_sum{command=\"test-cmd\"} 8\n")
+	assert.Contains(t, content, "mysql_client_requests_duration_ms_count{command=\"test-cmd\"} 1\n")
+	assert.Contains(t, content, "mysql_client_requests_error_total{command=\"test-cmd\",error=\"internal-error\"} 1\n")
+	assert.Contains(t, content, "mysql_client_requests_slow_total{command=\"test-cmd\"} 1\n")
+}
+
+func TestMetricCollector(t *testing.T) {
+	prometheus.Unregister(connCollector)
+	c := newCollector()
+	c.registerClient(&statGetter{
+		dbName: "db-1",
+		hash:   "hash-1",
+		poolStats: func() sql.DBStats {
+			return sql.DBStats{
+				MaxOpenConnections: 1,
+				OpenConnections:    2,
+				InUse:              3,
+				Idle:               4,
+				WaitCount:          5,
+				WaitDuration:       6 * time.Second,
+				MaxIdleClosed:      7,
+				MaxIdleTimeClosed:  8,
+				MaxLifetimeClosed:  9,
+			}
+		},
+	})
+	c.registerClient(&statGetter{
+		dbName: "db-1",
+		hash:   "hash-2",
+		poolStats: func() sql.DBStats {
+			return sql.DBStats{
+				MaxOpenConnections: 10,
+				OpenConnections:    20,
+				InUse:              30,
+				Idle:               40,
+				WaitCount:          50,
+				WaitDuration:       60 * time.Second,
+				MaxIdleClosed:      70,
+				MaxIdleTimeClosed:  80,
+				MaxLifetimeClosed:  90,
+			}
+		},
+	})
+	c.registerClient(&statGetter{
+		dbName: "db-2",
+		hash:   "hash-2",
+		poolStats: func() sql.DBStats {
+			return sql.DBStats{
+				MaxOpenConnections: 100,
+				OpenConnections:    200,
+				InUse:              300,
+				Idle:               400,
+				WaitCount:          500,
+				WaitDuration:       600 * time.Second,
+				MaxIdleClosed:      700,
+				MaxIdleTimeClosed:  800,
+				MaxLifetimeClosed:  900,
+			}
+		},
+	})
+	val := `
+		# HELP mysql_client_idle_connections The number of idle connections.
+		# TYPE mysql_client_idle_connections gauge
+		mysql_client_idle_connections{db_name="db-1",hash="hash-1"} 4
+		mysql_client_idle_connections{db_name="db-1",hash="hash-2"} 40
+		mysql_client_idle_connections{db_name="db-2",hash="hash-2"} 400
+		# HELP mysql_client_in_use_connections The number of connections currently in use.
+		# TYPE mysql_client_in_use_connections gauge
+		mysql_client_in_use_connections{db_name="db-1",hash="hash-1"} 3
+		mysql_client_in_use_connections{db_name="db-1",hash="hash-2"} 30
+		mysql_client_in_use_connections{db_name="db-2",hash="hash-2"} 300
+		# HELP mysql_client_max_idle_closed_total The total number of connections closed due to SetMaxIdleConns.
+		# TYPE mysql_client_max_idle_closed_total counter
+		mysql_client_max_idle_closed_total{db_name="db-1",hash="hash-1"} 7
+		mysql_client_max_idle_closed_total{db_name="db-1",hash="hash-2"} 70
+		mysql_client_max_idle_closed_total{db_name="db-2",hash="hash-2"} 700
+		# HELP mysql_client_max_idle_time_closed_total The total number of connections closed due to SetConnMaxIdleTime.
+		# TYPE mysql_client_max_idle_time_closed_total counter
+		mysql_client_max_idle_time_closed_total{db_name="db-1",hash="hash-1"} 8
+		mysql_client_max_idle_time_closed_total{db_name="db-1",hash="hash-2"} 80
+		mysql_client_max_idle_time_closed_total{db_name="db-2",hash="hash-2"} 800
+		# HELP mysql_client_max_lifetime_closed_total The total number of connections closed due to SetConnMaxLifetime.
+		# TYPE mysql_client_max_lifetime_closed_total counter
+		mysql_client_max_lifetime_closed_total{db_name="db-1",hash="hash-1"} 9
+		mysql_client_max_lifetime_closed_total{db_name="db-1",hash="hash-2"} 90
+		mysql_client_max_lifetime_closed_total{db_name="db-2",hash="hash-2"} 900
+		# HELP mysql_client_max_open_connections Maximum number of open connections to the database.
+		# TYPE mysql_client_max_open_connections gauge
+		mysql_client_max_open_connections{db_name="db-1",hash="hash-1"} 1
+		mysql_client_max_open_connections{db_name="db-1",hash="hash-2"} 10
+		mysql_client_max_open_connections{db_name="db-2",hash="hash-2"} 100
+		# HELP mysql_client_open_connections The number of established connections both in use and idle.
+		# TYPE mysql_client_open_connections gauge
+		mysql_client_open_connections{db_name="db-1",hash="hash-1"} 2
+		mysql_client_open_connections{db_name="db-1",hash="hash-2"} 20
+		mysql_client_open_connections{db_name="db-2",hash="hash-2"} 200
+		# HELP mysql_client_wait_count_total The total number of connections waited for.
+		# TYPE mysql_client_wait_count_total counter
+		mysql_client_wait_count_total{db_name="db-1",hash="hash-1"} 5
+		mysql_client_wait_count_total{db_name="db-1",hash="hash-2"} 50
+		mysql_client_wait_count_total{db_name="db-2",hash="hash-2"} 500
+		# HELP mysql_client_wait_duration_seconds_total The total time blocked waiting for a new connection.
+		# TYPE mysql_client_wait_duration_seconds_total counter
+		mysql_client_wait_duration_seconds_total{db_name="db-1",hash="hash-1"} 6
+		mysql_client_wait_duration_seconds_total{db_name="db-1",hash="hash-2"} 60
+		mysql_client_wait_duration_seconds_total{db_name="db-2",hash="hash-2"} 600
+`
+
+	err := testutil.CollectAndCompare(c, strings.NewReader(val))
+	assert.NoError(t, err)
+}

+ 2 - 1
core/stores/sqlx/stmt.go

@@ -128,6 +128,7 @@ func (e *realSqlGuard) finish(ctx context.Context, err error) {
 	duration := timex.Since(e.startTime)
 	if duration > slowThreshold.Load() {
 		logx.WithContext(ctx).WithDuration(duration).Slowf("[SQL] %s: slowcall - %s", e.command, e.stmt)
+		metricSlowCount.Inc(e.command)
 	} else if logSql.True() {
 		logx.WithContext(ctx).WithDuration(duration).Infof("sql %s: %s", e.command, e.stmt)
 	}
@@ -136,7 +137,7 @@ func (e *realSqlGuard) finish(ctx context.Context, err error) {
 		logSqlError(ctx, e.stmt, err)
 	}
 
-	metricReqDur.Observe(duration.Milliseconds(), e.command)
+	metricReqDur.ObserveFloat(float64(duration)/float64(time.Millisecond), e.command)
 }
 
 func (e *realSqlGuard) start(q string, args ...any) error {

+ 71 - 0
rest/httpc/internal/metricsinterceptor.go

@@ -0,0 +1,71 @@
+package internal
+
+import (
+	"net/http"
+	"net/url"
+	"strconv"
+	"time"
+
+	"github.com/zeromicro/go-zero/core/metric"
+	"github.com/zeromicro/go-zero/core/timex"
+)
+
+const clientNamespace = "httpc_client"
+
+var (
+	MetricClientReqDur = metric.NewHistogramVec(&metric.HistogramVecOpts{
+		Namespace: clientNamespace,
+		Subsystem: "requests",
+		Name:      "duration_ms",
+		Help:      "http client requests duration(ms).",
+		Labels:    []string{"name", "method", "url"},
+		Buckets:   []float64{0.25, 0.5, 1, 2, 5, 10, 25, 50, 100, 250, 500, 1000, 2000, 5000, 10000, 15000},
+	})
+
+	MetricClientReqCodeTotal = metric.NewCounterVec(&metric.CounterVecOpts{
+		Namespace: clientNamespace,
+		Subsystem: "requests",
+		Name:      "code_total",
+		Help:      "http client requests code count.",
+		Labels:    []string{"name", "method", "url", "code"},
+	})
+)
+
+type (
+	MetricsURLRewriter func(u url.URL) string
+)
+
+func MetricsInterceptor(name string, pr MetricsURLRewriter) Interceptor {
+	return func(r *http.Request) (*http.Request, ResponseHandler) {
+		startTime := timex.Now()
+		return r, func(resp *http.Response, err error) {
+			u := cleanURL(*r.URL)
+			method := r.Method
+			var (
+				code int
+				path string
+			)
+			// on error or nil response, record code 500
+			if err != nil || resp == nil {
+				code = http.StatusInternalServerError
+			} else {
+				code = resp.StatusCode
+			}
+			if pr != nil {
+				path = pr(u)
+			} else {
+				path = u.String()
+			}
+
+			MetricClientReqDur.ObserveFloat(float64(timex.Since(startTime))/float64(time.Millisecond), name, method, path)
+			MetricClientReqCodeTotal.Inc(name, method, path, strconv.Itoa(code))
+		}
+	}
+}
+
+func cleanURL(r url.URL) url.URL {
+	r.RawQuery = ""
+	r.RawFragment = ""
+	r.User = nil
+	return r
+}
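
MetricsURLRewriter exists to keep label cardinality bounded: with a nil rewriter the full cleaned URL becomes the url label value, which can explode for per-entity paths. A usage sketch in the spirit of the test below, with a hypothetical rewriter that collapses user IDs into one route template:

	rewrite := func(u url.URL) string {
		u.Path = "/users/:id" // assumption: the caller knows its route shape
		return u.String()
	}
	interceptor := MetricsInterceptor("user-api", rewrite)
	req, handler := interceptor(req) // req built via http.NewRequest, as in the test
	resp, err := http.DefaultClient.Do(req)
	handler(resp, err) // records requests_duration_ms and requests_code_total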

+ 35 - 0
rest/httpc/internal/metricsinterceptor_test.go

@@ -0,0 +1,35 @@
+package internal
+
+import (
+	"net/http"
+	"net/http/httptest"
+	"testing"
+	"time"
+
+	"github.com/golang/mock/gomock"
+	"github.com/stretchr/testify/assert"
+	"github.com/zeromicro/go-zero/core/logx"
+)
+
+func TestMetricsInterceptor(t *testing.T) {
+	c := gomock.NewController(t)
+	defer c.Finish()
+
+	logx.Disable()
+
+	svr := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		time.Sleep(100 * time.Millisecond)
+		w.WriteHeader(http.StatusInternalServerError)
+	}))
+	defer svr.Close()
+
+	req, err := http.NewRequest(http.MethodGet, svr.URL, nil)
+	assert.NotNil(t, req)
+	assert.Nil(t, err)
+	interceptor := MetricsInterceptor("test", nil)
+	req, handler := interceptor(req)
+	resp, err := http.DefaultClient.Do(req)
+	assert.NotNil(t, resp)
+	assert.Nil(t, err)
+	handler(resp, err)
+}

+ 1 - 1
zrpc/internal/clientinterceptors/prometheusinterceptor.go

@@ -19,7 +19,7 @@ var (
 		Name:      "duration_ms",
 		Help:      "rpc client requests duration(ms).",
 		Labels:    []string{"method"},
-		Buckets:   []float64{5, 10, 25, 50, 100, 250, 500, 1000},
+		Buckets:   []float64{1, 2, 5, 10, 25, 50, 100, 250, 500, 1000, 2000, 5000},
 	})
 
 	metricClientReqCodeTotal = metric.NewCounterVec(&metric.CounterVecOpts{

+ 1 - 1
zrpc/internal/serverinterceptors/prometheusinterceptor.go

@@ -19,7 +19,7 @@ var (
 		Name:      "duration_ms",
 		Help:      "rpc server requests duration(ms).",
 		Labels:    []string{"method"},
-		Buckets:   []float64{5, 10, 25, 50, 100, 250, 500, 1000},
+		Buckets:   []float64{1, 2, 5, 10, 25, 50, 100, 250, 500, 1000, 2000, 5000},
 	})
 
 	metricServerReqCodeTotal = metric.NewCounterVec(&metric.CounterVecOpts{