Skip to content

Commit 4d640c1

Browse files
smp server: log prometheus metrics (#1411)
* smp server: log prometheus metrics * save metrics * diff * lines * version * do not include Prometheus into client * corrections Co-authored-by: sh <[email protected]> * corrections Co-authored-by: sh <[email protected]> * corrections Co-authored-by: sh <[email protected]> * add timestamp to metrics * remove type * remove version --------- Co-authored-by: sh <[email protected]>
1 parent 0a82730 commit 4d640c1

File tree

9 files changed

+484
-10
lines changed

9 files changed

+484
-10
lines changed

simplexmq.cabal

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,7 @@ library
204204
Simplex.Messaging.Server.MsgStore.STM
205205
Simplex.Messaging.Server.MsgStore.Types
206206
Simplex.Messaging.Server.NtfStore
207+
Simplex.Messaging.Server.Prometheus
207208
Simplex.Messaging.Server.QueueStore
208209
Simplex.Messaging.Server.QueueStore.STM
209210
Simplex.Messaging.Server.Stats

src/Simplex/Messaging/Server.hs

Lines changed: 57 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ import Data.Maybe (catMaybes, fromMaybe, isJust, isNothing)
7171
import Data.Semigroup (Sum (..))
7272
import qualified Data.Text as T
7373
import Data.Text.Encoding (decodeLatin1)
74+
import qualified Data.Text.IO as T
7475
import Data.Time.Clock (UTCTime (..), diffTimeToPicoseconds, getCurrentTime)
7576
import Data.Time.Clock.System (SystemTime (..), getSystemTime)
7677
import Data.Time.Format.ISO8601 (iso8601Show)
@@ -98,6 +99,7 @@ import Simplex.Messaging.Server.MsgStore.Journal (JournalQueue, closeMsgQueue)
9899
import Simplex.Messaging.Server.MsgStore.STM
99100
import Simplex.Messaging.Server.MsgStore.Types
100101
import Simplex.Messaging.Server.NtfStore
102+
import Simplex.Messaging.Server.Prometheus
101103
import Simplex.Messaging.Server.QueueStore
102104
import Simplex.Messaging.Server.QueueStore.QueueInfo
103105
import Simplex.Messaging.Server.QueueStore.STM
@@ -176,7 +178,11 @@ smpServer started cfg@ServerConfig {transports, transportConfig = tCfg} attachHT
176178
: receiveFromProxyAgent pa
177179
: expireNtfsThread cfg
178180
: sigIntHandlerThread
179-
: map runServer transports <> expireMessagesThread_ cfg <> serverStatsThread_ cfg <> controlPortThread_ cfg
181+
: map runServer transports
182+
<> expireMessagesThread_ cfg
183+
<> serverStatsThread_ cfg
184+
<> prometheusMetricsThread_ cfg
185+
<> controlPortThread_ cfg
180186
)
181187
`finally` stopServer s
182188
where
@@ -555,6 +561,50 @@ smpServer started cfg@ServerConfig {transports, transportConfig = tCfg} attachHT
555561
showProxyStats ProxyStatsData {_pRequests, _pSuccesses, _pErrorsConnect, _pErrorsCompat, _pErrorsOther} =
556562
[show _pRequests, show _pSuccesses, show _pErrorsConnect, show _pErrorsCompat, show _pErrorsOther]
557563

564+
-- | Optional background thread that periodically saves Prometheus metrics.
--
-- Produces a single 'savePrometheusMetrics' action when 'prometheusInterval'
-- is set in the server configuration, and an empty list otherwise.
prometheusMetricsThread_ :: ServerConfig -> [M ()]
prometheusMetricsThread_ ServerConfig {prometheusInterval, prometheusMetricsFile} =
  maybe [] (\secs -> [savePrometheusMetrics secs prometheusMetricsFile]) prometheusInterval
568+
569+
-- | Periodically write a snapshot of server metrics to a file.
--
-- @saveInterval@ is the period in seconds and @metricsFile@ is the file that
-- is overwritten with the output of 'prometheusMetrics' on every iteration.
-- The loop sleeps before the first snapshot, so the first write happens one
-- full interval after the thread starts. This action never returns.
savePrometheusMetrics :: Int -> FilePath -> M ()
savePrometheusMetrics saveInterval metricsFile = do
  labelMyThread "savePrometheusMetrics"
  liftIO . putStrLn $ "Prometheus metrics saved every " <> show saveInterval <> " seconds to " <> metricsFile
  env <- ask
  AMS _ store <- asks msgStore
  stats <- asks serverStats
  liftIO . forever $ do
    threadDelay delayMicros
    now <- getCurrentTime
    serverMetrics <- getServerMetrics store stats
    realTimeMetrics <- getRealTimeMetrics env
    T.writeFile metricsFile (prometheusMetrics serverMetrics realTimeMetrics now)
  where
    -- threadDelay takes microseconds; the configured interval is in seconds
    delayMicros = 1000000 * saveInterval
583+
584+
-- | Collect the metrics derived from server statistics and the queue store.
--
-- Reads the current statistics data from @ss@, computes the period counts for
-- active queues and active notification queues from it, and takes the sizes of
-- the 'activeMsgQueues' and @notifiers'@ maps of the store @st@.
getServerMetrics :: STMQueueStore s => s -> ServerStats -> IO ServerMetrics
getServerMetrics st ss = do
  stats <- getServerStatsData ss
  queueCount <- fmap M.size (readTVarIO (activeMsgQueues st))
  notifierCount <- fmap M.size (readTVarIO (notifiers' st))
  pure
    ServerMetrics
      { statsData = stats,
        activeQueueCounts = periodStatDataCounts (_activeQueues stats),
        activeNtfCounts = periodStatDataCounts (_activeQueuesNtf stats),
        queueCount,
        notifierCount
      }
592+
593+
-- | Take a snapshot of the counters read directly from the server environment:
-- per-port socket statistics, the number of threads, and the sizes of the
-- client, subscriber and subscribed-client collections.
--
-- The thread count is reported as 0 when built against base older than 4.18.
getRealTimeMetrics :: Env -> IO RealTimeMetrics
getRealTimeMetrics Env {clients, sockets, server = Server {subscribers, notifiers, subClients, ntfSubClients}} = do
  socketStats <- readTVarIO sockets >>= mapM (traverse getSocketStats)
  threadsCount <- countThreads
  clientsCount <- fmap IM.size (readTVarIO clients)
  smpSubsCount <- fmap M.size (readTVarIO subscribers)
  smpSubClientsCount <- fmap IM.size (readTVarIO subClients)
  ntfSubsCount <- fmap M.size (readTVarIO notifiers)
  ntfSubClientsCount <- fmap IM.size (readTVarIO ntfSubClients)
  pure
    RealTimeMetrics
      { socketStats,
        threadsCount,
        clientsCount,
        smpSubsCount,
        smpSubClientsCount,
        ntfSubsCount,
        ntfSubClientsCount
      }
  where
#if MIN_VERSION_base(4,18,0)
    countThreads = length <$> listThreads
#else
    countThreads = pure 0
#endif
607+
558608
runClient :: Transport c => C.APrivateSignKey -> TProxy c -> c -> M ()
559609
runClient signKey tp h = do
560610
kh <- asks serverIdentity
@@ -695,13 +745,13 @@ smpServer started cfg@ServerConfig {transports, transportConfig = tCfg} attachHT
695745
#endif
696746
CPSockets -> withUserRole $ unliftIO u (asks sockets) >>= readTVarIO >>= mapM_ putSockets
697747
where
698-
putSockets (tcpPort, (accepted', closed', active')) = do
699-
(accepted, closed, active) <- (,,) <$> readTVarIO accepted' <*> readTVarIO closed' <*> readTVarIO active'
748+
putSockets (tcpPort, socketsState) = do
749+
ss <- getSocketStats socketsState
700750
hPutStrLn h $ "Sockets for port " <> tcpPort <> ":"
701-
hPutStrLn h $ "accepted: " <> show accepted
702-
hPutStrLn h $ "closed: " <> show closed
703-
hPutStrLn h $ "active: " <> show (IM.size active)
704-
hPutStrLn h $ "leaked: " <> show (accepted - closed - IM.size active)
751+
hPutStrLn h $ "accepted: " <> show (socketsAccepted ss)
752+
hPutStrLn h $ "closed: " <> show (socketsClosed ss)
753+
hPutStrLn h $ "active: " <> show (socketsActive ss)
754+
hPutStrLn h $ "leaked: " <> show (socketsLeaked ss)
705755
CPSocketThreads -> withAdminRole $ do
706756
#if MIN_VERSION_base(4,18,0)
707757
unliftIO u (asks sockets) >>= readTVarIO >>= mapM_ putSocketThreads

src/Simplex/Messaging/Server/Env/STM.hs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,9 @@ data ServerConfig = ServerConfig
9696
serverStatsLogFile :: FilePath,
9797
-- | file to save and restore stats
9898
serverStatsBackupFile :: Maybe FilePath,
99+
-- | interval in seconds (Nothing disables saving) and file to save prometheus metrics
100+
prometheusInterval :: Maybe Int,
101+
prometheusMetricsFile :: FilePath,
99102
-- | notification delivery interval
100103
ntfDeliveryInterval :: Int,
101104
-- | interval between sending pending END events to unsubscribed clients, seconds

src/Simplex/Messaging/Server/Main.hs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,9 @@ smpServerCLI_ generateSite serveStaticFiles attachStaticFiles cfgPath logPath =
253253
<> ("expire_ntfs_hours: " <> tshow defNtfExpirationHours <> "\n\n")
254254
<> "# Log daily server statistics to CSV file\n"
255255
<> ("log_stats: " <> onOff logStats <> "\n\n")
256-
<> "[AUTH]\n\
256+
<> "# Log interval for real-time Prometheus metrics\n\
257+
\# prometheus_interval: 300\n\n\
258+
\[AUTH]\n\
257259
\# Set new_queues option to off to completely prohibit creating new messaging queues.\n\
258260
\# This can be useful when you want to decommission the server, but not all connections are switched yet.\n\
259261
\new_queues: on\n\n\
@@ -431,6 +433,8 @@ smpServerCLI_ generateSite serveStaticFiles attachStaticFiles cfgPath logPath =
431433
logStatsStartTime = 0, -- seconds from 00:00 UTC
432434
serverStatsLogFile = combine logPath "smp-server-stats.daily.log",
433435
serverStatsBackupFile = logStats $> combine logPath "smp-server-stats.log",
436+
prometheusInterval = eitherToMaybe $ read . T.unpack <$> lookupValue "STORE_LOG" "prometheus_interval" ini,
437+
prometheusMetricsFile = combine logPath "smp-server-metrics.txt",
434438
pendingENDInterval = 15000000, -- 15 seconds
435439
ntfDeliveryInterval = 3000000, -- 3 seconds
436440
smpServerVRange = supportedServerSMPRelayVRange,

0 commit comments

Comments
 (0)