Add Prometheus endpoint
Add a server for serving Prometheus metrics. Include a configuration block in the config file. Provide HTTP metrics on the API, along with process-level metrics and DB pool metrics.
This commit is contained in:
parent
f0e487f18a
commit
6888e35787
7 changed files with 226 additions and 4 deletions
50
Cargo.lock
generated
50
Cargo.lock
generated
|
|
@ -317,6 +317,18 @@ dependencies = [
|
|||
"syn 1.0.103",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "actix-web-prom"
|
||||
version = "0.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9df3127d20a5d01c9fc9aceb969a38d31a6767e1b48a54d55a8f56c769a84923"
|
||||
dependencies = [
|
||||
"actix-web",
|
||||
"futures-core",
|
||||
"pin-project-lite",
|
||||
"prometheus",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "addr2line"
|
||||
version = "0.19.0"
|
||||
|
|
@ -2795,6 +2807,7 @@ dependencies = [
|
|||
"activitypub_federation",
|
||||
"actix-cors",
|
||||
"actix-web",
|
||||
"actix-web-prom",
|
||||
"chrono",
|
||||
"clokwerk",
|
||||
"console-subscriber",
|
||||
|
|
@ -2812,6 +2825,7 @@ dependencies = [
|
|||
"opentelemetry 0.17.0",
|
||||
"opentelemetry-otlp 0.10.0",
|
||||
"pict-rs",
|
||||
"prometheus",
|
||||
"reqwest",
|
||||
"reqwest-middleware",
|
||||
"reqwest-tracing",
|
||||
|
|
@ -4082,6 +4096,36 @@ dependencies = [
|
|||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "procfs"
|
||||
version = "0.14.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b1de8dacb0873f77e6aefc6d71e044761fcc68060290f5b1089fcdf84626bb69"
|
||||
dependencies = [
|
||||
"bitflags 1.3.2",
|
||||
"byteorder",
|
||||
"hex",
|
||||
"lazy_static",
|
||||
"rustix",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "prometheus"
|
||||
version = "0.13.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "449811d15fbdf5ceb5c1144416066429cf82316e2ec8ce0c1f6f8a02e7bbcf8c"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"fnv",
|
||||
"lazy_static",
|
||||
"libc",
|
||||
"memchr",
|
||||
"parking_lot 0.12.1",
|
||||
"procfs",
|
||||
"protobuf",
|
||||
"thiserror",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "prost"
|
||||
version = "0.9.0"
|
||||
|
|
@ -4168,6 +4212,12 @@ dependencies = [
|
|||
"prost 0.11.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "protobuf"
|
||||
version = "2.28.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94"
|
||||
|
||||
[[package]]
|
||||
name = "psm"
|
||||
version = "0.1.21"
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ lto = "thin"
|
|||
[features]
|
||||
embed-pictrs = ["pict-rs"]
|
||||
console = ["console-subscriber", "opentelemetry", "opentelemetry-otlp", "tracing-opentelemetry", "reqwest-tracing/opentelemetry_0_16"]
|
||||
prom = ["prometheus", "actix-web-prom"]
|
||||
default = []
|
||||
|
||||
[workspace]
|
||||
|
|
@ -142,4 +143,6 @@ rustls = { workspace = true }
|
|||
futures-util = { workspace = true }
|
||||
tokio-postgres = { workspace = true }
|
||||
tokio-postgres-rustls = { workspace = true }
|
||||
chrono = { workspace = true }
|
||||
chrono = { workspace = true }
|
||||
prometheus = { version = "0.13.3", features = ["process"], optional = true }
|
||||
actix-web-prom = { version = "0.6.0", optional = true }
|
||||
|
|
@ -80,4 +80,8 @@
|
|||
worker_count: 0
|
||||
# The number of activitypub federation retry workers that can be in-flight concurrently
|
||||
retry_count: 0
|
||||
prometheus: {
|
||||
bind: "string"
|
||||
port: 123
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -45,6 +45,10 @@ pub struct Settings {
|
|||
/// The number of activitypub federation retry workers that can be in-flight concurrently
|
||||
#[default(0)]
|
||||
pub retry_count: usize,
|
||||
// Prometheus configuration.
|
||||
#[default(None)]
|
||||
#[doku(example = "Some(Default::default())")]
|
||||
pub prometheus: Option<PrometheusConfig>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Serialize, Clone, SmartDefault, Document)]
|
||||
|
|
@ -157,3 +161,15 @@ pub struct SetupConfig {
|
|||
#[default(None)]
|
||||
pub admin_email: Option<String>,
|
||||
}
|
||||
|
||||
// Settings for the Prometheus metrics server. Both fields are optional; the
// `#[default(...)]` attributes below supply 127.0.0.1 and 10002.
// `deny_unknown_fields` makes deserialization reject any key other than `bind` and `port`.
#[derive(Debug, Deserialize, Serialize, Clone, SmartDefault, Document)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
pub struct PrometheusConfig {
|
||||
// IP address that the Prometheus metrics will be served on (default: 127.0.0.1).
// Documented as a plain string in the generated config docs via `doku(as = "String")`.
|
||||
#[default(Some(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1))))]
|
||||
#[doku(as = "String")]
|
||||
pub bind: Option<IpAddr>,
|
||||
// Port that the Prometheus metrics will be served on (default: 10002).
// NOTE(review): the field is an `i32`, so the type itself does not restrict the value to
// the valid TCP port range 0-65535.
|
||||
#[default(Some(10002))]
|
||||
pub port: Option<i32>,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -38,6 +38,11 @@ services:
|
|||
environment:
|
||||
- RUST_LOG="warn,lemmy_server=debug,lemmy_api=debug,lemmy_api_common=debug,lemmy_api_crud=debug,lemmy_apub=debug,lemmy_db_schema=debug,lemmy_db_views=debug,lemmy_db_views_actor=debug,lemmy_db_views_moderator=debug,lemmy_routes=debug,lemmy_utils=debug,lemmy_websocket=debug"
|
||||
- RUST_BACKTRACE=full
|
||||
ports:
|
||||
# prometheus metrics available at the path /metrics on port 10002 by default
|
||||
# requires lemmy server to be compiled with `prom` feature enabled
|
||||
# cargo build --features=prom
|
||||
- "10002:10002"
|
||||
volumes:
|
||||
- ./lemmy.hjson:/config/config.hjson:Z
|
||||
depends_on:
|
||||
|
|
|
|||
30
src/lib.rs
30
src/lib.rs
|
|
@ -1,5 +1,7 @@
|
|||
pub mod api_routes_http;
|
||||
pub mod code_migrations;
|
||||
#[cfg(feature = "prom")]
|
||||
pub mod prom;
|
||||
pub mod root_span_builder;
|
||||
pub mod scheduled_tasks;
|
||||
#[cfg(feature = "console")]
|
||||
|
|
@ -35,6 +37,12 @@ use tracing_error::ErrorLayer;
|
|||
use tracing_log::LogTracer;
|
||||
use tracing_subscriber::{filter::Targets, layer::SubscriberExt, Layer, Registry};
|
||||
use url::Url;
|
||||
#[cfg(feature = "prom")]
|
||||
use {
|
||||
actix_web_prom::PrometheusMetricsBuilder,
|
||||
prom::serve_prometheus,
|
||||
prometheus::default_registry,
|
||||
};
|
||||
|
||||
/// Max timeout for http requests
|
||||
pub(crate) const REQWEST_TIMEOUT: Duration = Duration::from_secs(10);
|
||||
|
|
@ -119,6 +127,9 @@ pub async fn start_lemmy_server() -> Result<(), LemmyError> {
|
|||
});
|
||||
}
|
||||
|
||||
#[cfg(feature = "prom")]
|
||||
serve_prometheus(settings.prometheus.as_ref(), context.clone());
|
||||
|
||||
let settings_bind = settings.clone();
|
||||
|
||||
let federation_config = FederationConfig::builder()
|
||||
|
|
@ -134,6 +145,14 @@ pub async fn start_lemmy_server() -> Result<(), LemmyError> {
|
|||
.build()
|
||||
.await?;
|
||||
|
||||
// this must come before the HttpServer creation
|
||||
// creates a middleware that populates http metrics for each path, method, and status code
|
||||
#[cfg(feature = "prom")]
|
||||
let prom_api_metrics = PrometheusMetricsBuilder::new("lemmy_api")
|
||||
.registry(default_registry().clone())
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
// Create Http server with websocket support
|
||||
HttpServer::new(move || {
|
||||
let cors_config = if cfg!(debug_assertions) {
|
||||
|
|
@ -145,7 +164,7 @@ pub async fn start_lemmy_server() -> Result<(), LemmyError> {
|
|||
.allowed_origin(&settings.get_protocol_and_hostname())
|
||||
};
|
||||
|
||||
App::new()
|
||||
let app = App::new()
|
||||
.wrap(middleware::Logger::new(
|
||||
// This is the default log format save for the usage of %{r}a over %a to guarantee to record the client's (forwarded) IP and not the last peer address, since the latter is frequently just a reverse proxy
|
||||
"%{r}a '%r' %s %b '%{Referer}i' '%{User-Agent}i' %T",
|
||||
|
|
@ -155,8 +174,13 @@ pub async fn start_lemmy_server() -> Result<(), LemmyError> {
|
|||
.wrap(TracingLogger::<QuieterRootSpanBuilder>::new())
|
||||
.app_data(Data::new(context.clone()))
|
||||
.app_data(Data::new(rate_limit_cell.clone()))
|
||||
.wrap(FederationMiddleware::new(federation_config.clone()))
|
||||
// The routes
|
||||
.wrap(FederationMiddleware::new(federation_config.clone()));
|
||||
|
||||
#[cfg(feature = "prom")]
|
||||
let app = app.wrap(prom_api_metrics.clone());
|
||||
|
||||
// The routes
|
||||
app
|
||||
.configure(|cfg| api_routes_http::config(cfg, rate_limit_cell))
|
||||
.configure(|cfg| {
|
||||
if federation_enabled {
|
||||
|
|
|
|||
120
src/prom.rs
Normal file
120
src/prom.rs
Normal file
|
|
@ -0,0 +1,120 @@
|
|||
use actix_web::{rt::System, web, App, HttpResponse, HttpServer, Responder};
|
||||
use lemmy_api_common::context::LemmyContext;
|
||||
use lemmy_utils::settings::structs::PrometheusConfig;
|
||||
use prometheus::{default_registry, Encoder, Gauge, Opts, TextEncoder};
|
||||
use std::{
|
||||
net::{IpAddr, Ipv4Addr},
|
||||
sync::Arc,
|
||||
thread,
|
||||
};
|
||||
|
||||
/// State shared with the `/metrics` request handler.
struct PromContext {
|
||||
/// Lemmy application context; its database pool status is read on every scrape.
lemmy: LemmyContext,
|
||||
/// Gauges that are refreshed from the pool status before the metrics are encoded.
db_pool_metrics: DbPoolMetrics,
|
||||
}
|
||||
|
||||
/// The `lemmy_db_pool_*` gauges describing the database connection pool.
struct DbPoolMetrics {
|
||||
/// `lemmy_db_pool_max_connections`: maximum number of connections in the pool.
max_size: Gauge,
|
||||
/// `lemmy_db_pool_connections`: current number of connections in the pool.
size: Gauge,
|
||||
/// `lemmy_db_pool_available_connections`: number of available connections in the pool.
available: Gauge,
|
||||
}
|
||||
|
||||
/// Address the metrics server binds to when the configuration does not provide one.
const DEFAULT_BIND: IpAddr = IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1));

/// Port the metrics server listens on when the configuration does not provide one.
/// Kept as `i32` to match the type of `PrometheusConfig::port`.
const DEFAULT_PORT: i32 = 10002;
|
||||
|
||||
pub fn serve_prometheus(config: Option<&PrometheusConfig>, lemmy_context: LemmyContext) {
|
||||
let context = Arc::new(PromContext {
|
||||
lemmy: lemmy_context,
|
||||
db_pool_metrics: create_db_pool_metrics(),
|
||||
});
|
||||
|
||||
let (bind, port) = match config {
|
||||
Some(config) => (
|
||||
config.bind.unwrap_or(DEFAULT_BIND),
|
||||
config.port.unwrap_or(DEFAULT_PORT),
|
||||
),
|
||||
None => (DEFAULT_BIND, DEFAULT_PORT),
|
||||
};
|
||||
|
||||
// spawn thread that blocks on handling requests
|
||||
// only mapping /metrics to a handler
|
||||
thread::spawn(move || {
|
||||
let sys = System::new();
|
||||
sys.block_on(async {
|
||||
let server = HttpServer::new(move || {
|
||||
App::new()
|
||||
.app_data(web::Data::new(Arc::clone(&context)))
|
||||
.route("/metrics", web::get().to(metrics))
|
||||
})
|
||||
.bind((bind, port as u16))
|
||||
.expect(&format!("Cannot bind to {}:{}", bind, port))
|
||||
.run();
|
||||
|
||||
if let Err(err) = server.await {
|
||||
eprintln!("Prometheus server error: {}", err);
|
||||
}
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
// handler for the /metrics path
|
||||
async fn metrics(context: web::Data<Arc<PromContext>>) -> impl Responder {
|
||||
// collect metrics
|
||||
collect_db_pool_metrics(&context).await;
|
||||
|
||||
let mut buffer = Vec::new();
|
||||
let encoder = TextEncoder::new();
|
||||
|
||||
// gather metrics from registry and encode in prometheus format
|
||||
let metric_families = prometheus::gather();
|
||||
encoder.encode(&metric_families, &mut buffer).unwrap();
|
||||
let output = String::from_utf8(buffer).unwrap();
|
||||
|
||||
HttpResponse::Ok().body(output)
|
||||
}
|
||||
|
||||
// create lemmy_db_pool_* metrics and register them with the default registry
|
||||
fn create_db_pool_metrics() -> DbPoolMetrics {
|
||||
let metrics = DbPoolMetrics {
|
||||
max_size: Gauge::with_opts(Opts::new(
|
||||
"lemmy_db_pool_max_connections",
|
||||
"Maximum number of connections in the pool",
|
||||
))
|
||||
.unwrap(),
|
||||
size: Gauge::with_opts(Opts::new(
|
||||
"lemmy_db_pool_connections",
|
||||
"Current number of connections in the pool",
|
||||
))
|
||||
.unwrap(),
|
||||
available: Gauge::with_opts(Opts::new(
|
||||
"lemmy_db_pool_available_connections",
|
||||
"Number of available connections in the pool",
|
||||
))
|
||||
.unwrap(),
|
||||
};
|
||||
|
||||
default_registry()
|
||||
.register(Box::new(metrics.max_size.clone()))
|
||||
.unwrap();
|
||||
default_registry()
|
||||
.register(Box::new(metrics.size.clone()))
|
||||
.unwrap();
|
||||
default_registry()
|
||||
.register(Box::new(metrics.available.clone()))
|
||||
.unwrap();
|
||||
|
||||
return metrics;
|
||||
}
|
||||
|
||||
async fn collect_db_pool_metrics(context: &PromContext) {
|
||||
let pool_status = context.lemmy.pool().status();
|
||||
context
|
||||
.db_pool_metrics
|
||||
.max_size
|
||||
.set(pool_status.max_size as f64);
|
||||
context.db_pool_metrics.size.set(pool_status.size as f64);
|
||||
context
|
||||
.db_pool_metrics
|
||||
.available
|
||||
.set(pool_status.available as f64);
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue