mirror of
https://github.com/SigNoz/signoz.git
synced 2025-12-21 01:16:57 +00:00
### Summary
Integrate the new implementations of the alertmanager along with changes to the ruler. This change can be broadly categorized into 3 parts:
#### Frontend
- The earlier `/api/v1/alerts` API was double-encoding the response as JSON before sending it to the frontend. This PR fixes the JSON response object.
For instance, we have gone from the response `{
"status": "success",
"data": "{\"status\":\"success\",\"data\":[{\"labels\":{\"alertname\":\"[platform][consumer] consumer is above 100% memory utilization\",\"bu\":\"platform\",\"......
}` to the response `{"status":"success","data":[{"labels":{"alertname":"[Metrics] Pod CP......`
- `msteams` has been changed to `msteamsv2` wherever applicable
#### Ruler
The following changes have been done in the ruler component:
- Removal of the old alertmanager and notifier
- The RuleDB methods `Create`, `Edit` and `Delete` have been made transactional
- Introduction of a new `testPrepareNotifyFunc` for sending test notifications
- Integration with the new alertmanager
#### Alertmanager
Although a large part of the alertmanager has already been merged in previous PRs (the list can be found at https://github.com/SigNoz/platform-pod/issues/404), this PR takes care of the changes needed to integrate it with the ruler:
- Addition of ruleId based matching
- Support for marshalling the global configuration directly from the upstream alertmanager
- Addition of orgId to the legacy alertmanager
- Support for always adding defaults to both routes and receivers while creating them
- Migration to create the required alertmanager tables
- Migration for msteams to msteamsv2 has been added. We will start using the msteamsv2 config for the new alertmanager and keep using msteams for the old one.
#### Related Issues / PR's
Closes https://github.com/SigNoz/platform-pod/issues/404
Closes https://github.com/SigNoz/platform-pod/issues/176
105 lines
4.4 KiB
Go
105 lines
4.4 KiB
Go
package alertmanagerserver
|
|
|
|
import (
|
|
"net/url"
|
|
"time"
|
|
|
|
"github.com/prometheus/alertmanager/config"
|
|
"github.com/prometheus/common/model"
|
|
"go.signoz.io/signoz/pkg/types/alertmanagertypes"
|
|
)
|
|
|
|
type Config struct {
|
|
// The URL under which Alertmanager is externally reachable (for example, if Alertmanager is served via a reverse proxy). Used for generating relative and absolute links back to Alertmanager itself.
|
|
// See https://github.com/prometheus/alertmanager/blob/3b06b97af4d146e141af92885a185891eb79a5b0/cmd/alertmanager/main.go#L155C54-L155C249
|
|
ExternalURL *url.URL `mapstructure:"external_url"`
|
|
|
|
// GlobalConfig is the global configuration for the alertmanager
|
|
Global alertmanagertypes.GlobalConfig `mapstructure:"global" yaml:"global"`
|
|
|
|
// Config of the root node of the routing tree.
|
|
Route alertmanagertypes.RouteConfig `mapstructure:"route"`
|
|
|
|
// Configuration for alerts.
|
|
Alerts AlertsConfig `mapstructure:"alerts"`
|
|
|
|
// Configuration for silences.
|
|
Silences SilencesConfig `mapstructure:"silences"`
|
|
|
|
// Configuration for the notification log.
|
|
NFLog NFLogConfig `mapstructure:"nflog"`
|
|
}
|
|
|
|
// AlertsConfig is the configuration for alerts.
type AlertsConfig struct {
	// GCInterval is the interval between garbage collection of alerts.
	// See https://github.com/prometheus/alertmanager/blob/3b06b97af4d146e141af92885a185891eb79a5b0/cmd/alertmanager/main.go#L152
	GCInterval time.Duration `mapstructure:"gc_interval"`
}
|
|
|
|
// SilencesConfig is the configuration for silences.
type SilencesConfig struct {
	// Max is the maximum number of silences, including expired silences.
	// If negative or zero, no limit is set.
	// See https://github.com/prometheus/alertmanager/blob/3b06b97af4d146e141af92885a185891eb79a5b0/cmd/alertmanager/main.go#L150C64-L150C157
	Max int `mapstructure:"max"`

	// MaxSizeBytes is the maximum size of the silences in bytes.
	// If negative or zero, no limit is set.
	// See https://github.com/prometheus/alertmanager/blob/3b06b97af4d146e141af92885a185891eb79a5b0/cmd/alertmanager/main.go#L150C64-L150C157
	MaxSizeBytes int `mapstructure:"max_size_bytes"`

	// MaintenanceInterval is the interval between garbage collection and
	// snapshotting of the silences. The snapshot will be stored in the state store.
	// The upstream alertmanager config
	// (https://github.com/prometheus/alertmanager/blob/3b06b97af4d146e141af92885a185891eb79a5b0/cmd/alertmanager/main.go#L149)
	// has been split between silences and nflog.
	MaintenanceInterval time.Duration `mapstructure:"maintenance_interval"`

	// Retention is the retention of the silences.
	Retention time.Duration `mapstructure:"retention"`
}
|
|
|
|
// NFLogConfig is the configuration for the notification log.
type NFLogConfig struct {
	// MaintenanceInterval is the interval between garbage collection and
	// snapshotting of the notification logs. The snapshot will be stored in the
	// state store.
	// The upstream alertmanager config
	// (https://github.com/prometheus/alertmanager/blob/3b06b97af4d146e141af92885a185891eb79a5b0/cmd/alertmanager/main.go#L149)
	// has been split between silences and nflog.
	MaintenanceInterval time.Duration `mapstructure:"maintenance_interval"`

	// Retention is the retention of the notification logs.
	Retention time.Duration `mapstructure:"retention"`
}
|
|
|
|
func NewConfig() Config {
|
|
return Config{
|
|
ExternalURL: &url.URL{
|
|
Scheme: "http",
|
|
Host: "localhost:8080",
|
|
},
|
|
Global: alertmanagertypes.GlobalConfig{
|
|
// Corresponds to the default in upstream (https://github.com/prometheus/alertmanager/blob/3b06b97af4d146e141af92885a185891eb79a5b0/config/config.go#L727)
|
|
ResolveTimeout: model.Duration(5 * time.Minute),
|
|
SMTPHello: "localhost",
|
|
SMTPFrom: "alertmanager@signoz.io",
|
|
SMTPSmarthost: config.HostPort{Host: "localhost", Port: "25"},
|
|
SMTPRequireTLS: true,
|
|
},
|
|
Route: alertmanagertypes.RouteConfig{
|
|
GroupByStr: []string{"alertname"},
|
|
GroupInterval: 5 * time.Minute,
|
|
GroupWait: 30 * time.Second,
|
|
RepeatInterval: 4 * time.Hour,
|
|
},
|
|
// Corresponds to the default in upstream (https://github.com/prometheus/alertmanager/blob/3b06b97af4d146e141af92885a185891eb79a5b0/cmd/alertmanager/main.go#L152)
|
|
Alerts: AlertsConfig{
|
|
GCInterval: 30 * time.Minute,
|
|
},
|
|
// Corresponds to the default in upstream (https://github.com/prometheus/alertmanager/blob/3b06b97af4d146e141af92885a185891eb79a5b0/cmd/alertmanager/main.go#L149-L151)
|
|
Silences: SilencesConfig{
|
|
Max: 0,
|
|
MaxSizeBytes: 0,
|
|
MaintenanceInterval: 15 * time.Minute,
|
|
Retention: 120 * time.Hour,
|
|
},
|
|
// Corresponds to the default in upstream (https://github.com/prometheus/alertmanager/blob/3b06b97af4d146e141af92885a185891eb79a5b0/cmd/alertmanager/main.go#L149)
|
|
NFLog: NFLogConfig{
|
|
MaintenanceInterval: 15 * time.Minute,
|
|
Retention: 120 * time.Hour,
|
|
},
|
|
}
|
|
}
|