signoz/pkg/sqlmigration/014_add_alertmanager.go

294 lines
7.1 KiB
Go
Raw Normal View History

feat(alertmanager): integrate with ruler (#7222) ### Summary Integrate the new implementations of the alertmanager along with changes to the ruler. This change can be broadly categoried into 3 parts: #### Frontend - The earlier `/api/v1/alerts` api was double encoding the response in json and sending it to the frontend. This PR fixes the json response object. For instance, we have gone from the response `{ "status": "success", "data": "{\"status\":\"success\",\"data\":[{\"labels\":{\"alertname\":\"[platform][consumer] consumer is above 100% memory utilization\",\"bu\":\"platform\",\"...... }` to the response `{"status":"success","data":[{"labels":{"alertname":"[Metrics] Pod CP......` - `msteams` has been changed to `msteamsv2` wherever applicable #### Ruler The following changes have been done in the ruler component: - Removal of the old alertmanager and notifier - The RuleDB methods `Create`, `Edit` and `Delete` have been made transactional - Introduction of a new `testPrepareNotifyFunc` for sending test notifications - Integration with the new alertmanager #### Alertmanager Although a huge chunk of the alertmanagers have been merged in previous PRs (the list can be found at https://github.com/SigNoz/platform-pod/issues/404), this PR takes care of changes needed in order to incorporate it with the ruler - Addition of ruleId based matching - Support for marshalling the global configuration directly from the upstream alertmanager - Addition of orgId to the legacy alertmanager - Support for always adding defaults to both routes and receivers while creating them - Migration to create the required alertmanager tables - Migration for msteams to msteamsv2 has been added. We will start using msteamv2 config for the new alertmanager and keep using msteams for the old one. #### Related Issues / PR's Closes https://github.com/SigNoz/platform-pod/issues/404 Closes https://github.com/SigNoz/platform-pod/issues/176
2025-03-10 01:30:42 +05:30
package sqlmigration
import (
"context"
"database/sql"
"encoding/json"
"strconv"
"time"
"github.com/SigNoz/signoz/pkg/alertmanager/alertmanagerserver"
"github.com/SigNoz/signoz/pkg/factory"
"github.com/SigNoz/signoz/pkg/sqlstore"
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
feat(alertmanager): integrate with ruler (#7222) ### Summary Integrate the new implementations of the alertmanager along with changes to the ruler. This change can be broadly categoried into 3 parts: #### Frontend - The earlier `/api/v1/alerts` api was double encoding the response in json and sending it to the frontend. This PR fixes the json response object. For instance, we have gone from the response `{ "status": "success", "data": "{\"status\":\"success\",\"data\":[{\"labels\":{\"alertname\":\"[platform][consumer] consumer is above 100% memory utilization\",\"bu\":\"platform\",\"...... }` to the response `{"status":"success","data":[{"labels":{"alertname":"[Metrics] Pod CP......` - `msteams` has been changed to `msteamsv2` wherever applicable #### Ruler The following changes have been done in the ruler component: - Removal of the old alertmanager and notifier - The RuleDB methods `Create`, `Edit` and `Delete` have been made transactional - Introduction of a new `testPrepareNotifyFunc` for sending test notifications - Integration with the new alertmanager #### Alertmanager Although a huge chunk of the alertmanagers have been merged in previous PRs (the list can be found at https://github.com/SigNoz/platform-pod/issues/404), this PR takes care of changes needed in order to incorporate it with the ruler - Addition of ruleId based matching - Support for marshalling the global configuration directly from the upstream alertmanager - Addition of orgId to the legacy alertmanager - Support for always adding defaults to both routes and receivers while creating them - Migration to create the required alertmanager tables - Migration for msteams to msteamsv2 has been added. We will start using msteamv2 config for the new alertmanager and keep using msteams for the old one. #### Related Issues / PR's Closes https://github.com/SigNoz/platform-pod/issues/404 Closes https://github.com/SigNoz/platform-pod/issues/176
2025-03-10 01:30:42 +05:30
"github.com/prometheus/alertmanager/config"
"github.com/tidwall/gjson"
"github.com/uptrace/bun"
"github.com/uptrace/bun/migrate"
)
type addAlertmanager struct {
store sqlstore.SQLStore
}
feat(alertmanager): integrate with ruler (#7222) ### Summary Integrate the new implementations of the alertmanager along with changes to the ruler. This change can be broadly categoried into 3 parts: #### Frontend - The earlier `/api/v1/alerts` api was double encoding the response in json and sending it to the frontend. This PR fixes the json response object. For instance, we have gone from the response `{ "status": "success", "data": "{\"status\":\"success\",\"data\":[{\"labels\":{\"alertname\":\"[platform][consumer] consumer is above 100% memory utilization\",\"bu\":\"platform\",\"...... }` to the response `{"status":"success","data":[{"labels":{"alertname":"[Metrics] Pod CP......` - `msteams` has been changed to `msteamsv2` wherever applicable #### Ruler The following changes have been done in the ruler component: - Removal of the old alertmanager and notifier - The RuleDB methods `Create`, `Edit` and `Delete` have been made transactional - Introduction of a new `testPrepareNotifyFunc` for sending test notifications - Integration with the new alertmanager #### Alertmanager Although a huge chunk of the alertmanagers have been merged in previous PRs (the list can be found at https://github.com/SigNoz/platform-pod/issues/404), this PR takes care of changes needed in order to incorporate it with the ruler - Addition of ruleId based matching - Support for marshalling the global configuration directly from the upstream alertmanager - Addition of orgId to the legacy alertmanager - Support for always adding defaults to both routes and receivers while creating them - Migration to create the required alertmanager tables - Migration for msteams to msteamsv2 has been added. We will start using msteamv2 config for the new alertmanager and keep using msteams for the old one. #### Related Issues / PR's Closes https://github.com/SigNoz/platform-pod/issues/404 Closes https://github.com/SigNoz/platform-pod/issues/176
2025-03-10 01:30:42 +05:30
func NewAddAlertmanagerFactory(store sqlstore.SQLStore) factory.ProviderFactory[SQLMigration, Config] {
return factory.NewProviderFactory(factory.MustNewName("add_alertmanager"), func(ctx context.Context, ps factory.ProviderSettings, c Config) (SQLMigration, error) {
return newAddAlertmanager(ctx, ps, c, store)
})
feat(alertmanager): integrate with ruler (#7222) ### Summary Integrate the new implementations of the alertmanager along with changes to the ruler. This change can be broadly categoried into 3 parts: #### Frontend - The earlier `/api/v1/alerts` api was double encoding the response in json and sending it to the frontend. This PR fixes the json response object. For instance, we have gone from the response `{ "status": "success", "data": "{\"status\":\"success\",\"data\":[{\"labels\":{\"alertname\":\"[platform][consumer] consumer is above 100% memory utilization\",\"bu\":\"platform\",\"...... }` to the response `{"status":"success","data":[{"labels":{"alertname":"[Metrics] Pod CP......` - `msteams` has been changed to `msteamsv2` wherever applicable #### Ruler The following changes have been done in the ruler component: - Removal of the old alertmanager and notifier - The RuleDB methods `Create`, `Edit` and `Delete` have been made transactional - Introduction of a new `testPrepareNotifyFunc` for sending test notifications - Integration with the new alertmanager #### Alertmanager Although a huge chunk of the alertmanagers have been merged in previous PRs (the list can be found at https://github.com/SigNoz/platform-pod/issues/404), this PR takes care of changes needed in order to incorporate it with the ruler - Addition of ruleId based matching - Support for marshalling the global configuration directly from the upstream alertmanager - Addition of orgId to the legacy alertmanager - Support for always adding defaults to both routes and receivers while creating them - Migration to create the required alertmanager tables - Migration for msteams to msteamsv2 has been added. We will start using msteamv2 config for the new alertmanager and keep using msteams for the old one. #### Related Issues / PR's Closes https://github.com/SigNoz/platform-pod/issues/404 Closes https://github.com/SigNoz/platform-pod/issues/176
2025-03-10 01:30:42 +05:30
}
func newAddAlertmanager(_ context.Context, _ factory.ProviderSettings, _ Config, store sqlstore.SQLStore) (SQLMigration, error) {
return &addAlertmanager{
store: store,
}, nil
feat(alertmanager): integrate with ruler (#7222) ### Summary Integrate the new implementations of the alertmanager along with changes to the ruler. This change can be broadly categoried into 3 parts: #### Frontend - The earlier `/api/v1/alerts` api was double encoding the response in json and sending it to the frontend. This PR fixes the json response object. For instance, we have gone from the response `{ "status": "success", "data": "{\"status\":\"success\",\"data\":[{\"labels\":{\"alertname\":\"[platform][consumer] consumer is above 100% memory utilization\",\"bu\":\"platform\",\"...... }` to the response `{"status":"success","data":[{"labels":{"alertname":"[Metrics] Pod CP......` - `msteams` has been changed to `msteamsv2` wherever applicable #### Ruler The following changes have been done in the ruler component: - Removal of the old alertmanager and notifier - The RuleDB methods `Create`, `Edit` and `Delete` have been made transactional - Introduction of a new `testPrepareNotifyFunc` for sending test notifications - Integration with the new alertmanager #### Alertmanager Although a huge chunk of the alertmanagers have been merged in previous PRs (the list can be found at https://github.com/SigNoz/platform-pod/issues/404), this PR takes care of changes needed in order to incorporate it with the ruler - Addition of ruleId based matching - Support for marshalling the global configuration directly from the upstream alertmanager - Addition of orgId to the legacy alertmanager - Support for always adding defaults to both routes and receivers while creating them - Migration to create the required alertmanager tables - Migration for msteams to msteamsv2 has been added. We will start using msteamv2 config for the new alertmanager and keep using msteams for the old one. #### Related Issues / PR's Closes https://github.com/SigNoz/platform-pod/issues/404 Closes https://github.com/SigNoz/platform-pod/issues/176
2025-03-10 01:30:42 +05:30
}
func (migration *addAlertmanager) Register(migrations *migrate.Migrations) error {
if err := migrations.Register(migration.Up, migration.Down); err != nil {
return err
}
return nil
}
func (migration *addAlertmanager) Up(ctx context.Context, db *bun.DB) error {
tx, err := db.BeginTx(ctx, nil)
if err != nil {
return err
}
2025-05-23 15:52:58 +05:30
defer func() {
_ = tx.Rollback()
}()
feat(alertmanager): integrate with ruler (#7222) ### Summary Integrate the new implementations of the alertmanager along with changes to the ruler. This change can be broadly categoried into 3 parts: #### Frontend - The earlier `/api/v1/alerts` api was double encoding the response in json and sending it to the frontend. This PR fixes the json response object. For instance, we have gone from the response `{ "status": "success", "data": "{\"status\":\"success\",\"data\":[{\"labels\":{\"alertname\":\"[platform][consumer] consumer is above 100% memory utilization\",\"bu\":\"platform\",\"...... }` to the response `{"status":"success","data":[{"labels":{"alertname":"[Metrics] Pod CP......` - `msteams` has been changed to `msteamsv2` wherever applicable #### Ruler The following changes have been done in the ruler component: - Removal of the old alertmanager and notifier - The RuleDB methods `Create`, `Edit` and `Delete` have been made transactional - Introduction of a new `testPrepareNotifyFunc` for sending test notifications - Integration with the new alertmanager #### Alertmanager Although a huge chunk of the alertmanagers have been merged in previous PRs (the list can be found at https://github.com/SigNoz/platform-pod/issues/404), this PR takes care of changes needed in order to incorporate it with the ruler - Addition of ruleId based matching - Support for marshalling the global configuration directly from the upstream alertmanager - Addition of orgId to the legacy alertmanager - Support for always adding defaults to both routes and receivers while creating them - Migration to create the required alertmanager tables - Migration for msteams to msteamsv2 has been added. We will start using msteamv2 config for the new alertmanager and keep using msteams for the old one. #### Related Issues / PR's Closes https://github.com/SigNoz/platform-pod/issues/404 Closes https://github.com/SigNoz/platform-pod/issues/176
2025-03-10 01:30:42 +05:30
if exists, err := migration.store.Dialect().ColumnExists(ctx, tx, "notification_channels", "deleted"); err != nil {
return err
} else if exists {
if _, err := tx.
NewDropColumn().
Table("notification_channels").
ColumnExpr("deleted").
Exec(ctx); err != nil {
feat(alertmanager): integrate with ruler (#7222) ### Summary Integrate the new implementations of the alertmanager along with changes to the ruler. This change can be broadly categoried into 3 parts: #### Frontend - The earlier `/api/v1/alerts` api was double encoding the response in json and sending it to the frontend. This PR fixes the json response object. For instance, we have gone from the response `{ "status": "success", "data": "{\"status\":\"success\",\"data\":[{\"labels\":{\"alertname\":\"[platform][consumer] consumer is above 100% memory utilization\",\"bu\":\"platform\",\"...... }` to the response `{"status":"success","data":[{"labels":{"alertname":"[Metrics] Pod CP......` - `msteams` has been changed to `msteamsv2` wherever applicable #### Ruler The following changes have been done in the ruler component: - Removal of the old alertmanager and notifier - The RuleDB methods `Create`, `Edit` and `Delete` have been made transactional - Introduction of a new `testPrepareNotifyFunc` for sending test notifications - Integration with the new alertmanager #### Alertmanager Although a huge chunk of the alertmanagers have been merged in previous PRs (the list can be found at https://github.com/SigNoz/platform-pod/issues/404), this PR takes care of changes needed in order to incorporate it with the ruler - Addition of ruleId based matching - Support for marshalling the global configuration directly from the upstream alertmanager - Addition of orgId to the legacy alertmanager - Support for always adding defaults to both routes and receivers while creating them - Migration to create the required alertmanager tables - Migration for msteams to msteamsv2 has been added. We will start using msteamv2 config for the new alertmanager and keep using msteams for the old one. #### Related Issues / PR's Closes https://github.com/SigNoz/platform-pod/issues/404 Closes https://github.com/SigNoz/platform-pod/issues/176
2025-03-10 01:30:42 +05:30
return err
}
}
if exists, err := migration.store.Dialect().ColumnExists(ctx, tx, "notification_channels", "org_id"); err != nil {
feat(alertmanager): integrate with ruler (#7222) ### Summary Integrate the new implementations of the alertmanager along with changes to the ruler. This change can be broadly categoried into 3 parts: #### Frontend - The earlier `/api/v1/alerts` api was double encoding the response in json and sending it to the frontend. This PR fixes the json response object. For instance, we have gone from the response `{ "status": "success", "data": "{\"status\":\"success\",\"data\":[{\"labels\":{\"alertname\":\"[platform][consumer] consumer is above 100% memory utilization\",\"bu\":\"platform\",\"...... }` to the response `{"status":"success","data":[{"labels":{"alertname":"[Metrics] Pod CP......` - `msteams` has been changed to `msteamsv2` wherever applicable #### Ruler The following changes have been done in the ruler component: - Removal of the old alertmanager and notifier - The RuleDB methods `Create`, `Edit` and `Delete` have been made transactional - Introduction of a new `testPrepareNotifyFunc` for sending test notifications - Integration with the new alertmanager #### Alertmanager Although a huge chunk of the alertmanagers have been merged in previous PRs (the list can be found at https://github.com/SigNoz/platform-pod/issues/404), this PR takes care of changes needed in order to incorporate it with the ruler - Addition of ruleId based matching - Support for marshalling the global configuration directly from the upstream alertmanager - Addition of orgId to the legacy alertmanager - Support for always adding defaults to both routes and receivers while creating them - Migration to create the required alertmanager tables - Migration for msteams to msteamsv2 has been added. We will start using msteamv2 config for the new alertmanager and keep using msteams for the old one. #### Related Issues / PR's Closes https://github.com/SigNoz/platform-pod/issues/404 Closes https://github.com/SigNoz/platform-pod/issues/176
2025-03-10 01:30:42 +05:30
return err
} else if !exists {
if _, err := tx.
NewAddColumn().
Table("notification_channels").
ColumnExpr("org_id TEXT REFERENCES organizations(id) ON DELETE CASCADE").
Exec(ctx); err != nil {
return err
}
feat(alertmanager): integrate with ruler (#7222) ### Summary Integrate the new implementations of the alertmanager along with changes to the ruler. This change can be broadly categoried into 3 parts: #### Frontend - The earlier `/api/v1/alerts` api was double encoding the response in json and sending it to the frontend. This PR fixes the json response object. For instance, we have gone from the response `{ "status": "success", "data": "{\"status\":\"success\",\"data\":[{\"labels\":{\"alertname\":\"[platform][consumer] consumer is above 100% memory utilization\",\"bu\":\"platform\",\"...... }` to the response `{"status":"success","data":[{"labels":{"alertname":"[Metrics] Pod CP......` - `msteams` has been changed to `msteamsv2` wherever applicable #### Ruler The following changes have been done in the ruler component: - Removal of the old alertmanager and notifier - The RuleDB methods `Create`, `Edit` and `Delete` have been made transactional - Introduction of a new `testPrepareNotifyFunc` for sending test notifications - Integration with the new alertmanager #### Alertmanager Although a huge chunk of the alertmanagers have been merged in previous PRs (the list can be found at https://github.com/SigNoz/platform-pod/issues/404), this PR takes care of changes needed in order to incorporate it with the ruler - Addition of ruleId based matching - Support for marshalling the global configuration directly from the upstream alertmanager - Addition of orgId to the legacy alertmanager - Support for always adding defaults to both routes and receivers while creating them - Migration to create the required alertmanager tables - Migration for msteams to msteamsv2 has been added. We will start using msteamv2 config for the new alertmanager and keep using msteams for the old one. #### Related Issues / PR's Closes https://github.com/SigNoz/platform-pod/issues/404 Closes https://github.com/SigNoz/platform-pod/issues/176
2025-03-10 01:30:42 +05:30
}
if _, err := tx.
NewCreateTable().
Model(&struct {
bun.BaseModel `bun:"table:alertmanager_config"`
ID uint64 `bun:"id,pk,autoincrement"`
Config string `bun:"config,notnull,type:text"`
Hash string `bun:"hash,notnull,type:text"`
CreatedAt time.Time `bun:"created_at,notnull"`
UpdatedAt time.Time `bun:"updated_at,notnull"`
OrgID string `bun:"org_id,notnull,unique"`
}{}).
ForeignKey(`("org_id") REFERENCES "organizations" ("id")`).
IfNotExists().
Exec(ctx); err != nil {
return err
}
if _, err := tx.
NewCreateTable().
Model(&struct {
bun.BaseModel `bun:"table:alertmanager_state"`
ID uint64 `bun:"id,pk,autoincrement"`
Silences string `bun:"silences,nullzero,type:text"`
NFLog string `bun:"nflog,nullzero,type:text"`
CreatedAt time.Time `bun:"created_at,notnull"`
UpdatedAt time.Time `bun:"updated_at,notnull"`
OrgID string `bun:"org_id,notnull,unique"`
}{}).
ForeignKey(`("org_id") REFERENCES "organizations" ("id")`).
IfNotExists().
Exec(ctx); err != nil {
return err
}
var orgID string
err = tx.
NewSelect().
ColumnExpr("id").
Table("organizations").
Limit(1).
Scan(ctx, &orgID)
if err != nil {
if err != sql.ErrNoRows {
return err
}
}
if err == nil {
if err := migration.populateOrgIDInChannels(ctx, tx, orgID); err != nil {
return err
}
if err := migration.populateAlertmanagerConfig(ctx, tx, orgID); err != nil {
return err
}
}
if err := tx.Commit(); err != nil {
return err
}
return nil
}
func (migration *addAlertmanager) populateOrgIDInChannels(ctx context.Context, tx bun.Tx, orgID string) error {
if _, err := tx.
NewUpdate().
Table("notification_channels").
Set("org_id = ?", orgID).
Where("org_id IS NULL").
Exec(ctx); err != nil {
return err
}
return nil
}
func (migration *addAlertmanager) populateAlertmanagerConfig(ctx context.Context, tx bun.Tx, orgID string) error {
var channels []*alertmanagertypes.Channel
err := tx.
NewSelect().
Model(&channels).
Where("org_id = ?", orgID).
Scan(ctx)
if err != nil {
return err
}
var receiversFromChannels []string
for _, channel := range channels {
receiversFromChannels = append(receiversFromChannels, channel.Name)
}
type matcher struct {
bun.BaseModel `bun:"table:rules"`
ID int `bun:"id,pk"`
Data string `bun:"data"`
}
matchers := []matcher{}
err = tx.
NewSelect().
Column("id", "data").
Model(&matchers).
Scan(ctx)
if err != nil {
return err
}
matchersMap := make(map[string][]string)
for _, matcher := range matchers {
receivers := gjson.Get(matcher.Data, "preferredChannels").Array()
for _, receiver := range receivers {
matchersMap[strconv.Itoa(matcher.ID)] = append(matchersMap[strconv.Itoa(matcher.ID)], receiver.String())
}
if len(receivers) == 0 {
matchersMap[strconv.Itoa(matcher.ID)] = append(matchersMap[strconv.Itoa(matcher.ID)], receiversFromChannels...)
}
}
for _, channel := range channels {
if err := migration.msTeamsChannelToMSTeamsV2Channel(channel); err != nil {
return err
}
}
config, err := alertmanagertypes.NewConfigFromChannels(alertmanagerserver.NewConfig().Global, alertmanagerserver.NewConfig().Route, channels, orgID)
if err != nil {
return err
}
for ruleID, receivers := range matchersMap {
err = config.CreateRuleIDMatcher(ruleID, receivers)
if err != nil {
return err
}
}
if _, err := tx.
NewInsert().
Model(config.StoreableConfig()).
On("CONFLICT (org_id) DO UPDATE").
Set("config = ?", config.StoreableConfig().Config).
Set("hash = ?", config.StoreableConfig().Hash).
Set("updated_at = ?", config.StoreableConfig().UpdatedAt).
Exec(ctx); err != nil {
return err
}
for _, channel := range channels {
if channel.Type == "msteamsv2" {
if _, err := tx.
NewUpdate().
Model(channel).
WherePK().
Exec(ctx); err != nil {
return err
}
}
}
return nil
}
func (migration *addAlertmanager) Down(ctx context.Context, db *bun.DB) error {
return nil
}
func (migration *addAlertmanager) msTeamsChannelToMSTeamsV2Channel(c *alertmanagertypes.Channel) error {
if c.Type != "msteams" {
return nil
}
receiver, err := alertmanagertypes.NewReceiver(c.Data)
if err != nil {
return err
}
receiver = migration.msTeamsReceiverToMSTeamsV2Receiver(receiver)
data, err := json.Marshal(receiver)
if err != nil {
return err
}
c.Type = "msteamsv2"
c.Data = string(data)
c.UpdatedAt = time.Now()
return nil
}
func (migration *addAlertmanager) msTeamsReceiverToMSTeamsV2Receiver(receiver alertmanagertypes.Receiver) alertmanagertypes.Receiver {
if receiver.MSTeamsConfigs == nil {
return receiver
}
var msTeamsV2Configs []*config.MSTeamsV2Config
for _, cfg := range receiver.MSTeamsConfigs {
msTeamsV2Configs = append(msTeamsV2Configs, &config.MSTeamsV2Config{
NotifierConfig: cfg.NotifierConfig,
HTTPConfig: cfg.HTTPConfig,
WebhookURL: cfg.WebhookURL,
WebhookURLFile: cfg.WebhookURLFile,
Title: cfg.Title,
Text: cfg.Text,
})
}
receiver.MSTeamsConfigs = nil
receiver.MSTeamsV2Configs = msTeamsV2Configs
return receiver
}