182 lines
5.9 KiB
Go
Raw Normal View History

package signozalertmanager
import (
"context"
"time"
"github.com/SigNoz/signoz/pkg/alertmanager"
"github.com/SigNoz/signoz/pkg/alertmanager/alertmanagerstore/sqlalertmanagerstore"
"github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/factory"
"github.com/SigNoz/signoz/pkg/sqlstore"
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
)
type provider struct {
service *alertmanager.Service
config alertmanager.Config
settings factory.ScopedProviderSettings
configStore alertmanagertypes.ConfigStore
stateStore alertmanagertypes.StateStore
stopC chan struct{}
}
func NewFactory(sqlstore sqlstore.SQLStore) factory.ProviderFactory[alertmanager.Alertmanager, alertmanager.Config] {
return factory.NewProviderFactory(factory.MustNewName("signoz"), func(ctx context.Context, settings factory.ProviderSettings, config alertmanager.Config) (alertmanager.Alertmanager, error) {
return New(ctx, settings, config, sqlstore)
})
}
func New(ctx context.Context, providerSettings factory.ProviderSettings, config alertmanager.Config, sqlstore sqlstore.SQLStore) (*provider, error) {
settings := factory.NewScopedProviderSettings(providerSettings, "github.com/SigNoz/signoz/pkg/alertmanager/signozalertmanager")
configStore := sqlalertmanagerstore.NewConfigStore(sqlstore)
stateStore := sqlalertmanagerstore.NewStateStore(sqlstore)
p := &provider{
service: alertmanager.New(
ctx,
settings,
config.Signoz.Config,
stateStore,
configStore,
),
settings: settings,
config: config,
configStore: configStore,
stateStore: stateStore,
stopC: make(chan struct{}),
}
return p, nil
}
func (provider *provider) Start(ctx context.Context) error {
if err := provider.service.SyncServers(ctx); err != nil {
provider.settings.Logger().ErrorContext(ctx, "failed to sync alertmanager servers", "error", err)
return err
}
ticker := time.NewTicker(provider.config.Signoz.PollInterval)
defer ticker.Stop()
for {
select {
case <-provider.stopC:
return nil
case <-ticker.C:
if err := provider.service.SyncServers(ctx); err != nil {
provider.settings.Logger().ErrorContext(ctx, "failed to sync alertmanager servers", "error", err)
}
}
}
}
func (provider *provider) Stop(ctx context.Context) error {
close(provider.stopC)
return provider.service.Stop(ctx)
}
func (provider *provider) GetAlerts(ctx context.Context, orgID string, params alertmanagertypes.GettableAlertsParams) (alertmanagertypes.DeprecatedGettableAlerts, error) {
return provider.service.GetAlerts(ctx, orgID, params)
}
func (provider *provider) PutAlerts(ctx context.Context, orgID string, alerts alertmanagertypes.PostableAlerts) error {
return provider.service.PutAlerts(ctx, orgID, alerts)
}
func (provider *provider) TestReceiver(ctx context.Context, orgID string, receiver alertmanagertypes.Receiver) error {
return provider.service.TestReceiver(ctx, orgID, receiver)
}
func (provider *provider) TestAlert(ctx context.Context, orgID string, alert *alertmanagertypes.PostableAlert, receivers []string) error {
return provider.service.TestAlert(ctx, orgID, alert, receivers)
}
func (provider *provider) ListChannels(ctx context.Context, orgID string) ([]*alertmanagertypes.Channel, error) {
return provider.configStore.ListChannels(ctx, orgID)
}
func (provider *provider) ListAllChannels(ctx context.Context) ([]*alertmanagertypes.Channel, error) {
return nil, errors.Newf(errors.TypeUnsupported, errors.CodeUnsupported, "not supported by provider signoz")
}
func (provider *provider) GetChannelByID(ctx context.Context, orgID string, channelID int) (*alertmanagertypes.Channel, error) {
return provider.configStore.GetChannelByID(ctx, orgID, channelID)
}
func (provider *provider) UpdateChannelByReceiverAndID(ctx context.Context, orgID string, receiver alertmanagertypes.Receiver, id int) error {
channel, err := provider.configStore.GetChannelByID(ctx, orgID, id)
if err != nil {
return err
}
if err := channel.Update(receiver); err != nil {
return err
}
config, err := provider.configStore.Get(ctx, orgID)
if err != nil {
return err
}
if err := config.UpdateReceiver(receiver); err != nil {
return err
}
return provider.configStore.UpdateChannel(ctx, orgID, channel, alertmanagertypes.WithCb(func(ctx context.Context) error {
return provider.configStore.Set(ctx, config)
}))
}
func (provider *provider) DeleteChannelByID(ctx context.Context, orgID string, channelID int) error {
channel, err := provider.configStore.GetChannelByID(ctx, orgID, channelID)
if err != nil {
return err
}
config, err := provider.configStore.Get(ctx, orgID)
if err != nil {
return err
}
if err := config.DeleteReceiver(channel.Name); err != nil {
return err
}
return provider.configStore.DeleteChannelByID(ctx, orgID, channelID, alertmanagertypes.WithCb(func(ctx context.Context) error {
return provider.configStore.Set(ctx, config)
}))
}
func (provider *provider) CreateChannel(ctx context.Context, orgID string, receiver alertmanagertypes.Receiver) error {
config, err := provider.configStore.Get(ctx, orgID)
if err != nil {
return err
}
if err := config.CreateReceiver(receiver); err != nil {
return err
}
channel := alertmanagertypes.NewChannelFromReceiver(receiver, orgID)
return provider.configStore.CreateChannel(ctx, channel, alertmanagertypes.WithCb(func(ctx context.Context) error {
return provider.configStore.Set(ctx, config)
}))
}
func (provider *provider) SetConfig(ctx context.Context, config *alertmanagertypes.Config) error {
return provider.configStore.Set(ctx, config)
}
func (provider *provider) GetConfig(ctx context.Context, orgID string) (*alertmanagertypes.Config, error) {
return provider.configStore.Get(ctx, orgID)
}
feat(alertmanager): integrate with ruler (#7222) ### Summary Integrate the new implementations of the alertmanager along with changes to the ruler. This change can be broadly categoried into 3 parts: #### Frontend - The earlier `/api/v1/alerts` api was double encoding the response in json and sending it to the frontend. This PR fixes the json response object. For instance, we have gone from the response `{ "status": "success", "data": "{\"status\":\"success\",\"data\":[{\"labels\":{\"alertname\":\"[platform][consumer] consumer is above 100% memory utilization\",\"bu\":\"platform\",\"...... }` to the response `{"status":"success","data":[{"labels":{"alertname":"[Metrics] Pod CP......` - `msteams` has been changed to `msteamsv2` wherever applicable #### Ruler The following changes have been done in the ruler component: - Removal of the old alertmanager and notifier - The RuleDB methods `Create`, `Edit` and `Delete` have been made transactional - Introduction of a new `testPrepareNotifyFunc` for sending test notifications - Integration with the new alertmanager #### Alertmanager Although a huge chunk of the alertmanagers have been merged in previous PRs (the list can be found at https://github.com/SigNoz/platform-pod/issues/404), this PR takes care of changes needed in order to incorporate it with the ruler - Addition of ruleId based matching - Support for marshalling the global configuration directly from the upstream alertmanager - Addition of orgId to the legacy alertmanager - Support for always adding defaults to both routes and receivers while creating them - Migration to create the required alertmanager tables - Migration for msteams to msteamsv2 has been added. We will start using msteamv2 config for the new alertmanager and keep using msteams for the old one. #### Related Issues / PR's Closes https://github.com/SigNoz/platform-pod/issues/404 Closes https://github.com/SigNoz/platform-pod/issues/176
2025-03-10 01:30:42 +05:30
func (provider *provider) SetDefaultConfig(ctx context.Context, orgID string) error {
config, err := alertmanagertypes.NewDefaultConfig(provider.config.Signoz.Config.Global, provider.config.Signoz.Config.Route, orgID)
if err != nil {
return err
}
return provider.configStore.Set(ctx, config)
}