diff --git a/frontend/src/container/CreateAlertV2/NotificationSettings/__tests__/NotificationSettings.test.tsx b/frontend/src/container/CreateAlertV2/NotificationSettings/__tests__/NotificationSettings.test.tsx
index 181971676bac..7b8c36a5c3cf 100644
--- a/frontend/src/container/CreateAlertV2/NotificationSettings/__tests__/NotificationSettings.test.tsx
+++ b/frontend/src/container/CreateAlertV2/NotificationSettings/__tests__/NotificationSettings.test.tsx
@@ -1,7 +1,6 @@
import { fireEvent, render, screen } from '@testing-library/react';
import * as createAlertContext from 'container/CreateAlertV2/context';
import { createMockAlertContextState } from 'container/CreateAlertV2/EvaluationSettings/__tests__/testUtils';
-import * as utils from 'container/CreateAlertV2/utils';
import NotificationSettings from '../NotificationSettings';
@@ -26,7 +25,6 @@ jest.mock(
jest.mock('container/CreateAlertV2/utils', () => ({
...jest.requireActual('container/CreateAlertV2/utils'),
- showCondensedLayout: jest.fn().mockReturnValue(false),
}));
const initialNotificationSettings = createMockAlertContextState()
@@ -42,10 +40,10 @@ const REPEAT_NOTIFICATIONS_TEXT = 'Repeat notifications';
const ENTER_TIME_INTERVAL_TEXT = 'Enter time interval...';
describe('NotificationSettings', () => {
- it('renders the notification settings tab with step number 4 and default values', () => {
+ it('renders the notification settings tab with step number 3 and default values', () => {
render(<NotificationSettings />);
expect(screen.getByText('Notification settings')).toBeInTheDocument();
- expect(screen.getByText('4')).toBeInTheDocument();
+ expect(screen.getByText('3')).toBeInTheDocument();
expect(screen.getByTestId('multiple-notifications')).toBeInTheDocument();
expect(screen.getByTestId('notification-message')).toBeInTheDocument();
expect(screen.getByText(REPEAT_NOTIFICATIONS_TEXT)).toBeInTheDocument();
@@ -56,15 +54,6 @@ describe('NotificationSettings', () => {
).toBeInTheDocument();
});
- it('renders the notification settings tab with step number 3 in condensed layout', () => {
- jest.spyOn(utils, 'showCondensedLayout').mockReturnValueOnce(true);
- render(<NotificationSettings />);
- expect(screen.getByText('Notification settings')).toBeInTheDocument();
- expect(screen.getByText('3')).toBeInTheDocument();
- expect(screen.getByTestId('multiple-notifications')).toBeInTheDocument();
- expect(screen.getByTestId('notification-message')).toBeInTheDocument();
- });
-
describe('Repeat notifications', () => {
it('renders the repeat notifications with inputs hidden when the repeat notifications switch is off', () => {
render(<NotificationSettings />);
diff --git a/frontend/src/container/CreateAlertV2/QuerySection/ChartPreview/ChartPreview.tsx b/frontend/src/container/CreateAlertV2/QuerySection/ChartPreview/ChartPreview.tsx
index 9335e3a2c690..05703de36016 100644
--- a/frontend/src/container/CreateAlertV2/QuerySection/ChartPreview/ChartPreview.tsx
+++ b/frontend/src/container/CreateAlertV2/QuerySection/ChartPreview/ChartPreview.tsx
@@ -51,7 +51,6 @@ function ChartPreview({ alertDef }: ChartPreviewProps): JSX.Element {
yAxisUnit={yAxisUnit || ''}
graphType={panelType || PANEL_TYPES.TIME_SERIES}
setQueryStatus={setQueryStatus}
- showSideLegend
additionalThresholds={thresholdState.thresholds}
/>
);
@@ -66,7 +65,6 @@ function ChartPreview({ alertDef }: ChartPreviewProps): JSX.Element {
yAxisUnit={yAxisUnit || ''}
graphType={panelType || PANEL_TYPES.TIME_SERIES}
setQueryStatus={setQueryStatus}
- showSideLegend
additionalThresholds={thresholdState.thresholds}
/>
);
diff --git a/frontend/src/container/CreateAlertV2/__tests__/utils.test.tsx b/frontend/src/container/CreateAlertV2/__tests__/utils.test.tsx
index 5ab1e63a1264..d480707a63e1 100644
--- a/frontend/src/container/CreateAlertV2/__tests__/utils.test.tsx
+++ b/frontend/src/container/CreateAlertV2/__tests__/utils.test.tsx
@@ -216,7 +216,7 @@ describe('CreateAlertV2 utils', () => {
multipleNotifications: ['email'],
reNotification: {
enabled: false,
- value: 1,
+ value: 30,
unit: UniversalYAxisUnit.MINUTES,
conditions: [],
},
diff --git a/frontend/src/container/CreateAlertV2/constants.ts b/frontend/src/container/CreateAlertV2/constants.ts
index 5049c8099b4e..b48af9acada5 100644
--- a/frontend/src/container/CreateAlertV2/constants.ts
+++ b/frontend/src/container/CreateAlertV2/constants.ts
@@ -22,7 +22,7 @@ const defaultNotificationSettings: PostableAlertRuleV2['notificationSettings'] =
groupBy: [],
renotify: {
enabled: false,
- interval: '1m',
+ interval: '30m',
alertStates: [],
},
usePolicy: false,
diff --git a/frontend/src/container/CreateAlertV2/context/constants.ts b/frontend/src/container/CreateAlertV2/context/constants.ts
index dabd9b1508fc..b6a50b06096c 100644
--- a/frontend/src/container/CreateAlertV2/context/constants.ts
+++ b/frontend/src/container/CreateAlertV2/context/constants.ts
@@ -172,6 +172,11 @@ export const ADVANCED_OPTIONS_TIME_UNIT_OPTIONS = [
{ value: UniversalYAxisUnit.HOURS, label: 'Hours' },
];
+export const RE_NOTIFICATION_TIME_UNIT_OPTIONS = [
+ { value: UniversalYAxisUnit.MINUTES, label: 'Minutes' },
+ { value: UniversalYAxisUnit.HOURS, label: 'Hours' },
+];
+
export const NOTIFICATION_MESSAGE_PLACEHOLDER =
'This alert is fired when the defined metric (current value: {{$value}}) crosses the threshold ({{$threshold}})';
@@ -184,7 +189,7 @@ export const INITIAL_NOTIFICATION_SETTINGS_STATE: NotificationSettingsState = {
multipleNotifications: [],
reNotification: {
enabled: false,
- value: 1,
+ value: 30,
unit: UniversalYAxisUnit.MINUTES,
conditions: [],
},
diff --git a/frontend/src/container/CreateAlertV2/utils.tsx b/frontend/src/container/CreateAlertV2/utils.tsx
index d0072681b721..2788d4a93844 100644
--- a/frontend/src/container/CreateAlertV2/utils.tsx
+++ b/frontend/src/container/CreateAlertV2/utils.tsx
@@ -27,16 +27,6 @@ import {
import { EVALUATION_WINDOW_TIMEFRAME } from './EvaluationSettings/constants';
import { GetCreateAlertLocalStateFromAlertDefReturn } from './types';
-// UI side feature flag
-export const showNewCreateAlertsPage = (): boolean =>
- localStorage.getItem('showNewCreateAlertsPage') === 'true';
-
-// UI side FF to switch between the 2 layouts of the create alert page
-// Layout 1 - Default layout
-// Layout 2 - Condensed layout
-export const showCondensedLayout = (): boolean =>
- localStorage.getItem('hideCondensedLayout') !== 'true';
-
export function Spinner(): JSX.Element | null {
const { isCreatingAlertRule, isUpdatingAlertRule } = useCreateAlertState();
@@ -198,10 +188,10 @@ export function getNotificationSettingsStateFromAlertDef(
(state) => state as 'firing' | 'nodata',
) || [];
const reNotificationValue = alertDef.notificationSettings?.renotify
- ? parseGoTime(alertDef.notificationSettings.renotify.interval || '1m').time
- : 1;
+ ? parseGoTime(alertDef.notificationSettings.renotify.interval || '30m').time
+ : 30;
const reNotificationUnit = alertDef.notificationSettings?.renotify
- ? parseGoTime(alertDef.notificationSettings.renotify.interval || '1m').unit
+ ? parseGoTime(alertDef.notificationSettings.renotify.interval || '30m').unit
: UniversalYAxisUnit.MINUTES;
return {
diff --git a/frontend/src/container/EditAlertV2/EditAlertV2.tsx b/frontend/src/container/EditAlertV2/EditAlertV2.tsx
index f02b2e363b37..4a434f51f08d 100644
--- a/frontend/src/container/EditAlertV2/EditAlertV2.tsx
+++ b/frontend/src/container/EditAlertV2/EditAlertV2.tsx
@@ -8,11 +8,10 @@ import { PostableAlertRuleV2 } from 'types/api/alerts/alertTypesV2';
import AlertCondition from '../CreateAlertV2/AlertCondition';
import { buildInitialAlertDef } from '../CreateAlertV2/context/utils';
-import EvaluationSettings from '../CreateAlertV2/EvaluationSettings';
import Footer from '../CreateAlertV2/Footer';
import NotificationSettings from '../CreateAlertV2/NotificationSettings';
import QuerySection from '../CreateAlertV2/QuerySection';
-import { showCondensedLayout, Spinner } from '../CreateAlertV2/utils';
+import { Spinner } from '../CreateAlertV2/utils';
interface EditAlertV2Props {
alertType?: AlertTypes;
@@ -33,15 +32,12 @@ function EditAlertV2({
useShareBuilderUrl({ defaultValue: currentQueryToRedirect });
- const showCondensedLayoutFlag = showCondensedLayout();
-
return (
<>
- {!showCondensedLayoutFlag ? <EvaluationSettings /> : null}
diff --git a/frontend/src/container/ListAlertRules/ListAlert.tsx b/frontend/src/container/ListAlertRules/ListAlert.tsx
index dafa333373bf..ea28a58b2f8e 100644
--- a/frontend/src/container/ListAlertRules/ListAlert.tsx
+++ b/frontend/src/container/ListAlertRules/ListAlert.tsx
@@ -1,6 +1,14 @@
/* eslint-disable react/display-name */
import { PlusOutlined } from '@ant-design/icons';
-import { Button, Dropdown, Flex, Input, MenuProps, Typography } from 'antd';
+import {
+ Button,
+ Dropdown,
+ Flex,
+ Input,
+ MenuProps,
+ Tag,
+ Typography,
+} from 'antd';
import type { ColumnsType } from 'antd/es/table/interface';
import saveAlertApi from 'api/alerts/save';
import logEvent from 'api/common/logEvent';
@@ -118,12 +126,16 @@ function ListAlert({ allAlertRules, refetch }: ListAlertProps): JSX.Element {
const newAlertMenuItems: MenuProps['items'] = [
{
key: 'new',
- label: 'Try the new experience',
+ label: (
+ <>
+ Try the new experience <Tag>Beta</Tag>
+ </>
+ ),
onClick: onClickNewAlertV2Handler,
},
{
key: 'classic',
- label: 'Continue with the current experience',
+ label: 'Continue with the classic experience',
onClick: onClickNewClassicAlertHandler,
},
];
diff --git a/frontend/src/container/RoutingPolicies/RoutingPolicies.tsx b/frontend/src/container/RoutingPolicies/RoutingPolicies.tsx
index b5cb3f08d4f5..f340e8e14783 100644
--- a/frontend/src/container/RoutingPolicies/RoutingPolicies.tsx
+++ b/frontend/src/container/RoutingPolicies/RoutingPolicies.tsx
@@ -88,6 +88,7 @@ function RoutingPolicies(): JSX.Element {
isRoutingPoliciesError={isErrorRoutingPolicies}
handlePolicyDetailsModalOpen={handlePolicyDetailsModalOpen}
handleDeleteModalOpen={handleDeleteModalOpen}
+ hasSearchTerm={(searchTerm?.length ?? 0) > 0}
/>
{policyDetailsModalState.isOpen && (
['columns'] = [
{
@@ -25,6 +26,7 @@ function RoutingPolicyList({
},
];
+ /* eslint-disable no-nested-ternary */
const localeEmptyState = useMemo(
() => (
@@ -41,12 +43,23 @@ function RoutingPolicyList({
Something went wrong while fetching routing policies.
+ ) : hasSearchTerm ? (
+
No matching routing policies found.
) : (
-
No routing policies found.
+
+ No routing policies yet,{' '}
+
+ Learn more here
+
+
)}
),
- [isRoutingPoliciesError],
+ [isRoutingPoliciesError, hasSearchTerm],
);
return (
diff --git a/frontend/src/container/RoutingPolicies/__tests__/RoutingPoliciesList.test.tsx b/frontend/src/container/RoutingPolicies/__tests__/RoutingPoliciesList.test.tsx
index 62d41fc76cfa..f8adeaa4d9da 100644
--- a/frontend/src/container/RoutingPolicies/__tests__/RoutingPoliciesList.test.tsx
+++ b/frontend/src/container/RoutingPolicies/__tests__/RoutingPoliciesList.test.tsx
@@ -28,6 +28,7 @@ describe('RoutingPoliciesList', () => {
isRoutingPoliciesError={useRoutingPolicesMockData.isErrorRoutingPolicies}
handlePolicyDetailsModalOpen={mockHandlePolicyDetailsModalOpen}
handleDeleteModalOpen={mockHandleDeleteModalOpen}
+ hasSearchTerm={false}
/>,
);
@@ -51,6 +52,7 @@ describe('RoutingPoliciesList', () => {
isRoutingPoliciesError={false}
handlePolicyDetailsModalOpen={mockHandlePolicyDetailsModalOpen}
handleDeleteModalOpen={mockHandleDeleteModalOpen}
+ hasSearchTerm={false}
/>,
);
// Check for loading spinner by class name
@@ -67,6 +69,7 @@ describe('RoutingPoliciesList', () => {
isRoutingPoliciesError
handlePolicyDetailsModalOpen={mockHandlePolicyDetailsModalOpen}
handleDeleteModalOpen={mockHandleDeleteModalOpen}
+ hasSearchTerm={false}
/>,
);
expect(
@@ -82,8 +85,9 @@ describe('RoutingPoliciesList', () => {
isRoutingPoliciesError={false}
handlePolicyDetailsModalOpen={mockHandlePolicyDetailsModalOpen}
handleDeleteModalOpen={mockHandleDeleteModalOpen}
+ hasSearchTerm={false}
/>,
);
- expect(screen.getByText('No routing policies found.')).toBeInTheDocument();
+ expect(screen.getByText('No routing policies yet,')).toBeInTheDocument();
});
});
diff --git a/frontend/src/container/RoutingPolicies/types.ts b/frontend/src/container/RoutingPolicies/types.ts
index 631d7b09858d..8fc5908f7986 100644
--- a/frontend/src/container/RoutingPolicies/types.ts
+++ b/frontend/src/container/RoutingPolicies/types.ts
@@ -37,6 +37,7 @@ export interface RoutingPolicyListProps {
isRoutingPoliciesError: boolean;
handlePolicyDetailsModalOpen: HandlePolicyDetailsModalOpen;
handleDeleteModalOpen: HandleDeleteModalOpen;
+ hasSearchTerm: boolean;
}
export interface RoutingPolicyListItemProps {
diff --git a/frontend/src/container/RoutingPolicies/utils.tsx b/frontend/src/container/RoutingPolicies/utils.tsx
index 725e226782a9..26cad317e576 100644
--- a/frontend/src/container/RoutingPolicies/utils.tsx
+++ b/frontend/src/container/RoutingPolicies/utils.tsx
@@ -5,10 +5,6 @@ import { SuccessResponseV2 } from 'types/api';
import { RoutingPolicy } from './types';
-export function showRoutingPoliciesPage(): boolean {
- return localStorage.getItem('showRoutingPoliciesPage') === 'true';
-}
-
export function mapApiResponseToRoutingPolicies(
response: SuccessResponseV2,
): RoutingPolicy[] {
diff --git a/frontend/src/pages/AlertList/index.tsx b/frontend/src/pages/AlertList/index.tsx
index 078db79f6ed3..c4ef230a0646 100644
--- a/frontend/src/pages/AlertList/index.tsx
+++ b/frontend/src/pages/AlertList/index.tsx
@@ -8,7 +8,6 @@ import ROUTES from 'constants/routes';
import AllAlertRules from 'container/ListAlertRules';
import { PlannedDowntime } from 'container/PlannedDowntime/PlannedDowntime';
import RoutingPolicies from 'container/RoutingPolicies';
-import { showRoutingPoliciesPage } from 'container/RoutingPolicies/utils';
import TriggeredAlerts from 'container/TriggeredAlerts';
import { useSafeNavigate } from 'hooks/useSafeNavigate';
import useUrlQuery from 'hooks/useUrlQuery';
@@ -28,36 +27,27 @@ function AllAlertList(): JSX.Element {
const search = urlQuery.get('search');
- const showRoutingPoliciesPageFlag = showRoutingPoliciesPage();
-
const configurationTab = useMemo(() => {
- if (showRoutingPoliciesPageFlag) {
- const tabs = [
- {
- label: 'Planned Downtime',
- key: 'planned-downtime',
- children: <PlannedDowntime />,
- },
- {
- label: 'Routing Policies',
- key: 'routing-policies',
- children: <RoutingPolicies />,
- },
- ];
- return (
-
- );
- }
+ const tabs = [
+ {
+ label: 'Planned Downtime',
+ key: 'planned-downtime',
+ children: <PlannedDowntime />,
+ },
+ {
+ label: 'Routing Policies',
+ key: 'routing-policies',
+ children: <RoutingPolicies />,
+ },
+ ];
return (
-
+
);
- }, [showRoutingPoliciesPageFlag]);
+ }, []);
const items: TabsProps['items'] = [
{
diff --git a/go.mod b/go.mod
index 32eee2547a71..0b70e74e0a40 100644
--- a/go.mod
+++ b/go.mod
@@ -127,7 +127,7 @@ require (
github.com/elastic/lunes v0.1.0 // indirect
github.com/emirpasic/gods v1.18.1 // indirect
github.com/envoyproxy/protoc-gen-validate v1.2.1 // indirect
- github.com/expr-lang/expr v1.17.5 // indirect
+ github.com/expr-lang/expr v1.17.5
github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/fsnotify/fsnotify v1.9.0 // indirect
@@ -338,3 +338,5 @@ require (
k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 // indirect
sigs.k8s.io/yaml v1.6.0 // indirect
)
+
+replace github.com/expr-lang/expr => github.com/SigNoz/expr v1.17.7-beta
diff --git a/go.sum b/go.sum
index db6a0ae3d544..f6c6a857f389 100644
--- a/go.sum
+++ b/go.sum
@@ -102,6 +102,8 @@ github.com/Masterminds/squirrel v1.5.4/go.mod h1:NNaOrjSoIDfDA40n7sr2tPNZRfjzjA4
github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY=
github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU=
+github.com/SigNoz/expr v1.17.7-beta h1:FyZkleM5dTQ0O6muQfwGpoH5A2ohmN/XTasRCO72gAA=
+github.com/SigNoz/expr v1.17.7-beta/go.mod h1:8/vRC7+7HBzESEqt5kKpYXxrxkr31SaO8r40VO/1IT4=
github.com/SigNoz/govaluate v0.0.0-20240203125216-988004ccc7fd h1:Bk43AsDYe0fhkbj57eGXx8H3ZJ4zhmQXBnrW523ktj8=
github.com/SigNoz/govaluate v0.0.0-20240203125216-988004ccc7fd/go.mod h1:nxRcH/OEdM8QxzH37xkGzomr1O0JpYBRS6pwjsWW6Pc=
github.com/SigNoz/signoz-otel-collector v0.129.4 h1:DGDu9y1I1FU+HX4eECPGmfhnXE4ys4yr7LL6znbf6to=
@@ -248,8 +250,6 @@ github.com/envoyproxy/go-control-plane/envoy v1.32.4/go.mod h1:Gzjc5k8JcJswLjAx1
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
github.com/envoyproxy/protoc-gen-validate v1.2.1 h1:DEo3O99U8j4hBFwbJfrz9VtgcDfUKS7KJ7spH3d86P8=
github.com/envoyproxy/protoc-gen-validate v1.2.1/go.mod h1:d/C80l/jxXLdfEIhX1W2TmLfsJ31lvEjwamM4DxlWXU=
-github.com/expr-lang/expr v1.17.5 h1:i1WrMvcdLF249nSNlpQZN1S6NXuW9WaOfF5tPi3aw3k=
-github.com/expr-lang/expr v1.17.5/go.mod h1:8/vRC7+7HBzESEqt5kKpYXxrxkr31SaO8r40VO/1IT4=
github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb h1:IT4JYU7k4ikYg1SCxNI1/Tieq/NFvh6dzLdgi7eu0tM=
github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb/go.mod h1:bH6Xx7IW64qjjJq8M2u4dxNaBiDfKK+z/3eGDpXEQhc=
github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4=
diff --git a/pkg/alertmanager/alertmanager.go b/pkg/alertmanager/alertmanager.go
index e38ddbe633e9..4c3ecce299f4 100644
--- a/pkg/alertmanager/alertmanager.go
+++ b/pkg/alertmanager/alertmanager.go
@@ -3,6 +3,8 @@ package alertmanager
import (
"context"
+ amConfig "github.com/prometheus/alertmanager/config"
+
"github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/factory"
"github.com/SigNoz/signoz/pkg/statsreporter"
@@ -26,7 +28,7 @@ type Alertmanager interface {
TestReceiver(context.Context, string, alertmanagertypes.Receiver) error
// TestAlert sends an alert to a list of receivers.
- TestAlert(ctx context.Context, orgID string, alert *alertmanagertypes.PostableAlert, receivers []string) error
+ TestAlert(ctx context.Context, orgID string, ruleID string, receiversMap map[*alertmanagertypes.PostableAlert][]string) error
// ListChannels lists all channels for the organization.
ListChannels(context.Context, string) ([]*alertmanagertypes.Channel, error)
@@ -59,6 +61,19 @@ type Alertmanager interface {
DeleteNotificationConfig(ctx context.Context, orgID valuer.UUID, ruleId string) error
+ // Notification Policy CRUD
+ CreateRoutePolicy(ctx context.Context, route *alertmanagertypes.PostableRoutePolicy) (*alertmanagertypes.GettableRoutePolicy, error)
+ CreateRoutePolicies(ctx context.Context, routeRequests []*alertmanagertypes.PostableRoutePolicy) ([]*alertmanagertypes.GettableRoutePolicy, error)
+ GetRoutePolicyByID(ctx context.Context, routeID string) (*alertmanagertypes.GettableRoutePolicy, error)
+ GetAllRoutePolicies(ctx context.Context) ([]*alertmanagertypes.GettableRoutePolicy, error)
+ UpdateRoutePolicyByID(ctx context.Context, routeID string, route *alertmanagertypes.PostableRoutePolicy) (*alertmanagertypes.GettableRoutePolicy, error)
+ DeleteRoutePolicyByID(ctx context.Context, routeID string) error
+ DeleteAllRoutePoliciesByRuleId(ctx context.Context, ruleId string) error
+ UpdateAllRoutePoliciesByRuleId(ctx context.Context, ruleId string, routes []*alertmanagertypes.PostableRoutePolicy) error
+
+ CreateInhibitRules(ctx context.Context, orgID valuer.UUID, rules []amConfig.InhibitRule) error
+ DeleteAllInhibitRulesByRuleId(ctx context.Context, orgID valuer.UUID, ruleId string) error
+
// Collects stats for the organization.
statsreporter.StatsCollector
}
diff --git a/pkg/alertmanager/alertmanagerserver/dispatcher.go b/pkg/alertmanager/alertmanagerserver/dispatcher.go
index f5063177534d..7f040b8e5b06 100644
--- a/pkg/alertmanager/alertmanagerserver/dispatcher.go
+++ b/pkg/alertmanager/alertmanagerserver/dispatcher.go
@@ -10,19 +10,17 @@ import (
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager"
"github.com/SigNoz/signoz/pkg/errors"
+ "github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
"github.com/prometheus/alertmanager/dispatch"
"github.com/prometheus/alertmanager/notify"
+ "github.com/prometheus/alertmanager/pkg/labels"
"github.com/prometheus/alertmanager/provider"
"github.com/prometheus/alertmanager/store"
"github.com/prometheus/alertmanager/types"
"github.com/prometheus/common/model"
)
-const (
- noDataLabel = model.LabelName("nodata")
-)
-
// Dispatcher sorts incoming alerts into aggregation groups and
// assigns the correct notifiers to each.
type Dispatcher struct {
@@ -46,6 +44,7 @@ type Dispatcher struct {
logger *slog.Logger
notificationManager nfmanager.NotificationManager
orgID string
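+ // receiverRoutes caches one synthetic dispatch.Route per receiver (see getOrCreateRoute)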
+ receiverRoutes map[string]*dispatch.Route
}
// We use the upstream Limits interface from Prometheus
@@ -90,6 +89,7 @@ func (d *Dispatcher) Run() {
d.mtx.Lock()
d.aggrGroupsPerRoute = map[*dispatch.Route]map[model.Fingerprint]*aggrGroup{}
+ d.receiverRoutes = map[string]*dispatch.Route{}
d.aggrGroupsNum = 0
d.metrics.aggrGroups.Set(0)
d.ctx, d.cancel = context.WithCancel(context.Background())
@@ -125,8 +125,14 @@ func (d *Dispatcher) run(it provider.AlertIterator) {
}
now := time.Now()
- for _, r := range d.route.Match(alert.Labels) {
- d.processAlert(alert, r)
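+ // resolve matching channels via the notification manager, then dispatch each through a per-receiver synthetic route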
+ channels, err := d.notificationManager.Match(d.ctx, d.orgID, getRuleIDFromAlert(alert), alert.Labels)
+ if err != nil {
+ d.logger.ErrorContext(d.ctx, "Error on alert match", "err", err)
+ continue
+ }
+ for _, channel := range channels {
+ route := d.getOrCreateRoute(channel)
+ d.processAlert(alert, route)
}
d.metrics.processingDuration.Observe(time.Since(now).Seconds())
@@ -266,6 +272,7 @@ type notifyFunc func(context.Context, ...*types.Alert) bool
// processAlert determines in which aggregation group the alert falls
// and inserts it.
+// A no-data alert will only have the ruleId and nodata labels
func (d *Dispatcher) processAlert(alert *types.Alert, route *dispatch.Route) {
ruleId := getRuleIDFromAlert(alert)
config, err := d.notificationManager.GetNotificationConfig(d.orgID, ruleId)
@@ -273,8 +280,14 @@ func (d *Dispatcher) processAlert(alert *types.Alert, route *dispatch.Route) {
d.logger.ErrorContext(d.ctx, "error getting alert notification config", "rule_id", ruleId, "error", err)
return
}
+ renotifyInterval := config.Renotify.RenotifyInterval
- groupLabels := getGroupLabels(alert, config.NotificationGroup)
+ groupLabels := getGroupLabels(alert, config.NotificationGroup, config.GroupByAll)
+
+ if alertmanagertypes.NoDataAlert(alert) {
+ renotifyInterval = config.Renotify.NoDataInterval
+ groupLabels[alertmanagertypes.NoDataLabel] = alert.Labels[alertmanagertypes.NoDataLabel] // create a new group key for no-data alerts
+ }
fp := groupLabels.Fingerprint()
@@ -299,12 +312,6 @@ func (d *Dispatcher) processAlert(alert *types.Alert, route *dispatch.Route) {
d.logger.ErrorContext(d.ctx, "Too many aggregation groups, cannot create new group for alert", "groups", d.aggrGroupsNum, "limit", limit, "alert", alert.Name())
return
}
- renotifyInterval := config.Renotify.RenotifyInterval
-
- if noDataAlert(alert) {
- renotifyInterval = config.Renotify.NoDataInterval
- groupLabels[noDataLabel] = alert.Labels[noDataLabel]
- }
ag = newAggrGroup(d.ctx, groupLabels, route, d.timeout, d.logger, renotifyInterval)
@@ -543,21 +550,35 @@ func deepCopyRouteOpts(opts dispatch.RouteOpts, renotify time.Duration) dispatch
return newOpts
}
-func getGroupLabels(alert *types.Alert, groups map[model.LabelName]struct{}) model.LabelSet {
+func getGroupLabels(alert *types.Alert, groups map[model.LabelName]struct{}, groupByAll bool) model.LabelSet {
groupLabels := model.LabelSet{}
for ln, lv := range alert.Labels {
- if _, ok := groups[ln]; ok {
+ if _, ok := groups[ln]; ok || groupByAll {
groupLabels[ln] = lv
}
}
-
return groupLabels
}
-func noDataAlert(alert *types.Alert) bool {
- if _, ok := alert.Labels[noDataLabel]; ok {
- return true
- } else {
- return false
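+// getOrCreateRoute returns the cached synthetic route for a receiver, creating it with default group timings on first use.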
+func (d *Dispatcher) getOrCreateRoute(receiver string) *dispatch.Route {
+ d.mtx.Lock()
+ defer d.mtx.Unlock()
+ if route, exists := d.receiverRoutes[receiver]; exists {
+ return route
}
+ route := &dispatch.Route{
+ RouteOpts: dispatch.RouteOpts{
+ Receiver: receiver,
+ GroupWait: 30 * time.Second,
+ GroupInterval: 5 * time.Minute,
+ GroupByAll: false,
+ },
+ Matchers: labels.Matchers{{
+ Name: "__receiver__",
+ Value: receiver,
+ Type: labels.MatchEqual,
+ }},
+ }
+ d.receiverRoutes[receiver] = route
+ return route
}
diff --git a/pkg/alertmanager/alertmanagerserver/distpatcher_test.go b/pkg/alertmanager/alertmanagerserver/distpatcher_test.go
index 36369a35049d..5f084ec21470 100644
--- a/pkg/alertmanager/alertmanagerserver/distpatcher_test.go
+++ b/pkg/alertmanager/alertmanagerserver/distpatcher_test.go
@@ -10,21 +10,31 @@ import (
"testing"
"time"
+ "github.com/SigNoz/signoz/pkg/alertmanager/nfmanager"
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager/nfmanagertest"
+ "github.com/SigNoz/signoz/pkg/alertmanager/nfmanager/nfroutingstore/nfroutingstoretest"
+ "github.com/SigNoz/signoz/pkg/alertmanager/nfmanager/rulebasednotification"
+ "github.com/SigNoz/signoz/pkg/factory"
+ "github.com/SigNoz/signoz/pkg/instrumentation/instrumentationtest"
+ "github.com/SigNoz/signoz/pkg/types"
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
+ "github.com/SigNoz/signoz/pkg/valuer"
+ "github.com/prometheus/alertmanager/config"
"github.com/prometheus/alertmanager/dispatch"
+ "github.com/prometheus/alertmanager/notify"
+ "github.com/prometheus/alertmanager/provider/mem"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
- "github.com/stretchr/testify/require"
- "github.com/prometheus/alertmanager/config"
- "github.com/prometheus/alertmanager/notify"
- "github.com/prometheus/alertmanager/provider/mem"
- "github.com/prometheus/alertmanager/types"
+ "github.com/stretchr/testify/require"
)
+func createTestProviderSettings() factory.ProviderSettings {
+ return instrumentationtest.New().ToProviderSettings()
+}
+
func TestAggrGroup(t *testing.T) {
lset := model.LabelSet{
"a": "v1",
@@ -59,7 +69,7 @@ func TestAggrGroup(t *testing.T) {
nfManager.SetMockConfig(orgId, ruleId, &notificationConfig)
var (
- a1 = &types.Alert{
+ a1 = &alertmanagertypes.Alert{
Alert: model.Alert{
Labels: model.LabelSet{
"a": "v1",
@@ -72,7 +82,7 @@ func TestAggrGroup(t *testing.T) {
},
UpdatedAt: time.Now(),
}
- a2 = &types.Alert{
+ a2 = &alertmanagertypes.Alert{
Alert: model.Alert{
Labels: model.LabelSet{
"a": "v1",
@@ -85,7 +95,7 @@ func TestAggrGroup(t *testing.T) {
},
UpdatedAt: time.Now(),
}
- a3 = &types.Alert{
+ a3 = &alertmanagertypes.Alert{
Alert: model.Alert{
Labels: model.LabelSet{
"a": "v1",
@@ -104,10 +114,10 @@ func TestAggrGroup(t *testing.T) {
last = time.Now()
current = time.Now()
lastCurMtx = &sync.Mutex{}
- alertsCh = make(chan types.AlertSlice)
+ alertsCh = make(chan alertmanagertypes.AlertSlice)
)
- ntfy := func(ctx context.Context, alerts ...*types.Alert) bool {
+ ntfy := func(ctx context.Context, alerts ...*alertmanagertypes.Alert) bool {
// Validate that the context is properly populated.
if _, ok := notify.Now(ctx); !ok {
t.Errorf("now missing")
@@ -131,12 +141,12 @@ func TestAggrGroup(t *testing.T) {
current = time.Now().Add(-time.Millisecond)
lastCurMtx.Unlock()
- alertsCh <- types.AlertSlice(alerts)
+ alertsCh <- alertmanagertypes.AlertSlice(alerts)
return true
}
- removeEndsAt := func(as types.AlertSlice) types.AlertSlice {
+ removeEndsAt := func(as alertmanagertypes.AlertSlice) alertmanagertypes.AlertSlice {
for i, a := range as {
ac := *a
ac.EndsAt = time.Time{}
@@ -163,7 +173,7 @@ func TestAggrGroup(t *testing.T) {
if s < opts.GroupWait {
t.Fatalf("received batch too early after %v", s)
}
- exp := removeEndsAt(types.AlertSlice{a1})
+ exp := removeEndsAt(alertmanagertypes.AlertSlice{a1})
sort.Sort(batch)
if !reflect.DeepEqual(batch, exp) {
@@ -186,7 +196,7 @@ func TestAggrGroup(t *testing.T) {
if s < opts.GroupInterval {
t.Fatalf("received batch too early after %v", s)
}
- exp := removeEndsAt(types.AlertSlice{a1, a3})
+ exp := removeEndsAt(alertmanagertypes.AlertSlice{a1, a3})
sort.Sort(batch)
if !reflect.DeepEqual(batch, exp) {
@@ -213,7 +223,7 @@ func TestAggrGroup(t *testing.T) {
t.Fatalf("expected immediate alert but received none")
case batch := <-alertsCh:
- exp := removeEndsAt(types.AlertSlice{a1, a2})
+ exp := removeEndsAt(alertmanagertypes.AlertSlice{a1, a2})
sort.Sort(batch)
if !reflect.DeepEqual(batch, exp) {
@@ -236,7 +246,7 @@ func TestAggrGroup(t *testing.T) {
if s < opts.GroupInterval {
t.Fatalf("received batch too early after %v", s)
}
- exp := removeEndsAt(types.AlertSlice{a1, a2, a3})
+ exp := removeEndsAt(alertmanagertypes.AlertSlice{a1, a2, a3})
sort.Sort(batch)
if !reflect.DeepEqual(batch, exp) {
@@ -249,7 +259,7 @@ func TestAggrGroup(t *testing.T) {
a1r := *a1
a1r.EndsAt = time.Now()
ag.insert(&a1r)
- exp := append(types.AlertSlice{&a1r}, removeEndsAt(types.AlertSlice{a2, a3})...)
+ exp := append(alertmanagertypes.AlertSlice{&a1r}, removeEndsAt(alertmanagertypes.AlertSlice{a2, a3})...)
select {
case <-time.After(2 * opts.GroupInterval):
@@ -271,7 +281,7 @@ func TestAggrGroup(t *testing.T) {
// Resolve all remaining alerts, they should be removed after the next batch was sent.
// Do not add a1r as it should have been deleted following the previous batch.
a2r, a3r := *a2, *a3
- resolved := types.AlertSlice{&a2r, &a3r}
+ resolved := alertmanagertypes.AlertSlice{&a2r, &a3r}
for _, a := range resolved {
a.EndsAt = time.Now()
ag.insert(a)
@@ -303,7 +313,7 @@ func TestAggrGroup(t *testing.T) {
}
func TestGroupLabels(t *testing.T) {
- a := &types.Alert{
+ a := &alertmanagertypes.Alert{
Alert: model.Alert{
Labels: model.LabelSet{
"a": "v1",
@@ -328,7 +338,7 @@ func TestGroupLabels(t *testing.T) {
"b": "v2",
}
- ls := getGroupLabels(a, route.RouteOpts.GroupBy)
+ ls := getGroupLabels(a, route.RouteOpts.GroupBy, false)
if !reflect.DeepEqual(ls, expLs) {
t.Fatalf("expected labels are %v, but got %v", expLs, ls)
@@ -336,35 +346,25 @@ func TestGroupLabels(t *testing.T) {
}
func TestAggrRouteMap(t *testing.T) {
+ // Simplified config with just receivers and default route - no hardcoded routing rules
confData := `receivers:
- name: 'slack'
-- name: 'email'
+- name: 'email'
- name: 'pagerduty'
route:
group_by: ['alertname']
- group_wait: 10ms
- group_interval: 10ms
- receiver: 'slack'
- routes:
- - matchers:
- - 'ruleId=~"ruleId-OtherAlert|ruleId-TestingAlert"'
- receiver: 'slack'
- - matchers:
- - 'ruleId=~"ruleId-HighLatency|ruleId-HighErrorRate"'
- receiver: 'email'
- continue: true
- - matchers:
- - 'ruleId="ruleId-HighLatency"'
- receiver: 'pagerduty'`
+ group_wait: 1m
+ group_interval: 1m
+ receiver: 'slack'`
conf, err := config.Load(confData)
if err != nil {
t.Fatal(err)
}
-
- logger := promslog.NewNopLogger()
+ providerSettings := createTestProviderSettings()
+ logger := providerSettings.Logger
route := dispatch.NewRoute(conf.Route, nil)
- marker := types.NewMarker(prometheus.NewRegistry())
+ marker := alertmanagertypes.NewMarker(prometheus.NewRegistry())
alerts, err := mem.NewAlerts(context.Background(), marker, time.Hour, nil, logger, nil)
if err != nil {
t.Fatal(err)
@@ -372,21 +372,78 @@ route:
defer alerts.Close()
timeout := func(d time.Duration) time.Duration { return time.Duration(0) }
- recorder := &recordStage{alerts: make(map[string]map[model.Fingerprint]*types.Alert)}
+ recorder := &recordStage{alerts: make(map[string]map[model.Fingerprint]*alertmanagertypes.Alert)}
metrics := NewDispatcherMetrics(false, prometheus.NewRegistry())
- nfManager := nfmanagertest.NewMock()
+ store := nfroutingstoretest.NewMockSQLRouteStore()
+ store.MatchExpectationsInOrder(false)
+ nfManager, err := rulebasednotification.New(context.Background(), providerSettings, nfmanager.Config{}, store)
+ if err != nil {
+ t.Fatal(err)
+ }
orgId := "test-org"
+
+ ctx := context.Background()
+ routes := []*alertmanagertypes.RoutePolicy{
+ {
+ Identifiable: types.Identifiable{
+ ID: valuer.GenerateUUID(),
+ },
+ Expression: `ruleId == "ruleId-OtherAlert" && threshold.name == "critical"`,
+ ExpressionKind: alertmanagertypes.PolicyBasedExpression,
+ Name: "ruleId-OtherAlert",
+ Description: "Route for OtherAlert to Slack",
+ Enabled: true,
+ OrgID: orgId,
+ Channels: []string{"slack"},
+ },
+ {
+ Identifiable: types.Identifiable{
+ ID: valuer.GenerateUUID(),
+ },
+ Expression: `ruleId == "ruleId-OtherAlert" && threshold.name == "warning"`,
+ ExpressionKind: alertmanagertypes.PolicyBasedExpression,
+ Name: "ruleId-OtherAlert",
+ Description: "Route for cluster aa and service api to Email",
+ Enabled: true,
+ OrgID: orgId,
+ Channels: []string{"email"},
+ },
+ {
+ Identifiable: types.Identifiable{
+ ID: valuer.GenerateUUID(),
+ },
+ Expression: `ruleId == "ruleId-HighLatency" && threshold.name == "critical"`,
+ ExpressionKind: alertmanagertypes.PolicyBasedExpression,
+ Name: "ruleId-HighLatency",
+ Description: "High priority route for HighLatency to PagerDuty",
+ Enabled: true,
+ OrgID: orgId,
+ Channels: []string{"pagerduty"},
+ },
+ }
+ // Set up SQL mock expectations for the CreateBatch call
+ store.ExpectCreateBatch(routes)
+ err = nfManager.CreateRoutePolicies(ctx, orgId, routes)
+ require.NoError(t, err)
+
+ // Set up expectations for getting routes during matching (multiple calls expected)
+
dispatcher := NewDispatcher(alerts, route, recorder, marker, timeout, nil, logger, metrics, nfManager, orgId)
go dispatcher.Run()
defer dispatcher.Stop()
- inputAlerts := []*types.Alert{
- newAlert(model.LabelSet{"ruleId": "ruleId-OtherAlert", "cluster": "cc", "service": "dd"}),
+ inputAlerts := []*alertmanagertypes.Alert{
+ newAlert(model.LabelSet{"ruleId": "ruleId-OtherAlert", "cluster": "cc", "service": "dd", "threshold.name": "critical"}),
+ newAlert(model.LabelSet{"ruleId": "ruleId-OtherAlert", "cluster": "dc", "service": "dd", "threshold.name": "critical"}),
newAlert(model.LabelSet{"env": "testing", "ruleId": "ruleId-TestingAlert", "service": "api", "instance": "inst1"}),
newAlert(model.LabelSet{"env": "prod", "ruleId": "ruleId-HighErrorRate", "cluster": "aa", "service": "api", "instance": "inst1"}),
newAlert(model.LabelSet{"env": "prod", "ruleId": "ruleId-HighErrorRate", "cluster": "aa", "service": "api", "instance": "inst2"}),
newAlert(model.LabelSet{"env": "prod", "ruleId": "ruleId-HighErrorRate", "cluster": "bb", "service": "api", "instance": "inst1"}),
- newAlert(model.LabelSet{"env": "prod", "ruleId": "ruleId-HighLatency", "cluster": "bb", "service": "db", "kafka": "yes", "instance": "inst3"}),
- newAlert(model.LabelSet{"env": "prod", "ruleId": "ruleId-HighLatency", "cluster": "bb", "service": "db", "kafka": "yes", "instance": "inst4"}),
+ newAlert(model.LabelSet{"env": "prod", "ruleId": "ruleId-HighLatency", "cluster": "aa", "service": "api", "kafka": "yes", "instance": "inst3"}),
+ newAlert(model.LabelSet{"env": "prod", "ruleId": "ruleId-HighLatency", "cluster": "bb", "service": "db", "kafka": "yes", "instance": "inst4", "threshold.name": "critical"}),
+ newAlert(model.LabelSet{"env": "prod", "ruleId": "ruleId-HighLatency", "cluster": "bb", "service": "test-db", "kafka": "yes", "instance": "inst4", "threshold.name": "critical"}),
+ }
+ for i := 0; i < 9; i++ {
+ store.ExpectGetAllByName(orgId, string(inputAlerts[i].Labels["ruleId"]), routes)
}
notiConfigs := map[string]alertmanagertypes.NotificationConfig{
"ruleId-OtherAlert": {
@@ -398,6 +455,7 @@ route:
Renotify: alertmanagertypes.ReNotificationConfig{
RenotifyInterval: 10,
},
+ UsePolicy: false,
},
"ruleId-TestingAlert": {
NotificationGroup: map[model.LabelName]struct{}{
@@ -408,6 +466,7 @@ route:
Renotify: alertmanagertypes.ReNotificationConfig{
RenotifyInterval: 11,
},
+ UsePolicy: false,
},
"ruleId-HighErrorRate": {
NotificationGroup: map[model.LabelName]struct{}{
@@ -418,6 +477,7 @@ route:
Renotify: alertmanagertypes.ReNotificationConfig{
RenotifyInterval: 12,
},
+ UsePolicy: false,
},
"ruleId-HighLatency": {
NotificationGroup: map[model.LabelName]struct{}{
@@ -428,11 +488,13 @@ route:
Renotify: alertmanagertypes.ReNotificationConfig{
RenotifyInterval: 13,
},
+ UsePolicy: false,
},
}
for ruleID, config := range notiConfigs {
- nfManager.SetMockConfig(orgId, ruleID, &config)
+ err := nfManager.SetNotificationConfig(orgId, ruleID, &config)
+ require.NoError(t, err)
}
err = alerts.Put(inputAlerts...)
if err != nil {
@@ -440,15 +502,15 @@ route:
}
// Let alerts get processed.
- for i := 0; len(recorder.Alerts()) != 9 && i < 10; i++ {
- time.Sleep(200 * time.Millisecond)
+ for i := 0; len(recorder.Alerts()) != 4; i++ {
+ time.Sleep(400 * time.Millisecond)
}
- require.Len(t, recorder.Alerts(), 9)
+ require.Len(t, recorder.Alerts(), 4)
alertGroups, receivers := dispatcher.Groups(
func(*dispatch.Route) bool {
return true
- }, func(*types.Alert, time.Time) bool {
+ }, func(*alertmanagertypes.Alert, time.Time) bool {
return true
},
)
@@ -468,11 +530,11 @@ route:
routeIDsFound[routeID] = true
expectedReceiver := ""
switch routeID {
- case "{}/{ruleId=~\"ruleId-OtherAlert|ruleId-TestingAlert\"}/0":
+ case "{__receiver__=\"slack\"}":
expectedReceiver = "slack"
- case "{}/{ruleId=~\"ruleId-HighLatency|ruleId-HighErrorRate\"}/1":
+ case "{__receiver__=\"email\"}":
expectedReceiver = "email"
- case "{}/{ruleId=\"ruleId-HighLatency\"}/2":
+ case "{__receiver__=\"pagerduty\"}":
expectedReceiver = "pagerduty"
}
if expectedReceiver != "" {
@@ -482,13 +544,12 @@ route:
totalAggrGroups += len(groups)
}
- require.Equal(t, 7, totalAggrGroups, "Should have exactly 7 aggregation groups")
+ require.Equal(t, 4, totalAggrGroups, "Should have exactly 4 aggregation groups")
// Verify specific route group counts
expectedGroupCounts := map[string]int{
- "{}/{ruleId=~\"ruleId-OtherAlert|ruleId-TestingAlert\"}/0": 2, // OtherAlert + TestingAlert
- "{}/{ruleId=~\"ruleId-HighLatency|ruleId-HighErrorRate\"}/1": 4, // 3 HighErrorRate + 1 HighLatency
- "{}/{ruleId=\"ruleId-HighLatency\"}/2": 1, // 1 HighLatency group
+ "{__receiver__=\"slack\"}": 2,
+ "{__receiver__=\"pagerduty\"}": 2,
}
for route, groups := range aggrGroupsPerRoute {
@@ -501,79 +562,31 @@ route:
require.Equal(t, AlertGroups{
&AlertGroup{
- Alerts: []*types.Alert{inputAlerts[5], inputAlerts[6]},
- Labels: model.LabelSet{
- "kafka": "yes",
- "ruleId": "ruleId-HighLatency",
- "service": "db",
- },
- Receiver: "email",
- GroupKey: "{}/{ruleId=~\"ruleId-HighLatency|ruleId-HighErrorRate\"}:{kafka=\"yes\", ruleId=\"ruleId-HighLatency\", service=\"db\"}",
- RouteID: "{}/{ruleId=~\"ruleId-HighLatency|ruleId-HighErrorRate\"}/1",
- Renotify: 13,
- },
- &AlertGroup{
- Alerts: []*types.Alert{inputAlerts[5], inputAlerts[6]},
+ Alerts: []*alertmanagertypes.Alert{inputAlerts[7]},
Labels: model.LabelSet{
"kafka": "yes",
"ruleId": "ruleId-HighLatency",
"service": "db",
},
Receiver: "pagerduty",
- GroupKey: "{}/{ruleId=\"ruleId-HighLatency\"}:{kafka=\"yes\", ruleId=\"ruleId-HighLatency\", service=\"db\"}",
- RouteID: "{}/{ruleId=\"ruleId-HighLatency\"}/2",
+ GroupKey: "{__receiver__=\"pagerduty\"}:{kafka=\"yes\", ruleId=\"ruleId-HighLatency\", service=\"db\"}",
+ RouteID: "{__receiver__=\"pagerduty\"}",
Renotify: 13,
},
&AlertGroup{
- Alerts: []*types.Alert{inputAlerts[1]},
+ Alerts: []*alertmanagertypes.Alert{inputAlerts[8]},
Labels: model.LabelSet{
- "instance": "inst1",
- "ruleId": "ruleId-TestingAlert",
- "service": "api",
+ "kafka": "yes",
+ "ruleId": "ruleId-HighLatency",
+ "service": "test-db",
},
- Renotify: 11,
- Receiver: "slack",
- GroupKey: "{}/{ruleId=~\"ruleId-OtherAlert|ruleId-TestingAlert\"}:{instance=\"inst1\", ruleId=\"ruleId-TestingAlert\", service=\"api\"}",
- RouteID: "{}/{ruleId=~\"ruleId-OtherAlert|ruleId-TestingAlert\"}/0",
+ Receiver: "pagerduty",
+ GroupKey: "{__receiver__=\"pagerduty\"}:{kafka=\"yes\", ruleId=\"ruleId-HighLatency\", service=\"test-db\"}",
+ RouteID: "{__receiver__=\"pagerduty\"}",
+ Renotify: 13,
},
&AlertGroup{
- Alerts: []*types.Alert{inputAlerts[2]},
- Labels: model.LabelSet{
- "cluster": "aa",
- "instance": "inst1",
- "ruleId": "ruleId-HighErrorRate",
- },
- Renotify: 12,
- Receiver: "email",
- GroupKey: "{}/{ruleId=~\"ruleId-HighLatency|ruleId-HighErrorRate\"}:{cluster=\"aa\", instance=\"inst1\", ruleId=\"ruleId-HighErrorRate\"}",
- RouteID: "{}/{ruleId=~\"ruleId-HighLatency|ruleId-HighErrorRate\"}/1",
- },
- &AlertGroup{
- Alerts: []*types.Alert{inputAlerts[3]},
- Labels: model.LabelSet{
- "cluster": "aa",
- "instance": "inst2",
- "ruleId": "ruleId-HighErrorRate",
- },
- Renotify: 12,
- Receiver: "email",
- GroupKey: "{}/{ruleId=~\"ruleId-HighLatency|ruleId-HighErrorRate\"}:{cluster=\"aa\", instance=\"inst2\", ruleId=\"ruleId-HighErrorRate\"}",
- RouteID: "{}/{ruleId=~\"ruleId-HighLatency|ruleId-HighErrorRate\"}/1",
- },
- &AlertGroup{
- Alerts: []*types.Alert{inputAlerts[4]},
- Labels: model.LabelSet{
- "cluster": "bb",
- "instance": "inst1",
- "ruleId": "ruleId-HighErrorRate",
- },
- Renotify: 12,
- Receiver: "email",
- GroupKey: "{}/{ruleId=~\"ruleId-HighLatency|ruleId-HighErrorRate\"}:{cluster=\"bb\", instance=\"inst1\", ruleId=\"ruleId-HighErrorRate\"}",
- RouteID: "{}/{ruleId=~\"ruleId-HighLatency|ruleId-HighErrorRate\"}/1",
- },
- &AlertGroup{
- Alerts: []*types.Alert{inputAlerts[0]},
+ Alerts: []*alertmanagertypes.Alert{inputAlerts[0]},
Labels: model.LabelSet{
"cluster": "cc",
"ruleId": "ruleId-OtherAlert",
@@ -581,51 +594,50 @@ route:
},
Renotify: 10,
Receiver: "slack",
- GroupKey: "{}/{ruleId=~\"ruleId-OtherAlert|ruleId-TestingAlert\"}:{cluster=\"cc\", ruleId=\"ruleId-OtherAlert\", service=\"dd\"}",
- RouteID: "{}/{ruleId=~\"ruleId-OtherAlert|ruleId-TestingAlert\"}/0",
+ GroupKey: "{__receiver__=\"slack\"}:{cluster=\"cc\", ruleId=\"ruleId-OtherAlert\", service=\"dd\"}",
+ RouteID: "{__receiver__=\"slack\"}",
+ },
+ &AlertGroup{
+ Alerts: []*alertmanagertypes.Alert{inputAlerts[1]},
+ Labels: model.LabelSet{
+ "cluster": "dc",
+ "service": "dd",
+ "ruleId": "ruleId-OtherAlert",
+ },
+ Renotify: 10,
+ Receiver: "slack",
+ GroupKey: "{__receiver__=\"slack\"}:{cluster=\"dc\", ruleId=\"ruleId-OtherAlert\", service=\"dd\"}",
+ RouteID: "{__receiver__=\"slack\"}",
},
}, alertGroups)
require.Equal(t, map[model.Fingerprint][]string{
inputAlerts[0].Fingerprint(): {"slack"},
inputAlerts[1].Fingerprint(): {"slack"},
- inputAlerts[2].Fingerprint(): {"email"},
- inputAlerts[3].Fingerprint(): {"email"},
- inputAlerts[4].Fingerprint(): {"email"},
- inputAlerts[5].Fingerprint(): {"email", "pagerduty"},
- inputAlerts[6].Fingerprint(): {"email", "pagerduty"},
+ inputAlerts[7].Fingerprint(): {"pagerduty"},
+ inputAlerts[8].Fingerprint(): {"pagerduty"},
}, receivers)
}
func TestGroupsWithNodata(t *testing.T) {
+ // Simplified config with just receivers and default route - no hardcoded routing rules
confData := `receivers:
- name: 'slack'
-- name: 'email'
+- name: 'email'
- name: 'pagerduty'
route:
group_by: ['alertname']
group_wait: 10ms
group_interval: 10ms
- receiver: 'slack'
- routes:
- - matchers:
- - 'ruleId=~"ruleId-OtherAlert|ruleId-TestingAlert"'
- receiver: 'slack'
- - matchers:
- - 'ruleId=~"ruleId-HighLatency|ruleId-HighErrorRate"'
- receiver: 'email'
- continue: true
- - matchers:
- - 'ruleId="ruleId-HighLatency"'
- receiver: 'pagerduty'`
+ receiver: 'slack'`
conf, err := config.Load(confData)
if err != nil {
t.Fatal(err)
}
-
- logger := promslog.NewNopLogger()
+ providerSettings := createTestProviderSettings()
+ logger := providerSettings.Logger
route := dispatch.NewRoute(conf.Route, nil)
- marker := types.NewMarker(prometheus.NewRegistry())
+ marker := alertmanagertypes.NewMarker(prometheus.NewRegistry())
alerts, err := mem.NewAlerts(context.Background(), marker, time.Hour, nil, logger, nil)
if err != nil {
t.Fatal(err)
@@ -633,30 +645,107 @@ route:
defer alerts.Close()
timeout := func(d time.Duration) time.Duration { return time.Duration(0) }
- recorder := &recordStage{alerts: make(map[string]map[model.Fingerprint]*types.Alert)}
+ recorder := &recordStage{alerts: make(map[string]map[model.Fingerprint]*alertmanagertypes.Alert)}
metrics := NewDispatcherMetrics(false, prometheus.NewRegistry())
- nfManager := nfmanagertest.NewMock()
+ store := nfroutingstoretest.NewMockSQLRouteStore()
+ store.MatchExpectationsInOrder(false)
+ nfManager, err := rulebasednotification.New(context.Background(), providerSettings, nfmanager.Config{}, store)
+ if err != nil {
+ t.Fatal(err)
+ }
orgId := "test-org"
+
+ ctx := context.Background()
+ routes := []*alertmanagertypes.RoutePolicy{
+ {
+ Identifiable: types.Identifiable{
+ ID: valuer.GenerateUUID(),
+ },
+ Expression: `ruleId == "ruleId-OtherAlert" && threshold.name == "critical"`,
+ ExpressionKind: alertmanagertypes.PolicyBasedExpression,
+ Name: "ruleId-OtherAlert",
+ Description: "Route for OtherAlert critical to Slack",
+ Enabled: true,
+ OrgID: orgId,
+ Channels: []string{"slack"},
+ },
+ {
+ Identifiable: types.Identifiable{
+ ID: valuer.GenerateUUID(),
+ },
+ Expression: `ruleId == "ruleId-TestingAlert" && threshold.name == "warning"`,
+ ExpressionKind: alertmanagertypes.PolicyBasedExpression,
+ Name: "ruleId-TestingAlert",
+ Description: "Route for TestingAlert warning to Slack",
+ Enabled: true,
+ OrgID: orgId,
+ Channels: []string{"slack"},
+ },
+ {
+ Identifiable: types.Identifiable{
+ ID: valuer.GenerateUUID(),
+ },
+ Expression: `ruleId == "ruleId-HighErrorRate" && threshold.name == "critical"`,
+ ExpressionKind: alertmanagertypes.PolicyBasedExpression,
+ Name: "ruleId-HighErrorRate",
+ Description: "Route for HighErrorRate critical to Email",
+ Enabled: true,
+ OrgID: orgId,
+ Channels: []string{"email"},
+ },
+ {
+ Identifiable: types.Identifiable{
+ ID: valuer.GenerateUUID(),
+ },
+ Expression: `ruleId == "ruleId-HighLatency" && threshold.name == "warning"`,
+ ExpressionKind: alertmanagertypes.PolicyBasedExpression,
+ Name: "ruleId-HighLatency",
+ Description: "Route for HighLatency warning to Email",
+ Enabled: true,
+ OrgID: orgId,
+ Channels: []string{"email"},
+ },
+ {
+ Identifiable: types.Identifiable{
+ ID: valuer.GenerateUUID(),
+ },
+ Expression: `ruleId == "ruleId-HighLatency" && threshold.name == "critical"`,
+ ExpressionKind: alertmanagertypes.PolicyBasedExpression,
+ Name: "ruleId-HighLatency",
+ Description: "Route for HighLatency critical to PagerDuty",
+ Enabled: true,
+ OrgID: orgId,
+ Channels: []string{"pagerduty"},
+ },
+ }
+ // Set up SQL mock expectations for the CreateBatch call
+ store.ExpectCreateBatch(routes)
+ err = nfManager.CreateRoutePolicies(ctx, orgId, routes)
+ require.NoError(t, err)
+
dispatcher := NewDispatcher(alerts, route, recorder, marker, timeout, nil, logger, metrics, nfManager, orgId)
go dispatcher.Run()
defer dispatcher.Stop()
- // Create alerts. the dispatcher will automatically create the groups.
- inputAlerts := []*types.Alert{
- // Matches the parent route.
- newAlert(model.LabelSet{"ruleId": "ruleId-OtherAlert", "cluster": "cc", "service": "dd"}),
- // Matches the first sub-route.
- newAlert(model.LabelSet{"env": "testing", "ruleId": "ruleId-TestingAlert", "service": "api", "instance": "inst1"}),
- // Matches the second sub-route.
- newAlert(model.LabelSet{"env": "prod", "ruleId": "ruleId-HighErrorRate", "cluster": "aa", "service": "api", "instance": "inst1"}),
- newAlert(model.LabelSet{"env": "prod", "ruleId": "ruleId-HighErrorRate", "cluster": "aa", "service": "api", "instance": "inst2"}),
- // Matches the second sub-route.
- newAlert(model.LabelSet{"env": "prod", "ruleId": "ruleId-HighErrorRate", "cluster": "bb", "service": "api", "instance": "inst1"}),
- // Matches the second and third sub-route.
- newAlert(model.LabelSet{"env": "prod", "ruleId": "ruleId-HighLatency", "cluster": "bb", "service": "db", "kafka": "yes", "instance": "inst3"}),
- newAlert(model.LabelSet{"env": "prod", "ruleId": "ruleId-HighLatency", "cluster": "bb", "service": "db", "kafka": "yes", "instance": "inst4"}),
+ inputAlerts := []*alertmanagertypes.Alert{
+ newAlert(model.LabelSet{"ruleId": "ruleId-OtherAlert", "cluster": "cc", "service": "dd", "threshold.name": "critical"}),
+ newAlert(model.LabelSet{"env": "testing", "ruleId": "ruleId-TestingAlert", "service": "api", "instance": "inst1", "threshold.name": "warning"}),
+ newAlert(model.LabelSet{"env": "prod", "ruleId": "ruleId-HighErrorRate", "cluster": "aa", "service": "api", "instance": "inst1", "threshold.name": "critical"}),
+ newAlert(model.LabelSet{"env": "prod", "ruleId": "ruleId-HighErrorRate", "cluster": "aa", "service": "api", "instance": "inst2", "threshold.name": "critical"}),
+ newAlert(model.LabelSet{"env": "prod", "ruleId": "ruleId-HighErrorRate", "cluster": "bb", "service": "api", "instance": "inst1", "threshold.name": "critical"}),
+ newAlert(model.LabelSet{"env": "prod", "ruleId": "ruleId-HighLatency", "cluster": "bb", "service": "db", "kafka": "yes", "instance": "inst3", "threshold.name": "warning"}),
+ newAlert(model.LabelSet{"env": "prod", "ruleId": "ruleId-HighLatency", "cluster": "bb", "service": "db", "kafka": "yes", "instance": "inst4", "threshold.name": "critical"}),
newAlert(model.LabelSet{"ruleId": "ruleId-HighLatency", "nodata": "true"}),
}
+ // Set up expectations with route filtering for each alert
+ store.ExpectGetAllByName(orgId, "ruleId-OtherAlert", []*alertmanagertypes.RoutePolicy{routes[0]})
+ store.ExpectGetAllByName(orgId, "ruleId-TestingAlert", []*alertmanagertypes.RoutePolicy{routes[1]})
+ store.ExpectGetAllByName(orgId, "ruleId-HighErrorRate", []*alertmanagertypes.RoutePolicy{routes[2]})
+ store.ExpectGetAllByName(orgId, "ruleId-HighErrorRate", []*alertmanagertypes.RoutePolicy{routes[2]})
+ store.ExpectGetAllByName(orgId, "ruleId-HighErrorRate", []*alertmanagertypes.RoutePolicy{routes[2]})
+ store.ExpectGetAllByName(orgId, "ruleId-HighLatency", []*alertmanagertypes.RoutePolicy{routes[3], routes[4]})
+ store.ExpectGetAllByName(orgId, "ruleId-HighLatency", []*alertmanagertypes.RoutePolicy{routes[3], routes[4]})
+ store.ExpectGetAllByName(orgId, "ruleId-HighLatency", []*alertmanagertypes.RoutePolicy{routes[3], routes[4]})
notiConfigs := map[string]alertmanagertypes.NotificationConfig{
"ruleId-OtherAlert": {
NotificationGroup: map[model.LabelName]struct{}{
@@ -667,6 +756,7 @@ route:
Renotify: alertmanagertypes.ReNotificationConfig{
RenotifyInterval: 10,
},
+ UsePolicy: false,
},
"ruleId-TestingAlert": {
NotificationGroup: map[model.LabelName]struct{}{
@@ -677,6 +767,7 @@ route:
Renotify: alertmanagertypes.ReNotificationConfig{
RenotifyInterval: 11,
},
+ UsePolicy: false,
},
"ruleId-HighErrorRate": {
NotificationGroup: map[model.LabelName]struct{}{
@@ -687,6 +778,7 @@ route:
Renotify: alertmanagertypes.ReNotificationConfig{
RenotifyInterval: 12,
},
+ UsePolicy: false,
},
"ruleId-HighLatency": {
NotificationGroup: map[model.LabelName]struct{}{
@@ -698,160 +790,327 @@ route:
RenotifyInterval: 13,
NoDataInterval: 14,
},
+ UsePolicy: false,
},
}
for ruleID, config := range notiConfigs {
- nfManager.SetMockConfig(orgId, ruleID, &config)
+ err := nfManager.SetNotificationConfig(orgId, ruleID, &config)
+ require.NoError(t, err)
}
err = alerts.Put(inputAlerts...)
if err != nil {
t.Fatal(err)
}
- // Let alerts get processed.
- for i := 0; len(recorder.Alerts()) != 11 && i < 15; i++ {
- time.Sleep(200 * time.Millisecond)
+ for i := 0; len(recorder.Alerts()) != 9; i++ {
+ time.Sleep(400 * time.Millisecond)
}
- require.Len(t, recorder.Alerts(), 11)
+ require.Len(t, recorder.Alerts(), 9)
alertGroups, receivers := dispatcher.Groups(
func(*dispatch.Route) bool {
return true
- }, func(*types.Alert, time.Time) bool {
+ }, func(*alertmanagertypes.Alert, time.Time) bool {
return true
},
)
- require.Equal(t, AlertGroups{
- &AlertGroup{
- Alerts: []*types.Alert{inputAlerts[7]},
- Labels: model.LabelSet{
- "ruleId": "ruleId-HighLatency",
- "nodata": "true",
- },
- Receiver: "email",
- GroupKey: "{}/{ruleId=~\"ruleId-HighLatency|ruleId-HighErrorRate\"}:{nodata=\"true\", ruleId=\"ruleId-HighLatency\"}",
- RouteID: "{}/{ruleId=~\"ruleId-HighLatency|ruleId-HighErrorRate\"}/1",
- Renotify: 14,
- },
- &AlertGroup{
- Alerts: []*types.Alert{inputAlerts[7]},
- Labels: model.LabelSet{
- "ruleId": "ruleId-HighLatency",
- "nodata": "true",
- },
- Receiver: "pagerduty",
- GroupKey: "{}/{ruleId=\"ruleId-HighLatency\"}:{nodata=\"true\", ruleId=\"ruleId-HighLatency\"}",
- RouteID: "{}/{ruleId=\"ruleId-HighLatency\"}/2",
- Renotify: 14,
- },
- &AlertGroup{
- Alerts: []*types.Alert{inputAlerts[5], inputAlerts[6]},
- Labels: model.LabelSet{
- "kafka": "yes",
- "ruleId": "ruleId-HighLatency",
- "service": "db",
- },
- Receiver: "email",
- GroupKey: "{}/{ruleId=~\"ruleId-HighLatency|ruleId-HighErrorRate\"}:{kafka=\"yes\", ruleId=\"ruleId-HighLatency\", service=\"db\"}",
- RouteID: "{}/{ruleId=~\"ruleId-HighLatency|ruleId-HighErrorRate\"}/1",
- Renotify: 13,
- },
- &AlertGroup{
- Alerts: []*types.Alert{inputAlerts[5], inputAlerts[6]},
- Labels: model.LabelSet{
- "kafka": "yes",
- "ruleId": "ruleId-HighLatency",
- "service": "db",
- },
- Receiver: "pagerduty",
- GroupKey: "{}/{ruleId=\"ruleId-HighLatency\"}:{kafka=\"yes\", ruleId=\"ruleId-HighLatency\", service=\"db\"}",
- RouteID: "{}/{ruleId=\"ruleId-HighLatency\"}/2",
- Renotify: 13,
- },
- &AlertGroup{
- Alerts: []*types.Alert{inputAlerts[1]},
- Labels: model.LabelSet{
- "instance": "inst1",
- "ruleId": "ruleId-TestingAlert",
- "service": "api",
- },
- Receiver: "slack",
- GroupKey: "{}/{ruleId=~\"ruleId-OtherAlert|ruleId-TestingAlert\"}:{instance=\"inst1\", ruleId=\"ruleId-TestingAlert\", service=\"api\"}",
- RouteID: "{}/{ruleId=~\"ruleId-OtherAlert|ruleId-TestingAlert\"}/0",
- Renotify: 11,
- },
- &AlertGroup{
- Alerts: []*types.Alert{inputAlerts[2]},
- Labels: model.LabelSet{
- "cluster": "aa",
- "instance": "inst1",
- "ruleId": "ruleId-HighErrorRate",
- },
- Receiver: "email",
- GroupKey: "{}/{ruleId=~\"ruleId-HighLatency|ruleId-HighErrorRate\"}:{cluster=\"aa\", instance=\"inst1\", ruleId=\"ruleId-HighErrorRate\"}",
- RouteID: "{}/{ruleId=~\"ruleId-HighLatency|ruleId-HighErrorRate\"}/1",
- Renotify: 12,
- },
- &AlertGroup{
- Alerts: []*types.Alert{inputAlerts[3]},
- Labels: model.LabelSet{
- "cluster": "aa",
- "instance": "inst2",
- "ruleId": "ruleId-HighErrorRate",
- },
- Receiver: "email",
- GroupKey: "{}/{ruleId=~\"ruleId-HighLatency|ruleId-HighErrorRate\"}:{cluster=\"aa\", instance=\"inst2\", ruleId=\"ruleId-HighErrorRate\"}",
- RouteID: "{}/{ruleId=~\"ruleId-HighLatency|ruleId-HighErrorRate\"}/1",
- Renotify: 12,
- },
- &AlertGroup{
- Alerts: []*types.Alert{inputAlerts[4]},
- Labels: model.LabelSet{
- "cluster": "bb",
- "instance": "inst1",
- "ruleId": "ruleId-HighErrorRate",
- },
- Receiver: "email",
- GroupKey: "{}/{ruleId=~\"ruleId-HighLatency|ruleId-HighErrorRate\"}:{cluster=\"bb\", instance=\"inst1\", ruleId=\"ruleId-HighErrorRate\"}",
- RouteID: "{}/{ruleId=~\"ruleId-HighLatency|ruleId-HighErrorRate\"}/1",
- Renotify: 12,
- },
- &AlertGroup{
- Alerts: []*types.Alert{inputAlerts[0]},
- Labels: model.LabelSet{
- "cluster": "cc",
- "ruleId": "ruleId-OtherAlert",
- "service": "dd",
- },
- Receiver: "slack",
- GroupKey: "{}/{ruleId=~\"ruleId-OtherAlert|ruleId-TestingAlert\"}:{cluster=\"cc\", ruleId=\"ruleId-OtherAlert\", service=\"dd\"}",
- RouteID: "{}/{ruleId=~\"ruleId-OtherAlert|ruleId-TestingAlert\"}/0",
- Renotify: 10,
- },
- }, alertGroups)
- require.Equal(t, map[model.Fingerprint][]string{
- inputAlerts[0].Fingerprint(): {"slack"},
- inputAlerts[1].Fingerprint(): {"slack"},
- inputAlerts[2].Fingerprint(): {"email"},
- inputAlerts[3].Fingerprint(): {"email"},
- inputAlerts[4].Fingerprint(): {"email"},
- inputAlerts[5].Fingerprint(): {"email", "pagerduty"},
- inputAlerts[6].Fingerprint(): {"email", "pagerduty"},
+ dispatcher.mtx.RLock()
+ aggrGroupsPerRoute := dispatcher.aggrGroupsPerRoute
+ dispatcher.mtx.RUnlock()
+
+ require.NotEmpty(t, aggrGroupsPerRoute, "Should have aggregation groups per route")
+
+ routeIDsFound := make(map[string]bool)
+ totalAggrGroups := 0
+
+ for route, groups := range aggrGroupsPerRoute {
+ routeID := route.ID()
+ routeIDsFound[routeID] = true
+ expectedReceiver := ""
+ switch routeID {
+ case "{__receiver__=\"slack\"}":
+ expectedReceiver = "slack"
+ case "{__receiver__=\"email\"}":
+ expectedReceiver = "email"
+ case "{__receiver__=\"pagerduty\"}":
+ expectedReceiver = "pagerduty"
+ }
+ if expectedReceiver != "" {
+ require.Equal(t, expectedReceiver, route.RouteOpts.Receiver,
+ "Route %s should have receiver %s", routeID, expectedReceiver)
+ }
+ totalAggrGroups += len(groups)
+ }
+
+ require.Equal(t, 9, totalAggrGroups, "Should have exactly 9 aggregation groups")
+
+ expectedGroupCounts := map[string]int{
+ "{__receiver__=\"slack\"}": 2,
+ "{__receiver__=\"email\"}": 5,
+ "{__receiver__=\"pagerduty\"}": 2,
+ }
+
+ for route, groups := range aggrGroupsPerRoute {
+ routeID := route.ID()
+ if expectedCount, exists := expectedGroupCounts[routeID]; exists {
+ require.Equal(t, expectedCount, len(groups),
+ "Route %s should have %d groups, got %d", routeID, expectedCount, len(groups))
+ }
+ }
+
+ // Verify alert groups contain expected alerts
+ require.Len(t, alertGroups, 9)
+
+ // Verify receivers mapping - exact expectations based on actual routing behavior
+ expectedReceivers := map[model.Fingerprint][]string{
+ inputAlerts[0].Fingerprint(): {"slack"}, // OtherAlert critical -> slack
+ inputAlerts[1].Fingerprint(): {"slack"}, // TestingAlert warning -> slack
+ inputAlerts[2].Fingerprint(): {"email"}, // HighErrorRate critical -> email
+ inputAlerts[3].Fingerprint(): {"email"}, // HighErrorRate critical -> email
+ inputAlerts[4].Fingerprint(): {"email"}, // HighErrorRate critical -> email
+ inputAlerts[5].Fingerprint(): {"email"}, // HighLatency warning -> email
+ inputAlerts[6].Fingerprint(): {"pagerduty"},
inputAlerts[7].Fingerprint(): {"email", "pagerduty"},
- }, receivers)
+ }
+ require.Equal(t, expectedReceivers, receivers)
+}
+
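+// TestGroupsWithNotificationPolicy verifies policy-based routing: alerts are matched against route-policy
+// expressions (cluster, service, threshold.name) instead of per-rule channel routes.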
+func TestGroupsWithNotificationPolicy(t *testing.T) {
+ // Simplified config with just receivers and default route - no hardcoded routing rules
+ confData := `receivers:
+- name: 'slack'
+- name: 'email'
+- name: 'pagerduty'
+
+route:
+ group_by: ['alertname']
+ group_wait: 10ms
+ group_interval: 10ms
+ receiver: 'slack'`
+ conf, err := config.Load(confData)
+ if err != nil {
+ t.Fatal(err)
+ }
+ providerSettings := createTestProviderSettings()
+ logger := providerSettings.Logger
+ route := dispatch.NewRoute(conf.Route, nil)
+ marker := alertmanagertypes.NewMarker(prometheus.NewRegistry())
+ alerts, err := mem.NewAlerts(context.Background(), marker, time.Hour, nil, logger, nil)
+ if err != nil {
+ t.Fatal(err)
+ }
+ defer alerts.Close()
+
+ timeout := func(d time.Duration) time.Duration { return time.Duration(0) }
+ recorder := &recordStage{alerts: make(map[string]map[model.Fingerprint]*alertmanagertypes.Alert)}
+ metrics := NewDispatcherMetrics(false, prometheus.NewRegistry())
+ store := nfroutingstoretest.NewMockSQLRouteStore()
+ store.MatchExpectationsInOrder(false)
+ nfManager, err := rulebasednotification.New(context.Background(), providerSettings, nfmanager.Config{}, store)
+ if err != nil {
+ t.Fatal(err)
+ }
+ orgId := "test-org"
+
+ ctx := context.Background()
+ routes := []*alertmanagertypes.RoutePolicy{
+ {
+ Identifiable: types.Identifiable{
+ ID: valuer.GenerateUUID(),
+ },
+ Expression: `cluster == "bb" && threshold.name == "critical"`,
+ ExpressionKind: alertmanagertypes.PolicyBasedExpression,
+ Name: "ruleId-OtherAlert",
+ Description: "Route for OtherAlert critical to Slack",
+ Enabled: true,
+ OrgID: orgId,
+ Channels: []string{"slack"},
+ },
+ {
+ Identifiable: types.Identifiable{
+ ID: valuer.GenerateUUID(),
+ },
+ Expression: `service == "db" && threshold.name == "critical"`,
+ ExpressionKind: alertmanagertypes.PolicyBasedExpression,
+ Name: "ruleId-TestingAlert",
+ Description: "Route for TestingAlert warning to Slack",
+ Enabled: true,
+ OrgID: orgId,
+ Channels: []string{"slack"},
+ },
+ {
+ Identifiable: types.Identifiable{
+ ID: valuer.GenerateUUID(),
+ },
+ Expression: `cluster == "bb" && instance == "inst1"`,
+ ExpressionKind: alertmanagertypes.PolicyBasedExpression,
+ Name: "ruleId-HighErrorRate",
+ Description: "Route for HighErrorRate critical to Email",
+ Enabled: true,
+ OrgID: orgId,
+ Channels: []string{"email"},
+ },
+ }
+ // Set up SQL mock expectations for the CreateBatch call
+ store.ExpectCreateBatch(routes)
+ err = nfManager.CreateRoutePolicies(ctx, orgId, routes)
+ require.NoError(t, err)
+
+ dispatcher := NewDispatcher(alerts, route, recorder, marker, timeout, nil, logger, metrics, nfManager, orgId)
+ go dispatcher.Run()
+ defer dispatcher.Stop()
+
+ inputAlerts := []*alertmanagertypes.Alert{
+ newAlert(model.LabelSet{"ruleId": "ruleId-OtherAlert", "cluster": "cc", "service": "db", "threshold.name": "critical"}),
+ newAlert(model.LabelSet{"env": "testing", "ruleId": "ruleId-TestingAlert", "service": "api", "instance": "inst1", "threshold.name": "warning"}),
+ newAlert(model.LabelSet{"env": "prod", "ruleId": "ruleId-HighErrorRate", "cluster": "aa", "service": "api", "instance": "inst1", "threshold.name": "critical"}),
+ newAlert(model.LabelSet{"env": "prod", "ruleId": "ruleId-HighErrorRate", "cluster": "aa", "service": "api", "instance": "inst2", "threshold.name": "critical"}),
+ newAlert(model.LabelSet{"env": "prod", "ruleId": "ruleId-HighErrorRate", "cluster": "bb", "service": "api", "instance": "inst1", "threshold.name": "critical"}),
+ newAlert(model.LabelSet{"env": "prod", "ruleId": "ruleId-HighLatency", "cluster": "bb", "service": "db", "kafka": "yes", "instance": "inst1", "threshold.name": "warning"}),
+ newAlert(model.LabelSet{"env": "prod", "ruleId": "ruleId-HighLatency", "cluster": "bb", "service": "db", "kafka": "yes", "instance": "inst4", "threshold.name": "critical"}),
+ newAlert(model.LabelSet{"ruleId": "ruleId-HighLatency", "nodata": "true"}),
+ }
+ // Set up expectations with route filtering for each alert
+ for i := 0; i < len(inputAlerts); i++ {
+ store.ExpectGetAllByKindAndOrgID(orgId, alertmanagertypes.PolicyBasedExpression, routes)
+ }
+ notiConfigs := map[string]alertmanagertypes.NotificationConfig{
+ "ruleId-OtherAlert": {
+ NotificationGroup: map[model.LabelName]struct{}{
+ model.LabelName("ruleId"): {},
+ model.LabelName("cluster"): {},
+ model.LabelName("service"): {},
+ },
+ Renotify: alertmanagertypes.ReNotificationConfig{
+ RenotifyInterval: 10,
+ },
+ UsePolicy: true,
+ },
+ "ruleId-TestingAlert": {
+ NotificationGroup: map[model.LabelName]struct{}{
+ model.LabelName("ruleId"): {},
+ model.LabelName("service"): {},
+ model.LabelName("instance"): {},
+ },
+ Renotify: alertmanagertypes.ReNotificationConfig{
+ RenotifyInterval: 11,
+ },
+ UsePolicy: true,
+ },
+ "ruleId-HighErrorRate": {
+ NotificationGroup: map[model.LabelName]struct{}{
+ model.LabelName("ruleId"): {},
+ model.LabelName("cluster"): {},
+ model.LabelName("instance"): {},
+ },
+ Renotify: alertmanagertypes.ReNotificationConfig{
+ RenotifyInterval: 12,
+ },
+ UsePolicy: true,
+ },
+ "ruleId-HighLatency": {
+ NotificationGroup: map[model.LabelName]struct{}{
+ model.LabelName("ruleId"): {},
+ model.LabelName("service"): {},
+ model.LabelName("kafka"): {},
+ },
+ Renotify: alertmanagertypes.ReNotificationConfig{
+ RenotifyInterval: 13,
+ NoDataInterval: 14,
+ },
+ UsePolicy: true,
+ },
+ }
+
+ for ruleID, config := range notiConfigs {
+ config := config // copy so each rule gets its own config, not the shared loop variable's address
+ err := nfManager.SetNotificationConfig(orgId, ruleID, &config)
+ require.NoError(t, err)
+ }
+ err = alerts.Put(inputAlerts...)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ for i := 0; len(recorder.Alerts()) != 5 && i < 15; i++ {
+ time.Sleep(400 * time.Millisecond)
+ }
+ require.Len(t, recorder.Alerts(), 5)
+
+ alertGroups, receivers := dispatcher.Groups(
+ func(*dispatch.Route) bool {
+ return true
+ }, func(*alertmanagertypes.Alert, time.Time) bool {
+ return true
+ },
+ )
+
+ dispatcher.mtx.RLock()
+ aggrGroupsPerRoute := dispatcher.aggrGroupsPerRoute
+ dispatcher.mtx.RUnlock()
+
+ require.NotEmpty(t, aggrGroupsPerRoute, "Should have aggregation groups per route")
+
+ routeIDsFound := make(map[string]bool)
+ totalAggrGroups := 0
+
+ for route, groups := range aggrGroupsPerRoute {
+ routeID := route.ID()
+ routeIDsFound[routeID] = true
+ expectedReceiver := ""
+ switch routeID {
+ case "{__receiver__=\"slack\"}":
+ expectedReceiver = "slack"
+ case "{__receiver__=\"email\"}":
+ expectedReceiver = "email"
+ case "{__receiver__=\"pagerduty\"}":
+ expectedReceiver = "pagerduty"
+ }
+ if expectedReceiver != "" {
+ require.Equal(t, expectedReceiver, route.RouteOpts.Receiver,
+ "Route %s should have receiver %s", routeID, expectedReceiver)
+ }
+ totalAggrGroups += len(groups)
+ }
+
+ require.Equal(t, 5, totalAggrGroups, "Should have exactly 5 aggregation groups")
+
+ expectedGroupCounts := map[string]int{
+ "{__receiver__=\"slack\"}": 3,
+ "{__receiver__=\"email\"}": 2,
+ }
+
+ for route, groups := range aggrGroupsPerRoute {
+ routeID := route.ID()
+ if expectedCount, exists := expectedGroupCounts[routeID]; exists {
+ require.Equal(t, expectedCount, len(groups),
+ "Route %s should have %d groups, got %d", routeID, expectedCount, len(groups))
+ }
+ }
+
+ // Verify alert groups contain expected alerts
+ require.Len(t, alertGroups, 5)
+
+ // Verify the receivers mapping produced by policy-based routing (routes matched by expression, not by ruleId)
+ expectedReceivers := map[model.Fingerprint][]string{
+ inputAlerts[0].Fingerprint(): {"slack"},
+ inputAlerts[6].Fingerprint(): {"slack"},
+ inputAlerts[4].Fingerprint(): {"email", "slack"},
+ inputAlerts[5].Fingerprint(): {"email"},
+ }
+ require.Equal(t, expectedReceivers, receivers)
}
type recordStage struct {
mtx sync.RWMutex
- alerts map[string]map[model.Fingerprint]*types.Alert
+ alerts map[string]map[model.Fingerprint]*alertmanagertypes.Alert
}
-func (r *recordStage) Alerts() []*types.Alert {
+func (r *recordStage) Alerts() []*alertmanagertypes.Alert {
r.mtx.RLock()
defer r.mtx.RUnlock()
- alerts := make([]*types.Alert, 0)
+ alerts := make([]*alertmanagertypes.Alert, 0)
for k := range r.alerts {
for _, a := range r.alerts[k] {
alerts = append(alerts, a)
@@ -860,7 +1119,7 @@ func (r *recordStage) Alerts() []*types.Alert {
return alerts
}
-func (r *recordStage) Exec(ctx context.Context, l *slog.Logger, alerts ...*types.Alert) (context.Context, []*types.Alert, error) {
+func (r *recordStage) Exec(ctx context.Context, l *slog.Logger, alerts ...*alertmanagertypes.Alert) (context.Context, []*alertmanagertypes.Alert, error) {
r.mtx.Lock()
defer r.mtx.Unlock()
gk, ok := notify.GroupKey(ctx)
@@ -868,7 +1127,7 @@ func (r *recordStage) Exec(ctx context.Context, l *slog.Logger, alerts ...*types
panic("GroupKey not present!")
}
if _, ok := r.alerts[gk]; !ok {
- r.alerts[gk] = make(map[model.Fingerprint]*types.Alert)
+ r.alerts[gk] = make(map[model.Fingerprint]*alertmanagertypes.Alert)
}
for _, a := range alerts {
r.alerts[gk][a.Fingerprint()] = a
@@ -883,8 +1142,8 @@ var (
t1 = t0.Add(2 * time.Minute)
)
-func newAlert(labels model.LabelSet) *types.Alert {
- return &types.Alert{
+func newAlert(labels model.LabelSet) *alertmanagertypes.Alert {
+ return &alertmanagertypes.Alert{
Alert: model.Alert{
Labels: labels,
Annotations: model.LabelSet{"foo": "bar"},
@@ -899,7 +1158,7 @@ func newAlert(labels model.LabelSet) *types.Alert {
func TestDispatcherRace(t *testing.T) {
logger := promslog.NewNopLogger()
- marker := types.NewMarker(prometheus.NewRegistry())
+ marker := alertmanagertypes.NewMarker(prometheus.NewRegistry())
alerts, err := mem.NewAlerts(context.Background(), marker, time.Hour, nil, logger, nil)
if err != nil {
t.Fatal(err)
@@ -917,56 +1176,94 @@ func TestDispatcherRace(t *testing.T) {
func TestDispatcherRaceOnFirstAlertNotDeliveredWhenGroupWaitIsZero(t *testing.T) {
const numAlerts = 5000
+ confData := `receivers:
+- name: 'slack'
+- name: 'email'
+- name: 'pagerduty'
- logger := promslog.NewNopLogger()
- marker := types.NewMarker(prometheus.NewRegistry())
+route:
+ group_by: ['alertname']
+ group_wait: 1h
+ group_interval: 1h
+ receiver: 'slack'`
+ conf, err := config.Load(confData)
+ if err != nil {
+ t.Fatal(err)
+ }
+ route := dispatch.NewRoute(conf.Route, nil)
+ providerSettings := createTestProviderSettings()
+ logger := providerSettings.Logger
+ marker := alertmanagertypes.NewMarker(prometheus.NewRegistry())
alerts, err := mem.NewAlerts(context.Background(), marker, time.Hour, nil, logger, nil)
if err != nil {
t.Fatal(err)
}
defer alerts.Close()
+ timeout := func(d time.Duration) time.Duration { return d }
+ recorder := &recordStage{alerts: make(map[string]map[model.Fingerprint]*alertmanagertypes.Alert)}
+ metrics := NewDispatcherMetrics(false, prometheus.NewRegistry())
+ store := nfroutingstoretest.NewMockSQLRouteStore()
+ store.MatchExpectationsInOrder(false)
+ nfManager, err := rulebasednotification.New(context.Background(), providerSettings, nfmanager.Config{}, store)
+ if err != nil {
+ t.Fatal(err)
+ }
+ orgId := "test-org"
- route := &dispatch.Route{
- RouteOpts: dispatch.RouteOpts{
- Receiver: "default",
- GroupBy: map[model.LabelName]struct{}{"ruleId": {}},
- GroupWait: 0,
- GroupInterval: 1 * time.Hour, // Should never hit in this test.
- RepeatInterval: 1 * time.Hour, // Should never hit in this test.
- },
+ for i := 0; i < numAlerts; i++ {
+ ruleId := fmt.Sprintf("Alert_%d", i)
+
+ notifConfig := alertmanagertypes.NotificationConfig{
+ NotificationGroup: map[model.LabelName]struct{}{
+ model.LabelName("ruleId"): {},
+ },
+ Renotify: alertmanagertypes.ReNotificationConfig{
+ RenotifyInterval: 1 * time.Hour,
+ },
+ UsePolicy: false,
+ }
+ routePolicy := &alertmanagertypes.RoutePolicy{
+ Identifiable: types.Identifiable{
+ ID: valuer.GenerateUUID(),
+ },
+ Expression: fmt.Sprintf(`ruleId == "%s"`, ruleId),
+ ExpressionKind: alertmanagertypes.PolicyBasedExpression,
+ Name: ruleId,
+ Description: fmt.Sprintf("Route %s to Slack", ruleId),
+ Enabled: true,
+ OrgID: orgId,
+ Channels: []string{"slack"},
+ }
+
+ store.ExpectGetAllByName(orgId, ruleId, []*alertmanagertypes.RoutePolicy{routePolicy})
+ err := nfManager.SetNotificationConfig(orgId, ruleId, ¬ifConfig)
+ require.NoError(t, err)
}
- timeout := func(d time.Duration) time.Duration { return d }
- recorder := &recordStage{alerts: make(map[string]map[model.Fingerprint]*types.Alert)}
- metrics := NewDispatcherMetrics(false, prometheus.NewRegistry())
- nfManager := nfmanagertest.NewMock()
- dispatcher := NewDispatcher(alerts, route, recorder, marker, timeout, nil, logger, metrics, nfManager, "test-org")
+ dispatcher := NewDispatcher(alerts, route, recorder, marker, timeout, nil, logger, metrics, nfManager, orgId)
go dispatcher.Run()
defer dispatcher.Stop()
- // Push all alerts.
for i := 0; i < numAlerts; i++ {
- alert := newAlert(model.LabelSet{"ruleId": model.LabelValue(fmt.Sprintf("Alert_%d", i))})
+ ruleId := fmt.Sprintf("Alert_%d", i)
+ alert := newAlert(model.LabelSet{"ruleId": model.LabelValue(ruleId)})
require.NoError(t, alerts.Put(alert))
}
- // Wait until the alerts have been notified or the waiting timeout expires.
for deadline := time.Now().Add(5 * time.Second); time.Now().Before(deadline); {
if len(recorder.Alerts()) >= numAlerts {
break
}
- // Throttle.
time.Sleep(10 * time.Millisecond)
}
- // We expect all alerts to be notified immediately, since they all belong to different groups.
require.Len(t, recorder.Alerts(), numAlerts)
}
func TestDispatcher_DoMaintenance(t *testing.T) {
r := prometheus.NewRegistry()
- marker := types.NewMarker(r)
+ marker := alertmanagertypes.NewMarker(r)
alerts, err := mem.NewAlerts(context.Background(), marker, time.Minute, nil, promslog.NewNopLogger(), nil)
if err != nil {
@@ -981,7 +1278,7 @@ func TestDispatcher_DoMaintenance(t *testing.T) {
},
}
timeout := func(d time.Duration) time.Duration { return d }
- recorder := &recordStage{alerts: make(map[string]map[model.Fingerprint]*types.Alert)}
+ recorder := &recordStage{alerts: make(map[string]map[model.Fingerprint]*alertmanagertypes.Alert)}
ctx := context.Background()
metrics := NewDispatcherMetrics(false, r)
@@ -997,7 +1294,7 @@ func TestDispatcher_DoMaintenance(t *testing.T) {
aggrGroups[route][aggrGroup1.fingerprint()] = aggrGroup1
dispatcher.aggrGroupsPerRoute = aggrGroups
// Must run otherwise doMaintenance blocks on aggrGroup1.stop().
- go aggrGroup1.run(func(context.Context, ...*types.Alert) bool { return true })
+ go aggrGroup1.run(func(context.Context, ...*alertmanagertypes.Alert) bool { return true })
// Insert a marker for the aggregation group's group key.
marker.SetMuted(route.ID(), aggrGroup1.GroupKey(), []string{"weekends"})
diff --git a/pkg/alertmanager/alertmanagerserver/server.go b/pkg/alertmanager/alertmanagerserver/server.go
index d4c0ddad7215..208a5ee8880a 100644
--- a/pkg/alertmanager/alertmanagerserver/server.go
+++ b/pkg/alertmanager/alertmanagerserver/server.go
@@ -2,6 +2,9 @@ package alertmanagerserver
import (
"context"
+ "fmt"
+ "github.com/prometheus/alertmanager/types"
+ "golang.org/x/sync/errgroup"
"log/slog"
"strings"
"sync"
@@ -321,39 +324,104 @@ func (server *Server) SetConfig(ctx context.Context, alertmanagerConfig *alertma
}
func (server *Server) TestReceiver(ctx context.Context, receiver alertmanagertypes.Receiver) error {
- return alertmanagertypes.TestReceiver(ctx, receiver, alertmanagernotify.NewReceiverIntegrations, server.alertmanagerConfig, server.tmpl, server.logger, alertmanagertypes.NewTestAlert(receiver, time.Now(), time.Now()))
+ testAlert := alertmanagertypes.NewTestAlert(receiver, time.Now(), time.Now())
+ return alertmanagertypes.TestReceiver(ctx, receiver, alertmanagernotify.NewReceiverIntegrations, server.alertmanagerConfig, server.tmpl, server.logger, testAlert.Labels, testAlert)
}
-func (server *Server) TestAlert(ctx context.Context, postableAlert *alertmanagertypes.PostableAlert, receivers []string) error {
- alerts, err := alertmanagertypes.NewAlertsFromPostableAlerts(alertmanagertypes.PostableAlerts{postableAlert}, time.Duration(server.srvConfig.Global.ResolveTimeout), time.Now())
+func (server *Server) TestAlert(ctx context.Context, receiversMap map[*alertmanagertypes.PostableAlert][]string, config *alertmanagertypes.NotificationConfig) error {
+ if len(receiversMap) == 0 {
+ return errors.Newf(errors.TypeInvalidInput, errors.CodeInvalidInput,
+ "expected at least 1 alert, got 0")
+ }
+
+ postableAlerts := make(alertmanagertypes.PostableAlerts, 0, len(receiversMap))
+ for alert := range receiversMap {
+ postableAlerts = append(postableAlerts, alert)
+ }
+
+ alerts, err := alertmanagertypes.NewAlertsFromPostableAlerts(
+ postableAlerts,
+ time.Duration(server.srvConfig.Global.ResolveTimeout),
+ time.Now(),
+ )
if err != nil {
- return errors.Join(err...)
+ return errors.Newf(errors.TypeInvalidInput, errors.CodeInvalidInput,
+ "failed to construct alerts from postable alerts: %v", err)
}
- if len(alerts) != 1 {
- return errors.Newf(errors.TypeInvalidInput, errors.CodeInvalidInput, "expected 1 alert, got %d", len(alerts))
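+ // alertGroup collects test alerts that share the same notification-group labels, together with the union of their receivers.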
+ type alertGroup struct {
+ groupLabels model.LabelSet
+ alerts []*types.Alert
+ receivers map[string]struct{}
}
- ch := make(chan error, len(receivers))
- for _, receiverName := range receivers {
- go func(receiverName string) {
- receiver, err := server.alertmanagerConfig.GetReceiver(receiverName)
- if err != nil {
- ch <- err
- return
+ groupMap := make(map[model.Fingerprint]*alertGroup)
+
+ for i, alert := range alerts {
+ labels := getGroupLabels(alert, config.NotificationGroup, config.GroupByAll)
+ fp := labels.Fingerprint()
+
+ postableAlert := postableAlerts[i]
+ alertReceivers := receiversMap[postableAlert]
+
+ if group, exists := groupMap[fp]; exists {
+ group.alerts = append(group.alerts, alert)
+ for _, r := range alertReceivers {
+ group.receivers[r] = struct{}{}
+ }
+ } else {
+ receiverSet := make(map[string]struct{})
+ for _, r := range alertReceivers {
+ receiverSet[r] = struct{}{}
+ }
+ groupMap[fp] = &alertGroup{
+ groupLabels: labels,
+ alerts: []*types.Alert{alert},
+ receivers: receiverSet,
}
- ch <- alertmanagertypes.TestReceiver(ctx, receiver, alertmanagernotify.NewReceiverIntegrations, server.alertmanagerConfig, server.tmpl, server.logger, alerts[0])
- }(receiverName)
- }
-
- var errs []error
- for i := 0; i < len(receivers); i++ {
- if err := <-ch; err != nil {
- errs = append(errs, err)
}
}
- if errs != nil {
+ var mu sync.Mutex
+ var errs []error
+
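+ // Fan out one TestReceiver call per (group, receiver) pair; individual failures are collected so the remaining receivers are still exercised.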
+ g, gCtx := errgroup.WithContext(ctx)
+ for _, group := range groupMap {
+ for receiverName := range group.receivers {
+ group := group
+ receiverName := receiverName
+
+ g.Go(func() error {
+ receiver, err := server.alertmanagerConfig.GetReceiver(receiverName)
+ if err != nil {
+ mu.Lock()
+ errs = append(errs, fmt.Errorf("failed to get receiver %q: %w", receiverName, err))
+ mu.Unlock()
+ return nil // Return nil to continue processing other goroutines
+ }
+
+ err = alertmanagertypes.TestReceiver(
+ gCtx,
+ receiver,
+ alertmanagernotify.NewReceiverIntegrations,
+ server.alertmanagerConfig,
+ server.tmpl,
+ server.logger,
+ group.groupLabels,
+ group.alerts...,
+ )
+ if err != nil {
+ mu.Lock()
+ errs = append(errs, fmt.Errorf("receiver %q test failed: %w", receiverName, err))
+ mu.Unlock()
+ }
+ return nil // Return nil to continue processing other goroutines
+ })
+ }
+ }
+ _ = g.Wait()
+
+ if len(errs) > 0 {
return errors.Join(errs...)
}
diff --git a/pkg/alertmanager/alertmanagerserver/server_e2e_test.go b/pkg/alertmanager/alertmanagerserver/server_e2e_test.go
new file mode 100644
index 000000000000..14cbe02e126c
--- /dev/null
+++ b/pkg/alertmanager/alertmanagerserver/server_e2e_test.go
@@ -0,0 +1,223 @@
+package alertmanagerserver
+
+import (
+ "context"
+ "github.com/SigNoz/signoz/pkg/types/alertmanagertypes/alertmanagertypestest"
+ "github.com/prometheus/alertmanager/dispatch"
+ "io"
+ "log/slog"
+ "net/http"
+ "testing"
+ "time"
+
+ "github.com/SigNoz/signoz/pkg/alertmanager/nfmanager"
+ "github.com/SigNoz/signoz/pkg/alertmanager/nfmanager/nfroutingstore/nfroutingstoretest"
+ "github.com/SigNoz/signoz/pkg/alertmanager/nfmanager/rulebasednotification"
+ "github.com/SigNoz/signoz/pkg/instrumentation/instrumentationtest"
+ "github.com/SigNoz/signoz/pkg/types"
+ "github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
+ "github.com/SigNoz/signoz/pkg/valuer"
+
+ "github.com/go-openapi/strfmt"
+ "github.com/prometheus/client_golang/prometheus"
+ "github.com/prometheus/common/model"
+
+ "github.com/stretchr/testify/require"
+)
+
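+// TestEndToEndAlertManagerFlow wires a rule-based notification manager (backed by a mock SQL route store) into a
+// real server, pushes alerts, and verifies they are grouped and routed end to end.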
+func TestEndToEndAlertManagerFlow(t *testing.T) {
+ ctx := context.Background()
+ providerSettings := instrumentationtest.New().ToProviderSettings()
+
+ store := nfroutingstoretest.NewMockSQLRouteStore()
+ store.MatchExpectationsInOrder(false)
+ notificationManager, err := rulebasednotification.New(ctx, providerSettings, nfmanager.Config{}, store)
+ require.NoError(t, err)
+ orgID := "test-org"
+
+ routes := []*alertmanagertypes.RoutePolicy{
+ {
+ Identifiable: types.Identifiable{
+ ID: valuer.GenerateUUID(),
+ },
+ Expression: `ruleId == "high-cpu-usage" && severity == "critical"`,
+ ExpressionKind: alertmanagertypes.RuleBasedExpression,
+ Name: "high-cpu-usage",
+ Description: "High CPU critical alerts to webhook",
+ Enabled: true,
+ OrgID: orgID,
+ Channels: []string{"webhook"},
+ },
+ {
+ Identifiable: types.Identifiable{
+ ID: valuer.GenerateUUID(),
+ },
+ Expression: `ruleId == "high-cpu-usage" && severity == "warning"`,
+ ExpressionKind: alertmanagertypes.RuleBasedExpression,
+ Name: "high-cpu-usage",
+ Description: "High CPU warning alerts to webhook",
+ Enabled: true,
+ OrgID: orgID,
+ Channels: []string{"webhook"},
+ },
+ }
+
+ store.ExpectCreateBatch(routes)
+ err = notificationManager.CreateRoutePolicies(ctx, orgID, routes)
+ require.NoError(t, err)
+
+ for range routes {
+ ruleID := "high-cpu-usage"
+ store.ExpectGetAllByName(orgID, ruleID, routes)
+ store.ExpectGetAllByName(orgID, ruleID, routes)
+ }
+
+ notifConfig := alertmanagertypes.NotificationConfig{
+ NotificationGroup: map[model.LabelName]struct{}{
+ model.LabelName("cluster"): {},
+ model.LabelName("instance"): {},
+ },
+ Renotify: alertmanagertypes.ReNotificationConfig{
+ RenotifyInterval: 5 * time.Minute,
+ },
+ UsePolicy: false,
+ }
+
+ err = notificationManager.SetNotificationConfig(orgID, "high-cpu-usage", ¬ifConfig)
+ require.NoError(t, err)
+
+ srvCfg := NewConfig()
+ stateStore := alertmanagertypestest.NewStateStore()
+ registry := prometheus.NewRegistry()
+ logger := slog.New(slog.NewTextHandler(io.Discard, nil))
+ server, err := New(context.Background(), logger, registry, srvCfg, orgID, stateStore, notificationManager)
+ require.NoError(t, err)
+ amConfig, err := alertmanagertypes.NewDefaultConfig(srvCfg.Global, srvCfg.Route, orgID)
+ require.NoError(t, err)
+ err = server.SetConfig(ctx, amConfig)
+ require.NoError(t, err)
+
+ // Create test alerts
+ now := time.Now()
+ testAlerts := []*alertmanagertypes.PostableAlert{
+ {
+ Alert: alertmanagertypes.AlertModel{
+ Labels: map[string]string{
+ "ruleId": "high-cpu-usage",
+ "severity": "critical",
+ "cluster": "prod-cluster",
+ "instance": "server-01",
+ "alertname": "HighCPUUsage",
+ },
+ },
+ Annotations: map[string]string{
+ "summary": "High CPU usage detected",
+ "description": "CPU usage is above 90% for 5 minutes",
+ },
+ StartsAt: strfmt.DateTime(now.Add(-5 * time.Minute)),
+ EndsAt: strfmt.DateTime(time.Time{}), // Active alert
+ },
+ {
+ Alert: alertmanagertypes.AlertModel{
+ Labels: map[string]string{
+ "ruleId": "high-cpu-usage",
+ "severity": "warning",
+ "cluster": "prod-cluster",
+ "instance": "server-02",
+ "alertname": "HighCPUUsage",
+ },
+ },
+ Annotations: map[string]string{
+ "summary": "Moderate CPU usage detected",
+ "description": "CPU usage is above 70% for 10 minutes",
+ },
+ StartsAt: strfmt.DateTime(now.Add(-10 * time.Minute)),
+ EndsAt: strfmt.DateTime(time.Time{}), // Active alert
+ },
+ {
+ Alert: alertmanagertypes.AlertModel{
+ Labels: map[string]string{
+ "ruleId": "high-cpu-usage",
+ "severity": "critical",
+ "cluster": "prod-cluster",
+ "instance": "server-03",
+ "alertname": "HighCPUUsage",
+ },
+ },
+ Annotations: map[string]string{
+ "summary": "High CPU usage detected on server-03",
+ "description": "CPU usage is above 95% for 3 minutes",
+ },
+ StartsAt: strfmt.DateTime(now.Add(-3 * time.Minute)),
+ EndsAt: strfmt.DateTime(time.Time{}), // Active alert
+ },
+ }
+
+ err = server.PutAlerts(ctx, testAlerts)
+ require.NoError(t, err)
+
+ time.Sleep(2 * time.Second)
+
+ t.Run("verify_alerts_processed", func(t *testing.T) {
+ dummyRequest, err := http.NewRequest(http.MethodGet, "/alerts", nil)
+ require.NoError(t, err)
+
+ params, err := alertmanagertypes.NewGettableAlertsParams(dummyRequest)
+ require.NoError(t, err)
+ alerts, err := server.GetAlerts(context.Background(), params)
+ require.NoError(t, err)
+ require.Len(t, alerts, 3, "Expected 3 active alerts")
+
+ for _, alert := range alerts {
+ require.Equal(t, "high-cpu-usage", alert.Alert.Labels["ruleId"])
+ require.NotEmpty(t, alert.Alert.Labels["severity"])
+ require.Contains(t, []string{"critical", "warning"}, alert.Alert.Labels["severity"])
+ require.Equal(t, "prod-cluster", alert.Alert.Labels["cluster"])
+ require.NotEmpty(t, alert.Alert.Labels["instance"])
+ }
+
+ criticalAlerts := 0
+ warningAlerts := 0
+ for _, alert := range alerts {
+ if alert.Alert.Labels["severity"] == "critical" {
+ criticalAlerts++
+ } else if alert.Alert.Labels["severity"] == "warning" {
+ warningAlerts++
+ }
+ }
+ require.Equal(t, 2, criticalAlerts, "Expected 2 critical alerts")
+ require.Equal(t, 1, warningAlerts, "Expected 1 warning alert")
+ })
+
+ t.Run("verify_notification_routing", func(t *testing.T) {
+
+ notifConfig, err := notificationManager.GetNotificationConfig(orgID, "high-cpu-usage")
+ require.NoError(t, err)
+ require.NotNil(t, notifConfig)
+ require.Equal(t, 5*time.Minute, notifConfig.Renotify.RenotifyInterval)
+ require.Contains(t, notifConfig.NotificationGroup, model.LabelName("ruleId"))
+ require.Contains(t, notifConfig.NotificationGroup, model.LabelName("cluster"))
+ require.Contains(t, notifConfig.NotificationGroup, model.LabelName("instance"))
+ })
+
+ t.Run("verify_alert_groups_and_stages", func(t *testing.T) {
+ time.Sleep(2 * time.Second)
+
+ alertGroups, _ := server.dispatcher.Groups(
+ func(route *dispatch.Route) bool { return true }, // Accept all routes
+ func(alert *alertmanagertypes.Alert, now time.Time) bool { return true }, // Accept all alerts
+ )
+ require.Len(t, alertGroups, 3)
+
+ require.NotEmpty(t, alertGroups, "Should have alert groups created by dispatcher")
+
+ totalAlerts := 0
+ for _, group := range alertGroups {
+ totalAlerts += len(group.Alerts)
+ }
+ require.Equal(t, 3, totalAlerts, "Should have 3 alerts total across all groups")
+ require.Equal(t, "{__receiver__=\"webhook\"}:{cluster=\"prod-cluster\", instance=\"server-01\", ruleId=\"high-cpu-usage\"}", alertGroups[0].GroupKey)
+ require.Equal(t, "{__receiver__=\"webhook\"}:{cluster=\"prod-cluster\", instance=\"server-02\", ruleId=\"high-cpu-usage\"}", alertGroups[1].GroupKey)
+ require.Equal(t, "{__receiver__=\"webhook\"}:{cluster=\"prod-cluster\", instance=\"server-03\", ruleId=\"high-cpu-usage\"}", alertGroups[2].GroupKey)
+ })
+}
diff --git a/pkg/alertmanager/alertmanagerserver/server_test.go b/pkg/alertmanager/alertmanagerserver/server_test.go
index 8aad88b2ff4a..e222e319e883 100644
--- a/pkg/alertmanager/alertmanagerserver/server_test.go
+++ b/pkg/alertmanager/alertmanagerserver/server_test.go
@@ -19,6 +19,7 @@ import (
"github.com/prometheus/alertmanager/config"
"github.com/prometheus/client_golang/prometheus"
commoncfg "github.com/prometheus/common/config"
+ "github.com/prometheus/common/model"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
@@ -127,3 +128,189 @@ func TestServerPutAlerts(t *testing.T) {
assert.Equal(t, gettableAlerts[0].Alert.Labels["alertname"], "test-alert")
assert.NoError(t, server.Stop(context.Background()))
}
+
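+// TestServerTestAlert fans test alerts out to two webhook receivers and verifies both endpoints are invoked.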
+func TestServerTestAlert(t *testing.T) {
+ stateStore := alertmanagertypestest.NewStateStore()
+ srvCfg := NewConfig()
+ srvCfg.Route.GroupInterval = 1 * time.Second
+ notificationManager := nfmanagertest.NewMock()
+ server, err := New(context.Background(), slog.New(slog.NewTextHandler(io.Discard, nil)), prometheus.NewRegistry(), srvCfg, "1", stateStore, notificationManager)
+ require.NoError(t, err)
+
+ amConfig, err := alertmanagertypes.NewDefaultConfig(srvCfg.Global, srvCfg.Route, "1")
+ require.NoError(t, err)
+
+ webhook1Listener, err := net.Listen("tcp", "localhost:0")
+ require.NoError(t, err)
+ webhook2Listener, err := net.Listen("tcp", "localhost:0")
+ require.NoError(t, err)
+
+ requestCount1 := 0
+ requestCount2 := 0
+ webhook1Server := &http.Server{
+ Handler: http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ requestCount1++
+ w.WriteHeader(http.StatusOK)
+ }),
+ }
+ webhook2Server := &http.Server{
+ Handler: http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ requestCount2++
+ w.WriteHeader(http.StatusOK)
+ }),
+ }
+
+ go func() {
+ _ = webhook1Server.Serve(webhook1Listener)
+ }()
+ go func() {
+ _ = webhook2Server.Serve(webhook2Listener)
+ }()
+
+ webhook1URL, err := url.Parse("http://" + webhook1Listener.Addr().String() + "/webhook")
+ require.NoError(t, err)
+ webhook2URL, err := url.Parse("http://" + webhook2Listener.Addr().String() + "/webhook")
+ require.NoError(t, err)
+
+ require.NoError(t, amConfig.CreateReceiver(alertmanagertypes.Receiver{
+ Name: "receiver-1",
+ WebhookConfigs: []*config.WebhookConfig{
+ {
+ HTTPConfig: &commoncfg.HTTPClientConfig{},
+ URL: &config.SecretURL{URL: webhook1URL},
+ },
+ },
+ }))
+
+ require.NoError(t, amConfig.CreateReceiver(alertmanagertypes.Receiver{
+ Name: "receiver-2",
+ WebhookConfigs: []*config.WebhookConfig{
+ {
+ HTTPConfig: &commoncfg.HTTPClientConfig{},
+ URL: &config.SecretURL{URL: webhook2URL},
+ },
+ },
+ }))
+
+ require.NoError(t, server.SetConfig(context.Background(), amConfig))
+ defer func() {
+ _ = server.Stop(context.Background())
+ _ = webhook1Server.Close()
+ _ = webhook2Server.Close()
+ }()
+
+ // Test with multiple alerts going to different receivers
+ alert1 := &alertmanagertypes.PostableAlert{
+ Annotations: models.LabelSet{"alertname": "test-alert-1"},
+ StartsAt: strfmt.DateTime(time.Now()),
+ Alert: models.Alert{
+ Labels: models.LabelSet{"alertname": "test-alert-1", "severity": "critical"},
+ },
+ }
+ alert2 := &alertmanagertypes.PostableAlert{
+ Annotations: models.LabelSet{"alertname": "test-alert-2"},
+ StartsAt: strfmt.DateTime(time.Now()),
+ Alert: models.Alert{
+ Labels: models.LabelSet{"alertname": "test-alert-2", "severity": "warning"},
+ },
+ }
+
+ receiversMap := map[*alertmanagertypes.PostableAlert][]string{
+ alert1: {"receiver-1", "receiver-2"},
+ alert2: {"receiver-2"},
+ }
+
+ config := &alertmanagertypes.NotificationConfig{
+ NotificationGroup: make(map[model.LabelName]struct{}),
+ GroupByAll: false,
+ }
+
+ err = server.TestAlert(context.Background(), receiversMap, config)
+ require.NoError(t, err)
+
+ time.Sleep(100 * time.Millisecond)
+
+ assert.Greater(t, requestCount1, 0, "receiver-1 should have received at least one request")
+ assert.Greater(t, requestCount2, 0, "receiver-2 should have received at least one request")
+}
+
+func TestServerTestAlertContinuesOnFailure(t *testing.T) {
+ stateStore := alertmanagertypestest.NewStateStore()
+ srvCfg := NewConfig()
+ srvCfg.Route.GroupInterval = 1 * time.Second
+ notificationManager := nfmanagertest.NewMock()
+ server, err := New(context.Background(), slog.New(slog.NewTextHandler(io.Discard, nil)), prometheus.NewRegistry(), srvCfg, "1", stateStore, notificationManager)
+ require.NoError(t, err)
+
+ amConfig, err := alertmanagertypes.NewDefaultConfig(srvCfg.Global, srvCfg.Route, "1")
+ require.NoError(t, err)
+
+ // Create one working webhook and one failing receiver (non-existent)
+ webhookListener, err := net.Listen("tcp", "localhost:0")
+ require.NoError(t, err)
+
+ requestCount := 0
+ webhookServer := &http.Server{
+ Handler: http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ requestCount++
+ w.WriteHeader(http.StatusOK)
+ }),
+ }
+
+ go func() {
+ _ = webhookServer.Serve(webhookListener)
+ }()
+
+ webhookURL, err := url.Parse("http://" + webhookListener.Addr().String() + "/webhook")
+ require.NoError(t, err)
+
+ require.NoError(t, amConfig.CreateReceiver(alertmanagertypes.Receiver{
+ Name: "working-receiver",
+ WebhookConfigs: []*config.WebhookConfig{
+ {
+ HTTPConfig: &commoncfg.HTTPClientConfig{},
+ URL: &config.SecretURL{URL: webhookURL},
+ },
+ },
+ }))
+
+ require.NoError(t, amConfig.CreateReceiver(alertmanagertypes.Receiver{
+ Name: "failing-receiver",
+ WebhookConfigs: []*config.WebhookConfig{
+ {
+ HTTPConfig: &commoncfg.HTTPClientConfig{},
+ URL: &config.SecretURL{URL: &url.URL{Scheme: "http", Host: "localhost:1", Path: "/webhook"}},
+ },
+ },
+ }))
+
+ require.NoError(t, server.SetConfig(context.Background(), amConfig))
+ defer func() {
+ _ = server.Stop(context.Background())
+ _ = webhookServer.Close()
+ }()
+
+ alert := &alertmanagertypes.PostableAlert{
+ Annotations: models.LabelSet{"alertname": "test-alert"},
+ StartsAt: strfmt.DateTime(time.Now()),
+ Alert: models.Alert{
+ Labels: models.LabelSet{"alertname": "test-alert"},
+ },
+ }
+
+ receiversMap := map[*alertmanagertypes.PostableAlert][]string{
+ alert: {"working-receiver", "failing-receiver"},
+ }
+
+ config := &alertmanagertypes.NotificationConfig{
+ NotificationGroup: make(map[model.LabelName]struct{}),
+ GroupByAll: false,
+ }
+
+ err = server.TestAlert(context.Background(), receiversMap, config)
+ assert.Error(t, err)
+
+ time.Sleep(100 * time.Millisecond)
+
+ assert.Greater(t, requestCount, 0, "working-receiver should have received at least one request even though failing-receiver failed")
+}
diff --git a/pkg/alertmanager/api.go b/pkg/alertmanager/api.go
index ece7dcfa371b..c6be90b4970f 100644
--- a/pkg/alertmanager/api.go
+++ b/pkg/alertmanager/api.go
@@ -2,6 +2,7 @@ package alertmanager
import (
"context"
+ "encoding/json"
"io"
"net/http"
"time"
@@ -273,3 +274,128 @@ func (api *API) CreateChannel(rw http.ResponseWriter, req *http.Request) {
render.Success(rw, http.StatusNoContent, nil)
}
+
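+// CreateRoutePolicy creates a single policy-based routing rule from the request body.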
+func (api *API) CreateRoutePolicy(rw http.ResponseWriter, req *http.Request) {
+ ctx, cancel := context.WithTimeout(req.Context(), 30*time.Second)
+ defer cancel()
+
+ body, err := io.ReadAll(req.Body)
+ if err != nil {
+ render.Error(rw, err)
+ return
+ }
+ defer req.Body.Close()
+ var policy alertmanagertypes.PostableRoutePolicy
+ err = json.Unmarshal(body, &policy)
+ if err != nil {
+ render.Error(rw, err)
+ return
+ }
+
+ policy.ExpressionKind = alertmanagertypes.PolicyBasedExpression
+
+ // Validate the postable route
+ if err := policy.Validate(); err != nil {
+ render.Error(rw, err)
+ return
+ }
+
+ result, err := api.alertmanager.CreateRoutePolicy(ctx, &policy)
+ if err != nil {
+ render.Error(rw, err)
+ return
+ }
+
+ render.Success(rw, http.StatusCreated, result)
+}
+
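+// GetAllRoutePolicies returns all routing policies.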
+func (api *API) GetAllRoutePolicies(rw http.ResponseWriter, req *http.Request) {
+ ctx, cancel := context.WithTimeout(req.Context(), 30*time.Second)
+ defer cancel()
+
+ policies, err := api.alertmanager.GetAllRoutePolicies(ctx)
+ if err != nil {
+ render.Error(rw, err)
+ return
+ }
+
+ render.Success(rw, http.StatusOK, policies)
+}
+
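+// GetRoutePolicyByID returns the routing policy with the given ID.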
+func (api *API) GetRoutePolicyByID(rw http.ResponseWriter, req *http.Request) {
+ ctx, cancel := context.WithTimeout(req.Context(), 30*time.Second)
+ defer cancel()
+
+ vars := mux.Vars(req)
+ policyID := vars["id"]
+ if policyID == "" {
+ render.Error(rw, errors.NewInvalidInputf(errors.CodeInvalidInput, "policy ID is required"))
+ return
+ }
+
+ policy, err := api.alertmanager.GetRoutePolicyByID(ctx, policyID)
+ if err != nil {
+ render.Error(rw, err)
+ return
+ }
+
+ render.Success(rw, http.StatusOK, policy)
+}
+
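+// DeleteRoutePolicyByID deletes the routing policy with the given ID.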
+func (api *API) DeleteRoutePolicyByID(rw http.ResponseWriter, req *http.Request) {
+ ctx, cancel := context.WithTimeout(req.Context(), 30*time.Second)
+ defer cancel()
+
+ vars := mux.Vars(req)
+ policyID := vars["id"]
+ if policyID == "" {
+ render.Error(rw, errors.NewInvalidInputf(errors.CodeInvalidInput, "policy ID is required"))
+ return
+ }
+
+ err := api.alertmanager.DeleteRoutePolicyByID(ctx, policyID)
+ if err != nil {
+ render.Error(rw, err)
+ return
+ }
+
+ render.Success(rw, http.StatusNoContent, nil)
+}
+
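+// UpdateRoutePolicy replaces the routing policy with the given ID using the request body.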
+func (api *API) UpdateRoutePolicy(rw http.ResponseWriter, req *http.Request) {
+ ctx, cancel := context.WithTimeout(req.Context(), 30*time.Second)
+ defer cancel()
+
+ vars := mux.Vars(req)
+ policyID := vars["id"]
+ if policyID == "" {
+ render.Error(rw, errors.NewInvalidInputf(errors.CodeInvalidInput, "policy ID is required"))
+ return
+ }
+ body, err := io.ReadAll(req.Body)
+ if err != nil {
+ render.Error(rw, err)
+ return
+ }
+ defer req.Body.Close()
+ var policy alertmanagertypes.PostableRoutePolicy
+ err = json.Unmarshal(body, &policy)
+ if err != nil {
+ render.Error(rw, err)
+ return
+ }
+ policy.ExpressionKind = alertmanagertypes.PolicyBasedExpression
+
+ // Validate the postable route
+ if err := policy.Validate(); err != nil {
+ render.Error(rw, err)
+ return
+ }
+
+ result, err := api.alertmanager.UpdateRoutePolicyByID(ctx, policyID, &policy)
+ if err != nil {
+ render.Error(rw, err)
+ return
+ }
+ render.Success(rw, http.StatusOK, result)
+}
diff --git a/pkg/alertmanager/nfmanager/nfmanagertest/provider.go b/pkg/alertmanager/nfmanager/nfmanagertest/provider.go
index 2a321ce80712..dfb930495686 100644
--- a/pkg/alertmanager/nfmanager/nfmanagertest/provider.go
+++ b/pkg/alertmanager/nfmanager/nfmanagertest/provider.go
@@ -1,20 +1,29 @@
package nfmanagertest
import (
+ "context"
+ "fmt"
+ "strings"
+
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
+ "github.com/prometheus/common/model"
)
// MockNotificationManager is a simple mock implementation of NotificationManager
type MockNotificationManager struct {
- configs map[string]*alertmanagertypes.NotificationConfig
- errors map[string]error
+ configs map[string]*alertmanagertypes.NotificationConfig
+ routes map[string]*alertmanagertypes.RoutePolicy
+ routesByName map[string][]*alertmanagertypes.RoutePolicy
+ errors map[string]error
}
// NewMock creates a new mock notification manager
func NewMock() *MockNotificationManager {
return &MockNotificationManager{
- configs: make(map[string]*alertmanagertypes.NotificationConfig),
- errors: make(map[string]error),
+ configs: make(map[string]*alertmanagertypes.NotificationConfig),
+ routes: make(map[string]*alertmanagertypes.RoutePolicy),
+ routesByName: make(map[string][]*alertmanagertypes.RoutePolicy),
+ errors: make(map[string]error),
}
}
@@ -65,6 +74,8 @@ func (m *MockNotificationManager) SetMockError(orgID, ruleID string, err error)
func (m *MockNotificationManager) ClearMockData() {
m.configs = make(map[string]*alertmanagertypes.NotificationConfig)
+ m.routes = make(map[string]*alertmanagertypes.RoutePolicy)
+ m.routesByName = make(map[string][]*alertmanagertypes.RoutePolicy)
m.errors = make(map[string]error)
}
@@ -73,3 +84,241 @@ func (m *MockNotificationManager) HasConfig(orgID, ruleID string) bool {
_, exists := m.configs[key]
return exists
}
+
+// Route Policy CRUD
+
+func (m *MockNotificationManager) CreateRoutePolicy(ctx context.Context, orgID string, route *alertmanagertypes.RoutePolicy) error {
+ key := getKey(orgID, "create_route")
+ if err := m.errors[key]; err != nil {
+ return err
+ }
+
+ if route == nil {
+ return fmt.Errorf("route cannot be nil")
+ }
+
+ if err := route.Validate(); err != nil {
+ return err
+ }
+
+ routeKey := getKey(orgID, route.ID.StringValue())
+ m.routes[routeKey] = route
+ nameKey := getKey(orgID, route.Name)
+ m.routesByName[nameKey] = append(m.routesByName[nameKey], route)
+
+ return nil
+}
+
+func (m *MockNotificationManager) CreateRoutePolicies(ctx context.Context, orgID string, routes []*alertmanagertypes.RoutePolicy) error {
+ key := getKey(orgID, "create_routes")
+ if err := m.errors[key]; err != nil {
+ return err
+ }
+
+ if len(routes) == 0 {
+ return fmt.Errorf("routes cannot be empty")
+ }
+ for i, route := range routes {
+ if route == nil {
+ return fmt.Errorf("route at index %d cannot be nil", i)
+ }
+ if err := route.Validate(); err != nil {
+ return fmt.Errorf("route at index %d: %s", i, err.Error())
+ }
+ }
+ for _, route := range routes {
+ if err := m.CreateRoutePolicy(ctx, orgID, route); err != nil {
+ return err
+ }
+ }
+
+ return nil
+}
+
+func (m *MockNotificationManager) GetRoutePolicyByID(ctx context.Context, orgID string, routeID string) (*alertmanagertypes.RoutePolicy, error) {
+ key := getKey(orgID, "get_route")
+ if err := m.errors[key]; err != nil {
+ return nil, err
+ }
+
+ if routeID == "" {
+ return nil, fmt.Errorf("routeID cannot be empty")
+ }
+
+ routeKey := getKey(orgID, routeID)
+ route, exists := m.routes[routeKey]
+ if !exists {
+ return nil, fmt.Errorf("route with ID %s not found", routeID)
+ }
+
+ return route, nil
+}
+
+func (m *MockNotificationManager) GetAllRoutePolicies(ctx context.Context, orgID string) ([]*alertmanagertypes.RoutePolicy, error) {
+ key := getKey(orgID, "get_all_routes")
+ if err := m.errors[key]; err != nil {
+ return nil, err
+ }
+
+ if orgID == "" {
+ return nil, fmt.Errorf("orgID cannot be empty")
+ }
+
+ var routes []*alertmanagertypes.RoutePolicy
+ for _, route := range m.routes {
+ if route.OrgID == orgID {
+ routes = append(routes, route)
+ }
+ }
+
+ return routes, nil
+}
+
+func (m *MockNotificationManager) DeleteRoutePolicy(ctx context.Context, orgID string, routeID string) error {
+ key := getKey(orgID, "delete_route")
+ if err := m.errors[key]; err != nil {
+ return err
+ }
+
+ if routeID == "" {
+ return fmt.Errorf("routeID cannot be empty")
+ }
+
+ routeKey := getKey(orgID, routeID)
+ route, exists := m.routes[routeKey]
+ if !exists {
+ return fmt.Errorf("route with ID %s not found", routeID)
+ }
+ delete(m.routes, routeKey)
+
+ nameKey := getKey(orgID, route.Name)
+ if nameRoutes, exists := m.routesByName[nameKey]; exists {
+ var filtered []*alertmanagertypes.RoutePolicy
+ for _, r := range nameRoutes {
+ if r.ID.StringValue() != routeID {
+ filtered = append(filtered, r)
+ }
+ }
+ if len(filtered) == 0 {
+ delete(m.routesByName, nameKey)
+ } else {
+ m.routesByName[nameKey] = filtered
+ }
+ }
+
+ return nil
+}
+
+func (m *MockNotificationManager) DeleteAllRoutePoliciesByName(ctx context.Context, orgID string, name string) error {
+ key := getKey(orgID, "delete_routes_by_name")
+ if err := m.errors[key]; err != nil {
+ return err
+ }
+
+ if orgID == "" {
+ return fmt.Errorf("orgID cannot be empty")
+ }
+
+ if name == "" {
+ return fmt.Errorf("name cannot be empty")
+ }
+
+ nameKey := getKey(orgID, name)
+ routes, exists := m.routesByName[nameKey]
+ if !exists {
+ return nil // No routes to delete
+ }
+
+ for _, route := range routes {
+ routeKey := getKey(orgID, route.ID.StringValue())
+ delete(m.routes, routeKey)
+ }
+
+ delete(m.routesByName, nameKey)
+
+ return nil
+}
+
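+// Match mimics the real manager: with UsePolicy it evaluates every policy-based route for the org, otherwise only
+// the routes registered under the rule's name.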
+func (m *MockNotificationManager) Match(ctx context.Context, orgID string, ruleID string, set model.LabelSet) ([]string, error) {
+ key := getKey(orgID, ruleID)
+ if err := m.errors[key]; err != nil {
+ return nil, err
+ }
+
+ config, err := m.GetNotificationConfig(orgID, ruleID)
+ if err != nil {
+ return nil, err
+ }
+
+ var expressionRoutes []*alertmanagertypes.RoutePolicy
+ if config.UsePolicy {
+ for _, route := range m.routes {
+ if route.OrgID == orgID && route.ExpressionKind == alertmanagertypes.PolicyBasedExpression {
+ expressionRoutes = append(expressionRoutes, route)
+ }
+ }
+ } else {
+ nameKey := getKey(orgID, ruleID)
+ if routes, exists := m.routesByName[nameKey]; exists {
+ expressionRoutes = routes
+ }
+ }
+
+ var matchedChannels []string
+ for _, route := range expressionRoutes {
+ if m.evaluateExpr(route.Expression, set) {
+ matchedChannels = append(matchedChannels, route.Channels...)
+ }
+ }
+
+ return matchedChannels, nil
+}
+
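+// evaluateExpr is a deliberately simplified matcher: it only recognizes the handful of expressions used by these
+// tests (keyed off the ruleId label) and is not a general expression evaluator.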
+func (m *MockNotificationManager) evaluateExpr(expression string, labelSet model.LabelSet) bool {
+ ruleID, ok := labelSet["ruleId"]
+ if !ok {
+ return false
+ }
+ if strings.Contains(expression, `ruleId in ["ruleId-OtherAlert", "ruleId-TestingAlert"]`) {
+ return ruleID == "ruleId-OtherAlert" || ruleID == "ruleId-TestingAlert"
+ }
+ if strings.Contains(expression, `ruleId in ["ruleId-HighLatency", "ruleId-HighErrorRate"]`) {
+ return ruleID == "ruleId-HighLatency" || ruleID == "ruleId-HighErrorRate"
+ }
+ if strings.Contains(expression, `ruleId == "ruleId-HighLatency"`) {
+ return ruleID == "ruleId-HighLatency"
+ }
+
+ return false
+}
+
+// Helper methods for testing
+
+func (m *MockNotificationManager) SetMockRoute(orgID string, route *alertmanagertypes.RoutePolicy) {
+ routeKey := getKey(orgID, route.ID.StringValue())
+ m.routes[routeKey] = route
+
+ nameKey := getKey(orgID, route.Name)
+ m.routesByName[nameKey] = append(m.routesByName[nameKey], route)
+}
+
+func (m *MockNotificationManager) SetMockRouteError(orgID, operation string, err error) {
+ key := getKey(orgID, operation)
+ m.errors[key] = err
+}
+
+func (m *MockNotificationManager) ClearMockRoutes() {
+ m.routes = make(map[string]*alertmanagertypes.RoutePolicy)
+ m.routesByName = make(map[string][]*alertmanagertypes.RoutePolicy)
+}
+
+func (m *MockNotificationManager) GetRouteCount() int {
+ return len(m.routes)
+}
+
+func (m *MockNotificationManager) HasRoute(orgID, routeID string) bool {
+ routeKey := getKey(orgID, routeID)
+ _, exists := m.routes[routeKey]
+ return exists
+}
diff --git a/pkg/alertmanager/nfmanager/nfroutingstore/nfroutingstoretest/route.go b/pkg/alertmanager/nfmanager/nfroutingstore/nfroutingstoretest/route.go
new file mode 100644
index 000000000000..f0fb06689ec5
--- /dev/null
+++ b/pkg/alertmanager/nfmanager/nfroutingstore/nfroutingstoretest/route.go
@@ -0,0 +1,176 @@
+package nfroutingstoretest
+
+import (
+ "context"
+ "regexp"
+ "strings"
+
+ "github.com/DATA-DOG/go-sqlmock"
+ "github.com/SigNoz/signoz/pkg/alertmanager/nfmanager/nfroutingstore/sqlroutingstore"
+ "github.com/SigNoz/signoz/pkg/sqlstore"
+ "github.com/SigNoz/signoz/pkg/sqlstore/sqlstoretest"
+ "github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
+)
+
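+// MockSQLRouteStore wraps the real sqlroutingstore implementation around a sqlmock-backed SQL store, so tests can
+// register query expectations while still exercising the production SQL code path.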
+type MockSQLRouteStore struct {
+ routeStore alertmanagertypes.RouteStore
+ mock sqlmock.Sqlmock
+}
+
+func NewMockSQLRouteStore() *MockSQLRouteStore {
+ sqlStore := sqlstoretest.New(sqlstore.Config{Provider: "sqlite"}, sqlmock.QueryMatcherRegexp)
+ routeStore := sqlroutingstore.NewStore(sqlStore)
+
+ return &MockSQLRouteStore{
+ routeStore: routeStore,
+ mock: sqlStore.Mock(),
+ }
+}
+
+func (m *MockSQLRouteStore) Mock() sqlmock.Sqlmock {
+ return m.mock
+}
+
+func (m *MockSQLRouteStore) GetByID(ctx context.Context, orgId string, id string) (*alertmanagertypes.RoutePolicy, error) {
+ return m.routeStore.GetByID(ctx, orgId, id)
+}
+
+func (m *MockSQLRouteStore) Create(ctx context.Context, route *alertmanagertypes.RoutePolicy) error {
+ return m.routeStore.Create(ctx, route)
+}
+
+func (m *MockSQLRouteStore) CreateBatch(ctx context.Context, routes []*alertmanagertypes.RoutePolicy) error {
+ return m.routeStore.CreateBatch(ctx, routes)
+}
+
+func (m *MockSQLRouteStore) Delete(ctx context.Context, orgId string, id string) error {
+ return m.routeStore.Delete(ctx, orgId, id)
+}
+
+func (m *MockSQLRouteStore) GetAllByKind(ctx context.Context, orgID string, kind alertmanagertypes.ExpressionKind) ([]*alertmanagertypes.RoutePolicy, error) {
+ return m.routeStore.GetAllByKind(ctx, orgID, kind)
+}
+
+func (m *MockSQLRouteStore) GetAllByName(ctx context.Context, orgID string, name string) ([]*alertmanagertypes.RoutePolicy, error) {
+ return m.routeStore.GetAllByName(ctx, orgID, name)
+}
+
+func (m *MockSQLRouteStore) DeleteRouteByName(ctx context.Context, orgID string, name string) error {
+ return m.routeStore.DeleteRouteByName(ctx, orgID, name)
+}
+
+func (m *MockSQLRouteStore) ExpectGetByID(orgID, id string, route *alertmanagertypes.RoutePolicy) {
+ rows := sqlmock.NewRows([]string{"id", "org_id", "name", "expression", "kind", "description", "enabled", "tags", "channels", "created_at", "updated_at", "created_by", "updated_by"})
+
+ if route != nil {
+ rows.AddRow(
+ route.ID.StringValue(),
+ route.OrgID,
+ route.Name,
+ route.Expression,
+ route.ExpressionKind.StringValue(),
+ route.Description,
+ route.Enabled,
+ "[]", // tags as JSON
+ `["`+strings.Join(route.Channels, `","`)+`"]`, // channels as JSON
+ "0001-01-01T00:00:00Z", // created_at
+ "0001-01-01T00:00:00Z", // updated_at
+ "", // created_by
+ "", // updated_by
+ )
+ }
+
+ m.mock.ExpectQuery(`SELECT (.+) FROM "route_policy" WHERE \(id = \$1\) AND \(org_id = \$2\)`).
+ WithArgs(id, orgID).
+ WillReturnRows(rows)
+}
+
+func (m *MockSQLRouteStore) ExpectCreate(route *alertmanagertypes.RoutePolicy) {
+ expectedPattern := `INSERT INTO "route_policy" \(.+\) VALUES .+`
+ m.mock.ExpectExec(expectedPattern).
+ WillReturnResult(sqlmock.NewResult(1, 1))
+}
+
+func (m *MockSQLRouteStore) ExpectCreateBatch(routes []*alertmanagertypes.RoutePolicy) {
+ if len(routes) == 0 {
+ return
+ }
+
+ // Simplified pattern that should match any INSERT into route_policy
+ expectedPattern := `INSERT INTO "route_policy" \(.+\) VALUES .+`
+
+ m.mock.ExpectExec(expectedPattern).
+ WillReturnResult(sqlmock.NewResult(1, int64(len(routes))))
+}
+
+func (m *MockSQLRouteStore) ExpectDelete(orgID, id string) {
+ m.mock.ExpectExec(`DELETE FROM "route_policy" AS "route_policy" WHERE \(org_id = '` + regexp.QuoteMeta(orgID) + `'\) AND \(id = '` + regexp.QuoteMeta(id) + `'\)`).
+ WillReturnResult(sqlmock.NewResult(0, 1))
+}
+
+func (m *MockSQLRouteStore) ExpectGetAllByKindAndOrgID(orgID string, kind alertmanagertypes.ExpressionKind, routes []*alertmanagertypes.RoutePolicy) {
+ rows := sqlmock.NewRows([]string{"id", "org_id", "name", "expression", "kind", "description", "enabled", "tags", "channels", "created_at", "updated_at", "created_by", "updated_by"})
+
+ for _, route := range routes {
+ if route.OrgID == orgID && route.ExpressionKind == kind {
+ rows.AddRow(
+ route.ID.StringValue(),
+ route.OrgID,
+ route.Name,
+ route.Expression,
+ route.ExpressionKind.StringValue(),
+ route.Description,
+ route.Enabled,
+ "[]", // tags as JSON
+ `["`+strings.Join(route.Channels, `","`)+`"]`, // channels as JSON
+ "0001-01-01T00:00:00Z", // created_at
+ "0001-01-01T00:00:00Z", // updated_at
+ "", // created_by
+ "", // updated_by
+ )
+ }
+ }
+
+ m.mock.ExpectQuery(`SELECT (.+) FROM "route_policy" WHERE \(org_id = '` + regexp.QuoteMeta(orgID) + `'\) AND \(kind = '` + regexp.QuoteMeta(kind.StringValue()) + `'\)`).
+ WillReturnRows(rows)
+}
+
+func (m *MockSQLRouteStore) ExpectGetAllByName(orgID, name string, routes []*alertmanagertypes.RoutePolicy) {
+ rows := sqlmock.NewRows([]string{"id", "org_id", "name", "expression", "kind", "description", "enabled", "tags", "channels", "created_at", "updated_at", "created_by", "updated_by"})
+
+ for _, route := range routes {
+ if route.OrgID == orgID && route.Name == name {
+ rows.AddRow(
+ route.ID.StringValue(),
+ route.OrgID,
+ route.Name,
+ route.Expression,
+ route.ExpressionKind.StringValue(),
+ route.Description,
+ route.Enabled,
+ "[]", // tags as JSON
+ `["`+strings.Join(route.Channels, `","`)+`"]`, // channels as JSON
+ "0001-01-01T00:00:00Z", // created_at
+ "0001-01-01T00:00:00Z", // updated_at
+ "", // created_by
+ "", // updated_by
+ )
+ }
+ }
+
+ m.mock.ExpectQuery(`SELECT (.+) FROM "route_policy" WHERE \(org_id = '` + regexp.QuoteMeta(orgID) + `'\) AND \(name = '` + regexp.QuoteMeta(name) + `'\)`).
+ WillReturnRows(rows)
+}
+
+func (m *MockSQLRouteStore) ExpectDeleteRouteByName(orgID, name string) {
+ m.mock.ExpectExec(`DELETE FROM "route_policy" AS "route_policy" WHERE \(org_id = '` + regexp.QuoteMeta(orgID) + `'\) AND \(name = '` + regexp.QuoteMeta(name) + `'\)`).
+ WillReturnResult(sqlmock.NewResult(0, 1))
+}
+
+func (m *MockSQLRouteStore) ExpectationsWereMet() error {
+ return m.mock.ExpectationsWereMet()
+}
+
+func (m *MockSQLRouteStore) MatchExpectationsInOrder(match bool) {
+ m.mock.MatchExpectationsInOrder(match)
+}
diff --git a/pkg/alertmanager/nfmanager/nfroutingstore/sqlroutingstore/store.go b/pkg/alertmanager/nfmanager/nfroutingstore/sqlroutingstore/store.go
new file mode 100644
index 000000000000..78504834c554
--- /dev/null
+++ b/pkg/alertmanager/nfmanager/nfroutingstore/sqlroutingstore/store.go
@@ -0,0 +1,93 @@
+package sqlroutingstore
+
+import (
+ "context"
+ "database/sql"
+
+ "github.com/SigNoz/signoz/pkg/errors"
+ "github.com/SigNoz/signoz/pkg/sqlstore"
+ routeTypes "github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
+)
+
+type store struct {
+ sqlstore sqlstore.SQLStore
+}
+
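+// NewStore returns a RouteStore implementation backed by the given SQL store.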
+func NewStore(sqlstore sqlstore.SQLStore) routeTypes.RouteStore {
+ return &store{
+ sqlstore: sqlstore,
+ }
+}
+
+func (store *store) GetByID(ctx context.Context, orgId string, id string) (*routeTypes.RoutePolicy, error) {
+ route := new(routeTypes.RoutePolicy)
+ err := store.sqlstore.BunDBCtx(ctx).NewSelect().Model(route).Where("id = ?", id).Where("org_id = ?", orgId).Scan(ctx)
+ if err != nil {
+ if errors.Is(err, sql.ErrNoRows) {
+ return nil, store.sqlstore.WrapNotFoundErrf(err, errors.CodeNotFound, "routing policy with ID: %s does not exist", id)
+ }
+ return nil, errors.Wrapf(err, errors.TypeInternal, errors.CodeInternal, "unable to fetch routing policy with ID: %s", id)
+ }
+
+ return route, nil
+}
+
+func (store *store) Create(ctx context.Context, route *routeTypes.RoutePolicy) error {
+ _, err := store.sqlstore.BunDBCtx(ctx).NewInsert().Model(route).Exec(ctx)
+ if err != nil {
+ return errors.NewInternalf(errors.CodeInternal, "error creating routing policy with ID: %s", route.ID)
+ }
+
+ return nil
+}
+
+func (store *store) CreateBatch(ctx context.Context, route []*routeTypes.RoutePolicy) error {
+ _, err := store.sqlstore.BunDBCtx(ctx).NewInsert().Model(&route).Exec(ctx)
+ if err != nil {
+ return errors.NewInternalf(errors.CodeInternal, "error creating routing policies: %v", err)
+ }
+
+ return nil
+}
+
+func (store *store) Delete(ctx context.Context, orgId string, id string) error {
+ _, err := store.sqlstore.BunDBCtx(ctx).NewDelete().Model((*routeTypes.RoutePolicy)(nil)).Where("org_id = ?", orgId).Where("id = ?", id).Exec(ctx)
+ if err != nil {
+ return errors.Wrapf(err, errors.TypeInternal, errors.CodeInternal, "unable to delete routing policy with ID: %s", id)
+ }
+
+ return nil
+}
+
+func (store *store) GetAllByKind(ctx context.Context, orgID string, kind routeTypes.ExpressionKind) ([]*routeTypes.RoutePolicy, error) {
+ var routes []*routeTypes.RoutePolicy
+ err := store.sqlstore.BunDBCtx(ctx).NewSelect().Model(&routes).Where("org_id = ?", orgID).Where("kind = ?", kind).Scan(ctx)
+ if err != nil {
+ if errors.Is(err, sql.ErrNoRows) {
+ return nil, errors.NewNotFoundf(errors.CodeNotFound, "no routing policies found for orgID: %s", orgID)
+ }
+ return nil, errors.Wrapf(err, errors.TypeInternal, errors.CodeInternal, "unable to fetch routing policies for orgID: %s", orgID)
+ }
+ return routes, nil
+}
+
+func (store *store) GetAllByName(ctx context.Context, orgID string, name string) ([]*routeTypes.RoutePolicy, error) {
+ var routes []*routeTypes.RoutePolicy
+ err := store.sqlstore.BunDBCtx(ctx).NewSelect().Model(&routes).Where("org_id = ?", orgID).Where("name = ?", name).Scan(ctx)
+ if err != nil {
+ if errors.Is(err, sql.ErrNoRows) {
+ return nil, errors.NewNotFoundf(errors.CodeNotFound, "no routing policies found for orgID: %s and name: %s", orgID, name)
+ }
+ return nil, errors.Wrapf(err, errors.TypeInternal, errors.CodeInternal, "unable to fetch routing policies for orgID: %s and name: %s", orgID, name)
+ }
+ return routes, nil
+}
+
+func (store *store) DeleteRouteByName(ctx context.Context, orgID string, name string) error {
+ _, err := store.sqlstore.BunDBCtx(ctx).NewDelete().Model((*routeTypes.RoutePolicy)(nil)).Where("org_id = ?", orgID).Where("name = ?", name).Exec(ctx)
+ if err != nil {
+ return errors.Wrapf(err, errors.TypeInternal, errors.CodeInternal, "unable to delete routing policies with name: %s", name)
+ }
+
+ return nil
+}
diff --git a/pkg/alertmanager/nfmanager/notificationmanager.go b/pkg/alertmanager/nfmanager/notificationmanager.go
index 531c2baae725..5f44e385bee7 100644
--- a/pkg/alertmanager/nfmanager/notificationmanager.go
+++ b/pkg/alertmanager/nfmanager/notificationmanager.go
@@ -2,12 +2,27 @@
package nfmanager
import (
+ "context"
+
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
+ "github.com/prometheus/common/model"
)
-// NotificationManager defines how alerts should be grouped and configured for notification with multi-tenancy support.
+// NotificationManager defines how alerts should be grouped and configured for notification.
type NotificationManager interface {
+ // Notification Config CRUD
GetNotificationConfig(orgID string, ruleID string) (*alertmanagertypes.NotificationConfig, error)
SetNotificationConfig(orgID string, ruleID string, config *alertmanagertypes.NotificationConfig) error
DeleteNotificationConfig(orgID string, ruleID string) error
+
+ // Route Policy CRUD
+ CreateRoutePolicy(ctx context.Context, orgID string, route *alertmanagertypes.RoutePolicy) error
+ CreateRoutePolicies(ctx context.Context, orgID string, routes []*alertmanagertypes.RoutePolicy) error
+ GetRoutePolicyByID(ctx context.Context, orgID string, routeID string) (*alertmanagertypes.RoutePolicy, error)
+ GetAllRoutePolicies(ctx context.Context, orgID string) ([]*alertmanagertypes.RoutePolicy, error)
+ DeleteRoutePolicy(ctx context.Context, orgID string, routeID string) error
+ DeleteAllRoutePoliciesByName(ctx context.Context, orgID string, name string) error
+
+ // Route matching
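+ // Match returns the channel names whose routing policies match the given label set for the rule.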
+ Match(ctx context.Context, orgID string, ruleID string, set model.LabelSet) ([]string, error)
}
diff --git a/pkg/alertmanager/nfmanager/rulebasednotification/provider.go b/pkg/alertmanager/nfmanager/rulebasednotification/provider.go
index 0ce4141ad1d9..13a33184edfc 100644
--- a/pkg/alertmanager/nfmanager/rulebasednotification/provider.go
+++ b/pkg/alertmanager/nfmanager/rulebasednotification/provider.go
@@ -2,11 +2,14 @@ package rulebasednotification
import (
"context"
+ "strings"
"sync"
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager"
"github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
+ "github.com/expr-lang/expr"
+ "github.com/prometheus/common/model"
"github.com/SigNoz/signoz/pkg/factory"
)
@@ -14,26 +17,28 @@ import (
type provider struct {
settings factory.ScopedProviderSettings
orgToFingerprintToNotificationConfig map[string]map[string]alertmanagertypes.NotificationConfig
+ routeStore alertmanagertypes.RouteStore
mutex sync.RWMutex
}
// NewFactory creates a new factory for the rule-based grouping strategy.
-func NewFactory() factory.ProviderFactory[nfmanager.NotificationManager, nfmanager.Config] {
+func NewFactory(routeStore alertmanagertypes.RouteStore) factory.ProviderFactory[nfmanager.NotificationManager, nfmanager.Config] {
return factory.NewProviderFactory(
factory.MustNewName("rulebased"),
func(ctx context.Context, settings factory.ProviderSettings, config nfmanager.Config) (nfmanager.NotificationManager, error) {
- return New(ctx, settings, config)
+ return New(ctx, settings, config, routeStore)
},
)
}
// New creates a new rule-based grouping strategy provider.
-func New(ctx context.Context, providerSettings factory.ProviderSettings, config nfmanager.Config) (nfmanager.NotificationManager, error) {
+func New(ctx context.Context, providerSettings factory.ProviderSettings, config nfmanager.Config, routeStore alertmanagertypes.RouteStore) (nfmanager.NotificationManager, error) {
settings := factory.NewScopedProviderSettings(providerSettings, "github.com/SigNoz/signoz/pkg/alertmanager/nfmanager/rulebasednotification")
return &provider{
settings: settings,
orgToFingerprintToNotificationConfig: make(map[string]map[string]alertmanagertypes.NotificationConfig),
+ routeStore: routeStore,
}, nil
}
@@ -58,6 +63,8 @@ func (r *provider) GetNotificationConfig(orgID string, ruleID string) (*alertman
for k, v := range config.NotificationGroup {
notificationConfig.NotificationGroup[k] = v
}
+ notificationConfig.UsePolicy = config.UsePolicy
+ notificationConfig.GroupByAll = config.GroupByAll
}
}
@@ -101,3 +108,147 @@ func (r *provider) DeleteNotificationConfig(orgID string, ruleID string) error {
return nil
}
+
+func (r *provider) CreateRoutePolicy(ctx context.Context, orgID string, route *alertmanagertypes.RoutePolicy) error {
+ if route == nil {
+ return errors.NewInvalidInputf(errors.CodeInvalidInput, "route policy cannot be nil")
+ }
+
+ err := route.Validate()
+ if err != nil {
+ return errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid route policy: %v", err)
+ }
+
+ return r.routeStore.Create(ctx, route)
+}
+
+func (r *provider) CreateRoutePolicies(ctx context.Context, orgID string, routes []*alertmanagertypes.RoutePolicy) error {
+ if len(routes) == 0 {
+ return errors.NewInvalidInputf(errors.CodeInvalidInput, "route policies cannot be empty")
+ }
+
+ for _, route := range routes {
+ if route == nil {
+ return errors.NewInvalidInputf(errors.CodeInvalidInput, "route policy cannot be nil")
+ }
+ if err := route.Validate(); err != nil {
+ return errors.NewInvalidInputf(errors.CodeInvalidInput, "route policy with name %s: %s", route.Name, err.Error())
+ }
+ }
+ return r.routeStore.CreateBatch(ctx, routes)
+}
+
+func (r *provider) GetRoutePolicyByID(ctx context.Context, orgID string, routeID string) (*alertmanagertypes.RoutePolicy, error) {
+ if routeID == "" {
+ return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "routeID cannot be empty")
+ }
+
+ return r.routeStore.GetByID(ctx, orgID, routeID)
+}
+
+func (r *provider) GetAllRoutePolicies(ctx context.Context, orgID string) ([]*alertmanagertypes.RoutePolicy, error) {
+ if orgID == "" {
+ return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "orgID cannot be empty")
+ }
+
+ return r.routeStore.GetAllByKind(ctx, orgID, alertmanagertypes.PolicyBasedExpression)
+}
+
+func (r *provider) DeleteRoutePolicy(ctx context.Context, orgID string, routeID string) error {
+ if routeID == "" {
+ return errors.NewInvalidInputf(errors.CodeInvalidInput, "routeID cannot be empty")
+ }
+
+ return r.routeStore.Delete(ctx, orgID, routeID)
+}
+
+func (r *provider) DeleteAllRoutePoliciesByName(ctx context.Context, orgID string, name string) error {
+ if orgID == "" {
+ return errors.NewInvalidInputf(errors.CodeInvalidInput, "orgID cannot be empty")
+ }
+ if name == "" {
+ return errors.NewInvalidInputf(errors.CodeInvalidInput, "name cannot be empty")
+ }
+ return r.routeStore.DeleteRouteByName(ctx, orgID, name)
+}
+
+func (r *provider) Match(ctx context.Context, orgID string, ruleID string, set model.LabelSet) ([]string, error) {
+ config, err := r.GetNotificationConfig(orgID, ruleID)
+ if err != nil {
+ return nil, errors.NewInternalf(errors.CodeInternal, "error getting notification configuration: %v", err)
+ }
+ var expressionRoutes []*alertmanagertypes.RoutePolicy
+ if config.UsePolicy {
+ expressionRoutes, err = r.routeStore.GetAllByKind(ctx, orgID, alertmanagertypes.PolicyBasedExpression)
+ if err != nil {
+ return []string{}, errors.NewInternalf(errors.CodeInternal, "error getting route policies: %v", err)
+ }
+ } else {
+ expressionRoutes, err = r.routeStore.GetAllByName(ctx, orgID, ruleID)
+ if err != nil {
+ return []string{}, errors.NewInternalf(errors.CodeInternal, "error getting route policies: %v", err)
+ }
+ }
+ var matchedChannels []string
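+ // A no-data alert on a rule that does not use routing policies notifies every channel configured for that rule's routes.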
+ if _, ok := set[alertmanagertypes.NoDataLabel]; ok && !config.UsePolicy {
+ for _, expressionRoute := range expressionRoutes {
+ matchedChannels = append(matchedChannels, expressionRoute.Channels...)
+ }
+ return matchedChannels, nil
+ }
+
+ for _, route := range expressionRoutes {
+ evaluateExpr, err := r.evaluateExpr(route.Expression, set)
+ if err != nil {
+ continue
+ }
+ if evaluateExpr {
+ matchedChannels = append(matchedChannels, route.Channels...)
+ }
+ }
+
+ return matchedChannels, nil
+}
+
+func (r *provider) evaluateExpr(expression string, labelSet model.LabelSet) (bool, error) {
+ env := make(map[string]interface{})
+
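+ // Expand dotted label keys (e.g. "k8s.namespace.name") into nested maps so expressions can use dot notation.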
+ for k, v := range labelSet {
+ key := string(k)
+ value := string(v)
+
+ if strings.Contains(key, ".") {
+ parts := strings.Split(key, ".")
+ current := env
+
+ for i, part := range parts {
+ if i == len(parts)-1 {
+ current[part] = value
+ } else {
+ // Reuse the nested map if one exists; otherwise create it (overwriting any conflicting flat value) to avoid a type-assertion panic.
+ next, ok := current[part].(map[string]interface{})
+ if !ok {
+ next = make(map[string]interface{})
+ current[part] = next
+ }
+ current = next
+ }
+ }
+ } else {
+ env[key] = value
+ }
+ }
+
+ program, err := expr.Compile(expression, expr.Env(env))
+ if err != nil {
+ return false, errors.NewInternalf(errors.CodeInternal, "error compiling route policy %s: %v", expression, err)
+ }
+
+ output, err := expr.Run(program, env)
+ if err != nil {
+ return false, errors.NewInternalf(errors.CodeInternal, "error running route policy %s: %v", expression, err)
+ }
+
+ if boolVal, ok := output.(bool); ok {
+ return boolVal, nil
+ }
+
+ return false, errors.NewInternalf(errors.CodeInternal, "route policy %s did not evaluate to a boolean", expression)
+}
diff --git a/pkg/alertmanager/nfmanager/rulebasednotification/provider_test.go b/pkg/alertmanager/nfmanager/rulebasednotification/provider_test.go
index b380cc1cee89..8c32f6e42412 100644
--- a/pkg/alertmanager/nfmanager/rulebasednotification/provider_test.go
+++ b/pkg/alertmanager/nfmanager/rulebasednotification/provider_test.go
@@ -2,18 +2,22 @@ package rulebasednotification
import (
"context"
- "github.com/prometheus/common/model"
"sync"
"testing"
"time"
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager"
+ "github.com/SigNoz/signoz/pkg/alertmanager/nfmanager/nfroutingstore/nfroutingstoretest"
"github.com/SigNoz/signoz/pkg/factory"
"github.com/SigNoz/signoz/pkg/instrumentation/instrumentationtest"
+ "github.com/SigNoz/signoz/pkg/types"
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
- "github.com/prometheus/alertmanager/types"
+ "github.com/SigNoz/signoz/pkg/valuer"
+
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
+
+ "github.com/prometheus/common/model"
)
func createTestProviderSettings() factory.ProviderSettings {
@@ -21,7 +25,8 @@ func createTestProviderSettings() factory.ProviderSettings {
}
func TestNewFactory(t *testing.T) {
- providerFactory := NewFactory()
+ routeStore := nfroutingstoretest.NewMockSQLRouteStore()
+ providerFactory := NewFactory(routeStore)
assert.NotNil(t, providerFactory)
assert.Equal(t, "rulebased", providerFactory.Name().String())
}
@@ -31,7 +36,8 @@ func TestNew(t *testing.T) {
providerSettings := createTestProviderSettings()
config := nfmanager.Config{}
- provider, err := New(ctx, providerSettings, config)
+ routeStore := nfroutingstoretest.NewMockSQLRouteStore()
+ provider, err := New(ctx, providerSettings, config, routeStore)
require.NoError(t, err)
assert.NotNil(t, provider)
@@ -44,7 +50,8 @@ func TestProvider_SetNotificationConfig(t *testing.T) {
providerSettings := createTestProviderSettings()
config := nfmanager.Config{}
- provider, err := New(ctx, providerSettings, config)
+ routeStore := nfroutingstoretest.NewMockSQLRouteStore()
+ provider, err := New(ctx, providerSettings, config, routeStore)
require.NoError(t, err)
tests := []struct {
@@ -124,11 +131,12 @@ func TestProvider_GetNotificationConfig(t *testing.T) {
providerSettings := createTestProviderSettings()
config := nfmanager.Config{}
- provider, err := New(ctx, providerSettings, config)
+ routeStore := nfroutingstoretest.NewMockSQLRouteStore()
+ provider, err := New(ctx, providerSettings, config, routeStore)
require.NoError(t, err)
orgID := "test-org"
- ruleID := "rule1"
+ ruleID := "ruleId"
customConfig := &alertmanagertypes.NotificationConfig{
Renotify: alertmanagertypes.ReNotificationConfig{
RenotifyInterval: 30 * time.Minute,
@@ -144,7 +152,6 @@ func TestProvider_GetNotificationConfig(t *testing.T) {
},
}
- // Set config for alert1
err = provider.SetNotificationConfig(orgID, ruleID, customConfig)
require.NoError(t, err)
@@ -155,7 +162,7 @@ func TestProvider_GetNotificationConfig(t *testing.T) {
name string
orgID string
ruleID string
- alert *types.Alert
+ alert *alertmanagertypes.Alert
expectedConfig *alertmanagertypes.NotificationConfig
shouldFallback bool
}{
@@ -165,7 +172,7 @@ func TestProvider_GetNotificationConfig(t *testing.T) {
ruleID: ruleID,
expectedConfig: &alertmanagertypes.NotificationConfig{
NotificationGroup: map[model.LabelName]struct{}{
- model.LabelName("ruleId"): {},
+ model.LabelName(ruleID): {},
},
Renotify: alertmanagertypes.ReNotificationConfig{
RenotifyInterval: 30 * time.Minute,
@@ -182,13 +189,13 @@ func TestProvider_GetNotificationConfig(t *testing.T) {
NotificationGroup: map[model.LabelName]struct{}{
model.LabelName("group1"): {},
model.LabelName("group2"): {},
- model.LabelName("ruleId"): {},
+ model.LabelName(ruleID): {},
},
Renotify: alertmanagertypes.ReNotificationConfig{
RenotifyInterval: 4 * time.Hour,
NoDataInterval: 4 * time.Hour,
},
- }, // Will get fallback from standardnotification
+ },
shouldFallback: false,
},
{
@@ -231,7 +238,8 @@ func TestProvider_ConcurrentAccess(t *testing.T) {
providerSettings := createTestProviderSettings()
config := nfmanager.Config{}
- provider, err := New(ctx, providerSettings, config)
+ routeStore := nfroutingstoretest.NewMockSQLRouteStore()
+ provider, err := New(ctx, providerSettings, config, routeStore)
require.NoError(t, err)
orgID := "test-org"
@@ -268,3 +276,634 @@ func TestProvider_ConcurrentAccess(t *testing.T) {
// Wait for both goroutines to complete
wg.Wait()
}
+
+func TestProvider_EvaluateExpression(t *testing.T) {
+ provider := &provider{}
+
+ tests := []struct {
+ name string
+ expression string
+ labelSet model.LabelSet
+ expected bool
+ }{
+ {
+ name: "simple equality check - match",
+ expression: `threshold.name == 'auth' && ruleId == 'rule1'`,
+ labelSet: model.LabelSet{
+ "threshold.name": "auth",
+ "ruleId": "rule1",
+ },
+ expected: true,
+ },
+ {
+ name: "simple equality check - match",
+ expression: `threshold.name = 'auth' AND ruleId = 'rule1'`,
+ labelSet: model.LabelSet{
+ "threshold.name": "auth",
+ "ruleId": "rule1",
+ },
+ expected: true,
+ },
+ {
+ name: "simple equality check - no match",
+ expression: `service == "payment"`,
+ labelSet: model.LabelSet{
+ "service": "auth",
+ "env": "production",
+ },
+ expected: false,
+ },
+ {
+ name: "simple equality check - no match",
+ expression: `service = "payment"`,
+ labelSet: model.LabelSet{
+ "service": "auth",
+ "env": "production",
+ },
+ expected: false,
+ },
+ {
+ name: "multiple conditions with AND - both match",
+ expression: `service == "auth" && env == "production"`,
+ labelSet: model.LabelSet{
+ "service": "auth",
+ "env": "production",
+ },
+ expected: true,
+ },
+ {
+ name: "multiple conditions with AND - both match",
+ expression: `service = "auth" AND env = "production"`,
+ labelSet: model.LabelSet{
+ "service": "auth",
+ "env": "production",
+ },
+ expected: true,
+ },
+ {
+ name: "multiple conditions with AND - one doesn't match",
+ expression: `service == "auth" && env == "staging"`,
+ labelSet: model.LabelSet{
+ "service": "auth",
+ "env": "production",
+ },
+ expected: false,
+ },
+ {
+ name: "multiple conditions with AND - one doesn't match",
+ expression: `service = "auth" AND env = "staging"`,
+ labelSet: model.LabelSet{
+ "service": "auth",
+ "env": "production",
+ },
+ expected: false,
+ },
+ {
+ name: "multiple conditions with OR - one matches",
+ expression: `service == "payment" || env == "production"`,
+ labelSet: model.LabelSet{
+ "service": "auth",
+ "env": "production",
+ },
+ expected: true,
+ },
+ {
+ name: "multiple conditions with OR - one matches",
+ expression: `service = "payment" OR env = "production"`,
+ labelSet: model.LabelSet{
+ "service": "auth",
+ "env": "production",
+ },
+ expected: true,
+ },
+ {
+ name: "multiple conditions with OR - none match",
+ expression: `service == "payment" || env == "staging"`,
+ labelSet: model.LabelSet{
+ "service": "auth",
+ "env": "production",
+ },
+ expected: false,
+ },
+ {
+ name: "multiple conditions with OR - none match",
+ expression: `service = "payment" OR env = "staging"`,
+ labelSet: model.LabelSet{
+ "service": "auth",
+ "env": "production",
+ },
+ expected: false,
+ },
+ {
+ name: "in operator - value in list",
+ expression: `service in ["auth", "payment", "notification"]`,
+ labelSet: model.LabelSet{
+ "service": "auth",
+ },
+ expected: true,
+ },
+ {
+ name: "in operator - value in list",
+ expression: `service IN ["auth", "payment", "notification"]`,
+ labelSet: model.LabelSet{
+ "service": "auth",
+ },
+ expected: true,
+ },
+ {
+ name: "in operator - value not in list",
+ expression: `service in ["payment", "notification"]`,
+ labelSet: model.LabelSet{
+ "service": "auth",
+ },
+ expected: false,
+ },
+ {
+ name: "in operator - value not in list",
+ expression: `service IN ["payment", "notification"]`,
+ labelSet: model.LabelSet{
+ "service": "auth",
+ },
+ expected: false,
+ },
+ {
+ name: "contains operator - substring match",
+ expression: `host contains "prod"`,
+ labelSet: model.LabelSet{
+ "host": "prod-server-01",
+ },
+ expected: true,
+ },
+ {
+ name: "contains operator - substring match",
+ expression: `host CONTAINS "prod"`,
+ labelSet: model.LabelSet{
+ "host": "prod-server-01",
+ },
+ expected: true,
+ },
+ {
+ name: "contains operator - no substring match",
+ expression: `host contains "staging"`,
+ labelSet: model.LabelSet{
+ "host": "prod-server-01",
+ },
+ expected: false,
+ },
+ {
+ name: "contains operator - no substring match",
+ expression: `host CONTAINS "staging"`,
+ labelSet: model.LabelSet{
+ "host": "prod-server-01",
+ },
+ expected: false,
+ },
+ {
+ name: "complex expression with parentheses",
+ expression: `(service == "auth" && env == "production") || critical == "true"`,
+ labelSet: model.LabelSet{
+ "service": "payment",
+ "env": "staging",
+ "critical": "true",
+ },
+ expected: true,
+ },
+ {
+ name: "complex expression with parentheses",
+ expression: `(service = "auth" AND env = "production") OR critical = "true"`,
+ labelSet: model.LabelSet{
+ "service": "payment",
+ "env": "staging",
+ "critical": "true",
+ },
+ expected: true,
+ },
+ {
+ name: "missing label key",
+ expression: `"missing_key" == "value"`,
+ labelSet: model.LabelSet{
+ "service": "auth",
+ },
+ expected: false,
+ },
+ {
+ name: "missing label key",
+ expression: `"missing_key" = "value"`,
+ labelSet: model.LabelSet{
+ "service": "auth",
+ },
+ expected: false,
+ },
+ {
+ name: "rule-based expression with threshold name and ruleId",
+ expression: `'threshold.name' == "high-cpu" && ruleId == "rule-123"`,
+ labelSet: model.LabelSet{
+ "threshold.name": "high-cpu",
+ "ruleId": "rule-123",
+ "service": "auth",
+ },
+ expected: false, // 'threshold.name' is a quoted string literal here, not a label lookup
+ },
+ {
+ name: "rule-based expression with threshold name and ruleId",
+ expression: `'threshold.name' = "high-cpu" AND ruleId == "rule-123"`,
+ labelSet: model.LabelSet{
+ "threshold.name": "high-cpu",
+ "ruleId": "rule-123",
+ "service": "auth",
+ },
+ expected: false, // 'threshold.name' is a quoted string literal here, not a label lookup
+ },
+ {
+ name: "alertname and ruleId combination",
+ expression: `alertname == "HighCPUUsage" && ruleId == "cpu-alert-001"`,
+ labelSet: model.LabelSet{
+ "alertname": "HighCPUUsage",
+ "ruleId": "cpu-alert-001",
+ "severity": "critical",
+ },
+ expected: true,
+ },
+ {
+ name: "alertname and ruleId combination",
+ expression: `alertname = "HighCPUUsage" AND ruleId = "cpu-alert-001"`,
+ labelSet: model.LabelSet{
+ "alertname": "HighCPUUsage",
+ "ruleId": "cpu-alert-001",
+ "severity": "critical",
+ },
+ expected: true,
+ },
+ {
+ name: "kubernetes namespace filtering",
+ expression: `k8s.namespace.name == "auth" && service in ["auth", "payment"]`,
+ labelSet: model.LabelSet{
+ "k8s.namespace.name": "auth",
+ "service": "auth",
+ "host": "k8s-node-1",
+ },
+ expected: true,
+ },
+ {
+ name: "kubernetes namespace filtering",
+ expression: `k8s.namespace.name = "auth" && service IN ["auth", "payment"]`,
+ labelSet: model.LabelSet{
+ "k8s.namespace.name": "auth",
+ "service": "auth",
+ "host": "k8s-node-1",
+ },
+ expected: true,
+ },
+ {
+ name: "migration expression format from SQL migration",
+ expression: `threshold.name == "HighCPUUsage" && ruleId == "rule-uuid-123"`,
+ labelSet: model.LabelSet{
+ "threshold.name": "HighCPUUsage",
+ "ruleId": "rule-uuid-123",
+ "severity": "warning",
+ },
+ expected: true,
+ },
+ {
+ name: "migration expression format from SQL migration",
+ expression: `threshold.name = "HighCPUUsage" && ruleId = "rule-uuid-123"`,
+ labelSet: model.LabelSet{
+ "threshold.name": "HighCPUUsage",
+ "ruleId": "rule-uuid-123",
+ "severity": "warning",
+ },
+ expected: true,
+ },
+ {
+ name: "case sensitive matching",
+ expression: `service == "Auth"`, // capital A
+ labelSet: model.LabelSet{
+ "service": "auth", // lowercase a
+ },
+ expected: false,
+ },
+ {
+ name: "case sensitive matching",
+ expression: `service = "Auth"`, // capital A
+ labelSet: model.LabelSet{
+ "service": "auth", // lowercase a
+ },
+ expected: false,
+ },
+ {
+ name: "numeric comparison as strings",
+ expression: `port == "8080"`,
+ labelSet: model.LabelSet{
+ "port": "8080",
+ },
+ expected: true,
+ },
+ {
+ name: "numeric comparison as strings",
+ expression: `port = "8080"`,
+ labelSet: model.LabelSet{
+ "port": "8080",
+ },
+ expected: true,
+ },
+ {
+ name: "quoted string with special characters",
+ expression: `service == "auth-service-v2"`,
+ labelSet: model.LabelSet{
+ "service": "auth-service-v2",
+ },
+ expected: true,
+ },
+ {
+ name: "quoted string with special characters",
+ expression: `service = "auth-service-v2"`,
+ labelSet: model.LabelSet{
+ "service": "auth-service-v2",
+ },
+ expected: true,
+ },
+ {
+ name: "boolean operators precedence",
+ expression: `service == "auth" && env == "prod" || critical == "true"`,
+ labelSet: model.LabelSet{
+ "service": "payment",
+ "env": "staging",
+ "critical": "true",
+ },
+ expected: true,
+ },
+ {
+ name: "boolean operators precedence",
+ expression: `service = "auth" AND env = "prod" OR critical = "true"`,
+ labelSet: model.LabelSet{
+ "service": "payment",
+ "env": "staging",
+ "critical": "true",
+ },
+ expected: true,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ result, err := provider.evaluateExpr(tt.expression, tt.labelSet)
+ assert.NoError(t, err)
+ assert.Equal(t, tt.expected, result, "Expression: %s", tt.expression)
+ })
+ }
+}
+
+func TestProvider_DeleteRoute(t *testing.T) {
+ ctx := context.Background()
+ providerSettings := createTestProviderSettings()
+ config := nfmanager.Config{}
+
+ tests := []struct {
+ name string
+ orgID string
+ routeID string
+ wantErr bool
+ }{
+ {
+ name: "valid parameters",
+ orgID: "test-org-123",
+ routeID: "route-uuid-456",
+ wantErr: false,
+ },
+ {
+ name: "empty routeID",
+ orgID: "test-org-123",
+ routeID: "",
+ wantErr: true,
+ },
+ {
+ name: "valid orgID with valid routeID",
+ orgID: "another-org",
+ routeID: "another-route-id",
+ wantErr: false,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ routeStore := nfroutingstoretest.NewMockSQLRouteStore()
+ provider, err := New(ctx, providerSettings, config, routeStore)
+ require.NoError(t, err)
+
+ if !tt.wantErr {
+ routeStore.ExpectDelete(tt.orgID, tt.routeID)
+ }
+
+ err = provider.DeleteRoutePolicy(ctx, tt.orgID, tt.routeID)
+
+ if tt.wantErr {
+ assert.Error(t, err)
+ } else {
+ assert.NoError(t, err)
+ assert.NoError(t, routeStore.ExpectationsWereMet())
+ }
+ })
+ }
+}
+
+func TestProvider_CreateRoute(t *testing.T) {
+ ctx := context.Background()
+ providerSettings := createTestProviderSettings()
+ config := nfmanager.Config{}
+
+ tests := []struct {
+ name string
+ orgID string
+ route *alertmanagertypes.RoutePolicy
+ wantErr bool
+ }{
+ {
+ name: "valid route",
+ orgID: "test-org-123",
+ route: &alertmanagertypes.RoutePolicy{
+ Identifiable: types.Identifiable{ID: valuer.GenerateUUID()},
+ Expression: `service == "auth"`,
+ ExpressionKind: alertmanagertypes.PolicyBasedExpression,
+ Name: "auth-service-route",
+ Description: "Route for auth service alerts",
+ Enabled: true,
+ OrgID: "test-org-123",
+ Channels: []string{"slack-channel"},
+ },
+ wantErr: false,
+ },
+ {
+ name: "valid route qb format",
+ orgID: "test-org-123",
+ route: &alertmanagertypes.RoutePolicy{
+ Identifiable: types.Identifiable{ID: valuer.GenerateUUID()},
+ Expression: `service = "auth"`,
+ ExpressionKind: alertmanagertypes.PolicyBasedExpression,
+ Name: "auth-service-route",
+ Description: "Route for auth service alerts",
+ Enabled: true,
+ OrgID: "test-org-123",
+ Channels: []string{"slack-channel"},
+ },
+ wantErr: false,
+ },
+ {
+ name: "nil route",
+ orgID: "test-org-123",
+ route: nil,
+ wantErr: true,
+ },
+ {
+ name: "invalid route - missing expression",
+ orgID: "test-org-123",
+ route: &alertmanagertypes.RoutePolicy{
+ Expression: "", // empty expression
+ ExpressionKind: alertmanagertypes.PolicyBasedExpression,
+ Name: "invalid-route",
+ OrgID: "test-org-123",
+ },
+ wantErr: true,
+ },
+ {
+ name: "invalid route - missing name",
+ orgID: "test-org-123",
+ route: &alertmanagertypes.RoutePolicy{
+ Expression: `service == "auth"`,
+ ExpressionKind: alertmanagertypes.PolicyBasedExpression,
+ Name: "", // empty name
+ OrgID: "test-org-123",
+ },
+ wantErr: true,
+ },
+ {
+ name: "invalid route - missing name",
+ orgID: "test-org-123",
+ route: &alertmanagertypes.RoutePolicy{
+ Expression: `service = "auth"`,
+ ExpressionKind: alertmanagertypes.PolicyBasedExpression,
+ Name: "", // empty name
+ OrgID: "test-org-123",
+ },
+ wantErr: true,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ routeStore := nfroutingstoretest.NewMockSQLRouteStore()
+ provider, err := New(ctx, providerSettings, config, routeStore)
+ require.NoError(t, err)
+
+ if !tt.wantErr && tt.route != nil {
+ routeStore.ExpectCreate(tt.route)
+ }
+
+ err = provider.CreateRoutePolicy(ctx, tt.orgID, tt.route)
+
+ if tt.wantErr {
+ assert.Error(t, err)
+ } else {
+ assert.NoError(t, err)
+ assert.NoError(t, routeStore.ExpectationsWereMet())
+ }
+ })
+ }
+}
+
+func TestProvider_CreateRoutes(t *testing.T) {
+ ctx := context.Background()
+ providerSettings := createTestProviderSettings()
+ config := nfmanager.Config{}
+
+ routeStore := nfroutingstoretest.NewMockSQLRouteStore()
+ provider, err := New(ctx, providerSettings, config, routeStore)
+ require.NoError(t, err)
+
+ validRoute1 := &alertmanagertypes.RoutePolicy{
+ Expression: `service == "auth"`,
+ ExpressionKind: alertmanagertypes.PolicyBasedExpression,
+ Name: "auth-route",
+ Description: "Auth service route",
+ Enabled: true,
+ OrgID: "test-org",
+ Channels: []string{"slack-auth"},
+ }
+
+ validRoute2 := &alertmanagertypes.RoutePolicy{
+ Expression: `service == "payment"`,
+ ExpressionKind: alertmanagertypes.PolicyBasedExpression,
+ Name: "payment-route",
+ Description: "Payment service route",
+ Enabled: true,
+ OrgID: "test-org",
+ Channels: []string{"slack-payment"},
+ }
+
+ invalidRoute := &alertmanagertypes.RoutePolicy{
+ Expression: "", // empty expression - invalid
+ ExpressionKind: alertmanagertypes.PolicyBasedExpression,
+ Name: "invalid-route",
+ OrgID: "test-org",
+ }
+
+ tests := []struct {
+ name string
+ orgID string
+ routes []*alertmanagertypes.RoutePolicy
+ wantErr bool
+ }{
+ {
+ name: "valid routes",
+ orgID: "test-org",
+ routes: []*alertmanagertypes.RoutePolicy{validRoute1, validRoute2},
+ wantErr: false,
+ },
+ {
+ name: "empty routes list",
+ orgID: "test-org",
+ routes: []*alertmanagertypes.RoutePolicy{},
+ wantErr: true,
+ },
+ {
+ name: "nil routes list",
+ orgID: "test-org",
+ routes: nil,
+ wantErr: true,
+ },
+ {
+ name: "routes with nil route",
+ orgID: "test-org",
+ routes: []*alertmanagertypes.RoutePolicy{validRoute1, nil},
+ wantErr: true,
+ },
+ {
+ name: "routes with invalid route",
+ orgID: "test-org",
+ routes: []*alertmanagertypes.RoutePolicy{validRoute1, invalidRoute},
+ wantErr: true,
+ },
+ {
+ name: "single valid route",
+ orgID: "test-org",
+ routes: []*alertmanagertypes.RoutePolicy{validRoute1},
+ wantErr: false,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ if !tt.wantErr && len(tt.routes) > 0 {
+ routeStore.ExpectCreateBatch(tt.routes)
+ }
+
+ err := provider.CreateRoutePolicies(ctx, tt.orgID, tt.routes)
+
+ if tt.wantErr {
+ assert.Error(t, err)
+ } else {
+ assert.NoError(t, err)
+ assert.NoError(t, routeStore.ExpectationsWereMet())
+ }
+ })
+ }
+}
diff --git a/pkg/alertmanager/service.go b/pkg/alertmanager/service.go
index 163c673b7622..606dc72d9ddf 100644
--- a/pkg/alertmanager/service.go
+++ b/pkg/alertmanager/service.go
@@ -4,6 +4,9 @@ import (
"context"
"sync"
+ "github.com/prometheus/alertmanager/featurecontrol"
+ "github.com/prometheus/alertmanager/matcher/compat"
+
"github.com/SigNoz/signoz/pkg/alertmanager/alertmanagerserver"
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager"
"github.com/SigNoz/signoz/pkg/errors"
@@ -61,6 +64,7 @@ func New(
}
func (service *Service) SyncServers(ctx context.Context) error {
+ compat.InitFromFlags(service.settings.Logger(), featurecontrol.NoopFlags{})
orgs, err := service.orgGetter.ListByOwnedKeyRange(ctx)
if err != nil {
return err
@@ -142,7 +146,7 @@ func (service *Service) TestReceiver(ctx context.Context, orgID string, receiver
return server.TestReceiver(ctx, receiver)
}
-func (service *Service) TestAlert(ctx context.Context, orgID string, alert *alertmanagertypes.PostableAlert, receivers []string) error {
+func (service *Service) TestAlert(ctx context.Context, orgID string, receiversMap map[*alertmanagertypes.PostableAlert][]string, config *alertmanagertypes.NotificationConfig) error {
service.serversMtx.RLock()
defer service.serversMtx.RUnlock()
@@ -151,7 +155,7 @@ func (service *Service) TestAlert(ctx context.Context, orgID string, alert *aler
return err
}
- return server.TestAlert(ctx, alert, receivers)
+ return server.TestAlert(ctx, receiversMap, config)
}
func (service *Service) Stop(ctx context.Context) error {
diff --git a/pkg/alertmanager/signozalertmanager/provider.go b/pkg/alertmanager/signozalertmanager/provider.go
index a92c5ab4e89f..9eab5e4896a9 100644
--- a/pkg/alertmanager/signozalertmanager/provider.go
+++ b/pkg/alertmanager/signozalertmanager/provider.go
@@ -2,8 +2,12 @@ package signozalertmanager
import (
"context"
+ "github.com/SigNoz/signoz/pkg/query-service/utils/labels"
+ "github.com/prometheus/common/model"
"time"
+ amConfig "github.com/prometheus/alertmanager/config"
+
"github.com/SigNoz/signoz/pkg/alertmanager"
"github.com/SigNoz/signoz/pkg/alertmanager/alertmanagerstore/sqlalertmanagerstore"
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager"
@@ -11,7 +15,9 @@ import (
"github.com/SigNoz/signoz/pkg/factory"
"github.com/SigNoz/signoz/pkg/modules/organization"
"github.com/SigNoz/signoz/pkg/sqlstore"
+ "github.com/SigNoz/signoz/pkg/types"
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
+ "github.com/SigNoz/signoz/pkg/types/authtypes"
"github.com/SigNoz/signoz/pkg/valuer"
)
@@ -94,8 +100,29 @@ func (provider *provider) TestReceiver(ctx context.Context, orgID string, receiv
return provider.service.TestReceiver(ctx, orgID, receiver)
}
-func (provider *provider) TestAlert(ctx context.Context, orgID string, alert *alertmanagertypes.PostableAlert, receivers []string) error {
- return provider.service.TestAlert(ctx, orgID, alert, receivers)
+func (provider *provider) TestAlert(ctx context.Context, orgID string, ruleID string, receiversMap map[*alertmanagertypes.PostableAlert][]string) error {
+ config, err := provider.notificationManager.GetNotificationConfig(orgID, ruleID)
+ if err != nil {
+ return err
+ }
+ if config.UsePolicy {
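+ // When the rule uses routing policies, resolve receivers per alert via policy matching and drop alerts with no matching channel.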
+ for alert := range receiversMap {
+ set := make(model.LabelSet)
+ for k, v := range alert.Labels {
+ set[model.LabelName(k)] = model.LabelValue(v)
+ }
+ match, err := provider.notificationManager.Match(ctx, orgID, alert.Labels[labels.AlertRuleIdLabel], set)
+ if err != nil {
+ return err
+ }
+ if len(match) == 0 {
+ delete(receiversMap, alert)
+ } else {
+ receiversMap[alert] = match
+ }
+ }
+ }
+ return provider.service.TestAlert(ctx, orgID, receiversMap, config)
}
func (provider *provider) ListChannels(ctx context.Context, orgID string) ([]*alertmanagertypes.Channel, error) {
@@ -211,3 +238,316 @@ func (provider *provider) DeleteNotificationConfig(ctx context.Context, orgID va
}
return nil
}
+
+func (provider *provider) CreateRoutePolicy(ctx context.Context, routeRequest *alertmanagertypes.PostableRoutePolicy) (*alertmanagertypes.GettableRoutePolicy, error) {
+ claims, err := authtypes.ClaimsFromContext(ctx)
+ if err != nil {
+ return nil, err
+ }
+ orgID, err := valuer.NewUUID(claims.OrgID)
+ if err != nil {
+ return nil, err
+ }
+
+ if err := routeRequest.Validate(); err != nil {
+ return nil, err
+ }
+
+ route := alertmanagertypes.RoutePolicy{
+ Expression: routeRequest.Expression,
+ ExpressionKind: routeRequest.ExpressionKind,
+ Name: routeRequest.Name,
+ Description: routeRequest.Description,
+ Enabled: true,
+ Tags: routeRequest.Tags,
+ Channels: routeRequest.Channels,
+ OrgID: claims.OrgID,
+ Identifiable: types.Identifiable{
+ ID: valuer.GenerateUUID(),
+ },
+ UserAuditable: types.UserAuditable{
+ CreatedBy: claims.Email,
+ UpdatedBy: claims.Email,
+ },
+ TimeAuditable: types.TimeAuditable{
+ CreatedAt: time.Now(),
+ UpdatedAt: time.Now(),
+ },
+ }
+
+ err = provider.notificationManager.CreateRoutePolicy(ctx, orgID.String(), &route)
+ if err != nil {
+ return nil, err
+ }
+
+ return &alertmanagertypes.GettableRoutePolicy{
+ PostableRoutePolicy: *routeRequest,
+ ID: route.ID.StringValue(),
+ CreatedAt: &route.CreatedAt,
+ UpdatedAt: &route.UpdatedAt,
+ CreatedBy: &route.CreatedBy,
+ UpdatedBy: &route.UpdatedBy,
+ }, nil
+}
+
+func (provider *provider) CreateRoutePolicies(ctx context.Context, routeRequests []*alertmanagertypes.PostableRoutePolicy) ([]*alertmanagertypes.GettableRoutePolicy, error) {
+ claims, err := authtypes.ClaimsFromContext(ctx)
+ if err != nil {
+ return nil, err
+ }
+
+ orgID, err := valuer.NewUUID(claims.OrgID)
+ if err != nil {
+ return nil, err
+ }
+
+ if len(routeRequests) == 0 {
+ return []*alertmanagertypes.GettableRoutePolicy{}, nil
+ }
+
+ routes := make([]*alertmanagertypes.RoutePolicy, 0, len(routeRequests))
+ results := make([]*alertmanagertypes.GettableRoutePolicy, 0, len(routeRequests))
+
+ for _, routeRequest := range routeRequests {
+ if err := routeRequest.Validate(); err != nil {
+ return nil, err
+ }
+
+ route := &alertmanagertypes.RoutePolicy{
+ Expression: routeRequest.Expression,
+ ExpressionKind: routeRequest.ExpressionKind,
+ Name: routeRequest.Name,
+ Description: routeRequest.Description,
+ Enabled: true,
+ Tags: routeRequest.Tags,
+ Channels: routeRequest.Channels,
+ OrgID: claims.OrgID,
+ Identifiable: types.Identifiable{
+ ID: valuer.GenerateUUID(),
+ },
+ UserAuditable: types.UserAuditable{
+ CreatedBy: claims.Email,
+ UpdatedBy: claims.Email,
+ },
+ TimeAuditable: types.TimeAuditable{
+ CreatedAt: time.Now(),
+ UpdatedAt: time.Now(),
+ },
+ }
+
+ routes = append(routes, route)
+ results = append(results, &alertmanagertypes.GettableRoutePolicy{
+ PostableRoutePolicy: *routeRequest,
+ ID: route.ID.StringValue(),
+ CreatedAt: &route.CreatedAt,
+ UpdatedAt: &route.UpdatedAt,
+ CreatedBy: &route.CreatedBy,
+ UpdatedBy: &route.UpdatedBy,
+ })
+ }
+
+ err = provider.notificationManager.CreateRoutePolicies(ctx, orgID.String(), routes)
+ if err != nil {
+ return nil, err
+ }
+
+ return results, nil
+}
+
+func (provider *provider) GetRoutePolicyByID(ctx context.Context, routeID string) (*alertmanagertypes.GettableRoutePolicy, error) {
+ claims, err := authtypes.ClaimsFromContext(ctx)
+ if err != nil {
+ return nil, err
+ }
+ orgID, err := valuer.NewUUID(claims.OrgID)
+ if err != nil {
+ return nil, err
+ }
+
+ route, err := provider.notificationManager.GetRoutePolicyByID(ctx, orgID.String(), routeID)
+ if err != nil {
+ return nil, err
+ }
+
+ return &alertmanagertypes.GettableRoutePolicy{
+ PostableRoutePolicy: alertmanagertypes.PostableRoutePolicy{
+ Expression: route.Expression,
+ ExpressionKind: route.ExpressionKind,
+ Channels: route.Channels,
+ Name: route.Name,
+ Description: route.Description,
+ Tags: route.Tags,
+ },
+ ID: route.ID.StringValue(),
+ CreatedAt: &route.CreatedAt,
+ UpdatedAt: &route.UpdatedAt,
+ CreatedBy: &route.CreatedBy,
+ UpdatedBy: &route.UpdatedBy,
+ }, nil
+}
+
+func (provider *provider) GetAllRoutePolicies(ctx context.Context) ([]*alertmanagertypes.GettableRoutePolicy, error) {
+ claims, err := authtypes.ClaimsFromContext(ctx)
+ if err != nil {
+ return nil, err
+ }
+ orgID, err := valuer.NewUUID(claims.OrgID)
+ if err != nil {
+ return nil, err
+ }
+
+ routes, err := provider.notificationManager.GetAllRoutePolicies(ctx, orgID.String())
+ if err != nil {
+ return nil, err
+ }
+
+ results := make([]*alertmanagertypes.GettableRoutePolicy, 0, len(routes))
+ for _, route := range routes {
+ results = append(results, &alertmanagertypes.GettableRoutePolicy{
+ PostableRoutePolicy: alertmanagertypes.PostableRoutePolicy{
+ Expression: route.Expression,
+ ExpressionKind: route.ExpressionKind,
+ Channels: route.Channels,
+ Name: route.Name,
+ Description: route.Description,
+ Tags: route.Tags,
+ },
+ ID: route.ID.StringValue(),
+ CreatedAt: &route.CreatedAt,
+ UpdatedAt: &route.UpdatedAt,
+ CreatedBy: &route.CreatedBy,
+ UpdatedBy: &route.UpdatedBy,
+ })
+ }
+
+ return results, nil
+}
+
+func (provider *provider) UpdateRoutePolicyByID(ctx context.Context, routeID string, route *alertmanagertypes.PostableRoutePolicy) (*alertmanagertypes.GettableRoutePolicy, error) {
+ claims, err := authtypes.ClaimsFromContext(ctx)
+ if err != nil {
+ return nil, errors.NewInvalidInputf(errors.CodeUnauthenticated, "invalid claims: %v", err)
+ }
+ orgID, err := valuer.NewUUID(claims.OrgID)
+ if err != nil {
+ return nil, err
+ }
+
+ if routeID == "" {
+ return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "routeID cannot be empty")
+ }
+
+ if route == nil {
+ return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "route cannot be nil")
+ }
+
+ if err := route.Validate(); err != nil {
+ return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid route: %v", err)
+ }
+
+ existingRoute, err := provider.notificationManager.GetRoutePolicyByID(ctx, claims.OrgID, routeID)
+ if err != nil {
+ return nil, errors.NewNotFoundf(errors.CodeNotFound, "route not found: %v", err)
+ }
+
+ updatedRoute := &alertmanagertypes.RoutePolicy{
+ Expression: route.Expression,
+ ExpressionKind: route.ExpressionKind,
+ Name: route.Name,
+ Description: route.Description,
+ Tags: route.Tags,
+ Channels: route.Channels,
+ OrgID: claims.OrgID,
+ Identifiable: existingRoute.Identifiable,
+ UserAuditable: types.UserAuditable{
+ CreatedBy: existingRoute.CreatedBy,
+ UpdatedBy: claims.Email,
+ },
+ TimeAuditable: types.TimeAuditable{
+ CreatedAt: existingRoute.CreatedAt,
+ UpdatedAt: time.Now(),
+ },
+ }
+
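+ // Updates are applied as delete-then-recreate, preserving the original ID and creation metadata.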
+ err = provider.notificationManager.DeleteRoutePolicy(ctx, orgID.String(), routeID)
+ if err != nil {
+ return nil, errors.NewInternalf(errors.CodeInternal, "error deleting existing route: %v", err)
+ }
+
+ err = provider.notificationManager.CreateRoutePolicy(ctx, orgID.String(), updatedRoute)
+ if err != nil {
+ return nil, err
+ }
+
+ return &alertmanagertypes.GettableRoutePolicy{
+ PostableRoutePolicy: *route,
+ ID: updatedRoute.ID.StringValue(),
+ CreatedAt: &updatedRoute.CreatedAt,
+ UpdatedAt: &updatedRoute.UpdatedAt,
+ CreatedBy: &updatedRoute.CreatedBy,
+ UpdatedBy: &updatedRoute.UpdatedBy,
+ }, nil
+}
+
+func (provider *provider) DeleteRoutePolicyByID(ctx context.Context, routeID string) error {
+ claims, err := authtypes.ClaimsFromContext(ctx)
+ if err != nil {
+ return errors.NewInvalidInputf(errors.CodeUnauthenticated, "invalid claims: %v", err)
+ }
+ orgID, err := valuer.NewUUID(claims.OrgID)
+ if err != nil {
+ return err
+ }
+ if routeID == "" {
+ return errors.NewInvalidInputf(errors.CodeInvalidInput, "routeID cannot be empty")
+ }
+
+ return provider.notificationManager.DeleteRoutePolicy(ctx, orgID.String(), routeID)
+}
+
+func (provider *provider) CreateInhibitRules(ctx context.Context, orgID valuer.UUID, rules []amConfig.InhibitRule) error {
+ config, err := provider.configStore.Get(ctx, orgID.String())
+ if err != nil {
+ return err
+ }
+
+ if err := config.AddInhibitRules(rules); err != nil {
+ return err
+ }
+
+ return provider.configStore.Set(ctx, config)
+}
+
+func (provider *provider) DeleteAllRoutePoliciesByRuleId(ctx context.Context, ruleID string) error {
+ claims, err := authtypes.ClaimsFromContext(ctx)
+ if err != nil {
+ return errors.NewInvalidInputf(errors.CodeUnauthenticated, "invalid claims: %v", err)
+ }
+ orgID, err := valuer.NewUUID(claims.OrgID)
+ if err != nil {
+ return err
+ }
+ return provider.notificationManager.DeleteAllRoutePoliciesByName(ctx, orgID.String(), ruleID)
+}
+
+func (provider *provider) UpdateAllRoutePoliciesByRuleId(ctx context.Context, ruleID string, routes []*alertmanagertypes.PostableRoutePolicy) error {
+ err := provider.DeleteAllRoutePoliciesByRuleId(ctx, ruleID)
+ if err != nil {
+ return errors.NewInvalidInputf(errors.CodeInternal, "error deleting the routes: %v", err)
+ }
+ _, err = provider.CreateRoutePolicies(ctx, routes)
+ return err
+}
+
+func (provider *provider) DeleteAllInhibitRulesByRuleId(ctx context.Context, orgID valuer.UUID, ruleId string) error {
+ config, err := provider.configStore.Get(ctx, orgID.String())
+ if err != nil {
+ return err
+ }
+
+ if err := config.DeleteRuleIDInhibitor(ruleId); err != nil {
+ return err
+ }
+
+ return provider.configStore.Set(ctx, config)
+}
diff --git a/pkg/query-service/app/http_handler.go b/pkg/query-service/app/http_handler.go
index a9016eef0e91..03785f7473c3 100644
--- a/pkg/query-service/app/http_handler.go
+++ b/pkg/query-service/app/http_handler.go
@@ -10,7 +10,6 @@ import (
"fmt"
"github.com/SigNoz/signoz/pkg/modules/thirdpartyapi"
- //qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
"io"
"math"
"net/http"
@@ -492,6 +491,12 @@ func (aH *APIHandler) RegisterRoutes(router *mux.Router, am *middleware.AuthZ) {
router.HandleFunc("/api/v1/channels", am.EditAccess(aH.AlertmanagerAPI.CreateChannel)).Methods(http.MethodPost)
router.HandleFunc("/api/v1/testChannel", am.EditAccess(aH.AlertmanagerAPI.TestReceiver)).Methods(http.MethodPost)
+ router.HandleFunc("/api/v1/route_policies", am.ViewAccess(aH.AlertmanagerAPI.GetAllRoutePolicies)).Methods(http.MethodGet)
+ router.HandleFunc("/api/v1/route_policies/{id}", am.ViewAccess(aH.AlertmanagerAPI.GetRoutePolicyByID)).Methods(http.MethodGet)
+ router.HandleFunc("/api/v1/route_policies", am.AdminAccess(aH.AlertmanagerAPI.CreateRoutePolicy)).Methods(http.MethodPost)
+ router.HandleFunc("/api/v1/route_policies/{id}", am.AdminAccess(aH.AlertmanagerAPI.DeleteRoutePolicyByID)).Methods(http.MethodDelete)
+ router.HandleFunc("/api/v1/route_policies/{id}", am.AdminAccess(aH.AlertmanagerAPI.UpdateRoutePolicy)).Methods(http.MethodPut)
+
router.HandleFunc("/api/v1/alerts", am.ViewAccess(aH.AlertmanagerAPI.GetAlerts)).Methods(http.MethodGet)
router.HandleFunc("/api/v1/rules", am.ViewAccess(aH.listRules)).Methods(http.MethodGet)
@@ -616,6 +621,7 @@ func (aH *APIHandler) RegisterRoutes(router *mux.Router, am *middleware.AuthZ) {
// Export
router.HandleFunc("/api/v1/export_raw_data", am.ViewAccess(aH.Signoz.Handlers.RawDataExport.ExportRawData)).Methods(http.MethodGet)
+
}
func (ah *APIHandler) MetricExplorerRoutes(router *mux.Router, am *middleware.AuthZ) {
diff --git a/pkg/query-service/rules/base_rule.go b/pkg/query-service/rules/base_rule.go
index a0ddcbf8444d..62669fad172b 100644
--- a/pkg/query-service/rules/base_rule.go
+++ b/pkg/query-service/rules/base_rule.go
@@ -4,13 +4,11 @@ import (
"context"
"fmt"
"log/slog"
- "math"
"net/url"
"sync"
"time"
"github.com/SigNoz/signoz/pkg/errors"
- "github.com/SigNoz/signoz/pkg/query-service/converter"
"github.com/SigNoz/signoz/pkg/query-service/interfaces"
"github.com/SigNoz/signoz/pkg/query-service/model"
v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3"
@@ -167,22 +165,6 @@ func NewBaseRule(id string, orgID valuer.UUID, p *ruletypes.PostableRule, reader
return baseRule, nil
}
-func (r *BaseRule) targetVal() float64 {
- if r.ruleCondition == nil || r.ruleCondition.Target == nil {
- return 0
- }
-
- // get the converter for the target unit
- unitConverter := converter.FromUnit(converter.Unit(r.ruleCondition.TargetUnit))
- // convert the target value to the y-axis unit
- value := unitConverter.Convert(converter.Value{
- F: *r.ruleCondition.Target,
- U: converter.Unit(r.ruleCondition.TargetUnit),
- }, converter.Unit(r.Unit()))
-
- return value.F
-}
-
func (r *BaseRule) matchType() ruletypes.MatchType {
if r.ruleCondition == nil {
return ruletypes.AtleastOnce
@@ -221,10 +203,6 @@ func (r *BaseRule) HoldDuration() time.Duration {
return r.holdDuration
}
-func (r *BaseRule) TargetVal() float64 {
- return r.targetVal()
-}
-
func (r *ThresholdRule) hostFromSource() string {
parsedUrl, err := url.Parse(r.source)
if err != nil {
@@ -380,232 +358,6 @@ func (r *BaseRule) ForEachActiveAlert(f func(*ruletypes.Alert)) {
}
}
-func (r *BaseRule) ShouldAlert(series v3.Series) (ruletypes.Sample, bool) {
- var alertSmpl ruletypes.Sample
- var shouldAlert bool
- var lbls qslabels.Labels
-
- for name, value := range series.Labels {
- lbls = append(lbls, qslabels.Label{Name: name, Value: value})
- }
-
- series.Points = removeGroupinSetPoints(series)
-
- // nothing to evaluate
- if len(series.Points) == 0 {
- return alertSmpl, false
- }
-
- if r.ruleCondition.RequireMinPoints {
- if len(series.Points) < r.ruleCondition.RequiredNumPoints {
- zap.L().Info("not enough data points to evaluate series, skipping", zap.String("ruleid", r.ID()), zap.Int("numPoints", len(series.Points)), zap.Int("requiredPoints", r.ruleCondition.RequiredNumPoints))
- return alertSmpl, false
- }
- }
-
- switch r.matchType() {
- case ruletypes.AtleastOnce:
- // If any sample matches the condition, the rule is firing.
- if r.compareOp() == ruletypes.ValueIsAbove {
- for _, smpl := range series.Points {
- if smpl.Value > r.targetVal() {
- alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: smpl.Value}, Metric: lbls}
- shouldAlert = true
- break
- }
- }
- } else if r.compareOp() == ruletypes.ValueIsBelow {
- for _, smpl := range series.Points {
- if smpl.Value < r.targetVal() {
- alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: smpl.Value}, Metric: lbls}
- shouldAlert = true
- break
- }
- }
- } else if r.compareOp() == ruletypes.ValueIsEq {
- for _, smpl := range series.Points {
- if smpl.Value == r.targetVal() {
- alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: smpl.Value}, Metric: lbls}
- shouldAlert = true
- break
- }
- }
- } else if r.compareOp() == ruletypes.ValueIsNotEq {
- for _, smpl := range series.Points {
- if smpl.Value != r.targetVal() {
- alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: smpl.Value}, Metric: lbls}
- shouldAlert = true
- break
- }
- }
- } else if r.compareOp() == ruletypes.ValueOutsideBounds {
- for _, smpl := range series.Points {
- if math.Abs(smpl.Value) >= r.targetVal() {
- alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: smpl.Value}, Metric: lbls}
- shouldAlert = true
- break
- }
- }
- }
- case ruletypes.AllTheTimes:
- // If all samples match the condition, the rule is firing.
- shouldAlert = true
- alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: r.targetVal()}, Metric: lbls}
- if r.compareOp() == ruletypes.ValueIsAbove {
- for _, smpl := range series.Points {
- if smpl.Value <= r.targetVal() {
- shouldAlert = false
- break
- }
- }
- // use min value from the series
- if shouldAlert {
- var minValue float64 = math.Inf(1)
- for _, smpl := range series.Points {
- if smpl.Value < minValue {
- minValue = smpl.Value
- }
- }
- alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: minValue}, Metric: lbls}
- }
- } else if r.compareOp() == ruletypes.ValueIsBelow {
- for _, smpl := range series.Points {
- if smpl.Value >= r.targetVal() {
- shouldAlert = false
- break
- }
- }
- if shouldAlert {
- var maxValue float64 = math.Inf(-1)
- for _, smpl := range series.Points {
- if smpl.Value > maxValue {
- maxValue = smpl.Value
- }
- }
- alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: maxValue}, Metric: lbls}
- }
- } else if r.compareOp() == ruletypes.ValueIsEq {
- for _, smpl := range series.Points {
- if smpl.Value != r.targetVal() {
- shouldAlert = false
- break
- }
- }
- } else if r.compareOp() == ruletypes.ValueIsNotEq {
- for _, smpl := range series.Points {
- if smpl.Value == r.targetVal() {
- shouldAlert = false
- break
- }
- }
- // use any non-inf or nan value from the series
- if shouldAlert {
- for _, smpl := range series.Points {
- if !math.IsInf(smpl.Value, 0) && !math.IsNaN(smpl.Value) {
- alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: smpl.Value}, Metric: lbls}
- break
- }
- }
- }
- } else if r.compareOp() == ruletypes.ValueOutsideBounds {
- for _, smpl := range series.Points {
- if math.Abs(smpl.Value) < r.targetVal() {
- alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: smpl.Value}, Metric: lbls}
- shouldAlert = false
- break
- }
- }
- }
- case ruletypes.OnAverage:
- // If the average of all samples matches the condition, the rule is firing.
- var sum, count float64
- for _, smpl := range series.Points {
- if math.IsNaN(smpl.Value) || math.IsInf(smpl.Value, 0) {
- continue
- }
- sum += smpl.Value
- count++
- }
- avg := sum / count
- alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: avg}, Metric: lbls}
- if r.compareOp() == ruletypes.ValueIsAbove {
- if avg > r.targetVal() {
- shouldAlert = true
- }
- } else if r.compareOp() == ruletypes.ValueIsBelow {
- if avg < r.targetVal() {
- shouldAlert = true
- }
- } else if r.compareOp() == ruletypes.ValueIsEq {
- if avg == r.targetVal() {
- shouldAlert = true
- }
- } else if r.compareOp() == ruletypes.ValueIsNotEq {
- if avg != r.targetVal() {
- shouldAlert = true
- }
- } else if r.compareOp() == ruletypes.ValueOutsideBounds {
- if math.Abs(avg) >= r.targetVal() {
- shouldAlert = true
- }
- }
- case ruletypes.InTotal:
- // If the sum of all samples matches the condition, the rule is firing.
- var sum float64
-
- for _, smpl := range series.Points {
- if math.IsNaN(smpl.Value) || math.IsInf(smpl.Value, 0) {
- continue
- }
- sum += smpl.Value
- }
- alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: sum}, Metric: lbls}
- if r.compareOp() == ruletypes.ValueIsAbove {
- if sum > r.targetVal() {
- shouldAlert = true
- }
- } else if r.compareOp() == ruletypes.ValueIsBelow {
- if sum < r.targetVal() {
- shouldAlert = true
- }
- } else if r.compareOp() == ruletypes.ValueIsEq {
- if sum == r.targetVal() {
- shouldAlert = true
- }
- } else if r.compareOp() == ruletypes.ValueIsNotEq {
- if sum != r.targetVal() {
- shouldAlert = true
- }
- } else if r.compareOp() == ruletypes.ValueOutsideBounds {
- if math.Abs(sum) >= r.targetVal() {
- shouldAlert = true
- }
- }
- case ruletypes.Last:
- // If the last sample matches the condition, the rule is firing.
- shouldAlert = false
- alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: series.Points[len(series.Points)-1].Value}, Metric: lbls}
- if r.compareOp() == ruletypes.ValueIsAbove {
- if series.Points[len(series.Points)-1].Value > r.targetVal() {
- shouldAlert = true
- }
- } else if r.compareOp() == ruletypes.ValueIsBelow {
- if series.Points[len(series.Points)-1].Value < r.targetVal() {
- shouldAlert = true
- }
- } else if r.compareOp() == ruletypes.ValueIsEq {
- if series.Points[len(series.Points)-1].Value == r.targetVal() {
- shouldAlert = true
- }
- } else if r.compareOp() == ruletypes.ValueIsNotEq {
- if series.Points[len(series.Points)-1].Value != r.targetVal() {
- shouldAlert = true
- }
- }
- }
- return alertSmpl, shouldAlert
-}
-
func (r *BaseRule) RecordRuleStateHistory(ctx context.Context, prevState, currentState model.AlertState, itemsToAdd []model.RuleStateHistory) error {
zap.L().Debug("recording rule state history", zap.String("ruleid", r.ID()), zap.Any("prevState", prevState), zap.Any("currentState", currentState), zap.Any("itemsToAdd", itemsToAdd))
revisedItemsToAdd := map[uint64]model.RuleStateHistory{}
diff --git a/pkg/query-service/rules/base_rule_test.go b/pkg/query-service/rules/base_rule_test.go
index 33e14b4ea255..8391ded1fcdf 100644
--- a/pkg/query-service/rules/base_rule_test.go
+++ b/pkg/query-service/rules/base_rule_test.go
@@ -1,6 +1,7 @@
package rules
import (
+ "github.com/stretchr/testify/require"
"testing"
v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3"
@@ -22,6 +23,15 @@ func TestBaseRule_RequireMinPoints(t *testing.T) {
RequireMinPoints: true,
RequiredNumPoints: 4,
},
+
+ Threshold: ruletypes.BasicRuleThresholds{
+ {
+ Name: "test-threshold",
+ TargetValue: &threshold,
+ CompareOp: ruletypes.ValueIsAbove,
+ MatchType: ruletypes.AtleastOnce,
+ },
+ },
},
series: &v3.Series{
Points: []v3.Point{
@@ -41,6 +51,14 @@ func TestBaseRule_RequireMinPoints(t *testing.T) {
MatchType: ruletypes.AtleastOnce,
Target: &threshold,
},
+ Threshold: ruletypes.BasicRuleThresholds{
+ {
+ Name: "test-threshold",
+ TargetValue: &threshold,
+ CompareOp: ruletypes.ValueIsAbove,
+ MatchType: ruletypes.AtleastOnce,
+ },
+ },
},
series: &v3.Series{
Points: []v3.Point{
@@ -56,10 +74,9 @@ func TestBaseRule_RequireMinPoints(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
- _, shouldAlert := test.rule.ShouldAlert(*test.series)
- if shouldAlert != test.shouldAlert {
- t.Errorf("expected shouldAlert to be %v, got %v", test.shouldAlert, shouldAlert)
- }
+ _, err := test.rule.Threshold.ShouldAlert(*test.series, "")
+ require.NoError(t, err)
+ require.Equal(t, len(test.series.Points) >= test.rule.ruleCondition.RequiredNumPoints, test.shouldAlert)
})
}
}
diff --git a/pkg/query-service/rules/manager.go b/pkg/query-service/rules/manager.go
index 8263b03d5bbd..a935aa259002 100644
--- a/pkg/query-service/rules/manager.go
+++ b/pkg/query-service/rules/manager.go
@@ -4,6 +4,7 @@ import (
"context"
"encoding/json"
"fmt"
+ "github.com/SigNoz/signoz/pkg/query-service/utils/labels"
"log/slog"
"sort"
"strings"
@@ -350,39 +351,35 @@ func (m *Manager) EditRule(ctx context.Context, ruleStr string, id valuer.UUID)
existingRule.Data = ruleStr
return m.ruleStore.EditRule(ctx, existingRule, func(ctx context.Context) error {
- cfg, err := m.alertmanager.GetConfig(ctx, claims.OrgID)
- if err != nil {
- return err
- }
-
- var preferredChannels []string
- if len(parsedRule.PreferredChannels) == 0 {
- channels, err := m.alertmanager.ListChannels(ctx, claims.OrgID)
- if err != nil {
- return err
- }
-
- for _, channel := range channels {
- preferredChannels = append(preferredChannels, channel.Name)
- }
- } else {
- preferredChannels = parsedRule.PreferredChannels
- }
- err = cfg.UpdateRuleIDMatcher(id.StringValue(), preferredChannels)
- if err != nil {
- return err
- }
if parsedRule.NotificationSettings != nil {
config := parsedRule.NotificationSettings.GetAlertManagerNotificationConfig()
- err = m.alertmanager.SetNotificationConfig(ctx, orgID, existingRule.ID.StringValue(), &config)
+ err = m.alertmanager.SetNotificationConfig(ctx, orgID, id.StringValue(), &config)
if err != nil {
return err
}
- }
+ if !parsedRule.NotificationSettings.UsePolicy {
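+ // Rebuild the rule-scoped route policies and inhibit rules so they reflect the edited rule.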
+ request, err := parsedRule.GetRuleRouteRequest(id.StringValue())
+ if err != nil {
+ return err
+ }
+ err = m.alertmanager.UpdateAllRoutePoliciesByRuleId(ctx, id.StringValue(), request)
+ if err != nil {
+ return err
+ }
+ err = m.alertmanager.DeleteAllInhibitRulesByRuleId(ctx, orgID, id.StringValue())
+ if err != nil {
+ return err
+ }
- err = m.alertmanager.SetConfig(ctx, cfg)
- if err != nil {
- return err
+ inhibitRules, err := parsedRule.GetInhibitRules(id.StringValue())
+ if err != nil {
+ return err
+ }
+ err = m.alertmanager.CreateInhibitRules(ctx, orgID, inhibitRules)
+ if err != nil {
+ return err
+ }
+ }
}
err = m.syncRuleStateWithTask(ctx, orgID, prepareTaskName(existingRule.ID.StringValue()), &parsedRule)
if err != nil {
@@ -488,6 +485,19 @@ func (m *Manager) DeleteRule(ctx context.Context, idStr string) error {
}
err = m.alertmanager.DeleteNotificationConfig(ctx, orgID, id.String())
+ if err != nil {
+ return err
+ }
+
+ err = m.alertmanager.DeleteAllRoutePoliciesByRuleId(ctx, id.String())
+ if err != nil {
+ return err
+ }
+
+ err = m.alertmanager.DeleteAllInhibitRulesByRuleId(ctx, orgID, id.String())
+ if err != nil {
+ return err
+ }
taskName := prepareTaskName(id.StringValue())
m.deleteTask(taskName)
@@ -548,41 +558,30 @@ func (m *Manager) CreateRule(ctx context.Context, ruleStr string) (*ruletypes.Ge
}
id, err := m.ruleStore.CreateRule(ctx, storedRule, func(ctx context.Context, id valuer.UUID) error {
- cfg, err := m.alertmanager.GetConfig(ctx, claims.OrgID)
- if err != nil {
- return err
- }
-
- var preferredChannels []string
- if len(parsedRule.PreferredChannels) == 0 {
- channels, err := m.alertmanager.ListChannels(ctx, claims.OrgID)
- if err != nil {
- return err
- }
-
- for _, channel := range channels {
- preferredChannels = append(preferredChannels, channel.Name)
- }
- } else {
- preferredChannels = parsedRule.PreferredChannels
- }
-
if parsedRule.NotificationSettings != nil {
config := parsedRule.NotificationSettings.GetAlertManagerNotificationConfig()
- err = m.alertmanager.SetNotificationConfig(ctx, orgID, storedRule.ID.StringValue(), &config)
+ err = m.alertmanager.SetNotificationConfig(ctx, orgID, id.StringValue(), &config)
if err != nil {
return err
}
- }
-
- err = cfg.CreateRuleIDMatcher(id.StringValue(), preferredChannels)
- if err != nil {
- return err
- }
-
- err = m.alertmanager.SetConfig(ctx, cfg)
- if err != nil {
- return err
+ if !parsedRule.NotificationSettings.UsePolicy {
+ request, err := parsedRule.GetRuleRouteRequest(id.StringValue())
+ if err != nil {
+ return err
+ }
+ _, err = m.alertmanager.CreateRoutePolicies(ctx, request)
+ if err != nil {
+ return err
+ }
+ inhibitRules, err := parsedRule.GetInhibitRules(id.StringValue())
+ if err != nil {
+ return err
+ }
+ err = m.alertmanager.CreateInhibitRules(ctx, orgID, inhibitRules)
+ if err != nil {
+ return err
+ }
+ }
}
taskName := prepareTaskName(id.StringValue())
@@ -756,36 +755,30 @@ func (m *Manager) prepareTestNotifyFunc() NotifyFunc {
if len(alerts) == 0 {
return
}
+ ruleID := alerts[0].Labels.Map()[labels.AlertRuleIdLabel]
+ receiverMap := make(map[*alertmanagertypes.PostableAlert][]string)
+ for _, alert := range alerts {
+ generatorURL := alert.GeneratorURL
- alert := alerts[0]
- generatorURL := alert.GeneratorURL
-
- a := &alertmanagertypes.PostableAlert{}
- a.Annotations = alert.Annotations.Map()
- a.StartsAt = strfmt.DateTime(alert.FiredAt)
- a.Alert = alertmanagertypes.AlertModel{
- Labels: alert.Labels.Map(),
- GeneratorURL: strfmt.URI(generatorURL),
- }
- if !alert.ResolvedAt.IsZero() {
- a.EndsAt = strfmt.DateTime(alert.ResolvedAt)
- } else {
- a.EndsAt = strfmt.DateTime(alert.ValidUntil)
- }
-
- if len(alert.Receivers) == 0 {
- channels, err := m.alertmanager.ListChannels(ctx, orgID)
- if err != nil {
- zap.L().Error("failed to list channels while sending test notification", zap.Error(err))
- return
+ a := &alertmanagertypes.PostableAlert{}
+ a.Annotations = alert.Annotations.Map()
+ a.StartsAt = strfmt.DateTime(alert.FiredAt)
+ a.Alert = alertmanagertypes.AlertModel{
+ Labels: alert.Labels.Map(),
+ GeneratorURL: strfmt.URI(generatorURL),
}
-
- for _, channel := range channels {
- alert.Receivers = append(alert.Receivers, channel.Name)
+ if !alert.ResolvedAt.IsZero() {
+ a.EndsAt = strfmt.DateTime(alert.ResolvedAt)
+ } else {
+ a.EndsAt = strfmt.DateTime(alert.ValidUntil)
}
+ receiverMap[a] = alert.Receivers
+ }
+ err := m.alertmanager.TestAlert(ctx, orgID, ruleID, receiverMap)
+ if err != nil {
+ zap.L().Error("failed to send test notification", zap.Error(err))
+ return
}
-
- m.alertmanager.TestAlert(ctx, orgID, a, alert.Receivers)
}
}
@@ -983,6 +976,17 @@ func (m *Manager) TestNotification(ctx context.Context, orgID valuer.UUID, ruleS
if err != nil {
return 0, model.BadRequest(err)
}
+ if !parsedRule.NotificationSettings.UsePolicy {
+ parsedRule.NotificationSettings.GroupBy = append(parsedRule.NotificationSettings.GroupBy, ruletypes.LabelThresholdName)
+ }
+ config := parsedRule.NotificationSettings.GetAlertManagerNotificationConfig()
+ err = m.alertmanager.SetNotificationConfig(ctx, orgID, parsedRule.AlertName, &config)
+ if err != nil {
+ return 0, &model.ApiError{
+ Typ: model.ErrorBadData,
+ Err: err,
+ }
+ }
alertCount, apiErr := m.prepareTestRuleFunc(PrepareTestRuleOptions{
Rule: &parsedRule,
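
Note on the wiring above: a condensed sketch (not part of the patch) of what the CreateRule callback now does when notificationSettings.usePolicy is false. Method names come from the hunks above; the helper itself is hypothetical and assumes manager.go's existing imports.

// Hypothetical helper illustrating the order of calls made inside the CreateRule callback.
func (m *Manager) applyRuleNotificationWiring(ctx context.Context, orgID valuer.UUID, ruleID string, rule *ruletypes.PostableRule) error {
	if rule.NotificationSettings == nil {
		return nil
	}
	cfg := rule.NotificationSettings.GetAlertManagerNotificationConfig()
	if err := m.alertmanager.SetNotificationConfig(ctx, orgID, ruleID, &cfg); err != nil {
		return err
	}
	if rule.NotificationSettings.UsePolicy {
		return nil // routing stays with user-managed policies
	}
	routes, err := rule.GetRuleRouteRequest(ruleID) // one route policy per threshold receiver
	if err != nil {
		return err
	}
	if _, err := m.alertmanager.CreateRoutePolicies(ctx, routes); err != nil {
		return err
	}
	inhibits, err := rule.GetInhibitRules(ruleID) // stricter thresholds mute the next one down
	if err != nil {
		return err
	}
	return m.alertmanager.CreateInhibitRules(ctx, orgID, inhibits)
}

EditRule follows the same shape after first replacing existing routes (UpdateAllRoutePoliciesByRuleId) and clearing inhibit rules (DeleteAllInhibitRulesByRuleId), while DeleteRule tears down the notification config, route policies and inhibit rules together.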
diff --git a/pkg/query-service/rules/manager_test.go b/pkg/query-service/rules/manager_test.go
index c17690a85480..8505e1600bb5 100644
--- a/pkg/query-service/rules/manager_test.go
+++ b/pkg/query-service/rules/manager_test.go
@@ -2,10 +2,15 @@ package rules
import (
"context"
+ "fmt"
+ "github.com/SigNoz/signoz/pkg/alertmanager/nfmanager"
+ "github.com/SigNoz/signoz/pkg/alertmanager/nfmanager/nfroutingstore/nfroutingstoretest"
+ "github.com/SigNoz/signoz/pkg/alertmanager/nfmanager/rulebasednotification"
+ "github.com/prometheus/common/model"
+ "strings"
"testing"
"time"
- "github.com/SigNoz/signoz/pkg/alertmanager/nfmanager/nfmanagertest"
"github.com/stretchr/testify/assert"
"go.uber.org/zap"
@@ -32,19 +37,38 @@ func TestManager_PatchRule_PayloadVariations(t *testing.T) {
Email: "test@example.com",
Role: "admin",
}
- manager, mockSQLRuleStore, orgId := setupTestManager(t)
+ manager, mockSQLRuleStore, mockRouteStore, nfmanager, orgId := setupTestManager(t)
claims.OrgID = orgId
testCases := []struct {
name string
originalData string
patchData string
+ Route []*alertmanagertypes.RoutePolicy
+ Config *alertmanagertypes.NotificationConfig
expectedResult func(*ruletypes.GettableRule) bool
expectError bool
description string
}{
{
name: "patch complete rule with task sync validation",
+ Route: []*alertmanagertypes.RoutePolicy{
+ {
+ Expression: fmt.Sprintf("ruleId == \"{{.ruleId}}\" && threshold.name == \"warning\""),
+ ExpressionKind: alertmanagertypes.RuleBasedExpression,
+ Channels: []string{"test-alerts"},
+ Name: "{{.ruleId}}",
+ Enabled: true,
+ },
+ },
+ Config: &alertmanagertypes.NotificationConfig{
+ NotificationGroup: map[model.LabelName]struct{}{model.LabelName("ruleId"): {}},
+ Renotify: alertmanagertypes.ReNotificationConfig{
+ RenotifyInterval: 4 * time.Hour,
+ NoDataInterval: 4 * time.Hour,
+ },
+ UsePolicy: false,
+ },
originalData: `{
"schemaVersion":"v1",
"alert": "test-original-alert",
@@ -95,6 +119,23 @@ func TestManager_PatchRule_PayloadVariations(t *testing.T) {
},
{
name: "patch rule to disabled state",
+ Route: []*alertmanagertypes.RoutePolicy{
+ {
+ Expression: fmt.Sprintf("ruleId == \"{{.ruleId}}\" && threshold.name == \"warning\""),
+ ExpressionKind: alertmanagertypes.RuleBasedExpression,
+ Channels: []string{"test-alerts"},
+ Name: "{{.ruleId}}",
+ Enabled: true,
+ },
+ },
+ Config: &alertmanagertypes.NotificationConfig{
+ NotificationGroup: map[model.LabelName]struct{}{model.LabelName("ruleId"): {}},
+ Renotify: alertmanagertypes.ReNotificationConfig{
+ RenotifyInterval: 4 * time.Hour,
+ NoDataInterval: 4 * time.Hour,
+ },
+ UsePolicy: false,
+ },
originalData: `{
"schemaVersion":"v2",
"alert": "test-disable-alert",
@@ -179,6 +220,20 @@ func TestManager_PatchRule_PayloadVariations(t *testing.T) {
OrgID: claims.OrgID,
}
+ // Update route expectations with actual rule ID
+ routesWithRuleID := make([]*alertmanagertypes.RoutePolicy, len(tc.Route))
+ for i, route := range tc.Route {
+ routesWithRuleID[i] = &alertmanagertypes.RoutePolicy{
+ Expression: strings.Replace(route.Expression, "{{.ruleId}}", ruleID.String(), -1),
+ ExpressionKind: route.ExpressionKind,
+ Channels: route.Channels,
+ Name: strings.Replace(route.Name, "{{.ruleId}}", ruleID.String(), -1),
+ Enabled: route.Enabled,
+ }
+ }
+
+ mockRouteStore.ExpectDeleteRouteByName(existingRule.OrgID, ruleID.String())
+ mockRouteStore.ExpectCreateBatch(routesWithRuleID)
mockSQLRuleStore.ExpectGetStoredRule(ruleID, existingRule)
mockSQLRuleStore.ExpectEditRule(existingRule)
@@ -200,6 +255,12 @@ func TestManager_PatchRule_PayloadVariations(t *testing.T) {
assert.Nil(t, findTaskByName(manager.RuleTasks(), taskName), "Task should be removed for disabled rule")
} else {
syncCompleted := waitForTaskSync(manager, taskName, true, 2*time.Second)
+
+ // Verify notification config
+ config, err := nfmanager.GetNotificationConfig(orgId, result.Id)
+ assert.NoError(t, err)
+ assert.Equal(t, tc.Config, config)
+
assert.True(t, syncCompleted, "Task synchronization should complete within timeout")
assert.NotNil(t, findTaskByName(manager.RuleTasks(), taskName), "Task should be created/updated for enabled rule")
assert.Greater(t, len(manager.Rules()), 0, "Rules should be updated in manager")
@@ -234,7 +295,7 @@ func findTaskByName(tasks []Task, taskName string) Task {
return nil
}
-func setupTestManager(t *testing.T) (*Manager, *rulestoretest.MockSQLRuleStore, string) {
+func setupTestManager(t *testing.T) (*Manager, *rulestoretest.MockSQLRuleStore, *nfroutingstoretest.MockSQLRouteStore, nfmanager.NotificationManager, string) {
settings := instrumentationtest.New().ToProviderSettings()
testDB := utils.NewQueryServiceDBForTests(t)
@@ -266,7 +327,11 @@ func setupTestManager(t *testing.T) (*Manager, *rulestoretest.MockSQLRuleStore,
t.Fatalf("Failed to create noop sharder: %v", err)
}
orgGetter := implorganization.NewGetter(implorganization.NewStore(testDB), noopSharder)
- notificationManager := nfmanagertest.NewMock()
+ routeStore := nfroutingstoretest.NewMockSQLRouteStore()
+ notificationManager, err := rulebasednotification.New(t.Context(), settings, nfmanager.Config{}, routeStore)
+ if err != nil {
+ t.Fatalf("Failed to create alert manager: %v", err)
+ }
alertManager, err := signozalertmanager.New(context.TODO(), settings, alertmanager.Config{Provider: "signoz", Signoz: alertmanager.Signoz{PollInterval: 10 * time.Second, Config: alertmanagerserver.NewConfig()}}, testDB, orgGetter, notificationManager)
if err != nil {
t.Fatalf("Failed to create alert manager: %v", err)
@@ -290,21 +355,40 @@ func setupTestManager(t *testing.T) (*Manager, *rulestoretest.MockSQLRuleStore,
}
close(manager.block)
- return manager, mockSQLRuleStore, testOrgID.StringValue()
+ return manager, mockSQLRuleStore, routeStore, notificationManager, testOrgID.StringValue()
}
func TestCreateRule(t *testing.T) {
claims := &authtypes.Claims{
Email: "test@example.com",
}
- manager, mockSQLRuleStore, orgId := setupTestManager(t)
+ manager, mockSQLRuleStore, mockRouteStore, nfmanager, orgId := setupTestManager(t)
claims.OrgID = orgId
testCases := []struct {
name string
+ Route []*alertmanagertypes.RoutePolicy
+ Config *alertmanagertypes.NotificationConfig
ruleStr string
}{
{
name: "validate stored rule data structure",
+ Route: []*alertmanagertypes.RoutePolicy{
+ {
+ Expression: fmt.Sprintf("ruleId == \"{{.ruleId}}\" && threshold.name == \"warning\""),
+ ExpressionKind: alertmanagertypes.RuleBasedExpression,
+ Channels: []string{"test-alerts"},
+ Name: "{{.ruleId}}",
+ Enabled: true,
+ },
+ },
+ Config: &alertmanagertypes.NotificationConfig{
+ NotificationGroup: map[model.LabelName]struct{}{model.LabelName("ruleId"): {}},
+ Renotify: alertmanagertypes.ReNotificationConfig{
+ RenotifyInterval: 4 * time.Hour,
+ NoDataInterval: 4 * time.Hour,
+ },
+ UsePolicy: false,
+ },
ruleStr: `{
"alert": "cpu usage",
"ruleType": "threshold_rule",
@@ -341,6 +425,30 @@ func TestCreateRule(t *testing.T) {
},
{
name: "create complete v2 rule with thresholds",
+ Route: []*alertmanagertypes.RoutePolicy{
+ {
+ Expression: fmt.Sprintf("ruleId == \"{{.ruleId}}\" && threshold.name == \"critical\""),
+ ExpressionKind: alertmanagertypes.RuleBasedExpression,
+ Channels: []string{"test-alerts"},
+ Name: "{{.ruleId}}",
+ Enabled: true,
+ },
+ {
+ Expression: fmt.Sprintf("ruleId == \"{{.ruleId}}\" && threshold.name == \"warning\""),
+ ExpressionKind: alertmanagertypes.RuleBasedExpression,
+ Channels: []string{"test-alerts"},
+ Name: "{{.ruleId}}",
+ Enabled: true,
+ },
+ },
+ Config: &alertmanagertypes.NotificationConfig{
+ NotificationGroup: map[model.LabelName]struct{}{model.LabelName("k8s.node.name"): {}, model.LabelName("ruleId"): {}},
+ Renotify: alertmanagertypes.ReNotificationConfig{
+ RenotifyInterval: 10 * time.Minute,
+ NoDataInterval: 4 * time.Hour,
+ },
+ UsePolicy: false,
+ },
ruleStr: `{
"schemaVersion":"v2",
"state": "firing",
@@ -399,6 +507,18 @@ func TestCreateRule(t *testing.T) {
"frequency": "1m"
}
},
+ "notificationSettings": {
+ "GroupBy": [
+ "k8s.node.name"
+ ],
+ "renotify": {
+ "interval": "10m",
+ "enabled": true,
+ "alertStates": [
+ "firing"
+ ]
+ }
+ },
"labels": {
"severity": "warning"
},
@@ -429,6 +549,20 @@ func TestCreateRule(t *testing.T) {
},
OrgID: claims.OrgID,
}
+
+ // Update route expectations with actual rule ID
+ routesWithRuleID := make([]*alertmanagertypes.RoutePolicy, len(tc.Route))
+ for i, route := range tc.Route {
+ routesWithRuleID[i] = &alertmanagertypes.RoutePolicy{
+ Expression: strings.Replace(route.Expression, "{{.ruleId}}", rule.ID.String(), -1),
+ ExpressionKind: route.ExpressionKind,
+ Channels: route.Channels,
+ Name: strings.Replace(route.Name, "{{.ruleId}}", rule.ID.String(), -1),
+ Enabled: route.Enabled,
+ }
+ }
+
+ mockRouteStore.ExpectCreateBatch(routesWithRuleID)
mockSQLRuleStore.ExpectCreateRule(rule)
ctx := authtypes.NewContextWithClaims(context.Background(), *claims)
@@ -441,6 +575,12 @@ func TestCreateRule(t *testing.T) {
// Wait for task creation with proper synchronization
taskName := prepareTaskName(result.Id)
syncCompleted := waitForTaskSync(manager, taskName, true, 2*time.Second)
+
+ // Verify notification config
+ config, err := nfmanager.GetNotificationConfig(orgId, result.Id)
+ assert.NoError(t, err)
+ assert.Equal(t, tc.Config, config)
+
assert.True(t, syncCompleted, "Task creation should complete within timeout")
assert.NotNil(t, findTaskByName(manager.RuleTasks(), taskName), "Task should be created with correct name")
assert.Greater(t, len(manager.Rules()), 0, "Rules should be added to manager")
@@ -455,14 +595,35 @@ func TestEditRule(t *testing.T) {
claims := &authtypes.Claims{
Email: "test@example.com",
}
- manager, mockSQLRuleStore, orgId := setupTestManager(t)
+ manager, mockSQLRuleStore, mockRouteStore, nfmanager, orgId := setupTestManager(t)
claims.OrgID = orgId
testCases := []struct {
+ ruleID string
name string
+ Route []*alertmanagertypes.RoutePolicy
+ Config *alertmanagertypes.NotificationConfig
ruleStr string
}{
{
- name: "validate edit rule functionality",
+ ruleID: "12345678-1234-1234-1234-123456789012",
+ name: "validate edit rule functionality",
+ Route: []*alertmanagertypes.RoutePolicy{
+ {
+ Expression: fmt.Sprintf("ruleId == \"rule1\" && threshold.name == \"critical\""),
+ ExpressionKind: alertmanagertypes.RuleBasedExpression,
+ Channels: []string{"critical-alerts"},
+ Name: "12345678-1234-1234-1234-123456789012",
+ Enabled: true,
+ },
+ },
+ Config: &alertmanagertypes.NotificationConfig{
+ NotificationGroup: map[model.LabelName]struct{}{model.LabelName("ruleId"): {}},
+ Renotify: alertmanagertypes.ReNotificationConfig{
+ RenotifyInterval: 4 * time.Hour,
+ NoDataInterval: 4 * time.Hour,
+ },
+ UsePolicy: false,
+ },
ruleStr: `{
"alert": "updated cpu usage",
"ruleType": "threshold_rule",
@@ -498,7 +659,32 @@ func TestEditRule(t *testing.T) {
}`,
},
{
- name: "edit complete v2 rule with thresholds",
+ ruleID: "12345678-1234-1234-1234-123456789013",
+ name: "edit complete v2 rule with thresholds",
+ Route: []*alertmanagertypes.RoutePolicy{
+ {
+ Expression: fmt.Sprintf("ruleId == \"rule2\" && threshold.name == \"critical\""),
+ ExpressionKind: alertmanagertypes.RuleBasedExpression,
+ Channels: []string{"test-alerts"},
+ Name: "12345678-1234-1234-1234-123456789013",
+ Enabled: true,
+ },
+ {
+ Expression: fmt.Sprintf("ruleId == \"rule2\" && threshold.name == \"warning\""),
+ ExpressionKind: alertmanagertypes.RuleBasedExpression,
+ Channels: []string{"test-alerts"},
+ Name: "12345678-1234-1234-1234-123456789013",
+ Enabled: true,
+ },
+ },
+ Config: &alertmanagertypes.NotificationConfig{
+ NotificationGroup: map[model.LabelName]struct{}{model.LabelName("ruleId"): {}, model.LabelName("k8s.node.name"): {}},
+ Renotify: alertmanagertypes.ReNotificationConfig{
+ RenotifyInterval: 10 * time.Minute,
+ NoDataInterval: 4 * time.Hour,
+ },
+ UsePolicy: false,
+ },
ruleStr: `{
"schemaVersion":"v2",
"state": "firing",
@@ -560,6 +746,18 @@ func TestEditRule(t *testing.T) {
"labels": {
"severity": "critical"
},
+ "notificationSettings": {
+ "GroupBy": [
+ "k8s.node.name"
+ ],
+ "renotify": {
+ "interval": "10m",
+ "enabled": true,
+ "alertStates": [
+ "firing"
+ ]
+ }
+ },
"annotations": {
"description": "This alert is fired when memory usage crosses the threshold",
"summary": "Memory usage threshold exceeded"
@@ -573,11 +771,13 @@ func TestEditRule(t *testing.T) {
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
- ruleID := valuer.GenerateUUID()
-
+ ruleId, err := valuer.NewUUID(tc.ruleID)
+ if err != nil {
+ t.Errorf("error creating ruleId: %s", err)
+ }
existingRule := &ruletypes.Rule{
Identifiable: types.Identifiable{
- ID: ruleID,
+ ID: ruleId,
},
TimeAuditable: types.TimeAuditable{
CreatedAt: time.Now(),
@@ -590,18 +790,24 @@ func TestEditRule(t *testing.T) {
Data: `{"alert": "original cpu usage", "disabled": false}`,
OrgID: claims.OrgID,
}
-
- mockSQLRuleStore.ExpectGetStoredRule(ruleID, existingRule)
+ mockRouteStore.ExpectDeleteRouteByName(existingRule.OrgID, ruleId.String())
+ mockRouteStore.ExpectCreateBatch(tc.Route)
+ mockSQLRuleStore.ExpectGetStoredRule(ruleId, existingRule)
mockSQLRuleStore.ExpectEditRule(existingRule)
ctx := authtypes.NewContextWithClaims(context.Background(), *claims)
- err := manager.EditRule(ctx, tc.ruleStr, ruleID)
+ err = manager.EditRule(ctx, tc.ruleStr, ruleId)
assert.NoError(t, err)
// Wait for task update with proper synchronization
- taskName := prepareTaskName(ruleID.StringValue())
+
+ taskName := prepareTaskName(ruleId.String())
syncCompleted := waitForTaskSync(manager, taskName, true, 2*time.Second)
+
+ config, err := nfmanager.GetNotificationConfig(orgId, ruleId.String())
+ assert.NoError(t, err)
+ assert.Equal(t, tc.Config, config)
assert.True(t, syncCompleted, "Task update should complete within timeout")
assert.NotNil(t, findTaskByName(manager.RuleTasks(), taskName), "Task should be updated with correct name")
assert.Greater(t, len(manager.Rules()), 0, "Rules should be updated in manager")
diff --git a/pkg/query-service/rules/prom_rule.go b/pkg/query-service/rules/prom_rule.go
index 773c86a2368b..a880b98d4c9a 100644
--- a/pkg/query-service/rules/prom_rule.go
+++ b/pkg/query-service/rules/prom_rule.go
@@ -147,13 +147,19 @@ func (r *PromRule) Eval(ctx context.Context, ts time.Time) (interface{}, error)
var alerts = make(map[uint64]*ruletypes.Alert, len(res))
+ ruleReceivers := r.Threshold.GetRuleReceivers()
+ ruleReceiverMap := make(map[string][]string)
+ for _, value := range ruleReceivers {
+ ruleReceiverMap[value.Name] = value.Channels
+ }
+
for _, series := range res {
if len(series.Floats) == 0 {
continue
}
- results, err := r.Threshold.ShouldAlert(toCommonSeries(series))
+ results, err := r.Threshold.ShouldAlert(toCommonSeries(series), r.Unit())
if err != nil {
return nil, err
}
@@ -165,7 +171,7 @@ func (r *PromRule) Eval(ctx context.Context, ts time.Time) (interface{}, error)
}
r.logger.DebugContext(ctx, "alerting for series", "rule_name", r.Name(), "series", series)
- threshold := valueFormatter.Format(r.targetVal(), r.Unit())
+ threshold := valueFormatter.Format(result.Target, result.TargetUnit)
tmplData := ruletypes.AlertTemplateData(l, valueFormatter.Format(result.V, r.Unit()), threshold)
// Inject some convenience variables that are easier to remember for users
@@ -218,7 +224,6 @@ func (r *PromRule) Eval(ctx context.Context, ts time.Time) (interface{}, error)
r.lastError = err
return nil, err
}
-
alerts[h] = &ruletypes.Alert{
Labels: lbs,
QueryResultLables: resultLabels,
@@ -227,13 +232,12 @@ func (r *PromRule) Eval(ctx context.Context, ts time.Time) (interface{}, error)
State: model.StatePending,
Value: result.V,
GeneratorURL: r.GeneratorURL(),
- Receivers: r.preferredChannels,
+ Receivers: ruleReceiverMap[lbs.Map()[ruletypes.LabelThresholdName]],
}
}
}
r.logger.InfoContext(ctx, "number of alerts found", "rule_name", r.Name(), "alerts_count", len(alerts))
-
// alerts[h] is ready, add or update active list now
for h, a := range alerts {
// Check whether we already have alerting state for the identifying label set.
@@ -241,7 +245,9 @@ func (r *PromRule) Eval(ctx context.Context, ts time.Time) (interface{}, error)
if alert, ok := r.Active[h]; ok && alert.State != model.StateInactive {
alert.Value = a.Value
alert.Annotations = a.Annotations
- alert.Receivers = r.preferredChannels
+ if v, ok := alert.Labels.Map()[ruletypes.LabelThresholdName]; ok {
+ alert.Receivers = ruleReceiverMap[v]
+ }
continue
}
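
The same receiver lookup appears in threshold_rule.go further down: each threshold now carries its own channels, and the sample's threshold.name label selects them instead of r.preferredChannels. A minimal sketch using identifiers from the hunk above (the surrounding Eval loop is elided, and the example values are illustrative):

// Assuming a rule with two thresholds, GetRuleReceivers (see threshold.go below) yields e.g.
//   [{Name: "critical", Channels: ["critical-alerts"]}, {Name: "warning", Channels: ["team-slack"]}]
// which Eval turns into a lookup keyed by the alert's threshold label:
ruleReceiverMap := make(map[string][]string)
for _, rcv := range r.Threshold.GetRuleReceivers() {
	ruleReceiverMap[rcv.Name] = rcv.Channels
}
receivers := ruleReceiverMap[lbs.Map()[ruletypes.LabelThresholdName]] // a "warning" sample -> ["team-slack"]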
diff --git a/pkg/query-service/rules/promrule_test.go b/pkg/query-service/rules/promrule_test.go
index 17177de622c9..ef0dbcab32f3 100644
--- a/pkg/query-service/rules/promrule_test.go
+++ b/pkg/query-service/rules/promrule_test.go
@@ -696,7 +696,7 @@ func TestPromRuleShouldAlert(t *testing.T) {
assert.NoError(t, err)
}
- resultVectors, err := rule.Threshold.ShouldAlert(toCommonSeries(c.values))
+ resultVectors, err := rule.Threshold.ShouldAlert(toCommonSeries(c.values), rule.Unit())
assert.NoError(t, err)
// Compare full result vector with expected vector
diff --git a/pkg/query-service/rules/test_notification.go b/pkg/query-service/rules/test_notification.go
index f2a6420a4240..5f72136b99e2 100644
--- a/pkg/query-service/rules/test_notification.go
+++ b/pkg/query-service/rules/test_notification.go
@@ -38,7 +38,6 @@ func defaultTestNotification(opts PrepareTestRuleOptions) (int, *model.ApiError)
if parsedRule.RuleType == ruletypes.RuleTypeThreshold {
// add special labels for test alerts
- parsedRule.Annotations[labels.AlertSummaryLabel] = fmt.Sprintf("The rule threshold is set to %.4f, and the observed metric value is {{$value}}.", *parsedRule.RuleCondition.Target)
parsedRule.Labels[labels.RuleSourceLabel] = ""
parsedRule.Labels[labels.AlertRuleIdLabel] = ""
diff --git a/pkg/query-service/rules/threshold_rule.go b/pkg/query-service/rules/threshold_rule.go
index 536ee1cf2f0f..e881ba4fb184 100644
--- a/pkg/query-service/rules/threshold_rule.go
+++ b/pkg/query-service/rules/threshold_rule.go
@@ -488,7 +488,7 @@ func (r *ThresholdRule) buildAndRunQuery(ctx context.Context, orgID valuer.UUID,
continue
}
}
- resultSeries, err := r.Threshold.ShouldAlert(*series)
+ resultSeries, err := r.Threshold.ShouldAlert(*series, r.Unit())
if err != nil {
return nil, err
}
@@ -565,7 +565,7 @@ func (r *ThresholdRule) buildAndRunQueryV5(ctx context.Context, orgID valuer.UUI
continue
}
}
- resultSeries, err := r.Threshold.ShouldAlert(*series)
+ resultSeries, err := r.Threshold.ShouldAlert(*series, r.Unit())
if err != nil {
return nil, err
}
@@ -602,6 +602,12 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time) (interface{}, er
resultFPs := map[uint64]struct{}{}
var alerts = make(map[uint64]*ruletypes.Alert, len(res))
+ ruleReceivers := r.Threshold.GetRuleReceivers()
+ ruleReceiverMap := make(map[string][]string)
+ for _, value := range ruleReceivers {
+ ruleReceiverMap[value.Name] = value.Channels
+ }
+
for _, smpl := range res {
l := make(map[string]string, len(smpl.Metric))
for _, lbl := range smpl.Metric {
@@ -610,7 +616,7 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time) (interface{}, er
value := valueFormatter.Format(smpl.V, r.Unit())
//todo(aniket): handle different threshold
- threshold := valueFormatter.Format(r.targetVal(), r.Unit())
+ threshold := valueFormatter.Format(smpl.Target, smpl.TargetUnit)
r.logger.DebugContext(ctx, "Alert template data for rule", "rule_name", r.Name(), "formatter", valueFormatter.Name(), "value", value, "threshold", threshold)
tmplData := ruletypes.AlertTemplateData(l, value, threshold)
@@ -690,7 +696,7 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time) (interface{}, er
State: model.StatePending,
Value: smpl.V,
GeneratorURL: r.GeneratorURL(),
- Receivers: r.preferredChannels,
+ Receivers: ruleReceiverMap[lbs.Map()[ruletypes.LabelThresholdName]],
Missing: smpl.IsMissing,
}
}
@@ -705,7 +711,9 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time) (interface{}, er
alert.Value = a.Value
alert.Annotations = a.Annotations
- alert.Receivers = r.preferredChannels
+ if v, ok := alert.Labels.Map()[ruletypes.LabelThresholdName]; ok {
+ alert.Receivers = ruleReceiverMap[v]
+ }
continue
}
diff --git a/pkg/query-service/rules/threshold_rule_test.go b/pkg/query-service/rules/threshold_rule_test.go
index d311a47e186e..2e7523669135 100644
--- a/pkg/query-service/rules/threshold_rule_test.go
+++ b/pkg/query-service/rules/threshold_rule_test.go
@@ -824,7 +824,7 @@ func TestThresholdRuleShouldAlert(t *testing.T) {
values.Points[i].Timestamp = time.Now().UnixMilli()
}
- resultVectors, err := rule.Threshold.ShouldAlert(c.values)
+ resultVectors, err := rule.Threshold.ShouldAlert(c.values, rule.Unit())
assert.NoError(t, err, "Test case %d", idx)
// Compare result vectors with expected behavior
@@ -1201,7 +1201,7 @@ func TestThresholdRuleLabelNormalization(t *testing.T) {
values.Points[i].Timestamp = time.Now().UnixMilli()
}
- vector, err := rule.Threshold.ShouldAlert(c.values)
+ vector, err := rule.Threshold.ShouldAlert(c.values, rule.Unit())
assert.NoError(t, err)
for name, value := range c.values.Labels {
@@ -1211,7 +1211,7 @@ func TestThresholdRuleLabelNormalization(t *testing.T) {
}
// Get result vectors from threshold evaluation
- resultVectors, err := rule.Threshold.ShouldAlert(c.values)
+ resultVectors, err := rule.Threshold.ShouldAlert(c.values, rule.Unit())
assert.NoError(t, err, "Test case %d", idx)
// Compare result vectors with expected behavior
@@ -1501,13 +1501,11 @@ func TestThresholdRuleUnitCombinations(t *testing.T) {
Kind: ruletypes.BasicThresholdKind,
Spec: ruletypes.BasicRuleThresholds{
{
- Name: postableRule.AlertName,
- TargetValue: &c.target,
- TargetUnit: c.targetUnit,
- RuleUnit: postableRule.RuleCondition.CompositeQuery.Unit,
- MatchType: ruletypes.MatchType(c.matchType),
- CompareOp: ruletypes.CompareOp(c.compareOp),
- SelectedQuery: postableRule.RuleCondition.SelectedQuery,
+ Name: postableRule.AlertName,
+ TargetValue: &c.target,
+ TargetUnit: c.targetUnit,
+ MatchType: ruletypes.MatchType(c.matchType),
+ CompareOp: ruletypes.CompareOp(c.compareOp),
},
},
}
@@ -1612,12 +1610,10 @@ func TestThresholdRuleNoData(t *testing.T) {
Kind: ruletypes.BasicThresholdKind,
Spec: ruletypes.BasicRuleThresholds{
{
- Name: postableRule.AlertName,
- TargetValue: &target,
- RuleUnit: postableRule.RuleCondition.CompositeQuery.Unit,
- MatchType: ruletypes.AtleastOnce,
- CompareOp: ruletypes.ValueIsEq,
- SelectedQuery: postableRule.RuleCondition.SelectedQuery,
+ Name: postableRule.AlertName,
+ TargetValue: &target,
+ MatchType: ruletypes.AtleastOnce,
+ CompareOp: ruletypes.ValueIsEq,
},
},
}
@@ -1734,13 +1730,11 @@ func TestThresholdRuleTracesLink(t *testing.T) {
Kind: ruletypes.BasicThresholdKind,
Spec: ruletypes.BasicRuleThresholds{
{
- Name: postableRule.AlertName,
- TargetValue: &c.target,
- TargetUnit: c.targetUnit,
- RuleUnit: postableRule.RuleCondition.CompositeQuery.Unit,
- MatchType: ruletypes.MatchType(c.matchType),
- CompareOp: ruletypes.CompareOp(c.compareOp),
- SelectedQuery: postableRule.RuleCondition.SelectedQuery,
+ Name: postableRule.AlertName,
+ TargetValue: &c.target,
+ TargetUnit: c.targetUnit,
+ MatchType: ruletypes.MatchType(c.matchType),
+ CompareOp: ruletypes.CompareOp(c.compareOp),
},
},
}
@@ -1873,13 +1867,11 @@ func TestThresholdRuleLogsLink(t *testing.T) {
Kind: ruletypes.BasicThresholdKind,
Spec: ruletypes.BasicRuleThresholds{
{
- Name: postableRule.AlertName,
- TargetValue: &c.target,
- TargetUnit: c.targetUnit,
- RuleUnit: postableRule.RuleCondition.CompositeQuery.Unit,
- MatchType: ruletypes.MatchType(c.matchType),
- CompareOp: ruletypes.CompareOp(c.compareOp),
- SelectedQuery: postableRule.RuleCondition.SelectedQuery,
+ Name: postableRule.AlertName,
+ TargetValue: &c.target,
+ TargetUnit: c.targetUnit,
+ MatchType: ruletypes.MatchType(c.matchType),
+ CompareOp: ruletypes.CompareOp(c.compareOp),
},
},
}
@@ -2125,22 +2117,18 @@ func TestMultipleThresholdRule(t *testing.T) {
Kind: ruletypes.BasicThresholdKind,
Spec: ruletypes.BasicRuleThresholds{
{
- Name: "first_threshold",
- TargetValue: &c.target,
- TargetUnit: c.targetUnit,
- RuleUnit: postableRule.RuleCondition.CompositeQuery.Unit,
- MatchType: ruletypes.MatchType(c.matchType),
- CompareOp: ruletypes.CompareOp(c.compareOp),
- SelectedQuery: postableRule.RuleCondition.SelectedQuery,
+ Name: "first_threshold",
+ TargetValue: &c.target,
+ TargetUnit: c.targetUnit,
+ MatchType: ruletypes.MatchType(c.matchType),
+ CompareOp: ruletypes.CompareOp(c.compareOp),
},
{
- Name: "second_threshold",
- TargetValue: &c.secondTarget,
- TargetUnit: c.targetUnit,
- RuleUnit: postableRule.RuleCondition.CompositeQuery.Unit,
- MatchType: ruletypes.MatchType(c.matchType),
- CompareOp: ruletypes.CompareOp(c.compareOp),
- SelectedQuery: postableRule.RuleCondition.SelectedQuery,
+ Name: "second_threshold",
+ TargetValue: &c.secondTarget,
+ TargetUnit: c.targetUnit,
+ MatchType: ruletypes.MatchType(c.matchType),
+ CompareOp: ruletypes.CompareOp(c.compareOp),
},
},
}
diff --git a/pkg/signoz/provider.go b/pkg/signoz/provider.go
index 72b037ad31f5..d66b2acd461e 100644
--- a/pkg/signoz/provider.go
+++ b/pkg/signoz/provider.go
@@ -38,6 +38,7 @@ import (
"github.com/SigNoz/signoz/pkg/telemetrystore"
"github.com/SigNoz/signoz/pkg/telemetrystore/clickhousetelemetrystore"
"github.com/SigNoz/signoz/pkg/telemetrystore/telemetrystorehook"
+ routeTypes "github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
"github.com/SigNoz/signoz/pkg/version"
"github.com/SigNoz/signoz/pkg/web"
"github.com/SigNoz/signoz/pkg/web/noopweb"
@@ -133,6 +134,7 @@ func NewSQLMigrationProviderFactories(
sqlmigration.NewQueryBuilderV5MigrationFactory(sqlstore, telemetryStore),
sqlmigration.NewAddMeterQuickFiltersFactory(sqlstore, sqlschema),
sqlmigration.NewUpdateTTLSettingForCustomRetentionFactory(sqlstore, sqlschema),
+ sqlmigration.NewAddRoutePolicyFactory(sqlstore, sqlschema),
)
}
@@ -155,9 +157,9 @@ func NewPrometheusProviderFactories(telemetryStore telemetrystore.TelemetryStore
)
}
-func NewNotificationManagerProviderFactories() factory.NamedMap[factory.ProviderFactory[nfmanager.NotificationManager, nfmanager.Config]] {
+func NewNotificationManagerProviderFactories(routeStore routeTypes.RouteStore) factory.NamedMap[factory.ProviderFactory[nfmanager.NotificationManager, nfmanager.Config]] {
return factory.MustNewNamedMap(
- rulebasednotification.NewFactory(),
+ rulebasednotification.NewFactory(routeStore),
)
}
diff --git a/pkg/signoz/signoz.go b/pkg/signoz/signoz.go
index 08fd5cb60c4b..6d531e6fe3da 100644
--- a/pkg/signoz/signoz.go
+++ b/pkg/signoz/signoz.go
@@ -4,6 +4,7 @@ import (
"context"
"github.com/SigNoz/signoz/pkg/alertmanager"
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager"
+ "github.com/SigNoz/signoz/pkg/alertmanager/nfmanager/nfroutingstore/sqlroutingstore"
"github.com/SigNoz/signoz/pkg/analytics"
"github.com/SigNoz/signoz/pkg/cache"
"github.com/SigNoz/signoz/pkg/emailing"
@@ -230,12 +231,14 @@ func New(
// Initialize user getter
userGetter := impluser.NewGetter(impluser.NewStore(sqlstore, providerSettings))
+ // TODO: create a factory for all stores instead of constructing them inline here
+ routeStore := sqlroutingstore.NewStore(sqlstore)
// shared NotificationManager instance for both alertmanager and rules
notificationManager, err := factory.NewProviderFromNamedMap(
ctx,
providerSettings,
nfmanager.Config{},
- NewNotificationManagerProviderFactories(),
+ NewNotificationManagerProviderFactories(routeStore),
"rulebased",
)
if err != nil {
diff --git a/pkg/sqlmigration/049_add_route_policy.go b/pkg/sqlmigration/049_add_route_policy.go
new file mode 100644
index 000000000000..c59207b4fda4
--- /dev/null
+++ b/pkg/sqlmigration/049_add_route_policy.go
@@ -0,0 +1,260 @@
+package sqlmigration
+
+import (
+ "context"
+ "database/sql"
+ "encoding/json"
+ "fmt"
+ "github.com/SigNoz/signoz/pkg/errors"
+ "github.com/SigNoz/signoz/pkg/factory"
+ "github.com/SigNoz/signoz/pkg/sqlschema"
+ "github.com/SigNoz/signoz/pkg/sqlstore"
+ "github.com/SigNoz/signoz/pkg/types"
+ "github.com/SigNoz/signoz/pkg/types/ruletypes"
+ "github.com/SigNoz/signoz/pkg/valuer"
+ "github.com/uptrace/bun"
+ "github.com/uptrace/bun/migrate"
+ "log/slog"
+ "time"
+)
+
+// Shared types for migration
+
+type expressionRoute struct {
+ bun.BaseModel `bun:"table:route_policy"`
+ types.Identifiable
+ types.TimeAuditable
+ types.UserAuditable
+
+ Expression string `bun:"expression,type:text"`
+ ExpressionKind string `bun:"kind,type:text"`
+
+ Channels []string `bun:"channels,type:text"`
+
+ Name string `bun:"name,type:text"`
+ Description string `bun:"description,type:text"`
+ Enabled bool `bun:"enabled,type:boolean,default:true"`
+ Tags []string `bun:"tags,type:text"`
+
+ OrgID string `bun:"org_id,type:text"`
+}
+
+type rule struct {
+ bun.BaseModel `bun:"table:rule"`
+ types.Identifiable
+ types.TimeAuditable
+ types.UserAuditable
+ Deleted int `bun:"deleted,default:0"`
+ Data string `bun:"data,type:text"`
+ OrgID string `bun:"org_id,type:text"`
+}
+
+type addRoutePolicies struct {
+ sqlstore sqlstore.SQLStore
+ sqlschema sqlschema.SQLSchema
+ logger *slog.Logger
+}
+
+func NewAddRoutePolicyFactory(sqlstore sqlstore.SQLStore, sqlschema sqlschema.SQLSchema) factory.ProviderFactory[SQLMigration, Config] {
+ return factory.NewProviderFactory(factory.MustNewName("add_route_policy"), func(ctx context.Context, providerSettings factory.ProviderSettings, config Config) (SQLMigration, error) {
+ return newAddRoutePolicy(ctx, providerSettings, config, sqlstore, sqlschema)
+ })
+}
+
+func newAddRoutePolicy(_ context.Context, settings factory.ProviderSettings, _ Config, sqlstore sqlstore.SQLStore, sqlschema sqlschema.SQLSchema) (SQLMigration, error) {
+ return &addRoutePolicies{
+ sqlstore: sqlstore,
+ sqlschema: sqlschema,
+ logger: settings.Logger,
+ }, nil
+}
+
+func (migration *addRoutePolicies) Register(migrations *migrate.Migrations) error {
+ if err := migrations.Register(migration.Up, migration.Down); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+func (migration *addRoutePolicies) Up(ctx context.Context, db *bun.DB) error {
+ _, _, err := migration.sqlschema.GetTable(ctx, sqlschema.TableName("route_policy"))
+ if err == nil {
+ return nil
+ }
+
+ tx, err := db.BeginTx(ctx, nil)
+ if err != nil {
+ return err
+ }
+
+ defer func() {
+ _ = tx.Rollback()
+ }()
+
+ sqls := [][]byte{}
+
+ // Create the route_policy table
+ table := &sqlschema.Table{
+ Name: "route_policy",
+ Columns: []*sqlschema.Column{
+ {Name: "id", DataType: sqlschema.DataTypeText, Nullable: false},
+ {Name: "created_at", DataType: sqlschema.DataTypeTimestamp, Nullable: false},
+ {Name: "updated_at", DataType: sqlschema.DataTypeTimestamp, Nullable: false},
+ {Name: "created_by", DataType: sqlschema.DataTypeText, Nullable: false},
+ {Name: "updated_by", DataType: sqlschema.DataTypeText, Nullable: false},
+ {Name: "expression", DataType: sqlschema.DataTypeText, Nullable: false},
+ {Name: "kind", DataType: sqlschema.DataTypeText, Nullable: false},
+ {Name: "channels", DataType: sqlschema.DataTypeText, Nullable: false},
+ {Name: "name", DataType: sqlschema.DataTypeText, Nullable: false},
+ {Name: "description", DataType: sqlschema.DataTypeText, Nullable: true},
+ {Name: "enabled", DataType: sqlschema.DataTypeBoolean, Nullable: false, Default: "true"},
+ {Name: "tags", DataType: sqlschema.DataTypeText, Nullable: true},
+ {Name: "org_id", DataType: sqlschema.DataTypeText, Nullable: false},
+ },
+ PrimaryKeyConstraint: &sqlschema.PrimaryKeyConstraint{
+ ColumnNames: []sqlschema.ColumnName{"id"},
+ },
+ ForeignKeyConstraints: []*sqlschema.ForeignKeyConstraint{
+ {
+ ReferencingColumnName: "org_id",
+ ReferencedTableName: "organizations",
+ ReferencedColumnName: "id",
+ },
+ },
+ }
+
+ tableSQLs := migration.sqlschema.Operator().CreateTable(table)
+ sqls = append(sqls, tableSQLs...)
+
+ for _, sqlStmt := range sqls {
+ if _, err := tx.ExecContext(ctx, string(sqlStmt)); err != nil {
+ return err
+ }
+ }
+
+ err = migration.migrateRulesToRoutePolicies(ctx, tx)
+ if err != nil {
+ return err
+ }
+
+ if err := tx.Commit(); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+func (migration *addRoutePolicies) migrateRulesToRoutePolicies(ctx context.Context, tx bun.Tx) error {
+ var rules []*rule
+ err := tx.NewSelect().
+ Model(&rules).
+ Where("deleted = ?", 0).
+ Scan(ctx)
+ if err != nil {
+ if errors.Is(err, sql.ErrNoRows) {
+ return nil // No rules to migrate
+ }
+ return errors.NewInternalf(errors.CodeInternal, "failed to fetch rules")
+ }
+
+ channelsByOrg, err := migration.getAllChannels(ctx, tx)
+ if err != nil {
+ return errors.NewInternalf(errors.CodeInternal, "fetching channels error: %v", err)
+ }
+
+ var routesToInsert []*expressionRoute
+
+ routesToInsert, err = migration.convertRulesToRoutes(rules, channelsByOrg)
+ if err != nil {
+ return errors.NewInternalf(errors.CodeInternal, "converting rules to routes error: %v", err)
+ }
+
+ // Insert all routes in a single batch operation
+ if len(routesToInsert) > 0 {
+ _, err = tx.NewInsert().
+ Model(&routesToInsert).
+ Exec(ctx)
+ if err != nil {
+ return errors.NewInternalf(errors.CodeInternal, "failed to insert notification routes")
+ }
+ }
+
+ return nil
+}
+
+func (migration *addRoutePolicies) convertRulesToRoutes(rules []*rule, channelsByOrg map[string][]string) ([]*expressionRoute, error) {
+ var routes []*expressionRoute
+ for _, r := range rules {
+ var gettableRule ruletypes.GettableRule
+ if err := json.Unmarshal([]byte(r.Data), &gettableRule); err != nil {
+ return nil, errors.NewInternalf(errors.CodeInternal, "failed to unmarshal rule data for rule ID %s: %v", r.ID, err)
+ }
+
+ if len(gettableRule.PreferredChannels) == 0 {
+ channels, exists := channelsByOrg[r.OrgID]
+ if !exists || len(channels) == 0 {
+ continue
+ }
+ gettableRule.PreferredChannels = channels
+ }
+ severity := "critical"
+ if v, ok := gettableRule.Labels["severity"]; ok {
+ severity = v
+ }
+ expression := fmt.Sprintf(`%s == "%s" && %s == "%s"`, "threshold.name", severity, "ruleId", r.ID.String())
+ route := &expressionRoute{
+ Identifiable: types.Identifiable{
+ ID: valuer.GenerateUUID(),
+ },
+ TimeAuditable: types.TimeAuditable{
+ CreatedAt: time.Now(),
+ UpdatedAt: time.Now(),
+ },
+ UserAuditable: types.UserAuditable{
+ CreatedBy: r.CreatedBy,
+ UpdatedBy: r.UpdatedBy,
+ },
+ Expression: expression,
+ ExpressionKind: "rule",
+ Channels: gettableRule.PreferredChannels,
+ Name: r.ID.StringValue(),
+ Enabled: true,
+ OrgID: r.OrgID,
+ }
+ routes = append(routes, route)
+ }
+ return routes, nil
+}
+
+func (migration *addRoutePolicies) getAllChannels(ctx context.Context, tx bun.Tx) (map[string][]string, error) {
+ type channel struct {
+ bun.BaseModel `bun:"table:notification_channel"`
+ types.Identifiable
+ types.TimeAuditable
+ Name string `json:"name" bun:"name"`
+ Type string `json:"type" bun:"type"`
+ Data string `json:"data" bun:"data"`
+ OrgID string `json:"org_id" bun:"org_id"`
+ }
+
+ var channels []*channel
+ err := tx.NewSelect().
+ Model(&channels).
+ Scan(ctx)
+ if err != nil {
+ return nil, errors.NewInternalf(errors.CodeInternal, "failed to fetch all channels")
+ }
+
+ // Group channels by org ID
+ channelsByOrg := make(map[string][]string)
+ for _, ch := range channels {
+ channelsByOrg[ch.OrgID] = append(channelsByOrg[ch.OrgID], ch.Name)
+ }
+
+ return channelsByOrg, nil
+}
+
+func (migration *addRoutePolicies) Down(ctx context.Context, db *bun.DB) error {
+ return nil
+}
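
The net effect of migrateRulesToRoutePolicies is one route_policy row per surviving rule, whose expression ties the rule's severity label to its ID. A hypothetical example of the generated expression (the rule ID below is made up, mirroring the fmt.Sprintf in convertRulesToRoutes):

severity := "warning"                            // from the rule's "severity" label, defaulting to "critical"
ruleID := "3f2b6c1e-1111-2222-3333-444455556666" // hypothetical rule ID
expr := fmt.Sprintf(`%s == "%s" && %s == "%s"`, "threshold.name", severity, "ruleId", ruleID)
// expr: threshold.name == "warning" && ruleId == "3f2b6c1e-1111-2222-3333-444455556666"
// Channels come from the rule's preferredChannels, or all of the org's channels when none are set.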
diff --git a/pkg/types/alertmanagertypes/alert.go b/pkg/types/alertmanagertypes/alert.go
index 971ec23b1ccd..02d3ee2fe039 100644
--- a/pkg/types/alertmanagertypes/alert.go
+++ b/pkg/types/alertmanagertypes/alert.go
@@ -27,6 +27,8 @@ type (
// An alias for the Alert type from the alertmanager package.
Alert = types.Alert
+ AlertSlice = types.AlertSlice
+
PostableAlert = models.PostableAlert
PostableAlerts = models.PostableAlerts
@@ -38,6 +40,10 @@ type (
GettableAlerts = models.GettableAlerts
)
+const (
+ NoDataLabel = model.LabelName("nodata")
+)
+
type DeprecatedGettableAlert struct {
*model.Alert
Status types.AlertStatus `json:"status"`
@@ -307,3 +313,11 @@ func receiversMatchFilter(receivers []string, filter *regexp.Regexp) bool {
return false
}
+
+func NoDataAlert(alert *types.Alert) bool {
+ _, ok := alert.Labels[NoDataLabel]
+ return ok
+}
diff --git a/pkg/types/alertmanagertypes/config.go b/pkg/types/alertmanagertypes/config.go
index a438afc7ee12..1b394fb0acfc 100644
--- a/pkg/types/alertmanagertypes/config.go
+++ b/pkg/types/alertmanagertypes/config.go
@@ -21,6 +21,7 @@ import (
const (
DefaultReceiverName string = "default-receiver"
DefaultGroupBy string = "ruleId"
+ DefaultGroupByAll string = "__all__"
)
var (
@@ -193,6 +194,20 @@ func (c *Config) SetRouteConfig(routeConfig RouteConfig) error {
return nil
}
+func (c *Config) AddInhibitRules(rules []config.InhibitRule) error {
+ if c.alertmanagerConfig == nil {
+ return errors.New(errors.TypeInvalidInput, ErrCodeAlertmanagerConfigInvalid, "config is nil")
+ }
+
+ c.alertmanagerConfig.InhibitRules = append(c.alertmanagerConfig.InhibitRules, rules...)
+
+ c.storeableConfig.Config = string(newRawFromConfig(c.alertmanagerConfig))
+ c.storeableConfig.Hash = fmt.Sprintf("%x", newConfigHash(c.storeableConfig.Config))
+ c.storeableConfig.UpdatedAt = time.Now()
+
+ return nil
+}
+
func (c *Config) AlertmanagerConfig() *config.Config {
return c.alertmanagerConfig
}
@@ -304,6 +319,27 @@ func (c *Config) CreateRuleIDMatcher(ruleID string, receiverNames []string) erro
return nil
}
+func (c *Config) DeleteRuleIDInhibitor(ruleID string) error {
+ if c.alertmanagerConfig.InhibitRules == nil {
+ return nil // already nil
+ }
+
+ var filteredRules []config.InhibitRule
+ for _, inhibitor := range c.alertmanagerConfig.InhibitRules {
+ sourceContainsRuleID := matcherContainsRuleID(inhibitor.SourceMatchers, ruleID)
+ targetContainsRuleID := matcherContainsRuleID(inhibitor.TargetMatchers, ruleID)
+ if !sourceContainsRuleID && !targetContainsRuleID {
+ filteredRules = append(filteredRules, inhibitor)
+ }
+ }
+ c.alertmanagerConfig.InhibitRules = filteredRules
+ c.storeableConfig.Config = string(newRawFromConfig(c.alertmanagerConfig))
+ c.storeableConfig.Hash = fmt.Sprintf("%x", newConfigHash(c.storeableConfig.Config))
+ c.storeableConfig.UpdatedAt = time.Now()
+
+ return nil
+}
+
func (c *Config) UpdateRuleIDMatcher(ruleID string, receiverNames []string) error {
err := c.DeleteRuleIDMatcher(ruleID)
if err != nil {
@@ -405,6 +441,8 @@ func init() {
type NotificationConfig struct {
NotificationGroup map[model.LabelName]struct{}
Renotify ReNotificationConfig
+ UsePolicy bool
+ GroupByAll bool
}
func (nc *NotificationConfig) DeepCopy() NotificationConfig {
@@ -415,6 +453,7 @@ func (nc *NotificationConfig) DeepCopy() NotificationConfig {
for k, v := range nc.NotificationGroup {
deepCopy.NotificationGroup[k] = v
}
+ deepCopy.UsePolicy = nc.UsePolicy
+ deepCopy.GroupByAll = nc.GroupByAll
return deepCopy
}
@@ -423,7 +462,7 @@ type ReNotificationConfig struct {
RenotifyInterval time.Duration
}
-func NewNotificationConfig(groups []string, renotifyInterval time.Duration, noDataRenotifyInterval time.Duration) NotificationConfig {
+func NewNotificationConfig(groups []string, renotifyInterval time.Duration, noDataRenotifyInterval time.Duration, policy bool) NotificationConfig {
notificationConfig := GetDefaultNotificationConfig()
if renotifyInterval != 0 {
@@ -435,8 +474,13 @@ func NewNotificationConfig(groups []string, renotifyInterval time.Duration, noDa
}
for _, group := range groups {
notificationConfig.NotificationGroup[model.LabelName(group)] = struct{}{}
+ if group == DefaultGroupByAll {
+ notificationConfig.GroupByAll = true
+ }
}
+ notificationConfig.UsePolicy = policy
+
return notificationConfig
}
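
A short usage sketch for the extended constructor (argument order as defined above; the concrete values are only examples):

cfg := alertmanagertypes.NewNotificationConfig(
	[]string{"ruleId", "k8s.node.name"}, // group-by labels; passing "__all__" also sets GroupByAll
	10*time.Minute,                      // renotify interval for firing alerts
	4*time.Hour,                         // renotify interval for no-data alerts
	false,                               // usePolicy: false keeps rule-generated routing
)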
diff --git a/pkg/types/alertmanagertypes/expressionroute.go b/pkg/types/alertmanagertypes/expressionroute.go
new file mode 100644
index 000000000000..858864c681cc
--- /dev/null
+++ b/pkg/types/alertmanagertypes/expressionroute.go
@@ -0,0 +1,139 @@
+package alertmanagertypes
+
+import (
+ "context"
+ "github.com/expr-lang/expr"
+ "time"
+
+ "github.com/SigNoz/signoz/pkg/errors"
+ "github.com/SigNoz/signoz/pkg/types"
+ "github.com/SigNoz/signoz/pkg/valuer"
+ "github.com/uptrace/bun"
+)
+
+type PostableRoutePolicy struct {
+ Expression string `json:"expression"`
+ ExpressionKind ExpressionKind `json:"kind"`
+ Channels []string `json:"channels"`
+ Name string `json:"name"`
+ Description string `json:"description"`
+ Tags []string `json:"tags,omitempty"`
+}
+
+func (p *PostableRoutePolicy) Validate() error {
+ if p.Expression == "" {
+ return errors.NewInvalidInputf(errors.CodeInvalidInput, "expression is required")
+ }
+
+ if p.Name == "" {
+ return errors.NewInvalidInputf(errors.CodeInvalidInput, "name is required")
+ }
+
+ if len(p.Channels) == 0 {
+ return errors.NewInvalidInputf(errors.CodeInvalidInput, "at least one channel is required")
+ }
+
+ // Validate channels are not empty
+ for i, channel := range p.Channels {
+ if channel == "" {
+ return errors.NewInvalidInputf(errors.CodeInvalidInput, "channel at index %d cannot be empty", i)
+ }
+ }
+
+ if p.ExpressionKind != PolicyBasedExpression && p.ExpressionKind != RuleBasedExpression {
+ return errors.NewInvalidInputf(errors.CodeInvalidInput, "unsupported expression kind: %s", p.ExpressionKind.StringValue())
+ }
+
+ _, err := expr.Compile(p.Expression)
+ if err != nil {
+ return errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid expression syntax: %v", err)
+ }
+
+ return nil
+}
+
+type GettableRoutePolicy struct {
+ PostableRoutePolicy // Embedded
+
+ ID string `json:"id"`
+
+ // Audit fields
+ CreatedAt *time.Time `json:"createdAt"`
+ UpdatedAt *time.Time `json:"updatedAt"`
+ CreatedBy *string `json:"createdBy"`
+ UpdatedBy *string `json:"updatedBy"`
+}
+
+type ExpressionKind struct {
+ valuer.String
+}
+
+var (
+ RuleBasedExpression = ExpressionKind{valuer.NewString("rule")}
+ PolicyBasedExpression = ExpressionKind{valuer.NewString("policy")}
+)
+
+// RoutePolicy represents the database model for expression routes
+type RoutePolicy struct {
+ bun.BaseModel `bun:"table:route_policy"`
+ types.Identifiable
+ types.TimeAuditable
+ types.UserAuditable
+
+ Expression string `bun:"expression,type:text,notnull" json:"expression"`
+ ExpressionKind ExpressionKind `bun:"kind,type:text" json:"kind"`
+
+ Channels []string `bun:"channels,type:jsonb" json:"channels"`
+
+ Name string `bun:"name,type:text" json:"name"`
+ Description string `bun:"description,type:text" json:"description"`
+ Enabled bool `bun:"enabled,type:boolean,default:true" json:"enabled"`
+ Tags []string `bun:"tags,type:jsonb" json:"tags,omitempty"`
+
+ OrgID string `bun:"org_id,type:text,notnull" json:"orgId"`
+}
+
+func (er *RoutePolicy) Validate() error {
+ if er == nil {
+ return errors.NewInvalidInputf(errors.CodeInvalidInput, "route_policy cannot be nil")
+ }
+
+ if er.Expression == "" {
+ return errors.NewInvalidInputf(errors.CodeInvalidInput, "expression is required")
+ }
+
+ if er.Name == "" {
+ return errors.NewInvalidInputf(errors.CodeInvalidInput, "name is required")
+ }
+
+ if er.OrgID == "" {
+ return errors.NewInvalidInputf(errors.CodeInvalidInput, "organization ID is required")
+ }
+
+ if len(er.Channels) == 0 {
+ return errors.NewInvalidInputf(errors.CodeInvalidInput, "at least one channel is required")
+ }
+
+ // Validate channels are not empty
+ for i, channel := range er.Channels {
+ if channel == "" {
+ return errors.NewInvalidInputf(errors.CodeInvalidInput, "channel at index %d cannot be empty", i)
+ }
+ }
+
+ if er.ExpressionKind != PolicyBasedExpression && er.ExpressionKind != RuleBasedExpression {
+ return errors.NewInvalidInputf(errors.CodeInvalidInput, "unsupported expression kind: %s", er.ExpressionKind.StringValue())
+ }
+
+ return nil
+}
+
+type RouteStore interface {
+ GetByID(ctx context.Context, orgId string, id string) (*RoutePolicy, error)
+ Create(ctx context.Context, route *RoutePolicy) error
+ CreateBatch(ctx context.Context, routes []*RoutePolicy) error
+ Delete(ctx context.Context, orgId string, id string) error
+ GetAllByKind(ctx context.Context, orgID string, kind ExpressionKind) ([]*RoutePolicy, error)
+ GetAllByName(ctx context.Context, orgID string, name string) ([]*RoutePolicy, error)
+ DeleteRouteByName(ctx context.Context, orgID string, name string) error
+}
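
A hypothetical route policy that satisfies Validate above (the rule ID and channel name are made up):

p := alertmanagertypes.PostableRoutePolicy{
	Expression:     `ruleId == "3f2b6c1e-1111-2222-3333-444455556666" && threshold.name == "critical"`,
	ExpressionKind: alertmanagertypes.RuleBasedExpression,
	Channels:       []string{"critical-alerts"},
	Name:           "3f2b6c1e-1111-2222-3333-444455556666",
	Description:    "Auto-generated route for rule 3f2b6c1e-1111-2222-3333-444455556666",
}
if err := p.Validate(); err != nil {
	// an empty name/expression/channel list, an unknown kind, or an expression
	// that fails expr.Compile all end up here
}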
diff --git a/pkg/types/alertmanagertypes/receiver.go b/pkg/types/alertmanagertypes/receiver.go
index 83cae2931b8d..3916c150eecf 100644
--- a/pkg/types/alertmanagertypes/receiver.go
+++ b/pkg/types/alertmanagertypes/receiver.go
@@ -4,6 +4,7 @@ import (
"context"
"encoding/json"
"fmt"
+ "github.com/prometheus/common/model"
"log/slog"
"time"
@@ -49,9 +50,9 @@ func NewReceiver(input string) (Receiver, error) {
return receiverWithDefaults, nil
}
-func TestReceiver(ctx context.Context, receiver Receiver, receiverIntegrationsFunc ReceiverIntegrationsFunc, config *Config, tmpl *template.Template, logger *slog.Logger, alert *Alert) error {
- ctx = notify.WithGroupKey(ctx, fmt.Sprintf("%s-%s-%d", receiver.Name, alert.Labels.Fingerprint(), time.Now().Unix()))
- ctx = notify.WithGroupLabels(ctx, alert.Labels)
+func TestReceiver(ctx context.Context, receiver Receiver, receiverIntegrationsFunc ReceiverIntegrationsFunc, config *Config, tmpl *template.Template, logger *slog.Logger, lSet model.LabelSet, alert ...*Alert) error {
+ ctx = notify.WithGroupKey(ctx, fmt.Sprintf("%s-%s-%d", receiver.Name, lSet.Fingerprint(), time.Now().Unix()))
+ ctx = notify.WithGroupLabels(ctx, lSet)
ctx = notify.WithReceiverName(ctx, receiver.Name)
// We need to create a new config with the same global and route config but empty receivers and routes
@@ -80,7 +81,7 @@ func TestReceiver(ctx context.Context, receiver Receiver, receiverIntegrationsFu
return errors.Newf(errors.TypeNotFound, errors.CodeNotFound, "no integrations found for receiver %s", receiver.Name)
}
- if _, err = integrations[0].Notify(ctx, alert); err != nil {
+ if _, err = integrations[0].Notify(ctx, alert...); err != nil {
return err
}
diff --git a/pkg/types/ruletypes/api_params.go b/pkg/types/ruletypes/api_params.go
index 4cc44ed90230..df04c68382cf 100644
--- a/pkg/types/ruletypes/api_params.go
+++ b/pkg/types/ruletypes/api_params.go
@@ -15,6 +15,8 @@ import (
"github.com/SigNoz/signoz/pkg/query-service/utils/times"
"github.com/SigNoz/signoz/pkg/query-service/utils/timestamp"
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
+
+ "github.com/prometheus/alertmanager/config"
)
type AlertType string
@@ -65,21 +67,95 @@ type PostableRule struct {
}
type NotificationSettings struct {
- NotificationGroupBy []string `json:"notificationGroupBy,omitempty"`
- ReNotifyInterval Duration `json:"renotify,omitempty"`
- AlertStates []model.AlertState `json:"alertStates,omitempty"`
+ GroupBy []string `json:"groupBy,omitempty"`
+ Renotify Renotify `json:"renotify,omitempty"`
+ UsePolicy bool `json:"usePolicy,omitempty"`
+}
+
+type Renotify struct {
+ Enabled bool `json:"enabled"`
+ ReNotifyInterval Duration `json:"interval,omitempty"`
+ AlertStates []model.AlertState `json:"alertStates,omitempty"`
}
func (ns *NotificationSettings) GetAlertManagerNotificationConfig() alertmanagertypes.NotificationConfig {
- var renotifyInterval Duration
- var noDataRenotifyInterval Duration
- if slices.Contains(ns.AlertStates, model.StateNoData) {
- noDataRenotifyInterval = ns.ReNotifyInterval
+ var renotifyInterval time.Duration
+ var noDataRenotifyInterval time.Duration
+ if ns.Renotify.Enabled {
+ if slices.Contains(ns.Renotify.AlertStates, model.StateNoData) {
+ noDataRenotifyInterval = time.Duration(ns.Renotify.ReNotifyInterval)
+ }
+ if slices.Contains(ns.Renotify.AlertStates, model.StateFiring) {
+ renotifyInterval = time.Duration(ns.Renotify.ReNotifyInterval)
+ }
+ } else {
+ renotifyInterval = 8760 * time.Hour // 1 year, effectively disabling renotification
+ noDataRenotifyInterval = 8760 * time.Hour
}
- if slices.Contains(ns.AlertStates, model.StateFiring) {
- renotifyInterval = ns.ReNotifyInterval
+ return alertmanagertypes.NewNotificationConfig(ns.GroupBy, renotifyInterval, noDataRenotifyInterval, ns.UsePolicy)
+}
+
+func (r *PostableRule) GetRuleRouteRequest(ruleId string) ([]*alertmanagertypes.PostableRoutePolicy, error) {
+ threshold, err := r.RuleCondition.Thresholds.GetRuleThreshold()
+ if err != nil {
+ return nil, err
}
- return alertmanagertypes.NewNotificationConfig(ns.NotificationGroupBy, time.Duration(renotifyInterval), time.Duration(noDataRenotifyInterval))
+ receivers := threshold.GetRuleReceivers()
+ routeRequests := make([]*alertmanagertypes.PostableRoutePolicy, 0)
+ for _, receiver := range receivers {
+ expression := fmt.Sprintf(`%s == "%s" && %s == "%s"`, LabelThresholdName, receiver.Name, LabelRuleId, ruleId)
+ routeRequests = append(routeRequests, &alertmanagertypes.PostableRoutePolicy{
+ Expression: expression,
+ ExpressionKind: alertmanagertypes.RuleBasedExpression,
+ Channels: receiver.Channels,
+ Name: ruleId,
+ Description: fmt.Sprintf("Auto-generated route for rule %s", ruleId),
+ Tags: []string{"auto-generated", "rule-based"},
+ })
+ }
+ return routeRequests, nil
+}
+
+func (r *PostableRule) GetInhibitRules(ruleId string) ([]config.InhibitRule, error) {
+ threshold, err := r.RuleCondition.Thresholds.GetRuleThreshold()
+ if err != nil {
+ return nil, err
+ }
+ var groups []string
+ if r.NotificationSettings != nil {
+ for k := range r.NotificationSettings.GetAlertManagerNotificationConfig().NotificationGroup {
+ groups = append(groups, string(k))
+ }
+ }
+ receivers := threshold.GetRuleReceivers()
+ var inhibitRules []config.InhibitRule
+ for i := 0; i < len(receivers)-1; i++ {
+ rule := config.InhibitRule{
+ SourceMatchers: config.Matchers{
+ {
+ Name: LabelThresholdName,
+ Value: receivers[i].Name,
+ },
+ {
+ Name: LabelRuleId,
+ Value: ruleId,
+ },
+ },
+ TargetMatchers: config.Matchers{
+ {
+ Name: LabelThresholdName,
+ Value: receivers[i+1].Name,
+ },
+ {
+ Name: LabelRuleId,
+ Value: ruleId,
+ },
+ },
+ Equal: groups,
+ }
+ inhibitRules = append(inhibitRules, rule)
+ }
+ return inhibitRules, nil
}
func (ns *NotificationSettings) UnmarshalJSON(data []byte) error {
@@ -95,7 +171,7 @@ func (ns *NotificationSettings) UnmarshalJSON(data []byte) error {
}
// Validate states after unmarshaling
- for _, state := range ns.AlertStates {
+ for _, state := range ns.Renotify.AlertStates {
if state != model.StateFiring && state != model.StateNoData {
return fmt.Errorf("invalid alert state: %s", state)
}
@@ -143,15 +219,25 @@ func (r *PostableRule) processRuleDefaults() error {
Kind: BasicThresholdKind,
Spec: BasicRuleThresholds{{
Name: thresholdName,
- RuleUnit: r.RuleCondition.CompositeQuery.Unit,
TargetUnit: r.RuleCondition.TargetUnit,
TargetValue: r.RuleCondition.Target,
MatchType: r.RuleCondition.MatchType,
CompareOp: r.RuleCondition.CompareOp,
+ Channels: r.PreferredChannels,
}},
}
r.RuleCondition.Thresholds = &thresholdData
r.Evaluation = &EvaluationEnvelope{RollingEvaluation, RollingWindow{EvalWindow: r.EvalWindow, Frequency: r.Frequency}}
+ r.NotificationSettings = &NotificationSettings{
+ Renotify: Renotify{
+ Enabled: true,
+ ReNotifyInterval: Duration(4 * time.Hour),
+ AlertStates: []model.AlertState{model.StateFiring},
+ },
+ }
+ if r.RuleCondition.AlertOnAbsent {
+ r.NotificationSettings.Renotify.AlertStates = append(r.NotificationSettings.Renotify.AlertStates, model.StateNoData)
+ }
}
}
@@ -170,6 +256,7 @@ func (r *PostableRule) MarshalJSON() ([]byte, error) {
}
aux.Evaluation = nil
aux.SchemaVersion = ""
+ aux.NotificationSettings = nil
return json.Marshal(aux)
default:
copyStruct := *r
@@ -192,7 +279,7 @@ func isValidLabelName(ln string) bool {
return false
}
for i, b := range ln {
- if !((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || b == '_' || (b >= '0' && b <= '9' && i > 0)) {
+ if !((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || b == '_' || b == '.' || (b >= '0' && b <= '9' && i > 0)) {
return false
}
}
@@ -347,6 +434,7 @@ func (g *GettableRule) MarshalJSON() ([]byte, error) {
}
aux.Evaluation = nil
aux.SchemaVersion = ""
+ aux.NotificationSettings = nil
return json.Marshal(aux)
default:
copyStruct := *g
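
GetInhibitRules pairs adjacent thresholds, in the order produced by GetRuleReceivers, so that a firing alert on the stricter threshold mutes the next one down for the same rule and group labels. A sketch of what it yields for a critical/warning rule (the rule ID is hypothetical):

inhibits, _ := parsedRule.GetInhibitRules("3f2b6c1e-1111-2222-3333-444455556666") // hypothetical ID
// With thresholds ["critical", "warning"], inhibits holds one config.InhibitRule:
//   SourceMatchers: threshold.name = "critical", ruleId = "<rule id>"
//   TargetMatchers: threshold.name = "warning",  ruleId = "<rule id>"
//   Equal:          the notification group labels (e.g. ["ruleId", "k8s.node.name"])
_ = inhibits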
diff --git a/pkg/types/ruletypes/api_params_test.go b/pkg/types/ruletypes/api_params_test.go
index 74d58fdb39a0..5b33d0b72e0d 100644
--- a/pkg/types/ruletypes/api_params_test.go
+++ b/pkg/types/ruletypes/api_params_test.go
@@ -2,10 +2,11 @@ package ruletypes
import (
"encoding/json"
- "github.com/stretchr/testify/assert"
"testing"
"time"
+ "github.com/stretchr/testify/assert"
+
v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3"
)
@@ -303,10 +304,6 @@ func TestParseIntoRuleSchemaVersioning(t *testing.T) {
t.Errorf("Expected threshold name 'warning' from severity label, got '%s'", spec.Name)
}
- // Verify all fields are copied from RuleCondition
- if spec.RuleUnit != "percent" {
- t.Errorf("Expected RuleUnit 'percent', got '%s'", spec.RuleUnit)
- }
if spec.TargetUnit != "%" {
t.Errorf("Expected TargetUnit '%%', got '%s'", spec.TargetUnit)
}
@@ -455,9 +452,6 @@ func TestParseIntoRuleSchemaVersioning(t *testing.T) {
if spec.TargetUnit != "%" {
t.Errorf("Expected TargetUnit '%%' (overwritten), got '%s'", spec.TargetUnit)
}
- if spec.RuleUnit != "percent" {
- t.Errorf("Expected RuleUnit 'percent' (overwritten), got '%s'", spec.RuleUnit)
- }
if rule.Evaluation == nil {
t.Fatal("Expected Evaluation to be populated")
@@ -630,9 +624,9 @@ func TestParseIntoRuleThresholdGeneration(t *testing.T) {
vector, err := threshold.ShouldAlert(v3.Series{
Points: []v3.Point{{Value: 0.15, Timestamp: 1000}}, // 150ms in seconds
Labels: map[string]string{"test": "label"},
- })
+ }, "")
if err != nil {
- t.Fatalf("Unexpected error in ShouldAlert: %v", err)
+ t.Fatalf("Unexpected error in shouldAlert: %v", err)
}
if len(vector) == 0 {
@@ -707,9 +701,9 @@ func TestParseIntoRuleMultipleThresholds(t *testing.T) {
vector, err := threshold.ShouldAlert(v3.Series{
Points: []v3.Point{{Value: 95.0, Timestamp: 1000}}, // 95% CPU usage
Labels: map[string]string{"service": "test"},
- })
+ }, "")
if err != nil {
- t.Fatalf("Unexpected error in ShouldAlert: %v", err)
+ t.Fatalf("Unexpected error in shouldAlert: %v", err)
}
assert.Equal(t, 2, len(vector))
@@ -717,9 +711,9 @@ func TestParseIntoRuleMultipleThresholds(t *testing.T) {
vector, err = threshold.ShouldAlert(v3.Series{
Points: []v3.Point{{Value: 75.0, Timestamp: 1000}}, // 75% CPU usage
Labels: map[string]string{"service": "test"},
- })
+ }, "")
if err != nil {
- t.Fatalf("Unexpected error in ShouldAlert: %v", err)
+ t.Fatalf("Unexpected error in shouldAlert: %v", err)
}
assert.Equal(t, 1, len(vector))
diff --git a/pkg/types/ruletypes/constants.go b/pkg/types/ruletypes/constants.go
index 1851ef919a11..43f97055a931 100644
--- a/pkg/types/ruletypes/constants.go
+++ b/pkg/types/ruletypes/constants.go
@@ -2,3 +2,4 @@ package ruletypes
const CriticalThresholdName = "CRITICAL"
const LabelThresholdName = "threshold.name"
+const LabelRuleId = "ruleId"
diff --git a/pkg/types/ruletypes/result_types.go b/pkg/types/ruletypes/result_types.go
index 0eda5c8aaaf2..2460322a6dcd 100644
--- a/pkg/types/ruletypes/result_types.go
+++ b/pkg/types/ruletypes/result_types.go
@@ -18,6 +18,10 @@ type Sample struct {
Metric labels.Labels
IsMissing bool
+
+ Target float64
+
+ TargetUnit string
}
func (s Sample) String() string {
diff --git a/pkg/types/ruletypes/threshold.go b/pkg/types/ruletypes/threshold.go
index fba9765d5793..87f771b2fe4a 100644
--- a/pkg/types/ruletypes/threshold.go
+++ b/pkg/types/ruletypes/threshold.go
@@ -51,23 +51,41 @@ func (r *RuleThresholdData) UnmarshalJSON(data []byte) error {
return nil
}
+type RuleReceivers struct {
+ Channels []string `json:"channels"`
+ Name string `json:"name"`
+}
+
type RuleThreshold interface {
- ShouldAlert(series v3.Series) (Vector, error)
+ ShouldAlert(series v3.Series, unit string) (Vector, error)
+ GetRuleReceivers() []RuleReceivers
}
type BasicRuleThreshold struct {
Name string `json:"name"`
TargetValue *float64 `json:"target"`
TargetUnit string `json:"targetUnit"`
- RuleUnit string `json:"ruleUnit"`
RecoveryTarget *float64 `json:"recoveryTarget"`
MatchType MatchType `json:"matchType"`
CompareOp CompareOp `json:"op"`
- SelectedQuery string `json:"selectedQuery"`
+ Channels []string `json:"channels"`
}
type BasicRuleThresholds []BasicRuleThreshold
+func (r BasicRuleThresholds) GetRuleReceivers() []RuleReceivers {
+ thresholds := []BasicRuleThreshold(r)
+ var receiverRoutes []RuleReceivers
+ sortThresholds(thresholds)
+ for _, threshold := range thresholds {
+ receiverRoutes = append(receiverRoutes, RuleReceivers{
+ Name: threshold.Name,
+ Channels: threshold.Channels,
+ })
+ }
+ return receiverRoutes
+}
+
func (r BasicRuleThresholds) Validate() error {
var errs []error
for _, basicThreshold := range r {
@@ -78,13 +96,27 @@ func (r BasicRuleThresholds) Validate() error {
return errors.Join(errs...)
}
-func (r BasicRuleThresholds) ShouldAlert(series v3.Series) (Vector, error) {
+func (r BasicRuleThresholds) ShouldAlert(series v3.Series, unit string) (Vector, error) {
var resultVector Vector
thresholds := []BasicRuleThreshold(r)
+ sortThresholds(thresholds)
+ for _, threshold := range thresholds {
+ smpl, shouldAlert := threshold.shouldAlert(series, unit)
+ if shouldAlert {
+ smpl.Target = threshold.target(unit)
+ smpl.TargetUnit = threshold.TargetUnit
+ resultVector = append(resultVector, smpl)
+ }
+ }
+ return resultVector, nil
+}
+
+func sortThresholds(thresholds []BasicRuleThreshold) {
sort.Slice(thresholds, func(i, j int) bool {
- compareOp := thresholds[i].GetCompareOp()
- targetI := thresholds[i].Target()
- targetJ := thresholds[j].Target()
+
+ compareOp := thresholds[i].getCompareOp()
+ targetI := thresholds[i].target(thresholds[i].TargetUnit) // for sorting, we don't need the rule unit
+ targetJ := thresholds[j].target(thresholds[j].TargetUnit)
switch compareOp {
case ValueIsAbove, ValueAboveOrEq, ValueOutsideBounds:
@@ -98,49 +130,22 @@ func (r BasicRuleThresholds) ShouldAlert(series v3.Series) (Vector, error) {
return targetI > targetJ
}
})
- for _, threshold := range thresholds {
- smpl, shouldAlert := threshold.ShouldAlert(series)
- if shouldAlert {
- resultVector = append(resultVector, smpl)
- }
- }
- return resultVector, nil
}
-func (b BasicRuleThreshold) GetName() string {
- return b.Name
-}
-
-func (b BasicRuleThreshold) Target() float64 {
+func (b BasicRuleThreshold) target(ruleUnit string) float64 {
unitConverter := converter.FromUnit(converter.Unit(b.TargetUnit))
// convert the target value to the y-axis unit
value := unitConverter.Convert(converter.Value{
F: *b.TargetValue,
U: converter.Unit(b.TargetUnit),
- }, converter.Unit(b.RuleUnit))
+ }, converter.Unit(ruleUnit))
return value.F
}
-func (b BasicRuleThreshold) GetRecoveryTarget() float64 {
- if b.RecoveryTarget == nil {
- return 0
- } else {
- return *b.RecoveryTarget
- }
-}
-
-func (b BasicRuleThreshold) GetMatchType() MatchType {
- return b.MatchType
-}
-
-func (b BasicRuleThreshold) GetCompareOp() CompareOp {
+func (b BasicRuleThreshold) getCompareOp() CompareOp {
return b.CompareOp
}
-func (b BasicRuleThreshold) GetSelectedQuery() string {
- return b.SelectedQuery
-}
-
func (b BasicRuleThreshold) Validate() error {
var errs []error
if b.Name == "" {
@@ -182,7 +187,7 @@ func removeGroupinSetPoints(series v3.Series) []v3.Point {
return result
}
-func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
+func (b BasicRuleThreshold) shouldAlert(series v3.Series, ruleUnit string) (Sample, bool) {
var shouldAlert bool
var alertSmpl Sample
var lbls labels.Labels
@@ -191,6 +196,8 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
lbls = append(lbls, labels.Label{Name: name, Value: value})
}
+ target := b.target(ruleUnit)
+
lbls = append(lbls, labels.Label{Name: LabelThresholdName, Value: b.Name})
series.Points = removeGroupinSetPoints(series)
@@ -205,7 +212,7 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
// If any sample matches the condition, the rule is firing.
if b.CompareOp == ValueIsAbove {
for _, smpl := range series.Points {
- if smpl.Value > b.Target() {
+ if smpl.Value > target {
alertSmpl = Sample{Point: Point{V: smpl.Value}, Metric: lbls}
shouldAlert = true
break
@@ -213,7 +220,7 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
}
} else if b.CompareOp == ValueIsBelow {
for _, smpl := range series.Points {
- if smpl.Value < b.Target() {
+ if smpl.Value < target {
alertSmpl = Sample{Point: Point{V: smpl.Value}, Metric: lbls}
shouldAlert = true
break
@@ -221,7 +228,7 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
}
} else if b.CompareOp == ValueIsEq {
for _, smpl := range series.Points {
- if smpl.Value == b.Target() {
+ if smpl.Value == target {
alertSmpl = Sample{Point: Point{V: smpl.Value}, Metric: lbls}
shouldAlert = true
break
@@ -229,7 +236,7 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
}
} else if b.CompareOp == ValueIsNotEq {
for _, smpl := range series.Points {
- if smpl.Value != b.Target() {
+ if smpl.Value != target {
alertSmpl = Sample{Point: Point{V: smpl.Value}, Metric: lbls}
shouldAlert = true
break
@@ -237,7 +244,7 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
}
} else if b.CompareOp == ValueOutsideBounds {
for _, smpl := range series.Points {
- if math.Abs(smpl.Value) >= b.Target() {
+ if math.Abs(smpl.Value) >= target {
alertSmpl = Sample{Point: Point{V: smpl.Value}, Metric: lbls}
shouldAlert = true
break
@@ -247,10 +254,10 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
case AllTheTimes:
// If all samples match the condition, the rule is firing.
shouldAlert = true
- alertSmpl = Sample{Point: Point{V: b.Target()}, Metric: lbls}
+ alertSmpl = Sample{Point: Point{V: target}, Metric: lbls}
if b.CompareOp == ValueIsAbove {
for _, smpl := range series.Points {
- if smpl.Value <= b.Target() {
+ if smpl.Value <= target {
shouldAlert = false
break
}
@@ -267,7 +274,7 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
}
} else if b.CompareOp == ValueIsBelow {
for _, smpl := range series.Points {
- if smpl.Value >= b.Target() {
+ if smpl.Value >= target {
shouldAlert = false
break
}
@@ -283,14 +290,14 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
}
} else if b.CompareOp == ValueIsEq {
for _, smpl := range series.Points {
- if smpl.Value != b.Target() {
+ if smpl.Value != target {
shouldAlert = false
break
}
}
} else if b.CompareOp == ValueIsNotEq {
for _, smpl := range series.Points {
- if smpl.Value == b.Target() {
+ if smpl.Value == target {
shouldAlert = false
break
}
@@ -306,7 +313,7 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
}
} else if b.CompareOp == ValueOutsideBounds {
for _, smpl := range series.Points {
- if math.Abs(smpl.Value) < b.Target() {
+ if math.Abs(smpl.Value) < target {
alertSmpl = Sample{Point: Point{V: smpl.Value}, Metric: lbls}
shouldAlert = false
break
@@ -326,23 +333,23 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
avg := sum / count
alertSmpl = Sample{Point: Point{V: avg}, Metric: lbls}
if b.CompareOp == ValueIsAbove {
- if avg > b.Target() {
+ if avg > target {
shouldAlert = true
}
} else if b.CompareOp == ValueIsBelow {
- if avg < b.Target() {
+ if avg < target {
shouldAlert = true
}
} else if b.CompareOp == ValueIsEq {
- if avg == b.Target() {
+ if avg == target {
shouldAlert = true
}
} else if b.CompareOp == ValueIsNotEq {
- if avg != b.Target() {
+ if avg != target {
shouldAlert = true
}
} else if b.CompareOp == ValueOutsideBounds {
- if math.Abs(avg) >= b.Target() {
+ if math.Abs(avg) >= target {
shouldAlert = true
}
}
@@ -358,23 +365,23 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
}
alertSmpl = Sample{Point: Point{V: sum}, Metric: lbls}
if b.CompareOp == ValueIsAbove {
- if sum > b.Target() {
+ if sum > target {
shouldAlert = true
}
} else if b.CompareOp == ValueIsBelow {
- if sum < b.Target() {
+ if sum < target {
shouldAlert = true
}
} else if b.CompareOp == ValueIsEq {
- if sum == b.Target() {
+ if sum == target {
shouldAlert = true
}
} else if b.CompareOp == ValueIsNotEq {
- if sum != b.Target() {
+ if sum != target {
shouldAlert = true
}
} else if b.CompareOp == ValueOutsideBounds {
- if math.Abs(sum) >= b.Target() {
+ if math.Abs(sum) >= target {
shouldAlert = true
}
}
@@ -383,19 +390,19 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
shouldAlert = false
alertSmpl = Sample{Point: Point{V: series.Points[len(series.Points)-1].Value}, Metric: lbls}
if b.CompareOp == ValueIsAbove {
- if series.Points[len(series.Points)-1].Value > b.Target() {
+ if series.Points[len(series.Points)-1].Value > target {
shouldAlert = true
}
} else if b.CompareOp == ValueIsBelow {
- if series.Points[len(series.Points)-1].Value < b.Target() {
+ if series.Points[len(series.Points)-1].Value < target {
shouldAlert = true
}
} else if b.CompareOp == ValueIsEq {
- if series.Points[len(series.Points)-1].Value == b.Target() {
+ if series.Points[len(series.Points)-1].Value == target {
shouldAlert = true
}
} else if b.CompareOp == ValueIsNotEq {
- if series.Points[len(series.Points)-1].Value != b.Target() {
+ if series.Points[len(series.Points)-1].Value != target {
shouldAlert = true
}
}
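
To make the unit change above concrete: the threshold keeps its configured TargetValue/TargetUnit, and the rule's y-axis unit is now supplied per ShouldAlert call instead of being stored on each threshold (the removed RuleUnit field). Below is a minimal sketch of that flow under simplified assumptions; the conversion is hand-rolled for ms-to-s only, whereas the real code goes through the converter package, and the struct here is a stand-in, not the patch's BasicRuleThreshold.

// Minimal sketch of evaluation-time unit handling (assumed types, ms->s conversion only).
package main

import "fmt"

type basicThreshold struct {
	Name        string
	TargetValue float64
	TargetUnit  string
}

// target converts the configured target into the rule's unit before comparing.
func (b basicThreshold) target(ruleUnit string) float64 {
	if b.TargetUnit == "ms" && ruleUnit == "s" {
		return b.TargetValue / 1000
	}
	return b.TargetValue
}

func main() {
	th := basicThreshold{Name: "CRITICAL", TargetValue: 500, TargetUnit: "ms"}

	point := 0.75 // query result in seconds (the rule/y-axis unit)
	if point > th.target("s") {
		// A fired sample can now also carry Target and TargetUnit (see result_types.go above),
		// so a notification can say "0.75s crossed the 500ms threshold".
		fmt.Printf("%s fired: %.2fs > %.2fs (%.0f%s)\n", th.Name, point, th.target("s"), th.TargetValue, th.TargetUnit)
	}
}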
diff --git a/pkg/version/deployment.go b/pkg/version/deployment.go
index fd5c05ab1fff..4b4458ffd00e 100644
--- a/pkg/version/deployment.go
+++ b/pkg/version/deployment.go
@@ -141,7 +141,7 @@ func detectPlatform() string {
}
// Azure metadata
- if req, err := http.NewRequest(http.MethodGet, "http://169.254.169.254/metadata/instance", nil); err == nil {
+ if req, err := http.NewRequest(http.MethodGet, "http://169.254.169.254/metadata/instance?api-version=2017-03-01", nil); err == nil {
req.Header.Add("Metadata", "true")
if resp, err := client.Do(req); err == nil {
resp.Body.Close()
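
Context for the Azure change: the Instance Metadata Service expects both the "Metadata: true" header and an explicit api-version query parameter, and rejects requests without the latter, which is why the old URL could never confirm Azure. A standalone sketch of the probe pattern follows; the onAzure helper, the 1-second timeout, and the 200-status check are assumptions for illustration, not the detectPlatform implementation.

// Standalone sketch of an Azure IMDS probe (hypothetical helper, not detectPlatform itself).
package main

import (
	"fmt"
	"net/http"
	"time"
)

func onAzure() bool {
	client := &http.Client{Timeout: 1 * time.Second} // keep the probe fast on non-Azure hosts
	req, err := http.NewRequest(http.MethodGet, "http://169.254.169.254/metadata/instance?api-version=2017-03-01", nil)
	if err != nil {
		return false
	}
	req.Header.Add("Metadata", "true")
	resp, err := client.Do(req)
	if err != nil {
		return false
	}
	defer resp.Body.Close()
	return resp.StatusCode == http.StatusOK
}

func main() {
	fmt.Println("azure:", onAzure())
}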