Add for tenant silences limit by SungJin1212 · Pull Request #6605 · cortexproject/cortex · GitHub
Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
9 changes: 9 additions & 0 deletions docs/configuration/config-file-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -3751,6 +3751,15 @@ query_rejection:
# CLI flag: -alertmanager.max-alerts-size-bytes
[alertmanager_max_alerts_size_bytes: <int> | default = 0]

# Maximum number of silences that a single user can have, including expired
# silences. 0 = no limit.
# CLI flag: -alertmanager.max-silences-count
[alertmanager_max_silences_count: <int> | default = 0]

# Maximum size, in bytes, of an individual silence that a single user can have.
# 0 = no limit.
# CLI flag: -alertmanager.max-silences-size-bytes
[alertmanager_max_silences_size_bytes: <int> | default = 0]

# list of rule groups to disable
[disabled_rule_groups: <list of DisabledRuleGroup> | default = []]
```
Expand Down
9 changes: 7 additions & 2 deletions pkg/alertmanager/alertmanager.go
Original file line number Diff line number Diff line change
Expand Up @@ -228,11 +228,16 @@ func New(cfg *Config, reg *prometheus.Registry) (*Alertmanager, error) {
am.groupMarker = memMarker

silencesFile := filepath.Join(cfg.TenantDataDir, silencesSnapshot)

am.silences, err = silence.New(silence.Options{
SnapshotFile: silencesFile,
Retention: cfg.Retention,
Logger: util_log.GoKitLogToSlog(log.With(am.logger, "component", "silences")),
Metrics: am.registry,
Limits: silence.Limits{
MaxSilences: func() int { return cfg.Limits.AlertmanagerMaxSilencesCount(cfg.UserID) },
MaxSilenceSizeBytes: func() int { return cfg.Limits.AlertmanagerMaxSilenceSizeBytes(cfg.UserID) },
},
Logger: util_log.GoKitLogToSlog(log.With(am.logger, "component", "silences")),
Metrics: am.registry,
})
if err != nil {
return nil, fmt.Errorf("failed to create silences: %v", err)
Expand Down
70 changes: 70 additions & 0 deletions pkg/alertmanager/alertmanager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (

"github.com/go-kit/log"
"github.com/prometheus/alertmanager/config"
"github.com/prometheus/alertmanager/silence/silencepb"
"github.com/prometheus/alertmanager/types"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/testutil"
Expand All @@ -19,6 +20,75 @@ import (
"github.com/cortexproject/cortex/pkg/util/test"
)

// TestSilencesLimits builds a single Alertmanager whose Limits report a
// silence-count limit and a per-silence size limit, then checks that
// am.silences.Set rejects silences that violate either one.
func TestSilencesLimits(t *testing.T) {
	const (
		tenant     = "test"
		countLimit = 3
		sizeLimit  = 500
	)

	registry := prometheus.NewPedanticRegistry()
	am, err := New(&Config{
		UserID:          tenant,
		Logger:          log.NewNopLogger(),
		Limits:          &mockAlertManagerLimits{maxSilencesCount: countLimit, maxSilencesSizeBytes: sizeLimit},
		TenantDataDir:   t.TempDir(),
		ExternalURL:     &url.URL{Path: "/am"},
		ShardingEnabled: false,
		GCInterval:      30 * time.Minute,
	}, registry)
	require.NoError(t, err)
	defer am.StopAndWait()

	// newSilence returns a fresh 30-minute silence with a single matcher.
	// A new value is built on every call so each Set receives its own silence.
	newSilence := func(name, pattern string) *silencepb.Silence {
		return &silencepb.Silence{
			Matchers: []*silencepb.Matcher{{Name: name, Pattern: pattern}},
			StartsAt: time.Now(),
			EndsAt:   time.Now().Add(30 * time.Minute),
		}
	}

	t.Run("Test maxSilencesCount", func(t *testing.T) {
		overLimitMsg := fmt.Sprintf("exceeded maximum number of silences: %d (limit: %d)", countLimit, countLimit)

		// Fill the store exactly up to the configured count.
		for created := 0; created < countLimit; created++ {
			require.NoError(t, am.silences.Set(newSilence("name", "pattern")))
		}

		// One more silence must be rejected.
		require.EqualError(t, am.silences.Set(newSilence("name", "pattern")), overLimitMsg)

		// Expire every stored silence.
		stored, _, err := am.silences.Query()
		require.NoError(t, err)
		for _, sil := range stored {
			require.NoError(t, am.silences.Expire(sil.Id))
		}

		// Expired silences still count towards the limit until they are
		// garbage-collected, so the next Set must fail the same way.
		require.EqualError(t, am.silences.Set(newSilence("name", "pattern")), overLimitMsg)

		// GC is expected to report every expired silence as removed.
		removed, err := am.silences.GC()
		require.NoError(t, err)
		require.Equal(t, countLimit, removed)
	})

	t.Run("Test maxSilencesSizeBytes", func(t *testing.T) {
		// Each matcher field is just over half the size limit, so the two
		// together already exceed it.
		fieldLen := sizeLimit/2 + 1
		oversized := newSilence(strings.Repeat("a", fieldLen), strings.Repeat("b", fieldLen))

		err := am.silences.Set(oversized)
		require.Error(t, err)
		require.Contains(t, err.Error(), "silence exceeded maximum size")
	})
}

func TestDispatcherGroupLimits(t *testing.T) {
for name, tc := range map[string]struct {
groups int
Expand Down
6 changes: 6 additions & 0 deletions pkg/alertmanager/multitenant.go
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,12 @@ type Limits interface {
// AlertmanagerMaxAlertsSizeBytes returns total max size of alerts that tenant can have active at the same time. 0 = no limit.
// Size of the alert is computed from alert labels, annotations and generator URL.
AlertmanagerMaxAlertsSizeBytes(tenant string) int

// AlertmanagerMaxSilencesCount returns max number of silences that tenant can have, including expired silences. 0 = no limit.
AlertmanagerMaxSilencesCount(tenant string) int

// AlertmanagerMaxSilenceSizeBytes returns the maximum size of an individual silence. 0 = no limit.
AlertmanagerMaxSilenceSizeBytes(tenant string) int
}

// A MultitenantAlertmanager manages Alertmanager instances for multiple
Expand Down
26 changes: 24 additions & 2 deletions pkg/alertmanager/multitenant_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1777,8 +1777,14 @@ func TestAlertmanager_StateReplicationWithSharding(t *testing.T) {
amConfig.ShardingEnabled = true
}

var limits validation.Limits
flagext.DefaultValues(&limits)

overrides, err := validation.NewOverrides(limits, nil)
require.NoError(t, err)

reg := prometheus.NewPedanticRegistry()
am, err := createMultitenantAlertmanager(amConfig, nil, nil, mockStore, ringStore, nil, log.NewNopLogger(), reg)
am, err := createMultitenantAlertmanager(amConfig, nil, nil, mockStore, ringStore, overrides, log.NewNopLogger(), reg)
require.NoError(t, err)
defer services.StopAndAwaitTerminated(ctx, am) //nolint:errcheck

Expand Down Expand Up @@ -1969,8 +1975,14 @@ func TestAlertmanager_StateReplicationWithSharding_InitialSyncFromPeers(t *testi

amConfig.ShardingEnabled = true

var limits validation.Limits
flagext.DefaultValues(&limits)

overrides, err := validation.NewOverrides(limits, nil)
require.NoError(t, err)

reg := prometheus.NewPedanticRegistry()
am, err := createMultitenantAlertmanager(amConfig, nil, nil, mockStore, ringStore, nil, log.NewNopLogger(), reg)
am, err := createMultitenantAlertmanager(amConfig, nil, nil, mockStore, ringStore, overrides, log.NewNopLogger(), reg)
require.NoError(t, err)

clientPool.setServer(amConfig.ShardingRing.InstanceAddr+":0", am)
Expand Down Expand Up @@ -2285,6 +2297,8 @@ type mockAlertManagerLimits struct {
maxDispatcherAggregationGroups int
maxAlertsCount int
maxAlertsSizeBytes int
maxSilencesCount int
maxSilencesSizeBytes int
}

func (m *mockAlertManagerLimits) AlertmanagerMaxConfigSize(tenant string) int {
Expand Down Expand Up @@ -2326,3 +2340,11 @@ func (m *mockAlertManagerLimits) AlertmanagerMaxAlertsCount(_ string) int {
// AlertmanagerMaxAlertsSizeBytes returns the mock's maxAlertsSizeBytes field.
// The tenant argument is ignored, so every tenant gets the same value.
func (m *mockAlertManagerLimits) AlertmanagerMaxAlertsSizeBytes(_ string) int {
return m.maxAlertsSizeBytes
}

// AlertmanagerMaxSilencesCount returns the mock's maxSilencesCount field.
// The tenant argument is ignored, so every tenant gets the same value.
func (m *mockAlertManagerLimits) AlertmanagerMaxSilencesCount(_ string) int {
return m.maxSilencesCount
}

// AlertmanagerMaxSilenceSizeBytes returns the mock's maxSilencesSizeBytes
// field (note the field name uses the plural "Silences" while the method uses
// the singular). The tenant argument is ignored.
func (m *mockAlertManagerLimits) AlertmanagerMaxSilenceSizeBytes(_ string) int {
return m.maxSilencesSizeBytes
}
12 changes: 12 additions & 0 deletions pkg/util/validation/limits.go