Emit error when the rule synchronization fails by SungJin1212 · Pull Request #6902 · cortexproject/cortex · GitHub
Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
25 changes: 19 additions & 6 deletions pkg/ruler/ruler.go
Original file line number | Diff line number | Diff line change
@@ -693,29 +693,40 @@ func (r *Ruler) run(ctx context.Context) error {
ringTickerChan = ringTicker.C
}

r.syncRules(ctx, rulerSyncReasonInitial)
syncRuleErrMsg := func(syncRulesErr error) {
level.Error(r.logger).Log("msg", "failed to sync rules", "err", syncRulesErr)
}

initialSyncErr := r.syncRules(ctx, rulerSyncReasonInitial)
if initialSyncErr != nil {
syncRuleErrMsg(initialSyncErr)
}
for {
var syncRulesErr error
select {
case <-ctx.Done():
return nil
case <-tick.C:
r.syncRules(ctx, rulerSyncReasonPeriodic)
syncRulesErr = r.syncRules(ctx, rulerSyncReasonPeriodic)
case <-ringTickerChan:
// We ignore the error because in case of error it will return an empty
// replication set which we use to compare with the previous state.
currRingState, _ := r.ring.GetAllHealthy(RingOp)

if ring.HasReplicationSetChanged(ringLastState, currRingState) {
ringLastState = currRingState
r.syncRules(ctx, rulerSyncReasonRingChange)
syncRulesErr = r.syncRules(ctx, rulerSyncReasonRingChange)
}
case err := <-r.subservicesWatcher.Chan():
return errors.Wrap(err, "ruler subservice failed")
}
if syncRulesErr != nil {
syncRuleErrMsg(syncRulesErr)
}
}
}

func (r *Ruler) syncRules(ctx context.Context, reason string) {
func (r *Ruler) syncRules(ctx context.Context, reason string) error {
level.Info(r.logger).Log("msg", "syncing rules", "reason", reason)
r.rulerSync.WithLabelValues(reason).Inc()
timer := prometheus.NewTimer(nil)
@@ -727,19 +738,21 @@ func (r *Ruler) syncRules(ctx context.Context, reason string) {

loadedConfigs, backupConfigs, err := r.loadRuleGroups(ctx)
if err != nil {
return
return err
}

if ctx.Err() != nil {
level.Info(r.logger).Log("msg", "context is canceled. not syncing rules")
return
return err
}
// This will also delete local group files for users that are no longer in 'configs' map.
r.manager.SyncRuleGroups(ctx, loadedConfigs)

if r.cfg.RulesBackupEnabled() {
r.manager.BackUpRuleGroups(ctx, backupConfigs)
}

return nil
}

func (r *Ruler) loadRuleGroups(ctx context.Context) (map[string]rulespb.RuleGroupList, map[string]rulespb.RuleGroupList, error) {
Expand Down
21 changes: 14 additions & 7 deletions pkg/ruler/ruler_test.go
Loading