feat: stable version

Co-authored-by: Dmitry Fedotov <dmitry@uint32.ru>
Co-committed-by: Dmitry Fedotov <dmitry@uint32.ru>
This commit is contained in:
2025-07-25 18:42:16 +03:00
committed by dmitry
parent 82a4641ab0
commit 3aadddbcac
8 changed files with 362 additions and 228 deletions

22
LICENSE Normal file
View File

@@ -0,0 +1,22 @@
Copyright 2025 Dmitry Fedotov
Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation
files (the “Software”), to deal in the Software without
restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom
the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall
be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
OR OTHER DEALINGS IN THE SOFTWARE.

6
README.md Normal file
View File

@@ -0,0 +1,6 @@
# watchdog
go get code.uint32.ru/tiny/watchdog
This package is a dead simple monitoring systems which may be used to check availability of online resources.
It is a work in progress, but also a working code with decent test coverage. API might change in later versions as well as more features might be added.

View File

@@ -14,12 +14,13 @@ import (
const DefaultTimeout = time.Second * 10 const DefaultTimeout = time.Second * 10
// GetHTTP creates a CheckFunc that operates as follows. // GetHTTP creates a CheckFunc that operates as follows.
// The check calls GET method for provided addr using http.DefaultClient. // The check makes a request with GET method to provided addr using http.DefaultClient.
// If request fails within specified timeout the returned status is StatusDown. // If request fails within specified timeout the returned status is StatusDown.
// The function then tries to read response body. If it fails, // The function then tries to read response body. If it fails,
// the returned status is StatusDown. // the returned status is StatusDown.
// If request secceeds but reponse code is not 200, the returned //
// status is StatusPartiallyAvailable and response body is contained in the // If request succeeds but reponse code is not 200, the returned
// status is StatusDown and response body is contained in the
// returned error. // returned error.
// //
// GetHTTP return an error if addr can not be parsed with url.Parse. // GetHTTP return an error if addr can not be parsed with url.Parse.
@@ -64,11 +65,11 @@ func GetHTTP(addr string, timeout time.Duration) (CheckFunc, error) {
} }
// HeadHTTP creates a CheckFunc that operates as follows. // HeadHTTP creates a CheckFunc that operates as follows.
// The check calls GET method for provided addr using http.DefaultClient. // The check make a request with HEAD method to provided addr using http.DefaultClient.
// If request fails within specified timeout the returned status is StatusDown. // If request fails within specified timeout the returned status is StatusDown.
// //
// If request secceeds but reponse code is not 200, the returned // If request succeeds but reponse code is not 200, the returned
// status is StatusPartiallyAvailable. // status is StatusDown.
// //
// HeadHTTP return an error if addr can not be parsed with url.Parse. // HeadHTTP return an error if addr can not be parsed with url.Parse.
// If zero timeout is provided then the DefaultTimeout (10 second) is used. // If zero timeout is provided then the DefaultTimeout (10 second) is used.
@@ -102,16 +103,18 @@ func HeadHTTP(addr string, timeout time.Duration) (CheckFunc, error) {
return StatusDown, fmt.Errorf("got HTTP response code %d", resp.StatusCode) return StatusDown, fmt.Errorf("got HTTP response code %d", resp.StatusCode)
} }
return http.StatusOK, nil return StatusOK, nil
}, nil }, nil
} }
// DialTCP creates a CheckFunc that may be used to check tcp connectivity // DialTCP creates a CheckFunc that may be used to check tcp connectivity
// to a host. // to a host.
//
// The check tries to net.DialTimeout to the provided addr. If it fails, // The check tries to net.DialTimeout to the provided addr. If it fails,
// the returned status is StatusDown. // the returned status is StatusDown.
// //
// No validation of addr is made. // No validation of addr is made.
//
// If zero timeout is provided then the DefaultTimeout (10 second) is used. // If zero timeout is provided then the DefaultTimeout (10 second) is used.
func DialTCP(addr string, timeout time.Duration) (CheckFunc, error) { func DialTCP(addr string, timeout time.Duration) (CheckFunc, error) {
if timeout == 0 { if timeout == 0 {

127
checks_test.go Normal file
View File

@@ -0,0 +1,127 @@
package watchdog_test
import (
"net"
"net/http"
"net/http/httptest"
"testing"
"time"
"code.uint32.ru/tiny/watchdog"
)
func TestGetHTTPSuccess(t *testing.T) {
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Write([]byte("OK"))
}))
addr := srv.URL
fn, err := watchdog.GetHTTP(addr, time.Second)
if err != nil {
t.Fatal(err)
}
status, err := fn(t.Context())
if err != nil {
t.Fatal(err)
}
if status != watchdog.StatusOK {
t.Fatalf("incorrect status %s, expected %s", status, watchdog.StatusOK)
}
}
func TestGetHTTPError(t *testing.T) {
addr := "https://127.0.0.1:42014"
fn, err := watchdog.GetHTTP(addr, time.Second)
if err != nil {
t.Fatal(err)
}
status, err := fn(t.Context())
if status != watchdog.StatusDown || err == nil {
t.Errorf("incorrect status for unavalable host")
}
}
func TestHeadHTTPSuccess(t *testing.T) {
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
addr := srv.URL
fn, err := watchdog.HeadHTTP(addr, time.Second)
if err != nil {
t.Fatal(err)
}
status, err := fn(t.Context())
if err != nil {
t.Fatal(err)
}
if status != watchdog.StatusOK {
t.Fatalf("incorrect status %s, expected %s", status, watchdog.StatusOK)
}
}
func TestHeadHTTPError(t *testing.T) {
addr := "https://127.0.0.1:42014"
fn, err := watchdog.HeadHTTP(addr, time.Second)
if err != nil {
t.Fatal(err)
}
status, err := fn(t.Context())
if status != watchdog.StatusDown || err == nil {
t.Errorf("incorrect status for unavalable host")
}
}
func TestDialTCPSuccess(t *testing.T) {
addr := "127.0.0.1:42013"
lis, err := net.Listen("tcp", addr)
if err != nil {
t.Fatal(err)
}
go func() {
conn, err := lis.Accept()
if err != nil {
t.Error(err)
}
<-t.Context().Done()
conn.Close()
}()
fn, err := watchdog.DialTCP(addr, time.Second)
if err != nil {
t.Fatal(err)
}
status, err := fn(t.Context())
if err != nil {
t.Fatal(err)
}
if status != watchdog.StatusOK {
t.Error("incorrect status for available addr")
}
}
func TestDialTCPError(t *testing.T) {
addr := "127.0.0.1:65535"
// check for non-existent addr
fn, err := watchdog.DialTCP(addr, time.Second)
if err != nil {
t.Fatal(err)
}
status, err := fn(t.Context())
if (status != watchdog.StatusDown) || (err == nil) {
t.Errorf("incorrect status %s, expected %s", status, watchdog.StatusDown)
}
}

View File

@@ -1,91 +0,0 @@
mode: set
code.uint32.ru/tiny/watchdog/checks.go:27.69,29.16 2 0
code.uint32.ru/tiny/watchdog/checks.go:29.16,31.3 1 0
code.uint32.ru/tiny/watchdog/checks.go:33.2,33.18 1 0
code.uint32.ru/tiny/watchdog/checks.go:33.18,35.3 1 0
code.uint32.ru/tiny/watchdog/checks.go:37.2,37.51 1 0
code.uint32.ru/tiny/watchdog/checks.go:37.51,42.17 4 0
code.uint32.ru/tiny/watchdog/checks.go:42.17,44.4 1 0
code.uint32.ru/tiny/watchdog/checks.go:46.3,47.17 2 0
code.uint32.ru/tiny/watchdog/checks.go:47.17,49.4 1 0
code.uint32.ru/tiny/watchdog/checks.go:51.3,54.17 3 0
code.uint32.ru/tiny/watchdog/checks.go:54.17,56.4 1 0
code.uint32.ru/tiny/watchdog/checks.go:58.3,58.39 1 0
code.uint32.ru/tiny/watchdog/checks.go:58.39,60.4 1 0
code.uint32.ru/tiny/watchdog/checks.go:62.3,62.23 1 0
code.uint32.ru/tiny/watchdog/checks.go:75.70,77.16 2 0
code.uint32.ru/tiny/watchdog/checks.go:77.16,79.3 1 0
code.uint32.ru/tiny/watchdog/checks.go:81.2,81.18 1 0
code.uint32.ru/tiny/watchdog/checks.go:81.18,83.3 1 0
code.uint32.ru/tiny/watchdog/checks.go:85.2,85.51 1 0
code.uint32.ru/tiny/watchdog/checks.go:85.51,90.17 4 0
code.uint32.ru/tiny/watchdog/checks.go:90.17,92.4 1 0
code.uint32.ru/tiny/watchdog/checks.go:94.3,95.17 2 0
code.uint32.ru/tiny/watchdog/checks.go:95.17,97.4 1 0
code.uint32.ru/tiny/watchdog/checks.go:99.3,101.39 2 0
code.uint32.ru/tiny/watchdog/checks.go:101.39,103.4 1 0
code.uint32.ru/tiny/watchdog/checks.go:105.3,105.28 1 0
code.uint32.ru/tiny/watchdog/checks.go:116.69,117.18 1 0
code.uint32.ru/tiny/watchdog/checks.go:117.18,119.3 1 0
code.uint32.ru/tiny/watchdog/checks.go:121.2,121.51 1 0
code.uint32.ru/tiny/watchdog/checks.go:121.51,123.56 2 0
code.uint32.ru/tiny/watchdog/checks.go:123.56,125.4 1 0
code.uint32.ru/tiny/watchdog/checks.go:127.3,128.17 2 0
code.uint32.ru/tiny/watchdog/checks.go:128.17,130.4 1 0
code.uint32.ru/tiny/watchdog/checks.go:132.3,134.23 2 0
code.uint32.ru/tiny/watchdog/entity.go:16.33,17.11 1 0
code.uint32.ru/tiny/watchdog/entity.go:18.16,19.14 1 0
code.uint32.ru/tiny/watchdog/entity.go:20.18,21.16 1 0
code.uint32.ru/tiny/watchdog/entity.go:22.10,23.19 1 0
code.uint32.ru/tiny/watchdog/watchdog.go:25.37,31.2 2 1
code.uint32.ru/tiny/watchdog/watchdog.go:38.41,46.2 5 1
code.uint32.ru/tiny/watchdog/watchdog.go:53.47,58.2 3 1
code.uint32.ru/tiny/watchdog/watchdog.go:62.50,67.29 4 1
code.uint32.ru/tiny/watchdog/watchdog.go:67.29,68.37 1 1
code.uint32.ru/tiny/watchdog/watchdog.go:68.37,69.12 1 1
code.uint32.ru/tiny/watchdog/watchdog.go:72.3,72.35 1 1
code.uint32.ru/tiny/watchdog/watchdog.go:75.2,75.22 1 1
code.uint32.ru/tiny/watchdog/watchdog.go:85.92,89.22 3 1
code.uint32.ru/tiny/watchdog/watchdog.go:89.22,91.3 1 1
code.uint32.ru/tiny/watchdog/watchdog.go:94.2,94.24 1 1
code.uint32.ru/tiny/watchdog/watchdog.go:94.24,96.3 1 1
code.uint32.ru/tiny/watchdog/watchdog.go:98.2,99.22 2 1
code.uint32.ru/tiny/watchdog/watchdog.go:99.22,101.3 1 1
code.uint32.ru/tiny/watchdog/watchdog.go:103.2,105.27 2 1
code.uint32.ru/tiny/watchdog/watchdog.go:111.33,115.22 3 1
code.uint32.ru/tiny/watchdog/watchdog.go:115.22,117.3 1 1
code.uint32.ru/tiny/watchdog/watchdog.go:119.2,119.9 1 1
code.uint32.ru/tiny/watchdog/watchdog.go:120.24,120.24 0 0
code.uint32.ru/tiny/watchdog/watchdog.go:122.10,123.24 1 1
code.uint32.ru/tiny/watchdog/watchdog.go:126.2,126.26 1 1
code.uint32.ru/tiny/watchdog/watchdog.go:126.27,128.3 0 0
code.uint32.ru/tiny/watchdog/watchdog.go:130.2,130.12 1 1
code.uint32.ru/tiny/watchdog/watchdog.go:136.96,138.24 2 1
code.uint32.ru/tiny/watchdog/watchdog.go:138.24,141.3 2 1
code.uint32.ru/tiny/watchdog/watchdog.go:143.2,146.22 3 1
code.uint32.ru/tiny/watchdog/watchdog.go:146.22,148.3 1 1
code.uint32.ru/tiny/watchdog/watchdog.go:150.2,152.55 2 1
code.uint32.ru/tiny/watchdog/watchdog.go:152.55,153.22 1 1
code.uint32.ru/tiny/watchdog/watchdog.go:153.22,155.4 1 1
code.uint32.ru/tiny/watchdog/watchdog.go:155.9,155.29 1 0
code.uint32.ru/tiny/watchdog/watchdog.go:155.29,157.4 1 0
code.uint32.ru/tiny/watchdog/watchdog.go:158.3,158.11 1 0
code.uint32.ru/tiny/watchdog/watchdog.go:161.2,161.22 1 1
code.uint32.ru/tiny/watchdog/watchdog.go:164.41,169.2 3 1
code.uint32.ru/tiny/watchdog/watchdog.go:171.96,180.27 6 1
code.uint32.ru/tiny/watchdog/watchdog.go:180.27,181.23 1 1
code.uint32.ru/tiny/watchdog/watchdog.go:181.23,185.8 3 1
code.uint32.ru/tiny/watchdog/watchdog.go:185.8,198.33 5 1
code.uint32.ru/tiny/watchdog/watchdog.go:198.33,200.6 1 1
code.uint32.ru/tiny/watchdog/watchdog.go:202.5,204.12 2 1
code.uint32.ru/tiny/watchdog/watchdog.go:205.19,205.19 0 0
code.uint32.ru/tiny/watchdog/watchdog.go:207.17,208.16 1 1
code.uint32.ru/tiny/watchdog/watchdog.go:209.23,210.22 1 1
code.uint32.ru/tiny/watchdog/watchdog.go:218.2,218.12 1 1
code.uint32.ru/tiny/watchdog/watchdog.go:218.12,226.3 5 1
code.uint32.ru/tiny/watchdog/watchdog.go:229.92,237.23 5 1
code.uint32.ru/tiny/watchdog/watchdog.go:237.23,240.25 2 1
code.uint32.ru/tiny/watchdog/watchdog.go:240.25,241.17 1 1
code.uint32.ru/tiny/watchdog/watchdog.go:241.18,242.5 0 1
code.uint32.ru/tiny/watchdog/watchdog.go:246.4,260.14 7 1
code.uint32.ru/tiny/watchdog/watchdog.go:264.2,266.17 2 1

View File

@@ -24,7 +24,9 @@ func (s Status) String() string {
} }
} }
// CheckFunc is a function run by Watchdog. // CheckFunc is a function that does the actual work.
// This package provides a number of check functions but
// any function matching the signature may be provided.
// It must obey context dealine and cancellation. // It must obey context dealine and cancellation.
type CheckFunc func(context.Context) (Status, error) type CheckFunc func(context.Context) (Status, error)

View File

@@ -6,8 +6,6 @@ import (
"slices" "slices"
"sync" "sync"
"time" "time"
"golang.org/x/sync/errgroup"
) )
var ( var (
@@ -18,20 +16,37 @@ var (
// Watchdog keeps checks to run either periodically // Watchdog keeps checks to run either periodically
// or on demand. // or on demand.
type Watchdog struct { type Watchdog struct {
checks []Check checks []*wdCheck
running *running
mu sync.Mutex mu sync.Mutex
monitoring bool // is monitoring currently in progress
events chan CheckResult // output channel
limiter chan struct{} // TODO: use proper limiter here
timeout time.Duration
running int
} }
type running struct { type wdCheck struct {
out chan CheckResult check Check
stop chan struct{} stop chan struct{}
} }
// New accepts checks to run. // New creates instance of Watchdog with
// provided checks.
func New(checks ...Check) *Watchdog { func New(checks ...Check) *Watchdog {
ch := make([]*wdCheck, len(checks))
for i := range checks {
ch[i] = &wdCheck{
check: checks[i],
}
}
w := &Watchdog{ w := &Watchdog{
checks: checks, checks: ch,
} }
return w return w
@@ -42,94 +57,129 @@ func (w *Watchdog) ListChecks() []Check {
defer w.mu.Unlock() defer w.mu.Unlock()
out := make([]Check, len(w.checks)) out := make([]Check, len(w.checks))
copy(out, w.checks) for i := range w.checks {
out[i] = w.checks[i].check
}
return out return out
} }
// AddChecks adds checks to the group. This DOES NOT // SetTimeout sets timeout for all checks that
// affect already runnning monitoring for group. Use Stop and // get started with Start method. Changing this
// then Start to restart monitoring when a new check is added. // value does not affect running checks.
// Watchdog does not enforce this timeout, it
// just passes context.WithTimemout to check functions.
// If this method is not called the default timeout
// of 10 seconds is used.
func (w *Watchdog) SetTimeout(d time.Duration) {
w.mu.Lock()
defer w.mu.Unlock()
w.timeout = d
}
// AddChecks adds checks to the group.
// If monitoring is in progress then monitoring it started for the newly added
// check as well.
// Check may have duplicate Name fields but note that RemoveChecks removes checks // Check may have duplicate Name fields but note that RemoveChecks removes checks
// by their Name fields. // by their Name fields.
func (w *Watchdog) AddChecks(checks ...Check) { func (w *Watchdog) AddChecks(checks ...Check) {
w.mu.Lock() w.mu.Lock()
defer w.mu.Unlock() defer w.mu.Unlock()
w.checks = append(w.checks, checks...) for i := range checks {
nc := &wdCheck{
check: checks[i],
}
w.checks = append(w.checks, nc)
if w.monitoring {
w.startMonitoring(nc)
}
}
} }
// RemoveChecks removes the named checks. // RemoveChecks removes the named checks.
// This does not affect the already running monitoring for the group.
func (w *Watchdog) RemoveChecks(names ...string) { func (w *Watchdog) RemoveChecks(names ...string) {
w.mu.Lock() w.mu.Lock()
defer w.mu.Unlock() defer w.mu.Unlock()
remaining := make([]Check, 0, len(w.checks)-len(names)) remaining := make([]*wdCheck, 0, len(w.checks)-len(names))
for _, e := range w.checks { for _, c := range w.checks {
if slices.Contains(names, e.Name) { if slices.Contains(names, c.check.Name) {
if w.monitoring {
w.stopMonitoring(c)
}
continue continue
} }
remaining = append(remaining, e) remaining = append(remaining, c)
} }
w.checks = remaining w.checks = remaining
} }
// Start starts monitoring. // Start starts monitoring.
// Subsequent calls to start just returns the SAME channel. If you need // Subsequent calls to start return the SAME channel. If you need
// to have more that one reader from the channel - fan out on your side. // to have more that one reader from the channel - fan out on your side.
// On start Watchdog runs all provided check and pushes current status of ALL checks of // On start Watchdog runs all provided check and pushes current status of checks of
// the group to the channel. // to the channel.
// Subsequently only changes of status are pushed regardless of return error values of // Subsequently if check func returns different status or if it returns an error the
// CheckFunc provided to Watchdog. // result is pushed to the channel.
// Concurrency limits the number of checks that can run concurrently. 0 means no // Concurrency argument limits the number of checks that can run concurrently. 0 means no
// limit (all checks may run concurrently). // limit (all checks may run concurrently).
func (w *Watchdog) Start(ctx context.Context, concurrency int) (<-chan CheckResult, error) { func (w *Watchdog) Start(concurrency int) (<-chan CheckResult, error) {
w.mu.Lock() w.mu.Lock()
defer w.mu.Unlock() defer w.mu.Unlock()
if w.running != nil {
return w.running.out, nil
}
// start new session
if len(w.checks) == 0 { if len(w.checks) == 0 {
return nil, ErrNotConfigured return nil, ErrNotConfigured
} }
cp := w.copyChecks() if w.monitoring {
if concurrency == 0 { return w.events, nil
concurrency = len(cp)
} }
w.runMonitoringForGroup(ctx, cp, concurrency) if concurrency == 0 {
concurrency = len(w.checks)
}
return w.running.out, nil if w.timeout == 0 {
w.timeout = DefaultTimeout
}
w.events = make(chan CheckResult, concurrency)
w.limiter = make(chan struct{}, concurrency)
for i := range w.checks {
w.startMonitoring(w.checks[i])
}
w.monitoring = true
return w.events, nil
} }
// Stop stops execution of checks. // Stop stops execution of checks.
// Subsequent calls of Stop for the same groupID // Subsequent calls return ErrNotRunning.
// return ErrNotRunning.
func (w *Watchdog) Stop() error { func (w *Watchdog) Stop() error {
w.mu.Lock() w.mu.Lock()
defer w.mu.Unlock() defer w.mu.Unlock()
if w.running == nil { if !w.monitoring {
return ErrNotRunning return ErrNotRunning
} }
close(w.running.stop) for i := range w.checks {
w.stopMonitoring(w.checks[i])
w.running = nil }
return nil return nil
} }
// RunImmediately runs configured checks for group concurrently and returns results. // RunImmediately runs configured checks concurrently and returns results.
// Concurrency limits number of checks that are allowed run concurrently. Setting // Setting concurrency to 0 means that all check of the group are allowed to run simultaneously.
// concurrency to 0 means that all check of the group are allowed to run simultaneously. // Otherwise at most concurrency checks will be allowed to run simultaneously.
func (w *Watchdog) RunImmediately(ctx context.Context, concurrency int) ([]CheckResult, error) { func (w *Watchdog) RunImmediately(ctx context.Context, concurrency int) ([]CheckResult, error) {
w.mu.Lock() w.mu.Lock()
if len(w.checks) == 0 { if len(w.checks) == 0 {
@@ -160,31 +210,48 @@ func (w *Watchdog) RunImmediately(ctx context.Context, concurrency int) ([]Check
func (w *Watchdog) copyChecks() []Check { func (w *Watchdog) copyChecks() []Check {
cp := make([]Check, len(w.checks)) cp := make([]Check, len(w.checks))
copy(cp, w.checks) for i := range w.checks {
cp[i] = w.checks[i].check
}
return cp return cp
} }
func (w *Watchdog) runMonitoringForGroup(ctx context.Context, checks []Check, concurrency int) { func (w *Watchdog) startMonitoring(wdc *wdCheck) {
events := make(chan CheckResult, len(checks)) wdc.stop = make(chan struct{})
stop := make(chan struct{})
w.running = &running{out: events, stop: stop}
grp := errgroup.Group{}
sema := make(chan struct{}, concurrency)
for _, c := range checks {
grp.Go(func() error {
state := CheckResult{} state := CheckResult{}
ticker := time.Tick(c.Interval) ticker := time.Tick(wdc.check.Interval)
c := wdc.check
// this method is called only with
// w.mu locked
w.running++
go func() {
defer func() {
w.mu.Lock()
defer w.mu.Unlock()
w.running--
if w.running == 0 {
// last goroutine to exit will also
// close the output chan
close(w.events)
w.monitoring = false
}
}()
for { for {
sema <- struct{}{} w.limiter <- struct{}{}
ctx, cancel := context.WithTimeout(context.Background(), w.timeout)
defer cancel()
status, err := c.Check(ctx) status, err := c.Check(ctx)
<-sema <-w.limiter
s := CheckResult{ s := CheckResult{
Name: c.Name, Name: c.Name,
@@ -192,8 +259,8 @@ func (w *Watchdog) runMonitoringForGroup(ctx context.Context, checks []Check, co
Error: err, Error: err,
} }
if s.Status != state.Status { if s.Status != state.Status || s.Error != nil {
events <- s w.events <- s
} }
state = s state = s
@@ -201,45 +268,36 @@ func (w *Watchdog) runMonitoringForGroup(ctx context.Context, checks []Check, co
select { select {
case <-ticker: case <-ticker:
// continue looping // continue looping
case <-stop: case <-wdc.stop:
return nil // stopping this specific check
case <-ctx.Done(): return
return ctx.Err()
} }
} }
})
}
// separate goroutine to close the output chan
// when everyone's dead
go func() {
grp.Wait()
close(events)
w.mu.Lock()
defer w.mu.Unlock()
if w.running != nil {
w.running = nil
}
}() }()
} }
func (w *Watchdog) stopMonitoring(wdc *wdCheck) {
close(wdc.stop)
}
func runChecksConcurrently(ctx context.Context, ch []Check, concurrency int) []CheckResult { func runChecksConcurrently(ctx context.Context, ch []Check, concurrency int) []CheckResult {
statuses := make([]CheckResult, 0, len(ch)) statuses := make([]CheckResult, 0, len(ch))
m := sync.Mutex{} // for append operations m := sync.Mutex{} // for append operations
group := errgroup.Group{}
sema := make(chan struct{}, concurrency) // semaphore to limit concurrency sema := make(chan struct{}, concurrency) // semaphore to limit concurrency
done := make(chan struct{}, len(ch))
count := len(ch)
for _, e := range ch { for _, e := range ch {
sema <- struct{}{} // acquire sema <- struct{}{} // acquire
go func() error {
group.Go(func() error {
defer func() { defer func() {
<-sema
done <- struct{}{}
}() }()
// relying on fact that CheckFunc obeys context // relying on assumption that CheckFunc obeys context
// cancellation // cancellation
status, err := e.Check(ctx) status, err := e.Check(ctx)
@@ -253,13 +311,17 @@ func runChecksConcurrently(ctx context.Context, ch []Check, concurrency int) []C
defer m.Unlock() defer m.Unlock()
statuses = append(statuses, r) statuses = append(statuses, r)
<-sema // release
return nil return nil
}) }()
} }
group.Wait() // wait for all to finish
for range done {
count--
if count == 0 {
close(done)
}
}
return statuses return statuses
} }

View File

@@ -2,6 +2,7 @@ package watchdog_test
import ( import (
"context" "context"
"errors"
"reflect" "reflect"
"testing" "testing"
"time" "time"
@@ -16,8 +17,14 @@ type mockChecker struct {
called bool called bool
} }
func (m *mockChecker) Func(_ context.Context) (watchdog.Status, error) { func (m *mockChecker) Func(ctx context.Context) (watchdog.Status, error) {
m.called = true m.called = true
time.Sleep(time.Millisecond * 10)
if err := ctx.Err(); err != nil {
return watchdog.StatusUnknown, err
}
return m.status, m.err return m.status, m.err
} }
@@ -106,9 +113,9 @@ func TestRunImmediately(t *testing.T) {
} }
func TestStart(t *testing.T) { func TestStartStop(t *testing.T) {
w := new(watchdog.Watchdog) w := new(watchdog.Watchdog)
if _, err := w.Start(t.Context(), 0); err == nil { if _, err := w.Start(0); err == nil {
t.Error("Start doen't error on empty checks slice") t.Error("Start doen't error on empty checks slice")
} }
@@ -117,12 +124,12 @@ func TestStart(t *testing.T) {
w.AddChecks(m1.Check(), m2.Check()) w.AddChecks(m1.Check(), m2.Check())
out, err := w.Start(t.Context(), 0) out, err := w.Start(0)
if err != nil { if err != nil {
t.Error("Start returns error", err) t.Error("Start returns error", err)
} }
out2, err := w.Start(t.Context(), 0) out2, err := w.Start(0)
if err != nil { if err != nil {
t.Error("second call to Start returns error") t.Error("second call to Start returns error")
} }
@@ -131,13 +138,12 @@ func TestStart(t *testing.T) {
t.Error("returned channels are not equal") t.Error("returned channels are not equal")
} }
count := 0
go func() {
time.Sleep(time.Second) time.Sleep(time.Second)
if err := w.Stop(); err != nil { if err := w.Stop(); err != nil {
t.Error("Stop returned error", err) t.Error("Stop returned error", err)
} }
}()
count := 0
for res := range out { for res := range out {
if res.Status != watchdog.StatusOK || res.Error != nil { if res.Status != watchdog.StatusOK || res.Error != nil {
@@ -162,22 +168,19 @@ func TestStart(t *testing.T) {
} }
} }
func TestWatchdogObeysContext(t *testing.T) { func TestSetTimeout(t *testing.T) {
w := new(watchdog.Watchdog) w := new(watchdog.Watchdog)
w.SetTimeout(time.Millisecond)
m1 := newMockChecker("mock", watchdog.StatusOK, nil) m1 := newMockChecker("mock", watchdog.StatusOK, nil)
m2 := newMockChecker("mock2", watchdog.StatusOK, nil) w.AddChecks(m1.Check())
w.AddChecks(m1.Check(), m2.Check()) out, _ := w.Start(0)
ctx, stop := context.WithCancel(t.Context())
if _, err := w.Start(ctx, 2); err != nil {
t.Error(err)
}
stop()
// wait for goroutines to finish
time.Sleep(time.Second) time.Sleep(time.Second)
if err := w.Stop(); err == nil { w.Stop()
t.Error("no error calling Stop for stopped instance") res := <-out
if !(res.Status == watchdog.StatusUnknown) || !errors.Is(res.Error, context.DeadlineExceeded) {
t.Logf("got status: %s, err: %v", res.Status, res.Error)
t.Fatal("incorrect status for timed out op")
} }
} }