feat: stable version
Co-authored-by: Dmitry Fedotov <dmitry@uint32.ru> Co-committed-by: Dmitry Fedotov <dmitry@uint32.ru>
This commit is contained in:
22
LICENSE
Normal file
22
LICENSE
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
Copyright 2025 Dmitry Fedotov
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person
|
||||||
|
obtaining a copy of this software and associated documentation
|
||||||
|
files (the “Software”), to deal in the Software without
|
||||||
|
restriction, including without limitation the rights to use,
|
||||||
|
copy, modify, merge, publish, distribute, sublicense, and/or
|
||||||
|
sell copies of the Software, and to permit persons to whom
|
||||||
|
the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall
|
||||||
|
be included in all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
|
||||||
|
OR OTHER DEALINGS IN THE SOFTWARE.
|
6
README.md
Normal file
6
README.md
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
# watchdog
|
||||||
|
go get code.uint32.ru/tiny/watchdog
|
||||||
|
|
||||||
|
This package is a dead simple monitoring systems which may be used to check availability of online resources.
|
||||||
|
|
||||||
|
It is a work in progress, but also a working code with decent test coverage. API might change in later versions as well as more features might be added.
|
17
checks.go
17
checks.go
@@ -14,12 +14,13 @@ import (
|
|||||||
const DefaultTimeout = time.Second * 10
|
const DefaultTimeout = time.Second * 10
|
||||||
|
|
||||||
// GetHTTP creates a CheckFunc that operates as follows.
|
// GetHTTP creates a CheckFunc that operates as follows.
|
||||||
// The check calls GET method for provided addr using http.DefaultClient.
|
// The check makes a request with GET method to provided addr using http.DefaultClient.
|
||||||
// If request fails within specified timeout the returned status is StatusDown.
|
// If request fails within specified timeout the returned status is StatusDown.
|
||||||
// The function then tries to read response body. If it fails,
|
// The function then tries to read response body. If it fails,
|
||||||
// the returned status is StatusDown.
|
// the returned status is StatusDown.
|
||||||
// If request secceeds but reponse code is not 200, the returned
|
//
|
||||||
// status is StatusPartiallyAvailable and response body is contained in the
|
// If request succeeds but reponse code is not 200, the returned
|
||||||
|
// status is StatusDown and response body is contained in the
|
||||||
// returned error.
|
// returned error.
|
||||||
//
|
//
|
||||||
// GetHTTP return an error if addr can not be parsed with url.Parse.
|
// GetHTTP return an error if addr can not be parsed with url.Parse.
|
||||||
@@ -64,11 +65,11 @@ func GetHTTP(addr string, timeout time.Duration) (CheckFunc, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// HeadHTTP creates a CheckFunc that operates as follows.
|
// HeadHTTP creates a CheckFunc that operates as follows.
|
||||||
// The check calls GET method for provided addr using http.DefaultClient.
|
// The check make a request with HEAD method to provided addr using http.DefaultClient.
|
||||||
// If request fails within specified timeout the returned status is StatusDown.
|
// If request fails within specified timeout the returned status is StatusDown.
|
||||||
//
|
//
|
||||||
// If request secceeds but reponse code is not 200, the returned
|
// If request succeeds but reponse code is not 200, the returned
|
||||||
// status is StatusPartiallyAvailable.
|
// status is StatusDown.
|
||||||
//
|
//
|
||||||
// HeadHTTP return an error if addr can not be parsed with url.Parse.
|
// HeadHTTP return an error if addr can not be parsed with url.Parse.
|
||||||
// If zero timeout is provided then the DefaultTimeout (10 second) is used.
|
// If zero timeout is provided then the DefaultTimeout (10 second) is used.
|
||||||
@@ -102,16 +103,18 @@ func HeadHTTP(addr string, timeout time.Duration) (CheckFunc, error) {
|
|||||||
return StatusDown, fmt.Errorf("got HTTP response code %d", resp.StatusCode)
|
return StatusDown, fmt.Errorf("got HTTP response code %d", resp.StatusCode)
|
||||||
}
|
}
|
||||||
|
|
||||||
return http.StatusOK, nil
|
return StatusOK, nil
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// DialTCP creates a CheckFunc that may be used to check tcp connectivity
|
// DialTCP creates a CheckFunc that may be used to check tcp connectivity
|
||||||
// to a host.
|
// to a host.
|
||||||
|
//
|
||||||
// The check tries to net.DialTimeout to the provided addr. If it fails,
|
// The check tries to net.DialTimeout to the provided addr. If it fails,
|
||||||
// the returned status is StatusDown.
|
// the returned status is StatusDown.
|
||||||
//
|
//
|
||||||
// No validation of addr is made.
|
// No validation of addr is made.
|
||||||
|
//
|
||||||
// If zero timeout is provided then the DefaultTimeout (10 second) is used.
|
// If zero timeout is provided then the DefaultTimeout (10 second) is used.
|
||||||
func DialTCP(addr string, timeout time.Duration) (CheckFunc, error) {
|
func DialTCP(addr string, timeout time.Duration) (CheckFunc, error) {
|
||||||
if timeout == 0 {
|
if timeout == 0 {
|
||||||
|
127
checks_test.go
Normal file
127
checks_test.go
Normal file
@@ -0,0 +1,127 @@
|
|||||||
|
package watchdog_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"net"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"code.uint32.ru/tiny/watchdog"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestGetHTTPSuccess(t *testing.T) {
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.Write([]byte("OK"))
|
||||||
|
}))
|
||||||
|
|
||||||
|
addr := srv.URL
|
||||||
|
|
||||||
|
fn, err := watchdog.GetHTTP(addr, time.Second)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
status, err := fn(t.Context())
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if status != watchdog.StatusOK {
|
||||||
|
t.Fatalf("incorrect status %s, expected %s", status, watchdog.StatusOK)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGetHTTPError(t *testing.T) {
|
||||||
|
addr := "https://127.0.0.1:42014"
|
||||||
|
fn, err := watchdog.GetHTTP(addr, time.Second)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
status, err := fn(t.Context())
|
||||||
|
if status != watchdog.StatusDown || err == nil {
|
||||||
|
t.Errorf("incorrect status for unavalable host")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestHeadHTTPSuccess(t *testing.T) {
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
}))
|
||||||
|
|
||||||
|
addr := srv.URL
|
||||||
|
|
||||||
|
fn, err := watchdog.HeadHTTP(addr, time.Second)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
status, err := fn(t.Context())
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if status != watchdog.StatusOK {
|
||||||
|
t.Fatalf("incorrect status %s, expected %s", status, watchdog.StatusOK)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestHeadHTTPError(t *testing.T) {
|
||||||
|
addr := "https://127.0.0.1:42014"
|
||||||
|
fn, err := watchdog.HeadHTTP(addr, time.Second)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
status, err := fn(t.Context())
|
||||||
|
if status != watchdog.StatusDown || err == nil {
|
||||||
|
t.Errorf("incorrect status for unavalable host")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDialTCPSuccess(t *testing.T) {
|
||||||
|
addr := "127.0.0.1:42013"
|
||||||
|
lis, err := net.Listen("tcp", addr)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
conn, err := lis.Accept()
|
||||||
|
if err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
}
|
||||||
|
<-t.Context().Done()
|
||||||
|
conn.Close()
|
||||||
|
}()
|
||||||
|
|
||||||
|
fn, err := watchdog.DialTCP(addr, time.Second)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
status, err := fn(t.Context())
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if status != watchdog.StatusOK {
|
||||||
|
t.Error("incorrect status for available addr")
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDialTCPError(t *testing.T) {
|
||||||
|
addr := "127.0.0.1:65535"
|
||||||
|
// check for non-existent addr
|
||||||
|
fn, err := watchdog.DialTCP(addr, time.Second)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
status, err := fn(t.Context())
|
||||||
|
if (status != watchdog.StatusDown) || (err == nil) {
|
||||||
|
t.Errorf("incorrect status %s, expected %s", status, watchdog.StatusDown)
|
||||||
|
}
|
||||||
|
}
|
91
coverage.out
91
coverage.out
@@ -1,91 +0,0 @@
|
|||||||
mode: set
|
|
||||||
code.uint32.ru/tiny/watchdog/checks.go:27.69,29.16 2 0
|
|
||||||
code.uint32.ru/tiny/watchdog/checks.go:29.16,31.3 1 0
|
|
||||||
code.uint32.ru/tiny/watchdog/checks.go:33.2,33.18 1 0
|
|
||||||
code.uint32.ru/tiny/watchdog/checks.go:33.18,35.3 1 0
|
|
||||||
code.uint32.ru/tiny/watchdog/checks.go:37.2,37.51 1 0
|
|
||||||
code.uint32.ru/tiny/watchdog/checks.go:37.51,42.17 4 0
|
|
||||||
code.uint32.ru/tiny/watchdog/checks.go:42.17,44.4 1 0
|
|
||||||
code.uint32.ru/tiny/watchdog/checks.go:46.3,47.17 2 0
|
|
||||||
code.uint32.ru/tiny/watchdog/checks.go:47.17,49.4 1 0
|
|
||||||
code.uint32.ru/tiny/watchdog/checks.go:51.3,54.17 3 0
|
|
||||||
code.uint32.ru/tiny/watchdog/checks.go:54.17,56.4 1 0
|
|
||||||
code.uint32.ru/tiny/watchdog/checks.go:58.3,58.39 1 0
|
|
||||||
code.uint32.ru/tiny/watchdog/checks.go:58.39,60.4 1 0
|
|
||||||
code.uint32.ru/tiny/watchdog/checks.go:62.3,62.23 1 0
|
|
||||||
code.uint32.ru/tiny/watchdog/checks.go:75.70,77.16 2 0
|
|
||||||
code.uint32.ru/tiny/watchdog/checks.go:77.16,79.3 1 0
|
|
||||||
code.uint32.ru/tiny/watchdog/checks.go:81.2,81.18 1 0
|
|
||||||
code.uint32.ru/tiny/watchdog/checks.go:81.18,83.3 1 0
|
|
||||||
code.uint32.ru/tiny/watchdog/checks.go:85.2,85.51 1 0
|
|
||||||
code.uint32.ru/tiny/watchdog/checks.go:85.51,90.17 4 0
|
|
||||||
code.uint32.ru/tiny/watchdog/checks.go:90.17,92.4 1 0
|
|
||||||
code.uint32.ru/tiny/watchdog/checks.go:94.3,95.17 2 0
|
|
||||||
code.uint32.ru/tiny/watchdog/checks.go:95.17,97.4 1 0
|
|
||||||
code.uint32.ru/tiny/watchdog/checks.go:99.3,101.39 2 0
|
|
||||||
code.uint32.ru/tiny/watchdog/checks.go:101.39,103.4 1 0
|
|
||||||
code.uint32.ru/tiny/watchdog/checks.go:105.3,105.28 1 0
|
|
||||||
code.uint32.ru/tiny/watchdog/checks.go:116.69,117.18 1 0
|
|
||||||
code.uint32.ru/tiny/watchdog/checks.go:117.18,119.3 1 0
|
|
||||||
code.uint32.ru/tiny/watchdog/checks.go:121.2,121.51 1 0
|
|
||||||
code.uint32.ru/tiny/watchdog/checks.go:121.51,123.56 2 0
|
|
||||||
code.uint32.ru/tiny/watchdog/checks.go:123.56,125.4 1 0
|
|
||||||
code.uint32.ru/tiny/watchdog/checks.go:127.3,128.17 2 0
|
|
||||||
code.uint32.ru/tiny/watchdog/checks.go:128.17,130.4 1 0
|
|
||||||
code.uint32.ru/tiny/watchdog/checks.go:132.3,134.23 2 0
|
|
||||||
code.uint32.ru/tiny/watchdog/entity.go:16.33,17.11 1 0
|
|
||||||
code.uint32.ru/tiny/watchdog/entity.go:18.16,19.14 1 0
|
|
||||||
code.uint32.ru/tiny/watchdog/entity.go:20.18,21.16 1 0
|
|
||||||
code.uint32.ru/tiny/watchdog/entity.go:22.10,23.19 1 0
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:25.37,31.2 2 1
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:38.41,46.2 5 1
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:53.47,58.2 3 1
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:62.50,67.29 4 1
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:67.29,68.37 1 1
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:68.37,69.12 1 1
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:72.3,72.35 1 1
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:75.2,75.22 1 1
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:85.92,89.22 3 1
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:89.22,91.3 1 1
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:94.2,94.24 1 1
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:94.24,96.3 1 1
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:98.2,99.22 2 1
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:99.22,101.3 1 1
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:103.2,105.27 2 1
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:111.33,115.22 3 1
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:115.22,117.3 1 1
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:119.2,119.9 1 1
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:120.24,120.24 0 0
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:122.10,123.24 1 1
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:126.2,126.26 1 1
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:126.27,128.3 0 0
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:130.2,130.12 1 1
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:136.96,138.24 2 1
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:138.24,141.3 2 1
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:143.2,146.22 3 1
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:146.22,148.3 1 1
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:150.2,152.55 2 1
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:152.55,153.22 1 1
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:153.22,155.4 1 1
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:155.9,155.29 1 0
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:155.29,157.4 1 0
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:158.3,158.11 1 0
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:161.2,161.22 1 1
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:164.41,169.2 3 1
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:171.96,180.27 6 1
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:180.27,181.23 1 1
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:181.23,185.8 3 1
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:185.8,198.33 5 1
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:198.33,200.6 1 1
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:202.5,204.12 2 1
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:205.19,205.19 0 0
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:207.17,208.16 1 1
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:209.23,210.22 1 1
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:218.2,218.12 1 1
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:218.12,226.3 5 1
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:229.92,237.23 5 1
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:237.23,240.25 2 1
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:240.25,241.17 1 1
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:241.18,242.5 0 1
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:246.4,260.14 7 1
|
|
||||||
code.uint32.ru/tiny/watchdog/watchdog.go:264.2,266.17 2 1
|
|
@@ -24,7 +24,9 @@ func (s Status) String() string {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// CheckFunc is a function run by Watchdog.
|
// CheckFunc is a function that does the actual work.
|
||||||
|
// This package provides a number of check functions but
|
||||||
|
// any function matching the signature may be provided.
|
||||||
// It must obey context dealine and cancellation.
|
// It must obey context dealine and cancellation.
|
||||||
type CheckFunc func(context.Context) (Status, error)
|
type CheckFunc func(context.Context) (Status, error)
|
||||||
|
|
||||||
|
272
watchdog.go
272
watchdog.go
@@ -6,8 +6,6 @@ import (
|
|||||||
"slices"
|
"slices"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"golang.org/x/sync/errgroup"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
@@ -18,20 +16,37 @@ var (
|
|||||||
// Watchdog keeps checks to run either periodically
|
// Watchdog keeps checks to run either periodically
|
||||||
// or on demand.
|
// or on demand.
|
||||||
type Watchdog struct {
|
type Watchdog struct {
|
||||||
checks []Check
|
checks []*wdCheck
|
||||||
running *running
|
mu sync.Mutex
|
||||||
mu sync.Mutex
|
|
||||||
|
monitoring bool // is monitoring currently in progress
|
||||||
|
|
||||||
|
events chan CheckResult // output channel
|
||||||
|
limiter chan struct{} // TODO: use proper limiter here
|
||||||
|
|
||||||
|
timeout time.Duration
|
||||||
|
|
||||||
|
running int
|
||||||
}
|
}
|
||||||
|
|
||||||
type running struct {
|
type wdCheck struct {
|
||||||
out chan CheckResult
|
check Check
|
||||||
stop chan struct{}
|
stop chan struct{}
|
||||||
}
|
}
|
||||||
|
|
||||||
// New accepts checks to run.
|
// New creates instance of Watchdog with
|
||||||
|
// provided checks.
|
||||||
func New(checks ...Check) *Watchdog {
|
func New(checks ...Check) *Watchdog {
|
||||||
|
ch := make([]*wdCheck, len(checks))
|
||||||
|
|
||||||
|
for i := range checks {
|
||||||
|
ch[i] = &wdCheck{
|
||||||
|
check: checks[i],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
w := &Watchdog{
|
w := &Watchdog{
|
||||||
checks: checks,
|
checks: ch,
|
||||||
}
|
}
|
||||||
|
|
||||||
return w
|
return w
|
||||||
@@ -42,94 +57,129 @@ func (w *Watchdog) ListChecks() []Check {
|
|||||||
defer w.mu.Unlock()
|
defer w.mu.Unlock()
|
||||||
|
|
||||||
out := make([]Check, len(w.checks))
|
out := make([]Check, len(w.checks))
|
||||||
copy(out, w.checks)
|
for i := range w.checks {
|
||||||
|
out[i] = w.checks[i].check
|
||||||
|
}
|
||||||
|
|
||||||
return out
|
return out
|
||||||
}
|
}
|
||||||
|
|
||||||
// AddChecks adds checks to the group. This DOES NOT
|
// SetTimeout sets timeout for all checks that
|
||||||
// affect already runnning monitoring for group. Use Stop and
|
// get started with Start method. Changing this
|
||||||
// then Start to restart monitoring when a new check is added.
|
// value does not affect running checks.
|
||||||
|
// Watchdog does not enforce this timeout, it
|
||||||
|
// just passes context.WithTimemout to check functions.
|
||||||
|
// If this method is not called the default timeout
|
||||||
|
// of 10 seconds is used.
|
||||||
|
func (w *Watchdog) SetTimeout(d time.Duration) {
|
||||||
|
w.mu.Lock()
|
||||||
|
defer w.mu.Unlock()
|
||||||
|
|
||||||
|
w.timeout = d
|
||||||
|
}
|
||||||
|
|
||||||
|
// AddChecks adds checks to the group.
|
||||||
|
// If monitoring is in progress then monitoring it started for the newly added
|
||||||
|
// check as well.
|
||||||
// Check may have duplicate Name fields but note that RemoveChecks removes checks
|
// Check may have duplicate Name fields but note that RemoveChecks removes checks
|
||||||
// by their Name fields.
|
// by their Name fields.
|
||||||
func (w *Watchdog) AddChecks(checks ...Check) {
|
func (w *Watchdog) AddChecks(checks ...Check) {
|
||||||
w.mu.Lock()
|
w.mu.Lock()
|
||||||
defer w.mu.Unlock()
|
defer w.mu.Unlock()
|
||||||
|
|
||||||
w.checks = append(w.checks, checks...)
|
for i := range checks {
|
||||||
|
nc := &wdCheck{
|
||||||
|
check: checks[i],
|
||||||
|
}
|
||||||
|
w.checks = append(w.checks, nc)
|
||||||
|
|
||||||
|
if w.monitoring {
|
||||||
|
w.startMonitoring(nc)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// RemoveChecks removes the named checks.
|
// RemoveChecks removes the named checks.
|
||||||
// This does not affect the already running monitoring for the group.
|
|
||||||
func (w *Watchdog) RemoveChecks(names ...string) {
|
func (w *Watchdog) RemoveChecks(names ...string) {
|
||||||
w.mu.Lock()
|
w.mu.Lock()
|
||||||
defer w.mu.Unlock()
|
defer w.mu.Unlock()
|
||||||
|
|
||||||
remaining := make([]Check, 0, len(w.checks)-len(names))
|
remaining := make([]*wdCheck, 0, len(w.checks)-len(names))
|
||||||
for _, e := range w.checks {
|
for _, c := range w.checks {
|
||||||
if slices.Contains(names, e.Name) {
|
if slices.Contains(names, c.check.Name) {
|
||||||
|
if w.monitoring {
|
||||||
|
w.stopMonitoring(c)
|
||||||
|
}
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
remaining = append(remaining, e)
|
remaining = append(remaining, c)
|
||||||
}
|
}
|
||||||
|
|
||||||
w.checks = remaining
|
w.checks = remaining
|
||||||
}
|
}
|
||||||
|
|
||||||
// Start starts monitoring.
|
// Start starts monitoring.
|
||||||
// Subsequent calls to start just returns the SAME channel. If you need
|
// Subsequent calls to start return the SAME channel. If you need
|
||||||
// to have more that one reader from the channel - fan out on your side.
|
// to have more that one reader from the channel - fan out on your side.
|
||||||
// On start Watchdog runs all provided check and pushes current status of ALL checks of
|
// On start Watchdog runs all provided check and pushes current status of checks of
|
||||||
// the group to the channel.
|
// to the channel.
|
||||||
// Subsequently only changes of status are pushed regardless of return error values of
|
// Subsequently if check func returns different status or if it returns an error the
|
||||||
// CheckFunc provided to Watchdog.
|
// result is pushed to the channel.
|
||||||
// Concurrency limits the number of checks that can run concurrently. 0 means no
|
// Concurrency argument limits the number of checks that can run concurrently. 0 means no
|
||||||
// limit (all checks may run concurrently).
|
// limit (all checks may run concurrently).
|
||||||
func (w *Watchdog) Start(ctx context.Context, concurrency int) (<-chan CheckResult, error) {
|
func (w *Watchdog) Start(concurrency int) (<-chan CheckResult, error) {
|
||||||
w.mu.Lock()
|
w.mu.Lock()
|
||||||
defer w.mu.Unlock()
|
defer w.mu.Unlock()
|
||||||
|
|
||||||
if w.running != nil {
|
|
||||||
return w.running.out, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// start new session
|
|
||||||
if len(w.checks) == 0 {
|
if len(w.checks) == 0 {
|
||||||
return nil, ErrNotConfigured
|
return nil, ErrNotConfigured
|
||||||
}
|
}
|
||||||
|
|
||||||
cp := w.copyChecks()
|
if w.monitoring {
|
||||||
if concurrency == 0 {
|
return w.events, nil
|
||||||
concurrency = len(cp)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
w.runMonitoringForGroup(ctx, cp, concurrency)
|
if concurrency == 0 {
|
||||||
|
concurrency = len(w.checks)
|
||||||
|
}
|
||||||
|
|
||||||
return w.running.out, nil
|
if w.timeout == 0 {
|
||||||
|
w.timeout = DefaultTimeout
|
||||||
|
}
|
||||||
|
|
||||||
|
w.events = make(chan CheckResult, concurrency)
|
||||||
|
w.limiter = make(chan struct{}, concurrency)
|
||||||
|
|
||||||
|
for i := range w.checks {
|
||||||
|
w.startMonitoring(w.checks[i])
|
||||||
|
}
|
||||||
|
|
||||||
|
w.monitoring = true
|
||||||
|
|
||||||
|
return w.events, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Stop stops execution of checks.
|
// Stop stops execution of checks.
|
||||||
// Subsequent calls of Stop for the same groupID
|
// Subsequent calls return ErrNotRunning.
|
||||||
// return ErrNotRunning.
|
|
||||||
func (w *Watchdog) Stop() error {
|
func (w *Watchdog) Stop() error {
|
||||||
w.mu.Lock()
|
w.mu.Lock()
|
||||||
defer w.mu.Unlock()
|
defer w.mu.Unlock()
|
||||||
|
|
||||||
if w.running == nil {
|
if !w.monitoring {
|
||||||
return ErrNotRunning
|
return ErrNotRunning
|
||||||
}
|
}
|
||||||
|
|
||||||
close(w.running.stop)
|
for i := range w.checks {
|
||||||
|
w.stopMonitoring(w.checks[i])
|
||||||
w.running = nil
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// RunImmediately runs configured checks for group concurrently and returns results.
|
// RunImmediately runs configured checks concurrently and returns results.
|
||||||
// Concurrency limits number of checks that are allowed run concurrently. Setting
|
// Setting concurrency to 0 means that all check of the group are allowed to run simultaneously.
|
||||||
// concurrency to 0 means that all check of the group are allowed to run simultaneously.
|
// Otherwise at most concurrency checks will be allowed to run simultaneously.
|
||||||
func (w *Watchdog) RunImmediately(ctx context.Context, concurrency int) ([]CheckResult, error) {
|
func (w *Watchdog) RunImmediately(ctx context.Context, concurrency int) ([]CheckResult, error) {
|
||||||
w.mu.Lock()
|
w.mu.Lock()
|
||||||
if len(w.checks) == 0 {
|
if len(w.checks) == 0 {
|
||||||
@@ -160,86 +210,94 @@ func (w *Watchdog) RunImmediately(ctx context.Context, concurrency int) ([]Check
|
|||||||
|
|
||||||
func (w *Watchdog) copyChecks() []Check {
|
func (w *Watchdog) copyChecks() []Check {
|
||||||
cp := make([]Check, len(w.checks))
|
cp := make([]Check, len(w.checks))
|
||||||
copy(cp, w.checks)
|
for i := range w.checks {
|
||||||
|
cp[i] = w.checks[i].check
|
||||||
|
}
|
||||||
|
|
||||||
return cp
|
return cp
|
||||||
}
|
}
|
||||||
|
|
||||||
func (w *Watchdog) runMonitoringForGroup(ctx context.Context, checks []Check, concurrency int) {
|
func (w *Watchdog) startMonitoring(wdc *wdCheck) {
|
||||||
events := make(chan CheckResult, len(checks))
|
wdc.stop = make(chan struct{})
|
||||||
stop := make(chan struct{})
|
|
||||||
|
|
||||||
w.running = &running{out: events, stop: stop}
|
state := CheckResult{}
|
||||||
|
ticker := time.Tick(wdc.check.Interval)
|
||||||
|
|
||||||
grp := errgroup.Group{}
|
c := wdc.check
|
||||||
sema := make(chan struct{}, concurrency)
|
|
||||||
|
|
||||||
for _, c := range checks {
|
// this method is called only with
|
||||||
grp.Go(func() error {
|
// w.mu locked
|
||||||
state := CheckResult{}
|
w.running++
|
||||||
ticker := time.Tick(c.Interval)
|
|
||||||
|
|
||||||
for {
|
|
||||||
sema <- struct{}{}
|
|
||||||
|
|
||||||
status, err := c.Check(ctx)
|
|
||||||
|
|
||||||
<-sema
|
|
||||||
|
|
||||||
s := CheckResult{
|
|
||||||
Name: c.Name,
|
|
||||||
Status: status,
|
|
||||||
Error: err,
|
|
||||||
}
|
|
||||||
|
|
||||||
if s.Status != state.Status {
|
|
||||||
events <- s
|
|
||||||
}
|
|
||||||
|
|
||||||
state = s
|
|
||||||
|
|
||||||
select {
|
|
||||||
case <-ticker:
|
|
||||||
// continue looping
|
|
||||||
case <-stop:
|
|
||||||
return nil
|
|
||||||
case <-ctx.Done():
|
|
||||||
return ctx.Err()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
// separate goroutine to close the output chan
|
|
||||||
// when everyone's dead
|
|
||||||
go func() {
|
go func() {
|
||||||
grp.Wait()
|
defer func() {
|
||||||
close(events)
|
w.mu.Lock()
|
||||||
|
defer w.mu.Unlock()
|
||||||
|
|
||||||
w.mu.Lock()
|
w.running--
|
||||||
defer w.mu.Unlock()
|
if w.running == 0 {
|
||||||
if w.running != nil {
|
// last goroutine to exit will also
|
||||||
w.running = nil
|
// close the output chan
|
||||||
|
close(w.events)
|
||||||
|
w.monitoring = false
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
for {
|
||||||
|
w.limiter <- struct{}{}
|
||||||
|
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), w.timeout)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
status, err := c.Check(ctx)
|
||||||
|
|
||||||
|
<-w.limiter
|
||||||
|
|
||||||
|
s := CheckResult{
|
||||||
|
Name: c.Name,
|
||||||
|
Status: status,
|
||||||
|
Error: err,
|
||||||
|
}
|
||||||
|
|
||||||
|
if s.Status != state.Status || s.Error != nil {
|
||||||
|
w.events <- s
|
||||||
|
}
|
||||||
|
|
||||||
|
state = s
|
||||||
|
|
||||||
|
select {
|
||||||
|
case <-ticker:
|
||||||
|
// continue looping
|
||||||
|
case <-wdc.stop:
|
||||||
|
// stopping this specific check
|
||||||
|
return
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (w *Watchdog) stopMonitoring(wdc *wdCheck) {
|
||||||
|
close(wdc.stop)
|
||||||
|
}
|
||||||
|
|
||||||
func runChecksConcurrently(ctx context.Context, ch []Check, concurrency int) []CheckResult {
|
func runChecksConcurrently(ctx context.Context, ch []Check, concurrency int) []CheckResult {
|
||||||
statuses := make([]CheckResult, 0, len(ch))
|
statuses := make([]CheckResult, 0, len(ch))
|
||||||
m := sync.Mutex{} // for append operations
|
m := sync.Mutex{} // for append operations
|
||||||
|
|
||||||
group := errgroup.Group{}
|
|
||||||
|
|
||||||
sema := make(chan struct{}, concurrency) // semaphore to limit concurrency
|
sema := make(chan struct{}, concurrency) // semaphore to limit concurrency
|
||||||
|
done := make(chan struct{}, len(ch))
|
||||||
|
|
||||||
|
count := len(ch)
|
||||||
|
|
||||||
for _, e := range ch {
|
for _, e := range ch {
|
||||||
sema <- struct{}{} // acquire
|
sema <- struct{}{} // acquire
|
||||||
|
go func() error {
|
||||||
group.Go(func() error {
|
|
||||||
defer func() {
|
defer func() {
|
||||||
|
<-sema
|
||||||
|
done <- struct{}{}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
// relying on fact that CheckFunc obeys context
|
// relying on assumption that CheckFunc obeys context
|
||||||
// cancellation
|
// cancellation
|
||||||
status, err := e.Check(ctx)
|
status, err := e.Check(ctx)
|
||||||
|
|
||||||
@@ -253,13 +311,17 @@ func runChecksConcurrently(ctx context.Context, ch []Check, concurrency int) []C
|
|||||||
defer m.Unlock()
|
defer m.Unlock()
|
||||||
statuses = append(statuses, r)
|
statuses = append(statuses, r)
|
||||||
|
|
||||||
<-sema // release
|
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
})
|
}()
|
||||||
}
|
}
|
||||||
|
|
||||||
group.Wait()
|
// wait for all to finish
|
||||||
|
for range done {
|
||||||
|
count--
|
||||||
|
if count == 0 {
|
||||||
|
close(done)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return statuses
|
return statuses
|
||||||
}
|
}
|
||||||
|
@@ -2,6 +2,7 @@ package watchdog_test
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"errors"
|
||||||
"reflect"
|
"reflect"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
@@ -16,8 +17,14 @@ type mockChecker struct {
|
|||||||
called bool
|
called bool
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *mockChecker) Func(_ context.Context) (watchdog.Status, error) {
|
func (m *mockChecker) Func(ctx context.Context) (watchdog.Status, error) {
|
||||||
m.called = true
|
m.called = true
|
||||||
|
|
||||||
|
time.Sleep(time.Millisecond * 10)
|
||||||
|
if err := ctx.Err(); err != nil {
|
||||||
|
return watchdog.StatusUnknown, err
|
||||||
|
}
|
||||||
|
|
||||||
return m.status, m.err
|
return m.status, m.err
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -106,9 +113,9 @@ func TestRunImmediately(t *testing.T) {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestStart(t *testing.T) {
|
func TestStartStop(t *testing.T) {
|
||||||
w := new(watchdog.Watchdog)
|
w := new(watchdog.Watchdog)
|
||||||
if _, err := w.Start(t.Context(), 0); err == nil {
|
if _, err := w.Start(0); err == nil {
|
||||||
t.Error("Start doen't error on empty checks slice")
|
t.Error("Start doen't error on empty checks slice")
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -117,12 +124,12 @@ func TestStart(t *testing.T) {
|
|||||||
|
|
||||||
w.AddChecks(m1.Check(), m2.Check())
|
w.AddChecks(m1.Check(), m2.Check())
|
||||||
|
|
||||||
out, err := w.Start(t.Context(), 0)
|
out, err := w.Start(0)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Error("Start returns error", err)
|
t.Error("Start returns error", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
out2, err := w.Start(t.Context(), 0)
|
out2, err := w.Start(0)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Error("second call to Start returns error")
|
t.Error("second call to Start returns error")
|
||||||
}
|
}
|
||||||
@@ -131,13 +138,12 @@ func TestStart(t *testing.T) {
|
|||||||
t.Error("returned channels are not equal")
|
t.Error("returned channels are not equal")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
time.Sleep(time.Second)
|
||||||
|
if err := w.Stop(); err != nil {
|
||||||
|
t.Error("Stop returned error", err)
|
||||||
|
}
|
||||||
|
|
||||||
count := 0
|
count := 0
|
||||||
go func() {
|
|
||||||
time.Sleep(time.Second)
|
|
||||||
if err := w.Stop(); err != nil {
|
|
||||||
t.Error("Stop returned error", err)
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
|
|
||||||
for res := range out {
|
for res := range out {
|
||||||
if res.Status != watchdog.StatusOK || res.Error != nil {
|
if res.Status != watchdog.StatusOK || res.Error != nil {
|
||||||
@@ -162,22 +168,19 @@ func TestStart(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestWatchdogObeysContext(t *testing.T) {
|
func TestSetTimeout(t *testing.T) {
|
||||||
w := new(watchdog.Watchdog)
|
w := new(watchdog.Watchdog)
|
||||||
|
w.SetTimeout(time.Millisecond)
|
||||||
m1 := newMockChecker("mock", watchdog.StatusOK, nil)
|
m1 := newMockChecker("mock", watchdog.StatusOK, nil)
|
||||||
m2 := newMockChecker("mock2", watchdog.StatusOK, nil)
|
w.AddChecks(m1.Check())
|
||||||
|
|
||||||
w.AddChecks(m1.Check(), m2.Check())
|
out, _ := w.Start(0)
|
||||||
|
|
||||||
ctx, stop := context.WithCancel(t.Context())
|
|
||||||
if _, err := w.Start(ctx, 2); err != nil {
|
|
||||||
t.Error(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
stop()
|
|
||||||
// wait for goroutines to finish
|
|
||||||
time.Sleep(time.Second)
|
time.Sleep(time.Second)
|
||||||
if err := w.Stop(); err == nil {
|
w.Stop()
|
||||||
t.Error("no error calling Stop for stopped instance")
|
res := <-out
|
||||||
|
|
||||||
|
if !(res.Status == watchdog.StatusUnknown) || !errors.Is(res.Error, context.DeadlineExceeded) {
|
||||||
|
t.Logf("got status: %s, err: %v", res.Status, res.Error)
|
||||||
|
t.Fatal("incorrect status for timed out op")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user