Improve queue and logger context (#24924)

Before there was a "graceful function": RunWithShutdownFns, it's mainly
for some modules which doesn't support context.

The old queue system doesn't work well with context, so the old queues
need it.

After the queue refactoring, the new queue works with context well, so,
use Golang context as much as possible, the `RunWithShutdownFns` could
be removed (replaced by RunWithCancel for context cancel mechanism), the
related code could be simplified.

This PR also fixes some legacy queue-init problems, eg:

* typo : archiver: "unable to create codes indexer queue" => "unable to
create repo-archive queue"
* no nil check for failed queues, which causes unfriendly panic

After this PR, many goroutines could have better display name:

![image](701b2a9b-8065-4137-aeaa-0bda2b34604a)

![image](f1d5f50f-0534-40f0-b0be-f2c9daa5fe92)
This commit is contained in:
wxiaoguang 2023-05-26 15:31:55 +08:00 committed by GitHub
parent e4922d484b
commit 18f26cfbf7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
31 changed files with 204 additions and 263 deletions

View file

@ -88,22 +88,22 @@ func (m *Manager) FlushAll(ctx context.Context, timeout time.Duration) error {
}
// CreateSimpleQueue creates a simple queue from global setting config provider by name
func CreateSimpleQueue[T any](name string, handler HandlerFuncT[T]) *WorkerPoolQueue[T] {
return createWorkerPoolQueue(name, setting.CfgProvider, handler, false)
func CreateSimpleQueue[T any](ctx context.Context, name string, handler HandlerFuncT[T]) *WorkerPoolQueue[T] {
return createWorkerPoolQueue(ctx, name, setting.CfgProvider, handler, false)
}
// CreateUniqueQueue creates a unique queue from global setting config provider by name
func CreateUniqueQueue[T any](name string, handler HandlerFuncT[T]) *WorkerPoolQueue[T] {
return createWorkerPoolQueue(name, setting.CfgProvider, handler, true)
func CreateUniqueQueue[T any](ctx context.Context, name string, handler HandlerFuncT[T]) *WorkerPoolQueue[T] {
return createWorkerPoolQueue(ctx, name, setting.CfgProvider, handler, true)
}
func createWorkerPoolQueue[T any](name string, cfgProvider setting.ConfigProvider, handler HandlerFuncT[T], unique bool) *WorkerPoolQueue[T] {
func createWorkerPoolQueue[T any](ctx context.Context, name string, cfgProvider setting.ConfigProvider, handler HandlerFuncT[T], unique bool) *WorkerPoolQueue[T] {
queueSetting, err := setting.GetQueueSettings(cfgProvider, name)
if err != nil {
log.Error("Failed to get queue settings for %q: %v", name, err)
return nil
}
w, err := NewWorkerPoolQueueBySetting(name, queueSetting, handler, unique)
w, err := NewWorkerPoolQueueWithContext(ctx, name, queueSetting, handler, unique)
if err != nil {
log.Error("Failed to create queue %q: %v", name, err)
return nil

View file

@ -29,7 +29,7 @@ func TestManager(t *testing.T) {
if err != nil {
return nil, err
}
return NewWorkerPoolQueueBySetting(name, qs, func(s ...int) (unhandled []int) { return nil }, false)
return newWorkerPoolQueueForTest(name, qs, func(s ...int) (unhandled []int) { return nil }, false)
}
// test invalid CONN_STR
@ -80,7 +80,7 @@ MAX_WORKERS = 2
assert.NoError(t, err)
q1 := createWorkerPoolQueue[string]("no-such", cfgProvider, nil, false)
q1 := createWorkerPoolQueue[string](context.Background(), "no-such", cfgProvider, nil, false)
assert.Equal(t, "no-such", q1.GetName())
assert.Equal(t, "dummy", q1.GetType()) // no handler, so it becomes dummy
assert.Equal(t, filepath.Join(setting.AppDataPath, "queues/dir1"), q1.baseConfig.DataFullDir)
@ -96,7 +96,7 @@ MAX_WORKERS = 2
assert.Equal(t, "string", q1.GetItemTypeName())
qid1 := GetManager().qidCounter
q2 := createWorkerPoolQueue("sub", cfgProvider, func(s ...int) (unhandled []int) { return nil }, false)
q2 := createWorkerPoolQueue(context.Background(), "sub", cfgProvider, func(s ...int) (unhandled []int) { return nil }, false)
assert.Equal(t, "sub", q2.GetName())
assert.Equal(t, "level", q2.GetType())
assert.Equal(t, filepath.Join(setting.AppDataPath, "queues/dir2"), q2.baseConfig.DataFullDir)

View file

@ -5,6 +5,7 @@ package queue
import (
"context"
"runtime/pprof"
"sync"
"sync/atomic"
"time"
@ -13,9 +14,10 @@ import (
)
var (
infiniteTimerC = make(chan time.Time)
batchDebounceDuration = 100 * time.Millisecond
workerIdleDuration = 1 * time.Second
infiniteTimerC = make(chan time.Time)
batchDebounceDuration = 100 * time.Millisecond
workerIdleDuration = 1 * time.Second
shutdownDefaultTimeout = 2 * time.Second
unhandledItemRequeueDuration atomic.Int64 // to avoid data race during test
)
@ -116,13 +118,15 @@ func (q *WorkerPoolQueue[T]) doWorkerHandle(batch []T) {
// If the queue is shutting down, it returns true and try to push the items
// Otherwise it does nothing and returns false
func (q *WorkerPoolQueue[T]) basePushForShutdown(items ...T) bool {
ctxShutdown := q.ctxShutdown.Load()
if ctxShutdown == nil {
shutdownTimeout := time.Duration(q.shutdownTimeout.Load())
if shutdownTimeout == 0 {
return false
}
ctxShutdown, ctxShutdownCancel := context.WithTimeout(context.Background(), shutdownTimeout)
defer ctxShutdownCancel()
for _, item := range items {
// if there is still any error, the queue can do nothing instead of losing the items
if err := q.baseQueue.PushItem(*ctxShutdown, q.marshal(item)); err != nil {
if err := q.baseQueue.PushItem(ctxShutdown, q.marshal(item)); err != nil {
log.Error("Failed to requeue item for queue %q when shutting down: %v", q.GetName(), err)
}
}
@ -246,6 +250,8 @@ var skipFlushChan = make(chan flushType) // an empty flush chan, used to skip re
// doRun is the main loop of the queue. All related "doXxx" functions are executed in its context.
func (q *WorkerPoolQueue[T]) doRun() {
pprof.SetGoroutineLabels(q.ctxRun)
log.Debug("Queue %q starts running", q.GetName())
defer log.Debug("Queue %q stops running", q.GetName())
@ -271,8 +277,8 @@ func (q *WorkerPoolQueue[T]) doRun() {
}
}
ctxShutdownPtr := q.ctxShutdown.Load()
if ctxShutdownPtr != nil {
shutdownTimeout := time.Duration(q.shutdownTimeout.Load())
if shutdownTimeout != 0 {
// if there is a shutdown context, try to push the items back to the base queue
q.basePushForShutdown(unhandled...)
workerDone := make(chan struct{})
@ -280,7 +286,7 @@ func (q *WorkerPoolQueue[T]) doRun() {
go func() { wg.wg.Wait(); close(workerDone) }()
select {
case <-workerDone:
case <-(*ctxShutdownPtr).Done():
case <-time.After(shutdownTimeout):
log.Error("Queue %q is shutting down, but workers are still running after timeout", q.GetName())
}
} else {

View file

@ -10,9 +10,9 @@ import (
"sync/atomic"
"time"
"code.gitea.io/gitea/modules/graceful"
"code.gitea.io/gitea/modules/json"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/process"
"code.gitea.io/gitea/modules/setting"
)
@ -21,8 +21,9 @@ import (
type WorkerPoolQueue[T any] struct {
ctxRun context.Context
ctxRunCancel context.CancelFunc
ctxShutdown atomic.Pointer[context.Context]
shutdownDone chan struct{}
shutdownDone chan struct{}
shutdownTimeout atomic.Int64 // in case some buggy handlers (workers) would hang forever, "shutdown" should finish in predictable time
origHandler HandlerFuncT[T]
safeHandler HandlerFuncT[T]
@ -175,22 +176,19 @@ func (q *WorkerPoolQueue[T]) Has(data T) (bool, error) {
return q.baseQueue.HasItem(q.ctxRun, q.marshal(data))
}
func (q *WorkerPoolQueue[T]) Run(atShutdown, atTerminate func(func())) {
atShutdown(func() {
// in case some queue handlers are slow or have hanging bugs, at most wait for a short time
q.ShutdownWait(1 * time.Second)
})
func (q *WorkerPoolQueue[T]) Run() {
q.doRun()
}
func (q *WorkerPoolQueue[T]) Cancel() {
q.ctxRunCancel()
}
// ShutdownWait shuts down the queue, waits for all workers to finish their jobs, and pushes the unhandled items back to the base queue
// It waits for all workers (handlers) to finish their jobs, in case some buggy handlers would hang forever, a reasonable timeout is needed
func (q *WorkerPoolQueue[T]) ShutdownWait(timeout time.Duration) {
shutdownCtx, shutdownCtxCancel := context.WithTimeout(context.Background(), timeout)
defer shutdownCtxCancel()
if q.ctxShutdown.CompareAndSwap(nil, &shutdownCtx) {
q.ctxRunCancel()
}
q.shutdownTimeout.Store(int64(timeout))
q.ctxRunCancel()
<-q.shutdownDone
}
@ -207,7 +205,11 @@ func getNewQueueFn(t string) (string, func(cfg *BaseConfig, unique bool) (baseQu
}
}
func NewWorkerPoolQueueBySetting[T any](name string, queueSetting setting.QueueSettings, handler HandlerFuncT[T], unique bool) (*WorkerPoolQueue[T], error) {
func newWorkerPoolQueueForTest[T any](name string, queueSetting setting.QueueSettings, handler HandlerFuncT[T], unique bool) (*WorkerPoolQueue[T], error) {
return NewWorkerPoolQueueWithContext(context.Background(), name, queueSetting, handler, unique)
}
func NewWorkerPoolQueueWithContext[T any](ctx context.Context, name string, queueSetting setting.QueueSettings, handler HandlerFuncT[T], unique bool) (*WorkerPoolQueue[T], error) {
if handler == nil {
log.Debug("Use dummy queue for %q because handler is nil and caller doesn't want to process the queue items", name)
queueSetting.Type = "dummy"
@ -224,10 +226,11 @@ func NewWorkerPoolQueueBySetting[T any](name string, queueSetting setting.QueueS
}
log.Trace("Created queue %q of type %q", name, queueType)
w.ctxRun, w.ctxRunCancel = context.WithCancel(graceful.GetManager().ShutdownContext())
w.ctxRun, _, w.ctxRunCancel = process.GetManager().AddTypedContext(ctx, "Queue: "+w.GetName(), process.SystemProcessType, false)
w.batchChan = make(chan []T)
w.flushChan = make(chan flushType)
w.shutdownDone = make(chan struct{})
w.shutdownTimeout.Store(int64(shutdownDefaultTimeout))
w.workerMaxNum = queueSetting.MaxWorkers
w.batchLength = queueSetting.BatchLength

View file

@ -16,17 +16,9 @@ import (
)
func runWorkerPoolQueue[T any](q *WorkerPoolQueue[T]) func() {
var stop func()
started := make(chan struct{})
stopped := make(chan struct{})
go func() {
q.Run(func(f func()) { stop = f; close(started) }, nil)
close(stopped)
}()
<-started
go q.Run()
return func() {
stop()
<-stopped
q.ShutdownWait(1 * time.Second)
}
}
@ -57,7 +49,7 @@ func TestWorkerPoolQueueUnhandled(t *testing.T) {
return unhandled
}
q, _ := NewWorkerPoolQueueBySetting("test-workpoolqueue", queueSetting, handler, false)
q, _ := newWorkerPoolQueueForTest("test-workpoolqueue", queueSetting, handler, false)
stop := runWorkerPoolQueue(q)
for i := 0; i < queueSetting.Length; i++ {
testRecorder.Record("push:%v", i)
@ -145,7 +137,7 @@ func testWorkerPoolQueuePersistence(t *testing.T, queueSetting setting.QueueSett
return nil
}
q, _ := NewWorkerPoolQueueBySetting("pr_patch_checker_test", queueSetting, testHandler, true)
q, _ := newWorkerPoolQueueForTest("pr_patch_checker_test", queueSetting, testHandler, true)
stop := runWorkerPoolQueue(q)
for i := 0; i < testCount; i++ {
_ = q.Push("task-" + strconv.Itoa(i))
@ -169,7 +161,7 @@ func testWorkerPoolQueuePersistence(t *testing.T, queueSetting setting.QueueSett
return nil
}
q, _ := NewWorkerPoolQueueBySetting("pr_patch_checker_test", queueSetting, testHandler, true)
q, _ := newWorkerPoolQueueForTest("pr_patch_checker_test", queueSetting, testHandler, true)
stop := runWorkerPoolQueue(q)
assert.NoError(t, q.FlushWithContext(context.Background(), 0))
stop()
@ -194,7 +186,7 @@ func TestWorkerPoolQueueActiveWorkers(t *testing.T) {
return nil
}
q, _ := NewWorkerPoolQueueBySetting("test-workpoolqueue", setting.QueueSettings{Type: "channel", BatchLength: 1, MaxWorkers: 1, Length: 100}, handler, false)
q, _ := newWorkerPoolQueueForTest("test-workpoolqueue", setting.QueueSettings{Type: "channel", BatchLength: 1, MaxWorkers: 1, Length: 100}, handler, false)
stop := runWorkerPoolQueue(q)
for i := 0; i < 5; i++ {
assert.NoError(t, q.Push(i))
@ -210,7 +202,7 @@ func TestWorkerPoolQueueActiveWorkers(t *testing.T) {
assert.EqualValues(t, 1, q.GetWorkerNumber()) // there is at least one worker after the queue begins working
stop()
q, _ = NewWorkerPoolQueueBySetting("test-workpoolqueue", setting.QueueSettings{Type: "channel", BatchLength: 1, MaxWorkers: 3, Length: 100}, handler, false)
q, _ = newWorkerPoolQueueForTest("test-workpoolqueue", setting.QueueSettings{Type: "channel", BatchLength: 1, MaxWorkers: 3, Length: 100}, handler, false)
stop = runWorkerPoolQueue(q)
for i := 0; i < 15; i++ {
assert.NoError(t, q.Push(i))
@ -238,23 +230,23 @@ func TestWorkerPoolQueueShutdown(t *testing.T) {
if items[0] == 0 {
close(handlerCalled)
}
time.Sleep(100 * time.Millisecond)
time.Sleep(400 * time.Millisecond)
return items
}
qs := setting.QueueSettings{Type: "level", Datadir: t.TempDir() + "/queue", BatchLength: 3, MaxWorkers: 4, Length: 20}
q, _ := NewWorkerPoolQueueBySetting("test-workpoolqueue", qs, handler, false)
q, _ := newWorkerPoolQueueForTest("test-workpoolqueue", qs, handler, false)
stop := runWorkerPoolQueue(q)
for i := 0; i < qs.Length; i++ {
assert.NoError(t, q.Push(i))
}
<-handlerCalled
time.Sleep(50 * time.Millisecond) // wait for a while to make sure all workers are active
time.Sleep(200 * time.Millisecond) // wait for a while to make sure all workers are active
assert.EqualValues(t, 4, q.GetWorkerActiveNumber())
stop() // stop triggers shutdown
assert.EqualValues(t, 0, q.GetWorkerActiveNumber())
// no item was ever handled, so we still get all of them again
q, _ = NewWorkerPoolQueueBySetting("test-workpoolqueue", qs, handler, false)
q, _ = newWorkerPoolQueueForTest("test-workpoolqueue", qs, handler, false)
assert.EqualValues(t, 20, q.GetQueueItemNumber())
}