Support performance trace (#32973)

1. Add a OpenTelemetry-like shim-layer to collect traces
2. Add a simple builtin trace collector and exporter, end users could
download the diagnosis report to get the traces.

This PR's design is quite lightweight, no hard-dependency, and it is
easy to improve or remove. We can try it on gitea.com first to see
whether it works well, and fine tune the details.

---------

Co-authored-by: silverwind <me@silverwind.io>
This commit is contained in:
wxiaoguang 2025-01-22 02:57:07 +08:00 committed by GitHub
parent 2cb3946496
commit 7069369e03
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
24 changed files with 628 additions and 52 deletions

View file

@ -18,6 +18,7 @@ import (
"time"
"code.gitea.io/gitea/modules/git/internal" //nolint:depguard // only this file can use the internal type CmdArg, other files and packages should use AddXxx functions
"code.gitea.io/gitea/modules/gtprof"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/process"
"code.gitea.io/gitea/modules/util"
@ -54,7 +55,7 @@ func logArgSanitize(arg string) string {
} else if filepath.IsAbs(arg) {
base := filepath.Base(arg)
dir := filepath.Dir(arg)
return filepath.Join(filepath.Base(dir), base)
return ".../" + filepath.Join(filepath.Base(dir), base)
}
return arg
}
@ -295,15 +296,20 @@ func (c *Command) run(skip int, opts *RunOpts) error {
timeout = defaultCommandExecutionTimeout
}
var desc string
cmdLogString := c.LogString()
callerInfo := util.CallerFuncName(1 /* util */ + 1 /* this */ + skip /* parent */)
if pos := strings.LastIndex(callerInfo, "/"); pos >= 0 {
callerInfo = callerInfo[pos+1:]
}
// these logs are for debugging purposes only, so no guarantee of correctness or stability
desc = fmt.Sprintf("git.Run(by:%s, repo:%s): %s", callerInfo, logArgSanitize(opts.Dir), c.LogString())
desc := fmt.Sprintf("git.Run(by:%s, repo:%s): %s", callerInfo, logArgSanitize(opts.Dir), cmdLogString)
log.Debug("git.Command: %s", desc)
_, span := gtprof.GetTracer().Start(c.parentContext, gtprof.TraceSpanGitRun)
defer span.End()
span.SetAttributeString(gtprof.TraceAttrFuncCaller, callerInfo)
span.SetAttributeString(gtprof.TraceAttrGitCommand, cmdLogString)
var ctx context.Context
var cancel context.CancelFunc
var finished context.CancelFunc

View file

@ -58,5 +58,5 @@ func TestCommandString(t *testing.T) {
assert.EqualValues(t, cmd.prog+` a "-m msg" "it's a test" "say \"hello\""`, cmd.LogString())
cmd = NewCommandContextNoGlobals(context.Background(), "url: https://a:b@c/", "/root/dir-a/dir-b")
assert.EqualValues(t, cmd.prog+` "url: https://sanitized-credential@c/" dir-a/dir-b`, cmd.LogString())
assert.EqualValues(t, cmd.prog+` "url: https://sanitized-credential@c/" .../dir-a/dir-b`, cmd.LogString())
}

32
modules/gtprof/event.go Normal file
View file

@ -0,0 +1,32 @@
// Copyright 2025 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package gtprof
type EventConfig struct {
attributes []*TraceAttribute
}
type EventOption interface {
applyEvent(*EventConfig)
}
type applyEventFunc func(*EventConfig)
func (f applyEventFunc) applyEvent(cfg *EventConfig) {
f(cfg)
}
func WithAttributes(attrs ...*TraceAttribute) EventOption {
return applyEventFunc(func(cfg *EventConfig) {
cfg.attributes = append(cfg.attributes, attrs...)
})
}
func eventConfigFromOptions(options ...EventOption) *EventConfig {
cfg := &EventConfig{}
for _, opt := range options {
opt.applyEvent(cfg)
}
return cfg
}

175
modules/gtprof/trace.go Normal file
View file

@ -0,0 +1,175 @@
// Copyright 2025 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package gtprof
import (
"context"
"fmt"
"sync"
"time"
"code.gitea.io/gitea/modules/util"
)
type contextKey struct {
name string
}
var contextKeySpan = &contextKey{"span"}
type traceStarter interface {
start(ctx context.Context, traceSpan *TraceSpan, internalSpanIdx int) (context.Context, traceSpanInternal)
}
type traceSpanInternal interface {
addEvent(name string, cfg *EventConfig)
recordError(err error, cfg *EventConfig)
end()
}
type TraceSpan struct {
// immutable
parent *TraceSpan
internalSpans []traceSpanInternal
internalContexts []context.Context
// mutable, must be protected by mutex
mu sync.RWMutex
name string
statusCode uint32
statusDesc string
startTime time.Time
endTime time.Time
attributes []*TraceAttribute
children []*TraceSpan
}
type TraceAttribute struct {
Key string
Value TraceValue
}
type TraceValue struct {
v any
}
func (t *TraceValue) AsString() string {
return fmt.Sprint(t.v)
}
func (t *TraceValue) AsInt64() int64 {
v, _ := util.ToInt64(t.v)
return v
}
func (t *TraceValue) AsFloat64() float64 {
v, _ := util.ToFloat64(t.v)
return v
}
var globalTraceStarters []traceStarter
type Tracer struct {
starters []traceStarter
}
func (s *TraceSpan) SetName(name string) {
s.mu.Lock()
defer s.mu.Unlock()
s.name = name
}
func (s *TraceSpan) SetStatus(code uint32, desc string) {
s.mu.Lock()
defer s.mu.Unlock()
s.statusCode, s.statusDesc = code, desc
}
func (s *TraceSpan) AddEvent(name string, options ...EventOption) {
cfg := eventConfigFromOptions(options...)
for _, tsp := range s.internalSpans {
tsp.addEvent(name, cfg)
}
}
func (s *TraceSpan) RecordError(err error, options ...EventOption) {
cfg := eventConfigFromOptions(options...)
for _, tsp := range s.internalSpans {
tsp.recordError(err, cfg)
}
}
func (s *TraceSpan) SetAttributeString(key, value string) *TraceSpan {
s.mu.Lock()
defer s.mu.Unlock()
s.attributes = append(s.attributes, &TraceAttribute{Key: key, Value: TraceValue{v: value}})
return s
}
func (t *Tracer) Start(ctx context.Context, spanName string) (context.Context, *TraceSpan) {
starters := t.starters
if starters == nil {
starters = globalTraceStarters
}
ts := &TraceSpan{name: spanName, startTime: time.Now()}
parentSpan := GetContextSpan(ctx)
if parentSpan != nil {
parentSpan.mu.Lock()
parentSpan.children = append(parentSpan.children, ts)
parentSpan.mu.Unlock()
ts.parent = parentSpan
}
parentCtx := ctx
for internalSpanIdx, tsp := range starters {
var internalSpan traceSpanInternal
if parentSpan != nil {
parentCtx = parentSpan.internalContexts[internalSpanIdx]
}
ctx, internalSpan = tsp.start(parentCtx, ts, internalSpanIdx)
ts.internalContexts = append(ts.internalContexts, ctx)
ts.internalSpans = append(ts.internalSpans, internalSpan)
}
ctx = context.WithValue(ctx, contextKeySpan, ts)
return ctx, ts
}
type mutableContext interface {
context.Context
SetContextValue(key, value any)
GetContextValue(key any) any
}
// StartInContext starts a trace span in Gitea's mutable context (usually the web request context).
// Due to the design limitation of Gitea's web framework, it can't use `context.WithValue` to bind a new span into a new context.
// So here we use our "reqctx" framework to achieve the same result: web request context could always see the latest "span".
func (t *Tracer) StartInContext(ctx mutableContext, spanName string) (*TraceSpan, func()) {
curTraceSpan := GetContextSpan(ctx)
_, newTraceSpan := GetTracer().Start(ctx, spanName)
ctx.SetContextValue(contextKeySpan, newTraceSpan)
return newTraceSpan, func() {
newTraceSpan.End()
ctx.SetContextValue(contextKeySpan, curTraceSpan)
}
}
func (s *TraceSpan) End() {
s.mu.Lock()
s.endTime = time.Now()
s.mu.Unlock()
for _, tsp := range s.internalSpans {
tsp.end()
}
}
func GetTracer() *Tracer {
return &Tracer{}
}
func GetContextSpan(ctx context.Context) *TraceSpan {
ts, _ := ctx.Value(contextKeySpan).(*TraceSpan)
return ts
}

View file

@ -0,0 +1,96 @@
// Copyright 2025 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package gtprof
import (
"context"
"fmt"
"strings"
"sync/atomic"
"time"
"code.gitea.io/gitea/modules/tailmsg"
)
type traceBuiltinStarter struct{}
type traceBuiltinSpan struct {
ts *TraceSpan
internalSpanIdx int
}
func (t *traceBuiltinSpan) addEvent(name string, cfg *EventConfig) {
// No-op because builtin tracer doesn't need it.
// In the future we might use it to mark the time point between backend logic and network response.
}
func (t *traceBuiltinSpan) recordError(err error, cfg *EventConfig) {
// No-op because builtin tracer doesn't need it.
// Actually Gitea doesn't handle err this way in most cases
}
func (t *traceBuiltinSpan) toString(out *strings.Builder, indent int) {
t.ts.mu.RLock()
defer t.ts.mu.RUnlock()
out.WriteString(strings.Repeat(" ", indent))
out.WriteString(t.ts.name)
if t.ts.endTime.IsZero() {
out.WriteString(" duration: (not ended)")
} else {
out.WriteString(fmt.Sprintf(" duration=%.4fs", t.ts.endTime.Sub(t.ts.startTime).Seconds()))
}
for _, a := range t.ts.attributes {
out.WriteString(" ")
out.WriteString(a.Key)
out.WriteString("=")
value := a.Value.AsString()
if strings.ContainsAny(value, " \t\r\n") {
quoted := false
for _, c := range "\"'`" {
if quoted = !strings.Contains(value, string(c)); quoted {
value = string(c) + value + string(c)
break
}
}
if !quoted {
value = fmt.Sprintf("%q", value)
}
}
out.WriteString(value)
}
out.WriteString("\n")
for _, c := range t.ts.children {
span := c.internalSpans[t.internalSpanIdx].(*traceBuiltinSpan)
span.toString(out, indent+2)
}
}
func (t *traceBuiltinSpan) end() {
if t.ts.parent == nil {
// TODO: debug purpose only
// TODO: it should distinguish between http response network lag and actual processing time
threshold := time.Duration(traceBuiltinThreshold.Load())
if threshold != 0 && t.ts.endTime.Sub(t.ts.startTime) > threshold {
sb := &strings.Builder{}
t.toString(sb, 0)
tailmsg.GetManager().GetTraceRecorder().Record(sb.String())
}
}
}
func (t *traceBuiltinStarter) start(ctx context.Context, traceSpan *TraceSpan, internalSpanIdx int) (context.Context, traceSpanInternal) {
return ctx, &traceBuiltinSpan{ts: traceSpan, internalSpanIdx: internalSpanIdx}
}
func init() {
globalTraceStarters = append(globalTraceStarters, &traceBuiltinStarter{})
}
var traceBuiltinThreshold atomic.Int64
func EnableBuiltinTracer(threshold time.Duration) {
traceBuiltinThreshold.Store(int64(threshold))
}

View file

@ -0,0 +1,19 @@
// Copyright 2025 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package gtprof
// Some interesting names could be found in https://github.com/open-telemetry/opentelemetry-go/tree/main/semconv
const (
TraceSpanHTTP = "http"
TraceSpanGitRun = "git-run"
TraceSpanDatabase = "database"
)
const (
TraceAttrFuncCaller = "func.caller"
TraceAttrDbSQL = "db.sql"
TraceAttrGitCommand = "git.command"
TraceAttrHTTPRoute = "http.route"
)

View file

@ -0,0 +1,93 @@
// Copyright 2025 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package gtprof
import (
"context"
"testing"
"github.com/stretchr/testify/assert"
)
// "vendor span" is a simple demo for a span from a vendor library
var vendorContextKey any = "vendorContextKey"
type vendorSpan struct {
name string
children []*vendorSpan
}
func vendorTraceStart(ctx context.Context, name string) (context.Context, *vendorSpan) {
span := &vendorSpan{name: name}
parentSpan, ok := ctx.Value(vendorContextKey).(*vendorSpan)
if ok {
parentSpan.children = append(parentSpan.children, span)
}
ctx = context.WithValue(ctx, vendorContextKey, span)
return ctx, span
}
// below "testTrace*" integrate the vendor span into our trace system
type testTraceSpan struct {
vendorSpan *vendorSpan
}
func (t *testTraceSpan) addEvent(name string, cfg *EventConfig) {}
func (t *testTraceSpan) recordError(err error, cfg *EventConfig) {}
func (t *testTraceSpan) end() {}
type testTraceStarter struct{}
func (t *testTraceStarter) start(ctx context.Context, traceSpan *TraceSpan, internalSpanIdx int) (context.Context, traceSpanInternal) {
ctx, span := vendorTraceStart(ctx, traceSpan.name)
return ctx, &testTraceSpan{span}
}
func TestTraceStarter(t *testing.T) {
globalTraceStarters = []traceStarter{&testTraceStarter{}}
ctx := context.Background()
ctx, span := GetTracer().Start(ctx, "root")
defer span.End()
func(ctx context.Context) {
ctx, span := GetTracer().Start(ctx, "span1")
defer span.End()
func(ctx context.Context) {
_, span := GetTracer().Start(ctx, "spanA")
defer span.End()
}(ctx)
func(ctx context.Context) {
_, span := GetTracer().Start(ctx, "spanB")
defer span.End()
}(ctx)
}(ctx)
func(ctx context.Context) {
_, span := GetTracer().Start(ctx, "span2")
defer span.End()
}(ctx)
var spanFullNames []string
var collectSpanNames func(parentFullName string, s *vendorSpan)
collectSpanNames = func(parentFullName string, s *vendorSpan) {
fullName := parentFullName + "/" + s.name
spanFullNames = append(spanFullNames, fullName)
for _, c := range s.children {
collectSpanNames(fullName, c)
}
}
collectSpanNames("", span.internalSpans[0].(*testTraceSpan).vendorSpan)
assert.Equal(t, []string{
"/root",
"/root/span1",
"/root/span1/spanA",
"/root/span1/spanB",
"/root/span2",
}, spanFullNames)
}

View file

@ -0,0 +1,73 @@
// Copyright 2025 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package tailmsg
import (
"sync"
"time"
)
type MsgRecord struct {
Time time.Time
Content string
}
type MsgRecorder interface {
Record(content string)
GetRecords() []*MsgRecord
}
type memoryMsgRecorder struct {
mu sync.RWMutex
msgs []*MsgRecord
limit int
}
// TODO: use redis for a clustered environment
func (m *memoryMsgRecorder) Record(content string) {
m.mu.Lock()
defer m.mu.Unlock()
m.msgs = append(m.msgs, &MsgRecord{
Time: time.Now(),
Content: content,
})
if len(m.msgs) > m.limit {
m.msgs = m.msgs[len(m.msgs)-m.limit:]
}
}
func (m *memoryMsgRecorder) GetRecords() []*MsgRecord {
m.mu.RLock()
defer m.mu.RUnlock()
ret := make([]*MsgRecord, len(m.msgs))
copy(ret, m.msgs)
return ret
}
func NewMsgRecorder(limit int) MsgRecorder {
return &memoryMsgRecorder{
limit: limit,
}
}
type Manager struct {
traceRecorder MsgRecorder
logRecorder MsgRecorder
}
func (m *Manager) GetTraceRecorder() MsgRecorder {
return m.traceRecorder
}
func (m *Manager) GetLogRecorder() MsgRecorder {
return m.logRecorder
}
var GetManager = sync.OnceValue(func() *Manager {
return &Manager{
traceRecorder: NewMsgRecorder(100),
logRecorder: NewMsgRecorder(1000),
}
})

View file

@ -6,6 +6,9 @@ package routing
import (
"context"
"net/http"
"code.gitea.io/gitea/modules/gtprof"
"code.gitea.io/gitea/modules/reqctx"
)
type contextKeyType struct{}
@ -14,10 +17,12 @@ var contextKey contextKeyType
// RecordFuncInfo records a func info into context
func RecordFuncInfo(ctx context.Context, funcInfo *FuncInfo) (end func()) {
// TODO: reqCtx := reqctx.FromContext(ctx), add trace support
end = func() {}
// save the func info into the context record
if reqCtx := reqctx.FromContext(ctx); reqCtx != nil {
var traceSpan *gtprof.TraceSpan
traceSpan, end = gtprof.GetTracer().StartInContext(reqCtx, "http.func")
traceSpan.SetAttributeString("func", funcInfo.shortName)
}
if record, ok := ctx.Value(contextKey).(*requestRecord); ok {
record.lock.Lock()
record.funcInfo = funcInfo