improve performance of diffs (#32393)

This has two major changes that significantly reduce the amount of work
done for large diffs:

* Kill a running git process when reaching the maximum number of files
in a diff, preventing it from processing the entire diff.
* When loading a diff with the URL param `file-only=true`, skip loading
stats. This speeds up loading both hidden files of a diff and sections
of a diff when clicking the "Show More" button.

A couple of minor things from profiling are also included:

* Reuse existing repo in `PrepareViewPullInfo` if head and base are the
same.

The performance impact is going to depend heavily on the individual diff
and the hardware it runs on, but when testing locally on a diff changing
100k+ lines over hundreds of files, I'm seeing a roughly 75% reduction
in time to load the result of "Show More"

---------

Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
This commit is contained in:
Rowan Bohde 2024-11-01 22:29:37 -05:00 committed by GitHub
parent ec2d1593c2
commit 7dcccc3bb1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 44 additions and 42 deletions

View file

@ -358,6 +358,7 @@ func (t *TemporaryUploadRepository) DiffIndex() (*gitdiff.Diff, error) {
Stderr: stderr,
PipelineFunc: func(ctx context.Context, cancel context.CancelFunc) error {
_ = stdoutWriter.Close()
defer cancel()
diff, finalErr = gitdiff.ParsePatch(t.ctx, setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, stdoutReader, "")
if finalErr != nil {
log.Error("ParsePatch: %v", finalErr)
@ -371,10 +372,14 @@ func (t *TemporaryUploadRepository) DiffIndex() (*gitdiff.Diff, error) {
log.Error("Unable to ParsePatch in temporary repo %s (%s). Error: %v", t.repo.FullName(), t.basePath, finalErr)
return nil, finalErr
}
log.Error("Unable to run diff-index pipeline in temporary repo %s (%s). Error: %v\nStderr: %s",
t.repo.FullName(), t.basePath, err, stderr)
return nil, fmt.Errorf("Unable to run diff-index pipeline in temporary repo %s. Error: %w\nStderr: %s",
t.repo.FullName(), err, stderr)
// If the process exited early, don't error
if err != context.Canceled {
log.Error("Unable to run diff-index pipeline in temporary repo %s (%s). Error: %v\nStderr: %s",
t.repo.FullName(), t.basePath, err, stderr)
return nil, fmt.Errorf("Unable to run diff-index pipeline in temporary repo %s. Error: %w\nStderr: %s",
t.repo.FullName(), err, stderr)
}
}
diff.NumFiles, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(t.ctx, t.basePath, git.TrustedCmdArgs{"--cached"}, "HEAD")