Use batch database operations instead of one by one to optimze api pulls (#32680)

Resolve #31492

The response time for the Pull Requests API has improved significantly,
dropping from over `2000ms` to about `350ms` on my local machine. It's
about `6` times faster.

A key area for further optimization lies in batch-fetching data for
`apiPullRequest.ChangedFiles, apiPullRequest.Additions, and
apiPullRequest.Deletions`.

Tests `TestAPIViewPulls` does exist and new tests added.

- This PR also fixes some bugs in `GetDiff` functions.
- This PR also fixes data inconsistent in test data. For a pull request,
the head branch's reference should be equal to the reference in
`pull/xxx/head`.
This commit is contained in:
Lunny Xiao 2024-12-10 13:15:06 -08:00 committed by GitHub
parent 2ac6f2b129
commit fbe6d9dc6b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 566 additions and 93 deletions

View file

@ -4,6 +4,8 @@
package integration
import (
"bytes"
"context"
"fmt"
"io"
"net/http"
@ -19,6 +21,7 @@ import (
"code.gitea.io/gitea/modules/setting"
api "code.gitea.io/gitea/modules/structs"
"code.gitea.io/gitea/services/forms"
"code.gitea.io/gitea/services/gitdiff"
issue_service "code.gitea.io/gitea/services/issue"
"code.gitea.io/gitea/tests"
@ -41,25 +44,99 @@ func TestAPIViewPulls(t *testing.T) {
expectedLen := unittest.GetCount(t, &issues_model.Issue{RepoID: repo.ID}, unittest.Cond("is_pull = ?", true))
assert.Len(t, pulls, expectedLen)
assert.Len(t, pulls, 3)
pull := pulls[0]
assert.EqualValues(t, 1, pull.Poster.ID)
assert.Len(t, pull.RequestedReviewers, 2)
assert.Len(t, pull.RequestedReviewersTeams, 0)
assert.EqualValues(t, 5, pull.RequestedReviewers[0].ID)
assert.EqualValues(t, 6, pull.RequestedReviewers[1].ID)
assert.EqualValues(t, 1, pull.ChangedFiles)
if assert.EqualValues(t, 5, pull.ID) {
resp = ctx.Session.MakeRequest(t, NewRequest(t, "GET", pull.DiffURL), http.StatusOK)
_, err := io.ReadAll(resp.Body)
bs, err := io.ReadAll(resp.Body)
assert.NoError(t, err)
// TODO: use diff to generate stats to test against
patch, err := gitdiff.ParsePatch(context.Background(), 1000, 5000, 10, bytes.NewReader(bs), "")
assert.NoError(t, err)
if assert.Len(t, patch.Files, pull.ChangedFiles) {
assert.Equal(t, "File-WoW", patch.Files[0].Name)
// FIXME: The old name should be empty if it's a file add type
assert.Equal(t, "File-WoW", patch.Files[0].OldName)
assert.EqualValues(t, pull.Additions, patch.Files[0].Addition)
assert.EqualValues(t, pull.Deletions, patch.Files[0].Deletion)
assert.Equal(t, gitdiff.DiffFileAdd, patch.Files[0].Type)
}
t.Run(fmt.Sprintf("APIGetPullFiles_%d", pull.ID),
doAPIGetPullFiles(ctx, pull, func(t *testing.T, files []*api.ChangedFile) {
if assert.Len(t, files, 1) {
if assert.Len(t, files, pull.ChangedFiles) {
assert.Equal(t, "File-WoW", files[0].Filename)
assert.Empty(t, files[0].PreviousFilename)
assert.EqualValues(t, 1, files[0].Additions)
assert.EqualValues(t, 1, files[0].Changes)
assert.EqualValues(t, 0, files[0].Deletions)
assert.EqualValues(t, pull.Additions, files[0].Additions)
assert.EqualValues(t, pull.Deletions, files[0].Deletions)
assert.Equal(t, "added", files[0].Status)
}
}))
}
pull = pulls[1]
assert.EqualValues(t, 1, pull.Poster.ID)
assert.Len(t, pull.RequestedReviewers, 4)
assert.Len(t, pull.RequestedReviewersTeams, 0)
assert.EqualValues(t, 3, pull.RequestedReviewers[0].ID)
assert.EqualValues(t, 4, pull.RequestedReviewers[1].ID)
assert.EqualValues(t, 2, pull.RequestedReviewers[2].ID)
assert.EqualValues(t, 5, pull.RequestedReviewers[3].ID)
assert.EqualValues(t, 1, pull.ChangedFiles)
if assert.EqualValues(t, 2, pull.ID) {
resp = ctx.Session.MakeRequest(t, NewRequest(t, "GET", pull.DiffURL), http.StatusOK)
bs, err := io.ReadAll(resp.Body)
assert.NoError(t, err)
patch, err := gitdiff.ParsePatch(context.Background(), 1000, 5000, 10, bytes.NewReader(bs), "")
assert.NoError(t, err)
if assert.Len(t, patch.Files, pull.ChangedFiles) {
assert.Equal(t, "README.md", patch.Files[0].Name)
assert.Equal(t, "README.md", patch.Files[0].OldName)
assert.EqualValues(t, pull.Additions, patch.Files[0].Addition)
assert.EqualValues(t, pull.Deletions, patch.Files[0].Deletion)
assert.Equal(t, gitdiff.DiffFileChange, patch.Files[0].Type)
}
t.Run(fmt.Sprintf("APIGetPullFiles_%d", pull.ID),
doAPIGetPullFiles(ctx, pull, func(t *testing.T, files []*api.ChangedFile) {
if assert.Len(t, files, pull.ChangedFiles) {
assert.Equal(t, "README.md", files[0].Filename)
// FIXME: The PreviousFilename name should be the same as Filename if it's a file change
assert.Equal(t, "", files[0].PreviousFilename)
assert.EqualValues(t, pull.Additions, files[0].Additions)
assert.EqualValues(t, pull.Deletions, files[0].Deletions)
assert.Equal(t, "changed", files[0].Status)
}
}))
}
pull = pulls[2]
assert.EqualValues(t, 1, pull.Poster.ID)
assert.Len(t, pull.RequestedReviewers, 1)
assert.Len(t, pull.RequestedReviewersTeams, 0)
assert.EqualValues(t, 1, pull.RequestedReviewers[0].ID)
assert.EqualValues(t, 0, pull.ChangedFiles)
if assert.EqualValues(t, 1, pull.ID) {
resp = ctx.Session.MakeRequest(t, NewRequest(t, "GET", pull.DiffURL), http.StatusOK)
bs, err := io.ReadAll(resp.Body)
assert.NoError(t, err)
patch, err := gitdiff.ParsePatch(context.Background(), 1000, 5000, 10, bytes.NewReader(bs), "")
assert.NoError(t, err)
assert.EqualValues(t, pull.ChangedFiles, patch.NumFiles)
t.Run(fmt.Sprintf("APIGetPullFiles_%d", pull.ID),
doAPIGetPullFiles(ctx, pull, func(t *testing.T, files []*api.ChangedFile) {
assert.Len(t, files, pull.ChangedFiles)
}))
}
}
func TestAPIViewPullsByBaseHead(t *testing.T) {