Refactor the usage of batch catfile (#31754)

When opening a repository, it will call `ensureValidRepository` and also
`CatFileBatch`. But sometimes these will not be used until repository
closed. So it's a waste of CPU to invoke 3 times git command for every
open repository.

This PR removed all of these from `OpenRepository` but only kept
checking whether the folder exists. When a batch is necessary, the
necessary functions will be invoked.
This commit is contained in:
Lunny Xiao 2024-08-21 01:04:57 +08:00 committed by GitHub
parent 8b92eba21f
commit c03baab678
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 201 additions and 90 deletions

View file

@ -16,10 +16,10 @@ import (
"code.gitea.io/gitea/modules/analyze"
"code.gitea.io/gitea/modules/charset"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/gitrepo"
"code.gitea.io/gitea/modules/indexer/code/internal"
indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
inner_bleve "code.gitea.io/gitea/modules/indexer/internal/bleve"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/modules/typesniffer"
@ -189,21 +189,23 @@ func (b *Indexer) addDelete(filename string, repo *repo_model.Repository, batch
func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *internal.RepoChanges) error {
batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize)
if len(changes.Updates) > 0 {
// Now because of some insanity with git cat-file not immediately failing if not run in a valid git directory we need to run git rev-parse first!
if err := git.EnsureValidGitRepository(ctx, repo.RepoPath()); err != nil {
log.Error("Unable to open git repo: %s for %-v: %v", repo.RepoPath(), repo, err)
r, err := gitrepo.OpenRepository(ctx, repo)
if err != nil {
return err
}
batchWriter, batchReader, cancel := git.CatFileBatch(ctx, repo.RepoPath())
defer cancel()
defer r.Close()
gitBatch, err := r.NewBatch(ctx)
if err != nil {
return err
}
defer gitBatch.Close()
for _, update := range changes.Updates {
if err := b.addUpdate(ctx, batchWriter, batchReader, sha, update, repo, batch); err != nil {
if err := b.addUpdate(ctx, gitBatch.Writer, gitBatch.Reader, sha, update, repo, batch); err != nil {
return err
}
}
cancel()
gitBatch.Close()
}
for _, filename := range changes.RemovedFilenames {
if err := b.addDelete(filename, repo, batch); err != nil {

View file

@ -15,11 +15,11 @@ import (
"code.gitea.io/gitea/modules/analyze"
"code.gitea.io/gitea/modules/charset"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/gitrepo"
"code.gitea.io/gitea/modules/indexer/code/internal"
indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
inner_elasticsearch "code.gitea.io/gitea/modules/indexer/internal/elasticsearch"
"code.gitea.io/gitea/modules/json"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/modules/typesniffer"
@ -154,17 +154,19 @@ func (b *Indexer) addDelete(filename string, repo *repo_model.Repository) elasti
func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *internal.RepoChanges) error {
reqs := make([]elastic.BulkableRequest, 0)
if len(changes.Updates) > 0 {
// Now because of some insanity with git cat-file not immediately failing if not run in a valid git directory we need to run git rev-parse first!
if err := git.EnsureValidGitRepository(ctx, repo.RepoPath()); err != nil {
log.Error("Unable to open git repo: %s for %-v: %v", repo.RepoPath(), repo, err)
r, err := gitrepo.OpenRepository(ctx, repo)
if err != nil {
return err
}
batchWriter, batchReader, cancel := git.CatFileBatch(ctx, repo.RepoPath())
defer cancel()
defer r.Close()
batch, err := r.NewBatch(ctx)
if err != nil {
return err
}
defer batch.Close()
for _, update := range changes.Updates {
updateReqs, err := b.addUpdate(ctx, batchWriter, batchReader, sha, update, repo)
updateReqs, err := b.addUpdate(ctx, batch.Writer, batch.Reader, sha, update, repo)
if err != nil {
return err
}
@ -172,7 +174,7 @@ func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha st
reqs = append(reqs, updateReqs...)
}
}
cancel()
batch.Close()
}
for _, filename := range changes.RemovedFilenames {