Refactor the usage of batch catfile (#31754)
When opening a repository, it will call `ensureValidRepository` and also `CatFileBatch`. But sometimes these will not be used until repository closed. So it's a waste of CPU to invoke 3 times git command for every open repository. This PR removed all of these from `OpenRepository` but only kept checking whether the folder exists. When a batch is necessary, the necessary functions will be invoked.
This commit is contained in:
parent
8b92eba21f
commit
c03baab678
15 changed files with 201 additions and 90 deletions
|
@ -16,10 +16,10 @@ import (
|
|||
"code.gitea.io/gitea/modules/analyze"
|
||||
"code.gitea.io/gitea/modules/charset"
|
||||
"code.gitea.io/gitea/modules/git"
|
||||
"code.gitea.io/gitea/modules/gitrepo"
|
||||
"code.gitea.io/gitea/modules/indexer/code/internal"
|
||||
indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
|
||||
inner_bleve "code.gitea.io/gitea/modules/indexer/internal/bleve"
|
||||
"code.gitea.io/gitea/modules/log"
|
||||
"code.gitea.io/gitea/modules/setting"
|
||||
"code.gitea.io/gitea/modules/timeutil"
|
||||
"code.gitea.io/gitea/modules/typesniffer"
|
||||
|
@ -189,21 +189,23 @@ func (b *Indexer) addDelete(filename string, repo *repo_model.Repository, batch
|
|||
func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *internal.RepoChanges) error {
|
||||
batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize)
|
||||
if len(changes.Updates) > 0 {
|
||||
// Now because of some insanity with git cat-file not immediately failing if not run in a valid git directory we need to run git rev-parse first!
|
||||
if err := git.EnsureValidGitRepository(ctx, repo.RepoPath()); err != nil {
|
||||
log.Error("Unable to open git repo: %s for %-v: %v", repo.RepoPath(), repo, err)
|
||||
r, err := gitrepo.OpenRepository(ctx, repo)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
batchWriter, batchReader, cancel := git.CatFileBatch(ctx, repo.RepoPath())
|
||||
defer cancel()
|
||||
defer r.Close()
|
||||
gitBatch, err := r.NewBatch(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer gitBatch.Close()
|
||||
|
||||
for _, update := range changes.Updates {
|
||||
if err := b.addUpdate(ctx, batchWriter, batchReader, sha, update, repo, batch); err != nil {
|
||||
if err := b.addUpdate(ctx, gitBatch.Writer, gitBatch.Reader, sha, update, repo, batch); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
cancel()
|
||||
gitBatch.Close()
|
||||
}
|
||||
for _, filename := range changes.RemovedFilenames {
|
||||
if err := b.addDelete(filename, repo, batch); err != nil {
|
||||
|
|
|
@ -15,11 +15,11 @@ import (
|
|||
"code.gitea.io/gitea/modules/analyze"
|
||||
"code.gitea.io/gitea/modules/charset"
|
||||
"code.gitea.io/gitea/modules/git"
|
||||
"code.gitea.io/gitea/modules/gitrepo"
|
||||
"code.gitea.io/gitea/modules/indexer/code/internal"
|
||||
indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
|
||||
inner_elasticsearch "code.gitea.io/gitea/modules/indexer/internal/elasticsearch"
|
||||
"code.gitea.io/gitea/modules/json"
|
||||
"code.gitea.io/gitea/modules/log"
|
||||
"code.gitea.io/gitea/modules/setting"
|
||||
"code.gitea.io/gitea/modules/timeutil"
|
||||
"code.gitea.io/gitea/modules/typesniffer"
|
||||
|
@ -154,17 +154,19 @@ func (b *Indexer) addDelete(filename string, repo *repo_model.Repository) elasti
|
|||
func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *internal.RepoChanges) error {
|
||||
reqs := make([]elastic.BulkableRequest, 0)
|
||||
if len(changes.Updates) > 0 {
|
||||
// Now because of some insanity with git cat-file not immediately failing if not run in a valid git directory we need to run git rev-parse first!
|
||||
if err := git.EnsureValidGitRepository(ctx, repo.RepoPath()); err != nil {
|
||||
log.Error("Unable to open git repo: %s for %-v: %v", repo.RepoPath(), repo, err)
|
||||
r, err := gitrepo.OpenRepository(ctx, repo)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
batchWriter, batchReader, cancel := git.CatFileBatch(ctx, repo.RepoPath())
|
||||
defer cancel()
|
||||
defer r.Close()
|
||||
batch, err := r.NewBatch(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer batch.Close()
|
||||
|
||||
for _, update := range changes.Updates {
|
||||
updateReqs, err := b.addUpdate(ctx, batchWriter, batchReader, sha, update, repo)
|
||||
updateReqs, err := b.addUpdate(ctx, batch.Writer, batch.Reader, sha, update, repo)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -172,7 +174,7 @@ func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha st
|
|||
reqs = append(reqs, updateReqs...)
|
||||
}
|
||||
}
|
||||
cancel()
|
||||
batch.Close()
|
||||
}
|
||||
|
||||
for _, filename := range changes.RemovedFilenames {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue