Use git attributes to determine generated and vendored status for language stats and diffs (#16773)

Replaces #16262
Replaces #16250
Replaces #14833

This PR first implements a `git check-attr` pipe reader - using `git check-attr --stdin -z --cached` - taking account of the change in the output format in git 1.8.5 and creates a helper function to read a tree into a temporary index file for that pipe reader.

It then wires this in to the language stats helper and into the git diff generation.

Files which are marked generated will be folded by default.

Fixes #14786
Fixes #12653
This commit is contained in:
zeripath 2021-09-09 21:13:36 +01:00 committed by GitHub
parent b83b4fbef9
commit 248b96d8a3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 736 additions and 17 deletions

View file

@ -6,7 +6,12 @@ package git
import (
"bytes"
"context"
"fmt"
"io"
"os"
"strconv"
"strings"
)
// CheckAttributeOpts represents the possible options to CheckAttribute
@ -21,7 +26,7 @@ type CheckAttributeOpts struct {
func (repo *Repository) CheckAttribute(opts CheckAttributeOpts) (map[string]map[string]string, error) {
err := LoadGitVersion()
if err != nil {
return nil, fmt.Errorf("Git version missing: %v", err)
return nil, fmt.Errorf("git version missing: %v", err)
}
stdOut := new(bytes.Buffer)
@ -55,13 +60,14 @@ func (repo *Repository) CheckAttribute(opts CheckAttributeOpts) (map[string]map[
cmd := NewCommand(cmdArgs...)
if err := cmd.RunInDirPipeline(repo.Path, stdOut, stdErr); err != nil {
return nil, fmt.Errorf("Failed to run check-attr: %v\n%s\n%s", err, stdOut.String(), stdErr.String())
return nil, fmt.Errorf("failed to run check-attr: %v\n%s\n%s", err, stdOut.String(), stdErr.String())
}
// FIXME: This is incorrect on versions < 1.8.5
fields := bytes.Split(stdOut.Bytes(), []byte{'\000'})
if len(fields)%3 != 1 {
return nil, fmt.Errorf("Wrong number of fields in return from check-attr")
return nil, fmt.Errorf("wrong number of fields in return from check-attr")
}
var name2attribute2info = make(map[string]map[string]string)
@ -80,3 +86,276 @@ func (repo *Repository) CheckAttribute(opts CheckAttributeOpts) (map[string]map[
return name2attribute2info, nil
}
// CheckAttributeReader provides a reader for check-attribute content that can be long running
type CheckAttributeReader struct {
// params
Attributes []string
Repo *Repository
IndexFile string
WorkTree string
stdinReader io.ReadCloser
stdinWriter *os.File
stdOut attributeWriter
cmd *Command
env []string
ctx context.Context
cancel context.CancelFunc
running chan struct{}
}
// Init initializes the cmd
func (c *CheckAttributeReader) Init(ctx context.Context) error {
c.running = make(chan struct{})
cmdArgs := []string{"check-attr", "--stdin", "-z"}
if len(c.IndexFile) > 0 && CheckGitVersionAtLeast("1.7.8") == nil {
cmdArgs = append(cmdArgs, "--cached")
c.env = []string{"GIT_INDEX_FILE=" + c.IndexFile}
}
if len(c.WorkTree) > 0 && CheckGitVersionAtLeast("1.7.8") == nil {
c.env = []string{"GIT_WORK_TREE=" + c.WorkTree}
}
if len(c.Attributes) > 0 {
cmdArgs = append(cmdArgs, c.Attributes...)
cmdArgs = append(cmdArgs, "--")
} else {
lw := new(nulSeparatedAttributeWriter)
lw.attributes = make(chan attributeTriple)
c.stdOut = lw
c.stdOut.Close()
return fmt.Errorf("no provided Attributes to check")
}
c.ctx, c.cancel = context.WithCancel(ctx)
c.cmd = NewCommandContext(c.ctx, cmdArgs...)
var err error
c.stdinReader, c.stdinWriter, err = os.Pipe()
if err != nil {
return err
}
if CheckGitVersionAtLeast("1.8.5") == nil {
lw := new(nulSeparatedAttributeWriter)
lw.attributes = make(chan attributeTriple, 5)
c.stdOut = lw
} else {
lw := new(lineSeparatedAttributeWriter)
lw.attributes = make(chan attributeTriple, 5)
c.stdOut = lw
}
return nil
}
// Run run cmd
func (c *CheckAttributeReader) Run() error {
stdErr := new(bytes.Buffer)
err := c.cmd.RunInDirTimeoutEnvFullPipelineFunc(c.env, -1, c.Repo.Path, c.stdOut, stdErr, c.stdinReader, func(_ context.Context, _ context.CancelFunc) error {
close(c.running)
return nil
})
defer c.cancel()
_ = c.stdOut.Close()
if err != nil && c.ctx.Err() != nil && err.Error() != "signal: killed" {
return fmt.Errorf("failed to run attr-check. Error: %w\nStderr: %s", err, stdErr.String())
}
return nil
}
// CheckPath check attr for given path
func (c *CheckAttributeReader) CheckPath(path string) (map[string]string, error) {
select {
case <-c.ctx.Done():
return nil, c.ctx.Err()
case <-c.running:
}
if _, err := c.stdinWriter.Write([]byte(path + "\x00")); err != nil {
defer c.cancel()
return nil, err
}
if err := c.stdinWriter.Sync(); err != nil {
defer c.cancel()
return nil, err
}
rs := make(map[string]string)
for range c.Attributes {
select {
case attr := <-c.stdOut.ReadAttribute():
rs[attr.Attribute] = attr.Value
case <-c.ctx.Done():
return nil, c.ctx.Err()
}
}
return rs, nil
}
// Close close pip after use
func (c *CheckAttributeReader) Close() error {
select {
case <-c.running:
default:
close(c.running)
}
defer c.cancel()
return c.stdinWriter.Close()
}
type attributeWriter interface {
io.WriteCloser
ReadAttribute() <-chan attributeTriple
}
type attributeTriple struct {
Filename string
Attribute string
Value string
}
type nulSeparatedAttributeWriter struct {
tmp []byte
attributes chan attributeTriple
working attributeTriple
pos int
}
func (wr *nulSeparatedAttributeWriter) Write(p []byte) (n int, err error) {
l, read := len(p), 0
nulIdx := bytes.IndexByte(p, '\x00')
for nulIdx >= 0 {
wr.tmp = append(wr.tmp, p[:nulIdx]...)
switch wr.pos {
case 0:
wr.working = attributeTriple{
Filename: string(wr.tmp),
}
case 1:
wr.working.Attribute = string(wr.tmp)
case 2:
wr.working.Value = string(wr.tmp)
}
wr.tmp = wr.tmp[:0]
wr.pos++
if wr.pos > 2 {
wr.attributes <- wr.working
wr.pos = 0
}
read += nulIdx + 1
if l > read {
p = p[nulIdx+1:]
nulIdx = bytes.IndexByte(p, '\x00')
} else {
return l, nil
}
}
wr.tmp = append(wr.tmp, p...)
return len(p), nil
}
func (wr *nulSeparatedAttributeWriter) ReadAttribute() <-chan attributeTriple {
return wr.attributes
}
func (wr *nulSeparatedAttributeWriter) Close() error {
close(wr.attributes)
return nil
}
type lineSeparatedAttributeWriter struct {
tmp []byte
attributes chan attributeTriple
}
func (wr *lineSeparatedAttributeWriter) Write(p []byte) (n int, err error) {
l := len(p)
nlIdx := bytes.IndexByte(p, '\n')
for nlIdx >= 0 {
wr.tmp = append(wr.tmp, p[:nlIdx]...)
if len(wr.tmp) == 0 {
// This should not happen
if len(p) > nlIdx+1 {
wr.tmp = wr.tmp[:0]
p = p[nlIdx+1:]
nlIdx = bytes.IndexByte(p, '\n')
continue
} else {
return l, nil
}
}
working := attributeTriple{}
if wr.tmp[0] == '"' {
sb := new(strings.Builder)
remaining := string(wr.tmp[1:])
for len(remaining) > 0 {
rn, _, tail, err := strconv.UnquoteChar(remaining, '"')
if err != nil {
if len(remaining) > 2 && remaining[0] == '"' && remaining[1] == ':' && remaining[2] == ' ' {
working.Filename = sb.String()
wr.tmp = []byte(remaining[3:])
break
}
return l, fmt.Errorf("unexpected tail %s", string(remaining))
}
_, _ = sb.WriteRune(rn)
remaining = tail
}
} else {
idx := bytes.IndexByte(wr.tmp, ':')
if idx < 0 {
return l, fmt.Errorf("unexpected input %s", string(wr.tmp))
}
working.Filename = string(wr.tmp[:idx])
if len(wr.tmp) < idx+2 {
return l, fmt.Errorf("unexpected input %s", string(wr.tmp))
}
wr.tmp = wr.tmp[idx+2:]
}
idx := bytes.IndexByte(wr.tmp, ':')
if idx < 0 {
return l, fmt.Errorf("unexpected input %s", string(wr.tmp))
}
working.Attribute = string(wr.tmp[:idx])
if len(wr.tmp) < idx+2 {
return l, fmt.Errorf("unexpected input %s", string(wr.tmp))
}
working.Value = string(wr.tmp[idx+2:])
wr.attributes <- working
wr.tmp = wr.tmp[:0]
if len(p) > nlIdx+1 {
p = p[nlIdx+1:]
nlIdx = bytes.IndexByte(p, '\n')
continue
} else {
return l, nil
}
}
wr.tmp = append(wr.tmp, p...)
return l, nil
}
func (wr *lineSeparatedAttributeWriter) ReadAttribute() <-chan attributeTriple {
return wr.attributes
}
func (wr *lineSeparatedAttributeWriter) Close() error {
close(wr.attributes)
return nil
}