1package lfs23import (4 "bufio"5 "bytes"6 "context"7 "fmt"8 "io"9 "strconv"10 "strings"11 "sync"1213 gitm "github.com/aymanbagabas/git-module"14 "github.com/charmbracelet/soft-serve/git"15)1617// SearchPointerBlobs scans the whole repository for LFS pointer files18func SearchPointerBlobs(ctx context.Context, repo *git.Repository, pointerChan chan<- PointerBlob, errChan chan<- error) {19 basePath := repo.Path2021 catFileCheckReader, catFileCheckWriter := io.Pipe()22 shasToBatchReader, shasToBatchWriter := io.Pipe()23 catFileBatchReader, catFileBatchWriter := io.Pipe()2425 wg := sync.WaitGroup{}26 wg.Add(6)2728 // Create the go-routines in reverse order.2930 // 4. Take the output of cat-file --batch and check if each file in turn31 // to see if they're pointers to files in the LFS store32 go createPointerResultsFromCatFileBatch(ctx, catFileBatchReader, &wg, pointerChan)3334 // 3. Take the shas of the blobs and batch read them35 go catFileBatch(ctx, shasToBatchReader, catFileBatchWriter, &wg, basePath)3637 // 2. From the provided objects restrict to blobs <=1k38 go blobsLessThan1024FromCatFileBatchCheck(catFileCheckReader, shasToBatchWriter, &wg)3940 // 1. Run batch-check on all objects in the repository41 revListReader, revListWriter := io.Pipe()42 shasToCheckReader, shasToCheckWriter := io.Pipe()43 go catFileBatchCheck(ctx, shasToCheckReader, catFileCheckWriter, &wg, basePath)44 go blobsFromRevListObjects(revListReader, shasToCheckWriter, &wg)45 go revListAllObjects(ctx, revListWriter, &wg, basePath, errChan)46 wg.Wait()4748 close(pointerChan)49 close(errChan)50}5152func createPointerResultsFromCatFileBatch(ctx context.Context, catFileBatchReader *io.PipeReader, wg *sync.WaitGroup, pointerChan chan<- PointerBlob) {53 defer wg.Done()54 defer catFileBatchReader.Close() // nolint: errcheck5556 bufferedReader := bufio.NewReader(catFileBatchReader)57 buf := make([]byte, 1025)5859loop:60 for {61 select {62 case <-ctx.Done():63 break loop64 default:65 }6667 // File descriptor line: sha68 sha, err := bufferedReader.ReadString(' ')69 if err != nil {70 _ = catFileBatchReader.CloseWithError(err)71 break72 }73 sha = strings.TrimSpace(sha)74 // Throw away the blob75 if _, err := bufferedReader.ReadString(' '); err != nil {76 _ = catFileBatchReader.CloseWithError(err)77 break78 }79 sizeStr, err := bufferedReader.ReadString('\n')80 if err != nil {81 _ = catFileBatchReader.CloseWithError(err)82 break83 }84 size, err := strconv.Atoi(sizeStr[:len(sizeStr)-1])85 if err != nil {86 _ = catFileBatchReader.CloseWithError(err)87 break88 }89 pointerBuf := buf[:size+1]90 if _, err := io.ReadFull(bufferedReader, pointerBuf); err != nil {91 _ = catFileBatchReader.CloseWithError(err)92 break93 }94 pointerBuf = pointerBuf[:size]95 // Now we need to check if the pointerBuf is an LFS pointer96 pointer, _ := ReadPointerFromBuffer(pointerBuf)97 if !pointer.IsValid() {98 continue99 }100101 pointerChan <- PointerBlob{Hash: sha, Pointer: pointer}102 }103}104105func catFileBatch(ctx context.Context, shasToBatchReader *io.PipeReader, catFileBatchWriter *io.PipeWriter, wg *sync.WaitGroup, basePath string) {106 defer wg.Done()107 defer shasToBatchReader.Close() // nolint: errcheck108 defer catFileBatchWriter.Close() // nolint: errcheck109110 stderr := new(bytes.Buffer)111 var errbuf strings.Builder112 if err := gitm.NewCommandWithContext(ctx, "cat-file", "--batch").113 WithTimeout(-1).114 RunInDirWithOptions(basePath, gitm.RunInDirOptions{115 Stdout: catFileBatchWriter,116 Stdin: shasToBatchReader,117 Stderr: stderr,118 }); err != nil {119 _ = shasToBatchReader.CloseWithError(fmt.Errorf("git rev-list [%s]: %w - %s", basePath, err, errbuf.String()))120 }121}122123func blobsLessThan1024FromCatFileBatchCheck(catFileCheckReader *io.PipeReader, shasToBatchWriter *io.PipeWriter, wg *sync.WaitGroup) {124 defer wg.Done()125 defer catFileCheckReader.Close() // nolint: errcheck126 scanner := bufio.NewScanner(catFileCheckReader)127 defer func() {128 _ = shasToBatchWriter.CloseWithError(scanner.Err())129 }()130 for scanner.Scan() {131 line := scanner.Text()132 if len(line) == 0 {133 continue134 }135 fields := strings.Split(line, " ")136 if len(fields) < 3 || fields[1] != "blob" {137 continue138 }139 size, _ := strconv.Atoi(fields[2])140 if size > 1024 {141 continue142 }143 toWrite := []byte(fields[0] + "\n")144 for len(toWrite) > 0 {145 n, err := shasToBatchWriter.Write(toWrite)146 if err != nil {147 _ = catFileCheckReader.CloseWithError(err)148 break149 }150 toWrite = toWrite[n:]151 }152 }153}154155func catFileBatchCheck(ctx context.Context, shasToCheckReader *io.PipeReader, catFileCheckWriter *io.PipeWriter, wg *sync.WaitGroup, basePath string) {156 defer wg.Done()157 defer shasToCheckReader.Close() // nolint: errcheck158 defer catFileCheckWriter.Close() // nolint: errcheck159160 stderr := new(bytes.Buffer)161 var errbuf strings.Builder162 if err := gitm.NewCommandWithContext(ctx, "cat-file", "--batch-check").163 WithTimeout(-1).164 RunInDirWithOptions(basePath, gitm.RunInDirOptions{165 Stdout: catFileCheckWriter,166 Stdin: shasToCheckReader,167 Stderr: stderr,168 }); err != nil {169 _ = shasToCheckReader.CloseWithError(fmt.Errorf("git rev-list [%s]: %w - %s", basePath, err, errbuf.String()))170 }171}172173func blobsFromRevListObjects(revListReader *io.PipeReader, shasToCheckWriter *io.PipeWriter, wg *sync.WaitGroup) {174 defer wg.Done()175 defer revListReader.Close() // nolint: errcheck176 scanner := bufio.NewScanner(revListReader)177 defer func() {178 _ = shasToCheckWriter.CloseWithError(scanner.Err())179 }()180181 for scanner.Scan() {182 line := scanner.Text()183 if len(line) == 0 {184 continue185 }186 fields := strings.Split(line, " ")187 if len(fields) < 2 || len(fields[1]) == 0 {188 continue189 }190 toWrite := []byte(fields[0] + "\n")191 for len(toWrite) > 0 {192 n, err := shasToCheckWriter.Write(toWrite)193 if err != nil {194 _ = revListReader.CloseWithError(err)195 break196 }197 toWrite = toWrite[n:]198 }199 }200}201202func revListAllObjects(ctx context.Context, revListWriter *io.PipeWriter, wg *sync.WaitGroup, basePath string, errChan chan<- error) {203 defer wg.Done()204 defer revListWriter.Close() // nolint: errcheck205206 stderr := new(bytes.Buffer)207 var errbuf strings.Builder208 if err := gitm.NewCommandWithContext(ctx, "rev-list", "--objects", "--all").209 WithTimeout(-1).210 RunInDirWithOptions(basePath, gitm.RunInDirOptions{211 Stdout: revListWriter,212 Stderr: stderr,213 }); err != nil {214 errChan <- fmt.Errorf("git rev-list [%s]: %w - %s", basePath, err, errbuf.String())215 }216}