 41fcf7b7de
			
		
	
	
		41fcf7b7de
		
			
		
	
	
	
	
		
			
			Within doArchive there is a service goroutine that performs the archiving function. This goroutine reports its error using a `chan error` called `done`. Prior to this PR this channel had 0 capacity meaning that the goroutine would block until the `done` channel was cleared - however there are a couple of ways in which this channel might not be read. The simplest solution is to add a single space of capacity to the goroutine which will mean that the goroutine will always complete and even if the `done` channel is not read it will be simply garbage collected away. (The PR also contains two other places when setting up the indexers which do not leak but where the blocking of the sending goroutine is also unnecessary and so we should just add a small amount of capacity and let the sending goroutine complete as soon as it can.) Signed-off-by: Andrew Thornton <art27@cantab.net> Co-authored-by: 6543 <6543@obermui.de>
		
			
				
	
	
		
			337 lines
		
	
	
	
		
			8.7 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			337 lines
		
	
	
	
		
			8.7 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| // Copyright 2020 The Gitea Authors. All rights reserved.
 | |
| // Use of this source code is governed by a MIT-style
 | |
| // license that can be found in the LICENSE file.
 | |
| 
 | |
| package archiver
 | |
| 
 | |
| import (
 | |
| 	"context"
 | |
| 	"errors"
 | |
| 	"fmt"
 | |
| 	"io"
 | |
| 	"os"
 | |
| 	"regexp"
 | |
| 	"strings"
 | |
| 	"time"
 | |
| 
 | |
| 	"code.gitea.io/gitea/models/db"
 | |
| 	repo_model "code.gitea.io/gitea/models/repo"
 | |
| 	"code.gitea.io/gitea/modules/git"
 | |
| 	"code.gitea.io/gitea/modules/graceful"
 | |
| 	"code.gitea.io/gitea/modules/log"
 | |
| 	"code.gitea.io/gitea/modules/process"
 | |
| 	"code.gitea.io/gitea/modules/queue"
 | |
| 	"code.gitea.io/gitea/modules/setting"
 | |
| 	"code.gitea.io/gitea/modules/storage"
 | |
| )
 | |
| 
 | |
| // ArchiveRequest defines the parameters of an archive request, which notably
 | |
| // includes the specific repository being archived as well as the commit, the
 | |
| // name by which it was requested, and the kind of archive being requested.
 | |
| // This is entirely opaque to external entities, though, and mostly used as a
 | |
| // handle elsewhere.
 | |
| type ArchiveRequest struct {
 | |
| 	RepoID   int64
 | |
| 	refName  string
 | |
| 	Type     git.ArchiveType
 | |
| 	CommitID string
 | |
| }
 | |
| 
 | |
| // SHA1 hashes will only go up to 40 characters, but SHA256 hashes will go all
 | |
| // the way to 64.
 | |
| var shaRegex = regexp.MustCompile(`^[0-9a-f]{4,64}$`)
 | |
| 
 | |
| // ErrUnknownArchiveFormat request archive format is not supported
 | |
| type ErrUnknownArchiveFormat struct {
 | |
| 	RequestFormat string
 | |
| }
 | |
| 
 | |
| // Error implements error
 | |
| func (err ErrUnknownArchiveFormat) Error() string {
 | |
| 	return fmt.Sprintf("unknown format: %s", err.RequestFormat)
 | |
| }
 | |
| 
 | |
| // Is implements error
 | |
| func (ErrUnknownArchiveFormat) Is(err error) bool {
 | |
| 	_, ok := err.(ErrUnknownArchiveFormat)
 | |
| 	return ok
 | |
| }
 | |
| 
 | |
| // NewRequest creates an archival request, based on the URI.  The
 | |
| // resulting ArchiveRequest is suitable for being passed to ArchiveRepository()
 | |
| // if it's determined that the request still needs to be satisfied.
 | |
| func NewRequest(repoID int64, repo *git.Repository, uri string) (*ArchiveRequest, error) {
 | |
| 	r := &ArchiveRequest{
 | |
| 		RepoID: repoID,
 | |
| 	}
 | |
| 
 | |
| 	var ext string
 | |
| 	switch {
 | |
| 	case strings.HasSuffix(uri, ".zip"):
 | |
| 		ext = ".zip"
 | |
| 		r.Type = git.ZIP
 | |
| 	case strings.HasSuffix(uri, ".tar.gz"):
 | |
| 		ext = ".tar.gz"
 | |
| 		r.Type = git.TARGZ
 | |
| 	case strings.HasSuffix(uri, ".bundle"):
 | |
| 		ext = ".bundle"
 | |
| 		r.Type = git.BUNDLE
 | |
| 	default:
 | |
| 		return nil, ErrUnknownArchiveFormat{RequestFormat: uri}
 | |
| 	}
 | |
| 
 | |
| 	r.refName = strings.TrimSuffix(uri, ext)
 | |
| 
 | |
| 	var err error
 | |
| 	// Get corresponding commit.
 | |
| 	if repo.IsBranchExist(r.refName) {
 | |
| 		r.CommitID, err = repo.GetBranchCommitID(r.refName)
 | |
| 		if err != nil {
 | |
| 			return nil, err
 | |
| 		}
 | |
| 	} else if repo.IsTagExist(r.refName) {
 | |
| 		r.CommitID, err = repo.GetTagCommitID(r.refName)
 | |
| 		if err != nil {
 | |
| 			return nil, err
 | |
| 		}
 | |
| 	} else if shaRegex.MatchString(r.refName) {
 | |
| 		if repo.IsCommitExist(r.refName) {
 | |
| 			r.CommitID = r.refName
 | |
| 		} else {
 | |
| 			return nil, git.ErrNotExist{
 | |
| 				ID: r.refName,
 | |
| 			}
 | |
| 		}
 | |
| 	} else {
 | |
| 		return nil, fmt.Errorf("Unknow ref %s type", r.refName)
 | |
| 	}
 | |
| 
 | |
| 	return r, nil
 | |
| }
 | |
| 
 | |
| // GetArchiveName returns the name of the caller, based on the ref used by the
 | |
| // caller to create this request.
 | |
| func (aReq *ArchiveRequest) GetArchiveName() string {
 | |
| 	return strings.ReplaceAll(aReq.refName, "/", "-") + "." + aReq.Type.String()
 | |
| }
 | |
| 
 | |
| func doArchive(r *ArchiveRequest) (*repo_model.RepoArchiver, error) {
 | |
| 	txCtx, committer, err := db.TxContext()
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 	defer committer.Close()
 | |
| 	ctx, _, finished := process.GetManager().AddContext(txCtx, fmt.Sprintf("ArchiveRequest[%d]: %s", r.RepoID, r.GetArchiveName()))
 | |
| 	defer finished()
 | |
| 
 | |
| 	archiver, err := repo_model.GetRepoArchiver(ctx, r.RepoID, r.Type, r.CommitID)
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 
 | |
| 	if archiver != nil {
 | |
| 		// FIXME: If another process are generating it, we think it's not ready and just return
 | |
| 		// Or we should wait until the archive generated.
 | |
| 		if archiver.Status == repo_model.ArchiverGenerating {
 | |
| 			return nil, nil
 | |
| 		}
 | |
| 	} else {
 | |
| 		archiver = &repo_model.RepoArchiver{
 | |
| 			RepoID:   r.RepoID,
 | |
| 			Type:     r.Type,
 | |
| 			CommitID: r.CommitID,
 | |
| 			Status:   repo_model.ArchiverGenerating,
 | |
| 		}
 | |
| 		if err := repo_model.AddRepoArchiver(ctx, archiver); err != nil {
 | |
| 			return nil, err
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	rPath, err := archiver.RelativePath()
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 
 | |
| 	_, err = storage.RepoArchives.Stat(rPath)
 | |
| 	if err == nil {
 | |
| 		if archiver.Status == repo_model.ArchiverGenerating {
 | |
| 			archiver.Status = repo_model.ArchiverReady
 | |
| 			if err = repo_model.UpdateRepoArchiverStatus(ctx, archiver); err != nil {
 | |
| 				return nil, err
 | |
| 			}
 | |
| 		}
 | |
| 		return archiver, committer.Commit()
 | |
| 	}
 | |
| 
 | |
| 	if !errors.Is(err, os.ErrNotExist) {
 | |
| 		return nil, fmt.Errorf("unable to stat archive: %v", err)
 | |
| 	}
 | |
| 
 | |
| 	rd, w := io.Pipe()
 | |
| 	defer func() {
 | |
| 		w.Close()
 | |
| 		rd.Close()
 | |
| 	}()
 | |
| 	done := make(chan error, 1) // Ensure that there is some capacity which will ensure that the goroutine below can always finish
 | |
| 	repo, err := repo_model.GetRepositoryByID(archiver.RepoID)
 | |
| 	if err != nil {
 | |
| 		return nil, fmt.Errorf("archiver.LoadRepo failed: %v", err)
 | |
| 	}
 | |
| 
 | |
| 	gitRepo, err := git.OpenRepository(ctx, repo.RepoPath())
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 	defer gitRepo.Close()
 | |
| 
 | |
| 	go func(done chan error, w *io.PipeWriter, archiver *repo_model.RepoArchiver, gitRepo *git.Repository) {
 | |
| 		defer func() {
 | |
| 			if r := recover(); r != nil {
 | |
| 				done <- fmt.Errorf("%v", r)
 | |
| 			}
 | |
| 		}()
 | |
| 
 | |
| 		if archiver.Type == git.BUNDLE {
 | |
| 			err = gitRepo.CreateBundle(
 | |
| 				ctx,
 | |
| 				archiver.CommitID,
 | |
| 				w,
 | |
| 			)
 | |
| 		} else {
 | |
| 			err = gitRepo.CreateArchive(
 | |
| 				ctx,
 | |
| 				archiver.Type,
 | |
| 				w,
 | |
| 				setting.Repository.PrefixArchiveFiles,
 | |
| 				archiver.CommitID,
 | |
| 			)
 | |
| 		}
 | |
| 		_ = w.CloseWithError(err)
 | |
| 		done <- err
 | |
| 	}(done, w, archiver, gitRepo)
 | |
| 
 | |
| 	// TODO: add lfs data to zip
 | |
| 	// TODO: add submodule data to zip
 | |
| 
 | |
| 	if _, err := storage.RepoArchives.Save(rPath, rd, -1); err != nil {
 | |
| 		return nil, fmt.Errorf("unable to write archive: %v", err)
 | |
| 	}
 | |
| 
 | |
| 	err = <-done
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 
 | |
| 	if archiver.Status == repo_model.ArchiverGenerating {
 | |
| 		archiver.Status = repo_model.ArchiverReady
 | |
| 		if err = repo_model.UpdateRepoArchiverStatus(ctx, archiver); err != nil {
 | |
| 			return nil, err
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return archiver, committer.Commit()
 | |
| }
 | |
| 
 | |
| // ArchiveRepository satisfies the ArchiveRequest being passed in.  Processing
 | |
| // will occur in a separate goroutine, as this phase may take a while to
 | |
| // complete.  If the archive already exists, ArchiveRepository will not do
 | |
| // anything.  In all cases, the caller should be examining the *ArchiveRequest
 | |
| // being returned for completion, as it may be different than the one they passed
 | |
| // in.
 | |
| func ArchiveRepository(request *ArchiveRequest) (*repo_model.RepoArchiver, error) {
 | |
| 	return doArchive(request)
 | |
| }
 | |
| 
 | |
| var archiverQueue queue.UniqueQueue
 | |
| 
 | |
| // Init initlize archive
 | |
| func Init() error {
 | |
| 	handler := func(data ...queue.Data) []queue.Data {
 | |
| 		for _, datum := range data {
 | |
| 			archiveReq, ok := datum.(*ArchiveRequest)
 | |
| 			if !ok {
 | |
| 				log.Error("Unable to process provided datum: %v - not possible to cast to IndexerData", datum)
 | |
| 				continue
 | |
| 			}
 | |
| 			log.Trace("ArchiverData Process: %#v", archiveReq)
 | |
| 			if _, err := doArchive(archiveReq); err != nil {
 | |
| 				log.Error("Archive %v failed: %v", datum, err)
 | |
| 			}
 | |
| 		}
 | |
| 		return nil
 | |
| 	}
 | |
| 
 | |
| 	archiverQueue = queue.CreateUniqueQueue("repo-archive", handler, new(ArchiveRequest))
 | |
| 	if archiverQueue == nil {
 | |
| 		return errors.New("unable to create codes indexer queue")
 | |
| 	}
 | |
| 
 | |
| 	go graceful.GetManager().RunWithShutdownFns(archiverQueue.Run)
 | |
| 
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // StartArchive push the archive request to the queue
 | |
| func StartArchive(request *ArchiveRequest) error {
 | |
| 	has, err := archiverQueue.Has(request)
 | |
| 	if err != nil {
 | |
| 		return err
 | |
| 	}
 | |
| 	if has {
 | |
| 		return nil
 | |
| 	}
 | |
| 	return archiverQueue.Push(request)
 | |
| }
 | |
| 
 | |
| func deleteOldRepoArchiver(ctx context.Context, archiver *repo_model.RepoArchiver) error {
 | |
| 	p, err := archiver.RelativePath()
 | |
| 	if err != nil {
 | |
| 		return err
 | |
| 	}
 | |
| 	if err := repo_model.DeleteRepoArchiver(ctx, archiver); err != nil {
 | |
| 		return err
 | |
| 	}
 | |
| 	if err := storage.RepoArchives.Delete(p); err != nil {
 | |
| 		log.Error("delete repo archive file failed: %v", err)
 | |
| 	}
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // DeleteOldRepositoryArchives deletes old repository archives.
 | |
| func DeleteOldRepositoryArchives(ctx context.Context, olderThan time.Duration) error {
 | |
| 	log.Trace("Doing: ArchiveCleanup")
 | |
| 
 | |
| 	for {
 | |
| 		archivers, err := repo_model.FindRepoArchives(repo_model.FindRepoArchiversOption{
 | |
| 			ListOptions: db.ListOptions{
 | |
| 				PageSize: 100,
 | |
| 				Page:     1,
 | |
| 			},
 | |
| 			OlderThan: olderThan,
 | |
| 		})
 | |
| 		if err != nil {
 | |
| 			log.Trace("Error: ArchiveClean: %v", err)
 | |
| 			return err
 | |
| 		}
 | |
| 
 | |
| 		for _, archiver := range archivers {
 | |
| 			if err := deleteOldRepoArchiver(ctx, archiver); err != nil {
 | |
| 				return err
 | |
| 			}
 | |
| 		}
 | |
| 		if len(archivers) < 100 {
 | |
| 			break
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	log.Trace("Finished: ArchiveCleanup")
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // DeleteRepositoryArchives deletes all repositories' archives.
 | |
| func DeleteRepositoryArchives(ctx context.Context) error {
 | |
| 	if err := repo_model.DeleteAllRepoArchives(); err != nil {
 | |
| 		return err
 | |
| 	}
 | |
| 	return storage.Clean(storage.RepoArchives)
 | |
| }
 |