commit 7861d21ed45d459db90bdd0efd137e3f3e3197dd Author: Fabien Potencier Date: Mon Jun 6 18:12:57 2016 +0200 added initial set of files diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e165e44 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +splitter-lite-tests/ diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..f2b0470 --- /dev/null +++ b/LICENSE @@ -0,0 +1,19 @@ +Copyright (c) 2015-2016 Fabien Potencier + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is furnished +to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..ed2b6f8 --- /dev/null +++ b/README.md @@ -0,0 +1,125 @@ +Monorepo to Manyrepos made easy +=============================== + +**tl;dr**: **splitsh-lite** is a replacement for the `subtree split` Git +built-in command that is much faster and has more features at the same time. + +When starting a new project, do you store all the code in one monolithic +repository? Or are you creating many repositories? 
+ +Both strategies work well but both have drawbacks as well. **splitsh** helps use +both strategies at the same time by providing tools that automatically +synchronize a mono repository to many repositories. + +**splitsh-lite** is a sub-project with the goal of providing a faster replacement +of the `git subtree split` command. + +If you want to learn more about monorepo vs manyrepos, watch this 4-minute +lightning talk I gave at dotScale... or the longer version from DrupalCon. + +The main **splitsh-lite** feature is its ability to create a branch in a repository +from one or many directories. + +Installation +------------ + +Install libgit2: + +```bash +go get github.com/libgit2/git2go +cd $GOPATH/src/github.com/libgit2/git2go +git checkout next +git submodule update --init +make install +``` + +Compiling: + +```bash +go build -o splitsh-lite github.com/splitsh/lite +``` + +If everything goes fine, a `splitsh-lite` binary should be available in the +current directory. + +Usage +----- + +Let's say you want to split the `lib/` directory of a repository to its own +branch; from the "master" Git repository (bare or clone), run: + +```bash +splitsh-lite --prefix=lib/ +``` + +The *sha1* of the split is displayed at the end of the execution: + +```bash +SHA1=`splitsh-lite --prefix=lib/` +``` + +The sha1 can be used to create a branch or to push the commits to a new +repository. + +Automatically create a branch for the split by passing a branch name +via the `--target` option: + +```bash +splitsh-lite --prefix=lib/ --target=branch-name +``` + +If new commits are made on the repository, update the split by running the same +command again. Updates are much faster as **splitsh-lite** keeps a cache of already +split commits. Caching is possible as **splitsh-lite** guarantees that two splits of +the same code always result in the same history and the same `sha1`s for each +commit. 
+ +By default, **splitsh-lite** splits the currently checked-out branch but you can split +a different branch by passing it explicitly with `--origin` (mandatory when +splitting a bare repository): + +```bash +splitsh-lite --prefix=lib/ --origin=origin/1.0 +``` + +You don't even need to run the command from the Git repository directory if you +pass the `--path` option: + +```bash +splitsh-lite --prefix=lib/ --origin=origin/1.0 --path=/path/to/repo +``` + +Available options: + + * `--prefix` is the prefix of the directory to split; you can put the split + contents in a directory by using the `--prefix=from:to` syntax; splitting + several directories is also possible by passing multiple `--prefix` options; + + * `--path` is the path to the repository to split (current directory by default); + + * `--origin` is the Git reference for the origin (can be any Git reference + like `HEAD`, `heads/xxx`, `tags/xxx`, `origin/xxx`, or any `refs/xxx`); + + * `--target` creates a reference for the tip of the split (can be any Git reference + like `HEAD`, `heads/xxx`, `tags/xxx`, `origin/xxx`, or any `refs/xxx`); + + * `--progress` displays a nice progress bar during the split; + + * `--quiet` suppresses all informational output on stderr (useful when run from an automated + script); + + * `--scratch` flushes the cache (useful when a branch is force pushed or in + case of corruption); + + * `--legacy` simulates old versions of `git subtree split` where `sha1`s + for the split commits were computed differently (useful if you are switching + from the git command to **splitsh-lite**). + +**splitsh** provides more features including a sanity checker, GitHub integration +for real-time splitting, tagging management and synchronization, and more. +It has been used by the Symfony project for many years but the tool is not yet +ready for Open-Source. Stay tuned! 
+ +If you think that your Open-Source project might benefit from the full version +of splitsh, send me an email and I will consider splitting your project for free +on my servers (like I do for Symfony and Laravel). diff --git a/main.go b/main.go new file mode 100644 index 0000000..cffa216 --- /dev/null +++ b/main.go @@ -0,0 +1,104 @@ +package main + +import ( + "flag" + "fmt" + "os" + "strings" + "time" + + "github.com/splitsh/lite/splitter" +) + +type prefixesFlag []*splitter.Prefix + +func (p *prefixesFlag) String() string { + return fmt.Sprint(*p) +} + +func (p *prefixesFlag) Set(value string) error { + parts := strings.Split(value, ":") + from := parts[0] + to := "" + if len(parts) > 1 { + to = parts[1] + } + + // value must be unique + for _, prefix := range []*splitter.Prefix(*p) { + // FIXME: to should be normalized (xxx vs xxx/ for instance) + if prefix.To == to { + return fmt.Errorf("Cannot have two prefix split under the same directory: %s -> %s vs %s -> %s", prefix.From, prefix.To, from, to) + } + } + + *p = append(*p, &splitter.Prefix{From: from, To: to}) + return nil +} + +var prefixes prefixesFlag +var origin, target, commit, path string +var scratch, debug, quiet, legacy, progress bool + +func init() { + flag.Var(&prefixes, "prefix", "The directory(ies) to split") + flag.StringVar(&origin, "origin", "HEAD", "The branch to split (optional, defaults to the current one)") + flag.StringVar(&target, "target", "", "The branch to create when split is finished (optional)") + flag.StringVar(&commit, "commit", "", "The commit at which to start the split (optional)") + flag.StringVar(&path, "path", ".", "The repository path (optional, current directory by default)") + flag.BoolVar(&scratch, "scratch", false, "Flush the cache (optional)") + flag.BoolVar(&debug, "debug", false, "Enable the debug mode (optional)") + flag.BoolVar(&quiet, "quiet", false, "Suppress the output (optional)") + flag.BoolVar(&legacy, "legacy", false, "Enable the legacy mode for projects 
migrating from an old version of git subtree split (optional)") + flag.BoolVar(&progress, "progress", false, "Show progress bar (optional, cannot be enabled when debug is enabled)") +} + +func main() { + flag.Parse() + + if len(prefixes) == 0 { + fmt.Fprintln(os.Stderr, "You must provide the directory to split via the --prefix flag") + os.Exit(1) + } + + config := &splitter.Config{ + Path: path, + Origin: origin, + Prefixes: []*splitter.Prefix(prefixes), + Target: target, + Commit: commit, + Debug: debug && !quiet, + Scratch: scratch, + Legacy: legacy, + } + + result := &splitter.Result{} + + var ticker *time.Ticker + if progress && !debug && !quiet { + ticker = time.NewTicker(time.Millisecond * 50) + go func() { + for range ticker.C { + fmt.Fprintf(os.Stderr, "%d commits created, %d commits traversed\r", result.Created(), result.Traversed()) + } + }() + } + + err := splitter.Split(config, result) + if err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } + + if ticker != nil { + ticker.Stop() + } + + if !quiet { + fmt.Fprintf(os.Stderr, "%d commits created, %d commits traversed, in %s\n", result.Created(), result.Traversed(), result.Duration(time.Millisecond)) + } + + if result.Head() != nil { + fmt.Println(result.Head().String()) + } +} diff --git a/run-tests.sh b/run-tests.sh new file mode 100755 index 0000000..7172ce2 --- /dev/null +++ b/run-tests.sh @@ -0,0 +1,102 @@ +#!/bin/bash + +set -e +set -f + +if [ ! 
-d splitter-lite-tests ]; then + mkdir splitter-lite-tests +fi +cd splitter-lite-tests + +rm -rf simple +mkdir simple +cd simple +git init > /dev/null + +export GIT_AUTHOR_NAME="Sammy Cobol" +export GIT_AUTHOR_EMAIL="" +export GIT_AUTHOR_DATE="Sat, 24 Nov 1973 19:01:02 +0200" +export GIT_COMMITTER_NAME="Fred Foobar" +export GIT_COMMITTER_EMAIL="" +export GIT_COMMITTER_DATE="Sat, 24 Nov 1973 19:11:22 +0200" +echo "a" > a +git add a +git commit -m"added a" > /dev/null + +export GIT_AUTHOR_NAME="Fred Foobar" +export GIT_AUTHOR_EMAIL="" +export GIT_AUTHOR_DATE="Sat, 24 Nov 1973 20:01:02 +0200" +export GIT_COMMITTER_NAME="Sammy Cobol" +export GIT_COMMITTER_EMAIL="" +export GIT_COMMITTER_DATE="Sat, 24 Nov 1973 20:11:22 +0200" +mkdir b/ +echo "b" > b/b +git add b +git commit -m"added b" > /dev/null + +export GIT_AUTHOR_NAME="Fred Foobar" +export GIT_AUTHOR_EMAIL="" +export GIT_AUTHOR_DATE="Sat, 24 Nov 1973 21:01:02 +0200" +export GIT_COMMITTER_NAME="Sammy Cobol" +export GIT_COMMITTER_EMAIL="" +export GIT_COMMITTER_DATE="Sat, 24 Nov 1973 21:11:22 +0200" +echo "aa" > a +git add a +git commit -m"updated a" > /dev/null + +export GIT_AUTHOR_NAME="Fred Foobar" +export GIT_AUTHOR_EMAIL="" +export GIT_AUTHOR_DATE="Sat, 24 Nov 1973 22:01:02 +0200" +export GIT_COMMITTER_NAME="Sammy Cobol" +export GIT_COMMITTER_EMAIL="" +export GIT_COMMITTER_DATE="Sat, 24 Nov 1973 22:11:22 +0200" +git rm a > /dev/null +git commit -m"updated a" > /dev/null + +export GIT_AUTHOR_NAME="Fred Foobar" +export GIT_AUTHOR_EMAIL="" +export GIT_AUTHOR_DATE="Sat, 24 Nov 1973 23:01:02 +0200" +export GIT_COMMITTER_NAME="Sammy Cobol" +export GIT_COMMITTER_EMAIL="" +export GIT_COMMITTER_DATE="Sat, 24 Nov 1973 23:11:22 +0200" +echo "bb" > b/b +git add b/ +git commit -m"updated b" > /dev/null + +GIT_SUBTREE_SPLIT_SHA1=`git subtree split --prefix=b/ -q` +GIT_SPLITSH_SHA1=`$GOPATH/src/github.com/splitsh/lite/lite --prefix=b/ --quiet` + +if [ "$GIT_SUBTREE_SPLIT_SHA1" == "$GIT_SPLITSH_SHA1" ]; then + echo "OK 
($GIT_SUBTREE_SPLIT_SHA1 == $GIT_SPLITSH_SHA1)" +else + echo "FAIL ($GIT_SUBTREE_SPLIT_SHA1 != $GIT_SPLITSH_SHA1)" + exit 1 +fi + +GIT_SUBTREE_SPLIT_SHA1=`git subtree split --prefix=b/ -q bff8cdfaaf78a8842b8d9241ccfd8fb6e026f508...` +GIT_SPLITSH_SHA1=`$GOPATH/src/github.com/splitsh/lite/lite --prefix=b/ --quiet --commit=bff8cdfaaf78a8842b8d9241ccfd8fb6e026f508` + +if [ "$GIT_SUBTREE_SPLIT_SHA1" == "$GIT_SPLITSH_SHA1" ]; then + echo "OK ($GIT_SUBTREE_SPLIT_SHA1 == $GIT_SPLITSH_SHA1)" +else + echo "FAIL ($GIT_SUBTREE_SPLIT_SHA1 != $GIT_SPLITSH_SHA1)" + exit 1 +fi + +cd ../ + +# run on some Open-Source repositories +if [ ! -d Twig ]; then + git clone https://github.com/twigphp/Twig > /dev/null +fi +GIT_SUBTREE_SPLIT_SHA1="ea449b0f2acba7d489a91f88154687250d2bdf42" +GIT_SPLITSH_SHA1=`$GOPATH/src/github.com/splitsh/lite/lite --prefix=lib/ --origin=refs/tags/v1.24.1 --path=Twig --quiet --scratch` + +if [ "$GIT_SUBTREE_SPLIT_SHA1" == "$GIT_SPLITSH_SHA1" ]; then + echo "OK ($GIT_SUBTREE_SPLIT_SHA1 == $GIT_SPLITSH_SHA1)" +else + echo "FAIL ($GIT_SUBTREE_SPLIT_SHA1 != $GIT_SPLITSH_SHA1)" + exit 1 +fi + +cd ../ diff --git a/splitter/cache.go b/splitter/cache.go new file mode 100644 index 0000000..8f43f1d --- /dev/null +++ b/splitter/cache.go @@ -0,0 +1,174 @@ +package splitter + +import ( + "crypto/sha1" + "fmt" + "io" + "path/filepath" + "time" + + "github.com/boltdb/bolt" + "github.com/libgit2/git2go" +) + +type cache struct { + key []byte + branch string + db *bolt.DB +} + +func newCache(branch string, config *Config) (*cache, error) { + var err error + db := config.DB + if db == nil { + db, err = bolt.Open(filepath.Join(GitDirectory(config.Path), "splitsh.db"), 0644, &bolt.Options{Timeout: 5 * time.Second}) + if err != nil { + return nil, err + } + } + + c := &cache{ + db: db, + branch: branch, + key: key(config), + } + + err = db.Update(func(tx *bolt.Tx) error { + _, err1 := tx.CreateBucketIfNotExists(c.key) + return err1 + }) + 
if err != nil { + return nil, fmt.Errorf("Impossible to create bucket: %s", err) + } + + return c, nil +} + +func (c *cache) close() error { + err := c.db.Close() + if err != nil { + return err + } + + return nil +} + +func key(config *Config) []byte { + h := sha1.New() + if config.Commit != "" { + io.WriteString(h, config.Commit) + } else { + // value does not matter, should just be always the same + io.WriteString(h, "oldest") + } + + if config.Legacy { + io.WriteString(h, "legacy") + } + + for _, prefix := range config.Prefixes { + io.WriteString(h, prefix.From) + io.WriteString(h, prefix.To) + } + + return h.Sum(nil) +} + +func (c *cache) setHead(head *git.Oid) error { + return c.db.Update(func(tx *bolt.Tx) error { + return tx.Bucket(c.key).Put([]byte("head/"+c.branch), head[0:20]) + }) +} + +func (c *cache) getHead() *git.Oid { + var oid *git.Oid + c.db.View(func(tx *bolt.Tx) error { + result := tx.Bucket(c.key).Get([]byte("head/" + c.branch)) + if result != nil { + oid = git.NewOidFromBytes(result) + } + return nil + }) + return oid +} + +// which is newest or oldest +func (c *cache) reverse(rev *git.Oid, which string) *git.Oid { + var oid *git.Oid + c.db.View(func(tx *bolt.Tx) error { + result := tx.Bucket(c.key).Get(append(rev[0:20], []byte("/"+which)...)) + if result == nil && which == "newest" { + result = tx.Bucket(c.key).Get(append(rev[0:20], []byte("/oldest")...)) + } + if result != nil { + oid = git.NewOidFromBytes(result) + } + return nil + }) + return oid +} + +func (c *cache) get(rev *git.Oid) *git.Oid { + var oid *git.Oid + c.db.View(func(tx *bolt.Tx) error { + result := tx.Bucket(c.key).Get(rev[0:20]) + if result != nil { + oid = git.NewOidFromBytes(result) + } + return nil + }) + return oid +} + +func (c *cache) set(rev, newrev *git.Oid, created bool) error { + return c.db.Update(func(tx *bolt.Tx) error { + err := tx.Bucket(c.key).Put(rev[0:20], newrev[0:20]) + if err != nil { + return err + } + + postfix := "/newest" + if created { + postfix = 
"/oldest" + } + + key := append(newrev[0:20], []byte(postfix)...) + return tx.Bucket(c.key).Put(key, rev[0:20]) + }) +} + +func (c *cache) gets(commits []*git.Oid) []*git.Oid { + var oids []*git.Oid + + c.db.View(func(tx *bolt.Tx) error { + b := tx.Bucket(c.key) + for _, commit := range commits { + result := b.Get(commit[0:20]) + if result != nil { + oids = append(oids, git.NewOidFromBytes(result)) + } + } + + return nil + }) + + return oids +} + +func (c *cache) flush() error { + return c.db.Update(func(tx *bolt.Tx) error { + if tx.Bucket(c.key) != nil { + err := tx.DeleteBucket(c.key) + if err != nil { + return err + } + + _, err = tx.CreateBucketIfNotExists(c.key) + if err != nil { + return err + } + } + + return nil + }) +} diff --git a/splitter/config.go b/splitter/config.go new file mode 100644 index 0000000..49d6e0e --- /dev/null +++ b/splitter/config.go @@ -0,0 +1,58 @@ +package splitter + +import ( + "fmt" + "log" + "sync" + + "github.com/boltdb/bolt" + "github.com/libgit2/git2go" +) + +// Prefix represents which paths to split +type Prefix struct { + From string + To string +} + +// Config represents a split configuration +type Config struct { + Prefixes []*Prefix + Path string + Origin string + Commit string + Target string + Debug bool + Scratch bool + Legacy bool + + // for advanced usage only + // naming and types subject to change anytime! 
+ Logger *log.Logger + DB *bolt.DB + RepoMu *sync.Mutex + Repo *git.Repository +} + +// Split splits a configuration +func Split(config *Config, result *Result) error { + state, err := newState(config, result) + if err != nil { + return err + } + defer state.close() + return state.split() +} + +// Validate validates the configuration +func (config *Config) Validate() error { + if !git.ReferenceIsValidName(config.Origin) { + return fmt.Errorf("The origin is not a valid Git reference") + } + + if config.Target != "" && !git.ReferenceIsValidName(config.Target) { + return fmt.Errorf("The target is not a valid Git reference") + } + + return nil +} diff --git a/splitter/result.go b/splitter/result.go new file mode 100644 index 0000000..7bec3a8 --- /dev/null +++ b/splitter/result.go @@ -0,0 +1,98 @@ +package splitter + +import ( + "sync" + "time" + + "github.com/libgit2/git2go" +) + +// Result represents the outcome of a split +type Result struct { + mu sync.RWMutex + traversed int + created int + head *git.Oid + duration time.Duration +} + +// NewResult returns a pre-populated result +func NewResult(duration time.Duration, traversed, created int) *Result { + return &Result{ + duration: duration, + traversed: traversed, + created: created, + } +} + +// Traversed returns the number of commits traversed during the split +func (r *Result) Traversed() int { + r.mu.RLock() + defer r.mu.RUnlock() + return r.traversed +} + +// Created returns the number of created commits +func (r *Result) Created() int { + r.mu.RLock() + defer r.mu.RUnlock() + return r.created +} + +// Duration returns the current duration of the split +func (r *Result) Duration(precision time.Duration) time.Duration { + r.mu.RLock() + defer r.mu.RUnlock() + return roundDuration(r.duration, precision) +} + +// Head returns the latest split sha1 +func (r *Result) Head() *git.Oid { + r.mu.RLock() + defer r.mu.RUnlock() + return r.head +} + +func (r *Result) moveHead(oid *git.Oid) { + r.mu.Lock() + r.head = oid + 
r.mu.Unlock() +} + +func (r *Result) incCreated() { + r.mu.Lock() + r.created++ + r.mu.Unlock() +} + +func (r *Result) incTraversed() { + r.mu.Lock() + r.traversed++ + r.mu.Unlock() +} + +func (r *Result) end(start time.Time) { + r.mu.Lock() + r.duration = time.Now().Sub(start) + r.mu.Unlock() +} + +// roundDuration rounds a duration to a given precision (use roundDuration(d, 10*time.Second) to get a 10s precision fe) +func roundDuration(d, r time.Duration) time.Duration { + if r <= 0 { + return d + } + neg := d < 0 + if neg { + d = -d + } + if m := d % r; m+m < r { + d = d - m + } else { + d = d + r - m + } + if neg { + return -d + } + return d +} diff --git a/splitter/state.go b/splitter/state.go new file mode 100644 index 0000000..04c52a4 --- /dev/null +++ b/splitter/state.go @@ -0,0 +1,521 @@ +package splitter + +import ( + "fmt" + "log" + "os" + "strings" + "sync" + "time" + + "github.com/libgit2/git2go" +) + +type state struct { + config *Config + originBranch string + repoMu *sync.Mutex + repo *git.Repository + cache *cache + logger *log.Logger + simplePrefix string + result *Result +} + +func newState(config *Config, result *Result) (*state, error) { + // validate config + err := config.Validate() + if err != nil { + return nil, err + } + + repo := config.Repo + if config.Repo == nil { + repo, err = git.OpenRepository(config.Path) + if err != nil { + return nil, err + } + } + + logger := config.Logger + if logger == nil { + logger = log.New(os.Stderr, "", log.LstdFlags) + } + + originBranch, err := normalizeOriginBranch(repo, config.Origin) + if err != nil { + return nil, err + } + if config.Debug { + logger.Printf("Splitting %s\n", originBranch) + for _, v := range config.Prefixes { + to := v.To + if to == "" { + to = "ROOT" + } + logger.Printf(" From \"%s\" to \"%s\"\n", v.From, to) + } + } + + cache, err := newCache(originBranch, config) + if err != nil { + return nil, err + } + + if config.Scratch { + err = cache.flush() + if err != nil { + return nil, 
err + } + + if config.Target != "" { + branch, err := repo.LookupBranch(config.Target, git.BranchLocal) + if err == nil { + branch.Delete() + branch.Free() + } + } + } + + // SimplePrefix contains the prefix when there is only one + // with an empty value (target) + simplePrefix := "" + if len(config.Prefixes) == 1 { + for _, prefix := range config.Prefixes { + if prefix.To == "" { + simplePrefix = prefix.From + } + break + } + } + + repoMu := &sync.Mutex{} + if config.RepoMu != nil { + repoMu = config.RepoMu + } + + return &state{ + config: config, + result: result, + repoMu: repoMu, + repo: repo, + cache: cache, + logger: logger, + simplePrefix: simplePrefix, + originBranch: originBranch, + }, nil +} + +func (s *state) close() error { + err := s.cache.close() + if err != nil { + return err + } + s.repo.Free() + return nil +} + +func (s *state) split() error { + startTime := time.Now() + defer func() { + s.result.end(startTime) + }() + + revWalk, err := s.walker() + if err != nil { + return fmt.Errorf("Impossible to walk the repository: %s", err) + } + defer revWalk.Free() + + var iterationErr error + var lastRev *git.Oid + err = revWalk.Iterate(func(rev *git.Commit) bool { + defer rev.Free() + lastRev = rev.Id() + + if s.config.Debug { + s.logger.Printf("Processing commit: %s\n", rev.Id().String()) + } + + var newrev *git.Oid + newrev, err = s.splitRev(rev) + if err != nil { + iterationErr = err + return false + } + + if newrev != nil { + s.result.moveHead(newrev) + } + + return true + }) + if err != nil { + return err + } + if iterationErr != nil { + return iterationErr + } + + if lastRev != nil { + s.cache.setHead(lastRev) + } + + return s.updateTarget() +} + +func (s *state) walker() (*git.RevWalk, error) { + revWalk, err := s.repo.Walk() + if err != nil { + return nil, fmt.Errorf("Impossible to walk the repository: %s", err) + } + + err = s.pushRevs(revWalk) + if err != nil { + return nil, fmt.Errorf("Impossible to determine split range: %s", err) + } + + 
revWalk.Sorting(git.SortTopological | git.SortReverse) + + return revWalk, nil +} + +func (s *state) splitRev(rev *git.Commit) (*git.Oid, error) { + s.result.incTraversed() + + v := s.cache.get(rev.Id()) + if v != nil { + if s.config.Debug { + s.logger.Printf(" prior: %s\n", v.String()) + } + return v, nil + } + + var parents []*git.Oid + var n uint + for n = 0; n < rev.ParentCount(); n++ { + parents = append(parents, rev.ParentId(n)) + } + + if s.config.Debug { + debugMsg := " parents:" + for _, parent := range parents { + debugMsg += fmt.Sprintf(" %s", parent.String()) + } + s.logger.Print(debugMsg) + } + + newParents := s.cache.gets(parents) + + if s.config.Debug { + debugMsg := " newparents:" + for _, parent := range newParents { + debugMsg += fmt.Sprintf(" %s", parent) + } + s.logger.Print(debugMsg) + } + + tree, err := s.subtreeForCommit(rev) + if err != nil { + return nil, err + } + + if nil == tree { + // should never happen + return nil, nil + } + defer tree.Free() + + if s.config.Debug { + s.logger.Printf(" tree is: %s\n", tree.Id().String()) + } + + newrev, created, err := s.copyOrSkip(rev, tree, newParents) + if err != nil { + return nil, err + } + + if s.config.Debug { + s.logger.Printf(" newrev is: %s\n", newrev) + } + + if created { + s.result.incCreated() + } + + if err := s.cache.set(rev.Id(), newrev, created); err != nil { + return nil, err + } + + return newrev, nil +} + +func (s *state) subtreeForCommit(commit *git.Commit) (*git.Tree, error) { + tree, err := commit.Tree() + if err != nil { + return nil, err + } + defer tree.Free() + + if s.simplePrefix != "" { + return s.treeByPath(tree, s.simplePrefix) + } + + return s.treeByPaths(tree, s.config.Prefixes) +} + +func (s *state) treeByPath(tree *git.Tree, prefix string) (*git.Tree, error) { + treeEntry, err := tree.EntryByPath(prefix) + if err != nil { + return nil, nil + } + + return s.repo.LookupTree(treeEntry.Id) +} + +func (s *state) treeByPaths(tree *git.Tree, prefixes []*Prefix) (*git.Tree, 
error) { + var currentTree, prefixedTree, mergedTree *git.Tree + for _, prefix := range s.config.Prefixes { + // splitting + splitTree, err := s.treeByPath(tree, prefix.From) + if err != nil { + return nil, err + } + if splitTree == nil { + continue + } + + // adding the prefix + if prefix.To != "" { + prefixedTree, err = s.addPrefixToTree(splitTree, prefix.To) + if err != nil { + return nil, err + } + } else { + prefixedTree = splitTree + } + + // merging with the current tree + if currentTree != nil { + mergedTree, err = s.mergeTrees(currentTree, prefixedTree) + currentTree.Free() + prefixedTree.Free() + if err != nil { + return nil, err + } + } else { + mergedTree = prefixedTree + } + + currentTree = mergedTree + } + + return currentTree, nil +} + +func (s *state) mergeTrees(t1, t2 *git.Tree) (*git.Tree, error) { + index, err := s.repo.MergeTrees(nil, t1, t2, nil) + if err != nil { + return nil, err + } + defer index.Free() + + if index.HasConflicts() { + return nil, fmt.Errorf("Cannot split as there is a merge conflict between two paths") + } + + oid, err := index.WriteTreeTo(s.repo) + if err != nil { + return nil, err + } + + return s.repo.LookupTree(oid) +} + +func (s *state) addPrefixToTree(tree *git.Tree, prefix string) (*git.Tree, error) { + treeOid := tree.Id() + parts := strings.Split(prefix, "/") + for i := len(parts) - 1; i >= 0; i-- { + treeBuilder, err := s.repo.TreeBuilder() + if err != nil { + return nil, err + } + defer treeBuilder.Free() + + err = treeBuilder.Insert(parts[i], treeOid, git.FilemodeTree) + if err != nil { + return nil, err + } + + treeOid, err = treeBuilder.Write() + if err != nil { + return nil, err + } + } + + prefixedTree, err := s.repo.LookupTree(treeOid) + if err != nil { + return nil, err + } + + return prefixedTree, nil +} + +func (s *state) copyOrSkip(rev *git.Commit, tree *git.Tree, newParents []*git.Oid) (*git.Oid, bool, error) { + var identical *git.Oid + var gotParents []*git.Oid + var p []*git.Commit + for _, parent := 
range newParents { + ptree, err := s.topTreeForCommit(parent) + if err != nil { + return nil, false, err + } + if nil == ptree { + continue + } + + if 0 == ptree.Cmp(tree.Id()) { + // an identical parent could be used in place of this rev. + identical = parent + } + + // sometimes both old parents map to the same newparent + // eliminate duplicates + isNew := true + for _, gp := range gotParents { + if 0 == gp.Cmp(parent) { + isNew = false + break + } + } + + if isNew { + gotParents = append(gotParents, parent) + commit, err := s.repo.LookupCommit(parent) + if err != nil { + return nil, false, err + } + defer commit.Free() + p = append(p, commit) + } + } + + if nil != identical { + return identical, false, nil + } + + commit, err := s.copyCommit(rev, tree, p) + if err != nil { + return nil, false, err + } + + return commit, true, nil +} + +func (s *state) topTreeForCommit(sha *git.Oid) (*git.Oid, error) { + commit, err := s.repo.LookupCommit(sha) + if err != nil { + return nil, err + } + defer commit.Free() + + tree, err := commit.Tree() + if err != nil { + return nil, err + } + defer tree.Free() + + return tree.Id(), nil +} + +func (s *state) copyCommit(rev *git.Commit, tree *git.Tree, parents []*git.Commit) (*git.Oid, error) { + if s.config.Debug { + parentStrs := make([]string, len(parents)) + for i, parent := range parents { + parentStrs[i] = parent.Id().String() + } + s.logger.Printf(" copy commit \"%s\" \"%s\" \"%s\"\n", rev.Id().String(), tree.Id().String(), strings.Join(parentStrs, " ")) + } + + message := rev.Message() + if s.config.Legacy { + message = s.legacyMessage(rev) + } + + author := rev.Author() + if author.Email == "" { + author.Email = "nobody@example.com" + } + + oid, err := s.repo.CreateCommit("", author, rev.Committer(), message, tree, parents...) 
+ if err != nil { + return nil, err + } + + return oid, nil +} + +func (s *state) updateTarget() error { + if s.config.Target == "" { + return nil + } + + if nil == s.result.Head() { + return fmt.Errorf("Unable to create branch %s as it is empty (no commits were split)", s.config.Target) + } + + obj, ref, err := s.repo.RevparseExt(s.config.Target) + if obj != nil { + obj.Free() + } + if err != nil { + ref, err = s.repo.References.Create(s.config.Target, s.result.Head(), false, "subtree split") + if err != nil { + return err + } + ref.Free() + } else { + defer ref.Free() + ref.SetTarget(s.result.Head(), "subtree split") + } + + return nil +} + +func (s *state) legacyMessage(rev *git.Commit) string { + subject, body := SplitMessage(rev.Message()) + return subject + "\n\n" + body +} + +// pushRevs sets the range to split +func (s *state) pushRevs(revWalk *git.RevWalk) error { + // this is needed as origin might be in the process of being updated by git.FetchOrigin() + s.repoMu.Lock() + defer s.repoMu.Unlock() + + // find the latest split sha1 if any on origin + var start *git.Oid + var err error + if s.config.Commit != "" { + start, err = git.NewOid(s.config.Commit) + if err != nil { + return err + } + s.result.moveHead(s.cache.get(start)) + return revWalk.PushRange(fmt.Sprintf("%s^..%s", start, s.originBranch)) + } + + start = s.cache.getHead() + if start != nil { + s.result.moveHead(s.cache.get(start)) + // FIXME: CHECK that this is an ancestor of the branch? 
+ return revWalk.PushRange(fmt.Sprintf("%s..%s", start, s.originBranch)) + } + + branch, err := s.repo.RevparseSingle(s.originBranch) + if err != nil { + return err + } + + return revWalk.Push(branch.Id()) +} diff --git a/splitter/utils.go b/splitter/utils.go new file mode 100644 index 0000000..5292196 --- /dev/null +++ b/splitter/utils.go @@ -0,0 +1,74 @@ +package splitter + +import ( + "fmt" + "os" + "path/filepath" + "regexp" + "strings" + + "github.com/libgit2/git2go" +) + +var messageNormalizer = regexp.MustCompile("\\s*\\r?\\n") + +// GitDirectory returns the .git directory for a given directory +func GitDirectory(path string) string { + gitPath := filepath.Join(path, ".git") + if _, err := os.Stat(gitPath); os.IsNotExist(err) { + // this might be a bare repo + return path + } + + return gitPath +} + +// SplitMessage splits a git message +func SplitMessage(message string) (string, string) { + // we split the message at the first \n\n or \r\n\r\n (only the 4-byte CRLF form needs the i+4 bound) + var subject, body string + found := false + for i := 0; i+2 <= len(message); i++ { + if message[i] == '\n' && message[i+1] == '\n' { + subject = message[0:i] + body = message[i+2:] + found = true + break + } else if i+4 <= len(message) && message[i] == '\r' && message[i+1] == '\n' && message[i+2] == '\r' && message[i+3] == '\n' { + subject = message[0:i] + body = message[i+4:] + found = true + break + } + } + + if !found { + subject = message + body = "" + } + + // normalize \r\n and whitespaces + subject = messageNormalizer.ReplaceAllLiteralString(subject, " ") + + // remove spaces at the end of the subject + subject = strings.TrimRight(subject, " ") + body = strings.TrimLeft(body, "\r\n") + return subject, body +} + +func normalizeOriginBranch(repo *git.Repository, origin string) (string, error) { + if origin == "" { + origin = "HEAD" + } + + obj, ref, err := repo.RevparseExt(origin) + if err != nil { + return "", fmt.Errorf("Bad revision for origin: %s", err) + } + if obj != nil { + obj.Free() + } + defer ref.Free() + + return ref.Name(), nil +}