added initial set of files

This commit is contained in:
Fabien Potencier 2016-06-06 18:12:57 +02:00
commit 7861d21ed4
10 changed files with 1276 additions and 0 deletions

1
.gitignore vendored Normal file
View file

@ -0,0 +1 @@
splitter-lite-tests/

19
LICENSE Normal file
View file

@ -0,0 +1,19 @@
Copyright (c) 2015-2016 Fabien Potencier
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is furnished
to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

125
README.md Normal file
View file

@ -0,0 +1,125 @@
Monorepo to Manyrepos made easy
===============================
**tl;dr**: **splitsh-lite** is a replacement for the `subtree split` Git
built-in command that is much faster and has more features at the same time.
When starting a new project, do you store all the code in one monolith
repository? Or are you creating many repositories?
Both strategies work well but both have drawbacks as well. **splitsh** helps use
both strategies at the same time by providing tools that automatically
synchronize a mono repository to many repositories.
**splitsh-lite** is a sub-project with the goal of providing a faster replacement
of the `git subtree split` command.
If you want to learn more about monorepo vs manyrepos, watch this 4-minute
lightning talk I gave at dotScale... or the longer version from DrupalCon.
The main **splitsh-lite** feature is its ability to create a branch in a repository
from one or many directories.
Installation
------------
Install libgit2:
```bash
go get github.com/libgit2/git2go
cd $GOPATH/src/github.com/libgit2/git2go
git checkout next
git submodule update --init
make install
```
Compiling
```bash
go build -o splitsh-lite github.com/splitsh/lite
```
If everything goes fine, a `splitsh-lite` binary should be available in the
current directory.
Usage
-----
Let's say you want to split the `lib/` directory of a repository to its own
branch; from the "master" Git repository (bare or clone), run:
```bash
splitsh-lite --prefix=lib/
```
The *sha1* of the split is displayed at the end of the execution:
```bash
SHA1=`splitsh-lite --prefix=lib/`
```
The sha1 can be used to create a branch or to push the commits to a new
repository.
Automatically create a branch for the split by passing a branch name
via the `--target` option:
```bash
splitsh-lite --prefix=lib/ --target=branch-name
```
If new commits are made on the repository, update the split by running the same
command again. Updates are much faster as **splitsh-lite** keeps a cache of already
split commits. Caching is possible as **splitsh-lite** guarantees that two splits of
the same code always result in the same history and the same `sha1`s for each
commit.
By default, **splitsh-lite** splits the currently checked-out branch but you can split
a different branch by passing it explicitly with `--origin` (mandatory when
splitting a bare repository):
```bash
splitsh-lite --prefix=lib/ --origin=origin/1.0
```
You don't even need to run the command from the Git repository directory if you
pass the `--path` option:
```bash
splitsh-lite --prefix=lib/ --origin=origin/1.0 --path=/path/to/repo
```
Available options:
* `--prefix` is the prefix of the directory to split; you can put the split
contents in a directory by using the `--prefix=from:to` syntax; splitting
several directories is also possible by passing multiple `--prefix` options;
* `--path` is the path to the repository to split (current directory by default);
* `--origin` is the Git reference for the origin (can be any Git reference
like `HEAD`, `heads/xxx`, `tags/xxx`, `origin/xxx`, or any `refs/xxx`);
* `--target` creates a reference for the tip of the split (can be any Git reference
like `HEAD`, `heads/xxx`, `tags/xxx`, `origin/xxx`, or any `refs/xxx`);
* `--progress` displays a nice progress bar during the split;
* `--quiet` suppresses all output on stderr (useful when run from an automated
script).
* `--scratch` flushes the cache (useful when a branch is force pushed or in
case of corruption)
* `--legacy` simulates old versions of `git subtree split` where `sha1`s
for the split commits were computed differently (useful if you are switching
from the git command to **splitsh-lite**).
**splitsh** provides more features including a sanity checker, Github integration
for real-time splitting, tagging management and synchronization, and more.
It has been used by the Symfony project for many years but the tool is not yet
ready for Open-Source. Stay tuned!
If you think that your Open-Source project might benefit from the full version
of splitsh, send me an email and I will consider splitting your project for free
on my servers (like I do for Symfony and Laravel).

104
main.go Normal file
View file

@ -0,0 +1,104 @@
package main
import (
"flag"
"fmt"
"os"
"strings"
"time"
"github.com/splitsh/lite/splitter"
)
// prefixesFlag collects the values of every --prefix option; it implements
// flag.Value so that the option can be repeated on the command line.
type prefixesFlag []*splitter.Prefix

// String renders the registered prefixes as a comma-separated list of "from"
// or "from:to" items. The slice holds pointers, so printing it directly would
// only show memory addresses.
func (p *prefixesFlag) String() string {
	// the flag package may call String on a zero-valued (nil) receiver
	if p == nil {
		return ""
	}

	items := make([]string, 0, len(*p))
	for _, prefix := range *p {
		if prefix == nil {
			continue
		}
		if prefix.To == "" {
			items = append(items, prefix.From)
		} else {
			items = append(items, prefix.From+":"+prefix.To)
		}
	}

	return strings.Join(items, ", ")
}
// Set parses one --prefix value, written as "from" or "from:to", and appends
// it to the list. It returns an error when another prefix is already split
// under the same target directory.
func (p *prefixesFlag) Set(value string) error {
	parts := strings.Split(value, ":")
	from := parts[0]
	to := ""
	if len(parts) > 1 {
		to = parts[1]
	}

	// the target must be unique; trailing slashes are ignored for the
	// comparison so that "xxx" and "xxx/" count as the same directory
	normalizedTo := strings.TrimRight(to, "/")
	for _, prefix := range *p {
		if strings.TrimRight(prefix.To, "/") == normalizedTo {
			return fmt.Errorf("Cannot have two prefix split under the same directory: %s -> %s vs %s -> %s", prefix.From, prefix.To, from, to)
		}
	}

	// the value is stored as given; only the uniqueness check is normalized
	*p = append(*p, &splitter.Prefix{From: from, To: to})
	return nil
}
// prefixes holds the values of every --prefix option (see prefixesFlag.Set).
var prefixes prefixesFlag
// String options; each one is bound to the flag of the same name in init.
var origin, target, commit, path string
// Boolean options; each one is bound to the flag of the same name in init.
var scratch, debug, quiet, legacy, progress bool
// init registers the command line flags and binds each of them to the
// package-level variable holding its value.
func init() {
	flag.Var(&prefixes, "prefix", "The directory(ies) to split")

	// string options: destination, flag name, default value, help text
	stringOptions := []struct {
		dest                 *string
		name, fallback, help string
	}{
		{&origin, "origin", "HEAD", "The branch to split (optional, defaults to the current one)"},
		{&target, "target", "", "The branch to create when split is finished (optional)"},
		{&commit, "commit", "", "The commit at which to start the split (optional)"},
		{&path, "path", ".", "The repository path (optional, current directory by default)"},
	}
	for _, opt := range stringOptions {
		flag.StringVar(opt.dest, opt.name, opt.fallback, opt.help)
	}

	// boolean options all default to false: destination, flag name, help text
	boolOptions := []struct {
		dest       *bool
		name, help string
	}{
		{&scratch, "scratch", "Flush the cache (optional)"},
		{&debug, "debug", "Enable the debug mode (optional)"},
		{&quiet, "quiet", "Suppress the output (optional)"},
		{&legacy, "legacy", "Enable the legacy mode for projects migrating from an old version of git subtree split (optional)"},
		{&progress, "progress", "Show progress bar (optional, cannot be enabled when debug is enabled)"},
	}
	for _, opt := range boolOptions {
		flag.BoolVar(opt.dest, opt.name, false, opt.help)
	}
}
// main parses the command line, runs the split and prints the sha1 of the
// split head on stdout. Everything else (progress, statistics and errors) is
// written to stderr, so that the output of the command can safely be captured
// by a script.
func main() {
	flag.Parse()

	if len(prefixes) == 0 {
		fmt.Fprintln(os.Stderr, "You must provide the directory to split via the --prefix flag")
		os.Exit(1)
	}

	config := &splitter.Config{
		Path:     path,
		Origin:   origin,
		Prefixes: []*splitter.Prefix(prefixes),
		Target:   target,
		Commit:   commit,
		Debug:    debug && !quiet,
		Scratch:  scratch,
		Legacy:   legacy,
	}

	result := &splitter.Result{}

	// the progress line is only displayed when nothing else is written on
	// stderr while the split is running
	var ticker *time.Ticker
	if progress && !debug && !quiet {
		ticker = time.NewTicker(time.Millisecond * 50)
		go func() {
			for range ticker.C {
				fmt.Fprintf(os.Stderr, "%d commits created, %d commits traversed\r", result.Created(), result.Traversed())
			}
		}()
	}

	err := splitter.Split(config, result)

	// stop the progress display before reporting anything, errors included
	if ticker != nil {
		ticker.Stop()
	}

	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}

	if !quiet {
		fmt.Fprintf(os.Stderr, "%d commits created, %d commits traversed, in %s\n", result.Created(), result.Traversed(), result.Duration(time.Millisecond))
	}

	// stdout only ever receives the sha1 of the split
	if result.Head() != nil {
		fmt.Println(result.Head().String())
	}
}

102
run-tests.sh Executable file
View file

@ -0,0 +1,102 @@
#!/bin/bash

# Checks that splitsh-lite computes the same sha1s as "git subtree split",
# first on a small repository built from scratch with fixed authors and dates
# (so that its sha1s are reproducible), then on a clone of Twig.

set -e
set -f

# Path to the splitsh-lite binary under test.
SPLITSH_LITE="$GOPATH/src/github.com/splitsh/lite/lite"

# assert_same_sha1 EXPECTED ACTUAL
# Prints the comparison and aborts the script when the two sha1s differ.
assert_same_sha1() {
    if [ "$1" == "$2" ]; then
        echo "OK ($1 == $2)"
    else
        echo "ERROR ($1 != $2)"
        exit 1
    fi
}

if [ ! -d splitter-lite-tests ]; then
    mkdir splitter-lite-tests
fi
cd splitter-lite-tests

rm -rf simple
mkdir simple
cd simple
git init > /dev/null

export GIT_AUTHOR_NAME="Sammy Cobol"
export GIT_AUTHOR_EMAIL="<sammy.cobol@example.com>"
export GIT_AUTHOR_DATE="Sat, 24 Nov 1973 19:01:02 +0200"
export GIT_COMMITTER_NAME="Fred Foobar"
export GIT_COMMITTER_EMAIL="<fred.foobar@example.com>"
export GIT_COMMITTER_DATE="Sat, 24 Nov 1973 19:11:22 +0200"
echo "a" > a
git add a
git commit -m"added a" > /dev/null

export GIT_AUTHOR_NAME="Fred Foobar"
export GIT_AUTHOR_EMAIL="<fred.foobar@example.com>"
export GIT_AUTHOR_DATE="Sat, 24 Nov 1973 20:01:02 +0200"
export GIT_COMMITTER_NAME="Sammy Cobol"
export GIT_COMMITTER_EMAIL="<sammy.cobol@example.com>"
export GIT_COMMITTER_DATE="Sat, 24 Nov 1973 20:11:22 +0200"
mkdir b/
echo "b" > b/b
git add b
git commit -m"added b" > /dev/null

export GIT_AUTHOR_NAME="Fred Foobar"
export GIT_AUTHOR_EMAIL="<fred.foobar@example.com>"
export GIT_AUTHOR_DATE="Sat, 24 Nov 1973 21:01:02 +0200"
export GIT_COMMITTER_NAME="Sammy Cobol"
export GIT_COMMITTER_EMAIL="<sammy.cobol@example.com>"
export GIT_COMMITTER_DATE="Sat, 24 Nov 1973 21:11:22 +0200"
echo "aa" > a
git add a
git commit -m"updated a" > /dev/null

export GIT_AUTHOR_NAME="Fred Foobar"
export GIT_AUTHOR_EMAIL="<fred.foobar@example.com>"
export GIT_AUTHOR_DATE="Sat, 24 Nov 1973 22:01:02 +0200"
export GIT_COMMITTER_NAME="Sammy Cobol"
export GIT_COMMITTER_EMAIL="<sammy.cobol@example.com>"
export GIT_COMMITTER_DATE="Sat, 24 Nov 1973 22:11:22 +0200"
git rm a > /dev/null
git commit -m"updated a" > /dev/null

export GIT_AUTHOR_NAME="Fred Foobar"
export GIT_AUTHOR_EMAIL="<fred.foobar@example.com>"
export GIT_AUTHOR_DATE="Sat, 24 Nov 1973 23:01:02 +0200"
export GIT_COMMITTER_NAME="Sammy Cobol"
export GIT_COMMITTER_EMAIL="<sammy.cobol@example.com>"
export GIT_COMMITTER_DATE="Sat, 24 Nov 1973 23:11:22 +0200"
echo "bb" > b/b
git add b/
git commit -m"updated b" > /dev/null

# split of the whole history
GIT_SUBTREE_SPLIT_SHA1=`git subtree split --prefix=b/ -q`
GIT_SPLITSH_SHA1=`"$SPLITSH_LITE" --prefix=b/ --quiet`
assert_same_sha1 "$GIT_SUBTREE_SPLIT_SHA1" "$GIT_SPLITSH_SHA1"

# split starting at a given commit
GIT_SUBTREE_SPLIT_SHA1=`git subtree split --prefix=b/ -q bff8cdfaaf78a8842b8d9241ccfd8fb6e026f508...`
GIT_SPLITSH_SHA1=`"$SPLITSH_LITE" --prefix=b/ --quiet --commit=bff8cdfaaf78a8842b8d9241ccfd8fb6e026f508`
assert_same_sha1 "$GIT_SUBTREE_SPLIT_SHA1" "$GIT_SPLITSH_SHA1"

cd ../

# run on some Open-Source repositories
if [ ! -d Twig ]; then
    git clone https://github.com/twigphp/Twig > /dev/null
fi
# sha1 expected for the split of lib/ at tag v1.24.1
GIT_SUBTREE_SPLIT_SHA1="ea449b0f2acba7d489a91f88154687250d2bdf42"
GIT_SPLITSH_SHA1=`"$SPLITSH_LITE" --prefix=lib/ --origin=refs/tags/v1.24.1 --path=Twig --quiet --scratch`
assert_same_sha1 "$GIT_SUBTREE_SPLIT_SHA1" "$GIT_SPLITSH_SHA1"

cd ../

174
splitter/cache.go Normal file
View file

@ -0,0 +1,174 @@
package splitter
import (
"crypto/sha1"
"fmt"
"io"
"path/filepath"
"time"
"github.com/boltdb/bolt"
"github.com/libgit2/git2go"
)
// cache stores the mapping between original and split commits in a Bolt
// database, so that later runs do not have to split the same commits again.
type cache struct {
// key is the name of the Bolt bucket holding the entries (computed by key())
key []byte
// branch is used to build the "head/<branch>" entry (see setHead/getHead)
branch string
// db is the underlying Bolt database
db *bolt.DB
}
// newCache returns the cache for the given branch and configuration. The
// database comes from config.DB when set; otherwise splitsh.db is opened in
// the Git directory of the repository. The bucket named after key(config) is
// created if it does not exist yet.
func newCache(branch string, config *Config) (*cache, error) {
	db := config.DB
	opened := false
	if db == nil {
		var err error
		db, err = bolt.Open(filepath.Join(GitDirectory(config.Path), "splitsh.db"), 0644, &bolt.Options{Timeout: 5 * time.Second})
		if err != nil {
			return nil, err
		}
		opened = true
	}

	c := &cache{
		db:     db,
		branch: branch,
		key:    key(config),
	}

	err := db.Update(func(tx *bolt.Tx) error {
		_, err := tx.CreateBucketIfNotExists(c.key)
		return err
	})
	if err != nil {
		// do not keep the database (and its file lock) open when it was
		// opened here; a database provided by the caller is left untouched
		if opened {
			db.Close()
		}
		return nil, fmt.Errorf("Impossible to create bucket: %s", err)
	}

	return c, nil
}
// close closes the underlying database and reports the error, if any.
func (c *cache) close() error {
	return c.db.Close()
}
// key computes the name of the cache bucket for a configuration: the SHA-1 of
// the start commit (or a fixed marker when there is none), the legacy marker
// when enabled, and every prefix pair, in that order.
func key(config *Config) []byte {
	parts := make([]string, 0, 2+2*len(config.Prefixes))

	// the marker value does not matter, it just has to always be the same
	start := "oldest"
	if config.Commit != "" {
		start = config.Commit
	}
	parts = append(parts, start)

	if config.Legacy {
		parts = append(parts, "legacy")
	}

	for _, prefix := range config.Prefixes {
		parts = append(parts, prefix.From, prefix.To)
	}

	hasher := sha1.New()
	for _, part := range parts {
		io.WriteString(hasher, part)
	}

	return hasher.Sum(nil)
}
// setHead stores head under the "head/<branch>" entry of the bucket.
func (c *cache) setHead(head *git.Oid) error {
	update := func(tx *bolt.Tx) error {
		bucket := tx.Bucket(c.key)
		return bucket.Put([]byte("head/"+c.branch), head[:])
	}

	return c.db.Update(update)
}
// getHead returns the oid stored under the "head/<branch>" entry, or nil when
// there is none.
func (c *cache) getHead() *git.Oid {
	var head *git.Oid

	c.db.View(func(tx *bolt.Tx) error {
		if raw := tx.Bucket(c.key).Get([]byte("head/" + c.branch)); raw != nil {
			head = git.NewOidFromBytes(raw)
		}
		return nil
	})

	return head
}
// reverse returns the oid stored under "<rev>/<which>", where which is
// "newest" or "oldest". When "newest" has no entry, the "oldest" one is used
// instead. It returns nil when nothing is found.
func (c *cache) reverse(rev *git.Oid, which string) *git.Oid {
	// entryKey builds the "<rev>/<suffix>" key in a fresh slice
	entryKey := func(suffix string) []byte {
		k := make([]byte, 0, len(rev)+1+len(suffix))
		k = append(k, rev[:]...)
		k = append(k, '/')
		return append(k, suffix...)
	}

	var found *git.Oid
	c.db.View(func(tx *bolt.Tx) error {
		raw := tx.Bucket(c.key).Get(entryKey(which))
		if raw == nil && which == "newest" {
			raw = tx.Bucket(c.key).Get(entryKey("oldest"))
		}
		if raw != nil {
			found = git.NewOidFromBytes(raw)
		}
		return nil
	})

	return found
}
// get returns the oid stored for rev, or nil when rev has no entry.
func (c *cache) get(rev *git.Oid) *git.Oid {
	var found *git.Oid

	c.db.View(func(tx *bolt.Tx) error {
		if raw := tx.Bucket(c.key).Get(rev[:]); raw != nil {
			found = git.NewOidFromBytes(raw)
		}
		return nil
	})

	return found
}
// set stores newrev under rev, then stores rev under "<newrev>/oldest" when
// created is true and under "<newrev>/newest" otherwise. Both writes happen in
// the same transaction.
func (c *cache) set(rev, newrev *git.Oid, created bool) error {
	suffix := "/newest"
	if created {
		suffix = "/oldest"
	}

	return c.db.Update(func(tx *bolt.Tx) error {
		bucket := tx.Bucket(c.key)
		if err := bucket.Put(rev[:], newrev[:]); err != nil {
			return err
		}

		reverseKey := append(newrev[:], suffix...)
		return tx.Bucket(c.key).Put(reverseKey, rev[:])
	})
}
// gets returns the oids stored for the given commits, in the same order;
// commits without an entry are skipped.
func (c *cache) gets(commits []*git.Oid) []*git.Oid {
	var found []*git.Oid

	c.db.View(func(tx *bolt.Tx) error {
		bucket := tx.Bucket(c.key)
		for i := range commits {
			raw := bucket.Get(commits[i][:])
			if raw == nil {
				continue
			}
			found = append(found, git.NewOidFromBytes(raw))
		}
		return nil
	})

	return found
}
// flush empties the cache by deleting its bucket and creating it again; it
// does nothing when the bucket does not exist.
func (c *cache) flush() error {
	return c.db.Update(func(tx *bolt.Tx) error {
		if tx.Bucket(c.key) == nil {
			return nil
		}

		if err := tx.DeleteBucket(c.key); err != nil {
			return err
		}

		_, err := tx.CreateBucketIfNotExists(c.key)
		return err
	})
}

58
splitter/config.go Normal file
View file

@ -0,0 +1,58 @@
package splitter
import (
"fmt"
"log"
"sync"
"github.com/boltdb/bolt"
"github.com/libgit2/git2go"
)
// Prefix represents which paths to split
type Prefix struct {
// From is the path looked up in the tree of each commit
From string
// To is the directory the split content is put under; empty means the root
To string
}
// Config represents a split configuration
type Config struct {
// Prefixes lists the paths to split
Prefixes []*Prefix
// Path is the repository path, used when Repo and/or DB are not set
Path string
// Origin is the Git reference to split
Origin string
// Commit is the commit at which the split starts (optional)
Commit string
// Target is the Git reference to create or update with the split head (optional)
Target string
// Debug enables the logging of each step of the split
Debug bool
// Scratch flushes the cache before splitting
Scratch bool
// Legacy makes the split commit messages go through SplitMessage
Legacy bool
// for advanced usage only
// naming and types subject to change anytime!
// Logger receives the debug output; a logger on stderr is used when nil
Logger *log.Logger
// DB is used for the cache instead of opening splitsh.db when not nil
DB *bolt.DB
// RepoMu is locked while the range of commits to split is computed
RepoMu *sync.Mutex
// Repo is used instead of opening the repository at Path when not nil
Repo *git.Repository
}
// Split splits a configuration and stores the outcome in result. When the
// split itself succeeds, an error raised while releasing the resources (cache
// and repository) is returned instead of being dropped.
func Split(config *Config, result *Result) (err error) {
	state, err := newState(config, result)
	if err != nil {
		return err
	}
	defer func() {
		// never hide the error of the split behind the one of the cleanup
		if closeErr := state.close(); closeErr != nil && err == nil {
			err = closeErr
		}
	}()

	return state.split()
}
// Validate validates the configuration: the origin and the target, when set,
// must be valid Git reference names.
func (config *Config) Validate() error {
	// an empty origin is accepted, normalizeOriginBranch replaces it by HEAD
	if config.Origin != "" && !git.ReferenceIsValidName(config.Origin) {
		return fmt.Errorf("The origin is not a valid Git reference")
	}

	if config.Target != "" && !git.ReferenceIsValidName(config.Target) {
		return fmt.Errorf("The target is not a valid Git reference")
	}

	return nil
}

98
splitter/result.go Normal file
View file

@ -0,0 +1,98 @@
package splitter
import (
"sync"
"time"
"github.com/libgit2/git2go"
)
// Result represents the outcome of a split
type Result struct {
// mu guards every field below
mu sync.RWMutex
// traversed counts the commits traversed so far
traversed int
// created counts the commits created so far
created int
// head is the latest split sha1, nil when there is none
head *git.Oid
// duration is set when the split ends (see end)
duration time.Duration
}
// NewResult returns a result pre-populated with the given duration and
// counters; its head is left unset.
func NewResult(duration time.Duration, traversed, created int) *Result {
	r := new(Result)
	r.duration = duration
	r.traversed = traversed
	r.created = created
	return r
}
// Traversed returns the number of commits traversed during the split
func (r *Result) Traversed() int {
	r.mu.RLock()
	count := r.traversed
	r.mu.RUnlock()

	return count
}
// Created returns the number of created commits
func (r *Result) Created() int {
	r.mu.RLock()
	count := r.created
	r.mu.RUnlock()

	return count
}
// Duration returns the current duration of the split, rounded to the given
// precision
func (r *Result) Duration(precision time.Duration) time.Duration {
	r.mu.RLock()
	elapsed := r.duration
	r.mu.RUnlock()

	return roundDuration(elapsed, precision)
}
// Head returns the latest split sha1
func (r *Result) Head() *git.Oid {
	r.mu.RLock()
	head := r.head
	r.mu.RUnlock()

	return head
}
// moveHead replaces the latest split sha1
func (r *Result) moveHead(oid *git.Oid) {
	r.mu.Lock()
	defer r.mu.Unlock()

	r.head = oid
}
// incCreated adds one to the number of created commits
func (r *Result) incCreated() {
	r.mu.Lock()
	defer r.mu.Unlock()

	r.created += 1
}
// incTraversed adds one to the number of traversed commits
func (r *Result) incTraversed() {
	r.mu.Lock()
	defer r.mu.Unlock()

	r.traversed += 1
}
// end records the time elapsed since start as the duration of the split
func (r *Result) end(start time.Time) {
	r.mu.Lock()
	defer r.mu.Unlock()

	r.duration = time.Since(start)
}
// roundDuration rounds a duration to a given precision (use roundDuration(d, 10*time.Second) to get a 10s precision fe)
func roundDuration(d, r time.Duration) time.Duration {
if r <= 0 {
return d
}
neg := d < 0
if neg {
d = -d
}
if m := d % r; m+m < r {
d = d - m
} else {
d = d + r - m
}
if neg {
return -d
}
return d
}

521
splitter/state.go Normal file
View file

@ -0,0 +1,521 @@
package splitter
import (
"fmt"
"log"
"os"
"strings"
"sync"
"time"
"github.com/libgit2/git2go"
)
// state holds everything needed to run one split
type state struct {
// config is the split configuration
config *Config
// originBranch is the origin normalized by normalizeOriginBranch
originBranch string
// repoMu is locked while the range of commits to split is computed
repoMu *sync.Mutex
// repo is the repository being split
repo *git.Repository
// cache maps original commits to split commits
cache *cache
// logger receives the debug output
logger *log.Logger
// simplePrefix is the path to split when there is a single prefix with an
// empty target, and empty otherwise
simplePrefix string
// result collects the counters and the head of the split
result *Result
}
// newState validates the configuration and prepares everything needed to run
// the split: repository, logger, normalized origin and cache. Resources opened
// here are released when a later step fails; the ones provided through the
// configuration (Repo, DB) are never released by this function.
func newState(config *Config, result *Result) (*state, error) {
	if err := config.Validate(); err != nil {
		return nil, err
	}

	repo := config.Repo
	ownsRepo := false
	if repo == nil {
		var err error
		repo, err = git.OpenRepository(config.Path)
		if err != nil {
			return nil, err
		}
		ownsRepo = true
	}
	// releaseRepo frees the repository on error paths, but only when it was
	// opened above
	releaseRepo := func() {
		if ownsRepo {
			repo.Free()
		}
	}

	logger := config.Logger
	if logger == nil {
		logger = log.New(os.Stderr, "", log.LstdFlags)
	}

	originBranch, err := normalizeOriginBranch(repo, config.Origin)
	if err != nil {
		releaseRepo()
		return nil, err
	}

	if config.Debug {
		logger.Printf("Splitting %s\n", originBranch)
		for _, v := range config.Prefixes {
			to := v.To
			if to == "" {
				to = "ROOT"
			}
			logger.Printf("  From \"%s\" to \"%s\"\n", v.From, to)
		}
	}

	cache, err := newCache(originBranch, config)
	if err != nil {
		releaseRepo()
		return nil, err
	}

	if config.Scratch {
		if err := cache.flush(); err != nil {
			// only close the database when newCache opened it
			if config.DB == nil {
				cache.close()
			}
			releaseRepo()
			return nil, err
		}

		if config.Target != "" {
			// best effort: the target might not exist (yet) as a local branch
			branch, err := repo.LookupBranch(config.Target, git.BranchLocal)
			if err == nil {
				branch.Delete()
				branch.Free()
			}
		}
	}

	// simplePrefix contains the prefix when there is only one
	// with an empty value (target)
	simplePrefix := ""
	if len(config.Prefixes) == 1 && config.Prefixes[0].To == "" {
		simplePrefix = config.Prefixes[0].From
	}

	repoMu := config.RepoMu
	if repoMu == nil {
		repoMu = &sync.Mutex{}
	}

	return &state{
		config:       config,
		result:       result,
		repoMu:       repoMu,
		repo:         repo,
		cache:        cache,
		logger:       logger,
		simplePrefix: simplePrefix,
		originBranch: originBranch,
	}, nil
}
// close closes the cache and frees the repository. The repository is freed
// even when closing the cache fails; the error of the cache is returned.
func (s *state) close() error {
	err := s.cache.close()
	s.repo.Free()
	return err
}
// split walks the commits to split, splits each of them, then stores the last
// traversed commit in the cache and updates the target reference. The duration
// of the run is recorded in the result whatever the outcome.
func (s *state) split() error {
	startTime := time.Now()
	defer func() {
		s.result.end(startTime)
	}()

	revWalk, err := s.walker()
	if err != nil {
		return fmt.Errorf("Impossible to walk the repository: %s", err)
	}
	defer revWalk.Free()

	// the callback can only return a boolean, so its error is kept here
	var iterationErr error
	var lastRev *git.Oid
	err = revWalk.Iterate(func(rev *git.Commit) bool {
		defer rev.Free()
		lastRev = rev.Id()

		if s.config.Debug {
			s.logger.Printf("Processing commit: %s\n", rev.Id().String())
		}

		newrev, err := s.splitRev(rev)
		if err != nil {
			iterationErr = err
			return false
		}

		if newrev != nil {
			s.result.moveHead(newrev)
		}

		return true
	})
	if err != nil {
		return err
	}
	if iterationErr != nil {
		return iterationErr
	}

	if lastRev != nil {
		// the next run starts from this entry, so failing to store it must
		// not go unnoticed
		if err := s.cache.setHead(lastRev); err != nil {
			return err
		}
	}

	return s.updateTarget()
}
// walker returns a revision walker over the commits to split, sorted
// topologically in reverse order. The caller must free the returned walker.
func (s *state) walker() (*git.RevWalk, error) {
	revWalk, err := s.repo.Walk()
	if err != nil {
		return nil, fmt.Errorf("Impossible to walk the repository: %s", err)
	}

	if err := s.pushRevs(revWalk); err != nil {
		// the walker is not returned to the caller, so it is freed here
		revWalk.Free()
		return nil, fmt.Errorf("Impossible to determine split range: %s", err)
	}

	revWalk.Sorting(git.SortTopological | git.SortReverse)

	return revWalk, nil
}
// splitRev splits one commit and returns the oid of the matching split commit.
// The result comes from the cache when the commit was already split; otherwise
// it is computed by copyOrSkip and stored in the cache. It returns nil when no
// tree could be extracted from the commit.
func (s *state) splitRev(rev *git.Commit) (*git.Oid, error) {
s.result.incTraversed()
// already split during this run or a previous one
v := s.cache.get(rev.Id())
if v != nil {
if s.config.Debug {
s.logger.Printf(" prior: %s\n", v.String())
}
return v, nil
}
var parents []*git.Oid
var n uint
for n = 0; n < rev.ParentCount(); n++ {
parents = append(parents, rev.ParentId(n))
}
if s.config.Debug {
debugMsg := " parents:"
for _, parent := range parents {
debugMsg += fmt.Sprintf(" %s", parent.String())
}
s.logger.Print(debugMsg)
}
// split counterparts of the parents; parents without a cache entry are skipped
newParents := s.cache.gets(parents)
if s.config.Debug {
debugMsg := " newparents:"
for _, parent := range newParents {
debugMsg += fmt.Sprintf(" %s", parent)
}
s.logger.Print(debugMsg)
}
tree, err := s.subtreeForCommit(rev)
if err != nil {
return nil, err
}
if nil == tree {
// treeByPath returns a nil tree when the path cannot be found in the commit
return nil, nil
}
defer tree.Free()
if s.config.Debug {
s.logger.Printf(" tree is: %s\n", tree.Id().String())
}
newrev, created, err := s.copyOrSkip(rev, tree, newParents)
if err != nil {
return nil, err
}
if s.config.Debug {
s.logger.Printf(" newrev is: %s\n", newrev)
}
if created {
s.result.incCreated()
}
if err := s.cache.set(rev.Id(), newrev, created); err != nil {
return nil, err
}
return newrev, nil
}
// subtreeForCommit returns the tree to use for the split of commit: the
// sub-tree of the simple prefix when there is one, the combination of all the
// configured prefixes otherwise.
func (s *state) subtreeForCommit(commit *git.Commit) (*git.Tree, error) {
	root, err := commit.Tree()
	if err != nil {
		return nil, err
	}
	defer root.Free()

	if s.simplePrefix == "" {
		return s.treeByPaths(root, s.config.Prefixes)
	}

	return s.treeByPath(root, s.simplePrefix)
}
// treeByPath returns the tree stored under prefix in tree. It returns a nil
// tree and no error when the entry lookup fails.
func (s *state) treeByPath(tree *git.Tree, prefix string) (*git.Tree, error) {
	entry, err := tree.EntryByPath(prefix)
	if err == nil {
		return s.repo.LookupTree(entry.Id)
	}

	return nil, nil
}
// treeByPaths builds one tree out of several prefixes: each prefix is
// extracted from tree, moved under its target directory when it has one, and
// merged with the trees of the previous prefixes. Prefixes that cannot be
// found in tree are skipped; a nil tree is returned when none is found.
// The caller must free the returned tree.
func (s *state) treeByPaths(tree *git.Tree, prefixes []*Prefix) (*git.Tree, error) {
	var currentTree *git.Tree

	// release frees the tree built so far when an error interrupts the loop
	release := func() {
		if currentTree != nil {
			currentTree.Free()
		}
	}

	for _, prefix := range prefixes {
		// splitting
		splitTree, err := s.treeByPath(tree, prefix.From)
		if err != nil {
			release()
			return nil, err
		}
		if splitTree == nil {
			continue
		}

		// adding the prefix
		prefixedTree := splitTree
		if prefix.To != "" {
			prefixedTree, err = s.addPrefixToTree(splitTree, prefix.To)
			// the prefixed tree is a new object, the split one is not needed anymore
			splitTree.Free()
			if err != nil {
				release()
				return nil, err
			}
		}

		// merging with the current tree
		if currentTree == nil {
			currentTree = prefixedTree
			continue
		}

		mergedTree, err := s.mergeTrees(currentTree, prefixedTree)
		currentTree.Free()
		prefixedTree.Free()
		if err != nil {
			return nil, err
		}
		currentTree = mergedTree
	}

	return currentTree, nil
}
// mergeTrees merges two trees (without a common ancestor) and returns the
// resulting tree; it fails when the merge has conflicts.
func (s *state) mergeTrees(t1, t2 *git.Tree) (*git.Tree, error) {
	merged, err := s.repo.MergeTrees(nil, t1, t2, nil)
	if err != nil {
		return nil, err
	}
	defer merged.Free()

	if merged.HasConflicts() {
		return nil, fmt.Errorf("Cannot split as there is a merge conflict between two paths")
	}

	treeOid, err := merged.WriteTreeTo(s.repo)
	if err == nil {
		return s.repo.LookupTree(treeOid)
	}

	return nil, err
}
// addPrefixToTree returns a new tree in which tree is nested under the
// directories of prefix ("a/b" puts tree under a/b/). Empty path segments, as
// found in "a/" or "a//b", are ignored. The caller must free the returned
// tree.
func (s *state) addPrefixToTree(tree *git.Tree, prefix string) (*git.Tree, error) {
	// nest writes a tree whose single entry, called name, points to oid; the
	// builder is freed on return instead of piling up until the end of the loop
	nest := func(name string, oid *git.Oid) (*git.Oid, error) {
		treeBuilder, err := s.repo.TreeBuilder()
		if err != nil {
			return nil, err
		}
		defer treeBuilder.Free()

		if err := treeBuilder.Insert(name, oid, git.FilemodeTree); err != nil {
			return nil, err
		}

		return treeBuilder.Write()
	}

	treeOid := tree.Id()
	parts := strings.Split(prefix, "/")
	// directories are created from the innermost one to the outermost one
	for i := len(parts) - 1; i >= 0; i-- {
		if parts[i] == "" {
			continue
		}

		var err error
		treeOid, err = nest(parts[i], treeOid)
		if err != nil {
			return nil, err
		}
	}

	return s.repo.LookupTree(treeOid)
}
// copyOrSkip returns the split commit to use for rev. When one of the new
// parents has the very same tree, that parent is returned and no commit is
// created (the boolean is false); otherwise rev is copied on top of the
// de-duplicated new parents (the boolean is true).
func (s *state) copyOrSkip(rev *git.Commit, tree *git.Tree, newParents []*git.Oid) (*git.Oid, bool, error) {
var identical *git.Oid
var gotParents []*git.Oid
var p []*git.Commit
for _, parent := range newParents {
ptree, err := s.topTreeForCommit(parent)
if err != nil {
return nil, false, err
}
if nil == ptree {
continue
}
if 0 == ptree.Cmp(tree.Id()) {
// an identical parent could be used in place of this rev.
// when several parents are identical, the last one wins
identical = parent
}
// sometimes both old parents map to the same newparent
// eliminate duplicates
isNew := true
for _, gp := range gotParents {
if 0 == gp.Cmp(parent) {
isNew = false
break
}
}
if isNew {
gotParents = append(gotParents, parent)
commit, err := s.repo.LookupCommit(parent)
if err != nil {
return nil, false, err
}
// deferred on purpose: the commits are still needed by copyCommit below
defer commit.Free()
p = append(p, commit)
}
}
if nil != identical {
return identical, false, nil
}
commit, err := s.copyCommit(rev, tree, p)
if err != nil {
return nil, false, err
}
return commit, true, nil
}
// topTreeForCommit returns the oid of the root tree of the commit identified
// by sha.
func (s *state) topTreeForCommit(sha *git.Oid) (*git.Oid, error) {
	commit, lookupErr := s.repo.LookupCommit(sha)
	if lookupErr != nil {
		return nil, lookupErr
	}
	defer commit.Free()

	root, treeErr := commit.Tree()
	if treeErr != nil {
		return nil, treeErr
	}
	defer root.Free()

	return root.Id(), nil
}
// copyCommit creates a commit with the author, committer and message of rev
// (the legacy message in legacy mode), pointing to tree and to the given
// parents. An empty author email is replaced by a placeholder address.
func (s *state) copyCommit(rev *git.Commit, tree *git.Tree, parents []*git.Commit) (*git.Oid, error) {
	if s.config.Debug {
		ids := make([]string, 0, len(parents))
		for _, parent := range parents {
			ids = append(ids, parent.Id().String())
		}
		s.logger.Printf("  copy commit \"%s\" \"%s\" \"%s\"\n", rev.Id().String(), tree.Id().String(), strings.Join(ids, " "))
	}

	var message string
	if s.config.Legacy {
		message = s.legacyMessage(rev)
	} else {
		message = rev.Message()
	}

	author := rev.Author()
	if author.Email == "" {
		author.Email = "nobody@example.com"
	}

	newOid, err := s.repo.CreateCommit("", author, rev.Committer(), message, tree, parents...)
	if err != nil {
		return nil, err
	}

	return newOid, nil
}
// updateTarget makes the target reference point to the head of the split. The
// reference is created when the target does not resolve to an existing one,
// and updated otherwise. It does nothing when no target is configured and
// fails when there is no split head.
func (s *state) updateTarget() error {
	if s.config.Target == "" {
		return nil
	}

	head := s.result.Head()
	if nil == head {
		return fmt.Errorf("Unable to create branch %s as it is empty (no commits were split)", s.config.Target)
	}

	obj, ref, err := s.repo.RevparseExt(s.config.Target)
	if obj != nil {
		obj.Free()
	}

	// RevparseExt can succeed without returning a reference; this case is
	// handled like a missing target
	if err != nil || ref == nil {
		created, err := s.repo.References.Create(s.config.Target, head, false, "subtree split")
		if err != nil {
			return err
		}
		created.Free()
		return nil
	}
	defer ref.Free()

	// SetTarget returns the updated reference, which must be freed as well
	updated, err := ref.SetTarget(head, "subtree split")
	if err != nil {
		return err
	}
	updated.Free()

	return nil
}
// legacyMessage rebuilds the message of rev as the subject and the body
// returned by SplitMessage, separated by an empty line.
func (s *state) legacyMessage(rev *git.Commit) string {
	subject, body := SplitMessage(rev.Message())

	parts := []string{subject, body}
	return strings.Join(parts, "\n\n")
}
// pushRevs sets the range to split on revWalk:
//   - from the configured commit (included) when there is one;
//   - from the head stored in the cache by a previous run (excluded) when
//     there is one;
//   - the whole history of the origin branch otherwise.
//
// In the first two cases, the head of the result is initialized with the
// cache entry of the starting commit.
func (s *state) pushRevs(revWalk *git.RevWalk) error {
	// this is needed as origin might be in the process of being updated by git.FetchOrigin()
	s.repoMu.Lock()
	defer s.repoMu.Unlock()

	// find the latest split sha1 if any on origin
	var start *git.Oid
	var err error
	if s.config.Commit != "" {
		start, err = git.NewOid(s.config.Commit)
		if err != nil {
			return err
		}
		s.result.moveHead(s.cache.get(start))
		return revWalk.PushRange(fmt.Sprintf("%s^..%s", start, s.originBranch))
	}

	start = s.cache.getHead()
	if start != nil {
		s.result.moveHead(s.cache.get(start))
		// FIXME: CHECK that this is an ancestor of the branch?
		return revWalk.PushRange(fmt.Sprintf("%s..%s", start, s.originBranch))
	}

	branch, err := s.repo.RevparseSingle(s.originBranch)
	if err != nil {
		return err
	}
	// only the id of the object is needed
	defer branch.Free()

	return revWalk.Push(branch.Id())
}

74
splitter/utils.go Normal file
View file

@ -0,0 +1,74 @@
package splitter
import (
"fmt"
"os"
"path/filepath"
"regexp"
"strings"
"github.com/libgit2/git2go"
)
// messageNormalizer matches a line break (\n or \r\n) together with the
// whitespace that precedes it; SplitMessage replaces each match by a space.
var messageNormalizer = regexp.MustCompile("\\s*\\r?\\n")
// GitDirectory returns the .git directory for a given directory: path itself
// when it contains no .git entry (this might be a bare repository), the .git
// entry otherwise.
func GitDirectory(path string) string {
	candidate := filepath.Join(path, ".git")

	_, statErr := os.Stat(candidate)
	if os.IsNotExist(statErr) {
		return path
	}

	return candidate
}
// SplitMessage splits a git message into its subject and its body. The message
// is cut at the first empty line (\n\n or \r\n\r\n); everything is part of the
// subject when there is none. Line breaks inside the subject are replaced by
// spaces, and the line breaks at the start of the body are removed.
func SplitMessage(message string) (string, string) {
	// we split the message at \n\n or \r\n\r\n
	subject, body := message, ""
	for i := 0; i+2 <= len(message); i++ {
		if message[i] == '\n' && message[i+1] == '\n' {
			subject, body = message[:i], message[i+2:]
			break
		}
		// HasPrefix takes care of the bounds, so a separator located in the
		// last bytes of the message is not missed
		if strings.HasPrefix(message[i:], "\r\n\r\n") {
			subject, body = message[:i], message[i+4:]
			break
		}
	}

	// normalize \r\n and whitespaces
	subject = messageNormalizer.ReplaceAllLiteralString(subject, " ")

	// remove spaces at the end of the subject
	subject = strings.TrimRight(subject, " ")
	body = strings.TrimLeft(body, "\r\n")

	return subject, body
}
// normalizeOriginBranch returns the full name of the reference origin resolves
// to; an empty origin means HEAD. It fails when origin cannot be resolved or
// does not resolve to a reference.
func normalizeOriginBranch(repo *git.Repository, origin string) (string, error) {
	if origin == "" {
		origin = "HEAD"
	}

	obj, ref, err := repo.RevparseExt(origin)
	if err != nil {
		return "", fmt.Errorf("Bad revision for origin: %s", err)
	}
	if obj != nil {
		obj.Free()
	}

	// RevparseExt can succeed without returning a reference
	if ref == nil {
		return "", fmt.Errorf("Bad revision for origin: %s is not a Git reference", origin)
	}
	defer ref.Free()

	return ref.Name(), nil
}