-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathgit-split
More file actions
executable file
·156 lines (140 loc) · 5.67 KB
/
git-split
File metadata and controls
executable file
·156 lines (140 loc) · 5.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
#!/usr/bin/python
# git-split: Split the history of a git repository by subdirectories and ranges
# Copyright (C) 2006 Jamey Sharp, Josh Triplett
#
# You can redistribute this software and/or modify it under the terms of
# the GNU General Public License as published by the Free Software
# Foundation; version 2 dated June, 1991.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
from itertools import izip
from subprocess import Popen, PIPE
import os, sys
import pprint
def run(cmd, stdin=None, env=None):
    """Run an external command and return everything it wrote to stdout.

    cmd   -- argument list handed to Popen (no shell is involved).
    stdin -- optional data written to the child's standard input.
    env   -- optional mapping of extra environment variables, layered on
             top of a copy of the current process environment.

    The child's exit status is not inspected and its stderr is not
    captured.
    """
    # Work on a copy so os.environ itself is never modified.
    child_env = os.environ.copy()
    if env:
        child_env.update(env)
    child = Popen(cmd, stdin=PIPE, stdout=PIPE, env=child_env)
    return child.communicate(stdin)[0]
def parse_author_date(s):
    """Given a GIT author or committer string, return (name, email, date).

    The string is split from the right into four whitespace-separated
    fields: the name (which may itself contain spaces), the e-mail
    address wrapped in one delimiter character on each side, the time,
    and the timezone.  The date returned is "time timezone".
    """
    fields = s.rsplit(None, 3)
    (name, wrapped_email, seconds, zone) = fields
    # Drop the first and last character surrounding the address.
    address = wrapped_email[1:-1]
    date = " ".join((seconds, zone))
    return (name, address, date)
def get_subtree(tree, name):
    """Return the object hash that `tree` records for the path `name`.

    tree -- a tree-ish understood by "git ls-tree".
    name -- path of the entry to look up; trailing slashes are ignored.

    Returns None when "git ls-tree" prints nothing for the path.
    """
    # With a trailing slash, ls-tree lists the *contents* of the directory
    # instead of the entry for the directory itself, which would make the
    # hash below belong to the first child.  Strip the slashes, but keep
    # the name as given if nothing else would remain.
    path = name.rstrip("/") or name
    output = run(["git", "ls-tree", tree, path])
    if not output:
        return None
    # An ls-tree line reads "mode type hash<TAB>path"; the hash is field 2.
    return output.split()[2]
def is_ancestor(new_commits, cur, other):
    """Return True if cur has other as an ancestor, or False otherwise.

    The check asks "git merge-base" for the merge base of the two commits
    and compares it with other.  new_commits is accepted but not used.
    """
    merge_base = run(["git", "merge-base", cur, other])
    return other == merge_base.strip()
def walk(commits, new_commits, commit_hash, project):
    """Rewrite commit_hash (and, recursively, its parents) for one subdirectory.

    commits     -- dict mapping an original commit hash to its commit dict
                   (keys read here: "tree", "parents", "author",
                   "committer", "message").  Each dict visited is annotated
                   in place with "new_tree", "new_parents" and "new_hash".
    new_commits -- dict mapping a rewritten hash to the first original
                   commit dict that produced it; filled in during the walk.
    commit_hash -- original hash of the commit to rewrite.
    project     -- path of the subdirectory being extracted.

    Returns the hash of the rewritten commit.  Raises Exception when the
    commit's tree has no entry for project.

    NOTE: the function recurses once per not-yet-rewritten parent, so the
    Python call depth grows with the length of the parent chain.
    """
    debug = 'GSDEBUG' in os.environ
    if debug:
        print("Walking "+commit_hash)
    commit = commits[commit_hash]
    # A commit that already carries "new_hash" was rewritten earlier.
    if "new_hash" not in commit:
        tree = get_subtree(commit["tree"], project)
        commit["new_tree"] = tree
        if not tree:
            if debug:
                pprint.pprint(commit)
            raise Exception("Did not find project in tree "+commit["tree"]+" for commit " + commit_hash)

        # Rewrite the parents first.  Several original parents may collapse
        # onto the same rewritten commit, so drop duplicates -- but keep the
        # original parent order, which git treats as meaningful.
        new_parents = []
        for parent in commit["parents"]:
            new_parent = walk(commits, new_commits, parent, project)
            if new_parent not in new_parents:
                new_parents.append(new_parent)

        # Try to reuse an existing rewritten commit instead of creating one.
        new_hash = None
        if len(new_parents) == 1:
            new_hash = new_parents[0]
        elif len(new_parents) == 2: # Check for unnecessary merge
            if is_ancestor(new_commits, new_parents[0], new_parents[1]):
                new_hash = new_parents[0]
            elif is_ancestor(new_commits, new_parents[1], new_parents[0]):
                new_hash = new_parents[1]
        # Reuse is only valid when the subtree is unchanged from that commit.
        if new_hash and new_commits[new_hash]["new_tree"] != tree:
            new_hash = None

        if not new_hash:
            args = ["git", "commit-tree", tree]
            for new_parent in new_parents:
                args.extend(["-p", new_parent])
            # Carry the original author and committer identity and dates
            # over to the new commit through git's environment variables.
            env = {}
            for role in ("author", "committer"):
                name, email, date = parse_author_date(commit[role])
                prefix = "GIT_" + role.upper() + "_"
                env[prefix + "NAME"] = name
                env[prefix + "EMAIL"] = email
                env[prefix + "DATE"] = date
            if debug:
                pprint.pprint(env)
                pprint.pprint(args)
            new_hash = run(args, commit["message"], env).strip()

        commit["new_parents"] = new_parents
        commit["new_hash"] = new_hash
        if new_hash not in new_commits:
            new_commits[new_hash] = commit
    return commit["new_hash"]
def _read_grafts(path):
    """Read a git grafts file into a dict of commit hash -> parent hashes.

    Each line holds a commit hash followed by zero or more parent hashes,
    all whitespace-separated.  A missing or unreadable file yields an
    empty dict.
    """
    grafts = {}
    try:
        with open(path) as grafts_file:
            for line in grafts_file:
                fields = line.split()
                # Skip blank lines; a line starting with "#" cannot name a
                # commit, so it is treated as a comment.
                if not fields or fields[0].startswith("#"):
                    continue
                # A line naming only a commit grafts it with no parents.
                grafts[fields[0]] = fields[1:]
    except IOError:
        pass
    return grafts


def main(args):
    """Entry point: rewrite history for one subdirectory and print the result.

    args -- command-line arguments without the program name:
            subdir [newest [oldest]].  newest defaults to "HEAD"; when
            oldest is given, that commit is treated as having no parents.

    Prints the hash returned by walk() for the newest commit.  Returns 1
    after printing a usage message when the argument count is wrong, and
    None otherwise.
    """
    if not (1 <= len(args) <= 3):
        print("Usage: git-split subdir [newest [oldest]]")
        return 1
    debug = 'GSDEBUG' in os.environ

    project = args[0]
    if len(args) > 1:
        newest = args[1]
    else:
        newest = "HEAD"
    newest_hash = run(["git", "rev-parse", newest]).strip()
    if len(args) > 2:
        oldest = args[2]
        oldest_hash = run(["git", "rev-parse", oldest]).strip()
    else:
        oldest_hash = None

    grafts = _read_grafts(".git/info/grafts")

    # The raw log separates a commit's headers from its message, and one
    # commit from the next, with a blank line, so the pieces alternate
    # between header blocks and messages.
    temp = run(["git", "log", "--pretty=raw", newest_hash]).split("\n\n")
    commits = {}
    for headers, message in zip(temp[0::2], temp[1::2]):
        commit = {}
        commit_hash = None
        headers = [header.split(None, 1) for header in headers.split("\n")]
        for key, value in headers:
            if key == "parent":
                commit.setdefault("parents", []).append(value)
            elif key == "commit":
                commit_hash = value
            else:
                if key in commit:
                    raise Exception('Duplicate key "%s"' % key)
                commit[key] = value
        # Drop the first four characters of every message line.
        commit["message"] = "".join([line[4:]+"\n"
                                     for line in message.split("\n")])
        if commit_hash is None:
            raise Exception("Commit without hash")

        # Grafts override the recorded parents; the oldest commit requested
        # and commits without parents both end up with an empty list.
        if commit_hash in grafts:
            commit["parents"] = grafts[commit_hash]
        if commit_hash == oldest_hash or "parents" not in commit:
            commit["parents"] = []

        # check if we're still in the parents
        if debug:
            pprint.pprint(commit["parents"])
        new_parents = []
        for parent in commit["parents"]:
            if get_subtree(parent, project):
                new_parents.append(parent)
            else:
                print("Parent %s missing" % (parent, ))
        if debug:
            pprint.pprint(new_parents)
        commit["parents"] = new_parents
        commits[commit_hash] = commit

    print(walk(commits, dict(), newest_hash, project))
# psyco is optional: call psyco.full() when the module can be imported,
# otherwise carry on without it.
try:
    import psyco
    psyco.full()
except ImportError:
    pass

# Run as a script: hand the command-line arguments (minus the program
# name) to main() and use its return value as the exit status.
if __name__ == "__main__":
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)