synch_hg_p4/synch_hg_xxx.pl
changeset 0 02cd6b52f378
child 1 4a4ca5a019bb
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/synch_hg_p4/synch_hg_xxx.pl	Thu May 28 10:10:03 2009 +0100
@@ -0,0 +1,657 @@
+#! perl
+
+# sync_hg_xxx.pl
+
+use strict;
+use Getopt::Long;
+use File::Temp qw/ tempfile tempdir /;	# for tempfile()
+
+my $verbose;
+my $debug = 0;
+my $rootdir;
+my $help;
+my $remoterepo;
+my $hgbranch;
+my $sync_prefix = "sync_";
+
+# abandon_sync, all ye who enter here
+# This should send a notification to someone, as it will probably mean manual repair
+#
+sub abandon_sync(@)
+	{
+	print "ERROR - synchronisation of $rootdir abandoned\n\n";
+	print @_;
+	print "\n\n";
+	exit(1);
+	}
+	
+# utility to run an external command
+#
+sub run_cmd($;$)
+	{
+	my ($cmd,$failurematch) = @_;
+	print "--- $cmd\n" if ($verbose || $debug);
+	my @output = `$cmd`;
+	print @output,"\n---\n" if ($debug);
+	
+	if (defined $failurematch)
+		{
+		if (grep /$failurematch/, @output)
+			{
+			abandon_sync("COMMAND FAILED: $cmd\n", @output,"\n\n",
+				"Output matched $failurematch\n");
+			}
+		else
+			{
+			print "CMD OK - Didn't match /$failurematch/\n" if ($debug);
+			}
+		}
+	if ($?)
+		{
+		print @output,"\n---\n" if ($verbose);
+		abandon_sync("COMMAND FAILED: exit status = $?\n",$cmd,"\n");
+		}
+	
+	return @output;
+	}
+	
+
+# -------------- hg section -------------
+#
+# Code relating to other SCM system is abstracted into 
+# functions to do relatively simple actions. This section
+# contains the driving logic for the script, and all of the
+# manipulations of Mercurial
+#
+
+sub scm_usage();		# forward declarations
+sub scm_options();
+sub scm_init($@);
+sub scm_checkout($);	# non-destructive, i.e. leave untouched any workspace files not managed in SCM
+sub scm_checkin($$$$$$);
+
+sub Usage(;$)
+	{
+	my ($errmsg) = @_;
+	print "\nERROR: $errmsg\n" if (defined $errmsg);
+	scm_usage();
+	print <<'EOF';
+
+General options:
+
+-root rootdir       root of the Mercurial gateway repository
+-v                  verbose
+-h                  print this usage information
+
+Setting up a new synchronisation:
+
+-clone remoterepo   clones gateway from remote repository 
+-branch hgbranch    Mercurial branch name (if needed)
+
+EOF
+	exit 1;
+	}
+
+Usage() if !GetOptions(
+	'root=s' => \$rootdir,
+	'h' => \$help,
+	'v' => \$verbose,
+	'debug' => \$debug,
+	'clone=s' => \$remoterepo,
+	'branch=s' => \$hgbranch,
+	scm_options()
+	);
+
+Usage() if ($help);
+
+Usage("Must specify root directory for Mercurial gateway") if (!defined $rootdir);
+Usage("-branch is only used with -clone") if (defined $hgbranch && !defined $remoterepo);
+
+if ($verbose)
+	{
+	my @hgversion = run_cmd("hg --version");
+	print @hgversion;
+	}
+
+# utility to return the heads descended from a particular point
+#
+sub hg_heads($)
+	{
+	my ($rev_on_branch) = @_;
+	my @heads = run_cmd("hg heads --template {rev}\\t{tags}\\n $rev_on_branch");
+	return @heads;
+	}
+
+# return an unsorted list of synchronisation points, identified by
+# tags beginning with "sync_"
+# 
+sub hg_syncpoints(;$)
+	{
+	my ($tip_rev) = @_;
+	my @tags = run_cmd("hg tags");
+	my @syncpoints;
+	foreach my $tag (@tags)
+		{
+		if ($tag =~ /^tip\s+(\d+):\S+$/)
+			{
+			$$tip_rev = $1 if (defined $tip_rev);
+			next;
+			}
+		if ($tag =~ /^$sync_prefix(.*\S)\s+\S+$/)
+			{
+			push @syncpoints, $1;
+			next
+			}
+		}
+	if ($debug)
+		{
+		printf "Found %d syncpoints in %d tags:", scalar @syncpoints, scalar @tags;
+		print join("\n * ", "",@syncpoints), "\n";
+		}
+	return @syncpoints;
+	}
+
+my $hg_updated = 0;
+
+# Update the Mercurial workspace to a given sync point
+#
+sub hg_checkout($)
+	{
+	my ($scmref) = @_;
+	
+	my $tag = $sync_prefix.$scmref;
+	my @output = run_cmd("hg update --clean --rev $tag", "^abort:");
+	$hg_updated = 1;	# could check the output in case it didn't change anything
+	}
+
+# 0. Create the gateway repository, if -clone is specified
+
+if (defined $remoterepo)
+	{
+	Usage("Cannot create gateway because $rootdir already exists") if (-d $rootdir);
+
+	my $clonecmd = "clone";
+	$clonecmd .= " --rev $hgbranch" if (defined $hgbranch);
+	my @output = run_cmd("hg $clonecmd $remoterepo $rootdir");
+	$hg_updated = 1;
+	}
+
+chdir $rootdir;
+Usage("$rootdir is not a Mercurial repository") if (!-d ".hg");
+
+my $something_to_push = 0;
+
+# 1. Prime the SCM system, and get the ordered list of changes available to 
+# convert into Mercurial commits
+
+my $first_sync;		# is this the first synchronisation?
+my $scm_tip_only;	# can we process a series of changes in the SCM system?
+
+my $tip_rev = -1;
+my @syncpoints = hg_syncpoints(\$tip_rev);
+
+if (scalar @syncpoints != 0)
+	{
+	$first_sync = 0;	# no - it's already synchronised
+	$scm_tip_only = 0;	# so can allow sequence of SCM changes
+	}
+else
+	{
+	print "First synchronisation through this gateway\n" if ($verbose);
+ 	$first_sync = 1;
+	if ($tip_rev != -1)
+		{
+ 		$scm_tip_only = 1;	# because there's already something in the repository
+ 		}
+ 	else
+ 		{
+		print "Mercurial repository is empty\n" if ($verbose);
+		$scm_tip_only = 0;	# allow multiple SCM changes, because there's nothing to merge with
+		}
+	}
+
+my $opening_scmtag;	# ancestor by which we judge the headcount of the result
+my $latest_scmtag;
+
+my @scmrefs = scm_init($scm_tip_only, @syncpoints);
+
+if (scalar @scmrefs == 0)
+	{
+	print "No changes to process in local SCM\n";
+	$opening_scmtag = $tip_rev;
+	}
+else
+	{
+	$opening_scmtag = $sync_prefix.$scmrefs[0];
+	}
+$latest_scmtag = $opening_scmtag;
+
+if ($scm_tip_only && scalar @scmrefs > 1)
+	{
+	print "ERROR - cannot handle multiple SCM changes in this situation\n";
+	exit(1);
+	}
+
+# 2. Process the SCM changes, turning them into Mercurial commits and marking with tags
+# - we guarantee that there is at most one change, if this is the first synchronisation
+
+foreach my $scmref (@scmrefs)
+	{
+	my ($user,$date,@description) = scm_checkout($scmref);
+	
+	# commit the result
+
+	my ($fh,$filename) = tempfile();
+	print $fh join("\n",@description), "\n";
+	close $fh;
+	
+	run_cmd("hg commit --addremove --date \"$date\" --user \"$user\" --logfile  $filename", "^abort\:");
+	$something_to_push = 1;
+	
+	unlink($filename);	# remove temporary file
+
+	my $tag = $sync_prefix.$scmref;
+	run_cmd("hg tag --local $tag");
+	$latest_scmtag = $tag;
+	print "Synchronised $scmref into Mercurial gateway repository\n";
+	}
+
+# 3. Put the full Mercurial state into the SCM, if this is the first synchronisation
+
+if ($first_sync)
+	{
+	my @traceback = run_cmd("hg log --limit 1 --template {rev}\\t{node}\\t{tags}\\n");
+	my $line = shift @traceback;
+
+	chomp $line;
+	my ($rev,$node,$tags) = split /\t/,$line;
+	
+	if ($rev != 0)
+		{
+		# repository was not empty, so need to commit the current state back into Perforce
+	
+		my @description = run_cmd("hg log --rev $rev --template \"{author}\\n{date|isodate}\\n{desc}\"");
+		chomp @description;
+		my $author = shift @description;
+		my $date = shift @description;
+		my @changes = run_cmd("hg status --clean");	# include info on unmodified files
+		@changes = sort @changes;
+
+		# Deliver changes to SCM
+		my $scmref = scm_checkin($node,$author,$date,\@changes,\@description,$tags);
+		
+		my $tag = $sync_prefix.$scmref;
+		run_cmd("hg tag --local $tag");
+		$latest_scmtag = $tag;
+		print "Synchronised $scmref from Mercurial gateway, to initialise the synchronisation\n";
+		}
+	
+	$opening_scmtag = $latest_scmtag;	# don't consider history before this point
+	}
+
+
+# 3. pull from Mercurial default path, deal with new stuff
+
+my @pull_output = run_cmd("hg pull --update");
+$hg_updated = 1;
+
+my @heads = hg_heads($opening_scmtag);
+
+if (scalar @heads > 1)
+	{
+	# more than one head - try a safe merge
+	print "WARNING: multiple heads\n",@heads,"\nMerge is needed\n\n\n" if ($verbose);
+	
+	my @merge_output = run_cmd("hg --config \"ui.merge=internal:fail\" merge");	# which head?
+	if ($merge_output[0] =~ / 0 files unresolved/)
+		{
+		# successful merge - commit it.
+		run_cmd("hg commit --message \"Automatic merge\"");
+		$something_to_push = 1;
+		}
+	else
+		{
+		# clean up any partially merged files
+		run_cmd("hg update -C");
+		}
+	}
+
+# 4. Identify the sequence of Mercurial changes on the trunk and put them into the SCM
+# - Do only the head revision if this is the first synchronisation, to avoid copying ancient history
+
+my $options = "--follow-first";
+$options .= " --prune $latest_scmtag";
+
+my @traceback = run_cmd("hg log $options --template {rev}\\t{node}\\t{tags}\\n");
+foreach my $line (reverse @traceback)
+	{
+	chomp $line;
+	my ($rev,$node,$tags) = split /\t/,$line;
+	if ($tags =~ /$sync_prefix/)
+		{
+		# shouldn't happen - it would mean that tip goes back to an ancestor
+		# of the latest sync point
+		abandon_sync("Cannot handle this structure\n",@traceback);
+		}
+	
+	# Read commit information and update workspace from Mercurial
+	
+	my @description = run_cmd("hg log --rev $rev --template \"{author}\\n{date|isodate}\\n{desc}\"");
+	chomp @description;
+	my $author = shift @description;
+	my $date = shift @description;
+	my @changes = run_cmd("hg status --rev $latest_scmtag --rev $rev");
+	@changes = sort @changes;
+
+	run_cmd("hg update -C --rev $rev");
+	$hg_updated = 1;
+	
+	# Deliver changes to SCM
+	my $scmref = scm_checkin($node,$author,$date,\@changes,\@description,$tags);
+	
+	# Tag as the latest sync point
+	my $tag = $sync_prefix.$scmref;
+	run_cmd("hg tag --local $tag");
+	$latest_scmtag = $tag;
+	print "Synchronised $scmref from Mercurial gateway\n";
+	}
+
+# 3. push changes to the destination gateway
+
+if ($something_to_push)
+	{
+	my @output = run_cmd("hg -v push --force --rev $latest_scmtag");
+	print "\n",@output,"\n" if ($verbose);
+	print "Destination Mercurial repository has been updated\n"; 
+	}
+else
+	{
+	print "Nothing to push to destination Mercurial repository\n";
+	}
+
+# 4. Check to see if we are in a clean state
+
+@heads = hg_heads($opening_scmtag);
+if (scalar @heads > 1)
+	{
+	print "\n------------------\n";
+	print "WARNING: Mercurial repository has multiple heads - manual merge recommended\n";
+	}
+
+exit(0);
+
+
+# -------------- SCM section -------------
+#
+# Code relating to non-Mercurial SCM system.
+# This version implements the sync with XXX
+#
+
+# Utility functions you might want to call are:
+#
+# run_cmd($cmd)
+# - Function which runs the specified command in a subshell and returns the stdout output as a list
+#   of strings. The whole script will terminate if the command returns with a non-zero exit status.
+#
+# abandon_sync(@messagelines)
+# - Terminate the synchronisation script and pass on the message to someone who might care...
+#
+# $hg_updated
+# - Global variable set to 1 if Mercurial changes the content of the workspace. This can be used to
+#   optimise the XXX system processing during initialisation, because it indicates when the workspace
+#   can be considered "clean" by the XXX system. Should be set to 0 whenever the XXX system knows the
+#   state of the workspace.
+#
+# hg_checkout($scmref)
+# - Function to call "hg update" to the sync label associated with $scmref. Used during the scm_init
+#   function as part of getting the workspaces in harmony before transferring changes. Will set
+#   $hg_updated to 1.
+
+
+# scm_usage()
+#
+# This function is called to supply the main "usage" statement for the script, as the
+# interesting description is all about the interaction between Mercurial and XXX. It takes no
+# arguments and should return nothing.
+#
+sub scm_usage()
+	{
+	print <<'EOF';
+
+perl sync_hg_xxx.pl -root rootdir [options]
+version 0.7
+ 
+Synchronise a branch in XXX with a branch in Mercurial.
+
+The branch starts at rootdir, which is a local Mercurial repository.
+The Perforce clientspec is assumed to exist, to specify modtime & rmdir, 
+and to exclude the .hg directory from the rootdir.
+
+The tool will sync rootdir/... to the specified changelist, and
+then reflect all changes affecting this part of the directory tree into
+Mercurial.
+
+The -first option is used to specify the first sync point if the gateway
+has not been previously synchronised, e.g. when -clone is specified.
+
+Perforce-related options:
+
+-m maxchangelist    highest changelist to consider
+                    defaults to #head
+
+EOF
+	}
+
+my $max_changelist;			# put XXX-specific global variables here
+
+sub scm_options()
+	{
+	# set defaults
+	
+	$max_changelist = "#head";		# initialise the XXX global variables here, otherwise it doesn't happen
+	
+	# return the GetOpt specification
+	return (
+		'm|max=s' => \$max_changelist,		# add your own bits of GetOpt arguments
+		);
+	}
+
+
+# scm_init ($tip_only, @syncpoints)
+#
+# The main code calls this routine once, passing two arguments, and expects to get a 
+# list of identifiers to XXX changes which could be synchronised as individual Mercurial
+# commits. These references can be anything which can be used as part of a Mercurial tag name,
+# and the main code will call back into this routine supplying the references one at a time
+# in the list order.
+#
+# If $tip_only is true, then the synchronisation can only handle the most recent version
+# of the code in the XXX system, and will not be able to sync intermediate steps. This usually
+# means that this is the first synchronisation of this branch with Mercurial. At most one identifier
+# should be returned.
+#
+# @syncpoints contains a list of identifiers extracted from the local sync_* tags which mark a change
+# that was synchronised with Mercurial in some previous run. This information allows the XXX system
+# to ignore changes which have previously been processed, and to set itself into a state where
+# the workspace corresponds to the contents of the XXX system at that point.
+#
+# The function returns the list of change identifiers for changes to be taken out of XXX and applied
+# to Mercurial. This list can be empty if there are no new changes since the last synchronisation point,
+# or if there is no content in the branch.
+
+sub scm_init($@)
+	{
+	my ($tip_only, @syncpoints) = @_;
+	
+	my $first_changelist;
+	
+	if ($tip_only)
+		{
+		# Script says we must synchronise from the XXX tip revision
+		
+		# Find the first changelist
+
+		if (!defined $first_changelist)
+			{
+			print "XXX branch contains no changes\n";
+			return ();
+			}
+		print "Synchronisation from tip ($first_changelist)\n" if ($verbose);
+		# fall through to complete the initialisation
+		}
+	else
+		{
+		# deduce the last synchronisation point from the @syncpoints list
+
+		$first_changelist = some_function_of(@syncpoints);
+		
+		# Get Mercurial & XXX into the synchronised state
+
+		hg_checkout($first_changelist);		# call back to Mercurial to update from hg repository
+		
+		# get XXX into state associated with $first_changelist
+
+		## NB this is a bit Perforce specific - you might prefer to separate tip_only processing
+		## from the normal "changes since last synchronisation" processing
+		##
+		$first_changelist += 1;		# we've already synched that one
+		##
+		}
+	
+	# enumerate the changelists available from the XXX system & return as an ordered list
+
+	my @scmrefs = some_function($first_changelist);
+
+	if ($verbose)
+		{
+		printf "Found %d new changelists to process\n", scalar @scmrefs;
+		print join(", ", @scmrefs), "\n";
+		}
+	
+	return @scmrefs;
+	}
+
+# scm_checkout($scmref)
+#
+# Update the workspace to reflect the given SCM reference
+#
+# This update should not change files which are not currently managed by the XXX system, but
+# should delete workspace files if the XX change involved deleting files. It is useful, but not essential,
+# for empty directories to be removed if they become empty due to deletions recorded in the XXX change.
+#
+# The function returns a list containing three things:
+# 
+# $change_user - the username to be recorded for the Mercurial commit
+# $change_date - the date to be recorded for the Mercurial commit (yyyy-mm-dd hh:mm:ss format)
+# @change_description - the commit message to be used in Mercurial, as a list of lines of text
+#
+# The lines in the change description should not contain end of line markers, as these will be supplied
+# by the main code.
+#
+sub scm_checkout($)
+	{
+	my ($scmref) = @_;
+	
+	my @change_description;
+	my $change_date;
+	my $change_user;
+	
+	# obtain the user , date and description for the given change identifier
+	
+	# apply the change to the workspace, ready for Mercurial to deduce with "hg commit --addremove"
+	
+	return ($change_user,$change_date,@change_description);
+	}
+
+
+# scm_checkin($hgnode,$author,$date,$changes,$description,$tags)
+#
+# Describe the changes to the workspace as an SCM change, and return the new identifier (for use in a tag)
+#
+# The function receives 6 parameters, all of which are associated with the Mercurial commit
+#
+# $hgnode      - Mercurial commit reference, as a globally unique hexadecimal identifier
+# $author      - the author recorded in the commit
+# $date        - the date recorded in the commit, in ISO date format (yyyy-mm-dd hh:mm:ss)
+# $changes     - Perl reference to the list of file changes (more information below)
+# $description - Perl reference to the list of lines in the commit message
+# $tags        - Perl reference to the list of (non-local) tags associated with this commit
+#
+# The changes come from the Mercurial "hg status" command, and consist of a filename relative to the
+# root of the repository, prefixed by a single letter code and a space. The codes that this routine
+# must handle are M = modify, R = remove, A = add, and C = clean. The C codes are only used for the
+# first synchronisation, and so should be handled as "add if not already in XXX". All of the implied 
+# changes have already been applied to the files in the workspace - the R files have been deleted, 
+# the A files have been added and the M files contain the desired content.
+#
+# The $hgnode should be recorded in the description in the XXX system, as should the $author and $date if
+# they can't be used directly. 
+#
+# The function returns the identifier for the completed change, which will be tagged in the Mercurial
+# repository and reported to scm_init() in future synchronisation runs.
+#
+# WARNING: The tag information isn't currently filtered properly, and it is likely that there will
+# need to be a separate scm_tag() function to handle the important Mercurial tags explicitly. Don't try
+# to do much with the tag information just yet.
+#
+sub scm_checkin($$$$$$)
+	{
+	my ($hgnode,$author,$date,$changes,$description,$tags) = @_;
+	
+	my @hg_tags = grep !/^tip$/, split /\s+/, $tags;
+	my @xxx_modify;
+	my @xxx_remove;
+	my @xxx_add;
+	
+	# Separate the changes into lists of files for modify/add/remove
+	
+	foreach my $line (@$changes)
+		{
+		my $type = substr($line,0,2,"");	# removes type as well as extracting it
+		if ($type eq "M ")
+			{
+			push @xxx_modify, $line;
+			next;
+			}
+		if ($type eq "A " || $type eq "C ")
+			{
+			push @xxx_add, $line;
+			next;
+			}		
+		if ($type eq "R ")
+			{
+			push @xxx_remove, $line;
+			next;
+			}
+		
+		abandon_sync("Unexpected hg status line: $type$line");
+		}
+	
+	# Create an XXX system change object to record the changes (if necessary)
+	# process the lists of files
+	
+	if (scalar @xxx_add)
+		{
+		# add the files in the xxx_add list
+		}
+	
+	if (scalar @xxx_modify)
+		{
+		# process the list of modified files
+		}
+		
+	if (scalar @xxx_remove)
+		{
+		# remove the files in the xxx_remove list
+		}
+	
+	# Create the change description from the @$description list
+	
+	# Include the $hgnode as part of the description
+	# Use the $author and $date if possible (by might just have to be more descriptive text
+	# TODO: Do something with the interesting tags?
+	
+	my $scmref = some_function();
+	
+	return $scmref;
+	}