synch_hg_p4/synch_hg_xxx.pl
author Simon Howkins <simonh@symbian.org>
Wed, 14 Apr 2010 12:58:22 +0100
changeset 219 d57b367400c0
parent 1 4a4ca5a019bb
permissions -rw-r--r--
Updated release notes generation: Added copyright message Proper command line parsing Usage message if inputs are not right Added explanatory preamble to output "NEW" FCLs are marked as such Merge changesets are included in output

#! perl

# Copyright (c) 2009 Symbian Foundation Ltd
# This component and the accompanying materials are made available
# under the terms of the License "Eclipse Public License v1.0"
# which accompanies this distribution, and is available
# at the URL "http://www.eclipse.org/legal/epl-v10.html".
#
# Initial Contributors:
# Symbian Foundation Ltd - initial contribution.
# 
# Contributors:
#
# Description:
# Skeleton Perl script to synchronise a branch in an SCM system with a Mercurial repository
# It's synch_hg_p4.pl with the Perforce-specific details removed.

use strict;
use Getopt::Long;
use File::Temp qw/ tempfile tempdir /;	# for tempfile()

my $verbose;
my $debug = 0;
my $rootdir;
my $help;
my $remoterepo;
my $hgbranch;
my $sync_prefix = "sync_";

# abandon_sync, all ye who enter here
# This should send a notification to someone, as it will probably mean manual repair
#
sub abandon_sync(@)
	{
	print "ERROR - synchronisation of $rootdir abandoned\n\n";
	print @_;
	print "\n\n";
	exit(1);
	}
	
# utility to run an external command
#
sub run_cmd($;$)
	{
	my ($cmd,$failurematch) = @_;
	print "--- $cmd\n" if ($verbose || $debug);
	my @output = `$cmd`;
	print @output,"\n---\n" if ($debug);
	
	if (defined $failurematch)
		{
		if (grep /$failurematch/, @output)
			{
			abandon_sync("COMMAND FAILED: $cmd\n", @output,"\n\n",
				"Output matched $failurematch\n");
			}
		else
			{
			print "CMD OK - Didn't match /$failurematch/\n" if ($debug);
			}
		}
	if ($?)
		{
		print @output,"\n---\n" if ($verbose);
		abandon_sync("COMMAND FAILED: exit status = $?\n",$cmd,"\n");
		}
	
	return @output;
	}
	

# -------------- hg section -------------
#
# Code relating to other SCM system is abstracted into 
# functions to do relatively simple actions. This section
# contains the driving logic for the script, and all of the
# manipulations of Mercurial
#

sub scm_usage();		# forward declarations
sub scm_options();
sub scm_init($@);
sub scm_checkout($);	# non-destructive, i.e. leave untouched any workspace files not managed in SCM
sub scm_checkin($$$$$$);

sub Usage(;$)
	{
	my ($errmsg) = @_;
	print "\nERROR: $errmsg\n" if (defined $errmsg);
	scm_usage();
	print <<'EOF';

General options:

-root rootdir       root of the Mercurial gateway repository
-v                  verbose
-h                  print this usage information

Setting up a new synchronisation:

-clone remoterepo   clones gateway from remote repository 
-branch hgbranch    Mercurial branch name (if needed)

EOF
	exit 1;
	}

Usage() if !GetOptions(
	'root=s' => \$rootdir,
	'h' => \$help,
	'v' => \$verbose,
	'debug' => \$debug,
	'clone=s' => \$remoterepo,
	'branch=s' => \$hgbranch,
	scm_options()
	);

Usage() if ($help);

Usage("Must specify root directory for Mercurial gateway") if (!defined $rootdir);
Usage("-branch is only used with -clone") if (defined $hgbranch && !defined $remoterepo);

if ($verbose)
	{
	my @hgversion = run_cmd("hg --version");
	print @hgversion;
	}

# utility to return the heads descended from a particular point
#
sub hg_heads($)
	{
	my ($rev_on_branch) = @_;
	my @heads = run_cmd("hg heads --template {rev}\\t{tags}\\n $rev_on_branch");
	return @heads;
	}

# return an unsorted list of synchronisation points, identified by
# tags beginning with "sync_"
# 
sub hg_syncpoints(;$)
	{
	my ($tip_rev) = @_;
	my @tags = run_cmd("hg tags");
	my @syncpoints;
	foreach my $tag (@tags)
		{
		if ($tag =~ /^tip\s+(\d+):\S+$/)
			{
			$$tip_rev = $1 if (defined $tip_rev);
			next;
			}
		if ($tag =~ /^$sync_prefix(.*\S)\s+\S+$/)
			{
			push @syncpoints, $1;
			next
			}
		}
	if ($debug)
		{
		printf "Found %d syncpoints in %d tags:", scalar @syncpoints, scalar @tags;
		print join("\n * ", "",@syncpoints), "\n";
		}
	return @syncpoints;
	}

my $hg_updated = 0;

# Update the Mercurial workspace to a given sync point
#
sub hg_checkout($)
	{
	my ($scmref) = @_;
	
	my $tag = $sync_prefix.$scmref;
	my @output = run_cmd("hg update --clean --rev $tag", "^abort:");
	$hg_updated = 1;	# could check the output in case it didn't change anything
	}

# 0. Create the gateway repository, if -clone is specified

if (defined $remoterepo)
	{
	Usage("Cannot create gateway because $rootdir already exists") if (-d $rootdir);

	my $clonecmd = "clone";
	$clonecmd .= " --rev $hgbranch" if (defined $hgbranch);
	my @output = run_cmd("hg $clonecmd $remoterepo $rootdir");
	$hg_updated = 1;
	}

chdir $rootdir;
Usage("$rootdir is not a Mercurial repository") if (!-d ".hg");

my $something_to_push = 0;

# 1. Prime the SCM system, and get the ordered list of changes available to 
# convert into Mercurial commits

my $first_sync;		# is this the first synchronisation?
my $scm_tip_only;	# can we process a series of changes in the SCM system?

my $tip_rev = -1;
my @syncpoints = hg_syncpoints(\$tip_rev);

if (scalar @syncpoints != 0)
	{
	$first_sync = 0;	# no - it's already synchronised
	$scm_tip_only = 0;	# so can allow sequence of SCM changes
	}
else
	{
	print "First synchronisation through this gateway\n" if ($verbose);
 	$first_sync = 1;
	if ($tip_rev != -1)
		{
 		$scm_tip_only = 1;	# because there's already something in the repository
 		}
 	else
 		{
		print "Mercurial repository is empty\n" if ($verbose);
		$scm_tip_only = 0;	# allow multiple SCM changes, because there's nothing to merge with
		}
	}

my $opening_scmtag;	# ancestor by which we judge the headcount of the result
my $latest_scmtag;

my @scmrefs = scm_init($scm_tip_only, @syncpoints);

if (scalar @scmrefs == 0)
	{
	print "No changes to process in local SCM\n";
	$opening_scmtag = $tip_rev;
	}
else
	{
	$opening_scmtag = $sync_prefix.$scmrefs[0];
	}
$latest_scmtag = $opening_scmtag;

if ($scm_tip_only && scalar @scmrefs > 1)
	{
	print "ERROR - cannot handle multiple SCM changes in this situation\n";
	exit(1);
	}

# 2. Process the SCM changes, turning them into Mercurial commits and marking with tags
# - we guarantee that there is at most one change, if this is the first synchronisation

foreach my $scmref (@scmrefs)
	{
	my ($user,$date,@description) = scm_checkout($scmref);
	
	# commit the result

	my ($fh,$filename) = tempfile();
	print $fh join("\n",@description), "\n";
	close $fh;
	
	run_cmd("hg commit --addremove --date \"$date\" --user \"$user\" --logfile  $filename", "^abort\:");
	$something_to_push = 1;
	
	unlink($filename);	# remove temporary file

	my $tag = $sync_prefix.$scmref;
	run_cmd("hg tag --local $tag");
	$latest_scmtag = $tag;
	print "Synchronised $scmref into Mercurial gateway repository\n";
	}

# 3. Put the full Mercurial state into the SCM, if this is the first synchronisation

if ($first_sync)
	{
	my @traceback = run_cmd("hg log --limit 1 --template {rev}\\t{node}\\t{tags}\\n");
	my $line = shift @traceback;

	chomp $line;
	my ($rev,$node,$tags) = split /\t/,$line;
	
	if ($rev != 0)
		{
		# repository was not empty, so need to commit the current state back into Perforce
	
		my @description = run_cmd("hg log --rev $rev --template \"{author}\\n{date|isodate}\\n{desc}\"");
		chomp @description;
		my $author = shift @description;
		my $date = shift @description;
		my @changes = run_cmd("hg status --clean");	# include info on unmodified files
		@changes = sort @changes;

		# Deliver changes to SCM
		my $scmref = scm_checkin($node,$author,$date,\@changes,\@description,$tags);
		
		my $tag = $sync_prefix.$scmref;
		run_cmd("hg tag --local $tag");
		$latest_scmtag = $tag;
		print "Synchronised $scmref from Mercurial gateway, to initialise the synchronisation\n";
		}
	
	$opening_scmtag = $latest_scmtag;	# don't consider history before this point
	}


# 3. pull from Mercurial default path, deal with new stuff

my @pull_output = run_cmd("hg pull --update");
$hg_updated = 1;

my @heads = hg_heads($opening_scmtag);

if (scalar @heads > 1)
	{
	# more than one head - try a safe merge
	print "WARNING: multiple heads\n",@heads,"\nMerge is needed\n\n\n" if ($verbose);
	
	my @merge_output = run_cmd("hg --config \"ui.merge=internal:fail\" merge");	# which head?
	if ($merge_output[0] =~ / 0 files unresolved/)
		{
		# successful merge - commit it.
		run_cmd("hg commit --message \"Automatic merge\"");
		$something_to_push = 1;
		}
	else
		{
		# clean up any partially merged files
		run_cmd("hg update -C");
		}
	}

# 4. Identify the sequence of Mercurial changes on the trunk and put them into the SCM
# - Do only the head revision if this is the first synchronisation, to avoid copying ancient history

my $options = "--follow-first";
$options .= " --prune $latest_scmtag";

my @traceback = run_cmd("hg log $options --template {rev}\\t{node}\\t{tags}\\n");
foreach my $line (reverse @traceback)
	{
	chomp $line;
	my ($rev,$node,$tags) = split /\t/,$line;
	if ($tags =~ /$sync_prefix/)
		{
		# shouldn't happen - it would mean that tip goes back to an ancestor
		# of the latest sync point
		abandon_sync("Cannot handle this structure\n",@traceback);
		}
	
	# Read commit information and update workspace from Mercurial
	
	my @description = run_cmd("hg log --rev $rev --template \"{author}\\n{date|isodate}\\n{desc}\"");
	chomp @description;
	my $author = shift @description;
	my $date = shift @description;
	my @changes = run_cmd("hg status --rev $latest_scmtag --rev $rev");
	@changes = sort @changes;

	run_cmd("hg update -C --rev $rev");
	$hg_updated = 1;
	
	# Deliver changes to SCM
	my $scmref = scm_checkin($node,$author,$date,\@changes,\@description,$tags);
	
	# Tag as the latest sync point
	my $tag = $sync_prefix.$scmref;
	run_cmd("hg tag --local $tag");
	$latest_scmtag = $tag;
	print "Synchronised $scmref from Mercurial gateway\n";
	}

# 3. push changes to the destination gateway

if ($something_to_push)
	{
	my @output = run_cmd("hg -v push --force --rev $latest_scmtag");
	print "\n",@output,"\n" if ($verbose);
	print "Destination Mercurial repository has been updated\n"; 
	}
else
	{
	print "Nothing to push to destination Mercurial repository\n";
	}

# 4. Check to see if we are in a clean state

@heads = hg_heads($opening_scmtag);
if (scalar @heads > 1)
	{
	print "\n------------------\n";
	print "WARNING: Mercurial repository has multiple heads - manual merge recommended\n";
	}

exit(0);


# -------------- SCM section -------------
#
# Code relating to non-Mercurial SCM system.
# This version implements the sync with XXX
#

# Utility functions you might want to call are:
#
# run_cmd($cmd)
# - Function which runs the specified command in a subshell and returns the stdout output as a list
#   of strings. The whole script will terminate if the command returns with a non-zero exit status.
#
# abandon_sync(@messagelines)
# - Terminate the synchronisation script and pass on the message to someone who might care...
#
# $hg_updated
# - Global variable set to 1 if Mercurial changes the content of the workspace. This can be used to
#   optimise the XXX system processing during initialisation, because it indicates when the workspace
#   can be considered "clean" by the XXX system. Should be set to 0 whenever the XXX system knows the
#   state of the workspace.
#
# hg_checkout($scmref)
# - Function to call "hg update" to the sync label associated with $scmref. Used during the scm_init
#   function as part of getting the workspaces in harmony before transferring changes. Will set
#   $hg_updated to 1.


# scm_usage()
#
# This function is called to supply the main "usage" statement for the script, as the
# interesting description is all about the interaction between Mercurial and XXX. It takes no
# arguments and should return nothing.
#
sub scm_usage()
	{
	print <<'EOF';

perl sync_hg_xxx.pl -root rootdir [options]
version 0.7
 
Synchronise a branch in XXX with a branch in Mercurial.

The branch starts at rootdir, which is a local Mercurial repository.
The Perforce clientspec is assumed to exist, to specify modtime & rmdir, 
and to exclude the .hg directory from the rootdir.

The tool will sync rootdir/... to the specified changelist, and
then reflect all changes affecting this part of the directory tree into
Mercurial.

The -first option is used to specify the first sync point if the gateway
has not been previously synchronised, e.g. when -clone is specified.

Perforce-related options:

-m maxchangelist    highest changelist to consider
                    defaults to #head

EOF
	}

my $max_changelist;			# put XXX-specific global variables here

sub scm_options()
	{
	# set defaults
	
	$max_changelist = "#head";		# initialise the XXX global variables here, otherwise it doesn't happen
	
	# return the GetOpt specification
	return (
		'm|max=s' => \$max_changelist,		# add your own bits of GetOpt arguments
		);
	}


# scm_init ($tip_only, @syncpoints)
#
# The main code calls this routine once, passing two arguments, and expects to get a 
# list of identifiers to XXX changes which could be synchronised as individual Mercurial
# commits. These references can be anything which can be used as part of a Mercurial tag name,
# and the main code will call back into this routine supplying the references one at a time
# in the list order.
#
# If $tip_only is true, then the synchronisation can only handle the most recent version
# of the code in the XXX system, and will not be able to sync intermediate steps. This usually
# means that this is the first synchronisation of this branch with Mercurial. At most one identifier
# should be returned.
#
# @syncpoints contains a list of identifiers extracted from the local sync_* tags which mark a change
# that was synchronised with Mercurial in some previous run. This information allows the XXX system
# to ignore changes which have previously been processed, and to set itself into a state where
# the workspace corresponds to the contents of the XXX system at that point.
#
# The function returns the list of change identifiers for changes to be taken out of XXX and applied
# to Mercurial. This list can be empty if there are no new changes since the last synchronisation point,
# or if there is no content in the branch.

sub scm_init($@)
	{
	my ($tip_only, @syncpoints) = @_;
	
	my $first_changelist;
	
	if ($tip_only)
		{
		# Script says we must synchronise from the XXX tip revision
		
		# Find the first changelist

		if (!defined $first_changelist)
			{
			print "XXX branch contains no changes\n";
			return ();
			}
		print "Synchronisation from tip ($first_changelist)\n" if ($verbose);
		# fall through to complete the initialisation
		}
	else
		{
		# deduce the last synchronisation point from the @syncpoints list

		$first_changelist = some_function_of(@syncpoints);
		
		# Get Mercurial & XXX into the synchronised state

		hg_checkout($first_changelist);		# call back to Mercurial to update from hg repository
		
		# get XXX into state associated with $first_changelist

		## NB this is a bit Perforce specific - you might prefer to separate tip_only processing
		## from the normal "changes since last synchronisation" processing
		##
		$first_changelist += 1;		# we've already synched that one
		##
		}
	
	# enumerate the changelists available from the XXX system & return as an ordered list

	my @scmrefs = some_function($first_changelist);

	if ($verbose)
		{
		printf "Found %d new changelists to process\n", scalar @scmrefs;
		print join(", ", @scmrefs), "\n";
		}
	
	return @scmrefs;
	}

# scm_checkout($scmref)
#
# Update the workspace to reflect the given SCM reference
#
# This update should not change files which are not currently managed by the XXX system, but
# should delete workspace files if the XX change involved deleting files. It is useful, but not essential,
# for empty directories to be removed if they become empty due to deletions recorded in the XXX change.
#
# The function returns a list containing three things:
# 
# $change_user - the username to be recorded for the Mercurial commit
# $change_date - the date to be recorded for the Mercurial commit (yyyy-mm-dd hh:mm:ss format)
# @change_description - the commit message to be used in Mercurial, as a list of lines of text
#
# The lines in the change description should not contain end of line markers, as these will be supplied
# by the main code.
#
sub scm_checkout($)
	{
	my ($scmref) = @_;
	
	my @change_description;
	my $change_date;
	my $change_user;
	
	# obtain the user , date and description for the given change identifier
	
	# apply the change to the workspace, ready for Mercurial to deduce with "hg commit --addremove"
	
	return ($change_user,$change_date,@change_description);
	}


# scm_checkin($hgnode,$author,$date,$changes,$description,$tags)
#
# Describe the changes to the workspace as an SCM change, and return the new identifier (for use in a tag)
#
# The function receives 6 parameters, all of which are associated with the Mercurial commit
#
# $hgnode      - Mercurial commit reference, as a globally unique hexadecimal identifier
# $author      - the author recorded in the commit
# $date        - the date recorded in the commit, in ISO date format (yyyy-mm-dd hh:mm:ss)
# $changes     - Perl reference to the list of file changes (more information below)
# $description - Perl reference to the list of lines in the commit message
# $tags        - Perl reference to the list of (non-local) tags associated with this commit
#
# The changes come from the Mercurial "hg status" command, and consist of a filename relative to the
# root of the repository, prefixed by a single letter code and a space. The codes that this routine
# must handle are M = modify, R = remove, A = add, and C = clean. The C codes are only used for the
# first synchronisation, and so should be handled as "add if not already in XXX". All of the implied 
# changes have already been applied to the files in the workspace - the R files have been deleted, 
# the A files have been added and the M files contain the desired content.
#
# The $hgnode should be recorded in the description in the XXX system, as should the $author and $date if
# they can't be used directly. 
#
# The function returns the identifier for the completed change, which will be tagged in the Mercurial
# repository and reported to scm_init() in future synchronisation runs.
#
# WARNING: The tag information isn't currently filtered properly, and it is likely that there will
# need to be a separate scm_tag() function to handle the important Mercurial tags explicitly. Don't try
# to do much with the tag information just yet.
#
sub scm_checkin($$$$$$)
	{
	my ($hgnode,$author,$date,$changes,$description,$tags) = @_;
	
	my @hg_tags = grep !/^tip$/, split /\s+/, $tags;
	my @xxx_modify;
	my @xxx_remove;
	my @xxx_add;
	
	# Separate the changes into lists of files for modify/add/remove
	
	foreach my $line (@$changes)
		{
		my $type = substr($line,0,2,"");	# removes type as well as extracting it
		if ($type eq "M ")
			{
			push @xxx_modify, $line;
			next;
			}
		if ($type eq "A " || $type eq "C ")
			{
			push @xxx_add, $line;
			next;
			}		
		if ($type eq "R ")
			{
			push @xxx_remove, $line;
			next;
			}
		
		abandon_sync("Unexpected hg status line: $type$line");
		}
	
	# Create an XXX system change object to record the changes (if necessary)
	# process the lists of files
	
	if (scalar @xxx_add)
		{
		# add the files in the xxx_add list
		}
	
	if (scalar @xxx_modify)
		{
		# process the list of modified files
		}
		
	if (scalar @xxx_remove)
		{
		# remove the files in the xxx_remove list
		}
	
	# Create the change description from the @$description list
	
	# Include the $hgnode as part of the description
	# Use the $author and $date if possible (by might just have to be more descriptive text
	# TODO: Do something with the interesting tags?
	
	my $scmref = some_function();
	
	return $scmref;
	}