diff -r 000000000000 -r 02cd6b52f378 synch_hg_p4/synch_hg_xxx.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/synch_hg_p4/synch_hg_xxx.pl Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,657 @@ +#! perl + +# sync_hg_xxx.pl + +use strict; +use Getopt::Long; +use File::Temp qw/ tempfile tempdir /; # for tempfile() + +my $verbose; +my $debug = 0; +my $rootdir; +my $help; +my $remoterepo; +my $hgbranch; +my $sync_prefix = "sync_"; + +# abandon_sync, all ye who enter here +# This should send a notification to someone, as it will probably mean manual repair +# +sub abandon_sync(@) + { + print "ERROR - synchronisation of $rootdir abandoned\n\n"; + print @_; + print "\n\n"; + exit(1); + } + +# utility to run an external command +# +sub run_cmd($;$) + { + my ($cmd,$failurematch) = @_; + print "--- $cmd\n" if ($verbose || $debug); + my @output = `$cmd`; + print @output,"\n---\n" if ($debug); + + if (defined $failurematch) + { + if (grep /$failurematch/, @output) + { + abandon_sync("COMMAND FAILED: $cmd\n", @output,"\n\n", + "Output matched $failurematch\n"); + } + else + { + print "CMD OK - Didn't match /$failurematch/\n" if ($debug); + } + } + if ($?) + { + print @output,"\n---\n" if ($verbose); + abandon_sync("COMMAND FAILED: exit status = $?\n",$cmd,"\n"); + } + + return @output; + } + + +# -------------- hg section ------------- +# +# Code relating to other SCM system is abstracted into +# functions to do relatively simple actions. This section +# contains the driving logic for the script, and all of the +# manipulations of Mercurial +# + +sub scm_usage(); # forward declarations +sub scm_options(); +sub scm_init($@); +sub scm_checkout($); # non-destructive, i.e. leave untouched any workspace files not managed in SCM +sub scm_checkin($$$$$$); + +sub Usage(;$) + { + my ($errmsg) = @_; + print "\nERROR: $errmsg\n" if (defined $errmsg); + scm_usage(); + print <<'EOF'; + +General options: + +-root rootdir root of the Mercurial gateway repository +-v verbose +-h print this usage information + +Setting up a new synchronisation: + +-clone remoterepo clones gateway from remote repository +-branch hgbranch Mercurial branch name (if needed) + +EOF + exit 1; + } + +Usage() if !GetOptions( + 'root=s' => \$rootdir, + 'h' => \$help, + 'v' => \$verbose, + 'debug' => \$debug, + 'clone=s' => \$remoterepo, + 'branch=s' => \$hgbranch, + scm_options() + ); + +Usage() if ($help); + +Usage("Must specify root directory for Mercurial gateway") if (!defined $rootdir); +Usage("-branch is only used with -clone") if (defined $hgbranch && !defined $remoterepo); + +if ($verbose) + { + my @hgversion = run_cmd("hg --version"); + print @hgversion; + } + +# utility to return the heads descended from a particular point +# +sub hg_heads($) + { + my ($rev_on_branch) = @_; + my @heads = run_cmd("hg heads --template {rev}\\t{tags}\\n $rev_on_branch"); + return @heads; + } + +# return an unsorted list of synchronisation points, identified by +# tags beginning with "sync_" +# +sub hg_syncpoints(;$) + { + my ($tip_rev) = @_; + my @tags = run_cmd("hg tags"); + my @syncpoints; + foreach my $tag (@tags) + { + if ($tag =~ /^tip\s+(\d+):\S+$/) + { + $$tip_rev = $1 if (defined $tip_rev); + next; + } + if ($tag =~ /^$sync_prefix(.*\S)\s+\S+$/) + { + push @syncpoints, $1; + next + } + } + if ($debug) + { + printf "Found %d syncpoints in %d tags:", scalar @syncpoints, scalar @tags; + print join("\n * ", "",@syncpoints), "\n"; + } + return @syncpoints; + } + +my $hg_updated = 0; + +# Update the Mercurial workspace to a given sync point +# +sub hg_checkout($) + { + my ($scmref) = @_; + + my $tag = $sync_prefix.$scmref; + my @output = run_cmd("hg update --clean --rev $tag", "^abort:"); + $hg_updated = 1; # could check the output in case it didn't change anything + } + +# 0. Create the gateway repository, if -clone is specified + +if (defined $remoterepo) + { + Usage("Cannot create gateway because $rootdir already exists") if (-d $rootdir); + + my $clonecmd = "clone"; + $clonecmd .= " --rev $hgbranch" if (defined $hgbranch); + my @output = run_cmd("hg $clonecmd $remoterepo $rootdir"); + $hg_updated = 1; + } + +chdir $rootdir; +Usage("$rootdir is not a Mercurial repository") if (!-d ".hg"); + +my $something_to_push = 0; + +# 1. Prime the SCM system, and get the ordered list of changes available to +# convert into Mercurial commits + +my $first_sync; # is this the first synchronisation? +my $scm_tip_only; # can we process a series of changes in the SCM system? + +my $tip_rev = -1; +my @syncpoints = hg_syncpoints(\$tip_rev); + +if (scalar @syncpoints != 0) + { + $first_sync = 0; # no - it's already synchronised + $scm_tip_only = 0; # so can allow sequence of SCM changes + } +else + { + print "First synchronisation through this gateway\n" if ($verbose); + $first_sync = 1; + if ($tip_rev != -1) + { + $scm_tip_only = 1; # because there's already something in the repository + } + else + { + print "Mercurial repository is empty\n" if ($verbose); + $scm_tip_only = 0; # allow multiple SCM changes, because there's nothing to merge with + } + } + +my $opening_scmtag; # ancestor by which we judge the headcount of the result +my $latest_scmtag; + +my @scmrefs = scm_init($scm_tip_only, @syncpoints); + +if (scalar @scmrefs == 0) + { + print "No changes to process in local SCM\n"; + $opening_scmtag = $tip_rev; + } +else + { + $opening_scmtag = $sync_prefix.$scmrefs[0]; + } +$latest_scmtag = $opening_scmtag; + +if ($scm_tip_only && scalar @scmrefs > 1) + { + print "ERROR - cannot handle multiple SCM changes in this situation\n"; + exit(1); + } + +# 2. Process the SCM changes, turning them into Mercurial commits and marking with tags +# - we guarantee that there is at most one change, if this is the first synchronisation + +foreach my $scmref (@scmrefs) + { + my ($user,$date,@description) = scm_checkout($scmref); + + # commit the result + + my ($fh,$filename) = tempfile(); + print $fh join("\n",@description), "\n"; + close $fh; + + run_cmd("hg commit --addremove --date \"$date\" --user \"$user\" --logfile $filename", "^abort\:"); + $something_to_push = 1; + + unlink($filename); # remove temporary file + + my $tag = $sync_prefix.$scmref; + run_cmd("hg tag --local $tag"); + $latest_scmtag = $tag; + print "Synchronised $scmref into Mercurial gateway repository\n"; + } + +# 3. Put the full Mercurial state into the SCM, if this is the first synchronisation + +if ($first_sync) + { + my @traceback = run_cmd("hg log --limit 1 --template {rev}\\t{node}\\t{tags}\\n"); + my $line = shift @traceback; + + chomp $line; + my ($rev,$node,$tags) = split /\t/,$line; + + if ($rev != 0) + { + # repository was not empty, so need to commit the current state back into Perforce + + my @description = run_cmd("hg log --rev $rev --template \"{author}\\n{date|isodate}\\n{desc}\""); + chomp @description; + my $author = shift @description; + my $date = shift @description; + my @changes = run_cmd("hg status --clean"); # include info on unmodified files + @changes = sort @changes; + + # Deliver changes to SCM + my $scmref = scm_checkin($node,$author,$date,\@changes,\@description,$tags); + + my $tag = $sync_prefix.$scmref; + run_cmd("hg tag --local $tag"); + $latest_scmtag = $tag; + print "Synchronised $scmref from Mercurial gateway, to initialise the synchronisation\n"; + } + + $opening_scmtag = $latest_scmtag; # don't consider history before this point + } + + +# 3. pull from Mercurial default path, deal with new stuff + +my @pull_output = run_cmd("hg pull --update"); +$hg_updated = 1; + +my @heads = hg_heads($opening_scmtag); + +if (scalar @heads > 1) + { + # more than one head - try a safe merge + print "WARNING: multiple heads\n",@heads,"\nMerge is needed\n\n\n" if ($verbose); + + my @merge_output = run_cmd("hg --config \"ui.merge=internal:fail\" merge"); # which head? + if ($merge_output[0] =~ / 0 files unresolved/) + { + # successful merge - commit it. + run_cmd("hg commit --message \"Automatic merge\""); + $something_to_push = 1; + } + else + { + # clean up any partially merged files + run_cmd("hg update -C"); + } + } + +# 4. Identify the sequence of Mercurial changes on the trunk and put them into the SCM +# - Do only the head revision if this is the first synchronisation, to avoid copying ancient history + +my $options = "--follow-first"; +$options .= " --prune $latest_scmtag"; + +my @traceback = run_cmd("hg log $options --template {rev}\\t{node}\\t{tags}\\n"); +foreach my $line (reverse @traceback) + { + chomp $line; + my ($rev,$node,$tags) = split /\t/,$line; + if ($tags =~ /$sync_prefix/) + { + # shouldn't happen - it would mean that tip goes back to an ancestor + # of the latest sync point + abandon_sync("Cannot handle this structure\n",@traceback); + } + + # Read commit information and update workspace from Mercurial + + my @description = run_cmd("hg log --rev $rev --template \"{author}\\n{date|isodate}\\n{desc}\""); + chomp @description; + my $author = shift @description; + my $date = shift @description; + my @changes = run_cmd("hg status --rev $latest_scmtag --rev $rev"); + @changes = sort @changes; + + run_cmd("hg update -C --rev $rev"); + $hg_updated = 1; + + # Deliver changes to SCM + my $scmref = scm_checkin($node,$author,$date,\@changes,\@description,$tags); + + # Tag as the latest sync point + my $tag = $sync_prefix.$scmref; + run_cmd("hg tag --local $tag"); + $latest_scmtag = $tag; + print "Synchronised $scmref from Mercurial gateway\n"; + } + +# 3. push changes to the destination gateway + +if ($something_to_push) + { + my @output = run_cmd("hg -v push --force --rev $latest_scmtag"); + print "\n",@output,"\n" if ($verbose); + print "Destination Mercurial repository has been updated\n"; + } +else + { + print "Nothing to push to destination Mercurial repository\n"; + } + +# 4. Check to see if we are in a clean state + +@heads = hg_heads($opening_scmtag); +if (scalar @heads > 1) + { + print "\n------------------\n"; + print "WARNING: Mercurial repository has multiple heads - manual merge recommended\n"; + } + +exit(0); + + +# -------------- SCM section ------------- +# +# Code relating to non-Mercurial SCM system. +# This version implements the sync with XXX +# + +# Utility functions you might want to call are: +# +# run_cmd($cmd) +# - Function which runs the specified command in a subshell and returns the stdout output as a list +# of strings. The whole script will terminate if the command returns with a non-zero exit status. +# +# abandon_sync(@messagelines) +# - Terminate the synchronisation script and pass on the message to someone who might care... +# +# $hg_updated +# - Global variable set to 1 if Mercurial changes the content of the workspace. This can be used to +# optimise the XXX system processing during initialisation, because it indicates when the workspace +# can be considered "clean" by the XXX system. Should be set to 0 whenever the XXX system knows the +# state of the workspace. +# +# hg_checkout($scmref) +# - Function to call "hg update" to the sync label associated with $scmref. Used during the scm_init +# function as part of getting the workspaces in harmony before transferring changes. Will set +# $hg_updated to 1. + + +# scm_usage() +# +# This function is called to supply the main "usage" statement for the script, as the +# interesting description is all about the interaction between Mercurial and XXX. It takes no +# arguments and should return nothing. +# +sub scm_usage() + { + print <<'EOF'; + +perl sync_hg_xxx.pl -root rootdir [options] +version 0.7 + +Synchronise a branch in XXX with a branch in Mercurial. + +The branch starts at rootdir, which is a local Mercurial repository. +The Perforce clientspec is assumed to exist, to specify modtime & rmdir, +and to exclude the .hg directory from the rootdir. + +The tool will sync rootdir/... to the specified changelist, and +then reflect all changes affecting this part of the directory tree into +Mercurial. + +The -first option is used to specify the first sync point if the gateway +has not been previously synchronised, e.g. when -clone is specified. + +Perforce-related options: + +-m maxchangelist highest changelist to consider + defaults to #head + +EOF + } + +my $max_changelist; # put XXX-specific global variables here + +sub scm_options() + { + # set defaults + + $max_changelist = "#head"; # initialise the XXX global variables here, otherwise it doesn't happen + + # return the GetOpt specification + return ( + 'm|max=s' => \$max_changelist, # add your own bits of GetOpt arguments + ); + } + + +# scm_init ($tip_only, @syncpoints) +# +# The main code calls this routine once, passing two arguments, and expects to get a +# list of identifiers to XXX changes which could be synchronised as individual Mercurial +# commits. These references can be anything which can be used as part of a Mercurial tag name, +# and the main code will call back into this routine supplying the references one at a time +# in the list order. +# +# If $tip_only is true, then the synchronisation can only handle the most recent version +# of the code in the XXX system, and will not be able to sync intermediate steps. This usually +# means that this is the first synchronisation of this branch with Mercurial. At most one identifier +# should be returned. +# +# @syncpoints contains a list of identifiers extracted from the local sync_* tags which mark a change +# that was synchronised with Mercurial in some previous run. This information allows the XXX system +# to ignore changes which have previously been processed, and to set itself into a state where +# the workspace corresponds to the contents of the XXX system at that point. +# +# The function returns the list of change identifiers for changes to be taken out of XXX and applied +# to Mercurial. This list can be empty if there are no new changes since the last synchronisation point, +# or if there is no content in the branch. + +sub scm_init($@) + { + my ($tip_only, @syncpoints) = @_; + + my $first_changelist; + + if ($tip_only) + { + # Script says we must synchronise from the XXX tip revision + + # Find the first changelist + + if (!defined $first_changelist) + { + print "XXX branch contains no changes\n"; + return (); + } + print "Synchronisation from tip ($first_changelist)\n" if ($verbose); + # fall through to complete the initialisation + } + else + { + # deduce the last synchronisation point from the @syncpoints list + + $first_changelist = some_function_of(@syncpoints); + + # Get Mercurial & XXX into the synchronised state + + hg_checkout($first_changelist); # call back to Mercurial to update from hg repository + + # get XXX into state associated with $first_changelist + + ## NB this is a bit Perforce specific - you might prefer to separate tip_only processing + ## from the normal "changes since last synchronisation" processing + ## + $first_changelist += 1; # we've already synched that one + ## + } + + # enumerate the changelists available from the XXX system & return as an ordered list + + my @scmrefs = some_function($first_changelist); + + if ($verbose) + { + printf "Found %d new changelists to process\n", scalar @scmrefs; + print join(", ", @scmrefs), "\n"; + } + + return @scmrefs; + } + +# scm_checkout($scmref) +# +# Update the workspace to reflect the given SCM reference +# +# This update should not change files which are not currently managed by the XXX system, but +# should delete workspace files if the XX change involved deleting files. It is useful, but not essential, +# for empty directories to be removed if they become empty due to deletions recorded in the XXX change. +# +# The function returns a list containing three things: +# +# $change_user - the username to be recorded for the Mercurial commit +# $change_date - the date to be recorded for the Mercurial commit (yyyy-mm-dd hh:mm:ss format) +# @change_description - the commit message to be used in Mercurial, as a list of lines of text +# +# The lines in the change description should not contain end of line markers, as these will be supplied +# by the main code. +# +sub scm_checkout($) + { + my ($scmref) = @_; + + my @change_description; + my $change_date; + my $change_user; + + # obtain the user , date and description for the given change identifier + + # apply the change to the workspace, ready for Mercurial to deduce with "hg commit --addremove" + + return ($change_user,$change_date,@change_description); + } + + +# scm_checkin($hgnode,$author,$date,$changes,$description,$tags) +# +# Describe the changes to the workspace as an SCM change, and return the new identifier (for use in a tag) +# +# The function receives 6 parameters, all of which are associated with the Mercurial commit +# +# $hgnode - Mercurial commit reference, as a globally unique hexadecimal identifier +# $author - the author recorded in the commit +# $date - the date recorded in the commit, in ISO date format (yyyy-mm-dd hh:mm:ss) +# $changes - Perl reference to the list of file changes (more information below) +# $description - Perl reference to the list of lines in the commit message +# $tags - Perl reference to the list of (non-local) tags associated with this commit +# +# The changes come from the Mercurial "hg status" command, and consist of a filename relative to the +# root of the repository, prefixed by a single letter code and a space. The codes that this routine +# must handle are M = modify, R = remove, A = add, and C = clean. The C codes are only used for the +# first synchronisation, and so should be handled as "add if not already in XXX". All of the implied +# changes have already been applied to the files in the workspace - the R files have been deleted, +# the A files have been added and the M files contain the desired content. +# +# The $hgnode should be recorded in the description in the XXX system, as should the $author and $date if +# they can't be used directly. +# +# The function returns the identifier for the completed change, which will be tagged in the Mercurial +# repository and reported to scm_init() in future synchronisation runs. +# +# WARNING: The tag information isn't currently filtered properly, and it is likely that there will +# need to be a separate scm_tag() function to handle the important Mercurial tags explicitly. Don't try +# to do much with the tag information just yet. +# +sub scm_checkin($$$$$$) + { + my ($hgnode,$author,$date,$changes,$description,$tags) = @_; + + my @hg_tags = grep !/^tip$/, split /\s+/, $tags; + my @xxx_modify; + my @xxx_remove; + my @xxx_add; + + # Separate the changes into lists of files for modify/add/remove + + foreach my $line (@$changes) + { + my $type = substr($line,0,2,""); # removes type as well as extracting it + if ($type eq "M ") + { + push @xxx_modify, $line; + next; + } + if ($type eq "A " || $type eq "C ") + { + push @xxx_add, $line; + next; + } + if ($type eq "R ") + { + push @xxx_remove, $line; + next; + } + + abandon_sync("Unexpected hg status line: $type$line"); + } + + # Create an XXX system change object to record the changes (if necessary) + # process the lists of files + + if (scalar @xxx_add) + { + # add the files in the xxx_add list + } + + if (scalar @xxx_modify) + { + # process the list of modified files + } + + if (scalar @xxx_remove) + { + # remove the files in the xxx_remove list + } + + # Create the change description from the @$description list + + # Include the $hgnode as part of the description + # Use the $author and $date if possible (by might just have to be more descriptive text + # TODO: Do something with the interesting tags? + + my $scmref = some_function(); + + return $scmref; + }