#!/bin/sh

# taskd/clone.sh from girocco.git
# Listing header: "various: add read-only mode support"
# Listing blob id: aba3cb6e8f0e7171ad7cadf65d7e005fde2a9454
#
# Invoked from taskd/taskd.pl

# Pull in the shared shell library and the gc helper functions.
# NOTE(review): @basedir@ looks like a placeholder that is substituted
# before this script is run -- confirm against the install process.
. @basedir@/shlib.sh
. @basedir@/jobd/gc-util-functions.sh

# Stop at the first command failure that is not explicitly handled
set -e

# Group-writable files by default; world-writable when permission control
# is configured as "Hooks"
umask 002
[ "$cfg_permission_control" != "Hooks" ] || umask 000
clean_git_env
# darcs fast-export | git fast-import with error handling
#
# Arguments: $1 - darcs repository location handed to darcs-fast-export
# Returns:   0 only when BOTH the exporter and the importer exited with 0
#
# The function body is a subshell, so none of the descriptor shuffling or
# the _err*/_e* variables leak into the caller.
#
# How the two exit codes are collected without "pipefail":
#   * fd 3 is first pointed at the real stdout
#   * inside the command substitution the descriptors are swapped so that
#     fd 1 is the real stdout again and fd 3 is the substitution's capture
#   * each side of the pipeline writes its own exit code to fd 3 (the
#     exporter/importer themselves run with fd 3 closed)
#   * the here document therefore contains the two exit codes, one per
#     line, and they are read back into _err1 and _err2
# The order in which the two codes arrive is irrelevant because success
# requires both of them to be 0.
git_darcs_fetch() (
  set_utf8_locale
  _err1=
  _err2=
  exec 3>&1
  { read -r _err1 || :; read -r _err2 || :; } <<-EOT
$(
  exec 4>&3 3>&1 1>&4 4>&-
  {
    _e1=0
    "$cfg_basedir"/bin/darcs-fast-export \
      --export-marks="$(pwd)/dfe-marks" "$1" 3>&- || _e1=$?
    echo $_e1 >&3
  } |
  {
    _e2=0
    git_ulimit fast-import \
      --export-marks="$(pwd)/gfi-marks" \
      --export-pack-edges="$(pwd)/gfi-packs" \
      --force 3>&- || _e2=$?
    echo $_e2 >&3
  }
)
EOT
  exec 3>&-
  [ "$_err1" = 0 ] && [ "$_err2" = 0 ]
  return $?
)
# bzr fast-export | git fast-import with error handling
#
# Arguments: $1 - bzr branch location handed to "bzr fast-export"
# Returns:   0 only when BOTH the exporter and the importer exited with 0
#
# The function body is a subshell, so the BZR_LOG export, the descriptor
# shuffling and the _err*/_e* variables do not leak into the caller.
#
# Each side of the pipeline writes its own exit code to fd 3, which inside
# the command substitution is the substitution's capture; the here document
# therefore holds the two exit codes (one per line) which are read back
# into _err1 and _err2.  fd 1 is swapped back to the real stdout so that
# normal output is not captured.
git_bzr_fetch() (
  set_utf8_locale
  BZR_LOG=/dev/null
  export BZR_LOG
  _err1=
  _err2=
  exec 3>&1
  { read -r _err1 || :; read -r _err2 || :; } <<-EOT
$(
  exec 4>&3 3>&1 1>&4 4>&-
  {
    _e1=0
    bzr fast-export --plain \
      --export-marks="$(pwd)/bfe-marks" "$1" 3>&- || _e1=$?
    echo $_e1 >&3
  } |
  {
    _e2=0
    git_ulimit fast-import \
      --export-marks="$(pwd)/gfi-marks" \
      --export-pack-edges="$(pwd)/gfi-packs" \
      --force 3>&- || _e2=$?
    echo $_e2 >&3
  }
)
EOT
  exec 3>&-
  [ "$_err1" = 0 ] && [ "$_err2" = 0 ]
  return $?
)
# removes any git-svn leftovers
#
# Operates on the current directory (expected to be the root of $GIT_DIR).
# Takes no arguments; failures of the find commands are ignored.
cleanup_git_svn_leftovers() {

  # Remove any stale git-svn temp files
  # The git-svn process creates temp files with random 10 character names
  # in the root of $GIT_DIR.  Unfortunately they do not have a recognizable
  # prefix, so we just have to kill any files with a 10-character name.
  # All characters are chosen from
  # [A-Za-z0-9_] so we can at least check that and fortunately the only
  # collision is 'FETCH_HEAD' but that doesn't matter.
  # There may also be temp files with a Git_ prefix as well.
  _randchar='[A-Za-z0-9_]'
  _randchar2="$_randchar$_randchar"
  _randchar4="$_randchar2$_randchar2"
  # 4 + 4 + 2 bracket expressions == a glob matching exactly 10 characters
  _randchar10="$_randchar4$_randchar4$_randchar2"
  find -L . -maxdepth 1 -type f -name "$_randchar10" -exec rm -f '{}' + || :
  find -L . -maxdepth 1 -type f -name "Git_*" -exec rm -f '{}' + || :
}
# removes all crud leftover from a previous failed clone attempt
# only files that could potentially consume a non-trivial amount
# of space are removed by this function
# refs and objects are left unchanged by this function
#
# Operates on the current directory; takes no arguments.
_cleanup_failed_clone_bloat() {

  # Any pre-existing FETCH_HEAD from a previous clone failed or not is
  # now garbage to be removed
  rm -f FETCH_HEAD

  # Any pre-existing index doesn't belong and is now garbage
  rm -f index

  # Remove any left-over svn dir from a previous failed attempt
  rm -rf svn

  # Remove any left-over .darcs dirs from a previous failed attempt
  rm -rf *.darcs

  # Remove any left-over repo.hg dir from a previous failed attempt
  rm -rf repo.hg

  # Remove any left-over import/export/temp files from a previous failed attempt
  rm -f bfe-marks dfe-marks hg2git-heads hg2git-mapping hg2git-marks* hg2git-state \
    gfi-marks gfi-packs .pkts-temp .refs-temp

  # Remove any git-svn junk
  cleanup_git_svn_leftovers

  # There should not be any reflogs files after a failed clone
  # (only the files go, the directory tree itself is kept)
  if [ -d reflogs ]; then
    find -H reflogs -type f -exec rm -f '{}' + >/dev/null 2>&1 || :
  fi

  # Nor any ref logs (which are normally disabled)
  if [ -d logs ]; then
    find -H logs -type f -exec rm -f '{}' + >/dev/null 2>&1 || :
  fi
}
# Empties the repository in the current directory of all refs and objects
# while leaving it structurally intact: HEAD is pointed back at
# refs/heads/master, the ref/pack index files are removed, every regular
# file below refs/ and objects/ is deleted (directories are kept) and any
# failed-clone bloat is cleaned up.  If an htmlcache directory exists its
# "changed" marker file is (re)created/truncated.
# Takes no arguments; every step is best-effort.
clear_all_objects_and_packs() {
  # make sure the repository is not left broken
  printf '%s\n' 'ref: refs/heads/master' >HEAD || :
  rm -f packed-refs info/refs objects/info/packs objects/info/commit-graph || :
  cleardirs=
  [ ! -d refs ] || cleardirs="refs"
  [ ! -d objects ] || cleardirs="${cleardirs:+$cleardirs }objects"
  if [ -n "$cleardirs" ]; then
    # $cleardirs is deliberately unquoted: it is a space-separated list
    find -H $cleardirs -type f -exec rm -f '{}' + >/dev/null 2>&1 || :
  fi
  _cleanup_failed_clone_bloat
  ! [ -d htmlcache ] || { >htmlcache/changed; } 2>/dev/null || :
}
# Exit status of the script (set by the EXIT trap) and the object count
# that tripped the object limit (0 when the limit was not exceeded);
# both are read by send_clone_failed
exit_err=0
exit_objs=0

# Failure handler, run from the EXIT trap with the project directory as
# the current directory.
#
# Reads:   exit_err, exit_objs, proj, url, var_xfsz_err and the cfg_*
#          settings referenced below
# Effects: * disables the EXIT trap and detaches stdout/stderr from .clonelog
#          * when a size or object limit was exceeded: empties the
#            repository, records the reason in .clone_failed_exceeds_limit
#            and .clonelog and removes .clone_in_progress/.clone_failed
#          * mails a failure notice (including the, possibly elided,
#            .clonelog) to the project owner and, when warranted, the admin
# Takes no arguments; mail delivery failure is ignored.
send_clone_failed() {
  trap "" EXIT
  # We must now close the .clonelog file that is open on stdout and stderr
  exec >/dev/null 2>&1
  # It would be nice if git propagated the SIGXFSZ error on up to the shell,
  # perhaps it will at some point in the future.  In any case, the only file
  # that might be too big would end up in the objects subdirectory.
  # Search for any files of size $cfg_max_file_size512 blocks (if set) or
  # larger and trigger the too big failure that way as well.
  toobig=
  if [ "${cfg_max_file_size512:-0}" != "0" ]; then
    toobig="$(find -H objects -type f -size +$(( $cfg_max_file_size512 - 1 )) -print 2>/dev/null |
      head -n 1)" || :
  fi
  failaddrs="$(config_get owner)" || :
  ccadm="${cfg_admincc:-0}"
  xfsz_err=""
  if [ -n "$toobig" ] || [ "${exit_err:-0}" = "${var_xfsz_err:-999}" ]; then
    ccadm=1
    # du -sk reports KiB; scale to MiB/GiB once the number reaches 5120
    reposize="$(cd objects && du -sk . | LC_ALL=C awk '{print $1}')" || :
    if [ -n "$reposize" ]; then
      if [ $reposize -lt 5120 ]; then
        reposize="$reposize KiB"
      else
        reposize="$(( $reposize / 1024 ))"
        if [ $reposize -lt 5120 ]; then
          reposize="$reposize MiB"
        else
          reposize="$(( $reposize / 1024 ))"
          reposize="$reposize GiB"
        fi
      fi
    fi
    # The message starts with two newlines (stripped again below with
    # "#??" and "#?") so that it can be glued onto the mail's first line
    xfsz_err="

The source repository exceeds our maximum allowed repository size."
    clear_all_objects_and_packs
  fi
  xobjs_err=""
  if [ "${exit_objs:-0}" != "0" ]; then
    ccadm=1
    xobjs_err="

The source repository${exit_objs:+ ($exit_objs objects)} exceeds our maximum allowed object limit."
    clear_all_objects_and_packs
  fi
  if [ -n "$xfsz_err" ] || [ -n "$xobjs_err" ]; then
    # Mark as an exceeds limit clone failure and remember the exceeds
    # message(s) in both .clone_failed_exceeds_limit and .clonelog
    >.clone_failed_exceeds_limit
    if [ -n "$xfsz_err" ]; then
      [ -z "$reposize" ] ||
      printf '%s\n' "repository size at failure: $reposize" >>.clone_failed_exceeds_limit
      printf '%s\n' "${xfsz_err#??}" >>.clone_failed_exceeds_limit
      printf '%s\n' "${xfsz_err#?}" >>.clonelog
    fi
    if [ -n "$xobjs_err" ]; then
      printf '%s\n' "${xobjs_err#??}" >>.clone_failed_exceeds_limit
      printf '%s\n' "${xobjs_err#?}" >>.clonelog
    fi
    # Remove the .clone_failed file to prevent "restarting" the clone since
    # restarting it will not cure the fact that it exceeds allowed limits
    # And the .clone_in_progress file has to go at the same time
    rm -f .clone_in_progress .clone_failed
  fi
  ! [ -d htmlcache ] || { >htmlcache/changed; } 2>/dev/null || :
  # Add the admin to the recipients when a cc is wanted and one is configured
  [ "$ccadm" = "0" ] || [ -z "$cfg_admin" ] ||
  if [ -z "$failaddrs" ]; then failaddrs="$cfg_admin"; else failaddrs="$failaddrs,$cfg_admin"; fi
  [ -z "$failaddrs" ] ||
  {
    cat <<EOT
Condolences. The clone of project $proj just failed.$xfsz_err$xobjs_err

* Source URL: $url
* Project settings: $cfg_webadmurl/editproj.cgi?name=$(echo "$proj" | LC_ALL=C sed -e 's/[+]/%2B/g')

The project settings link may be used to adjust the settings
and restart the clone in order to try the clone again.
EOT
    if [ -f .clonelog ] && [ -r .clonelog ]; then
      echo ""
      echo "Log follows:"
      echo ""
      loglines=$(LC_ALL=C wc -l <.clonelog)
      # Short logs are sent whole, longer ones as first 100 + last 100 lines
      if [ $loglines -le 203 ]; then
        cat .clonelog
      else
        head -n 100 .clonelog
        echo ""
        echo "[ ... elided $(( $loglines - 200 )) middle lines ... ]"
        echo ""
        tail -n 100 .clonelog
      fi
    fi
  } | mailref "clone@$cfg_gitweburl/$proj.git" -s "[$cfg_name] $proj clone failed" "$failaddrs" || :
}
# removes any git-svn leftovers
#
# NOTE(review): this is the second definition of cleanup_git_svn_leftovers
# in this file and its body is the same as the first one; being the later
# definition it is the one in effect once the script has been read this far.
#
# Operates on the current directory (expected to be the root of $GIT_DIR).
# Takes no arguments; failures of the find commands are ignored.
cleanup_git_svn_leftovers() {

  # Remove any stale git-svn temp files
  # The git-svn process creates temp files with random 10 character names
  # in the root of $GIT_DIR.  Unfortunately they do not have a recognizable
  # prefix, so we just have to kill any files with a 10-character name.
  # All characters are chosen from
  # [A-Za-z0-9_] so we can at least check that and fortunately the only
  # collision is 'FETCH_HEAD' but that doesn't matter.
  # There may also be temp files with a Git_ prefix as well.
  _randchar='[A-Za-z0-9_]'
  _randchar2="$_randchar$_randchar"
  _randchar4="$_randchar2$_randchar2"
  # 4 + 4 + 2 bracket expressions == a glob matching exactly 10 characters
  _randchar10="$_randchar4$_randchar4$_randchar2"
  find -L . -maxdepth 1 -type f -name "$_randchar10" -exec rm -f '{}' + || :
  find -L . -maxdepth 1 -type f -name "Git_*" -exec rm -f '{}' + || :
}
# removes all leftovers from a previous failed clone attempt
#
# Operates on the repository in the current directory and returns it to
# the state expected before a clone starts: bloat removed, svn/origin
# remote configuration reset (and re-seeded from remote-template.origin
# when present), all refs deleted and HEAD pointing at refs/heads/master.
# Takes no arguments.
cleanup_failed_clone() {

  # Remove any left-over clone bloat
  _cleanup_failed_clone_bloat

  # Remove any left-over svn-remote.svn or remote.origin config
  git config --remove-section svn-remote.svn 2>/dev/null || :
  git config --remove-section remote.origin 2>/dev/null || :

  # If there is a remote-template.origin section, pre-seed the
  # remote.origin section with its contents
  # (read -r so that backslashes in a value are copied verbatim instead
  # of being swallowed as escape characters)
  git config --get-regexp '^remote-template\.origin\..' |
  while read -r name value; do
    if [ -n "$name" ] && [ -n "$value" ]; then
      git config "remote${name#remote-template}" "$value"
    fi
  done

  # Remove any stale ref locks
  clear_stale_ref_locks

  # We want a gc right after the clone, so re-enable that just in case.
  # There's a potential race where we could add it and gc.sh could remove
  # it, but we'll reunset lastgc just before we remove .delaygc at the end.
  [ -e .delaygc ] || >.delaygc
  git config --unset gitweb.lastgc 2>/dev/null || :

  # Remove all pre-existing refs
  rm -f packed-refs
  git for-each-ref --format='delete %(refname)' | git_updateref_stdin 2>/dev/null || :

  # The initial state before a clone starts has HEAD as a symbolic-ref to master
  git symbolic-ref HEAD refs/heads/master

  # HEAD is no longer "ok"
  git config --unset girocco.headok 2>/dev/null || :

  # We, perhaps, ought to remove any packs/loose objects now, but the next gc
  # will get rid of any extras.  Also, if we're recloning the same thing, any
  # preexisting packs/loose objects containing what we're recloning will only
  # speed up the reclone by avoiding some disk writes.  So we don't kill them.

  # It's just remotely possible that a bunch of failures in a row could
  # create a big mess that just keeps growing and growing...
  # Trigger a .needsgc if that happens.
  check_and_set_needsgc
}
# $1 is the project name; a trailing ".git" is accepted and stripped
proj="${1%.git}"
# Everything below runs with the project's repository as the current directory
cd "$cfg_reporoot/$proj.git"
bang_reset

# When a .delaygc marker is present also create/truncate .allowgc
# (failure to do so is ignored)
! [ -e .delaygc ] || >.allowgc || :
# On any exit: remember the exit status for send_clone_failed, terminate
# the log with the @OVER@ marker, flag the clone as failed and send the
# failure notice.
# The action MUST be single-quoted: with double quotes $? would be expanded
# right here, while the trap is being installed, and exit_err would never
# reflect the status the script actually exits with.
trap 'exit_err=$?; echo '\''@OVER@'\''; touch .clone_failed; send_clone_failed' EXIT
# Log header
echo "Project: $proj"
echo "   Date: $(TZ=UTC date '+%Y-%m-%d %T UTC')"
echo ""
[ -n "$cfg_mirror" ] || { echo "Mirroring is disabled" >&2; exit 1; }
url="$(config_get baseurl)" || :
# An empty URL or one containing whitespace (space, tab, ...) is rejected
case "$url" in *[[:space:]]*|"")
  echo "Bad mirror URL (\"$url\")"
  exit 1
esac
cleanup_failed_clone

# Record original mirror type for use by update.sh
mirror_type="$(get_url_mirror_type "$url")"
git config girocco.mirrortype "$mirror_type"

echo "Mirroring from URL \"$url\""
echo ""

# With "source" project owners the owner is taken from the owning user
# (third field of "ls -ld") of the local path the URL refers to.
# config_set is the helper used for every other setting written by this
# script (the counterpart of config_get).
if [ "$cfg_project_owners" = "source" ]; then
  config_set owner "$(ls -ldH "${url#file://}" 2>/dev/null | LC_ALL=C awk '{print $3}')"
fi

# Recipients of the completion mail: the owner plus the admin (if configured)
mailaddrs="$(config_get owner)" || :
[ -z "$cfg_admin" ] ||
if [ -z "$mailaddrs" ]; then mailaddrs="$cfg_admin"; else mailaddrs="$mailaddrs,$cfg_admin"; fi

# Make sure we don't get any unwanted loose objects
# Starting with Git v2.10.0 fast-import can generate loose objects unless we
# tweak its configuration to prevent that
git_add_config 'fetch.unpackLimit=1'
# Note the git config documentation is wrong
# transfer.unpackLimit, if set, overrides fetch.unpackLimit
git_add_config 'transfer.unpackLimit=1'
# But not the Git v2.10.0 and later fastimport.unpackLimit which improperly uses <= instead of <
git_add_config 'fastimport.unpackLimit=0'

# Initial mirror
echo "Initiating mirroring..."
headref=
showheadwarn=
warnempty=

# remember the starting time so we can easily combine fetched loose objects
# we sleep for 1 second after creating .needspack to make sure all objects are newer
if ! [ -e .needspack ]; then
  rm -f .needspack
  >.needspack
  sleep 1
fi
# Perform the initial fetch; the URL scheme selects the kind of source
# (svn, darcs, bzr, hg or, for everything else, native git)
case "$url" in
  svn://* | svn+http://* | svn+https://* | svn+file://* | svn+ssh://*)
    [ -n "$cfg_mirror_svn" ] || { echo "Mirroring svn is disabled" >&2; exit 1; }
    # Allow the username to be specified in the "svn-credential.svn.username"
    # property and the password in the "svn-credential.svn.password" property
    # Use an 'anonsvn' username by default as is commonly used for anonymous svn
    # Default the password to the same as the username
    # The password property will be ignored unless a username has been specified
    if svnuser="$(git config --get svn-credential.svn.username)" && [ -n "$svnuser" ]; then
      if ! svnpass="$(git config --get svn-credential.svn.password)"; then
        svnpass="$svnuser"
      fi
      # Drop any user[:password]@ part from the URL's authority
      url1="${url#*://}"
      url1="${url1%%/*}"
      case "$url1" in ?*"@"?*)
        urlsch="${url%%://*}"
        url="$urlsch://${url#*@}"
      esac
    else
      # As a fallback, check in the URL, just in case
      url1="${url#*://}"
      url1="${url1%%/*}"
      svnuser=
      case "$url1" in ?*"@"?*)
        urlsch="${url%%://*}"
        url="$urlsch://${url#*@}"
        url1="${url1%%@*}"
        svnuser="${url1%%:*}"
        if [ -n "$svnuser" ]; then
          svnpass="$svnuser"
          case "$url1" in *":"*)
            svnpass="${url1#*:}"
          esac
        fi
      esac
      if [ -z "$svnuser" ]; then
        svnuser="anonsvn"
        svnpass="anonsvn"
      fi
    fi
    GIT_ASKPASS_PASSWORD="$svnpass"
    export GIT_ASKPASS_PASSWORD
    # We just remove svn+ here, so svn+http://... becomes http://...
    # We also remove a trailing '/' to match what git-svn will do
    case "$url" in svn+ssh://*) svnurl="$url";; *) svnurl="${url#svn+}";; esac
    svnurl="${svnurl%/}"
    # We require svn info to succeed on the URL otherwise it's
    # simply not a valid URL and without using -s on the init it
    # will not otherwise be tested until the fetch
    svn --non-interactive --username "$svnuser" --password "$svnpass" info "$svnurl" >/dev/null
    # We initially use -s for the init which will possibly shorten
    # the URL.  However, the shortening can fail if a password is
    # not required for the longer version but is for the shorter,
    # so try again without -s if the -s version fails.
    # We must use GIT_DIR=. here or ever so "helpful" git-svn will
    # create a .git subdirectory!
    GIT_DIR=. git svn init --username="$svnuser" --prefix "" -s "$svnurl" <"$mtlinesfile" ||
    GIT_DIR=. git svn init --username="$svnuser" --prefix "" "$svnurl" <"$mtlinesfile"
    # We need to remember this url so we can detect changes because
    # ever so "helpful" git-svn may shorten it!
    config_set svnurl "$svnurl"
    # At this point, since we asked for a standard layout (-s) git-svn
    # may have been "helpful" and adjusted our $svnurl to a prefix and
    # then glued the removed suffix onto the front of any svn-remote.svn.*
    # config items.  We could avoid this by not using the '-s' option
    # but then we might not get all the history.  If, for example, we
    # are cloning an http://svn.example.com/repos/public repository that
    # early in its history moved trunk => public/trunk we would miss that
    # earlier history without allowing the funky shorten+prefix behavior.
    # So we read back the svn-remote.svn.fetch configuration and compute
    # the prefix.  This way we are sure to get the correct prefix.
    gitsvnurl="$(git config --get svn-remote.svn.url)" || :
    gitsvnfetch="$(git config --get-all svn-remote.svn.fetch | tail -1)" || :
    gitsvnprefix="${gitsvnfetch%%:*}"
    gitsvnsuffix="${gitsvnprefix##*/}"
    gitsvnprefix="${gitsvnprefix%$gitsvnsuffix}"
    # Ask git-svn to store everything in the normal non-remote
    # locations being careful to use the correct prefix
    git config --replace-all svn-remote.svn.fetch "${gitsvnprefix}trunk:refs/heads/master"
    git config --replace-all svn-remote.svn.branches "${gitsvnprefix}branches/*:refs/heads/*"
    git config --replace-all svn-remote.svn.tags "${gitsvnprefix}tags/*:refs/tags/*"
    # look for additional non-standard directories to fetch
    # check for standard layout at the same time
    foundstd=
    foundfile=
    svn --non-interactive --username "$svnuser" --password "$svnpass" ls "$gitsvnurl/${gitsvnprefix}" 2>/dev/null |
    { while read file; do case $file in
      # skip the already-handled standard ones and any with a space or tab
      *[[:space:]]*) :;;
      trunk/|branches/|tags/) foundstd=1;;
      # only fetch extra directories from the $svnurl root (not any files)
      *?/) git config --add svn-remote.svn.fetch \
        "${gitsvnprefix}${file%/}:refs/heads/${file%/}";;
      *?) foundfile=1;;
    esac; done
    # if files found and no standard directories present use a simpler layout
    if [ -z "$foundstd" ] && [ -n "$foundfile" ]; then
      git config --unset svn-remote.svn.branches
      git config --unset svn-remote.svn.tags
      git config --replace-all svn-remote.svn.fetch ':refs/heads/master'
    fi; }
    test $? -eq 0
    # git svn fetch on a very large repo can take some time and the
    # remote server may interrupt the connection from time to time.
    # keep retrying (after a brief pause) as long as we are making progress.
    # however, we do limit the total number of retries to 1000
    # we will, however, retry up to 5 times even if we're not making progress
    # v_get_svn_progress_fingerprint <varname>
    # stores a hash of "git svn info" + "git show-ref --head" output in <varname>
    v_get_svn_progress_fingerprint() {
      eval "$1="'"$({ GIT_DIR=. git svn info <"$mtlinesfile" 2>&1; git show-ref --head 2>&1; } |
        git hash-object -t blob --stdin )"' || :
    }
    # svn_ret_err <status>: returns <status> (1 if omitted)
    svn_ret_err() { return "${1:-1}"; }
    svn_retries=1000 # maximum possible fetch attempts no matter what
    svn_retry_backoff_start_half=60 # min retry wait is double this amount in seconds
    svn_backoff_count=7 # max retry wait is $svn_retry_backoff_start_half * 2^$svn_backoff_count
    # Cumulative backoff wait before giving up on consecutive no-progress retries
    # is approximately 2 * $svn_retry_backoff_start_half * 2^$svn_backoff_count
    # For a $svn_backoff_count of 7 that works out to be exactly 4h14m
    svn_progress=
    v_get_svn_progress_fingerprint svn_progress
    svn_progress_retries="$svn_retries"
    svn_retry_backoff="$svn_retry_backoff_start_half"
    svn_err=0
    while [ "$svn_retries" -gt 0 ]; do
      svn_retries="$(( $svn_retries - 1 ))"
      svn_err=0
      GIROCCO_DIVERT_GIT_SVN_AUTO_GC=1
      export GIROCCO_DIVERT_GIT_SVN_AUTO_GC
      unset GIROCCO_SUPPRESS_AUTO_GC_UPDATE
      # The gc.* overrides only apply to this fetch; the previous
      # GIT_CONFIG_PARAMETERS value is put back right after it
      saveconfig="$GIT_CONFIG_PARAMETERS"
      git_add_config 'gc.auto=1'
      git_add_config 'gc.autoPackLimit=1'
      # Again, be careful to use GIT_DIR=. here or else new .git subdirectory!
      GIT_DIR=. git_ulimit svn fetch --log-window-size=$var_log_window_size --username="$svnuser" --quiet <"$mtlinesfile" || svn_err="$?"
      GIROCCO_SUPPRESS_AUTO_GC_UPDATE=1
      export GIROCCO_SUPPRESS_AUTO_GC_UPDATE
      unset GIROCCO_DIVERT_GIT_SVN_AUTO_GC
      unset GIT_CONFIG_PARAMETERS
      [ -z "$saveconfig" ] || {
        GIT_CONFIG_PARAMETERS="$saveconfig"
        export GIT_CONFIG_PARAMETERS
      }
      [ "${svn_err:-1}" -ne 0 ] || break # success!
      # Check to see if we made any progress
      v_get_svn_progress_fingerprint svn_progress_now
      if [ "$svn_progress_now" != "$svn_progress" ]; then
        # we made progress, continue the loop with min wait
        svn_progress="$svn_progress_now"
        svn_progress_retries="$svn_retries"
        svn_retry_backoff="$svn_retry_backoff_start_half"
      else
        # no progress, but we only give up after
        # $svn_backoff_count no-progress attempts in a row
        [ "$(( $svn_progress_retries - $svn_retries ))" -lt "$svn_backoff_count" ] ||
          break # failure
        # continue but only after twice the previous wait
        # (which will still be the min wait if this is the
        # first no-progress retry after making some progress)
      fi
      svn_retry_backoff="$(( 2 * $svn_retry_backoff ))"
      # Pause for $svn_retry_backoff seconds before retrying to be friendly to the server
      # Use that time to pack up loose objects if there are "lotsa" them
      if ! lotsa_loose_objects_or_sopacks; then
        echo "Pausing for $svn_retry_backoff seconds before retrying ($(date))"
        sleep "$svn_retry_backoff"
      else
        pausestop="$(( $(date '+%s') + $svn_retry_backoff ))"
        echo "Pausing and packing loose objects for $svn_retry_backoff seconds before retrying ($(date))"
        pack_incremental_loose_objects_if_lockable ||
          echo "Packing skipped (only pausing): $lockerr"
        timenow="$(date '+%s')"
        if [ "$timenow" -lt "$pausestop" ]; then
          sleepamt="$(( $pausestop - $timenow ))"
          [ "$sleepamt" -le "$svn_retry_backoff" ] ||
            sleepamt="$svn_retry_backoff" # paranoia check
          sleep "$sleepamt"
        fi
      fi
      cleanup_git_svn_leftovers
      echo "Retrying fetch ($(date))"
    done
    [ "${svn_err:-1}" -eq 0 ] || svn_ret_err "$svn_err"
    test ${svn_err:-1} -eq 0
    # git svn does not preserve group permissions in the svn subdirectory
    chmod -R ug+rw,o+r svn
    # git svn also leaves behind ref turds that end with @nnn
    # We get rid of them now
    git for-each-ref --format='%(refname)' |
    LC_ALL=C sed '/^..*@[1-9][0-9]*$/!d; s/^/delete /' |
    git_updateref_stdin
    unset GIT_ASKPASS_PASSWORD
    ;;
  darcs://* | darcs+http://* | darcs+https://*)
    [ -n "$cfg_mirror_darcs" ] || { echo "Mirroring darcs is disabled" >&2; exit 1; }
    # darcs://... is fetched over http, darcs+<scheme>://... over <scheme>
    case "$url" in
      darcs://*) darcsurl="http://${url#darcs://}";;
      *) darcsurl="${url#darcs+}";;
    esac
    git_darcs_fetch "$darcsurl"
    ;;
  bzr://*)
    [ -n "$cfg_mirror_bzr" ] || { echo "Mirroring bzr is disabled" >&2; exit 1; }
    # we just remove bzr:// here, a typical bzr url is just
    # "lp:foo"
    bzrurl="${url#bzr://}"
    git_bzr_fetch "$bzrurl"
    ;;
  hg+http://* | hg+https://* | hg+file://* | hg+ssh://*)
    [ -n "$cfg_mirror_hg" ] || { echo "Mirroring hg is disabled" >&2; exit 1; }
    # We just remove hg+ here, so hg+http://... becomes http://...
    hgurl="${url#hg+}"
    # Perform the initial hg clone
    hg clone -U "$hgurl" "$(pwd)/repo.hg"
    # Do the fast-export | fast-import
    git_hg_fetch
    ;;
  *)
    # We manually add remote.origin.url and remote.origin.fetch
    # to simulate a `git remote add --mirror=fetch` since that's
    # not available until Git 1.7.5 and this way we guarantee we
    # always get exactly the intended configuration and nothing else.
    git config remote.origin.url "$url"
    if ! is_gfi_mirror_url "$url" && [ "$(git config --bool girocco.cleanmirror 2>/dev/null || :)" = "true" ]; then
      git config --replace-all remote.origin.fetch "+refs/heads/*:refs/heads/*"
      git config --add remote.origin.fetch "+refs/tags/*:refs/tags/*"
      git config --add remote.origin.fetch "+refs/notes/*:refs/notes/*"
      git config --add remote.origin.fetch "+refs/top-bases/*:refs/top-bases/*"
      git config --add remote.origin.fetch "+refs/replace/*:refs/replace/*"
      git config --bool girocco.lastupdateclean true
    else
      git config --replace-all remote.origin.fetch "+refs/*:refs/*"
      git config --bool girocco.lastupdateclean false
    fi
    # Set the correct HEAD symref by using ls-remote first
    GIT_SSL_NO_VERIFY=1 GIT_TRACE_PACKET=1 git ls-remote origin >.refs-temp 2>.pkts-temp ||
    {
      # Since everything was redirected, on failure there'd be no output,
      # so let's make some failure output
      cat .pkts-temp
      echo ""
      echo "git ls-remote \"$url\" failed"
      exit 1
    }
    # Compensate for git() {} side effects
    unset GIT_TRACE_PACKET
    # If the server is running at least Git 1.8.4.3 then it will send us the actual
    # symref for HEAD.  If we are running at least Git 1.7.5 then we can snarf that
    # out of the packet trace data.
    if [ -s .refs-temp ]; then
      # Nothing to do unless the remote repository has at least 1 ref
      # See if we got a HEAD ref
      head="$(LC_ALL=C grep -E "^$octet20$hexdig*[ $tab]+HEAD\$" <.refs-temp | LC_ALL=C awk '{print $1}')"
      # If the remote has HEAD set to a symbolic ref that does not exist
      # then we will not receive a HEAD ref in the ls-remote output
      headref=
      showheadwarn=
      symrefcap=
      if [ -n "$head" ]; then
        symrefcap="$(LC_ALL=C sed -ne <.pkts-temp \
"/packet:.*git<.*[ $tab]symref="'HEAD:refs\/heads\/'"[^ $tab]/\
{s/^.*[ $tab]symref="'HEAD:\(refs\/heads\/'"[^ $tab][^ $tab]*"'\).*$/\1/;p;}')"
        # prefer $symrefcap (refs/heads/master if no $symrefcap) if it
        # matches HEAD otherwise take the first refs/heads/... match
        matchcnt=0
        while read ref; do
          [ -n "$ref" ] || continue
          matchcnt=$(( $matchcnt + 1 ))
          if [ -z "$headref" ] || [ "$ref" = "${symrefcap:-refs/heads/master}" ]; then
            headref="$ref"
          fi
          if [ "$headref" = "${symrefcap:-refs/heads/master}" ] && [ $matchcnt -gt 1 ]; then
            break
          fi
        done <<-EOT
$(LC_ALL=C grep -E "^$head[ $tab]+refs/heads/[^ $tab]+\$" <.refs-temp |
  LC_ALL=C awk '{print $2}')
EOT
        # Warn if there was more than one match and $symrefcap is empty
        # or $symrefcap is not the same as $headref since our choice might
        # differ from the source repository's HEAD
        if [ $matchcnt -ge 1 ] && [ "$symrefcap" != "$headref" ] &&
           { [ -n "$symrefcap" ] || [ $matchcnt -gt 1 ]; }; then
          showheadwarn=1
        fi
      fi
      if [ -z "$headref" ]; then
        # If we still don't have a HEAD ref then prefer refs/heads/master
        # if it exists otherwise take the first refs/heads/...
        # We do not support having a detached HEAD.
        # We always warn now because we will be setting HEAD differently
        # than the source repository had HEAD set
        showheadwarn=1
        while read ref; do
          [ -n "$ref" ] || continue
          if [ -z "$headref" ] || [ "$ref" = "refs/heads/master" ]; then
            headref="$ref"
          fi
          [ "$headref" != "refs/heads/master" ] || break
        done <<-EOT
$(LC_ALL=C grep -E "^$octet20$hexdig*[ $tab]+refs/heads/[^ $tab]+\$" <.refs-temp |
  LC_ALL=C awk '{print $2}')
EOT
      fi
      # If we STILL do not have a HEAD ref (perhaps the source repository
      # contains only tags) then use refs/heads/master.  It will be invalid
      # but is no worse than we used to do by default and we'll warn about
      # it.  We do not support a HEAD symref to anything other than refs/heads/...
      [ -n "$headref" ] || headref="refs/heads/master"
      git symbolic-ref HEAD "$headref"
      pruneopt=--prune
      [ "$(git config --bool fetch.prune 2>/dev/null || :)" != "false" ] || pruneopt=
      # remember the starting time so we can easily detect new packs for fast-import mirrors
      # we sleep for 1 second after creating .gfipack to make sure all packs are newer
      if is_gfi_mirror_url "$url" && ! [ -e .gfipack ]; then
        rm -f .gfipack
        >.gfipack
        sleep 1
      fi
      GIT_SSL_NO_VERIFY=1 git_ulimit remote update $pruneopt
      if [ -e .gfipack ] && is_gfi_mirror_url "$url"; then
        find -L objects/pack -type f -newer .gfipack -name "pack-$octet20*.pack" -print >>gfi-packs
        rm -f .gfipack
      fi
    else
      warnempty=1
      git symbolic-ref HEAD "refs/heads/master"
    fi
    rm -f .refs-temp .pkts-temp
    ;;
esac
# For systems that do not properly implement the file size limit,
# perform a check here just in case.  Unfortunately by this time
# the excess space has already been used, but at least it will
# be reclaimed almost immediately if we detect an overage here.
if [ "${cfg_max_file_size512:-0}" != "0" ]; then
  toobig="$(find -H objects -type f -size +$(( $cfg_max_file_size512 - 1 )) -print 2>/dev/null |
    head -n 1)" || :
  if [ -n "$toobig" ]; then
    exit 1 # fail the clone
  fi
fi

# Check the max_clone_objects setting now (if set)
# The count is the sum of the "count:" (loose) and "in-pack:" values
# reported by git count-objects -v
if [ "${cfg_max_clone_objects:-0}" != "0" ]; then
  objcount="$(git count-objects -v | LC_ALL=C awk 'BEGIN{v=0}/^count:/||/^in-pack:/{v+=$2}END{print v}')" || :
  if [ -n "$objcount" ] && [ "$objcount" -gt "$cfg_max_clone_objects" ]; then
    # remembered so the failure mail can mention the object count
    exit_objs="$objcount"
    exit 1 # fail the clone
  fi
fi
# The objects subdirectories permissions must be updated now.
# In the case of a dumb http clone, the permissions will not be correct
# (missing group write) despite the core.sharedrepository=2 setting!
# The objects themselves seem to have the correct permissions.
# This problem appears to have been fixed in the most recent git versions.
perms=g+w
[ "$cfg_permission_control" != "Hooks" ] || perms=go+w
chmod $perms $(find -L objects -maxdepth 1 -type d) 2>/dev/null || :

# We may have just cloned a lot of refs and they will all be
# individual files at this point.  Let's pack them now so we
# can have better performance right from the start.
git pack-refs --all

# Initialize gitweb.lastreceive, gitweb.lastchange and info/lastactivity
git config gitweb.lastreceive "$(date '+%a, %d %b %Y %T %z')"
git config gitweb.lastchange "$(date '+%a, %d %b %Y %T %z')"
git for-each-ref --sort=-committerdate --format='%(committerdate:iso8601)' \
  --count=1 refs/heads >info/lastactivity || :
! [ -d htmlcache ] || { >htmlcache/changed; } 2>/dev/null || :

# Don't leave a multi-megabyte useless FETCH_HEAD behind
rm -f FETCH_HEAD

# Last ditch attempt to get a valid HEAD for a non-git source
check_and_set_head || :

# The rest
echo "Final touches..."
git update-server-info
# From here on an exit no longer counts as a failed clone
trap "" EXIT

# run gc now unless the clone is empty
if [ -z "$warnempty" ]; then
  git config --unset gitweb.lastgc 2>/dev/null || :
  rm -f .delaygc .allowgc
fi
# Optional paragraphs of the completion mail; each one is either empty or
# starts with a newline and ends with a newline so that they can simply be
# concatenated in the message body below
emptynote=
[ -z "$warnempty" ] ||
emptynote="
WARNING: You have mirrored an empty repository.
"
headnote=
[ -z "$showheadwarn" ] || [ -z "$headref" ] ||
headnote="
NOTE: HEAD has been set to a symbolic ref to \"$headref\".
Use the \"Project settings\" link to choose a different HEAD symref.
"
sizenote=
! is_gfi_mirror ||
sizenote="
NOTE: Since this is a mirror of a non-Git source, the initial repository
size may be somewhat larger than necessary. This will be corrected
shortly. If you intend to clone this repository you may want to
wait up to 1 hour before doing so in order to receive the more
compact final size.
"
# Send the completion notice (skipped without recipients; a delivery
# failure does not fail the clone)
[ -z "$mailaddrs" ] ||
mailref "clone@$cfg_gitweburl/$proj.git" -s "[$cfg_name] $proj clone completed" "$mailaddrs" <<EOT || :
Congratulations! The clone of project $proj just completed.

* Source URL: $url
* GitWeb interface: $cfg_gitweburl/$proj.git
* Project settings: $cfg_webadmurl/editproj.cgi?name=$(echo "$proj" | LC_ALL=C sed -e 's/[+]/%2B/g')
$emptynote$headnote$sizenote
Have a lot of fun.
EOT
echo "Mirroring finished successfuly!"
# In case this is a re-mirror, lastgc could have been set already so clear it now
git config --unset gitweb.lastgc || :

# Finishing touches to .clonelog
echo "$sizenote@OVER@"

# We must now close the .clonelog file that is open on stdout and stderr
# otherwise it will not be possible to examine it and possibly trim it
exec >/dev/null 2>&1 || :

# Trim an overly large successful clone log back down to something less wasteful
# Keep only 10000 lines (first 5000, last 5000) if there are more than 10003 lines
# This will limit the .clonelog file to roughly 800KB max (with 80 char average lines)
loglines=$(LC_ALL=C wc -l <.clonelog) || :
if [ "${loglines:-0}" -gt 10003 ]; then
  # Try to be careful and never lose the .clonelog nor fail the
  # clone at this point no matter what bizarre failures might occur
  rm -f .clonelogtrim || :
  # Only proceed when a fresh, regular .clonelogtrim file could be created
  if
    test ! -e .clonelogtrim &&
    { >.clonelogtrim; } >/dev/null 2>&1 &&
    test -f .clonelogtrim
  then
    # The trimmed copy replaces .clonelog only if every step succeeded
    {
      head -n 5000 .clonelog &&
      echo "" &&
      echo "[ ... elided $(( $loglines - 10000 )) middle lines ... ]" &&
      echo "" &&
      tail -n 5000 .clonelog
    } >.clonelogtrim &&
    mv -f .clonelogtrim .clonelog || :
    rm -f .clonelogtrim || :
  fi
fi

# Clone is no longer in progress
rm -f .clone_in_progress || :