# taskd/clone.sh (girocco) — mirror-clone worker script
# Scraped gitweb header: commit "taskd/clone.sh: clean up more crud on exceeds limit failures"
# blob 94e543b1aa5db5b97d848240da76e937a92fa723
#!/bin/sh

# Invoked from taskd/taskd.pl

. @basedir@/shlib.sh
. @basedir@/jobd/gc-util-functions.sh

set -e

# Group-writable files by default; world-writable when permission
# control is handled by hooks instead of filesystem groups.
umask 002
[ "$cfg_permission_control" != "Hooks" ] || umask 000
clean_git_env
# darcs fast-export | git fast-import with error handling
#
# Runs in a subshell.  Each side of the pipeline reports its exit status
# on fd 3 so that a failure in EITHER stage is detected (a plain pipeline
# would only report the status of the last stage).  The two statuses are
# read back through the here-document and both must be 0 for success.
git_darcs_fetch() (
	set_utf8_locale
	_err1=
	_err2=
	exec 3>&1
	{ read -r _err1 || :; read -r _err2 || :; } <<-EOT
	$(
		exec 4>&3 3>&1 1>&4 4>&-
		{
			_e1=0
			"$cfg_basedir"/bin/darcs-fast-export \
				--export-marks="$(pwd)/dfe-marks" "$1" 3>&- || _e1=$?
			echo $_e1 >&3
		} |
		{
			_e2=0
			git_ulimit fast-import \
				--export-marks="$(pwd)/gfi-marks" \
				--export-pack-edges="$(pwd)/gfi-packs" \
				--force 3>&- || _e2=$?
			echo $_e2 >&3
		}
	)
	EOT
	exec 3>&-
	[ "$_err1" = 0 ] && [ "$_err2" = 0 ]
	return $?
)
# bzr fast-export | git fast-import with error handling
#
# Same fd-3 status-reporting technique as git_darcs_fetch: both pipeline
# stages write their exit status to fd 3, both are read back and both
# must be 0 for the function to succeed.
git_bzr_fetch() (
	set_utf8_locale
	BZR_LOG=/dev/null
	export BZR_LOG
	_err1=
	_err2=
	exec 3>&1
	{ read -r _err1 || :; read -r _err2 || :; } <<-EOT
	$(
		exec 4>&3 3>&1 1>&4 4>&-
		{
			_e1=0
			bzr fast-export --plain \
				--export-marks="$(pwd)/bfe-marks" "$1" 3>&- || _e1=$?
			echo $_e1 >&3
		} |
		{
			_e2=0
			git_ulimit fast-import \
				--export-marks="$(pwd)/gfi-marks" \
				--export-pack-edges="$(pwd)/gfi-packs" \
				--force 3>&- || _e2=$?
			echo $_e2 >&3
		}
	)
	EOT
	exec 3>&-
	[ "$_err1" = 0 ] && [ "$_err2" = 0 ]
	return $?
)
# removes any git-svn leftovers
cleanup_git_svn_leftovers() {

	# Remove any stale git-svn temp files
	# The git-svn process creates temp files with random 10 character names
	# in the root of $GIT_DIR.  Unfortunately they do not have a recognizable
	# prefix, so we just have to kill any files with a 10-character name.
	# All characters are chosen from
	# [A-Za-z0-9_] so we can at least check that and fortunately the only
	# collision is 'FETCH_HEAD' but that doesn't matter.
	# There may also be temp files with a Git_ prefix as well.
	_randchar='[A-Za-z0-9_]'
	_randchar2="$_randchar$_randchar"
	_randchar4="$_randchar2$_randchar2"
	_randchar10="$_randchar4$_randchar4$_randchar2"
	find -L . -maxdepth 1 -type f -name "$_randchar10" -exec rm -f '{}' + || :
	find -L . -maxdepth 1 -type f -name "Git_*" -exec rm -f '{}' + || :
}
# removes all crud leftover from a previous failed clone attempt
# only files that could potentially consume a non-trivial amount
# of space are removed by this function
# refs and objects are left unchanged by this function
_cleanup_failed_clone_bloat() {

	# Any pre-existing FETCH_HEAD from a previous clone failed or not is
	# now garbage to be removed
	rm -f FETCH_HEAD

	# Remove any left-over svn dir from a previous failed attempt
	rm -rf svn

	# Remove any left-over .darcs dirs from a previous failed attempt
	rm -rf *.darcs

	# Remove any left-over repo.hg dir from a previous failed attempt
	rm -rf repo.hg

	# Remove any left-over import/export/temp files from a previous failed attempt
	rm -f bfe-marks dfe-marks hg2git-heads hg2git-mapping hg2git-marks* hg2git-state \
		gfi-marks gfi-packs .pkts-temp .refs-temp

	# Remove any git-svn junk
	cleanup_git_svn_leftovers
}
# Wipe all loose objects, packs and refs (used when a clone exceeds the
# allowed limits) while leaving a minimally valid repository behind,
# then remove any other clone bloat.
clear_all_objects_and_packs() {
	if [ -d objects ]; then
		# make sure the repository is not left broken
		printf '%s\n' 'ref: refs/heads/master' >HEAD || :
		rm -f packed-refs || :
		find -H refs objects -type f -exec rm -f '{}' + >/dev/null 2>&1 || :
		! [ -d htmlcache ] || { >htmlcache/changed; } 2>/dev/null || :
	fi
	_cleanup_failed_clone_bloat
}
exit_err=0
exit_objs=0

# Invoked from the EXIT trap when a clone fails.  Detects "too big"
# failures (file size and/or object count limits), wipes the oversize
# data, marks the project as failed-beyond-retry when a limit was
# exceeded and mails the owner (cc admin when appropriate).
send_clone_failed() {
	trap "" EXIT
	# We must now close the .clonelog file that is open on stdout and stderr
	exec >/dev/null 2>&1
	# It would be nice if git propagated the SIGXFSZ error on up to the shell,
	# perhaps it will at some point in the future.  In any case, the only file
	# that might be too big would end up in the objects subdirectory.
	# Search for any files of size $cfg_max_file_size512 blocks (if set) or
	# larger and trigger the too big failure that way as well.
	toobig=
	if [ "${cfg_max_file_size512:-0}" != "0" ]; then
		toobig="$(find -H objects -type f -size +$(( $cfg_max_file_size512 - 1 )) -print 2>/dev/null |
			head -n 1)" || :
	fi
	failaddrs="$(config_get owner)" || :
	ccadm="${cfg_admincc:-0}"
	xfsz_err=""
	if [ -n "$toobig" ] || [ "${exit_err:-0}" = "${var_xfsz_err:-999}" ]; then
		ccadm=1
		# Compute a human-readable repository size (KiB/MiB/GiB)
		reposize="$(cd objects && du -sk . | LC_ALL=C awk '{print $1}')" || :
		if [ -n "$reposize" ]; then
			if [ $reposize -lt 5120 ]; then
				reposize="$reposize KiB"
			else
				reposize="$(( $reposize / 1024 ))"
				if [ $reposize -lt 5120 ]; then
					reposize="$reposize MiB"
				else
					reposize="$(( $reposize / 1024 ))"
					reposize="$reposize GiB"
				fi
			fi
		fi
		xfsz_err="

The source repository exceeds our maximum allowed repository size."
		clear_all_objects_and_packs
	fi
	xobjs_err=""
	if [ "${exit_objs:-0}" != "0" ]; then
		ccadm=1
		xobjs_err="

The source repository${exit_objs:+ ($exit_objs objects)} exceeds our maximum allowed object limit."
		clear_all_objects_and_packs
	fi
	if [ -n "$xfsz_err" ] || [ -n "$xobjs_err" ]; then
		# Mark as an exceeds limit clone failure and remember the exceeds
		# message(s) in both .clone_failed_exceeds_limit and .clonelog
		>.clone_failed_exceeds_limit
		if [ -n "$xfsz_err" ]; then
			[ -z "$reposize" ] ||
			printf '%s\n' "repository size at failure: $reposize" >>.clone_failed_exceeds_limit
			printf '%s\n' "${xfsz_err#??}" >>.clone_failed_exceeds_limit
			printf '%s\n' "${xfsz_err#?}" >>.clonelog
		fi
		if [ -n "$xobjs_err" ]; then
			printf '%s\n' "${xobjs_err#??}" >>.clone_failed_exceeds_limit
			printf '%s\n' "${xobjs_err#?}" >>.clonelog
		fi
		# Remove the .clone_failed file to prevent "restarting" the clone since
		# restarting it will not cure the fact that it exceeds allowed limits
		# And the .clone_in_progress file has to go at the same time
		rm -f .clone_in_progress .clone_failed
	fi
	! [ -d htmlcache ] || { >htmlcache/changed; } 2>/dev/null || :
	[ "$ccadm" = "0" ] || [ -z "$cfg_admin" ] ||
	if [ -z "$failaddrs" ]; then failaddrs="$cfg_admin"; else failaddrs="$failaddrs,$cfg_admin"; fi
	[ -z "$failaddrs" ] ||
	{
		cat <<EOT
Condolences.  The clone of project $proj just failed.$xfsz_err$xobjs_err

  * Source URL: $url
  * Project settings: $cfg_webadmurl/editproj.cgi?name=$(echo "$proj" | LC_ALL=C sed -e 's/[+]/%2B/g')

The project settings link may be used to adjust the settings
and restart the clone in order to try the clone again.
EOT
		if [ -f .clonelog ] && [ -r .clonelog ]; then
			echo ""
			echo "Log follows:"
			echo ""
			loglines=$(LC_ALL=C wc -l <.clonelog)
			# Show the whole log when small; otherwise head+tail with an
			# elision marker (203 allows a tiny bit of slop over 200)
			if [ $loglines -le 203 ]; then
				cat .clonelog
			else
				head -n 100 .clonelog
				echo ""
				echo "[ ... elided $(( $loglines - 200 )) middle lines ... ]"
				echo ""
				tail -n 100 .clonelog
			fi
		fi
	} | mailref "clone@$cfg_gitweburl/$proj.git" -s "[$cfg_name] $proj clone failed" "$failaddrs" || :
}
# removes any git-svn leftovers
# NOTE(review): this is a verbatim duplicate of the cleanup_git_svn_leftovers
# definition earlier in this file; re-defining it is harmless in shell but
# confirm against upstream whether both copies are intentional.
cleanup_git_svn_leftovers() {

	# Remove any stale git-svn temp files
	# The git-svn process creates temp files with random 10 character names
	# in the root of $GIT_DIR.  Unfortunately they do not have a recognizable
	# prefix, so we just have to kill any files with a 10-character name.
	# All characters are chosen from
	# [A-Za-z0-9_] so we can at least check that and fortunately the only
	# collision is 'FETCH_HEAD' but that doesn't matter.
	# There may also be temp files with a Git_ prefix as well.
	_randchar='[A-Za-z0-9_]'
	_randchar2="$_randchar$_randchar"
	_randchar4="$_randchar2$_randchar2"
	_randchar10="$_randchar4$_randchar4$_randchar2"
	find -L . -maxdepth 1 -type f -name "$_randchar10" -exec rm -f '{}' + || :
	find -L . -maxdepth 1 -type f -name "Git_*" -exec rm -f '{}' + || :
}
# removes all leftovers from a previous failed clone attempt
cleanup_failed_clone() {

	# Remove any left-over clone bloat
	_cleanup_failed_clone_bloat

	# Remove any left-over svn-remote.svn or remote.origin config
	git config --remove-section svn-remote.svn 2>/dev/null || :
	git config --remove-section remote.origin 2>/dev/null || :

	# If there is a remote-template.origin section, pre-seed the
	# remote.origin section with its contents
	git config --get-regexp '^remote-template\.origin\..' |
	while read name value; do
		if [ -n "$name" ] && [ -n "$value" ]; then
			git config "remote${name#remote-template}" "$value"
		fi
	done

	# Remove any stale ref locks
	clear_stale_ref_locks

	# We want a gc right after the clone, so re-enable that just in case.
	# There's a potential race where we could add it and gc.sh could remove
	# it, but we'll reunset lastgc just before we remove .delaygc at the end.
	[ -e .delaygc ] || >.delaygc
	git config --unset gitweb.lastgc 2>/dev/null || :

	# Remove all pre-existing refs
	rm -f packed-refs
	git for-each-ref --format='delete %(refname)' | git_updateref_stdin 2>/dev/null || :

	# The initial state before a clone starts has HEAD as a symbolic-ref to master
	git symbolic-ref HEAD refs/heads/master

	# HEAD is no longer "ok"
	git config --unset girocco.headok 2>/dev/null || :

	# We, perhaps, ought to remove any packs/loose objects now, but the next gc
	# will get rid of any extras.  Also, if we're recloning the same thing, any
	# preexisting packs/loose objects containing what we're recloning will only
	# speed up the reclone by avoiding some disk writes.  So we don't kill them.

	# It's just remotely possible that a bunch of failures in a row could
	# create a big mess that just keeps growing and growing...
	# Trigger a .needsgc if that happens.
	check_and_set_needsgc
}
# Main entry: $1 is the project name (with or without trailing .git)
proj="${1%.git}"
cd "$cfg_reporoot/$proj.git"
bang_reset

! [ -e .delaygc ] || >.allowgc || :

# Any failure from here on (set -e) records the clone as failed and
# notifies the owner via send_clone_failed
trap "exit_err=$?; echo '@OVER@'; touch .clone_failed; send_clone_failed" EXIT
echo "Project: $proj"
echo "   Date: $(TZ=UTC date '+%Y-%m-%d %T UTC')"
echo ""
[ -n "$cfg_mirror" ] || { echo "Mirroring is disabled" >&2; exit 1; }
url="$(config_get baseurl)" || :
# Reject empty URLs and URLs containing spaces or tabs
case "$url" in *" "*|*"	"*|"")
	echo "Bad mirror URL (\"$url\")"
	exit 1
esac
cleanup_failed_clone

# Record original mirror type for use by update.sh
mirror_type="$(get_url_mirror_type "$url")"
git config girocco.mirrortype "$mirror_type"

echo "Mirroring from URL \"$url\""
echo ""

if [ "$cfg_project_owners" = "source" ]; then
	# NOTE(review): scraped text read "config set"; girocco's shlib helper
	# is config_set — confirm against upstream
	config_set owner "$(ls -ldH "${url#file://}" 2>/dev/null | LC_ALL=C awk '{print $3}')"
fi

mailaddrs="$(config_get owner)" || :
[ -z "$cfg_admin" ] ||
if [ -z "$mailaddrs" ]; then mailaddrs="$cfg_admin"; else mailaddrs="$mailaddrs,$cfg_admin"; fi

# Make sure we don't get any unwanted loose objects
# Starting with Git v2.10.0 fast-import can generate loose objects unless we
# tweak its configuration to prevent that
git_add_config 'fetch.unpackLimit=1'
# Note the git config documentation is wrong
# transfer.unpackLimit, if set, overrides fetch.unpackLimit
git_add_config 'transfer.unpackLimit=1'
# But not the Git v2.10.0 and later fastimport.unpackLimit which improperly uses <= instead of <
git_add_config 'fastimport.unpackLimit=0'

# Initial mirror
echo "Initiating mirroring..."
headref=
showheadwarn=
warnempty=

# remember the starting time so we can easily combine fetched loose objects
# we sleep for 1 second after creating .needspack to make sure all objects are newer
if ! [ -e .needspack ]; then
	rm -f .needspack
	>.needspack
	sleep 1
fi
# Dispatch on the mirror source URL scheme: svn, darcs, bzr and hg
# sources are converted via fast-export/fast-import style pipelines;
# anything else is treated as a native git source and fetched with
# "git remote update".
case "$url" in
svn://* | svn+http://* | svn+https://* | svn+file://* | svn+ssh://*)
	[ -n "$cfg_mirror_svn" ] || { echo "Mirroring svn is disabled" >&2; exit 1; }
	# Allow the username to be specified in the "svn-credential.svn.username"
	# property and the password in the "svn-credential.svn.password" property
	# Use an 'anonsvn' username by default as is commonly used for anonymous svn
	# Default the password to the same as the username
	# The password property will be ignored unless a username has been specified
	if svnuser="$(git config --get svn-credential.svn.username)" && [ -n "$svnuser" ]; then
		if ! svnpass="$(git config --get svn-credential.svn.password)"; then
			svnpass="$svnuser"
		fi
		url1="${url#*://}"
		url1="${url1%%/*}"
		case "$url1" in ?*"@"?*)
			urlsch="${url%%://*}"
			url="$urlsch://${url#*@}"
		esac
	else
		# As a fallback, check in the URL, just in case
		url1="${url#*://}"
		url1="${url1%%/*}"
		svnuser=
		case "$url1" in ?*"@"?*)
			urlsch="${url%%://*}"
			url="$urlsch://${url#*@}"
			url1="${url1%%@*}"
			svnuser="${url1%%:*}"
			if [ -n "$svnuser" ]; then
				svnpass="$svnuser"
				case "$url1" in *":"*)
					svnpass="${url1#*:}"
				esac
			fi
		esac
		if [ -z "$svnuser" ]; then
			svnuser="anonsvn"
			svnpass="anonsvn"
		fi
	fi
	GIT_ASKPASS_PASSWORD="$svnpass"
	export GIT_ASKPASS_PASSWORD
	# We just remove svn+ here, so svn+http://... becomes http://...
	# We also remove a trailing '/' to match what git-svn will do
	case "$url" in svn+ssh://*) svnurl="$url";; *) svnurl="${url#svn+}";; esac
	svnurl="${svnurl%/}"
	# We require svn info to succeed on the URL otherwise it's
	# simply not a valid URL and without using -s on the init it
	# will not otherwise be tested until the fetch
	svn --non-interactive --username "$svnuser" --password "$svnpass" info "$svnurl" >/dev/null
	# We initially use -s for the init which will possibly shorten
	# the URL.  However, the shortening can fail if a password is
	# not required for the longer version but is for the shorter,
	# so try again without -s if the -s version fails.
	# We must use GIT_DIR=. here or ever so "helpful" git-svn will
	# create a .git subdirectory!
	GIT_DIR=. git svn init --username="$svnuser" --prefix "" -s "$svnurl" <"$mtlinesfile" ||
	GIT_DIR=. git svn init --username="$svnuser" --prefix "" "$svnurl" <"$mtlinesfile"
	# We need to remember this url so we can detect changes because
	# ever so "helpful" git-svn may shorten it!
	config_set svnurl "$svnurl"
	# At this point, since we asked for a standard layout (-s) git-svn
	# may have been "helpful" and adjusted our $svnurl to a prefix and
	# then glued the removed suffix onto the front of any svn-remote.svn.*
	# config items.  We could avoid this by not using the '-s' option
	# but then we might not get all the history.  If, for example, we
	# are cloning an http://svn.example.com/repos/public repository that
	# early in its history moved trunk => public/trunk we would miss that
	# earlier history without allowing the funky shorten+prefix behavior.
	# So we read back the svn-remote.svn.fetch configuration and compute
	# the prefix.  This way we are sure to get the correct prefix.
	gitsvnurl="$(git config --get svn-remote.svn.url)" || :
	gitsvnfetch="$(git config --get-all svn-remote.svn.fetch | tail -1)" || :
	gitsvnprefix="${gitsvnfetch%%:*}"
	gitsvnsuffix="${gitsvnprefix##*/}"
	gitsvnprefix="${gitsvnprefix%$gitsvnsuffix}"
	# Ask git-svn to store everything in the normal non-remote
	# locations being careful to use the correct prefix
	git config --replace-all svn-remote.svn.fetch "${gitsvnprefix}trunk:refs/heads/master"
	git config --replace-all svn-remote.svn.branches "${gitsvnprefix}branches/*:refs/heads/*"
	git config --replace-all svn-remote.svn.tags "${gitsvnprefix}tags/*:refs/tags/*"
	# look for additional non-standard directories to fetch
	# check for standard layout at the same time
	foundstd=
	foundfile=
	svn --non-interactive --username "$svnuser" --password "$svnpass" ls "$gitsvnurl/${gitsvnprefix}" 2>/dev/null |
	{ while read file; do case $file in
		# skip the already-handled standard ones and any with a space or tab
		*' '*|*'	'*) :;;
		trunk/|branches/|tags/) foundstd=1;;
		# only fetch extra directories from the $svnurl root (not any files)
		*?/) git config --add svn-remote.svn.fetch \
			"${gitsvnprefix}${file%/}:refs/heads/${file%/}";;
		*?) foundfile=1;;
	esac; done
	# if files found and no standard directories present use a simpler layout
	if [ -z "$foundstd" ] && [ -n "$foundfile" ]; then
		git config --unset svn-remote.svn.branches
		git config --unset svn-remote.svn.tags
		git config --replace-all svn-remote.svn.fetch ':refs/heads/master'
	fi; }
	test $? -eq 0
	# git svn fetch on a very large repo can take some time and the
	# remote server may interrupt the connection from time to time.
	# keep retrying (after a brief pause) as long as we are making progress.
	# however, we do limit the total number of retries to 1000
	# we will, however, retry up to 5 times even if we're not making progress
	v_get_svn_progress_fingerprint() {
		eval "$1="'"$({ GIT_DIR=. git svn info <"$mtlinesfile" 2>&1; git show-ref --head 2>&1; } |
			git hash-object -t blob --stdin )"' || :
	}
	svn_ret_err() { return "${1:-1}"; }
	svn_retries=1000 # maximum possible fetch attempts no matter what
	svn_retry_backoff_start_half=60 # min retry wait is double this amount in seconds
	svn_backoff_count=7 # max retry wait is $svn_retry_backoff_start_half * 2^$svn_backoff_count
	# Cumulative backoff wait before giving up on consecutive no-progress retries
	# is approximately 2 * $svn_retry_backoff_start_half * 2^$svn_backoff_count
	# For a $svn_backoff_count of 7 that works out to be exactly 4h14m
	svn_progress=
	v_get_svn_progress_fingerprint svn_progress
	svn_progress_retries="$svn_retries"
	svn_retry_backoff="$svn_retry_backoff_start_half"
	svn_err=0
	while [ "$svn_retries" -gt 0 ]; do
		svn_retries="$(( $svn_retries - 1 ))"
		svn_err=0
		GIROCCO_DIVERT_GIT_SVN_AUTO_GC=1
		export GIROCCO_DIVERT_GIT_SVN_AUTO_GC
		unset GIROCCO_SUPPRESS_AUTO_GC_UPDATE
		saveconfig="$GIT_CONFIG_PARAMETERS"
		git_add_config 'gc.auto=1'
		git_add_config 'gc.autoPackLimit=1'
		# Again, be careful to use GIT_DIR=. here or else new .git subdirectory!
		GIT_DIR=. git_ulimit svn fetch --log-window-size=$var_log_window_size --username="$svnuser" --quiet <"$mtlinesfile" || svn_err="$?"
		GIROCCO_SUPPRESS_AUTO_GC_UPDATE=1
		export GIROCCO_SUPPRESS_AUTO_GC_UPDATE
		unset GIROCCO_DIVERT_GIT_SVN_AUTO_GC
		unset GIT_CONFIG_PARAMETERS
		[ -z "$saveconfig" ] || {
			GIT_CONFIG_PARAMETERS="$saveconfig"
			export GIT_CONFIG_PARAMETERS
		}
		[ "${svn_err:-1}" -ne 0 ] || break # success!
		# Check to see if we made any progress
		v_get_svn_progress_fingerprint svn_progress_now
		if [ "$svn_progress_now" != "$svn_progress" ]; then
			# we made progress, continue the loop with min wait
			svn_progress="$svn_progress_now"
			svn_progress_retries="$svn_retries"
			svn_retry_backoff="$svn_retry_backoff_start_half"
		else
			# no progress, but we only give up after
			# $svn_backoff_count no-progress attempts in a row
			[ "$(( $svn_progress_retries - $svn_retries ))" -lt "$svn_backoff_count" ] ||
			break # failure
			# continue but only after twice the previous wait
			# (which will still be the min wait if this is the
			# first no-progress retry after making some progress)
			svn_retry_backoff="$(( 2 * $svn_retry_backoff ))"
		fi
		# Pause for $svn_retry_backoff seconds before retrying to be friendly to the server
		# Use that time to pack up loose objects if there are "lotsa" them
		if ! lotsa_loose_objects_or_sopacks; then
			echo "Pausing for $svn_retry_backoff seconds before retrying ($(date))"
			sleep "$svn_retry_backoff"
		else
			pausestop="$(( $(date '+%s') + $svn_retry_backoff ))"
			echo "Pausing and packing loose objects for $svn_retry_backoff seconds before retrying ($(date))"
			pack_incremental_loose_objects_if_lockable ||
			echo "Packing skipped (only pausing): $lockerr"
			timenow="$(date '+%s')"
			if [ "$timenow" -lt "$pausestop" ]; then
				sleepamt="$(( $pausestop - $timenow ))"
				[ "$sleepamt" -le "$svn_retry_backoff" ] ||
				sleepamt="$svn_retry_backoff" # paranoia check
				sleep "$sleepamt"
			fi
		fi
		cleanup_git_svn_leftovers
		echo "Retrying fetch ($(date))"
	done
	[ "${svn_err:-1}" -eq 0 ] || svn_ret_err "$svn_err"
	test ${svn_err:-1} -eq 0
	# git svn does not preserve group permissions in the svn subdirectory
	chmod -R ug+rw,o+r svn
	# git svn also leaves behind ref turds that end with @nnn
	# We get rid of them now
	git for-each-ref --format='%(refname)' |
	LC_ALL=C sed '/^..*@[1-9][0-9]*$/!d; s/^/delete /' |
	git_updateref_stdin
	unset GIT_ASKPASS_PASSWORD
	;;
darcs://* | darcs+http://* | darcs+https://*)
	[ -n "$cfg_mirror_darcs" ] || { echo "Mirroring darcs is disabled" >&2; exit 1; }
	case "$url" in
		darcs://*) darcsurl="http://${url#darcs://}";;
		*) darcsurl="${url#darcs+}";;
	esac
	git_darcs_fetch "$darcsurl"
	;;
bzr://*)
	[ -n "$cfg_mirror_bzr" ] || { echo "Mirroring bzr is disabled" >&2; exit 1; }
	# we just remove bzr:// here, a typical bzr url is just
	# "lp:foo"
	bzrurl="${url#bzr://}"
	git_bzr_fetch "$bzrurl"
	;;
hg+http://* | hg+https://* | hg+file://* | hg+ssh://*)
	[ -n "$cfg_mirror_hg" ] || { echo "Mirroring hg is disabled" >&2; exit 1; }
	# We just remove hg+ here, so hg+http://... becomes http://...
	hgurl="${url#hg+}"
	# Perform the initial hg clone
	hg clone -U "$hgurl" "$(pwd)/repo.hg"
	# Do the fast-export | fast-import
	git_hg_fetch
	;;
*)
	# We manually add remote.origin.url and remote.origin.fetch
	# to simulate a `git remote add --mirror=fetch` since that's
	# not available until Git 1.7.5 and this way we guarantee we
	# always get exactly the intended configuration and nothing else.
	git config remote.origin.url "$url"
	if ! is_gfi_mirror_url "$url" && [ "$(git config --bool girocco.cleanmirror 2>/dev/null || :)" = "true" ]; then
		git config --replace-all remote.origin.fetch "+refs/heads/*:refs/heads/*"
		git config --add remote.origin.fetch "+refs/tags/*:refs/tags/*"
		git config --add remote.origin.fetch "+refs/notes/*:refs/notes/*"
		git config --add remote.origin.fetch "+refs/top-bases/*:refs/top-bases/*"
		git config --bool girocco.lastupdateclean true
	else
		git config --replace-all remote.origin.fetch "+refs/*:refs/*"
		git config --bool girocco.lastupdateclean false
	fi
	# Set the correct HEAD symref by using ls-remote first
	GIT_SSL_NO_VERIFY=1 GIT_TRACE_PACKET=1 git ls-remote origin >.refs-temp 2>.pkts-temp ||
	{
		# Since everything was redirected, on failure there'd be no output,
		# so let's make some failure output
		cat .pkts-temp
		echo ""
		echo "git ls-remote \"$url\" failed"
		exit 1
	}
	# Compensate for git() {} side effects
	unset GIT_TRACE_PACKET
	# If the server is running at least Git 1.8.4.3 then it will send us the actual
	# symref for HEAD.  If we are running at least Git 1.7.5 then we can snarf that
	# out of the packet trace data.
	if [ -s .refs-temp ]; then
		# Nothing to do unless the remote repository has at least 1 ref
		# See if we got a HEAD ref
		head="$(LC_ALL=C grep -E "^$octet20$hexdig*[ $tab]+HEAD\$" <.refs-temp | LC_ALL=C awk '{print $1}')"
		# If the remote has HEAD set to a symbolic ref that does not exist
		# then we will not receive a HEAD ref in the ls-remote output
		headref=
		showheadwarn=
		symrefcap=
		if [ -n "$head" ]; then
			symrefcap="$(LC_ALL=C sed -ne <.pkts-temp \
				"/packet:.*git<.*[ $tab]symref="'HEAD:refs\/heads\/'"[^ $tab]/\
				{s/^.*[ $tab]symref="'HEAD:\(refs\/heads\/'"[^ $tab][^ $tab]*"'\).*$/\1/;p;}')"
			# prefer $symrefcap (refs/heads/master if no $symrefcap) if it
			# matches HEAD otherwise take the first refs/heads/... match
			matchcnt=0
			while read ref; do
				[ -n "$ref" ] || continue
				matchcnt=$(( $matchcnt + 1 ))
				if [ -z "$headref" ] || [ "$ref" = "${symrefcap:-refs/heads/master}" ]; then
					headref="$ref"
				fi
				if [ "$headref" = "${symrefcap:-refs/heads/master}" ] && [ $matchcnt -gt 1 ]; then
					break
				fi
			done <<-EOT
			$(LC_ALL=C grep -E "^$head[ $tab]+refs/heads/[^ $tab]+\$" <.refs-temp |
				LC_ALL=C awk '{print $2}')
			EOT
			# Warn if there was more than one match and $symrefcap is empty
			# or $symrefcap is not the same as $headref since our choice might
			# differ from the source repository's HEAD
			if [ $matchcnt -ge 1 ] && [ "$symrefcap" != "$headref" ] &&
				{ [ -n "$symrefcap" ] || [ $matchcnt -gt 1 ]; }; then
				showheadwarn=1
			fi
		fi
		if [ -z "$headref" ]; then
			# If we still don't have a HEAD ref then prefer refs/heads/master
			# if it exists otherwise take the first refs/heads/...
			# We do not support having a detached HEAD.
			# We always warn now because we will be setting HEAD differently
			# than the source repository had HEAD set
			showheadwarn=1
			while read ref; do
				[ -n "$ref" ] || continue
				if [ -z "$headref" ] || [ "$ref" = "refs/heads/master" ]; then
					headref="$ref"
				fi
				[ "$headref" != "refs/heads/master" ] || break
			done <<-EOT
			$(LC_ALL=C grep -E "^$octet20$hexdig*[ $tab]+refs/heads/[^ $tab]+\$" <.refs-temp |
				LC_ALL=C awk '{print $2}')
			EOT
		fi
		# If we STILL do not have a HEAD ref (perhaps the source repository
		# contains only tags) then use refs/heads/master.  It will be invalid
		# but is no worse than we used to do by default and we'll warn about
		# it.  We do not support a HEAD symref to anything other than refs/heads/...
		[ -n "$headref" ] || headref="refs/heads/master"
		git symbolic-ref HEAD "$headref"
		pruneopt=--prune
		[ "$(git config --bool fetch.prune 2>/dev/null || :)" != "false" ] || pruneopt=
		# remember the starting time so we can easily detect new packs for fast-import mirrors
		# we sleep for 1 second after creating .gfipack to make sure all packs are newer
		if is_gfi_mirror_url "$url" && ! [ -e .gfipack ]; then
			rm -f .gfipack
			>.gfipack
			sleep 1
		fi
		GIT_SSL_NO_VERIFY=1 git_ulimit remote update $pruneopt
		if [ -e .gfipack ] && is_gfi_mirror_url "$url"; then
			find -L objects/pack -type f -newer .gfipack -name "pack-$octet20*.pack" -print >>gfi-packs
			rm -f .gfipack
		fi
	else
		warnempty=1
		git symbolic-ref HEAD "refs/heads/master"
	fi
	rm -f .refs-temp .pkts-temp
	;;
esac
# For systems that do not properly implement the file size limit,
# perform a check here just in case.  Unfortunately by this time
# the excess space has already been used, but at least it will
# be reclaimed almost immediately if we detect an overage here.
if [ "${cfg_max_file_size512:-0}" != "0" ]; then
	toobig="$(find -H objects -type f -size +$(( $cfg_max_file_size512 - 1 )) -print 2>/dev/null |
		head -n 1)" || :
	if [ -n "$toobig" ]; then
		exit 1 # fail the clone
	fi
fi

# Check the max_clone_objects setting now (if set)
if [ "${cfg_max_clone_objects:-0}" != "0" ]; then
	objcount="$(git count-objects -v | LC_ALL=C awk 'BEGIN{v=0}/^count:/||/^in-pack:/{v+=$2}END{print v}')" || :
	if [ -n "$objcount" ] && [ "$objcount" -gt "$cfg_max_clone_objects" ]; then
		exit_objs="$objcount"
		exit 1 # fail the clone
	fi
fi

# The objects subdirectories permissions must be updated now.
# In the case of a dumb http clone, the permissions will not be correct
# (missing group write) despite the core.sharedrepository=1 setting!
# The objects themselves seem to have the correct permissions.
# This problem appears to have been fixed in the most recent git versions.
perms=g+w
[ "$cfg_permission_control" != "Hooks" ] || perms=go+w
chmod $perms $(find -L objects -maxdepth 1 -type d) 2>/dev/null || :

# We may have just cloned a lot of refs and they will all be
# individual files at this point.  Let's pack them now so we
# can have better performance right from the start.
git pack-refs --all

# Initialize gitweb.lastreceive, gitweb.lastchange and info/lastactivity
git config gitweb.lastreceive "$(date '+%a, %d %b %Y %T %z')"
git config gitweb.lastchange "$(date '+%a, %d %b %Y %T %z')"
git for-each-ref --sort=-committerdate --format='%(committerdate:iso8601)' \
	--count=1 refs/heads >info/lastactivity || :
! [ -d htmlcache ] || { >htmlcache/changed; } 2>/dev/null || :

# Don't leave a multi-megabyte useless FETCH_HEAD behind
rm -f FETCH_HEAD

# Last ditch attempt to get a valid HEAD for a non-git source
check_and_set_head || :

# The rest
echo "Final touches..."
git update-server-info
trap "" EXIT

# run gc now unless the clone is empty
if [ -z "$warnempty" ]; then
	git config --unset gitweb.lastgc 2>/dev/null || :
	rm -f .delaygc .allowgc
fi

emptynote=
[ -z "$warnempty" ] ||
emptynote="
WARNING: You have mirrored an empty repository.
"
headnote=
[ -z "$showheadwarn" ] || [ -z "$headref" ] ||
headnote="
NOTE: HEAD has been set to a symbolic ref to \"$headref\".
      Use the \"Project settings\" link to choose a different HEAD symref.
"
sizenote=
! is_gfi_mirror ||
sizenote="
NOTE: Since this is a mirror of a non-Git source, the initial repository
      size may be somewhat larger than necessary.  This will be corrected
      shortly.  If you intend to clone this repository you may want to
      wait up to 1 hour before doing so in order to receive the more
      compact final size.
"
[ -z "$mailaddrs" ] ||
mailref "clone@$cfg_gitweburl/$proj.git" -s "[$cfg_name] $proj clone completed" "$mailaddrs" <<EOT || :
Congratulations!  The clone of project $proj just completed.

  * Source URL: $url
  * GitWeb interface: $cfg_gitweburl/$proj.git
  * Project settings: $cfg_webadmurl/editproj.cgi?name=$(echo "$proj" | LC_ALL=C sed -e 's/[+]/%2B/g')
$emptynote$headnote$sizenote
Have a lot of fun.
EOT

echo "Mirroring finished successfuly!"
# In case this is a re-mirror, lastgc could have been set already so clear it now
git config --unset gitweb.lastgc || :
rm .clone_in_progress
echo "$sizenote@OVER@"