From 520902cd741f225665595a50c094893e50186cda Mon Sep 17 00:00:00 2001 From: "Kyle J. McKay" Date: Sun, 28 Jun 2020 16:17:30 -0700 Subject: [PATCH] jobd/gc.sh: compute simple gc "signature" hashes The gc operation can be time-consuming. If there have been no ref changes (and nothing has been expired/pruned) then there's no reason to run an expensive gc operation when $min_gc_interval rolls around. Instead, the lastgc time simply gets updated. With the added tolerance of linked working trees, private refs, and supplemental packs, it becomes increasingly difficult to determine whether or not there's been a change that warrants running gc or whether it can be safely skipped. As a result, gc may run when it doesn't actually need to. As a first step toward determining when gc can safely be skipped, compute "signatures" (i.e. hashes) of some key information and save them at the end of the gc operation so that a future gc check can use them to decide whether or not gc can be skipped. The first "signature" hash represents the primary and supplemental packs produced by the gc operation. When a gc check comes up, if there are any loose objects or the existing packs do not exactly match this signature, gc must be performed. This is saved in girocco.gcsig.packs. The second "signature" hash represents the state of all the refs (including HEAD's symbolic ref state) as seen by the gc operation. When a gc check comes up, if the ref state does not match this signature, gc must be performed. This is saved in girocco.gcsig.refs. Both of these hashes are cheap and easy to generate with very little overhead -- far, far less than an actual gc operation. Signed-off-by: Kyle J. 
McKay --- jobd/gc.sh | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/jobd/gc.sh b/jobd/gc.sh index 8f0345f..ac104bf 100755 --- a/jobd/gc.sh +++ b/jobd/gc.sh @@ -330,6 +330,16 @@ pack_is_complete() { echo "$_headsha" } +# Compute a "signature" for the refs in the specified (default .) repository. +# Instead of passing --head to show-ref, the actual contents of HEAD are used +# to capture changes in HEAD's symbolic-ref value that would otherwise be missed. +# $1 => variable name to store result hash in +# $2 => --git-dir to use, defaults to "." +v_compute_refs_sighash() { + eval "$1="'"$({ cat "${2:-.}/HEAD"; git --git-dir="${2:-.}" show-ref; } | + git hash-object -t blob --stdin)"' +} + # On return a "$lockf" will have been created that must be removed when gc is done lock_gc() { v_lock_gc _lockresult || { @@ -400,14 +410,19 @@ make_repack_dir() { ln -s ../info repack/info ln -s ../objects repack/objects ln -s "$PWD/objects/pack/repack" repack/alt/pack - ln -s ../../refs repack/refs/refs ! [ -d logs ] || ln -s ../logs repack/logs ! [ -d worktrees ] || ln -s ../worktrees repack/worktrees _lines=$(( $(LC_ALL=C wc -l repack/HEAD.orig + cat packed-refs >repack/packed-refs.orig + ln -s packed-refs.orig repack/packed-refs + cp repack/HEAD.orig repack/HEAD + v_compute_refs_sighash _rsig repack + rm repack/packed-refs repack/HEAD + ln -s ../../refs repack/refs/refs + echo "$_rsig" >repack/packed-refs.sighash >repack/packed-refs.extra _xtralines=0 - cat packed-refs >repack/packed-refs.orig if [ $(LC_ALL=C wc -l &2 "[$proj] error: make_repack_dir failed original packed-refs line count sanity check" exit 1 @@ -1693,9 +1708,20 @@ eval "reposizek=$(( $( LC_ALL=C sed -e 's/ / + /g') ))" config_set_raw girocco.reposizek "${reposizek:-0}" +read -r rsighash