From 2c7f92cf7d463b43090f3c9980b69d4c01158cd0 Mon Sep 17 00:00:00 2001 From: "Kyle J. McKay" Date: Sun, 28 Jun 2020 23:03:26 -0700 Subject: [PATCH] jobd/gc.sh: add repack "signature" hash When deciding whether or not to skip gc, everything that might be caught up in the supplementary packs must be considered. Create a "signature" hash for the constructed `repack` subdirectory that reflects all of the refs that are packed during gc as well as anything that may be referenced by ref logs since the supplementary pack creation process uses the `--reflog` option when running the `git pack-objects` command. If the only thing that happens since the last "gc" run is to expire some ref logs, that means the supplementary packs will need to be rebuilt to remove any newly unreachable objects. By including the contents of the various ref logs in the data used to create a "signature" hash for the `repack` subdirectory this can easily be accomplished. Signed-off-by: Kyle J. McKay --- jobd/gc.sh | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/jobd/gc.sh b/jobd/gc.sh index ac104bf..de406f0 100755 --- a/jobd/gc.sh +++ b/jobd/gc.sh @@ -1164,6 +1164,33 @@ compute_extra_reachables() { LC_ALL=C awk '!/missing/ {num++; print $1 " " "refs/" substr("zzzzzzzzzzzz", 1, length(num)) "/" num}' } +# cat_all_ref_logs +# the current directory MUST be set to the repository's --git-dir +# first the arguments (which must reference readable files) are cat'd if any +# then the path names of all ref log files are sorted and cat'd in that order +# included in output: +# * the contents of the pathnames given as arguments (if any) in order +# * any existing files in the logs subdirectory and any existing worktree HEAD logs +# but always in sorted filename order +# obviously any missing directories are silently skipped +cat_all_ref_logs() { + [ $# -le 0 ] || LC_ALL=C cat -- "$@" + { + ! [ -d logs ] || find -L logs -name '.*' -prune -o -name '*.' 
-prune -o -type f -size +0c -print + if [ -d worktrees ]; then + find -L worktrees -mindepth 2 -maxdepth 2 -name HEAD -type f -print | + while read -r _lwth; do + if + [ -d "${_lwth%HEAD}logs" ] && + [ -f "${_lwth%HEAD}logs/HEAD" ] + then + printf '%s\n' "${_lwth%HEAD}logs/HEAD" + fi + done + fi + } | LC_ALL=C sort | LC_ALL=C sed 's/[^A-Za-z0-9._/-]/\\&/g' | xargs env LC_ALL=C cat -- +} + # ## main gc logic # @@ -1389,6 +1416,12 @@ progress "~ [$proj] running supplementary gc pack-objects ($(date))" # Add the "supplementary" refs compute_extra_reachables >>repack/packed-refs +# Now calculate the final repack signature hash +# Since we do _allow_ worktrees and we do _allow_ ref logs, +# the ref logs of such must be accounted for here as well +_rpksig="$(cat_all_ref_logs repack/packed-refs | git hash-object -t blob --stdin)" +echo "$_rpksig" >repack/repack.sighash + # Subtract the primary refs GIT_ALTERNATE_OBJECT_DIRECTORIES="$PWD/repack/alt" export GIT_ALTERNATE_OBJECT_DIRECTORIES @@ -1709,6 +1742,7 @@ eval "reposizek=$(( $( config_set_raw girocco.reposizek "${reposizek:-0}" read -r rsighash