gc-util-functions.sh: combine loose packs as needed
[girocco.git] / jobd / gc-util-functions.sh
blob6eb73f3ac905f7b8828bee5619d95b71122c3774
#!/bin/sh

# This is a shell library of common gc related functions
# used by various Girocco scripts.  It is meant to be sourced.

# shlib.sh always sets var_git_exec_path, so it is an error to source
# this script without having already sourced shlib.sh.  Note that the
# "exit" terminates the sourcing shell itself (with status 2).
[ -n "$var_git_exec_path" ] || exit 2

# default packing options (the window memory defaults to 1g unless
# var_window_memory has been set to a non-empty value)
packopts="--depth=50 --window=50 --window-memory=${var_window_memory:-1g}"

# quiet is "-q" unless show_progress is set to something other than "0",
# in which case it is left empty
quiet="-q"; [ "${show_progress:-0}" = "0" ] || quiet=
# run_combine_packs [args...]
# make sure combine-packs uses the correct Git executable
# PATH is extended for this one invocation only: $var_git_exec_path comes
# first, then $cfg_basedir/bin, then the existing PATH
# all arguments are handed to combine-packs.sh unchanged
# returns the exit status of combine-packs.sh
# NOTE(review): @basedir@ looks like a placeholder that gets substituted
# when the file is installed -- confirm against the build/install scripts
run_combine_packs() {
	PATH="$var_git_exec_path:$cfg_basedir/bin:$PATH" @basedir@/jobd/combine-packs.sh "$@"
}
# combine_packs_std [pack-objects options...]
# combine the input pack(s) into a new pack (or possibly packs if packSizeLimit set)
# input pack names are read from standard input one per line delimited by the first
# ':', ' ' or '\n' character on the line (which allows gfi-packs to be read directly)
# all arguments, if any, are passed to pack-objects as additional options
# first removes any pre-existing "*.zap*" sentinels that may be left over from any
# previously aborted "--replace" operations
# the current directory must be the $GIT_DIR (objects/pack is used relatively)
# returns non-zero on failure
combine_packs_std() {
	# removing stale sentinels is best-effort, a failure here is deliberately ignored
	find -L objects/pack -maxdepth 1 -type f -name '*.zap*' -exec rm -f '{}' + || :
	# $packopts and $quiet are intentionally unquoted so that they are split into
	# separate options (and so that an empty $quiet vanishes entirely)
	run_combine_packs --replace "$@" $packopts --all-progress-implied $quiet --non-empty
}
# dupe_file srcpath dstpath
# duplicate the first file to the name given by the second file making sure that
# the second file appears atomically all-at-once after the copy has been completed
# and does not appear at all if the copy fails (in which case this function fails)
# if the second file already exists this function fails with status 1
# if the file names are the same this function returns immediately with success
# the copy is staged in a "packtmp-XXXXXX" temporary file created in the same
# directory as the second file and that temporary file is always removed again
# if either the copy or the final rename fails
dupe_file() {
	[ "$1" != "$2" ] || return 0
	! [ -e "$2" ] || return 1
	# stage the copy next to the destination; a name without an interior
	# "/" (including one whose only "/" is leading or trailing) uses "."
	case "$2" in
		*?/?*) _tmpdir="${2%/*}";;
		*) _tmpdir=".";;
	esac
	_tmpfile="$(mktemp "${_tmpdir:-.}/packtmp-XXXXXX")" || return 1
	if ! cp -fp "$1" "$_tmpfile" || ! mv -f "$_tmpfile" "$2"; then
		# do not leave a stray temporary file behind on failure
		rm -f "$_tmpfile"
		return 1
	fi
	return 0
}
# rename_pack oldnamepath newnamepath
# moves the .pack, .bitmap and .idx files (whichever of them exist) from the
# old base name to the new base name
# note that .keep and .bndl files are left untouched and not moved at all!
# exactly two, different, arguments are required; anything else is treated as
# a programming error and terminates the calling shell with "exit 1", as does
# a failure to produce any one of the destination files
# returns 0 otherwise
rename_pack() {
	[ $# -eq 2 ] && [ "$1" != "$2" ] || {
		echo >&2 "[$proj] incorrect use of rename_pack function"
		exit 1
	}
	# Git assumes that if the destination of the rename already exists
	# that it is, in fact, a copy of the same bytes so silently succeeds
	# without doing anything.  We duplicate that logic here.
	# Git checks for the .idx file first before even trying to use a pack
	# so it should be the last moved and the first removed.
	for ext in pack bitmap idx; do
		[ -f "$1.$ext" ] || continue
		# try a hard link first, then fall back to a copy, and finally
		# accept a destination that is already present
		ln "$1.$ext" "$2.$ext" >/dev/null 2>&1 ||
		dupe_file "$1.$ext" "$2.$ext" >/dev/null 2>&1 ||
		[ -f "$2.$ext" ] || {
			echo >&2 "[$proj] unable to move $1.$ext to $2.$ext"
			exit 1
		}
	done
	# only now that every new name is in place do the old names go away
	for ext in idx pack bitmap; do
		rm -f "$1.$ext"
	done
	return 0
}
# lotsa_loose_objects
# current directory must already be set to the $GIT_DIR
# see if there are "lotsa" loose objects
# "lotsa" is defined as the 17, 68, 71 and 86 object directories existing
# and there being at least 5 total objects between them which corresponds
# to an approximate average of 320 loose objects before this function starts
# returning true and triggering a "mini" gc to pack up loose objects
# returns 0 if there are "lotsa" loose objects, non-zero otherwise
# NOTE(review): $octet19 is not set in this file; it is presumably a glob
# provided by shlib.sh that matches the remainder of a loose object's file
# name -- confirm
lotsa_loose_objects() {
	[ -d objects/17 ] && [ -d objects/68 ] && [ -d objects/71 ] && [ -d objects/86 ] || return 1
	# the outer $(( )) strips any padding that wc may put around the count
	_objs=$(( $(find -L objects/17 objects/68 objects/71 objects/86 -maxdepth 1 -name "$octet19*" -type f -print 2>/dev/null | LC_ALL=C wc -l) ))
	[ "${_objs:-0}" -ge 5 ]
}
# pack_incremental_loose_objects
# pack any existing, non-packed loose objects into a new _l.pack file then run prune-packed
# note that prune-packed is NOT run beforehand -- the caller must do that if needed
# loose objects need not be part of complete commits/trees as --weak-naming is used
# if there end up being too many loose packs, attempt to combine the packs too
# "too many" means list_packs reports 20 or more eligible _l packs; the small
# packs are combined first and, only if 20 or more still remain afterwards,
# the large ones are combined as well
pack_incremental_loose_objects() {
	# when $quiet is empty --progress is passed in its place
	_lpacks="$(run_combine_packs </dev/null --names --loose --weak-naming --incremental --non-empty --all-progress-implied ${quiet:---progress} $packopts)"
	if [ -n "$_lpacks" ]; then
		# We need to identify these packs later so we don't combine_packs them
		for _objpack in $_lpacks; do
			rename_pack "objects/pack/pack-$_objpack" "objects/pack/pack-${_objpack}_l" || :
		done
		git prune-packed $quiet
	fi
	_packs=
	_lpo="--exclude-no-idx --exclude-keep --exclude-bitmap --exclude-bndl"
	_lpol="$_lpo --exclude-no-sfx _l"
	# a list_packs failure (and anything it writes to stderr) is ignored,
	# an empty count is then treated the same as zero
	{ _packs="$(list_packs --quiet --count $_lpol objects/pack || :)" || :; } 2>/dev/null
	[ "${_packs:-0}" -lt 20 ] || {
		combine_small_incremental_loose_packs
		_packs=
		{ _packs="$(list_packs --quiet --count $_lpol objects/pack || :)" || :; } 2>/dev/null
		[ "${_packs:-0}" -lt 20 ] || combine_large_incremental_loose_packs
	}
}
# combine_small_incremental_loose_packs
# combine small _l packs into larger pack(s) using --weak-naming
# we avoid any non _l, keep, bndl or bitmap packs
# "small" is whatever list_packs selects for --object-limit $var_redelta_threshold
# keeps going for as long as at least two such packs remain and each round
# makes progress; always returns 0
# NOTE(review): v_cnt is not defined in this file; it is used here as if it
# stores the number of its remaining arguments into the named variable -- confirm
combine_small_incremental_loose_packs() {
	_lpo="--exclude-no-idx --exclude-keep --exclude-bitmap --exclude-bndl"
	_lpo="$_lpo --exclude-no-sfx _l"
	_lpo="$_lpo --quiet --object-limit $var_redelta_threshold objects/pack"
	while
		_cnt="$(list_packs --count $_lpo)" || :
		test "${_cnt:-0}" -ge 2
	do
		_newp="$(list_packs $_lpo | combine_packs_std --names --weak-naming --no-reuse-delta)"
		# no new pack names means nothing more can be achieved here (for
		# example the combine failed); without this check a zero new pack
		# count would always look like progress and the loop could spin forever
		[ -n "$_newp" ] || break
		# We need to identify these packs later so we don't combine_packs them
		for _objpack in $_newp; do
			rename_pack "objects/pack/pack-$_objpack" "objects/pack/pack-${_objpack}_l" || :
		done
		v_cnt _newc $_newp
		# be paranoid and exit the loop if we haven't reduced the number of packs
		[ "$_newc" -lt "$_cnt" ] || break
	done
	return 0
}
# combine_large_incremental_loose_packs
# combine large[ish] _l packs into larger pack(s) using --weak-naming
# we avoid any non _l, keep, bndl or bitmap packs
# the packs considered are whatever list_packs selects when given
# --exclude-limit with minus (half of $var_redelta_threshold, plus one)
# keeps going for as long as at least two such packs remain and each round
# makes progress; always returns 0
# NOTE(review): v_cnt is not defined in this file; it is used here as if it
# stores the number of its remaining arguments into the named variable -- confirm
combine_large_incremental_loose_packs() {
	_lpo="--exclude-no-idx --exclude-keep --exclude-bitmap --exclude-bndl"
	_lpo="$_lpo --exclude-no-sfx _l"
	_lpo="$_lpo --quiet --exclude-limit -$(( ( $var_redelta_threshold / 2 ) + 1 )) objects/pack"
	while
		_cnt="$(list_packs --count $_lpo)" || :
		test "${_cnt:-0}" -ge 2
	do
		_newp="$(list_packs $_lpo | combine_packs_std --names --weak-naming)"
		# no new pack names means nothing more can be achieved here (for
		# example the combine failed); without this check a zero new pack
		# count would always look like progress and the loop could spin forever
		[ -n "$_newp" ] || break
		# We need to identify these packs later so we don't combine_packs them
		for _objpack in $_newp; do
			rename_pack "objects/pack/pack-$_objpack" "objects/pack/pack-${_objpack}_l" || :
		done
		v_cnt _newc $_newp
		# be paranoid and exit the loop if we haven't reduced the number of packs
		[ "$_newc" -lt "$_cnt" ] || break
	done
	return 0
}