#!/bin/sh

# gc-util-functions.sh: adopt single-object loose packs
# [girocco.git] / jobd / gc-util-functions.sh
# blob c9342447a445b9dce86cc7f597a638e0f58a0433

# This is a shell library for common gc related functions
# used by various Girocco scripts.

# shlib.sh always sets this, it's an error to source
# this script without having already sourced shlib.sh
# (this deliberately exits the sourcing script with status 2)
[ -n "$var_git_exec_path" ] || exit 2

# default packing options
# kept as one space-separated string that is expanded unquoted where used
packopts="--depth=50 --window=50 --window-memory=${var_window_memory:-1g}"
# quiet is "-q" unless show_progress is set to something other than 0,
# in which case it is empty
quiet="-q"; [ "${show_progress:-0}" = "0" ] || quiet=

# pidactive pid
# returns 0 if a process with the given pid exists (whether or not we are
# permitted to signal it) and 1 if it does not
# only plain positive decimal pids are accepted; anything else (empty,
# non-numeric, 0) is reported as not active rather than being handed to
# kill where 0 would address our own process group and always "exist"
pidactive() {
	case "$1" in ''|*[!0-9]*)
		return 1
	esac
	[ "$1" -gt 0 ] 2>/dev/null || return 1
	# use the C locale so the error text matched below is not translated
	if _result="$(LC_ALL=C kill -0 "$1" 2>&1)"; then
		# process exists and we have permission to signal it
		return 0
	fi
	case "$_result" in *"not permitted"*)
		# we do not have permission to signal the process
		return 0
	esac
	# process does not exist
	return 1
}

# createlock name
# exclusively create the file "name.lock"
# on success the lock file name is written to standard output and 0 is returned
# on failure (after three attempts one second apart) nothing is written
# and 1 is returned; an empty name is rejected immediately
createlock() {
	[ -n "$1" ] || return 1
	# A .lock file should only exist for much less than a second.
	# If we see a stale lock file (> 1h old), remove it and then,
	# just in case, wait 30 seconds for any process whose .lock
	# we might have just removed (it's racy) to finish doing what
	# should take much less than a second to do.
	_stalelock="$(find -L "$1.lock" -maxdepth 1 -mmin +60 -print 2>/dev/null)" || :
	if [ -n "$_stalelock" ]; then
		rm -f "$_stalelock"
		sleep 30
	fi
	for _try in p p n; do
		# with noclobber (-C) set the redirection fails if the file exists
		if (set -C; >"$1.lock") 2>/dev/null; then
			printf '%s\n' "$1.lock"
			return 0
		fi
		# delay and try again (no delay after the final attempt)
		[ "$_try" != "p" ] || sleep 1
	done
	# cannot create lock file
	return 1
}

# Create a gc.pid lockfile
# $1 => name of variable to store result in
# On success:
#   variable named by $1 will contain the name of the newly created lockfile (i.e. "gc.pid")
# On failure:
#   variable named by $1 will contain the failure reason
# $1 is expanded inside eval and must be a plain shell variable name
# the current directory is where gc.pid (and the temporary gc.pid.lock) live
v_lock_gc() {
	# be compatible with gc.pid file from newer Git releases
	_lockf='gc.pid'
	_hn="$(hostname)"
	_active=
	if [ "$(createlock "$_lockf")" ]; then
		# If $_lockf is:
		#   1) less than 12 hours old
		#   2) contains two fields (pid hostname) NO trailing NL
		#   3) the hostname is different OR the pid is still alive
		# then we exit as another active process is holding the lock
		if [ "$(find -L "$_lockf" -maxdepth 1 -mmin -720 -print 2>/dev/null)" ]; then
			_apid=
			_ahost=
			# read fails on the missing trailing NL but still sets the variables
			read -r _apid _ahost _ajunk <"$_lockf" || :
			if [ "$_apid" ] && [ "$_ahost" ]; then
				if [ "$_ahost" != "$_hn" ] || pidactive "$_apid"; then
					_active=1
				fi
			fi
		fi
	else
		eval "$1="'"unable to create $_lockf.lock file"'
		return 1
	fi
	if [ -n "$_active" ]; then
		rm -f "$_lockf.lock"
		eval "$1="'"gc already running on machine '\''$_ahost'\'' pid '\''$_apid'\''"'
		return 1
	fi
	# fill in the .lock file and move it into place; only claim the lock
	# if both the write and the rename actually worked
	if printf "%s %s" "$$" "$_hn" >"$_lockf.lock"; then
		chmod 0664 "$_lockf.lock" || :
		if mv -f "$_lockf.lock" "$_lockf"; then
			eval "$1="'"$_lockf"'
			return 0
		fi
	fi
	rm -f "$_lockf.lock"
	eval "$1="'"unable to write $_lockf file"'
	return 1
}

# make sure combine-packs uses the correct Git executable
# runs combine-packs.sh with $var_git_exec_path and $cfg_basedir/bin placed
# at the front of PATH for that one command only; all arguments are passed
# through unchanged and the script's exit status is returned
# NOTE: @basedir@ looks like an install-time placeholder -- confirm against
# the build scripts before changing it
run_combine_packs() {
	PATH="$var_git_exec_path:$cfg_basedir/bin:$PATH" @basedir@/jobd/combine-packs.sh "$@"
}

# combine the input pack(s) into a new pack (or possibly packs if packSizeLimit set)
# input pack names are read from standard input one per line delimited by the first
# ':', ' ' or '\n' character on the line (which allows gfi-packs to be read directly)
# all arguments, if any, are passed to pack-objects as additional options
# first removes any pre-existing "*.zap*" sentinels that may be leftover from any
# previously aborted "--replace" operations
# returns non-zero on failure
# the current directory must be the $GIT_DIR (objects/pack is used relatively)
combine_packs_std() {
	# a failure to clean up old sentinels is not treated as fatal
	find -L objects/pack -maxdepth 1 -type f -name '*.zap*' -exec rm -f '{}' + || :
	# $packopts and $quiet are left unquoted on purpose: each holds zero or
	# more separate options
	run_combine_packs --replace "$@" $packopts --all-progress-implied $quiet --non-empty
}

# duplicate the first file to the name given by the second file making sure that
# the second file appears atomically all-at-once after the copy has been completed
# and does not appear at all if the copy fails (in which case this function fails)
# if the second file already exists this function fails with status 1
# if the file names are the same this function returns immediately with success
# the temporary copy is made in the destination's directory so the final mv
# stays within one directory, and it is removed again if anything fails
dupe_file() {
	[ "$1" != "$2" ] || return 0
	! [ -e "$2" ] || return 1
	case "$2" in
		*?/?*)	_tmpdir="${2%/*}";;
		*)	_tmpdir=".";;
	esac
	_tmpfile="$(mktemp "${_tmpdir:-.}/packtmp-XXXXXX")" || return 1
	if cp -fp "$1" "$_tmpfile" && mv -f "$_tmpfile" "$2"; then
		return 0
	fi
	# do not leave the partial temporary copy behind
	rm -f "$_tmpfile"
	return 1
}

# rename_pack oldnamepath newnamepath
# both arguments are pack path names WITHOUT any extension; the .pack, .bitmap
# and .idx files that exist for oldnamepath are moved to newnamepath
# note that .keep and .bndl files are left untouched and not moved at all!
# incorrect use (not exactly two different names) or a file that cannot be
# moved causes an "exit 1" of the calling shell, otherwise 0 is returned
rename_pack() {
	[ $# -eq 2 ] && [ "$1" != "$2" ] || {
		echo >&2 "[$proj] incorrect use of rename_pack function"
		exit 1
	}
	# Git assumes that if the destination of the rename already exists
	# that it is, in fact, a copy of the same bytes so silently succeeds
	# without doing anything.  We duplicate that logic here.
	# Git checks for the .idx file first before even trying to use a pack
	# so it should be the last moved and the first removed.
	for ext in pack bitmap idx; do
		[ -f "$1.$ext" ] || continue
		# try a hard link, then a copy, then accept an existing destination
		ln "$1.$ext" "$2.$ext" >/dev/null 2>&1 ||
		dupe_file "$1.$ext" "$2.$ext" >/dev/null 2>&1 ||
		[ -f "$2.$ext" ] || {
			echo >&2 "[$proj] unable to move $1.$ext to $2.$ext"
			exit 1
		}
	done
	for ext in idx pack bitmap; do
		rm -f "$1.$ext"
	done
	return 0
}

# current directory must already be set to the $GIT_DIR
# see if there are "lotsa" loose objects
# "lotsa" is defined as the 17, 68, 71 and 86 object directories existing
# and there being at least 5 total objects between them which corresponds
# to an approximate average of 320 loose objects before this function starts
# returning true and triggering a "mini" gc to pack up loose objects
# only regular files whose names match the "$octet19*" pattern are counted
# ($octet19 is not set in this file and must be provided by the sourcing script)
lotsa_loose_objects() {
	[ -d objects/17 ] && [ -d objects/68 ] && [ -d objects/71 ] && [ -d objects/86 ] || return 1
	# the $(( )) wrapper strips any padding wc may put around the count
	_objs=$(( $(find -L objects/17 objects/68 objects/71 objects/86 -maxdepth 1 -name "$octet19*" -type f -print 2>/dev/null | LC_ALL=C wc -l) ))
	[ "${_objs:-0}" -ge 5 ]
}

# same as lotsa_loose_objects but first runs `git prune-packed` if it can get a gc lock
# returns false right away if there are not "lotsa" loose objects to begin with
# if the gc lock cannot be obtained no pruning is done and true is returned
# (the unpruned answer stands); otherwise the lock is released again and the
# result of re-checking after the prune is returned
lotsa_loose_pruned_objects() {
	lotsa_loose_objects || return $?
	v_lock_gc _gclock || return 0
	git prune-packed --quiet
	rm -f "$_gclock"
	lotsa_loose_objects
}

# pack any existing, non-packed loose objects into a new _l.pack file then run prune-packed
# note that prune-packed is NOT run beforehand -- the caller must do that if needed
# loose objects need not be part of complete commits/trees as --weak-naming is used
# if there end up being too many loose packs, attempt to combine the packs too
# the current directory must be the $GIT_DIR (objects/pack is used relatively)
pack_incremental_loose_objects() {
	_lpacks="$(run_combine_packs </dev/null --names --loose --weak-naming --incremental --non-empty --all-progress-implied ${quiet:---progress} $packopts)"
	if [ -n "$_lpacks" ]; then
		# We need to identify these packs later so we don't combine_packs them
		for _objpack in $_lpacks; do
			rename_pack "objects/pack/pack-$_objpack" "objects/pack/pack-${_objpack}_l" || :
		done
		git prune-packed $quiet
	fi
	_packs=
	__lpo="--exclude-no-idx --exclude-keep --exclude-bitmap --exclude-bndl"
	_lpo01="$__lpo --exclude-limit 2"
	_lpol="$__lpo --exclude-no-sfx _l"
	# give every pack listed with the "--exclude-limit 2" options that has no
	# '_' anywhere in its name the _l suffix as well so that it is treated
	# like the loose packs created above
	list_packs --quiet $_lpo01 objects/pack 2>/dev/null |
	while read -r _apack && _apack="${_apack%.pack}" && [ -n "$_apack" ]; do
		case "$_apack" in *_*);;*)
			rename_pack "$_apack" "${_apack}_l" || :
		esac
	done || :
	# count the _l packs; a failed or empty count is treated as 0
	{ _packs="$(list_packs --quiet --count $_lpol objects/pack || :)" || :; } 2>/dev/null
	[ "${_packs:-0}" -lt 20 ] || {
		# 20 or more: combine the small ones and, if that was not enough,
		# the large ones too
		combine_small_incremental_loose_packs
		_packs=
		{ _packs="$(list_packs --quiet --count $_lpol objects/pack || :)" || :; } 2>/dev/null
		[ "${_packs:-0}" -lt 20 ] || combine_large_incremental_loose_packs
	}
}

# same as pack_incremental_loose_objects except
# returns true if locked and packed and unlocked or
# false if could not lock (with err in $lockerr)
# a failure of the packing itself is ignored once the lock has been obtained
pack_incremental_loose_objects_if_lockable() {
	if v_lock_gc _gclock; then
		pack_incremental_loose_objects || :
		rm -f "$_gclock"
		return 0
	else
		# on failure v_lock_gc stores the reason in the named variable
		lockerr="$_gclock"
		return 1
	fi
}

# combine small _l packs into larger pack(s) using --weak-naming
# we avoid any non _l, keep, bndl or bitmap packs
# "small" is selected by passing --object-limit $var_redelta_threshold to list_packs
# repeats while list_packs still reports at least two such packs and each
# pass makes progress; always returns 0
combine_small_incremental_loose_packs() {
	_lpo="--exclude-no-idx --exclude-keep --exclude-bitmap --exclude-bndl"
	_lpo="$_lpo --exclude-no-sfx _l"
	_lpo="$_lpo --quiet --object-limit $var_redelta_threshold objects/pack"
	while
		_cnt="$(list_packs --count $_lpo)" || :
		test "${_cnt:-0}" -ge 2
	do
		_newp="$(list_packs $_lpo | combine_packs_std --names --weak-naming --no-reuse-delta)"
		# We need to identify these packs later so we don't combine_packs them
		for _objpack in $_newp; do
			rename_pack "objects/pack/pack-$_objpack" "objects/pack/pack-${_objpack}_l" || :
		done
		v_cnt _newc $_newp
		# be paranoid and exit the loop if we haven't reduced the number of packs
		# or if no new pack came back at all (e.g. combining failed), since
		# then nothing changed and another pass would repeat forever
		[ "$_newc" -gt 0 ] && [ "$_newc" -lt "$_cnt" ] || break
	done
	return 0
}

# combine large[ish] _l packs into larger pack(s) using --weak-naming
# we avoid any non _l, keep, bndl or bitmap packs
# "large[ish]" is selected by passing
# --exclude-limit -(($var_redelta_threshold / 2) + 1) to list_packs
# repeats while list_packs still reports at least two such packs and each
# pass makes progress; always returns 0
combine_large_incremental_loose_packs() {
	_lpo="--exclude-no-idx --exclude-keep --exclude-bitmap --exclude-bndl"
	_lpo="$_lpo --exclude-no-sfx _l"
	_lpo="$_lpo --quiet --exclude-limit -$(( ( $var_redelta_threshold / 2 ) + 1 )) objects/pack"
	while
		_cnt="$(list_packs --count $_lpo)" || :
		test "${_cnt:-0}" -ge 2
	do
		_newp="$(list_packs $_lpo | combine_packs_std --names --weak-naming)"
		# We need to identify these packs later so we don't combine_packs them
		for _objpack in $_newp; do
			rename_pack "objects/pack/pack-$_objpack" "objects/pack/pack-${_objpack}_l" || :
		done
		v_cnt _newc $_newp
		# be paranoid and exit the loop if we haven't reduced the number of packs
		# or if no new pack came back at all (e.g. combining failed), since
		# then nothing changed and another pass would repeat forever
		[ "$_newc" -gt 0 ] && [ "$_newc" -lt "$_cnt" ] || break
	done
	return 0
}