html/volunteers: clean up some old style `/w` links
[girocco.git] / bin / format-readme
blobac9f7b6740ea06f2a5435ec54dc514915bff4ffd
1 #!/bin/sh
3 # format-readme -- find and format a repository's readme blob
4 # Copyright (C) 2015,2016,2017,2019,2021 Kyle J. McKay.
5 # All rights reserved.
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation, either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
20 # Version 1.3.1
# Abort the script as soon as any unguarded command exits with a non-zero status.
22 set -e
# usage -- write the complete help text to standard output.
# Takes no arguments.  The script's basename (${0##*/}) is spliced into the
# text in two places by briefly leaving the single-quoted literal, and literal
# apostrophes inside the text are spelled '\'' for the same reason.
24 usage() {
25 printf '%s' \
26 'Usage: '"${0##*/}"' [-e]... [-r <prefix> | -p <prefix> [-i <imgprefix>]]
27 <path-to-repo.git> [<treeish>]
28 -a treat all plain text as .md
29 -e output error document on Markdown.pl errors
30 -w which blob contains the actual readme contents
31 -m <maxsize> maximum size of readme to allow (default is 32768)
32 -b <prefix> prefix bare fragment-only URLs with <prefix>
33 -r <prefix> prefix non-absolute URLs with <prefix>
34 -p <prefix> prefix non-absolute URLs with <prefix>/<symlink-path>
35 -i <imgprefix> use <imgprefix> instead of <prefix> for images
36 --stub wrap the output in a full XHTML document stub
38 <path-to-repo.git> location where `git rev-parse --git-dir` works
39 <treeish> tree to use instead of "HEAD^{tree}"
41 With `-a` a plain text readme found with an explicit "text" (i.e. `.txt` or
42 `.text`) extension or no extension at all will be treated as though it had a
43 `.md` extension instead and run through Markdown.pl. Adding a second `-a` will
44 treat unknown extension types as though they are `.md` too (not recommended).
46 Regardless of how many `-a` options are specified, extension types that are
47 known to be something other than plain text will never be treated as `.md`.
49 With `-e` if Markdown.pl finds validation errors, those errors will be output
50 together with a line-numbered source into a suitable `<pre>...</pre>` block
51 that can be displayed in place of the "readme" and '"${0##*/}"' exits with
52 success.
54 With `-m` the maximum allowable size of the source blob for the "readme" can be
55 specified. By default the maximum size is 32768 (32K). This must be specified
56 in bytes and will automatically be rounded up to a minimum of 1024 except that
57 a value of 0 means unlimited (not recommended).
59 With `-w` rather than formatting the blob, output a single line with the format:
61 <hash> <size> <fmt> <name> <sympath>
63 Where "<hash>" is the actual blob hash, "<size>" is the size as reported by
64 `git ls-tree`, "<fmt>" is "md", "pod" or "txt" indicating the formatter to use
65 and "<name>" is the initial name and if that was a symlink the "<sympath>"
66 field will be present and is the relative path to the final blob. Note that
67 size is still enforced and will produce an error return rather than a result if
68 the size is too big. This is the one "semi-recommended" use of `-m 0`, but if
69 `-m` is not specified and `-w` is then `-m` defaults to 0 instead of 32K.
71 With `-r` all non-absolute URLs have <prefix> prefixed to them.
73 With `-p` all non-absolute URLs have "<prefix>/<symlink-path>" prefixed to them
74 where <symlink-path> is the dirname portion of the symlink if the selected
75 "readme" file is a symlink that needs to be followed. If it'\''s not a symlink or
76 it'\''s in the same directory, the "/<symlink-path>" part is not added but the
77 "<prefix>" part still is.
79 With `-i` (which also requires either `-r` or `-p`), if the target of the link
80 is an image, use "<imgprefix>" instead of "<prefix>". (If `-p` was used it
81 will still get the symlink path, if `-r` was used it won'\''t.)
# die -- report a fatal error and stop.
# Arguments: the message words (joined through "$*").
# Writes "<script-basename>: fatal: <message>" to standard error and exits
# the script with status 2.
85 die() {>&2 printf '%s: fatal: %s\n' "${0##*/}" "$*"; exit 2;}
# Normalize TMPDIR: drop one trailing slash, then fall back to /tmp unless the
# result is non-empty and names an existing, writable directory.
86 TMPDIR="${TMPDIR%/}"
87 [ -n "$TMPDIR" ] && [ -d "$TMPDIR" ] && [ -w "$TMPDIR" ] || TMPDIR="/tmp"
# rmfiles holds the text of the argument list handed to `rm -f` at cleanup
# time; it starts out empty, meaning there is nothing to remove.
89 rmfiles=
# cleanup -- remove the files named by $rmfiles, if any.
# The list goes through eval, so quoting and variable references stored in
# $rmfiles are interpreted when cleanup runs.  A failing rm is ignored (|| :).
90 cleanup() {
91 [ -z "$rmfiles" ] || eval rm -f "$rmfiles" || :
# Convert each handled signal into an ordinary exit with a distinct status
# (the values follow the usual 128 + signal-number convention), so that the
# EXIT trap installed last always gets a chance to run cleanup.
94 trap 'exit 129' HUP
95 trap 'exit 130' INT
96 trap 'exit 131' QUIT
97 trap 'exit 134' ABRT
98 trap 'exit 141' PIPE
99 trap 'exit 143' TERM
100 trap 'cleanup' EXIT
# Script-wide state, all reset before option parsing:
#   nl          separator appended after each entry added to $symlinks
#               (presumably a single newline; the end of the literal is not
#               visible here -- confirm)
#   symlinks    "<hash> <name>" entries for symlinks seen in the top-level tree
#   alltxt      number of times -a was given (empty means none)
#   stub        set by --stub
#   showblob    set by -w
#   maxlen      size limit in bytes, 0 meaning unlimited (default 32768)
#   showerr     set by -e
#   fragprefix  value of -b
#   addprefix   set when -r or -p was given
#   addpath     set when -p was given
#   urlprefix   value of -r / -p, always ending in exactly one added "/"
#   imgprefix   value of -i, always ending in exactly one added "/"
#   optm optb optr optp opti
#               "already seen" flags used to reject repeated or
#               conflicting options
102 nl='
104 symlinks=
106 alltxt=
107 stub=
108 showblob=
109 maxlen=32768
110 showerr=
111 fragprefix=
112 addprefix=
113 addpath=
114 urlprefix=
115 imgprefix=
116 optm= optb= optr= optp= opti=
# Option parsing.  The loop continues while the first remaining argument is
# longer than one character and begins with "-"; a lone "-" or the first
# non-option argument ends it, as does an explicit "--".
# Every option is consumed with shift; options that take a value shift once
# for the option itself and once more for the value.
117 while [ $# -gt 0 ] && [ ${#1} -gt 1 ] && [ z"-${1#-}" = z"$1" ]; do
118 case "$1" in
119 "-h"|"--help")
120 usage
121 exit 0
123 "--")
124 shift
125 break
# -a may be repeated; alltxt counts the occurrences.
127 "-a")
128 alltxt="$(( ${alltxt:-0} + 1 ))"
129 shift
131 "-e")
132 showerr=1
133 shift
135 "-w")
136 showblob=1
137 shift
139 "--stub")
140 stub=1
141 shift
# -m <maxsize>: digits only; any non-zero value below 1024 is raised to 1024,
# while 0 is kept and means "unlimited".
# NOTE(review): the value goes through shell arithmetic, where a leading zero
# is normally read as octal -- confirm whether inputs such as "08" matter.
143 "-m")
144 [ -z "$optm" ] || die "'-m' may only be used once"
145 shift
146 [ $# -ge 1 ] && [ -n "$1" ] || die "'-m' requires an argument"
147 [ "${1#*[!0-9]}" = "$1" ] || die "'-m' requires a whole number argument"
148 optm=1
149 maxlen="$(( $1+0 ))"
150 [ $maxlen -ge 1024 ] || [ "$maxlen" = "0" ] || maxlen="1024"
151 shift
# -b <prefix>: unlike the other valued options, an empty value is accepted.
153 "-b")
154 [ -z "$optb" ] || die "'-b' may only be used once"
155 shift
156 [ $# -ge 1 ] || die "'-b' requires an argument"
157 optb=1
158 fragprefix="$1"
159 shift
# -r and -p are mutually exclusive.  Both strip one trailing "/" from the
# value and then append one, so urlprefix always ends with "/".
161 "-r")
162 [ -z "$optr" ] || die "'-r' may only be used once"
163 [ -z "$optp" ] || die "'-r' may not be used with '-p'"
164 shift
165 [ $# -ge 1 ] && [ -n "$1" ] || die "'-r' requires an argument"
166 optr=1
167 addprefix=1
168 urlprefix="${1%/}/"
169 shift
# -p additionally sets addpath, which later appends the symlink directory.
171 "-p")
172 [ -z "$optp" ] || die "'-p' may only be used once"
173 [ -z "$optr" ] || die "'-p' may not be used with '-r'"
174 shift
175 [ $# -ge 1 ] && [ -n "$1" ] || die "'-p' requires an argument"
176 optp=1
177 addprefix=1
178 addpath=1
179 urlprefix="${1%/}/"
180 shift
182 "-i")
183 [ -z "$opti" ] || die "'-i' may only be used once"
184 shift
185 [ $# -ge 1 ] && [ -n "$1" ] || die "'-i' requires an argument"
186 opti=1
187 imgprefix="${1%/}/"
188 shift
191 die "unrecognized option \`$1\` (-h for help)"
193 esac
194 done
# Post-parse checks and repository setup.
#  * With -w and no explicit -m the size limit is switched off (maxlen=0).
#  * -i is only valid together with -r or -p.
#  * One or two positional arguments are accepted: the repository path and an
#    optional treeish (default HEAD).
# The script then changes into the repository's git directory and resolves
# the treeish to a tree object.  Failures in this part exit with status 2
# without printing a message (stderr of the tree lookup is discarded).
195 [ -z "$showblob" ] || [ -n "$optm" ] || maxlen=0
196 [ -z "$opti" ] || [ -n "$optr$optp" ] || die "'-i' also requires either '-r' or '-p'"
197 [ $# -le 2 ] || die "too many arguments (-h for help)"
198 [ $# -ge 1 ] && [ -n "$1" ] || die "missing first argument <path-to-repo.git> (-h for help)"
199 projdir="$1"
200 [ -n "$projdir" ] && [ -d "$projdir" ] || exit 2
201 cd "$projdir" || exit 2
# Drop any inherited GIT_DIR so git discovers the repository from the
# current directory instead.
202 unset GIT_DIR
203 gd="$(git rev-parse --git-dir 2>&1)" || exit 2
204 cd "$gd" || exit 2
205 treeish="${2:-HEAD}"
206 tree="$(git rev-parse --quiet --verify "$treeish"^{tree} 2>/dev/null)" || exit 2
208 # We prefer the first file or symlink we find with
209 # a supported extension and then we will follow it
210 # if it's a relative symlink with no '.' or '..' components.
211 # If we don't find a supported extension, we use just plain README
212 # which we assume to be plain text (and we will follow a symlink).
213 # We prefer a markdown extension over others and any extension
214 # other than plain text next followed by plain text and then no extension.
# haspod is set to 1 only when perl can load Pod::Html; README.pod candidates
# are skipped otherwise.
216 haspod=
217 ! perl -MPod::Html -e 1 >/dev/null 2>&1 || haspod=1
# notplain -- test whether a file name is a README with an extension that
# belongs to a known non-plain-text markup format.
# Arguments: $1 = file name.
# Returns 0 when the name is "README" (any letter case) followed by one of the
# extensions listed in the case patterns below, and 1 for anything else.
219 notplain() {
220 case "$1" in
221 # .textile, .rdoc, .org, .creole, .mediawiki/.wiki
222 # .rst, .asciidoc/.adoc/.asc
223 [Rr][Ee][Aa][Dd][Mm][Ee].[Tt][Ee][Xx][Tt][Ii][Ll][Ee]|\
224 [Rr][Ee][Aa][Dd][Mm][Ee].[Rr][Dd][Oo][Cc]|\
225 [Rr][Ee][Aa][Dd][Mm][Ee].[Oo][Rr][Gg]|\
226 [Rr][Ee][Aa][Dd][Mm][Ee].[Cc][Rr][Ee][Oo][Ll][Ee]|\
227 [Rr][Ee][Aa][Dd][Mm][Ee].[Mm][Ee][Dd][Ii][Aa][Ww][Ii][Kk][Ii]|\
228 [Rr][Ee][Aa][Dd][Mm][Ee].[Ww][Ii][Kk][Ii]|\
229 [Rr][Ee][Aa][Dd][Mm][Ee].[Rr][Ss][Tt]|\
230 [Rr][Ee][Aa][Dd][Mm][Ee].[Aa][Ss][Cc][Ii][Ii][Dd][Oo][Cc]|\
231 [Rr][Ee][Aa][Dd][Mm][Ee].[Aa][Dd][Oo][Cc]|\
232 [Rr][Ee][Aa][Dd][Mm][Ee].[Aa][Ss][Cc])
233 return 0
234 esac
235 return 1
# Scan the top level of $tree for readme candidates.
# Three candidate slots are filled, each with hash / name / size / is-symlink
# (and a format where one is known):
#   readmeext*  a README with a recognized extension (md, pod or txt format)
#   readme*     a bare "README" (treated as txt)
#   readmefb*   fallback: "README.<ext>" with some other extension
# Only entries that are blobs with mode 100644, 100755 or 120000 and a
# non-zero size are considered.  Symlinks of 1024 bytes or more are ignored;
# smaller ones are also recorded in $symlinks as "<hash> <name>".
238 readme=
239 readmenm=
240 readmelnk=
241 readmefmt=
242 readmesiz=
243 readmefb=
244 readmefbnm=
245 readmefblnk=
246 readmefbsiz=
247 readmeext=
248 readmeextnm=
249 readmeextlnk=
250 readmeextfmt=
251 readmeextsiz=
252 while read -r mode type hash size name; do
253 [ "$mode" = "100644" ] || [ "$mode" = "100755" ] || [ "$mode" = "120000" ] || continue
254 [ "$size" != "0" ] && [ "$size" != "-" ] || continue
255 [ "$type" = "blob" ] || continue
256 [ "$mode" != "120000" ] || [ "$size" -lt 1024 ] || continue
257 [ "$mode" != "120000" ] || symlinks="$symlinks$hash $name$nl"
258 case "$name" in
# Markdown candidate: replaces whatever is in the readmeext slot, except that
# a symlink never replaces an existing markdown choice.
260 # Markdown extensions must match this pattern:
261 # /md|rmd|mkdn?|mdwn|mdown|markdown|litcoffee/i
262 [Rr][Ee][Aa][Dd][Mm][Ee].[Mm][Dd]|\
263 [Rr][Ee][Aa][Dd][Mm][Ee].[Rr][Mm][Dd]|\
264 [Rr][Ee][Aa][Dd][Mm][Ee].[Mm][Kk][Dd]|\
265 [Rr][Ee][Aa][Dd][Mm][Ee].[Mm][Kk][Dd][Nn]|\
266 [Rr][Ee][Aa][Dd][Mm][Ee].[Mm][Dd][Ww][Nn]|\
267 [Rr][Ee][Aa][Dd][Mm][Ee].[Mm][Dd][Oo][Ww][Nn]|\
268 [Rr][Ee][Aa][Dd][Mm][Ee].[Mm][Aa][Rr][Kk][Dd][Oo][Ww][Nn]|\
269 [Rr][Ee][Aa][Dd][Mm][Ee].[Ll][Ii][Tt][Cc][Oo][Ff][Ff][Ee][Ee])
270 if [ -n "$readmeext" ]; then
271 [ "$readmeextfmt" != "md" ] || [ "$mode" != "120000" ] || continue
273 readmeext="$hash"
274 readmeextnm="$name"
275 readmeextsiz="$size"
276 readmeextlnk=
277 [ "$mode" != "120000" ] || readmeextlnk=1
278 readmeextfmt=md
# POD candidate (only when Pod::Html is available): never replaces markdown,
# and a symlink only replaces an existing txt choice.
281 [Rr][Ee][Aa][Dd][Mm][Ee].[Pp][Oo][Dd])
282 [ -n "$haspod" ] || continue
283 if [ -n "$readmeext" ]; then
284 [ "$readmeextfmt" != "md" ] || continue
285 [ "$readmeextfmt" = "txt" ] || [ "$mode" != "120000" ] || continue
287 readmeext="$hash"
288 readmeextnm="$name"
289 readmeextsiz="$size"
290 readmeextlnk=
291 [ "$mode" != "120000" ] || readmeextlnk=1
292 readmeextfmt=pod
# Plain-text candidate: replaces an existing choice only when that choice is
# also txt and this entry is not a symlink.
295 [Rr][Ee][Aa][Dd][Mm][Ee].[Tt][Xx][Tt]|\
296 [Rr][Ee][Aa][Dd][Mm][Ee].[Tt][Ee][Xx][Tt])
297 if [ -n "$readmeext" ]; then
298 [ "$readmeextfmt" = "txt" ] && [ "$mode" != "120000" ] || continue
300 readmeext="$hash"
301 readmeextnm="$name"
302 readmeextsiz="$size"
303 readmeextlnk=
304 [ "$mode" != "120000" ] || readmeextlnk=1
305 readmeextfmt=txt
# Bare README: a symlink does not replace one that was already found.
308 [Rr][Ee][Aa][Dd][Mm][Ee])
309 [ -z "$readme" ] || [ "$mode" != "120000" ] || continue
310 readme="$hash"
311 readmenm="$name"
312 readmesiz="$size"
313 readmelnk=
314 [ "$mode" != "120000" ] || readmelnk=1
315 readmefmt=txt
# Fallback: README.<ext> with any other extension.  The name must contain
# exactly one "." and the extension may use only the characters A-Za-z0-9+_.
# NOTE(review): $ws in the third check is not assigned anywhere in the visible
# source -- confirm it is defined, otherwise that check does not test for
# whitespace.
318 [Rr][Ee][Aa][Dd][Mm][Ee].?*)
319 [ -z "$readmefb" ] || [ "$mode" != "120000" ] || continue
320 [ "${name%.*}" = "${name%%.*}" ] || continue
321 [ "${name#*.}" = "${name##*[!A-Za-z0-9+_]}" ] || continue
322 [ "${name%[$ws]*}" = "$name" ] || continue
323 readmefb="$hash"
324 readmefbnm="$name"
325 readmefbsiz="$size"
326 readmefblnk=
327 [ "$mode" != "120000" ] || readmefblnk=1
330 esac
331 done <<EOT
332 $(git ls-tree -l $tree)
# Collapse the three candidate slots into the final choice (readmeext*).
#  1. With no bare README, the fallback candidate takes the readme slot; its
#     format is left empty (unknown) unless the name ends in ".first"
#     (any letter case), which is treated as txt.
#  2. With no recognized-extension candidate, the readme slot is used.
#  3. No candidate at all: exit with status 1.
#  4. With -a, txt becomes md.  With -a given twice, an unknown format also
#     becomes md unless notplain recognizes the name as another markup type.
334 if [ -n "$readmefb" ] && [ -z "$readme" ]; then
335 readme="$readmefb"
336 readmenm="$readmefbnm"
337 readmesiz="$readmefbsiz"
338 readmelnk="$readmefblnk"
339 readmefmt=""
340 case "$readmenm" in *.[Ff][Ii][Rr][Ss][Tt])
341 readmefmt=txt
342 esac
344 if [ -n "$readme" ] && [ -z "$readmeext" ]; then
345 readmeext="$readme"
346 readmeextnm="$readmenm"
347 readmeextsiz="$readmesiz"
348 readmeextlnk="$readmelnk"
349 readmeextfmt="$readmefmt"
351 [ -n "$readmeext" ] || exit 1
352 if [ ${alltxt:-0} -gt 0 ]; then
353 [ "$readmeextfmt" != "txt" ] || readmeextfmt=md
355 [ -z "$readmeextfmt" ] && [ ${alltxt:-0} -gt 1 ] &&
356 ! notplain "$readmeextnm"
357 then
358 readmeextfmt=md
# Resolve the chosen readme when it is a symlink.
#  * The link target ($rel) is the blob's contents.  Absolute targets and
#    targets containing a "." or ".." path component cause exit status 1.
#  * A target without "/" is looked up in $symlinks; if it is itself a
#    recorded top-level symlink, that link is followed once, with the same
#    checks applied to its target.
#  * A target with a directory part has its first path component looked up in
#    $symlinks; if that component is a recorded symlink, its target replaces
#    the component.
#  * With -w the resolved path is remembered in blobsym.
#  * With -p the directory part of the resolved path (when not ".") is
#    appended to urlprefix and, if set, imgprefix.  This directory comes from
#    data stored in the repository, so from here on both prefixes may contain
#    arbitrary characters.
#  * Finally the resolved path is looked up in $tree.  It must be a non-empty
#    regular blob (mode 100644 or 100755); its hash and size replace those of
#    the symlink.  Anything else exits with status 1.
361 blobsym=
362 if [ -n "$readmeextlnk" ]; then
363 rel="$(git cat-file blob $readmeext 2>/dev/null)" || exit 1
364 case "$rel" in /*) exit 1; esac
365 case "/$rel/" in */../*|*/./*) exit 1; esac
366 case "$rel" in */*) :;; ?*)
367 while read -r hash name; do
368 if [ -n "$hash" ] && [ "$name" = "$rel" ]; then
369 rel2="$(git cat-file blob $hash 2>/dev/null)" || exit 1
370 case "$rel2" in /*) exit 1; esac
371 case "/$rel2/" in */../*|*/./*) exit 1; esac
372 rel="$rel2"
373 break
375 done <<-EOT
376 ${symlinks%?}
378 esac
379 case "$rel" in *?/?*)
380 suffix="${rel#*/}"
381 prefix="${rel%/$suffix}"
382 while read -r hash name; do
383 if [ -n "$hash" ] && [ "$name" = "$prefix" ]; then
384 rel2="$(git cat-file blob $hash 2>/dev/null)" || exit 1
385 case "$rel2" in /*) exit 1; esac
386 case "/$rel2/" in */../*|*/./*) exit 1; esac
387 rel="$rel2/$suffix"
388 break
390 done <<-EOT
391 ${symlinks%?}
393 esac
394 [ -z "$showblob" ] || blobsym="$rel"
395 if [ -n "$addpath" ]; then
396 dir="$(dirname "$rel")"
397 if [ "$dir" != "." ]; then
398 urlprefix="${urlprefix%/}/$dir"
399 [ -z "$imgprefix" ] || imgprefix="${imgprefix%/}/$dir"
402 read -r mode type hash size name <<EOT
403 $(git ls-tree -l $tree -- "$rel")
405 [ "$mode" = "100644" ] || [ "$mode" = "100755" ] || exit 1
406 [ "$type" = "blob" ] || exit 1
407 [ "$size" != "0" ] && [ "$size" != "-" ] || exit 1
408 readmeext="$hash"
409 readmeextsiz="$size"
412 # Allow up to $maxlen (32K by default)
413 # But fail if it doesn't look like it's text
414 # A $maxlen of 0 means unlimited (but that's not recommended)
415 # Length is checked twice:
416 # 1) here based on ls-tree size
417 # 2) perl based on the actual blob contents size
# An oversized blob ends the script with status 1 and no output.
419 [ "$maxlen" = "0" ] || [ "$readmeextsiz" -le "$maxlen" ] || exit 1
# -w mode: print "<hash> <size> <fmt> <name>[ <sympath>]" and stop.  An unknown
# format is reported as txt; <sympath> appears only when blobsym is non-empty.
421 if [ -n "$showblob" ]; then
422 printf '%s %s %s %s%s\n' "$readmeext" "$readmeextsiz" \
423 "${readmeextfmt:-txt}" "$readmeextnm" "${blobsym:+ $blobsym}"
424 exit 0
# Read the blob into $contents through a small perl filter.
# The filter makes sure STDIN has the :perlio layer, rejects input that fails
# perl's -T (looks-like-text) test, slurps everything, and prints it back only
# when it is non-empty.  When maxlen is not 0, $andmaxok splices an extra
# "length <= maxlen" condition into the perl source; maxlen was validated as
# digits during option parsing.
# Any failure in the pipeline's last stage exits the script with status 1;
# perl's own diagnostics are discarded.
427 andmaxok=
428 [ "$maxlen" = "0" ] || andmaxok=' && length($contents) <= '"$maxlen"
429 contents="$(git cat-file blob $readmeext | perl -e '
430 use 5.008;
431 use strict;
432 use warnings;
433 binmode STDIN, ":perlio" or exit 1
434 unless grep /^perlio$/, PerlIO::get_layers(STDIN);
435 exit 1 unless -T STDIN;
436 undef $/;
437 my $contents = <STDIN>;
438 exit 1 unless defined($contents) && length($contents) > 0'"$andmaxok"';
439 print $contents;
440 exit 0;
441 ' 2>/dev/null)" || exit 1
# mdpl is the Markdown formatter command; args is the option string that is
# later extended and expanded through eval when the formatter is run.
443 mdpl='Markdown.pl' args='--strip-comments-lax-only'
# showstub -- print the opening part of a stub document.
# Arguments: words used as the heading text.
# Feeds a one-line Markdown document ("# <text>") to "$mdpl --stub" and copies
# its output up to, but not including, the first line that starts with "<h1".
444 showstub() {
445 printf '# %s\n' "$*" | "$mdpl" --stub | LC_ALL=C awk '/^<h1/{exit}{print}'
# showfoot -- print the closing lines of a stub document.
# Arguments: optional text emitted immediately before the closing </div>.
# Prints "<args></div>", "</body>" and "</html>", one per line.
447 showfoot() {
448 printf '%s\n' "$*</div>" "</body>" "</html>"
451 # Format the result
# Dispatch on the detected format (an unknown format is handled as txt).
# Every arm prints a "<!-- README NAME: ... -->" marker and, with --stub, wraps
# its output between showstub and showfoot.  If no arm exits, the script ends
# with status 1.
452 case "${readmeextfmt:-txt}" in
455 # Run Markdown.pl on it
# The prefix options are appended to $args as escaped variable REFERENCES
# (\$fragprefix etc.), not as values.  $args is expanded by eval below, and
# urlprefix/imgprefix can contain a directory name taken from repository data
# (-p), so pasting the values in would let characters such as ", $ or `
# be executed by the shell.  With references the values are expanded exactly
# once, inside double quotes, when eval runs.
456 if [ -n "$fragprefix" ]; then
457 args="$args -b \"\$fragprefix\""
459 if [ -n "$addprefix" ]; then
460 args="$args -r \"\$urlprefix\""
461 [ -z "$imgprefix" ] || args="$args -i \"\$imgprefix\""
# Without -e: stream the formatter output directly and exit with its status.
463 if [ ! "$showerr" ]; then
464 [ -z "$stub" ] || showstub "$readmeextnm"
465 printf '<!-- README NAME: %s -->\n' "$readmeextnm"
466 err=0
467 printf '%s' "$contents" | eval "\"\$mdpl\" $args 2>/dev/null" || err=$?
468 [ -z "$stub" ] || showfoot
469 exit $err
470 else
# With -e: capture output and errors in temporary files (registered in rmfiles
# for removal at exit).  On success the captured output is printed.  On failure
# an error document is printed instead: the escaped error text followed by a
# line-numbered, escaped listing of a second, non-validating formatter run.
# This branch always exits 0.
471 tmph="$(mktemp "${TMPDIR:+$TMPDIR/}fmtrdme-$$-htm-XXXXXX")"
472 tmpe="$(mktemp "${TMPDIR:+$TMPDIR/}fmtrdme-$$-err-XXXXXX")"
473 rmfiles="$rmfiles "'"$tmph" "$tmpe"'
474 rm -f "$tmph" "$tmpe" && >"$tmph" && >"$tmpe"
475 if printf '%s' "$contents" | eval "\"\$mdpl\" $args "'>"$tmph" 2>"$tmpe"'; then
476 [ -z "$stub" ] || showstub "$readmeextnm"
477 printf '<!-- README NAME: %s -->\n' "$readmeextnm"
478 cat "$tmph"
479 [ -z "$stub" ] || showfoot
480 else
481 [ -s "$tmpe" ] || echo "unknown errors" >"$tmpe"
482 [ -z "$stub" ] || showstub "$readmeextnm errors"
483 printf '<!-- README NAME: %s errors -->\n<pre>' "$readmeextnm"
484 <"$tmpe" LC_ALL=C sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g'
485 echo ""
486 printf '%s' "$contents" |
487 eval "\"\$mdpl\" --no-sanitize --no-validate-xml $args 2>/dev/null >\"\$tmph\""
488 lines="$(( $(wc -l <"$tmph")+0 ))"
489 <"$tmph" LC_ALL=C awk -v w="${#lines}" \
490 '{gsub(/&/,"\\&amp;");gsub(/</,"\\&lt;");printf("%*u %s\n",w,NR,$0)}'
491 printf '%s' '</pre>'
492 [ -z "$stub" ] || showfoot "$nl"
494 exit 0
498 pod)
499 # Run pod2html and extract the contents
# The optional --htmlroot argument is handed to perl as a command-line
# argument (after "--" so perl does not read it as one of its own switches)
# and picked up from @ARGV, instead of being pasted into the perl source
# where quote, "$" or "@" characters in the prefix would be run as code.
# ${arg:+"$arg"} expands to nothing when no prefix applies.
# The second perl stage keeps only the <body> contents, drops everything up
# to an INDEX END marker and leading empty paragraphs, and, when -b was
# given, prefixes fragment-only hrefs with $FP.
500 arg=
501 if [ -n "$addprefix" ] && [ -n "${urlprefix%/}" ]; then
502 arg="--htmlroot=${urlprefix%/}"
504 [ -z "$stub" ] || showstub "$readmeextnm"
505 printf '<!-- README NAME: %s -->\n' "$readmeextnm"
506 err=0
507 printf '%s' "$contents" | \
508 perl -MPod::Html -e 'pod2html "--quiet", "--no-index", @ARGV' -- ${arg:+"$arg"} 2>/dev/null | \
509 FP="$fragprefix" perl -e '
510 use strict;
511 use warnings;
512 undef $/;
513 my $fp = $ENV{FP};
514 defined($fp) or $fp = "";
515 my $contents = <STDIN>;
516 $contents =~ s,^.*<body[^>]*>\s*,,is;
517 $contents =~ s,\s*</body[^>]*>.*$,,is;
518 $contents =~ s,^.*<!--\s*INDEX\s+END\s*-->\s*,,is;
519 $contents =~ s,^\s*(?:<p>\s*</p>\s*)+,,is;
520 $contents =~ s,(<a href=['\''"])#,$1$fp#,gis if $fp ne "";
521 print $contents;
522 ' || err=$?
523 [ -z "$stub" ] || showfoot
524 exit $err
527 txt)
528 # It's a <pre> block but we need some escaping
529 [ -z "$stub" ] || showstub "$readmeextnm"
530 printf '<!-- README NAME: %s -->\n' "$readmeextnm"
531 printf '%s' '<pre class="plaintext">'
532 printf '%s' "$contents" | LC_ALL=C sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g'
533 printf '%s\n' '</pre>'
534 [ -z "$stub" ] || showfoot
535 exit 0
538 esac
539 exit 1