3 # format-readme -- find and format a repository's readme blob
4 # Copyright (C) 2015,2016,2017,2019,2021 Kyle J. McKay.
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation, either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
26 'Usage: '"${0##*/}"' [-e]... [-r <prefix> | -p <prefix> [-i <imgprefix>]]
27 <path-to-repo.git> [<treeish>]
28 -a treat all plain text as .md
29 -e output error document on Markdown.pl errors
30 -w which blob contains the actual readme contents
31 -m <maxsize> maximum size of readme to allow (default is 32768)
32 -b <prefix> prefix bare fragment-only URLs with <prefix>
33 -r <prefix> prefix non-absolute URLs with <prefix>
34 -p <prefix> prefix non-absolute URLs with <prefix>/<symlink-path>
35 -i <imgprefix> use <imgprefix> instead of <prefix> for images
36 --stub wrap the output in a full XHTML document stub
38 <path-to-repo.git> location where `git rev-parse --git-dir` works
39 <treeish> tree to use instead of "HEAD^{tree}"
41 With `-a` a plain text readme found with an explicit "text" (i.e. `.txt` or
42 `.text`) extension or no extension at all will be treated as though it had a
43 `.md` extension instead and run through Markdown.pl. Adding a second `-a` will
44 treat unknown extension types as though they are `.md` too (not recommended).
46 Regardless of how many `-a` options are specified, extension types that are
47 known to be something other than plain text will never be treated as `.md`.
49 With `-e` if Markdown.pl finds validation errors, those errors will be output
50 together with a line-numbered source into a suitable `<pre>...</pre>` block
51 that can be displayed in place of the "readme" and '"${0##*/}"' exits with
54 With `-m` the maximum allowable size of the source blob for the "readme" can be
55 specified. By default the maximum size is 32768 (32K). This must be specified
56 in bytes and will automatically be rounded up to a minimum of 1024 except that
57 a value of 0 means unlimited (not recommended).
59 With `-w` rather than formatting the blob, output a single line with the format:
61 <hash> <size> <fmt> <name> <sympath>
63 Where "<hash>" is the actual blob hash, "<size>" is the size as reported by
64 `git ls-tree`, "<fmt>" is "md", "pod" or "txt" indicating the formatter to use
65 and "<name>" is the initial name and if that was a symlink the "<sympath>"
66 field will be present and is the relative path to the final blob. Note that
67 size is still enforced and will produce an error return rather than a result if
68 the size is too big. This is the one "semi-recommended" use of `-m 0`, but if
69 `-m` is not specified and `-w` is then `-m` defaults to 0 instead of 32K.
71 With `-r` all non-absolute URLs have <prefix> prefixed to them.
73 With `-p` all non-absolute URLs have "<prefix>/<symlink-path>" prefixed to them
74 where <symlink-path> is the dirname portion of the symlink if the selected
75 "readme" file is a symlink that needs to be followed. If it'\''s not a symlink or
76 it'\''s in the same directory, the "/<symlink-path>" part is not added but the
77 "<prefix>" part still is.
79 With `-i` (which also requires either `-r` or `-p`), if the target of the link
80 is an image, use "<imgprefix>" instead of "<prefix>". (If `-p` was used it
81 will still get the symlink path, if `-r` was used it won'\''t.)
# die <message>...
# Write "<script-name>: fatal: <message>" to standard error, with all
# arguments joined into a single message, then exit with status 2.
die() {
	printf '%s: fatal: %s\n' "${0##*/}" "$*" >&2
	exit 2
}
# Use /tmp unless TMPDIR is set and names an existing, writable directory
if [ -z "$TMPDIR" ] || ! [ -d "$TMPDIR" ] || ! [ -w "$TMPDIR" ]; then
	TMPDIR="/tmp"
fi
91 [ -z "$rmfiles" ] ||
eval rm -f "$rmfiles" ||
:
# Start with every "option seen" marker empty; the '-m', '-b', '-r', '-p'
# and '-i' handlers test these to reject repeated or conflicting options
optm=
optb=
optr=
optp=
opti=
117 while [ $# -gt 0 ] && [ ${#1} -gt 1 ] && [ z
"-${1#-}" = z
"$1" ]; do
128 alltxt
="$(( ${alltxt:-0} + 1 ))"
144 [ -z "$optm" ] || die
"'-m' may only be used once"
146 [ $# -ge 1 ] && [ -n "$1" ] || die
"'-m' requires an argument"
147 [ "${1#*[!0-9]}" = "$1" ] || die
"'-m' requires a whole number argument"
150 [ $maxlen -ge 1024 ] ||
[ "$maxlen" = "0" ] || maxlen
="1024"
154 [ -z "$optb" ] || die
"'-b' may only be used once"
156 [ $# -ge 1 ] || die
"'-b' requires an argument"
162 [ -z "$optr" ] || die
"'-r' may only be used once"
163 [ -z "$optp" ] || die
"'-r' may not be used with '-p'"
165 [ $# -ge 1 ] && [ -n "$1" ] || die
"'-r' requires an argument"
172 [ -z "$optp" ] || die
"'-p' may only be used once"
173 [ -z "$optr" ] || die
"'-p' may not be used with '-r'"
175 [ $# -ge 1 ] && [ -n "$1" ] || die
"'-p' requires an argument"
183 [ -z "$opti" ] || die
"'-i' may only be used once"
185 [ $# -ge 1 ] && [ -n "$1" ] || die
"'-i' requires an argument"
191 die
"unrecognized option \`$1\` (-h for help)"
# When the blob is only being reported ($showblob set) and no explicit
# '-m' was given, drop the size limit altogether
if [ -n "$showblob" ] && [ -z "$optm" ]; then
	maxlen=0
fi

# An image prefix is meaningless without a URL prefix from '-r' or '-p'
if [ -n "$opti" ] && [ -z "$optr$optp" ]; then
	die "'-i' also requires either '-r' or '-p'"
fi

# Exactly one or two positional arguments remain: <path-to-repo.git> [<treeish>]
if [ $# -gt 2 ]; then
	die "too many arguments (-h for help)"
fi
if [ $# -lt 1 ] || [ -z "$1" ]; then
	die "missing first argument <path-to-repo.git> (-h for help)"
fi
200 [ -n "$projdir" ] && [ -d "$projdir" ] ||
exit 2
201 cd "$projdir" ||
exit 2
203 gd
="$(git rev-parse --git-dir 2>&1)" ||
exit 2
206 tree
="$(git rev-parse --quiet --verify "$treeish"^{tree} 2>/dev/null)" ||
exit 2
208 # We prefer the first file or symlink we find with
209 # a supported extension and then we will follow it
210 # if it's a relative symlink with no '.' or '..' components.
211 # If we don't find a supported extension, we use just plain README
212 # which we assume to be plain text (and we will follow a symlink).
213 # We prefer a markdown extension over others and any extension
214 # other than plain text next followed by plain text and then no extension.
# Note whether this perl can load Pod::Html; README.pod candidates are
# only considered when $haspod is non-empty
if perl -MPod::Html -e 1 >/dev/null 2>&1; then
	haspod=1
fi
221 # .textile, .rdoc, .org, .creole, .mediawiki/.wiki
222 # .rst, .asciidoc/.adoc/.asc
223 [Rr
][Ee
][Aa
][Dd
][Mm
][Ee
].
[Tt
][Ee
][Xx
][Tt
][Ii
][Ll
][Ee
]|\
224 [Rr
][Ee
][Aa
][Dd
][Mm
][Ee
].
[Rr
][Dd
][Oo
][Cc
]|\
225 [Rr
][Ee
][Aa
][Dd
][Mm
][Ee
].
[Oo
][Rr
][Gg
]|\
226 [Rr
][Ee
][Aa
][Dd
][Mm
][Ee
].
[Cc
][Rr
][Ee
][Oo
][Ll
][Ee
]|\
227 [Rr
][Ee
][Aa
][Dd
][Mm
][Ee
].
[Mm
][Ee
][Dd
][Ii
][Aa
][Ww
][Ii
][Kk
][Ii
]|\
228 [Rr
][Ee
][Aa
][Dd
][Mm
][Ee
].
[Ww
][Ii
][Kk
][Ii
]|\
229 [Rr
][Ee
][Aa
][Dd
][Mm
][Ee
].
[Rr
][Ss
][Tt
]|\
230 [Rr
][Ee
][Aa
][Dd
][Mm
][Ee
].
[Aa
][Ss
][Cc
][Ii
][Ii
][Dd
][Oo
][Cc
]|\
231 [Rr
][Ee
][Aa
][Dd
][Mm
][Ee
].
[Aa
][Dd
][Oo
][Cc
]|\
232 [Rr
][Ee
][Aa
][Dd
][Mm
][Ee
].
[Aa
][Ss
][Cc
])
252 while read -r mode
type hash size name
; do
253 [ "$mode" = "100644" ] ||
[ "$mode" = "100755" ] ||
[ "$mode" = "120000" ] ||
continue
254 [ "$size" != "0" ] && [ "$size" != "-" ] ||
continue
255 [ "$type" = "blob" ] ||
continue
256 [ "$mode" != "120000" ] ||
[ "$size" -lt 1024 ] ||
continue
257 [ "$mode" != "120000" ] || symlinks
="$symlinks$hash $name$nl"
260 # Markdown extensions must match this pattern:
261 # /md|rmd|mkdn?|mdwn|mdown|markdown|litcoffee/i
262 [Rr
][Ee
][Aa
][Dd
][Mm
][Ee
].
[Mm
][Dd
]|\
263 [Rr
][Ee
][Aa
][Dd
][Mm
][Ee
].
[Rr
][Mm
][Dd
]|\
264 [Rr
][Ee
][Aa
][Dd
][Mm
][Ee
].
[Mm
][Kk
][Dd
]|\
265 [Rr
][Ee
][Aa
][Dd
][Mm
][Ee
].
[Mm
][Kk
][Dd
][Nn
]|\
266 [Rr
][Ee
][Aa
][Dd
][Mm
][Ee
].
[Mm
][Dd
][Ww
][Nn
]|\
267 [Rr
][Ee
][Aa
][Dd
][Mm
][Ee
].
[Mm
][Dd
][Oo
][Ww
][Nn
]|\
268 [Rr
][Ee
][Aa
][Dd
][Mm
][Ee
].
[Mm
][Aa
][Rr
][Kk
][Dd
][Oo
][Ww
][Nn
]|\
269 [Rr
][Ee
][Aa
][Dd
][Mm
][Ee
].
[Ll
][Ii
][Tt
][Cc
][Oo
][Ff
][Ff
][Ee
][Ee
])
270 if [ -n "$readmeext" ]; then
271 [ "$readmeextfmt" != "md" ] ||
[ "$mode" != "120000" ] ||
continue
277 [ "$mode" != "120000" ] || readmeextlnk
=1
281 [Rr
][Ee
][Aa
][Dd
][Mm
][Ee
].
[Pp
][Oo
][Dd
])
282 [ -n "$haspod" ] ||
continue
283 if [ -n "$readmeext" ]; then
284 [ "$readmeextfmt" != "md" ] ||
continue
285 [ "$readmeextfmt" = "txt" ] ||
[ "$mode" != "120000" ] ||
continue
291 [ "$mode" != "120000" ] || readmeextlnk
=1
295 [Rr
][Ee
][Aa
][Dd
][Mm
][Ee
].
[Tt
][Xx
][Tt
]|\
296 [Rr
][Ee
][Aa
][Dd
][Mm
][Ee
].
[Tt
][Ee
][Xx
][Tt
])
297 if [ -n "$readmeext" ]; then
298 [ "$readmeextfmt" = "txt" ] && [ "$mode" != "120000" ] ||
continue
304 [ "$mode" != "120000" ] || readmeextlnk
=1
308 [Rr
][Ee
][Aa
][Dd
][Mm
][Ee
])
309 [ -z "$readme" ] ||
[ "$mode" != "120000" ] ||
continue
314 [ "$mode" != "120000" ] || readmelnk
=1
318 [Rr
][Ee
][Aa
][Dd
][Mm
][Ee
].?
*)
319 [ -z "$readmefb" ] ||
[ "$mode" != "120000" ] ||
continue
320 [ "${name%.*}" = "${name%%.*}" ] ||
continue
321 [ "${name#*.}" = "${name##*[!A-Za-z0-9+_]}" ] ||
continue
322 [ "${name%[$ws]*}" = "$name" ] ||
continue
327 [ "$mode" != "120000" ] || readmefblnk
=1
332 $(git ls-tree -l $tree)
334 if [ -n "$readmefb" ] && [ -z "$readme" ]; then
336 readmenm
="$readmefbnm"
337 readmesiz
="$readmefbsiz"
338 readmelnk
="$readmefblnk"
340 case "$readmenm" in *.
[Ff
][Ii
][Rr
][Ss
][Tt
])
344 if [ -n "$readme" ] && [ -z "$readmeext" ]; then
346 readmeextnm
="$readmenm"
347 readmeextsiz
="$readmesiz"
348 readmeextlnk
="$readmelnk"
349 readmeextfmt
="$readmefmt"
351 [ -n "$readmeext" ] ||
exit 1
352 if [ ${alltxt:-0} -gt 0 ]; then
353 [ "$readmeextfmt" != "txt" ] || readmeextfmt
=md
355 [ -z "$readmeextfmt" ] && [ ${alltxt:-0} -gt 1 ] &&
356 ! notplain
"$readmeextnm"
362 if [ -n "$readmeextlnk" ]; then
363 rel
="$(git cat-file blob $readmeext 2>/dev/null)" ||
exit 1
364 case "$rel" in /*) exit 1; esac
365 case "/$rel/" in */..
/*|
*/.
/*) exit 1; esac
366 case "$rel" in */*) :;; ?
*)
367 while read -r hash name
; do
368 if [ -n "$hash" ] && [ "$name" = "$rel" ]; then
369 rel2
="$(git cat-file blob $hash 2>/dev/null)" ||
exit 1
370 case "$rel2" in /*) exit 1; esac
371 case "/$rel2/" in */..
/*|
*/.
/*) exit 1; esac
379 case "$rel" in *?
/?
*)
381 prefix
="${rel%/$suffix}"
382 while read -r hash name
; do
383 if [ -n "$hash" ] && [ "$name" = "$prefix" ]; then
384 rel2
="$(git cat-file blob $hash 2>/dev/null)" ||
exit 1
385 case "$rel2" in /*) exit 1; esac
386 case "/$rel2/" in */..
/*|
*/.
/*) exit 1; esac
394 [ -z "$showblob" ] || blobsym
="$rel"
395 if [ -n "$addpath" ]; then
396 dir
="$(dirname "$rel")"
397 if [ "$dir" != "." ]; then
398 urlprefix
="${urlprefix%/}/$dir"
399 [ -z "$imgprefix" ] || imgprefix
="${imgprefix%/}/$dir"
402 read -r mode
type hash size name
<<EOT
403 $(git ls-tree -l $tree -- "$rel")
# The entry just read from `git ls-tree -l` must be a regular file
# (plain or executable), of type blob, with a known non-zero size;
# anything else ends the script with status 1
case "$mode" in
100644|100755)
	;;
*)
	exit 1
	;;
esac
if [ "$type" != "blob" ]; then
	exit 1
fi
case "$size" in
0|-)
	exit 1
	;;
esac
# Size gate, first pass: compare the size reported by ls-tree against
# $maxlen (32K by default).  A $maxlen of 0 means unlimited (but that's
# not recommended).  The length is checked a second time by perl against
# the actual blob contents, which also fails if it doesn't look like text.
if [ "$maxlen" != "0" ] && ! [ "$readmeextsiz" -le "$maxlen" ]; then
	exit 1
fi
421 if [ -n "$showblob" ]; then
422 printf '%s %s %s %s%s\n' "$readmeext" "$readmeextsiz" \
423 "${readmeextfmt:-txt}" "$readmeextnm" "${blobsym:+ $blobsym}"
428 [ "$maxlen" = "0" ] || andmaxok
=' && length($contents) <= '"$maxlen"
429 contents
="$(git cat-file blob $readmeext | perl -e '
433 binmode STDIN, ":perlio
" or exit 1
434 unless grep /^perlio$/, PerlIO::get_layers(STDIN);
435 exit 1 unless -T STDIN;
437 my $contents = <STDIN>;
438 exit 1 unless defined($contents) && length($contents) > 0'"$andmaxok"';
441 ' 2>/dev/null)" ||
exit 1
443 mdpl
='Markdown.pl' args
='--strip-comments-lax-only'
445 printf '# %s\n' "$*" |
"$mdpl" --stub | LC_ALL
=C
awk '/^<h1/{exit}{print}'
448 printf '%s\n' "$*</div>" "</body>" "</html>"
452 case "${readmeextfmt:-txt}" in
455 # Run Markdown.pl on it
456 if [ -n "$fragprefix" ]; then
457 args
="$args -b \"$fragprefix\""
459 if [ -n "$addprefix" ]; then
460 args
="$args -r \"$urlprefix\""
461 [ -z "$imgprefix" ] || args
="$args -i \"$imgprefix\""
463 if [ ! "$showerr" ]; then
464 [ -z "$stub" ] || showstub
"$readmeextnm"
465 printf '<!-- README NAME: %s -->\n' "$readmeextnm"
467 printf '%s' "$contents" |
eval "$mdpl $args 2>/dev/null" || err
=$?
468 [ -z "$stub" ] || showfoot
471 tmph
="$(mktemp "${TMPDIR:+$TMPDIR/}fmtrdme-$$
-htm-XXXXXX")"
472 tmpe
="$(mktemp "${TMPDIR:+$TMPDIR/}fmtrdme-$$
-err-XXXXXX")"
473 rmfiles
="$rmfiles "'"$tmph" "$tmpe"'
474 rm -f "$tmph" "$tmpe" && >"$tmph" && >"$tmpe"
475 if printf '%s' "$contents" |
eval "\"\$mdpl\" $args "'>"$tmph" 2>"$tmpe"'; then
476 [ -z "$stub" ] || showstub
"$readmeextnm"
477 printf '<!-- README NAME: %s -->\n' "$readmeextnm"
479 [ -z "$stub" ] || showfoot
# Error report: emit the diagnostics collected in "$tmpe", then a
# line-numbered listing of what Markdown.pl produces with sanitizing and
# XML validation switched off.  Both are HTML-escaped for use in <pre>.
[ -s "$tmpe" ] || echo "unknown errors" >"$tmpe"
[ -z "$stub" ] || showstub "$readmeextnm errors"
printf '<!-- README NAME: %s errors -->\n<pre>' "$readmeextnm"
# In a sed replacement "\&" is a literal ampersand, so the entity names
# must be spelled out; "&" is escaped first so that the "&" of the "&lt;"
# produced by the second expression is not escaped again
<"$tmpe" LC_ALL=C sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g'
printf '%s' "$contents" |
eval "\"\$mdpl\" --no-sanitize --no-validate-xml $args 2>/dev/null >\"$tmph\""
# ${#lines} (digit count of the line total) is the line-number column width
lines="$(( $(wc -l <"$tmph")+0 ))"
# "\\&" in the awk string yields a literal "&" in the gsub replacement
<"$tmph" LC_ALL=C awk -v w="${#lines}" \
	'{gsub(/&/,"\\&amp;");gsub(/</,"\\&lt;");printf("%*u %s\n",w,NR,$0)}'
[ -z "$stub" ] || showfoot "$nl"
499 # Run pod2html and extract the contents
501 if [ -n "$addprefix" ] && [ -n "${urlprefix%/}" ]; then
502 arg
=", \"--htmlroot=${urlprefix%/}\""
504 [ -z "$stub" ] || showstub
"$readmeextnm"
505 printf '<!-- README NAME: %s -->\n' "$readmeextnm"
507 printf '%s' "$contents" | \
508 perl
-MPod::Html
-e "pod2html \"--quiet\", \"--no-index\"$arg" 2>/dev
/null | \
509 FP
="$fragprefix" perl
-e '
514 defined($fp) or $fp = "";
515 my $contents = <STDIN>;
516 $contents =~ s,^.*<body[^>]*>\s*,,is;
517 $contents =~ s,\s*</body[^>]*>.*$,,is;
518 $contents =~ s,^.*<!--\s*INDEX\s+END\s*-->\s*,,is;
519 $contents =~ s,^\s*(?:<p>\s*</p>\s*)+,,is;
520 $contents =~ s,(<a href=['\''"])#,$1$fp#,gis if $fp ne "";
523 [ -z "$stub" ] || showfoot
# It's a <pre> block but we need some escaping: "&" and "<" in the readme
# text must become "&amp;" and "&lt;" so they cannot be read as markup
[ -z "$stub" ] || showstub "$readmeextnm"
printf '<!-- README NAME: %s -->\n' "$readmeextnm"
printf '%s' '<pre class="plaintext">'
# "\&" is a literal ampersand in a sed replacement; "&" is handled first so
# the entities introduced for "<" are not escaped a second time
printf '%s' "$contents" | LC_ALL=C sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g'
printf '%s\n' '</pre>'
[ -z "$stub" ] || showfoot