From 428fa6f18538eccdd840d9c2440008334f059e15 Mon Sep 17 00:00:00 2001 From: "Kyle J. McKay" Date: Mon, 1 Mar 2021 09:51:26 -0700 Subject: [PATCH] readme: allow non-HTML formats Provide a "README Format" selection to choose between 'Markdown', 'Plain Text' and 'HTML' for an explicit README on the project page. As before, leaving the actual readme content blank results in the "Automatic README" mode. 'Plain Text' simply XML-escapes the content and stuffs it into a "
<pre>...</pre>
" section. 'Markdown' passes it through the same Markdown processor as for an "Automatic README" in Markdown format. This is now the default format. 'HTML' represents the only previously allowed format and will automatically be selected for non-empty legacy explicit HTML README contents. However, HTML is now always validated courtesy of the Markdown module's ProcessRaw function. It's also now sanitized (no more scripts, style tags [style attributes are still okay], javascript attributes, etc.). Content that is "almost-XML HTML" will be automatically "fixed up" rather than erroring out like it used to. The raw README content data is now stored in the file 'README.dat' alongside the processed result that's always stored in 'README.html' (thereby maintaining full gitweb compatibility). Just a few minor housekeeping updates to go along with this change. Make projtool show a length for the new READMEDATA value (similarly to how it handles the README value). Add the new 'README.dat' file to the list of files included in a project's BOM. Signed-off-by: Kyle J. McKay --- Girocco/Project.pm | 149 +++++++++++++++++++++++++++++++++-------- cgi/regproj.cgi | 1 + toolbox/create_projects_bom.pl | 1 + toolbox/projtool.pl | 7 ++ 4 files changed, 130 insertions(+), 28 deletions(-) diff --git a/Girocco/Project.pm b/Girocco/Project.pm index 1c47a08..2429fea 100644 --- a/Girocco/Project.pm +++ b/Girocco/Project.pm @@ -41,10 +41,12 @@ our $metadata_fields = { cleanmirror => ['Mirror refs', 'cleanmirror', 'placeholder'], homepage => ['Homepage URL', 'hp', 'text'], shortdesc => ['Short description', 'desc', 'text'], - README => [''. - 'README (HTML, < 8 KiB)
leave blank for automatic
', + README => [ + ['README Format', 'rmtype', 'select', 'Format of README data entered here', \&_rmtype_choices], + [''. + 'README (< 8 KiB)
leave blank for automatic
', 'README', 'textarea', 'Enter only “” '. - 'to completely suppress any README'], + 'to completely suppress any README']], notifymail => ['Commit notify – mail to', 'notifymail', 'text', 'comma separated address list'], reverseorder => ['Show oldest first', 'reverseorder', 'checkbox', @@ -67,6 +69,10 @@ our $metadata_fields = { 'notifycia', 'text', 'CIA is defunct – this value is ignored'], }; +sub _rmtype_choices { + return ('Markdown', 'Plain Text', 'HTML'); +} + sub _json_choices { return ('application/x-www-form-urlencoded', 'application/json'); } @@ -96,6 +102,8 @@ our %propmap = ( email => ':owner', desc => 'description', README => 'README.html', + READMEDATA => 'README.dat', + rmtype => '%girocco.readmetype', hp => ':homepage', notifymail => '%hooks.mailinglist', notifytag => '%hooks.announcelist', @@ -281,44 +289,112 @@ sub _cleanup_description { /\A(.*)$/m and $self->{desc} = $1; } -sub _cleanup_readme { - my $self = shift; - local $_ = $self->{README}; defined($_) or $_ = ''; +sub _cleanup_readme_str { + local $_ = shift; defined($_) or $_ = ''; {use bytes; s/[\x00-\x08\x0e-\x1f\x7f]+//gs;} s/\r\n?/\n/gs; s/^\s+//s; s/\s+$//s; $_ eq '' or $_ .= "\n"; - /^(.*)$/s and $self->{README} = $1; + /^(.*)$/s and $_ = $1; + return $_; +} + +sub _cleanup_readme { + my $self = shift; + $self->{README} = _cleanup_readme_str($self->{README}); + $self->{READMEDATA} = _cleanup_readme_str($self->{READMEDATA}); } +my %rmtypes; +BEGIN { %rmtypes = ( + default => 'Markdown', + markdown => 'Markdown', + 'plain text' => 'Plain Text', + plain => 'Plain Text', + text => 'Plain Text', + html => 'HTML' +) } + sub _lint_readme { my $self = shift; my $htmlfrag = shift; defined($htmlfrag) or $htmlfrag = 1; - return 0 unless defined($self->{README}) && $self->{README} ne ''; - my $test = '
'; - $test .= $self->{README}; - $test .= '
'; - my ($code, $errors) = capture_command(2, $test, 'xmllint', '--nonet', - '--noout', '--nowarning', '-'); - return 0 unless $code; - my $cnt = 0; + defined($self->{READMEDATA}) or $self->{READMEDATA} = ''; + my $rmtype = $rmtypes{lc($self->{rmtype} || '')}; + defined($rmtype) && $rmtype ne '' or $rmtype = $rmtypes{'default'}; + $self->{rmtype} = $rmtype; + # Empty always just becomes empty with no errors + if ($self->{READMEDATA} eq '') { + $self->{README} = ''; + return 0; + } + if ($rmtype eq 'Plain Text') { + # Never any errors with this one + my $esc = $self->{READMEDATA}; + $esc =~ s/&/&/gs; + $esc =~ s/{README} = '
' . $esc . '
'; + return 0; + } + eval { require Markdown; 1 } or do { + return (1, "README: Markdown/HTML formats temporarily unavailable"); + }; my @errs = (); - for my $line (split(/\n+/, $errors)) { - $line = html_esc($line) if $htmlfrag; - $line =~ s/ /\ /gs if $htmlfrag; - ++$cnt, $line = 'README'.$1 if $line =~ /^-(:\d+:.*)$/; + if ($rmtype eq 'HTML') { + # Legacy HTML support but now sanitized with possible auto-fixup + my $output; + eval { $output = Markdown::ProcessRaw($self->{READMEDATA}, + sanitize => 1, xmlcheck => 2, htmlauto => 0); 1; } or + do { + @errs = split(/\n+/, $@); + @errs or push(@errs, 'Invalid HTML'); + + # Make one more attempt with htmlauto in case + # that's good enough to fix up the HTML and make it valid + eval { $output = Markdown::ProcessRaw($self->{READMEDATA}, + sanitize => 1, xmlcheck => 2, htmlauto => 1); 1; } and + do { + @errs = (); + }; + }; + !@errs and $self->{README} = (chomp($output),$output); + } else { + # Markdown format + my $output; + my $gwbase = url_path($Girocco::Config::gitweburl,1).$self->{name}.'.git'; + my %mdopts = ( + sanitize => 1, + xmlcheck => 2, + yamlmode => 1, + yamlvis => 0, + keepabs => 1, + base_prefix => $self->{name}.'.git', + url_prefix => $gwbase . '/blob/HEAD:', + img_prefix => $gwbase . '/blob_plain/HEAD:' + ); + eval { $output = Markdown::Markdown($self->{READMEDATA}, %mdopts); 1; } or + do { + @errs = split(/\n+/, $@); + @errs or push(@errs, 'Invalid Markdown'); + }; + !@errs and $self->{README} = (chomp($output),$output); + } + return 0 unless @errs; + my @fmterrs = (); + foreach (@errs) { + $_ = html_esc($_) if $htmlfrag; + s/ /\ /gs if $htmlfrag; if ($htmlfrag) { - push @errs, '' . $line . ''; + push @fmterrs, 'README: ' . $_ . ''; } else { - push @errs, $line . "\n"; + push @fmterrs, 'README: ' . $_ . "\n"; } } if ($htmlfrag) { - return ($cnt, join("
\n", @errs)); + return (scalar(@fmterrs), join("
\n", @fmterrs)); } else { - return ($cnt, join("", @errs)); + return (scalar(@fmterrs), join("", @fmterrs)); } } @@ -357,6 +433,18 @@ sub _properties_load { $self->_cleanup_datetime('lastactivity'); $self->_cleanup_description; $self->_cleanup_readme; + defined($self->{rmtype}) and $self->{rmtype} = $rmtypes{lc($self->{rmtype})}; + defined($self->{rmtype}) or $self->{rmtype} = ""; + if (!$self->{rmtype}) { + # default type is "Markdown" unless README ne '' but READMEDATA eq '' + if ($self->{README} ne '' && $self->{READMEDATA} eq '') { + # This was the only previously understood format + $self->{rmtype} = 'HTML'; + $self->{READMEDATA} = $self->{README}; + } else { + $self->{rmtype} = $rmtypes{'default'}; + } + } delete $self->{configfilehash}; } @@ -747,12 +835,16 @@ sub cgi_fill { } if ($field_enabled->('README')) { - $self->{README} = to_utf8($gcgi->wparam('README'), 1); - $self->_cleanup_readme; - length($self->{README}) <= 8192 + $self->{rmtype} = $rmtypes{lc($gcgi->wparam('rmtype')||'')}; + if (!$self->{rmtype}) { + $gcgi->err("Invalid README Format. 
Must be 'Markdown', 'Plain Text' or 'HTML'."); + $self->{rmtype} = 'Markdown'; + } + $self->{READMEDATA} = _cleanup_readme_str(to_utf8($gcgi->wparam('README'), 1)); + length($self->{READMEDATA}) <= 8192 or $gcgi->err("README length > 8kb!"); my ($cnt, $err) = (0); - ($cnt, $err) = $self->_lint_readme if $gcgi->ok && $Girocco::Config::xmllint_readme; + ($cnt, $err) = $self->_lint_readme if $gcgi->ok; $gcgi->err($err), $gcgi->{err} += $cnt-1 if $cnt; } @@ -869,7 +961,8 @@ sub form_defaults { url => $self->{url}, cleanmirror => $self->{cleanmirror}, desc => html_esc($self->{desc}), - README => html_esc($self->{README}), + README => html_esc($self->{READMEDATA}), + rmtype => $self->{rmtype}, hp => $self->{hp}, users => $self->{users}, notifymail => html_esc($self->{notifymail}), diff --git a/cgi/regproj.cgi b/cgi/regproj.cgi index 17042a4..ed24703 100755 --- a/cgi/regproj.cgi +++ b/cgi/regproj.cgi @@ -41,6 +41,7 @@ my %values = ( notifyjson => '', notifycia => '', README => '', + rmtype => 'Markdown', source => 'Anywhere', url => '', Anywhere_url => '', diff --git a/toolbox/create_projects_bom.pl b/toolbox/create_projects_bom.pl index 8abcd08..2560e4b 100755 --- a/toolbox/create_projects_bom.pl +++ b/toolbox/create_projects_bom.pl @@ -29,6 +29,7 @@ BEGIN {@projparts = qw( .nogc .nohooks HEAD + README.dat README.html config ctags diff --git a/toolbox/projtool.pl b/toolbox/projtool.pl index a717e65..06c720a 100755 --- a/toolbox/projtool.pl +++ b/toolbox/projtool.pl @@ -218,6 +218,12 @@ sub die_usage { } } +sub get_readme_len { + my $rm = shift; + defined($rm) or $rm = ''; + return "length " . 
length($rm); +} + sub get_readme_desc { my $rm = shift; defined($rm) or $rm = ''; @@ -318,6 +324,7 @@ sub get_clean_project { delete $project->{notifyhook}; } $project->{README} = get_readme_desc($project->{README}) if exists($project->{README}); + $project->{READMEDATA} = get_readme_len($project->{READMEDATA}) if exists($project->{READMEDATA}); my @tags = get_ctag_counts($project, 1); $project->{tags} = \@tags if @tags; my $projconfig = read_config_file_hash($project->{path}."/config"); -- 2.11.4.GIT