From 29afaa5b8c81e09a86654c8b37fdf083f25d564f Mon Sep 17 00:00:00 2001 From: "Kyle J. McKay" Date: Tue, 7 Jul 2020 20:55:38 -0700 Subject: [PATCH] gitweb/genindex.sh: vastly improve speed of genindex Replace the hodgepodge version of genindex.sh with something that avoids spawning multiple git processes, slow shell while loops and forking lots of subshells. Since perl ultimately runs to update the owner name hash, just have perl handle it all for an order of magnitude speed up. While in there, also make sure the atomic move-into-place operation takes place on the same device as the final file destinations -- otherwise the moves might not actually be atomic. Signed-off-by: Kyle J. McKay --- gitweb/genindex.sh | 73 ++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 46 insertions(+), 27 deletions(-) diff --git a/gitweb/genindex.sh b/gitweb/genindex.sh index 06eda60..8b73167 100755 --- a/gitweb/genindex.sh +++ b/gitweb/genindex.sh @@ -23,53 +23,72 @@ fi # displayed as a ' ' in the owner email and will cause a project name # containing it to be omitted from the project list page. +GIROCCO_REPOROOT="$cfg_reporoot" && export GIROCCO_REPOROOT if [ -z "$update" ] || [ ! 
-s "$cfg_projlist_cache_dir/gitproj.list" ]; then # Must read all the owners so don't bother with join at all - exec 3>/tmp/gitdir.listu.$$ - get_repo_list | while read proj; do - echo "$proj $(cd "$cfg_reporoot/$proj.git" && - echo "$proj $(pwd -P)" >&3 && - config_get owner)" - done | perl -MDigest::MD5=md5_hex -ne \ - '@_=split;print "$_[0] ",md5_hex(lc($_[1]))," $_[1]\n";' | + get_repo_list | + perl -I@basedir@ -MDigest::MD5=md5_hex -MCwd=realpath -MGirocco::ConfigUtil \ + -n -e 'BEGIN{$reporoot=$ENV{GIROCCO_REPOROOT};}' 2>/tmp/gitdir.listu.$$ -e \ + 'chomp; my $p = $_; defined($p) && $p ne "" or next; my $o = ""; + my $cf = read_config_file_hash("$reporoot/$p.git/config"); + defined($cf->{"gitweb.owner"}) and $o = $cf->{"gitweb.owner"}; + print "$p ",md5_hex(lc($o))," $o\n"; + my $rp = realpath("$reporoot/$p.git"); + defined($rp) && $rp ne "" and print STDERR "$p $rp\n";' | LC_ALL=C sort -k 1,1 >/tmp/gitproj.list.$$ test $? -eq 0 - exec 3>&- LC_ALL=C sort -k 1,1 </tmp/gitdir.listu.$$ >/tmp/gitdir.list.$$ rm -f /tmp/gitdir.listu.$$ else + GIROCCO_UPDATE="$update" && export GIROCCO_UPDATE get_repo_list | LC_ALL=C sort -k 1,1 >/tmp/gitproj.srt.$$ LC_ALL=C join -a 1 /tmp/gitproj.srt.$$ "$cfg_projlist_cache_dir/gitproj.list" | - while read proj hash owner; do - if [ "$proj" = "$update" ] || [ -z "$owner" ] || [ -z "$hash" ]; then - echo "$proj recalc $(cd "$cfg_reporoot/$proj.git" && config_get owner)" - else - echo "$proj $hash $owner" - fi - done | perl -MDigest::MD5=md5_hex -ne \ - '@_=split;print "$_[0] ",$_[1] eq "recalc"?md5_hex(lc($_[2])):$_[1]," $_[2]\n";' | + perl -I@basedir@ -MDigest::MD5=md5_hex -MGirocco::ConfigUtil \ + -n -e 'BEGIN{$reporoot=$ENV{GIROCCO_REPOROOT};$update=$ENV{GIROCCO_UPDATE};}' -e \ + 'BEGIN{$mt5=md5_hex("");} + chomp; my @f=split(" ",$_,3); push(@f, "") while @f < 3; $f[0] ne "" or next; + my $r = $f[0] eq $update || $f[1] eq "" || ($f[2] eq "" && $f[1] ne $mt5); + if ($r) { + my $o = ""; + my $cf = read_config_file_hash("$reporoot/$f[0].git/config"); 
defined($cf->{"gitweb.owner"}) and $o = $cf->{"gitweb.owner"}; + $f[1] = md5_hex(lc($o)); $f[2]=$o; + } + print "$f[0] $f[1] $f[2]\n";' | LC_ALL=C sort -k 1,1 >/tmp/gitproj.list.$$ test $? -eq 0 LC_ALL=C join -a 1 -t ' ' /tmp/gitproj.srt.$$ "$cfg_projlist_cache_dir/gitdir.list" | - while read proj path; do - if [ "$proj" = "$update" ] || [ -z "$path" ]; then - echo "$proj $(cd "$cfg_reporoot/$proj.git" && pwd -P)" - else - echo "$proj $path" - fi - done | + perl -MCwd=realpath \ + -n -e 'BEGIN{$reporoot=$ENV{GIROCCO_REPOROOT};$update=$ENV{GIROCCO_UPDATE};}' -e \ + 'chomp; my @f=split(" ",$_,2); push(@f, "") while @f < 2; $f[0] ne "" or next; + my $r = $f[0] eq $update || $f[1] eq ""; + $r and $f[1] = realpath("$reporoot/$f[0].git"); + defined($f[1]) && $f[1] ne "" and print "$f[0] $f[1]\n";' | LC_ALL=C sort -k 1,1 >/tmp/gitdir.list.$$ test $? -eq 0 rm -f /tmp/gitproj.srt.$$ fi cut -d ' ' -f 1,3- </tmp/gitproj.list.$$ >/tmp/gitweb.list.$$ +# Make sure we are on the correct device before the atomic move +rm -f \ + "$cfg_projlist_cache_dir/gitproj.list.$$" \ + "$cfg_projlist_cache_dir/gitdir.list.$$" \ + "$cfg_projlist_cache_dir/gitweb.list.$$" +cat /tmp/gitproj.list.$$ >"$cfg_projlist_cache_dir/gitproj.list.$$" +cat /tmp/gitdir.list.$$ >"$cfg_projlist_cache_dir/gitdir.list.$$" +cat /tmp/gitweb.list.$$ >"$cfg_projlist_cache_dir/gitweb.list.$$" +rm -f /tmp/gitproj.list.$$ /tmp/gitdir.list.$$ /tmp/gitweb.list.$$ + # Set the proper group, if configured, before the move if [ -n "$cfg_owning_group" ]; then - chgrp "$cfg_owning_group" /tmp/gitproj.list.$$ /tmp/gitdir.list.$$ /tmp/gitweb.list.$$ + chgrp "$cfg_owning_group" \ + "$cfg_projlist_cache_dir/gitproj.list.$$" \ + "$cfg_projlist_cache_dir/gitdir.list.$$" \ + "$cfg_projlist_cache_dir/gitweb.list.$$" fi # Atomically move into place -mv -f /tmp/gitproj.list.$$ "$cfg_projlist_cache_dir/gitproj.list" -mv -f /tmp/gitdir.list.$$ "$cfg_projlist_cache_dir/gitdir.list" -mv -f /tmp/gitweb.list.$$ "$cfg_projlist_cache_dir/gitweb.list" +mv -f 
"$cfg_projlist_cache_dir/gitproj.list.$$" "$cfg_projlist_cache_dir/gitproj.list" +mv -f "$cfg_projlist_cache_dir/gitdir.list.$$" "$cfg_projlist_cache_dir/gitdir.list" +mv -f "$cfg_projlist_cache_dir/gitweb.list.$$" "$cfg_projlist_cache_dir/gitweb.list" -- 2.11.4.GIT