Merge branch 'master' into rorcz
[girocco.git] / Girocco / Util.pm
blobf734e4157493dd7be1685247f767597aba8f870d
1 package Girocco::Util;
3 use 5.008;
4 use strict;
5 use warnings;
7 use Girocco::Config;
8 use Girocco::ConfigUtil;
9 use Time::Local;
11 BEGIN {
12 use base qw(Exporter);
13 our @EXPORT = qw(get_git scrypt jailed_file sendmail_pipe mailer_pipe
14 lock_file unlock_file valid_tag rand_adjust
15 filedb_atomic_append filedb_atomic_edit filedb_grep
16 filedb_atomic_grep valid_email valid_email_multi
17 valid_repo_url valid_web_url url_base url_path url_server
18 projects_html_list parse_rfc2822_date parse_any_date
19 extract_url_hostname is_dns_hostname is_our_hostname
20 get_cmd online_cpus sys_pagesize sys_memsize
21 calc_windowmemory to_utf8 capture_command human_size
22 calc_bigfilethreshold has_reserved_suffix human_duration
23 noFatalsToBrowser calc_redeltathreshold
24 clean_email_multi read_HEAD_symref read_config_file
25 read_config_file_hash is_git_dir git_bool util_path
26 is_shellish read_HEAD_ref git_add_config);
29 BEGIN {require "Girocco/extra/capture_command.pl"}
# Run a command (given as a list) and return everything it wrote to stdout.
# Any output the command sends to stderr is discarded.
# Returns undef if the command could not be run (see $!) or if it finished
# with a non-zero status.
sub get_cmd {
    my ($exit_status, $stdout) = capture_command(1, undef, @_);
    my $succeeded = defined($exit_status) && $exit_status == 0;
    return $succeeded ? $stdout : undef;
}
# Convenience wrapper around get_cmd: the configured git binary
# ($Girocco::Config::git_bin) is supplied as the command and the arguments
# passed in become its arguments.
sub get_git {
    my @git_args = @_;
    return get_cmd($Girocco::Config::git_bin, @git_args);
}
# Hash a password with crypt() using a freshly generated random two-character
# salt drawn from the 64-character alphabet [./0-9A-Za-z].
# An undefined password is hashed as the empty string.
# Returns whatever crypt() returns for the password and salt.
sub scrypt {
    my ($pwd) = @_;
    # Test definedness rather than truth so that the password "0" is hashed
    # as itself instead of silently being replaced by the empty string.
    defined($pwd) or $pwd = '';
    my @saltchars = ('.', '/', 0..9, 'A'..'Z', 'a'..'z');
    my $salt = join('', @saltchars[int(rand(64)), int(rand(64))]);
    return crypt($pwd, $salt);
}
# Translate a path as named inside the chroot jail into the corresponding
# path outside of it: one leading '/' (if present) is removed from the
# argument and the result is $Girocco::Config::chroot, a '/', and the rest.
sub jailed_file {
    my ($filename) = @_;
    (my $relative = $filename) =~ s,^/,,;
    return join('/', $Girocco::Config::chroot, $relative);
}
# Acquire the lock for $path by exclusively creating "$path.lock" and return
# a write-only handle open on that lock file.
# If the first attempt fails, creation is retried with a one second sleep
# between attempts for as long as the failure is EEXIST; dies when the retry
# budget is used up or when creation fails for any other reason.
# The lock file is made group-writable (mode 0664) before returning.
sub lock_file {
    my ($path) = @_;

    use Errno qw(EEXIST);
    use Fcntl qw(O_WRONLY O_CREAT O_EXCL);
    use IO::Handle;

    $path .= '.lock';
    my $flags = O_WRONLY|O_CREAT|O_EXCL;
    my $handle = IO::Handle->new;

    if (!sysopen($handle, $path, $flags)) {
        my $cnt = 0;
        until (sysopen($handle, $path, $flags)) {
            ($! == EEXIST) or die "$path open failed: $!";
            ($cnt++ < 16) or die "$path open failed: cannot open lockfile";
            sleep(1);
        }
    }
    # XXX: filedb-specific
    chmod(0664, $path) or die "$path g+w failed: $!";

    return $handle;
}
# True when the argument is defined and is exactly the path that
# jailed_file('/etc/passwd') yields.
sub _is_passwd_file {
    my ($candidate) = @_;
    my $is_passwd = defined($candidate)
        && $candidate eq jailed_file('/etc/passwd');
    return $is_passwd;
}
# Run "$Girocco::Config::basedir/bin/update-pwd-db" on $path, passing
# $updatearg as an extra argument when it is true.
# Dies (reporting $?) unless the command exits with status 0.
sub _run_update_pwd_db {
    my ($path, $updatearg) = @_;
    my $tool = $Girocco::Config::basedir . '/bin/update-pwd-db';
    my @cmd = ($tool, "$path");
    push(@cmd, $updatearg) if $updatearg;
    my $status = system(@cmd);
    $status == 0 or die "update-pwd-db failed: $?";
}
# Release the lock taken with lock_file($path).
# When $noreplace is true the lock file "$path.lock" is simply removed.
# Otherwise "$path.lock" is renamed over $path (publishing its contents);
# before the rename, if $Girocco::Config::update_pwd_db is set and $path is
# the jailed passwd file, update-pwd-db is run on the lock file with the
# optional $updatearg.
# Dies if the unlink or rename fails.
sub unlock_file {
    my ($path, $noreplace, $updatearg) = @_;
    my $lockpath = "$path.lock";

    if ($noreplace) {
        unlink($lockpath) or die "$path unlock failed: $!";
    } else {
        if ($Girocco::Config::update_pwd_db && _is_passwd_file($path)) {
            _run_update_pwd_db($lockpath, $updatearg);
        }
        rename($lockpath, $path) or die "$path unlock failed: $!";
    }
}
# Append a record to a ':'-separated file database and return the id that
# was assigned to it.
#   $file      -- path of the database file
#   $line      -- record to append (without newline); every literal "\i"
#                 in it is replaced by the newly assigned id
#   $updatearg -- optional, passed through to unlock_file
# The new id is one more than the largest numeric third field found in the
# existing records, but never less than 65536.
# The existing records plus the new one are written to the lock file, which
# unlock_file then moves into place.  Dies on any I/O failure.
sub filedb_atomic_append {
    my ($file, $line, $updatearg) = @_;
    my $id = 65536;

    # Take the lock BEFORE opening the file for reading.  If the file were
    # opened first and another writer replaced it while we waited for the
    # lock, we would copy the stale contents and lose that writer's update.
    my $dst = lock_file($file);
    my $src;
    unless (open $src, '<', $file) {
        my $err = "$!";
        # Nothing was changed, so drop the lock again before reporting.
        close $dst;
        unlink "$file.lock";
        die "$file open for reading failed: $err";
    }

    while (my $record = <$src>) {
        my $aid = (split(/:/, $record))[2];
        # Records without a numeric third field cannot influence the id.
        $id = $aid + 1
            if defined($aid) && $aid =~ /^\s*\d+\s*$/ && $aid >= $id;

        print {$dst} $record or die "$file(l) write failed: $!";
    }

    $line =~ s/\\i/$id/g;
    print {$dst} "$line\n" or die "$file(l) write failed: $!";

    close $dst or die "$file(l) close failed: $!";
    close $src;

    unlock_file($file, 0, $updatearg);

    return $id;
}
# Rewrite a file database under its lock.
#   $file      -- path of the database file
#   $fn        -- callback invoked once per line with the line both in $_
#                 and as its argument; whatever it returns is written to
#                 the new file in place of that line
#   $updatearg -- optional, passed through to unlock_file
# The result is written to the lock file, which unlock_file then moves into
# place.  Dies on any I/O failure.
sub filedb_atomic_edit {
    my ($file, $fn, $updatearg) = @_;

    # Take the lock BEFORE opening the file for reading.  If the file were
    # opened first and another writer replaced it while we waited for the
    # lock, we would edit the stale contents and lose that writer's update.
    my $dst = lock_file($file);
    my $src;
    unless (open $src, '<', $file) {
        my $err = "$!";
        # Nothing was changed, so drop the lock again before reporting.
        close $dst;
        unlink "$file.lock";
        die "$file open for reading failed: $err";
    }

    while (<$src>) {
        print {$dst} $fn->($_) or die "$file(l) write failed: $!";
    }

    close $dst or die "$file(l) close failed: $!";
    close $src;

    unlock_file($file, 0, $updatearg);
}
# Scan a file database while holding its lock.
#   $file -- path of the database file
#   $fn   -- callback invoked once per line with the line both in $_ and as
#            its argument
# Returns the list of true values the callback returned, in file order.
# The file itself is never modified; the lock file is removed afterwards.
# Dies on any I/O failure.
sub filedb_atomic_grep {
    my ($file, $fn) = @_;
    my @results = ();

    # Take the lock BEFORE opening the file for reading so that the
    # contents scanned are the ones current while the lock is held rather
    # than a snapshot a concurrent writer may already have replaced.
    my $dst = lock_file($file);
    my $src;
    unless (open $src, '<', $file) {
        my $err = "$!";
        # Drop the lock again before reporting.
        close $dst;
        unlink "$file.lock";
        die "$file open for reading failed: $err";
    }

    while (<$src>) {
        my $result = $fn->($_);
        push(@results, $result) if $result;
    }

    close $dst or die "$file(l) close failed: $!";
    close $src;

    unlock_file($file, 1);
    return @results;
}
# Scan a file database without taking any lock.
#   $file -- path of the database file
#   $fn   -- callback invoked once per line with the line both in $_ and as
#            its argument
# Returns the list of true values the callback returned, in file order.
# Dies if the file cannot be opened for reading.
sub filedb_grep {
    my ($file, $fn) = @_;

    open my $src, '<', $file or die "$file open for reading failed: $!";

    my @results = ();
    while (<$src>) {
        my $hit = $fn->($_);
        next unless $hit;
        push(@results, $hit);
    }

    close $src;

    return @results;
}
# Returns true if the argument looks like a simple email address: one or
# more of [a-zA-Z0-9+._-], an '@', then one or more of [a-zA-Z0-9.-].
# An undefined argument is treated as the empty string (and so is invalid).
sub valid_email {
    my $email = shift;
    defined($email) or $email = '';
    # \z (not $) so that an address followed by a newline is rejected;
    # $ would also match just before a trailing "\n".
    return $email =~ /^[a-zA-Z0-9+._-]+\@[a-zA-Z0-9.-]+\z/;
}
# Normalize a comma-separated list of email addresses.
# Leading and trailing whitespace and any whitespace around the commas is
# removed, empty entries are dropped and case-insensitive duplicates are
# removed (the first spelling seen is kept).
# Returns the remaining entries joined with "," (no spaces).
# An undefined argument is treated as the empty string.
sub clean_email_multi {
    my $input = shift;
    $input = '' unless defined($input);
    s/^\s+//, s/\s+$// for $input;
    my %seen = ();
    my @newlist = grep { $_ ne "" && !$seen{lc($_)}++ }
        split(/\s*,\s*/, $input);
    return join(",", @newlist);
}
# Returns 1 if every entry of a comma-separated address list passes
# valid_email, otherwise 0.  The list is first passed through
# clean_email_multi, so extra spaces at the beginning/end and around any
# comma(s) are silently ignored.
sub valid_email_multi {
    my $cleaned = clean_email_multi(shift);
    for my $addr (split(/,/, $cleaned)) {
        valid_email($addr) or return 0;
    }
    return 1;
}
# Returns true if the argument is an acceptable http:// or https:// URL:
# a host[:port] part made of [a-zA-Z0-9.:-], an optional path/query made of
# a restricted character set, and an optional "#fragment".
# An undefined argument is treated as the empty string (and so is invalid).
sub valid_web_url {
    my $url = shift;
    defined($url) or $url = '';
    # \z (not $) so that a URL followed by a newline is rejected;
    # $ would also match just before a trailing "\n".
    return $url =~
        /^https?:\/\/[a-zA-Z0-9.:-]+(\/[_\%a-zA-Z0-9.\/~:?&=;-]*)?(#[a-zA-Z0-9._-]+)?\z/;
}
# Returns true if the argument is an acceptable repository URL.
# http://, https:// and git:// URLs are always candidates; svn://,
# svn+http(s)://, darcs://, darcs+http(s)://, bzr:// and hg+http(s):// URLs
# are only accepted when the corresponding $Girocco::Config::mirror_svn,
# mirror_darcs, mirror_bzr or mirror_hg setting is true.
# Currently neither username nor password is allowed in the URL (except
# that svn URLs may carry a "user@" part) and IPv6 literal addresses are
# not accepted either.
# Every pattern is anchored with \z (not $) so that a URL followed by a
# newline is rejected; $ would also match just before a trailing "\n".
sub valid_repo_url {
    my $url = shift || '';
    return 1 if $Girocco::Config::mirror_svn &&
        $url =~ /^svn(\+https?)?:\/\/([^\@\/\s]+\@)?[a-zA-Z0-9.:-]+(\/[_\%a-zA-Z0-9.\/+~-]*)?\z/os;
    return 1 if $Girocco::Config::mirror_darcs &&
        $url =~ /^darcs(?:\+https?)?:\/\/[a-zA-Z0-9.:-]+(\/[_\%a-zA-Z0-9.\/+~-]*)?\z/os;
    return 1 if $Girocco::Config::mirror_bzr &&
        $url =~ /^bzr:\/\/[a-zA-Z0-9.:-]+(\/[_\%a-zA-Z0-9.\/+~-]*)?\z/os;
    return 1 if $Girocco::Config::mirror_hg &&
        $url =~ /^hg\+https?:\/\/[a-zA-Z0-9.:-]+(\/[_\%a-zA-Z0-9.\/+~-]*)?\z/os;
    return $url =~ /^(https?|git):\/\/[a-zA-Z0-9.:-]+(\/[_\%a-zA-Z0-9.\/+~-]*)?\z/;
}
# Pull the host name out of a URL.
# A leading "bzr://" wrapper is removed first; if what remains starts with
# "lp:" the answer is 'launchpad.net'.  Otherwise the (remaining) URL must
# start with "scheme://" followed by a non-'/' character; the scheme, the
# path, a trailing ":port" and any "userinfo@" prefix are then removed.
# Returns undef when no host name can be extracted.
sub extract_url_hostname {
    my $url = shift || '';
    if ($url =~ s,^bzr://,,) {
        return 'launchpad.net' if $url =~ /^lp:/;
    }
    return undef unless $url =~ m,^[A-Za-z0-9+.-]+://[^/],;
    for ($url) {
        s,^[A-Za-z0-9+.-]+://,,;   # drop the scheme
        s,^([^/]+).*$,$1,;         # keep only the authority part
        s/:[0-9]*$//;              # drop a trailing port
        s/^[^\@]*[\@]//;           # drop any userinfo
    }
    return $url || undef;
}
# Returns 1 if the argument is acceptable as a DNS host name, otherwise 0.
# See these RFCs:
#   RFC 1034 section 3.5
#   RFC 1123 section 2.1
#   RFC 1738 section 3.1
#   RFC 2606 sections 2 & 3
#   RFC 3986 section 3.2.2
# Rejected are: empty or whitespace-containing names, names longer than 255
# characters (after removing one trailing '.'), dotted-quad IPv4 addresses,
# names with fewer than $Girocco::Config::min_dns_labels labels, labels that
# are empty, longer than 63 characters or not of the letter/digit/hyphen
# form, and (when at least two labels are present) the RFC 2606 reserved
# names.
sub is_dns_hostname {
    my $host = shift;
    $host = '' unless defined($host);
    return 0 if $host eq '' || $host =~ /\s/;
    # first remove a trailing '.'
    $host =~ s/\.$//;
    return 0 if length($host) > 255;
    my $octet = '(?:\d|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])';
    return 0 if $host =~ /^$octet\.$octet\.$octet\.$octet$/o;
    my @labels = split(/[.]/, $host, -1);
    return 0 unless @labels && @labels >= $Girocco::Config::min_dns_labels;
    # now check each label
    my @badlabels = grep {
        !(length($_) > 0 && length($_) <= 63 &&
          /^[A-Za-z0-9](?:[A-Za-z0-9-]*[A-Za-z0-9])?$/)
    } @labels;
    return 0 if @badlabels;
    # disallow RFC 2606 names provided at least two labels are present
    if (@labels >= 2) {
        my ($sld, $tld) = map { lc($_) } @labels[-2, -1];
        my %reserved_tld = map { $_ => 1 } qw(test example invalid localhost);
        return 0 if $reserved_tld{$tld};
        my %common_tld = map { $_ => 1 } qw(com net org);
        return 0 if $sld eq 'example' && $common_tld{$tld};
    }
    return 1;
}
# Returns 1 if the argument (compared case-insensitively and ignoring one
# trailing '.') equals the host name found in any of the configured site
# URLs listed below, otherwise 0.  Unset URLs and URLs from which no host
# name can be extracted are skipped.
sub is_our_hostname {
    my $test = shift || '';
    $test =~ s/\.$//;
    my %names = ();
    my @urls = grep { $_ } (
        $Girocco::Config::gitweburl,
        $Girocco::Config::gitwebfiles,
        $Girocco::Config::webadmurl,
        $Girocco::Config::bundlesurl,
        $Girocco::Config::htmlurl,
        $Girocco::Config::httppullurl,
        $Girocco::Config::httpbundleurl,
        $Girocco::Config::httpspushurl,
        $Girocco::Config::gitpullurl,
        $Girocco::Config::pushurl
    );
    foreach my $url (@urls) {
        my $host = extract_url_hostname($url);
        next unless defined($host);
        $host =~ s/\.$//;
        $names{lc($host)} = 1;
    }
    return $names{lc($test)} ? 1 : 0;
}
# Tables used by valid_tag.  They are filled in a BEGIN block, so the
# contents of @Girocco::Config::allowed_tags and
# @Girocco::Config::blocked_tags at that point are what gets folded in.
my (%_oktags, %_badtags, %_canontags, $_canontagscreated, @_whitetags);
BEGIN {
    # These are always okay (a "whitelist") even if they would
    # otherwise not be allowed
    @_whitetags = qw(
        .net 2d 3d 6502 68000 68008 68010 68020 68030 68040 68060
        8086 80286 80386 80486 80586 c cc make www x
    );
    $_oktags{lc($_)} = 1 foreach @_whitetags, @Girocco::Config::allowed_tags;

    # entries MUST be all lowercase to be effective
    # These are "nonsense" or pointless tags
    %_badtags = map { $_ => 1 } qw(
        about after all also an and another any
        are as at be because been before being
        between both but by came can come could
        did do each for from get got had has
        have he her here him himself his how
        if in into is it like make many me
        might more most much must my never now
        of oh on only or other our out over
        said same see should since some still
        such take than that the their them then
        there these they this those through to
        too under up very was way we well
        were what where which while who with
        would yea yeah you your yup
    );

    # These are "offensive" tags with at least one letter escaped to
    # avoid having this file trigger various safe-scan robots
    $_badtags{$_} = 1 foreach (
        "a\x73\x73",
        "a\x73\x73hole",
        "b\x30\x30b",
        "b\x30\x30bs",
        "b\x6f\x6fb",
        "b\x6f\x6fbs",
        "b\x75tt",
        "b\x75ttd\x69\x63k",
        "c\x6f\x63k",
        "c\x75\x6e\x74",
        "d\x69\x63k",
        "d\x69\x63kb\x75tt",
        "f\x75\x63k",
        "in\x63\x65st",
        "ph\x75\x63k",
        "p\x6f\x72n",
        "p\x6f\x72no",
        "p\x6f\x72nographic",
        "p\x72\x30n",
        "p\x72\x6fn",
        "r\x61\x70e",
        "s\x65\x78",
    );
    $_badtags{lc($_)} = 1 foreach @Girocco::Config::blocked_tags;
}

# A valid tag must only have [a-zA-Z0-9:.+#_-] characters, must start with a
# letter, must not be a noise word, must be more than one character long,
# must not be a repeated letter and must be no more than 32 characters long.
# However, anything in %_oktags is explicitly allowed even if it otherwise
# would violate the rules (except that none of [,\s\\\/] are allowed in tags).
# When $Girocco::Config::foldtags is true the canonical name is the
# lowercased tag, except that whitelisted/allowed tags keep the spelling
# recorded for them in %_canontags (built on first use).
# Returns the canonical name for the tag if the tag is valid otherwise undef.
sub valid_tag {
    my $tag = $_[0];
    return undef unless defined($tag) && $tag ne "" && $tag !~ /[,\s\/\\]/;
    my $fold = $Girocco::Config::foldtags;
    my $lctag = lc($tag);
    if ($fold && !$_canontagscreated) {
        %_canontags = ();
        # Walk the names in descending order so that, among spellings that
        # differ only in case, the one that sorts first is the one kept.
        for my $known (reverse(sort(@_whitetags, @Girocco::Config::allowed_tags))) {
            $_canontags{lc($known)} = $known;
        }
        $_canontagscreated = 1;
    }
    my $canon = $fold ? $lctag : $tag;
    return $_canontags{$lctag} if $fold && exists($_canontags{$lctag});
    return $canon if $_oktags{$lctag};
    return undef unless $tag =~ /^[a-zA-Z][a-zA-Z0-9:.+#_-]+$/;
    return undef if $_badtags{$lctag};
    return undef if $tag =~ /^(.)\1+$/;
    return length($tag) <= 32 ? $canon : undef;
}
# If the passed in argument looks like a URL, return only the stuff up through
# the host:port part (scheme, "//", optional "userinfo@", host and port)
# otherwise return the entire argument.
# A false argument is treated as the empty string.
sub url_base {
    my $url = shift || '';
    # See RFC 3986
    # The userinfo part may not contain '/', '?' or '#'; without that
    # restriction an '@' appearing later in the path or query string would
    # cause part of the path to be mistaken for userinfo.
    if ($url =~ m{^( [A-Za-z][A-Za-z0-9+.-]*: )   # scheme
                   ( // )                          # // separator
                   ( (?:[^\@/?#]+\@)? )            # optional userinfo
                   ( [^/?#]+ )                     # host and port
                   # path and optional query string and/or anchor
                   (?:[/?#].*)?$}x) {
        $url = $1.$2.$3.$4;
    }
    return $url;
}
# If the passed in argument looks like a URL, return only the stuff following
# the host:port part (path and optional query string and/or anchor)
# otherwise return the entire argument.
# If the optional second argument is true, the returned value will have '/'
# appended if it does not already end in '/'.
# A false first argument is treated as the empty string.
sub url_path {
    my $url = shift || '';
    my $add_slash = shift || 0;
    # See RFC 3986
    # The userinfo part may not contain '/', '?' or '#'; without that
    # restriction an '@' appearing later in the path or query string would
    # cause part of the path to be mistaken for userinfo and be dropped.
    if ($url =~ m{^(?: [A-Za-z][A-Za-z0-9+.-]*: )  # scheme
                   (?: // )                         # // separator
                   (?: [^\@/?#]+\@ )?               # optional userinfo
                   (?: [^/?#]+ )                    # host and port
                   # path and optional query string and/or anchor
                   ((?:[/?#].*)?)$}x) {
        $url = $1;
    }
    $url .= '/' if $add_slash && $url !~ m|/$|;
    return $url;
}
# If both SERVER_NAME and SERVER_PORT are set (and SERVER_PORT is a number
# of at most five digits not starting with 0), pass the argument through
# url_path and then prefix it with the appropriate scheme (https when the
# HTTPS environment variable is "on", otherwise http), host and port and
# return it.  The port is left out when it is the default for the scheme.
# If something that doesn't look like it could be the start of a URL path
# comes back from url_path or SERVER_NAME is a link-local IPv6 address then
# just return the argument unchanged.
sub url_server {
    my $url = shift || '';
    my $path = url_path($url);
    return $url unless $path eq '' || $path =~ m|^[/?#]|;
    my ($name, $portenv) = @ENV{'SERVER_NAME', 'SERVER_PORT'};
    return $url unless $name && $portenv &&
        $portenv =~ /^[1-9][0-9]{0,4}$/;
    return $url if $name =~ /^[[]?fe80:/i;
    # Deal with Apache bug where IPv6 literal server names do not include
    # the required surrounding '[' and ']' characters
    my $server = ($name =~ /:/ && $name !~ /^[[]/) ? '[' . $name . ']' : $name;
    my $ishttps = $ENV{'HTTPS'} && $ENV{'HTTPS'} =~ /^on$/i;
    my $portnum = 0 + $portenv;
    my $defaultport = $ishttps ? 443 : 80;
    my $port = $portnum != $defaultport ? ':' . $portnum : '';
    return 'http' . ($ishttps ? 's' : '') . '://' . $server . $port . $path;
}
# Returns the number rounded to the nearest tenth as a string.  The ".d"
# part is left off when it would be ".0" unless the optional second
# argument is true.
sub _tenths {
    my ($v, $use0) = @_;
    my $scaled = int($v * 10 + 0.5);
    my $whole = int($scaled / 10);
    my $tenth = $scaled % 10;
    return '' . $whole unless $tenth || $use0;
    return '' . $whole . '.' . $tenth;
}
# Returns a human-readable size string (e.g. '1.5 MiB') for the value
# (in bytes) passed in.  Returns '0' for undefined or 0 or not all digits.
# Otherwise returns '1 KiB' for values up to 1024, or else a number rounded
# to the nearest tenth of a KiB, MiB or GiB (GiB is the largest unit used).
sub human_size {
    my $v = shift || 0;
    return "0" unless $v && $v =~ /^\d+$/;
    return "1 KiB" unless $v > 1024;
    foreach my $unit (" KiB", " MiB") {
        $v /= 1024;
        return _tenths($v) . $unit if $v < 1024;
    }
    $v /= 1024;
    return _tenths($v) . " GiB";
}
# Returns a human duration string (e.g. "1h10m5s") for the value (in secs)
# passed in; any fractional part of the value is dropped.  Days are the
# largest unit used (e.g. "1d1h1m1s").
# Returns the value unchanged if it's not defined or is negative.
sub human_duration {
    my $secs = shift;
    return $secs unless defined($secs) && $secs >= 0;
    my $remaining = int($secs);
    my $ans = '';
    foreach my $unit ([60, 's'], [60, 'm'], [24, 'h']) {
        my ($per, $suffix) = @$unit;
        $ans = ($remaining % $per) . $suffix . $ans;
        return $ans if $remaining < $per;
        $remaining = int($remaining / $per);
    }
    return $remaining . 'd' . $ans;
}
# Returns a copy of the argument with the characters & < > and " replaced
# by the corresponding HTML entities (&amp; &lt; &gt; &quot;).
sub _escapeHTML {
    my $str = shift;
    my %entity_for = (
        '&' => '&amp;',
        '<' => '&lt;',
        '>' => '&gt;',
        '"' => '&quot;',
    );
    $str =~ s/([&<>"])/$entity_for{$1}/gs;
    return $str;
}
# Create a relative time string from the passed in age in seconds.
# The coarsest unit (years, months, weeks, days, hours, mins, secs) whose
# count would exceed two is used, e.g. "3 days ago"; ages from 0 through 2
# seconds give "right now" and negative ages give "future time".
sub _rel_age {
    my $age = shift;

    # [ use this row when age exceeds this, whole units in age, suffix ]
    my @scale = (
        [60*60*24*365*2,      sub { int($_[0]/60/60/24/365) },      " years ago"],
        [60*60*24*(365/12)*2, sub { int($_[0]/60/60/24/(365/12)) }, " months ago"],
        [60*60*24*7*2,        sub { int($_[0]/60/60/24/7) },        " weeks ago"],
        [60*60*24*2,          sub { int($_[0]/60/60/24) },          " days ago"],
        [60*60*2,             sub { int($_[0]/60/60) },             " hours ago"],
        [60*2,                sub { int($_[0]/60) },                " mins ago"],
        [2,                   sub { int($_[0]) },                   " secs ago"],
    );
    foreach my $row (@scale) {
        my ($limit, $units, $suffix) = @$row;
        return $units->($age) . $suffix if $age > $limit;
    }
    return $age >= 0 ? "right now" : "future time";
}
# Create a relative idle-time string from the passed in idle time in
# seconds: the _rel_age text without its " ago" part, with "not at all"
# used in place of "right now".
sub _rel_idle {
    my $idle_str = _rel_age(shift);
    return "not at all" if $idle_str eq "right now";
    $idle_str =~ s/ ago//;
    return $idle_str;
}
# Format a time for display in a given time zone.
#   $fmt      -- POSIX strftime format; every "%z" in it is replaced by the
#                zone offset in "+hhmm" / "-hhmm" form
#   $secs     -- time in seconds since the epoch
#   $zonesecs -- zone offset in seconds
# The broken-down time handed to strftime is gmtime($secs + $zonesecs).
sub _strftime {
    use POSIX qw(strftime);
    my ($fmt, $secs, $zonesecs) = @_;
    my @tm = (gmtime($secs + $zonesecs))[0..5];
    # strftime does not know the zone, so protect %z with a placeholder
    # and substitute the offset afterwards.
    $fmt =~ s/%z/\$z/g;
    my $ans = strftime($fmt, @tm, -1, -1, -1);
    my $zonemins = int($zonesecs / 60);
    my $sign = $zonemins < 0 ? "-" : "+";
    $zonemins = -$zonemins if $zonemins < 0;
    my $z = $sign . sprintf("%02d%02d", int($zonemins/60), $zonemins % 60);
    $ans =~ s/\$z/$z/g;
    return $ans;
}
# Take a list of project names and produce a nicely formatted table that
# includes owner links and descriptions.  If the list is empty returns ''.
# Names for which Girocco::Project::does_exist is false are skipped; if no
# project ends up in the table '' is returned as well.
# The first argument may be a hash ref that contains options.  The following
# options are available:
#   target  -- sets the target value of the owner link
#   emptyok -- if true returns an empty table rather than ''
#   sizecol -- if true include a human-readable size column
#   typecol -- if true include type column with hover info
#   changed -- if true include a changed and idle column
# Rows are sorted case-insensitively by project name and alternate between
# class="odd" and no class.
sub projects_html_list {
    my $options = {};
    if (defined($_[0]) && ref($_[0]) eq 'HASH') {
        $options = shift;
    }
    return '' unless @_ || (defined($options->{emptyok}) && $options->{emptyok});
    require Girocco::Project;
    my $count = 0;
    my $target = '';
    $target = " target=\""._escapeHTML($options->{target})."\""
        if defined($options->{target});
    my $withsize = defined($options->{sizecol}) && $options->{sizecol};
    my $withtype = defined($options->{typecol}) && $options->{typecol};
    my $withchanged = defined($options->{changed}) && $options->{changed};
    # Each substr(<<EOT, 0, -1) drops the final newline of the here-document.
    my $sizehead = '';
    $sizehead = substr(<<EOT, 0, -1) if $withsize;
<th class="sizecol"><span class="hover">Size<span><span class="head" _data="Size"></span
/><span class="none" /><br />(</span>Fork size excludes objects borrowed from the parent.<span class="none">)</span></span></span></th
>
EOT
    my $typehead = '';
    $typehead = '<th>Type</th>' if $withtype;
    my $chghead = '';
    $chghead = substr(<<EOT, 0, -1) if $withchanged;
<th><span class="hover">Changed<span><span class="head" _data="Changed"></span
/><span class="none" /><br />(</span>The last time a ref change was received by this site.<span class="none">)</span></span></span></th
><th><span class="hover">Idle<span><span class="head" _data="Idle"></span
/><span class="none" /><br />(</span>The most recent committer time in <i>refs/heads</i>.<span class="none">)</span></span></span></th
>
EOT
    my $html = <<EOT;
<table class='projectlist'><tr valign="top" align="left"><th>Project</th>$sizehead$typehead$chghead<th class="desc">Description</th></tr>
EOT
    my $trclass = ' class="odd"';
    foreach (sort({lc($a) cmp lc($b)} @_)) {
        if (Girocco::Project::does_exist($_, 1)) {
            my $proj = Girocco::Project->load($_);
            my $projname = $proj->{name}.".git";
            my $projdesc = $proj->{desc}||'';
            utf8::decode($projdesc) if utf8::valid($projdesc);
            my $sizecol = '';
            if ($withsize) {
                # reposizek is in KiB; anything that is not all digits
                # counts as unknown unless the project is empty.
                my $psize = $proj->{reposizek};
                $psize = undef unless defined($psize) && $psize =~ /^\d+$/;
                $psize = 0 if !defined($psize) && $proj->is_empty;
                if (!defined($psize)) {
                    $psize = 'unknown';
                } elsif (!$psize) {
                    $psize = 'empty';
                } else {
                    $psize = human_size($psize * 1024);
                    $psize =~ s/ /\&#160;/g;
                }
                $sizecol = '<td class="sizecol">'.$psize.'</td>';
            }
            my $typecol = '';
            if ($withtype) {
                if ($proj->{mirror}) {
                    my $url = _escapeHTML($proj->{url});
                    $typecol = substr(<<EOT, 0, -1);
<td class="type"><span class="hover">mirror<span class="nowrap"><span class="before" _data="$url"><span class="none"> <a href="$url" rel="nofollow">(URL)</a></span></span></span></span></td>
EOT
                } else {
                    my $users = @{$proj->{users}};
                    $users .= ' user';
                    $users .= 's' unless @{$proj->{users}} == 1;
                    my $userlist = join(', ', sort({lc($a) cmp lc($b)} @{$proj->{users}}));
                    my $spncls = length($userlist) > 25 ? '' : ' class="nowrap"';
                    # Two here-documents: the first is used when there is
                    # a user list to show on hover, the second otherwise.
                    $typecol = $userlist ? substr(<<EOT, 0, -1) : substr(<<EOT, 0, -1);
<td class="type"><span class="hover">$users<span$spncls><br class="none" />$userlist</span></span></td>
EOT
<td class="type">$users</td>
EOT
                }
            }
            my $changecol = '';
            if ($withchanged) {
                my $rel = '';
                my $changetime = $proj->{lastchange};
                if ($changetime) {
                    my ($ts, $tz);
                    $ts = parse_rfc2822_date($changetime, \$tz);
                    my $ct = _strftime("%Y-%m-%d %T %z", $ts, $tz);
                    $rel = "<span class=\"hover\">" .
                        _rel_age(time - $ts) .
                        "<span class=\"nowrap\"><span class=\"before\" _data=\"$changetime\"></span><span class=\"none\"><br />$ct</span></span></span>";
                } else {
                    $rel = "no commits";
                }
                $changecol = substr(<<EOT, 0, -1);
<td class="change">$rel</td>
EOT
                my $idletime = $proj->{lastactivity};
                my ($idlesecs, $tz);
                $idlesecs = parse_any_date($idletime, \$tz) if $idletime;
                if ($idlesecs) {
                    my $idle2822 = _strftime("%a, %d %b %Y %T %z", $idlesecs, $tz);
                    my $ct = _strftime("%Y-%m-%d %T %z", $idlesecs, $tz);
                    $rel = "<span class=\"hover\">" .
                        _rel_idle(time - $idlesecs) .
                        "<span class=\"nowrap\"><span class=\"before\" _data=\"$idle2822\"></span><span class=\"none\"><br />$ct</span></span></span>";
                } else {
                    $rel = "no commits";
                }
                $changecol .= substr(<<EOT, 0, -1);
<td class="idle">$rel</td>
EOT
            }
            # The project link must be closed with </a>; leaving the anchor
            # open produces invalid markup.
            $html .= <<EOT;
<tr valign="top"$trclass><td><a href="@{[url_path($Girocco::Config::gitweburl)]}/$projname"$target
>@{[_escapeHTML($projname)]}</a></td>$sizecol$typecol$changecol<td>@{[_escapeHTML($projdesc)]}</td></tr>
EOT
            $trclass = $trclass ? '' : ' class="odd"';
            ++$count;
        }
    }
    $html .= <<EOT;
</table>
EOT
    return ($count || (defined($options->{emptyok}) && $options->{emptyok})) ? $html : '';
}
# Lowercased three-letter English month abbreviation => zero-based month
# number as expected by timegm.
my %_month_names;
BEGIN {
    %_month_names = (
        jan => 0, feb => 1, mar => 2, apr => 3, may => 4, jun => 5,
        jul => 6, aug => 7, sep => 8, oct => 9, nov => 10, dec => 11
    );
}

# Should be in "date '+%a, %d %b %Y %T %z'" format as saved to lastgc, lastrefresh and lastchange
# The leading "%a, " is optional, returns undef if unrecognized date.  This is also known as
# RFC 2822 date format and git's '%cD', '%aD' and --date=rfc2822 format.
# Returns the time in seconds since the epoch (UTC).
# If the second argument is a SCALAR ref, its value will be set to the TZ offset in seconds
# Dates that match the format but are out of range (e.g. "31 Feb") also
# return undef.
sub parse_rfc2822_date {
    my $dstr = shift || '';
    my $tzoff = shift || '';
    # strip the optional leading day name
    $dstr = $1 if $dstr =~/^[^\s]+,\s*(.*)$/;
    return undef unless $dstr =~
        /^\s*(\d{1,2})\s+([A-Za-z]{3})\s+(\d{4})\s+(\d{1,2}):(\d{2}):(\d{2})\s+([+-]\d{4})\s*$/;
    my ($day, $monname, $year, $hour, $min, $sec, $z) = ($1,$2,$3,$4,$5,$6,$7);
    my $mon = $_month_names{lc($monname)};
    return undef unless defined($mon);
    # timegm croaks when a field is out of range; trap that so the
    # documented "returns undef" contract holds for such dates too.
    my $seconds = eval {
        local $SIG{__DIE__} = 'DEFAULT';
        timegm(0+$sec, 0+$min, 0+$hour, 0+$day, 0+$mon, 0+$year);
    };
    return undef unless defined($seconds);
    my $offset = 60 * (60 * (0+substr($z,1,2)) + (0+substr($z,3,2)));
    $offset = -$offset if substr($z,0,1) eq '-';
    $$tzoff = $offset if ref($tzoff) eq 'SCALAR';
    return $seconds - $offset;
}
# Will parse any supported date format.  There are four formats
# currently supported:
# 1. RFC 2822 (uses parse_rfc2822_date)
# 2. RFC 3339 / ISO 8601 (T may be ' ' or '_', 'Z' is optional or may be 'UTC', ':' optional in TZ)
# 3. Same as #2 except no colons or hyphens allowed and hours MUST be 2 digits
# 4. unix seconds since epoch with optional +/- trailing TZ (may not have a ':')
# Returns the time in seconds since the epoch (UTC).
# Returns undef if unsupported date; dates in format 2 or 3 whose fields
# are out of range (e.g. month 13) also return undef.
# If the second argument is a SCALAR ref, its value will be set to the TZ offset in seconds
sub parse_any_date {
    my $dstr = shift || '';
    my $tzoff = shift || '';
    if ($dstr =~ /^\s*([-+]?\d+)(?:\s+([-+]\d{4}))?\s*$/) {
        # Unix timestamp
        my $ts = 0 + $1;
        my $off = 0;
        if ($2) {
            my $z = $2;
            $off = 60 * (60 * (0+substr($z,1,2)) + (0+substr($z,3,2)));
            $off = -$off if substr($z,0,1) eq '-';
        }
        $$tzoff = $off if ref($tzoff) eq 'SCALAR';
        # the timestamp already is UTC, the offset is informational only
        return $ts;
    }
    if ($dstr =~ /^\s*(\d{4})-(\d{2})-(\d{2})[Tt _](\d{1,2}):(\d{2}):(\d{2})(?:[ _]?([Zz]|[Uu][Tt][Cc]|(?:[-+]\d{1,2}:?\d{2})))?\s*$/ ||
        $dstr =~ /^\s*(\d{4})(\d{2})(\d{2})[Tt _](\d{2})(\d{2})(\d{2})(?:[ _]?([Zz]|[Uu][Tt][Cc]|(?:[-+]\d{2}\d{2})))?\s*$/) {
        my ($year, $mon, $day, $hour, $min, $sec, $z) = ($1,$2,$3,$4,$5,$6,$7||'');
        # timegm croaks when a field is out of range; trap that so the
        # documented "returns undef" contract holds for such dates too.
        my $seconds = eval {
            local $SIG{__DIE__} = 'DEFAULT';
            timegm(0+$sec, 0+$min, 0+$hour, 0+$day, $mon-1, 0+$year);
        };
        return undef unless defined($seconds);
        defined($z) && $z ne '' or $z = 'Z';
        $z = uc($z);
        $z =~ s/://;
        # turn a one-digit hour offset such as "+530" into "+0530"
        substr($z,1,0) = '0' if length($z) == 4;
        my $off = 0;
        if ($z ne 'Z' && $z ne 'UTC') {
            $off = 60 * (60 * (0+substr($z,1,2)) + (0+substr($z,3,2)));
            $off = -$off if substr($z,0,1) eq '-';
        }
        $$tzoff = $off if ref($tzoff) eq 'SCALAR';
        return $seconds - $off;
    }
    return parse_rfc2822_date($dstr, $tzoff);
}
# Input is a number such as a minute interval
# Return value is the input plus a random whole number that is less than
# one quarter of the input, i.e. somewhere between the input and
# 1.25*input.  A false input (undef, 0, '') is returned as 0 / unchanged.
# This can be used to randomize the update and gc operations a bit to avoid
# having them all end up clustered together
sub rand_adjust {
    my $input = shift || 0;
    if ($input) {
        my $jitter = int(rand(0.25 * $input));
        return $input + $jitter;
    }
    return $input;
}
# Open a pipe to a new sendmail process.  The '-i' option is always passed to
# the new process followed by any additional arguments passed in.  Note that
# the sendmail process is only expected to understand the '-i', '-t' and '-f'
# options.  Using any other options via this function is not guaranteed to work.
# A list of recipients may follow the options.  Combining a list of recipients
# with the '-t' option is not recommended.
# Returns the write handle of the pipe, or undef if no arguments were given
# or the pipe could not be opened.  Dies if
# $Girocco::Config::sendmail_bin is unset or not executable.
sub sendmail_pipe {
    return undef unless @_;
    my $sendmail = $Girocco::Config::sendmail_bin;
    die "\$Girocco::Config::sendmail_bin is unset or not executable!\n"
        unless $sendmail && -x $sendmail;
    open(my $pipe, '|-', $sendmail, '-i', @_) or return undef;
    return $pipe;
}
# Open a pipe that works similarly to a mailer such as /usr/bin/mail in that
# if the first argument is '-s', a subject line will be automatically added
# (using the second argument as the subject).  Any remaining arguments are
# expected to be recipient addresses that will be added to an explicit To:
# line as well as passed on to sendmail_pipe (preceded by
# "-f $Girocco::Config::sender" when that is set).  In addition an
# "Auto-Submitted: auto-generated" header is always added as well as a
# suitable "From:" header, MIME headers and, unless
# $Girocco::Config::suppress_x_girocco is true, an "X-Girocco:" header.
# The headers and the blank line ending them are written before the pipe is
# returned.  Returns whatever sendmail_pipe returned.
sub mailer_pipe {
    my @args = @_;
    my $subject = undef;
    if (@args >= 2 && $args[0] eq '-s') {
        (undef, $subject) = splice(@args, 0, 2);
    }
    my $tolist = join(", ", @args);
    unshift(@args, '-f', $Girocco::Config::sender) if $Girocco::Config::sender;
    my $pipe = sendmail_pipe(@args);
    return $pipe unless $pipe;

    print {$pipe} "From: \"$Girocco::Config::name\" ",
                  "($Girocco::Config::title) ",
                  "<$Girocco::Config::admin>\n";
    my @headers = ("To: $tolist\n");
    push(@headers, "Subject: $subject\n") if defined($subject);
    push(@headers,
        "MIME-Version: 1.0\n",
        "Content-Type: text/plain; charset=utf-8; format=fixed\n",
        "Content-Transfer-Encoding: 8bit\n");
    push(@headers, "X-Girocco: $Girocco::Config::gitweburl\n")
        unless $Girocco::Config::suppress_x_girocco;
    push(@headers, "Auto-Submitted: auto-generated\n", "\n");
    print {$pipe} $_ foreach @headers;
    return $pipe;
}
# Clean up a number read from command output.
# Trailing CR/LF characters are removed; the result is returned as a number
# if what remains is all digits and at least 1, otherwise undef is returned
# (also for an undefined argument).
sub _goodval {
    my ($raw) = @_;
    defined($raw) or return undef;
    (my $text = $raw) =~ s/[\r\n]+$//s;
    my $num = $text =~ /^\d+$/ ? 0 + $text : 0;
    return $num >= 1 ? $num : undef;
}
# Returns the number of "online" cpus or undef if undetermined
# "getconf" is asked first, trying both the _NPROCESSORS_ONLN and
# NPROCESSORS_ONLN spellings (the underscore form first on linux).  On
# systems other than linux "sysctl -n hw.ncpu" is tried after that, preceded
# by hw.availcpu on darwin.
sub online_cpus {
    my $is_linux = $^O eq "linux";
    my @confcpus = qw(NPROCESSORS_ONLN _NPROCESSORS_ONLN);
    @confcpus = reverse(@confcpus) if $is_linux;
    foreach my $name (@confcpus[0, 1]) {
        my $cpus = _goodval(get_cmd('getconf', $name));
        return $cpus if $cpus;
    }
    unless ($is_linux) {
        my @sysctls = $^O eq "darwin" ? qw(hw.availcpu hw.ncpu) : qw(hw.ncpu);
        foreach my $mib (@sysctls) {
            my $cpus = _goodval(get_cmd('sysctl', '-n', $mib));
            return $cpus if $cpus;
        }
    }
    return undef;
}
# Returns the system page size in bytes or undef if undetermined
# (sysconf gave no all-digit answer or one smaller than 256).
# This should never fail on a POSIX system
sub sys_pagesize {
    use POSIX ":unistd_h";
    my $reported = sysconf(_SC_PAGESIZE);
    my $usable = defined($reported) && $reported =~ /^\d+$/;
    return undef unless $usable;
    my $pagesize = 0 + $reported;
    return $pagesize >= 256 ? $pagesize : undef;
}
# Returns the amount of available physical memory in bytes
# This may differ from the actual amount of physical memory installed
# Returns undef if this cannot be determined
# On linux the answer is the page size times "getconf _PHYS_PAGES".
# Elsewhere "sysctl -n" is consulted: hw.memsize (darwin only) and
# hw.physmem64 first, then hw.physmem if it does not exceed 2147483647,
# then page size times hw.availpages, and finally 2147483648 if hw.physmem
# reported some larger value.
sub sys_memsize {
    my $pagesize = sys_pagesize();
    if ($^O eq "linux") {
        return undef unless $pagesize;
        my $pages = _goodval(get_cmd('getconf', '_PHYS_PAGES'));
        return $pages ? $pagesize * $pages : undef;
    }

    my @sysctls = $^O eq "darwin" ? qw(hw.memsize hw.physmem64) : qw(hw.physmem64);
    foreach my $mib (@sysctls) {
        my $memsize = _goodval(get_cmd('sysctl', '-n', $mib));
        return $memsize if $memsize;
    }
    my $memsize32 = _goodval(get_cmd('sysctl', '-n', 'hw.physmem'));
    return $memsize32 if $memsize32 && $memsize32 <= 2147483647;
    if ($pagesize) {
        my $pages = _goodval(get_cmd('sysctl', '-n', 'hw.availpages'));
        return $pagesize * $pages if $pages;
    }
    return $memsize32 ? 2147483647 + 1 : undef;
}
# Convert a configured size such as "512", "64k", "256m" or "1g" (the
# suffix may be upper or lower case) into a number of bytes using powers of
# 1024.  Returns undef if the value is undefined or not of that form.
sub _get_max_conf_suffixed_size {
    my $conf = shift;
    return undef unless defined $conf && $conf =~ /^(\d+)([kKmMgG]?)$/;
    my ($val, $suffix) = (0+$1, lc($2));
    my %multiplier_for = (
        ''  => 1,
        'k' => 1024,
        'm' => 1024 * 1024,
        'g' => 1024 * 1024 * 1024,
    );
    return $val * $multiplier_for{$suffix};
}
# Express a byte count using the largest of the suffixes k, m and g (powers
# of 1024) that divides it evenly, e.g. 2048 => "2k".  A value that is not
# a multiple of 1024 is returned unchanged.
sub _make_suffixed_size {
    my $size = shift;
    return $size if $size % 1024;
    foreach my $suffix (qw(k m)) {
        $size /= 1024;
        return "${size}${suffix}" if $size % 1024;
    }
    $size /= 1024;
    return "${size}g";
}
# Return the value to pass to --window-memory= for git repack
# If the system memory or number of CPUs cannot be determined, starts from
# one gigabyte.  Otherwise starts from one third the available memory
# divided by the number of CPUs but never more than 1 gigabyte.  The result
# is further limited to $Girocco::Config::max_gc_window_memory_size when
# that is set to a valid non-zero size, and is returned in suffixed form
# (e.g. "1g").
sub calc_windowmemory {
    my $gigabyte = 1024 * 1024 * 1024;
    my $cpus = online_cpus();
    my $memsize = sys_memsize();
    my $max = $gigabyte;
    if ($cpus && $memsize) {
        my $share = int($memsize / 3 / $cpus);
        $max = $share >= $gigabyte ? $gigabyte : $share;
    }
    my $maxconf = _get_max_conf_suffixed_size($Girocco::Config::max_gc_window_memory_size);
    if ($maxconf && $max > $maxconf) {
        $max = $maxconf;
    }
    return _make_suffixed_size($max);
}
# Compute the core.bigFileThreshold value to use for git repack.
# The starting point is 256 megabytes.  When the system memory size is known,
# that is replaced by one sixteenth of the memory, capped at 512 megabytes.
# A non-zero, valid $Girocco::Config::max_gc_big_file_threshold_size lowers
# the result further if it is smaller.
# The result is returned in suffixed form (for example "256m").
sub calc_bigfilethreshold {
	my $megabyte = 1024 * 1024;
	my $memsize = sys_memsize();
	my $limit = 256 * $megabyte;
	if ($memsize) {
		my $share = int($memsize / 16);
		$limit = $share >= 512 * $megabyte ? 512 * $megabyte : $share;
	}
	my $configured = _get_max_conf_suffixed_size($Girocco::Config::max_gc_big_file_threshold_size);
	if (defined($configured) && $configured && $limit > $configured) {
		$limit = $configured;
	}
	return _make_suffixed_size($limit);
}
# Return the value to use when deciding whether or not to re-calculate object deltas
# If there are no more than this many objects then deltas will be recomputed in
# order to create more efficient pack files.  The new_delta_threshold value
# is constrained to be at least 1000 * cpu cores and no more than 100000;
# a configured value below 1000 * cpu cores is ignored in favor of the default.
# The default is sys_memsize rounded up to the nearest multiple of 256 MB and
# then 5000 per 256 MB or 50000 if we cannot determine memory size but never
# more than 100000 or less than 1000 * cpu cores.
sub calc_redeltathreshold {
	# Treat an unknown CPU count as a single CPU.
	my $cpus = online_cpus() || 1;
	my $floor = 1000 * $cpus;
	my $ceiling = 100000;

	if (defined($Girocco::Config::new_delta_threshold) &&
	    $Girocco::Config::new_delta_threshold =~ /^\d+/) {
		my $ndt = 0 + $Girocco::Config::new_delta_threshold;
		if ($ndt >= $floor) {
			return $ndt <= $ceiling ? $ndt : $ceiling;
		}
	}

	my $calcval = 50000;
	my $memsize = sys_memsize();
	if ($memsize) {
		my $quantum = 256 * 1024 * 1024;
		$calcval = 5000 * int(($memsize + ($quantum - 1)) / $quantum);
	}

	# The limits apply to the fallback value as well as to the computed one;
	# previously they were only applied when the memory size was known.
	# The ceiling is applied last so it wins when 1000 * cpus exceeds it.
	$calcval = $floor if $calcval < $floor;
	$calcval = $ceiling if $calcval > $ceiling;
	return $calcval;
}
# Test whether a name ends in one of the reserved suffixes.
# $1 => name to test; its final ".suffix" component is lowercased and looked
#       up in %Girocco::Config::reserved_suffixes
# $2 => optional directory; if given and "$2/$1$3" exists, the answer is false
# $3 => optional extension used in the existence check, defaults to ''
# Returns 1 if the suffix is reserved (and the existence check did not veto
# it), otherwise 0.
sub has_reserved_suffix {
	no warnings; # avoid silly 'unsuccessful stat on filename with \n' warning
	my ($name, $dir, $ext) = @_;
	defined($ext) or $ext = '';
	return 0 unless defined($name);
	return 0 unless $name =~ /\.([^.]+)$/;
	my $suffix = lc($1);
	return 0 unless exists $Girocco::Config::reserved_suffixes{$suffix};
	return (defined($dir) && -e "$dir/$name$ext") ? 0 : 1;
}
# mostly undoes effect of `use CGI::Carp qw(fatalsToBrowser);`
# mostly undoes effect of `use CGI::Carp qw(warningsToBrowser);`
# Removes any __DIE__ and __WARN__ signal handlers and installs replacement
# CORE::GLOBAL::die and CORE::GLOBAL::warn routines that write the message
# to STDERR.
sub noFatalsToBrowser {
	delete $SIG{__DIE__};
	delete $SIG{__WARN__};
	undef *CORE::GLOBAL::die;
	*CORE::GLOBAL::die = sub {
		no warnings;
		# Capture the exit status first, before anything can disturb $! or $?.
		my $exitcode = $! || ($? >> 8) || 255;
		my (undef, $file, $line) = caller(0);
		my $text = @_ ? join("", @_) : "";
		$text = "Died" if $text eq "";
		# Only add the location when the message is not newline terminated.
		$text .= " at " . $file . " line " . $line . ".\n" unless $text =~ /\n$/;
		# Inside an eval ($^S true) raise a real exception so it can be caught.
		die $text if $^S;
		printf STDERR "%s", $text;
		exit($exitcode);
	};
	undef *CORE::GLOBAL::warn;
	*CORE::GLOBAL::warn = sub {
		no warnings;
		my (undef, $file, $line) = caller(0);
		my $text = @_ ? join("", @_) : "";
		$text = "Warning: something's wrong" if $text eq "";
		# Only add the location when the message is not newline terminated.
		$text .= " at " . $file . " line " . $line . ".\n" unless $text =~ /\n$/;
		printf STDERR "%s", $text;
	};
}
# mimics Git's symref reading but only for HEAD
# $_[0] => directory containing the HEAD file or symlink
# returns undef on failure otherwise a string that is
# either an all-hex (lowercase) value or starts with "refs/"
# The returned string never contains a trailing newline.
sub read_HEAD_ref {
	my $headpath = $_[0] . "/HEAD";
	if (-l $headpath) {
		# Symlink style HEAD: the link target itself is the ref name.
		# \z (not $) so a target ending in a newline is rejected rather
		# than returned with the newline attached.
		my $rl = readlink($headpath);
		return defined($rl) && $rl =~ m,^refs/[^\x00-\x1f \x7f~^:\\*?[]+\z, ? $rl : undef;
	}
	open my $fd, '<', $headpath or return undef;
	my $hv;
	{
		# slurp the whole file
		local $/ = undef;
		$hv = <$fd>;
	}
	close $fd;
	defined($hv) or return undef;
	chomp $hv;
	# chomp removes only one newline and $ still matches before a final
	# newline, so return the captured text rather than $hv itself.
	$hv =~ m,^ref:\s*(refs/[^\x00-\x1f \x7f~^:\\*?[]+)$, and return $1;
	$hv =~ m/^([0-9a-fA-F]{40,})$/ and return lc($1);
	return undef;
}
# Like read_HEAD_ref (same arguments) except that only results starting
# with "refs/" followed by at least one more character are passed through;
# anything else yields undef.
sub read_HEAD_symref {
	my $ref = read_HEAD_ref(@_);
	return undef unless defined($ref);
	return $ref =~ m,^refs/., ? $ref : undef;
}
# similar to Git's test except that GIT_OBJECT_DIRECTORY is ignored
# $1 => candidate directory
# Returns undef if the argument is undefined, empty or not a directory.
# Returns false if the "objects" or "refs" subdirectories are missing or
# not searchable, or if HEAD cannot be read or has unrecognized contents.
# Returns true if HEAD is a dangling symlink to "refs/..." or if its
# contents start with "ref:" naming "refs/..." or with 40 lowercase hex digits.
sub is_git_dir {
	my ($gitdir) = @_;
	return undef unless defined($gitdir) && $gitdir ne "" && -d $gitdir;
	foreach my $subdir (qw(objects refs)) {
		return 0 unless -d "$gitdir/$subdir" && -x "$gitdir/$subdir";
	}
	my $head = "$gitdir/HEAD";
	if (-l $head) {
		my $target = readlink($head);
		return 0 unless defined($target) && $target =~ m,^refs/.,;
		# a dangling symlink is accepted as is; otherwise the link's
		# target is read below like a regular HEAD file
		return 1 unless -e $head;
	}
	open(my $fh, '<', $head) or return 0;
	my $contents = do { local $/; <$fh> };
	close $fh;
	return 0 unless defined($contents);
	chomp $contents;
	return 1 if $contents =~ m,^ref:\s*refs/.,;
	return $contents =~ /^[0-9a-f]{40}/;
}
# Returns a PATH properly prefixed which guarantees that Git is found and the
# basedir/bin utilities are found as intended.  $ENV{PATH} is LEFT UNCHANGED!
# Caller is responsible for assigning result to $ENV{PATH} or otherwise
# arranging for it to be used.  If $ENV{PATH} already has the proper prefix
# then it's returned as-is (making this function idempotent).
# The Git exec path is taken from the last usable var_git_exec_path="..."
# line of $Girocco::Config::basedir/shlib_vars.sh or, failing that, from
# the output of `git --exec-path`.
# Will die if it cannot determine a suitable full PATH.
# Result is cached so all calls after the first are practically free.
# The caller's $_ is left untouched.
my $var_git_exec_path;
sub util_path {
	if (!defined($var_git_exec_path)) {
		defined($Girocco::Config::basedir) && $Girocco::Config::basedir ne "" &&
		-d $Girocco::Config::basedir && -r _ && -x _ or
			die "invalid \$Girocco::Config::basedir setting: $Girocco::Config::basedir\n";
		my $varsfile = $Girocco::Config::basedir . "/shlib_vars.sh";
		if (-f $varsfile && -r _) {
			my $vars;
			if (open $vars, '<', $varsfile) {
				# last value for var_git_exec_path wins
				# A lexical is used for the line because a bare
				# while (<$vars>) would overwrite the caller's $_
				while (my $line = <$vars>) {
					chomp $line;
					substr($line, 0, 19) eq "var_git_exec_path=\"" or next;
					substr($line, -1, 1) eq "\"" or next;
					my $xd = substr($line, 19, -1);
					$var_git_exec_path = $xd if -d $xd && -r _ && -x _;
				}
				close $vars;
			}
		}
		if (!defined($var_git_exec_path)) {
			# fall back to asking Git itself
			my $xd = get_git("--exec-path");
			$var_git_exec_path = $xd if defined($xd) &&
				(chomp $xd, $xd) ne "" && -d $xd && -r _ && -x _;
		}
		defined($var_git_exec_path) or
			die "could not determine \$(git --exec-path) value\n";
	}
	my $prefix = "$var_git_exec_path:$Girocco::Config::basedir/bin:";
	if (substr($ENV{PATH}, 0, length($prefix)) eq $prefix) {
		return $ENV{PATH};
	} else {
		return $prefix . $ENV{PATH};
	}
}
# Note that Perl performs a "shellish" test in the Perl_do_exec3 function from doio.c,
# but it has slightly different semantics in that whitespace does not automatically
# make something "shellish".  The semantics used here more closely match Git's
# semantics so that Girocco will provide an interpretation more similar to Git's.
# Returns nothing (undef in scalar context) for an undefined argument, 1 if
# the argument contains whitespace or a shell metacharacter, otherwise 0.
sub is_shellish {
	my $text = shift;
	return unless defined($text);
	# any metacharacter or whitespace makes it shellish
	return $text =~ m#[][\$&*(){}'";:=\\|?<>~`\#\s]# ? 1 : 0;
}
# Works just like the shlib.sh function git_add_config
# except it takes two arguments, first the variable name, second the value
# For example: git_add_config("gc.auto", "0")
# The single-quoted 'name=value' item is appended to
# $ENV{GIT_CONFIG_PARAMETERS}, separated from any existing contents by a space.
# Nothing is done if either argument is undefined or the name is empty.
# No extra quoting is performed!
# If the name or value requires special quoting, it must be provided by the caller!
# Note this function will only be effective when running Git 1.7.3 or later
sub git_add_config {
	my ($name, $val) = @_;
	return unless defined($name) && defined($val);
	return if $name eq "";
	my $params = $ENV{GIT_CONFIG_PARAMETERS};
	$params = '' unless defined($params);
	$params .= " " if $params ne "";
	$ENV{GIT_CONFIG_PARAMETERS} = $params . "'" . $name . '=' . $val . "'";
}