make-config-h.sh: check for snprintf function
[girocco.git] / src / peek_packet.c
blob220e2dcd914489e1703381fef01281be8c725621
1 /*
3 peek_packet.c -- peek_packet utility to peek at incoming git-daemon request
4 Copyright (C) 2015,2020 Kyle J. McKay. All rights reserved.
6 This program is free software; you can redistribute it and/or
7 modify it under the terms of the GNU General Public License
8 as published by the Free Software Foundation; either version 2
9 of the License, or (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
23 This utility is intended to be used by a script front end to git daemon
24 running in inetd mode. The first thing the script does is call this utility
25 which attempts to peek the first incoming Git packet off the connection
26 and then output contents after the initial 4-character hex length up to but
27 excluding the first \0 character.
29 At that point the script can validate the incoming request and if it chooses
30 to allow it then exec git daemon to process it. Since the packet was peeked
31 it's still there to be read by git daemon.
33 Note that there is a hard-coded timeout of 30 seconds and a hard-coded limit
34 of PATH_MAX for the length of the initial packet.
36 On failure a non-zero exit code is returned. On success a 0 exit code is
37 returned and peeked text is output to stdout.
39 The connection to be peeked must be fd 0 and will have SO_KEEPALIVE set on it.
41 This utility does not take any arguments and ignores any that are given.
43 The output of a successful peek should be one of these:
45 git-upload-pack /<...>
46 git-upload-archive /<...>
47 git-receive-pack /<...>
49 where "<...>" is replaced with the repository path so, for example, doing
50 "git ls-remote git://example.com/foo.git" would result in this output:
52 git-upload-pack /foo.git
54 Note that the first character could be a ~ instead of a /, but it's
55 probably best to reject those.
57 The output is guaranteed to not contain any bytes with a value less than
58 %x20 except for possibly tab (%x09) characters. Any request that does
59 will produce no output and return a non-zero exit code.
61 If the extra optional "host=" parameter is present, then an additional
62 second line is output in the format:
64 host=<hostname>
66 where <hostname> is the host name only from the extra "host=" parameter
67 that's been lowercased and had a trailing "." removed (but only if that
68 doesn't create the empty string) and had any surrounding '[' and ']' removed
69 from a literal IPv6 address. The <hostname> is guaranteed to only contain
70 bytes in the range %x21-%xFF.
72 If the extra optional "host=" parameter contains a ":" <portnum> suffix
73 then an additional third line will be output of the format:
75 port=<portnum>
77 If just the ":" was present <portnum> will be the empty string otherwise
78 it's guaranteed to be a decimal number with no leading zeros in the range
79 1..65535.
81 For example, this git command:
83 git ls-remote git://example.com/repo.git
85 Will result in peek_packet producing these two lines (unless a very, very
86 old version of Git was used in which case only the first line):
88 git-upload-pack /repo.git
89 host=example.com
91 This git command:
93 git ls-remote git://[::1]:8765/repo.git
95 Will result in peek_packet producing these three lines (unless a very, very
96 old version of Git was used in which case only the first line):
98 git-upload-pack /repo.git
99 host=::1
100 port=8765
102 If the incoming packet looks invalid (or a timeout occurs) then no output
103 is produced, but a non-zero exit code is set.
109 ;; The Git packet protocol is defined as follows in RFC 5234+7405 syntax
112 BYTE = %x00-FF
114 DIGIT = "0" / "1" / "2" / "3" / "4" / "5" / "6" / "7" / "8" / "9"
116 ; Note that hexdigits are case insensitive
117 HEX-DIGIT = DIGIT / "a" / "b" / "c" / "d" / "e" / "f"
119 PKT-LENGTH = 4HEX-DIGIT ; represents a hexadecimal big-endian non-negative
120 ; value. a length of "0002" or "0003" is invalid.
121 ; lengths "0000" and "0001" have special meaning.
123 PKT-DATA = *BYTE ; first 4 <BYTE>s MUST NOT be %s"ERR "
125 PKT = FLUSH-PKT / DELIM-PKT / ERR-PKT / PROTOCOL-PKT
127 FLUSH-PKT = "0000"
129 DELIM-PKT = "0001"
131 PROTOCOL-PKT = PKT-LENGTH PKT-DATA ; PKT-DATA must contain exactly
132 ; PKT-LENGTH - 4 bytes
134 ERR-PKT = PKT-LENGTH ERR-DATA; ERR-DATA must contain exactly PKT-LENGTH - 4 bytes
136 ERR-DATA = %s"ERR " *BYTE ; Note that "ERR " *IS* case sensitive
139 ;; The first packet sent by a client connecting to a "Git Transport" server
140 ;; has the <GIT-REQUEST-PKT> format
143 GIT-REQUEST-PKT = PKT-LENGTH GIT-REQUEST-DATA ; GIT-REQUEST-DATA must contain
144 ; exactly PKT-LENGTH - 4 bytes
146 ; Normally if %x0A is present it's the final byte (very old Git versions)
147 ; Normally if %x00 is present then %x0A is not (modern Git versions)
148 ; But the current Git versions do parse the %x0A.00 form correctly
149 GIT-REQUEST-DATA = GIT-COMMAND [EXTRA-ARGS]
151 GIT-COMMAND = REQUEST-COMMAND %x20 PATHNAME [%x0A]
153 ; these are all case sensitive
154 REQUEST-COMMAND = %s"git-upload-pack" /
155 %s"git-receive-pack" /
156 %s"git-upload-archive"
158 PATHNAME = NON-NULL-BYTES
160 EXTRA-ARGS = %x00 HOST-ARG-TRUNCATED /
161 %x00 [HOST-ARG] [%x00 EXTRA-PARAMS]
163 HOST-ARG-TRUNCATED = HOST-PARAM HOST-NAME [ ":" [PORTNUM] ]
165 HOST-ARG = HOST-ARG-TRUNCATED %x00
167 ; "host=" is case insensitive
168 HOST-PARAM = "host="
170 HOST-NAME = NON-NULL-BYTES ; should be a valid DNS name
171 ; or IPv4 literal
172 ; or "[" IPv6 literal "]"
173 ; a ":" is only allowed between
174 ; the "[" and "]" of an IPv6 literal
176 ; PORTNUM matches 1..65535 with no leading zeros allowed
177 PORTNUM = ( "1" / "2" / "3" / "4" / "5" ) *4DIGIT /
178 "6" *3DIGIT /
179 "6" ( "0" / "1" / "2" / "3" / "4" ) 3DIGIT /
180 "65" ( "0" / "1" / "2" / "3" / "4" ) 2DIGIT /
181 "655" ( "0" / "1" / "2" ) DIGIT /
182 "6553" ( "0" / "1" / "2" / "3" / "4" / "5" ) /
183 ( "7" / "8" / "9" ) *3DIGIT
186 EXTRA-PARAMS = *EXTRA-PARAM [EXTRA-PARAM-TRUNCATED]
188 EXTRA-PARAM-TRUNCATED = NON-NULL-BYTES
190 EXTRA-PARAM = EXTRA-PARAM-TRUNCATED %x00
192 NON-NULL-BYTES = *NON-NULL-BYTE
194 NON-NULL-BYTE = %x01-FF
198 #ifdef CONFIG_H
199 #include CONFIG_H
200 #endif
202 #include <stdio.h>
203 #include <string.h>
204 #include <stdlib.h>
205 #include <unistd.h>
206 #include <limits.h>
207 #include <time.h>
208 #include <signal.h>
209 #include <sys/types.h>
210 #include <sys/socket.h>
/* Note that mod_reqtimeout has a default configuration of 20 seconds
 * maximum to wait for the first byte of the initial request line and
 * then no more than 40 seconds total, but after the first byte is
 * received the rest must arrive at 500 bytes/sec or faster.  That
 * means 10000 bytes minimum in 40 seconds.  We do not allow the
 * initial Git packet to be longer than BUFF_MAX (PATH_MAX, which is
 * typically either 1024 or 4096, but never less than 4096).  And since
 * 20 + 1024/500 = 22.048 and 20 + 4096/500 = 28.192 using 30 seconds
 * for a total timeout is quite reasonable in comparison to
 * mod_reqtimeout's default conf.
 */

#define TIMEOUT_SECS 30 /* no more than 30 seconds for initial packet */

#define POLL_QUANTUM 100000U /* how often to poll in microseconds */

/* BUFF_MAX is the larger of PATH_MAX and 4096; it sizes the peek buffer */
#if !defined(PATH_MAX) || PATH_MAX+0 < 4096
#define BUFF_MAX 4096
#else
#define BUFF_MAX PATH_MAX
#endif
/* avoid requiring the C library to provide strnlen
**
** returns the number of bytes in s before the first NUL, but never looks at
** more than maxlen bytes (so the result is always <= maxlen)
** returns 0 if s is NULL */
static size_t xstrnlen(const char *s, size_t maxlen)
{
	size_t l = 0;

	if (!s)
		return l;
	while (l < maxlen && *s) {
		++s;
		++l;
	}
	return l;
}
/* lowercases 'A'..'Z' only and leaves every other value untouched
** NOTE: the argument is evaluated more than once, so it must be free of
** side effects */
#define LC(c) (((c)<'A'||(c)>'Z')?(c):((c)+('a'-'A')))

/* returns >0 if m1 and m2 are NOT equal comparing first len bytes
** returns 0 if m1 and m2 ARE equal but ignoring case (POSIX locale)
** essentially the same as the non-existent memcasecmp except that only
** a 0 or >0 result is possible and a >0 result only means not-equal
** (the >0 value is the count of bytes left starting at the first mismatch)
** a len of 0 always compares equal */
static size_t xmemcaseneql(const char *m1, const char *m2, size_t len)
{
	for (; len; --len, ++m1, ++m2) {
		char c1 = *m1;
		char c2 = *m2;

		c1 = LC(c1);
		c2 = LC(c2);
		if (c1 != c2)
			break;
	}
	return len;
}
/* returns the value (0..15) of the hexadecimal digit c, accepting both
** upper and lower case letters, or -1 if c is not a hexadecimal digit */
static int xdig(char c)
{
	if ('0' <= c && c <= '9')
		return c - '0';
	if ('a' <= c && c <= 'f')
		return c - 'a' + 10;
	if ('A' <= c && c <= 'F')
		return c - 'A' + 10;
	return -1;
}
271 static char buffer[BUFF_MAX];
272 static time_t expiry;
274 /* Ideally we could just use MSG_PEEK + MSG_WAITALL, and that works nicely
275 * on BSD-type distros. Unfortunately very bad things happen on Linux with
276 * that combination -- a CPU core runs at 100% until all the data arrives.
277 * So instead we omit the MSG_WAITALL and poll every POLL_QUANTUM interval
278 * to see if we've satisfied the requested amount yet.
280 static int recv_peekall(int fd, void *buff, size_t len)
282 int ans;
283 while ((ans = recv(fd, buff, len, MSG_PEEK)) > 0 && (size_t)ans < len) {
284 if (time(NULL) > expiry)
285 exit(2);
286 usleep(POLL_QUANTUM);
288 return ans < 0 ? -1 : (int)len;
/* SIGALRM handler: terminates the process immediately with status 2
 * _exit is used (not exit) so that nothing else runs on the way out;
 * the signal number itself is of no interest */
static void handle_sigalrm(int s)
{
	(void)s;
	_exit(2);
}
/* cancels any pending alarm; main registers this to run at exit */
static void clear_alarm(void)
{
	alarm(0);
}
302 static int parse_host_and_port(char *ptr, size_t zlen, char **host,
303 size_t *hlen, const char **port, size_t *plen);
305 static size_t has_controls(const void *_ptr, size_t zlen);
307 int main(int argc, char *argv[])
309 int len;
310 int xvals[4];
311 char hexlen[4];
312 size_t pktlen, zlen, gitlen, hlen=0, plen=0;
313 char *ptr, *gitcmd, *host=NULL;
314 const char *pktend, *port=NULL;
315 int optval;
317 (void)argc;
318 (void)argv;
320 /* Ideally calling recv with MSG_PEEK would never, ever hang. However
321 * even with MSG_PEEK, recv still waits for at least the first message
322 * to arrive on the socket (unless it's non-blocking). For this reason
323 * we set an alarm timer at TIMEOUT_SECS + 2 to make sure we don't
324 * remain stuck in the recv call waiting for the first message.
326 signal(SIGALRM, handle_sigalrm);
327 alarm(TIMEOUT_SECS + 2); /* Some slop as this shouldn't be needed */
328 atexit(clear_alarm); /* Probably not necessary, but do it anyway */
330 expiry = time(NULL) + TIMEOUT_SECS;
332 optval = 1;
333 if (setsockopt(0, SOL_SOCKET, SO_KEEPALIVE, &optval, sizeof(optval)))
334 return 1;
336 len = recv_peekall(0, hexlen, 4);
337 if (len != 4)
338 return 1;
340 if ((xvals[0]=xdig(hexlen[0])) < 0 ||
341 (xvals[1]=xdig(hexlen[1])) < 0 ||
342 (xvals[2]=xdig(hexlen[2])) < 0 ||
343 (xvals[3]=xdig(hexlen[3])) < 0)
344 return 1;
345 pktlen = ((unsigned)xvals[0] << 12) |
346 ((unsigned)xvals[1] << 8) |
347 ((unsigned)xvals[2] << 4) |
348 (unsigned)xvals[3];
349 if (pktlen < 22 || pktlen > sizeof(buffer))
350 return 1;
352 len = recv_peekall(0, buffer, pktlen);
353 if (len != (int)pktlen)
354 return 1;
356 /* skip over 4-byte <PKT-LENGTH> */
357 pktend = buffer + pktlen;
358 ptr = buffer + 4;
360 /* thanks to check above, pktend - ptr always >= 18 */
361 if (memcmp(ptr, "git-", 4)) /* quick sanity check */
362 return 1;
364 /* validate the entire packet format now */
366 /* find length of <GIT-COMMAND> */
367 gitlen = xstrnlen(ptr, pktend - ptr);
368 /* thanks to the quick sanity check, gitlen always >= 4 */
369 gitcmd = ptr;
370 /* skip over <GIT-COMMAND> */
371 ptr += gitlen + 1; /* not a problem if ptr > pktend */
372 if (gitcmd[gitlen-1] == '\n') {
373 /* strip trailing \n from <GIT-COMMAND> */
374 gitcmd[--gitlen] = '\0';
376 if (has_controls(gitcmd, gitlen))
377 return 1; /* bad bytes in command */
379 /* now comes the optional <HOST-ARG> */
380 if (ptr < pktend && (pktend - ptr) >= 5 &&
381 !xmemcaseneql(ptr, "host=", 5)) {
382 /* skip over <HOST-PARAM> part */
383 ptr += 5;
384 zlen = xstrnlen(ptr, pktend - ptr);
385 if (!parse_host_and_port(ptr, zlen, &host, &hlen, &port, &plen))
386 /* failed to parse rest of <HOST-ARG-TRUNCATED> */
387 return 1;
388 /* skip over rest of <HOST-ARG>, okay if ptr ends up > pktend */
389 ptr += zlen + 1;
392 if (ptr < pktend && *ptr)
393 return 1; /* invalid, missing required %x00 before <EXTRA-PARMS> */
394 ++ptr; /* skip over %x00 */
396 /* now skip over the rest of the extra args with minimal validation */
397 while (ptr < pktend) {
398 zlen = xstrnlen(ptr, pktend - ptr);
399 /* if (zlen) process_arg(ptr, zlen); */
400 ptr += zlen + 1; /* okay if ptr ends up > pktend */
403 if (ptr < pktend)
404 return 1; /* not a valid <GIT-REQUEST-PKT> */
406 printf("%.*s\n", (int)gitlen, gitcmd);
407 if (host != NULL)
408 printf("host=%.*s\n", (int)hlen, host);
409 if (port != NULL)
410 printf("port=%.*s\n", (int)plen, port);
412 return 0;
static size_t has_controls_or_spaces(const void *ptr, size_t zlen);

/* Splits the zlen bytes at ptr (the text following "host=") into a host name
** and an optional port number.
**
** On success 1 is returned and:
**   *host, *hlen  describe the host name, which has been lowercased in place
**                 inside the ptr buffer; a "[...]" literal has its brackets
**                 excluded; otherwise one trailing "." is dropped unless that
**                 would leave the name empty
**   *port, *plen  describe the port digits with any leading zeros skipped;
**                 *port is NULL when there was no ":" at all; *plen is 0
**                 (with *port non-NULL) when nothing followed the ":"
**
** 0 is returned (and the outputs must not be relied upon) if ptr is NULL,
** if the text contains any byte <= %x20, if a "[" has no matching "]", if
** anything other than ":" follows the "]", or if the port is not a decimal
** number in the range 1..65535 written with at most 5 characters. */
static int parse_host_and_port(char *ptr, size_t zlen, char **host,
	size_t *hlen, const char **port, size_t *plen)
{
	const char *colon = NULL;

	if (!ptr)
		return 0; /* bogus ptr argument */
	if (has_controls_or_spaces(ptr, zlen))
		return 0; /* bogus host= value */
	if (zlen >= 1 && *ptr == '[') {
		/* IPv6 literal */
		const char *ebrkt = (const char *)memchr(ptr, ']', zlen);

		if (!ebrkt)
			return 0; /* missing closing ']' */
		*host = ptr + 1;
		*hlen = ebrkt - ptr - 1; /* yes, could be 0 */
		if ((size_t)(++ebrkt - ptr) < zlen) {
			if (*ebrkt != ':')
				return 0; /* missing ':' after ']' */
			colon = ebrkt;
		}
	} else {
		colon = (const char *)memchr(ptr, ':', zlen);
		*host = ptr;
		*hlen = colon ? ((size_t)(colon - ptr)) : zlen;
		if (*hlen > 1 && ptr[*hlen - 1] == '.')
			--*hlen;
	}
	if (colon) {
		/* from here on zlen counts the bytes after the ':' */
		zlen = (ptr + zlen) - ++colon;
		if (zlen > 5)
			return 0; /* invalid port number */
		if (zlen == 0) {
			/* empty port */
			*port = colon;
			*plen = 0;
		} else {
			unsigned pval;
			const char *pptr;
			size_t pl;

			/* skip leading zeros but always keep the last digit */
			while (zlen > 1 && *colon == '0') {
				++colon;
				--zlen;
			}
			pptr = colon;
			pl = zlen;
			pval = 0;
			while (zlen) {
				if (*colon < '0' || *colon > '9')
					return 0; /* invalid port number */
				pval *= 10;
				pval += (*colon++) - '0';
				--zlen;
			}
			if (!pval || pval > 65535)
				return 0; /* invalid port number */
			*port = pptr;
			*plen = pl;
		}
	} else {
		*port = NULL;
		*plen = 0;
	}
	/* lowercase the host name in place */
	ptr = *host;
	zlen = *hlen;
	while (zlen) {
		char c = *ptr;

		c = LC(c);
		*ptr++ = c;
		--zlen;
	}
	return 1;
}
/* the tab character %x09 is not considered a control here
**
** returns 0 if none of the first zlen bytes at _ptr has a value below %x20
** (other than tab) or if _ptr is NULL
** otherwise returns >0 (the count of bytes left starting at the first
** offending byte) */
static size_t has_controls(const void *_ptr, size_t zlen)
{
	const unsigned char *ptr = (const unsigned char *)_ptr;

	if (!ptr)
		return 0;
	while (zlen && (*ptr >= ' ' || *ptr == '\t')) {
		++ptr;
		--zlen;
	}
	return zlen;
}
/* here tab and space are both rejected along with every other byte <= %x20
**
** returns 0 if all of the first zlen bytes at _ptr have a value above %x20
** or if _ptr is NULL
** otherwise returns >0 (the count of bytes left starting at the first
** offending byte) */
static size_t has_controls_or_spaces(const void *_ptr, size_t zlen)
{
	const unsigned char *ptr = (const unsigned char *)_ptr;

	if (!ptr)
		return 0;
	while (zlen && *ptr > ' ') {
		++ptr;
		--zlen;
	}
	return zlen;
}