3 peek_packet.c -- peek_packet utility to peek at incoming git-daemon request
4 Copyright (C) 2015,2020 Kyle J. McKay. All rights reserved.
6 This program is free software; you can redistribute it and/or
7 modify it under the terms of the GNU General Public License
8 as published by the Free Software Foundation; either version 2
9 of the License, or (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
23 This utility is intended to be used by a script front end to git daemon
24 running in inetd mode. The first thing the script does is call this utility
25 which attempts to peek the first incoming Git packet off the connection
26 and then output contents after the initial 4-character hex length up to but
27 excluding the first \0 character.
29 At that point the script can validate the incoming request and if it chooses
30 to allow it then exec git daemon to process it. Since the packet was peeked
31 it's still there to be read by git daemon.
33 Note that there is a hard-coded timeout of 30 seconds and a hard-coded limit
34 of PATH_MAX (or 4096 if PATH_MAX is undefined or smaller than 4096) for the length of the initial packet.
36 On failure a non-zero exit code is returned. On success a 0 exit code is
37 returned and peeked text is output to stdout.
39 The connection to be peeked must be fd 0 and will have SO_KEEPALIVE set on it.
41 This utility does not take any arguments and ignores any that are given.
43 The output of a successful peek should be one of these:
45 git-upload-pack /<...>
46 git-upload-archive /<...>
47 git-receive-pack /<...>
49 where "<...>" is replaced with the repository path so, for example, doing
50 "git ls-remote git://example.com/foo.git" would result in this output:
52 git-upload-pack /foo.git
54 Note that the first character could be a ~ instead of a /, but it's
55 probably best to reject those.
57 The output is guaranteed to not contain any bytes with a value less than
58 %x20 except for possibly tab (%x09) characters. Any request that does
59 will produce no output and return a non-zero exit code.
61 If the extra optional "host=" parameter is present, then an additional
62 second line is output in the format: host=<hostname>
66 where <hostname> is the host name only from the extra "host=" parameter
67 that's been lowercased and had a trailing "." removed (but only if that
68 doesn't create the empty string) and had any surrounding '[' and ']' removed
69 from a literal IPv6 address. The <hostname> is guaranteed to only contain
70 bytes in the range %x21-%xFF.
72 If the extra optional "host=" parameter contains a ":" <portnum> suffix
73 then an additional third line will be output in the format: port=<portnum>
77 If just the ":" was present <portnum> will be the empty string; otherwise
78 it's guaranteed to be a decimal number with no leading zeros in the range 1..65535.
81 For example, this git command:
83 git ls-remote git://example.com/repo.git
85 Will result in peek_packet producing these two lines (unless a very, very
86 old version of Git was used in which case only the first line):
88 git-upload-pack /repo.git
89 host=example.com
91 And this git command:
93 git ls-remote git://[::1]:8765/repo.git
95 Will result in peek_packet producing these three lines (unless a very, very
96 old version of Git was used in which case only the first line):
98 git-upload-pack /repo.git
99 host=::1
100 port=8765
102 If the incoming packet looks invalid (or a timeout occurs) then no output
103 is produced, but a non-zero exit code is set.
109 ;; The Git packet protocol is defined as follows in RFC 5234+7405 syntax
114 DIGIT = "0" / "1" / "2" / "3" / "4" / "5" / "6" / "7" / "8" / "9"
116 ; Note that hexdigits are case insensitive
117 HEX-DIGIT = DIGIT / "a" / "b" / "c" / "d" / "e" / "f"
119 PKT-LENGTH = 4HEX-DIGIT ; represents a hexadecimal big-endian non-negative
120 ; value. a length of "0002" or "0003" is invalid.
121 ; lengths "0000" and "0001" have special meaning.
123 PKT-DATA = *BYTE ; first 4 <BYTE>s MUST NOT be %s"ERR "
125 PKT = FLUSH-PKT / DELIM-PKT / ERR-PKT / PROTOCOL-PKT
131 PROTOCOL-PKT = PKT-LENGTH PKT-DATA ; PKT-DATA must contain exactly
132 ; PKT-LENGTH - 4 bytes
134 ERR-PKT = PKT-LENGTH ERR-DATA; ERR-DATA must contain exactly PKT-LENGTH - 4 bytes
136 ERR-DATA = %s"ERR " *BYTE ; Note that "ERR " *IS* case sensitive
139 ;; The first packet sent by a client connecting to a "Git Transport" server
140 ;; has the <GIT-REQUEST-PKT> format
143 GIT-REQUEST-PKT = PKT-LENGTH GIT-REQUEST-DATA ; GIT-REQUEST-DATA must contain
144 ; exactly PKT-LENGTH - 4 bytes
146 ; Normally if %x0A is present it's the final byte (very old Git versions)
147 ; Normally if %x00 is present then %x0A is not (modern Git versions)
148 ; But the current Git versions do parse the %x0A.00 form correctly
149 GIT-REQUEST-DATA = GIT-COMMAND [EXTRA-ARGS]
151 GIT-COMMAND = REQUEST-COMMAND %x20 PATHNAME [%x0A]
153 ; these are all case sensitive
154 REQUEST-COMMAND = %s"git-upload-pack" /
155 %s"git-receive-pack" /
156 %s"git-upload-archive"
158 PATHNAME = NON-NULL-BYTES
160 EXTRA-ARGS = %x00 HOST-ARG-TRUNCATED /
161 %x00 [HOST-ARG] [%x00 EXTRA-PARAMS]
163 HOST-ARG-TRUNCATED = HOST-PARAM HOST-NAME [ ":" [PORTNUM] ]
165 HOST-ARG = HOST-ARG-TRUNCATED %x00
167 ; "host=" is case insensitive
168 HOST-PARAM = "host="
170 HOST-NAME = NON-NULL-BYTES ; should be a valid DNS name
172 ; or "[" IPv6 literal "]"
173 ; a ":" is only allowed between
174 ; the "[" and "]" of an IPv6 literal
176 ; PORTNUM matches 1..65535 with no leading zeros allowed
177 PORTNUM = ( "1" / "2" / "3" / "4" / "5" ) *4DIGIT /
179 "6" ( "0" / "1" / "2" / "3" / "4" ) *3DIGIT /
180 "65" ( "0" / "1" / "2" / "3" / "4" ) *2DIGIT /
181 "655" ( "0" / "1" / "2" ) *1DIGIT /
182 "6553" ( "0" / "1" / "2" / "3" / "4" / "5" ) /
183 "6" ( "6" / "7" / "8" / "9" ) *2DIGIT /
184 ( "7" / "8" / "9" ) *3DIGIT
186 EXTRA-PARAMS = *EXTRA-PARAM [EXTRA-PARAM-TRUNCATED]
188 EXTRA-PARAM-TRUNCATED = NON-NULL-BYTES
190 EXTRA-PARAM = EXTRA-PARAM-TRUNCATED %x00
192 NON-NULL-BYTES = *NON-NULL-BYTE
194 NON-NULL-BYTE = %x01-FF
205 #include <sys/types.h>
206 #include <sys/socket.h>
208 /* Note that mod_reqtimeout has a default configuration of 20 seconds
209 * maximum to wait for the first byte of the initial request line and
210 * then no more than 40 seconds total, but after the first byte is
211 * received the rest must arrive at 500 bytes/sec or faster. That
212 * means 10000 bytes minimum in 40 seconds. We do not allow the
213 * initial Git packet to be longer than PATH_MAX (which is typically
214 * either 1024 or 4096). And since 20 + 1024/500 = 22.048 and
215 * 20 + 4096/500 = 28.192 using 30 seconds for a total timeout is
216 * quite reasonable in comparison to mod_reqtimeout's default conf.
219 #define TIMEOUT_SECS 30 /* no more than 30 seconds for initial packet */
221 #define POLL_QUANTUM 100000U /* how often to poll in microseconds */
223 #if !defined(PATH_MAX) || PATH_MAX+0 < 4096
224 #define BUFF_MAX 4096
226 #define BUFF_MAX PATH_MAX
/* avoid requiring C99 library */
/*
 * Bounded string length: returns the number of bytes in s that precede the
 * first '\0', looking at no more than maxlen bytes.  If no '\0' occurs in
 * the first maxlen bytes the result is maxlen.  A maxlen of 0 reads nothing
 * and returns 0.
 */
static size_t xstrnlen(const char *s, size_t maxlen)
{
	size_t l = 0;

	/* the bound is tested first so s is never read at or past maxlen */
	while (l < maxlen && *s) {
		++s;
		++l;
	}
	return l;
}
238 #define LC(c) (((c)<'A'||(c)>'Z')?(c):((c)+('a'-'A')))
240 /* returns >0 if m1 and m2 are NOT equal comparing first len bytes
241 ** returns 0 if m1 and m2 ARE equal but ignoring case (POSIX locale)
242 ** essentially the same as the non-existent memcasecmp except that only
243 ** a 0 or >0 result is possible and a >0 result only means not-equal */
244 static size_t xmemcaseneql(const char *m1
, const char *m2
, size_t len
)
246 for (; len
; --len
, ++m1
, ++m2
) {
/*
 * Converts one hexadecimal digit character to its numeric value.
 * Accepts '0'-'9', 'a'-'f' and 'A'-'F' (hex digits are case insensitive).
 * Returns the value 0..15 on success or a negative number if c is not a
 * hexadecimal digit; callers test the result with "< 0".
 */
static int xdig(char c)
{
	if ('0' <= c && c <= '9')
		return c - '0';
	if ('a' <= c && c <= 'f')
		return c - 'a' + 10;
	if ('A' <= c && c <= 'F')
		return c - 'A' + 10;
	return -1; /* not a hex digit */
}
267 static char buffer
[BUFF_MAX
];
268 static time_t expiry
;
270 /* Ideally we could just use MSG_PEEK + MSG_WAITALL, and that works nicely
271 * on BSD-type distros. Unfortunately very bad things happen on Linux with
272 * that combination -- a CPU core runs at 100% until all the data arrives.
273 * So instead we omit the MSG_WAITALL and poll every POLL_QUANTUM interval
274 * to see if we've satisfied the requested amount yet.
276 static int recv_peekall(int fd
, void *buff
, size_t len
)
279 while ((ans
= recv(fd
, buff
, len
, MSG_PEEK
)) > 0 && (size_t)ans
< len
) {
280 if (time(NULL
) > expiry
)
282 usleep(POLL_QUANTUM
);
284 return ans
< 0 ? -1 : (int)len
;
287 static void handle_sigalrm(int s
)
293 static void clear_alarm(void)
298 static int parse_host_and_port(char *ptr
, size_t zlen
, char **host
,
299 size_t *hlen
, const char **port
, size_t *plen
);
301 static size_t has_controls(const void *_ptr
, size_t zlen
);
303 int main(int argc
, char *argv
[])
308 size_t pktlen
, zlen
, gitlen
, hlen
=0, plen
=0;
309 char *ptr
, *gitcmd
, *host
=NULL
;
310 const char *pktend
, *port
=NULL
;
316 /* Ideally calling recv with MSG_PEEK would never, ever hang. However
317 * even with MSG_PEEK, recv still waits for at least the first message
318 * to arrive on the socket (unless it's non-blocking). For this reason
319 * we set an alarm timer at TIMEOUT_SECS + 2 to make sure we don't
320 * remain stuck in the recv call waiting for the first message.
322 signal(SIGALRM
, handle_sigalrm
);
323 alarm(TIMEOUT_SECS
+ 2); /* Some slop as this shouldn't be needed */
324 atexit(clear_alarm
); /* Probably not necessary, but do it anyway */
326 expiry
= time(NULL
) + TIMEOUT_SECS
;
329 if (setsockopt(0, SOL_SOCKET
, SO_KEEPALIVE
, &optval
, sizeof(optval
)))
332 len
= recv_peekall(0, hexlen
, 4);
336 if ((xvals
[0]=xdig(hexlen
[0])) < 0 ||
337 (xvals
[1]=xdig(hexlen
[1])) < 0 ||
338 (xvals
[2]=xdig(hexlen
[2])) < 0 ||
339 (xvals
[3]=xdig(hexlen
[3])) < 0)
341 pktlen
= ((unsigned)xvals
[0] << 12) |
342 ((unsigned)xvals
[1] << 8) |
343 ((unsigned)xvals
[2] << 4) |
345 if (pktlen
< 22 || pktlen
> sizeof(buffer
))
348 len
= recv_peekall(0, buffer
, pktlen
);
349 if (len
!= (int)pktlen
)
352 /* skip over 4-byte <PKT-LENGTH> */
353 pktend
= buffer
+ pktlen
;
356 /* thanks to check above, pktend - ptr always >= 18 */
357 if (memcmp(ptr
, "git-", 4)) /* quick sanity check */
360 /* validate the entire packet format now */
362 /* find length of <GIT-COMMAND> */
363 gitlen
= xstrnlen(ptr
, pktend
- ptr
);
364 /* thanks to the quick sanity check, gitlen always >= 4 */
366 /* skip over <GIT-COMMAND> */
367 ptr
+= gitlen
+ 1; /* not a problem if ptr > pktend */
368 if (gitcmd
[gitlen
-1] == '\n') {
369 /* strip trailing \n from <GIT-COMMAND> */
370 gitcmd
[--gitlen
] = '\0';
372 if (has_controls(gitcmd
, gitlen
))
373 return 1; /* bad bytes in command */
375 /* now comes the optional <HOST-ARG> */
376 if (ptr
< pktend
&& (pktend
- ptr
) >= 5 &&
377 !xmemcaseneql(ptr
, "host=", 5)) {
378 /* skip over <HOST-PARAM> part */
380 zlen
= xstrnlen(ptr
, pktend
- ptr
);
381 if (!parse_host_and_port(ptr
, zlen
, &host
, &hlen
, &port
, &plen
))
382 /* failed to parse rest of <HOST-ARG-TRUNCATED> */
384 /* skip over rest of <HOST-ARG>, okay if ptr ends up > pktend */
388 if (ptr
< pktend
&& *ptr
)
389 return 1; /* invalid, missing required %x00 before <EXTRA-PARMS> */
390 ++ptr
; /* skip over %x00 */
392 /* now skip over the rest of the extra args with minimal validation */
393 while (ptr
< pktend
) {
394 zlen
= xstrnlen(ptr
, pktend
- ptr
);
395 /* if (zlen) process_arg(ptr, zlen); */
396 ptr
+= zlen
+ 1; /* okay if ptr ends up > pktend */
400 return 1; /* not a valid <GIT-REQUEST-PKT> */
402 printf("%.*s\n", (int)gitlen
, gitcmd
);
404 printf("host=%.*s\n", (int)hlen
, host
);
406 printf("port=%.*s\n", (int)plen
, port
);
411 static size_t has_controls_or_spaces(const void *ptr
, size_t zlen
);
413 static int parse_host_and_port(char *ptr
, size_t zlen
, char **host
,
414 size_t *hlen
, const char **port
, size_t *plen
)
416 const char *colon
= NULL
;
417 if (!ptr
) return 0; /* bogus ptr argument */
418 if (has_controls_or_spaces(ptr
, zlen
)) return 0; /* bogus host= value */
419 if (zlen
>= 1 && *ptr
== '[') {
421 const char *ebrkt
= (const char *)memchr(ptr
, ']', zlen
);
422 if (!ebrkt
) return 0; /* missing closing ']' */
424 *hlen
= ebrkt
- ptr
- 1; /* yes, could be 0 */
425 if ((size_t)(++ebrkt
- ptr
) < zlen
) {
426 if (*ebrkt
!= ':') return 0; /* missing ':' after ']' */
430 colon
= (const char *)memchr(ptr
, ':', zlen
);
432 *hlen
= colon
? ((size_t)(colon
- ptr
)) : zlen
;
433 if (*hlen
> 1 && ptr
[*hlen
- 1] == '.')
437 zlen
= (ptr
+ zlen
) - ++colon
;
438 if (zlen
> 5) return 0; /* invalid port number */
447 while (zlen
> 1 && *colon
== '0') {
455 if (*colon
< '0' || *colon
> '9')
456 return 0; /* invalid port number */
458 pval
+= (*colon
++) - '0';
461 if (!pval
|| pval
> 65535)
462 return 0; /* invalid port number */
/* the tab character %x09 is not considered a control here */
/*
 * Scans the first zlen bytes at _ptr for control bytes, meaning any byte
 * with a value below %x20 other than tab (%x09).
 * Returns 0 if none is found (including when zlen is 0); otherwise returns
 * a non-zero value (the count of bytes from the first offending byte to
 * the end of the range).  Callers should only rely on zero vs. non-zero.
 */
static size_t has_controls(const void *_ptr, size_t zlen)
{
	/* compare as unsigned so bytes >= %x80 are not seen as negative */
	const unsigned char *ptr = (const unsigned char *)_ptr;

	while (zlen && (*ptr >= ' ' || *ptr == '\t')) {
		++ptr;
		--zlen;
	}
	return zlen;
}
493 static size_t has_controls_or_spaces(const void *_ptr
, size_t zlen
)
495 const unsigned char *ptr
= (const unsigned char *)_ptr
;
497 while (zlen
&& *ptr
> ' ') {