From 764d5c3e0109e7a4f90dca5351e2c03da15796b8 Mon Sep 17 00:00:00 2001 From: "Kyle J. McKay" Date: Tue, 6 Oct 2020 00:33:49 -0700 Subject: [PATCH] peek_packet.c: perform more validation and extract host= param Perform a bit more validation on the format of the initial incoming git daemon request packet and reject poorly formatted packets or packets with unwanted control characters. Additionally, if the "host=" parameter is found (included since ancient Git versions), extract and parse it and dump out one or two extra shell-friendly lines to provide a lowercased version of the host name and a range-checked port number. Consumers of the `peek_packet` output will need to adapt to handle the case in which more than one line is output. This will normally always happen now unless a very ancient version of Git was used as the client. Signed-off-by: Kyle J. McKay --- src/peek_packet.c | 329 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 322 insertions(+), 7 deletions(-) diff --git a/src/peek_packet.c b/src/peek_packet.c index 7f3dd73..fbbb324 100644 --- a/src/peek_packet.c +++ b/src/peek_packet.c @@ -53,6 +53,146 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. Note that the first character could be a ~ instead of a /, but it's probably best to reject those. + + The output is guaranteed to not contain any bytes with a value less than + %x20 except for possibly tab (%x09) characters. Any request that does + will produce no output and return a non-zero exit code. + + If the extra optional "host=" parameter is present, then an additional + second line is output in the format: + + host=<hostname> + + where <hostname> is the host name only from the extra "host=" parameter + that's been lowercased and had a trailing "." removed (but only if that + doesn't create the empty string) and had any surrounding '[' and ']' removed + from a literal IPv6 address. The <hostname> is guaranteed to only contain + bytes in the range %x21-%xFF. 
+ + If the extra optional "host=" parameter contains a ":<portnum>" suffix + then an additional third line will be output of the format: + + port=<portnum> + + If just the ":" was present <portnum> will be the empty string otherwise + it's guaranteed to be a decimal number with no leading zeros in the range + 1..65535. + + For example, this git command: + + git ls-remote git://example.com/repo.git + + Will result in peek_packet producing these two lines (unless a very, very + old version of Git was used in which case only the first line): + + git-upload-pack /repo.git + host=example.com + + This git command: + + git ls-remote git://[::1]:8765/repo.git + + Will result in peek_packet producing these three lines (unless a very, very + old version of Git was used in which case only the first line): + + git-upload-pack /repo.git + host=::1 + port=8765 + + If the incoming packet looks invalid (or a timeout occurs) then no output + is produced, but a non-zero exit code is set. +*/ + +/* + +;; +;; The Git packet protocol is defined as follows in RFC 5234+7405 syntax +;; + +BYTE = %x00-FF + +DIGIT = "0" / "1" / "2" / "3" / "4" / "5" / "6" / "7" / "8" / "9" + +; Note that hexdigits are case insensitive +HEX-DIGIT = DIGIT / "a" / "b" / "c" / "d" / "e" / "f" + +PKT-LENGTH = 4HEX-DIGIT ; represents a hexadecimal big-endian non-negative + ; value. a length of "0002" or "0003" is invalid. + ; lengths "0000" and "0001" have special meaning. 
+ +PKT-DATA = *BYTE ; first 4 <BYTE>s MUST NOT be %s"ERR " + +PKT = FLUSH-PKT / DELIM-PKT / ERR-PKT / PROTOCOL-PKT + +FLUSH-PKT = "0000" + +DELIM-PKT = "0001" + +PROTOCOL-PKT = PKT-LENGTH PKT-DATA ; PKT-DATA must contain exactly + ; PKT-LENGTH - 4 bytes + +ERR-PKT = PKT-LENGTH ERR-DATA; ERR-DATA must contain exactly PKT-LENGTH - 4 bytes + +ERR-DATA = %s"ERR " *BYTE ; Note that "ERR " *IS* case sensitive + +;; +;; The first packet sent by a client connecting to a "Git Transport" server +;; has the format +;; + +GIT-REQUEST-PKT = PKT-LENGTH GIT-REQUEST-DATA ; GIT-REQUEST-DATA must contain + ; exactly PKT-LENGTH - 4 bytes + +; Normally if %x0A is present it's the final byte (very old Git versions) +; Normally if %x00 is present then %x0A is not (modern Git versions) +; But the current Git versions do parse the %x0A.00 form correctly +GIT-REQUEST-DATA = GIT-COMMAND [EXTRA-ARGS] + +GIT-COMMAND = REQUEST-COMMAND %x20 PATHNAME [%x0A] + +; these are all case sensitive +REQUEST-COMMAND = %s"git-upload-pack" / + %s"git-receive-pack" / + %s"git-upload-archive" + +PATHNAME = NON-NULL-BYTES + +EXTRA-ARGS = %x00 HOST-ARG-TRUNCATED / + %x00 [HOST-ARG] [%x00 EXTRA-PARAMS] + +HOST-ARG-TRUNCATED = HOST-PARAM HOST-NAME [ ":" [PORTNUM] ] + +HOST-ARG = HOST-ARG-TRUNCATED %x00 + +; "host=" is case insensitive +HOST-PARAM = "host=" + +HOST-NAME = NON-NULL-BYTES ; should be a valid DNS name + ; or IPv4 literal + ; or "[" IPv6 literal "]" + ; a ":" is only allowed between + ; the "[" and "]" of an IPv6 literal + +; PORTNUM matches 1..65535 with no leading zeros allowed +PORTNUM = ( "1" / "2" / "3" / "4" / "5" ) *4DIGIT / + "6" / "65" [ ( "5" / "6" / "7" / "8" / "9" ) *1DIGIT ] / + "6" ( "0" / "1" / "2" / "3" / "4" ) *3DIGIT / + "65" ( "0" / "1" / "2" / "3" / "4" ) *2DIGIT / + "655" ( "0" / "1" / "2" ) *1DIGIT / + "6553" ( "0" / "1" / "2" / "3" / "4" / "5" ) / + "6" ( "6" / "7" / "8" / "9" ) *2DIGIT / + ( "7" / "8" / "9" ) *3DIGIT + +EXTRA-PARAMS = *EXTRA-PARAM [EXTRA-PARAM-TRUNCATED] + +EXTRA-PARAM-TRUNCATED = NON-NULL-BYTES + +EXTRA-PARAM = 
EXTRA-PARAM-TRUNCATED %x00 + +NON-NULL-BYTES = *NON-NULL-BYTE + +NON-NULL-BYTE = %x01-FF + */ #include @@ -86,6 +226,33 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #define BUFF_MAX PATH_MAX #endif +/* avoid requiring C99 library */ +static size_t xstrnlen(const char *s, size_t maxlen) +{ + size_t l = 0; + if (!s) return l; + while (l < maxlen && *s) { ++s; ++l; } + return l; +} + +#define LC(c) (((c)<'A'||(c)>'Z')?(c):((c)+('a'-'A'))) + +/* returns >0 if m1 and m2 are NOT equal comparing first len bytes +** returns 0 if m1 and m2 ARE equal but ignoring case (POSIX locale) +** essentially the same as the non-existent memcasecmp except that only +** a 0 or >0 result is possible and a >0 result only means not-equal */ +static size_t xmemcaseneql(const char *m1, const char *m2, size_t len) +{ + for (; len; --len, ++m1, ++m2) { + char c1 = *m1; + char c2 = *m2; + c1 = LC(c1); + c2 = LC(c2); + if (c1 != c2) break; + } + return len; +} + static int xdig(char c) { if ('0' <= c && c <= '9') @@ -128,13 +295,19 @@ static void clear_alarm(void) alarm(0); } +static int parse_host_and_port(char *ptr, size_t zlen, char **host, + size_t *hlen, const char **port, size_t *plen); + +static size_t has_controls(const void *_ptr, size_t zlen); + int main(int argc, char *argv[]) { int len; int xvals[4]; char hexlen[4]; - size_t pktlen; - const char *nullptr; + size_t pktlen, zlen, gitlen, hlen=0, plen=0; + char *ptr, *gitcmd, *host=NULL; + const char *pktend, *port=NULL; int optval; (void)argc; @@ -176,12 +349,154 @@ int main(int argc, char *argv[]) if (len != (int)pktlen) return 1; - if (memcmp(buffer+4, "git-", 4)) /* sanity check */ - return 1; - nullptr = (const char *)memchr(buffer+4, 0, pktlen-4); - if (!nullptr || nullptr < (buffer+21)) + /* skip over 4-byte */ + pktend = buffer + pktlen; + ptr = buffer + 4; + + /* thanks to check above, pktend - ptr always >= 18 */ + if (memcmp(ptr, "git-", 4)) /* quick sanity check */ return 1; - puts(buffer 
+ 4); + + /* validate the entire packet format now */ + + /* find length of */ + gitlen = xstrnlen(ptr, pktend - ptr); + /* thanks to the quick sanity check, gitlen always >= 4 */ + gitcmd = ptr; + /* skip over */ + ptr += gitlen + 1; /* not a problem if ptr > pktend */ + if (gitcmd[gitlen-1] == '\n') { + /* strip trailing \n from */ + gitcmd[--gitlen] = '\0'; + } + if (has_controls(gitcmd, gitlen)) + return 1; /* bad bytes in command */ + + /* now comes the optional */ + if (ptr < pktend && (pktend - ptr) >= 5 && + !xmemcaseneql(ptr, "host=", 5)) { + /* skip over part */ + ptr += 5; + zlen = xstrnlen(ptr, pktend - ptr); + if (!parse_host_and_port(ptr, zlen, &host, &hlen, &port, &plen)) + /* failed to parse rest of */ + return 1; + /* skip over rest of , okay if ptr ends up > pktend */ + ptr += zlen + 1; + } + + if (ptr < pktend && *ptr) + return 1; /* invalid, missing required %x00 before */ + ++ptr; /* skip over %x00 */ + + /* now skip over the rest of the extra args with minimal validation */ + while (ptr < pktend) { + zlen = xstrnlen(ptr, pktend - ptr); + /* if (zlen) process_arg(ptr, zlen); */ + ptr += zlen + 1; /* okay if ptr ends up > pktend */ + } + + if (ptr < pktend) + return 1; /* not a valid */ + + printf("%.*s\n", (int)gitlen, gitcmd); + if (host != NULL) + printf("host=%.*s\n", (int)hlen, host); + if (port != NULL) + printf("port=%.*s\n", (int)plen, port); return 0; } + +static size_t has_controls_or_spaces(const void *ptr, size_t zlen); + +static int parse_host_and_port(char *ptr, size_t zlen, char **host, + size_t *hlen, const char **port, size_t *plen) +{ + const char *colon = NULL; + if (!ptr) return 0; /* bogus ptr argument */ + if (has_controls_or_spaces(ptr, zlen)) return 0; /* bogus host= value */ + if (zlen >= 1 && *ptr == '[') { + /* IPv6 literal */ + const char *ebrkt = (const char *)memchr(ptr, ']', zlen); + if (!ebrkt) return 0; /* missing closing ']' */ + *host = ptr + 1; + *hlen = ebrkt - ptr - 1; /* yes, could be 0 */ + if 
((size_t)(++ebrkt - ptr) < zlen) { + if (*ebrkt != ':') return 0; /* missing ':' after ']' */ + colon = ebrkt; + } + } else { + colon = (const char *)memchr(ptr, ':', zlen); + *host = ptr; + *hlen = colon ? ((size_t)(colon - ptr)) : zlen; + if (*hlen > 1 && ptr[*hlen - 1] == '.') + --*hlen; + } + if (colon) { + zlen = (ptr + zlen) - ++colon; + if (zlen > 5) return 0; /* invalid port number */ + if (zlen == 0) { + /* empty port */ + *port = colon; + *plen = 0; + } else { + unsigned pval; + const char *pptr; + size_t pl; + while (zlen > 1 && *colon == '0') { + ++colon; + --zlen; + } + pptr = colon; + pl = zlen; + pval = 0; + while (zlen) { + if (*colon < '0' || *colon > '9') + return 0; /* invalid port number */ + pval *= 10; + pval += (*colon++) - '0'; + --zlen; + } + if (!pval || pval > 65535) + return 0; /* invalid port number */ + *port = pptr; + *plen = pl; + } + } else { + *port = NULL; + *plen = 0; + } + ptr = *host; + zlen = *hlen; + while (zlen) { + char c = *ptr; + c = LC(c); + *ptr++ = c; + --zlen; + } + return 1; +} + +/* the tab character %x09 is not considered a control here */ +static size_t has_controls(const void *_ptr, size_t zlen) +{ + const unsigned char *ptr = (const unsigned char *)_ptr; + if (!ptr) return 0; + while (zlen && (*ptr >= ' ' || *ptr == '\t')) { + ++ptr; + --zlen; + } + return zlen; +} + +static size_t has_controls_or_spaces(const void *_ptr, size_t zlen) +{ + const unsigned char *ptr = (const unsigned char *)_ptr; + if (!ptr) return 0; + while (zlen && *ptr > ' ') { + ++ptr; + --zlen; + } + return zlen; +} -- 2.11.4.GIT