COMMAND

    kernel (SYN)

SYSTEMS AFFECTED

    FreeBSD

PROBLEM

    Richard Steenbergen posted the following.  Here's a quickie for the
    people  who  have  been  plagued  with  high  bandwidth  SYN flood
    attacks: a kernel patch  for FreeBSD 3.1-STABLE which  rate limits
    SYN processing.   It's messy but  functional  (to make it better -
    that's the fbsd developers' job).  So,

	cd /usr/src/sys
	patch < synlim
	add "options SYN_RATELIM" (highly recommend ICMP_BANDLIM as well) to your kernel
	recompile
	sysctl net.inet.tcp.synlim (default to 100)

    This  is  the  maximum  number  of  SYNs  per  second that will be
    processed; the rest will be silently discarded.  On a test  system
    (P2 450 running  3.1-stable being hit  w/15,000 packets per  sec),
    this  has  successfully  brought  CPU  usage  from  100%  to  ~20%
    (against an open port which is replying with unacknowledged ACKs).
    Which brings  us to  the more  sticky topic  of kernel panics when
    under SYN flood.  The biggie for crashing when under SYN flood
    seems to be the following (heh, just turned off the patch and
    panic'd within 8 seconds while writing this):

	panic: free: multiple frees
	(kgdb) bt
	#0  boot (howto=256) at ../../kern/kern_shutdown.c:285
	#1  0xc0138c09 in panic (fmt=0xc02192b7 "free: multiple frees")
	    at ../../kern/kern_shutdown.c:446
	#2  0xc0135aaf in free (addr=0xc0cdd600, type=0xc0239330)
	    at ../../kern/kern_malloc.c:333
	#3  0xc01768f4 in ifafree (ifa=0xc0cdd600) at ../../net/route.c:262
	#4  0xc0176876 in rtfree (rt=0xc34ce700) at ../../net/route.c:236
	#5  0xc0176c84 in rtrequest (req=2, dst=0xc34cbac0, gateway=0xc34cbad0,
	    netmask=0x0, flags=393223, ret_nrt=0x0) at ../../net/route.c:536
	#6  0xc017b34d in in_rtqkill (rn=0xc34ce700, rock=0xc0231610)
	    at ../../netinet/in_rmx.c:242
	#7  0xc0176064 in rn_walktree (h=0xc0cd9e00, f=0xc017b2fc <in_rtqkill>,
	    w=0xc0231610) at ../../net/radix.c:956
	#8  0xc017b3ec in in_rtqtimo (rock=0xc0cd9e00) at ../../netinet/in_rmx.c:283
	#9  0xc013d19b in softclock () at ../../kern/kern_timeout.c:124

    Which, after a quick examination, seems to be a periodic  routing
    table cleanup.  It seems that in_rtqtimo is scheduled to run every
    net.inet.ip.rtexpire seconds (which is dynamically adjusted and can
    never go lower than net.inet.ip.rtminexpire).  When the system  is
    under heavy load from processing lots of small packets (they don't
    even have to be  SYNs, anything which can  get routed will do  the
    trick, though the packet kiddies  would get very little gain  from
    just sending an IP header since it's going to be padded to 64 bytes
    for the eth frame anyhow), this route cleanup code will go whacking
    at routes it shouldn't and free some memory twice.

SOLUTION

    Patch follows:

    *** conf/options.old    Sat May 15 23:08:03 1999
    --- conf/options        Sat May 15 23:40:21 1999
    ***************
    *** 68,73 ****
    --- 68,74 ----
      SYSVSHM               opt_sysvipc.h
      UCONSOLE
      ICMP_BANDLIM
    + SYN_RATELIM

      # POSIX kernel options
      P1003_1B      opt_posix.h
    *** netinet/tcp_var.h.old       Sat May 15 23:25:39 1999
    --- netinet/tcp_var.h   Sat May 15 23:45:05 1999
    ***************
    *** 40,45 ****
    --- 40,49 ----
       * Kernel variables for tcp.
       */

    + #ifdef KERNEL
    + #include "opt_syn_ratelim.h"
    + #endif
    +
      /*
       * Tcp control block, one per tcp; fields:
       * Organized for 16 byte cacheline efficiency.
    ***************
    *** 305,311 ****
      #define       TCPCTL_RECVSPACE        9       /* receive buffer space */
      #define       TCPCTL_KEEPINIT         10      /* receive buffer space */
      #define       TCPCTL_PCBLIST          11      /* list of all outstanding PCBs */
    ! #define TCPCTL_MAXID          12

      #define TCPCTL_NAMES { \
	    { 0, 0 }, \
    --- 309,316 ----
      #define       TCPCTL_RECVSPACE        9       /* receive buffer space */
      #define       TCPCTL_KEEPINIT         10      /* receive buffer space */
      #define       TCPCTL_PCBLIST          11      /* list of all outstanding PCBs */
    ! #define TCPCTL_SYNLIM         12      /* Rate limiting of SYNs */
    ! #define TCPCTL_MAXID          13

      #define TCPCTL_NAMES { \
	    { 0, 0 }, \
    ***************
    *** 320,325 ****
    --- 325,331 ----
	    { "recvspace", CTLTYPE_INT }, \
	    { "keepinit", CTLTYPE_INT }, \
	    { "pcblist", CTLTYPE_STRUCT }, \
    +       { "synlim", CTLTYPE_INT }, \
      }

      #ifdef KERNEL
    *** netinet/tcp_input.c.old     Sat May 15 23:08:10 1999
    --- netinet/tcp_input.c         Sun May 16 01:33:51 1999
    ***************
    *** 72,77 ****
    --- 72,85 ----
      static struct tcpiphdr tcp_saveti;
      #endif

    + #ifdef SYN_RATELIM
    + static int      synlim = 100;
    + SYSCTL_INT(_net_inet_tcp, TCPCTL_SYNLIM, synlim, CTLFLAG_RW, &synlim, 0, "");
    + #else
    + static int      synlim = -1;
    + SYSCTL_INT(_net_inet_tcp, TCPCTL_SYNLIM, synlim, CTLFLAG_RD, &synlim, 0, "");
    + #endif
    +
      static int    tcprexmtthresh = 3;
      tcp_seq       tcp_iss;
      tcp_cc        tcp_ccgen;
    ***************
    *** 98,104 ****
		struct tcpiphdr *, struct mbuf *));
      static int     tcp_reass __P((struct tcpcb *, struct tcpiphdr *, struct mbuf *));
      static void    tcp_xmit_timer __P((struct tcpcb *, int));
    !

      /*
       * Insert segment ti into reassembly queue of tcp with
    --- 106,112 ----
		struct tcpiphdr *, struct mbuf *));
      static int     tcp_reass __P((struct tcpcb *, struct tcpiphdr *, struct mbuf *));
      static void    tcp_xmit_timer __P((struct tcpcb *, int));
    ! static int     syn_ratelim(void);
  
      /*
       * Insert segment ti into reassembly queue of tcp with
    ***************
    *** 130,135 ****
    --- 138,183 ----
	    } \
      }

    + #ifdef SYN_RATELIM
    + int syn_ratelim(void)
    + {
    +       static int lticks;
    +       static int lpackets;
    +       int dticks;
    +
    +       /*
    +        * Rate-limit check for SYNs: returns 0 to accept, -1 to drop.
    +        * Always returns 0 when the limit is disabled (synlim <= 0).
    +        */
    +
    +       if (synlim <= 0)
    +               return(0);
    +
    +       dticks = ticks - lticks;
    +
    +       /*
    +        * Once more than hz ticks have passed, log any overrun and reset.
    +        */
    +
    +       if ((unsigned int)dticks > hz) {
    +               if (lpackets > synlim)
    +                       printf("syn rate limit reached %d/%d pps\n", lpackets, synlim);
    +               lticks = ticks;
    +               lpackets = 0;
    +       }
    +
    +       /*
    +        * Count this packet; drop it once the count exceeds synlim.
    +        */
    +
    +       if (++lpackets > synlim) {
    +               return(-1);
    +       }
    +
    +       return(0);
    + }
    + #endif
    +
      static int
      tcp_reass(tp, ti, m)
	    register struct tcpcb *tp;
    ***************
    *** 379,384 ****
    --- 427,438 ----
		    ip_fw_fwd_addr = NULL;
	    } else
      #endif        /* IPFIREWALL_FORWARD */
    +
    + #ifdef SYN_RATELIM
    +       if ((tiflags & TH_SYN) && !(tiflags & TH_ACK))
    +               if (syn_ratelim() < 0)
    +                       goto drop;
    + #endif

	    inp = in_pcblookup_hash(&tcbinfo, ti->ti_src, ti->ti_sport,
		ti->ti_dst, ti->ti_dport, 1);