? sys/net/old ? sys/net/old2 ? sys/netinet/tcpsign Index: sys/net/route.c =================================================================== RCS file: /cvsroot/src/sys/net/route.c,v retrieving revision 1.98 diff -u -p -r1.98 route.c --- sys/net/route.c 10 Oct 2007 22:14:38 -0000 1.98 +++ sys/net/route.c 12 Nov 2007 17:14:39 -0000 @@ -147,6 +147,7 @@ struct callout rt_timer_ch; /* callout f static int _rtcache_debug = 0; #endif /* RTFLUSH_DEBUG */ +struct rtentry *rtgethead(const struct sockaddr *, const struct sockaddr *); static int rtdeletemsg(struct rtentry *); static int rtflushclone1(struct rtentry *, void *); static void rtflushclone(sa_family_t family, struct rtentry *); @@ -304,6 +305,34 @@ rtalloc(struct route *ro) rtcache(ro); } +/* + * Returns the next path in round-robin order and stores it in rt->rt_last + * rt should be the first path. NOTE(review): ip_output() passes its cached ro_rt here, which may not be the first path -- confirm + */ +struct rtentry * +rtchoosepath_rr(struct rtentry *rt) +{ + rt->rt_last = rtnext(rt->rt_last); + return rt->rt_last; +} + +/* + * Returns the next rtentry after rt on the circular path list that is RTF_UP (rt itself is checked last) + * If none is found, returns the rtentry that was passed in + */ +struct rtentry * +rtnext(struct rtentry *rt) +{ + struct rtentry *retrt, *sentinel; + + KASSERT(rt != NULL); + CLIST_FOREACH(retrt, CLIST_NEXT(rt, rt_list), sentinel, rt_list) + if (retrt->rt_flags & RTF_UP) + return retrt; + + return rt; +} + struct rtentry * rtalloc1(const struct sockaddr *dst, int report) { @@ -355,28 +384,81 @@ rtalloc1(const struct sockaddr *dst, int return newrt; } +/* + * Returns the head of the path list for dst/netmask, or NULL + * Just a rnh_lookup wrapper run at splsoftnet; counts rts_unreach when no non-root entry is found + */ +struct rtentry * +rtgethead(const struct sockaddr *dst, const struct sockaddr *netmask) +{ + struct radix_node_head *rnh = rt_tables[dst->sa_family]; + struct rtentry *rt = NULL; + struct radix_node *rn; + int s = splsoftnet(); + + if (rnh && (rn = rnh->rnh_lookup(dst, netmask, rnh)) && + ((rn->rn_flags & RNF_ROOT) == 0)) + rt = (struct rtentry *)rn; + else + rtstat.rts_unreach++; + + splx(s); + return rt; +} + void rtfree(struct rtentry *rt) { - struct ifaddr *ifa; + 
struct rtentry *rthead; if (rt == NULL) panic("rtfree"); rt->rt_refcnt--; if (rt->rt_refcnt <= 0 && (rt->rt_flags & RTF_UP) == 0) { - if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT)) - panic ("rtfree 2"); rttrash--; if (rt->rt_refcnt < 0) { printf("rtfree: %p not freed (neg refs)\n", rt); return; } + rthead = RTFIRST(rt); + rthead->rt_total--; + if (rthead->rt_total == 0 && + (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT))) + panic("rtfree 2"); rt_timer_remove_all(rt, 0); - ifa = rt->rt_ifa; - rt->rt_ifa = NULL; - IFAFREE(ifa); - rt->rt_ifp = NULL; - rt_destroy(rt); + IFAFREE(rt->rt_ifa); + if (rthead->rt_total == 0) { + /* No other paths remain */ + rt_destroy(rt); + } else if (rthead == rt) { + /* Deleting the first path while others remain: promote the next path to head */ + struct radix_node_head *rnh; + struct rtentry *srt = CLIST_NEXT(rthead, rt_list), + *sen, *rtin; + KASSERT(rt != srt); + srt->rt_total = rt->rt_total; + srt->rt_last = srt; + CLIST_REMOVE(rt, rt_list); + if ((rnh = rt_tables[rt_getkey(rt)->sa_family]) == NULL) + panic("rtfree: rt_tables"); + if (rnh->rnh_deladdr(rt_getkey(rt), rt_mask(rt), rnh) == NULL) + panic("rtfree: deladdr"); + if (rnh->rnh_addaddr(rt_getkey(srt), rt_mask(srt), rnh, + srt->rt_nodes) == NULL) + panic("rtfree: addaddr"); + CLIST_FOREACH(rtin, srt, sen, rt_list) + RTFIRST(rtin) = srt; + } else { + /* Delete a non-first path; reset the round-robin cursor of the head if it pointed here */ + CLIST_REMOVE(rt, rt_list); + if (rthead->rt_last == rt) + rthead->rt_last = rthead; + } + + if (rt->rt_gateway != NULL) + sockaddr_free(rt->rt_gateway); + /* XXX: do I really need this? I also Bzero at pool_get */ + Bzero(rt, sizeof(*rt)); pool_put(&rtentry_pool, rt); } } @@ -427,20 +509,33 @@ rtredirect(const struct sockaddr *dst, c error = ENETUNREACH; goto out; } - rt = rtalloc1(dst, 0); /* - * If the redirect isn't from our current router for this dst, - * it's either old or wrong. If it redirects us to ourselves, - * we have a routing loop, perhaps as a result of an interface - * going down recently. 
+ * If it redirects us to ourselves we have a routing loop, + * perhaps as a result of an interface going down recently. */ - if (!(flags & RTF_DONE) && rt && - (!equal(src, rt->rt_gateway) || rt->rt_ifa != ifa)) - error = EINVAL; - else if (ifa_ifwithaddr(gateway)) + if (ifa_ifwithaddr(gateway)) { error = EHOSTUNREACH; - if (error) - goto done; + goto out; + } + rt = rtalloc1(dst, 0); + if (rt && !(flags & RTF_DONE)) { + /* + * If the redirect isn't from our current router for this dst, + * it's either old or wrong. Otherwise move our reference from rt to the matching path. + */ + struct rtentry *sentinel, *nrt; + CLIST_FOREACH(nrt, rt, sentinel, rt_list) + if(equal(src, nrt->rt_gateway) && (nrt->rt_ifa == ifa)) + break; + if(nrt == NULL) { + error = EINVAL; + goto done; + } + rt->rt_refcnt--; + nrt->rt_refcnt++; + rt = nrt; + } + /* * Create a new entry if we just got back a wildcard entry * or the lookup failed. This is necessary for hosts @@ -485,6 +580,7 @@ rtredirect(const struct sockaddr *dst, c } } else error = EHOSTUNREACH; + done: if (rt) { if (rtp != NULL && !error) @@ -674,7 +770,7 @@ rtrequest1(int req, struct rt_addrinfo * { int s = splsoftnet(); int error = 0; - struct rtentry *rt, *crt; + struct rtentry *rt, *crt = NULL, *sentinel, *nrt; struct radix_node *rn; struct radix_node_head *rnh; struct ifaddr *ifa; @@ -698,16 +794,45 @@ rtrequest1(int req, struct rt_addrinfo * } if ((rn = rnh->rnh_lookup(dst, netmask, rnh)) == NULL) senderr(ESRCH); - rt = (struct rtentry *)rn; + crt = rt = (struct rtentry *)rn; + /* Calibrate: select the path whose gateway matches the request */ + if (gateway != NULL && !(crt->rt_flags & RTF_CLONING)) { + /* + * XXX: we can have a gateway on cloning route + */ + CLIST_FOREACH(rt, crt, sentinel, rt_list) + if (sockaddr_cmp(gateway, rt->rt_gateway) == 0) + break; + if (rt == NULL) + senderr(ESRCH); + } else + if (! 
CLIST_SINGULAR(crt, rt_list)) { + /* + * If no gateway is provided while + * multiple paths exist, require a cloning + * route and match on the ifp of info->rti_ifa + */ + if ( (crt->rt_flags & RTF_CLONING) == 0 || + !(info->rti_ifa)) + senderr(EINVAL); + CLIST_FOREACH(rt, crt, sentinel, rt_list) + if (rt->rt_ifp == info->rti_ifa->ifa_ifp) + break; + if (rt == NULL) + senderr(EINVAL); + } + if (CLIST_SINGULAR(rt, rt_list)) { + if ((rn = rnh->rnh_deladdr(dst, netmask, rnh)) == NULL) + senderr(ESRCH); + if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) + panic("rtrequest delete"); + } if ((rt->rt_flags & RTF_CLONING) != 0) { /* clean up any cloned children */ rtflushclone(dst->sa_family, rt); } - if ((rn = rnh->rnh_deladdr(dst, netmask, rnh)) == NULL) - senderr(ESRCH); - if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) - panic ("rtrequest delete"); - rt = (struct rtentry *)rn; + if (rt->rt_nodes->rn_flags & RNF_ROOT) + panic("rtrequest delete 2"); if (rt->rt_gwroute) { RTFREE(rt->rt_gwroute); rt->rt_gwroute = NULL; @@ -781,26 +906,60 @@ rtrequest1(int req, struct rt_addrinfo * rt->rt_parent = *ret_nrt; rt->rt_parent->rt_refcnt++; } + rt->rt_total = 1; + rt->rt_first = rt; + rt->rt_last = rt; + CLIST_INIT(rt, rt_list); /* a new entry starts as a one-element path list */ RT_DPRINTF("%s l.%d: rt->_rt_key = %p\n", __func__, __LINE__, (void *)rt->_rt_key); rn = rnh->rnh_addaddr(rt_getkey(rt), netmask, rnh, rt->rt_nodes); RT_DPRINTF("%s l.%d: rt->_rt_key = %p\n", __func__, __LINE__, (void *)rt->_rt_key); - if (rn == NULL && (crt = rtalloc1(rt_getkey(rt), 0)) != NULL) { + if (rn == NULL && + ((crt = rtgethead(rt_getkey(rt), NULL)) != NULL) && /* overwrite cloned route */ - if ((crt->rt_flags & RTF_CLONED) != 0) { - rtdeletemsg(crt); - rn = rnh->rnh_addaddr(rt_getkey(rt), - netmask, rnh, rt->rt_nodes); + ((crt->rt_flags & RTF_CLONED) != 0)) { + rtdeletemsg(crt); + rn = rnh->rnh_addaddr(rt_getkey(rt), + netmask, rnh, rt->rt_nodes); + crt = NULL; + if (rn == NULL) { + error = ENOMEM; + goto eexist; } - RTFREE(crt); RT_DPRINTF("%s l.%d: rt->_rt_key = 
%p\n", __func__, - __LINE__, (void *)rt->_rt_key); + __LINE__, (void *)rt->_rt_key); } - RT_DPRINTF("%s l.%d: rt->_rt_key = %p\n", __func__, - __LINE__, (void *)rt->_rt_key); - if (rn == NULL) { + else if (req == RTM_ADD && rn == NULL && + ((crt = rtgethead(rt_getkey(rt), netmask)) != NULL)) { + /* New route for the same destination: link it in as an additional path */ + if (crt->rt_total >= MAX_PATHS) { + error = E2BIG; + goto eexist; + } + if (gateway) { + CLIST_FOREACH(nrt, crt, sentinel, rt_list) + if (sockaddr_cmp(nrt->rt_gateway, gateway) == 0) + goto eexist; + } else if((rt->rt_flags & RTF_CLONING) && + (info->rti_ifa)) { + CLIST_FOREACH(nrt, crt, sentinel, rt_list) + if (nrt->rt_ifp == info->rti_ifa->ifa_ifp) + goto eexist; + } else if(rt->rt_flags & RTF_CLONING) + CLIST_FOREACH(nrt, crt, sentinel, rt_list) + if (nrt->rt_ifp == rt->rt_ifp) + goto eexist; + sockaddr_free(rt->_rt_key); + rt->rt_nodes->rn_mask = crt->rt_nodes->rn_mask; + rt->_rt_key = crt->_rt_key; /* the new path shares the key of the list head */ + rt->rt_first = crt; + CLIST_INSERT_AFTER(crt, rt, rt_list); + crt->rt_total++; + crt = NULL; + } else if (rn == NULL) { +eexist: IFAFREE(ifa); if ((rt->rt_flags & RTF_CLONED) != 0 && rt->rt_parent) rtfree(rt->rt_parent); @@ -808,7 +967,10 @@ rtrequest1(int req, struct rt_addrinfo * rtfree(rt->rt_gwroute); rt_destroy(rt); pool_put(&rtentry_pool, rt); - senderr(EEXIST); + if (error) + senderr(error) + else + senderr(EEXIST); } RT_DPRINTF("%s l.%d: rt->_rt_key = %p\n", __func__, __LINE__, (void *)rt->_rt_key); @@ -824,7 +986,8 @@ rtrequest1(int req, struct rt_addrinfo * /* clean up any cloned children */ rtflushclone(dst->sa_family, rt); } - rtflushall(dst->sa_family); + if (crt == NULL) + rtflushall(dst->sa_family); break; case RTM_GET: if (netmask != NULL) { @@ -837,6 +1000,7 @@ rtrequest1(int req, struct rt_addrinfo * senderr(ESRCH); if (ret_nrt != NULL) { rt = (struct rtentry *)rn; + rt = rtchoosepath_rr(rt); *ret_nrt = rt; rt->rt_refcnt++; } @@ -944,8 +1108,12 @@ rtinit(struct ifaddr *ifa, int cmd, int rt_maskedcopy(odst, dst, 
ifa->ifa_netmask); } if ((rt = rtalloc1(dst, 0)) != NULL) { + struct rtentry *sentinel; rt->rt_refcnt--; - if (rt->rt_ifa != ifa) + CLIST_FOREACH(rt, rt, sentinel, rt_list) + if (rt->rt_ifa->ifa_ifp == ifa->ifa_ifp) + break; + if (rt == NULL) return (flags & RTF_HOST) ? EHOSTUNREACH : ENETUNREACH; } Index: sys/net/route.h =================================================================== RCS file: /cvsroot/src/sys/net/route.h,v retrieving revision 1.58 diff -u -p -r1.58 route.h --- sys/net/route.h 27 Aug 2007 00:34:01 -0000 1.58 +++ sys/net/route.h 12 Nov 2007 17:14:39 -0000 @@ -93,6 +93,10 @@ struct rt_metrics { #ifndef RNF_NORMAL #include #endif + +/* Upper bound on the number of paths per destination. XXX: sysctl maybe ? */ +#define MAX_PATHS 64 + struct rtentry { struct radix_node rt_nodes[2]; /* tree glue, and other values */ #define rt_mask(r) ((const struct sockaddr *)((r)->rt_nodes->rn_mask)) @@ -108,7 +112,13 @@ struct rtentry { struct rtentry *rt_gwroute; /* implied entry for gatewayed routes */ LIST_HEAD(, rttimer) rt_timer; /* queue of timeouts for misc funcs */ struct rtentry *rt_parent; /* parent of cloned route */ - struct sockaddr *_rt_key; + struct sockaddr *_rt_key; + /* load-sharing: circular list of paths to the same destination */ + CLIST_ENTRY(rtentry) rt_list; + struct rtentry *rt_first; /* First entry in list */ +#define RTFIRST(r) ((r)->rt_first) + struct rtentry *rt_last; /* Path last returned by round robin */ + uint8_t rt_total; /* Number of paths; updated on the first entry */ }; static inline const struct sockaddr * @@ -366,6 +376,7 @@ out: } struct rtentry *rtfindparent(struct radix_node_head *, struct route *); +struct rtentry *rtnext(struct rtentry *); #ifdef RTCACHE_DEBUG #define rtcache_init(ro) rtcache_init_debug(__func__, ro) @@ -386,6 +397,7 @@ void rtcache_clear(struct route *); void rtcache_update(struct route *, int); void rtcache_free(struct route *); int rtcache_setdst(struct route *, const struct sockaddr *); +struct rtentry* rtchoosepath_rr(struct rtentry *); static inline struct rtentry * rtcache_lookup1(struct route *ro, const struct sockaddr *dst, int clone) 
Index: sys/net/rtsock.c =================================================================== RCS file: /cvsroot/src/sys/net/rtsock.c,v retrieving revision 1.95 diff -u -p -r1.95 rtsock.c --- sys/net/rtsock.c 19 Jul 2007 20:48:53 -0000 1.95 +++ sys/net/rtsock.c 12 Nov 2007 17:14:39 -0000 @@ -306,7 +306,7 @@ route_output(struct mbuf *m, ...) if (rtm->rtm_type != RTM_GET) {/* XXX: too grotty */ struct radix_node *rn; - if (memcmp(dst, rt_getkey(rt), dst->sa_len) != 0) + if (sockaddr_cmp(dst, rt_getkey(rt)) != 0) senderr(ESRCH); netmask = intern_netmask(netmask); for (rn = rt->rt_nodes; rn; rn = rn->rn_dupedkey) @@ -923,6 +923,8 @@ sysctl_dumpentry(struct rtentry *rt, voi int error = 0, size; struct rt_addrinfo info; + if (CLIST_NEXT(rt, rt_list) != RTFIRST(rt)) + sysctl_dumpentry(CLIST_NEXT(rt, rt_list), v); /* dump the other paths first; NOTE(review): the return value of this recursive call is ignored */ if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg)) return 0; memset(&info, 0, sizeof(info)); Index: sys/netinet/if_arp.c =================================================================== RCS file: /cvsroot/src/sys/netinet/if_arp.c,v retrieving revision 1.128 diff -u -p -r1.128 if_arp.c --- sys/netinet/if_arp.c 2 Sep 2007 19:42:22 -0000 1.128 +++ sys/netinet/if_arp.c 12 Nov 2007 17:14:39 -0000 @@ -768,16 +768,18 @@ arpresolve(struct ifnet *ifp, struct rte if (rt->rt_expire) { rt->rt_flags &= ~RTF_REJECT; if (la->la_asked == 0 || rt->rt_expire != time_second) { + struct rtentry *nrt, *sentinel; rt->rt_expire = time_second; if (la->la_asked++ < arp_maxtries) - arprequest(ifp, - &satocsin(rt->rt_ifa->ifa_addr)->sin_addr, + CLIST_FOREACH(nrt, rt->rt_parent, sentinel, rt_list) /* one request per path on the parent list; NOTE(review): sends nothing if rt_parent is NULL -- confirm */ + arprequest(nrt->rt_ifp, + &satocsin(nrt->rt_ifa->ifa_addr)->sin_addr, &satocsin(dst)->sin_addr, #if NCARP > 0 (rt->rt_ifp->if_type == IFT_CARP) ? 
CLLADDR(rt->rt_ifp->if_sadl): #endif - CLLADDR(ifp->if_sadl)); + CLLADDR(nrt->rt_ifp->if_sadl)); else { rt->rt_flags |= RTF_REJECT; rt->rt_expire += arpt_down; @@ -1097,6 +1099,19 @@ in_arpinput(struct mbuf *m) if (rt->rt_expire) rt->rt_expire = time_second + arpt_keep; rt->rt_flags &= ~RTF_REJECT; + if(rt->rt_ifp != ifp) { + /* + * Reply came in on a different interface. Check + * whether a path on the list of rt_parent uses this ifp + */ + struct rtentry *nrt, *sentinel; + CLIST_FOREACH(nrt, rt->rt_parent, sentinel, rt_list) + if(nrt->rt_ifp == ifp) { + rt_replace_ifa(rt, nrt->rt_ifa); + rt->rt_ifp = nrt->rt_ifp; + break; + } + } la->la_asked = 0; s = splnet(); Index: sys/netinet/in.c =================================================================== RCS file: /cvsroot/src/sys/netinet/in.c,v retrieving revision 1.118 diff -u -p -r1.118 in.c --- sys/netinet/in.c 1 Sep 2007 04:32:51 -0000 1.118 +++ sys/netinet/in.c 12 Nov 2007 17:14:39 -0000 @@ -987,7 +987,7 @@ bad: /* * add a route to prefix ("connected route" in cisco terminology). - * does nothing if there's some interface address with the same prefix already. + * does nothing if the same prefix is already assigned to the same interface. */ static int in_addprefix(struct in_ifaddr *target, int flags) @@ -1012,14 +1012,11 @@ in_addprefix(struct in_ifaddr *target, i p.s_addr &= ia->ia_sockmask.sin_addr.s_addr; } - if (prefix.s_addr != p.s_addr) + if (prefix.s_addr != p.s_addr || target->ia_ifp != ia->ia_ifp) continue; - /* - * if we got a matching prefix route inserted by other - * interface address, we don't need to bother - * - * XXX RADIX_MPATH implications here? 
-dyoung + * if we got a matching prefix route inserted on the same + * interface, we don't need to bother */ if (ia->ia_flags & IFA_ROUTE) return 0; Index: sys/netinet/in.h =================================================================== RCS file: /cvsroot/src/sys/netinet/in.h,v retrieving revision 1.81 diff -u -p -r1.81 in.h --- sys/netinet/in.h 19 Sep 2007 04:33:43 -0000 1.81 +++ sys/netinet/in.h 12 Nov 2007 17:14:39 -0000 @@ -450,8 +450,9 @@ struct ip_mreq { #define IPCTL_IFQ 21 /* ipintrq node */ #define IPCTL_RANDOMID 22 /* use random IP ids (if configured) */ #define IPCTL_LOOPBACKCKSUM 23 /* do IP checksum on loopback */ -#define IPCTL_STATS 24 /* IP statistics */ -#define IPCTL_MAXID 25 +#define IPCTL_STATS 24 /* IP statistics */ +#define IPCTL_LOAD_SHARING 25 /* Load sharing node */ +#define IPCTL_MAXID 26 #define IPCTL_NAMES { \ { 0, 0 }, \ @@ -479,7 +480,13 @@ struct ip_mreq { { "random_id", CTLTYPE_INT }, \ { "do_loopback_cksum", CTLTYPE_INT }, \ { "stats", CTLTYPE_STRUCT }, \ + { "load-sharing", CTLTYPE_NODE }, \ } + +/* Load sharing: identifiers of the sysctl entries below the load-sharing node */ +#define IPCTL_LS_SELECTED 1 +#define IPCTL_LS_AVAILABLE 2 + #endif /* _NETBSD_SOURCE */ /* INET6 stuff */ Index: sys/netinet/ip_input.c =================================================================== RCS file: /cvsroot/src/sys/netinet/ip_input.c,v retrieving revision 1.254 diff -u -p -r1.254 ip_input.c --- sys/netinet/ip_input.c 2 Oct 2007 20:35:04 -0000 1.254 +++ sys/netinet/ip_input.c 12 Nov 2007 17:14:39 -0000 @@ -218,6 +218,13 @@ int ip_do_randomid = 0; */ int ip_checkinterface = 0; +#define INITIAL_LS 2 +#define MAX_LS_STRING 20 + +/* Names of the algorithms, indexed by the LS_* defines in ip_output.c; keep the two in sync */ +const char* load_sharing_strings[] = { "first-only", "round-robin", + "simple-sum", NULL }; +int load_sharing_index = INITIAL_LS; struct rttimer_queue *ip_mtudisc_timeout_q = NULL; @@ -2163,6 +2170,45 @@ sysctl_net_inet_ip_hashsize(SYSCTLFN_ARG } #endif /* GATEWAY */ +static int +sysctl_load_sharing(SYSCTLFN_ARGS) +{ + 
int error, i; + struct sysctlnode node = *rnode; + char lsc[MAX_LS_STRING]; + + strlcpy(lsc, load_sharing_strings[load_sharing_index], MAX_LS_STRING); + node.sysctl_data = lsc; + error = sysctl_lookup(SYSCTLFN_CALL(&node)); + if (error || newp == NULL) + return error; + for (i=0; load_sharing_strings[i] != NULL; i++) + if (strncmp(load_sharing_strings[i], lsc, MAX_LS_STRING) == 0) + break; + + if (load_sharing_strings[i] == NULL) + return EINVAL; /* the name written does not match any known algorithm */ + load_sharing_index = i; + return 0; +} + +static int +sysctl_ls_types(SYSCTLFN_ARGS) +{ + struct sysctlnode node = *rnode; + int i; + char rt[255]; + + rt[0]=0; + /* Build a space-separated list of the algorithm names. XXX: slow and ugly */ + for (i=0; load_sharing_strings[i] != NULL; i++) { + strlcat(rt, load_sharing_strings[i], 255); + if (load_sharing_strings[i+1] != NULL) + strlcat(rt, " ", 255); + } + node.sysctl_data = rt; + return sysctl_lookup(SYSCTLFN_CALL(&node)); +} SYSCTL_SETUP(sysctl_net_inet_ip_setup, "sysctl net.inet.ip subtree setup") { @@ -2370,4 +2416,24 @@ SYSCTL_SETUP(sysctl_net_inet_ip_setup, " NULL, 0, &ipstat, sizeof(ipstat), CTL_NET, PF_INET, IPPROTO_IP, IPCTL_STATS, CTL_EOL); + sysctl_createv(clog, 0, NULL, NULL, + CTLFLAG_PERMANENT, CTLTYPE_NODE, "load-sharing", + SYSCTL_DESCR("IP load sharing"), + NULL, 0, NULL, 0, CTL_NET, PF_INET, IPPROTO_IP, + IPCTL_LOAD_SHARING, CTL_EOL); + sysctl_createv(clog, 0, NULL, NULL, + CTLFLAG_PERMANENT | CTLFLAG_READWRITE, + CTLTYPE_STRING, "selected", + SYSCTL_DESCR("IP load sharing algorithm"), + sysctl_load_sharing, 0, + &load_sharing_strings[INITIAL_LS], + MAX_LS_STRING - 1, + CTL_NET, PF_INET, IPPROTO_IP, + IPCTL_LOAD_SHARING, IPCTL_LS_SELECTED, CTL_EOL); + sysctl_createv(clog, 0, NULL, NULL, + CTLFLAG_PERMANENT, CTLTYPE_STRING, "available", + SYSCTL_DESCR("IP load sharing supported algorithms"), + sysctl_ls_types, 0, NULL, 255, CTL_NET, + PF_INET, IPPROTO_IP, IPCTL_LOAD_SHARING, IPCTL_LS_AVAILABLE, + CTL_EOL); } Index: sys/netinet/ip_output.c =================================================================== 
RCS file: /cvsroot/src/sys/netinet/ip_output.c,v retrieving revision 1.184 diff -u -p -r1.184 ip_output.c --- sys/netinet/ip_output.c 19 Sep 2007 04:33:43 -0000 1.184 +++ sys/netinet/ip_output.c 12 Nov 2007 17:14:39 -0000 @@ -171,6 +171,16 @@ int ip_do_loopback_cksum = 0; (((csum_flags) & M_CSUM_TCPv4) != 0 && tcp_do_loopback_cksum) || \ (((csum_flags) & M_CSUM_IPv4) != 0 && ip_do_loopback_cksum))) +/* Indices into load_sharing_strings[] in ip_input.c; keep the two in sync if you change these */ +#define LS_NONE 0 +#define LS_RR 1 +#define LS_SS 2 + +extern int load_sharing_index; + +#define tiny_sum(ip4a) ((ip4a >> 24) + (ip4a << 8 >> 24) + \ + (ip4a << 16 >> 24) + (ip4a << 24 >> 24)) /* sum of the four octets of ip4a; assumes a 32-bit unsigned argument -- TODO confirm */ + /* * IP output. The packet in mbuf chain m contains a skeletal IP * header (with len, off, ttl, proto, tos, src, dst). @@ -338,13 +348,43 @@ ip_output(struct mbuf *m0, ...) mtu = ifp->if_mtu; IFP_TO_IA(ifp, ia); } else { - if (ro->ro_rt == NULL) + int ro_cached = 1; + if (ro->ro_rt == NULL) { rtcache_init(ro); + ro_cached = 0; + } if (ro->ro_rt == NULL) { ipstat.ips_noroute++; error = EHOSTUNREACH; goto bad; } + /* Load-sharing: choose one of several paths; with simple-sum only when the route was not already cached */ + if (ro->ro_rt->rt_total > 1 && + load_sharing_index != LS_NONE && + !(load_sharing_index == LS_SS && ro_cached)) { + ro->ro_rt->rt_refcnt--; + switch(load_sharing_index) { + case LS_RR: + ro->ro_rt = rtchoosepath_rr(ro->ro_rt); + break; + case LS_SS: + { + uint8_t i, hsh; + /* I'm not that happy with this "sum" */ + hsh = ( tiny_sum(ip->ip_src.s_addr) + + tiny_sum(ip->ip_dst.s_addr) + + ip->ip_p + ip->ip_tos) % + ro->ro_rt->rt_total; + /* XXX: Normally it should be up... 
*/ + if (hsh == 0 && !(ro->ro_rt->rt_flags & RTF_UP)) + ro->ro_rt = rtnext(ro->ro_rt); + else for (i = 0; i < hsh; i++) + ro->ro_rt = rtnext(ro->ro_rt); + } + break; + } + ro->ro_rt->rt_refcnt++; + } ia = ifatoia(ro->ro_rt->rt_ifa); ifp = ro->ro_rt->rt_ifp; if ((mtu = ro->ro_rt->rt_rmx.rmx_mtu) == 0) Index: sys/sys/queue.h =================================================================== RCS file: /cvsroot/src/sys/sys/queue.h,v retrieving revision 1.47 diff -u -p -r1.47 queue.h --- sys/sys/queue.h 18 Jul 2007 12:07:35 -0000 1.47 +++ sys/sys/queue.h 12 Nov 2007 17:14:39 -0000 @@ -674,4 +674,57 @@ struct { \ ? ((head)->cqh_last) \ : (elm->field.cqe_prev)) +/* + * Circular list definitions: every element carries the links, there is no separate head structure + */ +#define CLIST_ENTRY(__type) \ + struct { \ + struct __type *cl_next; \ + struct __type *cl_prev; \ + } + +/* + * Circular list functions. The FOREACH macros leave __elm NULL after a complete walk + */ +#define CLIST_FOREACH1(__elm, __first, __sentinel, __field) \ + for ((__elm) = (__sentinel) = (__first); (__elm) != NULL;\ + (__elm) = ((__elm)->__field == (__sentinel)) \ + ? 
NULL \ + : (__elm)->__field) + +#define CLIST_FOREACH(__elm, __first, __sentinel, __field) \ + CLIST_FOREACH1((__elm), (__first), __sentinel, __field.cl_next) + +#define CLIST_FOREACH_REVERSE(__elm, __first, __sentinel, __field) \ + CLIST_FOREACH1((__elm), (__first), __sentinel, __field.cl_prev) + +#define CLIST_INIT(__elm, __field) \ + do { \ + (__elm)->__field.cl_prev = (__elm)->__field.cl_next = \ + (__elm); \ + } while (/*CONSTCOND*/0) + +#define CLIST_SINGULAR(__elm, __field) ((__elm)->__field.cl_prev == (__elm)) /* true when __elm is the only element of its list */ + +#define CLIST_REMOVE(__elm, __field) \ + do { \ + (__elm)->__field.cl_prev->__field.cl_next = \ + (__elm)->__field.cl_next; \ + (__elm)->__field.cl_next->__field.cl_prev = \ + (__elm)->__field.cl_prev; \ + CLIST_INIT((__elm), __field); \ + } while (/*CONSTCOND*/0) + +#define CLIST_INSERT_AFTER(__listelm, __elm, __field) \ + do { \ + assert(__listelm != __elm); \ + (__elm)->__field.cl_prev = (__listelm); \ + (__elm)->__field.cl_next = (__listelm)->__field.cl_next;\ + (__listelm)->__field.cl_next = (__elm); \ + (__elm)->__field.cl_next->__field.cl_prev = (__elm); \ + } while (/*CONSTCOND*/0) /* NOTE(review): __listelm and __elm are not parenthesized inside assert() */ + +#define CLIST_NEXT(__elm, __field) ((__elm)->__field.cl_next) +#define CLIST_PREV(__elm, __field) ((__elm)->__field.cl_prev) + #endif /* !_SYS_QUEUE_H_ */ Index: usr.bin/netstat/route.c =================================================================== RCS file: /cvsroot/src/usr.bin/netstat/route.c,v retrieving revision 1.69 diff -u -p -r1.69 route.c --- usr.bin/netstat/route.c 19 Jul 2007 20:51:04 -0000 1.69 +++ usr.bin/netstat/route.c 12 Nov 2007 17:14:39 -0000 @@ -171,6 +171,11 @@ again: } else if (do_rtent) { kget(rn, rtentry); p_krtentry(&rtentry); + while ( CLIST_NEXT(&rtentry, rt_list) != + (struct rtentry*)rn ) { /* print the remaining paths until the list wraps back to rn */ + kget(CLIST_NEXT(&rtentry, rt_list), rtentry); + p_krtentry(&rtentry); + } if (Aflag) p_rtnode(); } else {