/*
 * Patch summary: runtime "ignore_tos" and "pmtu_mode" controls for the IPv4
 * routing cache, plus removal of the interface index from the cache hash.
 *
 * include/linux/sysctl.h
 *   Adds the binary sysctl ids NET_IPV4_ROUTE_IGNORE_TOS (19) and
 *   NET_IPV4_ROUTE_PMTU_MODE (20) after NET_IPV4_ROUTE_SECRET_INTERVAL.
 *
 * net/ipv4/fib_hash.c, net/ipv4/fib_rules.c
 *   Declare ip_rt_ignore_tos as extern.  Inside the existing
 *   CONFIG_IP_ROUTE_TOS sections, a node/rule whose TOS is non-zero and
 *   differs from flp->fl4_tos is now skipped only while ip_rt_ignore_tos
 *   is zero.
 *
 * net/ipv4/route.c
 *   - New state: iptos_rt_mask (static u8), ip_rt_ignore_tos and
 *     ip_rt_pmtu_mode.  With CONFIG_IP_ROUTE_TOS the mask is initialised to
 *     IPTOS_RT_MASK and ip_rt_ignore_tos has no initialiser; without it the
 *     mask has no initialiser and ip_rt_ignore_tos is initialised to 1.
 *   - Every use of the constant IPTOS_RT_MASK touched by these hunks is
 *     replaced by the variable iptos_rt_mask.
 *   - Every rt_hash_code() call touched by these hunks stops XOR-ing
 *     (ifindex << 5) into the source-address argument.
 *   - ip_rt_redirect(): the inner loop over ikeys[] is removed.  A cache
 *     entry is now skipped when fl.oif is non-zero and differs from
 *     dev->ifindex, or when rt_dst equals rt_gateway (in addition to the
 *     pre-existing dst/src/tos/iif tests).
 *   - ip_rt_frag_needed(): when ip_rt_pmtu_mode is non-zero or
 *     iptos_rt_mask is zero, only the packet's masked TOS is looked up;
 *     otherwise all eight entries of all_tos_values[] are tried.
 *   - __ip_route_output_key(): computes
 *     tos = flp->fl4_tos & (iptos_rt_mask | RTO_ONLINK), hashes with it, and
 *     requires rth->fl.fl4_tos == tos instead of the former XOR-and-mask
 *     comparison.
 *   - ipv4_sysctl_doint_strategy(): returns 0 when no new value is supplied
 *     or the value is unchanged, -EINVAL when newlen != sizeof(int),
 *     -EFAULT when get_user() fails, and 1 after storing a changed value
 *     into table->data.
 *   - do_ignore_tos() (CONFIG_IP_ROUTE_TOS only): sets iptos_rt_mask to 0 or
 *     IPTOS_RT_MASK according to ip_rt_ignore_tos, then calls
 *     rt_cache_flush(0).
 *   - ip_rt_ignore_tos_handler(): on write with CONFIG_IP_ROUTE_TOS, runs
 *     proc_dointvec() and calls do_ignore_tos() if the stored value changed;
 *     on write without CONFIG_IP_ROUTE_TOS, returns -EINVAL.  Reads are
 *     passed straight to proc_dointvec().
 *   - ipv4_sysctl_ignore_tos_strategy(): with CONFIG_IP_ROUTE_TOS, calls
 *     ipv4_sysctl_doint_strategy() and, only when that returns 1, calls
 *     do_ignore_tos() and returns 0; otherwise returns -EINVAL if either
 *     newval or newlen is set, else 0.
 *   - ipv4_route_table[] gains "ignore_tos" (custom handler and strategy) and
 *     "pmtu_mode" (proc_dointvec), both int-sized with mode 0644.
 *
 * NOTE(review): the line breaks of this patch appear to have been collapsed,
 *   so the hunk line counts cannot be checked against the body as shown.
 * NOTE(review): ipv4_sysctl_doint_strategy() returns a positive value after
 *   storing a changed value; how the sysctl core treats that (in particular
 *   whether the old value is still copied back to the caller) is not visible
 *   here -- confirm against kernel/sysctl.c.
 * NOTE(review): iptos_rt_mask is written by do_ignore_tos() and read by the
 *   lookup paths with no synchronisation visible in these hunks --
 *   presumably the following rt_cache_flush(0) is relied upon; confirm.
 * NOTE(review): the extern declarations of ip_rt_ignore_tos live in .c files
 *   rather than a shared header.
 */
diff -Nru a/include/linux/sysctl.h b/include/linux/sysctl.h --- a/include/linux/sysctl.h Thu Mar 4 21:57:00 2004 +++ b/include/linux/sysctl.h Thu Mar 4 21:57:00 2004 @@ -342,6 +342,8 @@ NET_IPV4_ROUTE_MIN_PMTU=16, NET_IPV4_ROUTE_MIN_ADVMSS=17, NET_IPV4_ROUTE_SECRET_INTERVAL=18, + NET_IPV4_ROUTE_IGNORE_TOS=19, + NET_IPV4_ROUTE_PMTU_MODE=20, }; enum diff -Nru a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c --- a/net/ipv4/fib_hash.c Thu Mar 4 21:56:59 2004 +++ b/net/ipv4/fib_hash.c Thu Mar 4 21:57:00 2004 @@ -48,6 +48,8 @@ printk(KERN_DEBUG a) */ +extern int ip_rt_ignore_tos; + static kmem_cache_t * fn_hash_kmem; /* @@ -309,7 +311,7 @@ continue; } #ifdef CONFIG_IP_ROUTE_TOS - if (f->fn_tos && f->fn_tos != flp->fl4_tos) + if (f->fn_tos && f->fn_tos != flp->fl4_tos && !ip_rt_ignore_tos) continue; #endif f->fn_state |= FN_S_ACCESSED; diff -Nru a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c --- a/net/ipv4/fib_rules.c Thu Mar 4 21:57:00 2004 +++ b/net/ipv4/fib_rules.c Thu Mar 4 21:57:00 2004 @@ -49,6 +49,8 @@ #define FRprintk(a...) +extern int ip_rt_ignore_tos; + struct fib_rule { struct fib_rule *r_next; @@ -323,7 +325,7 @@ if (((saddr^r->r_src) & r->r_srcmask) || ((daddr^r->r_dst) & r->r_dstmask) || #ifdef CONFIG_IP_ROUTE_TOS - (r->r_tos && r->r_tos != flp->fl4_tos) || + (r->r_tos && r->r_tos != flp->fl4_tos && !ip_rt_ignore_tos) || #endif #ifdef CONFIG_IP_ROUTE_FWMARK (r->r_fwmark && r->r_fwmark != flp->fl4_fwmark) || diff -Nru a/net/ipv4/route.c b/net/ipv4/route.c --- a/net/ipv4/route.c Thu Mar 4 21:57:00 2004 +++ b/net/ipv4/route.c Thu Mar 4 21:57:00 2004 @@ -125,6 +125,14 @@ int ip_rt_min_advmss = 256; int ip_rt_secret_interval = 10 * 60 * HZ; static unsigned long rt_deadline; +#ifdef CONFIG_IP_ROUTE_TOS +static u8 iptos_rt_mask = IPTOS_RT_MASK; +int ip_rt_ignore_tos; +#else +static u8 iptos_rt_mask; +int ip_rt_ignore_tos = 1; +#endif +int ip_rt_pmtu_mode; /* 1=match by iph->tos, 0=ignore TOS for PMTU */ #define RTprint(a...) 
printk(KERN_DEBUG a) @@ -968,13 +976,12 @@ void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw, u32 saddr, u8 tos, struct net_device *dev) { - int i, k; + int i; struct in_device *in_dev = in_dev_get(dev); struct rtable *rth, **rthp; u32 skeys[2] = { saddr, 0 }; - int ikeys[2] = { dev->ifindex, 0 }; - tos &= IPTOS_RT_MASK; + tos &= iptos_rt_mask; if (!in_dev) return; @@ -994,10 +1001,7 @@ } for (i = 0; i < 2; i++) { - for (k = 0; k < 2; k++) { - unsigned hash = rt_hash_code(daddr, - skeys[i] ^ (ikeys[k] << 5), - tos); + unsigned hash = rt_hash_code(daddr, skeys[i], tos); rthp=&rt_hash_table[hash].chain; @@ -1009,7 +1013,9 @@ if (rth->fl.fl4_dst != daddr || rth->fl.fl4_src != skeys[i] || rth->fl.fl4_tos != tos || - rth->fl.oif != ikeys[k] || + (rth->fl.oif && + rth->fl.oif != dev->ifindex) || + rth->rt_dst == rth->rt_gateway || rth->fl.iif != 0) { rthp = &rth->u.rt_next; continue; @@ -1076,7 +1082,6 @@ rcu_read_unlock(); do_next: ; - } } in_dev_put(in_dev); return; @@ -1106,8 +1111,7 @@ } else if ((rt->rt_flags & RTCF_REDIRECTED) || rt->u.dst.expires) { unsigned hash = rt_hash_code(rt->fl.fl4_dst, - rt->fl.fl4_src ^ - (rt->fl.oif << 5), + rt->fl.fl4_src, rt->fl.fl4_tos); #if RT_CACHE_DEBUG >= 1 printk(KERN_DEBUG "ip_rt_advice: redirect to " @@ -1238,21 +1242,33 @@ return 68; } +/* See IPTOS_RT_MASK */ +static u8 all_tos_values[8] = { 0x00, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C }; + unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu) { - int i; + int i, j, ntos; unsigned short old_mtu = ntohs(iph->tot_len); struct rtable *rth; u32 skeys[2] = { iph->saddr, 0, }; u32 daddr = iph->daddr; - u8 tos = iph->tos & IPTOS_RT_MASK; + u8 *tos_values, tos = iph->tos & iptos_rt_mask; unsigned short est_mtu = 0; if (ipv4_config.no_pmtu_disc) return 0; + if (ip_rt_pmtu_mode || !iptos_rt_mask) { + tos_values = &tos; + ntos = 1; + } else { + tos_values = all_tos_values; + ntos = ARRAY_SIZE(all_tos_values); + } + + for (j = 0; j < ntos; j++) for (i = 0; i < 
2; i++) { - unsigned hash = rt_hash_code(daddr, skeys[i], tos); + unsigned hash = rt_hash_code(daddr, skeys[i], tos_values[j]); rcu_read_lock(); for (rth = rt_hash_table[hash].chain; rth; @@ -1260,9 +1276,9 @@ smp_read_barrier_depends(); if (rth->fl.fl4_dst == daddr && rth->fl.fl4_src == skeys[i] && + rth->fl.fl4_tos == tos_values[j] && rth->rt_dst == daddr && rth->rt_src == iph->saddr && - rth->fl.fl4_tos == tos && rth->fl.iif == 0 && !(dst_metric_locked(&rth->u.dst, RTAX_MTU))) { unsigned short mtu = new_mtu; @@ -1503,7 +1519,7 @@ RT_CACHE_STAT_INC(in_slow_mc); in_dev_put(in_dev); - hash = rt_hash_code(daddr, saddr ^ (dev->ifindex << 5), tos); + hash = rt_hash_code(daddr, saddr, tos); return rt_intern_hash(hash, rth, (struct rtable**) &skb->dst); e_nobufs: @@ -1554,7 +1570,7 @@ if (!in_dev) goto out; - hash = rt_hash_code(daddr, saddr ^ (fl.iif << 5), tos); + hash = rt_hash_code(daddr, saddr, tos); /* Check for the most weird martians, which can be not detected by fib_lookup. @@ -1846,8 +1862,8 @@ unsigned hash; int iif = dev->ifindex; - tos &= IPTOS_RT_MASK; - hash = rt_hash_code(daddr, saddr ^ (iif << 5), tos); + tos &= iptos_rt_mask; + hash = rt_hash_code(daddr, saddr, tos); rcu_read_lock(); for (rth = rt_hash_table[hash].chain; rth; rth = rth->u.rt_next) { @@ -1912,11 +1928,11 @@ int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) { - u32 tos = oldflp->fl4_tos & (IPTOS_RT_MASK | RTO_ONLINK); + u32 tos = oldflp->fl4_tos & (iptos_rt_mask | RTO_ONLINK); struct flowi fl = { .nl_u = { .ip4_u = { .daddr = oldflp->fl4_dst, .saddr = oldflp->fl4_src, - .tos = tos & IPTOS_RT_MASK, + .tos = tos & iptos_rt_mask, .scope = ((tos & RTO_ONLINK) ? 
RT_SCOPE_LINK : RT_SCOPE_UNIVERSE), @@ -2190,7 +2206,7 @@ rth->rt_flags = flags; - hash = rt_hash_code(oldflp->fl4_dst, oldflp->fl4_src ^ (oldflp->oif << 5), tos); + hash = rt_hash_code(oldflp->fl4_dst, oldflp->fl4_src, tos); err = rt_intern_hash(hash, rth, rp); done: if (free_res) @@ -2213,8 +2229,9 @@ { unsigned hash; struct rtable *rth; + u8 tos = flp->fl4_tos & (iptos_rt_mask | RTO_ONLINK); - hash = rt_hash_code(flp->fl4_dst, flp->fl4_src ^ (flp->oif << 5), flp->fl4_tos); + hash = rt_hash_code(flp->fl4_dst, flp->fl4_src, tos); rcu_read_lock(); for (rth = rt_hash_table[hash].chain; rth; rth = rth->u.rt_next) { @@ -2226,8 +2243,7 @@ #ifdef CONFIG_IP_ROUTE_FWMARK rth->fl.fl4_fwmark == flp->fl4_fwmark && #endif - !((rth->fl.fl4_tos ^ flp->fl4_tos) & - (IPTOS_RT_MASK | RTO_ONLINK))) { + rth->fl.fl4_tos == tos) { rth->u.dst.lastuse = jiffies; dst_hold(&rth->u.dst); rth->u.dst.__use++; @@ -2479,6 +2495,26 @@ } #ifdef CONFIG_SYSCTL + +static int ipv4_sysctl_doint_strategy(ctl_table *table, int *name, + int nlen, void *oldval, + size_t *oldlenp, void *newval, + size_t newlen, void **context) +{ + int val; + + if (!newval || !newlen) + return 0; + if (newlen != sizeof(int)) + return -EINVAL; + if (get_user(val, (int *)newval)) + return -EFAULT; + if (val == *(int *) table->data) + return 0; + *(int *) table->data = val; + return 1; +} + static int flush_delay; static int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write, @@ -2508,6 +2544,53 @@ return 0; } +#ifdef CONFIG_IP_ROUTE_TOS + +static void do_ignore_tos(void) +{ + iptos_rt_mask = ip_rt_ignore_tos? 
0 : IPTOS_RT_MASK; + rt_cache_flush(0); +} + +#endif + +static int ip_rt_ignore_tos_handler(ctl_table *ctl, int write, + struct file *filp, void *buffer, + size_t *lenp) +{ + if (write) { +#ifdef CONFIG_IP_ROUTE_TOS + int old = ip_rt_ignore_tos; + int ret = proc_dointvec(ctl, write, filp, buffer, lenp); + + if (ret) + return ret; + if (old != ip_rt_ignore_tos) do_ignore_tos(); + return 0; +#else + return -EINVAL; +#endif + } + return proc_dointvec(ctl, write, filp, buffer, lenp); +} +static int ipv4_sysctl_ignore_tos_strategy(ctl_table *table, int *name, + int nlen, void *oldval, + size_t *oldlenp, void *newval, + size_t newlen, void **context) +{ +#ifdef CONFIG_IP_ROUTE_TOS + int ret = ipv4_sysctl_doint_strategy(table, name, nlen, oldval, oldlenp, + newval, newlen, context); + + if (1 != ret) + return ret; + do_ignore_tos(); + return 0; +#else + return (newval || newlen) ? -EINVAL : 0; +#endif +} + ctl_table ipv4_route_table[] = { { .ctl_name = NET_IPV4_ROUTE_FLUSH, @@ -2660,6 +2743,23 @@ .mode = 0644, .proc_handler = &proc_dointvec_jiffies, .strategy = &sysctl_jiffies, + }, + { + .ctl_name = NET_IPV4_ROUTE_IGNORE_TOS, + .procname = "ignore_tos", + .data = &ip_rt_ignore_tos, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &ip_rt_ignore_tos_handler, + .strategy = &ipv4_sysctl_ignore_tos_strategy, + }, + { + .ctl_name = NET_IPV4_ROUTE_PMTU_MODE, + .procname = "pmtu_mode", + .data = &ip_rt_pmtu_mode, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, }, { .ctl_name = 0 } };