summaryrefslogtreecommitdiff
path: root/net/ipv6
diff options
context:
space:
mode:
authorSrikant Patnaik2015-01-11 12:28:04 +0530
committerSrikant Patnaik2015-01-11 12:28:04 +0530
commit871480933a1c28f8a9fed4c4d34d06c439a7a422 (patch)
tree8718f573808810c2a1e8cb8fb6ac469093ca2784 /net/ipv6
parent9d40ac5867b9aefe0722bc1f110b965ff294d30d (diff)
downloadFOSSEE-netbook-kernel-source-871480933a1c28f8a9fed4c4d34d06c439a7a422.tar.gz
FOSSEE-netbook-kernel-source-871480933a1c28f8a9fed4c4d34d06c439a7a422.tar.bz2
FOSSEE-netbook-kernel-source-871480933a1c28f8a9fed4c4d34d06c439a7a422.zip
Moved, renamed, and deleted files
The original directory structure was scattered and unorganized. Changes are basically to make it look like kernel structure.
Diffstat (limited to 'net/ipv6')
-rw-r--r--net/ipv6/Kconfig253
-rw-r--r--net/ipv6/Makefile42
-rw-r--r--net/ipv6/addrconf.c4884
-rw-r--r--net/ipv6/addrconf_core.c80
-rw-r--r--net/ipv6/addrlabel.c602
-rw-r--r--net/ipv6/af_inet6.c1363
-rw-r--r--net/ipv6/ah6.c757
-rw-r--r--net/ipv6/anycast.c520
-rw-r--r--net/ipv6/datagram.c872
-rw-r--r--net/ipv6/esp6.c674
-rw-r--r--net/ipv6/exthdrs.c896
-rw-r--r--net/ipv6/exthdrs_core.c114
-rw-r--r--net/ipv6/fib6_rules.c308
-rw-r--r--net/ipv6/icmp.c997
-rw-r--r--net/ipv6/inet6_connection_sock.c255
-rw-r--r--net/ipv6/inet6_hashtables.c304
-rw-r--r--net/ipv6/ip6_fib.c1713
-rw-r--r--net/ipv6/ip6_flowlabel.c783
-rw-r--r--net/ipv6/ip6_input.c344
-rw-r--r--net/ipv6/ip6_output.c1686
-rw-r--r--net/ipv6/ip6_tunnel.c1592
-rw-r--r--net/ipv6/ip6mr.c2281
-rw-r--r--net/ipv6/ipcomp6.c218
-rw-r--r--net/ipv6/ipv6_sockglue.c1326
-rw-r--r--net/ipv6/mcast.c2668
-rw-r--r--net/ipv6/mip6.c525
-rw-r--r--net/ipv6/ndisc.c1892
-rw-r--r--net/ipv6/netfilter.c196
-rw-r--r--net/ipv6/netfilter/Kconfig224
-rw-r--r--net/ipv6/netfilter/Makefile34
-rw-r--r--net/ipv6/netfilter/ip6_queue.c641
-rw-r--r--net/ipv6/netfilter/ip6_tables.c2361
-rw-r--r--net/ipv6/netfilter/ip6t_REJECT.c272
-rw-r--r--net/ipv6/netfilter/ip6t_ah.c121
-rw-r--r--net/ipv6/netfilter/ip6t_eui64.c74
-rw-r--r--net/ipv6/netfilter/ip6t_frag.c136
-rw-r--r--net/ipv6/netfilter/ip6t_hbh.c215
-rw-r--r--net/ipv6/netfilter/ip6t_ipv6header.c154
-rw-r--r--net/ipv6/netfilter/ip6t_mh.c94
-rw-r--r--net/ipv6/netfilter/ip6t_rpfilter.c133
-rw-r--r--net/ipv6/netfilter/ip6t_rt.c225
-rw-r--r--net/ipv6/netfilter/ip6table_filter.c108
-rw-r--r--net/ipv6/netfilter/ip6table_mangle.c145
-rw-r--r--net/ipv6/netfilter/ip6table_raw.c88
-rw-r--r--net/ipv6/netfilter/ip6table_security.c105
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c398
-rw-r--r--net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c362
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c650
-rw-r--r--net/ipv6/netfilter/nf_defrag_ipv6_hooks.c137
-rwxr-xr-xnet/ipv6/ping.c222
-rw-r--r--net/ipv6/proc.c338
-rw-r--r--net/ipv6/protocol.c54
-rw-r--r--net/ipv6/raw.c1368
-rw-r--r--net/ipv6/reassembly.c769
-rw-r--r--net/ipv6/route.c3104
-rw-r--r--net/ipv6/sit.c1306
-rw-r--r--net/ipv6/syncookies.c269
-rw-r--r--net/ipv6/sysctl_net_ipv6.c177
-rw-r--r--net/ipv6/tcp_ipv6.c2196
-rw-r--r--net/ipv6/tunnel6.c184
-rw-r--r--net/ipv6/udp.c1526
-rw-r--r--net/ipv6/udp_impl.h37
-rw-r--r--net/ipv6/udplite.c140
-rw-r--r--net/ipv6/xfrm6_input.c146
-rw-r--r--net/ipv6/xfrm6_mode_beet.c131
-rw-r--r--net/ipv6/xfrm6_mode_ro.c84
-rw-r--r--net/ipv6/xfrm6_mode_transport.c85
-rw-r--r--net/ipv6/xfrm6_mode_tunnel.c117
-rw-r--r--net/ipv6/xfrm6_output.c161
-rw-r--r--net/ipv6/xfrm6_policy.c357
-rw-r--r--net/ipv6/xfrm6_state.c197
-rw-r--r--net/ipv6/xfrm6_tunnel.c402
72 files changed, 48192 insertions, 0 deletions
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
new file mode 100644
index 00000000..36d7437a
--- /dev/null
+++ b/net/ipv6/Kconfig
@@ -0,0 +1,253 @@
+#
+# IPv6 configuration
+#
+
+# IPv6 as module will cause a CRASH if you try to unload it
+menuconfig IPV6
+ tristate "The IPv6 protocol"
+ default m
+ ---help---
+ This is complemental support for the IP version 6.
+ You will still be able to do traditional IPv4 networking as well.
+
+ For general information about IPv6, see
+ <http://playground.sun.com/pub/ipng/html/ipng-main.html>.
+ For Linux IPv6 development information, see <http://www.linux-ipv6.org>.
+ For specific information about IPv6 under Linux, read the HOWTO at
+ <http://www.bieringer.de/linux/IPv6/>.
+
+ To compile this protocol support as a module, choose M here: the
+ module will be called ipv6.
+
+if IPV6
+
+config IPV6_PRIVACY
+ bool "IPv6: Privacy Extensions (RFC 3041) support"
+ ---help---
+ Privacy Extensions for Stateless Address Autoconfiguration in IPv6
+ support. With this option, additional periodically-altered
+ pseudo-random global-scope unicast address(es) will be assigned to
+ your interface(s).
+
+ We use our standard pseudo-random algorithm to generate the
+ randomized interface identifier, instead of one described in RFC 3041.
+
+ By default the kernel does not generate temporary addresses.
+ To use temporary addresses, do
+
+ echo 2 >/proc/sys/net/ipv6/conf/all/use_tempaddr
+
+ See <file:Documentation/networking/ip-sysctl.txt> for details.
+
+config IPV6_ROUTER_PREF
+ bool "IPv6: Router Preference (RFC 4191) support"
+ ---help---
+ Router Preference is an optional extension to the Router
+ Advertisement message which improves the ability of hosts
+ to pick an appropriate router, especially when the hosts
+ are placed in a multi-homed network.
+
+ If unsure, say N.
+
+config IPV6_ROUTE_INFO
+ bool "IPv6: Route Information (RFC 4191) support (EXPERIMENTAL)"
+ depends on IPV6_ROUTER_PREF && EXPERIMENTAL
+ ---help---
+ This is experimental support of Route Information.
+
+ If unsure, say N.
+
+config IPV6_OPTIMISTIC_DAD
+ bool "IPv6: Enable RFC 4429 Optimistic DAD (EXPERIMENTAL)"
+ depends on EXPERIMENTAL
+ ---help---
+ This is experimental support for optimistic Duplicate
+ Address Detection. It allows for autoconfigured addresses
+ to be used more quickly.
+
+ If unsure, say N.
+
+config INET6_AH
+ tristate "IPv6: AH transformation"
+ select XFRM
+ select CRYPTO
+ select CRYPTO_HMAC
+ select CRYPTO_MD5
+ select CRYPTO_SHA1
+ ---help---
+ Support for IPsec AH.
+
+ If unsure, say Y.
+
+config INET6_ESP
+ tristate "IPv6: ESP transformation"
+ select XFRM
+ select CRYPTO
+ select CRYPTO_AUTHENC
+ select CRYPTO_HMAC
+ select CRYPTO_MD5
+ select CRYPTO_CBC
+ select CRYPTO_SHA1
+ select CRYPTO_DES
+ ---help---
+ Support for IPsec ESP.
+
+ If unsure, say Y.
+
+config INET6_IPCOMP
+ tristate "IPv6: IPComp transformation"
+ select INET6_XFRM_TUNNEL
+ select XFRM_IPCOMP
+ ---help---
+ Support for IP Payload Compression Protocol (IPComp) (RFC3173),
+ typically needed for IPsec.
+
+ If unsure, say Y.
+
+config IPV6_MIP6
+ tristate "IPv6: Mobility (EXPERIMENTAL)"
+ depends on EXPERIMENTAL
+ select XFRM
+ ---help---
+ Support for IPv6 Mobility described in RFC 3775.
+
+ If unsure, say N.
+
+config INET6_XFRM_TUNNEL
+ tristate
+ select INET6_TUNNEL
+ default n
+
+config INET6_TUNNEL
+ tristate
+ default n
+
+config INET6_XFRM_MODE_TRANSPORT
+ tristate "IPv6: IPsec transport mode"
+ default IPV6
+ select XFRM
+ ---help---
+ Support for IPsec transport mode.
+
+ If unsure, say Y.
+
+config INET6_XFRM_MODE_TUNNEL
+ tristate "IPv6: IPsec tunnel mode"
+ default IPV6
+ select XFRM
+ ---help---
+ Support for IPsec tunnel mode.
+
+ If unsure, say Y.
+
+config INET6_XFRM_MODE_BEET
+ tristate "IPv6: IPsec BEET mode"
+ default IPV6
+ select XFRM
+ ---help---
+ Support for IPsec BEET mode.
+
+ If unsure, say Y.
+
+config INET6_XFRM_MODE_ROUTEOPTIMIZATION
+ tristate "IPv6: MIPv6 route optimization mode (EXPERIMENTAL)"
+ depends on EXPERIMENTAL
+ select XFRM
+ ---help---
+ Support for MIPv6 route optimization mode.
+
+config IPV6_SIT
+ tristate "IPv6: IPv6-in-IPv4 tunnel (SIT driver)"
+ select INET_TUNNEL
+ select IPV6_NDISC_NODETYPE
+ default y
+ ---help---
+ Tunneling means encapsulating data of one protocol type within
+ another protocol and sending it over a channel that understands the
+ encapsulating protocol. This driver implements encapsulation of IPv6
+ into IPv4 packets. This is useful if you want to connect two IPv6
+ networks over an IPv4-only path.
+
+ Saying M here will produce a module called sit. If unsure, say Y.
+
+config IPV6_SIT_6RD
+ bool "IPv6: IPv6 Rapid Deployment (6RD) (EXPERIMENTAL)"
+ depends on IPV6_SIT && EXPERIMENTAL
+ default n
+ ---help---
+ IPv6 Rapid Deployment (6rd; draft-ietf-softwire-ipv6-6rd) builds upon
+ mechanisms of 6to4 (RFC3056) to enable a service provider to rapidly
+ deploy IPv6 unicast service to IPv4 sites to which it provides
+ customer premise equipment. Like 6to4, it utilizes stateless IPv6 in
+ IPv4 encapsulation in order to transit IPv4-only network
+ infrastructure. Unlike 6to4, a 6rd service provider uses an IPv6
+ prefix of its own in place of the fixed 6to4 prefix.
+
+ With this option enabled, the SIT driver offers 6rd functionality by
+ providing additional ioctl API to configure the IPv6 Prefix for in
+ stead of static 2002::/16 for 6to4.
+
+ If unsure, say N.
+
+config IPV6_NDISC_NODETYPE
+ bool
+
+config IPV6_TUNNEL
+ tristate "IPv6: IP-in-IPv6 tunnel (RFC2473)"
+ select INET6_TUNNEL
+ ---help---
+ Support for IPv6-in-IPv6 and IPv4-in-IPv6 tunnels described in
+ RFC 2473.
+
+ If unsure, say N.
+
+config IPV6_MULTIPLE_TABLES
+ bool "IPv6: Multiple Routing Tables"
+ depends on EXPERIMENTAL
+ select FIB_RULES
+ ---help---
+ Support multiple routing tables.
+
+config IPV6_SUBTREES
+ bool "IPv6: source address based routing"
+ depends on IPV6_MULTIPLE_TABLES
+ ---help---
+ Enable routing by source address or prefix.
+
+ The destination address is still the primary routing key, so mixing
+ normal and source prefix specific routes in the same routing table
+ may sometimes lead to unintended routing behavior. This can be
+ avoided by defining different routing tables for the normal and
+ source prefix specific routes.
+
+ If unsure, say N.
+
+config IPV6_MROUTE
+ bool "IPv6: multicast routing (EXPERIMENTAL)"
+ depends on IPV6 && EXPERIMENTAL
+ ---help---
+ Experimental support for IPv6 multicast forwarding.
+ If unsure, say N.
+
+config IPV6_MROUTE_MULTIPLE_TABLES
+ bool "IPv6: multicast policy routing"
+ depends on IPV6_MROUTE
+ select FIB_RULES
+ help
+ Normally, a multicast router runs a userspace daemon and decides
+ what to do with a multicast packet based on the source and
+ destination addresses. If you say Y here, the multicast router
+ will also be able to take interfaces and packet marks into
+ account and run multiple instances of userspace daemons
+ simultaneously, each one handling a single table.
+
+ If unsure, say N.
+
+config IPV6_PIMSM_V2
+ bool "IPv6: PIM-SM version 2 support (EXPERIMENTAL)"
+ depends on IPV6_MROUTE
+ ---help---
+ Support for IPv6 PIM multicast routing protocol PIM-SMv2.
+ If unsure, say N.
+
+endif # IPV6
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
new file mode 100644
index 00000000..753be5dd
--- /dev/null
+++ b/net/ipv6/Makefile
@@ -0,0 +1,42 @@
+#
+# Makefile for the Linux TCP/IP (INET6) layer.
+#
+
+obj-$(CONFIG_IPV6) += ipv6.o
+
+ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
+ addrlabel.o \
+ route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \
+ raw.o protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \
+ exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o
+
+ipv6-$(CONFIG_SYSCTL) = sysctl_net_ipv6.o
+ipv6-$(CONFIG_IPV6_MROUTE) += ip6mr.o
+
+ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \
+ xfrm6_output.o
+ipv6-$(CONFIG_NETFILTER) += netfilter.o
+ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o
+ipv6-$(CONFIG_PROC_FS) += proc.o
+ipv6-$(CONFIG_SYN_COOKIES) += syncookies.o
+
+ipv6-objs += $(ipv6-y)
+
+obj-$(CONFIG_INET6_AH) += ah6.o
+obj-$(CONFIG_INET6_ESP) += esp6.o
+obj-$(CONFIG_INET6_IPCOMP) += ipcomp6.o
+obj-$(CONFIG_INET6_XFRM_TUNNEL) += xfrm6_tunnel.o
+obj-$(CONFIG_INET6_TUNNEL) += tunnel6.o
+obj-$(CONFIG_INET6_XFRM_MODE_TRANSPORT) += xfrm6_mode_transport.o
+obj-$(CONFIG_INET6_XFRM_MODE_TUNNEL) += xfrm6_mode_tunnel.o
+obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o
+obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o
+obj-$(CONFIG_IPV6_MIP6) += mip6.o
+obj-$(CONFIG_NETFILTER) += netfilter/
+
+obj-$(CONFIG_IPV6_SIT) += sit.o
+obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o
+
+obj-y += addrconf_core.o exthdrs_core.o
+
+obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
new file mode 100644
index 00000000..7d5cb975
--- /dev/null
+++ b/net/ipv6/addrconf.c
@@ -0,0 +1,4884 @@
+/*
+ * IPv6 Address [auto]configuration
+ * Linux INET6 implementation
+ *
+ * Authors:
+ * Pedro Roque <roque@di.fc.ul.pt>
+ * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/*
+ * Changes:
+ *
+ * Janos Farkas : delete timer on ifdown
+ * <chexum@bankinf.banki.hu>
+ * Andi Kleen : kill double kfree on module
+ * unload.
+ * Maciej W. Rozycki : FDDI support
+ * sekiya@USAGI : Don't send too many RS
+ * packets.
+ * yoshfuji@USAGI : Fixed interval between DAD
+ * packets.
+ * YOSHIFUJI Hideaki @USAGI : improved accuracy of
+ * address validation timer.
+ * YOSHIFUJI Hideaki @USAGI : Privacy Extensions (RFC3041)
+ * support.
+ * Yuji SEKIYA @USAGI : Don't assign a same IPv6
+ * address on a same interface.
+ * YOSHIFUJI Hideaki @USAGI : ARCnet support
+ * YOSHIFUJI Hideaki @USAGI : convert /proc/net/if_inet6 to
+ * seq_file.
+ * YOSHIFUJI Hideaki @USAGI : improved source address
+ * selection; consider scope,
+ * status etc.
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <linux/if_addr.h>
+#include <linux/if_arp.h>
+#include <linux/if_arcnet.h>
+#include <linux/if_infiniband.h>
+#include <linux/route.h>
+#include <linux/inetdevice.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#ifdef CONFIG_SYSCTL
+#include <linux/sysctl.h>
+#endif
+#include <linux/capability.h>
+#include <linux/delay.h>
+#include <linux/notifier.h>
+#include <linux/string.h>
+
+#include <net/net_namespace.h>
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/ndisc.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
+#include <net/tcp.h>
+#include <net/ip.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+#include <linux/if_tunnel.h>
+#include <linux/rtnetlink.h>
+
+#ifdef CONFIG_IPV6_PRIVACY
+#include <linux/random.h>
+#endif
+
+#include <linux/uaccess.h>
+#include <asm/unaligned.h>
+
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/export.h>
+
+/* Set to 3 to get tracing... */
+#define ACONF_DEBUG 2
+
+#if ACONF_DEBUG >= 3
+#define ADBG(x) printk x
+#else
+#define ADBG(x)
+#endif
+
+#define INFINITY_LIFE_TIME 0xFFFFFFFF
+
+static inline u32 cstamp_delta(unsigned long cstamp)
+{
+ return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
+}
+
+#define ADDRCONF_TIMER_FUZZ_MINUS (HZ > 50 ? HZ/50 : 1)
+#define ADDRCONF_TIMER_FUZZ (HZ / 4)
+#define ADDRCONF_TIMER_FUZZ_MAX (HZ)
+
+#ifdef CONFIG_SYSCTL
+static void addrconf_sysctl_register(struct inet6_dev *idev);
+static void addrconf_sysctl_unregister(struct inet6_dev *idev);
+#else
+static inline void addrconf_sysctl_register(struct inet6_dev *idev)
+{
+}
+
+static inline void addrconf_sysctl_unregister(struct inet6_dev *idev)
+{
+}
+#endif
+
+#ifdef CONFIG_IPV6_PRIVACY
+static int __ipv6_regen_rndid(struct inet6_dev *idev);
+static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr);
+static void ipv6_regen_rndid(unsigned long data);
+#endif
+
+static int ipv6_generate_eui64(u8 *eui, struct net_device *dev);
+static int ipv6_count_addresses(struct inet6_dev *idev);
+
+/*
+ * Configured unicast address hash table
+ */
+static struct hlist_head inet6_addr_lst[IN6_ADDR_HSIZE];
+static DEFINE_SPINLOCK(addrconf_hash_lock);
+
+static void addrconf_verify(unsigned long);
+
+static DEFINE_TIMER(addr_chk_timer, addrconf_verify, 0, 0);
+static DEFINE_SPINLOCK(addrconf_verify_lock);
+
+static void addrconf_join_anycast(struct inet6_ifaddr *ifp);
+static void addrconf_leave_anycast(struct inet6_ifaddr *ifp);
+
+static void addrconf_type_change(struct net_device *dev,
+ unsigned long event);
+static int addrconf_ifdown(struct net_device *dev, int how);
+
+static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags);
+static void addrconf_dad_timer(unsigned long data);
+static void addrconf_dad_completed(struct inet6_ifaddr *ifp);
+static void addrconf_dad_run(struct inet6_dev *idev);
+static void addrconf_rs_timer(unsigned long data);
+static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
+static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
+
+static void inet6_prefix_notify(int event, struct inet6_dev *idev,
+ struct prefix_info *pinfo);
+static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
+ struct net_device *dev);
+
+static ATOMIC_NOTIFIER_HEAD(inet6addr_chain);
+
+static struct ipv6_devconf ipv6_devconf __read_mostly = {
+ .forwarding = 0,
+ .hop_limit = IPV6_DEFAULT_HOPLIMIT,
+ .mtu6 = IPV6_MIN_MTU,
+ .accept_ra = 1,
+ .accept_redirects = 1,
+ .autoconf = 1,
+ .force_mld_version = 0,
+ .dad_transmits = 1,
+ .rtr_solicits = MAX_RTR_SOLICITATIONS,
+ .rtr_solicit_interval = RTR_SOLICITATION_INTERVAL,
+ .rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY,
+#ifdef CONFIG_IPV6_PRIVACY
+ .use_tempaddr = 0,
+ .temp_valid_lft = TEMP_VALID_LIFETIME,
+ .temp_prefered_lft = TEMP_PREFERRED_LIFETIME,
+ .regen_max_retry = REGEN_MAX_RETRY,
+ .max_desync_factor = MAX_DESYNC_FACTOR,
+#endif
+ .max_addresses = IPV6_MAX_ADDRESSES,
+ .accept_ra_defrtr = 1,
+ .accept_ra_pinfo = 1,
+#ifdef CONFIG_IPV6_ROUTER_PREF
+ .accept_ra_rtr_pref = 1,
+ .rtr_probe_interval = 60 * HZ,
+#ifdef CONFIG_IPV6_ROUTE_INFO
+ .accept_ra_rt_info_max_plen = 0,
+#endif
+#endif
+ .proxy_ndp = 0,
+ .accept_source_route = 0, /* we do not accept RH0 by default. */
+ .disable_ipv6 = 0,
+ .accept_dad = 1,
+};
+
+static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
+ .forwarding = 0,
+ .hop_limit = IPV6_DEFAULT_HOPLIMIT,
+ .mtu6 = IPV6_MIN_MTU,
+ .accept_ra = 1,
+ .accept_redirects = 1,
+ .autoconf = 1,
+ .dad_transmits = 1,
+ .rtr_solicits = MAX_RTR_SOLICITATIONS,
+ .rtr_solicit_interval = RTR_SOLICITATION_INTERVAL,
+ .rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY,
+#ifdef CONFIG_IPV6_PRIVACY
+ .use_tempaddr = 0,
+ .temp_valid_lft = TEMP_VALID_LIFETIME,
+ .temp_prefered_lft = TEMP_PREFERRED_LIFETIME,
+ .regen_max_retry = REGEN_MAX_RETRY,
+ .max_desync_factor = MAX_DESYNC_FACTOR,
+#endif
+ .max_addresses = IPV6_MAX_ADDRESSES,
+ .accept_ra_defrtr = 1,
+ .accept_ra_pinfo = 1,
+#ifdef CONFIG_IPV6_ROUTER_PREF
+ .accept_ra_rtr_pref = 1,
+ .rtr_probe_interval = 60 * HZ,
+#ifdef CONFIG_IPV6_ROUTE_INFO
+ .accept_ra_rt_info_max_plen = 0,
+#endif
+#endif
+ .proxy_ndp = 0,
+ .accept_source_route = 0, /* we do not accept RH0 by default. */
+ .disable_ipv6 = 0,
+ .accept_dad = 1,
+};
+
+/* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */
+const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
+const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
+const struct in6_addr in6addr_linklocal_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
+const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT;
+
+/* Check if a valid qdisc is available */
+static inline bool addrconf_qdisc_ok(const struct net_device *dev)
+{
+ return !qdisc_tx_is_noop(dev);
+}
+
+/* Check if a route is valid prefix route */
+static inline int addrconf_is_prefix_route(const struct rt6_info *rt)
+{
+ return (rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0;
+}
+
+static void addrconf_del_timer(struct inet6_ifaddr *ifp)
+{
+ if (del_timer(&ifp->timer))
+ __in6_ifa_put(ifp);
+}
+
+enum addrconf_timer_t {
+ AC_NONE,
+ AC_DAD,
+ AC_RS,
+};
+
+static void addrconf_mod_timer(struct inet6_ifaddr *ifp,
+ enum addrconf_timer_t what,
+ unsigned long when)
+{
+ if (!del_timer(&ifp->timer))
+ in6_ifa_hold(ifp);
+
+ switch (what) {
+ case AC_DAD:
+ ifp->timer.function = addrconf_dad_timer;
+ break;
+ case AC_RS:
+ ifp->timer.function = addrconf_rs_timer;
+ break;
+ default:
+ break;
+ }
+ ifp->timer.expires = jiffies + when;
+ add_timer(&ifp->timer);
+}
+
+static int snmp6_alloc_dev(struct inet6_dev *idev)
+{
+ if (snmp_mib_init((void __percpu **)idev->stats.ipv6,
+ sizeof(struct ipstats_mib),
+ __alignof__(struct ipstats_mib)) < 0)
+ goto err_ip;
+ idev->stats.icmpv6dev = kzalloc(sizeof(struct icmpv6_mib_device),
+ GFP_KERNEL);
+ if (!idev->stats.icmpv6dev)
+ goto err_icmp;
+ idev->stats.icmpv6msgdev = kzalloc(sizeof(struct icmpv6msg_mib_device),
+ GFP_KERNEL);
+ if (!idev->stats.icmpv6msgdev)
+ goto err_icmpmsg;
+
+ return 0;
+
+err_icmpmsg:
+ kfree(idev->stats.icmpv6dev);
+err_icmp:
+ snmp_mib_free((void __percpu **)idev->stats.ipv6);
+err_ip:
+ return -ENOMEM;
+}
+
+static void snmp6_free_dev(struct inet6_dev *idev)
+{
+ kfree(idev->stats.icmpv6msgdev);
+ kfree(idev->stats.icmpv6dev);
+ snmp_mib_free((void __percpu **)idev->stats.ipv6);
+}
+
+/* Nobody refers to this device, we may destroy it. */
+
+void in6_dev_finish_destroy(struct inet6_dev *idev)
+{
+ struct net_device *dev = idev->dev;
+
+ WARN_ON(!list_empty(&idev->addr_list));
+ WARN_ON(idev->mc_list != NULL);
+
+#ifdef NET_REFCNT_DEBUG
+ printk(KERN_DEBUG "in6_dev_finish_destroy: %s\n", dev ? dev->name : "NIL");
+#endif
+ dev_put(dev);
+ if (!idev->dead) {
+ pr_warning("Freeing alive inet6 device %p\n", idev);
+ return;
+ }
+ snmp6_free_dev(idev);
+ kfree_rcu(idev, rcu);
+}
+
+EXPORT_SYMBOL(in6_dev_finish_destroy);
+
+static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
+{
+ struct inet6_dev *ndev;
+
+ ASSERT_RTNL();
+
+ if (dev->mtu < IPV6_MIN_MTU)
+ return NULL;
+
+ ndev = kzalloc(sizeof(struct inet6_dev), GFP_KERNEL);
+
+ if (ndev == NULL)
+ return NULL;
+
+ rwlock_init(&ndev->lock);
+ ndev->dev = dev;
+ INIT_LIST_HEAD(&ndev->addr_list);
+
+ memcpy(&ndev->cnf, dev_net(dev)->ipv6.devconf_dflt, sizeof(ndev->cnf));
+ ndev->cnf.mtu6 = dev->mtu;
+ ndev->cnf.sysctl = NULL;
+ ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl);
+ if (ndev->nd_parms == NULL) {
+ kfree(ndev);
+ return NULL;
+ }
+ if (ndev->cnf.forwarding)
+ dev_disable_lro(dev);
+ /* We refer to the device */
+ dev_hold(dev);
+
+ if (snmp6_alloc_dev(ndev) < 0) {
+ ADBG((KERN_WARNING
+ "%s(): cannot allocate memory for statistics; dev=%s.\n",
+ __func__, dev->name));
+ neigh_parms_release(&nd_tbl, ndev->nd_parms);
+ dev_put(dev);
+ kfree(ndev);
+ return NULL;
+ }
+
+ if (snmp6_register_dev(ndev) < 0) {
+ ADBG((KERN_WARNING
+ "%s(): cannot create /proc/net/dev_snmp6/%s\n",
+ __func__, dev->name));
+ neigh_parms_release(&nd_tbl, ndev->nd_parms);
+ ndev->dead = 1;
+ in6_dev_finish_destroy(ndev);
+ return NULL;
+ }
+
+ /* One reference from device. We must do this before
+ * we invoke __ipv6_regen_rndid().
+ */
+ in6_dev_hold(ndev);
+
+ if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
+ ndev->cnf.accept_dad = -1;
+
+#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
+ if (dev->type == ARPHRD_SIT && (dev->priv_flags & IFF_ISATAP)) {
+ printk(KERN_INFO
+ "%s: Disabled Multicast RS\n",
+ dev->name);
+ ndev->cnf.rtr_solicits = 0;
+ }
+#endif
+
+#ifdef CONFIG_IPV6_PRIVACY
+ INIT_LIST_HEAD(&ndev->tempaddr_list);
+ setup_timer(&ndev->regen_timer, ipv6_regen_rndid, (unsigned long)ndev);
+ if ((dev->flags&IFF_LOOPBACK) ||
+ dev->type == ARPHRD_TUNNEL ||
+ dev->type == ARPHRD_TUNNEL6 ||
+ dev->type == ARPHRD_SIT ||
+ dev->type == ARPHRD_NONE) {
+ ndev->cnf.use_tempaddr = -1;
+ } else {
+ in6_dev_hold(ndev);
+ ipv6_regen_rndid((unsigned long) ndev);
+ }
+#endif
+
+ if (netif_running(dev) && addrconf_qdisc_ok(dev))
+ ndev->if_flags |= IF_READY;
+
+ ipv6_mc_init_dev(ndev);
+ ndev->tstamp = jiffies;
+ addrconf_sysctl_register(ndev);
+ /* protected by rtnl_lock */
+ rcu_assign_pointer(dev->ip6_ptr, ndev);
+
+ /* Join all-node multicast group */
+ ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes);
+
+ /* Join all-router multicast group if forwarding is set */
+ if (ndev->cnf.forwarding && (dev->flags & IFF_MULTICAST))
+ ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters);
+
+ return ndev;
+}
+
+static struct inet6_dev * ipv6_find_idev(struct net_device *dev)
+{
+ struct inet6_dev *idev;
+
+ ASSERT_RTNL();
+
+ idev = __in6_dev_get(dev);
+ if (!idev) {
+ idev = ipv6_add_dev(dev);
+ if (!idev)
+ return NULL;
+ }
+
+ if (dev->flags&IFF_UP)
+ ipv6_mc_up(idev);
+ return idev;
+}
+
+#ifdef CONFIG_SYSCTL
+static void dev_forward_change(struct inet6_dev *idev)
+{
+ struct net_device *dev;
+ struct inet6_ifaddr *ifa;
+
+ if (!idev)
+ return;
+ dev = idev->dev;
+ if (idev->cnf.forwarding)
+ dev_disable_lro(dev);
+ if (dev && (dev->flags & IFF_MULTICAST)) {
+ if (idev->cnf.forwarding)
+ ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters);
+ else
+ ipv6_dev_mc_dec(dev, &in6addr_linklocal_allrouters);
+ }
+
+ list_for_each_entry(ifa, &idev->addr_list, if_list) {
+ if (ifa->flags&IFA_F_TENTATIVE)
+ continue;
+ if (idev->cnf.forwarding)
+ addrconf_join_anycast(ifa);
+ else
+ addrconf_leave_anycast(ifa);
+ }
+}
+
+
+static void addrconf_forward_change(struct net *net, __s32 newf)
+{
+ struct net_device *dev;
+ struct inet6_dev *idev;
+
+ rcu_read_lock();
+ for_each_netdev_rcu(net, dev) {
+ idev = __in6_dev_get(dev);
+ if (idev) {
+ int changed = (!idev->cnf.forwarding) ^ (!newf);
+ idev->cnf.forwarding = newf;
+ if (changed)
+ dev_forward_change(idev);
+ }
+ }
+ rcu_read_unlock();
+}
+
+static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf)
+{
+ struct net *net;
+ int old;
+
+ if (!rtnl_trylock())
+ return restart_syscall();
+
+ net = (struct net *)table->extra2;
+ old = *p;
+ *p = newf;
+
+ if (p == &net->ipv6.devconf_dflt->forwarding) {
+ rtnl_unlock();
+ return 0;
+ }
+
+ if (p == &net->ipv6.devconf_all->forwarding) {
+ net->ipv6.devconf_dflt->forwarding = newf;
+ addrconf_forward_change(net, newf);
+ } else if ((!newf) ^ (!old))
+ dev_forward_change((struct inet6_dev *)table->extra1);
+ rtnl_unlock();
+
+ if (newf)
+ rt6_purge_dflt_routers(net);
+ return 1;
+}
+#endif
+
+/* Nobody refers to this ifaddr, destroy it */
+void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
+{
+ WARN_ON(!hlist_unhashed(&ifp->addr_lst));
+
+#ifdef NET_REFCNT_DEBUG
+ printk(KERN_DEBUG "inet6_ifa_finish_destroy\n");
+#endif
+
+ in6_dev_put(ifp->idev);
+
+ if (del_timer(&ifp->timer))
+ pr_notice("Timer is still running, when freeing ifa=%p\n", ifp);
+
+ if (ifp->state != INET6_IFADDR_STATE_DEAD) {
+ pr_warning("Freeing alive inet6 address %p\n", ifp);
+ return;
+ }
+ dst_release(&ifp->rt->dst);
+
+ kfree_rcu(ifp, rcu);
+}
+
+static void
+ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp)
+{
+ struct list_head *p;
+ int ifp_scope = ipv6_addr_src_scope(&ifp->addr);
+
+ /*
+ * Each device address list is sorted in order of scope -
+ * global before linklocal.
+ */
+ list_for_each(p, &idev->addr_list) {
+ struct inet6_ifaddr *ifa
+ = list_entry(p, struct inet6_ifaddr, if_list);
+ if (ifp_scope >= ipv6_addr_src_scope(&ifa->addr))
+ break;
+ }
+
+ list_add_tail(&ifp->if_list, p);
+}
+
+static u32 ipv6_addr_hash(const struct in6_addr *addr)
+{
+ /*
+ * We perform the hash function over the last 64 bits of the address
+ * This will include the IEEE address token on links that support it.
+ */
+ return jhash_2words((__force u32)addr->s6_addr32[2],
+ (__force u32)addr->s6_addr32[3], 0)
+ & (IN6_ADDR_HSIZE - 1);
+}
+
+/* On success it returns ifp with increased reference count */
+
+static struct inet6_ifaddr *
+ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
+ int scope, u32 flags)
+{
+ struct inet6_ifaddr *ifa = NULL;
+ struct rt6_info *rt;
+ unsigned int hash;
+ int err = 0;
+ int addr_type = ipv6_addr_type(addr);
+
+ if (addr_type == IPV6_ADDR_ANY ||
+ addr_type & IPV6_ADDR_MULTICAST ||
+ (!(idev->dev->flags & IFF_LOOPBACK) &&
+ addr_type & IPV6_ADDR_LOOPBACK))
+ return ERR_PTR(-EADDRNOTAVAIL);
+
+ rcu_read_lock_bh();
+ if (idev->dead) {
+ err = -ENODEV; /*XXX*/
+ goto out2;
+ }
+
+ if (idev->cnf.disable_ipv6) {
+ err = -EACCES;
+ goto out2;
+ }
+
+ spin_lock(&addrconf_hash_lock);
+
+ /* Ignore adding duplicate addresses on an interface */
+ if (ipv6_chk_same_addr(dev_net(idev->dev), addr, idev->dev)) {
+ ADBG(("ipv6_add_addr: already assigned\n"));
+ err = -EEXIST;
+ goto out;
+ }
+
+ ifa = kzalloc(sizeof(struct inet6_ifaddr), GFP_ATOMIC);
+
+ if (ifa == NULL) {
+ ADBG(("ipv6_add_addr: malloc failed\n"));
+ err = -ENOBUFS;
+ goto out;
+ }
+
+ rt = addrconf_dst_alloc(idev, addr, false);
+ if (IS_ERR(rt)) {
+ err = PTR_ERR(rt);
+ goto out;
+ }
+
+ ifa->addr = *addr;
+
+ spin_lock_init(&ifa->lock);
+ spin_lock_init(&ifa->state_lock);
+ init_timer(&ifa->timer);
+ INIT_HLIST_NODE(&ifa->addr_lst);
+ ifa->timer.data = (unsigned long) ifa;
+ ifa->scope = scope;
+ ifa->prefix_len = pfxlen;
+ ifa->flags = flags | IFA_F_TENTATIVE;
+ ifa->cstamp = ifa->tstamp = jiffies;
+
+ ifa->rt = rt;
+
+ ifa->idev = idev;
+ in6_dev_hold(idev);
+ /* For caller */
+ in6_ifa_hold(ifa);
+
+ /* Add to big hash table */
+ hash = ipv6_addr_hash(addr);
+
+ hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]);
+ spin_unlock(&addrconf_hash_lock);
+
+ write_lock(&idev->lock);
+ /* Add to inet6_dev unicast addr list. */
+ ipv6_link_dev_addr(idev, ifa);
+
+#ifdef CONFIG_IPV6_PRIVACY
+ if (ifa->flags&IFA_F_TEMPORARY) {
+ list_add(&ifa->tmp_list, &idev->tempaddr_list);
+ in6_ifa_hold(ifa);
+ }
+#endif
+
+ in6_ifa_hold(ifa);
+ write_unlock(&idev->lock);
+out2:
+ rcu_read_unlock_bh();
+
+ if (likely(err == 0))
+ atomic_notifier_call_chain(&inet6addr_chain, NETDEV_UP, ifa);
+ else {
+ kfree(ifa);
+ ifa = ERR_PTR(err);
+ }
+
+ return ifa;
+out:
+ spin_unlock(&addrconf_hash_lock);
+ goto out2;
+}
+
+/* This function wants to get referenced ifp and releases it before return */
+
+static void ipv6_del_addr(struct inet6_ifaddr *ifp)
+{
+ struct inet6_ifaddr *ifa, *ifn;
+ struct inet6_dev *idev = ifp->idev;
+ int state;
+ int deleted = 0, onlink = 0;
+ unsigned long expires = jiffies;
+
+ spin_lock_bh(&ifp->state_lock);
+ state = ifp->state;
+ ifp->state = INET6_IFADDR_STATE_DEAD;
+ spin_unlock_bh(&ifp->state_lock);
+
+ if (state == INET6_IFADDR_STATE_DEAD)
+ goto out;
+
+ spin_lock_bh(&addrconf_hash_lock);
+ hlist_del_init_rcu(&ifp->addr_lst);
+ spin_unlock_bh(&addrconf_hash_lock);
+
+ write_lock_bh(&idev->lock);
+#ifdef CONFIG_IPV6_PRIVACY
+ if (ifp->flags&IFA_F_TEMPORARY) {
+ list_del(&ifp->tmp_list);
+ if (ifp->ifpub) {
+ in6_ifa_put(ifp->ifpub);
+ ifp->ifpub = NULL;
+ }
+ __in6_ifa_put(ifp);
+ }
+#endif
+
+ list_for_each_entry_safe(ifa, ifn, &idev->addr_list, if_list) {
+ if (ifa == ifp) {
+ list_del_init(&ifp->if_list);
+ __in6_ifa_put(ifp);
+
+ if (!(ifp->flags & IFA_F_PERMANENT) || onlink > 0)
+ break;
+ deleted = 1;
+ continue;
+ } else if (ifp->flags & IFA_F_PERMANENT) {
+ if (ipv6_prefix_equal(&ifa->addr, &ifp->addr,
+ ifp->prefix_len)) {
+ if (ifa->flags & IFA_F_PERMANENT) {
+ onlink = 1;
+ if (deleted)
+ break;
+ } else {
+ unsigned long lifetime;
+
+ if (!onlink)
+ onlink = -1;
+
+ spin_lock(&ifa->lock);
+
+ lifetime = addrconf_timeout_fixup(ifa->valid_lft, HZ);
+ /*
+ * Note: Because this address is
+ * not permanent, lifetime <
+ * LONG_MAX / HZ here.
+ */
+ if (time_before(expires,
+ ifa->tstamp + lifetime * HZ))
+ expires = ifa->tstamp + lifetime * HZ;
+ spin_unlock(&ifa->lock);
+ }
+ }
+ }
+ }
+ write_unlock_bh(&idev->lock);
+
+ addrconf_del_timer(ifp);
+
+ ipv6_ifa_notify(RTM_DELADDR, ifp);
+
+ atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifp);
+
+ /*
+ * Purge or update corresponding prefix
+ *
+ * 1) we don't purge prefix here if address was not permanent.
+ * prefix is managed by its own lifetime.
+ * 2) if there're no addresses, delete prefix.
+ * 3) if there're still other permanent address(es),
+ * corresponding prefix is still permanent.
+ * 4) otherwise, update prefix lifetime to the
+ * longest valid lifetime among the corresponding
+ * addresses on the device.
+ * Note: subsequent RA will update lifetime.
+ *
+ * --yoshfuji
+ */
+ if ((ifp->flags & IFA_F_PERMANENT) && onlink < 1) {
+ struct in6_addr prefix;
+ struct rt6_info *rt;
+ struct net *net = dev_net(ifp->idev->dev);
+ ipv6_addr_prefix(&prefix, &ifp->addr, ifp->prefix_len);
+ rt = rt6_lookup(net, &prefix, NULL, ifp->idev->dev->ifindex, 1);
+
+ if (rt && addrconf_is_prefix_route(rt)) {
+ if (onlink == 0) {
+ ip6_del_rt(rt);
+ rt = NULL;
+ } else if (!(rt->rt6i_flags & RTF_EXPIRES)) {
+ rt6_set_expires(rt, expires);
+ }
+ }
+ dst_release(&rt->dst);
+ }
+
+ /* clean up prefsrc entries */
+ rt6_remove_prefsrc(ifp);
+out:
+ in6_ifa_put(ifp);
+}
+
+#ifdef CONFIG_IPV6_PRIVACY
+static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *ift)
+{
+ struct inet6_dev *idev = ifp->idev;
+ struct in6_addr addr, *tmpaddr;
+ unsigned long tmp_prefered_lft, tmp_valid_lft, tmp_tstamp, age;
+ unsigned long regen_advance;
+ int tmp_plen;
+ int ret = 0;
+ int max_addresses;
+ u32 addr_flags;
+ unsigned long now = jiffies;
+
+ write_lock(&idev->lock);
+ if (ift) {
+ spin_lock_bh(&ift->lock);
+ memcpy(&addr.s6_addr[8], &ift->addr.s6_addr[8], 8);
+ spin_unlock_bh(&ift->lock);
+ tmpaddr = &addr;
+ } else {
+ tmpaddr = NULL;
+ }
+retry:
+ in6_dev_hold(idev);
+ if (idev->cnf.use_tempaddr <= 0) {
+ write_unlock(&idev->lock);
+ printk(KERN_INFO
+ "ipv6_create_tempaddr(): use_tempaddr is disabled.\n");
+ in6_dev_put(idev);
+ ret = -1;
+ goto out;
+ }
+ spin_lock_bh(&ifp->lock);
+ if (ifp->regen_count++ >= idev->cnf.regen_max_retry) {
+ idev->cnf.use_tempaddr = -1; /*XXX*/
+ spin_unlock_bh(&ifp->lock);
+ write_unlock(&idev->lock);
+ printk(KERN_WARNING
+ "ipv6_create_tempaddr(): regeneration time exceeded. disabled temporary address support.\n");
+ in6_dev_put(idev);
+ ret = -1;
+ goto out;
+ }
+ in6_ifa_hold(ifp);
+ memcpy(addr.s6_addr, ifp->addr.s6_addr, 8);
+ if (__ipv6_try_regen_rndid(idev, tmpaddr) < 0) {
+ spin_unlock_bh(&ifp->lock);
+ write_unlock(&idev->lock);
+ printk(KERN_WARNING
+ "ipv6_create_tempaddr(): regeneration of randomized interface id failed.\n");
+ in6_ifa_put(ifp);
+ in6_dev_put(idev);
+ ret = -1;
+ goto out;
+ }
+ memcpy(&addr.s6_addr[8], idev->rndid, 8);
+ age = (now - ifp->tstamp) / HZ;
+ tmp_valid_lft = min_t(__u32,
+ ifp->valid_lft,
+ idev->cnf.temp_valid_lft + age);
+ tmp_prefered_lft = min_t(__u32,
+ ifp->prefered_lft,
+ idev->cnf.temp_prefered_lft + age -
+ idev->cnf.max_desync_factor);
+ tmp_plen = ifp->prefix_len;
+ max_addresses = idev->cnf.max_addresses;
+ tmp_tstamp = ifp->tstamp;
+ spin_unlock_bh(&ifp->lock);
+
+ regen_advance = idev->cnf.regen_max_retry *
+ idev->cnf.dad_transmits *
+ idev->nd_parms->retrans_time / HZ;
+ write_unlock(&idev->lock);
+
+ /* A temporary address is created only if this calculated Preferred
+ * Lifetime is greater than REGEN_ADVANCE time units. In particular,
+ * an implementation must not create a temporary address with a zero
+ * Preferred Lifetime.
+ */
+ if (tmp_prefered_lft <= regen_advance) {
+ in6_ifa_put(ifp);
+ in6_dev_put(idev);
+ ret = -1;
+ goto out;
+ }
+
+ addr_flags = IFA_F_TEMPORARY;
+ /* set in addrconf_prefix_rcv() */
+ if (ifp->flags & IFA_F_OPTIMISTIC)
+ addr_flags |= IFA_F_OPTIMISTIC;
+
+ ift = !max_addresses ||
+ ipv6_count_addresses(idev) < max_addresses ?
+ ipv6_add_addr(idev, &addr, tmp_plen,
+ ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK,
+ addr_flags) : NULL;
+ if (!ift || IS_ERR(ift)) {
+ in6_ifa_put(ifp);
+ in6_dev_put(idev);
+ printk(KERN_INFO
+ "ipv6_create_tempaddr(): retry temporary address regeneration.\n");
+ tmpaddr = &addr;
+ write_lock(&idev->lock);
+ goto retry;
+ }
+
+ spin_lock_bh(&ift->lock);
+ ift->ifpub = ifp;
+ ift->valid_lft = tmp_valid_lft;
+ ift->prefered_lft = tmp_prefered_lft;
+ ift->cstamp = now;
+ ift->tstamp = tmp_tstamp;
+ spin_unlock_bh(&ift->lock);
+
+ addrconf_dad_start(ift, 0);
+ in6_ifa_put(ift);
+ in6_dev_put(idev);
+out:
+ return ret;
+}
+#endif
+
+/*
+ * Choose an appropriate source address (RFC3484)
+ */
+enum {
+ IPV6_SADDR_RULE_INIT = 0,
+ IPV6_SADDR_RULE_LOCAL,
+ IPV6_SADDR_RULE_SCOPE,
+ IPV6_SADDR_RULE_PREFERRED,
+#ifdef CONFIG_IPV6_MIP6
+ IPV6_SADDR_RULE_HOA,
+#endif
+ IPV6_SADDR_RULE_OIF,
+ IPV6_SADDR_RULE_LABEL,
+#ifdef CONFIG_IPV6_PRIVACY
+ IPV6_SADDR_RULE_PRIVACY,
+#endif
+ IPV6_SADDR_RULE_ORCHID,
+ IPV6_SADDR_RULE_PREFIX,
+ IPV6_SADDR_RULE_MAX
+};
+
+struct ipv6_saddr_score {
+ int rule;
+ int addr_type;
+ struct inet6_ifaddr *ifa;
+ DECLARE_BITMAP(scorebits, IPV6_SADDR_RULE_MAX);
+ int scopedist;
+ int matchlen;
+};
+
+struct ipv6_saddr_dst {
+ const struct in6_addr *addr;
+ int ifindex;
+ int scope;
+ int label;
+ unsigned int prefs;
+};
+
+static inline int ipv6_saddr_preferred(int type)
+{
+ if (type & (IPV6_ADDR_MAPPED|IPV6_ADDR_COMPATv4|IPV6_ADDR_LOOPBACK))
+ return 1;
+ return 0;
+}
+
+static int ipv6_get_saddr_eval(struct net *net,
+ struct ipv6_saddr_score *score,
+ struct ipv6_saddr_dst *dst,
+ int i)
+{
+ int ret;
+
+ if (i <= score->rule) {
+ switch (i) {
+ case IPV6_SADDR_RULE_SCOPE:
+ ret = score->scopedist;
+ break;
+ case IPV6_SADDR_RULE_PREFIX:
+ ret = score->matchlen;
+ break;
+ default:
+ ret = !!test_bit(i, score->scorebits);
+ }
+ goto out;
+ }
+
+ switch (i) {
+ case IPV6_SADDR_RULE_INIT:
+ /* Rule 0: remember if hiscore is not ready yet */
+ ret = !!score->ifa;
+ break;
+ case IPV6_SADDR_RULE_LOCAL:
+ /* Rule 1: Prefer same address */
+ ret = ipv6_addr_equal(&score->ifa->addr, dst->addr);
+ break;
+ case IPV6_SADDR_RULE_SCOPE:
+ /* Rule 2: Prefer appropriate scope
+ *
+ * ret
+ * ^
+ * -1 | d 15
+ * ---+--+-+---> scope
+ * |
+ * | d is scope of the destination.
+ * B-d | \
+ * | \ <- smaller scope is better if
+ * B-15 | \ if scope is enough for destinaion.
+ * | ret = B - scope (-1 <= scope >= d <= 15).
+ * d-C-1 | /
+ * |/ <- greater is better
+ * -C / if scope is not enough for destination.
+ * /| ret = scope - C (-1 <= d < scope <= 15).
+ *
+ * d - C - 1 < B -15 (for all -1 <= d <= 15).
+ * C > d + 14 - B >= 15 + 14 - B = 29 - B.
+ * Assume B = 0 and we get C > 29.
+ */
+ ret = __ipv6_addr_src_scope(score->addr_type);
+ if (ret >= dst->scope)
+ ret = -ret;
+ else
+ ret -= 128; /* 30 is enough */
+ score->scopedist = ret;
+ break;
+ case IPV6_SADDR_RULE_PREFERRED:
+ /* Rule 3: Avoid deprecated and optimistic addresses */
+ ret = ipv6_saddr_preferred(score->addr_type) ||
+ !(score->ifa->flags & (IFA_F_DEPRECATED|IFA_F_OPTIMISTIC));
+ break;
+#ifdef CONFIG_IPV6_MIP6
+ case IPV6_SADDR_RULE_HOA:
+ {
+ /* Rule 4: Prefer home address */
+ int prefhome = !(dst->prefs & IPV6_PREFER_SRC_COA);
+ ret = !(score->ifa->flags & IFA_F_HOMEADDRESS) ^ prefhome;
+ break;
+ }
+#endif
+ case IPV6_SADDR_RULE_OIF:
+ /* Rule 5: Prefer outgoing interface */
+ ret = (!dst->ifindex ||
+ dst->ifindex == score->ifa->idev->dev->ifindex);
+ break;
+ case IPV6_SADDR_RULE_LABEL:
+ /* Rule 6: Prefer matching label */
+ ret = ipv6_addr_label(net,
+ &score->ifa->addr, score->addr_type,
+ score->ifa->idev->dev->ifindex) == dst->label;
+ break;
+#ifdef CONFIG_IPV6_PRIVACY
+ case IPV6_SADDR_RULE_PRIVACY:
+ {
+ /* Rule 7: Prefer public address
+ * Note: prefer temporary address if use_tempaddr >= 2
+ */
+ int preftmp = dst->prefs & (IPV6_PREFER_SRC_PUBLIC|IPV6_PREFER_SRC_TMP) ?
+ !!(dst->prefs & IPV6_PREFER_SRC_TMP) :
+ score->ifa->idev->cnf.use_tempaddr >= 2;
+ ret = (!(score->ifa->flags & IFA_F_TEMPORARY)) ^ preftmp;
+ break;
+ }
+#endif
+ case IPV6_SADDR_RULE_ORCHID:
+ /* Rule 8-: Prefer ORCHID vs ORCHID or
+ * non-ORCHID vs non-ORCHID
+ */
+ ret = !(ipv6_addr_orchid(&score->ifa->addr) ^
+ ipv6_addr_orchid(dst->addr));
+ break;
+ case IPV6_SADDR_RULE_PREFIX:
+ /* Rule 8: Use longest matching prefix */
+ score->matchlen = ret = ipv6_addr_diff(&score->ifa->addr,
+ dst->addr);
+ break;
+ default:
+ ret = 0;
+ }
+
+ if (ret)
+ __set_bit(i, score->scorebits);
+ score->rule = i;
+out:
+ return ret;
+}
+
+int ipv6_dev_get_saddr(struct net *net, struct net_device *dst_dev,
+ const struct in6_addr *daddr, unsigned int prefs,
+ struct in6_addr *saddr)
+{
+ struct ipv6_saddr_score scores[2],
+ *score = &scores[0], *hiscore = &scores[1];
+ struct ipv6_saddr_dst dst;
+ struct net_device *dev;
+ int dst_type;
+
+ dst_type = __ipv6_addr_type(daddr);
+ dst.addr = daddr;
+ dst.ifindex = dst_dev ? dst_dev->ifindex : 0;
+ dst.scope = __ipv6_addr_src_scope(dst_type);
+ dst.label = ipv6_addr_label(net, daddr, dst_type, dst.ifindex);
+ dst.prefs = prefs;
+
+ hiscore->rule = -1;
+ hiscore->ifa = NULL;
+
+ rcu_read_lock();
+
+ for_each_netdev_rcu(net, dev) {
+ struct inet6_dev *idev;
+
+ /* Candidate Source Address (section 4)
+ * - multicast and link-local destination address,
+ * the set of candidate source address MUST only
+ * include addresses assigned to interfaces
+ * belonging to the same link as the outgoing
+ * interface.
+ * (- For site-local destination addresses, the
+ * set of candidate source addresses MUST only
+ * include addresses assigned to interfaces
+ * belonging to the same site as the outgoing
+ * interface.)
+ */
+ if (((dst_type & IPV6_ADDR_MULTICAST) ||
+ dst.scope <= IPV6_ADDR_SCOPE_LINKLOCAL) &&
+ dst.ifindex && dev->ifindex != dst.ifindex)
+ continue;
+
+ idev = __in6_dev_get(dev);
+ if (!idev)
+ continue;
+
+ read_lock_bh(&idev->lock);
+ list_for_each_entry(score->ifa, &idev->addr_list, if_list) {
+ int i;
+
+ /*
+ * - Tentative Address (RFC2462 section 5.4)
+ * - A tentative address is not considered
+ * "assigned to an interface" in the traditional
+ * sense, unless it is also flagged as optimistic.
+ * - Candidate Source Address (section 4)
+ * - In any case, anycast addresses, multicast
+ * addresses, and the unspecified address MUST
+ * NOT be included in a candidate set.
+ */
+ if ((score->ifa->flags & IFA_F_TENTATIVE) &&
+ (!(score->ifa->flags & IFA_F_OPTIMISTIC)))
+ continue;
+
+ score->addr_type = __ipv6_addr_type(&score->ifa->addr);
+
+ if (unlikely(score->addr_type == IPV6_ADDR_ANY ||
+ score->addr_type & IPV6_ADDR_MULTICAST)) {
+ LIMIT_NETDEBUG(KERN_DEBUG
+ "ADDRCONF: unspecified / multicast address "
+ "assigned as unicast address on %s",
+ dev->name);
+ continue;
+ }
+
+ score->rule = -1;
+ bitmap_zero(score->scorebits, IPV6_SADDR_RULE_MAX);
+
+ for (i = 0; i < IPV6_SADDR_RULE_MAX; i++) {
+ int minihiscore, miniscore;
+
+ minihiscore = ipv6_get_saddr_eval(net, hiscore, &dst, i);
+ miniscore = ipv6_get_saddr_eval(net, score, &dst, i);
+
+ if (minihiscore > miniscore) {
+ if (i == IPV6_SADDR_RULE_SCOPE &&
+ score->scopedist > 0) {
+ /*
+ * special case:
+ * each remaining entry
+ * has too small (not enough)
+ * scope, because ifa entries
+ * are sorted by their scope
+ * values.
+ */
+ goto try_nextdev;
+ }
+ break;
+ } else if (minihiscore < miniscore) {
+ if (hiscore->ifa)
+ in6_ifa_put(hiscore->ifa);
+
+ in6_ifa_hold(score->ifa);
+
+ swap(hiscore, score);
+
+ /* restore our iterator */
+ score->ifa = hiscore->ifa;
+
+ break;
+ }
+ }
+ }
+try_nextdev:
+ read_unlock_bh(&idev->lock);
+ }
+ rcu_read_unlock();
+
+ if (!hiscore->ifa)
+ return -EADDRNOTAVAIL;
+
+ *saddr = hiscore->ifa->addr;
+ in6_ifa_put(hiscore->ifa);
+ return 0;
+}
+EXPORT_SYMBOL(ipv6_dev_get_saddr);
+
+int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
+ unsigned char banned_flags)
+{
+ struct inet6_dev *idev;
+ int err = -EADDRNOTAVAIL;
+
+ rcu_read_lock();
+ idev = __in6_dev_get(dev);
+ if (idev) {
+ struct inet6_ifaddr *ifp;
+
+ read_lock_bh(&idev->lock);
+ list_for_each_entry(ifp, &idev->addr_list, if_list) {
+ if (ifp->scope == IFA_LINK &&
+ !(ifp->flags & banned_flags)) {
+ *addr = ifp->addr;
+ err = 0;
+ break;
+ }
+ }
+ read_unlock_bh(&idev->lock);
+ }
+ rcu_read_unlock();
+ return err;
+}
+
+static int ipv6_count_addresses(struct inet6_dev *idev)
+{
+ int cnt = 0;
+ struct inet6_ifaddr *ifp;
+
+ read_lock_bh(&idev->lock);
+ list_for_each_entry(ifp, &idev->addr_list, if_list)
+ cnt++;
+ read_unlock_bh(&idev->lock);
+ return cnt;
+}
+
+int ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
+ struct net_device *dev, int strict)
+{
+ struct inet6_ifaddr *ifp;
+ struct hlist_node *node;
+ unsigned int hash = ipv6_addr_hash(addr);
+
+ rcu_read_lock_bh();
+ hlist_for_each_entry_rcu(ifp, node, &inet6_addr_lst[hash], addr_lst) {
+ if (!net_eq(dev_net(ifp->idev->dev), net))
+ continue;
+ if (ipv6_addr_equal(&ifp->addr, addr) &&
+ !(ifp->flags&IFA_F_TENTATIVE) &&
+ (dev == NULL || ifp->idev->dev == dev ||
+ !(ifp->scope&(IFA_LINK|IFA_HOST) || strict))) {
+ rcu_read_unlock_bh();
+ return 1;
+ }
+ }
+
+ rcu_read_unlock_bh();
+ return 0;
+}
+EXPORT_SYMBOL(ipv6_chk_addr);
+
+static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
+ struct net_device *dev)
+{
+ unsigned int hash = ipv6_addr_hash(addr);
+ struct inet6_ifaddr *ifp;
+ struct hlist_node *node;
+
+ hlist_for_each_entry(ifp, node, &inet6_addr_lst[hash], addr_lst) {
+ if (!net_eq(dev_net(ifp->idev->dev), net))
+ continue;
+ if (ipv6_addr_equal(&ifp->addr, addr)) {
+ if (dev == NULL || ifp->idev->dev == dev)
+ return true;
+ }
+ }
+ return false;
+}
+
+int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev)
+{
+ struct inet6_dev *idev;
+ struct inet6_ifaddr *ifa;
+ int onlink;
+
+ onlink = 0;
+ rcu_read_lock();
+ idev = __in6_dev_get(dev);
+ if (idev) {
+ read_lock_bh(&idev->lock);
+ list_for_each_entry(ifa, &idev->addr_list, if_list) {
+ onlink = ipv6_prefix_equal(addr, &ifa->addr,
+ ifa->prefix_len);
+ if (onlink)
+ break;
+ }
+ read_unlock_bh(&idev->lock);
+ }
+ rcu_read_unlock();
+ return onlink;
+}
+
+EXPORT_SYMBOL(ipv6_chk_prefix);
+
+struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *addr,
+ struct net_device *dev, int strict)
+{
+ struct inet6_ifaddr *ifp, *result = NULL;
+ unsigned int hash = ipv6_addr_hash(addr);
+ struct hlist_node *node;
+
+ rcu_read_lock_bh();
+ hlist_for_each_entry_rcu_bh(ifp, node, &inet6_addr_lst[hash], addr_lst) {
+ if (!net_eq(dev_net(ifp->idev->dev), net))
+ continue;
+ if (ipv6_addr_equal(&ifp->addr, addr)) {
+ if (dev == NULL || ifp->idev->dev == dev ||
+ !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) {
+ result = ifp;
+ in6_ifa_hold(ifp);
+ break;
+ }
+ }
+ }
+ rcu_read_unlock_bh();
+
+ return result;
+}
+
+/* Gets referenced address, destroys ifaddr */
+
+static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
+{
+ if (ifp->flags&IFA_F_PERMANENT) {
+ spin_lock_bh(&ifp->lock);
+ addrconf_del_timer(ifp);
+ ifp->flags |= IFA_F_TENTATIVE;
+ if (dad_failed)
+ ifp->flags |= IFA_F_DADFAILED;
+ spin_unlock_bh(&ifp->lock);
+ if (dad_failed)
+ ipv6_ifa_notify(0, ifp);
+ in6_ifa_put(ifp);
+#ifdef CONFIG_IPV6_PRIVACY
+ } else if (ifp->flags&IFA_F_TEMPORARY) {
+ struct inet6_ifaddr *ifpub;
+ spin_lock_bh(&ifp->lock);
+ ifpub = ifp->ifpub;
+ if (ifpub) {
+ in6_ifa_hold(ifpub);
+ spin_unlock_bh(&ifp->lock);
+ ipv6_create_tempaddr(ifpub, ifp);
+ in6_ifa_put(ifpub);
+ } else {
+ spin_unlock_bh(&ifp->lock);
+ }
+ ipv6_del_addr(ifp);
+#endif
+ } else
+ ipv6_del_addr(ifp);
+}
+
+static int addrconf_dad_end(struct inet6_ifaddr *ifp)
+{
+ int err = -ENOENT;
+
+ spin_lock(&ifp->state_lock);
+ if (ifp->state == INET6_IFADDR_STATE_DAD) {
+ ifp->state = INET6_IFADDR_STATE_POSTDAD;
+ err = 0;
+ }
+ spin_unlock(&ifp->state_lock);
+
+ return err;
+}
+
+void addrconf_dad_failure(struct inet6_ifaddr *ifp)
+{
+ struct inet6_dev *idev = ifp->idev;
+
+ if (addrconf_dad_end(ifp)) {
+ in6_ifa_put(ifp);
+ return;
+ }
+
+ if (net_ratelimit())
+ printk(KERN_INFO "%s: IPv6 duplicate address %pI6c detected!\n",
+ ifp->idev->dev->name, &ifp->addr);
+
+ if (idev->cnf.accept_dad > 1 && !idev->cnf.disable_ipv6) {
+ struct in6_addr addr;
+
+ addr.s6_addr32[0] = htonl(0xfe800000);
+ addr.s6_addr32[1] = 0;
+
+ if (!ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) &&
+ ipv6_addr_equal(&ifp->addr, &addr)) {
+ /* DAD failed for link-local based on MAC address */
+ idev->cnf.disable_ipv6 = 1;
+
+ printk(KERN_INFO "%s: IPv6 being disabled!\n",
+ ifp->idev->dev->name);
+ }
+ }
+
+ addrconf_dad_stop(ifp, 1);
+}
+
+/* Join to solicited addr multicast group. */
+
+void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr)
+{
+ struct in6_addr maddr;
+
+ if (dev->flags&(IFF_LOOPBACK|IFF_NOARP))
+ return;
+
+ addrconf_addr_solict_mult(addr, &maddr);
+ ipv6_dev_mc_inc(dev, &maddr);
+}
+
+void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr)
+{
+ struct in6_addr maddr;
+
+ if (idev->dev->flags&(IFF_LOOPBACK|IFF_NOARP))
+ return;
+
+ addrconf_addr_solict_mult(addr, &maddr);
+ __ipv6_dev_mc_dec(idev, &maddr);
+}
+
+static void addrconf_join_anycast(struct inet6_ifaddr *ifp)
+{
+ struct in6_addr addr;
+ if (ifp->prefix_len == 127) /* RFC 6164 */
+ return;
+ ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);
+ if (ipv6_addr_any(&addr))
+ return;
+ ipv6_dev_ac_inc(ifp->idev->dev, &addr);
+}
+
+static void addrconf_leave_anycast(struct inet6_ifaddr *ifp)
+{
+ struct in6_addr addr;
+ if (ifp->prefix_len == 127) /* RFC 6164 */
+ return;
+ ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);
+ if (ipv6_addr_any(&addr))
+ return;
+ __ipv6_dev_ac_dec(ifp->idev, &addr);
+}
+
+static int addrconf_ifid_eui48(u8 *eui, struct net_device *dev)
+{
+ if (dev->addr_len != ETH_ALEN)
+ return -1;
+ memcpy(eui, dev->dev_addr, 3);
+ memcpy(eui + 5, dev->dev_addr + 3, 3);
+
+ /*
+ * The zSeries OSA network cards can be shared among various
+ * OS instances, but the OSA cards have only one MAC address.
+ * This leads to duplicate address conflicts in conjunction
+ * with IPv6 if more than one instance uses the same card.
+ *
+ * The driver for these cards can deliver a unique 16-bit
+ * identifier for each instance sharing the same card. It is
+ * placed instead of 0xFFFE in the interface identifier. The
+ * "u" bit of the interface identifier is not inverted in this
+ * case. Hence the resulting interface identifier has local
+ * scope according to RFC2373.
+ */
+ if (dev->dev_id) {
+ eui[3] = (dev->dev_id >> 8) & 0xFF;
+ eui[4] = dev->dev_id & 0xFF;
+ } else {
+ eui[3] = 0xFF;
+ eui[4] = 0xFE;
+ eui[0] ^= 2;
+ }
+ return 0;
+}
+
+static int addrconf_ifid_arcnet(u8 *eui, struct net_device *dev)
+{
+ /* XXX: inherit EUI-64 from other interface -- yoshfuji */
+ if (dev->addr_len != ARCNET_ALEN)
+ return -1;
+ memset(eui, 0, 7);
+ eui[7] = *(u8*)dev->dev_addr;
+ return 0;
+}
+
+static int addrconf_ifid_infiniband(u8 *eui, struct net_device *dev)
+{
+ if (dev->addr_len != INFINIBAND_ALEN)
+ return -1;
+ memcpy(eui, dev->dev_addr + 12, 8);
+ eui[0] |= 2;
+ return 0;
+}
+
+static int __ipv6_isatap_ifid(u8 *eui, __be32 addr)
+{
+ if (addr == 0)
+ return -1;
+ eui[0] = (ipv4_is_zeronet(addr) || ipv4_is_private_10(addr) ||
+ ipv4_is_loopback(addr) || ipv4_is_linklocal_169(addr) ||
+ ipv4_is_private_172(addr) || ipv4_is_test_192(addr) ||
+ ipv4_is_anycast_6to4(addr) || ipv4_is_private_192(addr) ||
+ ipv4_is_test_198(addr) || ipv4_is_multicast(addr) ||
+ ipv4_is_lbcast(addr)) ? 0x00 : 0x02;
+ eui[1] = 0;
+ eui[2] = 0x5E;
+ eui[3] = 0xFE;
+ memcpy(eui + 4, &addr, 4);
+ return 0;
+}
+
+static int addrconf_ifid_sit(u8 *eui, struct net_device *dev)
+{
+ if (dev->priv_flags & IFF_ISATAP)
+ return __ipv6_isatap_ifid(eui, *(__be32 *)dev->dev_addr);
+ return -1;
+}
+
+static int addrconf_ifid_gre(u8 *eui, struct net_device *dev)
+{
+ return __ipv6_isatap_ifid(eui, *(__be32 *)dev->dev_addr);
+}
+
+static int ipv6_generate_eui64(u8 *eui, struct net_device *dev)
+{
+ switch (dev->type) {
+ case ARPHRD_ETHER:
+ case ARPHRD_FDDI:
+ case ARPHRD_IEEE802_TR:
+ return addrconf_ifid_eui48(eui, dev);
+ case ARPHRD_ARCNET:
+ return addrconf_ifid_arcnet(eui, dev);
+ case ARPHRD_INFINIBAND:
+ return addrconf_ifid_infiniband(eui, dev);
+ case ARPHRD_SIT:
+ return addrconf_ifid_sit(eui, dev);
+ case ARPHRD_IPGRE:
+ return addrconf_ifid_gre(eui, dev);
+ }
+ return -1;
+}
+
+static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev)
+{
+ int err = -1;
+ struct inet6_ifaddr *ifp;
+
+ read_lock_bh(&idev->lock);
+ list_for_each_entry(ifp, &idev->addr_list, if_list) {
+ if (ifp->scope == IFA_LINK && !(ifp->flags&IFA_F_TENTATIVE)) {
+ memcpy(eui, ifp->addr.s6_addr+8, 8);
+ err = 0;
+ break;
+ }
+ }
+ read_unlock_bh(&idev->lock);
+ return err;
+}
+
+#ifdef CONFIG_IPV6_PRIVACY
+/* (re)generation of randomized interface identifier (RFC 3041 3.2, 3.5) */
+static int __ipv6_regen_rndid(struct inet6_dev *idev)
+{
+regen:
+ get_random_bytes(idev->rndid, sizeof(idev->rndid));
+ idev->rndid[0] &= ~0x02;
+
+ /*
+ * <draft-ietf-ipngwg-temp-addresses-v2-00.txt>:
+ * check if generated address is not inappropriate
+ *
+ * - Reserved subnet anycast (RFC 2526)
+ * 11111101 11....11 1xxxxxxx
+ * - ISATAP (RFC4214) 6.1
+ * 00-00-5E-FE-xx-xx-xx-xx
+ * - value 0
+ * - XXX: already assigned to an address on the device
+ */
+ if (idev->rndid[0] == 0xfd &&
+ (idev->rndid[1]&idev->rndid[2]&idev->rndid[3]&idev->rndid[4]&idev->rndid[5]&idev->rndid[6]) == 0xff &&
+ (idev->rndid[7]&0x80))
+ goto regen;
+ if ((idev->rndid[0]|idev->rndid[1]) == 0) {
+ if (idev->rndid[2] == 0x5e && idev->rndid[3] == 0xfe)
+ goto regen;
+ if ((idev->rndid[2]|idev->rndid[3]|idev->rndid[4]|idev->rndid[5]|idev->rndid[6]|idev->rndid[7]) == 0x00)
+ goto regen;
+ }
+
+ return 0;
+}
+
+static void ipv6_regen_rndid(unsigned long data)
+{
+ struct inet6_dev *idev = (struct inet6_dev *) data;
+ unsigned long expires;
+
+ rcu_read_lock_bh();
+ write_lock_bh(&idev->lock);
+
+ if (idev->dead)
+ goto out;
+
+ if (__ipv6_regen_rndid(idev) < 0)
+ goto out;
+
+ expires = jiffies +
+ idev->cnf.temp_prefered_lft * HZ -
+ idev->cnf.regen_max_retry * idev->cnf.dad_transmits * idev->nd_parms->retrans_time -
+ idev->cnf.max_desync_factor * HZ;
+ if (time_before(expires, jiffies)) {
+ printk(KERN_WARNING
+ "ipv6_regen_rndid(): too short regeneration interval; timer disabled for %s.\n",
+ idev->dev->name);
+ goto out;
+ }
+
+ if (!mod_timer(&idev->regen_timer, expires))
+ in6_dev_hold(idev);
+
+out:
+ write_unlock_bh(&idev->lock);
+ rcu_read_unlock_bh();
+ in6_dev_put(idev);
+}
+
+static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr) {
+ int ret = 0;
+
+ if (tmpaddr && memcmp(idev->rndid, &tmpaddr->s6_addr[8], 8) == 0)
+ ret = __ipv6_regen_rndid(idev);
+ return ret;
+}
+#endif
+
+/*
+ * Add prefix route.
+ */
+
+static void
+addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
+ unsigned long expires, u32 flags)
+{
+ struct fib6_config cfg = {
+ .fc_table = RT6_TABLE_PREFIX,
+ .fc_metric = IP6_RT_PRIO_ADDRCONF,
+ .fc_ifindex = dev->ifindex,
+ .fc_expires = expires,
+ .fc_dst_len = plen,
+ .fc_flags = RTF_UP | flags,
+ .fc_nlinfo.nl_net = dev_net(dev),
+ .fc_protocol = RTPROT_KERNEL,
+ };
+
+ cfg.fc_dst = *pfx;
+
+ /* Prevent useless cloning on PtP SIT.
+ This thing is done here expecting that the whole
+ class of non-broadcast devices need not cloning.
+ */
+#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
+ if (dev->type == ARPHRD_SIT && (dev->flags & IFF_POINTOPOINT))
+ cfg.fc_flags |= RTF_NONEXTHOP;
+#endif
+
+ ip6_route_add(&cfg);
+}
+
+
+static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
+ int plen,
+ const struct net_device *dev,
+ u32 flags, u32 noflags)
+{
+ struct fib6_node *fn;
+ struct rt6_info *rt = NULL;
+ struct fib6_table *table;
+
+ table = fib6_get_table(dev_net(dev), RT6_TABLE_PREFIX);
+ if (table == NULL)
+ return NULL;
+
+ write_lock_bh(&table->tb6_lock);
+ fn = fib6_locate(&table->tb6_root, pfx, plen, NULL, 0);
+ if (!fn)
+ goto out;
+ for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
+ if (rt->dst.dev->ifindex != dev->ifindex)
+ continue;
+ if ((rt->rt6i_flags & flags) != flags)
+ continue;
+ if ((noflags != 0) && ((rt->rt6i_flags & flags) != 0))
+ continue;
+ dst_hold(&rt->dst);
+ break;
+ }
+out:
+ write_unlock_bh(&table->tb6_lock);
+ return rt;
+}
+
+
+/* Create "default" multicast route to the interface */
+
+static void addrconf_add_mroute(struct net_device *dev)
+{
+ struct fib6_config cfg = {
+ .fc_table = RT6_TABLE_LOCAL,
+ .fc_metric = IP6_RT_PRIO_ADDRCONF,
+ .fc_ifindex = dev->ifindex,
+ .fc_dst_len = 8,
+ .fc_flags = RTF_UP,
+ .fc_nlinfo.nl_net = dev_net(dev),
+ };
+
+ ipv6_addr_set(&cfg.fc_dst, htonl(0xFF000000), 0, 0, 0);
+
+ ip6_route_add(&cfg);
+}
+
+#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
+static void sit_route_add(struct net_device *dev)
+{
+ struct fib6_config cfg = {
+ .fc_table = RT6_TABLE_MAIN,
+ .fc_metric = IP6_RT_PRIO_ADDRCONF,
+ .fc_ifindex = dev->ifindex,
+ .fc_dst_len = 96,
+ .fc_flags = RTF_UP | RTF_NONEXTHOP,
+ .fc_nlinfo.nl_net = dev_net(dev),
+ };
+
+ /* prefix length - 96 bits "::d.d.d.d" */
+ ip6_route_add(&cfg);
+}
+#endif
+
+static void addrconf_add_lroute(struct net_device *dev)
+{
+ struct in6_addr addr;
+
+ ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0);
+ addrconf_prefix_route(&addr, 64, dev, 0, 0);
+}
+
+static struct inet6_dev *addrconf_add_dev(struct net_device *dev)
+{
+ struct inet6_dev *idev;
+
+ ASSERT_RTNL();
+
+ idev = ipv6_find_idev(dev);
+ if (!idev)
+ return ERR_PTR(-ENOBUFS);
+
+ if (idev->cnf.disable_ipv6)
+ return ERR_PTR(-EACCES);
+
+ /* Add default multicast route */
+ if (!(dev->flags & IFF_LOOPBACK))
+ addrconf_add_mroute(dev);
+
+ /* Add link local route */
+ addrconf_add_lroute(dev);
+ return idev;
+}
+
+void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
+{
+ struct prefix_info *pinfo;
+ __u32 valid_lft;
+ __u32 prefered_lft;
+ int addr_type;
+ struct inet6_dev *in6_dev;
+ struct net *net = dev_net(dev);
+
+ pinfo = (struct prefix_info *) opt;
+
+ if (len < sizeof(struct prefix_info)) {
+ ADBG(("addrconf: prefix option too short\n"));
+ return;
+ }
+
+ /*
+ * Validation checks ([ADDRCONF], page 19)
+ */
+
+ addr_type = ipv6_addr_type(&pinfo->prefix);
+
+ if (addr_type & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL))
+ return;
+
+ valid_lft = ntohl(pinfo->valid);
+ prefered_lft = ntohl(pinfo->prefered);
+
+ if (prefered_lft > valid_lft) {
+ if (net_ratelimit())
+ printk(KERN_WARNING "addrconf: prefix option has invalid lifetime\n");
+ return;
+ }
+
+ in6_dev = in6_dev_get(dev);
+
+ if (in6_dev == NULL) {
+ if (net_ratelimit())
+ printk(KERN_DEBUG "addrconf: device %s not configured\n", dev->name);
+ return;
+ }
+
+ /*
+ * Two things going on here:
+ * 1) Add routes for on-link prefixes
+ * 2) Configure prefixes with the auto flag set
+ */
+
+ if (pinfo->onlink) {
+ struct rt6_info *rt;
+ unsigned long rt_expires;
+
+ /* Avoid arithmetic overflow. Really, we could
+ * save rt_expires in seconds, likely valid_lft,
+ * but it would require division in fib gc, that it
+ * not good.
+ */
+ if (HZ > USER_HZ)
+ rt_expires = addrconf_timeout_fixup(valid_lft, HZ);
+ else
+ rt_expires = addrconf_timeout_fixup(valid_lft, USER_HZ);
+
+ if (addrconf_finite_timeout(rt_expires))
+ rt_expires *= HZ;
+
+ rt = addrconf_get_prefix_route(&pinfo->prefix,
+ pinfo->prefix_len,
+ dev,
+ RTF_ADDRCONF | RTF_PREFIX_RT,
+ RTF_GATEWAY | RTF_DEFAULT);
+
+ if (rt) {
+ /* Autoconf prefix route */
+ if (valid_lft == 0) {
+ ip6_del_rt(rt);
+ rt = NULL;
+ } else if (addrconf_finite_timeout(rt_expires)) {
+ /* not infinity */
+ rt6_set_expires(rt, jiffies + rt_expires);
+ } else {
+ rt6_clean_expires(rt);
+ }
+ } else if (valid_lft) {
+ clock_t expires = 0;
+ int flags = RTF_ADDRCONF | RTF_PREFIX_RT;
+ if (addrconf_finite_timeout(rt_expires)) {
+ /* not infinity */
+ flags |= RTF_EXPIRES;
+ expires = jiffies_to_clock_t(rt_expires);
+ }
+ addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len,
+ dev, expires, flags);
+ }
+ if (rt)
+ dst_release(&rt->dst);
+ }
+
+ /* Try to figure out our local address for this prefix */
+
+ if (pinfo->autoconf && in6_dev->cnf.autoconf) {
+ struct inet6_ifaddr * ifp;
+ struct in6_addr addr;
+ int create = 0, update_lft = 0;
+
+ if (pinfo->prefix_len == 64) {
+ memcpy(&addr, &pinfo->prefix, 8);
+ if (ipv6_generate_eui64(addr.s6_addr + 8, dev) &&
+ ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) {
+ in6_dev_put(in6_dev);
+ return;
+ }
+ goto ok;
+ }
+ if (net_ratelimit())
+ printk(KERN_DEBUG "IPv6 addrconf: prefix with wrong length %d\n",
+ pinfo->prefix_len);
+ in6_dev_put(in6_dev);
+ return;
+
+ok:
+
+ ifp = ipv6_get_ifaddr(net, &addr, dev, 1);
+
+ if (ifp == NULL && valid_lft) {
+ int max_addresses = in6_dev->cnf.max_addresses;
+ u32 addr_flags = 0;
+
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+ if (in6_dev->cnf.optimistic_dad &&
+ !net->ipv6.devconf_all->forwarding && sllao)
+ addr_flags = IFA_F_OPTIMISTIC;
+#endif
+
+ /* Do not allow to create too much of autoconfigured
+ * addresses; this would be too easy way to crash kernel.
+ */
+ if (!max_addresses ||
+ ipv6_count_addresses(in6_dev) < max_addresses)
+ ifp = ipv6_add_addr(in6_dev, &addr, pinfo->prefix_len,
+ addr_type&IPV6_ADDR_SCOPE_MASK,
+ addr_flags);
+
+ if (!ifp || IS_ERR(ifp)) {
+ in6_dev_put(in6_dev);
+ return;
+ }
+
+ update_lft = create = 1;
+ ifp->cstamp = jiffies;
+ addrconf_dad_start(ifp, RTF_ADDRCONF|RTF_PREFIX_RT);
+ }
+
+ if (ifp) {
+ int flags;
+ unsigned long now;
+#ifdef CONFIG_IPV6_PRIVACY
+ struct inet6_ifaddr *ift;
+#endif
+ u32 stored_lft;
+
+ /* update lifetime (RFC2462 5.5.3 e) */
+ spin_lock(&ifp->lock);
+ now = jiffies;
+ if (ifp->valid_lft > (now - ifp->tstamp) / HZ)
+ stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ;
+ else
+ stored_lft = 0;
+ if (!update_lft && stored_lft) {
+ if (valid_lft > MIN_VALID_LIFETIME ||
+ valid_lft > stored_lft)
+ update_lft = 1;
+ else if (stored_lft <= MIN_VALID_LIFETIME) {
+ /* valid_lft <= stored_lft is always true */
+ /*
+ * RFC 4862 Section 5.5.3e:
+ * "Note that the preferred lifetime of
+ * the corresponding address is always
+ * reset to the Preferred Lifetime in
+ * the received Prefix Information
+ * option, regardless of whether the
+ * valid lifetime is also reset or
+ * ignored."
+ *
+ * So if the preferred lifetime in
+ * this advertisement is different
+ * than what we have stored, but the
+ * valid lifetime is invalid, just
+ * reset prefered_lft.
+ *
+ * We must set the valid lifetime
+ * to the stored lifetime since we'll
+ * be updating the timestamp below,
+ * else we'll set it back to the
+ * minimum.
+ */
+ if (prefered_lft != ifp->prefered_lft) {
+ valid_lft = stored_lft;
+ update_lft = 1;
+ }
+ } else {
+ valid_lft = MIN_VALID_LIFETIME;
+ if (valid_lft < prefered_lft)
+ prefered_lft = valid_lft;
+ update_lft = 1;
+ }
+ }
+
+ if (update_lft) {
+ ifp->valid_lft = valid_lft;
+ ifp->prefered_lft = prefered_lft;
+ ifp->tstamp = now;
+ flags = ifp->flags;
+ ifp->flags &= ~IFA_F_DEPRECATED;
+ spin_unlock(&ifp->lock);
+
+ if (!(flags&IFA_F_TENTATIVE))
+ ipv6_ifa_notify(0, ifp);
+ } else
+ spin_unlock(&ifp->lock);
+
+#ifdef CONFIG_IPV6_PRIVACY
+ read_lock_bh(&in6_dev->lock);
+ /* update all temporary addresses in the list */
+ list_for_each_entry(ift, &in6_dev->tempaddr_list,
+ tmp_list) {
+ int age, max_valid, max_prefered;
+
+ if (ifp != ift->ifpub)
+ continue;
+
+ /*
+ * RFC 4941 section 3.3:
+ * If a received option will extend the lifetime
+ * of a public address, the lifetimes of
+ * temporary addresses should be extended,
+ * subject to the overall constraint that no
+ * temporary addresses should ever remain
+ * "valid" or "preferred" for a time longer than
+ * (TEMP_VALID_LIFETIME) or
+ * (TEMP_PREFERRED_LIFETIME - DESYNC_FACTOR),
+ * respectively.
+ */
+ age = (now - ift->cstamp) / HZ;
+ max_valid = in6_dev->cnf.temp_valid_lft - age;
+ if (max_valid < 0)
+ max_valid = 0;
+
+ max_prefered = in6_dev->cnf.temp_prefered_lft -
+ in6_dev->cnf.max_desync_factor -
+ age;
+ if (max_prefered < 0)
+ max_prefered = 0;
+
+ if (valid_lft > max_valid)
+ valid_lft = max_valid;
+
+ if (prefered_lft > max_prefered)
+ prefered_lft = max_prefered;
+
+ spin_lock(&ift->lock);
+ flags = ift->flags;
+ ift->valid_lft = valid_lft;
+ ift->prefered_lft = prefered_lft;
+ ift->tstamp = now;
+ if (prefered_lft > 0)
+ ift->flags &= ~IFA_F_DEPRECATED;
+
+ spin_unlock(&ift->lock);
+ if (!(flags&IFA_F_TENTATIVE))
+ ipv6_ifa_notify(0, ift);
+ }
+
+ if ((create || list_empty(&in6_dev->tempaddr_list)) && in6_dev->cnf.use_tempaddr > 0) {
+ /*
+ * When a new public address is created as
+ * described in [ADDRCONF], also create a new
+ * temporary address. Also create a temporary
+ * address if it's enabled but no temporary
+ * address currently exists.
+ */
+ read_unlock_bh(&in6_dev->lock);
+ ipv6_create_tempaddr(ifp, NULL);
+ } else {
+ read_unlock_bh(&in6_dev->lock);
+ }
+#endif
+ in6_ifa_put(ifp);
+ addrconf_verify(0);
+ }
+ }
+ inet6_prefix_notify(RTM_NEWPREFIX, in6_dev, pinfo);
+ in6_dev_put(in6_dev);
+}
+
+/*
+ * Set destination address.
+ * Special case for SIT interfaces where we create a new "virtual"
+ * device.
+ */
+int addrconf_set_dstaddr(struct net *net, void __user *arg)
+{
+ struct in6_ifreq ireq;
+ struct net_device *dev;
+ int err = -EINVAL;
+
+ rtnl_lock();
+
+ err = -EFAULT;
+ if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
+ goto err_exit;
+
+ dev = __dev_get_by_index(net, ireq.ifr6_ifindex);
+
+ err = -ENODEV;
+ if (dev == NULL)
+ goto err_exit;
+
+#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
+ if (dev->type == ARPHRD_SIT) {
+ const struct net_device_ops *ops = dev->netdev_ops;
+ struct ifreq ifr;
+ struct ip_tunnel_parm p;
+
+ err = -EADDRNOTAVAIL;
+ if (!(ipv6_addr_type(&ireq.ifr6_addr) & IPV6_ADDR_COMPATv4))
+ goto err_exit;
+
+ memset(&p, 0, sizeof(p));
+ p.iph.daddr = ireq.ifr6_addr.s6_addr32[3];
+ p.iph.saddr = 0;
+ p.iph.version = 4;
+ p.iph.ihl = 5;
+ p.iph.protocol = IPPROTO_IPV6;
+ p.iph.ttl = 64;
+ ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
+
+ if (ops->ndo_do_ioctl) {
+ mm_segment_t oldfs = get_fs();
+
+ set_fs(KERNEL_DS);
+ err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
+ set_fs(oldfs);
+ } else
+ err = -EOPNOTSUPP;
+
+ if (err == 0) {
+ err = -ENOBUFS;
+ dev = __dev_get_by_name(net, p.name);
+ if (!dev)
+ goto err_exit;
+ err = dev_open(dev);
+ }
+ }
+#endif
+
+err_exit:
+ rtnl_unlock();
+ return err;
+}
+
+/*
+ * Manual configuration of address on an interface
+ */
+static int inet6_addr_add(struct net *net, int ifindex, const struct in6_addr *pfx,
+ unsigned int plen, __u8 ifa_flags, __u32 prefered_lft,
+ __u32 valid_lft)
+{
+ struct inet6_ifaddr *ifp;
+ struct inet6_dev *idev;
+ struct net_device *dev;
+ int scope;
+ u32 flags;
+ clock_t expires;
+ unsigned long timeout;
+
+ ASSERT_RTNL();
+
+ if (plen > 128)
+ return -EINVAL;
+
+ /* check the lifetime */
+ if (!valid_lft || prefered_lft > valid_lft)
+ return -EINVAL;
+
+ dev = __dev_get_by_index(net, ifindex);
+ if (!dev)
+ return -ENODEV;
+
+ idev = addrconf_add_dev(dev);
+ if (IS_ERR(idev))
+ return PTR_ERR(idev);
+
+ scope = ipv6_addr_scope(pfx);
+
+ timeout = addrconf_timeout_fixup(valid_lft, HZ);
+ if (addrconf_finite_timeout(timeout)) {
+ expires = jiffies_to_clock_t(timeout * HZ);
+ valid_lft = timeout;
+ flags = RTF_EXPIRES;
+ } else {
+ expires = 0;
+ flags = 0;
+ ifa_flags |= IFA_F_PERMANENT;
+ }
+
+ timeout = addrconf_timeout_fixup(prefered_lft, HZ);
+ if (addrconf_finite_timeout(timeout)) {
+ if (timeout == 0)
+ ifa_flags |= IFA_F_DEPRECATED;
+ prefered_lft = timeout;
+ }
+
+ ifp = ipv6_add_addr(idev, pfx, plen, scope, ifa_flags);
+
+ if (!IS_ERR(ifp)) {
+ spin_lock_bh(&ifp->lock);
+ ifp->valid_lft = valid_lft;
+ ifp->prefered_lft = prefered_lft;
+ ifp->tstamp = jiffies;
+ spin_unlock_bh(&ifp->lock);
+
+ addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev,
+ expires, flags);
+ /*
+ * Note that section 3.1 of RFC 4429 indicates
+ * that the Optimistic flag should not be set for
+ * manually configured addresses
+ */
+ addrconf_dad_start(ifp, 0);
+ in6_ifa_put(ifp);
+ addrconf_verify(0);
+ return 0;
+ }
+
+ return PTR_ERR(ifp);
+}
+
+static int inet6_addr_del(struct net *net, int ifindex, const struct in6_addr *pfx,
+ unsigned int plen)
+{
+ struct inet6_ifaddr *ifp;
+ struct inet6_dev *idev;
+ struct net_device *dev;
+
+ if (plen > 128)
+ return -EINVAL;
+
+ dev = __dev_get_by_index(net, ifindex);
+ if (!dev)
+ return -ENODEV;
+
+ if ((idev = __in6_dev_get(dev)) == NULL)
+ return -ENXIO;
+
+ read_lock_bh(&idev->lock);
+ list_for_each_entry(ifp, &idev->addr_list, if_list) {
+ if (ifp->prefix_len == plen &&
+ ipv6_addr_equal(pfx, &ifp->addr)) {
+ in6_ifa_hold(ifp);
+ read_unlock_bh(&idev->lock);
+
+ ipv6_del_addr(ifp);
+
+ /* If the last address is deleted administratively,
+ disable IPv6 on this interface.
+ */
+ if (list_empty(&idev->addr_list))
+ addrconf_ifdown(idev->dev, 1);
+ return 0;
+ }
+ }
+ read_unlock_bh(&idev->lock);
+ return -EADDRNOTAVAIL;
+}
+
+
+int addrconf_add_ifaddr(struct net *net, void __user *arg)
+{
+ struct in6_ifreq ireq;
+ int err;
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
+ return -EFAULT;
+
+ rtnl_lock();
+ err = inet6_addr_add(net, ireq.ifr6_ifindex, &ireq.ifr6_addr,
+ ireq.ifr6_prefixlen, IFA_F_PERMANENT,
+ INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
+ rtnl_unlock();
+ return err;
+}
+
+int addrconf_del_ifaddr(struct net *net, void __user *arg)
+{
+ struct in6_ifreq ireq;
+ int err;
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
+ return -EFAULT;
+
+ rtnl_lock();
+ err = inet6_addr_del(net, ireq.ifr6_ifindex, &ireq.ifr6_addr,
+ ireq.ifr6_prefixlen);
+ rtnl_unlock();
+ return err;
+}
+
+static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
+ int plen, int scope)
+{
+ struct inet6_ifaddr *ifp;
+
+ ifp = ipv6_add_addr(idev, addr, plen, scope, IFA_F_PERMANENT);
+ if (!IS_ERR(ifp)) {
+ spin_lock_bh(&ifp->lock);
+ ifp->flags &= ~IFA_F_TENTATIVE;
+ spin_unlock_bh(&ifp->lock);
+ ipv6_ifa_notify(RTM_NEWADDR, ifp);
+ in6_ifa_put(ifp);
+ }
+}
+
+#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
+static void sit_add_v4_addrs(struct inet6_dev *idev)
+{
+ struct in6_addr addr;
+ struct net_device *dev;
+ struct net *net = dev_net(idev->dev);
+ int scope;
+
+ ASSERT_RTNL();
+
+ memset(&addr, 0, sizeof(struct in6_addr));
+ memcpy(&addr.s6_addr32[3], idev->dev->dev_addr, 4);
+
+ if (idev->dev->flags&IFF_POINTOPOINT) {
+ addr.s6_addr32[0] = htonl(0xfe800000);
+ scope = IFA_LINK;
+ } else {
+ scope = IPV6_ADDR_COMPATv4;
+ }
+
+ if (addr.s6_addr32[3]) {
+ add_addr(idev, &addr, 128, scope);
+ return;
+ }
+
+ for_each_netdev(net, dev) {
+ struct in_device * in_dev = __in_dev_get_rtnl(dev);
+ if (in_dev && (dev->flags & IFF_UP)) {
+ struct in_ifaddr * ifa;
+
+ int flag = scope;
+
+ for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
+ int plen;
+
+ addr.s6_addr32[3] = ifa->ifa_local;
+
+ if (ifa->ifa_scope == RT_SCOPE_LINK)
+ continue;
+ if (ifa->ifa_scope >= RT_SCOPE_HOST) {
+ if (idev->dev->flags&IFF_POINTOPOINT)
+ continue;
+ flag |= IFA_HOST;
+ }
+ if (idev->dev->flags&IFF_POINTOPOINT)
+ plen = 64;
+ else
+ plen = 96;
+
+ add_addr(idev, &addr, plen, flag);
+ }
+ }
+ }
+}
+#endif
+
+static void init_loopback(struct net_device *dev)
+{
+ struct inet6_dev *idev;
+
+ /* ::1 */
+
+ ASSERT_RTNL();
+
+ if ((idev = ipv6_find_idev(dev)) == NULL) {
+ printk(KERN_DEBUG "init loopback: add_dev failed\n");
+ return;
+ }
+
+ add_addr(idev, &in6addr_loopback, 128, IFA_HOST);
+}
+
+static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr *addr)
+{
+ struct inet6_ifaddr * ifp;
+ u32 addr_flags = IFA_F_PERMANENT;
+
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+ if (idev->cnf.optimistic_dad &&
+ !dev_net(idev->dev)->ipv6.devconf_all->forwarding)
+ addr_flags |= IFA_F_OPTIMISTIC;
+#endif
+
+
+ ifp = ipv6_add_addr(idev, addr, 64, IFA_LINK, addr_flags);
+ if (!IS_ERR(ifp)) {
+ addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0);
+ addrconf_dad_start(ifp, 0);
+ in6_ifa_put(ifp);
+ }
+}
+
+static void addrconf_dev_config(struct net_device *dev)
+{
+ struct in6_addr addr;
+ struct inet6_dev * idev;
+
+ ASSERT_RTNL();
+
+ if ((dev->type != ARPHRD_ETHER) &&
+ (dev->type != ARPHRD_FDDI) &&
+ (dev->type != ARPHRD_IEEE802_TR) &&
+ (dev->type != ARPHRD_ARCNET) &&
+ (dev->type != ARPHRD_INFINIBAND)) {
+ /* Alas, we support only Ethernet autoconfiguration. */
+ return;
+ }
+
+ idev = addrconf_add_dev(dev);
+ if (IS_ERR(idev))
+ return;
+
+ memset(&addr, 0, sizeof(struct in6_addr));
+ addr.s6_addr32[0] = htonl(0xFE800000);
+
+ if (ipv6_generate_eui64(addr.s6_addr + 8, dev) == 0)
+ addrconf_add_linklocal(idev, &addr);
+}
+
+#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
+static void addrconf_sit_config(struct net_device *dev)
+{
+ struct inet6_dev *idev;
+
+ ASSERT_RTNL();
+
+ /*
+ * Configure the tunnel with one of our IPv4
+ * addresses... we should configure all of
+ * our v4 addrs in the tunnel
+ */
+
+ if ((idev = ipv6_find_idev(dev)) == NULL) {
+ printk(KERN_DEBUG "init sit: add_dev failed\n");
+ return;
+ }
+
+ if (dev->priv_flags & IFF_ISATAP) {
+ struct in6_addr addr;
+
+ ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0);
+ addrconf_prefix_route(&addr, 64, dev, 0, 0);
+ if (!ipv6_generate_eui64(addr.s6_addr + 8, dev))
+ addrconf_add_linklocal(idev, &addr);
+ return;
+ }
+
+ sit_add_v4_addrs(idev);
+
+ if (dev->flags&IFF_POINTOPOINT) {
+ addrconf_add_mroute(dev);
+ addrconf_add_lroute(dev);
+ } else
+ sit_route_add(dev);
+}
+#endif
+
+#if defined(CONFIG_NET_IPGRE) || defined(CONFIG_NET_IPGRE_MODULE)
+static void addrconf_gre_config(struct net_device *dev)
+{
+ struct inet6_dev *idev;
+ struct in6_addr addr;
+
+ pr_info("ipv6: addrconf_gre_config(%s)\n", dev->name);
+
+ ASSERT_RTNL();
+
+ if ((idev = ipv6_find_idev(dev)) == NULL) {
+ printk(KERN_DEBUG "init gre: add_dev failed\n");
+ return;
+ }
+
+ ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0);
+ addrconf_prefix_route(&addr, 64, dev, 0, 0);
+
+ if (!ipv6_generate_eui64(addr.s6_addr + 8, dev))
+ addrconf_add_linklocal(idev, &addr);
+}
+#endif
+
+static inline int
+ipv6_inherit_linklocal(struct inet6_dev *idev, struct net_device *link_dev)
+{
+ struct in6_addr lladdr;
+
+ if (!ipv6_get_lladdr(link_dev, &lladdr, IFA_F_TENTATIVE)) {
+ addrconf_add_linklocal(idev, &lladdr);
+ return 0;
+ }
+ return -1;
+}
+
+static void ip6_tnl_add_linklocal(struct inet6_dev *idev)
+{
+ struct net_device *link_dev;
+ struct net *net = dev_net(idev->dev);
+
+ /* first try to inherit the link-local address from the link device */
+ if (idev->dev->iflink &&
+ (link_dev = __dev_get_by_index(net, idev->dev->iflink))) {
+ if (!ipv6_inherit_linklocal(idev, link_dev))
+ return;
+ }
+ /* then try to inherit it from any device */
+ for_each_netdev(net, link_dev) {
+ if (!ipv6_inherit_linklocal(idev, link_dev))
+ return;
+ }
+ printk(KERN_DEBUG "init ip6-ip6: add_linklocal failed\n");
+}
+
+/*
+ * Autoconfigure tunnel with a link-local address so routing protocols,
+ * DHCPv6, MLD etc. can be run over the virtual link
+ */
+
+static void addrconf_ip6_tnl_config(struct net_device *dev)
+{
+ struct inet6_dev *idev;
+
+ ASSERT_RTNL();
+
+ idev = addrconf_add_dev(dev);
+ if (IS_ERR(idev)) {
+ printk(KERN_DEBUG "init ip6-ip6: add_dev failed\n");
+ return;
+ }
+ ip6_tnl_add_linklocal(idev);
+}
+
+static int addrconf_notify(struct notifier_block *this, unsigned long event,
+ void * data)
+{
+ struct net_device *dev = (struct net_device *) data;
+ struct inet6_dev *idev = __in6_dev_get(dev);
+ int run_pending = 0;
+ int err;
+
+ switch (event) {
+ case NETDEV_REGISTER:
+ if (!idev && dev->mtu >= IPV6_MIN_MTU) {
+ idev = ipv6_add_dev(dev);
+ if (!idev)
+ return notifier_from_errno(-ENOMEM);
+ }
+ break;
+
+ case NETDEV_UP:
+ case NETDEV_CHANGE:
+ if (dev->flags & IFF_SLAVE)
+ break;
+
+ if (event == NETDEV_UP) {
+ if (!addrconf_qdisc_ok(dev)) {
+ /* device is not ready yet. */
+ printk(KERN_INFO
+ "ADDRCONF(NETDEV_UP): %s: "
+ "link is not ready\n",
+ dev->name);
+ break;
+ }
+
+ if (!idev && dev->mtu >= IPV6_MIN_MTU)
+ idev = ipv6_add_dev(dev);
+
+ if (idev) {
+ idev->if_flags |= IF_READY;
+ run_pending = 1;
+ }
+ } else {
+ if (!addrconf_qdisc_ok(dev)) {
+ /* device is still not ready. */
+ break;
+ }
+
+ if (idev) {
+ if (idev->if_flags & IF_READY)
+ /* device is already configured. */
+ break;
+ idev->if_flags |= IF_READY;
+ }
+
+ printk(KERN_INFO
+ "ADDRCONF(NETDEV_CHANGE): %s: "
+ "link becomes ready\n",
+ dev->name);
+
+ run_pending = 1;
+ }
+
+ switch (dev->type) {
+#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
+ case ARPHRD_SIT:
+ addrconf_sit_config(dev);
+ break;
+#endif
+#if defined(CONFIG_NET_IPGRE) || defined(CONFIG_NET_IPGRE_MODULE)
+ case ARPHRD_IPGRE:
+ addrconf_gre_config(dev);
+ break;
+#endif
+ case ARPHRD_TUNNEL6:
+ addrconf_ip6_tnl_config(dev);
+ break;
+ case ARPHRD_LOOPBACK:
+ init_loopback(dev);
+ break;
+
+ default:
+ addrconf_dev_config(dev);
+ break;
+ }
+
+ if (idev) {
+ if (run_pending)
+ addrconf_dad_run(idev);
+
+ /*
+ * If the MTU changed during the interface down,
+ * when the interface up, the changed MTU must be
+ * reflected in the idev as well as routers.
+ */
+ if (idev->cnf.mtu6 != dev->mtu &&
+ dev->mtu >= IPV6_MIN_MTU) {
+ rt6_mtu_change(dev, dev->mtu);
+ idev->cnf.mtu6 = dev->mtu;
+ }
+ idev->tstamp = jiffies;
+ inet6_ifinfo_notify(RTM_NEWLINK, idev);
+
+ /*
+ * If the changed mtu during down is lower than
+ * IPV6_MIN_MTU stop IPv6 on this interface.
+ */
+ if (dev->mtu < IPV6_MIN_MTU)
+ addrconf_ifdown(dev, 1);
+ }
+ break;
+
+ case NETDEV_CHANGEMTU:
+ if (idev && dev->mtu >= IPV6_MIN_MTU) {
+ rt6_mtu_change(dev, dev->mtu);
+ idev->cnf.mtu6 = dev->mtu;
+ break;
+ }
+
+ if (!idev && dev->mtu >= IPV6_MIN_MTU) {
+ idev = ipv6_add_dev(dev);
+ if (idev)
+ break;
+ }
+
+ /*
+ * MTU falled under IPV6_MIN_MTU.
+ * Stop IPv6 on this interface.
+ */
+
+ case NETDEV_DOWN:
+ case NETDEV_UNREGISTER:
+ /*
+ * Remove all addresses from this interface.
+ */
+ addrconf_ifdown(dev, event != NETDEV_DOWN);
+ break;
+
+ case NETDEV_CHANGENAME:
+ if (idev) {
+ snmp6_unregister_dev(idev);
+ addrconf_sysctl_unregister(idev);
+ addrconf_sysctl_register(idev);
+ err = snmp6_register_dev(idev);
+ if (err)
+ return notifier_from_errno(err);
+ }
+ break;
+
+ case NETDEV_PRE_TYPE_CHANGE:
+ case NETDEV_POST_TYPE_CHANGE:
+ addrconf_type_change(dev, event);
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+/*
+ * addrconf module should be notified of a device going up
+ */
+static struct notifier_block ipv6_dev_notf = {
+ .notifier_call = addrconf_notify,
+};
+
+static void addrconf_type_change(struct net_device *dev, unsigned long event)
+{
+ struct inet6_dev *idev;
+ ASSERT_RTNL();
+
+ idev = __in6_dev_get(dev);
+
+ if (event == NETDEV_POST_TYPE_CHANGE)
+ ipv6_mc_remap(idev);
+ else if (event == NETDEV_PRE_TYPE_CHANGE)
+ ipv6_mc_unmap(idev);
+}
+
+static int addrconf_ifdown(struct net_device *dev, int how)
+{
+ struct net *net = dev_net(dev);
+ struct inet6_dev *idev;
+ struct inet6_ifaddr *ifa;
+ int state, i;
+
+ ASSERT_RTNL();
+
+ rt6_ifdown(net, dev);
+ neigh_ifdown(&nd_tbl, dev);
+
+ idev = __in6_dev_get(dev);
+ if (idev == NULL)
+ return -ENODEV;
+
+ /*
+ * Step 1: remove reference to ipv6 device from parent device.
+ * Do not dev_put!
+ */
+ if (how) {
+ idev->dead = 1;
+
+ /* protected by rtnl_lock */
+ RCU_INIT_POINTER(dev->ip6_ptr, NULL);
+
+ /* Step 1.5: remove snmp6 entry */
+ snmp6_unregister_dev(idev);
+
+ }
+
+ /* Step 2: clear hash table */
+ for (i = 0; i < IN6_ADDR_HSIZE; i++) {
+ struct hlist_head *h = &inet6_addr_lst[i];
+ struct hlist_node *n;
+
+ spin_lock_bh(&addrconf_hash_lock);
+ restart:
+ hlist_for_each_entry_rcu(ifa, n, h, addr_lst) {
+ if (ifa->idev == idev) {
+ hlist_del_init_rcu(&ifa->addr_lst);
+ addrconf_del_timer(ifa);
+ goto restart;
+ }
+ }
+ spin_unlock_bh(&addrconf_hash_lock);
+ }
+
+ write_lock_bh(&idev->lock);
+
+ /* Step 2: clear flags for stateless addrconf */
+ if (!how)
+ idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY);
+
+#ifdef CONFIG_IPV6_PRIVACY
+ if (how && del_timer(&idev->regen_timer))
+ in6_dev_put(idev);
+
+ /* Step 3: clear tempaddr list */
+ while (!list_empty(&idev->tempaddr_list)) {
+ ifa = list_first_entry(&idev->tempaddr_list,
+ struct inet6_ifaddr, tmp_list);
+ list_del(&ifa->tmp_list);
+ write_unlock_bh(&idev->lock);
+ spin_lock_bh(&ifa->lock);
+
+ if (ifa->ifpub) {
+ in6_ifa_put(ifa->ifpub);
+ ifa->ifpub = NULL;
+ }
+ spin_unlock_bh(&ifa->lock);
+ in6_ifa_put(ifa);
+ write_lock_bh(&idev->lock);
+ }
+#endif
+
+ while (!list_empty(&idev->addr_list)) {
+ ifa = list_first_entry(&idev->addr_list,
+ struct inet6_ifaddr, if_list);
+ addrconf_del_timer(ifa);
+
+ list_del(&ifa->if_list);
+
+ write_unlock_bh(&idev->lock);
+
+ spin_lock_bh(&ifa->state_lock);
+ state = ifa->state;
+ ifa->state = INET6_IFADDR_STATE_DEAD;
+ spin_unlock_bh(&ifa->state_lock);
+
+ if (state != INET6_IFADDR_STATE_DEAD) {
+ __ipv6_ifa_notify(RTM_DELADDR, ifa);
+ atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa);
+ }
+ in6_ifa_put(ifa);
+
+ write_lock_bh(&idev->lock);
+ }
+
+ write_unlock_bh(&idev->lock);
+
+ /* Step 5: Discard multicast list */
+ if (how)
+ ipv6_mc_destroy_dev(idev);
+ else
+ ipv6_mc_down(idev);
+
+ idev->tstamp = jiffies;
+
+ /* Last: Shot the device (if unregistered) */
+ if (how) {
+ addrconf_sysctl_unregister(idev);
+ neigh_parms_release(&nd_tbl, idev->nd_parms);
+ neigh_ifdown(&nd_tbl, dev);
+ in6_dev_put(idev);
+ }
+ return 0;
+}
+
+static void addrconf_rs_timer(unsigned long data)
+{
+ struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data;
+ struct inet6_dev *idev = ifp->idev;
+
+ read_lock(&idev->lock);
+ if (idev->dead || !(idev->if_flags & IF_READY))
+ goto out;
+
+ if (idev->cnf.forwarding)
+ goto out;
+
+ /* Announcement received after solicitation was sent */
+ if (idev->if_flags & IF_RA_RCVD)
+ goto out;
+
+ spin_lock(&ifp->lock);
+ if (ifp->probes++ < idev->cnf.rtr_solicits) {
+ /* The wait after the last probe can be shorter */
+ addrconf_mod_timer(ifp, AC_RS,
+ (ifp->probes == idev->cnf.rtr_solicits) ?
+ idev->cnf.rtr_solicit_delay :
+ idev->cnf.rtr_solicit_interval);
+ spin_unlock(&ifp->lock);
+
+ ndisc_send_rs(idev->dev, &ifp->addr, &in6addr_linklocal_allrouters);
+ } else {
+ spin_unlock(&ifp->lock);
+ /*
+ * Note: we do not support deprecated "all on-link"
+ * assumption any longer.
+ */
+ printk(KERN_DEBUG "%s: no IPv6 routers present\n",
+ idev->dev->name);
+ }
+
+out:
+ read_unlock(&idev->lock);
+ in6_ifa_put(ifp);
+}
+
+/*
+ * Duplicate Address Detection
+ */
+static void addrconf_dad_kick(struct inet6_ifaddr *ifp)
+{
+ unsigned long rand_num;
+ struct inet6_dev *idev = ifp->idev;
+
+ if (ifp->flags & IFA_F_OPTIMISTIC)
+ rand_num = 0;
+ else
+ rand_num = net_random() % (idev->cnf.rtr_solicit_delay ? : 1);
+
+ ifp->probes = idev->cnf.dad_transmits;
+ addrconf_mod_timer(ifp, AC_DAD, rand_num);
+}
+
+static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
+{
+ struct inet6_dev *idev = ifp->idev;
+ struct net_device *dev = idev->dev;
+
+ addrconf_join_solict(dev, &ifp->addr);
+
+ net_srandom(ifp->addr.s6_addr32[3]);
+
+ read_lock_bh(&idev->lock);
+ spin_lock(&ifp->lock);
+ if (ifp->state == INET6_IFADDR_STATE_DEAD)
+ goto out;
+
+ if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) ||
+ idev->cnf.accept_dad < 1 ||
+ !(ifp->flags&IFA_F_TENTATIVE) ||
+ ifp->flags & IFA_F_NODAD) {
+ ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED);
+ spin_unlock(&ifp->lock);
+ read_unlock_bh(&idev->lock);
+
+ addrconf_dad_completed(ifp);
+ return;
+ }
+
+ if (!(idev->if_flags & IF_READY)) {
+ spin_unlock(&ifp->lock);
+ read_unlock_bh(&idev->lock);
+ /*
+ * If the device is not ready:
+ * - keep it tentative if it is a permanent address.
+ * - otherwise, kill it.
+ */
+ in6_ifa_hold(ifp);
+ addrconf_dad_stop(ifp, 0);
+ return;
+ }
+
+ /*
+ * Optimistic nodes can start receiving
+ * Frames right away
+ */
+ if (ifp->flags & IFA_F_OPTIMISTIC)
+ ip6_ins_rt(ifp->rt);
+
+ addrconf_dad_kick(ifp);
+out:
+ spin_unlock(&ifp->lock);
+ read_unlock_bh(&idev->lock);
+}
+
+static void addrconf_dad_timer(unsigned long data)
+{
+ struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data;
+ struct inet6_dev *idev = ifp->idev;
+ struct in6_addr mcaddr;
+
+ if (!ifp->probes && addrconf_dad_end(ifp))
+ goto out;
+
+ read_lock(&idev->lock);
+ if (idev->dead || !(idev->if_flags & IF_READY)) {
+ read_unlock(&idev->lock);
+ goto out;
+ }
+
+ spin_lock(&ifp->lock);
+ if (ifp->state == INET6_IFADDR_STATE_DEAD) {
+ spin_unlock(&ifp->lock);
+ read_unlock(&idev->lock);
+ goto out;
+ }
+
+ if (ifp->probes == 0) {
+ /*
+ * DAD was successful
+ */
+
+ ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED);
+ spin_unlock(&ifp->lock);
+ read_unlock(&idev->lock);
+
+ addrconf_dad_completed(ifp);
+
+ goto out;
+ }
+
+ ifp->probes--;
+ addrconf_mod_timer(ifp, AC_DAD, ifp->idev->nd_parms->retrans_time);
+ spin_unlock(&ifp->lock);
+ read_unlock(&idev->lock);
+
+ /* send a neighbour solicitation for our addr */
+ addrconf_addr_solict_mult(&ifp->addr, &mcaddr);
+ ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &in6addr_any);
+out:
+ in6_ifa_put(ifp);
+}
+
+static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
+{
+ struct net_device *dev = ifp->idev->dev;
+
+ /*
+ * Configure the address for reception. Now it is valid.
+ */
+
+ ipv6_ifa_notify(RTM_NEWADDR, ifp);
+
+ /* If added prefix is link local and we are prepared to process
+ router advertisements, start sending router solicitations.
+ */
+
+ if (((ifp->idev->cnf.accept_ra == 1 && !ifp->idev->cnf.forwarding) ||
+ ifp->idev->cnf.accept_ra == 2) &&
+ ifp->idev->cnf.rtr_solicits > 0 &&
+ (dev->flags&IFF_LOOPBACK) == 0 &&
+ (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) {
+ /*
+ * If a host as already performed a random delay
+ * [...] as part of DAD [...] there is no need
+ * to delay again before sending the first RS
+ */
+ ndisc_send_rs(ifp->idev->dev, &ifp->addr, &in6addr_linklocal_allrouters);
+
+ spin_lock_bh(&ifp->lock);
+ ifp->probes = 1;
+ ifp->idev->if_flags |= IF_RS_SENT;
+ addrconf_mod_timer(ifp, AC_RS, ifp->idev->cnf.rtr_solicit_interval);
+ spin_unlock_bh(&ifp->lock);
+ }
+}
+
+static void addrconf_dad_run(struct inet6_dev *idev)
+{
+ struct inet6_ifaddr *ifp;
+
+ read_lock_bh(&idev->lock);
+ list_for_each_entry(ifp, &idev->addr_list, if_list) {
+ spin_lock(&ifp->lock);
+ if (ifp->flags & IFA_F_TENTATIVE &&
+ ifp->state == INET6_IFADDR_STATE_DAD)
+ addrconf_dad_kick(ifp);
+ spin_unlock(&ifp->lock);
+ }
+ read_unlock_bh(&idev->lock);
+}
+
+#ifdef CONFIG_PROC_FS
+struct if6_iter_state {
+ struct seq_net_private p;
+ int bucket;
+ int offset;
+};
+
+static struct inet6_ifaddr *if6_get_first(struct seq_file *seq, loff_t pos)
+{
+ struct inet6_ifaddr *ifa = NULL;
+ struct if6_iter_state *state = seq->private;
+ struct net *net = seq_file_net(seq);
+ int p = 0;
+
+ /* initial bucket if pos is 0 */
+ if (pos == 0) {
+ state->bucket = 0;
+ state->offset = 0;
+ }
+
+ for (; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) {
+ struct hlist_node *n;
+ hlist_for_each_entry_rcu_bh(ifa, n, &inet6_addr_lst[state->bucket],
+ addr_lst) {
+ /* sync with offset */
+ if (p < state->offset) {
+ p++;
+ continue;
+ }
+ state->offset++;
+ if (net_eq(dev_net(ifa->idev->dev), net))
+ return ifa;
+ }
+
+ /* prepare for next bucket */
+ state->offset = 0;
+ p = 0;
+ }
+ return NULL;
+}
+
+static struct inet6_ifaddr *if6_get_next(struct seq_file *seq,
+ struct inet6_ifaddr *ifa)
+{
+ struct if6_iter_state *state = seq->private;
+ struct net *net = seq_file_net(seq);
+ struct hlist_node *n = &ifa->addr_lst;
+
+ hlist_for_each_entry_continue_rcu_bh(ifa, n, addr_lst) {
+ state->offset++;
+ if (net_eq(dev_net(ifa->idev->dev), net))
+ return ifa;
+ }
+
+ while (++state->bucket < IN6_ADDR_HSIZE) {
+ state->offset = 0;
+ hlist_for_each_entry_rcu_bh(ifa, n,
+ &inet6_addr_lst[state->bucket], addr_lst) {
+ state->offset++;
+ if (net_eq(dev_net(ifa->idev->dev), net))
+ return ifa;
+ }
+ }
+
+ return NULL;
+}
+
+static void *if6_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(rcu_bh)
+{
+ rcu_read_lock_bh();
+ return if6_get_first(seq, *pos);
+}
+
+static void *if6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct inet6_ifaddr *ifa;
+
+ ifa = if6_get_next(seq, v);
+ ++*pos;
+ return ifa;
+}
+
+static void if6_seq_stop(struct seq_file *seq, void *v)
+ __releases(rcu_bh)
+{
+ rcu_read_unlock_bh();
+}
+
+static int if6_seq_show(struct seq_file *seq, void *v)
+{
+ struct inet6_ifaddr *ifp = (struct inet6_ifaddr *)v;
+ seq_printf(seq, "%pi6 %02x %02x %02x %02x %8s\n",
+ &ifp->addr,
+ ifp->idev->dev->ifindex,
+ ifp->prefix_len,
+ ifp->scope,
+ ifp->flags,
+ ifp->idev->dev->name);
+ return 0;
+}
+
+static const struct seq_operations if6_seq_ops = {
+ .start = if6_seq_start,
+ .next = if6_seq_next,
+ .show = if6_seq_show,
+ .stop = if6_seq_stop,
+};
+
+static int if6_seq_open(struct inode *inode, struct file *file)
+{
+ return seq_open_net(inode, file, &if6_seq_ops,
+ sizeof(struct if6_iter_state));
+}
+
+static const struct file_operations if6_fops = {
+ .owner = THIS_MODULE,
+ .open = if6_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_net,
+};
+
+static int __net_init if6_proc_net_init(struct net *net)
+{
+ if (!proc_net_fops_create(net, "if_inet6", S_IRUGO, &if6_fops))
+ return -ENOMEM;
+ return 0;
+}
+
+static void __net_exit if6_proc_net_exit(struct net *net)
+{
+ proc_net_remove(net, "if_inet6");
+}
+
+static struct pernet_operations if6_proc_net_ops = {
+ .init = if6_proc_net_init,
+ .exit = if6_proc_net_exit,
+};
+
+int __init if6_proc_init(void)
+{
+ return register_pernet_subsys(&if6_proc_net_ops);
+}
+
+void if6_proc_exit(void)
+{
+ unregister_pernet_subsys(&if6_proc_net_ops);
+}
+#endif /* CONFIG_PROC_FS */
+
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+/* Check if address is a home address configured on any interface. */
+int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr)
+{
+ int ret = 0;
+ struct inet6_ifaddr *ifp = NULL;
+ struct hlist_node *n;
+ unsigned int hash = ipv6_addr_hash(addr);
+
+ rcu_read_lock_bh();
+ hlist_for_each_entry_rcu_bh(ifp, n, &inet6_addr_lst[hash], addr_lst) {
+ if (!net_eq(dev_net(ifp->idev->dev), net))
+ continue;
+ if (ipv6_addr_equal(&ifp->addr, addr) &&
+ (ifp->flags & IFA_F_HOMEADDRESS)) {
+ ret = 1;
+ break;
+ }
+ }
+ rcu_read_unlock_bh();
+ return ret;
+}
+#endif
+
+/*
+ * Periodic address status verification
+ */
+
+static void addrconf_verify(unsigned long foo)
+{
+ unsigned long now, next, next_sec, next_sched;
+ struct inet6_ifaddr *ifp;
+ struct hlist_node *node;
+ int i;
+
+ rcu_read_lock_bh();
+ spin_lock(&addrconf_verify_lock);
+ now = jiffies;
+ next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
+
+ del_timer(&addr_chk_timer);
+
+ for (i = 0; i < IN6_ADDR_HSIZE; i++) {
+restart:
+ hlist_for_each_entry_rcu_bh(ifp, node,
+ &inet6_addr_lst[i], addr_lst) {
+ unsigned long age;
+
+ if (ifp->flags & IFA_F_PERMANENT)
+ continue;
+
+ spin_lock(&ifp->lock);
+ /* We try to batch several events at once. */
+ age = (now - ifp->tstamp + ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
+
+ if (ifp->valid_lft != INFINITY_LIFE_TIME &&
+ age >= ifp->valid_lft) {
+ spin_unlock(&ifp->lock);
+ in6_ifa_hold(ifp);
+ ipv6_del_addr(ifp);
+ goto restart;
+ } else if (ifp->prefered_lft == INFINITY_LIFE_TIME) {
+ spin_unlock(&ifp->lock);
+ continue;
+ } else if (age >= ifp->prefered_lft) {
+ /* jiffies - ifp->tstamp > age >= ifp->prefered_lft */
+ int deprecate = 0;
+
+ if (!(ifp->flags&IFA_F_DEPRECATED)) {
+ deprecate = 1;
+ ifp->flags |= IFA_F_DEPRECATED;
+ }
+
+ if (time_before(ifp->tstamp + ifp->valid_lft * HZ, next))
+ next = ifp->tstamp + ifp->valid_lft * HZ;
+
+ spin_unlock(&ifp->lock);
+
+ if (deprecate) {
+ in6_ifa_hold(ifp);
+
+ ipv6_ifa_notify(0, ifp);
+ in6_ifa_put(ifp);
+ goto restart;
+ }
+#ifdef CONFIG_IPV6_PRIVACY
+ } else if ((ifp->flags&IFA_F_TEMPORARY) &&
+ !(ifp->flags&IFA_F_TENTATIVE)) {
+ unsigned long regen_advance = ifp->idev->cnf.regen_max_retry *
+ ifp->idev->cnf.dad_transmits *
+ ifp->idev->nd_parms->retrans_time / HZ;
+
+ if (age >= ifp->prefered_lft - regen_advance) {
+ struct inet6_ifaddr *ifpub = ifp->ifpub;
+ if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next))
+ next = ifp->tstamp + ifp->prefered_lft * HZ;
+ if (!ifp->regen_count && ifpub) {
+ ifp->regen_count++;
+ in6_ifa_hold(ifp);
+ in6_ifa_hold(ifpub);
+ spin_unlock(&ifp->lock);
+
+ spin_lock(&ifpub->lock);
+ ifpub->regen_count = 0;
+ spin_unlock(&ifpub->lock);
+ ipv6_create_tempaddr(ifpub, ifp);
+ in6_ifa_put(ifpub);
+ in6_ifa_put(ifp);
+ goto restart;
+ }
+ } else if (time_before(ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ, next))
+ next = ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ;
+ spin_unlock(&ifp->lock);
+#endif
+ } else {
+ /* ifp->prefered_lft <= ifp->valid_lft */
+ if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next))
+ next = ifp->tstamp + ifp->prefered_lft * HZ;
+ spin_unlock(&ifp->lock);
+ }
+ }
+ }
+
+ next_sec = round_jiffies_up(next);
+ next_sched = next;
+
+ /* If rounded timeout is accurate enough, accept it. */
+ if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
+ next_sched = next_sec;
+
+ /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
+ if (time_before(next_sched, jiffies + ADDRCONF_TIMER_FUZZ_MAX))
+ next_sched = jiffies + ADDRCONF_TIMER_FUZZ_MAX;
+
+ ADBG((KERN_DEBUG "now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n",
+ now, next, next_sec, next_sched));
+
+ addr_chk_timer.expires = next_sched;
+ add_timer(&addr_chk_timer);
+ spin_unlock(&addrconf_verify_lock);
+ rcu_read_unlock_bh();
+}
+
+static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local)
+{
+ struct in6_addr *pfx = NULL;
+
+ if (addr)
+ pfx = nla_data(addr);
+
+ if (local) {
+ if (pfx && nla_memcmp(local, pfx, sizeof(*pfx)))
+ pfx = NULL;
+ else
+ pfx = nla_data(local);
+ }
+
+ return pfx;
+}
+
+static const struct nla_policy ifa_ipv6_policy[IFA_MAX+1] = {
+ [IFA_ADDRESS] = { .len = sizeof(struct in6_addr) },
+ [IFA_LOCAL] = { .len = sizeof(struct in6_addr) },
+ [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) },
+};
+
+static int
+inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+ struct net *net = sock_net(skb->sk);
+ struct ifaddrmsg *ifm;
+ struct nlattr *tb[IFA_MAX+1];
+ struct in6_addr *pfx;
+ int err;
+
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
+ if (err < 0)
+ return err;
+
+ ifm = nlmsg_data(nlh);
+ pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]);
+ if (pfx == NULL)
+ return -EINVAL;
+
+ return inet6_addr_del(net, ifm->ifa_index, pfx, ifm->ifa_prefixlen);
+}
+
+static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags,
+ u32 prefered_lft, u32 valid_lft)
+{
+ u32 flags;
+ clock_t expires;
+ unsigned long timeout;
+
+ if (!valid_lft || (prefered_lft > valid_lft))
+ return -EINVAL;
+
+ timeout = addrconf_timeout_fixup(valid_lft, HZ);
+ if (addrconf_finite_timeout(timeout)) {
+ expires = jiffies_to_clock_t(timeout * HZ);
+ valid_lft = timeout;
+ flags = RTF_EXPIRES;
+ } else {
+ expires = 0;
+ flags = 0;
+ ifa_flags |= IFA_F_PERMANENT;
+ }
+
+ timeout = addrconf_timeout_fixup(prefered_lft, HZ);
+ if (addrconf_finite_timeout(timeout)) {
+ if (timeout == 0)
+ ifa_flags |= IFA_F_DEPRECATED;
+ prefered_lft = timeout;
+ }
+
+ spin_lock_bh(&ifp->lock);
+ ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD | IFA_F_HOMEADDRESS)) | ifa_flags;
+ ifp->tstamp = jiffies;
+ ifp->valid_lft = valid_lft;
+ ifp->prefered_lft = prefered_lft;
+
+ spin_unlock_bh(&ifp->lock);
+ if (!(ifp->flags&IFA_F_TENTATIVE))
+ ipv6_ifa_notify(0, ifp);
+
+ addrconf_prefix_route(&ifp->addr, ifp->prefix_len, ifp->idev->dev,
+ expires, flags);
+ addrconf_verify(0);
+
+ return 0;
+}
+
+static int
+inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+ struct net *net = sock_net(skb->sk);
+ struct ifaddrmsg *ifm;
+ struct nlattr *tb[IFA_MAX+1];
+ struct in6_addr *pfx;
+ struct inet6_ifaddr *ifa;
+ struct net_device *dev;
+ u32 valid_lft = INFINITY_LIFE_TIME, preferred_lft = INFINITY_LIFE_TIME;
+ u8 ifa_flags;
+ int err;
+
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
+ if (err < 0)
+ return err;
+
+ ifm = nlmsg_data(nlh);
+ pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]);
+ if (pfx == NULL)
+ return -EINVAL;
+
+ if (tb[IFA_CACHEINFO]) {
+ struct ifa_cacheinfo *ci;
+
+ ci = nla_data(tb[IFA_CACHEINFO]);
+ valid_lft = ci->ifa_valid;
+ preferred_lft = ci->ifa_prefered;
+ } else {
+ preferred_lft = INFINITY_LIFE_TIME;
+ valid_lft = INFINITY_LIFE_TIME;
+ }
+
+ dev = __dev_get_by_index(net, ifm->ifa_index);
+ if (dev == NULL)
+ return -ENODEV;
+
+ /* We ignore other flags so far. */
+ ifa_flags = ifm->ifa_flags & (IFA_F_NODAD | IFA_F_HOMEADDRESS);
+
+ ifa = ipv6_get_ifaddr(net, pfx, dev, 1);
+ if (ifa == NULL) {
+ /*
+ * It would be best to check for !NLM_F_CREATE here but
+ * userspace alreay relies on not having to provide this.
+ */
+ return inet6_addr_add(net, ifm->ifa_index, pfx,
+ ifm->ifa_prefixlen, ifa_flags,
+ preferred_lft, valid_lft);
+ }
+
+ if (nlh->nlmsg_flags & NLM_F_EXCL ||
+ !(nlh->nlmsg_flags & NLM_F_REPLACE))
+ err = -EEXIST;
+ else
+ err = inet6_addr_modify(ifa, ifa_flags, preferred_lft, valid_lft);
+
+ in6_ifa_put(ifa);
+
+ return err;
+}
+
+static void put_ifaddrmsg(struct nlmsghdr *nlh, u8 prefixlen, u8 flags,
+ u8 scope, int ifindex)
+{
+ struct ifaddrmsg *ifm;
+
+ ifm = nlmsg_data(nlh);
+ ifm->ifa_family = AF_INET6;
+ ifm->ifa_prefixlen = prefixlen;
+ ifm->ifa_flags = flags;
+ ifm->ifa_scope = scope;
+ ifm->ifa_index = ifindex;
+}
+
+static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
+ unsigned long tstamp, u32 preferred, u32 valid)
+{
+ struct ifa_cacheinfo ci;
+
+ ci.cstamp = cstamp_delta(cstamp);
+ ci.tstamp = cstamp_delta(tstamp);
+ ci.ifa_prefered = preferred;
+ ci.ifa_valid = valid;
+
+ return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
+}
+
+static inline int rt_scope(int ifa_scope)
+{
+ if (ifa_scope & IFA_HOST)
+ return RT_SCOPE_HOST;
+ else if (ifa_scope & IFA_LINK)
+ return RT_SCOPE_LINK;
+ else if (ifa_scope & IFA_SITE)
+ return RT_SCOPE_SITE;
+ else
+ return RT_SCOPE_UNIVERSE;
+}
+
+static inline int inet6_ifaddr_msgsize(void)
+{
+ return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
+ + nla_total_size(16) /* IFA_ADDRESS */
+ + nla_total_size(sizeof(struct ifa_cacheinfo));
+}
+
+static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
+ u32 pid, u32 seq, int event, unsigned int flags)
+{
+ struct nlmsghdr *nlh;
+ u32 preferred, valid;
+
+ nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags);
+ if (nlh == NULL)
+ return -EMSGSIZE;
+
+ put_ifaddrmsg(nlh, ifa->prefix_len, ifa->flags, rt_scope(ifa->scope),
+ ifa->idev->dev->ifindex);
+
+ if (!(ifa->flags&IFA_F_PERMANENT)) {
+ preferred = ifa->prefered_lft;
+ valid = ifa->valid_lft;
+ if (preferred != INFINITY_LIFE_TIME) {
+ long tval = (jiffies - ifa->tstamp)/HZ;
+ if (preferred > tval)
+ preferred -= tval;
+ else
+ preferred = 0;
+ if (valid != INFINITY_LIFE_TIME) {
+ if (valid > tval)
+ valid -= tval;
+ else
+ valid = 0;
+ }
+ }
+ } else {
+ preferred = INFINITY_LIFE_TIME;
+ valid = INFINITY_LIFE_TIME;
+ }
+
+ if (nla_put(skb, IFA_ADDRESS, 16, &ifa->addr) < 0 ||
+ put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0) {
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
+ }
+
+ return nlmsg_end(skb, nlh);
+}
+
+static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
+ u32 pid, u32 seq, int event, u16 flags)
+{
+ struct nlmsghdr *nlh;
+ u8 scope = RT_SCOPE_UNIVERSE;
+ int ifindex = ifmca->idev->dev->ifindex;
+
+ if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE)
+ scope = RT_SCOPE_SITE;
+
+ nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags);
+ if (nlh == NULL)
+ return -EMSGSIZE;
+
+ put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex);
+ if (nla_put(skb, IFA_MULTICAST, 16, &ifmca->mca_addr) < 0 ||
+ put_cacheinfo(skb, ifmca->mca_cstamp, ifmca->mca_tstamp,
+ INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) {
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
+ }
+
+ return nlmsg_end(skb, nlh);
+}
+
+static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
+ u32 pid, u32 seq, int event, unsigned int flags)
+{
+ struct nlmsghdr *nlh;
+ u8 scope = RT_SCOPE_UNIVERSE;
+ int ifindex = ifaca->aca_idev->dev->ifindex;
+
+ if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE)
+ scope = RT_SCOPE_SITE;
+
+ nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags);
+ if (nlh == NULL)
+ return -EMSGSIZE;
+
+ put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex);
+ if (nla_put(skb, IFA_ANYCAST, 16, &ifaca->aca_addr) < 0 ||
+ put_cacheinfo(skb, ifaca->aca_cstamp, ifaca->aca_tstamp,
+ INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) {
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
+ }
+
+ return nlmsg_end(skb, nlh);
+}
+
+enum addr_type_t {
+ UNICAST_ADDR,
+ MULTICAST_ADDR,
+ ANYCAST_ADDR,
+};
+
+/* called with rcu_read_lock() */
+static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
+ struct netlink_callback *cb, enum addr_type_t type,
+ int s_ip_idx, int *p_ip_idx)
+{
+ struct ifmcaddr6 *ifmca;
+ struct ifacaddr6 *ifaca;
+ int err = 1;
+ int ip_idx = *p_ip_idx;
+
+ read_lock_bh(&idev->lock);
+ switch (type) {
+ case UNICAST_ADDR: {
+ struct inet6_ifaddr *ifa;
+
+ /* unicast address incl. temp addr */
+ list_for_each_entry(ifa, &idev->addr_list, if_list) {
+ if (++ip_idx < s_ip_idx)
+ continue;
+ err = inet6_fill_ifaddr(skb, ifa,
+ NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWADDR,
+ NLM_F_MULTI);
+ if (err <= 0)
+ break;
+ }
+ break;
+ }
+ case MULTICAST_ADDR:
+ /* multicast address */
+ for (ifmca = idev->mc_list; ifmca;
+ ifmca = ifmca->next, ip_idx++) {
+ if (ip_idx < s_ip_idx)
+ continue;
+ err = inet6_fill_ifmcaddr(skb, ifmca,
+ NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq,
+ RTM_GETMULTICAST,
+ NLM_F_MULTI);
+ if (err <= 0)
+ break;
+ }
+ break;
+ case ANYCAST_ADDR:
+ /* anycast address */
+ for (ifaca = idev->ac_list; ifaca;
+ ifaca = ifaca->aca_next, ip_idx++) {
+ if (ip_idx < s_ip_idx)
+ continue;
+ err = inet6_fill_ifacaddr(skb, ifaca,
+ NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq,
+ RTM_GETANYCAST,
+ NLM_F_MULTI);
+ if (err <= 0)
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+ read_unlock_bh(&idev->lock);
+ *p_ip_idx = ip_idx;
+ return err;
+}
+
+static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
+ enum addr_type_t type)
+{
+ struct net *net = sock_net(skb->sk);
+ int h, s_h;
+ int idx, ip_idx;
+ int s_idx, s_ip_idx;
+ struct net_device *dev;
+ struct inet6_dev *idev;
+ struct hlist_head *head;
+ struct hlist_node *node;
+
+ s_h = cb->args[0];
+ s_idx = idx = cb->args[1];
+ s_ip_idx = ip_idx = cb->args[2];
+
+ rcu_read_lock();
+ for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
+ idx = 0;
+ head = &net->dev_index_head[h];
+ hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
+ if (idx < s_idx)
+ goto cont;
+ if (h > s_h || idx > s_idx)
+ s_ip_idx = 0;
+ ip_idx = 0;
+ idev = __in6_dev_get(dev);
+ if (!idev)
+ goto cont;
+
+ if (in6_dump_addrs(idev, skb, cb, type,
+ s_ip_idx, &ip_idx) <= 0)
+ goto done;
+cont:
+ idx++;
+ }
+ }
+done:
+ rcu_read_unlock();
+ cb->args[0] = h;
+ cb->args[1] = idx;
+ cb->args[2] = ip_idx;
+
+ return skb->len;
+}
+
+static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ enum addr_type_t type = UNICAST_ADDR;
+
+ return inet6_dump_addr(skb, cb, type);
+}
+
+static int inet6_dump_ifmcaddr(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ enum addr_type_t type = MULTICAST_ADDR;
+
+ return inet6_dump_addr(skb, cb, type);
+}
+
+
+static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ enum addr_type_t type = ANYCAST_ADDR;
+
+ return inet6_dump_addr(skb, cb, type);
+}
+
+static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh,
+ void *arg)
+{
+ struct net *net = sock_net(in_skb->sk);
+ struct ifaddrmsg *ifm;
+ struct nlattr *tb[IFA_MAX+1];
+ struct in6_addr *addr = NULL;
+ struct net_device *dev = NULL;
+ struct inet6_ifaddr *ifa;
+ struct sk_buff *skb;
+ int err;
+
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
+ if (err < 0)
+ goto errout;
+
+ addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]);
+ if (addr == NULL) {
+ err = -EINVAL;
+ goto errout;
+ }
+
+ ifm = nlmsg_data(nlh);
+ if (ifm->ifa_index)
+ dev = __dev_get_by_index(net, ifm->ifa_index);
+
+ ifa = ipv6_get_ifaddr(net, addr, dev, 1);
+ if (!ifa) {
+ err = -EADDRNOTAVAIL;
+ goto errout;
+ }
+
+ skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_KERNEL);
+ if (!skb) {
+ err = -ENOBUFS;
+ goto errout_ifa;
+ }
+
+ err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).pid,
+ nlh->nlmsg_seq, RTM_NEWADDR, 0);
+ if (err < 0) {
+ /* -EMSGSIZE implies BUG in inet6_ifaddr_msgsize() */
+ WARN_ON(err == -EMSGSIZE);
+ kfree_skb(skb);
+ goto errout_ifa;
+ }
+ err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
+errout_ifa:
+ in6_ifa_put(ifa);
+errout:
+ return err;
+}
+
+static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
+{
+ struct sk_buff *skb;
+ struct net *net = dev_net(ifa->idev->dev);
+ int err = -ENOBUFS;
+
+ skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_ATOMIC);
+ if (skb == NULL)
+ goto errout;
+
+ err = inet6_fill_ifaddr(skb, ifa, 0, 0, event, 0);
+ if (err < 0) {
+ /* -EMSGSIZE implies BUG in inet6_ifaddr_msgsize() */
+ WARN_ON(err == -EMSGSIZE);
+ kfree_skb(skb);
+ goto errout;
+ }
+ rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
+ return;
+errout:
+ if (err < 0)
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_IFADDR, err);
+}
+
+static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
+ __s32 *array, int bytes)
+{
+ BUG_ON(bytes < (DEVCONF_MAX * 4));
+
+ memset(array, 0, bytes);
+ array[DEVCONF_FORWARDING] = cnf->forwarding;
+ array[DEVCONF_HOPLIMIT] = cnf->hop_limit;
+ array[DEVCONF_MTU6] = cnf->mtu6;
+ array[DEVCONF_ACCEPT_RA] = cnf->accept_ra;
+ array[DEVCONF_ACCEPT_REDIRECTS] = cnf->accept_redirects;
+ array[DEVCONF_AUTOCONF] = cnf->autoconf;
+ array[DEVCONF_DAD_TRANSMITS] = cnf->dad_transmits;
+ array[DEVCONF_RTR_SOLICITS] = cnf->rtr_solicits;
+ array[DEVCONF_RTR_SOLICIT_INTERVAL] =
+ jiffies_to_msecs(cnf->rtr_solicit_interval);
+ array[DEVCONF_RTR_SOLICIT_DELAY] =
+ jiffies_to_msecs(cnf->rtr_solicit_delay);
+ array[DEVCONF_FORCE_MLD_VERSION] = cnf->force_mld_version;
+#ifdef CONFIG_IPV6_PRIVACY
+ array[DEVCONF_USE_TEMPADDR] = cnf->use_tempaddr;
+ array[DEVCONF_TEMP_VALID_LFT] = cnf->temp_valid_lft;
+ array[DEVCONF_TEMP_PREFERED_LFT] = cnf->temp_prefered_lft;
+ array[DEVCONF_REGEN_MAX_RETRY] = cnf->regen_max_retry;
+ array[DEVCONF_MAX_DESYNC_FACTOR] = cnf->max_desync_factor;
+#endif
+ array[DEVCONF_MAX_ADDRESSES] = cnf->max_addresses;
+ array[DEVCONF_ACCEPT_RA_DEFRTR] = cnf->accept_ra_defrtr;
+ array[DEVCONF_ACCEPT_RA_PINFO] = cnf->accept_ra_pinfo;
+#ifdef CONFIG_IPV6_ROUTER_PREF
+ array[DEVCONF_ACCEPT_RA_RTR_PREF] = cnf->accept_ra_rtr_pref;
+ array[DEVCONF_RTR_PROBE_INTERVAL] =
+ jiffies_to_msecs(cnf->rtr_probe_interval);
+#ifdef CONFIG_IPV6_ROUTE_INFO
+ array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen;
+#endif
+#endif
+ array[DEVCONF_PROXY_NDP] = cnf->proxy_ndp;
+ array[DEVCONF_ACCEPT_SOURCE_ROUTE] = cnf->accept_source_route;
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+ array[DEVCONF_OPTIMISTIC_DAD] = cnf->optimistic_dad;
+#endif
+#ifdef CONFIG_IPV6_MROUTE
+ array[DEVCONF_MC_FORWARDING] = cnf->mc_forwarding;
+#endif
+ array[DEVCONF_DISABLE_IPV6] = cnf->disable_ipv6;
+ array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad;
+ array[DEVCONF_FORCE_TLLAO] = cnf->force_tllao;
+}
+
+static inline size_t inet6_ifla6_size(void)
+{
+ return nla_total_size(4) /* IFLA_INET6_FLAGS */
+ + nla_total_size(sizeof(struct ifla_cacheinfo))
+ + nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */
+ + nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */
+ + nla_total_size(ICMP6_MIB_MAX * 8); /* IFLA_INET6_ICMP6STATS */
+}
+
+static inline size_t inet6_if_nlmsg_size(void)
+{
+ return NLMSG_ALIGN(sizeof(struct ifinfomsg))
+ + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
+ + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
+ + nla_total_size(4) /* IFLA_MTU */
+ + nla_total_size(4) /* IFLA_LINK */
+ + nla_total_size(inet6_ifla6_size()); /* IFLA_PROTINFO */
+}
+
+static inline void __snmp6_fill_statsdev(u64 *stats, atomic_long_t *mib,
+ int items, int bytes)
+{
+ int i;
+ int pad = bytes - sizeof(u64) * items;
+ BUG_ON(pad < 0);
+
+ /* Use put_unaligned() because stats may not be aligned for u64. */
+ put_unaligned(items, &stats[0]);
+ for (i = 1; i < items; i++)
+ put_unaligned(atomic_long_read(&mib[i]), &stats[i]);
+
+ memset(&stats[items], 0, pad);
+}
+
+static inline void __snmp6_fill_stats64(u64 *stats, void __percpu **mib,
+ int items, int bytes, size_t syncpoff)
+{
+ int i;
+ int pad = bytes - sizeof(u64) * items;
+ BUG_ON(pad < 0);
+
+ /* Use put_unaligned() because stats may not be aligned for u64. */
+ put_unaligned(items, &stats[0]);
+ for (i = 1; i < items; i++)
+ put_unaligned(snmp_fold_field64(mib, i, syncpoff), &stats[i]);
+
+ memset(&stats[items], 0, pad);
+}
+
+static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
+ int bytes)
+{
+ switch (attrtype) {
+ case IFLA_INET6_STATS:
+ __snmp6_fill_stats64(stats, (void __percpu **)idev->stats.ipv6,
+ IPSTATS_MIB_MAX, bytes, offsetof(struct ipstats_mib, syncp));
+ break;
+ case IFLA_INET6_ICMP6STATS:
+ __snmp6_fill_statsdev(stats, idev->stats.icmpv6dev->mibs, ICMP6_MIB_MAX, bytes);
+ break;
+ }
+}
+
+static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev)
+{
+ struct nlattr *nla;
+ struct ifla_cacheinfo ci;
+
+ NLA_PUT_U32(skb, IFLA_INET6_FLAGS, idev->if_flags);
+
+ ci.max_reasm_len = IPV6_MAXPLEN;
+ ci.tstamp = cstamp_delta(idev->tstamp);
+ ci.reachable_time = jiffies_to_msecs(idev->nd_parms->reachable_time);
+ ci.retrans_time = jiffies_to_msecs(idev->nd_parms->retrans_time);
+ NLA_PUT(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci);
+
+ nla = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32));
+ if (nla == NULL)
+ goto nla_put_failure;
+ ipv6_store_devconf(&idev->cnf, nla_data(nla), nla_len(nla));
+
+ /* XXX - MC not implemented */
+
+ nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64));
+ if (nla == NULL)
+ goto nla_put_failure;
+ snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_STATS, nla_len(nla));
+
+ nla = nla_reserve(skb, IFLA_INET6_ICMP6STATS, ICMP6_MIB_MAX * sizeof(u64));
+ if (nla == NULL)
+ goto nla_put_failure;
+ snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla));
+
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
+static size_t inet6_get_link_af_size(const struct net_device *dev)
+{
+ if (!__in6_dev_get(dev))
+ return 0;
+
+ return inet6_ifla6_size();
+}
+
+static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
+{
+ struct inet6_dev *idev = __in6_dev_get(dev);
+
+ if (!idev)
+ return -ENODATA;
+
+ if (inet6_fill_ifla6_attrs(skb, idev) < 0)
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
+ u32 pid, u32 seq, int event, unsigned int flags)
+{
+ struct net_device *dev = idev->dev;
+ struct ifinfomsg *hdr;
+ struct nlmsghdr *nlh;
+ void *protoinfo;
+
+ nlh = nlmsg_put(skb, pid, seq, event, sizeof(*hdr), flags);
+ if (nlh == NULL)
+ return -EMSGSIZE;
+
+ hdr = nlmsg_data(nlh);
+ hdr->ifi_family = AF_INET6;
+ hdr->__ifi_pad = 0;
+ hdr->ifi_type = dev->type;
+ hdr->ifi_index = dev->ifindex;
+ hdr->ifi_flags = dev_get_flags(dev);
+ hdr->ifi_change = 0;
+
+ NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name);
+
+ if (dev->addr_len)
+ NLA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr);
+
+ NLA_PUT_U32(skb, IFLA_MTU, dev->mtu);
+ if (dev->ifindex != dev->iflink)
+ NLA_PUT_U32(skb, IFLA_LINK, dev->iflink);
+
+ protoinfo = nla_nest_start(skb, IFLA_PROTINFO);
+ if (protoinfo == NULL)
+ goto nla_put_failure;
+
+ if (inet6_fill_ifla6_attrs(skb, idev) < 0)
+ goto nla_put_failure;
+
+ nla_nest_end(skb, protoinfo);
+ return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
+}
+
+static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct net *net = sock_net(skb->sk);
+ int h, s_h;
+ int idx = 0, s_idx;
+ struct net_device *dev;
+ struct inet6_dev *idev;
+ struct hlist_head *head;
+ struct hlist_node *node;
+
+ s_h = cb->args[0];
+ s_idx = cb->args[1];
+
+ rcu_read_lock();
+ for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
+ idx = 0;
+ head = &net->dev_index_head[h];
+ hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
+ if (idx < s_idx)
+ goto cont;
+ idev = __in6_dev_get(dev);
+ if (!idev)
+ goto cont;
+ if (inet6_fill_ifinfo(skb, idev,
+ NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWLINK, NLM_F_MULTI) <= 0)
+ goto out;
+cont:
+ idx++;
+ }
+ }
+out:
+ rcu_read_unlock();
+ cb->args[1] = idx;
+ cb->args[0] = h;
+
+ return skb->len;
+}
+
+void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
+{
+ struct sk_buff *skb;
+ struct net *net = dev_net(idev->dev);
+ int err = -ENOBUFS;
+
+ skb = nlmsg_new(inet6_if_nlmsg_size(), GFP_ATOMIC);
+ if (skb == NULL)
+ goto errout;
+
+ err = inet6_fill_ifinfo(skb, idev, 0, 0, event, 0);
+ if (err < 0) {
+ /* -EMSGSIZE implies BUG in inet6_if_nlmsg_size() */
+ WARN_ON(err == -EMSGSIZE);
+ kfree_skb(skb);
+ goto errout;
+ }
+ rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFINFO, NULL, GFP_ATOMIC);
+ return;
+errout:
+ if (err < 0)
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_IFINFO, err);
+}
+
+static inline size_t inet6_prefix_nlmsg_size(void)
+{
+ return NLMSG_ALIGN(sizeof(struct prefixmsg))
+ + nla_total_size(sizeof(struct in6_addr))
+ + nla_total_size(sizeof(struct prefix_cacheinfo));
+}
+
+static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev,
+ struct prefix_info *pinfo, u32 pid, u32 seq,
+ int event, unsigned int flags)
+{
+ struct prefixmsg *pmsg;
+ struct nlmsghdr *nlh;
+ struct prefix_cacheinfo ci;
+
+ nlh = nlmsg_put(skb, pid, seq, event, sizeof(*pmsg), flags);
+ if (nlh == NULL)
+ return -EMSGSIZE;
+
+ pmsg = nlmsg_data(nlh);
+ pmsg->prefix_family = AF_INET6;
+ pmsg->prefix_pad1 = 0;
+ pmsg->prefix_pad2 = 0;
+ pmsg->prefix_ifindex = idev->dev->ifindex;
+ pmsg->prefix_len = pinfo->prefix_len;
+ pmsg->prefix_type = pinfo->type;
+ pmsg->prefix_pad3 = 0;
+ pmsg->prefix_flags = 0;
+ if (pinfo->onlink)
+ pmsg->prefix_flags |= IF_PREFIX_ONLINK;
+ if (pinfo->autoconf)
+ pmsg->prefix_flags |= IF_PREFIX_AUTOCONF;
+
+ NLA_PUT(skb, PREFIX_ADDRESS, sizeof(pinfo->prefix), &pinfo->prefix);
+
+ ci.preferred_time = ntohl(pinfo->prefered);
+ ci.valid_time = ntohl(pinfo->valid);
+ NLA_PUT(skb, PREFIX_CACHEINFO, sizeof(ci), &ci);
+
+ return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
+}
+
+static void inet6_prefix_notify(int event, struct inet6_dev *idev,
+ struct prefix_info *pinfo)
+{
+ struct sk_buff *skb;
+ struct net *net = dev_net(idev->dev);
+ int err = -ENOBUFS;
+
+ skb = nlmsg_new(inet6_prefix_nlmsg_size(), GFP_ATOMIC);
+ if (skb == NULL)
+ goto errout;
+
+ err = inet6_fill_prefix(skb, idev, pinfo, 0, 0, event, 0);
+ if (err < 0) {
+ /* -EMSGSIZE implies BUG in inet6_prefix_nlmsg_size() */
+ WARN_ON(err == -EMSGSIZE);
+ kfree_skb(skb);
+ goto errout;
+ }
+ rtnl_notify(skb, net, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC);
+ return;
+errout:
+ if (err < 0)
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_PREFIX, err);
+}
+
+static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
+{
+ inet6_ifa_notify(event ? : RTM_NEWADDR, ifp);
+
+ switch (event) {
+ case RTM_NEWADDR:
+ /*
+ * If the address was optimistic
+ * we inserted the route at the start of
+ * our DAD process, so we don't need
+ * to do it again
+ */
+ if (!(ifp->rt->rt6i_node))
+ ip6_ins_rt(ifp->rt);
+ if (ifp->idev->cnf.forwarding)
+ addrconf_join_anycast(ifp);
+ break;
+ case RTM_DELADDR:
+ if (ifp->idev->cnf.forwarding)
+ addrconf_leave_anycast(ifp);
+ addrconf_leave_solict(ifp->idev, &ifp->addr);
+ dst_hold(&ifp->rt->dst);
+
+ if (ip6_del_rt(ifp->rt))
+ dst_free(&ifp->rt->dst);
+ break;
+ }
+}
+
+static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
+{
+ rcu_read_lock_bh();
+ if (likely(ifp->idev->dead == 0))
+ __ipv6_ifa_notify(event, ifp);
+ rcu_read_unlock_bh();
+}
+
+#ifdef CONFIG_SYSCTL
+
+static
+int addrconf_sysctl_forward(ctl_table *ctl, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ int *valp = ctl->data;
+ int val = *valp;
+ loff_t pos = *ppos;
+ ctl_table lctl;
+ int ret;
+
+ /*
+ * ctl->data points to idev->cnf.forwarding, we should
+ * not modify it until we get the rtnl lock.
+ */
+ lctl = *ctl;
+ lctl.data = &val;
+
+ ret = proc_dointvec(&lctl, write, buffer, lenp, ppos);
+
+ if (write)
+ ret = addrconf_fixup_forwarding(ctl, valp, val);
+ if (ret)
+ *ppos = pos;
+ return ret;
+}
+
+static void dev_disable_change(struct inet6_dev *idev)
+{
+ if (!idev || !idev->dev)
+ return;
+
+ if (idev->cnf.disable_ipv6)
+ addrconf_notify(NULL, NETDEV_DOWN, idev->dev);
+ else
+ addrconf_notify(NULL, NETDEV_UP, idev->dev);
+}
+
+static void addrconf_disable_change(struct net *net, __s32 newf)
+{
+ struct net_device *dev;
+ struct inet6_dev *idev;
+
+ rcu_read_lock();
+ for_each_netdev_rcu(net, dev) {
+ idev = __in6_dev_get(dev);
+ if (idev) {
+ int changed = (!idev->cnf.disable_ipv6) ^ (!newf);
+ idev->cnf.disable_ipv6 = newf;
+ if (changed)
+ dev_disable_change(idev);
+ }
+ }
+ rcu_read_unlock();
+}
+
+static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int newf)
+{
+ struct net *net;
+ int old;
+
+ if (!rtnl_trylock())
+ return restart_syscall();
+
+ net = (struct net *)table->extra2;
+ old = *p;
+ *p = newf;
+
+ if (p == &net->ipv6.devconf_dflt->disable_ipv6) {
+ rtnl_unlock();
+ return 0;
+ }
+
+ if (p == &net->ipv6.devconf_all->disable_ipv6) {
+ net->ipv6.devconf_dflt->disable_ipv6 = newf;
+ addrconf_disable_change(net, newf);
+ } else if ((!newf) ^ (!old))
+ dev_disable_change((struct inet6_dev *)table->extra1);
+
+ rtnl_unlock();
+ return 0;
+}
+
+static
+int addrconf_sysctl_disable(ctl_table *ctl, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ int *valp = ctl->data;
+ int val = *valp;
+ loff_t pos = *ppos;
+ ctl_table lctl;
+ int ret;
+
+ /*
+ * ctl->data points to idev->cnf.disable_ipv6, we should
+ * not modify it until we get the rtnl lock.
+ */
+ lctl = *ctl;
+ lctl.data = &val;
+
+ ret = proc_dointvec(&lctl, write, buffer, lenp, ppos);
+
+ if (write)
+ ret = addrconf_disable_ipv6(ctl, valp, val);
+ if (ret)
+ *ppos = pos;
+ return ret;
+}
+
+static struct addrconf_sysctl_table
+{
+ struct ctl_table_header *sysctl_header;
+ ctl_table addrconf_vars[DEVCONF_MAX+1];
+ char *dev_name;
+} addrconf_sysctl __read_mostly = {
+ .sysctl_header = NULL,
+ .addrconf_vars = {
+ {
+ .procname = "forwarding",
+ .data = &ipv6_devconf.forwarding,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = addrconf_sysctl_forward,
+ },
+ {
+ .procname = "hop_limit",
+ .data = &ipv6_devconf.hop_limit,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "mtu",
+ .data = &ipv6_devconf.mtu6,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "accept_ra",
+ .data = &ipv6_devconf.accept_ra,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "accept_redirects",
+ .data = &ipv6_devconf.accept_redirects,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "autoconf",
+ .data = &ipv6_devconf.autoconf,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "dad_transmits",
+ .data = &ipv6_devconf.dad_transmits,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "router_solicitations",
+ .data = &ipv6_devconf.rtr_solicits,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "router_solicitation_interval",
+ .data = &ipv6_devconf.rtr_solicit_interval,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ {
+ .procname = "router_solicitation_delay",
+ .data = &ipv6_devconf.rtr_solicit_delay,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ {
+ .procname = "force_mld_version",
+ .data = &ipv6_devconf.force_mld_version,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#ifdef CONFIG_IPV6_PRIVACY
+ {
+ .procname = "use_tempaddr",
+ .data = &ipv6_devconf.use_tempaddr,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "temp_valid_lft",
+ .data = &ipv6_devconf.temp_valid_lft,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "temp_prefered_lft",
+ .data = &ipv6_devconf.temp_prefered_lft,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "regen_max_retry",
+ .data = &ipv6_devconf.regen_max_retry,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "max_desync_factor",
+ .data = &ipv6_devconf.max_desync_factor,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
+ {
+ .procname = "max_addresses",
+ .data = &ipv6_devconf.max_addresses,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "accept_ra_defrtr",
+ .data = &ipv6_devconf.accept_ra_defrtr,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "accept_ra_pinfo",
+ .data = &ipv6_devconf.accept_ra_pinfo,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#ifdef CONFIG_IPV6_ROUTER_PREF
+ {
+ .procname = "accept_ra_rtr_pref",
+ .data = &ipv6_devconf.accept_ra_rtr_pref,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "router_probe_interval",
+ .data = &ipv6_devconf.rtr_probe_interval,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+#ifdef CONFIG_IPV6_ROUTE_INFO
+ {
+ .procname = "accept_ra_rt_info_max_plen",
+ .data = &ipv6_devconf.accept_ra_rt_info_max_plen,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
+#endif
+ {
+ .procname = "proxy_ndp",
+ .data = &ipv6_devconf.proxy_ndp,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "accept_source_route",
+ .data = &ipv6_devconf.accept_source_route,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+ {
+ .procname = "optimistic_dad",
+ .data = &ipv6_devconf.optimistic_dad,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+
+ },
+#endif
+#ifdef CONFIG_IPV6_MROUTE
+ {
+ .procname = "mc_forwarding",
+ .data = &ipv6_devconf.mc_forwarding,
+ .maxlen = sizeof(int),
+ .mode = 0444,
+ .proc_handler = proc_dointvec,
+ },
+#endif
+ {
+ .procname = "disable_ipv6",
+ .data = &ipv6_devconf.disable_ipv6,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = addrconf_sysctl_disable,
+ },
+ {
+ .procname = "accept_dad",
+ .data = &ipv6_devconf.accept_dad,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "force_tllao",
+ .data = &ipv6_devconf.force_tllao,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ /* sentinel */
+ }
+ },
+};
+
+static int __addrconf_sysctl_register(struct net *net, char *dev_name,
+ struct inet6_dev *idev, struct ipv6_devconf *p)
+{
+ int i;
+ struct addrconf_sysctl_table *t;
+
+#define ADDRCONF_CTL_PATH_DEV 3
+
+ struct ctl_path addrconf_ctl_path[] = {
+ { .procname = "net", },
+ { .procname = "ipv6", },
+ { .procname = "conf", },
+ { /* to be set */ },
+ { },
+ };
+
+
+ t = kmemdup(&addrconf_sysctl, sizeof(*t), GFP_KERNEL);
+ if (t == NULL)
+ goto out;
+
+ for (i = 0; t->addrconf_vars[i].data; i++) {
+ t->addrconf_vars[i].data += (char *)p - (char *)&ipv6_devconf;
+ t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */
+ t->addrconf_vars[i].extra2 = net;
+ }
+
+ /*
+ * Make a copy of dev_name, because '.procname' is regarded as const
+ * by sysctl and we wouldn't want anyone to change it under our feet
+ * (see SIOCSIFNAME).
+ */
+ t->dev_name = kstrdup(dev_name, GFP_KERNEL);
+ if (!t->dev_name)
+ goto free;
+
+ addrconf_ctl_path[ADDRCONF_CTL_PATH_DEV].procname = t->dev_name;
+
+ t->sysctl_header = register_net_sysctl_table(net, addrconf_ctl_path,
+ t->addrconf_vars);
+ if (t->sysctl_header == NULL)
+ goto free_procname;
+
+ p->sysctl = t;
+ return 0;
+
+free_procname:
+ kfree(t->dev_name);
+free:
+ kfree(t);
+out:
+ return -ENOBUFS;
+}
+
+static void __addrconf_sysctl_unregister(struct ipv6_devconf *p)
+{
+ struct addrconf_sysctl_table *t;
+
+ if (p->sysctl == NULL)
+ return;
+
+ t = p->sysctl;
+ p->sysctl = NULL;
+ unregister_net_sysctl_table(t->sysctl_header);
+ kfree(t->dev_name);
+ kfree(t);
+}
+
+static void addrconf_sysctl_register(struct inet6_dev *idev)
+{
+ neigh_sysctl_register(idev->dev, idev->nd_parms, "ipv6",
+ &ndisc_ifinfo_sysctl_change);
+ __addrconf_sysctl_register(dev_net(idev->dev), idev->dev->name,
+ idev, &idev->cnf);
+}
+
+static void addrconf_sysctl_unregister(struct inet6_dev *idev)
+{
+ __addrconf_sysctl_unregister(&idev->cnf);
+ neigh_sysctl_unregister(idev->nd_parms);
+}
+
+
+#endif
+
+static int __net_init addrconf_init_net(struct net *net)
+{
+ int err;
+ struct ipv6_devconf *all, *dflt;
+
+ err = -ENOMEM;
+ all = &ipv6_devconf;
+ dflt = &ipv6_devconf_dflt;
+
+ if (!net_eq(net, &init_net)) {
+ all = kmemdup(all, sizeof(ipv6_devconf), GFP_KERNEL);
+ if (all == NULL)
+ goto err_alloc_all;
+
+ dflt = kmemdup(dflt, sizeof(ipv6_devconf_dflt), GFP_KERNEL);
+ if (dflt == NULL)
+ goto err_alloc_dflt;
+ } else {
+ /* these will be inherited by all namespaces */
+ dflt->autoconf = ipv6_defaults.autoconf;
+ dflt->disable_ipv6 = ipv6_defaults.disable_ipv6;
+ }
+
+ net->ipv6.devconf_all = all;
+ net->ipv6.devconf_dflt = dflt;
+
+#ifdef CONFIG_SYSCTL
+ err = __addrconf_sysctl_register(net, "all", NULL, all);
+ if (err < 0)
+ goto err_reg_all;
+
+ err = __addrconf_sysctl_register(net, "default", NULL, dflt);
+ if (err < 0)
+ goto err_reg_dflt;
+#endif
+ return 0;
+
+#ifdef CONFIG_SYSCTL
+err_reg_dflt:
+ __addrconf_sysctl_unregister(all);
+err_reg_all:
+ kfree(dflt);
+#endif
+err_alloc_dflt:
+ kfree(all);
+err_alloc_all:
+ return err;
+}
+
+static void __net_exit addrconf_exit_net(struct net *net)
+{
+#ifdef CONFIG_SYSCTL
+ __addrconf_sysctl_unregister(net->ipv6.devconf_dflt);
+ __addrconf_sysctl_unregister(net->ipv6.devconf_all);
+#endif
+ if (!net_eq(net, &init_net)) {
+ kfree(net->ipv6.devconf_dflt);
+ kfree(net->ipv6.devconf_all);
+ }
+}
+
+static struct pernet_operations addrconf_ops = {
+ .init = addrconf_init_net,
+ .exit = addrconf_exit_net,
+};
+
+/*
+ * Device notifier
+ */
+
+int register_inet6addr_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_register(&inet6addr_chain, nb);
+}
+EXPORT_SYMBOL(register_inet6addr_notifier);
+
+int unregister_inet6addr_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_unregister(&inet6addr_chain, nb);
+}
+EXPORT_SYMBOL(unregister_inet6addr_notifier);
+
+static struct rtnl_af_ops inet6_ops = {
+ .family = AF_INET6,
+ .fill_link_af = inet6_fill_link_af,
+ .get_link_af_size = inet6_get_link_af_size,
+};
+
+/*
+ * Init / cleanup code
+ */
+
+int __init addrconf_init(void)
+{
+ int i, err;
+
+ err = ipv6_addr_label_init();
+ if (err < 0) {
+ printk(KERN_CRIT "IPv6 Addrconf:"
+ " cannot initialize default policy table: %d.\n", err);
+ goto out;
+ }
+
+ err = register_pernet_subsys(&addrconf_ops);
+ if (err < 0)
+ goto out_addrlabel;
+
+ /* The addrconf netdev notifier requires that loopback_dev
+ * has it's ipv6 private information allocated and setup
+ * before it can bring up and give link-local addresses
+ * to other devices which are up.
+ *
+ * Unfortunately, loopback_dev is not necessarily the first
+ * entry in the global dev_base list of net devices. In fact,
+ * it is likely to be the very last entry on that list.
+ * So this causes the notifier registry below to try and
+ * give link-local addresses to all devices besides loopback_dev
+ * first, then loopback_dev, which cases all the non-loopback_dev
+ * devices to fail to get a link-local address.
+ *
+ * So, as a temporary fix, allocate the ipv6 structure for
+ * loopback_dev first by hand.
+ * Longer term, all of the dependencies ipv6 has upon the loopback
+ * device and it being up should be removed.
+ */
+ rtnl_lock();
+ if (!ipv6_add_dev(init_net.loopback_dev))
+ err = -ENOMEM;
+ rtnl_unlock();
+ if (err)
+ goto errlo;
+
+ for (i = 0; i < IN6_ADDR_HSIZE; i++)
+ INIT_HLIST_HEAD(&inet6_addr_lst[i]);
+
+ register_netdevice_notifier(&ipv6_dev_notf);
+
+ addrconf_verify(0);
+
+ err = rtnl_af_register(&inet6_ops);
+ if (err < 0)
+ goto errout_af;
+
+ err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo,
+ NULL);
+ if (err < 0)
+ goto errout;
+
+ /* Only the first call to __rtnl_register can fail */
+ __rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL, NULL);
+ __rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL, NULL);
+ __rtnl_register(PF_INET6, RTM_GETADDR, inet6_rtm_getaddr,
+ inet6_dump_ifaddr, NULL);
+ __rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL,
+ inet6_dump_ifmcaddr, NULL);
+ __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL,
+ inet6_dump_ifacaddr, NULL);
+
+ ipv6_addr_label_rtnl_register();
+
+ return 0;
+errout:
+ rtnl_af_unregister(&inet6_ops);
+errout_af:
+ unregister_netdevice_notifier(&ipv6_dev_notf);
+errlo:
+ unregister_pernet_subsys(&addrconf_ops);
+out_addrlabel:
+ ipv6_addr_label_cleanup();
+out:
+ return err;
+}
+
+void addrconf_cleanup(void)
+{
+ struct net_device *dev;
+ int i;
+
+ unregister_netdevice_notifier(&ipv6_dev_notf);
+ unregister_pernet_subsys(&addrconf_ops);
+ ipv6_addr_label_cleanup();
+
+ rtnl_lock();
+
+ __rtnl_af_unregister(&inet6_ops);
+
+ /* clean dev list */
+ for_each_netdev(&init_net, dev) {
+ if (__in6_dev_get(dev) == NULL)
+ continue;
+ addrconf_ifdown(dev, 1);
+ }
+ addrconf_ifdown(init_net.loopback_dev, 2);
+
+ /*
+ * Check hash table.
+ */
+ spin_lock_bh(&addrconf_hash_lock);
+ for (i = 0; i < IN6_ADDR_HSIZE; i++)
+ WARN_ON(!hlist_empty(&inet6_addr_lst[i]));
+ spin_unlock_bh(&addrconf_hash_lock);
+
+ del_timer(&addr_chk_timer);
+ rtnl_unlock();
+}
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
new file mode 100644
index 00000000..399287e5
--- /dev/null
+++ b/net/ipv6/addrconf_core.c
@@ -0,0 +1,80 @@
+/*
+ * IPv6 library code, needed by static components when full IPv6 support is
+ * not configured or static.
+ */
+
+#include <linux/export.h>
+#include <net/ipv6.h>
+
+#define IPV6_ADDR_SCOPE_TYPE(scope) ((scope) << 16)
+
+static inline unsigned ipv6_addr_scope2type(unsigned scope)
+{
+ switch(scope) {
+ case IPV6_ADDR_SCOPE_NODELOCAL:
+ return (IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_NODELOCAL) |
+ IPV6_ADDR_LOOPBACK);
+ case IPV6_ADDR_SCOPE_LINKLOCAL:
+ return (IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_LINKLOCAL) |
+ IPV6_ADDR_LINKLOCAL);
+ case IPV6_ADDR_SCOPE_SITELOCAL:
+ return (IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_SITELOCAL) |
+ IPV6_ADDR_SITELOCAL);
+ }
+ return IPV6_ADDR_SCOPE_TYPE(scope);
+}
+
+int __ipv6_addr_type(const struct in6_addr *addr)
+{
+ __be32 st;
+
+ st = addr->s6_addr32[0];
+
+ /* Consider all addresses with the first three bits different of
+ 000 and 111 as unicasts.
+ */
+ if ((st & htonl(0xE0000000)) != htonl(0x00000000) &&
+ (st & htonl(0xE0000000)) != htonl(0xE0000000))
+ return (IPV6_ADDR_UNICAST |
+ IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_GLOBAL));
+
+ if ((st & htonl(0xFF000000)) == htonl(0xFF000000)) {
+ /* multicast */
+ /* addr-select 3.1 */
+ return (IPV6_ADDR_MULTICAST |
+ ipv6_addr_scope2type(IPV6_ADDR_MC_SCOPE(addr)));
+ }
+
+ if ((st & htonl(0xFFC00000)) == htonl(0xFE800000))
+ return (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST |
+ IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_LINKLOCAL)); /* addr-select 3.1 */
+ if ((st & htonl(0xFFC00000)) == htonl(0xFEC00000))
+ return (IPV6_ADDR_SITELOCAL | IPV6_ADDR_UNICAST |
+ IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_SITELOCAL)); /* addr-select 3.1 */
+ if ((st & htonl(0xFE000000)) == htonl(0xFC000000))
+ return (IPV6_ADDR_UNICAST |
+ IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_GLOBAL)); /* RFC 4193 */
+
+ if ((addr->s6_addr32[0] | addr->s6_addr32[1]) == 0) {
+ if (addr->s6_addr32[2] == 0) {
+ if (addr->s6_addr32[3] == 0)
+ return IPV6_ADDR_ANY;
+
+ if (addr->s6_addr32[3] == htonl(0x00000001))
+ return (IPV6_ADDR_LOOPBACK | IPV6_ADDR_UNICAST |
+ IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_LINKLOCAL)); /* addr-select 3.4 */
+
+ return (IPV6_ADDR_COMPATv4 | IPV6_ADDR_UNICAST |
+ IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_GLOBAL)); /* addr-select 3.3 */
+ }
+
+ if (addr->s6_addr32[2] == htonl(0x0000ffff))
+ return (IPV6_ADDR_MAPPED |
+ IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_GLOBAL)); /* addr-select 3.3 */
+ }
+
+ return (IPV6_ADDR_UNICAST |
+ IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_GLOBAL)); /* addr-select 3.4 */
+}
+EXPORT_SYMBOL(__ipv6_addr_type);
+
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
new file mode 100644
index 00000000..2d8ddba9
--- /dev/null
+++ b/net/ipv6/addrlabel.c
@@ -0,0 +1,602 @@
+/*
+ * IPv6 Address Label subsystem
+ * for the IPv6 "Default" Source Address Selection
+ *
+ * Copyright (C)2007 USAGI/WIDE Project
+ */
+/*
+ * Author:
+ * YOSHIFUJI Hideaki @ USAGI/WIDE Project <yoshfuji@linux-ipv6.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/rcupdate.h>
+#include <linux/in6.h>
+#include <linux/slab.h>
+#include <net/addrconf.h>
+#include <linux/if_addrlabel.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+
+#if 0
+#define ADDRLABEL(x...) printk(x)
+#else
+#define ADDRLABEL(x...) do { ; } while(0)
+#endif
+
+/*
+ * Policy Table
+ */
+struct ip6addrlbl_entry
+{
+#ifdef CONFIG_NET_NS
+ struct net *lbl_net;
+#endif
+ struct in6_addr prefix;
+ int prefixlen;
+ int ifindex;
+ int addrtype;
+ u32 label;
+ struct hlist_node list;
+ atomic_t refcnt;
+ struct rcu_head rcu;
+};
+
+static struct ip6addrlbl_table
+{
+ struct hlist_head head;
+ spinlock_t lock;
+ u32 seq;
+} ip6addrlbl_table;
+
+static inline
+struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl)
+{
+ return read_pnet(&lbl->lbl_net);
+}
+
+/*
+ * Default policy table (RFC3484 + extensions)
+ *
+ * prefix addr_type label
+ * -------------------------------------------------------------------------
+ * ::1/128 LOOPBACK 0
+ * ::/0 N/A 1
+ * 2002::/16 N/A 2
+ * ::/96 COMPATv4 3
+ * ::ffff:0:0/96 V4MAPPED 4
+ * fc00::/7 N/A 5 ULA (RFC 4193)
+ * 2001::/32 N/A 6 Teredo (RFC 4380)
+ * 2001:10::/28 N/A 7 ORCHID (RFC 4843)
+ *
+ * Note: 0xffffffff is used if we do not have any policies.
+ */
+
+#define IPV6_ADDR_LABEL_DEFAULT 0xffffffffUL
+
+static const __net_initdata struct ip6addrlbl_init_table
+{
+ const struct in6_addr *prefix;
+ int prefixlen;
+ u32 label;
+} ip6addrlbl_init_table[] = {
+ { /* ::/0 */
+ .prefix = &in6addr_any,
+ .label = 1,
+ },{ /* fc00::/7 */
+ .prefix = &(struct in6_addr){{{ 0xfc }}},
+ .prefixlen = 7,
+ .label = 5,
+ },{ /* 2002::/16 */
+ .prefix = &(struct in6_addr){{{ 0x20, 0x02 }}},
+ .prefixlen = 16,
+ .label = 2,
+ },{ /* 2001::/32 */
+ .prefix = &(struct in6_addr){{{ 0x20, 0x01 }}},
+ .prefixlen = 32,
+ .label = 6,
+ },{ /* 2001:10::/28 */
+ .prefix = &(struct in6_addr){{{ 0x20, 0x01, 0x00, 0x10 }}},
+ .prefixlen = 28,
+ .label = 7,
+ },{ /* ::ffff:0:0 */
+ .prefix = &(struct in6_addr){{{ [10] = 0xff, [11] = 0xff }}},
+ .prefixlen = 96,
+ .label = 4,
+ },{ /* ::/96 */
+ .prefix = &in6addr_any,
+ .prefixlen = 96,
+ .label = 3,
+ },{ /* ::1/128 */
+ .prefix = &in6addr_loopback,
+ .prefixlen = 128,
+ .label = 0,
+ }
+};
+
+/* Object management */
+static inline void ip6addrlbl_free(struct ip6addrlbl_entry *p)
+{
+#ifdef CONFIG_NET_NS
+ release_net(p->lbl_net);
+#endif
+ kfree(p);
+}
+
+static void ip6addrlbl_free_rcu(struct rcu_head *h)
+{
+ ip6addrlbl_free(container_of(h, struct ip6addrlbl_entry, rcu));
+}
+
+static inline int ip6addrlbl_hold(struct ip6addrlbl_entry *p)
+{
+ return atomic_inc_not_zero(&p->refcnt);
+}
+
+static inline void ip6addrlbl_put(struct ip6addrlbl_entry *p)
+{
+ if (atomic_dec_and_test(&p->refcnt))
+ call_rcu(&p->rcu, ip6addrlbl_free_rcu);
+}
+
+/* Find label */
+static int __ip6addrlbl_match(struct net *net,
+ struct ip6addrlbl_entry *p,
+ const struct in6_addr *addr,
+ int addrtype, int ifindex)
+{
+ if (!net_eq(ip6addrlbl_net(p), net))
+ return 0;
+ if (p->ifindex && p->ifindex != ifindex)
+ return 0;
+ if (p->addrtype && p->addrtype != addrtype)
+ return 0;
+ if (!ipv6_prefix_equal(addr, &p->prefix, p->prefixlen))
+ return 0;
+ return 1;
+}
+
+static struct ip6addrlbl_entry *__ipv6_addr_label(struct net *net,
+ const struct in6_addr *addr,
+ int type, int ifindex)
+{
+ struct hlist_node *pos;
+ struct ip6addrlbl_entry *p;
+ hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) {
+ if (__ip6addrlbl_match(net, p, addr, type, ifindex))
+ return p;
+ }
+ return NULL;
+}
+
+u32 ipv6_addr_label(struct net *net,
+ const struct in6_addr *addr, int type, int ifindex)
+{
+ u32 label;
+ struct ip6addrlbl_entry *p;
+
+ type &= IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK;
+
+ rcu_read_lock();
+ p = __ipv6_addr_label(net, addr, type, ifindex);
+ label = p ? p->label : IPV6_ADDR_LABEL_DEFAULT;
+ rcu_read_unlock();
+
+ ADDRLABEL(KERN_DEBUG "%s(addr=%pI6, type=%d, ifindex=%d) => %08x\n",
+ __func__, addr, type, ifindex, label);
+
+ return label;
+}
+
+/* allocate one entry */
+static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net,
+ const struct in6_addr *prefix,
+ int prefixlen, int ifindex,
+ u32 label)
+{
+ struct ip6addrlbl_entry *newp;
+ int addrtype;
+
+ ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u)\n",
+ __func__, prefix, prefixlen, ifindex, (unsigned int)label);
+
+ addrtype = ipv6_addr_type(prefix) & (IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK);
+
+ switch (addrtype) {
+ case IPV6_ADDR_MAPPED:
+ if (prefixlen > 96)
+ return ERR_PTR(-EINVAL);
+ if (prefixlen < 96)
+ addrtype = 0;
+ break;
+ case IPV6_ADDR_COMPATv4:
+ if (prefixlen != 96)
+ addrtype = 0;
+ break;
+ case IPV6_ADDR_LOOPBACK:
+ if (prefixlen != 128)
+ addrtype = 0;
+ break;
+ }
+
+ newp = kmalloc(sizeof(*newp), GFP_KERNEL);
+ if (!newp)
+ return ERR_PTR(-ENOMEM);
+
+ ipv6_addr_prefix(&newp->prefix, prefix, prefixlen);
+ newp->prefixlen = prefixlen;
+ newp->ifindex = ifindex;
+ newp->addrtype = addrtype;
+ newp->label = label;
+ INIT_HLIST_NODE(&newp->list);
+#ifdef CONFIG_NET_NS
+ newp->lbl_net = hold_net(net);
+#endif
+ atomic_set(&newp->refcnt, 1);
+ return newp;
+}
+
+/* add a label */
+static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
+{
+ int ret = 0;
+
+ ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n",
+ __func__,
+ newp, replace);
+
+ if (hlist_empty(&ip6addrlbl_table.head)) {
+ hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head);
+ } else {
+ struct hlist_node *pos, *n;
+ struct ip6addrlbl_entry *p = NULL;
+ hlist_for_each_entry_safe(p, pos, n,
+ &ip6addrlbl_table.head, list) {
+ if (p->prefixlen == newp->prefixlen &&
+ net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) &&
+ p->ifindex == newp->ifindex &&
+ ipv6_addr_equal(&p->prefix, &newp->prefix)) {
+ if (!replace) {
+ ret = -EEXIST;
+ goto out;
+ }
+ hlist_replace_rcu(&p->list, &newp->list);
+ ip6addrlbl_put(p);
+ goto out;
+ } else if ((p->prefixlen == newp->prefixlen && !p->ifindex) ||
+ (p->prefixlen < newp->prefixlen)) {
+ hlist_add_before_rcu(&newp->list, &p->list);
+ goto out;
+ }
+ }
+ hlist_add_after_rcu(&p->list, &newp->list);
+ }
+out:
+ if (!ret)
+ ip6addrlbl_table.seq++;
+ return ret;
+}
+
+/* add a label */
+static int ip6addrlbl_add(struct net *net,
+ const struct in6_addr *prefix, int prefixlen,
+ int ifindex, u32 label, int replace)
+{
+ struct ip6addrlbl_entry *newp;
+ int ret = 0;
+
+ ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n",
+ __func__, prefix, prefixlen, ifindex, (unsigned int)label,
+ replace);
+
+ newp = ip6addrlbl_alloc(net, prefix, prefixlen, ifindex, label);
+ if (IS_ERR(newp))
+ return PTR_ERR(newp);
+ spin_lock(&ip6addrlbl_table.lock);
+ ret = __ip6addrlbl_add(newp, replace);
+ spin_unlock(&ip6addrlbl_table.lock);
+ if (ret)
+ ip6addrlbl_free(newp);
+ return ret;
+}
+
+/* remove a label */
+static int __ip6addrlbl_del(struct net *net,
+ const struct in6_addr *prefix, int prefixlen,
+ int ifindex)
+{
+ struct ip6addrlbl_entry *p = NULL;
+ struct hlist_node *pos, *n;
+ int ret = -ESRCH;
+
+ ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n",
+ __func__, prefix, prefixlen, ifindex);
+
+ hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) {
+ if (p->prefixlen == prefixlen &&
+ net_eq(ip6addrlbl_net(p), net) &&
+ p->ifindex == ifindex &&
+ ipv6_addr_equal(&p->prefix, prefix)) {
+ hlist_del_rcu(&p->list);
+ ip6addrlbl_put(p);
+ ret = 0;
+ break;
+ }
+ }
+ return ret;
+}
+
+static int ip6addrlbl_del(struct net *net,
+ const struct in6_addr *prefix, int prefixlen,
+ int ifindex)
+{
+ struct in6_addr prefix_buf;
+ int ret;
+
+ ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n",
+ __func__, prefix, prefixlen, ifindex);
+
+ ipv6_addr_prefix(&prefix_buf, prefix, prefixlen);
+ spin_lock(&ip6addrlbl_table.lock);
+ ret = __ip6addrlbl_del(net, &prefix_buf, prefixlen, ifindex);
+ spin_unlock(&ip6addrlbl_table.lock);
+ return ret;
+}
+
+/* add default label */
+static int __net_init ip6addrlbl_net_init(struct net *net)
+{
+ int err = 0;
+ int i;
+
+ ADDRLABEL(KERN_DEBUG "%s()\n", __func__);
+
+ for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) {
+ int ret = ip6addrlbl_add(net,
+ ip6addrlbl_init_table[i].prefix,
+ ip6addrlbl_init_table[i].prefixlen,
+ 0,
+ ip6addrlbl_init_table[i].label, 0);
+ /* XXX: should we free all rules when we catch an error? */
+ if (ret && (!err || err != -ENOMEM))
+ err = ret;
+ }
+ return err;
+}
+
+static void __net_exit ip6addrlbl_net_exit(struct net *net)
+{
+ struct ip6addrlbl_entry *p = NULL;
+ struct hlist_node *pos, *n;
+
+ /* Remove all labels belonging to the exiting net */
+ spin_lock(&ip6addrlbl_table.lock);
+ hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) {
+ if (net_eq(ip6addrlbl_net(p), net)) {
+ hlist_del_rcu(&p->list);
+ ip6addrlbl_put(p);
+ }
+ }
+ spin_unlock(&ip6addrlbl_table.lock);
+}
+
+static struct pernet_operations ipv6_addr_label_ops = {
+ .init = ip6addrlbl_net_init,
+ .exit = ip6addrlbl_net_exit,
+};
+
+int __init ipv6_addr_label_init(void)
+{
+ spin_lock_init(&ip6addrlbl_table.lock);
+
+ return register_pernet_subsys(&ipv6_addr_label_ops);
+}
+
+void ipv6_addr_label_cleanup(void)
+{
+ unregister_pernet_subsys(&ipv6_addr_label_ops);
+}
+
+static const struct nla_policy ifal_policy[IFAL_MAX+1] = {
+ [IFAL_ADDRESS] = { .len = sizeof(struct in6_addr), },
+ [IFAL_LABEL] = { .len = sizeof(u32), },
+};
+
+static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh,
+ void *arg)
+{
+ struct net *net = sock_net(skb->sk);
+ struct ifaddrlblmsg *ifal;
+ struct nlattr *tb[IFAL_MAX+1];
+ struct in6_addr *pfx;
+ u32 label;
+ int err = 0;
+
+ err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy);
+ if (err < 0)
+ return err;
+
+ ifal = nlmsg_data(nlh);
+
+ if (ifal->ifal_family != AF_INET6 ||
+ ifal->ifal_prefixlen > 128)
+ return -EINVAL;
+
+ if (!tb[IFAL_ADDRESS])
+ return -EINVAL;
+
+ pfx = nla_data(tb[IFAL_ADDRESS]);
+ if (!pfx)
+ return -EINVAL;
+
+ if (!tb[IFAL_LABEL])
+ return -EINVAL;
+ label = nla_get_u32(tb[IFAL_LABEL]);
+ if (label == IPV6_ADDR_LABEL_DEFAULT)
+ return -EINVAL;
+
+ switch(nlh->nlmsg_type) {
+ case RTM_NEWADDRLABEL:
+ if (ifal->ifal_index &&
+ !__dev_get_by_index(net, ifal->ifal_index))
+ return -EINVAL;
+
+ err = ip6addrlbl_add(net, pfx, ifal->ifal_prefixlen,
+ ifal->ifal_index, label,
+ nlh->nlmsg_flags & NLM_F_REPLACE);
+ break;
+ case RTM_DELADDRLABEL:
+ err = ip6addrlbl_del(net, pfx, ifal->ifal_prefixlen,
+ ifal->ifal_index);
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ }
+ return err;
+}
+
+static inline void ip6addrlbl_putmsg(struct nlmsghdr *nlh,
+ int prefixlen, int ifindex, u32 lseq)
+{
+ struct ifaddrlblmsg *ifal = nlmsg_data(nlh);
+ ifal->ifal_family = AF_INET6;
+ ifal->ifal_prefixlen = prefixlen;
+ ifal->ifal_flags = 0;
+ ifal->ifal_index = ifindex;
+ ifal->ifal_seq = lseq;
+};
+
+static int ip6addrlbl_fill(struct sk_buff *skb,
+ struct ip6addrlbl_entry *p,
+ u32 lseq,
+ u32 pid, u32 seq, int event,
+ unsigned int flags)
+{
+ struct nlmsghdr *nlh = nlmsg_put(skb, pid, seq, event,
+ sizeof(struct ifaddrlblmsg), flags);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ ip6addrlbl_putmsg(nlh, p->prefixlen, p->ifindex, lseq);
+
+ if (nla_put(skb, IFAL_ADDRESS, 16, &p->prefix) < 0 ||
+ nla_put_u32(skb, IFAL_LABEL, p->label) < 0) {
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
+ }
+
+ return nlmsg_end(skb, nlh);
+}
+
+static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct net *net = sock_net(skb->sk);
+ struct ip6addrlbl_entry *p;
+ struct hlist_node *pos;
+ int idx = 0, s_idx = cb->args[0];
+ int err;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) {
+ if (idx >= s_idx &&
+ net_eq(ip6addrlbl_net(p), net)) {
+ if ((err = ip6addrlbl_fill(skb, p,
+ ip6addrlbl_table.seq,
+ NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWADDRLABEL,
+ NLM_F_MULTI)) <= 0)
+ break;
+ }
+ idx++;
+ }
+ rcu_read_unlock();
+ cb->args[0] = idx;
+ return skb->len;
+}
+
+static inline int ip6addrlbl_msgsize(void)
+{
+ return NLMSG_ALIGN(sizeof(struct ifaddrlblmsg))
+ + nla_total_size(16) /* IFAL_ADDRESS */
+ + nla_total_size(4); /* IFAL_LABEL */
+}
+
+static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh,
+ void *arg)
+{
+ struct net *net = sock_net(in_skb->sk);
+ struct ifaddrlblmsg *ifal;
+ struct nlattr *tb[IFAL_MAX+1];
+ struct in6_addr *addr;
+ u32 lseq;
+ int err = 0;
+ struct ip6addrlbl_entry *p;
+ struct sk_buff *skb;
+
+ err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy);
+ if (err < 0)
+ return err;
+
+ ifal = nlmsg_data(nlh);
+
+ if (ifal->ifal_family != AF_INET6 ||
+ ifal->ifal_prefixlen != 128)
+ return -EINVAL;
+
+ if (ifal->ifal_index &&
+ !__dev_get_by_index(net, ifal->ifal_index))
+ return -EINVAL;
+
+ if (!tb[IFAL_ADDRESS])
+ return -EINVAL;
+
+ addr = nla_data(tb[IFAL_ADDRESS]);
+ if (!addr)
+ return -EINVAL;
+
+ rcu_read_lock();
+ p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index);
+ if (p && ip6addrlbl_hold(p))
+ p = NULL;
+ lseq = ip6addrlbl_table.seq;
+ rcu_read_unlock();
+
+ if (!p) {
+ err = -ESRCH;
+ goto out;
+ }
+
+ if (!(skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL))) {
+ ip6addrlbl_put(p);
+ return -ENOBUFS;
+ }
+
+ err = ip6addrlbl_fill(skb, p, lseq,
+ NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
+ RTM_NEWADDRLABEL, 0);
+
+ ip6addrlbl_put(p);
+
+ if (err < 0) {
+ WARN_ON(err == -EMSGSIZE);
+ kfree_skb(skb);
+ goto out;
+ }
+
+ err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
+out:
+ return err;
+}
+
+void __init ipv6_addr_label_rtnl_register(void)
+{
+ __rtnl_register(PF_INET6, RTM_NEWADDRLABEL, ip6addrlbl_newdel,
+ NULL, NULL);
+ __rtnl_register(PF_INET6, RTM_DELADDRLABEL, ip6addrlbl_newdel,
+ NULL, NULL);
+ __rtnl_register(PF_INET6, RTM_GETADDRLABEL, ip6addrlbl_get,
+ ip6addrlbl_dump, NULL);
+}
+
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
new file mode 100644
index 00000000..22ebbb97
--- /dev/null
+++ b/net/ipv6/af_inet6.c
@@ -0,0 +1,1363 @@
+/*
+ * PF_INET6 socket protocol family
+ * Linux INET6 implementation
+ *
+ * Authors:
+ * Pedro Roque <roque@di.fc.ul.pt>
+ *
+ * Adapted from linux/net/ipv4/af_inet.c
+ *
+ * Fixes:
+ * piggy, Karl Knutson : Socket protocol table
+ * Hideaki YOSHIFUJI : sin6_scope_id support
+ * Arnaldo Melo : check proc_net_create return, cleanups
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+
+#include <linux/module.h>
+#include <linux/capability.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/proc_fs.h>
+#include <linux/stat.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/icmpv6.h>
+#include <linux/netfilter_ipv6.h>
+
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/udp.h>
+#include <net/udplite.h>
+#include <net/tcp.h>
+#include <net/ipip.h>
+#include <net/ping.h>
+#include <net/protocol.h>
+#include <net/inet_common.h>
+#include <net/route.h>
+#include <net/transp_v6.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
+#ifdef CONFIG_IPV6_TUNNEL
+#include <net/ip6_tunnel.h>
+#endif
+
+#include <asm/uaccess.h>
+#include <linux/mroute6.h>
+
+#ifdef CONFIG_ANDROID_PARANOID_NETWORK
+#include <linux/android_aid.h>
+
+static inline int current_has_network(void)
+{
+ return in_egroup_p(AID_INET) || capable(CAP_NET_RAW);
+}
+#else
+static inline int current_has_network(void)
+{
+ return 1;
+}
+#endif
+
+MODULE_AUTHOR("Cast of dozens");
+MODULE_DESCRIPTION("IPv6 protocol stack for Linux");
+MODULE_LICENSE("GPL");
+
+/* The inetsw6 table contains everything that inet6_create needs to
+ * build a new socket.
+ */
+static struct list_head inetsw6[SOCK_MAX];
+static DEFINE_SPINLOCK(inetsw6_lock);
+
+struct ipv6_params ipv6_defaults = {
+ .disable_ipv6 = 0,
+ .autoconf = 1,
+};
+
+static int disable_ipv6_mod = 0;
+
+module_param_named(disable, disable_ipv6_mod, int, 0444);
+MODULE_PARM_DESC(disable, "Disable IPv6 module such that it is non-functional");
+
+module_param_named(disable_ipv6, ipv6_defaults.disable_ipv6, int, 0444);
+MODULE_PARM_DESC(disable_ipv6, "Disable IPv6 on all interfaces");
+
+module_param_named(autoconf, ipv6_defaults.autoconf, int, 0444);
+MODULE_PARM_DESC(autoconf, "Enable IPv6 address autoconfiguration on all interfaces");
+
+static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
+{
+ const int offset = sk->sk_prot->obj_size - sizeof(struct ipv6_pinfo);
+
+ return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
+}
+
+static int inet6_create(struct net *net, struct socket *sock, int protocol,
+ int kern)
+{
+ struct inet_sock *inet;
+ struct ipv6_pinfo *np;
+ struct sock *sk;
+ struct inet_protosw *answer;
+ struct proto *answer_prot;
+ unsigned char answer_flags;
+ char answer_no_check;
+ int try_loading_module = 0;
+ int err;
+
+ if (!current_has_network())
+ return -EACCES;
+
+ if (sock->type != SOCK_RAW &&
+ sock->type != SOCK_DGRAM &&
+ !inet_ehash_secret)
+ build_ehash_secret();
+
+ /* Look for the requested type/protocol pair. */
+lookup_protocol:
+ err = -ESOCKTNOSUPPORT;
+ rcu_read_lock();
+ list_for_each_entry_rcu(answer, &inetsw6[sock->type], list) {
+
+ err = 0;
+ /* Check the non-wild match. */
+ if (protocol == answer->protocol) {
+ if (protocol != IPPROTO_IP)
+ break;
+ } else {
+ /* Check for the two wild cases. */
+ if (IPPROTO_IP == protocol) {
+ protocol = answer->protocol;
+ break;
+ }
+ if (IPPROTO_IP == answer->protocol)
+ break;
+ }
+ err = -EPROTONOSUPPORT;
+ }
+
+ if (err) {
+ if (try_loading_module < 2) {
+ rcu_read_unlock();
+ /*
+ * Be more specific, e.g. net-pf-10-proto-132-type-1
+ * (net-pf-PF_INET6-proto-IPPROTO_SCTP-type-SOCK_STREAM)
+ */
+ if (++try_loading_module == 1)
+ request_module("net-pf-%d-proto-%d-type-%d",
+ PF_INET6, protocol, sock->type);
+ /*
+ * Fall back to generic, e.g. net-pf-10-proto-132
+ * (net-pf-PF_INET6-proto-IPPROTO_SCTP)
+ */
+ else
+ request_module("net-pf-%d-proto-%d",
+ PF_INET6, protocol);
+ goto lookup_protocol;
+ } else
+ goto out_rcu_unlock;
+ }
+
+ err = -EPERM;
+ if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW))
+ goto out_rcu_unlock;
+
+ sock->ops = answer->ops;
+ answer_prot = answer->prot;
+ answer_no_check = answer->no_check;
+ answer_flags = answer->flags;
+ rcu_read_unlock();
+
+ WARN_ON(answer_prot->slab == NULL);
+
+ err = -ENOBUFS;
+ sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot);
+ if (sk == NULL)
+ goto out;
+
+ sock_init_data(sock, sk);
+
+ err = 0;
+ sk->sk_no_check = answer_no_check;
+ if (INET_PROTOSW_REUSE & answer_flags)
+ sk->sk_reuse = 1;
+
+ inet = inet_sk(sk);
+ inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
+
+ if (SOCK_RAW == sock->type) {
+ inet->inet_num = protocol;
+ if (IPPROTO_RAW == protocol)
+ inet->hdrincl = 1;
+ }
+
+ sk->sk_destruct = inet_sock_destruct;
+ sk->sk_family = PF_INET6;
+ sk->sk_protocol = protocol;
+
+ sk->sk_backlog_rcv = answer->prot->backlog_rcv;
+
+ inet_sk(sk)->pinet6 = np = inet6_sk_generic(sk);
+ np->hop_limit = -1;
+ np->mcast_hops = IPV6_DEFAULT_MCASTHOPS;
+ np->mc_loop = 1;
+ np->pmtudisc = IPV6_PMTUDISC_WANT;
+ np->ipv6only = net->ipv6.sysctl.bindv6only;
+
+ /* Init the ipv4 part of the socket since we can have sockets
+ * using v6 API for ipv4.
+ */
+ inet->uc_ttl = -1;
+
+ inet->mc_loop = 1;
+ inet->mc_ttl = 1;
+ inet->mc_index = 0;
+ inet->mc_list = NULL;
+ inet->rcv_tos = 0;
+
+ if (ipv4_config.no_pmtu_disc)
+ inet->pmtudisc = IP_PMTUDISC_DONT;
+ else
+ inet->pmtudisc = IP_PMTUDISC_WANT;
+ /*
+ * Increment only the relevant sk_prot->socks debug field, this changes
+ * the previous behaviour of incrementing both the equivalent to
+ * answer->prot->socks (inet6_sock_nr) and inet_sock_nr.
+ *
+ * This allows better debug granularity as we'll know exactly how many
+ * UDPv6, TCPv6, etc socks were allocated, not the sum of all IPv6
+ * transport protocol socks. -acme
+ */
+ sk_refcnt_debug_inc(sk);
+
+ if (inet->inet_num) {
+ /* It assumes that any protocol which allows
+ * the user to assign a number at socket
+ * creation time automatically shares.
+ */
+ inet->inet_sport = htons(inet->inet_num);
+ sk->sk_prot->hash(sk);
+ }
+ if (sk->sk_prot->init) {
+ err = sk->sk_prot->init(sk);
+ if (err) {
+ sk_common_release(sk);
+ goto out;
+ }
+ }
+out:
+ return err;
+out_rcu_unlock:
+ rcu_read_unlock();
+ goto out;
+}
+
+
+/* bind for INET6 API */
+int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+{
+ struct sockaddr_in6 *addr=(struct sockaddr_in6 *)uaddr;
+ struct sock *sk = sock->sk;
+ struct inet_sock *inet = inet_sk(sk);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct net *net = sock_net(sk);
+ __be32 v4addr = 0;
+ unsigned short snum;
+ int addr_type = 0;
+ int err = 0;
+
+ /* If the socket has its own bind function then use it. */
+ if (sk->sk_prot->bind)
+ return sk->sk_prot->bind(sk, uaddr, addr_len);
+
+ if (addr_len < SIN6_LEN_RFC2133)
+ return -EINVAL;
+
+ if (addr->sin6_family != AF_INET6)
+ return -EAFNOSUPPORT;
+
+ addr_type = ipv6_addr_type(&addr->sin6_addr);
+ if ((addr_type & IPV6_ADDR_MULTICAST) && sock->type == SOCK_STREAM)
+ return -EINVAL;
+
+ snum = ntohs(addr->sin6_port);
+ if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
+ return -EACCES;
+
+ lock_sock(sk);
+
+ /* Check these errors (active socket, double bind). */
+ if (sk->sk_state != TCP_CLOSE || inet->inet_num) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ /* Check if the address belongs to the host. */
+ if (addr_type == IPV6_ADDR_MAPPED) {
+ int chk_addr_ret;
+
+ /* Binding to v4-mapped address on a v6-only socket
+ * makes no sense
+ */
+ if (np->ipv6only) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ /* Reproduce AF_INET checks to make the bindings consistent */
+ v4addr = addr->sin6_addr.s6_addr32[3];
+ chk_addr_ret = inet_addr_type(net, v4addr);
+ if (!sysctl_ip_nonlocal_bind &&
+ !(inet->freebind || inet->transparent) &&
+ v4addr != htonl(INADDR_ANY) &&
+ chk_addr_ret != RTN_LOCAL &&
+ chk_addr_ret != RTN_MULTICAST &&
+ chk_addr_ret != RTN_BROADCAST) {
+ err = -EADDRNOTAVAIL;
+ goto out;
+ }
+ } else {
+ if (addr_type != IPV6_ADDR_ANY) {
+ struct net_device *dev = NULL;
+
+ rcu_read_lock();
+ if (addr_type & IPV6_ADDR_LINKLOCAL) {
+ if (addr_len >= sizeof(struct sockaddr_in6) &&
+ addr->sin6_scope_id) {
+ /* Override any existing binding, if another one
+ * is supplied by user.
+ */
+ sk->sk_bound_dev_if = addr->sin6_scope_id;
+ }
+
+ /* Binding to link-local address requires an interface */
+ if (!sk->sk_bound_dev_if) {
+ err = -EINVAL;
+ goto out_unlock;
+ }
+ dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if);
+ if (!dev) {
+ err = -ENODEV;
+ goto out_unlock;
+ }
+ }
+
+ /* ipv4 addr of the socket is invalid. Only the
+ * unspecified and mapped address have a v4 equivalent.
+ */
+ v4addr = LOOPBACK4_IPV6;
+ if (!(addr_type & IPV6_ADDR_MULTICAST)) {
+ if (!(inet->freebind || inet->transparent) &&
+ !ipv6_chk_addr(net, &addr->sin6_addr,
+ dev, 0)) {
+ err = -EADDRNOTAVAIL;
+ goto out_unlock;
+ }
+ }
+ rcu_read_unlock();
+ }
+ }
+
+ inet->inet_rcv_saddr = v4addr;
+ inet->inet_saddr = v4addr;
+
+ np->rcv_saddr = addr->sin6_addr;
+
+ if (!(addr_type & IPV6_ADDR_MULTICAST))
+ np->saddr = addr->sin6_addr;
+
+ /* Make sure we are allowed to bind here. */
+ if (sk->sk_prot->get_port(sk, snum)) {
+ inet_reset_saddr(sk);
+ err = -EADDRINUSE;
+ goto out;
+ }
+
+ if (addr_type != IPV6_ADDR_ANY) {
+ sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
+ if (addr_type != IPV6_ADDR_MAPPED)
+ np->ipv6only = 1;
+ }
+ if (snum)
+ sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
+ inet->inet_sport = htons(inet->inet_num);
+ inet->inet_dport = 0;
+ inet->inet_daddr = 0;
+out:
+ release_sock(sk);
+ return err;
+out_unlock:
+ rcu_read_unlock();
+ goto out;
+}
+
+EXPORT_SYMBOL(inet6_bind);
+
+int inet6_release(struct socket *sock)
+{
+ struct sock *sk = sock->sk;
+
+ if (sk == NULL)
+ return -EINVAL;
+
+ /* Free mc lists */
+ ipv6_sock_mc_close(sk);
+
+ /* Free ac lists */
+ ipv6_sock_ac_close(sk);
+
+ return inet_release(sock);
+}
+
+EXPORT_SYMBOL(inet6_release);
+
+void inet6_destroy_sock(struct sock *sk)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct sk_buff *skb;
+ struct ipv6_txoptions *opt;
+
+ /* Release rx options */
+
+ if ((skb = xchg(&np->pktoptions, NULL)) != NULL)
+ kfree_skb(skb);
+
+ if ((skb = xchg(&np->rxpmtu, NULL)) != NULL)
+ kfree_skb(skb);
+
+ /* Free flowlabels */
+ fl6_free_socklist(sk);
+
+ /* Free tx options */
+
+ if ((opt = xchg(&np->opt, NULL)) != NULL)
+ sock_kfree_s(sk, opt, opt->tot_len);
+}
+
+EXPORT_SYMBOL_GPL(inet6_destroy_sock);
+
+/*
+ * This does both peername and sockname.
+ */
+
+int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
+ int *uaddr_len, int peer)
+{
+ struct sockaddr_in6 *sin=(struct sockaddr_in6 *)uaddr;
+ struct sock *sk = sock->sk;
+ struct inet_sock *inet = inet_sk(sk);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ sin->sin6_family = AF_INET6;
+ sin->sin6_flowinfo = 0;
+ sin->sin6_scope_id = 0;
+ if (peer) {
+ if (!inet->inet_dport)
+ return -ENOTCONN;
+ if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) &&
+ peer == 1)
+ return -ENOTCONN;
+ sin->sin6_port = inet->inet_dport;
+ sin->sin6_addr = np->daddr;
+ if (np->sndflow)
+ sin->sin6_flowinfo = np->flow_label;
+ } else {
+ if (ipv6_addr_any(&np->rcv_saddr))
+ sin->sin6_addr = np->saddr;
+ else
+ sin->sin6_addr = np->rcv_saddr;
+
+ sin->sin6_port = inet->inet_sport;
+ }
+ if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
+ sin->sin6_scope_id = sk->sk_bound_dev_if;
+ *uaddr_len = sizeof(*sin);
+ return 0;
+}
+
+EXPORT_SYMBOL(inet6_getname);
+
+int inet6_killaddr_ioctl(struct net *net, void __user *arg) {
+ struct in6_ifreq ireq;
+ struct sockaddr_in6 sin6;
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EACCES;
+
+ if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
+ return -EFAULT;
+
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_addr = ireq.ifr6_addr;
+ return tcp_nuke_addr(net, (struct sockaddr *) &sin6);
+}
+
+int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+ struct sock *sk = sock->sk;
+ struct net *net = sock_net(sk);
+
+ switch(cmd)
+ {
+ case SIOCGSTAMP:
+ return sock_get_timestamp(sk, (struct timeval __user *)arg);
+
+ case SIOCGSTAMPNS:
+ return sock_get_timestampns(sk, (struct timespec __user *)arg);
+
+ case SIOCADDRT:
+ case SIOCDELRT:
+
+ return ipv6_route_ioctl(net, cmd, (void __user *)arg);
+
+ case SIOCSIFADDR:
+ return addrconf_add_ifaddr(net, (void __user *) arg);
+ case SIOCDIFADDR:
+ return addrconf_del_ifaddr(net, (void __user *) arg);
+ case SIOCSIFDSTADDR:
+ return addrconf_set_dstaddr(net, (void __user *) arg);
+ case SIOCKILLADDR:
+ return inet6_killaddr_ioctl(net, (void __user *) arg);
+ default:
+ if (!sk->sk_prot->ioctl)
+ return -ENOIOCTLCMD;
+ return sk->sk_prot->ioctl(sk, cmd, arg);
+ }
+ /*NOTREACHED*/
+ return 0;
+}
+
+EXPORT_SYMBOL(inet6_ioctl);
+
+const struct proto_ops inet6_stream_ops = {
+ .family = PF_INET6,
+ .owner = THIS_MODULE,
+ .release = inet6_release,
+ .bind = inet6_bind,
+ .connect = inet_stream_connect, /* ok */
+ .socketpair = sock_no_socketpair, /* a do nothing */
+ .accept = inet_accept, /* ok */
+ .getname = inet6_getname,
+ .poll = tcp_poll, /* ok */
+ .ioctl = inet6_ioctl, /* must change */
+ .listen = inet_listen, /* ok */
+ .shutdown = inet_shutdown, /* ok */
+ .setsockopt = sock_common_setsockopt, /* ok */
+ .getsockopt = sock_common_getsockopt, /* ok */
+ .sendmsg = inet_sendmsg, /* ok */
+ .recvmsg = inet_recvmsg, /* ok */
+ .mmap = sock_no_mmap,
+ .sendpage = inet_sendpage,
+ .splice_read = tcp_splice_read,
+#ifdef CONFIG_COMPAT
+ .compat_setsockopt = compat_sock_common_setsockopt,
+ .compat_getsockopt = compat_sock_common_getsockopt,
+#endif
+};
+
+const struct proto_ops inet6_dgram_ops = {
+ .family = PF_INET6,
+ .owner = THIS_MODULE,
+ .release = inet6_release,
+ .bind = inet6_bind,
+ .connect = inet_dgram_connect, /* ok */
+ .socketpair = sock_no_socketpair, /* a do nothing */
+ .accept = sock_no_accept, /* a do nothing */
+ .getname = inet6_getname,
+ .poll = udp_poll, /* ok */
+ .ioctl = inet6_ioctl, /* must change */
+ .listen = sock_no_listen, /* ok */
+ .shutdown = inet_shutdown, /* ok */
+ .setsockopt = sock_common_setsockopt, /* ok */
+ .getsockopt = sock_common_getsockopt, /* ok */
+ .sendmsg = inet_sendmsg, /* ok */
+ .recvmsg = inet_recvmsg, /* ok */
+ .mmap = sock_no_mmap,
+ .sendpage = sock_no_sendpage,
+#ifdef CONFIG_COMPAT
+ .compat_setsockopt = compat_sock_common_setsockopt,
+ .compat_getsockopt = compat_sock_common_getsockopt,
+#endif
+};
+
+static const struct net_proto_family inet6_family_ops = {
+ .family = PF_INET6,
+ .create = inet6_create,
+ .owner = THIS_MODULE,
+};
+
+int inet6_register_protosw(struct inet_protosw *p)
+{
+ struct list_head *lh;
+ struct inet_protosw *answer;
+ struct list_head *last_perm;
+ int protocol = p->protocol;
+ int ret;
+
+ spin_lock_bh(&inetsw6_lock);
+
+ ret = -EINVAL;
+ if (p->type >= SOCK_MAX)
+ goto out_illegal;
+
+ /* If we are trying to override a permanent protocol, bail. */
+ answer = NULL;
+ ret = -EPERM;
+ last_perm = &inetsw6[p->type];
+ list_for_each(lh, &inetsw6[p->type]) {
+ answer = list_entry(lh, struct inet_protosw, list);
+
+ /* Check only the non-wild match. */
+ if (INET_PROTOSW_PERMANENT & answer->flags) {
+ if (protocol == answer->protocol)
+ break;
+ last_perm = lh;
+ }
+
+ answer = NULL;
+ }
+ if (answer)
+ goto out_permanent;
+
+ /* Add the new entry after the last permanent entry if any, so that
+ * the new entry does not override a permanent entry when matched with
+ * a wild-card protocol. But it is allowed to override any existing
+ * non-permanent entry. This means that when we remove this entry, the
+ * system automatically returns to the old behavior.
+ */
+ list_add_rcu(&p->list, last_perm);
+ ret = 0;
+out:
+ spin_unlock_bh(&inetsw6_lock);
+ return ret;
+
+out_permanent:
+ printk(KERN_ERR "Attempt to override permanent protocol %d.\n",
+ protocol);
+ goto out;
+
+out_illegal:
+ printk(KERN_ERR
+ "Ignoring attempt to register invalid socket type %d.\n",
+ p->type);
+ goto out;
+}
+
+EXPORT_SYMBOL(inet6_register_protosw);
+
+void
+inet6_unregister_protosw(struct inet_protosw *p)
+{
+ if (INET_PROTOSW_PERMANENT & p->flags) {
+ printk(KERN_ERR
+ "Attempt to unregister permanent protocol %d.\n",
+ p->protocol);
+ } else {
+ spin_lock_bh(&inetsw6_lock);
+ list_del_rcu(&p->list);
+ spin_unlock_bh(&inetsw6_lock);
+
+ synchronize_net();
+ }
+}
+
+EXPORT_SYMBOL(inet6_unregister_protosw);
+
+int inet6_sk_rebuild_header(struct sock *sk)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct dst_entry *dst;
+
+ dst = __sk_dst_check(sk, np->dst_cookie);
+
+ if (dst == NULL) {
+ struct inet_sock *inet = inet_sk(sk);
+ struct in6_addr *final_p, final;
+ struct flowi6 fl6;
+
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_proto = sk->sk_protocol;
+ fl6.daddr = np->daddr;
+ fl6.saddr = np->saddr;
+ fl6.flowlabel = np->flow_label;
+ fl6.flowi6_oif = sk->sk_bound_dev_if;
+ fl6.flowi6_mark = sk->sk_mark;
+ fl6.fl6_dport = inet->inet_dport;
+ fl6.fl6_sport = inet->inet_sport;
+ security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
+
+ final_p = fl6_update_dst(&fl6, np->opt, &final);
+
+ dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
+ if (IS_ERR(dst)) {
+ sk->sk_route_caps = 0;
+ sk->sk_err_soft = -PTR_ERR(dst);
+ return PTR_ERR(dst);
+ }
+
+ __ip6_dst_store(sk, dst, NULL, NULL);
+ }
+
+ return 0;
+}
+
+EXPORT_SYMBOL_GPL(inet6_sk_rebuild_header);
+
+int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct inet6_skb_parm *opt = IP6CB(skb);
+
+ if (np->rxopt.all) {
+ if ((opt->hop && (np->rxopt.bits.hopopts ||
+ np->rxopt.bits.ohopopts)) ||
+ ((IPV6_FLOWINFO_MASK &
+ *(__be32 *)skb_network_header(skb)) &&
+ np->rxopt.bits.rxflow) ||
+ (opt->srcrt && (np->rxopt.bits.srcrt ||
+ np->rxopt.bits.osrcrt)) ||
+ ((opt->dst1 || opt->dst0) &&
+ (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts)))
+ return 1;
+ }
+ return 0;
+}
+
+EXPORT_SYMBOL_GPL(ipv6_opt_accepted);
+
+static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto)
+{
+ const struct inet6_protocol *ops = NULL;
+
+ for (;;) {
+ struct ipv6_opt_hdr *opth;
+ int len;
+
+ if (proto != NEXTHDR_HOP) {
+ ops = rcu_dereference(inet6_protos[proto]);
+
+ if (unlikely(!ops))
+ break;
+
+ if (!(ops->flags & INET6_PROTO_GSO_EXTHDR))
+ break;
+ }
+
+ if (unlikely(!pskb_may_pull(skb, 8)))
+ break;
+
+ opth = (void *)skb->data;
+ len = ipv6_optlen(opth);
+
+ if (unlikely(!pskb_may_pull(skb, len)))
+ break;
+
+ proto = opth->nexthdr;
+ __skb_pull(skb, len);
+ }
+
+ return proto;
+}
+
+static int ipv6_gso_send_check(struct sk_buff *skb)
+{
+ const struct ipv6hdr *ipv6h;
+ const struct inet6_protocol *ops;
+ int err = -EINVAL;
+
+ if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
+ goto out;
+
+ ipv6h = ipv6_hdr(skb);
+ __skb_pull(skb, sizeof(*ipv6h));
+ err = -EPROTONOSUPPORT;
+
+ rcu_read_lock();
+ ops = rcu_dereference(inet6_protos[
+ ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr)]);
+
+ if (likely(ops && ops->gso_send_check)) {
+ skb_reset_transport_header(skb);
+ err = ops->gso_send_check(skb);
+ }
+ rcu_read_unlock();
+
+out:
+ return err;
+}
+
+static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
+ netdev_features_t features)
+{
+ struct sk_buff *segs = ERR_PTR(-EINVAL);
+ struct ipv6hdr *ipv6h;
+ const struct inet6_protocol *ops;
+ int proto;
+ struct frag_hdr *fptr;
+ unsigned int unfrag_ip6hlen;
+ u8 *prevhdr;
+ int offset = 0;
+
+ if (!(features & NETIF_F_V6_CSUM))
+ features &= ~NETIF_F_SG;
+
+ if (unlikely(skb_shinfo(skb)->gso_type &
+ ~(SKB_GSO_UDP |
+ SKB_GSO_DODGY |
+ SKB_GSO_TCP_ECN |
+ SKB_GSO_TCPV6 |
+ 0)))
+ goto out;
+
+ if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
+ goto out;
+
+ ipv6h = ipv6_hdr(skb);
+ __skb_pull(skb, sizeof(*ipv6h));
+ segs = ERR_PTR(-EPROTONOSUPPORT);
+
+ proto = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr);
+ rcu_read_lock();
+ ops = rcu_dereference(inet6_protos[proto]);
+ if (likely(ops && ops->gso_segment)) {
+ skb_reset_transport_header(skb);
+ segs = ops->gso_segment(skb, features);
+ }
+ rcu_read_unlock();
+
+ if (IS_ERR(segs))
+ goto out;
+
+ for (skb = segs; skb; skb = skb->next) {
+ ipv6h = ipv6_hdr(skb);
+ ipv6h->payload_len = htons(skb->len - skb->mac_len -
+ sizeof(*ipv6h));
+ if (proto == IPPROTO_UDP) {
+ unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
+ fptr = (struct frag_hdr *)(skb_network_header(skb) +
+ unfrag_ip6hlen);
+ fptr->frag_off = htons(offset);
+ if (skb->next != NULL)
+ fptr->frag_off |= htons(IP6_MF);
+ offset += (ntohs(ipv6h->payload_len) -
+ sizeof(struct frag_hdr));
+ }
+ }
+
+out:
+ return segs;
+}
+
+struct ipv6_gro_cb {
+ struct napi_gro_cb napi;
+ int proto;
+};
+
+#define IPV6_GRO_CB(skb) ((struct ipv6_gro_cb *)(skb)->cb)
+
+static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
+ struct sk_buff *skb)
+{
+ const struct inet6_protocol *ops;
+ struct sk_buff **pp = NULL;
+ struct sk_buff *p;
+ struct ipv6hdr *iph;
+ unsigned int nlen;
+ unsigned int hlen;
+ unsigned int off;
+ int flush = 1;
+ int proto;
+ __wsum csum;
+
+ off = skb_gro_offset(skb);
+ hlen = off + sizeof(*iph);
+ iph = skb_gro_header_fast(skb, off);
+ if (skb_gro_header_hard(skb, hlen)) {
+ iph = skb_gro_header_slow(skb, hlen, off);
+ if (unlikely(!iph))
+ goto out;
+ }
+
+ skb_gro_pull(skb, sizeof(*iph));
+ skb_set_transport_header(skb, skb_gro_offset(skb));
+
+ flush += ntohs(iph->payload_len) != skb_gro_len(skb);
+
+ rcu_read_lock();
+ proto = iph->nexthdr;
+ ops = rcu_dereference(inet6_protos[proto]);
+ if (!ops || !ops->gro_receive) {
+ __pskb_pull(skb, skb_gro_offset(skb));
+ proto = ipv6_gso_pull_exthdrs(skb, proto);
+ skb_gro_pull(skb, -skb_transport_offset(skb));
+ skb_reset_transport_header(skb);
+ __skb_push(skb, skb_gro_offset(skb));
+
+ ops = rcu_dereference(inet6_protos[proto]);
+ if (!ops || !ops->gro_receive)
+ goto out_unlock;
+
+ iph = ipv6_hdr(skb);
+ }
+
+ IPV6_GRO_CB(skb)->proto = proto;
+
+ flush--;
+ nlen = skb_network_header_len(skb);
+
+ for (p = *head; p; p = p->next) {
+ struct ipv6hdr *iph2;
+
+ if (!NAPI_GRO_CB(p)->same_flow)
+ continue;
+
+ iph2 = ipv6_hdr(p);
+
+ /* All fields must match except length. */
+ if (nlen != skb_network_header_len(p) ||
+ memcmp(iph, iph2, offsetof(struct ipv6hdr, payload_len)) ||
+ memcmp(&iph->nexthdr, &iph2->nexthdr,
+ nlen - offsetof(struct ipv6hdr, nexthdr))) {
+ NAPI_GRO_CB(p)->same_flow = 0;
+ continue;
+ }
+
+ NAPI_GRO_CB(p)->flush |= flush;
+ }
+
+ NAPI_GRO_CB(skb)->flush |= flush;
+
+ csum = skb->csum;
+ skb_postpull_rcsum(skb, iph, skb_network_header_len(skb));
+
+ pp = ops->gro_receive(head, skb);
+
+ skb->csum = csum;
+
+out_unlock:
+ rcu_read_unlock();
+
+out:
+ NAPI_GRO_CB(skb)->flush |= flush;
+
+ return pp;
+}
+
+static int ipv6_gro_complete(struct sk_buff *skb)
+{
+ const struct inet6_protocol *ops;
+ struct ipv6hdr *iph = ipv6_hdr(skb);
+ int err = -ENOSYS;
+
+ iph->payload_len = htons(skb->len - skb_network_offset(skb) -
+ sizeof(*iph));
+
+ rcu_read_lock();
+ ops = rcu_dereference(inet6_protos[IPV6_GRO_CB(skb)->proto]);
+ if (WARN_ON(!ops || !ops->gro_complete))
+ goto out_unlock;
+
+ err = ops->gro_complete(skb);
+
+out_unlock:
+ rcu_read_unlock();
+
+ return err;
+}
+
+static struct packet_type ipv6_packet_type __read_mostly = {
+ .type = cpu_to_be16(ETH_P_IPV6),
+ .func = ipv6_rcv,
+ .gso_send_check = ipv6_gso_send_check,
+ .gso_segment = ipv6_gso_segment,
+ .gro_receive = ipv6_gro_receive,
+ .gro_complete = ipv6_gro_complete,
+};
+
+static int __init ipv6_packet_init(void)
+{
+ dev_add_pack(&ipv6_packet_type);
+ return 0;
+}
+
+static void ipv6_packet_cleanup(void)
+{
+ dev_remove_pack(&ipv6_packet_type);
+}
+
+static int __net_init ipv6_init_mibs(struct net *net)
+{
+ if (snmp_mib_init((void __percpu **)net->mib.udp_stats_in6,
+ sizeof(struct udp_mib),
+ __alignof__(struct udp_mib)) < 0)
+ return -ENOMEM;
+ if (snmp_mib_init((void __percpu **)net->mib.udplite_stats_in6,
+ sizeof(struct udp_mib),
+ __alignof__(struct udp_mib)) < 0)
+ goto err_udplite_mib;
+ if (snmp_mib_init((void __percpu **)net->mib.ipv6_statistics,
+ sizeof(struct ipstats_mib),
+ __alignof__(struct ipstats_mib)) < 0)
+ goto err_ip_mib;
+ if (snmp_mib_init((void __percpu **)net->mib.icmpv6_statistics,
+ sizeof(struct icmpv6_mib),
+ __alignof__(struct icmpv6_mib)) < 0)
+ goto err_icmp_mib;
+ net->mib.icmpv6msg_statistics = kzalloc(sizeof(struct icmpv6msg_mib),
+ GFP_KERNEL);
+ if (!net->mib.icmpv6msg_statistics)
+ goto err_icmpmsg_mib;
+ return 0;
+
+err_icmpmsg_mib:
+ snmp_mib_free((void __percpu **)net->mib.icmpv6_statistics);
+err_icmp_mib:
+ snmp_mib_free((void __percpu **)net->mib.ipv6_statistics);
+err_ip_mib:
+ snmp_mib_free((void __percpu **)net->mib.udplite_stats_in6);
+err_udplite_mib:
+ snmp_mib_free((void __percpu **)net->mib.udp_stats_in6);
+ return -ENOMEM;
+}
+
+static void ipv6_cleanup_mibs(struct net *net)
+{
+ snmp_mib_free((void __percpu **)net->mib.udp_stats_in6);
+ snmp_mib_free((void __percpu **)net->mib.udplite_stats_in6);
+ snmp_mib_free((void __percpu **)net->mib.ipv6_statistics);
+ snmp_mib_free((void __percpu **)net->mib.icmpv6_statistics);
+ kfree(net->mib.icmpv6msg_statistics);
+}
+
+static int __net_init inet6_net_init(struct net *net)
+{
+ int err = 0;
+
+ net->ipv6.sysctl.bindv6only = 0;
+ net->ipv6.sysctl.icmpv6_time = 1*HZ;
+
+ err = ipv6_init_mibs(net);
+ if (err)
+ return err;
+#ifdef CONFIG_PROC_FS
+ err = udp6_proc_init(net);
+ if (err)
+ goto out;
+ err = tcp6_proc_init(net);
+ if (err)
+ goto proc_tcp6_fail;
+ err = ac6_proc_init(net);
+ if (err)
+ goto proc_ac6_fail;
+#endif
+ return err;
+
+#ifdef CONFIG_PROC_FS
+proc_ac6_fail:
+ tcp6_proc_exit(net);
+proc_tcp6_fail:
+ udp6_proc_exit(net);
+out:
+ ipv6_cleanup_mibs(net);
+ return err;
+#endif
+}
+
+static void __net_exit inet6_net_exit(struct net *net)
+{
+#ifdef CONFIG_PROC_FS
+ udp6_proc_exit(net);
+ tcp6_proc_exit(net);
+ ac6_proc_exit(net);
+#endif
+ ipv6_cleanup_mibs(net);
+}
+
+static struct pernet_operations inet6_net_ops = {
+ .init = inet6_net_init,
+ .exit = inet6_net_exit,
+};
+
+static int __init inet6_init(void)
+{
+ struct sk_buff *dummy_skb;
+ struct list_head *r;
+ int err = 0;
+
+ BUILD_BUG_ON(sizeof(struct inet6_skb_parm) > sizeof(dummy_skb->cb));
+
+ /* Register the socket-side information for inet6_create. */
+ for(r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r)
+ INIT_LIST_HEAD(r);
+
+ if (disable_ipv6_mod) {
+ printk(KERN_INFO
+ "IPv6: Loaded, but administratively disabled, "
+ "reboot required to enable\n");
+ goto out;
+ }
+
+ err = proto_register(&tcpv6_prot, 1);
+ if (err)
+ goto out;
+
+ err = proto_register(&udpv6_prot, 1);
+ if (err)
+ goto out_unregister_tcp_proto;
+
+ err = proto_register(&udplitev6_prot, 1);
+ if (err)
+ goto out_unregister_udp_proto;
+
+ err = proto_register(&rawv6_prot, 1);
+ if (err)
+ goto out_unregister_udplite_proto;
+
+ err = proto_register(&pingv6_prot, 1);
+ if (err)
+ goto out_unregister_ping_proto;
+
+ /* We MUST register RAW sockets before we create the ICMP6,
+ * IGMP6, or NDISC control sockets.
+ */
+ err = rawv6_init();
+ if (err)
+ goto out_unregister_raw_proto;
+
+ /* Register the family here so that the init calls below will
+ * be able to create sockets. (?? is this dangerous ??)
+ */
+ err = sock_register(&inet6_family_ops);
+ if (err)
+ goto out_sock_register_fail;
+
+#ifdef CONFIG_SYSCTL
+ err = ipv6_static_sysctl_register();
+ if (err)
+ goto static_sysctl_fail;
+#endif
+ tcpv6_prot.sysctl_mem = init_net.ipv4.sysctl_tcp_mem;
+
+ /*
+ * ipngwg API draft makes clear that the correct semantics
+ * for TCP and UDP is to consider one TCP and UDP instance
+ * in a host available by both INET and INET6 APIs and
+ * able to communicate via both network protocols.
+ */
+
+ err = register_pernet_subsys(&inet6_net_ops);
+ if (err)
+ goto register_pernet_fail;
+ err = icmpv6_init();
+ if (err)
+ goto icmp_fail;
+ err = ip6_mr_init();
+ if (err)
+ goto ipmr_fail;
+ err = ndisc_init();
+ if (err)
+ goto ndisc_fail;
+ err = igmp6_init();
+ if (err)
+ goto igmp_fail;
+ err = ipv6_netfilter_init();
+ if (err)
+ goto netfilter_fail;
+ /* Create /proc/foo6 entries. */
+#ifdef CONFIG_PROC_FS
+ err = -ENOMEM;
+ if (raw6_proc_init())
+ goto proc_raw6_fail;
+ if (udplite6_proc_init())
+ goto proc_udplite6_fail;
+ if (ipv6_misc_proc_init())
+ goto proc_misc6_fail;
+ if (if6_proc_init())
+ goto proc_if6_fail;
+#endif
+ err = ip6_route_init();
+ if (err)
+ goto ip6_route_fail;
+ err = ip6_flowlabel_init();
+ if (err)
+ goto ip6_flowlabel_fail;
+ err = addrconf_init();
+ if (err)
+ goto addrconf_fail;
+
+ /* Init v6 extension headers. */
+ err = ipv6_exthdrs_init();
+ if (err)
+ goto ipv6_exthdrs_fail;
+
+ err = ipv6_frag_init();
+ if (err)
+ goto ipv6_frag_fail;
+
+ /* Init v6 transport protocols. */
+ err = udpv6_init();
+ if (err)
+ goto udpv6_fail;
+
+ err = udplitev6_init();
+ if (err)
+ goto udplitev6_fail;
+
+ err = tcpv6_init();
+ if (err)
+ goto tcpv6_fail;
+
+ err = ipv6_packet_init();
+ if (err)
+ goto ipv6_packet_fail;
+
+ err = pingv6_init();
+ if (err)
+ goto pingv6_fail;
+
+#ifdef CONFIG_SYSCTL
+ err = ipv6_sysctl_register();
+ if (err)
+ goto sysctl_fail;
+#endif
+out:
+ return err;
+
+#ifdef CONFIG_SYSCTL
+sysctl_fail:
+ ipv6_packet_cleanup();
+#endif
+pingv6_fail:
+ pingv6_exit();
+ipv6_packet_fail:
+ tcpv6_exit();
+tcpv6_fail:
+ udplitev6_exit();
+udplitev6_fail:
+ udpv6_exit();
+udpv6_fail:
+ ipv6_frag_exit();
+ipv6_frag_fail:
+ ipv6_exthdrs_exit();
+ipv6_exthdrs_fail:
+ addrconf_cleanup();
+addrconf_fail:
+ ip6_flowlabel_cleanup();
+ip6_flowlabel_fail:
+ ip6_route_cleanup();
+ip6_route_fail:
+#ifdef CONFIG_PROC_FS
+ if6_proc_exit();
+proc_if6_fail:
+ ipv6_misc_proc_exit();
+proc_misc6_fail:
+ udplite6_proc_exit();
+proc_udplite6_fail:
+ raw6_proc_exit();
+proc_raw6_fail:
+#endif
+ ipv6_netfilter_fini();
+netfilter_fail:
+ igmp6_cleanup();
+igmp_fail:
+ ndisc_cleanup();
+ndisc_fail:
+ ip6_mr_cleanup();
+ipmr_fail:
+ icmpv6_cleanup();
+icmp_fail:
+ unregister_pernet_subsys(&inet6_net_ops);
+register_pernet_fail:
+#ifdef CONFIG_SYSCTL
+ ipv6_static_sysctl_unregister();
+static_sysctl_fail:
+#endif
+ sock_unregister(PF_INET6);
+ rtnl_unregister_all(PF_INET6);
+out_sock_register_fail:
+ rawv6_exit();
+out_unregister_ping_proto:
+ proto_unregister(&pingv6_prot);
+out_unregister_raw_proto:
+ proto_unregister(&rawv6_prot);
+out_unregister_udplite_proto:
+ proto_unregister(&udplitev6_prot);
+out_unregister_udp_proto:
+ proto_unregister(&udpv6_prot);
+out_unregister_tcp_proto:
+ proto_unregister(&tcpv6_prot);
+ goto out;
+}
+module_init(inet6_init);
+
+static void __exit inet6_exit(void)
+{
+ if (disable_ipv6_mod)
+ return;
+
+ /* First of all disallow new sockets creation. */
+ sock_unregister(PF_INET6);
+ /* Disallow any further netlink messages */
+ rtnl_unregister_all(PF_INET6);
+
+#ifdef CONFIG_SYSCTL
+ ipv6_sysctl_unregister();
+#endif
+ udpv6_exit();
+ udplitev6_exit();
+ tcpv6_exit();
+
+ /* Cleanup code parts. */
+ ipv6_packet_cleanup();
+ ipv6_frag_exit();
+ ipv6_exthdrs_exit();
+ addrconf_cleanup();
+ ip6_flowlabel_cleanup();
+ ip6_route_cleanup();
+#ifdef CONFIG_PROC_FS
+
+ /* Cleanup code parts. */
+ if6_proc_exit();
+ ipv6_misc_proc_exit();
+ udplite6_proc_exit();
+ raw6_proc_exit();
+#endif
+ ipv6_netfilter_fini();
+ igmp6_cleanup();
+ ndisc_cleanup();
+ ip6_mr_cleanup();
+ icmpv6_cleanup();
+ rawv6_exit();
+
+ unregister_pernet_subsys(&inet6_net_ops);
+#ifdef CONFIG_SYSCTL
+ ipv6_static_sysctl_unregister();
+#endif
+ proto_unregister(&rawv6_prot);
+ proto_unregister(&udplitev6_prot);
+ proto_unregister(&udpv6_prot);
+ proto_unregister(&tcpv6_prot);
+
+ rcu_barrier(); /* Wait for completion of call_rcu()'s */
+}
+module_exit(inet6_exit);
+
+MODULE_ALIAS_NETPROTO(PF_INET6);
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
new file mode 100644
index 00000000..2ae79dbe
--- /dev/null
+++ b/net/ipv6/ah6.c
@@ -0,0 +1,757 @@
+/*
+ * Copyright (C)2002 USAGI/WIDE Project
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Authors
+ *
+ * Mitsuru KANDA @USAGI : IPv6 Support
+ * Kazunori MIYAZAWA @USAGI :
+ * Kunihiro Ishiguro <kunihiro@ipinfusion.com>
+ *
+ * This file is derived from net/ipv4/ah.c.
+ */
+
+#include <crypto/hash.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <net/ip.h>
+#include <net/ah.h>
+#include <linux/crypto.h>
+#include <linux/pfkeyv2.h>
+#include <linux/string.h>
+#include <linux/scatterlist.h>
+#include <net/icmp.h>
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/xfrm.h>
+
+#define IPV6HDR_BASELEN 8
+
+struct tmp_ext {
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ struct in6_addr saddr;
+#endif
+ struct in6_addr daddr;
+ char hdrs[0];
+};
+
+struct ah_skb_cb {
+ struct xfrm_skb_cb xfrm;
+ void *tmp;
+};
+
+#define AH_SKB_CB(__skb) ((struct ah_skb_cb *)&((__skb)->cb[0]))
+
+static void *ah_alloc_tmp(struct crypto_ahash *ahash, int nfrags,
+ unsigned int size)
+{
+ unsigned int len;
+
+ len = size + crypto_ahash_digestsize(ahash) +
+ (crypto_ahash_alignmask(ahash) &
+ ~(crypto_tfm_ctx_alignment() - 1));
+
+ len = ALIGN(len, crypto_tfm_ctx_alignment());
+
+ len += sizeof(struct ahash_request) + crypto_ahash_reqsize(ahash);
+ len = ALIGN(len, __alignof__(struct scatterlist));
+
+ len += sizeof(struct scatterlist) * nfrags;
+
+ return kmalloc(len, GFP_ATOMIC);
+}
+
+static inline struct tmp_ext *ah_tmp_ext(void *base)
+{
+ return base + IPV6HDR_BASELEN;
+}
+
+static inline u8 *ah_tmp_auth(u8 *tmp, unsigned int offset)
+{
+ return tmp + offset;
+}
+
+static inline u8 *ah_tmp_icv(struct crypto_ahash *ahash, void *tmp,
+ unsigned int offset)
+{
+ return PTR_ALIGN((u8 *)tmp + offset, crypto_ahash_alignmask(ahash) + 1);
+}
+
+static inline struct ahash_request *ah_tmp_req(struct crypto_ahash *ahash,
+ u8 *icv)
+{
+ struct ahash_request *req;
+
+ req = (void *)PTR_ALIGN(icv + crypto_ahash_digestsize(ahash),
+ crypto_tfm_ctx_alignment());
+
+ ahash_request_set_tfm(req, ahash);
+
+ return req;
+}
+
+static inline struct scatterlist *ah_req_sg(struct crypto_ahash *ahash,
+ struct ahash_request *req)
+{
+ return (void *)ALIGN((unsigned long)(req + 1) +
+ crypto_ahash_reqsize(ahash),
+ __alignof__(struct scatterlist));
+}
+
+static int zero_out_mutable_opts(struct ipv6_opt_hdr *opthdr)
+{
+ u8 *opt = (u8 *)opthdr;
+ int len = ipv6_optlen(opthdr);
+ int off = 0;
+ int optlen = 0;
+
+ off += 2;
+ len -= 2;
+
+ while (len > 0) {
+
+ switch (opt[off]) {
+
+ case IPV6_TLV_PAD0:
+ optlen = 1;
+ break;
+ default:
+ if (len < 2)
+ goto bad;
+ optlen = opt[off+1]+2;
+ if (len < optlen)
+ goto bad;
+ if (opt[off] & 0x20)
+ memset(&opt[off+2], 0, opt[off+1]);
+ break;
+ }
+
+ off += optlen;
+ len -= optlen;
+ }
+ if (len == 0)
+ return 1;
+
+bad:
+ return 0;
+}
+
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+/**
+ * ipv6_rearrange_destopt - rearrange IPv6 destination options header
+ * @iph: IPv6 header
+ * @destopt: destionation options header
+ */
+static void ipv6_rearrange_destopt(struct ipv6hdr *iph, struct ipv6_opt_hdr *destopt)
+{
+ u8 *opt = (u8 *)destopt;
+ int len = ipv6_optlen(destopt);
+ int off = 0;
+ int optlen = 0;
+
+ off += 2;
+ len -= 2;
+
+ while (len > 0) {
+
+ switch (opt[off]) {
+
+ case IPV6_TLV_PAD0:
+ optlen = 1;
+ break;
+ default:
+ if (len < 2)
+ goto bad;
+ optlen = opt[off+1]+2;
+ if (len < optlen)
+ goto bad;
+
+ /* Rearrange the source address in @iph and the
+ * addresses in home address option for final source.
+ * See 11.3.2 of RFC 3775 for details.
+ */
+ if (opt[off] == IPV6_TLV_HAO) {
+ struct in6_addr final_addr;
+ struct ipv6_destopt_hao *hao;
+
+ hao = (struct ipv6_destopt_hao *)&opt[off];
+ if (hao->length != sizeof(hao->addr)) {
+ if (net_ratelimit())
+ printk(KERN_WARNING "destopt hao: invalid header length: %u\n", hao->length);
+ goto bad;
+ }
+ final_addr = hao->addr;
+ hao->addr = iph->saddr;
+ iph->saddr = final_addr;
+ }
+ break;
+ }
+
+ off += optlen;
+ len -= optlen;
+ }
+ /* Note: ok if len == 0 */
+bad:
+ return;
+}
+#else
+static void ipv6_rearrange_destopt(struct ipv6hdr *iph, struct ipv6_opt_hdr *destopt) {}
+#endif
+
+/**
+ * ipv6_rearrange_rthdr - rearrange IPv6 routing header
+ * @iph: IPv6 header
+ * @rthdr: routing header
+ *
+ * Rearrange the destination address in @iph and the addresses in @rthdr
+ * so that they appear in the order they will at the final destination.
+ * See Appendix A2 of RFC 2402 for details.
+ */
+static void ipv6_rearrange_rthdr(struct ipv6hdr *iph, struct ipv6_rt_hdr *rthdr)
+{
+ int segments, segments_left;
+ struct in6_addr *addrs;
+ struct in6_addr final_addr;
+
+ segments_left = rthdr->segments_left;
+ if (segments_left == 0)
+ return;
+ rthdr->segments_left = 0;
+
+ /* The value of rthdr->hdrlen has been verified either by the system
+ * call if it is locally generated, or by ipv6_rthdr_rcv() for incoming
+ * packets. So we can assume that it is even and that segments is
+ * greater than or equal to segments_left.
+ *
+ * For the same reason we can assume that this option is of type 0.
+ */
+ segments = rthdr->hdrlen >> 1;
+
+ addrs = ((struct rt0_hdr *)rthdr)->addr;
+ final_addr = addrs[segments - 1];
+
+ addrs += segments - segments_left;
+ memmove(addrs + 1, addrs, (segments_left - 1) * sizeof(*addrs));
+
+ addrs[0] = iph->daddr;
+ iph->daddr = final_addr;
+}
+
+static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir)
+{
+ union {
+ struct ipv6hdr *iph;
+ struct ipv6_opt_hdr *opth;
+ struct ipv6_rt_hdr *rth;
+ char *raw;
+ } exthdr = { .iph = iph };
+ char *end = exthdr.raw + len;
+ int nexthdr = iph->nexthdr;
+
+ exthdr.iph++;
+
+ while (exthdr.raw < end) {
+ switch (nexthdr) {
+ case NEXTHDR_DEST:
+ if (dir == XFRM_POLICY_OUT)
+ ipv6_rearrange_destopt(iph, exthdr.opth);
+ case NEXTHDR_HOP:
+ if (!zero_out_mutable_opts(exthdr.opth)) {
+ LIMIT_NETDEBUG(
+ KERN_WARNING "overrun %sopts\n",
+ nexthdr == NEXTHDR_HOP ?
+ "hop" : "dest");
+ return -EINVAL;
+ }
+ break;
+
+ case NEXTHDR_ROUTING:
+ ipv6_rearrange_rthdr(iph, exthdr.rth);
+ break;
+
+ default :
+ return 0;
+ }
+
+ nexthdr = exthdr.opth->nexthdr;
+ exthdr.raw += ipv6_optlen(exthdr.opth);
+ }
+
+ return 0;
+}
+
+static void ah6_output_done(struct crypto_async_request *base, int err)
+{
+ int extlen;
+ u8 *iph_base;
+ u8 *icv;
+ struct sk_buff *skb = base->data;
+ struct xfrm_state *x = skb_dst(skb)->xfrm;
+ struct ah_data *ahp = x->data;
+ struct ipv6hdr *top_iph = ipv6_hdr(skb);
+ struct ip_auth_hdr *ah = ip_auth_hdr(skb);
+ struct tmp_ext *iph_ext;
+
+ extlen = skb_network_header_len(skb) - sizeof(struct ipv6hdr);
+ if (extlen)
+ extlen += sizeof(*iph_ext);
+
+ iph_base = AH_SKB_CB(skb)->tmp;
+ iph_ext = ah_tmp_ext(iph_base);
+ icv = ah_tmp_icv(ahp->ahash, iph_ext, extlen);
+
+ memcpy(ah->auth_data, icv, ahp->icv_trunc_len);
+ memcpy(top_iph, iph_base, IPV6HDR_BASELEN);
+
+ if (extlen) {
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ memcpy(&top_iph->saddr, iph_ext, extlen);
+#else
+ memcpy(&top_iph->daddr, iph_ext, extlen);
+#endif
+ }
+
+ kfree(AH_SKB_CB(skb)->tmp);
+ xfrm_output_resume(skb, err);
+}
+
+static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+ int err;
+ int nfrags;
+ int extlen;
+ u8 *iph_base;
+ u8 *icv;
+ u8 nexthdr;
+ struct sk_buff *trailer;
+ struct crypto_ahash *ahash;
+ struct ahash_request *req;
+ struct scatterlist *sg;
+ struct ipv6hdr *top_iph;
+ struct ip_auth_hdr *ah;
+ struct ah_data *ahp;
+ struct tmp_ext *iph_ext;
+
+ ahp = x->data;
+ ahash = ahp->ahash;
+
+ if ((err = skb_cow_data(skb, 0, &trailer)) < 0)
+ goto out;
+ nfrags = err;
+
+ skb_push(skb, -skb_network_offset(skb));
+ extlen = skb_network_header_len(skb) - sizeof(struct ipv6hdr);
+ if (extlen)
+ extlen += sizeof(*iph_ext);
+
+ err = -ENOMEM;
+ iph_base = ah_alloc_tmp(ahash, nfrags, IPV6HDR_BASELEN + extlen);
+ if (!iph_base)
+ goto out;
+
+ iph_ext = ah_tmp_ext(iph_base);
+ icv = ah_tmp_icv(ahash, iph_ext, extlen);
+ req = ah_tmp_req(ahash, icv);
+ sg = ah_req_sg(ahash, req);
+
+ ah = ip_auth_hdr(skb);
+ memset(ah->auth_data, 0, ahp->icv_trunc_len);
+
+ top_iph = ipv6_hdr(skb);
+ top_iph->payload_len = htons(skb->len - sizeof(*top_iph));
+
+ nexthdr = *skb_mac_header(skb);
+ *skb_mac_header(skb) = IPPROTO_AH;
+
+ /* When there are no extension headers, we only need to save the first
+ * 8 bytes of the base IP header.
+ */
+ memcpy(iph_base, top_iph, IPV6HDR_BASELEN);
+
+ if (extlen) {
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ memcpy(iph_ext, &top_iph->saddr, extlen);
+#else
+ memcpy(iph_ext, &top_iph->daddr, extlen);
+#endif
+ err = ipv6_clear_mutable_options(top_iph,
+ extlen - sizeof(*iph_ext) +
+ sizeof(*top_iph),
+ XFRM_POLICY_OUT);
+ if (err)
+ goto out_free;
+ }
+
+ ah->nexthdr = nexthdr;
+
+ top_iph->priority = 0;
+ top_iph->flow_lbl[0] = 0;
+ top_iph->flow_lbl[1] = 0;
+ top_iph->flow_lbl[2] = 0;
+ top_iph->hop_limit = 0;
+
+ ah->hdrlen = (XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len) >> 2) - 2;
+
+ ah->reserved = 0;
+ ah->spi = x->id.spi;
+ ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
+
+ sg_init_table(sg, nfrags);
+ skb_to_sgvec(skb, sg, 0, skb->len);
+
+ ahash_request_set_crypt(req, sg, icv, skb->len);
+ ahash_request_set_callback(req, 0, ah6_output_done, skb);
+
+ AH_SKB_CB(skb)->tmp = iph_base;
+
+ err = crypto_ahash_digest(req);
+ if (err) {
+ if (err == -EINPROGRESS)
+ goto out;
+
+ if (err == -EBUSY)
+ err = NET_XMIT_DROP;
+ goto out_free;
+ }
+
+ memcpy(ah->auth_data, icv, ahp->icv_trunc_len);
+ memcpy(top_iph, iph_base, IPV6HDR_BASELEN);
+
+ if (extlen) {
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ memcpy(&top_iph->saddr, iph_ext, extlen);
+#else
+ memcpy(&top_iph->daddr, iph_ext, extlen);
+#endif
+ }
+
+out_free:
+ kfree(iph_base);
+out:
+ return err;
+}
+
+static void ah6_input_done(struct crypto_async_request *base, int err)
+{
+ u8 *auth_data;
+ u8 *icv;
+ u8 *work_iph;
+ struct sk_buff *skb = base->data;
+ struct xfrm_state *x = xfrm_input_state(skb);
+ struct ah_data *ahp = x->data;
+ struct ip_auth_hdr *ah = ip_auth_hdr(skb);
+ int hdr_len = skb_network_header_len(skb);
+ int ah_hlen = (ah->hdrlen + 2) << 2;
+
+ work_iph = AH_SKB_CB(skb)->tmp;
+ auth_data = ah_tmp_auth(work_iph, hdr_len);
+ icv = ah_tmp_icv(ahp->ahash, auth_data, ahp->icv_trunc_len);
+
+ err = memcmp(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG: 0;
+ if (err)
+ goto out;
+
+ err = ah->nexthdr;
+
+ skb->network_header += ah_hlen;
+ memcpy(skb_network_header(skb), work_iph, hdr_len);
+ __skb_pull(skb, ah_hlen + hdr_len);
+ skb_set_transport_header(skb, -hdr_len);
+out:
+ kfree(AH_SKB_CB(skb)->tmp);
+ xfrm_input_resume(skb, err);
+}
+
+
+
+static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+ /*
+ * Before process AH
+ * [IPv6][Ext1][Ext2][AH][Dest][Payload]
+ * |<-------------->| hdr_len
+ *
+ * To erase AH:
+ * Keeping copy of cleared headers. After AH processing,
+ * Moving the pointer of skb->network_header by using skb_pull as long
+ * as AH header length. Then copy back the copy as long as hdr_len
+ * If destination header following AH exists, copy it into after [Ext2].
+ *
+ * |<>|[IPv6][Ext1][Ext2][Dest][Payload]
+ * There is offset of AH before IPv6 header after the process.
+ */
+
+ u8 *auth_data;
+ u8 *icv;
+ u8 *work_iph;
+ struct sk_buff *trailer;
+ struct crypto_ahash *ahash;
+ struct ahash_request *req;
+ struct scatterlist *sg;
+ struct ip_auth_hdr *ah;
+ struct ipv6hdr *ip6h;
+ struct ah_data *ahp;
+ u16 hdr_len;
+ u16 ah_hlen;
+ int nexthdr;
+ int nfrags;
+ int err = -ENOMEM;
+
+ if (!pskb_may_pull(skb, sizeof(struct ip_auth_hdr)))
+ goto out;
+
+ /* We are going to _remove_ AH header to keep sockets happy,
+ * so... Later this can change. */
+ if (skb_cloned(skb) &&
+ pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+ goto out;
+
+ skb->ip_summed = CHECKSUM_NONE;
+
+ hdr_len = skb_network_header_len(skb);
+ ah = (struct ip_auth_hdr *)skb->data;
+ ahp = x->data;
+ ahash = ahp->ahash;
+
+ nexthdr = ah->nexthdr;
+ ah_hlen = (ah->hdrlen + 2) << 2;
+
+ if (ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_full_len) &&
+ ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len))
+ goto out;
+
+ if (!pskb_may_pull(skb, ah_hlen))
+ goto out;
+
+
+ if ((err = skb_cow_data(skb, 0, &trailer)) < 0)
+ goto out;
+ nfrags = err;
+
+ ah = (struct ip_auth_hdr *)skb->data;
+ ip6h = ipv6_hdr(skb);
+
+ skb_push(skb, hdr_len);
+
+ work_iph = ah_alloc_tmp(ahash, nfrags, hdr_len + ahp->icv_trunc_len);
+ if (!work_iph)
+ goto out;
+
+ auth_data = ah_tmp_auth(work_iph, hdr_len);
+ icv = ah_tmp_icv(ahash, auth_data, ahp->icv_trunc_len);
+ req = ah_tmp_req(ahash, icv);
+ sg = ah_req_sg(ahash, req);
+
+ memcpy(work_iph, ip6h, hdr_len);
+ memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len);
+ memset(ah->auth_data, 0, ahp->icv_trunc_len);
+
+ if (ipv6_clear_mutable_options(ip6h, hdr_len, XFRM_POLICY_IN))
+ goto out_free;
+
+ ip6h->priority = 0;
+ ip6h->flow_lbl[0] = 0;
+ ip6h->flow_lbl[1] = 0;
+ ip6h->flow_lbl[2] = 0;
+ ip6h->hop_limit = 0;
+
+ sg_init_table(sg, nfrags);
+ skb_to_sgvec(skb, sg, 0, skb->len);
+
+ ahash_request_set_crypt(req, sg, icv, skb->len);
+ ahash_request_set_callback(req, 0, ah6_input_done, skb);
+
+ AH_SKB_CB(skb)->tmp = work_iph;
+
+ err = crypto_ahash_digest(req);
+ if (err) {
+ if (err == -EINPROGRESS)
+ goto out;
+
+ goto out_free;
+ }
+
+ err = memcmp(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG: 0;
+ if (err)
+ goto out_free;
+
+ skb->network_header += ah_hlen;
+ memcpy(skb_network_header(skb), work_iph, hdr_len);
+ skb->transport_header = skb->network_header;
+ __skb_pull(skb, ah_hlen + hdr_len);
+
+ err = nexthdr;
+
+out_free:
+ kfree(work_iph);
+out:
+ return err;
+}
+
+static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+{
+ struct net *net = dev_net(skb->dev);
+ struct ipv6hdr *iph = (struct ipv6hdr*)skb->data;
+ struct ip_auth_hdr *ah = (struct ip_auth_hdr*)(skb->data+offset);
+ struct xfrm_state *x;
+
+ if (type != ICMPV6_DEST_UNREACH &&
+ type != ICMPV6_PKT_TOOBIG)
+ return;
+
+ x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6);
+ if (!x)
+ return;
+
+ NETDEBUG(KERN_DEBUG "pmtu discovery on SA AH/%08x/%pI6\n",
+ ntohl(ah->spi), &iph->daddr);
+
+ xfrm_state_put(x);
+}
+
+static int ah6_init_state(struct xfrm_state *x)
+{
+ struct ah_data *ahp = NULL;
+ struct xfrm_algo_desc *aalg_desc;
+ struct crypto_ahash *ahash;
+
+ if (!x->aalg)
+ goto error;
+
+ if (x->encap)
+ goto error;
+
+ ahp = kzalloc(sizeof(*ahp), GFP_KERNEL);
+ if (ahp == NULL)
+ return -ENOMEM;
+
+ ahash = crypto_alloc_ahash(x->aalg->alg_name, 0, 0);
+ if (IS_ERR(ahash))
+ goto error;
+
+ ahp->ahash = ahash;
+ if (crypto_ahash_setkey(ahash, x->aalg->alg_key,
+ (x->aalg->alg_key_len + 7) / 8))
+ goto error;
+
+ /*
+ * Lookup the algorithm description maintained by xfrm_algo,
+ * verify crypto transform properties, and store information
+ * we need for AH processing. This lookup cannot fail here
+ * after a successful crypto_alloc_hash().
+ */
+ aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0);
+ BUG_ON(!aalg_desc);
+
+ if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
+ crypto_ahash_digestsize(ahash)) {
+ printk(KERN_INFO "AH: %s digestsize %u != %hu\n",
+ x->aalg->alg_name, crypto_ahash_digestsize(ahash),
+ aalg_desc->uinfo.auth.icv_fullbits/8);
+ goto error;
+ }
+
+ ahp->icv_full_len = aalg_desc->uinfo.auth.icv_fullbits/8;
+ ahp->icv_trunc_len = x->aalg->alg_trunc_len/8;
+
+ BUG_ON(ahp->icv_trunc_len > MAX_AH_AUTH_LEN);
+
+ x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) +
+ ahp->icv_trunc_len);
+ switch (x->props.mode) {
+ case XFRM_MODE_BEET:
+ case XFRM_MODE_TRANSPORT:
+ break;
+ case XFRM_MODE_TUNNEL:
+ x->props.header_len += sizeof(struct ipv6hdr);
+ break;
+ default:
+ goto error;
+ }
+ x->data = ahp;
+
+ return 0;
+
+error:
+ if (ahp) {
+ crypto_free_ahash(ahp->ahash);
+ kfree(ahp);
+ }
+ return -EINVAL;
+}
+
+static void ah6_destroy(struct xfrm_state *x)
+{
+ struct ah_data *ahp = x->data;
+
+ if (!ahp)
+ return;
+
+ crypto_free_ahash(ahp->ahash);
+ kfree(ahp);
+}
+
+static const struct xfrm_type ah6_type =
+{
+ .description = "AH6",
+ .owner = THIS_MODULE,
+ .proto = IPPROTO_AH,
+ .flags = XFRM_TYPE_REPLAY_PROT,
+ .init_state = ah6_init_state,
+ .destructor = ah6_destroy,
+ .input = ah6_input,
+ .output = ah6_output,
+ .hdr_offset = xfrm6_find_1stfragopt,
+};
+
+static const struct inet6_protocol ah6_protocol = {
+ .handler = xfrm6_rcv,
+ .err_handler = ah6_err,
+ .flags = INET6_PROTO_NOPOLICY,
+};
+
+static int __init ah6_init(void)
+{
+ if (xfrm_register_type(&ah6_type, AF_INET6) < 0) {
+ printk(KERN_INFO "ipv6 ah init: can't add xfrm type\n");
+ return -EAGAIN;
+ }
+
+ if (inet6_add_protocol(&ah6_protocol, IPPROTO_AH) < 0) {
+ printk(KERN_INFO "ipv6 ah init: can't add protocol\n");
+ xfrm_unregister_type(&ah6_type, AF_INET6);
+ return -EAGAIN;
+ }
+
+ return 0;
+}
+
+static void __exit ah6_fini(void)
+{
+ if (inet6_del_protocol(&ah6_protocol, IPPROTO_AH) < 0)
+ printk(KERN_INFO "ipv6 ah close: can't remove protocol\n");
+
+ if (xfrm_unregister_type(&ah6_type, AF_INET6) < 0)
+ printk(KERN_INFO "ipv6 ah close: can't remove xfrm type\n");
+
+}
+
+module_init(ah6_init);
+module_exit(ah6_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_AH);
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
new file mode 100644
index 00000000..db00d27f
--- /dev/null
+++ b/net/ipv6/anycast.c
@@ -0,0 +1,520 @@
+/*
+ * Anycast support for IPv6
+ * Linux INET6 implementation
+ *
+ * Authors:
+ * David L Stevens (dlstevens@us.ibm.com)
+ *
+ * based heavily on net/ipv6/mcast.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/capability.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/random.h>
+#include <linux/string.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/route.h>
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+
+#include <net/net_namespace.h>
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/if_inet6.h>
+#include <net/ndisc.h>
+#include <net/addrconf.h>
+#include <net/ip6_route.h>
+
+#include <net/checksum.h>
+
+static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr);
+
+/* Big ac list lock for all the sockets */
+static DEFINE_RWLOCK(ipv6_sk_ac_lock);
+
+
+/*
+ * socket join an anycast group
+ */
+
+int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct net_device *dev = NULL;
+ struct inet6_dev *idev;
+ struct ipv6_ac_socklist *pac;
+ struct net *net = sock_net(sk);
+ int ishost = !net->ipv6.devconf_all->forwarding;
+ int err = 0;
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+ if (ipv6_addr_is_multicast(addr))
+ return -EINVAL;
+ if (ipv6_chk_addr(net, addr, NULL, 0))
+ return -EINVAL;
+
+ pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL);
+ if (pac == NULL)
+ return -ENOMEM;
+ pac->acl_next = NULL;
+ pac->acl_addr = *addr;
+
+ rcu_read_lock();
+ if (ifindex == 0) {
+ struct rt6_info *rt;
+
+ rt = rt6_lookup(net, addr, NULL, 0, 0);
+ if (rt) {
+ dev = rt->dst.dev;
+ dst_release(&rt->dst);
+ } else if (ishost) {
+ err = -EADDRNOTAVAIL;
+ goto error;
+ } else {
+ /* router, no matching interface: just pick one */
+ dev = dev_get_by_flags_rcu(net, IFF_UP,
+ IFF_UP | IFF_LOOPBACK);
+ }
+ } else
+ dev = dev_get_by_index_rcu(net, ifindex);
+
+ if (dev == NULL) {
+ err = -ENODEV;
+ goto error;
+ }
+
+ idev = __in6_dev_get(dev);
+ if (!idev) {
+ if (ifindex)
+ err = -ENODEV;
+ else
+ err = -EADDRNOTAVAIL;
+ goto error;
+ }
+ /* reset ishost, now that we have a specific device */
+ ishost = !idev->cnf.forwarding;
+
+ pac->acl_ifindex = dev->ifindex;
+
+ /* XXX
+ * For hosts, allow link-local or matching prefix anycasts.
+ * This obviates the need for propagating anycast routes while
+ * still allowing some non-router anycast participation.
+ */
+ if (!ipv6_chk_prefix(addr, dev)) {
+ if (ishost)
+ err = -EADDRNOTAVAIL;
+ if (err)
+ goto error;
+ }
+
+ err = ipv6_dev_ac_inc(dev, addr);
+ if (!err) {
+ write_lock_bh(&ipv6_sk_ac_lock);
+ pac->acl_next = np->ipv6_ac_list;
+ np->ipv6_ac_list = pac;
+ write_unlock_bh(&ipv6_sk_ac_lock);
+ pac = NULL;
+ }
+
+error:
+ rcu_read_unlock();
+ if (pac)
+ sock_kfree_s(sk, pac, sizeof(*pac));
+ return err;
+}
+
+/*
+ * socket leave an anycast group
+ */
+int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct net_device *dev;
+ struct ipv6_ac_socklist *pac, *prev_pac;
+ struct net *net = sock_net(sk);
+
+ write_lock_bh(&ipv6_sk_ac_lock);
+ prev_pac = NULL;
+ for (pac = np->ipv6_ac_list; pac; pac = pac->acl_next) {
+ if ((ifindex == 0 || pac->acl_ifindex == ifindex) &&
+ ipv6_addr_equal(&pac->acl_addr, addr))
+ break;
+ prev_pac = pac;
+ }
+ if (!pac) {
+ write_unlock_bh(&ipv6_sk_ac_lock);
+ return -ENOENT;
+ }
+ if (prev_pac)
+ prev_pac->acl_next = pac->acl_next;
+ else
+ np->ipv6_ac_list = pac->acl_next;
+
+ write_unlock_bh(&ipv6_sk_ac_lock);
+
+ rcu_read_lock();
+ dev = dev_get_by_index_rcu(net, pac->acl_ifindex);
+ if (dev)
+ ipv6_dev_ac_dec(dev, &pac->acl_addr);
+ rcu_read_unlock();
+
+ sock_kfree_s(sk, pac, sizeof(*pac));
+ return 0;
+}
+
+void ipv6_sock_ac_close(struct sock *sk)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct net_device *dev = NULL;
+ struct ipv6_ac_socklist *pac;
+ struct net *net = sock_net(sk);
+ int prev_index;
+
+ write_lock_bh(&ipv6_sk_ac_lock);
+ pac = np->ipv6_ac_list;
+ np->ipv6_ac_list = NULL;
+ write_unlock_bh(&ipv6_sk_ac_lock);
+
+ prev_index = 0;
+ rcu_read_lock();
+ while (pac) {
+ struct ipv6_ac_socklist *next = pac->acl_next;
+
+ if (pac->acl_ifindex != prev_index) {
+ dev = dev_get_by_index_rcu(net, pac->acl_ifindex);
+ prev_index = pac->acl_ifindex;
+ }
+ if (dev)
+ ipv6_dev_ac_dec(dev, &pac->acl_addr);
+ sock_kfree_s(sk, pac, sizeof(*pac));
+ pac = next;
+ }
+ rcu_read_unlock();
+}
+
+static void aca_put(struct ifacaddr6 *ac)
+{
+ if (atomic_dec_and_test(&ac->aca_refcnt)) {
+ in6_dev_put(ac->aca_idev);
+ dst_release(&ac->aca_rt->dst);
+ kfree(ac);
+ }
+}
+
+/*
+ * device anycast group inc (add if not found)
+ */
+int ipv6_dev_ac_inc(struct net_device *dev, const struct in6_addr *addr)
+{
+ struct ifacaddr6 *aca;
+ struct inet6_dev *idev;
+ struct rt6_info *rt;
+ int err;
+
+ idev = in6_dev_get(dev);
+
+ if (idev == NULL)
+ return -EINVAL;
+
+ write_lock_bh(&idev->lock);
+ if (idev->dead) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ for (aca = idev->ac_list; aca; aca = aca->aca_next) {
+ if (ipv6_addr_equal(&aca->aca_addr, addr)) {
+ aca->aca_users++;
+ err = 0;
+ goto out;
+ }
+ }
+
+ /*
+ * not found: create a new one.
+ */
+
+ aca = kzalloc(sizeof(struct ifacaddr6), GFP_ATOMIC);
+
+ if (aca == NULL) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ rt = addrconf_dst_alloc(idev, addr, true);
+ if (IS_ERR(rt)) {
+ kfree(aca);
+ err = PTR_ERR(rt);
+ goto out;
+ }
+
+ aca->aca_addr = *addr;
+ aca->aca_idev = idev;
+ aca->aca_rt = rt;
+ aca->aca_users = 1;
+ /* aca_tstamp should be updated upon changes */
+ aca->aca_cstamp = aca->aca_tstamp = jiffies;
+ atomic_set(&aca->aca_refcnt, 2);
+ spin_lock_init(&aca->aca_lock);
+
+ aca->aca_next = idev->ac_list;
+ idev->ac_list = aca;
+ write_unlock_bh(&idev->lock);
+
+ ip6_ins_rt(rt);
+
+ addrconf_join_solict(dev, &aca->aca_addr);
+
+ aca_put(aca);
+ return 0;
+out:
+ write_unlock_bh(&idev->lock);
+ in6_dev_put(idev);
+ return err;
+}
+
+/*
+ * device anycast group decrement
+ */
+int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
+{
+ struct ifacaddr6 *aca, *prev_aca;
+
+ write_lock_bh(&idev->lock);
+ prev_aca = NULL;
+ for (aca = idev->ac_list; aca; aca = aca->aca_next) {
+ if (ipv6_addr_equal(&aca->aca_addr, addr))
+ break;
+ prev_aca = aca;
+ }
+ if (!aca) {
+ write_unlock_bh(&idev->lock);
+ return -ENOENT;
+ }
+ if (--aca->aca_users > 0) {
+ write_unlock_bh(&idev->lock);
+ return 0;
+ }
+ if (prev_aca)
+ prev_aca->aca_next = aca->aca_next;
+ else
+ idev->ac_list = aca->aca_next;
+ write_unlock_bh(&idev->lock);
+ addrconf_leave_solict(idev, &aca->aca_addr);
+
+ dst_hold(&aca->aca_rt->dst);
+ ip6_del_rt(aca->aca_rt);
+
+ aca_put(aca);
+ return 0;
+}
+
+/* called with rcu_read_lock() */
+static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr)
+{
+ struct inet6_dev *idev = __in6_dev_get(dev);
+
+ if (idev == NULL)
+ return -ENODEV;
+ return __ipv6_dev_ac_dec(idev, addr);
+}
+
+/*
+ * check if the interface has this anycast address
+ * called with rcu_read_lock()
+ */
+static int ipv6_chk_acast_dev(struct net_device *dev, const struct in6_addr *addr)
+{
+ struct inet6_dev *idev;
+ struct ifacaddr6 *aca;
+
+ idev = __in6_dev_get(dev);
+ if (idev) {
+ read_lock_bh(&idev->lock);
+ for (aca = idev->ac_list; aca; aca = aca->aca_next)
+ if (ipv6_addr_equal(&aca->aca_addr, addr))
+ break;
+ read_unlock_bh(&idev->lock);
+ return aca != NULL;
+ }
+ return 0;
+}
+
+/*
+ * check if given interface (or any, if dev==0) has this anycast address
+ */
+int ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
+ const struct in6_addr *addr)
+{
+ int found = 0;
+
+ rcu_read_lock();
+ if (dev)
+ found = ipv6_chk_acast_dev(dev, addr);
+ else
+ for_each_netdev_rcu(net, dev)
+ if (ipv6_chk_acast_dev(dev, addr)) {
+ found = 1;
+ break;
+ }
+ rcu_read_unlock();
+ return found;
+}
+
+
+#ifdef CONFIG_PROC_FS
+struct ac6_iter_state {
+ struct seq_net_private p;
+ struct net_device *dev;
+ struct inet6_dev *idev;
+};
+
+#define ac6_seq_private(seq) ((struct ac6_iter_state *)(seq)->private)
+
+static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq)
+{
+ struct ifacaddr6 *im = NULL;
+ struct ac6_iter_state *state = ac6_seq_private(seq);
+ struct net *net = seq_file_net(seq);
+
+ state->idev = NULL;
+ for_each_netdev_rcu(net, state->dev) {
+ struct inet6_dev *idev;
+ idev = __in6_dev_get(state->dev);
+ if (!idev)
+ continue;
+ read_lock_bh(&idev->lock);
+ im = idev->ac_list;
+ if (im) {
+ state->idev = idev;
+ break;
+ }
+ read_unlock_bh(&idev->lock);
+ }
+ return im;
+}
+
+static struct ifacaddr6 *ac6_get_next(struct seq_file *seq, struct ifacaddr6 *im)
+{
+ struct ac6_iter_state *state = ac6_seq_private(seq);
+
+ im = im->aca_next;
+ while (!im) {
+ if (likely(state->idev != NULL))
+ read_unlock_bh(&state->idev->lock);
+
+ state->dev = next_net_device_rcu(state->dev);
+ if (!state->dev) {
+ state->idev = NULL;
+ break;
+ }
+ state->idev = __in6_dev_get(state->dev);
+ if (!state->idev)
+ continue;
+ read_lock_bh(&state->idev->lock);
+ im = state->idev->ac_list;
+ }
+ return im;
+}
+
+static struct ifacaddr6 *ac6_get_idx(struct seq_file *seq, loff_t pos)
+{
+ struct ifacaddr6 *im = ac6_get_first(seq);
+ if (im)
+ while (pos && (im = ac6_get_next(seq, im)) != NULL)
+ --pos;
+ return pos ? NULL : im;
+}
+
+static void *ac6_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(RCU)
+{
+ rcu_read_lock();
+ return ac6_get_idx(seq, *pos);
+}
+
+static void *ac6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct ifacaddr6 *im = ac6_get_next(seq, v);
+
+ ++*pos;
+ return im;
+}
+
+static void ac6_seq_stop(struct seq_file *seq, void *v)
+ __releases(RCU)
+{
+ struct ac6_iter_state *state = ac6_seq_private(seq);
+
+ if (likely(state->idev != NULL)) {
+ read_unlock_bh(&state->idev->lock);
+ state->idev = NULL;
+ }
+ rcu_read_unlock();
+}
+
+static int ac6_seq_show(struct seq_file *seq, void *v)
+{
+ struct ifacaddr6 *im = (struct ifacaddr6 *)v;
+ struct ac6_iter_state *state = ac6_seq_private(seq);
+
+ seq_printf(seq, "%-4d %-15s %pi6 %5d\n",
+ state->dev->ifindex, state->dev->name,
+ &im->aca_addr, im->aca_users);
+ return 0;
+}
+
+static const struct seq_operations ac6_seq_ops = {
+ .start = ac6_seq_start,
+ .next = ac6_seq_next,
+ .stop = ac6_seq_stop,
+ .show = ac6_seq_show,
+};
+
+static int ac6_seq_open(struct inode *inode, struct file *file)
+{
+ return seq_open_net(inode, file, &ac6_seq_ops,
+ sizeof(struct ac6_iter_state));
+}
+
+static const struct file_operations ac6_seq_fops = {
+ .owner = THIS_MODULE,
+ .open = ac6_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_net,
+};
+
+int __net_init ac6_proc_init(struct net *net)
+{
+ if (!proc_net_fops_create(net, "anycast6", S_IRUGO, &ac6_seq_fops))
+ return -ENOMEM;
+
+ return 0;
+}
+
+void ac6_proc_exit(struct net *net)
+{
+ proc_net_remove(net, "anycast6");
+}
+#endif
+
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
new file mode 100644
index 00000000..76832c8d
--- /dev/null
+++ b/net/ipv6/datagram.c
@@ -0,0 +1,872 @@
+/*
+ * common UDP/RAW code
+ * Linux INET6 implementation
+ *
+ * Authors:
+ * Pedro Roque <roque@di.fc.ul.pt>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/capability.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in6.h>
+#include <linux/ipv6.h>
+#include <linux/route.h>
+#include <linux/slab.h>
+
+#include <net/ipv6.h>
+#include <net/ndisc.h>
+#include <net/addrconf.h>
+#include <net/transp_v6.h>
+#include <net/ip6_route.h>
+#include <net/tcp_states.h>
+
+#include <linux/errqueue.h>
+#include <asm/uaccess.h>
+
+static inline int ipv6_mapped_addr_any(const struct in6_addr *a)
+{
+ return (ipv6_addr_v4mapped(a) && (a->s6_addr32[3] == 0));
+}
+
+int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+{
+ struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
+ struct inet_sock *inet = inet_sk(sk);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct in6_addr *daddr, *final_p, final;
+ struct dst_entry *dst;
+ struct flowi6 fl6;
+ struct ip6_flowlabel *flowlabel = NULL;
+ struct ipv6_txoptions *opt;
+ int addr_type;
+ int err;
+
+ if (usin->sin6_family == AF_INET) {
+ if (__ipv6_only_sock(sk))
+ return -EAFNOSUPPORT;
+ err = ip4_datagram_connect(sk, uaddr, addr_len);
+ goto ipv4_connected;
+ }
+
+ if (addr_len < SIN6_LEN_RFC2133)
+ return -EINVAL;
+
+ if (usin->sin6_family != AF_INET6)
+ return -EAFNOSUPPORT;
+
+ memset(&fl6, 0, sizeof(fl6));
+ if (np->sndflow) {
+ fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
+ if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
+ flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
+ if (flowlabel == NULL)
+ return -EINVAL;
+ usin->sin6_addr = flowlabel->dst;
+ }
+ }
+
+ addr_type = ipv6_addr_type(&usin->sin6_addr);
+
+ if (addr_type == IPV6_ADDR_ANY) {
+ /*
+ * connect to self
+ */
+ usin->sin6_addr.s6_addr[15] = 0x01;
+ }
+
+ daddr = &usin->sin6_addr;
+
+ if (addr_type == IPV6_ADDR_MAPPED) {
+ struct sockaddr_in sin;
+
+ if (__ipv6_only_sock(sk)) {
+ err = -ENETUNREACH;
+ goto out;
+ }
+ sin.sin_family = AF_INET;
+ sin.sin_addr.s_addr = daddr->s6_addr32[3];
+ sin.sin_port = usin->sin6_port;
+
+ err = ip4_datagram_connect(sk,
+ (struct sockaddr*) &sin,
+ sizeof(sin));
+
+ipv4_connected:
+ if (err)
+ goto out;
+
+ ipv6_addr_set_v4mapped(inet->inet_daddr, &np->daddr);
+
+ if (ipv6_addr_any(&np->saddr) ||
+ ipv6_mapped_addr_any(&np->saddr))
+ ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
+
+ if (ipv6_addr_any(&np->rcv_saddr) ||
+ ipv6_mapped_addr_any(&np->rcv_saddr)) {
+ ipv6_addr_set_v4mapped(inet->inet_rcv_saddr,
+ &np->rcv_saddr);
+ if (sk->sk_prot->rehash)
+ sk->sk_prot->rehash(sk);
+ }
+
+ goto out;
+ }
+
+ if (addr_type&IPV6_ADDR_LINKLOCAL) {
+ if (addr_len >= sizeof(struct sockaddr_in6) &&
+ usin->sin6_scope_id) {
+ if (sk->sk_bound_dev_if &&
+ sk->sk_bound_dev_if != usin->sin6_scope_id) {
+ err = -EINVAL;
+ goto out;
+ }
+ sk->sk_bound_dev_if = usin->sin6_scope_id;
+ }
+
+ if (!sk->sk_bound_dev_if && (addr_type & IPV6_ADDR_MULTICAST))
+ sk->sk_bound_dev_if = np->mcast_oif;
+
+ /* Connect to link-local address requires an interface */
+ if (!sk->sk_bound_dev_if) {
+ err = -EINVAL;
+ goto out;
+ }
+ }
+
+ np->daddr = *daddr;
+ np->flow_label = fl6.flowlabel;
+
+ inet->inet_dport = usin->sin6_port;
+
+ /*
+ * Check for a route to destination an obtain the
+ * destination cache for it.
+ */
+
+ fl6.flowi6_proto = sk->sk_protocol;
+ fl6.daddr = np->daddr;
+ fl6.saddr = np->saddr;
+ fl6.flowi6_oif = sk->sk_bound_dev_if;
+ fl6.flowi6_mark = sk->sk_mark;
+ fl6.fl6_dport = inet->inet_dport;
+ fl6.fl6_sport = inet->inet_sport;
+
+ if (!fl6.flowi6_oif && (addr_type&IPV6_ADDR_MULTICAST))
+ fl6.flowi6_oif = np->mcast_oif;
+
+ security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
+
+ opt = flowlabel ? flowlabel->opt : np->opt;
+ final_p = fl6_update_dst(&fl6, opt, &final);
+
+ dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true);
+ err = 0;
+ if (IS_ERR(dst)) {
+ err = PTR_ERR(dst);
+ goto out;
+ }
+
+ /* source address lookup done in ip6_dst_lookup */
+
+ if (ipv6_addr_any(&np->saddr))
+ np->saddr = fl6.saddr;
+
+ if (ipv6_addr_any(&np->rcv_saddr)) {
+ np->rcv_saddr = fl6.saddr;
+ inet->inet_rcv_saddr = LOOPBACK4_IPV6;
+ if (sk->sk_prot->rehash)
+ sk->sk_prot->rehash(sk);
+ }
+
+ ip6_dst_store(sk, dst,
+ ipv6_addr_equal(&fl6.daddr, &np->daddr) ?
+ &np->daddr : NULL,
+#ifdef CONFIG_IPV6_SUBTREES
+ ipv6_addr_equal(&fl6.saddr, &np->saddr) ?
+ &np->saddr :
+#endif
+ NULL);
+
+ sk->sk_state = TCP_ESTABLISHED;
+out:
+ fl6_sock_release(flowlabel);
+ return err;
+}
+
+void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
+ __be16 port, u32 info, u8 *payload)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct icmp6hdr *icmph = icmp6_hdr(skb);
+ struct sock_exterr_skb *serr;
+
+ if (!np->recverr)
+ return;
+
+ skb = skb_clone(skb, GFP_ATOMIC);
+ if (!skb)
+ return;
+
+ skb->protocol = htons(ETH_P_IPV6);
+
+ serr = SKB_EXT_ERR(skb);
+ serr->ee.ee_errno = err;
+ serr->ee.ee_origin = SO_EE_ORIGIN_ICMP6;
+ serr->ee.ee_type = icmph->icmp6_type;
+ serr->ee.ee_code = icmph->icmp6_code;
+ serr->ee.ee_pad = 0;
+ serr->ee.ee_info = info;
+ serr->ee.ee_data = 0;
+ serr->addr_offset = (u8 *)&(((struct ipv6hdr *)(icmph + 1))->daddr) -
+ skb_network_header(skb);
+ serr->port = port;
+
+ __skb_pull(skb, payload - skb->data);
+ skb_reset_transport_header(skb);
+
+ if (sock_queue_err_skb(sk, skb))
+ kfree_skb(skb);
+}
+
+void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct sock_exterr_skb *serr;
+ struct ipv6hdr *iph;
+ struct sk_buff *skb;
+
+ if (!np->recverr)
+ return;
+
+ skb = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
+ if (!skb)
+ return;
+
+ skb->protocol = htons(ETH_P_IPV6);
+
+ skb_put(skb, sizeof(struct ipv6hdr));
+ skb_reset_network_header(skb);
+ iph = ipv6_hdr(skb);
+ iph->daddr = fl6->daddr;
+
+ serr = SKB_EXT_ERR(skb);
+ serr->ee.ee_errno = err;
+ serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
+ serr->ee.ee_type = 0;
+ serr->ee.ee_code = 0;
+ serr->ee.ee_pad = 0;
+ serr->ee.ee_info = info;
+ serr->ee.ee_data = 0;
+ serr->addr_offset = (u8 *)&iph->daddr - skb_network_header(skb);
+ serr->port = fl6->fl6_dport;
+
+ __skb_pull(skb, skb_tail_pointer(skb) - skb->data);
+ skb_reset_transport_header(skb);
+
+ if (sock_queue_err_skb(sk, skb))
+ kfree_skb(skb);
+}
+
+void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6hdr *iph;
+ struct sk_buff *skb;
+ struct ip6_mtuinfo *mtu_info;
+
+ if (!np->rxopt.bits.rxpmtu)
+ return;
+
+ skb = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
+ if (!skb)
+ return;
+
+ skb_put(skb, sizeof(struct ipv6hdr));
+ skb_reset_network_header(skb);
+ iph = ipv6_hdr(skb);
+ iph->daddr = fl6->daddr;
+
+ mtu_info = IP6CBMTU(skb);
+
+ mtu_info->ip6m_mtu = mtu;
+ mtu_info->ip6m_addr.sin6_family = AF_INET6;
+ mtu_info->ip6m_addr.sin6_port = 0;
+ mtu_info->ip6m_addr.sin6_flowinfo = 0;
+ mtu_info->ip6m_addr.sin6_scope_id = fl6->flowi6_oif;
+ mtu_info->ip6m_addr.sin6_addr = ipv6_hdr(skb)->daddr;
+
+ __skb_pull(skb, skb_tail_pointer(skb) - skb->data);
+ skb_reset_transport_header(skb);
+
+ skb = xchg(&np->rxpmtu, skb);
+ kfree_skb(skb);
+}
+
+/*
+ * Handle MSG_ERRQUEUE
+ */
+int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct sock_exterr_skb *serr;
+ struct sk_buff *skb, *skb2;
+ struct sockaddr_in6 *sin;
+ struct {
+ struct sock_extended_err ee;
+ struct sockaddr_in6 offender;
+ } errhdr;
+ int err;
+ int copied;
+
+ err = -EAGAIN;
+ skb = skb_dequeue(&sk->sk_error_queue);
+ if (skb == NULL)
+ goto out;
+
+ copied = skb->len;
+ if (copied > len) {
+ msg->msg_flags |= MSG_TRUNC;
+ copied = len;
+ }
+ err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+ if (err)
+ goto out_free_skb;
+
+ sock_recv_timestamp(msg, sk, skb);
+
+ serr = SKB_EXT_ERR(skb);
+
+ sin = (struct sockaddr_in6 *)msg->msg_name;
+ if (sin) {
+ const unsigned char *nh = skb_network_header(skb);
+ sin->sin6_family = AF_INET6;
+ sin->sin6_flowinfo = 0;
+ sin->sin6_port = serr->port;
+ sin->sin6_scope_id = 0;
+ if (skb->protocol == htons(ETH_P_IPV6)) {
+ sin->sin6_addr =
+ *(struct in6_addr *)(nh + serr->addr_offset);
+ if (np->sndflow)
+ sin->sin6_flowinfo =
+ (*(__be32 *)(nh + serr->addr_offset - 24) &
+ IPV6_FLOWINFO_MASK);
+ if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
+ sin->sin6_scope_id = IP6CB(skb)->iif;
+ } else {
+ ipv6_addr_set_v4mapped(*(__be32 *)(nh + serr->addr_offset),
+ &sin->sin6_addr);
+ }
+ }
+
+ memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err));
+ sin = &errhdr.offender;
+ sin->sin6_family = AF_UNSPEC;
+ if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL) {
+ sin->sin6_family = AF_INET6;
+ sin->sin6_flowinfo = 0;
+ sin->sin6_scope_id = 0;
+ if (skb->protocol == htons(ETH_P_IPV6)) {
+ sin->sin6_addr = ipv6_hdr(skb)->saddr;
+ if (np->rxopt.all)
+ datagram_recv_ctl(sk, msg, skb);
+ if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
+ sin->sin6_scope_id = IP6CB(skb)->iif;
+ } else {
+ struct inet_sock *inet = inet_sk(sk);
+
+ ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr,
+ &sin->sin6_addr);
+ if (inet->cmsg_flags)
+ ip_cmsg_recv(msg, skb);
+ }
+ }
+
+ put_cmsg(msg, SOL_IPV6, IPV6_RECVERR, sizeof(errhdr), &errhdr);
+
+ /* Now we could try to dump offended packet options */
+
+ msg->msg_flags |= MSG_ERRQUEUE;
+ err = copied;
+
+ /* Reset and regenerate socket error */
+ spin_lock_bh(&sk->sk_error_queue.lock);
+ sk->sk_err = 0;
+ if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
+ sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
+ spin_unlock_bh(&sk->sk_error_queue.lock);
+ sk->sk_error_report(sk);
+ } else {
+ spin_unlock_bh(&sk->sk_error_queue.lock);
+ }
+
+out_free_skb:
+ kfree_skb(skb);
+out:
+ return err;
+}
+
+/*
+ * Handle IPV6_RECVPATHMTU
+ */
+int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct sk_buff *skb;
+ struct sockaddr_in6 *sin;
+ struct ip6_mtuinfo mtu_info;
+ int err;
+ int copied;
+
+ err = -EAGAIN;
+ skb = xchg(&np->rxpmtu, NULL);
+ if (skb == NULL)
+ goto out;
+
+ copied = skb->len;
+ if (copied > len) {
+ msg->msg_flags |= MSG_TRUNC;
+ copied = len;
+ }
+ err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+ if (err)
+ goto out_free_skb;
+
+ sock_recv_timestamp(msg, sk, skb);
+
+ memcpy(&mtu_info, IP6CBMTU(skb), sizeof(mtu_info));
+
+ sin = (struct sockaddr_in6 *)msg->msg_name;
+ if (sin) {
+ sin->sin6_family = AF_INET6;
+ sin->sin6_flowinfo = 0;
+ sin->sin6_port = 0;
+ sin->sin6_scope_id = mtu_info.ip6m_addr.sin6_scope_id;
+ sin->sin6_addr = mtu_info.ip6m_addr.sin6_addr;
+ }
+
+ put_cmsg(msg, SOL_IPV6, IPV6_PATHMTU, sizeof(mtu_info), &mtu_info);
+
+ err = copied;
+
+out_free_skb:
+ kfree_skb(skb);
+out:
+ return err;
+}
+
+
+int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct inet6_skb_parm *opt = IP6CB(skb);
+ unsigned char *nh = skb_network_header(skb);
+
+ if (np->rxopt.bits.rxinfo) {
+ struct in6_pktinfo src_info;
+
+ src_info.ipi6_ifindex = opt->iif;
+ src_info.ipi6_addr = ipv6_hdr(skb)->daddr;
+ put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
+ }
+
+ if (np->rxopt.bits.rxhlim) {
+ int hlim = ipv6_hdr(skb)->hop_limit;
+ put_cmsg(msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim);
+ }
+
+ if (np->rxopt.bits.rxtclass) {
+ int tclass = ipv6_tclass(ipv6_hdr(skb));
+ put_cmsg(msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass);
+ }
+
+ if (np->rxopt.bits.rxflow && (*(__be32 *)nh & IPV6_FLOWINFO_MASK)) {
+ __be32 flowinfo = *(__be32 *)nh & IPV6_FLOWINFO_MASK;
+ put_cmsg(msg, SOL_IPV6, IPV6_FLOWINFO, sizeof(flowinfo), &flowinfo);
+ }
+
+ /* HbH is allowed only once */
+ if (np->rxopt.bits.hopopts && opt->hop) {
+ u8 *ptr = nh + opt->hop;
+ put_cmsg(msg, SOL_IPV6, IPV6_HOPOPTS, (ptr[1]+1)<<3, ptr);
+ }
+
+ if (opt->lastopt &&
+ (np->rxopt.bits.dstopts || np->rxopt.bits.srcrt)) {
+ /*
+ * Silly enough, but we need to reparse in order to
+ * report extension headers (except for HbH)
+ * in order.
+ *
+ * Also note that IPV6_RECVRTHDRDSTOPTS is NOT
+ * (and WILL NOT be) defined because
+ * IPV6_RECVDSTOPTS is more generic. --yoshfuji
+ */
+ unsigned int off = sizeof(struct ipv6hdr);
+ u8 nexthdr = ipv6_hdr(skb)->nexthdr;
+
+ while (off <= opt->lastopt) {
+ unsigned len;
+ u8 *ptr = nh + off;
+
+ switch(nexthdr) {
+ case IPPROTO_DSTOPTS:
+ nexthdr = ptr[0];
+ len = (ptr[1] + 1) << 3;
+ if (np->rxopt.bits.dstopts)
+ put_cmsg(msg, SOL_IPV6, IPV6_DSTOPTS, len, ptr);
+ break;
+ case IPPROTO_ROUTING:
+ nexthdr = ptr[0];
+ len = (ptr[1] + 1) << 3;
+ if (np->rxopt.bits.srcrt)
+ put_cmsg(msg, SOL_IPV6, IPV6_RTHDR, len, ptr);
+ break;
+ case IPPROTO_AH:
+ nexthdr = ptr[0];
+ len = (ptr[1] + 2) << 2;
+ break;
+ default:
+ nexthdr = ptr[0];
+ len = (ptr[1] + 1) << 3;
+ break;
+ }
+
+ off += len;
+ }
+ }
+
+ /* socket options in old style */
+ if (np->rxopt.bits.rxoinfo) {
+ struct in6_pktinfo src_info;
+
+ src_info.ipi6_ifindex = opt->iif;
+ src_info.ipi6_addr = ipv6_hdr(skb)->daddr;
+ put_cmsg(msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info);
+ }
+ if (np->rxopt.bits.rxohlim) {
+ int hlim = ipv6_hdr(skb)->hop_limit;
+ put_cmsg(msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim);
+ }
+ if (np->rxopt.bits.ohopopts && opt->hop) {
+ u8 *ptr = nh + opt->hop;
+ put_cmsg(msg, SOL_IPV6, IPV6_2292HOPOPTS, (ptr[1]+1)<<3, ptr);
+ }
+ if (np->rxopt.bits.odstopts && opt->dst0) {
+ u8 *ptr = nh + opt->dst0;
+ put_cmsg(msg, SOL_IPV6, IPV6_2292DSTOPTS, (ptr[1]+1)<<3, ptr);
+ }
+ if (np->rxopt.bits.osrcrt && opt->srcrt) {
+ struct ipv6_rt_hdr *rthdr = (struct ipv6_rt_hdr *)(nh + opt->srcrt);
+ put_cmsg(msg, SOL_IPV6, IPV6_2292RTHDR, (rthdr->hdrlen+1) << 3, rthdr);
+ }
+ if (np->rxopt.bits.odstopts && opt->dst1) {
+ u8 *ptr = nh + opt->dst1;
+ put_cmsg(msg, SOL_IPV6, IPV6_2292DSTOPTS, (ptr[1]+1)<<3, ptr);
+ }
+ if (np->rxopt.bits.rxorigdstaddr) {
+ struct sockaddr_in6 sin6;
+ __be16 *ports = (__be16 *) skb_transport_header(skb);
+
+ if (skb_transport_offset(skb) + 4 <= skb->len) {
+ /* All current transport protocols have the port numbers in the
+ * first four bytes of the transport header and this function is
+ * written with this assumption in mind.
+ */
+
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_addr = ipv6_hdr(skb)->daddr;
+ sin6.sin6_port = ports[1];
+ sin6.sin6_flowinfo = 0;
+ sin6.sin6_scope_id = 0;
+
+ put_cmsg(msg, SOL_IPV6, IPV6_ORIGDSTADDR, sizeof(sin6), &sin6);
+ }
+ }
+ return 0;
+}
+
+int datagram_send_ctl(struct net *net, struct sock *sk,
+ struct msghdr *msg, struct flowi6 *fl6,
+ struct ipv6_txoptions *opt,
+ int *hlimit, int *tclass, int *dontfrag)
+{
+ struct in6_pktinfo *src_info;
+ struct cmsghdr *cmsg;
+ struct ipv6_rt_hdr *rthdr;
+ struct ipv6_opt_hdr *hdr;
+ int len;
+ int err = 0;
+
+ for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
+ int addr_type;
+
+ if (!CMSG_OK(msg, cmsg)) {
+ err = -EINVAL;
+ goto exit_f;
+ }
+
+ if (cmsg->cmsg_level != SOL_IPV6)
+ continue;
+
+ switch (cmsg->cmsg_type) {
+ case IPV6_PKTINFO:
+ case IPV6_2292PKTINFO:
+ {
+ struct net_device *dev = NULL;
+
+ if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct in6_pktinfo))) {
+ err = -EINVAL;
+ goto exit_f;
+ }
+
+ src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg);
+
+ if (src_info->ipi6_ifindex) {
+ if (fl6->flowi6_oif &&
+ src_info->ipi6_ifindex != fl6->flowi6_oif)
+ return -EINVAL;
+ fl6->flowi6_oif = src_info->ipi6_ifindex;
+ }
+
+ addr_type = __ipv6_addr_type(&src_info->ipi6_addr);
+
+ rcu_read_lock();
+ if (fl6->flowi6_oif) {
+ dev = dev_get_by_index_rcu(net, fl6->flowi6_oif);
+ if (!dev) {
+ rcu_read_unlock();
+ return -ENODEV;
+ }
+ } else if (addr_type & IPV6_ADDR_LINKLOCAL) {
+ rcu_read_unlock();
+ return -EINVAL;
+ }
+
+ if (addr_type != IPV6_ADDR_ANY) {
+ int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL;
+ if (!(inet_sk(sk)->freebind || inet_sk(sk)->transparent) &&
+ !ipv6_chk_addr(net, &src_info->ipi6_addr,
+ strict ? dev : NULL, 0))
+ err = -EINVAL;
+ else
+ fl6->saddr = src_info->ipi6_addr;
+ }
+
+ rcu_read_unlock();
+
+ if (err)
+ goto exit_f;
+
+ break;
+ }
+
+ case IPV6_FLOWINFO:
+ if (cmsg->cmsg_len < CMSG_LEN(4)) {
+ err = -EINVAL;
+ goto exit_f;
+ }
+
+ if (fl6->flowlabel&IPV6_FLOWINFO_MASK) {
+ if ((fl6->flowlabel^*(__be32 *)CMSG_DATA(cmsg))&~IPV6_FLOWINFO_MASK) {
+ err = -EINVAL;
+ goto exit_f;
+ }
+ }
+ fl6->flowlabel = IPV6_FLOWINFO_MASK & *(__be32 *)CMSG_DATA(cmsg);
+ break;
+
+ case IPV6_2292HOPOPTS:
+ case IPV6_HOPOPTS:
+ if (opt->hopopt || cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_opt_hdr))) {
+ err = -EINVAL;
+ goto exit_f;
+ }
+
+ hdr = (struct ipv6_opt_hdr *)CMSG_DATA(cmsg);
+ len = ((hdr->hdrlen + 1) << 3);
+ if (cmsg->cmsg_len < CMSG_LEN(len)) {
+ err = -EINVAL;
+ goto exit_f;
+ }
+ if (!capable(CAP_NET_RAW)) {
+ err = -EPERM;
+ goto exit_f;
+ }
+ opt->opt_nflen += len;
+ opt->hopopt = hdr;
+ break;
+
+ case IPV6_2292DSTOPTS:
+ if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_opt_hdr))) {
+ err = -EINVAL;
+ goto exit_f;
+ }
+
+ hdr = (struct ipv6_opt_hdr *)CMSG_DATA(cmsg);
+ len = ((hdr->hdrlen + 1) << 3);
+ if (cmsg->cmsg_len < CMSG_LEN(len)) {
+ err = -EINVAL;
+ goto exit_f;
+ }
+ if (!capable(CAP_NET_RAW)) {
+ err = -EPERM;
+ goto exit_f;
+ }
+ if (opt->dst1opt) {
+ err = -EINVAL;
+ goto exit_f;
+ }
+ opt->opt_flen += len;
+ opt->dst1opt = hdr;
+ break;
+
+ case IPV6_DSTOPTS:
+ case IPV6_RTHDRDSTOPTS:
+ if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_opt_hdr))) {
+ err = -EINVAL;
+ goto exit_f;
+ }
+
+ hdr = (struct ipv6_opt_hdr *)CMSG_DATA(cmsg);
+ len = ((hdr->hdrlen + 1) << 3);
+ if (cmsg->cmsg_len < CMSG_LEN(len)) {
+ err = -EINVAL;
+ goto exit_f;
+ }
+ if (!capable(CAP_NET_RAW)) {
+ err = -EPERM;
+ goto exit_f;
+ }
+ if (cmsg->cmsg_type == IPV6_DSTOPTS) {
+ opt->opt_flen += len;
+ opt->dst1opt = hdr;
+ } else {
+ opt->opt_nflen += len;
+ opt->dst0opt = hdr;
+ }
+ break;
+
+ case IPV6_2292RTHDR:
+ case IPV6_RTHDR:
+ if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_rt_hdr))) {
+ err = -EINVAL;
+ goto exit_f;
+ }
+
+ rthdr = (struct ipv6_rt_hdr *)CMSG_DATA(cmsg);
+
+ switch (rthdr->type) {
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ case IPV6_SRCRT_TYPE_2:
+ if (rthdr->hdrlen != 2 ||
+ rthdr->segments_left != 1) {
+ err = -EINVAL;
+ goto exit_f;
+ }
+ break;
+#endif
+ default:
+ err = -EINVAL;
+ goto exit_f;
+ }
+
+ len = ((rthdr->hdrlen + 1) << 3);
+
+ if (cmsg->cmsg_len < CMSG_LEN(len)) {
+ err = -EINVAL;
+ goto exit_f;
+ }
+
+ /* segments left must also match */
+ if ((rthdr->hdrlen >> 1) != rthdr->segments_left) {
+ err = -EINVAL;
+ goto exit_f;
+ }
+
+ opt->opt_nflen += len;
+ opt->srcrt = rthdr;
+
+ if (cmsg->cmsg_type == IPV6_2292RTHDR && opt->dst1opt) {
+ int dsthdrlen = ((opt->dst1opt->hdrlen+1)<<3);
+
+ opt->opt_nflen += dsthdrlen;
+ opt->dst0opt = opt->dst1opt;
+ opt->dst1opt = NULL;
+ opt->opt_flen -= dsthdrlen;
+ }
+
+ break;
+
+ case IPV6_2292HOPLIMIT:
+ case IPV6_HOPLIMIT:
+ if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) {
+ err = -EINVAL;
+ goto exit_f;
+ }
+
+ *hlimit = *(int *)CMSG_DATA(cmsg);
+ if (*hlimit < -1 || *hlimit > 0xff) {
+ err = -EINVAL;
+ goto exit_f;
+ }
+
+ break;
+
+ case IPV6_TCLASS:
+ {
+ int tc;
+
+ err = -EINVAL;
+ if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) {
+ goto exit_f;
+ }
+
+ tc = *(int *)CMSG_DATA(cmsg);
+ if (tc < -1 || tc > 0xff)
+ goto exit_f;
+
+ err = 0;
+ *tclass = tc;
+
+ break;
+ }
+
+ case IPV6_DONTFRAG:
+ {
+ int df;
+
+ err = -EINVAL;
+ if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) {
+ goto exit_f;
+ }
+
+ df = *(int *)CMSG_DATA(cmsg);
+ if (df < 0 || df > 1)
+ goto exit_f;
+
+ err = 0;
+ *dontfrag = df;
+
+ break;
+ }
+ default:
+ LIMIT_NETDEBUG(KERN_DEBUG "invalid cmsg type: %d\n",
+ cmsg->cmsg_type);
+ err = -EINVAL;
+ goto exit_f;
+ }
+ }
+
+exit_f:
+ return err;
+}
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
new file mode 100644
index 00000000..65dd5433
--- /dev/null
+++ b/net/ipv6/esp6.c
@@ -0,0 +1,674 @@
+/*
+ * Copyright (C)2002 USAGI/WIDE Project
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Authors
+ *
+ * Mitsuru KANDA @USAGI : IPv6 Support
+ * Kazunori MIYAZAWA @USAGI :
+ * Kunihiro Ishiguro <kunihiro@ipinfusion.com>
+ *
+ * This file is derived from net/ipv4/esp.c
+ */
+
+#include <crypto/aead.h>
+#include <crypto/authenc.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <net/ip.h>
+#include <net/xfrm.h>
+#include <net/esp.h>
+#include <linux/scatterlist.h>
+#include <linux/kernel.h>
+#include <linux/pfkeyv2.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <net/icmp.h>
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <linux/icmpv6.h>
+
+struct esp_skb_cb {
+ struct xfrm_skb_cb xfrm;
+ void *tmp;
+};
+
+#define ESP_SKB_CB(__skb) ((struct esp_skb_cb *)&((__skb)->cb[0]))
+
+static u32 esp6_get_mtu(struct xfrm_state *x, int mtu);
+
+/*
+ * Allocate an AEAD request structure with extra space for SG and IV.
+ *
+ * For alignment considerations the upper 32 bits of the sequence number are
+ * placed at the front, if present. Followed by the IV, the request and finally
+ * the SG list.
+ *
+ * TODO: Use spare space in skb for this where possible.
+ */
+static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqihlen)
+{
+ unsigned int len;
+
+ len = seqihlen;
+
+ len += crypto_aead_ivsize(aead);
+
+ if (len) {
+ len += crypto_aead_alignmask(aead) &
+ ~(crypto_tfm_ctx_alignment() - 1);
+ len = ALIGN(len, crypto_tfm_ctx_alignment());
+ }
+
+ len += sizeof(struct aead_givcrypt_request) + crypto_aead_reqsize(aead);
+ len = ALIGN(len, __alignof__(struct scatterlist));
+
+ len += sizeof(struct scatterlist) * nfrags;
+
+ return kmalloc(len, GFP_ATOMIC);
+}
+
+static inline __be32 *esp_tmp_seqhi(void *tmp)
+{
+ return PTR_ALIGN((__be32 *)tmp, __alignof__(__be32));
+}
+
+static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int seqhilen)
+{
+ return crypto_aead_ivsize(aead) ?
+ PTR_ALIGN((u8 *)tmp + seqhilen,
+ crypto_aead_alignmask(aead) + 1) : tmp + seqhilen;
+}
+
+static inline struct aead_givcrypt_request *esp_tmp_givreq(
+ struct crypto_aead *aead, u8 *iv)
+{
+ struct aead_givcrypt_request *req;
+
+ req = (void *)PTR_ALIGN(iv + crypto_aead_ivsize(aead),
+ crypto_tfm_ctx_alignment());
+ aead_givcrypt_set_tfm(req, aead);
+ return req;
+}
+
+static inline struct aead_request *esp_tmp_req(struct crypto_aead *aead, u8 *iv)
+{
+ struct aead_request *req;
+
+ req = (void *)PTR_ALIGN(iv + crypto_aead_ivsize(aead),
+ crypto_tfm_ctx_alignment());
+ aead_request_set_tfm(req, aead);
+ return req;
+}
+
+static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead,
+ struct aead_request *req)
+{
+ return (void *)ALIGN((unsigned long)(req + 1) +
+ crypto_aead_reqsize(aead),
+ __alignof__(struct scatterlist));
+}
+
+static inline struct scatterlist *esp_givreq_sg(
+ struct crypto_aead *aead, struct aead_givcrypt_request *req)
+{
+ return (void *)ALIGN((unsigned long)(req + 1) +
+ crypto_aead_reqsize(aead),
+ __alignof__(struct scatterlist));
+}
+
+static void esp_output_done(struct crypto_async_request *base, int err)
+{
+ struct sk_buff *skb = base->data;
+
+ kfree(ESP_SKB_CB(skb)->tmp);
+ xfrm_output_resume(skb, err);
+}
+
+static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+ int err;
+ struct ip_esp_hdr *esph;
+ struct crypto_aead *aead;
+ struct aead_givcrypt_request *req;
+ struct scatterlist *sg;
+ struct scatterlist *asg;
+ struct sk_buff *trailer;
+ void *tmp;
+ int blksize;
+ int clen;
+ int alen;
+ int plen;
+ int tfclen;
+ int nfrags;
+ int assoclen;
+ int sglists;
+ int seqhilen;
+ u8 *iv;
+ u8 *tail;
+ __be32 *seqhi;
+ struct esp_data *esp = x->data;
+
+ /* skb is pure payload to encrypt */
+ err = -ENOMEM;
+
+ aead = esp->aead;
+ alen = crypto_aead_authsize(aead);
+
+ tfclen = 0;
+ if (x->tfcpad) {
+ struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb);
+ u32 padto;
+
+ padto = min(x->tfcpad, esp6_get_mtu(x, dst->child_mtu_cached));
+ if (skb->len < padto)
+ tfclen = padto - skb->len;
+ }
+ blksize = ALIGN(crypto_aead_blocksize(aead), 4);
+ clen = ALIGN(skb->len + 2 + tfclen, blksize);
+ if (esp->padlen)
+ clen = ALIGN(clen, esp->padlen);
+ plen = clen - skb->len - tfclen;
+
+ err = skb_cow_data(skb, tfclen + plen + alen, &trailer);
+ if (err < 0)
+ goto error;
+ nfrags = err;
+
+ assoclen = sizeof(*esph);
+ sglists = 1;
+ seqhilen = 0;
+
+ if (x->props.flags & XFRM_STATE_ESN) {
+ sglists += 2;
+ seqhilen += sizeof(__be32);
+ assoclen += seqhilen;
+ }
+
+ tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen);
+ if (!tmp)
+ goto error;
+
+ seqhi = esp_tmp_seqhi(tmp);
+ iv = esp_tmp_iv(aead, tmp, seqhilen);
+ req = esp_tmp_givreq(aead, iv);
+ asg = esp_givreq_sg(aead, req);
+ sg = asg + sglists;
+
+ /* Fill padding... */
+ tail = skb_tail_pointer(trailer);
+ if (tfclen) {
+ memset(tail, 0, tfclen);
+ tail += tfclen;
+ }
+ do {
+ int i;
+ for (i = 0; i < plen - 2; i++)
+ tail[i] = i + 1;
+ } while (0);
+ tail[plen - 2] = plen - 2;
+ tail[plen - 1] = *skb_mac_header(skb);
+ pskb_put(skb, trailer, clen - skb->len + alen);
+
+ skb_push(skb, -skb_network_offset(skb));
+ esph = ip_esp_hdr(skb);
+ *skb_mac_header(skb) = IPPROTO_ESP;
+
+ esph->spi = x->id.spi;
+ esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
+
+ sg_init_table(sg, nfrags);
+ skb_to_sgvec(skb, sg,
+ esph->enc_data + crypto_aead_ivsize(aead) - skb->data,
+ clen + alen);
+
+ if ((x->props.flags & XFRM_STATE_ESN)) {
+ sg_init_table(asg, 3);
+ sg_set_buf(asg, &esph->spi, sizeof(__be32));
+ *seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
+ sg_set_buf(asg + 1, seqhi, seqhilen);
+ sg_set_buf(asg + 2, &esph->seq_no, sizeof(__be32));
+ } else
+ sg_init_one(asg, esph, sizeof(*esph));
+
+ aead_givcrypt_set_callback(req, 0, esp_output_done, skb);
+ aead_givcrypt_set_crypt(req, sg, sg, clen, iv);
+ aead_givcrypt_set_assoc(req, asg, assoclen);
+ aead_givcrypt_set_giv(req, esph->enc_data,
+ XFRM_SKB_CB(skb)->seq.output.low);
+
+ ESP_SKB_CB(skb)->tmp = tmp;
+ err = crypto_aead_givencrypt(req);
+ if (err == -EINPROGRESS)
+ goto error;
+
+ if (err == -EBUSY)
+ err = NET_XMIT_DROP;
+
+ kfree(tmp);
+
+error:
+ return err;
+}
+
+static int esp_input_done2(struct sk_buff *skb, int err)
+{
+ struct xfrm_state *x = xfrm_input_state(skb);
+ struct esp_data *esp = x->data;
+ struct crypto_aead *aead = esp->aead;
+ int alen = crypto_aead_authsize(aead);
+ int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
+ int elen = skb->len - hlen;
+ int hdr_len = skb_network_header_len(skb);
+ int padlen;
+ u8 nexthdr[2];
+
+ kfree(ESP_SKB_CB(skb)->tmp);
+
+ if (unlikely(err))
+ goto out;
+
+ if (skb_copy_bits(skb, skb->len - alen - 2, nexthdr, 2))
+ BUG();
+
+ err = -EINVAL;
+ padlen = nexthdr[0];
+ if (padlen + 2 + alen >= elen) {
+ LIMIT_NETDEBUG(KERN_WARNING "ipsec esp packet is garbage "
+ "padlen=%d, elen=%d\n", padlen + 2, elen - alen);
+ goto out;
+ }
+
+ /* ... check padding bits here. Silly. :-) */
+
+ pskb_trim(skb, skb->len - alen - padlen - 2);
+ __skb_pull(skb, hlen);
+ skb_set_transport_header(skb, -hdr_len);
+
+ err = nexthdr[1];
+
+ /* RFC4303: Drop dummy packets without any error */
+ if (err == IPPROTO_NONE)
+ err = -EINVAL;
+
+out:
+ return err;
+}
+
+static void esp_input_done(struct crypto_async_request *base, int err)
+{
+ struct sk_buff *skb = base->data;
+
+ xfrm_input_resume(skb, esp_input_done2(skb, err));
+}
+
+static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+ struct ip_esp_hdr *esph;
+ struct esp_data *esp = x->data;
+ struct crypto_aead *aead = esp->aead;
+ struct aead_request *req;
+ struct sk_buff *trailer;
+ int elen = skb->len - sizeof(*esph) - crypto_aead_ivsize(aead);
+ int nfrags;
+ int assoclen;
+ int sglists;
+ int seqhilen;
+ int ret = 0;
+ void *tmp;
+ __be32 *seqhi;
+ u8 *iv;
+ struct scatterlist *sg;
+ struct scatterlist *asg;
+
+ if (!pskb_may_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead))) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (elen <= 0) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if ((nfrags = skb_cow_data(skb, 0, &trailer)) < 0) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = -ENOMEM;
+
+ assoclen = sizeof(*esph);
+ sglists = 1;
+ seqhilen = 0;
+
+ if (x->props.flags & XFRM_STATE_ESN) {
+ sglists += 2;
+ seqhilen += sizeof(__be32);
+ assoclen += seqhilen;
+ }
+
+ tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen);
+ if (!tmp)
+ goto out;
+
+ ESP_SKB_CB(skb)->tmp = tmp;
+ seqhi = esp_tmp_seqhi(tmp);
+ iv = esp_tmp_iv(aead, tmp, seqhilen);
+ req = esp_tmp_req(aead, iv);
+ asg = esp_req_sg(aead, req);
+ sg = asg + sglists;
+
+ skb->ip_summed = CHECKSUM_NONE;
+
+ esph = (struct ip_esp_hdr *)skb->data;
+
+ /* Get ivec. This can be wrong, check against another impls. */
+ iv = esph->enc_data;
+
+ sg_init_table(sg, nfrags);
+ skb_to_sgvec(skb, sg, sizeof(*esph) + crypto_aead_ivsize(aead), elen);
+
+ if ((x->props.flags & XFRM_STATE_ESN)) {
+ sg_init_table(asg, 3);
+ sg_set_buf(asg, &esph->spi, sizeof(__be32));
+ *seqhi = XFRM_SKB_CB(skb)->seq.input.hi;
+ sg_set_buf(asg + 1, seqhi, seqhilen);
+ sg_set_buf(asg + 2, &esph->seq_no, sizeof(__be32));
+ } else
+ sg_init_one(asg, esph, sizeof(*esph));
+
+ aead_request_set_callback(req, 0, esp_input_done, skb);
+ aead_request_set_crypt(req, sg, sg, elen, iv);
+ aead_request_set_assoc(req, asg, assoclen);
+
+ ret = crypto_aead_decrypt(req);
+ if (ret == -EINPROGRESS)
+ goto out;
+
+ ret = esp_input_done2(skb, ret);
+
+out:
+ return ret;
+}
+
+static u32 esp6_get_mtu(struct xfrm_state *x, int mtu)
+{
+ struct esp_data *esp = x->data;
+ u32 blksize = ALIGN(crypto_aead_blocksize(esp->aead), 4);
+ u32 align = max_t(u32, blksize, esp->padlen);
+ unsigned int net_adj;
+
+ if (x->props.mode != XFRM_MODE_TUNNEL)
+ net_adj = sizeof(struct ipv6hdr);
+ else
+ net_adj = 0;
+
+ return ((mtu - x->props.header_len - crypto_aead_authsize(esp->aead) -
+ net_adj) & ~(align - 1)) + (net_adj - 2);
+}
+
+static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+{
+ struct net *net = dev_net(skb->dev);
+ const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data;
+ struct ip_esp_hdr *esph = (struct ip_esp_hdr *)(skb->data + offset);
+ struct xfrm_state *x;
+
+ if (type != ICMPV6_DEST_UNREACH &&
+ type != ICMPV6_PKT_TOOBIG)
+ return;
+
+ x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
+ esph->spi, IPPROTO_ESP, AF_INET6);
+ if (!x)
+ return;
+ printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%pI6\n",
+ ntohl(esph->spi), &iph->daddr);
+ xfrm_state_put(x);
+}
+
+static void esp6_destroy(struct xfrm_state *x)
+{
+ struct esp_data *esp = x->data;
+
+ if (!esp)
+ return;
+
+ crypto_free_aead(esp->aead);
+ kfree(esp);
+}
+
+static int esp_init_aead(struct xfrm_state *x)
+{
+ struct esp_data *esp = x->data;
+ struct crypto_aead *aead;
+ int err;
+
+ aead = crypto_alloc_aead(x->aead->alg_name, 0, 0);
+ err = PTR_ERR(aead);
+ if (IS_ERR(aead))
+ goto error;
+
+ esp->aead = aead;
+
+ err = crypto_aead_setkey(aead, x->aead->alg_key,
+ (x->aead->alg_key_len + 7) / 8);
+ if (err)
+ goto error;
+
+ err = crypto_aead_setauthsize(aead, x->aead->alg_icv_len / 8);
+ if (err)
+ goto error;
+
+error:
+ return err;
+}
+
+static int esp_init_authenc(struct xfrm_state *x)
+{
+ struct esp_data *esp = x->data;
+ struct crypto_aead *aead;
+ struct crypto_authenc_key_param *param;
+ struct rtattr *rta;
+ char *key;
+ char *p;
+ char authenc_name[CRYPTO_MAX_ALG_NAME];
+ unsigned int keylen;
+ int err;
+
+ err = -EINVAL;
+ if (x->ealg == NULL)
+ goto error;
+
+ err = -ENAMETOOLONG;
+
+ if ((x->props.flags & XFRM_STATE_ESN)) {
+ if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME,
+ "authencesn(%s,%s)",
+ x->aalg ? x->aalg->alg_name : "digest_null",
+ x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME)
+ goto error;
+ } else {
+ if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME,
+ "authenc(%s,%s)",
+ x->aalg ? x->aalg->alg_name : "digest_null",
+ x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME)
+ goto error;
+ }
+
+ aead = crypto_alloc_aead(authenc_name, 0, 0);
+ err = PTR_ERR(aead);
+ if (IS_ERR(aead))
+ goto error;
+
+ esp->aead = aead;
+
+ keylen = (x->aalg ? (x->aalg->alg_key_len + 7) / 8 : 0) +
+ (x->ealg->alg_key_len + 7) / 8 + RTA_SPACE(sizeof(*param));
+ err = -ENOMEM;
+ key = kmalloc(keylen, GFP_KERNEL);
+ if (!key)
+ goto error;
+
+ p = key;
+ rta = (void *)p;
+ rta->rta_type = CRYPTO_AUTHENC_KEYA_PARAM;
+ rta->rta_len = RTA_LENGTH(sizeof(*param));
+ param = RTA_DATA(rta);
+ p += RTA_SPACE(sizeof(*param));
+
+ if (x->aalg) {
+ struct xfrm_algo_desc *aalg_desc;
+
+ memcpy(p, x->aalg->alg_key, (x->aalg->alg_key_len + 7) / 8);
+ p += (x->aalg->alg_key_len + 7) / 8;
+
+ aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0);
+ BUG_ON(!aalg_desc);
+
+ err = -EINVAL;
+ if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
+ crypto_aead_authsize(aead)) {
+ NETDEBUG(KERN_INFO "ESP: %s digestsize %u != %hu\n",
+ x->aalg->alg_name,
+ crypto_aead_authsize(aead),
+ aalg_desc->uinfo.auth.icv_fullbits/8);
+ goto free_key;
+ }
+
+ err = crypto_aead_setauthsize(
+ aead, x->aalg->alg_trunc_len / 8);
+ if (err)
+ goto free_key;
+ }
+
+ param->enckeylen = cpu_to_be32((x->ealg->alg_key_len + 7) / 8);
+ memcpy(p, x->ealg->alg_key, (x->ealg->alg_key_len + 7) / 8);
+
+ err = crypto_aead_setkey(aead, key, keylen);
+
+free_key:
+ kfree(key);
+
+error:
+ return err;
+}
+
+static int esp6_init_state(struct xfrm_state *x)
+{
+ struct esp_data *esp;
+ struct crypto_aead *aead;
+ u32 align;
+ int err;
+
+ if (x->encap)
+ return -EINVAL;
+
+ esp = kzalloc(sizeof(*esp), GFP_KERNEL);
+ if (esp == NULL)
+ return -ENOMEM;
+
+ x->data = esp;
+
+ if (x->aead)
+ err = esp_init_aead(x);
+ else
+ err = esp_init_authenc(x);
+
+ if (err)
+ goto error;
+
+ aead = esp->aead;
+
+ esp->padlen = 0;
+
+ x->props.header_len = sizeof(struct ip_esp_hdr) +
+ crypto_aead_ivsize(aead);
+ switch (x->props.mode) {
+ case XFRM_MODE_BEET:
+ if (x->sel.family != AF_INET6)
+ x->props.header_len += IPV4_BEET_PHMAXLEN +
+ (sizeof(struct ipv6hdr) - sizeof(struct iphdr));
+ break;
+ case XFRM_MODE_TRANSPORT:
+ break;
+ case XFRM_MODE_TUNNEL:
+ x->props.header_len += sizeof(struct ipv6hdr);
+ break;
+ default:
+ goto error;
+ }
+
+ align = ALIGN(crypto_aead_blocksize(aead), 4);
+ if (esp->padlen)
+ align = max_t(u32, align, esp->padlen);
+ x->props.trailer_len = align + 1 + crypto_aead_authsize(esp->aead);
+
+error:
+ return err;
+}
+
+static const struct xfrm_type esp6_type =
+{
+ .description = "ESP6",
+ .owner = THIS_MODULE,
+ .proto = IPPROTO_ESP,
+ .flags = XFRM_TYPE_REPLAY_PROT,
+ .init_state = esp6_init_state,
+ .destructor = esp6_destroy,
+ .get_mtu = esp6_get_mtu,
+ .input = esp6_input,
+ .output = esp6_output,
+ .hdr_offset = xfrm6_find_1stfragopt,
+};
+
+static const struct inet6_protocol esp6_protocol = {
+ .handler = xfrm6_rcv,
+ .err_handler = esp6_err,
+ .flags = INET6_PROTO_NOPOLICY,
+};
+
+static int __init esp6_init(void)
+{
+ if (xfrm_register_type(&esp6_type, AF_INET6) < 0) {
+ printk(KERN_INFO "ipv6 esp init: can't add xfrm type\n");
+ return -EAGAIN;
+ }
+ if (inet6_add_protocol(&esp6_protocol, IPPROTO_ESP) < 0) {
+ printk(KERN_INFO "ipv6 esp init: can't add protocol\n");
+ xfrm_unregister_type(&esp6_type, AF_INET6);
+ return -EAGAIN;
+ }
+
+ return 0;
+}
+
+static void __exit esp6_fini(void)
+{
+ if (inet6_del_protocol(&esp6_protocol, IPPROTO_ESP) < 0)
+ printk(KERN_INFO "ipv6 esp close: can't remove protocol\n");
+ if (xfrm_unregister_type(&esp6_type, AF_INET6) < 0)
+ printk(KERN_INFO "ipv6 esp close: can't remove xfrm type\n");
+}
+
+module_init(esp6_init);
+module_exit(esp6_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_ESP);
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
new file mode 100644
index 00000000..3d641b6e
--- /dev/null
+++ b/net/ipv6/exthdrs.c
@@ -0,0 +1,896 @@
+/*
+ * Extension Header handling for IPv6
+ * Linux INET6 implementation
+ *
+ * Authors:
+ * Pedro Roque <roque@di.fc.ul.pt>
+ * Andi Kleen <ak@muc.de>
+ * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/* Changes:
+ * yoshfuji : ensure not to overrun while parsing
+ * tlv options.
+ * Mitsuru KANDA @USAGI and: Remove ipv6_parse_exthdrs().
+ * YOSHIFUJI Hideaki @USAGI Register inbound extension header
+ * handlers as inet6_protocol{}.
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <linux/in6.h>
+#include <linux/icmpv6.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+
+#include <net/dst.h>
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/transp_v6.h>
+#include <net/rawv6.h>
+#include <net/ndisc.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#include <net/xfrm.h>
+#endif
+
+#include <asm/uaccess.h>
+
+int ipv6_find_tlv(struct sk_buff *skb, int offset, int type)
+{
+ const unsigned char *nh = skb_network_header(skb);
+ int packet_len = skb->tail - skb->network_header;
+ struct ipv6_opt_hdr *hdr;
+ int len;
+
+ if (offset + 2 > packet_len)
+ goto bad;
+ hdr = (struct ipv6_opt_hdr *)(nh + offset);
+ len = ((hdr->hdrlen + 1) << 3);
+
+ if (offset + len > packet_len)
+ goto bad;
+
+ offset += 2;
+ len -= 2;
+
+ while (len > 0) {
+ int opttype = nh[offset];
+ int optlen;
+
+ if (opttype == type)
+ return offset;
+
+ switch (opttype) {
+ case IPV6_TLV_PAD0:
+ optlen = 1;
+ break;
+ default:
+ optlen = nh[offset + 1] + 2;
+ if (optlen > len)
+ goto bad;
+ break;
+ }
+ offset += optlen;
+ len -= optlen;
+ }
+ /* not_found */
+ bad:
+ return -1;
+}
+EXPORT_SYMBOL_GPL(ipv6_find_tlv);
+
+/*
+ * Parsing tlv encoded headers.
+ *
+ * Parsing function "func" returns 1, if parsing succeed
+ * and 0, if it failed.
+ * It MUST NOT touch skb->h.
+ */
+
+struct tlvtype_proc {
+ int type;
+ int (*func)(struct sk_buff *skb, int offset);
+};
+
+/*********************
+ Generic functions
+ *********************/
+
+/* An unknown option is detected, decide what to do */
+
+static int ip6_tlvopt_unknown(struct sk_buff *skb, int optoff)
+{
+ switch ((skb_network_header(skb)[optoff] & 0xC0) >> 6) {
+ case 0: /* ignore */
+ return 1;
+
+ case 1: /* drop packet */
+ break;
+
+ case 3: /* Send ICMP if not a multicast address and drop packet */
+ /* Actually, it is redundant check. icmp_send
+ will recheck in any case.
+ */
+ if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr))
+ break;
+ case 2: /* send ICMP PARM PROB regardless and drop packet */
+ icmpv6_param_prob(skb, ICMPV6_UNK_OPTION, optoff);
+ return 0;
+ }
+
+ kfree_skb(skb);
+ return 0;
+}
+
+/* Parse tlv encoded option header (hop-by-hop or destination) */
+
+static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb)
+{
+ struct tlvtype_proc *curr;
+ const unsigned char *nh = skb_network_header(skb);
+ int off = skb_network_header_len(skb);
+ int len = (skb_transport_header(skb)[1] + 1) << 3;
+
+ if (skb_transport_offset(skb) + len > skb_headlen(skb))
+ goto bad;
+
+ off += 2;
+ len -= 2;
+
+ while (len > 0) {
+ int optlen = nh[off + 1] + 2;
+
+ switch (nh[off]) {
+ case IPV6_TLV_PAD0:
+ optlen = 1;
+ break;
+
+ case IPV6_TLV_PADN:
+ break;
+
+ default: /* Other TLV code so scan list */
+ if (optlen > len)
+ goto bad;
+ for (curr=procs; curr->type >= 0; curr++) {
+ if (curr->type == nh[off]) {
+ /* type specific length/alignment
+ checks will be performed in the
+ func(). */
+ if (curr->func(skb, off) == 0)
+ return 0;
+ break;
+ }
+ }
+ if (curr->type < 0) {
+ if (ip6_tlvopt_unknown(skb, off) == 0)
+ return 0;
+ }
+ break;
+ }
+ off += optlen;
+ len -= optlen;
+ }
+ if (len == 0)
+ return 1;
+bad:
+ kfree_skb(skb);
+ return 0;
+}
+
+/*****************************
+ Destination options header.
+ *****************************/
+
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+static int ipv6_dest_hao(struct sk_buff *skb, int optoff)
+{
+ struct ipv6_destopt_hao *hao;
+ struct inet6_skb_parm *opt = IP6CB(skb);
+ struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ struct in6_addr tmp_addr;
+ int ret;
+
+ if (opt->dsthao) {
+ LIMIT_NETDEBUG(KERN_DEBUG "hao duplicated\n");
+ goto discard;
+ }
+ opt->dsthao = opt->dst1;
+ opt->dst1 = 0;
+
+ hao = (struct ipv6_destopt_hao *)(skb_network_header(skb) + optoff);
+
+ if (hao->length != 16) {
+ LIMIT_NETDEBUG(
+ KERN_DEBUG "hao invalid option length = %d\n", hao->length);
+ goto discard;
+ }
+
+ if (!(ipv6_addr_type(&hao->addr) & IPV6_ADDR_UNICAST)) {
+ LIMIT_NETDEBUG(
+ KERN_DEBUG "hao is not an unicast addr: %pI6\n", &hao->addr);
+ goto discard;
+ }
+
+ ret = xfrm6_input_addr(skb, (xfrm_address_t *)&ipv6h->daddr,
+ (xfrm_address_t *)&hao->addr, IPPROTO_DSTOPTS);
+ if (unlikely(ret < 0))
+ goto discard;
+
+ if (skb_cloned(skb)) {
+ if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+ goto discard;
+
+ /* update all variable using below by copied skbuff */
+ hao = (struct ipv6_destopt_hao *)(skb_network_header(skb) +
+ optoff);
+ ipv6h = ipv6_hdr(skb);
+ }
+
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ skb->ip_summed = CHECKSUM_NONE;
+
+ tmp_addr = ipv6h->saddr;
+ ipv6h->saddr = hao->addr;
+ hao->addr = tmp_addr;
+
+ if (skb->tstamp.tv64 == 0)
+ __net_timestamp(skb);
+
+ return 1;
+
+ discard:
+ kfree_skb(skb);
+ return 0;
+}
+#endif
+
+static struct tlvtype_proc tlvprocdestopt_lst[] = {
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ {
+ .type = IPV6_TLV_HAO,
+ .func = ipv6_dest_hao,
+ },
+#endif
+ {-1, NULL}
+};
+
+static int ipv6_destopt_rcv(struct sk_buff *skb)
+{
+ struct inet6_skb_parm *opt = IP6CB(skb);
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ __u16 dstbuf;
+#endif
+ struct dst_entry *dst = skb_dst(skb);
+
+ if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
+ !pskb_may_pull(skb, (skb_transport_offset(skb) +
+ ((skb_transport_header(skb)[1] + 1) << 3)))) {
+ IP6_INC_STATS_BH(dev_net(dst->dev), ip6_dst_idev(dst),
+ IPSTATS_MIB_INHDRERRORS);
+ kfree_skb(skb);
+ return -1;
+ }
+
+ opt->lastopt = opt->dst1 = skb_network_header_len(skb);
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ dstbuf = opt->dst1;
+#endif
+
+ if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) {
+ skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3;
+ opt = IP6CB(skb);
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ opt->nhoff = dstbuf;
+#else
+ opt->nhoff = opt->dst1;
+#endif
+ return 1;
+ }
+
+ IP6_INC_STATS_BH(dev_net(dst->dev),
+ ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
+ return -1;
+}
+
+/********************************
+ Routing header.
+ ********************************/
+
+/* called with rcu_read_lock() */
+static int ipv6_rthdr_rcv(struct sk_buff *skb)
+{
+ struct inet6_skb_parm *opt = IP6CB(skb);
+ struct in6_addr *addr = NULL;
+ struct in6_addr daddr;
+ struct inet6_dev *idev;
+ int n, i;
+ struct ipv6_rt_hdr *hdr;
+ struct rt0_hdr *rthdr;
+ struct net *net = dev_net(skb->dev);
+ int accept_source_route = net->ipv6.devconf_all->accept_source_route;
+
+ idev = __in6_dev_get(skb->dev);
+ if (idev && accept_source_route > idev->cnf.accept_source_route)
+ accept_source_route = idev->cnf.accept_source_route;
+
+ if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
+ !pskb_may_pull(skb, (skb_transport_offset(skb) +
+ ((skb_transport_header(skb)[1] + 1) << 3)))) {
+ IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_INHDRERRORS);
+ kfree_skb(skb);
+ return -1;
+ }
+
+ hdr = (struct ipv6_rt_hdr *)skb_transport_header(skb);
+
+ if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) ||
+ skb->pkt_type != PACKET_HOST) {
+ IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_INADDRERRORS);
+ kfree_skb(skb);
+ return -1;
+ }
+
+looped_back:
+ if (hdr->segments_left == 0) {
+ switch (hdr->type) {
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ case IPV6_SRCRT_TYPE_2:
+ /* Silently discard type 2 header unless it was
+ * processed by own
+ */
+ if (!addr) {
+ IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_INADDRERRORS);
+ kfree_skb(skb);
+ return -1;
+ }
+ break;
+#endif
+ default:
+ break;
+ }
+
+ opt->lastopt = opt->srcrt = skb_network_header_len(skb);
+ skb->transport_header += (hdr->hdrlen + 1) << 3;
+ opt->dst0 = opt->dst1;
+ opt->dst1 = 0;
+ opt->nhoff = (&hdr->nexthdr) - skb_network_header(skb);
+ return 1;
+ }
+
+ switch (hdr->type) {
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ case IPV6_SRCRT_TYPE_2:
+ if (accept_source_route < 0)
+ goto unknown_rh;
+ /* Silently discard invalid RTH type 2 */
+ if (hdr->hdrlen != 2 || hdr->segments_left != 1) {
+ IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_INHDRERRORS);
+ kfree_skb(skb);
+ return -1;
+ }
+ break;
+#endif
+ default:
+ goto unknown_rh;
+ }
+
+ /*
+ * This is the routing header forwarding algorithm from
+ * RFC 2460, page 16.
+ */
+
+ n = hdr->hdrlen >> 1;
+
+ if (hdr->segments_left > n) {
+ IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_INHDRERRORS);
+ icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
+ ((&hdr->segments_left) -
+ skb_network_header(skb)));
+ return -1;
+ }
+
+ /* We are about to mangle packet header. Be careful!
+ Do not damage packets queued somewhere.
+ */
+ if (skb_cloned(skb)) {
+ /* the copy is a forwarded packet */
+ if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) {
+ IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_OUTDISCARDS);
+ kfree_skb(skb);
+ return -1;
+ }
+ hdr = (struct ipv6_rt_hdr *)skb_transport_header(skb);
+ }
+
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ skb->ip_summed = CHECKSUM_NONE;
+
+ i = n - --hdr->segments_left;
+
+ rthdr = (struct rt0_hdr *) hdr;
+ addr = rthdr->addr;
+ addr += i - 1;
+
+ switch (hdr->type) {
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ case IPV6_SRCRT_TYPE_2:
+ if (xfrm6_input_addr(skb, (xfrm_address_t *)addr,
+ (xfrm_address_t *)&ipv6_hdr(skb)->saddr,
+ IPPROTO_ROUTING) < 0) {
+ IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_INADDRERRORS);
+ kfree_skb(skb);
+ return -1;
+ }
+ if (!ipv6_chk_home_addr(dev_net(skb_dst(skb)->dev), addr)) {
+ IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_INADDRERRORS);
+ kfree_skb(skb);
+ return -1;
+ }
+ break;
+#endif
+ default:
+ break;
+ }
+
+ if (ipv6_addr_is_multicast(addr)) {
+ IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_INADDRERRORS);
+ kfree_skb(skb);
+ return -1;
+ }
+
+ daddr = *addr;
+ *addr = ipv6_hdr(skb)->daddr;
+ ipv6_hdr(skb)->daddr = daddr;
+
+ skb_dst_drop(skb);
+ ip6_route_input(skb);
+ if (skb_dst(skb)->error) {
+ skb_push(skb, skb->data - skb_network_header(skb));
+ dst_input(skb);
+ return -1;
+ }
+
+ if (skb_dst(skb)->dev->flags&IFF_LOOPBACK) {
+ if (ipv6_hdr(skb)->hop_limit <= 1) {
+ IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_INHDRERRORS);
+ icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
+ 0);
+ kfree_skb(skb);
+ return -1;
+ }
+ ipv6_hdr(skb)->hop_limit--;
+ goto looped_back;
+ }
+
+ skb_push(skb, skb->data - skb_network_header(skb));
+ dst_input(skb);
+ return -1;
+
+unknown_rh:
+ IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INHDRERRORS);
+ icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
+ (&hdr->type) - skb_network_header(skb));
+ return -1;
+}
+
+static const struct inet6_protocol rthdr_protocol = {
+ .handler = ipv6_rthdr_rcv,
+ .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_GSO_EXTHDR,
+};
+
+static const struct inet6_protocol destopt_protocol = {
+ .handler = ipv6_destopt_rcv,
+ .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_GSO_EXTHDR,
+};
+
+static const struct inet6_protocol nodata_protocol = {
+ .handler = dst_discard,
+ .flags = INET6_PROTO_NOPOLICY,
+};
+
+int __init ipv6_exthdrs_init(void)
+{
+ int ret;
+
+ ret = inet6_add_protocol(&rthdr_protocol, IPPROTO_ROUTING);
+ if (ret)
+ goto out;
+
+ ret = inet6_add_protocol(&destopt_protocol, IPPROTO_DSTOPTS);
+ if (ret)
+ goto out_rthdr;
+
+ ret = inet6_add_protocol(&nodata_protocol, IPPROTO_NONE);
+ if (ret)
+ goto out_destopt;
+
+out:
+ return ret;
+out_rthdr:
+ inet6_del_protocol(&rthdr_protocol, IPPROTO_ROUTING);
+out_destopt:
+ inet6_del_protocol(&destopt_protocol, IPPROTO_DSTOPTS);
+ goto out;
+};
+
+void ipv6_exthdrs_exit(void)
+{
+ inet6_del_protocol(&nodata_protocol, IPPROTO_NONE);
+ inet6_del_protocol(&destopt_protocol, IPPROTO_DSTOPTS);
+ inet6_del_protocol(&rthdr_protocol, IPPROTO_ROUTING);
+}
+
+/**********************************
+ Hop-by-hop options.
+ **********************************/
+
+/*
+ * Note: we cannot rely on skb_dst(skb) before we assign it in ip6_route_input().
+ */
+static inline struct inet6_dev *ipv6_skb_idev(struct sk_buff *skb)
+{
+ return skb_dst(skb) ? ip6_dst_idev(skb_dst(skb)) : __in6_dev_get(skb->dev);
+}
+
+static inline struct net *ipv6_skb_net(struct sk_buff *skb)
+{
+ return skb_dst(skb) ? dev_net(skb_dst(skb)->dev) : dev_net(skb->dev);
+}
+
+/* Router Alert as of RFC 2711 */
+
+static int ipv6_hop_ra(struct sk_buff *skb, int optoff)
+{
+ const unsigned char *nh = skb_network_header(skb);
+
+ if (nh[optoff + 1] == 2) {
+ IP6CB(skb)->ra = optoff;
+ return 1;
+ }
+ LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n",
+ nh[optoff + 1]);
+ kfree_skb(skb);
+ return 0;
+}
+
+/* Jumbo payload */
+
+static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
+{
+ const unsigned char *nh = skb_network_header(skb);
+ struct net *net = ipv6_skb_net(skb);
+ u32 pkt_len;
+
+ if (nh[optoff + 1] != 4 || (optoff & 3) != 2) {
+ LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n",
+ nh[optoff+1]);
+ IP6_INC_STATS_BH(net, ipv6_skb_idev(skb),
+ IPSTATS_MIB_INHDRERRORS);
+ goto drop;
+ }
+
+ pkt_len = ntohl(*(__be32 *)(nh + optoff + 2));
+ if (pkt_len <= IPV6_MAXPLEN) {
+ IP6_INC_STATS_BH(net, ipv6_skb_idev(skb),
+ IPSTATS_MIB_INHDRERRORS);
+ icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff+2);
+ return 0;
+ }
+ if (ipv6_hdr(skb)->payload_len) {
+ IP6_INC_STATS_BH(net, ipv6_skb_idev(skb),
+ IPSTATS_MIB_INHDRERRORS);
+ icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff);
+ return 0;
+ }
+
+ if (pkt_len > skb->len - sizeof(struct ipv6hdr)) {
+ IP6_INC_STATS_BH(net, ipv6_skb_idev(skb),
+ IPSTATS_MIB_INTRUNCATEDPKTS);
+ goto drop;
+ }
+
+ if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr)))
+ goto drop;
+
+ return 1;
+
+drop:
+ kfree_skb(skb);
+ return 0;
+}
+
+static struct tlvtype_proc tlvprochopopt_lst[] = {
+ {
+ .type = IPV6_TLV_ROUTERALERT,
+ .func = ipv6_hop_ra,
+ },
+ {
+ .type = IPV6_TLV_JUMBO,
+ .func = ipv6_hop_jumbo,
+ },
+ { -1, }
+};
+
+int ipv6_parse_hopopts(struct sk_buff *skb)
+{
+ struct inet6_skb_parm *opt = IP6CB(skb);
+
+ /*
+ * skb_network_header(skb) is equal to skb->data, and
+ * skb_network_header_len(skb) is always equal to
+ * sizeof(struct ipv6hdr) by definition of
+ * hop-by-hop options.
+ */
+ if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + 8) ||
+ !pskb_may_pull(skb, (sizeof(struct ipv6hdr) +
+ ((skb_transport_header(skb)[1] + 1) << 3)))) {
+ kfree_skb(skb);
+ return -1;
+ }
+
+ opt->hop = sizeof(struct ipv6hdr);
+ if (ip6_parse_tlv(tlvprochopopt_lst, skb)) {
+ skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3;
+ opt = IP6CB(skb);
+ opt->nhoff = sizeof(struct ipv6hdr);
+ return 1;
+ }
+ return -1;
+}
+
+/*
+ * Creating outbound headers.
+ *
+ * "build" functions work when skb is filled from head to tail (datagram)
+ * "push" functions work when headers are added from tail to head (tcp)
+ *
+ * In both cases we assume, that caller reserved enough room
+ * for headers.
+ */
+
+static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto,
+ struct ipv6_rt_hdr *opt,
+ struct in6_addr **addr_p)
+{
+ struct rt0_hdr *phdr, *ihdr;
+ int hops;
+
+ ihdr = (struct rt0_hdr *) opt;
+
+ phdr = (struct rt0_hdr *) skb_push(skb, (ihdr->rt_hdr.hdrlen + 1) << 3);
+ memcpy(phdr, ihdr, sizeof(struct rt0_hdr));
+
+ hops = ihdr->rt_hdr.hdrlen >> 1;
+
+ if (hops > 1)
+ memcpy(phdr->addr, ihdr->addr + 1,
+ (hops - 1) * sizeof(struct in6_addr));
+
+ phdr->addr[hops - 1] = **addr_p;
+ *addr_p = ihdr->addr;
+
+ phdr->rt_hdr.nexthdr = *proto;
+ *proto = NEXTHDR_ROUTING;
+}
+
+static void ipv6_push_exthdr(struct sk_buff *skb, u8 *proto, u8 type, struct ipv6_opt_hdr *opt)
+{
+ struct ipv6_opt_hdr *h = (struct ipv6_opt_hdr *)skb_push(skb, ipv6_optlen(opt));
+
+ memcpy(h, opt, ipv6_optlen(opt));
+ h->nexthdr = *proto;
+ *proto = type;
+}
+
+void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt,
+ u8 *proto,
+ struct in6_addr **daddr)
+{
+ if (opt->srcrt) {
+ ipv6_push_rthdr(skb, proto, opt->srcrt, daddr);
+ /*
+ * IPV6_RTHDRDSTOPTS is ignored
+ * unless IPV6_RTHDR is set (RFC3542).
+ */
+ if (opt->dst0opt)
+ ipv6_push_exthdr(skb, proto, NEXTHDR_DEST, opt->dst0opt);
+ }
+ if (opt->hopopt)
+ ipv6_push_exthdr(skb, proto, NEXTHDR_HOP, opt->hopopt);
+}
+
+EXPORT_SYMBOL(ipv6_push_nfrag_opts);
+
+void ipv6_push_frag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, u8 *proto)
+{
+ if (opt->dst1opt)
+ ipv6_push_exthdr(skb, proto, NEXTHDR_DEST, opt->dst1opt);
+}
+
+struct ipv6_txoptions *
+ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt)
+{
+ struct ipv6_txoptions *opt2;
+
+ opt2 = sock_kmalloc(sk, opt->tot_len, GFP_ATOMIC);
+ if (opt2) {
+ long dif = (char*)opt2 - (char*)opt;
+ memcpy(opt2, opt, opt->tot_len);
+ if (opt2->hopopt)
+ *((char**)&opt2->hopopt) += dif;
+ if (opt2->dst0opt)
+ *((char**)&opt2->dst0opt) += dif;
+ if (opt2->dst1opt)
+ *((char**)&opt2->dst1opt) += dif;
+ if (opt2->srcrt)
+ *((char**)&opt2->srcrt) += dif;
+ }
+ return opt2;
+}
+
+EXPORT_SYMBOL_GPL(ipv6_dup_options);
+
+static int ipv6_renew_option(void *ohdr,
+ struct ipv6_opt_hdr __user *newopt, int newoptlen,
+ int inherit,
+ struct ipv6_opt_hdr **hdr,
+ char **p)
+{
+ if (inherit) {
+ if (ohdr) {
+ memcpy(*p, ohdr, ipv6_optlen((struct ipv6_opt_hdr *)ohdr));
+ *hdr = (struct ipv6_opt_hdr *)*p;
+ *p += CMSG_ALIGN(ipv6_optlen(*(struct ipv6_opt_hdr **)hdr));
+ }
+ } else {
+ if (newopt) {
+ if (copy_from_user(*p, newopt, newoptlen))
+ return -EFAULT;
+ *hdr = (struct ipv6_opt_hdr *)*p;
+ if (ipv6_optlen(*(struct ipv6_opt_hdr **)hdr) > newoptlen)
+ return -EINVAL;
+ *p += CMSG_ALIGN(newoptlen);
+ }
+ }
+ return 0;
+}
+
+struct ipv6_txoptions *
+ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt,
+ int newtype,
+ struct ipv6_opt_hdr __user *newopt, int newoptlen)
+{
+ int tot_len = 0;
+ char *p;
+ struct ipv6_txoptions *opt2;
+ int err;
+
+ if (opt) {
+ if (newtype != IPV6_HOPOPTS && opt->hopopt)
+ tot_len += CMSG_ALIGN(ipv6_optlen(opt->hopopt));
+ if (newtype != IPV6_RTHDRDSTOPTS && opt->dst0opt)
+ tot_len += CMSG_ALIGN(ipv6_optlen(opt->dst0opt));
+ if (newtype != IPV6_RTHDR && opt->srcrt)
+ tot_len += CMSG_ALIGN(ipv6_optlen(opt->srcrt));
+ if (newtype != IPV6_DSTOPTS && opt->dst1opt)
+ tot_len += CMSG_ALIGN(ipv6_optlen(opt->dst1opt));
+ }
+
+ if (newopt && newoptlen)
+ tot_len += CMSG_ALIGN(newoptlen);
+
+ if (!tot_len)
+ return NULL;
+
+ tot_len += sizeof(*opt2);
+ opt2 = sock_kmalloc(sk, tot_len, GFP_ATOMIC);
+ if (!opt2)
+ return ERR_PTR(-ENOBUFS);
+
+ memset(opt2, 0, tot_len);
+
+ opt2->tot_len = tot_len;
+ p = (char *)(opt2 + 1);
+
+ err = ipv6_renew_option(opt ? opt->hopopt : NULL, newopt, newoptlen,
+ newtype != IPV6_HOPOPTS,
+ &opt2->hopopt, &p);
+ if (err)
+ goto out;
+
+ err = ipv6_renew_option(opt ? opt->dst0opt : NULL, newopt, newoptlen,
+ newtype != IPV6_RTHDRDSTOPTS,
+ &opt2->dst0opt, &p);
+ if (err)
+ goto out;
+
+ err = ipv6_renew_option(opt ? opt->srcrt : NULL, newopt, newoptlen,
+ newtype != IPV6_RTHDR,
+ (struct ipv6_opt_hdr **)&opt2->srcrt, &p);
+ if (err)
+ goto out;
+
+ err = ipv6_renew_option(opt ? opt->dst1opt : NULL, newopt, newoptlen,
+ newtype != IPV6_DSTOPTS,
+ &opt2->dst1opt, &p);
+ if (err)
+ goto out;
+
+ opt2->opt_nflen = (opt2->hopopt ? ipv6_optlen(opt2->hopopt) : 0) +
+ (opt2->dst0opt ? ipv6_optlen(opt2->dst0opt) : 0) +
+ (opt2->srcrt ? ipv6_optlen(opt2->srcrt) : 0);
+ opt2->opt_flen = (opt2->dst1opt ? ipv6_optlen(opt2->dst1opt) : 0);
+
+ return opt2;
+out:
+ sock_kfree_s(sk, opt2, opt2->tot_len);
+ return ERR_PTR(err);
+}
+
+struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space,
+ struct ipv6_txoptions *opt)
+{
+ /*
+ * ignore the dest before srcrt unless srcrt is being included.
+ * --yoshfuji
+ */
+ if (opt && opt->dst0opt && !opt->srcrt) {
+ if (opt_space != opt) {
+ memcpy(opt_space, opt, sizeof(*opt_space));
+ opt = opt_space;
+ }
+ opt->opt_nflen -= ipv6_optlen(opt->dst0opt);
+ opt->dst0opt = NULL;
+ }
+
+ return opt;
+}
+
+/**
+ * fl6_update_dst - update flowi destination address with info given
+ * by srcrt option, if any.
+ *
+ * @fl6: flowi6 for which daddr is to be updated
+ * @opt: struct ipv6_txoptions in which to look for srcrt opt
+ * @orig: copy of original daddr address if modified
+ *
+ * Returns NULL if no txoptions or no srcrt, otherwise returns orig
+ * and initial value of fl6->daddr set in orig
+ */
+struct in6_addr *fl6_update_dst(struct flowi6 *fl6,
+ const struct ipv6_txoptions *opt,
+ struct in6_addr *orig)
+{
+ if (!opt || !opt->srcrt)
+ return NULL;
+
+ *orig = fl6->daddr;
+ fl6->daddr = *((struct rt0_hdr *)opt->srcrt)->addr;
+ return orig;
+}
+
+EXPORT_SYMBOL_GPL(fl6_update_dst);
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
new file mode 100644
index 00000000..72957f4a
--- /dev/null
+++ b/net/ipv6/exthdrs_core.c
@@ -0,0 +1,114 @@
+/*
+ * IPv6 library code, needed by static components when full IPv6 support is
+ * not configured or static.
+ */
+#include <linux/export.h>
+#include <net/ipv6.h>
+
+/*
+ * find out if nexthdr is a well-known extension header or a protocol
+ */
+
+int ipv6_ext_hdr(u8 nexthdr)
+{
+ /*
+ * find out if nexthdr is an extension header or a protocol
+ */
+ return (nexthdr == NEXTHDR_HOP) ||
+ (nexthdr == NEXTHDR_ROUTING) ||
+ (nexthdr == NEXTHDR_FRAGMENT) ||
+ (nexthdr == NEXTHDR_AUTH) ||
+ (nexthdr == NEXTHDR_NONE) ||
+ (nexthdr == NEXTHDR_DEST);
+}
+
+/*
+ * Skip any extension headers. This is used by the ICMP module.
+ *
+ * Note that strictly speaking this conflicts with RFC 2460 4.0:
+ * ...The contents and semantics of each extension header determine whether
+ * or not to proceed to the next header. Therefore, extension headers must
+ * be processed strictly in the order they appear in the packet; a
+ * receiver must not, for example, scan through a packet looking for a
+ * particular kind of extension header and process that header prior to
+ * processing all preceding ones.
+ *
+ * We do exactly this. This is a protocol bug. We can't decide after a
+ * seeing an unknown discard-with-error flavour TLV option if it's a
+ * ICMP error message or not (errors should never be send in reply to
+ * ICMP error messages).
+ *
+ * But I see no other way to do this. This might need to be reexamined
+ * when Linux implements ESP (and maybe AUTH) headers.
+ * --AK
+ *
+ * This function parses (probably truncated) exthdr set "hdr".
+ * "nexthdrp" initially points to some place,
+ * where type of the first header can be found.
+ *
+ * It skips all well-known exthdrs, and returns pointer to the start
+ * of unparsable area i.e. the first header with unknown type.
+ * If it is not NULL *nexthdr is updated by type/protocol of this header.
+ *
+ * NOTES: - if packet terminated with NEXTHDR_NONE it returns NULL.
+ * - it may return pointer pointing beyond end of packet,
+ * if the last recognized header is truncated in the middle.
+ * - if packet is truncated, so that all parsed headers are skipped,
+ * it returns NULL.
+ * - First fragment header is skipped, not-first ones
+ * are considered as unparsable.
+ * - Reports the offset field of the final fragment header so it is
+ * possible to tell whether this is a first fragment, later fragment,
+ * or not fragmented.
+ * - ESP is unparsable for now and considered like
+ * normal payload protocol.
+ * - Note also special handling of AUTH header. Thanks to IPsec wizards.
+ *
+ * --ANK (980726)
+ */
+
+int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp,
+ __be16 *frag_offp)
+{
+ u8 nexthdr = *nexthdrp;
+
+ *frag_offp = 0;
+
+ while (ipv6_ext_hdr(nexthdr)) {
+ struct ipv6_opt_hdr _hdr, *hp;
+ int hdrlen;
+
+ if (nexthdr == NEXTHDR_NONE)
+ return -1;
+ hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr);
+ if (hp == NULL)
+ return -1;
+ if (nexthdr == NEXTHDR_FRAGMENT) {
+ __be16 _frag_off, *fp;
+ fp = skb_header_pointer(skb,
+ start+offsetof(struct frag_hdr,
+ frag_off),
+ sizeof(_frag_off),
+ &_frag_off);
+ if (fp == NULL)
+ return -1;
+
+ *frag_offp = *fp;
+ if (ntohs(*frag_offp) & ~0x7)
+ break;
+ hdrlen = 8;
+ } else if (nexthdr == NEXTHDR_AUTH)
+ hdrlen = (hp->hdrlen+2)<<2;
+ else
+ hdrlen = ipv6_optlen(hp);
+
+ nexthdr = hp->nexthdr;
+ start += hdrlen;
+ }
+
+ *nexthdrp = nexthdr;
+ return start;
+}
+
+EXPORT_SYMBOL(ipv6_ext_hdr);
+EXPORT_SYMBOL(ipv6_skip_exthdr);
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
new file mode 100644
index 00000000..b6c57315
--- /dev/null
+++ b/net/ipv6/fib6_rules.c
@@ -0,0 +1,308 @@
+/*
+ * net/ipv6/fib6_rules.c IPv6 Routing Policy Rules
+ *
+ * Copyright (C)2003-2006 Helsinki University of Technology
+ * Copyright (C)2003-2006 USAGI/WIDE Project
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2.
+ *
+ * Authors
+ * Thomas Graf <tgraf@suug.ch>
+ * Ville Nuorvala <vnuorval@tcs.hut.fi>
+ */
+
+#include <linux/netdevice.h>
+#include <linux/export.h>
+
+#include <net/fib_rules.h>
+#include <net/ipv6.h>
+#include <net/addrconf.h>
+#include <net/ip6_route.h>
+#include <net/netlink.h>
+
+struct fib6_rule
+{
+ struct fib_rule common;
+ struct rt6key src;
+ struct rt6key dst;
+ u8 tclass;
+};
+
+struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
+ int flags, pol_lookup_t lookup)
+{
+ struct fib_lookup_arg arg = {
+ .lookup_ptr = lookup,
+ .flags = FIB_LOOKUP_NOREF,
+ };
+
+ fib_rules_lookup(net->ipv6.fib6_rules_ops,
+ flowi6_to_flowi(fl6), flags, &arg);
+
+ if (arg.result)
+ return arg.result;
+
+ dst_hold(&net->ipv6.ip6_null_entry->dst);
+ return &net->ipv6.ip6_null_entry->dst;
+}
+
+static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
+ int flags, struct fib_lookup_arg *arg)
+{
+ struct flowi6 *flp6 = &flp->u.ip6;
+ struct rt6_info *rt = NULL;
+ struct fib6_table *table;
+ struct net *net = rule->fr_net;
+ pol_lookup_t lookup = arg->lookup_ptr;
+
+ switch (rule->action) {
+ case FR_ACT_TO_TBL:
+ break;
+ case FR_ACT_UNREACHABLE:
+ rt = net->ipv6.ip6_null_entry;
+ goto discard_pkt;
+ default:
+ case FR_ACT_BLACKHOLE:
+ rt = net->ipv6.ip6_blk_hole_entry;
+ goto discard_pkt;
+ case FR_ACT_PROHIBIT:
+ rt = net->ipv6.ip6_prohibit_entry;
+ goto discard_pkt;
+ }
+
+ table = fib6_get_table(net, rule->table);
+ if (table)
+ rt = lookup(net, table, flp6, flags);
+
+ if (rt != net->ipv6.ip6_null_entry) {
+ struct fib6_rule *r = (struct fib6_rule *)rule;
+
+ /*
+ * If we need to find a source address for this traffic,
+ * we check the result if it meets requirement of the rule.
+ */
+ if ((rule->flags & FIB_RULE_FIND_SADDR) &&
+ r->src.plen && !(flags & RT6_LOOKUP_F_HAS_SADDR)) {
+ struct in6_addr saddr;
+
+ if (ipv6_dev_get_saddr(net,
+ ip6_dst_idev(&rt->dst)->dev,
+ &flp6->daddr,
+ rt6_flags2srcprefs(flags),
+ &saddr))
+ goto again;
+ if (!ipv6_prefix_equal(&saddr, &r->src.addr,
+ r->src.plen))
+ goto again;
+ flp6->saddr = saddr;
+ }
+ goto out;
+ }
+again:
+ dst_release(&rt->dst);
+ rt = NULL;
+ goto out;
+
+discard_pkt:
+ dst_hold(&rt->dst);
+out:
+ arg->result = rt;
+ return rt == NULL ? -EAGAIN : 0;
+}
+
+
+static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
+{
+ struct fib6_rule *r = (struct fib6_rule *) rule;
+ struct flowi6 *fl6 = &fl->u.ip6;
+
+ if (r->dst.plen &&
+ !ipv6_prefix_equal(&fl6->daddr, &r->dst.addr, r->dst.plen))
+ return 0;
+
+ /*
+ * If FIB_RULE_FIND_SADDR is set and we do not have a
+ * source address for the traffic, we defer check for
+ * source address.
+ */
+ if (r->src.plen) {
+ if (flags & RT6_LOOKUP_F_HAS_SADDR) {
+ if (!ipv6_prefix_equal(&fl6->saddr, &r->src.addr,
+ r->src.plen))
+ return 0;
+ } else if (!(r->common.flags & FIB_RULE_FIND_SADDR))
+ return 0;
+ }
+
+ if (r->tclass && r->tclass != ((ntohl(fl6->flowlabel) >> 20) & 0xff))
+ return 0;
+
+ return 1;
+}
+
+static const struct nla_policy fib6_rule_policy[FRA_MAX+1] = {
+ FRA_GENERIC_POLICY,
+};
+
+static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
+ struct fib_rule_hdr *frh,
+ struct nlattr **tb)
+{
+ int err = -EINVAL;
+ struct net *net = sock_net(skb->sk);
+ struct fib6_rule *rule6 = (struct fib6_rule *) rule;
+
+ if (rule->action == FR_ACT_TO_TBL) {
+ if (rule->table == RT6_TABLE_UNSPEC)
+ goto errout;
+
+ if (fib6_new_table(net, rule->table) == NULL) {
+ err = -ENOBUFS;
+ goto errout;
+ }
+ }
+
+ if (frh->src_len)
+ nla_memcpy(&rule6->src.addr, tb[FRA_SRC],
+ sizeof(struct in6_addr));
+
+ if (frh->dst_len)
+ nla_memcpy(&rule6->dst.addr, tb[FRA_DST],
+ sizeof(struct in6_addr));
+
+ rule6->src.plen = frh->src_len;
+ rule6->dst.plen = frh->dst_len;
+ rule6->tclass = frh->tos;
+
+ err = 0;
+errout:
+ return err;
+}
+
+static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
+ struct nlattr **tb)
+{
+ struct fib6_rule *rule6 = (struct fib6_rule *) rule;
+
+ if (frh->src_len && (rule6->src.plen != frh->src_len))
+ return 0;
+
+ if (frh->dst_len && (rule6->dst.plen != frh->dst_len))
+ return 0;
+
+ if (frh->tos && (rule6->tclass != frh->tos))
+ return 0;
+
+ if (frh->src_len &&
+ nla_memcmp(tb[FRA_SRC], &rule6->src.addr, sizeof(struct in6_addr)))
+ return 0;
+
+ if (frh->dst_len &&
+ nla_memcmp(tb[FRA_DST], &rule6->dst.addr, sizeof(struct in6_addr)))
+ return 0;
+
+ return 1;
+}
+
+static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
+ struct fib_rule_hdr *frh)
+{
+ struct fib6_rule *rule6 = (struct fib6_rule *) rule;
+
+ frh->dst_len = rule6->dst.plen;
+ frh->src_len = rule6->src.plen;
+ frh->tos = rule6->tclass;
+
+ if (rule6->dst.plen)
+ NLA_PUT(skb, FRA_DST, sizeof(struct in6_addr),
+ &rule6->dst.addr);
+
+ if (rule6->src.plen)
+ NLA_PUT(skb, FRA_SRC, sizeof(struct in6_addr),
+ &rule6->src.addr);
+
+ return 0;
+
+nla_put_failure:
+ return -ENOBUFS;
+}
+
+static u32 fib6_rule_default_pref(struct fib_rules_ops *ops)
+{
+ return 0x3FFF;
+}
+
+static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule)
+{
+ return nla_total_size(16) /* dst */
+ + nla_total_size(16); /* src */
+}
+
+static const struct fib_rules_ops __net_initdata fib6_rules_ops_template = {
+ .family = AF_INET6,
+ .rule_size = sizeof(struct fib6_rule),
+ .addr_size = sizeof(struct in6_addr),
+ .action = fib6_rule_action,
+ .match = fib6_rule_match,
+ .configure = fib6_rule_configure,
+ .compare = fib6_rule_compare,
+ .fill = fib6_rule_fill,
+ .default_pref = fib6_rule_default_pref,
+ .nlmsg_payload = fib6_rule_nlmsg_payload,
+ .nlgroup = RTNLGRP_IPV6_RULE,
+ .policy = fib6_rule_policy,
+ .owner = THIS_MODULE,
+ .fro_net = &init_net,
+};
+
+static int __net_init fib6_rules_net_init(struct net *net)
+{
+ struct fib_rules_ops *ops;
+ int err = -ENOMEM;
+
+ ops = fib_rules_register(&fib6_rules_ops_template, net);
+ if (IS_ERR(ops))
+ return PTR_ERR(ops);
+ net->ipv6.fib6_rules_ops = ops;
+
+
+ err = fib_default_rule_add(net->ipv6.fib6_rules_ops, 0,
+ RT6_TABLE_LOCAL, 0);
+ if (err)
+ goto out_fib6_rules_ops;
+
+ err = fib_default_rule_add(net->ipv6.fib6_rules_ops,
+ 0x7FFE, RT6_TABLE_MAIN, 0);
+ if (err)
+ goto out_fib6_rules_ops;
+
+out:
+ return err;
+
+out_fib6_rules_ops:
+ fib_rules_unregister(ops);
+ goto out;
+}
+
+static void __net_exit fib6_rules_net_exit(struct net *net)
+{
+ fib_rules_unregister(net->ipv6.fib6_rules_ops);
+}
+
+static struct pernet_operations fib6_rules_net_ops = {
+ .init = fib6_rules_net_init,
+ .exit = fib6_rules_net_exit,
+};
+
+int __init fib6_rules_init(void)
+{
+ return register_pernet_subsys(&fib6_rules_net_ops);
+}
+
+
+void fib6_rules_cleanup(void)
+{
+ unregister_pernet_subsys(&fib6_rules_net_ops);
+}
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
new file mode 100644
index 00000000..ba0c1479
--- /dev/null
+++ b/net/ipv6/icmp.c
@@ -0,0 +1,997 @@
+/*
+ * Internet Control Message Protocol (ICMPv6)
+ * Linux INET6 implementation
+ *
+ * Authors:
+ * Pedro Roque <roque@di.fc.ul.pt>
+ *
+ * Based on net/ipv4/icmp.c
+ *
+ * RFC 1885
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/*
+ * Changes:
+ *
+ * Andi Kleen : exception handling
+ * Andi Kleen add rate limits. never reply to a icmp.
+ * add more length checks and other fixes.
+ * yoshfuji : ensure to sent parameter problem for
+ * fragments.
+ * YOSHIFUJI Hideaki @USAGI: added sysctl for icmp rate limit.
+ * Randy Dunlap and
+ * YOSHIFUJI Hideaki @USAGI: Per-interface statistics support
+ * Kazunori MIYAZAWA @USAGI: change output process to use ip6_append_data
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+#include <linux/netfilter.h>
+#include <linux/slab.h>
+
+#ifdef CONFIG_SYSCTL
+#include <linux/sysctl.h>
+#endif
+
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/icmpv6.h>
+
+#include <net/ip.h>
+#include <net/sock.h>
+
+#include <net/ipv6.h>
+#include <net/ip6_checksum.h>
+#include <net/ping.h>
+#include <net/protocol.h>
+#include <net/raw.h>
+#include <net/rawv6.h>
+#include <net/transp_v6.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
+#include <net/icmp.h>
+#include <net/xfrm.h>
+#include <net/inet_common.h>
+
+#include <asm/uaccess.h>
+
+/*
+ * The ICMP socket(s). This is the most convenient way to flow control
+ * our ICMP output as well as maintain a clean interface throughout
+ * all layers. All Socketless IP sends will soon be gone.
+ *
+ * On SMP we have one ICMP socket per-cpu.
+ */
+static inline struct sock *icmpv6_sk(struct net *net)
+{
+ return net->ipv6.icmp_sk[smp_processor_id()];
+}
+
+static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+{
+ /* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
+ struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
+
+ if (!(type & ICMPV6_INFOMSG_MASK))
+ if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
+ ping_err(skb, offset, info);
+}
+
+static int icmpv6_rcv(struct sk_buff *skb);
+
+static const struct inet6_protocol icmpv6_protocol = {
+ .handler = icmpv6_rcv,
+ .err_handler = icmpv6_err,
+ .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
+};
+
+static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
+{
+ struct sock *sk;
+
+ local_bh_disable();
+
+ sk = icmpv6_sk(net);
+ if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
+ /* This can happen if the output path (f.e. SIT or
+ * ip6ip6 tunnel) signals dst_link_failure() for an
+ * outgoing ICMP6 packet.
+ */
+ local_bh_enable();
+ return NULL;
+ }
+ return sk;
+}
+
+static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
+{
+ spin_unlock_bh(&sk->sk_lock.slock);
+}
+
+/*
+ * Slightly more convenient version of icmpv6_send.
+ */
+void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
+{
+ icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos);
+ kfree_skb(skb);
+}
+
+/*
+ * Figure out, may we reply to this packet with icmp error.
+ *
+ * We do not reply, if:
+ * - it was icmp error message.
+ * - it is truncated, so that it is known, that protocol is ICMPV6
+ * (i.e. in the middle of some exthdr)
+ *
+ * --ANK (980726)
+ */
+
+static int is_ineligible(struct sk_buff *skb)
+{
+ int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
+ int len = skb->len - ptr;
+ __u8 nexthdr = ipv6_hdr(skb)->nexthdr;
+ __be16 frag_off;
+
+ if (len < 0)
+ return 1;
+
+ ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
+ if (ptr < 0)
+ return 0;
+ if (nexthdr == IPPROTO_ICMPV6) {
+ u8 _type, *tp;
+ tp = skb_header_pointer(skb,
+ ptr+offsetof(struct icmp6hdr, icmp6_type),
+ sizeof(_type), &_type);
+ if (tp == NULL ||
+ !(*tp & ICMPV6_INFOMSG_MASK))
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * Check the ICMP output rate limit
+ */
+static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
+ struct flowi6 *fl6)
+{
+ struct dst_entry *dst;
+ struct net *net = sock_net(sk);
+ bool res = false;
+
+ /* Informational messages are not limited. */
+ if (type & ICMPV6_INFOMSG_MASK)
+ return true;
+
+ /* Do not limit pmtu discovery, it would break it. */
+ if (type == ICMPV6_PKT_TOOBIG)
+ return true;
+
+ /*
+ * Look up the output route.
+ * XXX: perhaps the expire for routing entries cloned by
+ * this lookup should be more aggressive (not longer than timeout).
+ */
+ dst = ip6_route_output(net, sk, fl6);
+ if (dst->error) {
+ IP6_INC_STATS(net, ip6_dst_idev(dst),
+ IPSTATS_MIB_OUTNOROUTES);
+ } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
+ res = true;
+ } else {
+ struct rt6_info *rt = (struct rt6_info *)dst;
+ int tmo = net->ipv6.sysctl.icmpv6_time;
+
+ /* Give more bandwidth to wider prefixes. */
+ if (rt->rt6i_dst.plen < 128)
+ tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
+
+ if (!rt->rt6i_peer)
+ rt6_bind_peer(rt, 1);
+ res = inet_peer_xrlim_allow(rt->rt6i_peer, tmo);
+ }
+ dst_release(dst);
+ return res;
+}
+
+/*
+ * an inline helper for the "simple" if statement below
+ * checks if parameter problem report is caused by an
+ * unrecognized IPv6 option that has the Option Type
+ * highest-order two bits set to 10
+ */
+
+static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset)
+{
+ u8 _optval, *op;
+
+ offset += skb_network_offset(skb);
+ op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
+ if (op == NULL)
+ return 1;
+ return (*op & 0xC0) == 0x80;
+}
+
+int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
+ struct icmp6hdr *thdr, int len)
+{
+ struct sk_buff *skb;
+ struct icmp6hdr *icmp6h;
+ int err = 0;
+
+ if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
+ goto out;
+
+ icmp6h = icmp6_hdr(skb);
+ memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
+ icmp6h->icmp6_cksum = 0;
+
+ if (skb_queue_len(&sk->sk_write_queue) == 1) {
+ skb->csum = csum_partial(icmp6h,
+ sizeof(struct icmp6hdr), skb->csum);
+ icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
+ &fl6->daddr,
+ len, fl6->flowi6_proto,
+ skb->csum);
+ } else {
+ __wsum tmp_csum = 0;
+
+ skb_queue_walk(&sk->sk_write_queue, skb) {
+ tmp_csum = csum_add(tmp_csum, skb->csum);
+ }
+
+ tmp_csum = csum_partial(icmp6h,
+ sizeof(struct icmp6hdr), tmp_csum);
+ icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
+ &fl6->daddr,
+ len, fl6->flowi6_proto,
+ tmp_csum);
+ }
+ ip6_push_pending_frames(sk);
+out:
+ return err;
+}
+
+struct icmpv6_msg {
+ struct sk_buff *skb;
+ int offset;
+ uint8_t type;
+};
+
+static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
+{
+ struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
+ struct sk_buff *org_skb = msg->skb;
+ __wsum csum = 0;
+
+ csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
+ to, len, csum);
+ skb->csum = csum_block_add(skb->csum, csum, odd);
+ if (!(msg->type & ICMPV6_INFOMSG_MASK))
+ nf_ct_attach(skb, org_skb);
+ return 0;
+}
+
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+static void mip6_addr_swap(struct sk_buff *skb)
+{
+ struct ipv6hdr *iph = ipv6_hdr(skb);
+ struct inet6_skb_parm *opt = IP6CB(skb);
+ struct ipv6_destopt_hao *hao;
+ struct in6_addr tmp;
+ int off;
+
+ if (opt->dsthao) {
+ off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
+ if (likely(off >= 0)) {
+ hao = (struct ipv6_destopt_hao *)
+ (skb_network_header(skb) + off);
+ tmp = iph->saddr;
+ iph->saddr = hao->addr;
+ hao->addr = tmp;
+ }
+ }
+}
+#else
+static inline void mip6_addr_swap(struct sk_buff *skb) {}
+#endif
+
+struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb,
+ struct sock *sk, struct flowi6 *fl6)
+{
+ struct dst_entry *dst, *dst2;
+ struct flowi6 fl2;
+ int err;
+
+ err = ip6_dst_lookup(sk, &dst, fl6);
+ if (err)
+ return ERR_PTR(err);
+
+ /*
+ * We won't send icmp if the destination is known
+ * anycast.
+ */
+ if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
+ LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: acast source\n");
+ dst_release(dst);
+ return ERR_PTR(-EINVAL);
+ }
+
+ /* No need to clone since we're just using its address. */
+ dst2 = dst;
+
+ dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
+ if (!IS_ERR(dst)) {
+ if (dst != dst2)
+ return dst;
+ } else {
+ if (PTR_ERR(dst) == -EPERM)
+ dst = NULL;
+ else
+ return dst;
+ }
+
+ err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
+ if (err)
+ goto relookup_failed;
+
+ err = ip6_dst_lookup(sk, &dst2, &fl2);
+ if (err)
+ goto relookup_failed;
+
+ dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
+ if (!IS_ERR(dst2)) {
+ dst_release(dst);
+ dst = dst2;
+ } else {
+ err = PTR_ERR(dst2);
+ if (err == -EPERM) {
+ dst_release(dst);
+ return dst2;
+ } else
+ goto relookup_failed;
+ }
+
+relookup_failed:
+ if (dst)
+ return dst;
+ return ERR_PTR(err);
+}
+
+/*
+ * Send an ICMP message in response to a packet in error
+ */
+void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
+{
+ struct net *net = dev_net(skb->dev);
+ struct inet6_dev *idev = NULL;
+ struct ipv6hdr *hdr = ipv6_hdr(skb);
+ struct sock *sk;
+ struct ipv6_pinfo *np;
+ const struct in6_addr *saddr = NULL;
+ struct dst_entry *dst;
+ struct icmp6hdr tmp_hdr;
+ struct flowi6 fl6;
+ struct icmpv6_msg msg;
+ int iif = 0;
+ int addr_type = 0;
+ int len;
+ int hlimit;
+ int err = 0;
+
+ if ((u8 *)hdr < skb->head ||
+ (skb->network_header + sizeof(*hdr)) > skb->tail)
+ return;
+
+ /*
+ * Make sure we respect the rules
+ * i.e. RFC 1885 2.4(e)
+ * Rule (e.1) is enforced by not using icmpv6_send
+ * in any code that processes icmp errors.
+ */
+ addr_type = ipv6_addr_type(&hdr->daddr);
+
+ if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0))
+ saddr = &hdr->daddr;
+
+ /*
+ * Dest addr check
+ */
+
+ if ((addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST)) {
+ if (type != ICMPV6_PKT_TOOBIG &&
+ !(type == ICMPV6_PARAMPROB &&
+ code == ICMPV6_UNK_OPTION &&
+ (opt_unrec(skb, info))))
+ return;
+
+ saddr = NULL;
+ }
+
+ addr_type = ipv6_addr_type(&hdr->saddr);
+
+ /*
+ * Source addr check
+ */
+
+ if (addr_type & IPV6_ADDR_LINKLOCAL)
+ iif = skb->dev->ifindex;
+
+ /*
+ * Must not send error if the source does not uniquely
+ * identify a single node (RFC2463 Section 2.4).
+ * We check unspecified / multicast addresses here,
+ * and anycast addresses will be checked later.
+ */
+ if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
+ LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n");
+ return;
+ }
+
+ /*
+ * Never answer to a ICMP packet.
+ */
+ if (is_ineligible(skb)) {
+ LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: no reply to icmp error\n");
+ return;
+ }
+
+ mip6_addr_swap(skb);
+
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_proto = IPPROTO_ICMPV6;
+ fl6.daddr = hdr->saddr;
+ if (saddr)
+ fl6.saddr = *saddr;
+ fl6.flowi6_oif = iif;
+ fl6.fl6_icmp_type = type;
+ fl6.fl6_icmp_code = code;
+ security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
+
+ sk = icmpv6_xmit_lock(net);
+ if (sk == NULL)
+ return;
+ np = inet6_sk(sk);
+
+ if (!icmpv6_xrlim_allow(sk, type, &fl6))
+ goto out;
+
+ tmp_hdr.icmp6_type = type;
+ tmp_hdr.icmp6_code = code;
+ tmp_hdr.icmp6_cksum = 0;
+ tmp_hdr.icmp6_pointer = htonl(info);
+
+ if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
+ fl6.flowi6_oif = np->mcast_oif;
+ else if (!fl6.flowi6_oif)
+ fl6.flowi6_oif = np->ucast_oif;
+
+ dst = icmpv6_route_lookup(net, skb, sk, &fl6);
+ if (IS_ERR(dst))
+ goto out;
+
+ if (ipv6_addr_is_multicast(&fl6.daddr))
+ hlimit = np->mcast_hops;
+ else
+ hlimit = np->hop_limit;
+ if (hlimit < 0)
+ hlimit = ip6_dst_hoplimit(dst);
+
+ msg.skb = skb;
+ msg.offset = skb_network_offset(skb);
+ msg.type = type;
+
+ len = skb->len - msg.offset;
+ len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) -sizeof(struct icmp6hdr));
+ if (len < 0) {
+ LIMIT_NETDEBUG(KERN_DEBUG "icmp: len problem\n");
+ goto out_dst_release;
+ }
+
+ rcu_read_lock();
+ idev = __in6_dev_get(skb->dev);
+
+ err = ip6_append_data(sk, icmpv6_getfrag, &msg,
+ len + sizeof(struct icmp6hdr),
+ sizeof(struct icmp6hdr), hlimit,
+ np->tclass, NULL, &fl6, (struct rt6_info*)dst,
+ MSG_DONTWAIT, np->dontfrag);
+ if (err) {
+ ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
+ ip6_flush_pending_frames(sk);
+ } else {
+ err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
+ len + sizeof(struct icmp6hdr));
+ }
+ rcu_read_unlock();
+out_dst_release:
+ dst_release(dst);
+out:
+ icmpv6_xmit_unlock(sk);
+}
+EXPORT_SYMBOL(icmpv6_send);
+
+static void icmpv6_echo_reply(struct sk_buff *skb)
+{
+ struct net *net = dev_net(skb->dev);
+ struct sock *sk;
+ struct inet6_dev *idev;
+ struct ipv6_pinfo *np;
+ const struct in6_addr *saddr = NULL;
+ struct icmp6hdr *icmph = icmp6_hdr(skb);
+ struct icmp6hdr tmp_hdr;
+ struct flowi6 fl6;
+ struct icmpv6_msg msg;
+ struct dst_entry *dst;
+ int err = 0;
+ int hlimit;
+
+ saddr = &ipv6_hdr(skb)->daddr;
+
+ if (!ipv6_unicast_destination(skb))
+ saddr = NULL;
+
+ memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
+ tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
+
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_proto = IPPROTO_ICMPV6;
+ fl6.daddr = ipv6_hdr(skb)->saddr;
+ if (saddr)
+ fl6.saddr = *saddr;
+ fl6.flowi6_oif = skb->dev->ifindex;
+ fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
+ security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
+
+ sk = icmpv6_xmit_lock(net);
+ if (sk == NULL)
+ return;
+ np = inet6_sk(sk);
+
+ if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
+ fl6.flowi6_oif = np->mcast_oif;
+ else if (!fl6.flowi6_oif)
+ fl6.flowi6_oif = np->ucast_oif;
+
+ err = ip6_dst_lookup(sk, &dst, &fl6);
+ if (err)
+ goto out;
+ dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
+ if (IS_ERR(dst))
+ goto out;
+
+ if (ipv6_addr_is_multicast(&fl6.daddr))
+ hlimit = np->mcast_hops;
+ else
+ hlimit = np->hop_limit;
+ if (hlimit < 0)
+ hlimit = ip6_dst_hoplimit(dst);
+
+ idev = __in6_dev_get(skb->dev);
+
+ msg.skb = skb;
+ msg.offset = 0;
+ msg.type = ICMPV6_ECHO_REPLY;
+
+ err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
+ sizeof(struct icmp6hdr), hlimit, np->tclass, NULL, &fl6,
+ (struct rt6_info*)dst, MSG_DONTWAIT,
+ np->dontfrag);
+
+ if (err) {
+ ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
+ ip6_flush_pending_frames(sk);
+ } else {
+ err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
+ skb->len + sizeof(struct icmp6hdr));
+ }
+ dst_release(dst);
+out:
+ icmpv6_xmit_unlock(sk);
+}
+
+void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
+{
+ const struct inet6_protocol *ipprot;
+ int inner_offset;
+ int hash;
+ u8 nexthdr;
+ __be16 frag_off;
+
+ if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
+ return;
+
+ nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
+ if (ipv6_ext_hdr(nexthdr)) {
+ /* now skip over extension headers */
+ inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
+ &nexthdr, &frag_off);
+ if (inner_offset<0)
+ return;
+ } else {
+ inner_offset = sizeof(struct ipv6hdr);
+ }
+
+ /* Checkin header including 8 bytes of inner protocol header. */
+ if (!pskb_may_pull(skb, inner_offset+8))
+ return;
+
+ /* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
+ Without this we will not able f.e. to make source routed
+ pmtu discovery.
+ Corresponding argument (opt) to notifiers is already added.
+ --ANK (980726)
+ */
+
+ hash = nexthdr & (MAX_INET_PROTOS - 1);
+
+ rcu_read_lock();
+ ipprot = rcu_dereference(inet6_protos[hash]);
+ if (ipprot && ipprot->err_handler)
+ ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
+ rcu_read_unlock();
+
+ raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
+}
+
+/*
+ * Handle icmp messages
+ */
+
+static int icmpv6_rcv(struct sk_buff *skb)
+{
+ struct net_device *dev = skb->dev;
+ struct inet6_dev *idev = __in6_dev_get(dev);
+ const struct in6_addr *saddr, *daddr;
+ const struct ipv6hdr *orig_hdr;
+ struct icmp6hdr *hdr;
+ u8 type;
+
+ if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+ struct sec_path *sp = skb_sec_path(skb);
+ int nh;
+
+ if (!(sp && sp->xvec[sp->len - 1]->props.flags &
+ XFRM_STATE_ICMP))
+ goto drop_no_count;
+
+ if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(*orig_hdr)))
+ goto drop_no_count;
+
+ nh = skb_network_offset(skb);
+ skb_set_network_header(skb, sizeof(*hdr));
+
+ if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
+ goto drop_no_count;
+
+ skb_set_network_header(skb, nh);
+ }
+
+ ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INMSGS);
+
+ saddr = &ipv6_hdr(skb)->saddr;
+ daddr = &ipv6_hdr(skb)->daddr;
+
+ /* Perform checksum. */
+ switch (skb->ip_summed) {
+ case CHECKSUM_COMPLETE:
+ if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
+ skb->csum))
+ break;
+ /* fall through */
+ case CHECKSUM_NONE:
+ skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len,
+ IPPROTO_ICMPV6, 0));
+ if (__skb_checksum_complete(skb)) {
+ LIMIT_NETDEBUG(KERN_DEBUG
+ "ICMPv6 checksum failed [%pI6c > %pI6c]\n",
+ saddr, daddr);
+ goto discard_it;
+ }
+ }
+
+ if (!pskb_pull(skb, sizeof(*hdr)))
+ goto discard_it;
+
+ hdr = icmp6_hdr(skb);
+
+ type = hdr->icmp6_type;
+
+ ICMP6MSGIN_INC_STATS_BH(dev_net(dev), idev, type);
+
+ switch (type) {
+ case ICMPV6_ECHO_REQUEST:
+ icmpv6_echo_reply(skb);
+ break;
+
+ case ICMPV6_ECHO_REPLY:
+ ping_rcv(skb);
+ break;
+
+ case ICMPV6_PKT_TOOBIG:
+ /* BUGGG_FUTURE: if packet contains rthdr, we cannot update
+ standard destination cache. Seems, only "advanced"
+ destination cache will allow to solve this problem
+ --ANK (980726)
+ */
+ if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
+ goto discard_it;
+ hdr = icmp6_hdr(skb);
+ orig_hdr = (struct ipv6hdr *) (hdr + 1);
+ rt6_pmtu_discovery(&orig_hdr->daddr, &orig_hdr->saddr, dev,
+ ntohl(hdr->icmp6_mtu));
+
+ /*
+ * Drop through to notify
+ */
+
+ case ICMPV6_DEST_UNREACH:
+ case ICMPV6_TIME_EXCEED:
+ case ICMPV6_PARAMPROB:
+ icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
+ break;
+
+ case NDISC_ROUTER_SOLICITATION:
+ case NDISC_ROUTER_ADVERTISEMENT:
+ case NDISC_NEIGHBOUR_SOLICITATION:
+ case NDISC_NEIGHBOUR_ADVERTISEMENT:
+ case NDISC_REDIRECT:
+ ndisc_rcv(skb);
+ break;
+
+ case ICMPV6_MGM_QUERY:
+ igmp6_event_query(skb);
+ break;
+
+ case ICMPV6_MGM_REPORT:
+ igmp6_event_report(skb);
+ break;
+
+ case ICMPV6_MGM_REDUCTION:
+ case ICMPV6_NI_QUERY:
+ case ICMPV6_NI_REPLY:
+ case ICMPV6_MLD2_REPORT:
+ case ICMPV6_DHAAD_REQUEST:
+ case ICMPV6_DHAAD_REPLY:
+ case ICMPV6_MOBILE_PREFIX_SOL:
+ case ICMPV6_MOBILE_PREFIX_ADV:
+ break;
+
+ default:
+ LIMIT_NETDEBUG(KERN_DEBUG "icmpv6: msg of unknown type\n");
+
+ /* informational */
+ if (type & ICMPV6_INFOMSG_MASK)
+ break;
+
+ /*
+ * error of unknown type.
+ * must pass to upper level
+ */
+
+ icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
+ }
+
+ kfree_skb(skb);
+ return 0;
+
+discard_it:
+ ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INERRORS);
+drop_no_count:
+ kfree_skb(skb);
+ return 0;
+}
+
+void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
+ u8 type,
+ const struct in6_addr *saddr,
+ const struct in6_addr *daddr,
+ int oif)
+{
+ memset(fl6, 0, sizeof(*fl6));
+ fl6->saddr = *saddr;
+ fl6->daddr = *daddr;
+ fl6->flowi6_proto = IPPROTO_ICMPV6;
+ fl6->fl6_icmp_type = type;
+ fl6->fl6_icmp_code = 0;
+ fl6->flowi6_oif = oif;
+ security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
+}
+
+/*
+ * Special lock-class for __icmpv6_sk:
+ */
+static struct lock_class_key icmpv6_socket_sk_dst_lock_key;
+
+static int __net_init icmpv6_sk_init(struct net *net)
+{
+ struct sock *sk;
+ int err, i, j;
+
+ net->ipv6.icmp_sk =
+ kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL);
+ if (net->ipv6.icmp_sk == NULL)
+ return -ENOMEM;
+
+ for_each_possible_cpu(i) {
+ err = inet_ctl_sock_create(&sk, PF_INET6,
+ SOCK_RAW, IPPROTO_ICMPV6, net);
+ if (err < 0) {
+ printk(KERN_ERR
+ "Failed to initialize the ICMP6 control socket "
+ "(err %d).\n",
+ err);
+ goto fail;
+ }
+
+ net->ipv6.icmp_sk[i] = sk;
+
+ /*
+ * Split off their lock-class, because sk->sk_dst_lock
+ * gets used from softirqs, which is safe for
+ * __icmpv6_sk (because those never get directly used
+ * via userspace syscalls), but unsafe for normal sockets.
+ */
+ lockdep_set_class(&sk->sk_dst_lock,
+ &icmpv6_socket_sk_dst_lock_key);
+
+ /* Enough space for 2 64K ICMP packets, including
+ * sk_buff struct overhead.
+ */
+ sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
+ }
+ return 0;
+
+ fail:
+ for (j = 0; j < i; j++)
+ inet_ctl_sock_destroy(net->ipv6.icmp_sk[j]);
+ kfree(net->ipv6.icmp_sk);
+ return err;
+}
+
+static void __net_exit icmpv6_sk_exit(struct net *net)
+{
+ int i;
+
+ for_each_possible_cpu(i) {
+ inet_ctl_sock_destroy(net->ipv6.icmp_sk[i]);
+ }
+ kfree(net->ipv6.icmp_sk);
+}
+
+static struct pernet_operations icmpv6_sk_ops = {
+ .init = icmpv6_sk_init,
+ .exit = icmpv6_sk_exit,
+};
+
+int __init icmpv6_init(void)
+{
+ int err;
+
+ err = register_pernet_subsys(&icmpv6_sk_ops);
+ if (err < 0)
+ return err;
+
+ err = -EAGAIN;
+ if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
+ goto fail;
+ return 0;
+
+fail:
+ printk(KERN_ERR "Failed to register ICMP6 protocol\n");
+ unregister_pernet_subsys(&icmpv6_sk_ops);
+ return err;
+}
+
+void icmpv6_cleanup(void)
+{
+ unregister_pernet_subsys(&icmpv6_sk_ops);
+ inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
+}
+
+
+static const struct icmp6_err {
+ int err;
+ int fatal;
+} tab_unreach[] = {
+ { /* NOROUTE */
+ .err = ENETUNREACH,
+ .fatal = 0,
+ },
+ { /* ADM_PROHIBITED */
+ .err = EACCES,
+ .fatal = 1,
+ },
+ { /* Was NOT_NEIGHBOUR, now reserved */
+ .err = EHOSTUNREACH,
+ .fatal = 0,
+ },
+ { /* ADDR_UNREACH */
+ .err = EHOSTUNREACH,
+ .fatal = 0,
+ },
+ { /* PORT_UNREACH */
+ .err = ECONNREFUSED,
+ .fatal = 1,
+ },
+};
+
+int icmpv6_err_convert(u8 type, u8 code, int *err)
+{
+ int fatal = 0;
+
+ *err = EPROTO;
+
+ switch (type) {
+ case ICMPV6_DEST_UNREACH:
+ fatal = 1;
+ if (code <= ICMPV6_PORT_UNREACH) {
+ *err = tab_unreach[code].err;
+ fatal = tab_unreach[code].fatal;
+ }
+ break;
+
+ case ICMPV6_PKT_TOOBIG:
+ *err = EMSGSIZE;
+ break;
+
+ case ICMPV6_PARAMPROB:
+ *err = EPROTO;
+ fatal = 1;
+ break;
+
+ case ICMPV6_TIME_EXCEED:
+ *err = EHOSTUNREACH;
+ break;
+ }
+
+ return fatal;
+}
+
+EXPORT_SYMBOL(icmpv6_err_convert);
+
+#ifdef CONFIG_SYSCTL
+ctl_table ipv6_icmp_table_template[] = {
+ {
+ .procname = "ratelimit",
+ .data = &init_net.ipv6.sysctl.icmpv6_time,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_ms_jiffies,
+ },
+ { },
+};
+
+struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
+{
+ struct ctl_table *table;
+
+ table = kmemdup(ipv6_icmp_table_template,
+ sizeof(ipv6_icmp_table_template),
+ GFP_KERNEL);
+
+ if (table)
+ table[0].data = &net->ipv6.sysctl.icmpv6_time;
+
+ return table;
+}
+#endif
+
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
new file mode 100644
index 00000000..02dd203d
--- /dev/null
+++ b/net/ipv6/inet6_connection_sock.c
@@ -0,0 +1,255 @@
+/*
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * Support for INET6 connection oriented protocols.
+ *
+ * Authors: See the TCPv6 sources
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or(at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/in6.h>
+#include <linux/ipv6.h>
+#include <linux/jhash.h>
+#include <linux/slab.h>
+
+#include <net/addrconf.h>
+#include <net/inet_connection_sock.h>
+#include <net/inet_ecn.h>
+#include <net/inet_hashtables.h>
+#include <net/ip6_route.h>
+#include <net/sock.h>
+#include <net/inet6_connection_sock.h>
+
+int inet6_csk_bind_conflict(const struct sock *sk,
+ const struct inet_bind_bucket *tb)
+{
+ const struct sock *sk2;
+ const struct hlist_node *node;
+
+ /* We must walk the whole port owner list in this case. -DaveM */
+ /*
+ * See comment in inet_csk_bind_conflict about sock lookup
+ * vs net namespaces issues.
+ */
+ sk_for_each_bound(sk2, node, &tb->owners) {
+ if (sk != sk2 &&
+ (!sk->sk_bound_dev_if ||
+ !sk2->sk_bound_dev_if ||
+ sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
+ (!sk->sk_reuse || !sk2->sk_reuse ||
+ sk2->sk_state == TCP_LISTEN) &&
+ ipv6_rcv_saddr_equal(sk, sk2))
+ break;
+ }
+
+ return node != NULL;
+}
+
+EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict);
+
+struct dst_entry *inet6_csk_route_req(struct sock *sk,
+ const struct request_sock *req)
+{
+ struct inet6_request_sock *treq = inet6_rsk(req);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct in6_addr *final_p, final;
+ struct dst_entry *dst;
+ struct flowi6 fl6;
+
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_proto = IPPROTO_TCP;
+ fl6.daddr = treq->rmt_addr;
+ final_p = fl6_update_dst(&fl6, np->opt, &final);
+ fl6.saddr = treq->loc_addr;
+ fl6.flowi6_oif = sk->sk_bound_dev_if;
+ fl6.flowi6_mark = sk->sk_mark;
+ fl6.fl6_dport = inet_rsk(req)->rmt_port;
+ fl6.fl6_sport = inet_rsk(req)->loc_port;
+ security_req_classify_flow(req, flowi6_to_flowi(&fl6));
+
+ dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
+ if (IS_ERR(dst))
+ return NULL;
+
+ return dst;
+}
+
+/*
+ * request_sock (formerly open request) hash tables.
+ */
+static u32 inet6_synq_hash(const struct in6_addr *raddr, const __be16 rport,
+ const u32 rnd, const u32 synq_hsize)
+{
+ u32 c;
+
+ c = jhash_3words((__force u32)raddr->s6_addr32[0],
+ (__force u32)raddr->s6_addr32[1],
+ (__force u32)raddr->s6_addr32[2],
+ rnd);
+
+ c = jhash_2words((__force u32)raddr->s6_addr32[3],
+ (__force u32)rport,
+ c);
+
+ return c & (synq_hsize - 1);
+}
+
+struct request_sock *inet6_csk_search_req(const struct sock *sk,
+ struct request_sock ***prevp,
+ const __be16 rport,
+ const struct in6_addr *raddr,
+ const struct in6_addr *laddr,
+ const int iif)
+{
+ const struct inet_connection_sock *icsk = inet_csk(sk);
+ struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
+ struct request_sock *req, **prev;
+
+ for (prev = &lopt->syn_table[inet6_synq_hash(raddr, rport,
+ lopt->hash_rnd,
+ lopt->nr_table_entries)];
+ (req = *prev) != NULL;
+ prev = &req->dl_next) {
+ const struct inet6_request_sock *treq = inet6_rsk(req);
+
+ if (inet_rsk(req)->rmt_port == rport &&
+ req->rsk_ops->family == AF_INET6 &&
+ ipv6_addr_equal(&treq->rmt_addr, raddr) &&
+ ipv6_addr_equal(&treq->loc_addr, laddr) &&
+ (!treq->iif || treq->iif == iif)) {
+ WARN_ON(req->sk != NULL);
+ *prevp = prev;
+ return req;
+ }
+ }
+
+ return NULL;
+}
+
+EXPORT_SYMBOL_GPL(inet6_csk_search_req);
+
+void inet6_csk_reqsk_queue_hash_add(struct sock *sk,
+ struct request_sock *req,
+ const unsigned long timeout)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
+ const u32 h = inet6_synq_hash(&inet6_rsk(req)->rmt_addr,
+ inet_rsk(req)->rmt_port,
+ lopt->hash_rnd, lopt->nr_table_entries);
+
+ reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout);
+ inet_csk_reqsk_queue_added(sk, timeout);
+}
+
+EXPORT_SYMBOL_GPL(inet6_csk_reqsk_queue_hash_add);
+
+void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
+
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_addr = np->daddr;
+ sin6->sin6_port = inet_sk(sk)->inet_dport;
+ /* We do not store received flowlabel for TCP */
+ sin6->sin6_flowinfo = 0;
+ sin6->sin6_scope_id = 0;
+ if (sk->sk_bound_dev_if &&
+ ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
+ sin6->sin6_scope_id = sk->sk_bound_dev_if;
+}
+
+EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr);
+
+static inline
+void __inet6_csk_dst_store(struct sock *sk, struct dst_entry *dst,
+ struct in6_addr *daddr, struct in6_addr *saddr)
+{
+ __ip6_dst_store(sk, dst, daddr, saddr);
+
+#ifdef CONFIG_XFRM
+ {
+ struct rt6_info *rt = (struct rt6_info *)dst;
+ rt->rt6i_flow_cache_genid = atomic_read(&flow_cache_genid);
+ }
+#endif
+}
+
+static inline
+struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie)
+{
+ struct dst_entry *dst;
+
+ dst = __sk_dst_check(sk, cookie);
+
+#ifdef CONFIG_XFRM
+ if (dst) {
+ struct rt6_info *rt = (struct rt6_info *)dst;
+ if (rt->rt6i_flow_cache_genid != atomic_read(&flow_cache_genid)) {
+ __sk_dst_reset(sk);
+ dst = NULL;
+ }
+ }
+#endif
+
+ return dst;
+}
+
+int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused)
+{
+ struct sock *sk = skb->sk;
+ struct inet_sock *inet = inet_sk(sk);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct flowi6 fl6;
+ struct dst_entry *dst;
+ struct in6_addr *final_p, final;
+ int res;
+
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_proto = sk->sk_protocol;
+ fl6.daddr = np->daddr;
+ fl6.saddr = np->saddr;
+ fl6.flowlabel = np->flow_label;
+ IP6_ECN_flow_xmit(sk, fl6.flowlabel);
+ fl6.flowi6_oif = sk->sk_bound_dev_if;
+ fl6.flowi6_mark = sk->sk_mark;
+ fl6.fl6_sport = inet->inet_sport;
+ fl6.fl6_dport = inet->inet_dport;
+ security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
+
+ final_p = fl6_update_dst(&fl6, np->opt, &final);
+
+ dst = __inet6_csk_dst_check(sk, np->dst_cookie);
+
+ if (dst == NULL) {
+ dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
+
+ if (IS_ERR(dst)) {
+ sk->sk_err_soft = -PTR_ERR(dst);
+ sk->sk_route_caps = 0;
+ kfree_skb(skb);
+ return PTR_ERR(dst);
+ }
+
+ __inet6_csk_dst_store(sk, dst, NULL, NULL);
+ }
+
+ rcu_read_lock();
+ skb_dst_set_noref(skb, dst);
+
+ /* Restore final destination back after routing done */
+ fl6.daddr = np->daddr;
+
+ res = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass);
+ rcu_read_unlock();
+ return res;
+}
+EXPORT_SYMBOL_GPL(inet6_csk_xmit);
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
new file mode 100644
index 00000000..73f1a00a
--- /dev/null
+++ b/net/ipv6/inet6_hashtables.c
@@ -0,0 +1,304 @@
+/*
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * Generic INET6 transport hashtables
+ *
+ * Authors: Lotsa people, from code originally in tcp, generalised here
+ * by Arnaldo Carvalho de Melo <acme@mandriva.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/random.h>
+
+#include <net/inet_connection_sock.h>
+#include <net/inet_hashtables.h>
+#include <net/inet6_hashtables.h>
+#include <net/secure_seq.h>
+#include <net/ip.h>
+
+int __inet6_hash(struct sock *sk, struct inet_timewait_sock *tw)
+{
+ struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
+ int twrefcnt = 0;
+
+ WARN_ON(!sk_unhashed(sk));
+
+ if (sk->sk_state == TCP_LISTEN) {
+ struct inet_listen_hashbucket *ilb;
+
+ ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
+ spin_lock(&ilb->lock);
+ __sk_nulls_add_node_rcu(sk, &ilb->head);
+ spin_unlock(&ilb->lock);
+ } else {
+ unsigned int hash;
+ struct hlist_nulls_head *list;
+ spinlock_t *lock;
+
+ sk->sk_hash = hash = inet6_sk_ehashfn(sk);
+ list = &inet_ehash_bucket(hashinfo, hash)->chain;
+ lock = inet_ehash_lockp(hashinfo, hash);
+ spin_lock(lock);
+ __sk_nulls_add_node_rcu(sk, list);
+ if (tw) {
+ WARN_ON(sk->sk_hash != tw->tw_hash);
+ twrefcnt = inet_twsk_unhash(tw);
+ }
+ spin_unlock(lock);
+ }
+
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+ return twrefcnt;
+}
+EXPORT_SYMBOL(__inet6_hash);
+
+/*
+ * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
+ * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
+ *
+ * The sockhash lock must be held as a reader here.
+ */
+struct sock *__inet6_lookup_established(struct net *net,
+ struct inet_hashinfo *hashinfo,
+ const struct in6_addr *saddr,
+ const __be16 sport,
+ const struct in6_addr *daddr,
+ const u16 hnum,
+ const int dif)
+{
+ struct sock *sk;
+ const struct hlist_nulls_node *node;
+ const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
+ /* Optimize here for direct hit, only listening connections can
+ * have wildcards anyways.
+ */
+ unsigned int hash = inet6_ehashfn(net, daddr, hnum, saddr, sport);
+ unsigned int slot = hash & hashinfo->ehash_mask;
+ struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
+
+
+ rcu_read_lock();
+begin:
+ sk_nulls_for_each_rcu(sk, node, &head->chain) {
+ /* For IPV6 do the cheaper port and family tests first. */
+ if (INET6_MATCH(sk, net, hash, saddr, daddr, ports, dif)) {
+ if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))
+ goto begintw;
+ if (!INET6_MATCH(sk, net, hash, saddr, daddr, ports, dif)) {
+ sock_put(sk);
+ goto begin;
+ }
+ goto out;
+ }
+ }
+ if (get_nulls_value(node) != slot)
+ goto begin;
+
+begintw:
+ /* Must check for a TIME_WAIT'er before going to listener hash. */
+ sk_nulls_for_each_rcu(sk, node, &head->twchain) {
+ if (INET6_TW_MATCH(sk, net, hash, saddr, daddr, ports, dif)) {
+ if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) {
+ sk = NULL;
+ goto out;
+ }
+ if (!INET6_TW_MATCH(sk, net, hash, saddr, daddr, ports, dif)) {
+ sock_put(sk);
+ goto begintw;
+ }
+ goto out;
+ }
+ }
+ if (get_nulls_value(node) != slot)
+ goto begintw;
+ sk = NULL;
+out:
+ rcu_read_unlock();
+ return sk;
+}
+EXPORT_SYMBOL(__inet6_lookup_established);
+
+static inline int compute_score(struct sock *sk, struct net *net,
+ const unsigned short hnum,
+ const struct in6_addr *daddr,
+ const int dif)
+{
+ int score = -1;
+
+ if (net_eq(sock_net(sk), net) && inet_sk(sk)->inet_num == hnum &&
+ sk->sk_family == PF_INET6) {
+ const struct ipv6_pinfo *np = inet6_sk(sk);
+
+ score = 1;
+ if (!ipv6_addr_any(&np->rcv_saddr)) {
+ if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
+ return -1;
+ score++;
+ }
+ if (sk->sk_bound_dev_if) {
+ if (sk->sk_bound_dev_if != dif)
+ return -1;
+ score++;
+ }
+ }
+ return score;
+}
+
+struct sock *inet6_lookup_listener(struct net *net,
+ struct inet_hashinfo *hashinfo, const struct in6_addr *daddr,
+ const unsigned short hnum, const int dif)
+{
+ struct sock *sk;
+ const struct hlist_nulls_node *node;
+ struct sock *result;
+ int score, hiscore;
+ unsigned int hash = inet_lhashfn(net, hnum);
+ struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
+
+ rcu_read_lock();
+begin:
+ result = NULL;
+ hiscore = -1;
+ sk_nulls_for_each(sk, node, &ilb->head) {
+ score = compute_score(sk, net, hnum, daddr, dif);
+ if (score > hiscore) {
+ hiscore = score;
+ result = sk;
+ }
+ }
+ /*
+ * if the nulls value we got at the end of this lookup is
+ * not the expected one, we must restart lookup.
+ * We probably met an item that was moved to another chain.
+ */
+ if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE)
+ goto begin;
+ if (result) {
+ if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
+ result = NULL;
+ else if (unlikely(compute_score(result, net, hnum, daddr,
+ dif) < hiscore)) {
+ sock_put(result);
+ goto begin;
+ }
+ }
+ rcu_read_unlock();
+ return result;
+}
+
+EXPORT_SYMBOL_GPL(inet6_lookup_listener);
+
+struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
+ const struct in6_addr *saddr, const __be16 sport,
+ const struct in6_addr *daddr, const __be16 dport,
+ const int dif)
+{
+ struct sock *sk;
+
+ local_bh_disable();
+ sk = __inet6_lookup(net, hashinfo, saddr, sport, daddr, ntohs(dport), dif);
+ local_bh_enable();
+
+ return sk;
+}
+
+EXPORT_SYMBOL_GPL(inet6_lookup);
+
+static int __inet6_check_established(struct inet_timewait_death_row *death_row,
+ struct sock *sk, const __u16 lport,
+ struct inet_timewait_sock **twp)
+{
+ struct inet_hashinfo *hinfo = death_row->hashinfo;
+ struct inet_sock *inet = inet_sk(sk);
+ const struct ipv6_pinfo *np = inet6_sk(sk);
+ const struct in6_addr *daddr = &np->rcv_saddr;
+ const struct in6_addr *saddr = &np->daddr;
+ const int dif = sk->sk_bound_dev_if;
+ const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
+ struct net *net = sock_net(sk);
+ const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr,
+ inet->inet_dport);
+ struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
+ spinlock_t *lock = inet_ehash_lockp(hinfo, hash);
+ struct sock *sk2;
+ const struct hlist_nulls_node *node;
+ struct inet_timewait_sock *tw;
+ int twrefcnt = 0;
+
+ spin_lock(lock);
+
+ /* Check TIME-WAIT sockets first. */
+ sk_nulls_for_each(sk2, node, &head->twchain) {
+ tw = inet_twsk(sk2);
+
+ if (INET6_TW_MATCH(sk2, net, hash, saddr, daddr, ports, dif)) {
+ if (twsk_unique(sk, sk2, twp))
+ goto unique;
+ else
+ goto not_unique;
+ }
+ }
+ tw = NULL;
+
+ /* And established part... */
+ sk_nulls_for_each(sk2, node, &head->chain) {
+ if (INET6_MATCH(sk2, net, hash, saddr, daddr, ports, dif))
+ goto not_unique;
+ }
+
+unique:
+ /* Must record num and sport now. Otherwise we will see
+ * in hash table socket with a funny identity. */
+ inet->inet_num = lport;
+ inet->inet_sport = htons(lport);
+ sk->sk_hash = hash;
+ WARN_ON(!sk_unhashed(sk));
+ __sk_nulls_add_node_rcu(sk, &head->chain);
+ if (tw) {
+ twrefcnt = inet_twsk_unhash(tw);
+ NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
+ }
+ spin_unlock(lock);
+ if (twrefcnt)
+ inet_twsk_put(tw);
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+
+ if (twp) {
+ *twp = tw;
+ } else if (tw) {
+ /* Silly. Should hash-dance instead... */
+ inet_twsk_deschedule(tw, death_row);
+
+ inet_twsk_put(tw);
+ }
+ return 0;
+
+not_unique:
+ spin_unlock(lock);
+ return -EADDRNOTAVAIL;
+}
+
+static inline u32 inet6_sk_port_offset(const struct sock *sk)
+{
+ const struct inet_sock *inet = inet_sk(sk);
+ const struct ipv6_pinfo *np = inet6_sk(sk);
+ return secure_ipv6_port_ephemeral(np->rcv_saddr.s6_addr32,
+ np->daddr.s6_addr32,
+ inet->inet_dport);
+}
+
+int inet6_hash_connect(struct inet_timewait_death_row *death_row,
+ struct sock *sk)
+{
+ return __inet_hash_connect(death_row, sk, inet6_sk_port_offset(sk),
+ __inet6_check_established, __inet6_hash);
+}
+
+EXPORT_SYMBOL_GPL(inet6_hash_connect);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
new file mode 100644
index 00000000..92bb9cba
--- /dev/null
+++ b/net/ipv6/ip6_fib.c
@@ -0,0 +1,1713 @@
+/*
+ * Linux INET6 implementation
+ * Forwarding Information Database
+ *
+ * Authors:
+ * Pedro Roque <roque@di.fc.ul.pt>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/*
+ * Changes:
+ * Yuji SEKIYA @USAGI: Support default route on router node;
+ * remove ip6_null_entry from the top of
+ * routing table.
+ * Ville Nuorvala: Fixed routing subtrees.
+ */
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/net.h>
+#include <linux/route.h>
+#include <linux/netdevice.h>
+#include <linux/in6.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+
+#include <net/ipv6.h>
+#include <net/ndisc.h>
+#include <net/addrconf.h>
+
+#include <net/ip6_fib.h>
+#include <net/ip6_route.h>
+
+#define RT6_DEBUG 2
+
+#if RT6_DEBUG >= 3
+#define RT6_TRACE(x...) printk(KERN_DEBUG x)
+#else
+#define RT6_TRACE(x...) do { ; } while (0)
+#endif
+
+static struct kmem_cache * fib6_node_kmem __read_mostly;
+
+enum fib_walk_state_t
+{
+#ifdef CONFIG_IPV6_SUBTREES
+ FWS_S,
+#endif
+ FWS_L,
+ FWS_R,
+ FWS_C,
+ FWS_U
+};
+
+struct fib6_cleaner_t
+{
+ struct fib6_walker_t w;
+ struct net *net;
+ int (*func)(struct rt6_info *, void *arg);
+ void *arg;
+};
+
+static DEFINE_RWLOCK(fib6_walker_lock);
+
+#ifdef CONFIG_IPV6_SUBTREES
+#define FWS_INIT FWS_S
+#else
+#define FWS_INIT FWS_L
+#endif
+
+static void fib6_prune_clones(struct net *net, struct fib6_node *fn,
+ struct rt6_info *rt);
+static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn);
+static struct fib6_node *fib6_repair_tree(struct net *net, struct fib6_node *fn);
+static int fib6_walk(struct fib6_walker_t *w);
+static int fib6_walk_continue(struct fib6_walker_t *w);
+
+/*
+ * A routing update causes an increase of the serial number on the
+ * affected subtree. This allows for cached routes to be asynchronously
+ * tested when modifications are made to the destination cache as a
+ * result of redirects, path MTU changes, etc.
+ */
+
+static __u32 rt_sernum;
+
+static void fib6_gc_timer_cb(unsigned long arg);
+
+static LIST_HEAD(fib6_walkers);
+#define FOR_WALKERS(w) list_for_each_entry(w, &fib6_walkers, lh)
+
+static inline void fib6_walker_link(struct fib6_walker_t *w)
+{
+ write_lock_bh(&fib6_walker_lock);
+ list_add(&w->lh, &fib6_walkers);
+ write_unlock_bh(&fib6_walker_lock);
+}
+
+static inline void fib6_walker_unlink(struct fib6_walker_t *w)
+{
+ write_lock_bh(&fib6_walker_lock);
+ list_del(&w->lh);
+ write_unlock_bh(&fib6_walker_lock);
+}
+static __inline__ u32 fib6_new_sernum(void)
+{
+ u32 n = ++rt_sernum;
+ if ((__s32)n <= 0)
+ rt_sernum = n = 1;
+ return n;
+}
+
+/*
+ * Auxiliary address test functions for the radix tree.
+ *
+ * These assume a 32bit processor (although it will work on
+ * 64bit processors)
+ */
+
+/*
+ * test bit
+ */
+#if defined(__LITTLE_ENDIAN)
+# define BITOP_BE32_SWIZZLE (0x1F & ~7)
+#else
+# define BITOP_BE32_SWIZZLE 0
+#endif
+
+static __inline__ __be32 addr_bit_set(const void *token, int fn_bit)
+{
+ const __be32 *addr = token;
+ /*
+ * Here,
+ * 1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)
+ * is optimized version of
+ * htonl(1 << ((~fn_bit)&0x1F))
+ * See include/asm-generic/bitops/le.h.
+ */
+ return (__force __be32)(1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)) &
+ addr[fn_bit >> 5];
+}
+
+static __inline__ struct fib6_node * node_alloc(void)
+{
+ struct fib6_node *fn;
+
+ fn = kmem_cache_zalloc(fib6_node_kmem, GFP_ATOMIC);
+
+ return fn;
+}
+
+static __inline__ void node_free(struct fib6_node * fn)
+{
+ kmem_cache_free(fib6_node_kmem, fn);
+}
+
+static __inline__ void rt6_release(struct rt6_info *rt)
+{
+ if (atomic_dec_and_test(&rt->rt6i_ref))
+ dst_free(&rt->dst);
+}
+
+static void fib6_link_table(struct net *net, struct fib6_table *tb)
+{
+ unsigned int h;
+
+ /*
+ * Initialize table lock at a single place to give lockdep a key,
+ * tables aren't visible prior to being linked to the list.
+ */
+ rwlock_init(&tb->tb6_lock);
+
+ h = tb->tb6_id & (FIB6_TABLE_HASHSZ - 1);
+
+ /*
+ * No protection necessary, this is the only list mutatation
+ * operation, tables never disappear once they exist.
+ */
+ hlist_add_head_rcu(&tb->tb6_hlist, &net->ipv6.fib_table_hash[h]);
+}
+
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+
+static struct fib6_table *fib6_alloc_table(struct net *net, u32 id)
+{
+ struct fib6_table *table;
+
+ table = kzalloc(sizeof(*table), GFP_ATOMIC);
+ if (table) {
+ table->tb6_id = id;
+ table->tb6_root.leaf = net->ipv6.ip6_null_entry;
+ table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
+ }
+
+ return table;
+}
+
+struct fib6_table *fib6_new_table(struct net *net, u32 id)
+{
+ struct fib6_table *tb;
+
+ if (id == 0)
+ id = RT6_TABLE_MAIN;
+ tb = fib6_get_table(net, id);
+ if (tb)
+ return tb;
+
+ tb = fib6_alloc_table(net, id);
+ if (tb)
+ fib6_link_table(net, tb);
+
+ return tb;
+}
+
+struct fib6_table *fib6_get_table(struct net *net, u32 id)
+{
+ struct fib6_table *tb;
+ struct hlist_head *head;
+ struct hlist_node *node;
+ unsigned int h;
+
+ if (id == 0)
+ id = RT6_TABLE_MAIN;
+ h = id & (FIB6_TABLE_HASHSZ - 1);
+ rcu_read_lock();
+ head = &net->ipv6.fib_table_hash[h];
+ hlist_for_each_entry_rcu(tb, node, head, tb6_hlist) {
+ if (tb->tb6_id == id) {
+ rcu_read_unlock();
+ return tb;
+ }
+ }
+ rcu_read_unlock();
+
+ return NULL;
+}
+
+static void __net_init fib6_tables_init(struct net *net)
+{
+ fib6_link_table(net, net->ipv6.fib6_main_tbl);
+ fib6_link_table(net, net->ipv6.fib6_local_tbl);
+}
+#else
+
+struct fib6_table *fib6_new_table(struct net *net, u32 id)
+{
+ return fib6_get_table(net, id);
+}
+
+struct fib6_table *fib6_get_table(struct net *net, u32 id)
+{
+ return net->ipv6.fib6_main_tbl;
+}
+
+struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
+ int flags, pol_lookup_t lookup)
+{
+ return (struct dst_entry *) lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
+}
+
+static void __net_init fib6_tables_init(struct net *net)
+{
+ fib6_link_table(net, net->ipv6.fib6_main_tbl);
+}
+
+#endif
+
+static int fib6_dump_node(struct fib6_walker_t *w)
+{
+ int res;
+ struct rt6_info *rt;
+
+ for (rt = w->leaf; rt; rt = rt->dst.rt6_next) {
+ res = rt6_dump_route(rt, w->args);
+ if (res < 0) {
+ /* Frame is full, suspend walking */
+ w->leaf = rt;
+ return 1;
+ }
+ WARN_ON(res == 0);
+ }
+ w->leaf = NULL;
+ return 0;
+}
+
+static void fib6_dump_end(struct netlink_callback *cb)
+{
+ struct fib6_walker_t *w = (void*)cb->args[2];
+
+ if (w) {
+ if (cb->args[4]) {
+ cb->args[4] = 0;
+ fib6_walker_unlink(w);
+ }
+ cb->args[2] = 0;
+ kfree(w);
+ }
+ cb->done = (void*)cb->args[3];
+ cb->args[1] = 3;
+}
+
+static int fib6_dump_done(struct netlink_callback *cb)
+{
+ fib6_dump_end(cb);
+ return cb->done ? cb->done(cb) : 0;
+}
+
+static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct fib6_walker_t *w;
+ int res;
+
+ w = (void *)cb->args[2];
+ w->root = &table->tb6_root;
+
+ if (cb->args[4] == 0) {
+ w->count = 0;
+ w->skip = 0;
+
+ read_lock_bh(&table->tb6_lock);
+ res = fib6_walk(w);
+ read_unlock_bh(&table->tb6_lock);
+ if (res > 0) {
+ cb->args[4] = 1;
+ cb->args[5] = w->root->fn_sernum;
+ }
+ } else {
+ if (cb->args[5] != w->root->fn_sernum) {
+ /* Begin at the root if the tree changed */
+ cb->args[5] = w->root->fn_sernum;
+ w->state = FWS_INIT;
+ w->node = w->root;
+ w->skip = w->count;
+ } else
+ w->skip = 0;
+
+ read_lock_bh(&table->tb6_lock);
+ res = fib6_walk_continue(w);
+ read_unlock_bh(&table->tb6_lock);
+ if (res <= 0) {
+ fib6_walker_unlink(w);
+ cb->args[4] = 0;
+ }
+ }
+
+ return res;
+}
+
+static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct net *net = sock_net(skb->sk);
+ unsigned int h, s_h;
+ unsigned int e = 0, s_e;
+ struct rt6_rtnl_dump_arg arg;
+ struct fib6_walker_t *w;
+ struct fib6_table *tb;
+ struct hlist_node *node;
+ struct hlist_head *head;
+ int res = 0;
+
+ s_h = cb->args[0];
+ s_e = cb->args[1];
+
+ w = (void *)cb->args[2];
+ if (!w) {
+ /* New dump:
+ *
+ * 1. hook callback destructor.
+ */
+ cb->args[3] = (long)cb->done;
+ cb->done = fib6_dump_done;
+
+ /*
+ * 2. allocate and initialize walker.
+ */
+ w = kzalloc(sizeof(*w), GFP_ATOMIC);
+ if (!w)
+ return -ENOMEM;
+ w->func = fib6_dump_node;
+ cb->args[2] = (long)w;
+ }
+
+ arg.skb = skb;
+ arg.cb = cb;
+ arg.net = net;
+ w->args = &arg;
+
+ rcu_read_lock();
+ for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) {
+ e = 0;
+ head = &net->ipv6.fib_table_hash[h];
+ hlist_for_each_entry_rcu(tb, node, head, tb6_hlist) {
+ if (e < s_e)
+ goto next;
+ res = fib6_dump_table(tb, skb, cb);
+ if (res != 0)
+ goto out;
+next:
+ e++;
+ }
+ }
+out:
+ rcu_read_unlock();
+ cb->args[1] = e;
+ cb->args[0] = h;
+
+ res = res < 0 ? res : skb->len;
+ if (res <= 0)
+ fib6_dump_end(cb);
+ return res;
+}
+
+/*
+ * Routing Table
+ *
+ * return the appropriate node for a routing tree "add" operation
+ * by either creating and inserting or by returning an existing
+ * node.
+ */
+
+static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr,
+ int addrlen, int plen,
+ int offset, int allow_create,
+ int replace_required)
+{
+ struct fib6_node *fn, *in, *ln;
+ struct fib6_node *pn = NULL;
+ struct rt6key *key;
+ int bit;
+ __be32 dir = 0;
+ __u32 sernum = fib6_new_sernum();
+
+ RT6_TRACE("fib6_add_1\n");
+
+ /* insert node in tree */
+
+ fn = root;
+
+ do {
+ key = (struct rt6key *)((u8 *)fn->leaf + offset);
+
+ /*
+ * Prefix match
+ */
+ if (plen < fn->fn_bit ||
+ !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit)) {
+ if (!allow_create) {
+ if (replace_required) {
+ pr_warn("IPv6: Can't replace route, "
+ "no match found\n");
+ return ERR_PTR(-ENOENT);
+ }
+ pr_warn("IPv6: NLM_F_CREATE should be set "
+ "when creating new route\n");
+ }
+ goto insert_above;
+ }
+
+ /*
+ * Exact match ?
+ */
+
+ if (plen == fn->fn_bit) {
+ /* clean up an intermediate node */
+ if (!(fn->fn_flags & RTN_RTINFO)) {
+ rt6_release(fn->leaf);
+ fn->leaf = NULL;
+ }
+
+ fn->fn_sernum = sernum;
+
+ return fn;
+ }
+
+ /*
+ * We have more bits to go
+ */
+
+ /* Try to walk down on tree. */
+ fn->fn_sernum = sernum;
+ dir = addr_bit_set(addr, fn->fn_bit);
+ pn = fn;
+ fn = dir ? fn->right: fn->left;
+ } while (fn);
+
+ if (!allow_create) {
+ /* We should not create new node because
+ * NLM_F_REPLACE was specified without NLM_F_CREATE
+ * I assume it is safe to require NLM_F_CREATE when
+ * REPLACE flag is used! Later we may want to remove the
+ * check for replace_required, because according
+ * to netlink specification, NLM_F_CREATE
+ * MUST be specified if new route is created.
+ * That would keep IPv6 consistent with IPv4
+ */
+ if (replace_required) {
+ pr_warn("IPv6: Can't replace route, no match found\n");
+ return ERR_PTR(-ENOENT);
+ }
+ pr_warn("IPv6: NLM_F_CREATE should be set "
+ "when creating new route\n");
+ }
+ /*
+ * We walked to the bottom of tree.
+ * Create new leaf node without children.
+ */
+
+ ln = node_alloc();
+
+ if (!ln)
+ return NULL;
+ ln->fn_bit = plen;
+
+ ln->parent = pn;
+ ln->fn_sernum = sernum;
+
+ if (dir)
+ pn->right = ln;
+ else
+ pn->left = ln;
+
+ return ln;
+
+
+insert_above:
+ /*
+ * split since we don't have a common prefix anymore or
+ * we have a less significant route.
+ * we've to insert an intermediate node on the list
+ * this new node will point to the one we need to create
+ * and the current
+ */
+
+ pn = fn->parent;
+
+ /* find 1st bit in difference between the 2 addrs.
+
+ See comment in __ipv6_addr_diff: bit may be an invalid value,
+ but if it is >= plen, the value is ignored in any case.
+ */
+
+ bit = __ipv6_addr_diff(addr, &key->addr, addrlen);
+
+ /*
+ * (intermediate)[in]
+ * / \
+ * (new leaf node)[ln] (old node)[fn]
+ */
+ if (plen > bit) {
+ in = node_alloc();
+ ln = node_alloc();
+
+ if (!in || !ln) {
+ if (in)
+ node_free(in);
+ if (ln)
+ node_free(ln);
+ return NULL;
+ }
+
+ /*
+ * new intermediate node.
+ * RTN_RTINFO will
+ * be off since that an address that chooses one of
+ * the branches would not match less specific routes
+ * in the other branch
+ */
+
+ in->fn_bit = bit;
+
+ in->parent = pn;
+ in->leaf = fn->leaf;
+ atomic_inc(&in->leaf->rt6i_ref);
+
+ in->fn_sernum = sernum;
+
+ /* update parent pointer */
+ if (dir)
+ pn->right = in;
+ else
+ pn->left = in;
+
+ ln->fn_bit = plen;
+
+ ln->parent = in;
+ fn->parent = in;
+
+ ln->fn_sernum = sernum;
+
+ if (addr_bit_set(addr, bit)) {
+ in->right = ln;
+ in->left = fn;
+ } else {
+ in->left = ln;
+ in->right = fn;
+ }
+ } else { /* plen <= bit */
+
+ /*
+ * (new leaf node)[ln]
+ * / \
+ * (old node)[fn] NULL
+ */
+
+ ln = node_alloc();
+
+ if (!ln)
+ return NULL;
+
+ ln->fn_bit = plen;
+
+ ln->parent = pn;
+
+ ln->fn_sernum = sernum;
+
+ if (dir)
+ pn->right = ln;
+ else
+ pn->left = ln;
+
+ if (addr_bit_set(&key->addr, plen))
+ ln->right = fn;
+ else
+ ln->left = fn;
+
+ fn->parent = ln;
+ }
+ return ln;
+}
+
+/*
+ * Insert routing information in a node.
+ */
+
+static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
+ struct nl_info *info)
+{
+ struct rt6_info *iter = NULL;
+ struct rt6_info **ins;
+ int replace = (info->nlh &&
+ (info->nlh->nlmsg_flags & NLM_F_REPLACE));
+ int add = (!info->nlh ||
+ (info->nlh->nlmsg_flags & NLM_F_CREATE));
+ int found = 0;
+
+ ins = &fn->leaf;
+
+ for (iter = fn->leaf; iter; iter = iter->dst.rt6_next) {
+ /*
+ * Search for duplicates
+ */
+
+ if (iter->rt6i_metric == rt->rt6i_metric) {
+ /*
+ * Same priority level
+ */
+ if (info->nlh &&
+ (info->nlh->nlmsg_flags & NLM_F_EXCL))
+ return -EEXIST;
+ if (replace) {
+ found++;
+ break;
+ }
+
+ if (iter->dst.dev == rt->dst.dev &&
+ iter->rt6i_idev == rt->rt6i_idev &&
+ ipv6_addr_equal(&iter->rt6i_gateway,
+ &rt->rt6i_gateway)) {
+ if (!(iter->rt6i_flags & RTF_EXPIRES))
+ return -EEXIST;
+ if (!(rt->rt6i_flags & RTF_EXPIRES))
+ rt6_clean_expires(iter);
+ else
+ rt6_set_expires(iter, rt->dst.expires);
+ return -EEXIST;
+ }
+ }
+
+ if (iter->rt6i_metric > rt->rt6i_metric)
+ break;
+
+ ins = &iter->dst.rt6_next;
+ }
+
+ /* Reset round-robin state, if necessary */
+ if (ins == &fn->leaf)
+ fn->rr_ptr = NULL;
+
+ /*
+ * insert node
+ */
+ if (!replace) {
+ if (!add)
+ pr_warn("IPv6: NLM_F_CREATE should be set when creating new route\n");
+
+add:
+ rt->dst.rt6_next = iter;
+ *ins = rt;
+ rt->rt6i_node = fn;
+ atomic_inc(&rt->rt6i_ref);
+ inet6_rt_notify(RTM_NEWROUTE, rt, info);
+ info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
+
+ if (!(fn->fn_flags & RTN_RTINFO)) {
+ info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
+ fn->fn_flags |= RTN_RTINFO;
+ }
+
+ } else {
+ if (!found) {
+ if (add)
+ goto add;
+ pr_warn("IPv6: NLM_F_REPLACE set, but no existing node found!\n");
+ return -ENOENT;
+ }
+ *ins = rt;
+ rt->rt6i_node = fn;
+ rt->dst.rt6_next = iter->dst.rt6_next;
+ atomic_inc(&rt->rt6i_ref);
+ inet6_rt_notify(RTM_NEWROUTE, rt, info);
+ rt6_release(iter);
+ if (!(fn->fn_flags & RTN_RTINFO)) {
+ info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
+ fn->fn_flags |= RTN_RTINFO;
+ }
+ }
+
+ return 0;
+}
+
+static __inline__ void fib6_start_gc(struct net *net, struct rt6_info *rt)
+{
+ if (!timer_pending(&net->ipv6.ip6_fib_timer) &&
+ (rt->rt6i_flags & (RTF_EXPIRES | RTF_CACHE)))
+ mod_timer(&net->ipv6.ip6_fib_timer,
+ jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
+}
+
+void fib6_force_start_gc(struct net *net)
+{
+ if (!timer_pending(&net->ipv6.ip6_fib_timer))
+ mod_timer(&net->ipv6.ip6_fib_timer,
+ jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
+}
+
+/*
+ * Add routing information to the routing tree.
+ * <destination addr>/<source addr>
+ * with source addr info in sub-trees
+ */
+
+int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
+{
+ struct fib6_node *fn, *pn = NULL;
+ int err = -ENOMEM;
+ int allow_create = 1;
+ int replace_required = 0;
+
+ if (info->nlh) {
+ if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
+ allow_create = 0;
+ if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
+ replace_required = 1;
+ }
+ if (!allow_create && !replace_required)
+ pr_warn("IPv6: RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE\n");
+
+ fn = fib6_add_1(root, &rt->rt6i_dst.addr, sizeof(struct in6_addr),
+ rt->rt6i_dst.plen, offsetof(struct rt6_info, rt6i_dst),
+ allow_create, replace_required);
+
+ if (IS_ERR(fn)) {
+ err = PTR_ERR(fn);
+ fn = NULL;
+ }
+
+ if (!fn)
+ goto out;
+
+ pn = fn;
+
+#ifdef CONFIG_IPV6_SUBTREES
+ if (rt->rt6i_src.plen) {
+ struct fib6_node *sn;
+
+ if (!fn->subtree) {
+ struct fib6_node *sfn;
+
+ /*
+ * Create subtree.
+ *
+ * fn[main tree]
+ * |
+ * sfn[subtree root]
+ * \
+ * sn[new leaf node]
+ */
+
+ /* Create subtree root node */
+ sfn = node_alloc();
+ if (!sfn)
+ goto st_failure;
+
+ sfn->leaf = info->nl_net->ipv6.ip6_null_entry;
+ atomic_inc(&info->nl_net->ipv6.ip6_null_entry->rt6i_ref);
+ sfn->fn_flags = RTN_ROOT;
+ sfn->fn_sernum = fib6_new_sernum();
+
+ /* Now add the first leaf node to new subtree */
+
+ sn = fib6_add_1(sfn, &rt->rt6i_src.addr,
+ sizeof(struct in6_addr), rt->rt6i_src.plen,
+ offsetof(struct rt6_info, rt6i_src),
+ allow_create, replace_required);
+
+ if (!sn) {
+ /* If it is failed, discard just allocated
+ root, and then (in st_failure) stale node
+ in main tree.
+ */
+ node_free(sfn);
+ goto st_failure;
+ }
+
+ /* Now link new subtree to main tree */
+ sfn->parent = fn;
+ fn->subtree = sfn;
+ } else {
+ sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr,
+ sizeof(struct in6_addr), rt->rt6i_src.plen,
+ offsetof(struct rt6_info, rt6i_src),
+ allow_create, replace_required);
+
+ if (IS_ERR(sn)) {
+ err = PTR_ERR(sn);
+ sn = NULL;
+ }
+ if (!sn)
+ goto st_failure;
+ }
+
+ if (!fn->leaf) {
+ fn->leaf = rt;
+ atomic_inc(&rt->rt6i_ref);
+ }
+ fn = sn;
+ }
+#endif
+
+ err = fib6_add_rt2node(fn, rt, info);
+ if (!err) {
+ fib6_start_gc(info->nl_net, rt);
+ if (!(rt->rt6i_flags & RTF_CACHE))
+ fib6_prune_clones(info->nl_net, pn, rt);
+ }
+
+out:
+ if (err) {
+#ifdef CONFIG_IPV6_SUBTREES
+ /*
+ * If fib6_add_1 has cleared the old leaf pointer in the
+ * super-tree leaf node we have to find a new one for it.
+ */
+ if (pn != fn && pn->leaf == rt) {
+ pn->leaf = NULL;
+ atomic_dec(&rt->rt6i_ref);
+ }
+ if (pn != fn && !pn->leaf && !(pn->fn_flags & RTN_RTINFO)) {
+ pn->leaf = fib6_find_prefix(info->nl_net, pn);
+#if RT6_DEBUG >= 2
+ if (!pn->leaf) {
+ WARN_ON(pn->leaf == NULL);
+ pn->leaf = info->nl_net->ipv6.ip6_null_entry;
+ }
+#endif
+ atomic_inc(&pn->leaf->rt6i_ref);
+ }
+#endif
+ dst_free(&rt->dst);
+ }
+ return err;
+
+#ifdef CONFIG_IPV6_SUBTREES
+ /* Subtree creation failed, probably main tree node
+ is orphan. If it is, shoot it.
+ */
+st_failure:
+ if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)))
+ fib6_repair_tree(info->nl_net, fn);
+ dst_free(&rt->dst);
+ return err;
+#endif
+}
+
+/*
+ * Routing tree lookup
+ *
+ */
+
+struct lookup_args {
+ int offset; /* key offset on rt6_info */
+ const struct in6_addr *addr; /* search key */
+};
+
+static struct fib6_node * fib6_lookup_1(struct fib6_node *root,
+ struct lookup_args *args)
+{
+ struct fib6_node *fn;
+ __be32 dir;
+
+ if (unlikely(args->offset == 0))
+ return NULL;
+
+ /*
+ * Descend on a tree
+ */
+
+ fn = root;
+
+ for (;;) {
+ struct fib6_node *next;
+
+ dir = addr_bit_set(args->addr, fn->fn_bit);
+
+ next = dir ? fn->right : fn->left;
+
+ if (next) {
+ fn = next;
+ continue;
+ }
+ break;
+ }
+
+ while (fn) {
+ if (FIB6_SUBTREE(fn) || fn->fn_flags & RTN_RTINFO) {
+ struct rt6key *key;
+
+ key = (struct rt6key *) ((u8 *) fn->leaf +
+ args->offset);
+
+ if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) {
+#ifdef CONFIG_IPV6_SUBTREES
+ if (fn->subtree)
+ fn = fib6_lookup_1(fn->subtree, args + 1);
+#endif
+ if (!fn || fn->fn_flags & RTN_RTINFO)
+ return fn;
+ }
+ }
+
+ if (fn->fn_flags & RTN_ROOT)
+ break;
+
+ fn = fn->parent;
+ }
+
+ return NULL;
+}
+
+struct fib6_node * fib6_lookup(struct fib6_node *root, const struct in6_addr *daddr,
+ const struct in6_addr *saddr)
+{
+ struct fib6_node *fn;
+ struct lookup_args args[] = {
+ {
+ .offset = offsetof(struct rt6_info, rt6i_dst),
+ .addr = daddr,
+ },
+#ifdef CONFIG_IPV6_SUBTREES
+ {
+ .offset = offsetof(struct rt6_info, rt6i_src),
+ .addr = saddr,
+ },
+#endif
+ {
+ .offset = 0, /* sentinel */
+ }
+ };
+
+ fn = fib6_lookup_1(root, daddr ? args : args + 1);
+ if (!fn || fn->fn_flags & RTN_TL_ROOT)
+ fn = root;
+
+ return fn;
+}
+
+/*
+ * Get node with specified destination prefix (and source prefix,
+ * if subtrees are used)
+ */
+
+
+static struct fib6_node * fib6_locate_1(struct fib6_node *root,
+ const struct in6_addr *addr,
+ int plen, int offset)
+{
+ struct fib6_node *fn;
+
+ for (fn = root; fn ; ) {
+ struct rt6key *key = (struct rt6key *)((u8 *)fn->leaf + offset);
+
+ /*
+ * Prefix match
+ */
+ if (plen < fn->fn_bit ||
+ !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit))
+ return NULL;
+
+ if (plen == fn->fn_bit)
+ return fn;
+
+ /*
+ * We have more bits to go
+ */
+ if (addr_bit_set(addr, fn->fn_bit))
+ fn = fn->right;
+ else
+ fn = fn->left;
+ }
+ return NULL;
+}
+
+struct fib6_node * fib6_locate(struct fib6_node *root,
+ const struct in6_addr *daddr, int dst_len,
+ const struct in6_addr *saddr, int src_len)
+{
+ struct fib6_node *fn;
+
+ fn = fib6_locate_1(root, daddr, dst_len,
+ offsetof(struct rt6_info, rt6i_dst));
+
+#ifdef CONFIG_IPV6_SUBTREES
+ if (src_len) {
+ WARN_ON(saddr == NULL);
+ if (fn && fn->subtree)
+ fn = fib6_locate_1(fn->subtree, saddr, src_len,
+ offsetof(struct rt6_info, rt6i_src));
+ }
+#endif
+
+ if (fn && fn->fn_flags & RTN_RTINFO)
+ return fn;
+
+ return NULL;
+}
+
+
+/*
+ * Deletion
+ *
+ */
+
+static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn)
+{
+ if (fn->fn_flags & RTN_ROOT)
+ return net->ipv6.ip6_null_entry;
+
+ while (fn) {
+ if (fn->left)
+ return fn->left->leaf;
+ if (fn->right)
+ return fn->right->leaf;
+
+ fn = FIB6_SUBTREE(fn);
+ }
+ return NULL;
+}
+
+/*
+ * Called to trim the tree of intermediate nodes when possible. "fn"
+ * is the node we want to try and remove.
+ */
+
+static struct fib6_node *fib6_repair_tree(struct net *net,
+ struct fib6_node *fn)
+{
+ int children;
+ int nstate;
+ struct fib6_node *child, *pn;
+ struct fib6_walker_t *w;
+ int iter = 0;
+
+ for (;;) {
+ RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter);
+ iter++;
+
+ WARN_ON(fn->fn_flags & RTN_RTINFO);
+ WARN_ON(fn->fn_flags & RTN_TL_ROOT);
+ WARN_ON(fn->leaf != NULL);
+
+ children = 0;
+ child = NULL;
+ if (fn->right) child = fn->right, children |= 1;
+ if (fn->left) child = fn->left, children |= 2;
+
+ if (children == 3 || FIB6_SUBTREE(fn)
+#ifdef CONFIG_IPV6_SUBTREES
+ /* Subtree root (i.e. fn) may have one child */
+ || (children && fn->fn_flags & RTN_ROOT)
+#endif
+ ) {
+ fn->leaf = fib6_find_prefix(net, fn);
+#if RT6_DEBUG >= 2
+ if (!fn->leaf) {
+ WARN_ON(!fn->leaf);
+ fn->leaf = net->ipv6.ip6_null_entry;
+ }
+#endif
+ atomic_inc(&fn->leaf->rt6i_ref);
+ return fn->parent;
+ }
+
+ pn = fn->parent;
+#ifdef CONFIG_IPV6_SUBTREES
+ if (FIB6_SUBTREE(pn) == fn) {
+ WARN_ON(!(fn->fn_flags & RTN_ROOT));
+ FIB6_SUBTREE(pn) = NULL;
+ nstate = FWS_L;
+ } else {
+ WARN_ON(fn->fn_flags & RTN_ROOT);
+#endif
+ if (pn->right == fn) pn->right = child;
+ else if (pn->left == fn) pn->left = child;
+#if RT6_DEBUG >= 2
+ else
+ WARN_ON(1);
+#endif
+ if (child)
+ child->parent = pn;
+ nstate = FWS_R;
+#ifdef CONFIG_IPV6_SUBTREES
+ }
+#endif
+
+ read_lock(&fib6_walker_lock);
+ FOR_WALKERS(w) {
+ if (!child) {
+ if (w->root == fn) {
+ w->root = w->node = NULL;
+ RT6_TRACE("W %p adjusted by delroot 1\n", w);
+ } else if (w->node == fn) {
+ RT6_TRACE("W %p adjusted by delnode 1, s=%d/%d\n", w, w->state, nstate);
+ w->node = pn;
+ w->state = nstate;
+ }
+ } else {
+ if (w->root == fn) {
+ w->root = child;
+ RT6_TRACE("W %p adjusted by delroot 2\n", w);
+ }
+ if (w->node == fn) {
+ w->node = child;
+ if (children&2) {
+ RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
+ w->state = w->state>=FWS_R ? FWS_U : FWS_INIT;
+ } else {
+ RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
+ w->state = w->state>=FWS_C ? FWS_U : FWS_INIT;
+ }
+ }
+ }
+ }
+ read_unlock(&fib6_walker_lock);
+
+ node_free(fn);
+ if (pn->fn_flags & RTN_RTINFO || FIB6_SUBTREE(pn))
+ return pn;
+
+ rt6_release(pn->leaf);
+ pn->leaf = NULL;
+ fn = pn;
+ }
+}
+
+static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
+ struct nl_info *info)
+{
+ struct fib6_walker_t *w;
+ struct rt6_info *rt = *rtp;
+ struct net *net = info->nl_net;
+
+ RT6_TRACE("fib6_del_route\n");
+
+ /* Unlink it */
+ *rtp = rt->dst.rt6_next;
+ rt->rt6i_node = NULL;
+ net->ipv6.rt6_stats->fib_rt_entries--;
+ net->ipv6.rt6_stats->fib_discarded_routes++;
+
+ /* Reset round-robin state, if necessary */
+ if (fn->rr_ptr == rt)
+ fn->rr_ptr = NULL;
+
+ /* Adjust walkers */
+ read_lock(&fib6_walker_lock);
+ FOR_WALKERS(w) {
+ if (w->state == FWS_C && w->leaf == rt) {
+ RT6_TRACE("walker %p adjusted by delroute\n", w);
+ w->leaf = rt->dst.rt6_next;
+ if (!w->leaf)
+ w->state = FWS_U;
+ }
+ }
+ read_unlock(&fib6_walker_lock);
+
+ rt->dst.rt6_next = NULL;
+
+ /* If it was last route, expunge its radix tree node */
+ if (!fn->leaf) {
+ fn->fn_flags &= ~RTN_RTINFO;
+ net->ipv6.rt6_stats->fib_route_nodes--;
+ fn = fib6_repair_tree(net, fn);
+ }
+
+ if (atomic_read(&rt->rt6i_ref) != 1) {
+ /* This route is used as dummy address holder in some split
+ * nodes. It is not leaked, but it still holds other resources,
+ * which must be released in time. So, scan ascendant nodes
+ * and replace dummy references to this route with references
+ * to still alive ones.
+ */
+ while (fn) {
+ if (!(fn->fn_flags & RTN_RTINFO) && fn->leaf == rt) {
+ fn->leaf = fib6_find_prefix(net, fn);
+ atomic_inc(&fn->leaf->rt6i_ref);
+ rt6_release(rt);
+ }
+ fn = fn->parent;
+ }
+ /* No more references are possible at this point. */
+ BUG_ON(atomic_read(&rt->rt6i_ref) != 1);
+ }
+
+ inet6_rt_notify(RTM_DELROUTE, rt, info);
+ rt6_release(rt);
+}
+
+int fib6_del(struct rt6_info *rt, struct nl_info *info)
+{
+ struct net *net = info->nl_net;
+ struct fib6_node *fn = rt->rt6i_node;
+ struct rt6_info **rtp;
+
+#if RT6_DEBUG >= 2
+ if (rt->dst.obsolete>0) {
+ WARN_ON(fn != NULL);
+ return -ENOENT;
+ }
+#endif
+ if (!fn || rt == net->ipv6.ip6_null_entry)
+ return -ENOENT;
+
+ WARN_ON(!(fn->fn_flags & RTN_RTINFO));
+
+ if (!(rt->rt6i_flags & RTF_CACHE)) {
+ struct fib6_node *pn = fn;
+#ifdef CONFIG_IPV6_SUBTREES
+ /* clones of this route might be in another subtree */
+ if (rt->rt6i_src.plen) {
+ while (!(pn->fn_flags & RTN_ROOT))
+ pn = pn->parent;
+ pn = pn->parent;
+ }
+#endif
+ fib6_prune_clones(info->nl_net, pn, rt);
+ }
+
+ /*
+ * Walk the leaf entries looking for ourself
+ */
+
+ for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->dst.rt6_next) {
+ if (*rtp == rt) {
+ fib6_del_route(fn, rtp, info);
+ return 0;
+ }
+ }
+ return -ENOENT;
+}
+
+/*
+ * Tree traversal function.
+ *
+ * Certainly, it is not interrupt safe.
+ * However, it is internally reenterable wrt itself and fib6_add/fib6_del.
+ * It means, that we can modify tree during walking
+ * and use this function for garbage collection, clone pruning,
+ * cleaning tree when a device goes down etc. etc.
+ *
+ * It guarantees that every node will be traversed,
+ * and that it will be traversed only once.
+ *
+ * Callback function w->func may return:
+ * 0 -> continue walking.
+ * positive value -> walking is suspended (used by tree dumps,
+ * and probably by gc, if it will be split to several slices)
+ * negative value -> terminate walking.
+ *
+ * The function itself returns:
+ * 0 -> walk is complete.
+ * >0 -> walk is incomplete (i.e. suspended)
+ * <0 -> walk is terminated by an error.
+ */
+
+static int fib6_walk_continue(struct fib6_walker_t *w)
+{
+ struct fib6_node *fn, *pn;
+
+ for (;;) {
+ fn = w->node;
+ if (!fn)
+ return 0;
+
+ if (w->prune && fn != w->root &&
+ fn->fn_flags & RTN_RTINFO && w->state < FWS_C) {
+ w->state = FWS_C;
+ w->leaf = fn->leaf;
+ }
+ switch (w->state) {
+#ifdef CONFIG_IPV6_SUBTREES
+ case FWS_S:
+ if (FIB6_SUBTREE(fn)) {
+ w->node = FIB6_SUBTREE(fn);
+ continue;
+ }
+ w->state = FWS_L;
+#endif
+ case FWS_L:
+ if (fn->left) {
+ w->node = fn->left;
+ w->state = FWS_INIT;
+ continue;
+ }
+ w->state = FWS_R;
+ case FWS_R:
+ if (fn->right) {
+ w->node = fn->right;
+ w->state = FWS_INIT;
+ continue;
+ }
+ w->state = FWS_C;
+ w->leaf = fn->leaf;
+ case FWS_C:
+ if (w->leaf && fn->fn_flags & RTN_RTINFO) {
+ int err;
+
+ if (w->count < w->skip) {
+ w->count++;
+ continue;
+ }
+
+ err = w->func(w);
+ if (err)
+ return err;
+
+ w->count++;
+ continue;
+ }
+ w->state = FWS_U;
+ case FWS_U:
+ if (fn == w->root)
+ return 0;
+ pn = fn->parent;
+ w->node = pn;
+#ifdef CONFIG_IPV6_SUBTREES
+ if (FIB6_SUBTREE(pn) == fn) {
+ WARN_ON(!(fn->fn_flags & RTN_ROOT));
+ w->state = FWS_L;
+ continue;
+ }
+#endif
+ if (pn->left == fn) {
+ w->state = FWS_R;
+ continue;
+ }
+ if (pn->right == fn) {
+ w->state = FWS_C;
+ w->leaf = w->node->leaf;
+ continue;
+ }
+#if RT6_DEBUG >= 2
+ WARN_ON(1);
+#endif
+ }
+ }
+}
+
+static int fib6_walk(struct fib6_walker_t *w)
+{
+ int res;
+
+ w->state = FWS_INIT;
+ w->node = w->root;
+
+ fib6_walker_link(w);
+ res = fib6_walk_continue(w);
+ if (res <= 0)
+ fib6_walker_unlink(w);
+ return res;
+}
+
+static int fib6_clean_node(struct fib6_walker_t *w)
+{
+ int res;
+ struct rt6_info *rt;
+ struct fib6_cleaner_t *c = container_of(w, struct fib6_cleaner_t, w);
+ struct nl_info info = {
+ .nl_net = c->net,
+ };
+
+ for (rt = w->leaf; rt; rt = rt->dst.rt6_next) {
+ res = c->func(rt, c->arg);
+ if (res < 0) {
+ w->leaf = rt;
+ res = fib6_del(rt, &info);
+ if (res) {
+#if RT6_DEBUG >= 2
+ printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res);
+#endif
+ continue;
+ }
+ return 0;
+ }
+ WARN_ON(res != 0);
+ }
+ w->leaf = rt;
+ return 0;
+}
+
+/*
+ * Convenient frontend to tree walker.
+ *
+ * func is called on each route.
+ * It may return -1 -> delete this route.
+ * 0 -> continue walking
+ *
+ * prune==1 -> only immediate children of node (certainly,
+ * ignoring pure split nodes) will be scanned.
+ */
+
+static void fib6_clean_tree(struct net *net, struct fib6_node *root,
+ int (*func)(struct rt6_info *, void *arg),
+ int prune, void *arg)
+{
+ struct fib6_cleaner_t c;
+
+ c.w.root = root;
+ c.w.func = fib6_clean_node;
+ c.w.prune = prune;
+ c.w.count = 0;
+ c.w.skip = 0;
+ c.func = func;
+ c.arg = arg;
+ c.net = net;
+
+ fib6_walk(&c.w);
+}
+
+void fib6_clean_all_ro(struct net *net, int (*func)(struct rt6_info *, void *arg),
+ int prune, void *arg)
+{
+ struct fib6_table *table;
+ struct hlist_node *node;
+ struct hlist_head *head;
+ unsigned int h;
+
+ rcu_read_lock();
+ for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
+ head = &net->ipv6.fib_table_hash[h];
+ hlist_for_each_entry_rcu(table, node, head, tb6_hlist) {
+ read_lock_bh(&table->tb6_lock);
+ fib6_clean_tree(net, &table->tb6_root,
+ func, prune, arg);
+ read_unlock_bh(&table->tb6_lock);
+ }
+ }
+ rcu_read_unlock();
+}
+void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg),
+ int prune, void *arg)
+{
+ struct fib6_table *table;
+ struct hlist_node *node;
+ struct hlist_head *head;
+ unsigned int h;
+
+ rcu_read_lock();
+ for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
+ head = &net->ipv6.fib_table_hash[h];
+ hlist_for_each_entry_rcu(table, node, head, tb6_hlist) {
+ write_lock_bh(&table->tb6_lock);
+ fib6_clean_tree(net, &table->tb6_root,
+ func, prune, arg);
+ write_unlock_bh(&table->tb6_lock);
+ }
+ }
+ rcu_read_unlock();
+}
+
+static int fib6_prune_clone(struct rt6_info *rt, void *arg)
+{
+ if (rt->rt6i_flags & RTF_CACHE) {
+ RT6_TRACE("pruning clone %p\n", rt);
+ return -1;
+ }
+
+ return 0;
+}
+
+static void fib6_prune_clones(struct net *net, struct fib6_node *fn,
+ struct rt6_info *rt)
+{
+ fib6_clean_tree(net, fn, fib6_prune_clone, 1, rt);
+}
+
+/*
+ * Garbage collection
+ */
+
+static struct fib6_gc_args
+{
+ int timeout;
+ int more;
+} gc_args;
+
+static int fib6_age(struct rt6_info *rt, void *arg)
+{
+ unsigned long now = jiffies;
+
+ /*
+ * check addrconf expiration here.
+ * Routes are expired even if they are in use.
+ *
+ * Also age clones. Note, that clones are aged out
+ * only if they are not in use now.
+ */
+
+ if (rt->rt6i_flags & RTF_EXPIRES && rt->dst.expires) {
+ if (time_after(now, rt->dst.expires)) {
+ RT6_TRACE("expiring %p\n", rt);
+ return -1;
+ }
+ gc_args.more++;
+ } else if (rt->rt6i_flags & RTF_CACHE) {
+ if (atomic_read(&rt->dst.__refcnt) == 0 &&
+ time_after_eq(now, rt->dst.lastuse + gc_args.timeout)) {
+ RT6_TRACE("aging clone %p\n", rt);
+ return -1;
+ } else if (rt->rt6i_flags & RTF_GATEWAY) {
+ struct neighbour *neigh;
+ __u8 neigh_flags = 0;
+
+ neigh = dst_neigh_lookup(&rt->dst, &rt->rt6i_gateway);
+ if (neigh) {
+ neigh_flags = neigh->flags;
+ neigh_release(neigh);
+ }
+ if (!(neigh_flags & NTF_ROUTER)) {
+ RT6_TRACE("purging route %p via non-router but gateway\n",
+ rt);
+ return -1;
+ }
+ }
+ gc_args.more++;
+ }
+
+ return 0;
+}
+
+static DEFINE_SPINLOCK(fib6_gc_lock);
+
+void fib6_run_gc(unsigned long expires, struct net *net)
+{
+ if (expires != ~0UL) {
+ spin_lock_bh(&fib6_gc_lock);
+ gc_args.timeout = expires ? (int)expires :
+ net->ipv6.sysctl.ip6_rt_gc_interval;
+ } else {
+ if (!spin_trylock_bh(&fib6_gc_lock)) {
+ mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ);
+ return;
+ }
+ gc_args.timeout = net->ipv6.sysctl.ip6_rt_gc_interval;
+ }
+
+ gc_args.more = icmp6_dst_gc();
+
+ fib6_clean_all(net, fib6_age, 0, NULL);
+
+ if (gc_args.more)
+ mod_timer(&net->ipv6.ip6_fib_timer,
+ round_jiffies(jiffies
+ + net->ipv6.sysctl.ip6_rt_gc_interval));
+ else
+ del_timer(&net->ipv6.ip6_fib_timer);
+ spin_unlock_bh(&fib6_gc_lock);
+}
+
+static void fib6_gc_timer_cb(unsigned long arg)
+{
+ fib6_run_gc(0, (struct net *)arg);
+}
+
+static int __net_init fib6_net_init(struct net *net)
+{
+ size_t size = sizeof(struct hlist_head) * FIB6_TABLE_HASHSZ;
+
+ setup_timer(&net->ipv6.ip6_fib_timer, fib6_gc_timer_cb, (unsigned long)net);
+
+ net->ipv6.rt6_stats = kzalloc(sizeof(*net->ipv6.rt6_stats), GFP_KERNEL);
+ if (!net->ipv6.rt6_stats)
+ goto out_timer;
+
+ /* Avoid false sharing : Use at least a full cache line */
+ size = max_t(size_t, size, L1_CACHE_BYTES);
+
+ net->ipv6.fib_table_hash = kzalloc(size, GFP_KERNEL);
+ if (!net->ipv6.fib_table_hash)
+ goto out_rt6_stats;
+
+ net->ipv6.fib6_main_tbl = kzalloc(sizeof(*net->ipv6.fib6_main_tbl),
+ GFP_KERNEL);
+ if (!net->ipv6.fib6_main_tbl)
+ goto out_fib_table_hash;
+
+ net->ipv6.fib6_main_tbl->tb6_id = RT6_TABLE_MAIN;
+ net->ipv6.fib6_main_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry;
+ net->ipv6.fib6_main_tbl->tb6_root.fn_flags =
+ RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
+
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+ net->ipv6.fib6_local_tbl = kzalloc(sizeof(*net->ipv6.fib6_local_tbl),
+ GFP_KERNEL);
+ if (!net->ipv6.fib6_local_tbl)
+ goto out_fib6_main_tbl;
+ net->ipv6.fib6_local_tbl->tb6_id = RT6_TABLE_LOCAL;
+ net->ipv6.fib6_local_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry;
+ net->ipv6.fib6_local_tbl->tb6_root.fn_flags =
+ RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
+#endif
+ fib6_tables_init(net);
+
+ return 0;
+
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+out_fib6_main_tbl:
+ kfree(net->ipv6.fib6_main_tbl);
+#endif
+out_fib_table_hash:
+ kfree(net->ipv6.fib_table_hash);
+out_rt6_stats:
+ kfree(net->ipv6.rt6_stats);
+out_timer:
+ return -ENOMEM;
+ }
+
+static void fib6_net_exit(struct net *net)
+{
+ rt6_ifdown(net, NULL);
+ del_timer_sync(&net->ipv6.ip6_fib_timer);
+
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+ kfree(net->ipv6.fib6_local_tbl);
+#endif
+ kfree(net->ipv6.fib6_main_tbl);
+ kfree(net->ipv6.fib_table_hash);
+ kfree(net->ipv6.rt6_stats);
+}
+
+static struct pernet_operations fib6_net_ops = {
+ .init = fib6_net_init,
+ .exit = fib6_net_exit,
+};
+
+int __init fib6_init(void)
+{
+ int ret = -ENOMEM;
+
+ fib6_node_kmem = kmem_cache_create("fib6_nodes",
+ sizeof(struct fib6_node),
+ 0, SLAB_HWCACHE_ALIGN,
+ NULL);
+ if (!fib6_node_kmem)
+ goto out;
+
+ ret = register_pernet_subsys(&fib6_net_ops);
+ if (ret)
+ goto out_kmem_cache_create;
+
+ ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib,
+ NULL);
+ if (ret)
+ goto out_unregister_subsys;
+out:
+ return ret;
+
+out_unregister_subsys:
+ unregister_pernet_subsys(&fib6_net_ops);
+out_kmem_cache_create:
+ kmem_cache_destroy(fib6_node_kmem);
+ goto out;
+}
+
+void fib6_gc_cleanup(void)
+{
+ unregister_pernet_subsys(&fib6_net_ops);
+ kmem_cache_destroy(fib6_node_kmem);
+}
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
new file mode 100644
index 00000000..b7867a12
--- /dev/null
+++ b/net/ipv6/ip6_flowlabel.c
@@ -0,0 +1,783 @@
+/*
+ * ip6_flowlabel.c IPv6 flowlabel manager.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ */
+
+#include <linux/capability.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/in6.h>
+#include <linux/route.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+
+#include <net/net_namespace.h>
+#include <net/sock.h>
+
+#include <net/ipv6.h>
+#include <net/ndisc.h>
+#include <net/protocol.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
+#include <net/rawv6.h>
+#include <net/icmp.h>
+#include <net/transp_v6.h>
+
+#include <asm/uaccess.h>
+
+#define FL_MIN_LINGER 6 /* Minimal linger. It is set to 6sec specified
+ in old IPv6 RFC. Well, it was reasonable value.
+ */
+#define FL_MAX_LINGER 60 /* Maximal linger timeout */
+
+/* FL hash table */
+
+#define FL_MAX_PER_SOCK 32
+#define FL_MAX_SIZE 4096
+#define FL_HASH_MASK 255
+#define FL_HASH(l) (ntohl(l)&FL_HASH_MASK)
+
+static atomic_t fl_size = ATOMIC_INIT(0);
+static struct ip6_flowlabel *fl_ht[FL_HASH_MASK+1];
+
+static void ip6_fl_gc(unsigned long dummy);
+static DEFINE_TIMER(ip6_fl_gc_timer, ip6_fl_gc, 0, 0);
+
+/* FL hash table lock: it protects only of GC */
+
+static DEFINE_RWLOCK(ip6_fl_lock);
+
+/* Big socket sock */
+
+static DEFINE_RWLOCK(ip6_sk_fl_lock);
+
+
+static inline struct ip6_flowlabel *__fl_lookup(struct net *net, __be32 label)
+{
+ struct ip6_flowlabel *fl;
+
+ for (fl=fl_ht[FL_HASH(label)]; fl; fl = fl->next) {
+ if (fl->label == label && net_eq(fl->fl_net, net))
+ return fl;
+ }
+ return NULL;
+}
+
+static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label)
+{
+ struct ip6_flowlabel *fl;
+
+ read_lock_bh(&ip6_fl_lock);
+ fl = __fl_lookup(net, label);
+ if (fl)
+ atomic_inc(&fl->users);
+ read_unlock_bh(&ip6_fl_lock);
+ return fl;
+}
+
+
+static void fl_free(struct ip6_flowlabel *fl)
+{
+ if (fl) {
+ release_net(fl->fl_net);
+ kfree(fl->opt);
+ }
+ kfree(fl);
+}
+
+static void fl_release(struct ip6_flowlabel *fl)
+{
+ write_lock_bh(&ip6_fl_lock);
+
+ fl->lastuse = jiffies;
+ if (atomic_dec_and_test(&fl->users)) {
+ unsigned long ttd = fl->lastuse + fl->linger;
+ if (time_after(ttd, fl->expires))
+ fl->expires = ttd;
+ ttd = fl->expires;
+ if (fl->opt && fl->share == IPV6_FL_S_EXCL) {
+ struct ipv6_txoptions *opt = fl->opt;
+ fl->opt = NULL;
+ kfree(opt);
+ }
+ if (!timer_pending(&ip6_fl_gc_timer) ||
+ time_after(ip6_fl_gc_timer.expires, ttd))
+ mod_timer(&ip6_fl_gc_timer, ttd);
+ }
+ write_unlock_bh(&ip6_fl_lock);
+}
+
+static void ip6_fl_gc(unsigned long dummy)
+{
+ int i;
+ unsigned long now = jiffies;
+ unsigned long sched = 0;
+
+ write_lock(&ip6_fl_lock);
+
+ for (i=0; i<=FL_HASH_MASK; i++) {
+ struct ip6_flowlabel *fl, **flp;
+ flp = &fl_ht[i];
+ while ((fl=*flp) != NULL) {
+ if (atomic_read(&fl->users) == 0) {
+ unsigned long ttd = fl->lastuse + fl->linger;
+ if (time_after(ttd, fl->expires))
+ fl->expires = ttd;
+ ttd = fl->expires;
+ if (time_after_eq(now, ttd)) {
+ *flp = fl->next;
+ fl_free(fl);
+ atomic_dec(&fl_size);
+ continue;
+ }
+ if (!sched || time_before(ttd, sched))
+ sched = ttd;
+ }
+ flp = &fl->next;
+ }
+ }
+ if (!sched && atomic_read(&fl_size))
+ sched = now + FL_MAX_LINGER;
+ if (sched) {
+ mod_timer(&ip6_fl_gc_timer, sched);
+ }
+ write_unlock(&ip6_fl_lock);
+}
+
+static void __net_exit ip6_fl_purge(struct net *net)
+{
+ int i;
+
+ write_lock(&ip6_fl_lock);
+ for (i = 0; i <= FL_HASH_MASK; i++) {
+ struct ip6_flowlabel *fl, **flp;
+ flp = &fl_ht[i];
+ while ((fl = *flp) != NULL) {
+ if (net_eq(fl->fl_net, net) &&
+ atomic_read(&fl->users) == 0) {
+ *flp = fl->next;
+ fl_free(fl);
+ atomic_dec(&fl_size);
+ continue;
+ }
+ flp = &fl->next;
+ }
+ }
+ write_unlock(&ip6_fl_lock);
+}
+
+static struct ip6_flowlabel *fl_intern(struct net *net,
+ struct ip6_flowlabel *fl, __be32 label)
+{
+ struct ip6_flowlabel *lfl;
+
+ fl->label = label & IPV6_FLOWLABEL_MASK;
+
+ write_lock_bh(&ip6_fl_lock);
+ if (label == 0) {
+ for (;;) {
+ fl->label = htonl(net_random())&IPV6_FLOWLABEL_MASK;
+ if (fl->label) {
+ lfl = __fl_lookup(net, fl->label);
+ if (lfl == NULL)
+ break;
+ }
+ }
+ } else {
+ /*
+ * we dropper the ip6_fl_lock, so this entry could reappear
+ * and we need to recheck with it.
+ *
+ * OTOH no need to search the active socket first, like it is
+ * done in ipv6_flowlabel_opt - sock is locked, so new entry
+ * with the same label can only appear on another sock
+ */
+ lfl = __fl_lookup(net, fl->label);
+ if (lfl != NULL) {
+ atomic_inc(&lfl->users);
+ write_unlock_bh(&ip6_fl_lock);
+ return lfl;
+ }
+ }
+
+ fl->lastuse = jiffies;
+ fl->next = fl_ht[FL_HASH(fl->label)];
+ fl_ht[FL_HASH(fl->label)] = fl;
+ atomic_inc(&fl_size);
+ write_unlock_bh(&ip6_fl_lock);
+ return NULL;
+}
+
+
+
+/* Socket flowlabel lists */
+
+struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, __be32 label)
+{
+ struct ipv6_fl_socklist *sfl;
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ label &= IPV6_FLOWLABEL_MASK;
+
+ read_lock_bh(&ip6_sk_fl_lock);
+ for (sfl=np->ipv6_fl_list; sfl; sfl = sfl->next) {
+ struct ip6_flowlabel *fl = sfl->fl;
+ if (fl->label == label) {
+ fl->lastuse = jiffies;
+ atomic_inc(&fl->users);
+ read_unlock_bh(&ip6_sk_fl_lock);
+ return fl;
+ }
+ }
+ read_unlock_bh(&ip6_sk_fl_lock);
+ return NULL;
+}
+
+EXPORT_SYMBOL_GPL(fl6_sock_lookup);
+
+void fl6_free_socklist(struct sock *sk)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6_fl_socklist *sfl;
+
+ while ((sfl = np->ipv6_fl_list) != NULL) {
+ np->ipv6_fl_list = sfl->next;
+ fl_release(sfl->fl);
+ kfree(sfl);
+ }
+}
+
+/* Service routines */
+
+
+/*
+ It is the only difficult place. flowlabel enforces equal headers
+ before and including routing header, however user may supply options
+ following rthdr.
+ */
+
+struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions * opt_space,
+ struct ip6_flowlabel * fl,
+ struct ipv6_txoptions * fopt)
+{
+ struct ipv6_txoptions * fl_opt = fl->opt;
+
+ if (fopt == NULL || fopt->opt_flen == 0)
+ return fl_opt;
+
+ if (fl_opt != NULL) {
+ opt_space->hopopt = fl_opt->hopopt;
+ opt_space->dst0opt = fl_opt->dst0opt;
+ opt_space->srcrt = fl_opt->srcrt;
+ opt_space->opt_nflen = fl_opt->opt_nflen;
+ } else {
+ if (fopt->opt_nflen == 0)
+ return fopt;
+ opt_space->hopopt = NULL;
+ opt_space->dst0opt = NULL;
+ opt_space->srcrt = NULL;
+ opt_space->opt_nflen = 0;
+ }
+ opt_space->dst1opt = fopt->dst1opt;
+ opt_space->opt_flen = fopt->opt_flen;
+ return opt_space;
+}
+
+static unsigned long check_linger(unsigned long ttl)
+{
+ if (ttl < FL_MIN_LINGER)
+ return FL_MIN_LINGER*HZ;
+ if (ttl > FL_MAX_LINGER && !capable(CAP_NET_ADMIN))
+ return 0;
+ return ttl*HZ;
+}
+
+static int fl6_renew(struct ip6_flowlabel *fl, unsigned long linger, unsigned long expires)
+{
+ linger = check_linger(linger);
+ if (!linger)
+ return -EPERM;
+ expires = check_linger(expires);
+ if (!expires)
+ return -EPERM;
+ fl->lastuse = jiffies;
+ if (time_before(fl->linger, linger))
+ fl->linger = linger;
+ if (time_before(expires, fl->linger))
+ expires = fl->linger;
+ if (time_before(fl->expires, fl->lastuse + expires))
+ fl->expires = fl->lastuse + expires;
+ return 0;
+}
+
+static struct ip6_flowlabel *
+fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
+ char __user *optval, int optlen, int *err_p)
+{
+ struct ip6_flowlabel *fl = NULL;
+ int olen;
+ int addr_type;
+ int err;
+
+ olen = optlen - CMSG_ALIGN(sizeof(*freq));
+ err = -EINVAL;
+ if (olen > 64 * 1024)
+ goto done;
+
+ err = -ENOMEM;
+ fl = kzalloc(sizeof(*fl), GFP_KERNEL);
+ if (fl == NULL)
+ goto done;
+
+ if (olen > 0) {
+ struct msghdr msg;
+ struct flowi6 flowi6;
+ int junk;
+
+ err = -ENOMEM;
+ fl->opt = kmalloc(sizeof(*fl->opt) + olen, GFP_KERNEL);
+ if (fl->opt == NULL)
+ goto done;
+
+ memset(fl->opt, 0, sizeof(*fl->opt));
+ fl->opt->tot_len = sizeof(*fl->opt) + olen;
+ err = -EFAULT;
+ if (copy_from_user(fl->opt+1, optval+CMSG_ALIGN(sizeof(*freq)), olen))
+ goto done;
+
+ msg.msg_controllen = olen;
+ msg.msg_control = (void*)(fl->opt+1);
+ memset(&flowi6, 0, sizeof(flowi6));
+
+ err = datagram_send_ctl(net, sk, &msg, &flowi6, fl->opt, &junk,
+ &junk, &junk);
+ if (err)
+ goto done;
+ err = -EINVAL;
+ if (fl->opt->opt_flen)
+ goto done;
+ if (fl->opt->opt_nflen == 0) {
+ kfree(fl->opt);
+ fl->opt = NULL;
+ }
+ }
+
+ fl->fl_net = hold_net(net);
+ fl->expires = jiffies;
+ err = fl6_renew(fl, freq->flr_linger, freq->flr_expires);
+ if (err)
+ goto done;
+ fl->share = freq->flr_share;
+ addr_type = ipv6_addr_type(&freq->flr_dst);
+ if ((addr_type & IPV6_ADDR_MAPPED) ||
+ addr_type == IPV6_ADDR_ANY) {
+ err = -EINVAL;
+ goto done;
+ }
+ fl->dst = freq->flr_dst;
+ atomic_set(&fl->users, 1);
+ switch (fl->share) {
+ case IPV6_FL_S_EXCL:
+ case IPV6_FL_S_ANY:
+ break;
+ case IPV6_FL_S_PROCESS:
+ fl->owner = current->pid;
+ break;
+ case IPV6_FL_S_USER:
+ fl->owner = current_euid();
+ break;
+ default:
+ err = -EINVAL;
+ goto done;
+ }
+ return fl;
+
+done:
+ fl_free(fl);
+ *err_p = err;
+ return NULL;
+}
+
+static int mem_check(struct sock *sk)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6_fl_socklist *sfl;
+ int room = FL_MAX_SIZE - atomic_read(&fl_size);
+ int count = 0;
+
+ if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK)
+ return 0;
+
+ for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next)
+ count++;
+
+ if (room <= 0 ||
+ ((count >= FL_MAX_PER_SOCK ||
+ (count > 0 && room < FL_MAX_SIZE/2) || room < FL_MAX_SIZE/4) &&
+ !capable(CAP_NET_ADMIN)))
+ return -ENOBUFS;
+
+ return 0;
+}
+
+static int ipv6_hdr_cmp(struct ipv6_opt_hdr *h1, struct ipv6_opt_hdr *h2)
+{
+ if (h1 == h2)
+ return 0;
+ if (h1 == NULL || h2 == NULL)
+ return 1;
+ if (h1->hdrlen != h2->hdrlen)
+ return 1;
+ return memcmp(h1+1, h2+1, ((h1->hdrlen+1)<<3) - sizeof(*h1));
+}
+
+static int ipv6_opt_cmp(struct ipv6_txoptions *o1, struct ipv6_txoptions *o2)
+{
+ if (o1 == o2)
+ return 0;
+ if (o1 == NULL || o2 == NULL)
+ return 1;
+ if (o1->opt_nflen != o2->opt_nflen)
+ return 1;
+ if (ipv6_hdr_cmp(o1->hopopt, o2->hopopt))
+ return 1;
+ if (ipv6_hdr_cmp(o1->dst0opt, o2->dst0opt))
+ return 1;
+ if (ipv6_hdr_cmp((struct ipv6_opt_hdr *)o1->srcrt, (struct ipv6_opt_hdr *)o2->srcrt))
+ return 1;
+ return 0;
+}
+
+static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl,
+ struct ip6_flowlabel *fl)
+{
+ write_lock_bh(&ip6_sk_fl_lock);
+ sfl->fl = fl;
+ sfl->next = np->ipv6_fl_list;
+ np->ipv6_fl_list = sfl;
+ write_unlock_bh(&ip6_sk_fl_lock);
+}
+
+int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
+{
+ int uninitialized_var(err);
+ struct net *net = sock_net(sk);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct in6_flowlabel_req freq;
+ struct ipv6_fl_socklist *sfl1=NULL;
+ struct ipv6_fl_socklist *sfl, **sflp;
+ struct ip6_flowlabel *fl, *fl1 = NULL;
+
+
+ if (optlen < sizeof(freq))
+ return -EINVAL;
+
+ if (copy_from_user(&freq, optval, sizeof(freq)))
+ return -EFAULT;
+
+ switch (freq.flr_action) {
+ case IPV6_FL_A_PUT:
+ write_lock_bh(&ip6_sk_fl_lock);
+ for (sflp = &np->ipv6_fl_list; (sfl=*sflp)!=NULL; sflp = &sfl->next) {
+ if (sfl->fl->label == freq.flr_label) {
+ if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK))
+ np->flow_label &= ~IPV6_FLOWLABEL_MASK;
+ *sflp = sfl->next;
+ write_unlock_bh(&ip6_sk_fl_lock);
+ fl_release(sfl->fl);
+ kfree(sfl);
+ return 0;
+ }
+ }
+ write_unlock_bh(&ip6_sk_fl_lock);
+ return -ESRCH;
+
+ case IPV6_FL_A_RENEW:
+ read_lock_bh(&ip6_sk_fl_lock);
+ for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) {
+ if (sfl->fl->label == freq.flr_label) {
+ err = fl6_renew(sfl->fl, freq.flr_linger, freq.flr_expires);
+ read_unlock_bh(&ip6_sk_fl_lock);
+ return err;
+ }
+ }
+ read_unlock_bh(&ip6_sk_fl_lock);
+
+ if (freq.flr_share == IPV6_FL_S_NONE && capable(CAP_NET_ADMIN)) {
+ fl = fl_lookup(net, freq.flr_label);
+ if (fl) {
+ err = fl6_renew(fl, freq.flr_linger, freq.flr_expires);
+ fl_release(fl);
+ return err;
+ }
+ }
+ return -ESRCH;
+
+ case IPV6_FL_A_GET:
+ if (freq.flr_label & ~IPV6_FLOWLABEL_MASK)
+ return -EINVAL;
+
+ fl = fl_create(net, sk, &freq, optval, optlen, &err);
+ if (fl == NULL)
+ return err;
+ sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL);
+
+ if (freq.flr_label) {
+ err = -EEXIST;
+ read_lock_bh(&ip6_sk_fl_lock);
+ for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) {
+ if (sfl->fl->label == freq.flr_label) {
+ if (freq.flr_flags&IPV6_FL_F_EXCL) {
+ read_unlock_bh(&ip6_sk_fl_lock);
+ goto done;
+ }
+ fl1 = sfl->fl;
+ atomic_inc(&fl1->users);
+ break;
+ }
+ }
+ read_unlock_bh(&ip6_sk_fl_lock);
+
+ if (fl1 == NULL)
+ fl1 = fl_lookup(net, freq.flr_label);
+ if (fl1) {
+recheck:
+ err = -EEXIST;
+ if (freq.flr_flags&IPV6_FL_F_EXCL)
+ goto release;
+ err = -EPERM;
+ if (fl1->share == IPV6_FL_S_EXCL ||
+ fl1->share != fl->share ||
+ fl1->owner != fl->owner)
+ goto release;
+
+ err = -EINVAL;
+ if (!ipv6_addr_equal(&fl1->dst, &fl->dst) ||
+ ipv6_opt_cmp(fl1->opt, fl->opt))
+ goto release;
+
+ err = -ENOMEM;
+ if (sfl1 == NULL)
+ goto release;
+ if (fl->linger > fl1->linger)
+ fl1->linger = fl->linger;
+ if ((long)(fl->expires - fl1->expires) > 0)
+ fl1->expires = fl->expires;
+ fl_link(np, sfl1, fl1);
+ fl_free(fl);
+ return 0;
+
+release:
+ fl_release(fl1);
+ goto done;
+ }
+ }
+ err = -ENOENT;
+ if (!(freq.flr_flags&IPV6_FL_F_CREATE))
+ goto done;
+
+ err = -ENOMEM;
+ if (sfl1 == NULL || (err = mem_check(sk)) != 0)
+ goto done;
+
+ fl1 = fl_intern(net, fl, freq.flr_label);
+ if (fl1 != NULL)
+ goto recheck;
+
+ if (!freq.flr_label) {
+ if (copy_to_user(&((struct in6_flowlabel_req __user *) optval)->flr_label,
+ &fl->label, sizeof(fl->label))) {
+ /* Intentionally ignore fault. */
+ }
+ }
+
+ fl_link(np, sfl1, fl);
+ return 0;
+
+ default:
+ return -EINVAL;
+ }
+
+done:
+ fl_free(fl);
+ kfree(sfl1);
+ return err;
+}
+
+#ifdef CONFIG_PROC_FS
+
+struct ip6fl_iter_state {
+ struct seq_net_private p;
+ int bucket;
+};
+
+#define ip6fl_seq_private(seq) ((struct ip6fl_iter_state *)(seq)->private)
+
+static struct ip6_flowlabel *ip6fl_get_first(struct seq_file *seq)
+{
+ struct ip6_flowlabel *fl = NULL;
+ struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
+ struct net *net = seq_file_net(seq);
+
+ for (state->bucket = 0; state->bucket <= FL_HASH_MASK; ++state->bucket) {
+ fl = fl_ht[state->bucket];
+
+ while (fl && !net_eq(fl->fl_net, net))
+ fl = fl->next;
+ if (fl)
+ break;
+ }
+ return fl;
+}
+
+static struct ip6_flowlabel *ip6fl_get_next(struct seq_file *seq, struct ip6_flowlabel *fl)
+{
+ struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
+ struct net *net = seq_file_net(seq);
+
+ fl = fl->next;
+try_again:
+ while (fl && !net_eq(fl->fl_net, net))
+ fl = fl->next;
+
+ while (!fl) {
+ if (++state->bucket <= FL_HASH_MASK) {
+ fl = fl_ht[state->bucket];
+ goto try_again;
+ } else
+ break;
+ }
+ return fl;
+}
+
+static struct ip6_flowlabel *ip6fl_get_idx(struct seq_file *seq, loff_t pos)
+{
+ struct ip6_flowlabel *fl = ip6fl_get_first(seq);
+ if (fl)
+ while (pos && (fl = ip6fl_get_next(seq, fl)) != NULL)
+ --pos;
+ return pos ? NULL : fl;
+}
+
+static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(ip6_fl_lock)
+{
+ read_lock_bh(&ip6_fl_lock);
+ return *pos ? ip6fl_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
+}
+
+static void *ip6fl_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct ip6_flowlabel *fl;
+
+ if (v == SEQ_START_TOKEN)
+ fl = ip6fl_get_first(seq);
+ else
+ fl = ip6fl_get_next(seq, v);
+ ++*pos;
+ return fl;
+}
+
+static void ip6fl_seq_stop(struct seq_file *seq, void *v)
+ __releases(ip6_fl_lock)
+{
+ read_unlock_bh(&ip6_fl_lock);
+}
+
+static int ip6fl_seq_show(struct seq_file *seq, void *v)
+{
+ if (v == SEQ_START_TOKEN)
+ seq_printf(seq, "%-5s %-1s %-6s %-6s %-6s %-8s %-32s %s\n",
+ "Label", "S", "Owner", "Users", "Linger", "Expires", "Dst", "Opt");
+ else {
+ struct ip6_flowlabel *fl = v;
+ seq_printf(seq,
+ "%05X %-1d %-6d %-6d %-6ld %-8ld %pi6 %-4d\n",
+ (unsigned)ntohl(fl->label),
+ fl->share,
+ (unsigned)fl->owner,
+ atomic_read(&fl->users),
+ fl->linger/HZ,
+ (long)(fl->expires - jiffies)/HZ,
+ &fl->dst,
+ fl->opt ? fl->opt->opt_nflen : 0);
+ }
+ return 0;
+}
+
+static const struct seq_operations ip6fl_seq_ops = {
+ .start = ip6fl_seq_start,
+ .next = ip6fl_seq_next,
+ .stop = ip6fl_seq_stop,
+ .show = ip6fl_seq_show,
+};
+
+static int ip6fl_seq_open(struct inode *inode, struct file *file)
+{
+ return seq_open_net(inode, file, &ip6fl_seq_ops,
+ sizeof(struct ip6fl_iter_state));
+}
+
+static const struct file_operations ip6fl_seq_fops = {
+ .owner = THIS_MODULE,
+ .open = ip6fl_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_net,
+};
+
+static int __net_init ip6_flowlabel_proc_init(struct net *net)
+{
+ if (!proc_net_fops_create(net, "ip6_flowlabel",
+ S_IRUGO, &ip6fl_seq_fops))
+ return -ENOMEM;
+ return 0;
+}
+
+static void __net_exit ip6_flowlabel_proc_fini(struct net *net)
+{
+ proc_net_remove(net, "ip6_flowlabel");
+}
+#else
+static inline int ip6_flowlabel_proc_init(struct net *net)
+{
+ return 0;
+}
+static inline void ip6_flowlabel_proc_fini(struct net *net)
+{
+}
+#endif
+
+static void __net_exit ip6_flowlabel_net_exit(struct net *net)
+{
+ ip6_fl_purge(net);
+ ip6_flowlabel_proc_fini(net);
+}
+
+static struct pernet_operations ip6_flowlabel_net_ops = {
+ .init = ip6_flowlabel_proc_init,
+ .exit = ip6_flowlabel_net_exit,
+};
+
+int ip6_flowlabel_init(void)
+{
+ return register_pernet_subsys(&ip6_flowlabel_net_ops);
+}
+
+void ip6_flowlabel_cleanup(void)
+{
+ del_timer(&ip6_fl_gc_timer);
+ unregister_pernet_subsys(&ip6_flowlabel_net_ops);
+}
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
new file mode 100644
index 00000000..1ca5d45a
--- /dev/null
+++ b/net/ipv6/ip6_input.c
@@ -0,0 +1,344 @@
+/*
+ * IPv6 input
+ * Linux INET6 implementation
+ *
+ * Authors:
+ * Pedro Roque <roque@di.fc.ul.pt>
+ * Ian P. Morris <I.P.Morris@soton.ac.uk>
+ *
+ * Based in linux/net/ipv4/ip_input.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+/* Changes
+ *
+ * Mitsuru KANDA @USAGI and
+ * YOSHIFUJI Hideaki @USAGI: Remove ipv6_parse_exthdrs().
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <linux/in6.h>
+#include <linux/icmpv6.h>
+#include <linux/mroute6.h>
+#include <linux/slab.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/transp_v6.h>
+#include <net/rawv6.h>
+#include <net/ndisc.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
+#include <net/xfrm.h>
+
+
+
+inline int ip6_rcv_finish( struct sk_buff *skb)
+{
+ if (skb_dst(skb) == NULL)
+ ip6_route_input(skb);
+
+ return dst_input(skb);
+}
+
+int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
+{
+ const struct ipv6hdr *hdr;
+ u32 pkt_len;
+ struct inet6_dev *idev;
+ struct net *net = dev_net(skb->dev);
+
+ if (skb->pkt_type == PACKET_OTHERHOST) {
+ kfree_skb(skb);
+ return NET_RX_DROP;
+ }
+
+ rcu_read_lock();
+
+ idev = __in6_dev_get(skb->dev);
+
+ IP6_UPD_PO_STATS_BH(net, idev, IPSTATS_MIB_IN, skb->len);
+
+ if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL ||
+ !idev || unlikely(idev->cnf.disable_ipv6)) {
+ IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDISCARDS);
+ goto drop;
+ }
+
+ memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
+
+ /*
+ * Store incoming device index. When the packet will
+ * be queued, we cannot refer to skb->dev anymore.
+ *
+ * BTW, when we send a packet for our own local address on a
+ * non-loopback interface (e.g. ethX), it is being delivered
+ * via the loopback interface (lo) here; skb->dev = loopback_dev.
+ * It, however, should be considered as if it is being
+ * arrived via the sending interface (ethX), because of the
+ * nature of scoping architecture. --yoshfuji
+ */
+ IP6CB(skb)->iif = skb_dst(skb) ? ip6_dst_idev(skb_dst(skb))->dev->ifindex : dev->ifindex;
+
+ if (unlikely(!pskb_may_pull(skb, sizeof(*hdr))))
+ goto err;
+
+ hdr = ipv6_hdr(skb);
+
+ if (hdr->version != 6)
+ goto err;
+
+ /*
+ * RFC4291 2.5.3
+ * A packet received on an interface with a destination address
+ * of loopback must be dropped.
+ */
+ if (!(dev->flags & IFF_LOOPBACK) &&
+ ipv6_addr_loopback(&hdr->daddr))
+ goto err;
+
+ /*
+ * RFC4291 2.7
+ * Multicast addresses must not be used as source addresses in IPv6
+ * packets or appear in any Routing header.
+ */
+ if (ipv6_addr_is_multicast(&hdr->saddr))
+ goto err;
+
+ skb->transport_header = skb->network_header + sizeof(*hdr);
+ IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
+
+ pkt_len = ntohs(hdr->payload_len);
+
+ /* pkt_len may be zero if Jumbo payload option is present */
+ if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) {
+ if (pkt_len + sizeof(struct ipv6hdr) > skb->len) {
+ IP6_INC_STATS_BH(net,
+ idev, IPSTATS_MIB_INTRUNCATEDPKTS);
+ goto drop;
+ }
+ if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) {
+ IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS);
+ goto drop;
+ }
+ hdr = ipv6_hdr(skb);
+ }
+
+ if (hdr->nexthdr == NEXTHDR_HOP) {
+ if (ipv6_parse_hopopts(skb) < 0) {
+ IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS);
+ rcu_read_unlock();
+ return NET_RX_DROP;
+ }
+ }
+
+ rcu_read_unlock();
+
+ /* Must drop socket now because of tproxy. */
+ skb_orphan(skb);
+
+ return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, skb, dev, NULL,
+ ip6_rcv_finish);
+err:
+ IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS);
+drop:
+ rcu_read_unlock();
+ kfree_skb(skb);
+ return NET_RX_DROP;
+}
+
+/*
+ * Deliver the packet to the host
+ */
+
+
+static int ip6_input_finish(struct sk_buff *skb)
+{
+ const struct inet6_protocol *ipprot;
+ unsigned int nhoff;
+ int nexthdr, raw;
+ u8 hash;
+ struct inet6_dev *idev;
+ struct net *net = dev_net(skb_dst(skb)->dev);
+
+ /*
+ * Parse extension headers
+ */
+
+ rcu_read_lock();
+resubmit:
+ idev = ip6_dst_idev(skb_dst(skb));
+ if (!pskb_pull(skb, skb_transport_offset(skb)))
+ goto discard;
+ nhoff = IP6CB(skb)->nhoff;
+ nexthdr = skb_network_header(skb)[nhoff];
+
+ raw = raw6_local_deliver(skb, nexthdr);
+
+ hash = nexthdr & (MAX_INET_PROTOS - 1);
+ if ((ipprot = rcu_dereference(inet6_protos[hash])) != NULL) {
+ int ret;
+
+ if (ipprot->flags & INET6_PROTO_FINAL) {
+ const struct ipv6hdr *hdr;
+
+ /* Free reference early: we don't need it any more,
+ and it may hold ip_conntrack module loaded
+ indefinitely. */
+ nf_reset(skb);
+
+ skb_postpull_rcsum(skb, skb_network_header(skb),
+ skb_network_header_len(skb));
+ hdr = ipv6_hdr(skb);
+ if (ipv6_addr_is_multicast(&hdr->daddr) &&
+ !ipv6_chk_mcast_addr(skb->dev, &hdr->daddr,
+ &hdr->saddr) &&
+ !ipv6_is_mld(skb, nexthdr))
+ goto discard;
+ }
+ if (!(ipprot->flags & INET6_PROTO_NOPOLICY) &&
+ !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
+ goto discard;
+
+ ret = ipprot->handler(skb);
+ if (ret > 0)
+ goto resubmit;
+ else if (ret == 0)
+ IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDELIVERS);
+ } else {
+ if (!raw) {
+ if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+ IP6_INC_STATS_BH(net, idev,
+ IPSTATS_MIB_INUNKNOWNPROTOS);
+ icmpv6_send(skb, ICMPV6_PARAMPROB,
+ ICMPV6_UNK_NEXTHDR, nhoff);
+ }
+ } else
+ IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDELIVERS);
+ kfree_skb(skb);
+ }
+ rcu_read_unlock();
+ return 0;
+
+discard:
+ IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDISCARDS);
+ rcu_read_unlock();
+ kfree_skb(skb);
+ return 0;
+}
+
+
+int ip6_input(struct sk_buff *skb)
+{
+ return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN, skb, skb->dev, NULL,
+ ip6_input_finish);
+}
+
+int ip6_mc_input(struct sk_buff *skb)
+{
+ const struct ipv6hdr *hdr;
+ int deliver;
+
+ IP6_UPD_PO_STATS_BH(dev_net(skb_dst(skb)->dev),
+ ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INMCAST,
+ skb->len);
+
+ hdr = ipv6_hdr(skb);
+ deliver = ipv6_chk_mcast_addr(skb->dev, &hdr->daddr, NULL);
+
+#ifdef CONFIG_IPV6_MROUTE
+ /*
+ * IPv6 multicast router mode is now supported ;)
+ */
+ if (dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding &&
+ !(ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) &&
+ likely(!(IP6CB(skb)->flags & IP6SKB_FORWARDED))) {
+ /*
+ * Okay, we try to forward - split and duplicate
+ * packets.
+ */
+ struct sk_buff *skb2;
+ struct inet6_skb_parm *opt = IP6CB(skb);
+
+ /* Check for MLD */
+ if (unlikely(opt->ra)) {
+ /* Check if this is a mld message */
+ u8 *ptr = skb_network_header(skb) + opt->ra;
+ struct icmp6hdr *icmp6;
+ u8 nexthdr = hdr->nexthdr;
+ __be16 frag_off;
+ int offset;
+
+ /* Check if the value of Router Alert
+ * is for MLD (0x0000).
+ */
+ if ((ptr[2] | ptr[3]) == 0) {
+ deliver = 0;
+
+ if (!ipv6_ext_hdr(nexthdr)) {
+ /* BUG */
+ goto out;
+ }
+ offset = ipv6_skip_exthdr(skb, sizeof(*hdr),
+ &nexthdr, &frag_off);
+ if (offset < 0)
+ goto out;
+
+ if (nexthdr != IPPROTO_ICMPV6)
+ goto out;
+
+ if (!pskb_may_pull(skb, (skb_network_header(skb) +
+ offset + 1 - skb->data)))
+ goto out;
+
+ icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
+
+ switch (icmp6->icmp6_type) {
+ case ICMPV6_MGM_QUERY:
+ case ICMPV6_MGM_REPORT:
+ case ICMPV6_MGM_REDUCTION:
+ case ICMPV6_MLD2_REPORT:
+ deliver = 1;
+ break;
+ }
+ goto out;
+ }
+ /* unknown RA - process it normally */
+ }
+
+ if (deliver)
+ skb2 = skb_clone(skb, GFP_ATOMIC);
+ else {
+ skb2 = skb;
+ skb = NULL;
+ }
+
+ if (skb2) {
+ ip6_mr_input(skb2);
+ }
+ }
+out:
+#endif
+ if (likely(deliver))
+ ip6_input(skb);
+ else {
+ /* discard */
+ kfree_skb(skb);
+ }
+
+ return 0;
+}
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
new file mode 100644
index 00000000..13e5399b
--- /dev/null
+++ b/net/ipv6/ip6_output.c
@@ -0,0 +1,1686 @@
+/*
+ * IPv6 output functions
+ * Linux INET6 implementation
+ *
+ * Authors:
+ * Pedro Roque <roque@di.fc.ul.pt>
+ *
+ * Based on linux/net/ipv4/ip_output.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ * A.N.Kuznetsov : airthmetics in fragmentation.
+ * extension headers are implemented.
+ * route changes now work.
+ * ip6_forward does not confuse sniffers.
+ * etc.
+ *
+ * H. von Brand : Added missing #include <linux/string.h>
+ * Imran Patel : frag id should be in NBO
+ * Kazunori MIYAZAWA @USAGI
+ * : add ip6_append_data and related functions
+ * for datagram xmit
+ */
+
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/socket.h>
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/in6.h>
+#include <linux/tcp.h>
+#include <linux/route.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/ndisc.h>
+#include <net/protocol.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
+#include <net/rawv6.h>
+#include <net/icmp.h>
+#include <net/xfrm.h>
+#include <net/checksum.h>
+#include <linux/mroute6.h>
+
+int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
+
+int __ip6_local_out(struct sk_buff *skb)
+{
+ int len;
+
+ len = skb->len - sizeof(struct ipv6hdr);
+ if (len > IPV6_MAXPLEN)
+ len = 0;
+ ipv6_hdr(skb)->payload_len = htons(len);
+
+ return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
+ skb_dst(skb)->dev, dst_output);
+}
+
+int ip6_local_out(struct sk_buff *skb)
+{
+ int err;
+
+ err = __ip6_local_out(skb);
+ if (likely(err == 1))
+ err = dst_output(skb);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(ip6_local_out);
+
+/* dev_loopback_xmit for use with netfilter. */
+static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
+{
+ skb_reset_mac_header(newskb);
+ __skb_pull(newskb, skb_network_offset(newskb));
+ newskb->pkt_type = PACKET_LOOPBACK;
+ newskb->ip_summed = CHECKSUM_UNNECESSARY;
+ WARN_ON(!skb_dst(newskb));
+
+ netif_rx_ni(newskb);
+ return 0;
+}
+
+static int ip6_finish_output2(struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb_dst(skb);
+ struct net_device *dev = dst->dev;
+ struct neighbour *neigh;
+
+ skb->protocol = htons(ETH_P_IPV6);
+ skb->dev = dev;
+
+ if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
+ struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
+
+ if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
+ ((mroute6_socket(dev_net(dev), skb) &&
+ !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
+ ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
+ &ipv6_hdr(skb)->saddr))) {
+ struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
+
+ /* Do not check for IFF_ALLMULTI; multicast routing
+ is not supported in any case.
+ */
+ if (newskb)
+ NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
+ newskb, NULL, newskb->dev,
+ ip6_dev_loopback_xmit);
+
+ if (ipv6_hdr(skb)->hop_limit == 0) {
+ IP6_INC_STATS(dev_net(dev), idev,
+ IPSTATS_MIB_OUTDISCARDS);
+ kfree_skb(skb);
+ return 0;
+ }
+ }
+
+ IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
+ skb->len);
+ }
+
+ rcu_read_lock();
+ neigh = dst_get_neighbour_noref(dst);
+ if (neigh) {
+ int res = neigh_output(neigh, skb);
+
+ rcu_read_unlock();
+ return res;
+ }
+ rcu_read_unlock();
+ IP6_INC_STATS_BH(dev_net(dst->dev),
+ ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
+static int ip6_finish_output(struct sk_buff *skb)
+{
+ if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
+ dst_allfrag(skb_dst(skb)))
+ return ip6_fragment(skb, ip6_finish_output2);
+ else
+ return ip6_finish_output2(skb);
+}
+
+int ip6_output(struct sk_buff *skb)
+{
+ struct net_device *dev = skb_dst(skb)->dev;
+ struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
+ if (unlikely(idev->cnf.disable_ipv6)) {
+ IP6_INC_STATS(dev_net(dev), idev,
+ IPSTATS_MIB_OUTDISCARDS);
+ kfree_skb(skb);
+ return 0;
+ }
+
+ return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
+ ip6_finish_output,
+ !(IP6CB(skb)->flags & IP6SKB_REROUTED));
+}
+
+/*
+ * xmit an sk_buff (used by TCP, SCTP and DCCP)
+ */
+
+int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
+ struct ipv6_txoptions *opt, int tclass)
+{
+ struct net *net = sock_net(sk);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct in6_addr *first_hop = &fl6->daddr;
+ struct dst_entry *dst = skb_dst(skb);
+ struct ipv6hdr *hdr;
+ u8 proto = fl6->flowi6_proto;
+ int seg_len = skb->len;
+ int hlimit = -1;
+ u32 mtu;
+
+ if (opt) {
+ unsigned int head_room;
+
+ /* First: exthdrs may take lots of space (~8K for now)
+ MAX_HEADER is not enough.
+ */
+ head_room = opt->opt_nflen + opt->opt_flen;
+ seg_len += head_room;
+ head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
+
+ if (skb_headroom(skb) < head_room) {
+ struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
+ if (skb2 == NULL) {
+ IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_OUTDISCARDS);
+ kfree_skb(skb);
+ return -ENOBUFS;
+ }
+ kfree_skb(skb);
+ skb = skb2;
+ skb_set_owner_w(skb, sk);
+ }
+ if (opt->opt_flen)
+ ipv6_push_frag_opts(skb, opt, &proto);
+ if (opt->opt_nflen)
+ ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
+ }
+
+ skb_push(skb, sizeof(struct ipv6hdr));
+ skb_reset_network_header(skb);
+ hdr = ipv6_hdr(skb);
+
+ /*
+ * Fill in the IPv6 header
+ */
+ if (np)
+ hlimit = np->hop_limit;
+ if (hlimit < 0)
+ hlimit = ip6_dst_hoplimit(dst);
+
+ *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl6->flowlabel;
+
+ hdr->payload_len = htons(seg_len);
+ hdr->nexthdr = proto;
+ hdr->hop_limit = hlimit;
+
+ hdr->saddr = fl6->saddr;
+ hdr->daddr = *first_hop;
+
+ skb->priority = sk->sk_priority;
+ skb->mark = sk->sk_mark;
+
+ mtu = dst_mtu(dst);
+ if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
+ IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_OUT, skb->len);
+ return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
+ dst->dev, dst_output);
+ }
+
+ if (net_ratelimit())
+ printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
+ skb->dev = dst->dev;
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
+ kfree_skb(skb);
+ return -EMSGSIZE;
+}
+
+EXPORT_SYMBOL(ip6_xmit);
+
+/*
+ * To avoid extra problems ND packets are send through this
+ * routine. It's code duplication but I really want to avoid
+ * extra checks since ipv6_build_header is used by TCP (which
+ * is for us performance critical)
+ */
+
+int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
+ const struct in6_addr *saddr, const struct in6_addr *daddr,
+ int proto, int len)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6hdr *hdr;
+
+ skb->protocol = htons(ETH_P_IPV6);
+ skb->dev = dev;
+
+ skb_reset_network_header(skb);
+ skb_put(skb, sizeof(struct ipv6hdr));
+ hdr = ipv6_hdr(skb);
+
+ *(__be32*)hdr = htonl(0x60000000);
+
+ hdr->payload_len = htons(len);
+ hdr->nexthdr = proto;
+ hdr->hop_limit = np->hop_limit;
+
+ hdr->saddr = *saddr;
+ hdr->daddr = *daddr;
+
+ return 0;
+}
+
+static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
+{
+ struct ip6_ra_chain *ra;
+ struct sock *last = NULL;
+
+ read_lock(&ip6_ra_lock);
+ for (ra = ip6_ra_chain; ra; ra = ra->next) {
+ struct sock *sk = ra->sk;
+ if (sk && ra->sel == sel &&
+ (!sk->sk_bound_dev_if ||
+ sk->sk_bound_dev_if == skb->dev->ifindex)) {
+ if (last) {
+ struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
+ if (skb2)
+ rawv6_rcv(last, skb2);
+ }
+ last = sk;
+ }
+ }
+
+ if (last) {
+ rawv6_rcv(last, skb);
+ read_unlock(&ip6_ra_lock);
+ return 1;
+ }
+ read_unlock(&ip6_ra_lock);
+ return 0;
+}
+
+static int ip6_forward_proxy_check(struct sk_buff *skb)
+{
+ struct ipv6hdr *hdr = ipv6_hdr(skb);
+ u8 nexthdr = hdr->nexthdr;
+ __be16 frag_off;
+ int offset;
+
+ if (ipv6_ext_hdr(nexthdr)) {
+ offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
+ if (offset < 0)
+ return 0;
+ } else
+ offset = sizeof(struct ipv6hdr);
+
+ if (nexthdr == IPPROTO_ICMPV6) {
+ struct icmp6hdr *icmp6;
+
+ if (!pskb_may_pull(skb, (skb_network_header(skb) +
+ offset + 1 - skb->data)))
+ return 0;
+
+ icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
+
+ switch (icmp6->icmp6_type) {
+ case NDISC_ROUTER_SOLICITATION:
+ case NDISC_ROUTER_ADVERTISEMENT:
+ case NDISC_NEIGHBOUR_SOLICITATION:
+ case NDISC_NEIGHBOUR_ADVERTISEMENT:
+ case NDISC_REDIRECT:
+ /* For reaction involving unicast neighbor discovery
+ * message destined to the proxied address, pass it to
+ * input function.
+ */
+ return 1;
+ default:
+ break;
+ }
+ }
+
+ /*
+ * The proxying router can't forward traffic sent to a link-local
+ * address, so signal the sender and discard the packet. This
+ * behavior is clarified by the MIPv6 specification.
+ */
+ if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
+ dst_link_failure(skb);
+ return -1;
+ }
+
+ return 0;
+}
+
+static inline int ip6_forward_finish(struct sk_buff *skb)
+{
+ return dst_output(skb);
+}
+
+int ip6_forward(struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb_dst(skb);
+ struct ipv6hdr *hdr = ipv6_hdr(skb);
+ struct inet6_skb_parm *opt = IP6CB(skb);
+ struct net *net = dev_net(dst->dev);
+ u32 mtu;
+
+ if (net->ipv6.devconf_all->forwarding == 0)
+ goto error;
+
+ if (skb_warn_if_lro(skb))
+ goto drop;
+
+ if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
+ IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
+ goto drop;
+ }
+
+ if (skb->pkt_type != PACKET_HOST)
+ goto drop;
+
+ skb_forward_csum(skb);
+
+ /*
+ * We DO NOT make any processing on
+ * RA packets, pushing them to user level AS IS
+ * without ane WARRANTY that application will be able
+ * to interpret them. The reason is that we
+ * cannot make anything clever here.
+ *
+ * We are not end-node, so that if packet contains
+ * AH/ESP, we cannot make anything.
+ * Defragmentation also would be mistake, RA packets
+ * cannot be fragmented, because there is no warranty
+ * that different fragments will go along one path. --ANK
+ */
+ if (opt->ra) {
+ u8 *ptr = skb_network_header(skb) + opt->ra;
+ if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
+ return 0;
+ }
+
+ /*
+ * check and decrement ttl
+ */
+ if (hdr->hop_limit <= 1) {
+ /* Force OUTPUT device used as source address */
+ skb->dev = dst->dev;
+ icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
+ IP6_INC_STATS_BH(net,
+ ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
+
+ kfree_skb(skb);
+ return -ETIMEDOUT;
+ }
+
+ /* XXX: idev->cnf.proxy_ndp? */
+ if (net->ipv6.devconf_all->proxy_ndp &&
+ pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
+ int proxied = ip6_forward_proxy_check(skb);
+ if (proxied > 0)
+ return ip6_input(skb);
+ else if (proxied < 0) {
+ IP6_INC_STATS(net, ip6_dst_idev(dst),
+ IPSTATS_MIB_INDISCARDS);
+ goto drop;
+ }
+ }
+
+ if (!xfrm6_route_forward(skb)) {
+ IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
+ goto drop;
+ }
+ dst = skb_dst(skb);
+
+ /* IPv6 specs say nothing about it, but it is clear that we cannot
+ send redirects to source routed frames.
+ We don't send redirects to frames decapsulated from IPsec.
+ */
+ if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
+ struct in6_addr *target = NULL;
+ struct rt6_info *rt;
+
+ /*
+ * incoming and outgoing devices are the same
+ * send a redirect.
+ */
+
+ rt = (struct rt6_info *) dst;
+ if (rt->rt6i_flags & RTF_GATEWAY)
+ target = &rt->rt6i_gateway;
+ else
+ target = &hdr->daddr;
+
+ if (!rt->rt6i_peer)
+ rt6_bind_peer(rt, 1);
+
+ /* Limit redirects both by destination (here)
+ and by source (inside ndisc_send_redirect)
+ */
+ if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
+ ndisc_send_redirect(skb, target);
+ } else {
+ int addrtype = ipv6_addr_type(&hdr->saddr);
+
+ /* This check is security critical. */
+ if (addrtype == IPV6_ADDR_ANY ||
+ addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
+ goto error;
+ if (addrtype & IPV6_ADDR_LINKLOCAL) {
+ icmpv6_send(skb, ICMPV6_DEST_UNREACH,
+ ICMPV6_NOT_NEIGHBOUR, 0);
+ goto error;
+ }
+ }
+
+ mtu = dst_mtu(dst);
+ if (mtu < IPV6_MIN_MTU)
+ mtu = IPV6_MIN_MTU;
+
+ if (skb->len > mtu && !skb_is_gso(skb)) {
+ /* Again, force OUTPUT device used as source address */
+ skb->dev = dst->dev;
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ IP6_INC_STATS_BH(net,
+ ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
+ IP6_INC_STATS_BH(net,
+ ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
+ kfree_skb(skb);
+ return -EMSGSIZE;
+ }
+
+ if (skb_cow(skb, dst->dev->hard_header_len)) {
+ IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
+ goto drop;
+ }
+
+ hdr = ipv6_hdr(skb);
+
+ /* Mangling hops number delayed to point after skb COW */
+
+ hdr->hop_limit--;
+
+ IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
+ return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
+ ip6_forward_finish);
+
+error:
+ IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
+drop:
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
+static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
+{
+ to->pkt_type = from->pkt_type;
+ to->priority = from->priority;
+ to->protocol = from->protocol;
+ skb_dst_drop(to);
+ skb_dst_set(to, dst_clone(skb_dst(from)));
+ to->dev = from->dev;
+ to->mark = from->mark;
+
+#ifdef CONFIG_NET_SCHED
+ to->tc_index = from->tc_index;
+#endif
+ nf_copy(to, from);
+#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
+ defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
+ to->nf_trace = from->nf_trace;
+#endif
+ skb_copy_secmark(to, from);
+}
+
+int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
+{
+ u16 offset = sizeof(struct ipv6hdr);
+ struct ipv6_opt_hdr *exthdr =
+ (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
+ unsigned int packet_len = skb->tail - skb->network_header;
+ int found_rhdr = 0;
+ *nexthdr = &ipv6_hdr(skb)->nexthdr;
+
+ while (offset + 1 <= packet_len) {
+
+ switch (**nexthdr) {
+
+ case NEXTHDR_HOP:
+ break;
+ case NEXTHDR_ROUTING:
+ found_rhdr = 1;
+ break;
+ case NEXTHDR_DEST:
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
+ break;
+#endif
+ if (found_rhdr)
+ return offset;
+ break;
+ default :
+ return offset;
+ }
+
+ offset += ipv6_optlen(exthdr);
+ *nexthdr = &exthdr->nexthdr;
+ exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
+ offset);
+ }
+
+ return offset;
+}
+
+void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
+{
+ static atomic_t ipv6_fragmentation_id;
+ int old, new;
+
+ if (rt && !(rt->dst.flags & DST_NOPEER)) {
+ struct inet_peer *peer;
+
+ if (!rt->rt6i_peer)
+ rt6_bind_peer(rt, 1);
+ peer = rt->rt6i_peer;
+ if (peer) {
+ fhdr->identification = htonl(inet_getid(peer, 0));
+ return;
+ }
+ }
+ do {
+ old = atomic_read(&ipv6_fragmentation_id);
+ new = old + 1;
+ if (!new)
+ new = 1;
+ } while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old);
+ fhdr->identification = htonl(new);
+}
+
+int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
+{
+ struct sk_buff *frag;
+ struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
+ struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
+ struct ipv6hdr *tmp_hdr;
+ struct frag_hdr *fh;
+ unsigned int mtu, hlen, left, len;
+ int hroom, troom;
+ __be32 frag_id = 0;
+ int ptr, offset = 0, err=0;
+ u8 *prevhdr, nexthdr = 0;
+ struct net *net = dev_net(skb_dst(skb)->dev);
+
+ hlen = ip6_find_1stfragopt(skb, &prevhdr);
+ nexthdr = *prevhdr;
+
+ mtu = ip6_skb_dst_mtu(skb);
+
+ /* We must not fragment if the socket is set to force MTU discovery
+ * or if the skb it not generated by a local socket.
+ */
+ if (!skb->local_df && skb->len > mtu) {
+ skb->dev = skb_dst(skb)->dev;
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_FRAGFAILS);
+ kfree_skb(skb);
+ return -EMSGSIZE;
+ }
+
+ if (np && np->frag_size < mtu) {
+ if (np->frag_size)
+ mtu = np->frag_size;
+ }
+ mtu -= hlen + sizeof(struct frag_hdr);
+
+ if (skb_has_frag_list(skb)) {
+ int first_len = skb_pagelen(skb);
+ struct sk_buff *frag2;
+
+ if (first_len - hlen > mtu ||
+ ((first_len - hlen) & 7) ||
+ skb_cloned(skb))
+ goto slow_path;
+
+ skb_walk_frags(skb, frag) {
+ /* Correct geometry. */
+ if (frag->len > mtu ||
+ ((frag->len & 7) && frag->next) ||
+ skb_headroom(frag) < hlen)
+ goto slow_path_clean;
+
+ /* Partially cloned skb? */
+ if (skb_shared(frag))
+ goto slow_path_clean;
+
+ BUG_ON(frag->sk);
+ if (skb->sk) {
+ frag->sk = skb->sk;
+ frag->destructor = sock_wfree;
+ }
+ skb->truesize -= frag->truesize;
+ }
+
+ err = 0;
+ offset = 0;
+ frag = skb_shinfo(skb)->frag_list;
+ skb_frag_list_init(skb);
+ /* BUILD HEADER */
+
+ *prevhdr = NEXTHDR_FRAGMENT;
+ tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
+ if (!tmp_hdr) {
+ IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_FRAGFAILS);
+ return -ENOMEM;
+ }
+
+ __skb_pull(skb, hlen);
+ fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
+ __skb_push(skb, hlen);
+ skb_reset_network_header(skb);
+ memcpy(skb_network_header(skb), tmp_hdr, hlen);
+
+ ipv6_select_ident(fh, rt);
+ fh->nexthdr = nexthdr;
+ fh->reserved = 0;
+ fh->frag_off = htons(IP6_MF);
+ frag_id = fh->identification;
+
+ first_len = skb_pagelen(skb);
+ skb->data_len = first_len - skb_headlen(skb);
+ skb->len = first_len;
+ ipv6_hdr(skb)->payload_len = htons(first_len -
+ sizeof(struct ipv6hdr));
+
+ dst_hold(&rt->dst);
+
+ for (;;) {
+ /* Prepare header of the next frame,
+ * before previous one went down. */
+ if (frag) {
+ frag->ip_summed = CHECKSUM_NONE;
+ skb_reset_transport_header(frag);
+ fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
+ __skb_push(frag, hlen);
+ skb_reset_network_header(frag);
+ memcpy(skb_network_header(frag), tmp_hdr,
+ hlen);
+ offset += skb->len - hlen - sizeof(struct frag_hdr);
+ fh->nexthdr = nexthdr;
+ fh->reserved = 0;
+ fh->frag_off = htons(offset);
+ if (frag->next != NULL)
+ fh->frag_off |= htons(IP6_MF);
+ fh->identification = frag_id;
+ ipv6_hdr(frag)->payload_len =
+ htons(frag->len -
+ sizeof(struct ipv6hdr));
+ ip6_copy_metadata(frag, skb);
+ }
+
+ err = output(skb);
+ if(!err)
+ IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
+ IPSTATS_MIB_FRAGCREATES);
+
+ if (err || !frag)
+ break;
+
+ skb = frag;
+ frag = skb->next;
+ skb->next = NULL;
+ }
+
+ kfree(tmp_hdr);
+
+ if (err == 0) {
+ IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
+ IPSTATS_MIB_FRAGOKS);
+ dst_release(&rt->dst);
+ return 0;
+ }
+
+ while (frag) {
+ skb = frag->next;
+ kfree_skb(frag);
+ frag = skb;
+ }
+
+ IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
+ IPSTATS_MIB_FRAGFAILS);
+ dst_release(&rt->dst);
+ return err;
+
+slow_path_clean:
+ skb_walk_frags(skb, frag2) {
+ if (frag2 == frag)
+ break;
+ frag2->sk = NULL;
+ frag2->destructor = NULL;
+ skb->truesize += frag2->truesize;
+ }
+ }
+
+slow_path:
+ left = skb->len - hlen; /* Space per frame */
+ ptr = hlen; /* Where to start from */
+
+ /*
+ * Fragment the datagram.
+ */
+
+ *prevhdr = NEXTHDR_FRAGMENT;
+ hroom = LL_RESERVED_SPACE(rt->dst.dev);
+ troom = rt->dst.dev->needed_tailroom;
+
+ /*
+ * Keep copying data until we run out.
+ */
+ while(left > 0) {
+ len = left;
+ /* IF: it doesn't fit, use 'mtu' - the data space left */
+ if (len > mtu)
+ len = mtu;
+ /* IF: we are not sending up to and including the packet end
+ then align the next start on an eight byte boundary */
+ if (len < left) {
+ len &= ~7;
+ }
+ /*
+ * Allocate buffer.
+ */
+
+ if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
+ hroom + troom, GFP_ATOMIC)) == NULL) {
+ NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
+ IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_FRAGFAILS);
+ err = -ENOMEM;
+ goto fail;
+ }
+
+ /*
+ * Set up data on packet
+ */
+
+ ip6_copy_metadata(frag, skb);
+ skb_reserve(frag, hroom);
+ skb_put(frag, len + hlen + sizeof(struct frag_hdr));
+ skb_reset_network_header(frag);
+ fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
+ frag->transport_header = (frag->network_header + hlen +
+ sizeof(struct frag_hdr));
+
+ /*
+ * Charge the memory for the fragment to any owner
+ * it might possess
+ */
+ if (skb->sk)
+ skb_set_owner_w(frag, skb->sk);
+
+ /*
+ * Copy the packet header into the new buffer.
+ */
+ skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
+
+ /*
+ * Build fragment header.
+ */
+ fh->nexthdr = nexthdr;
+ fh->reserved = 0;
+ if (!frag_id) {
+ ipv6_select_ident(fh, rt);
+ frag_id = fh->identification;
+ } else
+ fh->identification = frag_id;
+
+ /*
+ * Copy a block of the IP datagram.
+ */
+ if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
+ BUG();
+ left -= len;
+
+ fh->frag_off = htons(offset);
+ if (left > 0)
+ fh->frag_off |= htons(IP6_MF);
+ ipv6_hdr(frag)->payload_len = htons(frag->len -
+ sizeof(struct ipv6hdr));
+
+ ptr += len;
+ offset += len;
+
+ /*
+ * Put this fragment into the sending queue.
+ */
+ err = output(frag);
+ if (err)
+ goto fail;
+
+ IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_FRAGCREATES);
+ }
+ IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_FRAGOKS);
+ kfree_skb(skb);
+ return err;
+
+fail:
+ IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_FRAGFAILS);
+ kfree_skb(skb);
+ return err;
+}
+
+static inline int ip6_rt_check(const struct rt6key *rt_key,
+ const struct in6_addr *fl_addr,
+ const struct in6_addr *addr_cache)
+{
+ return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
+ (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
+}
+
+static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
+ struct dst_entry *dst,
+ const struct flowi6 *fl6)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct rt6_info *rt = (struct rt6_info *)dst;
+
+ if (!dst)
+ goto out;
+
+ /* Yes, checking route validity in not connected
+ * case is not very simple. Take into account,
+ * that we do not support routing by source, TOS,
+ * and MSG_DONTROUTE --ANK (980726)
+ *
+ * 1. ip6_rt_check(): If route was host route,
+ * check that cached destination is current.
+ * If it is network route, we still may
+ * check its validity using saved pointer
+ * to the last used address: daddr_cache.
+ * We do not want to save whole address now,
+ * (because main consumer of this service
+ * is tcp, which has not this problem),
+ * so that the last trick works only on connected
+ * sockets.
+ * 2. oif also should be the same.
+ */
+ if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
+#ifdef CONFIG_IPV6_SUBTREES
+ ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
+#endif
+ (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
+ dst_release(dst);
+ dst = NULL;
+ }
+
+out:
+ return dst;
+}
+
+static int ip6_dst_lookup_tail(struct sock *sk,
+ struct dst_entry **dst, struct flowi6 *fl6)
+{
+ struct net *net = sock_net(sk);
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+ struct neighbour *n;
+#endif
+ int err;
+
+ if (*dst == NULL)
+ *dst = ip6_route_output(net, sk, fl6);
+
+ if ((err = (*dst)->error))
+ goto out_err_release;
+
+ if (ipv6_addr_any(&fl6->saddr)) {
+ struct rt6_info *rt = (struct rt6_info *) *dst;
+ err = ip6_route_get_saddr(net, rt, &fl6->daddr,
+ sk ? inet6_sk(sk)->srcprefs : 0,
+ &fl6->saddr);
+ if (err)
+ goto out_err_release;
+ }
+
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+ /*
+ * Here if the dst entry we've looked up
+ * has a neighbour entry that is in the INCOMPLETE
+ * state and the src address from the flow is
+ * marked as OPTIMISTIC, we release the found
+ * dst entry and replace it instead with the
+ * dst entry of the nexthop router
+ */
+ rcu_read_lock();
+ n = dst_get_neighbour_noref(*dst);
+ if (n && !(n->nud_state & NUD_VALID)) {
+ struct inet6_ifaddr *ifp;
+ struct flowi6 fl_gw6;
+ int redirect;
+
+ rcu_read_unlock();
+ ifp = ipv6_get_ifaddr(net, &fl6->saddr,
+ (*dst)->dev, 1);
+
+ redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
+ if (ifp)
+ in6_ifa_put(ifp);
+
+ if (redirect) {
+ /*
+ * We need to get the dst entry for the
+ * default router instead
+ */
+ dst_release(*dst);
+ memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
+ memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
+ *dst = ip6_route_output(net, sk, &fl_gw6);
+ if ((err = (*dst)->error))
+ goto out_err_release;
+ }
+ } else {
+ rcu_read_unlock();
+ }
+#endif
+
+ return 0;
+
+out_err_release:
+ if (err == -ENETUNREACH)
+ IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
+ dst_release(*dst);
+ *dst = NULL;
+ return err;
+}
+
+/**
+ * ip6_dst_lookup - perform route lookup on flow
+ * @sk: socket which provides route info
+ * @dst: pointer to dst_entry * for result
+ * @fl6: flow to lookup
+ *
+ * This function performs a route lookup on the given flow.
+ *
+ * It returns zero on success, or a standard errno code on error.
+ */
+int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
+{
+ *dst = NULL;
+ return ip6_dst_lookup_tail(sk, dst, fl6);
+}
+EXPORT_SYMBOL_GPL(ip6_dst_lookup);
+
+/**
+ * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
+ * @sk: socket which provides route info
+ * @fl6: flow to lookup
+ * @final_dst: final destination address for ipsec lookup
+ * @can_sleep: we are in a sleepable context
+ *
+ * This function performs a route lookup on the given flow.
+ *
+ * It returns a valid dst pointer on success, or a pointer encoded
+ * error code.
+ */
+struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
+ const struct in6_addr *final_dst,
+ bool can_sleep)
+{
+ struct dst_entry *dst = NULL;
+ int err;
+
+ err = ip6_dst_lookup_tail(sk, &dst, fl6);
+ if (err)
+ return ERR_PTR(err);
+ if (final_dst)
+ fl6->daddr = *final_dst;
+ if (can_sleep)
+ fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
+
+ return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
+}
+EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
+
+/**
+ * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
+ * @sk: socket which provides the dst cache and route info
+ * @fl6: flow to lookup
+ * @final_dst: final destination address for ipsec lookup
+ * @can_sleep: we are in a sleepable context
+ *
+ * This function performs a route lookup on the given flow with the
+ * possibility of using the cached route in the socket if it is valid.
+ * It will take the socket dst lock when operating on the dst cache.
+ * As a result, this function can only be used in process context.
+ *
+ * It returns a valid dst pointer on success, or a pointer encoded
+ * error code.
+ */
+struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
+ const struct in6_addr *final_dst,
+ bool can_sleep)
+{
+ struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
+ int err;
+
+ dst = ip6_sk_dst_check(sk, dst, fl6);
+
+ err = ip6_dst_lookup_tail(sk, &dst, fl6);
+ if (err)
+ return ERR_PTR(err);
+ if (final_dst)
+ fl6->daddr = *final_dst;
+ if (can_sleep)
+ fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
+
+ return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
+}
+EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
+
+static inline int ip6_ufo_append_data(struct sock *sk,
+ int getfrag(void *from, char *to, int offset, int len,
+ int odd, struct sk_buff *skb),
+ void *from, int length, int hh_len, int fragheaderlen,
+ int transhdrlen, int mtu,unsigned int flags,
+ struct rt6_info *rt)
+
+{
+ struct sk_buff *skb;
+ int err;
+
+ /* There is support for UDP large send offload by network
+ * device, so create one single skb packet containing complete
+ * udp datagram
+ */
+ if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
+ skb = sock_alloc_send_skb(sk,
+ hh_len + fragheaderlen + transhdrlen + 20,
+ (flags & MSG_DONTWAIT), &err);
+ if (skb == NULL)
+ return err;
+
+ /* reserve space for Hardware header */
+ skb_reserve(skb, hh_len);
+
+ /* create space for UDP/IP header */
+ skb_put(skb,fragheaderlen + transhdrlen);
+
+ /* initialize network header pointer */
+ skb_reset_network_header(skb);
+
+ /* initialize protocol header pointer */
+ skb->transport_header = skb->network_header + fragheaderlen;
+
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->csum = 0;
+ }
+
+ err = skb_append_datato_frags(sk,skb, getfrag, from,
+ (length - transhdrlen));
+ if (!err) {
+ struct frag_hdr fhdr;
+
+ /* Specify the length of each IPv6 datagram fragment.
+ * It has to be a multiple of 8.
+ */
+ skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
+ sizeof(struct frag_hdr)) & ~7;
+ skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
+ ipv6_select_ident(&fhdr, rt);
+ skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
+ __skb_queue_tail(&sk->sk_write_queue, skb);
+
+ return 0;
+ }
+ /* There is not enough support do UPD LSO,
+ * so follow normal path
+ */
+ kfree_skb(skb);
+
+ return err;
+}
+
+static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
+ gfp_t gfp)
+{
+ return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
+}
+
+static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
+ gfp_t gfp)
+{
+ return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
+}
+
+static void ip6_append_data_mtu(int *mtu,
+ int *maxfraglen,
+ unsigned int fragheaderlen,
+ struct sk_buff *skb,
+ struct rt6_info *rt)
+{
+ if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
+ if (skb == NULL) {
+ /* first fragment, reserve header_len */
+ *mtu = *mtu - rt->dst.header_len;
+
+ } else {
+ /*
+ * this fragment is not first, the headers
+ * space is regarded as data space.
+ */
+ *mtu = dst_mtu(rt->dst.path);
+ }
+ *maxfraglen = ((*mtu - fragheaderlen) & ~7)
+ + fragheaderlen - sizeof(struct frag_hdr);
+ }
+}
+
+int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
+ int offset, int len, int odd, struct sk_buff *skb),
+ void *from, int length, int transhdrlen,
+ int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
+ struct rt6_info *rt, unsigned int flags, int dontfrag)
+{
+ struct inet_sock *inet = inet_sk(sk);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct inet_cork *cork;
+ struct sk_buff *skb, *skb_prev = NULL;
+ unsigned int maxfraglen, fragheaderlen;
+ int exthdrlen;
+ int dst_exthdrlen;
+ int hh_len;
+ int mtu;
+ int copy;
+ int err;
+ int offset = 0;
+ int csummode = CHECKSUM_NONE;
+ __u8 tx_flags = 0;
+
+ if (flags&MSG_PROBE)
+ return 0;
+ cork = &inet->cork.base;
+ if (skb_queue_empty(&sk->sk_write_queue)) {
+ /*
+ * setup for corking
+ */
+ if (opt) {
+ if (WARN_ON(np->cork.opt))
+ return -EINVAL;
+
+ np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
+ if (unlikely(np->cork.opt == NULL))
+ return -ENOBUFS;
+
+ np->cork.opt->tot_len = opt->tot_len;
+ np->cork.opt->opt_flen = opt->opt_flen;
+ np->cork.opt->opt_nflen = opt->opt_nflen;
+
+ np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
+ sk->sk_allocation);
+ if (opt->dst0opt && !np->cork.opt->dst0opt)
+ return -ENOBUFS;
+
+ np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
+ sk->sk_allocation);
+ if (opt->dst1opt && !np->cork.opt->dst1opt)
+ return -ENOBUFS;
+
+ np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
+ sk->sk_allocation);
+ if (opt->hopopt && !np->cork.opt->hopopt)
+ return -ENOBUFS;
+
+ np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
+ sk->sk_allocation);
+ if (opt->srcrt && !np->cork.opt->srcrt)
+ return -ENOBUFS;
+
+ /* need source address above miyazawa*/
+ }
+ dst_hold(&rt->dst);
+ cork->dst = &rt->dst;
+ inet->cork.fl.u.ip6 = *fl6;
+ np->cork.hop_limit = hlimit;
+ np->cork.tclass = tclass;
+ if (rt->dst.flags & DST_XFRM_TUNNEL)
+ mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
+ rt->dst.dev->mtu : dst_mtu(&rt->dst);
+ else
+ mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
+ rt->dst.dev->mtu : dst_mtu(rt->dst.path);
+ if (np->frag_size < mtu) {
+ if (np->frag_size)
+ mtu = np->frag_size;
+ }
+ cork->fragsize = mtu;
+ if (dst_allfrag(rt->dst.path))
+ cork->flags |= IPCORK_ALLFRAG;
+ cork->length = 0;
+ sk->sk_sndmsg_page = NULL;
+ sk->sk_sndmsg_off = 0;
+ exthdrlen = (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len;
+ length += exthdrlen;
+ transhdrlen += exthdrlen;
+ dst_exthdrlen = rt->dst.header_len;
+ } else {
+ rt = (struct rt6_info *)cork->dst;
+ fl6 = &inet->cork.fl.u.ip6;
+ opt = np->cork.opt;
+ transhdrlen = 0;
+ exthdrlen = 0;
+ dst_exthdrlen = 0;
+ mtu = cork->fragsize;
+ }
+
+ hh_len = LL_RESERVED_SPACE(rt->dst.dev);
+
+ fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
+ (opt ? opt->opt_nflen : 0);
+ maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
+
+ if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
+ if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
+ ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen);
+ return -EMSGSIZE;
+ }
+ }
+
+ /* For UDP, check if TX timestamp is enabled */
+ if (sk->sk_type == SOCK_DGRAM) {
+ err = sock_tx_timestamp(sk, &tx_flags);
+ if (err)
+ goto error;
+ }
+
+ /*
+ * Let's try using as much space as possible.
+ * Use MTU if total length of the message fits into the MTU.
+ * Otherwise, we need to reserve fragment header and
+ * fragment alignment (= 8-15 octects, in total).
+ *
+ * Note that we may need to "move" the data from the tail of
+ * of the buffer to the new fragment when we split
+ * the message.
+ *
+ * FIXME: It may be fragmented into multiple chunks
+ * at once if non-fragmentable extension headers
+ * are too large.
+ * --yoshfuji
+ */
+
+ cork->length += length;
+ if (length > mtu) {
+ int proto = sk->sk_protocol;
+ if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){
+ ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
+ return -EMSGSIZE;
+ }
+
+ if (proto == IPPROTO_UDP &&
+ (rt->dst.dev->features & NETIF_F_UFO)) {
+
+ err = ip6_ufo_append_data(sk, getfrag, from, length,
+ hh_len, fragheaderlen,
+ transhdrlen, mtu, flags, rt);
+ if (err)
+ goto error;
+ return 0;
+ }
+ }
+
+ if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
+ goto alloc_new_skb;
+
+ while (length > 0) {
+ /* Check if the remaining data fits into current packet. */
+ copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
+ if (copy < length)
+ copy = maxfraglen - skb->len;
+
+ if (copy <= 0) {
+ char *data;
+ unsigned int datalen;
+ unsigned int fraglen;
+ unsigned int fraggap;
+ unsigned int alloclen;
+alloc_new_skb:
+ /* There's no room in the current skb */
+ if (skb)
+ fraggap = skb->len - maxfraglen;
+ else
+ fraggap = 0;
+ /* update mtu and maxfraglen if necessary */
+ if (skb == NULL || skb_prev == NULL)
+ ip6_append_data_mtu(&mtu, &maxfraglen,
+ fragheaderlen, skb, rt);
+
+ skb_prev = skb;
+
+ /*
+ * If remaining data exceeds the mtu,
+ * we know we need more fragment(s).
+ */
+ datalen = length + fraggap;
+
+ if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
+ datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
+ if ((flags & MSG_MORE) &&
+ !(rt->dst.dev->features&NETIF_F_SG))
+ alloclen = mtu;
+ else
+ alloclen = datalen + fragheaderlen;
+
+ alloclen += dst_exthdrlen;
+
+ if (datalen != length + fraggap) {
+ /*
+ * this is not the last fragment, the trailer
+ * space is regarded as data space.
+ */
+ datalen += rt->dst.trailer_len;
+ }
+
+ alloclen += rt->dst.trailer_len;
+ fraglen = datalen + fragheaderlen;
+
+ /*
+ * We just reserve space for fragment header.
+ * Note: this may be overallocation if the message
+ * (without MSG_MORE) fits into the MTU.
+ */
+ alloclen += sizeof(struct frag_hdr);
+
+ if (transhdrlen) {
+ skb = sock_alloc_send_skb(sk,
+ alloclen + hh_len,
+ (flags & MSG_DONTWAIT), &err);
+ } else {
+ skb = NULL;
+ if (atomic_read(&sk->sk_wmem_alloc) <=
+ 2 * sk->sk_sndbuf)
+ skb = sock_wmalloc(sk,
+ alloclen + hh_len, 1,
+ sk->sk_allocation);
+ if (unlikely(skb == NULL))
+ err = -ENOBUFS;
+ else {
+ /* Only the initial fragment
+ * is time stamped.
+ */
+ tx_flags = 0;
+ }
+ }
+ if (skb == NULL)
+ goto error;
+ /*
+ * Fill in the control structures
+ */
+ skb->ip_summed = csummode;
+ skb->csum = 0;
+ /* reserve for fragmentation and ipsec header */
+ skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
+ dst_exthdrlen);
+
+ if (sk->sk_type == SOCK_DGRAM)
+ skb_shinfo(skb)->tx_flags = tx_flags;
+
+ /*
+ * Find where to start putting bytes
+ */
+ data = skb_put(skb, fraglen);
+ skb_set_network_header(skb, exthdrlen);
+ data += fragheaderlen;
+ skb->transport_header = (skb->network_header +
+ fragheaderlen);
+ if (fraggap) {
+ skb->csum = skb_copy_and_csum_bits(
+ skb_prev, maxfraglen,
+ data + transhdrlen, fraggap, 0);
+ skb_prev->csum = csum_sub(skb_prev->csum,
+ skb->csum);
+ data += fraggap;
+ pskb_trim_unique(skb_prev, maxfraglen);
+ }
+ copy = datalen - transhdrlen - fraggap;
+
+ if (copy < 0) {
+ err = -EINVAL;
+ kfree_skb(skb);
+ goto error;
+ } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
+ err = -EFAULT;
+ kfree_skb(skb);
+ goto error;
+ }
+
+ offset += copy;
+ length -= datalen - fraggap;
+ transhdrlen = 0;
+ exthdrlen = 0;
+ dst_exthdrlen = 0;
+ csummode = CHECKSUM_NONE;
+
+ /*
+ * Put the packet on the pending queue
+ */
+ __skb_queue_tail(&sk->sk_write_queue, skb);
+ continue;
+ }
+
+ if (copy > length)
+ copy = length;
+
+ if (!(rt->dst.dev->features&NETIF_F_SG)) {
+ unsigned int off;
+
+ off = skb->len;
+ if (getfrag(from, skb_put(skb, copy),
+ offset, copy, off, skb) < 0) {
+ __skb_trim(skb, off);
+ err = -EFAULT;
+ goto error;
+ }
+ } else {
+ int i = skb_shinfo(skb)->nr_frags;
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
+ struct page *page = sk->sk_sndmsg_page;
+ int off = sk->sk_sndmsg_off;
+ unsigned int left;
+
+ if (page && (left = PAGE_SIZE - off) > 0) {
+ if (copy >= left)
+ copy = left;
+ if (page != skb_frag_page(frag)) {
+ if (i == MAX_SKB_FRAGS) {
+ err = -EMSGSIZE;
+ goto error;
+ }
+ skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
+ skb_frag_ref(skb, i);
+ frag = &skb_shinfo(skb)->frags[i];
+ }
+ } else if(i < MAX_SKB_FRAGS) {
+ if (copy > PAGE_SIZE)
+ copy = PAGE_SIZE;
+ page = alloc_pages(sk->sk_allocation, 0);
+ if (page == NULL) {
+ err = -ENOMEM;
+ goto error;
+ }
+ sk->sk_sndmsg_page = page;
+ sk->sk_sndmsg_off = 0;
+
+ skb_fill_page_desc(skb, i, page, 0, 0);
+ frag = &skb_shinfo(skb)->frags[i];
+ } else {
+ err = -EMSGSIZE;
+ goto error;
+ }
+ if (getfrag(from,
+ skb_frag_address(frag) + skb_frag_size(frag),
+ offset, copy, skb->len, skb) < 0) {
+ err = -EFAULT;
+ goto error;
+ }
+ sk->sk_sndmsg_off += copy;
+ skb_frag_size_add(frag, copy);
+ skb->len += copy;
+ skb->data_len += copy;
+ skb->truesize += copy;
+ atomic_add(copy, &sk->sk_wmem_alloc);
+ }
+ offset += copy;
+ length -= copy;
+ }
+ return 0;
+error:
+ cork->length -= length;
+ IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
+ return err;
+}
+
+static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
+{
+ if (np->cork.opt) {
+ kfree(np->cork.opt->dst0opt);
+ kfree(np->cork.opt->dst1opt);
+ kfree(np->cork.opt->hopopt);
+ kfree(np->cork.opt->srcrt);
+ kfree(np->cork.opt);
+ np->cork.opt = NULL;
+ }
+
+ if (inet->cork.base.dst) {
+ dst_release(inet->cork.base.dst);
+ inet->cork.base.dst = NULL;
+ inet->cork.base.flags &= ~IPCORK_ALLFRAG;
+ }
+ memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
+}
+
+int ip6_push_pending_frames(struct sock *sk)
+{
+ struct sk_buff *skb, *tmp_skb;
+ struct sk_buff **tail_skb;
+ struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
+ struct inet_sock *inet = inet_sk(sk);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct net *net = sock_net(sk);
+ struct ipv6hdr *hdr;
+ struct ipv6_txoptions *opt = np->cork.opt;
+ struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst;
+ struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
+ unsigned char proto = fl6->flowi6_proto;
+ int err = 0;
+
+ if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
+ goto out;
+ tail_skb = &(skb_shinfo(skb)->frag_list);
+
+ /* move skb->data to ip header from ext header */
+ if (skb->data < skb_network_header(skb))
+ __skb_pull(skb, skb_network_offset(skb));
+ while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
+ __skb_pull(tmp_skb, skb_network_header_len(skb));
+ *tail_skb = tmp_skb;
+ tail_skb = &(tmp_skb->next);
+ skb->len += tmp_skb->len;
+ skb->data_len += tmp_skb->len;
+ skb->truesize += tmp_skb->truesize;
+ tmp_skb->destructor = NULL;
+ tmp_skb->sk = NULL;
+ }
+
+ /* Allow local fragmentation. */
+ if (np->pmtudisc < IPV6_PMTUDISC_DO)
+ skb->local_df = 1;
+
+ *final_dst = fl6->daddr;
+ __skb_pull(skb, skb_network_header_len(skb));
+ if (opt && opt->opt_flen)
+ ipv6_push_frag_opts(skb, opt, &proto);
+ if (opt && opt->opt_nflen)
+ ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
+
+ skb_push(skb, sizeof(struct ipv6hdr));
+ skb_reset_network_header(skb);
+ hdr = ipv6_hdr(skb);
+
+ *(__be32*)hdr = fl6->flowlabel |
+ htonl(0x60000000 | ((int)np->cork.tclass << 20));
+
+ hdr->hop_limit = np->cork.hop_limit;
+ hdr->nexthdr = proto;
+ hdr->saddr = fl6->saddr;
+ hdr->daddr = *final_dst;
+
+ skb->priority = sk->sk_priority;
+ skb->mark = sk->sk_mark;
+
+ skb_dst_set(skb, dst_clone(&rt->dst));
+ IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
+ if (proto == IPPROTO_ICMPV6) {
+ struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
+
+ ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
+ ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
+ }
+
+ err = ip6_local_out(skb);
+ if (err) {
+ if (err > 0)
+ err = net_xmit_errno(err);
+ if (err)
+ goto error;
+ }
+
+out:
+ ip6_cork_release(inet, np);
+ return err;
+error:
+ IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
+ goto out;
+}
+
+void ip6_flush_pending_frames(struct sock *sk)
+{
+ struct sk_buff *skb;
+
+ while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
+ if (skb_dst(skb))
+ IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_OUTDISCARDS);
+ kfree_skb(skb);
+ }
+
+ ip6_cork_release(inet_sk(sk), inet6_sk(sk));
+}
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
new file mode 100644
index 00000000..aa21da6a
--- /dev/null
+++ b/net/ipv6/ip6_tunnel.c
@@ -0,0 +1,1592 @@
+/*
+ * IPv6 tunneling device
+ * Linux INET6 implementation
+ *
+ * Authors:
+ * Ville Nuorvala <vnuorval@tcs.hut.fi>
+ * Yasuyuki Kozakai <kozakai@linux-ipv6.org>
+ *
+ * Based on:
+ * linux/net/ipv6/sit.c and linux/net/ipv4/ipip.c
+ *
+ * RFC 2473
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/capability.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/sockios.h>
+#include <linux/icmp.h>
+#include <linux/if.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/if_tunnel.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/icmpv6.h>
+#include <linux/init.h>
+#include <linux/route.h>
+#include <linux/rtnetlink.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/slab.h>
+
+#include <asm/uaccess.h>
+#include <linux/atomic.h>
+
+#include <net/icmp.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
+#include <net/ip6_tunnel.h>
+#include <net/xfrm.h>
+#include <net/dsfield.h>
+#include <net/inet_ecn.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+
+MODULE_AUTHOR("Ville Nuorvala");
+MODULE_DESCRIPTION("IPv6 tunneling device");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NETDEV("ip6tnl0");
+
+#ifdef IP6_TNL_DEBUG
+#define IP6_TNL_TRACE(x...) printk(KERN_DEBUG "%s:" x "\n", __func__)
+#else
+#define IP6_TNL_TRACE(x...) do {;} while(0)
+#endif
+
+#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK)
+#define IPV6_TCLASS_SHIFT 20
+
+#define HASH_SIZE 32
+
+#define HASH(addr) ((__force u32)((addr)->s6_addr32[0] ^ (addr)->s6_addr32[1] ^ \
+ (addr)->s6_addr32[2] ^ (addr)->s6_addr32[3]) & \
+ (HASH_SIZE - 1))
+
+static int ip6_tnl_dev_init(struct net_device *dev);
+static void ip6_tnl_dev_setup(struct net_device *dev);
+
+static int ip6_tnl_net_id __read_mostly;
+struct ip6_tnl_net {
+ /* the IPv6 tunnel fallback device */
+ struct net_device *fb_tnl_dev;
+ /* lists for storing tunnels in use */
+ struct ip6_tnl __rcu *tnls_r_l[HASH_SIZE];
+ struct ip6_tnl __rcu *tnls_wc[1];
+ struct ip6_tnl __rcu **tnls[2];
+};
+
+/* often modified stats are per cpu, other are shared (netdev->stats) */
+struct pcpu_tstats {
+ unsigned long rx_packets;
+ unsigned long rx_bytes;
+ unsigned long tx_packets;
+ unsigned long tx_bytes;
+} __attribute__((aligned(4*sizeof(unsigned long))));
+
+static struct net_device_stats *ip6_get_stats(struct net_device *dev)
+{
+ struct pcpu_tstats sum = { 0 };
+ int i;
+
+ for_each_possible_cpu(i) {
+ const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
+
+ sum.rx_packets += tstats->rx_packets;
+ sum.rx_bytes += tstats->rx_bytes;
+ sum.tx_packets += tstats->tx_packets;
+ sum.tx_bytes += tstats->tx_bytes;
+ }
+ dev->stats.rx_packets = sum.rx_packets;
+ dev->stats.rx_bytes = sum.rx_bytes;
+ dev->stats.tx_packets = sum.tx_packets;
+ dev->stats.tx_bytes = sum.tx_bytes;
+ return &dev->stats;
+}
+
+/*
+ * Locking : hash tables are protected by RCU and RTNL
+ */
+
+static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
+{
+ struct dst_entry *dst = t->dst_cache;
+
+ if (dst && dst->obsolete &&
+ dst->ops->check(dst, t->dst_cookie) == NULL) {
+ t->dst_cache = NULL;
+ dst_release(dst);
+ return NULL;
+ }
+
+ return dst;
+}
+
+static inline void ip6_tnl_dst_reset(struct ip6_tnl *t)
+{
+ dst_release(t->dst_cache);
+ t->dst_cache = NULL;
+}
+
+static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
+{
+ struct rt6_info *rt = (struct rt6_info *) dst;
+ t->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
+ dst_release(t->dst_cache);
+ t->dst_cache = dst;
+}
+
+/**
+ * ip6_tnl_lookup - fetch tunnel matching the end-point addresses
+ * @remote: the address of the tunnel exit-point
+ * @local: the address of the tunnel entry-point
+ *
+ * Return:
+ * tunnel matching given end-points if found,
+ * else fallback tunnel if its device is up,
+ * else %NULL
+ **/
+
+#define for_each_ip6_tunnel_rcu(start) \
+ for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
+
+static struct ip6_tnl *
+ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_addr *local)
+{
+ unsigned int h0 = HASH(remote);
+ unsigned int h1 = HASH(local);
+ struct ip6_tnl *t;
+ struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
+
+ for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[h0 ^ h1]) {
+ if (ipv6_addr_equal(local, &t->parms.laddr) &&
+ ipv6_addr_equal(remote, &t->parms.raddr) &&
+ (t->dev->flags & IFF_UP))
+ return t;
+ }
+ t = rcu_dereference(ip6n->tnls_wc[0]);
+ if (t && (t->dev->flags & IFF_UP))
+ return t;
+
+ return NULL;
+}
+
+/**
+ * ip6_tnl_bucket - get head of list matching given tunnel parameters
+ * @p: parameters containing tunnel end-points
+ *
+ * Description:
+ * ip6_tnl_bucket() returns the head of the list matching the
+ * &struct in6_addr entries laddr and raddr in @p.
+ *
+ * Return: head of IPv6 tunnel list
+ **/
+
+static struct ip6_tnl __rcu **
+ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct ip6_tnl_parm *p)
+{
+ const struct in6_addr *remote = &p->raddr;
+ const struct in6_addr *local = &p->laddr;
+ unsigned h = 0;
+ int prio = 0;
+
+ if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) {
+ prio = 1;
+ h = HASH(remote) ^ HASH(local);
+ }
+ return &ip6n->tnls[prio][h];
+}
+
+/**
+ * ip6_tnl_link - add tunnel to hash table
+ * @t: tunnel to be added
+ **/
+
+static void
+ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
+{
+ struct ip6_tnl __rcu **tp = ip6_tnl_bucket(ip6n, &t->parms);
+
+ rcu_assign_pointer(t->next , rtnl_dereference(*tp));
+ rcu_assign_pointer(*tp, t);
+}
+
+/**
+ * ip6_tnl_unlink - remove tunnel from hash table
+ * @t: tunnel to be removed
+ **/
+
+static void
+ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
+{
+ struct ip6_tnl __rcu **tp;
+ struct ip6_tnl *iter;
+
+ for (tp = ip6_tnl_bucket(ip6n, &t->parms);
+ (iter = rtnl_dereference(*tp)) != NULL;
+ tp = &iter->next) {
+ if (t == iter) {
+ rcu_assign_pointer(*tp, t->next);
+ break;
+ }
+ }
+}
+
+static void ip6_dev_free(struct net_device *dev)
+{
+ free_percpu(dev->tstats);
+ free_netdev(dev);
+}
+
+/**
+ * ip6_tnl_create() - create a new tunnel
+ * @p: tunnel parameters
+ * @pt: pointer to new tunnel
+ *
+ * Description:
+ * Create tunnel matching given parameters.
+ *
+ * Return:
+ * created tunnel or NULL
+ **/
+
+static struct ip6_tnl *ip6_tnl_create(struct net *net, struct ip6_tnl_parm *p)
+{
+ struct net_device *dev;
+ struct ip6_tnl *t;
+ char name[IFNAMSIZ];
+ int err;
+ struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
+
+ if (p->name[0])
+ strlcpy(name, p->name, IFNAMSIZ);
+ else
+ sprintf(name, "ip6tnl%%d");
+
+ dev = alloc_netdev(sizeof (*t), name, ip6_tnl_dev_setup);
+ if (dev == NULL)
+ goto failed;
+
+ dev_net_set(dev, net);
+
+ t = netdev_priv(dev);
+ t->parms = *p;
+ err = ip6_tnl_dev_init(dev);
+ if (err < 0)
+ goto failed_free;
+
+ if ((err = register_netdevice(dev)) < 0)
+ goto failed_free;
+
+ strcpy(t->parms.name, dev->name);
+
+ dev_hold(dev);
+ ip6_tnl_link(ip6n, t);
+ return t;
+
+failed_free:
+ ip6_dev_free(dev);
+failed:
+ return NULL;
+}
+
+/**
+ * ip6_tnl_locate - find or create tunnel matching given parameters
+ * @p: tunnel parameters
+ * @create: != 0 if allowed to create new tunnel if no match found
+ *
+ * Description:
+ * ip6_tnl_locate() first tries to locate an existing tunnel
+ * based on @parms. If this is unsuccessful, but @create is set a new
+ * tunnel device is created and registered for use.
+ *
+ * Return:
+ * matching tunnel or NULL
+ **/
+
+static struct ip6_tnl *ip6_tnl_locate(struct net *net,
+ struct ip6_tnl_parm *p, int create)
+{
+ const struct in6_addr *remote = &p->raddr;
+ const struct in6_addr *local = &p->laddr;
+ struct ip6_tnl __rcu **tp;
+ struct ip6_tnl *t;
+ struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
+
+ for (tp = ip6_tnl_bucket(ip6n, p);
+ (t = rtnl_dereference(*tp)) != NULL;
+ tp = &t->next) {
+ if (ipv6_addr_equal(local, &t->parms.laddr) &&
+ ipv6_addr_equal(remote, &t->parms.raddr))
+ return t;
+ }
+ if (!create)
+ return NULL;
+ return ip6_tnl_create(net, p);
+}
+
+/**
+ * ip6_tnl_dev_uninit - tunnel device uninitializer
+ * @dev: the device to be destroyed
+ *
+ * Description:
+ * ip6_tnl_dev_uninit() removes tunnel from its list
+ **/
+
+static void
+ip6_tnl_dev_uninit(struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct net *net = dev_net(dev);
+ struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
+
+ if (dev == ip6n->fb_tnl_dev)
+ RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL);
+ else
+ ip6_tnl_unlink(ip6n, t);
+ ip6_tnl_dst_reset(t);
+ dev_put(dev);
+}
+
+/**
+ * parse_tvl_tnl_enc_lim - handle encapsulation limit option
+ * @skb: received socket buffer
+ *
+ * Return:
+ * 0 if none was found,
+ * else index to encapsulation limit
+ **/
+
+static __u16
+parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw)
+{
+ const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) raw;
+ __u8 nexthdr = ipv6h->nexthdr;
+ __u16 off = sizeof (*ipv6h);
+
+ while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) {
+ __u16 optlen = 0;
+ struct ipv6_opt_hdr *hdr;
+ if (raw + off + sizeof (*hdr) > skb->data &&
+ !pskb_may_pull(skb, raw - skb->data + off + sizeof (*hdr)))
+ break;
+
+ hdr = (struct ipv6_opt_hdr *) (raw + off);
+ if (nexthdr == NEXTHDR_FRAGMENT) {
+ struct frag_hdr *frag_hdr = (struct frag_hdr *) hdr;
+ if (frag_hdr->frag_off)
+ break;
+ optlen = 8;
+ } else if (nexthdr == NEXTHDR_AUTH) {
+ optlen = (hdr->hdrlen + 2) << 2;
+ } else {
+ optlen = ipv6_optlen(hdr);
+ }
+ if (nexthdr == NEXTHDR_DEST) {
+ __u16 i = off + 2;
+ while (1) {
+ struct ipv6_tlv_tnl_enc_lim *tel;
+
+ /* No more room for encapsulation limit */
+ if (i + sizeof (*tel) > off + optlen)
+ break;
+
+ tel = (struct ipv6_tlv_tnl_enc_lim *) &raw[i];
+ /* return index of option if found and valid */
+ if (tel->type == IPV6_TLV_TNL_ENCAP_LIMIT &&
+ tel->length == 1)
+ return i;
+ /* else jump to next option */
+ if (tel->type)
+ i += tel->length + 2;
+ else
+ i++;
+ }
+ }
+ nexthdr = hdr->nexthdr;
+ off += optlen;
+ }
+ return 0;
+}
+
+/**
+ * ip6_tnl_err - tunnel error handler
+ *
+ * Description:
+ * ip6_tnl_err() should handle errors in the tunnel according
+ * to the specifications in RFC 2473.
+ **/
+
+static int
+ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
+ u8 *type, u8 *code, int *msg, __u32 *info, int offset)
+{
+ const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) skb->data;
+ struct ip6_tnl *t;
+ int rel_msg = 0;
+ u8 rel_type = ICMPV6_DEST_UNREACH;
+ u8 rel_code = ICMPV6_ADDR_UNREACH;
+ __u32 rel_info = 0;
+ __u16 len;
+ int err = -ENOENT;
+
+ /* If the packet doesn't contain the original IPv6 header we are
+ in trouble since we might need the source address for further
+ processing of the error. */
+
+ rcu_read_lock();
+ if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->daddr,
+ &ipv6h->saddr)) == NULL)
+ goto out;
+
+ if (t->parms.proto != ipproto && t->parms.proto != 0)
+ goto out;
+
+ err = 0;
+
+ switch (*type) {
+ __u32 teli;
+ struct ipv6_tlv_tnl_enc_lim *tel;
+ __u32 mtu;
+ case ICMPV6_DEST_UNREACH:
+ if (net_ratelimit())
+ printk(KERN_WARNING
+ "%s: Path to destination invalid "
+ "or inactive!\n", t->parms.name);
+ rel_msg = 1;
+ break;
+ case ICMPV6_TIME_EXCEED:
+ if ((*code) == ICMPV6_EXC_HOPLIMIT) {
+ if (net_ratelimit())
+ printk(KERN_WARNING
+ "%s: Too small hop limit or "
+ "routing loop in tunnel!\n",
+ t->parms.name);
+ rel_msg = 1;
+ }
+ break;
+ case ICMPV6_PARAMPROB:
+ teli = 0;
+ if ((*code) == ICMPV6_HDR_FIELD)
+ teli = parse_tlv_tnl_enc_lim(skb, skb->data);
+
+ if (teli && teli == *info - 2) {
+ tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
+ if (tel->encap_limit == 0) {
+ if (net_ratelimit())
+ printk(KERN_WARNING
+ "%s: Too small encapsulation "
+ "limit or routing loop in "
+ "tunnel!\n", t->parms.name);
+ rel_msg = 1;
+ }
+ } else if (net_ratelimit()) {
+ printk(KERN_WARNING
+ "%s: Recipient unable to parse tunneled "
+ "packet!\n ", t->parms.name);
+ }
+ break;
+ case ICMPV6_PKT_TOOBIG:
+ mtu = *info - offset;
+ if (mtu < IPV6_MIN_MTU)
+ mtu = IPV6_MIN_MTU;
+ t->dev->mtu = mtu;
+
+ if ((len = sizeof (*ipv6h) + ntohs(ipv6h->payload_len)) > mtu) {
+ rel_type = ICMPV6_PKT_TOOBIG;
+ rel_code = 0;
+ rel_info = mtu;
+ rel_msg = 1;
+ }
+ break;
+ }
+
+ *type = rel_type;
+ *code = rel_code;
+ *info = rel_info;
+ *msg = rel_msg;
+
+out:
+ rcu_read_unlock();
+ return err;
+}
+
+static int
+ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+{
+ int rel_msg = 0;
+ u8 rel_type = type;
+ u8 rel_code = code;
+ __u32 rel_info = ntohl(info);
+ int err;
+ struct sk_buff *skb2;
+ const struct iphdr *eiph;
+ struct rtable *rt;
+ struct flowi4 fl4;
+
+ err = ip6_tnl_err(skb, IPPROTO_IPIP, opt, &rel_type, &rel_code,
+ &rel_msg, &rel_info, offset);
+ if (err < 0)
+ return err;
+
+ if (rel_msg == 0)
+ return 0;
+
+ switch (rel_type) {
+ case ICMPV6_DEST_UNREACH:
+ if (rel_code != ICMPV6_ADDR_UNREACH)
+ return 0;
+ rel_type = ICMP_DEST_UNREACH;
+ rel_code = ICMP_HOST_UNREACH;
+ break;
+ case ICMPV6_PKT_TOOBIG:
+ if (rel_code != 0)
+ return 0;
+ rel_type = ICMP_DEST_UNREACH;
+ rel_code = ICMP_FRAG_NEEDED;
+ break;
+ default:
+ return 0;
+ }
+
+ if (!pskb_may_pull(skb, offset + sizeof(struct iphdr)))
+ return 0;
+
+ skb2 = skb_clone(skb, GFP_ATOMIC);
+ if (!skb2)
+ return 0;
+
+ skb_dst_drop(skb2);
+
+ skb_pull(skb2, offset);
+ skb_reset_network_header(skb2);
+ eiph = ip_hdr(skb2);
+
+ /* Try to guess incoming interface */
+ rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
+ eiph->saddr, 0,
+ 0, 0,
+ IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
+ if (IS_ERR(rt))
+ goto out;
+
+ skb2->dev = rt->dst.dev;
+
+ /* route "incoming" packet */
+ if (rt->rt_flags & RTCF_LOCAL) {
+ ip_rt_put(rt);
+ rt = NULL;
+ rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
+ eiph->daddr, eiph->saddr,
+ 0, 0,
+ IPPROTO_IPIP,
+ RT_TOS(eiph->tos), 0);
+ if (IS_ERR(rt) ||
+ rt->dst.dev->type != ARPHRD_TUNNEL) {
+ if (!IS_ERR(rt))
+ ip_rt_put(rt);
+ goto out;
+ }
+ skb_dst_set(skb2, &rt->dst);
+ } else {
+ ip_rt_put(rt);
+ if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
+ skb2->dev) ||
+ skb_dst(skb2)->dev->type != ARPHRD_TUNNEL)
+ goto out;
+ }
+
+ /* change mtu on this route */
+ if (rel_type == ICMP_DEST_UNREACH && rel_code == ICMP_FRAG_NEEDED) {
+ if (rel_info > dst_mtu(skb_dst(skb2)))
+ goto out;
+
+ skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), rel_info);
+ }
+
+ icmp_send(skb2, rel_type, rel_code, htonl(rel_info));
+
+out:
+ kfree_skb(skb2);
+ return 0;
+}
+
+static int
+ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+{
+ int rel_msg = 0;
+ u8 rel_type = type;
+ u8 rel_code = code;
+ __u32 rel_info = ntohl(info);
+ int err;
+
+ err = ip6_tnl_err(skb, IPPROTO_IPV6, opt, &rel_type, &rel_code,
+ &rel_msg, &rel_info, offset);
+ if (err < 0)
+ return err;
+
+ if (rel_msg && pskb_may_pull(skb, offset + sizeof(struct ipv6hdr))) {
+ struct rt6_info *rt;
+ struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
+
+ if (!skb2)
+ return 0;
+
+ skb_dst_drop(skb2);
+ skb_pull(skb2, offset);
+ skb_reset_network_header(skb2);
+
+ /* Try to guess incoming interface */
+ rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr,
+ NULL, 0, 0);
+
+ if (rt && rt->dst.dev)
+ skb2->dev = rt->dst.dev;
+
+ icmpv6_send(skb2, rel_type, rel_code, rel_info);
+
+ if (rt)
+ dst_release(&rt->dst);
+
+ kfree_skb(skb2);
+ }
+
+ return 0;
+}
+
+static void ip4ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
+ const struct ipv6hdr *ipv6h,
+ struct sk_buff *skb)
+{
+ __u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK;
+
+ if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
+ ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, dsfield);
+
+ if (INET_ECN_is_ce(dsfield))
+ IP_ECN_set_ce(ip_hdr(skb));
+}
+
+static void ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
+ const struct ipv6hdr *ipv6h,
+ struct sk_buff *skb)
+{
+ if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
+ ipv6_copy_dscp(ipv6_get_dsfield(ipv6h), ipv6_hdr(skb));
+
+ if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6h)))
+ IP6_ECN_set_ce(ipv6_hdr(skb));
+}
+
+/* called with rcu_read_lock() */
+static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t)
+{
+ struct ip6_tnl_parm *p = &t->parms;
+ int ret = 0;
+ struct net *net = dev_net(t->dev);
+
+ if (p->flags & IP6_TNL_F_CAP_RCV) {
+ struct net_device *ldev = NULL;
+
+ if (p->link)
+ ldev = dev_get_by_index_rcu(net, p->link);
+
+ if ((ipv6_addr_is_multicast(&p->laddr) ||
+ likely(ipv6_chk_addr(net, &p->laddr, ldev, 0))) &&
+ likely(!ipv6_chk_addr(net, &p->raddr, NULL, 0)))
+ ret = 1;
+
+ }
+ return ret;
+}
+
+/**
+ * ip6_tnl_rcv - decapsulate IPv6 packet and retransmit it locally
+ * @skb: received socket buffer
+ * @protocol: ethernet protocol ID
+ * @dscp_ecn_decapsulate: the function to decapsulate DSCP code and ECN
+ *
+ * Return: 0
+ **/
+
+static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
+ __u8 ipproto,
+ void (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
+ const struct ipv6hdr *ipv6h,
+ struct sk_buff *skb))
+{
+ struct ip6_tnl *t;
+ const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+
+ rcu_read_lock();
+
+ if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr,
+ &ipv6h->daddr)) != NULL) {
+ struct pcpu_tstats *tstats;
+
+ if (t->parms.proto != ipproto && t->parms.proto != 0) {
+ rcu_read_unlock();
+ goto discard;
+ }
+
+ if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+ rcu_read_unlock();
+ goto discard;
+ }
+
+ if (!ip6_tnl_rcv_ctl(t)) {
+ t->dev->stats.rx_dropped++;
+ rcu_read_unlock();
+ goto discard;
+ }
+ secpath_reset(skb);
+ skb->mac_header = skb->network_header;
+ skb_reset_network_header(skb);
+ skb->protocol = htons(protocol);
+ skb->pkt_type = PACKET_HOST;
+ memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
+
+ tstats = this_cpu_ptr(t->dev->tstats);
+ tstats->rx_packets++;
+ tstats->rx_bytes += skb->len;
+
+ __skb_tunnel_rx(skb, t->dev);
+
+ dscp_ecn_decapsulate(t, ipv6h, skb);
+
+ netif_rx(skb);
+
+ rcu_read_unlock();
+ return 0;
+ }
+ rcu_read_unlock();
+ return 1;
+
+discard:
+ kfree_skb(skb);
+ return 0;
+}
+
+static int ip4ip6_rcv(struct sk_buff *skb)
+{
+ return ip6_tnl_rcv(skb, ETH_P_IP, IPPROTO_IPIP,
+ ip4ip6_dscp_ecn_decapsulate);
+}
+
+static int ip6ip6_rcv(struct sk_buff *skb)
+{
+ return ip6_tnl_rcv(skb, ETH_P_IPV6, IPPROTO_IPV6,
+ ip6ip6_dscp_ecn_decapsulate);
+}
+
+struct ipv6_tel_txoption {
+ struct ipv6_txoptions ops;
+ __u8 dst_opt[8];
+};
+
+static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit)
+{
+ memset(opt, 0, sizeof(struct ipv6_tel_txoption));
+
+ opt->dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT;
+ opt->dst_opt[3] = 1;
+ opt->dst_opt[4] = encap_limit;
+ opt->dst_opt[5] = IPV6_TLV_PADN;
+ opt->dst_opt[6] = 1;
+
+ opt->ops.dst0opt = (struct ipv6_opt_hdr *) opt->dst_opt;
+ opt->ops.opt_nflen = 8;
+}
+
+/**
+ * ip6_tnl_addr_conflict - compare packet addresses to tunnel's own
+ * @t: the outgoing tunnel device
+ * @hdr: IPv6 header from the incoming packet
+ *
+ * Description:
+ * Avoid trivial tunneling loop by checking that tunnel exit-point
+ * doesn't match source of incoming packet.
+ *
+ * Return:
+ * 1 if conflict,
+ * 0 else
+ **/
+
+static inline int
+ip6_tnl_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr)
+{
+ return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
+}
+
+static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
+{
+ struct ip6_tnl_parm *p = &t->parms;
+ int ret = 0;
+ struct net *net = dev_net(t->dev);
+
+ if (p->flags & IP6_TNL_F_CAP_XMIT) {
+ struct net_device *ldev = NULL;
+
+ rcu_read_lock();
+ if (p->link)
+ ldev = dev_get_by_index_rcu(net, p->link);
+
+ if (unlikely(!ipv6_chk_addr(net, &p->laddr, ldev, 0)))
+ printk(KERN_WARNING
+ "%s xmit: Local address not yet configured!\n",
+ p->name);
+ else if (!ipv6_addr_is_multicast(&p->raddr) &&
+ unlikely(ipv6_chk_addr(net, &p->raddr, NULL, 0)))
+ printk(KERN_WARNING
+ "%s xmit: Routing loop! "
+ "Remote address found on this node!\n",
+ p->name);
+ else
+ ret = 1;
+ rcu_read_unlock();
+ }
+ return ret;
+}
+/**
+ * ip6_tnl_xmit2 - encapsulate packet and send
+ * @skb: the outgoing socket buffer
+ * @dev: the outgoing tunnel device
+ * @dsfield: dscp code for outer header
+ * @fl: flow of tunneled packet
+ * @encap_limit: encapsulation limit
+ * @pmtu: Path MTU is stored if packet is too big
+ *
+ * Description:
+ * Build new header and do some sanity checks on the packet before sending
+ * it.
+ *
+ * Return:
+ * 0 on success
+ * -1 fail
+ * %-EMSGSIZE message too big. return mtu in this case.
+ **/
+
+static int ip6_tnl_xmit2(struct sk_buff *skb,
+ struct net_device *dev,
+ __u8 dsfield,
+ struct flowi6 *fl6,
+ int encap_limit,
+ __u32 *pmtu)
+{
+ struct net *net = dev_net(dev);
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct net_device_stats *stats = &t->dev->stats;
+ struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ struct ipv6_tel_txoption opt;
+ struct dst_entry *dst = NULL, *ndst = NULL;
+ struct net_device *tdev;
+ int mtu;
+ unsigned int max_headroom = sizeof(struct ipv6hdr);
+ u8 proto;
+ int err = -1;
+ int pkt_len;
+
+ if (!fl6->flowi6_mark)
+ dst = ip6_tnl_dst_check(t);
+ if (!dst) {
+ ndst = ip6_route_output(net, NULL, fl6);
+
+ if (ndst->error)
+ goto tx_err_link_failure;
+ ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(fl6), NULL, 0);
+ if (IS_ERR(ndst)) {
+ err = PTR_ERR(ndst);
+ ndst = NULL;
+ goto tx_err_link_failure;
+ }
+ dst = ndst;
+ }
+
+ tdev = dst->dev;
+
+ if (tdev == dev) {
+ stats->collisions++;
+ if (net_ratelimit())
+ printk(KERN_WARNING
+ "%s: Local routing loop detected!\n",
+ t->parms.name);
+ goto tx_err_dst_release;
+ }
+ mtu = dst_mtu(dst) - sizeof (*ipv6h);
+ if (encap_limit >= 0) {
+ max_headroom += 8;
+ mtu -= 8;
+ }
+ if (mtu < IPV6_MIN_MTU)
+ mtu = IPV6_MIN_MTU;
+ if (skb_dst(skb))
+ skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
+ if (skb->len > mtu) {
+ *pmtu = mtu;
+ err = -EMSGSIZE;
+ goto tx_err_dst_release;
+ }
+
+ /*
+ * Okay, now see if we can stuff it in the buffer as-is.
+ */
+ max_headroom += LL_RESERVED_SPACE(tdev);
+
+ if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
+ (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
+ struct sk_buff *new_skb;
+
+ if (!(new_skb = skb_realloc_headroom(skb, max_headroom)))
+ goto tx_err_dst_release;
+
+ if (skb->sk)
+ skb_set_owner_w(new_skb, skb->sk);
+ kfree_skb(skb);
+ skb = new_skb;
+ }
+ skb_dst_drop(skb);
+ if (fl6->flowi6_mark) {
+ skb_dst_set(skb, dst);
+ ndst = NULL;
+ } else {
+ skb_dst_set_noref(skb, dst);
+ }
+ skb->transport_header = skb->network_header;
+
+ proto = fl6->flowi6_proto;
+ if (encap_limit >= 0) {
+ init_tel_txopt(&opt, encap_limit);
+ ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
+ }
+ skb_push(skb, sizeof(struct ipv6hdr));
+ skb_reset_network_header(skb);
+ ipv6h = ipv6_hdr(skb);
+ *(__be32*)ipv6h = fl6->flowlabel | htonl(0x60000000);
+ dsfield = INET_ECN_encapsulate(0, dsfield);
+ ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield);
+ ipv6h->hop_limit = t->parms.hop_limit;
+ ipv6h->nexthdr = proto;
+ ipv6h->saddr = fl6->saddr;
+ ipv6h->daddr = fl6->daddr;
+ nf_reset(skb);
+ pkt_len = skb->len;
+ err = ip6_local_out(skb);
+
+ if (net_xmit_eval(err) == 0) {
+ struct pcpu_tstats *tstats = this_cpu_ptr(t->dev->tstats);
+
+ tstats->tx_bytes += pkt_len;
+ tstats->tx_packets++;
+ } else {
+ stats->tx_errors++;
+ stats->tx_aborted_errors++;
+ }
+ if (ndst)
+ ip6_tnl_dst_store(t, ndst);
+ return 0;
+tx_err_link_failure:
+ stats->tx_carrier_errors++;
+ dst_link_failure(skb);
+tx_err_dst_release:
+ dst_release(ndst);
+ return err;
+}
+
+static inline int
+ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ const struct iphdr *iph = ip_hdr(skb);
+ int encap_limit = -1;
+ struct flowi6 fl6;
+ __u8 dsfield;
+ __u32 mtu;
+ int err;
+
+ if ((t->parms.proto != IPPROTO_IPIP && t->parms.proto != 0) ||
+ !ip6_tnl_xmit_ctl(t))
+ return -1;
+
+ if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+ encap_limit = t->parms.encap_limit;
+
+ memcpy(&fl6, &t->fl.u.ip6, sizeof (fl6));
+ fl6.flowi6_proto = IPPROTO_IPIP;
+
+ dsfield = ipv4_get_dsfield(iph);
+
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
+ fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT)
+ & IPV6_TCLASS_MASK;
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
+ fl6.flowi6_mark = skb->mark;
+
+ err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
+ if (err != 0) {
+ /* XXX: send ICMP error even if DF is not set. */
+ if (err == -EMSGSIZE)
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+ htonl(mtu));
+ return -1;
+ }
+
+ return 0;
+}
+
+static inline int
+ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ int encap_limit = -1;
+ __u16 offset;
+ struct flowi6 fl6;
+ __u8 dsfield;
+ __u32 mtu;
+ int err;
+
+ if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) ||
+ !ip6_tnl_xmit_ctl(t) || ip6_tnl_addr_conflict(t, ipv6h))
+ return -1;
+
+ offset = parse_tlv_tnl_enc_lim(skb, skb_network_header(skb));
+ if (offset > 0) {
+ struct ipv6_tlv_tnl_enc_lim *tel;
+ tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
+ if (tel->encap_limit == 0) {
+ icmpv6_send(skb, ICMPV6_PARAMPROB,
+ ICMPV6_HDR_FIELD, offset + 2);
+ return -1;
+ }
+ encap_limit = tel->encap_limit - 1;
+ } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+ encap_limit = t->parms.encap_limit;
+
+ memcpy(&fl6, &t->fl.u.ip6, sizeof (fl6));
+ fl6.flowi6_proto = IPPROTO_IPV6;
+
+ dsfield = ipv6_get_dsfield(ipv6h);
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
+ fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
+ fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK);
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
+ fl6.flowi6_mark = skb->mark;
+
+ err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
+ if (err != 0) {
+ if (err == -EMSGSIZE)
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ return -1;
+ }
+
+ return 0;
+}
+
+static netdev_tx_t
+ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct net_device_stats *stats = &t->dev->stats;
+ int ret;
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ ret = ip4ip6_tnl_xmit(skb, dev);
+ break;
+ case htons(ETH_P_IPV6):
+ ret = ip6ip6_tnl_xmit(skb, dev);
+ break;
+ default:
+ goto tx_err;
+ }
+
+ if (ret < 0)
+ goto tx_err;
+
+ return NETDEV_TX_OK;
+
+tx_err:
+ stats->tx_errors++;
+ stats->tx_dropped++;
+ kfree_skb(skb);
+ return NETDEV_TX_OK;
+}
+
+static void ip6_tnl_set_cap(struct ip6_tnl *t)
+{
+ struct ip6_tnl_parm *p = &t->parms;
+ int ltype = ipv6_addr_type(&p->laddr);
+ int rtype = ipv6_addr_type(&p->raddr);
+
+ p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV);
+
+ if (ltype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
+ rtype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
+ !((ltype|rtype) & IPV6_ADDR_LOOPBACK) &&
+ (!((ltype|rtype) & IPV6_ADDR_LINKLOCAL) || p->link)) {
+ if (ltype&IPV6_ADDR_UNICAST)
+ p->flags |= IP6_TNL_F_CAP_XMIT;
+ if (rtype&IPV6_ADDR_UNICAST)
+ p->flags |= IP6_TNL_F_CAP_RCV;
+ }
+}
+
+static void ip6_tnl_link_config(struct ip6_tnl *t)
+{
+ struct net_device *dev = t->dev;
+ struct ip6_tnl_parm *p = &t->parms;
+ struct flowi6 *fl6 = &t->fl.u.ip6;
+
+ memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
+ memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
+
+ /* Set up flowi template */
+ fl6->saddr = p->laddr;
+ fl6->daddr = p->raddr;
+ fl6->flowi6_oif = p->link;
+ fl6->flowlabel = 0;
+
+ if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS))
+ fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo;
+ if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL))
+ fl6->flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo;
+
+ ip6_tnl_set_cap(t);
+
+ if (p->flags&IP6_TNL_F_CAP_XMIT && p->flags&IP6_TNL_F_CAP_RCV)
+ dev->flags |= IFF_POINTOPOINT;
+ else
+ dev->flags &= ~IFF_POINTOPOINT;
+
+ dev->iflink = p->link;
+
+ if (p->flags & IP6_TNL_F_CAP_XMIT) {
+ int strict = (ipv6_addr_type(&p->raddr) &
+ (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL));
+
+ struct rt6_info *rt = rt6_lookup(dev_net(dev),
+ &p->raddr, &p->laddr,
+ p->link, strict);
+
+ if (rt == NULL)
+ return;
+
+ if (rt->dst.dev) {
+ dev->hard_header_len = rt->dst.dev->hard_header_len +
+ sizeof (struct ipv6hdr);
+
+ dev->mtu = rt->dst.dev->mtu - sizeof (struct ipv6hdr);
+ if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+ dev->mtu-=8;
+
+ if (dev->mtu < IPV6_MIN_MTU)
+ dev->mtu = IPV6_MIN_MTU;
+ }
+ dst_release(&rt->dst);
+ }
+}
+
+/**
+ * ip6_tnl_change - update the tunnel parameters
+ * @t: tunnel to be changed
+ * @p: tunnel configuration parameters
+ *
+ * Description:
+ * ip6_tnl_change() updates the tunnel parameters
+ **/
+
+static int
+ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
+{
+ t->parms.laddr = p->laddr;
+ t->parms.raddr = p->raddr;
+ t->parms.flags = p->flags;
+ t->parms.hop_limit = p->hop_limit;
+ t->parms.encap_limit = p->encap_limit;
+ t->parms.flowinfo = p->flowinfo;
+ t->parms.link = p->link;
+ t->parms.proto = p->proto;
+ ip6_tnl_dst_reset(t);
+ ip6_tnl_link_config(t);
+ return 0;
+}
+
+/**
+ * ip6_tnl_ioctl - configure ipv6 tunnels from userspace
+ * @dev: virtual device associated with tunnel
+ * @ifr: parameters passed from userspace
+ * @cmd: command to be performed
+ *
+ * Description:
+ * ip6_tnl_ioctl() is used for managing IPv6 tunnels
+ * from userspace.
+ *
+ * The possible commands are the following:
+ * %SIOCGETTUNNEL: get tunnel parameters for device
+ * %SIOCADDTUNNEL: add tunnel matching given tunnel parameters
+ * %SIOCCHGTUNNEL: change tunnel parameters to those given
+ * %SIOCDELTUNNEL: delete tunnel
+ *
+ * The fallback device "ip6tnl0", created during module
+ * initialization, can be used for creating other tunnel devices.
+ *
+ * Return:
+ * 0 on success,
+ * %-EFAULT if unable to copy data to or from userspace,
+ * %-EPERM if current process hasn't %CAP_NET_ADMIN set
+ * %-EINVAL if passed tunnel parameters are invalid,
+ * %-EEXIST if changing a tunnel's parameters would cause a conflict
+ * %-ENODEV if attempting to change or delete a nonexisting device
+ **/
+
+static int
+ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+ int err = 0;
+ struct ip6_tnl_parm p;
+ struct ip6_tnl *t = NULL;
+ struct net *net = dev_net(dev);
+ struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
+
+ switch (cmd) {
+ case SIOCGETTUNNEL:
+ if (dev == ip6n->fb_tnl_dev) {
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) {
+ err = -EFAULT;
+ break;
+ }
+ t = ip6_tnl_locate(net, &p, 0);
+ }
+ if (t == NULL)
+ t = netdev_priv(dev);
+ memcpy(&p, &t->parms, sizeof (p));
+ if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) {
+ err = -EFAULT;
+ }
+ break;
+ case SIOCADDTUNNEL:
+ case SIOCCHGTUNNEL:
+ err = -EPERM;
+ if (!capable(CAP_NET_ADMIN))
+ break;
+ err = -EFAULT;
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p)))
+ break;
+ err = -EINVAL;
+ if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP &&
+ p.proto != 0)
+ break;
+ t = ip6_tnl_locate(net, &p, cmd == SIOCADDTUNNEL);
+ if (dev != ip6n->fb_tnl_dev && cmd == SIOCCHGTUNNEL) {
+ if (t != NULL) {
+ if (t->dev != dev) {
+ err = -EEXIST;
+ break;
+ }
+ } else
+ t = netdev_priv(dev);
+
+ ip6_tnl_unlink(ip6n, t);
+ synchronize_net();
+ err = ip6_tnl_change(t, &p);
+ ip6_tnl_link(ip6n, t);
+ netdev_state_change(dev);
+ }
+ if (t) {
+ err = 0;
+ if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof (p)))
+ err = -EFAULT;
+
+ } else
+ err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
+ break;
+ case SIOCDELTUNNEL:
+ err = -EPERM;
+ if (!capable(CAP_NET_ADMIN))
+ break;
+
+ if (dev == ip6n->fb_tnl_dev) {
+ err = -EFAULT;
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p)))
+ break;
+ err = -ENOENT;
+ if ((t = ip6_tnl_locate(net, &p, 0)) == NULL)
+ break;
+ err = -EPERM;
+ if (t->dev == ip6n->fb_tnl_dev)
+ break;
+ dev = t->dev;
+ }
+ err = 0;
+ unregister_netdevice(dev);
+ break;
+ default:
+ err = -EINVAL;
+ }
+ return err;
+}
+
+/**
+ * ip6_tnl_change_mtu - change mtu manually for tunnel device
+ * @dev: virtual device associated with tunnel
+ * @new_mtu: the new mtu
+ *
+ * Return:
+ * 0 on success,
+ * %-EINVAL if mtu too small
+ **/
+
+static int
+ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
+{
+ if (new_mtu < IPV6_MIN_MTU) {
+ return -EINVAL;
+ }
+ dev->mtu = new_mtu;
+ return 0;
+}
+
+
+static const struct net_device_ops ip6_tnl_netdev_ops = {
+ .ndo_uninit = ip6_tnl_dev_uninit,
+ .ndo_start_xmit = ip6_tnl_xmit,
+ .ndo_do_ioctl = ip6_tnl_ioctl,
+ .ndo_change_mtu = ip6_tnl_change_mtu,
+ .ndo_get_stats = ip6_get_stats,
+};
+
+
+/**
+ * ip6_tnl_dev_setup - setup virtual tunnel device
+ * @dev: virtual device associated with tunnel
+ *
+ * Description:
+ * Initialize function pointers and device parameters
+ **/
+
+static void ip6_tnl_dev_setup(struct net_device *dev)
+{
+ struct ip6_tnl *t;
+
+ dev->netdev_ops = &ip6_tnl_netdev_ops;
+ dev->destructor = ip6_dev_free;
+
+ dev->type = ARPHRD_TUNNEL6;
+ dev->hard_header_len = LL_MAX_HEADER + sizeof (struct ipv6hdr);
+ dev->mtu = ETH_DATA_LEN - sizeof (struct ipv6hdr);
+ t = netdev_priv(dev);
+ if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+ dev->mtu-=8;
+ dev->flags |= IFF_NOARP;
+ dev->addr_len = sizeof(struct in6_addr);
+ dev->features |= NETIF_F_NETNS_LOCAL;
+ dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+}
+
+
+/**
+ * ip6_tnl_dev_init_gen - general initializer for all tunnel devices
+ * @dev: virtual device associated with tunnel
+ **/
+
+static inline int
+ip6_tnl_dev_init_gen(struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+
+ t->dev = dev;
+ dev->tstats = alloc_percpu(struct pcpu_tstats);
+ if (!dev->tstats)
+ return -ENOMEM;
+ return 0;
+}
+
+/**
+ * ip6_tnl_dev_init - initializer for all non fallback tunnel devices
+ * @dev: virtual device associated with tunnel
+ **/
+
+static int ip6_tnl_dev_init(struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ int err = ip6_tnl_dev_init_gen(dev);
+
+ if (err)
+ return err;
+ ip6_tnl_link_config(t);
+ return 0;
+}
+
+/**
+ * ip6_fb_tnl_dev_init - initializer for fallback tunnel device
+ * @dev: fallback device
+ *
+ * Return: 0
+ **/
+
+static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct net *net = dev_net(dev);
+ struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
+ int err = ip6_tnl_dev_init_gen(dev);
+
+ if (err)
+ return err;
+
+ t->parms.proto = IPPROTO_IPV6;
+ dev_hold(dev);
+ rcu_assign_pointer(ip6n->tnls_wc[0], t);
+ return 0;
+}
+
+static struct xfrm6_tunnel ip4ip6_handler __read_mostly = {
+ .handler = ip4ip6_rcv,
+ .err_handler = ip4ip6_err,
+ .priority = 1,
+};
+
+static struct xfrm6_tunnel ip6ip6_handler __read_mostly = {
+ .handler = ip6ip6_rcv,
+ .err_handler = ip6ip6_err,
+ .priority = 1,
+};
+
+static void __net_exit ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n)
+{
+ int h;
+ struct ip6_tnl *t;
+ LIST_HEAD(list);
+
+ for (h = 0; h < HASH_SIZE; h++) {
+ t = rtnl_dereference(ip6n->tnls_r_l[h]);
+ while (t != NULL) {
+ unregister_netdevice_queue(t->dev, &list);
+ t = rtnl_dereference(t->next);
+ }
+ }
+
+ t = rtnl_dereference(ip6n->tnls_wc[0]);
+ unregister_netdevice_queue(t->dev, &list);
+ unregister_netdevice_many(&list);
+}
+
+static int __net_init ip6_tnl_init_net(struct net *net)
+{
+ struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
+ struct ip6_tnl *t = NULL;
+ int err;
+
+ ip6n->tnls[0] = ip6n->tnls_wc;
+ ip6n->tnls[1] = ip6n->tnls_r_l;
+
+ err = -ENOMEM;
+ ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
+ ip6_tnl_dev_setup);
+
+ if (!ip6n->fb_tnl_dev)
+ goto err_alloc_dev;
+ dev_net_set(ip6n->fb_tnl_dev, net);
+
+ err = ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev);
+ if (err < 0)
+ goto err_register;
+
+ err = register_netdev(ip6n->fb_tnl_dev);
+ if (err < 0)
+ goto err_register;
+
+ t = netdev_priv(ip6n->fb_tnl_dev);
+
+ strcpy(t->parms.name, ip6n->fb_tnl_dev->name);
+ return 0;
+
+err_register:
+ ip6_dev_free(ip6n->fb_tnl_dev);
+err_alloc_dev:
+ return err;
+}
+
+static void __net_exit ip6_tnl_exit_net(struct net *net)
+{
+ struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
+
+ rtnl_lock();
+ ip6_tnl_destroy_tunnels(ip6n);
+ rtnl_unlock();
+}
+
+static struct pernet_operations ip6_tnl_net_ops = {
+ .init = ip6_tnl_init_net,
+ .exit = ip6_tnl_exit_net,
+ .id = &ip6_tnl_net_id,
+ .size = sizeof(struct ip6_tnl_net),
+};
+
+/**
+ * ip6_tunnel_init - register protocol and reserve needed resources
+ *
+ * Return: 0 on success
+ **/
+
+static int __init ip6_tunnel_init(void)
+{
+ int err;
+
+ err = register_pernet_device(&ip6_tnl_net_ops);
+ if (err < 0)
+ goto out_pernet;
+
+ err = xfrm6_tunnel_register(&ip4ip6_handler, AF_INET);
+ if (err < 0) {
+ printk(KERN_ERR "ip6_tunnel init: can't register ip4ip6\n");
+ goto out_ip4ip6;
+ }
+
+ err = xfrm6_tunnel_register(&ip6ip6_handler, AF_INET6);
+ if (err < 0) {
+ printk(KERN_ERR "ip6_tunnel init: can't register ip6ip6\n");
+ goto out_ip6ip6;
+ }
+
+ return 0;
+
+out_ip6ip6:
+ xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET);
+out_ip4ip6:
+ unregister_pernet_device(&ip6_tnl_net_ops);
+out_pernet:
+ return err;
+}
+
+/**
+ * ip6_tunnel_cleanup - free resources and unregister protocol
+ **/
+
+static void __exit ip6_tunnel_cleanup(void)
+{
+ if (xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET))
+ printk(KERN_INFO "ip6_tunnel close: can't deregister ip4ip6\n");
+
+ if (xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6))
+ printk(KERN_INFO "ip6_tunnel close: can't deregister ip6ip6\n");
+
+ unregister_pernet_device(&ip6_tnl_net_ops);
+}
+
+module_init(ip6_tunnel_init);
+module_exit(ip6_tunnel_cleanup);
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
new file mode 100644
index 00000000..8110362e
--- /dev/null
+++ b/net/ipv6/ip6mr.c
@@ -0,0 +1,2281 @@
+/*
+ * Linux IPv6 multicast routing support for BSD pim6sd
+ * Based on net/ipv4/ipmr.c.
+ *
+ * (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
+ * LSIIT Laboratory, Strasbourg, France
+ * (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
+ * 6WIND, Paris, France
+ * Copyright (C)2007,2008 USAGI/WIDE Project
+ * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <asm/uaccess.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/timer.h>
+#include <linux/mm.h>
+#include <linux/kernel.h>
+#include <linux/fcntl.h>
+#include <linux/stat.h>
+#include <linux/socket.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/compat.h>
+#include <net/protocol.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/raw.h>
+#include <linux/notifier.h>
+#include <linux/if_arp.h>
+#include <net/checksum.h>
+#include <net/netlink.h>
+#include <net/fib_rules.h>
+
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <linux/mroute6.h>
+#include <linux/pim.h>
+#include <net/addrconf.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/export.h>
+#include <net/ip6_checksum.h>
+
+struct mr6_table {
+ struct list_head list;
+#ifdef CONFIG_NET_NS
+ struct net *net;
+#endif
+ u32 id;
+ struct sock *mroute6_sk;
+ struct timer_list ipmr_expire_timer;
+ struct list_head mfc6_unres_queue;
+ struct list_head mfc6_cache_array[MFC6_LINES];
+ struct mif_device vif6_table[MAXMIFS];
+ int maxvif;
+ atomic_t cache_resolve_queue_len;
+ int mroute_do_assert;
+ int mroute_do_pim;
+#ifdef CONFIG_IPV6_PIMSM_V2
+ int mroute_reg_vif_num;
+#endif
+};
+
+struct ip6mr_rule {
+ struct fib_rule common;
+};
+
+struct ip6mr_result {
+ struct mr6_table *mrt;
+};
+
+/* Big lock, protecting vif table, mrt cache and mroute socket state.
+ Note that the changes are semaphored via rtnl_lock.
+ */
+
+static DEFINE_RWLOCK(mrt_lock);
+
+/*
+ * Multicast router control variables
+ */
+
+#define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)
+
+/* Special spinlock for queue of unresolved entries */
+static DEFINE_SPINLOCK(mfc_unres_lock);
+
+/* We return to original Alan's scheme. Hash table of resolved
+ entries is changed only in process context and protected
+ with weak lock mrt_lock. Queue of unresolved entries is protected
+ with strong spinlock mfc_unres_lock.
+
+ In this case data path is free of exclusive locks at all.
+ */
+
+static struct kmem_cache *mrt_cachep __read_mostly;
+
+static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
+static void ip6mr_free_table(struct mr6_table *mrt);
+
+static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
+ struct sk_buff *skb, struct mfc6_cache *cache);
+static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
+ mifi_t mifi, int assert);
+static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
+ struct mfc6_cache *c, struct rtmsg *rtm);
+static int ip6mr_rtm_dumproute(struct sk_buff *skb,
+ struct netlink_callback *cb);
+static void mroute_clean_tables(struct mr6_table *mrt);
+static void ipmr_expire_process(unsigned long arg);
+
+#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
+#define ip6mr_for_each_table(mrt, net) \
+ list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
+
+static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
+{
+ struct mr6_table *mrt;
+
+ ip6mr_for_each_table(mrt, net) {
+ if (mrt->id == id)
+ return mrt;
+ }
+ return NULL;
+}
+
+static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
+ struct mr6_table **mrt)
+{
+ struct ip6mr_result res;
+ struct fib_lookup_arg arg = { .result = &res, };
+ int err;
+
+ err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
+ flowi6_to_flowi(flp6), 0, &arg);
+ if (err < 0)
+ return err;
+ *mrt = res.mrt;
+ return 0;
+}
+
+static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
+ int flags, struct fib_lookup_arg *arg)
+{
+ struct ip6mr_result *res = arg->result;
+ struct mr6_table *mrt;
+
+ switch (rule->action) {
+ case FR_ACT_TO_TBL:
+ break;
+ case FR_ACT_UNREACHABLE:
+ return -ENETUNREACH;
+ case FR_ACT_PROHIBIT:
+ return -EACCES;
+ case FR_ACT_BLACKHOLE:
+ default:
+ return -EINVAL;
+ }
+
+ mrt = ip6mr_get_table(rule->fr_net, rule->table);
+ if (mrt == NULL)
+ return -EAGAIN;
+ res->mrt = mrt;
+ return 0;
+}
+
+static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
+{
+ return 1;
+}
+
+static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
+ FRA_GENERIC_POLICY,
+};
+
+static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
+ struct fib_rule_hdr *frh, struct nlattr **tb)
+{
+ return 0;
+}
+
+static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
+ struct nlattr **tb)
+{
+ return 1;
+}
+
+static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
+ struct fib_rule_hdr *frh)
+{
+ frh->dst_len = 0;
+ frh->src_len = 0;
+ frh->tos = 0;
+ return 0;
+}
+
+static const struct fib_rules_ops __net_initdata ip6mr_rules_ops_template = {
+ .family = RTNL_FAMILY_IP6MR,
+ .rule_size = sizeof(struct ip6mr_rule),
+ .addr_size = sizeof(struct in6_addr),
+ .action = ip6mr_rule_action,
+ .match = ip6mr_rule_match,
+ .configure = ip6mr_rule_configure,
+ .compare = ip6mr_rule_compare,
+ .default_pref = fib_default_rule_pref,
+ .fill = ip6mr_rule_fill,
+ .nlgroup = RTNLGRP_IPV6_RULE,
+ .policy = ip6mr_rule_policy,
+ .owner = THIS_MODULE,
+};
+
+static int __net_init ip6mr_rules_init(struct net *net)
+{
+ struct fib_rules_ops *ops;
+ struct mr6_table *mrt;
+ int err;
+
+ ops = fib_rules_register(&ip6mr_rules_ops_template, net);
+ if (IS_ERR(ops))
+ return PTR_ERR(ops);
+
+ INIT_LIST_HEAD(&net->ipv6.mr6_tables);
+
+ mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
+ if (mrt == NULL) {
+ err = -ENOMEM;
+ goto err1;
+ }
+
+ err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
+ if (err < 0)
+ goto err2;
+
+ net->ipv6.mr6_rules_ops = ops;
+ return 0;
+
+err2:
+ kfree(mrt);
+err1:
+ fib_rules_unregister(ops);
+ return err;
+}
+
+static void __net_exit ip6mr_rules_exit(struct net *net)
+{
+ struct mr6_table *mrt, *next;
+
+ list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
+ list_del(&mrt->list);
+ ip6mr_free_table(mrt);
+ }
+ fib_rules_unregister(net->ipv6.mr6_rules_ops);
+}
+#else
+#define ip6mr_for_each_table(mrt, net) \
+ for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
+
+static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
+{
+ return net->ipv6.mrt6;
+}
+
+static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
+ struct mr6_table **mrt)
+{
+ *mrt = net->ipv6.mrt6;
+ return 0;
+}
+
+static int __net_init ip6mr_rules_init(struct net *net)
+{
+ net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
+ return net->ipv6.mrt6 ? 0 : -ENOMEM;
+}
+
+static void __net_exit ip6mr_rules_exit(struct net *net)
+{
+ ip6mr_free_table(net->ipv6.mrt6);
+}
+#endif
+
+static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
+{
+ struct mr6_table *mrt;
+ unsigned int i;
+
+ mrt = ip6mr_get_table(net, id);
+ if (mrt != NULL)
+ return mrt;
+
+ mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
+ if (mrt == NULL)
+ return NULL;
+ mrt->id = id;
+ write_pnet(&mrt->net, net);
+
+ /* Forwarding cache */
+ for (i = 0; i < MFC6_LINES; i++)
+ INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
+
+ INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
+
+ setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
+ (unsigned long)mrt);
+
+#ifdef CONFIG_IPV6_PIMSM_V2
+ mrt->mroute_reg_vif_num = -1;
+#endif
+#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
+ list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
+#endif
+ return mrt;
+}
+
+static void ip6mr_free_table(struct mr6_table *mrt)
+{
+ del_timer(&mrt->ipmr_expire_timer);
+ mroute_clean_tables(mrt);
+ kfree(mrt);
+}
+
+#ifdef CONFIG_PROC_FS
+
+struct ipmr_mfc_iter {
+ struct seq_net_private p;
+ struct mr6_table *mrt;
+ struct list_head *cache;
+ int ct;
+};
+
+
+static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
+ struct ipmr_mfc_iter *it, loff_t pos)
+{
+ struct mr6_table *mrt = it->mrt;
+ struct mfc6_cache *mfc;
+
+ read_lock(&mrt_lock);
+ for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
+ it->cache = &mrt->mfc6_cache_array[it->ct];
+ list_for_each_entry(mfc, it->cache, list)
+ if (pos-- == 0)
+ return mfc;
+ }
+ read_unlock(&mrt_lock);
+
+ spin_lock_bh(&mfc_unres_lock);
+ it->cache = &mrt->mfc6_unres_queue;
+ list_for_each_entry(mfc, it->cache, list)
+ if (pos-- == 0)
+ return mfc;
+ spin_unlock_bh(&mfc_unres_lock);
+
+ it->cache = NULL;
+ return NULL;
+}
+
+/*
+ * The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
+ */
+
+struct ipmr_vif_iter {
+ struct seq_net_private p;
+ struct mr6_table *mrt;
+ int ct;
+};
+
+static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
+ struct ipmr_vif_iter *iter,
+ loff_t pos)
+{
+ struct mr6_table *mrt = iter->mrt;
+
+ for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
+ if (!MIF_EXISTS(mrt, iter->ct))
+ continue;
+ if (pos-- == 0)
+ return &mrt->vif6_table[iter->ct];
+ }
+ return NULL;
+}
+
+static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(mrt_lock)
+{
+ struct ipmr_vif_iter *iter = seq->private;
+ struct net *net = seq_file_net(seq);
+ struct mr6_table *mrt;
+
+ mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
+ if (mrt == NULL)
+ return ERR_PTR(-ENOENT);
+
+ iter->mrt = mrt;
+
+ read_lock(&mrt_lock);
+ return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
+ : SEQ_START_TOKEN;
+}
+
+static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct ipmr_vif_iter *iter = seq->private;
+ struct net *net = seq_file_net(seq);
+ struct mr6_table *mrt = iter->mrt;
+
+ ++*pos;
+ if (v == SEQ_START_TOKEN)
+ return ip6mr_vif_seq_idx(net, iter, 0);
+
+ while (++iter->ct < mrt->maxvif) {
+ if (!MIF_EXISTS(mrt, iter->ct))
+ continue;
+ return &mrt->vif6_table[iter->ct];
+ }
+ return NULL;
+}
+
+static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
+ __releases(mrt_lock)
+{
+ read_unlock(&mrt_lock);
+}
+
+static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
+{
+ struct ipmr_vif_iter *iter = seq->private;
+ struct mr6_table *mrt = iter->mrt;
+
+ if (v == SEQ_START_TOKEN) {
+ seq_puts(seq,
+ "Interface BytesIn PktsIn BytesOut PktsOut Flags\n");
+ } else {
+ const struct mif_device *vif = v;
+ const char *name = vif->dev ? vif->dev->name : "none";
+
+ seq_printf(seq,
+ "%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
+ vif - mrt->vif6_table,
+ name, vif->bytes_in, vif->pkt_in,
+ vif->bytes_out, vif->pkt_out,
+ vif->flags);
+ }
+ return 0;
+}
+
+static const struct seq_operations ip6mr_vif_seq_ops = {
+ .start = ip6mr_vif_seq_start,
+ .next = ip6mr_vif_seq_next,
+ .stop = ip6mr_vif_seq_stop,
+ .show = ip6mr_vif_seq_show,
+};
+
+static int ip6mr_vif_open(struct inode *inode, struct file *file)
+{
+ return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
+ sizeof(struct ipmr_vif_iter));
+}
+
+static const struct file_operations ip6mr_vif_fops = {
+ .owner = THIS_MODULE,
+ .open = ip6mr_vif_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_net,
+};
+
+static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ struct ipmr_mfc_iter *it = seq->private;
+ struct net *net = seq_file_net(seq);
+ struct mr6_table *mrt;
+
+ mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
+ if (mrt == NULL)
+ return ERR_PTR(-ENOENT);
+
+ it->mrt = mrt;
+ return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
+ : SEQ_START_TOKEN;
+}
+
+static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct mfc6_cache *mfc = v;
+ struct ipmr_mfc_iter *it = seq->private;
+ struct net *net = seq_file_net(seq);
+ struct mr6_table *mrt = it->mrt;
+
+ ++*pos;
+
+ if (v == SEQ_START_TOKEN)
+ return ipmr_mfc_seq_idx(net, seq->private, 0);
+
+ if (mfc->list.next != it->cache)
+ return list_entry(mfc->list.next, struct mfc6_cache, list);
+
+ if (it->cache == &mrt->mfc6_unres_queue)
+ goto end_of_list;
+
+ BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);
+
+ while (++it->ct < MFC6_LINES) {
+ it->cache = &mrt->mfc6_cache_array[it->ct];
+ if (list_empty(it->cache))
+ continue;
+ return list_first_entry(it->cache, struct mfc6_cache, list);
+ }
+
+ /* exhausted cache_array, show unresolved */
+ read_unlock(&mrt_lock);
+ it->cache = &mrt->mfc6_unres_queue;
+ it->ct = 0;
+
+ spin_lock_bh(&mfc_unres_lock);
+ if (!list_empty(it->cache))
+ return list_first_entry(it->cache, struct mfc6_cache, list);
+
+ end_of_list:
+ spin_unlock_bh(&mfc_unres_lock);
+ it->cache = NULL;
+
+ return NULL;
+}
+
+static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
+{
+ struct ipmr_mfc_iter *it = seq->private;
+ struct mr6_table *mrt = it->mrt;
+
+ if (it->cache == &mrt->mfc6_unres_queue)
+ spin_unlock_bh(&mfc_unres_lock);
+ else if (it->cache == mrt->mfc6_cache_array)
+ read_unlock(&mrt_lock);
+}
+
+static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
+{
+ int n;
+
+ if (v == SEQ_START_TOKEN) {
+ seq_puts(seq,
+ "Group "
+ "Origin "
+ "Iif Pkts Bytes Wrong Oifs\n");
+ } else {
+ const struct mfc6_cache *mfc = v;
+ const struct ipmr_mfc_iter *it = seq->private;
+ struct mr6_table *mrt = it->mrt;
+
+ seq_printf(seq, "%pI6 %pI6 %-3hd",
+ &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
+ mfc->mf6c_parent);
+
+ if (it->cache != &mrt->mfc6_unres_queue) {
+ seq_printf(seq, " %8lu %8lu %8lu",
+ mfc->mfc_un.res.pkt,
+ mfc->mfc_un.res.bytes,
+ mfc->mfc_un.res.wrong_if);
+ for (n = mfc->mfc_un.res.minvif;
+ n < mfc->mfc_un.res.maxvif; n++) {
+ if (MIF_EXISTS(mrt, n) &&
+ mfc->mfc_un.res.ttls[n] < 255)
+ seq_printf(seq,
+ " %2d:%-3d",
+ n, mfc->mfc_un.res.ttls[n]);
+ }
+ } else {
+ /* unresolved mfc_caches don't contain
+ * pkt, bytes and wrong_if values
+ */
+ seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
+ }
+ seq_putc(seq, '\n');
+ }
+ return 0;
+}
+
+static const struct seq_operations ipmr_mfc_seq_ops = {
+ .start = ipmr_mfc_seq_start,
+ .next = ipmr_mfc_seq_next,
+ .stop = ipmr_mfc_seq_stop,
+ .show = ipmr_mfc_seq_show,
+};
+
+static int ipmr_mfc_open(struct inode *inode, struct file *file)
+{
+ return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
+ sizeof(struct ipmr_mfc_iter));
+}
+
+static const struct file_operations ip6mr_mfc_fops = {
+ .owner = THIS_MODULE,
+ .open = ipmr_mfc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_net,
+};
+#endif
+
+#ifdef CONFIG_IPV6_PIMSM_V2
+
+static int pim6_rcv(struct sk_buff *skb)
+{
+ struct pimreghdr *pim;
+ struct ipv6hdr *encap;
+ struct net_device *reg_dev = NULL;
+ struct net *net = dev_net(skb->dev);
+ struct mr6_table *mrt;
+ struct flowi6 fl6 = {
+ .flowi6_iif = skb->dev->ifindex,
+ .flowi6_mark = skb->mark,
+ };
+ int reg_vif_num;
+
+ if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
+ goto drop;
+
+ pim = (struct pimreghdr *)skb_transport_header(skb);
+ if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
+ (pim->flags & PIM_NULL_REGISTER) ||
+ (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
+ sizeof(*pim), IPPROTO_PIM,
+ csum_partial((void *)pim, sizeof(*pim), 0)) &&
+ csum_fold(skb_checksum(skb, 0, skb->len, 0))))
+ goto drop;
+
+ /* check if the inner packet is destined to mcast group */
+ encap = (struct ipv6hdr *)(skb_transport_header(skb) +
+ sizeof(*pim));
+
+ if (!ipv6_addr_is_multicast(&encap->daddr) ||
+ encap->payload_len == 0 ||
+ ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
+ goto drop;
+
+ if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
+ goto drop;
+ reg_vif_num = mrt->mroute_reg_vif_num;
+
+ read_lock(&mrt_lock);
+ if (reg_vif_num >= 0)
+ reg_dev = mrt->vif6_table[reg_vif_num].dev;
+ if (reg_dev)
+ dev_hold(reg_dev);
+ read_unlock(&mrt_lock);
+
+ if (reg_dev == NULL)
+ goto drop;
+
+ skb->mac_header = skb->network_header;
+ skb_pull(skb, (u8 *)encap - skb->data);
+ skb_reset_network_header(skb);
+ skb->protocol = htons(ETH_P_IPV6);
+ skb->ip_summed = CHECKSUM_NONE;
+ skb->pkt_type = PACKET_HOST;
+
+ skb_tunnel_rx(skb, reg_dev);
+
+ netif_rx(skb);
+
+ dev_put(reg_dev);
+ return 0;
+ drop:
+ kfree_skb(skb);
+ return 0;
+}
+
+static const struct inet6_protocol pim6_protocol = {
+ .handler = pim6_rcv,
+};
+
+/* Service routines creating virtual interfaces: PIMREG */
+
+static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ struct net *net = dev_net(dev);
+ struct mr6_table *mrt;
+ struct flowi6 fl6 = {
+ .flowi6_oif = dev->ifindex,
+ .flowi6_iif = skb->skb_iif,
+ .flowi6_mark = skb->mark,
+ };
+ int err;
+
+ err = ip6mr_fib_lookup(net, &fl6, &mrt);
+ if (err < 0) {
+ kfree_skb(skb);
+ return err;
+ }
+
+ read_lock(&mrt_lock);
+ dev->stats.tx_bytes += skb->len;
+ dev->stats.tx_packets++;
+ ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
+ read_unlock(&mrt_lock);
+ kfree_skb(skb);
+ return NETDEV_TX_OK;
+}
+
+static const struct net_device_ops reg_vif_netdev_ops = {
+ .ndo_start_xmit = reg_vif_xmit,
+};
+
+static void reg_vif_setup(struct net_device *dev)
+{
+ dev->type = ARPHRD_PIMREG;
+ dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8;
+ dev->flags = IFF_NOARP;
+ dev->netdev_ops = &reg_vif_netdev_ops;
+ dev->destructor = free_netdev;
+ dev->features |= NETIF_F_NETNS_LOCAL;
+}
+
+static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
+{
+ struct net_device *dev;
+ char name[IFNAMSIZ];
+
+ if (mrt->id == RT6_TABLE_DFLT)
+ sprintf(name, "pim6reg");
+ else
+ sprintf(name, "pim6reg%u", mrt->id);
+
+ dev = alloc_netdev(0, name, reg_vif_setup);
+ if (dev == NULL)
+ return NULL;
+
+ dev_net_set(dev, net);
+
+ if (register_netdevice(dev)) {
+ free_netdev(dev);
+ return NULL;
+ }
+ dev->iflink = 0;
+
+ if (dev_open(dev))
+ goto failure;
+
+ dev_hold(dev);
+ return dev;
+
+failure:
+ /* allow the register to be completed before unregistering. */
+ rtnl_unlock();
+ rtnl_lock();
+
+ unregister_netdevice(dev);
+ return NULL;
+}
+#endif
+
+/*
+ * Delete a VIF entry
+ */
+
+static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
+{
+ struct mif_device *v;
+ struct net_device *dev;
+ struct inet6_dev *in6_dev;
+
+ if (vifi < 0 || vifi >= mrt->maxvif)
+ return -EADDRNOTAVAIL;
+
+ v = &mrt->vif6_table[vifi];
+
+ write_lock_bh(&mrt_lock);
+ dev = v->dev;
+ v->dev = NULL;
+
+ if (!dev) {
+ write_unlock_bh(&mrt_lock);
+ return -EADDRNOTAVAIL;
+ }
+
+#ifdef CONFIG_IPV6_PIMSM_V2
+ if (vifi == mrt->mroute_reg_vif_num)
+ mrt->mroute_reg_vif_num = -1;
+#endif
+
+ if (vifi + 1 == mrt->maxvif) {
+ int tmp;
+ for (tmp = vifi - 1; tmp >= 0; tmp--) {
+ if (MIF_EXISTS(mrt, tmp))
+ break;
+ }
+ mrt->maxvif = tmp + 1;
+ }
+
+ write_unlock_bh(&mrt_lock);
+
+ dev_set_allmulti(dev, -1);
+
+ in6_dev = __in6_dev_get(dev);
+ if (in6_dev)
+ in6_dev->cnf.mc_forwarding--;
+
+ if (v->flags & MIFF_REGISTER)
+ unregister_netdevice_queue(dev, head);
+
+ dev_put(dev);
+ return 0;
+}
+
+static inline void ip6mr_cache_free(struct mfc6_cache *c)
+{
+ kmem_cache_free(mrt_cachep, c);
+}
+
+/* Destroy an unresolved cache entry, killing queued skbs
+ and reporting error to netlink readers.
+ */
+
+static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
+{
+ struct net *net = read_pnet(&mrt->net);
+ struct sk_buff *skb;
+
+ atomic_dec(&mrt->cache_resolve_queue_len);
+
+ while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
+ if (ipv6_hdr(skb)->version == 0) {
+ struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
+ nlh->nlmsg_type = NLMSG_ERROR;
+ nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
+ skb_trim(skb, nlh->nlmsg_len);
+ ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
+ rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
+ } else
+ kfree_skb(skb);
+ }
+
+ ip6mr_cache_free(c);
+}
+
+
+/* Timer process for all the unresolved queue. */
+
+static void ipmr_do_expire_process(struct mr6_table *mrt)
+{
+ unsigned long now = jiffies;
+ unsigned long expires = 10 * HZ;
+ struct mfc6_cache *c, *next;
+
+ list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
+ if (time_after(c->mfc_un.unres.expires, now)) {
+ /* not yet... */
+ unsigned long interval = c->mfc_un.unres.expires - now;
+ if (interval < expires)
+ expires = interval;
+ continue;
+ }
+
+ list_del(&c->list);
+ ip6mr_destroy_unres(mrt, c);
+ }
+
+ if (!list_empty(&mrt->mfc6_unres_queue))
+ mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
+}
+
+static void ipmr_expire_process(unsigned long arg)
+{
+ struct mr6_table *mrt = (struct mr6_table *)arg;
+
+ if (!spin_trylock(&mfc_unres_lock)) {
+ mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
+ return;
+ }
+
+ if (!list_empty(&mrt->mfc6_unres_queue))
+ ipmr_do_expire_process(mrt);
+
+ spin_unlock(&mfc_unres_lock);
+}
+
+/* Fill oifs list. It is called under write locked mrt_lock. */
+
+static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
+ unsigned char *ttls)
+{
+ int vifi;
+
+ cache->mfc_un.res.minvif = MAXMIFS;
+ cache->mfc_un.res.maxvif = 0;
+ memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
+
+ for (vifi = 0; vifi < mrt->maxvif; vifi++) {
+ if (MIF_EXISTS(mrt, vifi) &&
+ ttls[vifi] && ttls[vifi] < 255) {
+ cache->mfc_un.res.ttls[vifi] = ttls[vifi];
+ if (cache->mfc_un.res.minvif > vifi)
+ cache->mfc_un.res.minvif = vifi;
+ if (cache->mfc_un.res.maxvif <= vifi)
+ cache->mfc_un.res.maxvif = vifi + 1;
+ }
+ }
+}
+
+static int mif6_add(struct net *net, struct mr6_table *mrt,
+ struct mif6ctl *vifc, int mrtsock)
+{
+ int vifi = vifc->mif6c_mifi;
+ struct mif_device *v = &mrt->vif6_table[vifi];
+ struct net_device *dev;
+ struct inet6_dev *in6_dev;
+ int err;
+
+ /* Is vif busy ? */
+ if (MIF_EXISTS(mrt, vifi))
+ return -EADDRINUSE;
+
+ switch (vifc->mif6c_flags) {
+#ifdef CONFIG_IPV6_PIMSM_V2
+ case MIFF_REGISTER:
+ /*
+ * Special Purpose VIF in PIM
+ * All the packets will be sent to the daemon
+ */
+ if (mrt->mroute_reg_vif_num >= 0)
+ return -EADDRINUSE;
+ dev = ip6mr_reg_vif(net, mrt);
+ if (!dev)
+ return -ENOBUFS;
+ err = dev_set_allmulti(dev, 1);
+ if (err) {
+ unregister_netdevice(dev);
+ dev_put(dev);
+ return err;
+ }
+ break;
+#endif
+ case 0:
+ dev = dev_get_by_index(net, vifc->mif6c_pifi);
+ if (!dev)
+ return -EADDRNOTAVAIL;
+ err = dev_set_allmulti(dev, 1);
+ if (err) {
+ dev_put(dev);
+ return err;
+ }
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ in6_dev = __in6_dev_get(dev);
+ if (in6_dev)
+ in6_dev->cnf.mc_forwarding++;
+
+ /*
+ * Fill in the VIF structures
+ */
+ v->rate_limit = vifc->vifc_rate_limit;
+ v->flags = vifc->mif6c_flags;
+ if (!mrtsock)
+ v->flags |= VIFF_STATIC;
+ v->threshold = vifc->vifc_threshold;
+ v->bytes_in = 0;
+ v->bytes_out = 0;
+ v->pkt_in = 0;
+ v->pkt_out = 0;
+ v->link = dev->ifindex;
+ if (v->flags & MIFF_REGISTER)
+ v->link = dev->iflink;
+
+ /* And finish update writing critical data */
+ write_lock_bh(&mrt_lock);
+ v->dev = dev;
+#ifdef CONFIG_IPV6_PIMSM_V2
+ if (v->flags & MIFF_REGISTER)
+ mrt->mroute_reg_vif_num = vifi;
+#endif
+ if (vifi + 1 > mrt->maxvif)
+ mrt->maxvif = vifi + 1;
+ write_unlock_bh(&mrt_lock);
+ return 0;
+}
+
+static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
+ const struct in6_addr *origin,
+ const struct in6_addr *mcastgrp)
+{
+ int line = MFC6_HASH(mcastgrp, origin);
+ struct mfc6_cache *c;
+
+ list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
+ if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
+ ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
+ return c;
+ }
+ return NULL;
+}
+
+/*
+ * Allocate a multicast cache entry
+ */
+static struct mfc6_cache *ip6mr_cache_alloc(void)
+{
+ struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
+ if (c == NULL)
+ return NULL;
+ c->mfc_un.res.minvif = MAXMIFS;
+ return c;
+}
+
+static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
+{
+ struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
+ if (c == NULL)
+ return NULL;
+ skb_queue_head_init(&c->mfc_un.unres.unresolved);
+ c->mfc_un.unres.expires = jiffies + 10 * HZ;
+ return c;
+}
+
+/*
+ * A cache entry has gone into a resolved state from queued
+ */
+
+static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
+ struct mfc6_cache *uc, struct mfc6_cache *c)
+{
+ struct sk_buff *skb;
+
+ /*
+ * Play the pending entries through our router
+ */
+
+ while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
+ if (ipv6_hdr(skb)->version == 0) {
+ struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
+
+ if (__ip6mr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
+ nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
+ } else {
+ nlh->nlmsg_type = NLMSG_ERROR;
+ nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
+ skb_trim(skb, nlh->nlmsg_len);
+ ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
+ }
+ rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
+ } else
+ ip6_mr_forward(net, mrt, skb, c);
+ }
+}
+
+/*
+ * Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
+ * expects the following bizarre scheme.
+ *
+ * Called under mrt_lock.
+ */
+
+static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
+ mifi_t mifi, int assert)
+{
+ struct sk_buff *skb;
+ struct mrt6msg *msg;
+ int ret;
+
+#ifdef CONFIG_IPV6_PIMSM_V2
+ if (assert == MRT6MSG_WHOLEPKT)
+ skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
+ +sizeof(*msg));
+ else
+#endif
+ skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
+
+ if (!skb)
+ return -ENOBUFS;
+
+ /* I suppose that internal messages
+ * do not require checksums */
+
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+#ifdef CONFIG_IPV6_PIMSM_V2
+ if (assert == MRT6MSG_WHOLEPKT) {
+ /* Ugly, but we have no choice with this interface.
+ Duplicate old header, fix length etc.
+ And all this only to mangle msg->im6_msgtype and
+ to set msg->im6_mbz to "mbz" :-)
+ */
+ skb_push(skb, -skb_network_offset(pkt));
+
+ skb_push(skb, sizeof(*msg));
+ skb_reset_transport_header(skb);
+ msg = (struct mrt6msg *)skb_transport_header(skb);
+ msg->im6_mbz = 0;
+ msg->im6_msgtype = MRT6MSG_WHOLEPKT;
+ msg->im6_mif = mrt->mroute_reg_vif_num;
+ msg->im6_pad = 0;
+ msg->im6_src = ipv6_hdr(pkt)->saddr;
+ msg->im6_dst = ipv6_hdr(pkt)->daddr;
+
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ } else
+#endif
+ {
+ /*
+ * Copy the IP header
+ */
+
+ skb_put(skb, sizeof(struct ipv6hdr));
+ skb_reset_network_header(skb);
+ skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
+
+ /*
+ * Add our header
+ */
+ skb_put(skb, sizeof(*msg));
+ skb_reset_transport_header(skb);
+ msg = (struct mrt6msg *)skb_transport_header(skb);
+
+ msg->im6_mbz = 0;
+ msg->im6_msgtype = assert;
+ msg->im6_mif = mifi;
+ msg->im6_pad = 0;
+ msg->im6_src = ipv6_hdr(pkt)->saddr;
+ msg->im6_dst = ipv6_hdr(pkt)->daddr;
+
+ skb_dst_set(skb, dst_clone(skb_dst(pkt)));
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ }
+
+ if (mrt->mroute6_sk == NULL) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+
+ /*
+ * Deliver to user space multicast routing algorithms
+ */
+ ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
+ if (ret < 0) {
+ if (net_ratelimit())
+ printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
+ kfree_skb(skb);
+ }
+
+ return ret;
+}
+
+/*
+ * Queue a packet for resolution. It gets locked cache entry!
+ */
+
+static int
+ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
+{
+ bool found = false;
+ int err;
+ struct mfc6_cache *c;
+
+ spin_lock_bh(&mfc_unres_lock);
+ list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
+ if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
+ ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ /*
+ * Create a new entry if allowable
+ */
+
+ if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
+ (c = ip6mr_cache_alloc_unres()) == NULL) {
+ spin_unlock_bh(&mfc_unres_lock);
+
+ kfree_skb(skb);
+ return -ENOBUFS;
+ }
+
+ /*
+ * Fill in the new cache entry
+ */
+ c->mf6c_parent = -1;
+ c->mf6c_origin = ipv6_hdr(skb)->saddr;
+ c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
+
+ /*
+ * Reflect first query at pim6sd
+ */
+ err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
+ if (err < 0) {
+ /* If the report failed throw the cache entry
+ out - Brad Parker
+ */
+ spin_unlock_bh(&mfc_unres_lock);
+
+ ip6mr_cache_free(c);
+ kfree_skb(skb);
+ return err;
+ }
+
+ atomic_inc(&mrt->cache_resolve_queue_len);
+ list_add(&c->list, &mrt->mfc6_unres_queue);
+
+ ipmr_do_expire_process(mrt);
+ }
+
+ /*
+ * See if we can append the packet
+ */
+ if (c->mfc_un.unres.unresolved.qlen > 3) {
+ kfree_skb(skb);
+ err = -ENOBUFS;
+ } else {
+ skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
+ err = 0;
+ }
+
+ spin_unlock_bh(&mfc_unres_lock);
+ return err;
+}
+
+/*
+ * MFC6 cache manipulation by user space
+ */
+
+static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc)
+{
+ int line;
+ struct mfc6_cache *c, *next;
+
+ line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
+
+ list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
+ if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
+ ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
+ write_lock_bh(&mrt_lock);
+ list_del(&c->list);
+ write_unlock_bh(&mrt_lock);
+
+ ip6mr_cache_free(c);
+ return 0;
+ }
+ }
+ return -ENOENT;
+}
+
+static int ip6mr_device_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = ptr;
+ struct net *net = dev_net(dev);
+ struct mr6_table *mrt;
+ struct mif_device *v;
+ int ct;
+ LIST_HEAD(list);
+
+ if (event != NETDEV_UNREGISTER)
+ return NOTIFY_DONE;
+
+ ip6mr_for_each_table(mrt, net) {
+ v = &mrt->vif6_table[0];
+ for (ct = 0; ct < mrt->maxvif; ct++, v++) {
+ if (v->dev == dev)
+ mif6_delete(mrt, ct, &list);
+ }
+ }
+ unregister_netdevice_many(&list);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block ip6_mr_notifier = {
+ .notifier_call = ip6mr_device_event
+};
+
+/*
+ * Setup for IP multicast routing
+ */
+
+static int __net_init ip6mr_net_init(struct net *net)
+{
+ int err;
+
+ err = ip6mr_rules_init(net);
+ if (err < 0)
+ goto fail;
+
+#ifdef CONFIG_PROC_FS
+ err = -ENOMEM;
+ if (!proc_net_fops_create(net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
+ goto proc_vif_fail;
+ if (!proc_net_fops_create(net, "ip6_mr_cache", 0, &ip6mr_mfc_fops))
+ goto proc_cache_fail;
+#endif
+
+ return 0;
+
+#ifdef CONFIG_PROC_FS
+proc_cache_fail:
+ proc_net_remove(net, "ip6_mr_vif");
+proc_vif_fail:
+ ip6mr_rules_exit(net);
+#endif
+fail:
+ return err;
+}
+
+static void __net_exit ip6mr_net_exit(struct net *net)
+{
+#ifdef CONFIG_PROC_FS
+ proc_net_remove(net, "ip6_mr_cache");
+ proc_net_remove(net, "ip6_mr_vif");
+#endif
+ ip6mr_rules_exit(net);
+}
+
+static struct pernet_operations ip6mr_net_ops = {
+ .init = ip6mr_net_init,
+ .exit = ip6mr_net_exit,
+};
+
+int __init ip6_mr_init(void)
+{
+ int err;
+
+ mrt_cachep = kmem_cache_create("ip6_mrt_cache",
+ sizeof(struct mfc6_cache),
+ 0, SLAB_HWCACHE_ALIGN,
+ NULL);
+ if (!mrt_cachep)
+ return -ENOMEM;
+
+ err = register_pernet_subsys(&ip6mr_net_ops);
+ if (err)
+ goto reg_pernet_fail;
+
+ err = register_netdevice_notifier(&ip6_mr_notifier);
+ if (err)
+ goto reg_notif_fail;
+#ifdef CONFIG_IPV6_PIMSM_V2
+ if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
+ printk(KERN_ERR "ip6_mr_init: can't add PIM protocol\n");
+ err = -EAGAIN;
+ goto add_proto_fail;
+ }
+#endif
+ rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL,
+ ip6mr_rtm_dumproute, NULL);
+ return 0;
+#ifdef CONFIG_IPV6_PIMSM_V2
+add_proto_fail:
+ unregister_netdevice_notifier(&ip6_mr_notifier);
+#endif
+reg_notif_fail:
+ unregister_pernet_subsys(&ip6mr_net_ops);
+reg_pernet_fail:
+ kmem_cache_destroy(mrt_cachep);
+ return err;
+}
+
+void ip6_mr_cleanup(void)
+{
+ unregister_netdevice_notifier(&ip6_mr_notifier);
+ unregister_pernet_subsys(&ip6mr_net_ops);
+ kmem_cache_destroy(mrt_cachep);
+}
+
+static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
+ struct mf6cctl *mfc, int mrtsock)
+{
+ bool found = false;
+ int line;
+ struct mfc6_cache *uc, *c;
+ unsigned char ttls[MAXMIFS];
+ int i;
+
+ if (mfc->mf6cc_parent >= MAXMIFS)
+ return -ENFILE;
+
+ memset(ttls, 255, MAXMIFS);
+ for (i = 0; i < MAXMIFS; i++) {
+ if (IF_ISSET(i, &mfc->mf6cc_ifset))
+ ttls[i] = 1;
+
+ }
+
+ line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
+
+ list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
+ if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
+ ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
+ found = true;
+ break;
+ }
+ }
+
+ if (found) {
+ write_lock_bh(&mrt_lock);
+ c->mf6c_parent = mfc->mf6cc_parent;
+ ip6mr_update_thresholds(mrt, c, ttls);
+ if (!mrtsock)
+ c->mfc_flags |= MFC_STATIC;
+ write_unlock_bh(&mrt_lock);
+ return 0;
+ }
+
+ if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
+ return -EINVAL;
+
+ c = ip6mr_cache_alloc();
+ if (c == NULL)
+ return -ENOMEM;
+
+ c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
+ c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
+ c->mf6c_parent = mfc->mf6cc_parent;
+ ip6mr_update_thresholds(mrt, c, ttls);
+ if (!mrtsock)
+ c->mfc_flags |= MFC_STATIC;
+
+ write_lock_bh(&mrt_lock);
+ list_add(&c->list, &mrt->mfc6_cache_array[line]);
+ write_unlock_bh(&mrt_lock);
+
+ /*
+ * Check to see if we resolved a queued list. If so we
+ * need to send on the frames and tidy up.
+ */
+ found = false;
+ spin_lock_bh(&mfc_unres_lock);
+ list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
+ if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
+ ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
+ list_del(&uc->list);
+ atomic_dec(&mrt->cache_resolve_queue_len);
+ found = true;
+ break;
+ }
+ }
+ if (list_empty(&mrt->mfc6_unres_queue))
+ del_timer(&mrt->ipmr_expire_timer);
+ spin_unlock_bh(&mfc_unres_lock);
+
+ if (found) {
+ ip6mr_cache_resolve(net, mrt, uc, c);
+ ip6mr_cache_free(uc);
+ }
+ return 0;
+}
+
+/*
+ * Close the multicast socket, and clear the vif tables etc
+ */
+
+static void mroute_clean_tables(struct mr6_table *mrt)
+{
+ int i;
+ LIST_HEAD(list);
+ struct mfc6_cache *c, *next;
+
+ /*
+ * Shut down all active vif entries
+ */
+ for (i = 0; i < mrt->maxvif; i++) {
+ if (!(mrt->vif6_table[i].flags & VIFF_STATIC))
+ mif6_delete(mrt, i, &list);
+ }
+ unregister_netdevice_many(&list);
+
+ /*
+ * Wipe the cache
+ */
+ for (i = 0; i < MFC6_LINES; i++) {
+ list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
+ if (c->mfc_flags & MFC_STATIC)
+ continue;
+ write_lock_bh(&mrt_lock);
+ list_del(&c->list);
+ write_unlock_bh(&mrt_lock);
+
+ ip6mr_cache_free(c);
+ }
+ }
+
+ if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
+ spin_lock_bh(&mfc_unres_lock);
+ list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
+ list_del(&c->list);
+ ip6mr_destroy_unres(mrt, c);
+ }
+ spin_unlock_bh(&mfc_unres_lock);
+ }
+}
+
+static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
+{
+ int err = 0;
+ struct net *net = sock_net(sk);
+
+ rtnl_lock();
+ write_lock_bh(&mrt_lock);
+ if (likely(mrt->mroute6_sk == NULL)) {
+ mrt->mroute6_sk = sk;
+ net->ipv6.devconf_all->mc_forwarding++;
+ }
+ else
+ err = -EADDRINUSE;
+ write_unlock_bh(&mrt_lock);
+
+ rtnl_unlock();
+
+ return err;
+}
+
+int ip6mr_sk_done(struct sock *sk)
+{
+ int err = -EACCES;
+ struct net *net = sock_net(sk);
+ struct mr6_table *mrt;
+
+ rtnl_lock();
+ ip6mr_for_each_table(mrt, net) {
+ if (sk == mrt->mroute6_sk) {
+ write_lock_bh(&mrt_lock);
+ mrt->mroute6_sk = NULL;
+ net->ipv6.devconf_all->mc_forwarding--;
+ write_unlock_bh(&mrt_lock);
+
+ mroute_clean_tables(mrt);
+ err = 0;
+ break;
+ }
+ }
+ rtnl_unlock();
+
+ return err;
+}
+
+struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
+{
+ struct mr6_table *mrt;
+ struct flowi6 fl6 = {
+ .flowi6_iif = skb->skb_iif,
+ .flowi6_oif = skb->dev->ifindex,
+ .flowi6_mark = skb->mark,
+ };
+
+ if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
+ return NULL;
+
+ return mrt->mroute6_sk;
+}
+
+/*
+ * Socket options and virtual interface manipulation. The whole
+ * virtual interface system is a complete heap, but unfortunately
+ * that's how BSD mrouted happens to think. Maybe one day with a proper
+ * MOSPF/PIM router set up we can clean this up.
+ */
+
+int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
+{
+ int ret;
+ struct mif6ctl vif;
+ struct mf6cctl mfc;
+ mifi_t mifi;
+ struct net *net = sock_net(sk);
+ struct mr6_table *mrt;
+
+ mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
+ if (mrt == NULL)
+ return -ENOENT;
+
+ if (optname != MRT6_INIT) {
+ if (sk != mrt->mroute6_sk && !capable(CAP_NET_ADMIN))
+ return -EACCES;
+ }
+
+ switch (optname) {
+ case MRT6_INIT:
+ if (sk->sk_type != SOCK_RAW ||
+ inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
+ return -EOPNOTSUPP;
+ if (optlen < sizeof(int))
+ return -EINVAL;
+
+ return ip6mr_sk_init(mrt, sk);
+
+ case MRT6_DONE:
+ return ip6mr_sk_done(sk);
+
+ case MRT6_ADD_MIF:
+ if (optlen < sizeof(vif))
+ return -EINVAL;
+ if (copy_from_user(&vif, optval, sizeof(vif)))
+ return -EFAULT;
+ if (vif.mif6c_mifi >= MAXMIFS)
+ return -ENFILE;
+ rtnl_lock();
+ ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
+ rtnl_unlock();
+ return ret;
+
+ case MRT6_DEL_MIF:
+ if (optlen < sizeof(mifi_t))
+ return -EINVAL;
+ if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
+ return -EFAULT;
+ rtnl_lock();
+ ret = mif6_delete(mrt, mifi, NULL);
+ rtnl_unlock();
+ return ret;
+
+ /*
+ * Manipulate the forwarding caches. These live
+ * in a sort of kernel/user symbiosis.
+ */
+ case MRT6_ADD_MFC:
+ case MRT6_DEL_MFC:
+ if (optlen < sizeof(mfc))
+ return -EINVAL;
+ if (copy_from_user(&mfc, optval, sizeof(mfc)))
+ return -EFAULT;
+ rtnl_lock();
+ if (optname == MRT6_DEL_MFC)
+ ret = ip6mr_mfc_delete(mrt, &mfc);
+ else
+ ret = ip6mr_mfc_add(net, mrt, &mfc, sk == mrt->mroute6_sk);
+ rtnl_unlock();
+ return ret;
+
+ /*
+ * Control PIM assert (to activate pim will activate assert)
+ */
+ case MRT6_ASSERT:
+ {
+ int v;
+ if (get_user(v, (int __user *)optval))
+ return -EFAULT;
+ mrt->mroute_do_assert = !!v;
+ return 0;
+ }
+
+#ifdef CONFIG_IPV6_PIMSM_V2
+ case MRT6_PIM:
+ {
+ int v;
+ if (get_user(v, (int __user *)optval))
+ return -EFAULT;
+ v = !!v;
+ rtnl_lock();
+ ret = 0;
+ if (v != mrt->mroute_do_pim) {
+ mrt->mroute_do_pim = v;
+ mrt->mroute_do_assert = v;
+ }
+ rtnl_unlock();
+ return ret;
+ }
+
+#endif
+#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
+ case MRT6_TABLE:
+ {
+ u32 v;
+
+ if (optlen != sizeof(u32))
+ return -EINVAL;
+ if (get_user(v, (u32 __user *)optval))
+ return -EFAULT;
+ if (sk == mrt->mroute6_sk)
+ return -EBUSY;
+
+ rtnl_lock();
+ ret = 0;
+ if (!ip6mr_new_table(net, v))
+ ret = -ENOMEM;
+ raw6_sk(sk)->ip6mr_table = v;
+ rtnl_unlock();
+ return ret;
+ }
+#endif
+ /*
+ * Spurious command, or MRT6_VERSION which you cannot
+ * set.
+ */
+ default:
+ return -ENOPROTOOPT;
+ }
+}
+
+/*
+ * Getsock opt support for the multicast routing system.
+ */
+
+int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
+ int __user *optlen)
+{
+ int olr;
+ int val;
+ struct net *net = sock_net(sk);
+ struct mr6_table *mrt;
+
+ mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
+ if (mrt == NULL)
+ return -ENOENT;
+
+ switch (optname) {
+ case MRT6_VERSION:
+ val = 0x0305;
+ break;
+#ifdef CONFIG_IPV6_PIMSM_V2
+ case MRT6_PIM:
+ val = mrt->mroute_do_pim;
+ break;
+#endif
+ case MRT6_ASSERT:
+ val = mrt->mroute_do_assert;
+ break;
+ default:
+ return -ENOPROTOOPT;
+ }
+
+ if (get_user(olr, optlen))
+ return -EFAULT;
+
+ olr = min_t(int, olr, sizeof(int));
+ if (olr < 0)
+ return -EINVAL;
+
+ if (put_user(olr, optlen))
+ return -EFAULT;
+ if (copy_to_user(optval, &val, olr))
+ return -EFAULT;
+ return 0;
+}
+
+/*
+ * The IP multicast ioctl support routines.
+ */
+
+int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
+{
+ struct sioc_sg_req6 sr;
+ struct sioc_mif_req6 vr;
+ struct mif_device *vif;
+ struct mfc6_cache *c;
+ struct net *net = sock_net(sk);
+ struct mr6_table *mrt;
+
+ mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
+ if (mrt == NULL)
+ return -ENOENT;
+
+ switch (cmd) {
+ case SIOCGETMIFCNT_IN6:
+ if (copy_from_user(&vr, arg, sizeof(vr)))
+ return -EFAULT;
+ if (vr.mifi >= mrt->maxvif)
+ return -EINVAL;
+ read_lock(&mrt_lock);
+ vif = &mrt->vif6_table[vr.mifi];
+ if (MIF_EXISTS(mrt, vr.mifi)) {
+ vr.icount = vif->pkt_in;
+ vr.ocount = vif->pkt_out;
+ vr.ibytes = vif->bytes_in;
+ vr.obytes = vif->bytes_out;
+ read_unlock(&mrt_lock);
+
+ if (copy_to_user(arg, &vr, sizeof(vr)))
+ return -EFAULT;
+ return 0;
+ }
+ read_unlock(&mrt_lock);
+ return -EADDRNOTAVAIL;
+ case SIOCGETSGCNT_IN6:
+ if (copy_from_user(&sr, arg, sizeof(sr)))
+ return -EFAULT;
+
+ read_lock(&mrt_lock);
+ c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
+ if (c) {
+ sr.pktcnt = c->mfc_un.res.pkt;
+ sr.bytecnt = c->mfc_un.res.bytes;
+ sr.wrong_if = c->mfc_un.res.wrong_if;
+ read_unlock(&mrt_lock);
+
+ if (copy_to_user(arg, &sr, sizeof(sr)))
+ return -EFAULT;
+ return 0;
+ }
+ read_unlock(&mrt_lock);
+ return -EADDRNOTAVAIL;
+ default:
+ return -ENOIOCTLCMD;
+ }
+}
+
+#ifdef CONFIG_COMPAT
+struct compat_sioc_sg_req6 {
+ struct sockaddr_in6 src;
+ struct sockaddr_in6 grp;
+ compat_ulong_t pktcnt;
+ compat_ulong_t bytecnt;
+ compat_ulong_t wrong_if;
+};
+
+struct compat_sioc_mif_req6 {
+ mifi_t mifi;
+ compat_ulong_t icount;
+ compat_ulong_t ocount;
+ compat_ulong_t ibytes;
+ compat_ulong_t obytes;
+};
+
+int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
+{
+ struct compat_sioc_sg_req6 sr;
+ struct compat_sioc_mif_req6 vr;
+ struct mif_device *vif;
+ struct mfc6_cache *c;
+ struct net *net = sock_net(sk);
+ struct mr6_table *mrt;
+
+ mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
+ if (mrt == NULL)
+ return -ENOENT;
+
+ switch (cmd) {
+ case SIOCGETMIFCNT_IN6:
+ if (copy_from_user(&vr, arg, sizeof(vr)))
+ return -EFAULT;
+ if (vr.mifi >= mrt->maxvif)
+ return -EINVAL;
+ read_lock(&mrt_lock);
+ vif = &mrt->vif6_table[vr.mifi];
+ if (MIF_EXISTS(mrt, vr.mifi)) {
+ vr.icount = vif->pkt_in;
+ vr.ocount = vif->pkt_out;
+ vr.ibytes = vif->bytes_in;
+ vr.obytes = vif->bytes_out;
+ read_unlock(&mrt_lock);
+
+ if (copy_to_user(arg, &vr, sizeof(vr)))
+ return -EFAULT;
+ return 0;
+ }
+ read_unlock(&mrt_lock);
+ return -EADDRNOTAVAIL;
+ case SIOCGETSGCNT_IN6:
+ if (copy_from_user(&sr, arg, sizeof(sr)))
+ return -EFAULT;
+
+ read_lock(&mrt_lock);
+ c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
+ if (c) {
+ sr.pktcnt = c->mfc_un.res.pkt;
+ sr.bytecnt = c->mfc_un.res.bytes;
+ sr.wrong_if = c->mfc_un.res.wrong_if;
+ read_unlock(&mrt_lock);
+
+ if (copy_to_user(arg, &sr, sizeof(sr)))
+ return -EFAULT;
+ return 0;
+ }
+ read_unlock(&mrt_lock);
+ return -EADDRNOTAVAIL;
+ default:
+ return -ENOIOCTLCMD;
+ }
+}
+#endif
+
+static inline int ip6mr_forward2_finish(struct sk_buff *skb)
+{
+ IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_OUTFORWDATAGRAMS);
+ return dst_output(skb);
+}
+
+/*
+ * Processing handlers for ip6mr_forward
+ */
+
+static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
+ struct sk_buff *skb, struct mfc6_cache *c, int vifi)
+{
+ struct ipv6hdr *ipv6h;
+ struct mif_device *vif = &mrt->vif6_table[vifi];
+ struct net_device *dev;
+ struct dst_entry *dst;
+ struct flowi6 fl6;
+
+ if (vif->dev == NULL)
+ goto out_free;
+
+#ifdef CONFIG_IPV6_PIMSM_V2
+ if (vif->flags & MIFF_REGISTER) {
+ vif->pkt_out++;
+ vif->bytes_out += skb->len;
+ vif->dev->stats.tx_bytes += skb->len;
+ vif->dev->stats.tx_packets++;
+ ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
+ goto out_free;
+ }
+#endif
+
+ ipv6h = ipv6_hdr(skb);
+
+ fl6 = (struct flowi6) {
+ .flowi6_oif = vif->link,
+ .daddr = ipv6h->daddr,
+ };
+
+ dst = ip6_route_output(net, NULL, &fl6);
+ if (dst->error) {
+ dst_release(dst);
+ goto out_free;
+ }
+
+ skb_dst_drop(skb);
+ skb_dst_set(skb, dst);
+
+ /*
+ * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
+ * not only before forwarding, but after forwarding on all output
+ * interfaces. It is clear, if mrouter runs a multicasting
+ * program, it should receive packets not depending to what interface
+ * program is joined.
+ * If we will not make it, the program will have to join on all
+ * interfaces. On the other hand, multihoming host (or router, but
+ * not mrouter) cannot join to more than one interface - it will
+ * result in receiving multiple packets.
+ */
+ dev = vif->dev;
+ skb->dev = dev;
+ vif->pkt_out++;
+ vif->bytes_out += skb->len;
+
+ /* We are about to write */
+ /* XXX: extension headers? */
+ if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
+ goto out_free;
+
+ ipv6h = ipv6_hdr(skb);
+ ipv6h->hop_limit--;
+
+ IP6CB(skb)->flags |= IP6SKB_FORWARDED;
+
+ return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dev,
+ ip6mr_forward2_finish);
+
+out_free:
+ kfree_skb(skb);
+ return 0;
+}
+
+static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
+{
+ int ct;
+
+ for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
+ if (mrt->vif6_table[ct].dev == dev)
+ break;
+ }
+ return ct;
+}
+
+static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
+ struct sk_buff *skb, struct mfc6_cache *cache)
+{
+ int psend = -1;
+ int vif, ct;
+
+ vif = cache->mf6c_parent;
+ cache->mfc_un.res.pkt++;
+ cache->mfc_un.res.bytes += skb->len;
+
+ /*
+ * Wrong interface: drop packet and (maybe) send PIM assert.
+ */
+ if (mrt->vif6_table[vif].dev != skb->dev) {
+ int true_vifi;
+
+ cache->mfc_un.res.wrong_if++;
+ true_vifi = ip6mr_find_vif(mrt, skb->dev);
+
+ if (true_vifi >= 0 && mrt->mroute_do_assert &&
+ /* pimsm uses asserts, when switching from RPT to SPT,
+ so that we cannot check that packet arrived on an oif.
+ It is bad, but otherwise we would need to move pretty
+ large chunk of pimd to kernel. Ough... --ANK
+ */
+ (mrt->mroute_do_pim ||
+ cache->mfc_un.res.ttls[true_vifi] < 255) &&
+ time_after(jiffies,
+ cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
+ cache->mfc_un.res.last_assert = jiffies;
+ ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
+ }
+ goto dont_forward;
+ }
+
+ mrt->vif6_table[vif].pkt_in++;
+ mrt->vif6_table[vif].bytes_in += skb->len;
+
+ /*
+ * Forward the frame
+ */
+ for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
+ if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
+ if (psend != -1) {
+ struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
+ if (skb2)
+ ip6mr_forward2(net, mrt, skb2, cache, psend);
+ }
+ psend = ct;
+ }
+ }
+ if (psend != -1) {
+ ip6mr_forward2(net, mrt, skb, cache, psend);
+ return 0;
+ }
+
+dont_forward:
+ kfree_skb(skb);
+ return 0;
+}
+
+
+/*
+ * Multicast packets for forwarding arrive here
+ */
+
+int ip6_mr_input(struct sk_buff *skb)
+{
+ struct mfc6_cache *cache;
+ struct net *net = dev_net(skb->dev);
+ struct mr6_table *mrt;
+ struct flowi6 fl6 = {
+ .flowi6_iif = skb->dev->ifindex,
+ .flowi6_mark = skb->mark,
+ };
+ int err;
+
+ err = ip6mr_fib_lookup(net, &fl6, &mrt);
+ if (err < 0) {
+ kfree_skb(skb);
+ return err;
+ }
+
+ read_lock(&mrt_lock);
+ cache = ip6mr_cache_find(mrt,
+ &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
+
+ /*
+ * No usable cache entry
+ */
+ if (cache == NULL) {
+ int vif;
+
+ vif = ip6mr_find_vif(mrt, skb->dev);
+ if (vif >= 0) {
+ int err = ip6mr_cache_unresolved(mrt, vif, skb);
+ read_unlock(&mrt_lock);
+
+ return err;
+ }
+ read_unlock(&mrt_lock);
+ kfree_skb(skb);
+ return -ENODEV;
+ }
+
+ ip6_mr_forward(net, mrt, skb, cache);
+
+ read_unlock(&mrt_lock);
+
+ return 0;
+}
+
+
+static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
+ struct mfc6_cache *c, struct rtmsg *rtm)
+{
+ int ct;
+ struct rtnexthop *nhp;
+ u8 *b = skb_tail_pointer(skb);
+ struct rtattr *mp_head;
+
+ /* If cache is unresolved, don't try to parse IIF and OIF */
+ if (c->mf6c_parent >= MAXMIFS)
+ return -ENOENT;
+
+ if (MIF_EXISTS(mrt, c->mf6c_parent))
+ RTA_PUT(skb, RTA_IIF, 4, &mrt->vif6_table[c->mf6c_parent].dev->ifindex);
+
+ mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
+
+ for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
+ if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
+ if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
+ goto rtattr_failure;
+ nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
+ nhp->rtnh_flags = 0;
+ nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
+ nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
+ nhp->rtnh_len = sizeof(*nhp);
+ }
+ }
+ mp_head->rta_type = RTA_MULTIPATH;
+ mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
+ rtm->rtm_type = RTN_MULTICAST;
+ return 1;
+
+rtattr_failure:
+ nlmsg_trim(skb, b);
+ return -EMSGSIZE;
+}
+
+int ip6mr_get_route(struct net *net,
+ struct sk_buff *skb, struct rtmsg *rtm, int nowait)
+{
+ int err;
+ struct mr6_table *mrt;
+ struct mfc6_cache *cache;
+ struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
+
+ mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
+ if (mrt == NULL)
+ return -ENOENT;
+
+ read_lock(&mrt_lock);
+ cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
+
+ if (!cache) {
+ struct sk_buff *skb2;
+ struct ipv6hdr *iph;
+ struct net_device *dev;
+ int vif;
+
+ if (nowait) {
+ read_unlock(&mrt_lock);
+ return -EAGAIN;
+ }
+
+ dev = skb->dev;
+ if (dev == NULL || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
+ read_unlock(&mrt_lock);
+ return -ENODEV;
+ }
+
+ /* really correct? */
+ skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
+ if (!skb2) {
+ read_unlock(&mrt_lock);
+ return -ENOMEM;
+ }
+
+ skb_reset_transport_header(skb2);
+
+ skb_put(skb2, sizeof(struct ipv6hdr));
+ skb_reset_network_header(skb2);
+
+ iph = ipv6_hdr(skb2);
+ iph->version = 0;
+ iph->priority = 0;
+ iph->flow_lbl[0] = 0;
+ iph->flow_lbl[1] = 0;
+ iph->flow_lbl[2] = 0;
+ iph->payload_len = 0;
+ iph->nexthdr = IPPROTO_NONE;
+ iph->hop_limit = 0;
+ iph->saddr = rt->rt6i_src.addr;
+ iph->daddr = rt->rt6i_dst.addr;
+
+ err = ip6mr_cache_unresolved(mrt, vif, skb2);
+ read_unlock(&mrt_lock);
+
+ return err;
+ }
+
+ if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
+ cache->mfc_flags |= MFC_NOTIFY;
+
+ err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
+ read_unlock(&mrt_lock);
+ return err;
+}
+
+static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
+ u32 pid, u32 seq, struct mfc6_cache *c)
+{
+ struct nlmsghdr *nlh;
+ struct rtmsg *rtm;
+
+ nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
+ if (nlh == NULL)
+ return -EMSGSIZE;
+
+ rtm = nlmsg_data(nlh);
+ rtm->rtm_family = RTNL_FAMILY_IPMR;
+ rtm->rtm_dst_len = 128;
+ rtm->rtm_src_len = 128;
+ rtm->rtm_tos = 0;
+ rtm->rtm_table = mrt->id;
+ NLA_PUT_U32(skb, RTA_TABLE, mrt->id);
+ rtm->rtm_scope = RT_SCOPE_UNIVERSE;
+ rtm->rtm_protocol = RTPROT_UNSPEC;
+ rtm->rtm_flags = 0;
+
+ NLA_PUT(skb, RTA_SRC, 16, &c->mf6c_origin);
+ NLA_PUT(skb, RTA_DST, 16, &c->mf6c_mcastgrp);
+
+ if (__ip6mr_fill_mroute(mrt, skb, c, rtm) < 0)
+ goto nla_put_failure;
+
+ return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
+}
+
+static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct net *net = sock_net(skb->sk);
+ struct mr6_table *mrt;
+ struct mfc6_cache *mfc;
+ unsigned int t = 0, s_t;
+ unsigned int h = 0, s_h;
+ unsigned int e = 0, s_e;
+
+ s_t = cb->args[0];
+ s_h = cb->args[1];
+ s_e = cb->args[2];
+
+ read_lock(&mrt_lock);
+ ip6mr_for_each_table(mrt, net) {
+ if (t < s_t)
+ goto next_table;
+ if (t > s_t)
+ s_h = 0;
+ for (h = s_h; h < MFC6_LINES; h++) {
+ list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
+ if (e < s_e)
+ goto next_entry;
+ if (ip6mr_fill_mroute(mrt, skb,
+ NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq,
+ mfc) < 0)
+ goto done;
+next_entry:
+ e++;
+ }
+ e = s_e = 0;
+ }
+ s_h = 0;
+next_table:
+ t++;
+ }
+done:
+ read_unlock(&mrt_lock);
+
+ cb->args[2] = e;
+ cb->args[1] = h;
+ cb->args[0] = t;
+
+ return skb->len;
+}
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
new file mode 100644
index 00000000..bba658d9
--- /dev/null
+++ b/net/ipv6/ipcomp6.c
@@ -0,0 +1,218 @@
+/*
+ * IP Payload Compression Protocol (IPComp) for IPv6 - RFC3173
+ *
+ * Copyright (C)2003 USAGI/WIDE Project
+ *
+ * Author Mitsuru KANDA <mk@linux-ipv6.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+/*
+ * [Memo]
+ *
+ * Outbound:
+ * The compression of IP datagram MUST be done before AH/ESP processing,
+ * fragmentation, and the addition of Hop-by-Hop/Routing header.
+ *
+ * Inbound:
+ * The decompression of IP datagram MUST be done after the reassembly,
+ * AH/ESP processing.
+ */
+#include <linux/module.h>
+#include <net/ip.h>
+#include <net/xfrm.h>
+#include <net/ipcomp.h>
+#include <linux/crypto.h>
+#include <linux/err.h>
+#include <linux/pfkeyv2.h>
+#include <linux/random.h>
+#include <linux/percpu.h>
+#include <linux/smp.h>
+#include <linux/list.h>
+#include <linux/vmalloc.h>
+#include <linux/rtnetlink.h>
+#include <net/icmp.h>
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <linux/ipv6.h>
+#include <linux/icmpv6.h>
+#include <linux/mutex.h>
+
+static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+{
+ struct net *net = dev_net(skb->dev);
+ __be32 spi;
+ const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data;
+ struct ip_comp_hdr *ipcomph =
+ (struct ip_comp_hdr *)(skb->data + offset);
+ struct xfrm_state *x;
+
+ if (type != ICMPV6_DEST_UNREACH && type != ICMPV6_PKT_TOOBIG)
+ return;
+
+ spi = htonl(ntohs(ipcomph->cpi));
+ x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
+ spi, IPPROTO_COMP, AF_INET6);
+ if (!x)
+ return;
+
+ printk(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%pI6\n",
+ spi, &iph->daddr);
+ xfrm_state_put(x);
+}
+
+static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x)
+{
+ struct net *net = xs_net(x);
+ struct xfrm_state *t = NULL;
+
+ t = xfrm_state_alloc(net);
+ if (!t)
+ goto out;
+
+ t->id.proto = IPPROTO_IPV6;
+ t->id.spi = xfrm6_tunnel_alloc_spi(net, (xfrm_address_t *)&x->props.saddr);
+ if (!t->id.spi)
+ goto error;
+
+ memcpy(t->id.daddr.a6, x->id.daddr.a6, sizeof(struct in6_addr));
+ memcpy(&t->sel, &x->sel, sizeof(t->sel));
+ t->props.family = AF_INET6;
+ t->props.mode = x->props.mode;
+ memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr));
+ memcpy(&t->mark, &x->mark, sizeof(t->mark));
+
+ if (xfrm_init_state(t))
+ goto error;
+
+ atomic_set(&t->tunnel_users, 1);
+
+out:
+ return t;
+
+error:
+ t->km.state = XFRM_STATE_DEAD;
+ xfrm_state_put(t);
+ t = NULL;
+ goto out;
+}
+
+static int ipcomp6_tunnel_attach(struct xfrm_state *x)
+{
+ struct net *net = xs_net(x);
+ int err = 0;
+ struct xfrm_state *t = NULL;
+ __be32 spi;
+ u32 mark = x->mark.m & x->mark.v;
+
+ spi = xfrm6_tunnel_spi_lookup(net, (xfrm_address_t *)&x->props.saddr);
+ if (spi)
+ t = xfrm_state_lookup(net, mark, (xfrm_address_t *)&x->id.daddr,
+ spi, IPPROTO_IPV6, AF_INET6);
+ if (!t) {
+ t = ipcomp6_tunnel_create(x);
+ if (!t) {
+ err = -EINVAL;
+ goto out;
+ }
+ xfrm_state_insert(t);
+ xfrm_state_hold(t);
+ }
+ x->tunnel = t;
+ atomic_inc(&t->tunnel_users);
+
+out:
+ return err;
+}
+
+static int ipcomp6_init_state(struct xfrm_state *x)
+{
+ int err = -EINVAL;
+
+ x->props.header_len = 0;
+ switch (x->props.mode) {
+ case XFRM_MODE_TRANSPORT:
+ break;
+ case XFRM_MODE_TUNNEL:
+ x->props.header_len += sizeof(struct ipv6hdr);
+ break;
+ default:
+ goto out;
+ }
+
+ err = ipcomp_init_state(x);
+ if (err)
+ goto out;
+
+ if (x->props.mode == XFRM_MODE_TUNNEL) {
+ err = ipcomp6_tunnel_attach(x);
+ if (err)
+ goto out;
+ }
+
+ err = 0;
+out:
+ return err;
+}
+
+static const struct xfrm_type ipcomp6_type =
+{
+ .description = "IPCOMP6",
+ .owner = THIS_MODULE,
+ .proto = IPPROTO_COMP,
+ .init_state = ipcomp6_init_state,
+ .destructor = ipcomp_destroy,
+ .input = ipcomp_input,
+ .output = ipcomp_output,
+ .hdr_offset = xfrm6_find_1stfragopt,
+};
+
+static const struct inet6_protocol ipcomp6_protocol =
+{
+ .handler = xfrm6_rcv,
+ .err_handler = ipcomp6_err,
+ .flags = INET6_PROTO_NOPOLICY,
+};
+
+static int __init ipcomp6_init(void)
+{
+ if (xfrm_register_type(&ipcomp6_type, AF_INET6) < 0) {
+ printk(KERN_INFO "ipcomp6 init: can't add xfrm type\n");
+ return -EAGAIN;
+ }
+ if (inet6_add_protocol(&ipcomp6_protocol, IPPROTO_COMP) < 0) {
+ printk(KERN_INFO "ipcomp6 init: can't add protocol\n");
+ xfrm_unregister_type(&ipcomp6_type, AF_INET6);
+ return -EAGAIN;
+ }
+ return 0;
+}
+
+static void __exit ipcomp6_fini(void)
+{
+ if (inet6_del_protocol(&ipcomp6_protocol, IPPROTO_COMP) < 0)
+ printk(KERN_INFO "ipv6 ipcomp close: can't remove protocol\n");
+ if (xfrm_unregister_type(&ipcomp6_type, AF_INET6) < 0)
+ printk(KERN_INFO "ipv6 ipcomp close: can't remove xfrm type\n");
+}
+
+module_init(ipcomp6_init);
+module_exit(ipcomp6_fini);
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp) for IPv6 - RFC3173");
+MODULE_AUTHOR("Mitsuru KANDA <mk@linux-ipv6.org>");
+
+MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_COMP);
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
new file mode 100644
index 00000000..63dd1f89
--- /dev/null
+++ b/net/ipv6/ipv6_sockglue.c
@@ -0,0 +1,1326 @@
+/*
+ * IPv6 BSD socket options interface
+ * Linux INET6 implementation
+ *
+ * Authors:
+ * Pedro Roque <roque@di.fc.ul.pt>
+ *
+ * Based on linux/net/ipv4/ip_sockglue.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * FIXME: Make the setsockopt code POSIX compliant: That is
+ *
+ * o Truncate getsockopt returns
+ * o Return an optlen of the truncated length if need be
+ *
+ * Changes:
+ * David L Stevens <dlstevens@us.ibm.com>:
+ * - added multicast source filtering API for MLDv2
+ */
+
+#include <linux/module.h>
+#include <linux/capability.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <linux/mroute6.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/init.h>
+#include <linux/sysctl.h>
+#include <linux/netfilter.h>
+#include <linux/slab.h>
+
+#include <net/sock.h>
+#include <net/snmp.h>
+#include <net/ipv6.h>
+#include <net/ndisc.h>
+#include <net/protocol.h>
+#include <net/transp_v6.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
+#include <net/inet_common.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <net/udplite.h>
+#include <net/xfrm.h>
+#include <net/compat.h>
+
+#include <asm/uaccess.h>
+
+struct ip6_ra_chain *ip6_ra_chain;
+DEFINE_RWLOCK(ip6_ra_lock);
+
+int ip6_ra_control(struct sock *sk, int sel)
+{
+ struct ip6_ra_chain *ra, *new_ra, **rap;
+
+ /* RA packet may be delivered ONLY to IPPROTO_RAW socket */
+ if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num != IPPROTO_RAW)
+ return -ENOPROTOOPT;
+
+ new_ra = (sel>=0) ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
+
+ write_lock_bh(&ip6_ra_lock);
+ for (rap = &ip6_ra_chain; (ra=*rap) != NULL; rap = &ra->next) {
+ if (ra->sk == sk) {
+ if (sel>=0) {
+ write_unlock_bh(&ip6_ra_lock);
+ kfree(new_ra);
+ return -EADDRINUSE;
+ }
+
+ *rap = ra->next;
+ write_unlock_bh(&ip6_ra_lock);
+
+ sock_put(sk);
+ kfree(ra);
+ return 0;
+ }
+ }
+ if (new_ra == NULL) {
+ write_unlock_bh(&ip6_ra_lock);
+ return -ENOBUFS;
+ }
+ new_ra->sk = sk;
+ new_ra->sel = sel;
+ new_ra->next = ra;
+ *rap = new_ra;
+ sock_hold(sk);
+ write_unlock_bh(&ip6_ra_lock);
+ return 0;
+}
+
+static
+struct ipv6_txoptions *ipv6_update_options(struct sock *sk,
+ struct ipv6_txoptions *opt)
+{
+ if (inet_sk(sk)->is_icsk) {
+ if (opt &&
+ !((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) &&
+ inet_sk(sk)->inet_daddr != LOOPBACK4_IPV6) {
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen;
+ icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
+ }
+ opt = xchg(&inet6_sk(sk)->opt, opt);
+ } else {
+ spin_lock(&sk->sk_dst_lock);
+ opt = xchg(&inet6_sk(sk)->opt, opt);
+ spin_unlock(&sk->sk_dst_lock);
+ }
+ sk_dst_reset(sk);
+
+ return opt;
+}
+
+static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, unsigned int optlen)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct net *net = sock_net(sk);
+ int val, valbool;
+ int retv = -ENOPROTOOPT;
+
+ if (optval == NULL)
+ val=0;
+ else {
+ if (optlen >= sizeof(int)) {
+ if (get_user(val, (int __user *) optval))
+ return -EFAULT;
+ } else
+ val = 0;
+ }
+
+ valbool = (val!=0);
+
+ if (ip6_mroute_opt(optname))
+ return ip6_mroute_setsockopt(sk, optname, optval, optlen);
+
+ lock_sock(sk);
+
+ switch (optname) {
+
+ case IPV6_ADDRFORM:
+ if (optlen < sizeof(int))
+ goto e_inval;
+ if (val == PF_INET) {
+ struct ipv6_txoptions *opt;
+ struct sk_buff *pktopt;
+
+ if (sk->sk_type == SOCK_RAW)
+ break;
+
+ if (sk->sk_protocol == IPPROTO_UDP ||
+ sk->sk_protocol == IPPROTO_UDPLITE) {
+ struct udp_sock *up = udp_sk(sk);
+ if (up->pending == AF_INET6) {
+ retv = -EBUSY;
+ break;
+ }
+ } else if (sk->sk_protocol != IPPROTO_TCP)
+ break;
+
+ if (sk->sk_state != TCP_ESTABLISHED) {
+ retv = -ENOTCONN;
+ break;
+ }
+
+ if (ipv6_only_sock(sk) ||
+ !ipv6_addr_v4mapped(&np->daddr)) {
+ retv = -EADDRNOTAVAIL;
+ break;
+ }
+
+ fl6_free_socklist(sk);
+ ipv6_sock_mc_close(sk);
+
+ /*
+ * Sock is moving from IPv6 to IPv4 (sk_prot), so
+ * remove it from the refcnt debug socks count in the
+ * original family...
+ */
+ sk_refcnt_debug_dec(sk);
+
+ if (sk->sk_protocol == IPPROTO_TCP) {
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ local_bh_disable();
+ sock_prot_inuse_add(net, sk->sk_prot, -1);
+ sock_prot_inuse_add(net, &tcp_prot, 1);
+ local_bh_enable();
+ sk->sk_prot = &tcp_prot;
+ icsk->icsk_af_ops = &ipv4_specific;
+ sk->sk_socket->ops = &inet_stream_ops;
+ sk->sk_family = PF_INET;
+ tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
+ } else {
+ struct proto *prot = &udp_prot;
+
+ if (sk->sk_protocol == IPPROTO_UDPLITE)
+ prot = &udplite_prot;
+ local_bh_disable();
+ sock_prot_inuse_add(net, sk->sk_prot, -1);
+ sock_prot_inuse_add(net, prot, 1);
+ local_bh_enable();
+ sk->sk_prot = prot;
+ sk->sk_socket->ops = &inet_dgram_ops;
+ sk->sk_family = PF_INET;
+ }
+ opt = xchg(&np->opt, NULL);
+ if (opt)
+ sock_kfree_s(sk, opt, opt->tot_len);
+ pktopt = xchg(&np->pktoptions, NULL);
+ kfree_skb(pktopt);
+
+ sk->sk_destruct = inet_sock_destruct;
+ /*
+ * ... and add it to the refcnt debug socks count
+ * in the new family. -acme
+ */
+ sk_refcnt_debug_inc(sk);
+ module_put(THIS_MODULE);
+ retv = 0;
+ break;
+ }
+ goto e_inval;
+
+ case IPV6_V6ONLY:
+ if (optlen < sizeof(int) ||
+ inet_sk(sk)->inet_num)
+ goto e_inval;
+ np->ipv6only = valbool;
+ retv = 0;
+ break;
+
+ case IPV6_RECVPKTINFO:
+ if (optlen < sizeof(int))
+ goto e_inval;
+ np->rxopt.bits.rxinfo = valbool;
+ retv = 0;
+ break;
+
+ case IPV6_2292PKTINFO:
+ if (optlen < sizeof(int))
+ goto e_inval;
+ np->rxopt.bits.rxoinfo = valbool;
+ retv = 0;
+ break;
+
+ case IPV6_RECVHOPLIMIT:
+ if (optlen < sizeof(int))
+ goto e_inval;
+ np->rxopt.bits.rxhlim = valbool;
+ retv = 0;
+ break;
+
+ case IPV6_2292HOPLIMIT:
+ if (optlen < sizeof(int))
+ goto e_inval;
+ np->rxopt.bits.rxohlim = valbool;
+ retv = 0;
+ break;
+
+ case IPV6_RECVRTHDR:
+ if (optlen < sizeof(int))
+ goto e_inval;
+ np->rxopt.bits.srcrt = valbool;
+ retv = 0;
+ break;
+
+ case IPV6_2292RTHDR:
+ if (optlen < sizeof(int))
+ goto e_inval;
+ np->rxopt.bits.osrcrt = valbool;
+ retv = 0;
+ break;
+
+ case IPV6_RECVHOPOPTS:
+ if (optlen < sizeof(int))
+ goto e_inval;
+ np->rxopt.bits.hopopts = valbool;
+ retv = 0;
+ break;
+
+ case IPV6_2292HOPOPTS:
+ if (optlen < sizeof(int))
+ goto e_inval;
+ np->rxopt.bits.ohopopts = valbool;
+ retv = 0;
+ break;
+
+ case IPV6_RECVDSTOPTS:
+ if (optlen < sizeof(int))
+ goto e_inval;
+ np->rxopt.bits.dstopts = valbool;
+ retv = 0;
+ break;
+
+ case IPV6_2292DSTOPTS:
+ if (optlen < sizeof(int))
+ goto e_inval;
+ np->rxopt.bits.odstopts = valbool;
+ retv = 0;
+ break;
+
+ case IPV6_TCLASS:
+ if (optlen < sizeof(int))
+ goto e_inval;
+ if (val < -1 || val > 0xff)
+ goto e_inval;
+ /* RFC 3542, 6.5: default traffic class of 0x0 */
+ if (val == -1)
+ val = 0;
+ np->tclass = val;
+ retv = 0;
+ break;
+
+ case IPV6_RECVTCLASS:
+ if (optlen < sizeof(int))
+ goto e_inval;
+ np->rxopt.bits.rxtclass = valbool;
+ retv = 0;
+ break;
+
+ case IPV6_FLOWINFO:
+ if (optlen < sizeof(int))
+ goto e_inval;
+ np->rxopt.bits.rxflow = valbool;
+ retv = 0;
+ break;
+
+ case IPV6_RECVPATHMTU:
+ if (optlen < sizeof(int))
+ goto e_inval;
+ np->rxopt.bits.rxpmtu = valbool;
+ retv = 0;
+ break;
+
+ case IPV6_TRANSPARENT:
+ if (valbool && !capable(CAP_NET_ADMIN) && !capable(CAP_NET_RAW)) {
+ retv = -EPERM;
+ break;
+ }
+ if (optlen < sizeof(int))
+ goto e_inval;
+ /* we don't have a separate transparent bit for IPV6 we use the one in the IPv4 socket */
+ inet_sk(sk)->transparent = valbool;
+ retv = 0;
+ break;
+
+ case IPV6_RECVORIGDSTADDR:
+ if (optlen < sizeof(int))
+ goto e_inval;
+ np->rxopt.bits.rxorigdstaddr = valbool;
+ retv = 0;
+ break;
+
+ case IPV6_HOPOPTS:
+ case IPV6_RTHDRDSTOPTS:
+ case IPV6_RTHDR:
+ case IPV6_DSTOPTS:
+ {
+ struct ipv6_txoptions *opt;
+
+ /* remove any sticky options header with a zero option
+ * length, per RFC3542.
+ */
+ if (optlen == 0)
+ optval = NULL;
+ else if (optval == NULL)
+ goto e_inval;
+ else if (optlen < sizeof(struct ipv6_opt_hdr) ||
+ optlen & 0x7 || optlen > 8 * 255)
+ goto e_inval;
+
+ /* hop-by-hop / destination options are privileged option */
+ retv = -EPERM;
+ if (optname != IPV6_RTHDR && !capable(CAP_NET_RAW))
+ break;
+
+ opt = ipv6_renew_options(sk, np->opt, optname,
+ (struct ipv6_opt_hdr __user *)optval,
+ optlen);
+ if (IS_ERR(opt)) {
+ retv = PTR_ERR(opt);
+ break;
+ }
+
+ /* routing header option needs extra check */
+ retv = -EINVAL;
+ if (optname == IPV6_RTHDR && opt && opt->srcrt) {
+ struct ipv6_rt_hdr *rthdr = opt->srcrt;
+ switch (rthdr->type) {
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ case IPV6_SRCRT_TYPE_2:
+ if (rthdr->hdrlen != 2 ||
+ rthdr->segments_left != 1)
+ goto sticky_done;
+
+ break;
+#endif
+ default:
+ goto sticky_done;
+ }
+ }
+
+ retv = 0;
+ opt = ipv6_update_options(sk, opt);
+sticky_done:
+ if (opt)
+ sock_kfree_s(sk, opt, opt->tot_len);
+ break;
+ }
+
+ case IPV6_PKTINFO:
+ {
+ struct in6_pktinfo pkt;
+
+ if (optlen == 0)
+ goto e_inval;
+ else if (optlen < sizeof(struct in6_pktinfo) || optval == NULL)
+ goto e_inval;
+
+ if (copy_from_user(&pkt, optval, sizeof(struct in6_pktinfo))) {
+ retv = -EFAULT;
+ break;
+ }
+ if (sk->sk_bound_dev_if && pkt.ipi6_ifindex != sk->sk_bound_dev_if)
+ goto e_inval;
+
+ np->sticky_pktinfo.ipi6_ifindex = pkt.ipi6_ifindex;
+ np->sticky_pktinfo.ipi6_addr = pkt.ipi6_addr;
+ retv = 0;
+ break;
+ }
+
+ case IPV6_2292PKTOPTIONS:
+ {
+ struct ipv6_txoptions *opt = NULL;
+ struct msghdr msg;
+ struct flowi6 fl6;
+ int junk;
+
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_oif = sk->sk_bound_dev_if;
+ fl6.flowi6_mark = sk->sk_mark;
+
+ if (optlen == 0)
+ goto update;
+
+ /* 1K is probably excessive
+ * 1K is surely not enough, 2K per standard header is 16K.
+ */
+ retv = -EINVAL;
+ if (optlen > 64*1024)
+ break;
+
+ opt = sock_kmalloc(sk, sizeof(*opt) + optlen, GFP_KERNEL);
+ retv = -ENOBUFS;
+ if (opt == NULL)
+ break;
+
+ memset(opt, 0, sizeof(*opt));
+ opt->tot_len = sizeof(*opt) + optlen;
+ retv = -EFAULT;
+ if (copy_from_user(opt+1, optval, optlen))
+ goto done;
+
+ msg.msg_controllen = optlen;
+ msg.msg_control = (void*)(opt+1);
+
+ retv = datagram_send_ctl(net, sk, &msg, &fl6, opt, &junk, &junk,
+ &junk);
+ if (retv)
+ goto done;
+update:
+ retv = 0;
+ opt = ipv6_update_options(sk, opt);
+done:
+ if (opt)
+ sock_kfree_s(sk, opt, opt->tot_len);
+ break;
+ }
+ case IPV6_UNICAST_HOPS:
+ if (optlen < sizeof(int))
+ goto e_inval;
+ if (val > 255 || val < -1)
+ goto e_inval;
+ np->hop_limit = val;
+ retv = 0;
+ break;
+
+ case IPV6_MULTICAST_HOPS:
+ if (sk->sk_type == SOCK_STREAM)
+ break;
+ if (optlen < sizeof(int))
+ goto e_inval;
+ if (val > 255 || val < -1)
+ goto e_inval;
+ np->mcast_hops = (val == -1 ? IPV6_DEFAULT_MCASTHOPS : val);
+ retv = 0;
+ break;
+
+ case IPV6_MULTICAST_LOOP:
+ if (optlen < sizeof(int))
+ goto e_inval;
+ if (val != valbool)
+ goto e_inval;
+ np->mc_loop = valbool;
+ retv = 0;
+ break;
+
+ case IPV6_UNICAST_IF:
+ {
+ struct net_device *dev = NULL;
+ int ifindex;
+
+ if (optlen != sizeof(int))
+ goto e_inval;
+
+ ifindex = (__force int)ntohl((__force __be32)val);
+ if (ifindex == 0) {
+ np->ucast_oif = 0;
+ retv = 0;
+ break;
+ }
+
+ dev = dev_get_by_index(net, ifindex);
+ retv = -EADDRNOTAVAIL;
+ if (!dev)
+ break;
+ dev_put(dev);
+
+ retv = -EINVAL;
+ if (sk->sk_bound_dev_if)
+ break;
+
+ np->ucast_oif = ifindex;
+ retv = 0;
+ break;
+ }
+
+ case IPV6_MULTICAST_IF:
+ if (sk->sk_type == SOCK_STREAM)
+ break;
+ if (optlen < sizeof(int))
+ goto e_inval;
+
+ if (val) {
+ struct net_device *dev;
+
+ if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != val)
+ goto e_inval;
+
+ dev = dev_get_by_index(net, val);
+ if (!dev) {
+ retv = -ENODEV;
+ break;
+ }
+ dev_put(dev);
+ }
+ np->mcast_oif = val;
+ retv = 0;
+ break;
+ case IPV6_ADD_MEMBERSHIP:
+ case IPV6_DROP_MEMBERSHIP:
+ {
+ struct ipv6_mreq mreq;
+
+ if (optlen < sizeof(struct ipv6_mreq))
+ goto e_inval;
+
+ retv = -EPROTO;
+ if (inet_sk(sk)->is_icsk)
+ break;
+
+ retv = -EFAULT;
+ if (copy_from_user(&mreq, optval, sizeof(struct ipv6_mreq)))
+ break;
+
+ if (optname == IPV6_ADD_MEMBERSHIP)
+ retv = ipv6_sock_mc_join(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_multiaddr);
+ else
+ retv = ipv6_sock_mc_drop(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_multiaddr);
+ break;
+ }
+ case IPV6_JOIN_ANYCAST:
+ case IPV6_LEAVE_ANYCAST:
+ {
+ struct ipv6_mreq mreq;
+
+ if (optlen < sizeof(struct ipv6_mreq))
+ goto e_inval;
+
+ retv = -EFAULT;
+ if (copy_from_user(&mreq, optval, sizeof(struct ipv6_mreq)))
+ break;
+
+ if (optname == IPV6_JOIN_ANYCAST)
+ retv = ipv6_sock_ac_join(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_acaddr);
+ else
+ retv = ipv6_sock_ac_drop(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_acaddr);
+ break;
+ }
+ case MCAST_JOIN_GROUP:
+ case MCAST_LEAVE_GROUP:
+ {
+ struct group_req greq;
+ struct sockaddr_in6 *psin6;
+
+ if (optlen < sizeof(struct group_req))
+ goto e_inval;
+
+ retv = -EFAULT;
+ if (copy_from_user(&greq, optval, sizeof(struct group_req)))
+ break;
+ if (greq.gr_group.ss_family != AF_INET6) {
+ retv = -EADDRNOTAVAIL;
+ break;
+ }
+ psin6 = (struct sockaddr_in6 *)&greq.gr_group;
+ if (optname == MCAST_JOIN_GROUP)
+ retv = ipv6_sock_mc_join(sk, greq.gr_interface,
+ &psin6->sin6_addr);
+ else
+ retv = ipv6_sock_mc_drop(sk, greq.gr_interface,
+ &psin6->sin6_addr);
+ break;
+ }
+ case MCAST_JOIN_SOURCE_GROUP:
+ case MCAST_LEAVE_SOURCE_GROUP:
+ case MCAST_BLOCK_SOURCE:
+ case MCAST_UNBLOCK_SOURCE:
+ {
+ struct group_source_req greqs;
+ int omode, add;
+
+ if (optlen < sizeof(struct group_source_req))
+ goto e_inval;
+ if (copy_from_user(&greqs, optval, sizeof(greqs))) {
+ retv = -EFAULT;
+ break;
+ }
+ if (greqs.gsr_group.ss_family != AF_INET6 ||
+ greqs.gsr_source.ss_family != AF_INET6) {
+ retv = -EADDRNOTAVAIL;
+ break;
+ }
+ if (optname == MCAST_BLOCK_SOURCE) {
+ omode = MCAST_EXCLUDE;
+ add = 1;
+ } else if (optname == MCAST_UNBLOCK_SOURCE) {
+ omode = MCAST_EXCLUDE;
+ add = 0;
+ } else if (optname == MCAST_JOIN_SOURCE_GROUP) {
+ struct sockaddr_in6 *psin6;
+
+ psin6 = (struct sockaddr_in6 *)&greqs.gsr_group;
+ retv = ipv6_sock_mc_join(sk, greqs.gsr_interface,
+ &psin6->sin6_addr);
+ /* prior join w/ different source is ok */
+ if (retv && retv != -EADDRINUSE)
+ break;
+ omode = MCAST_INCLUDE;
+ add = 1;
+ } else /* MCAST_LEAVE_SOURCE_GROUP */ {
+ omode = MCAST_INCLUDE;
+ add = 0;
+ }
+ retv = ip6_mc_source(add, omode, sk, &greqs);
+ break;
+ }
+ case MCAST_MSFILTER:
+ {
+ extern int sysctl_mld_max_msf;
+ struct group_filter *gsf;
+
+ if (optlen < GROUP_FILTER_SIZE(0))
+ goto e_inval;
+ if (optlen > sysctl_optmem_max) {
+ retv = -ENOBUFS;
+ break;
+ }
+ gsf = kmalloc(optlen,GFP_KERNEL);
+ if (!gsf) {
+ retv = -ENOBUFS;
+ break;
+ }
+ retv = -EFAULT;
+ if (copy_from_user(gsf, optval, optlen)) {
+ kfree(gsf);
+ break;
+ }
+ /* numsrc >= (4G-140)/128 overflow in 32 bits */
+ if (gsf->gf_numsrc >= 0x1ffffffU ||
+ gsf->gf_numsrc > sysctl_mld_max_msf) {
+ kfree(gsf);
+ retv = -ENOBUFS;
+ break;
+ }
+ if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen) {
+ kfree(gsf);
+ retv = -EINVAL;
+ break;
+ }
+ retv = ip6_mc_msfilter(sk, gsf);
+ kfree(gsf);
+
+ break;
+ }
+ case IPV6_ROUTER_ALERT:
+ if (optlen < sizeof(int))
+ goto e_inval;
+ retv = ip6_ra_control(sk, val);
+ break;
+ case IPV6_MTU_DISCOVER:
+ if (optlen < sizeof(int))
+ goto e_inval;
+ if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_PROBE)
+ goto e_inval;
+ np->pmtudisc = val;
+ retv = 0;
+ break;
+ case IPV6_MTU:
+ if (optlen < sizeof(int))
+ goto e_inval;
+ if (val && val < IPV6_MIN_MTU)
+ goto e_inval;
+ np->frag_size = val;
+ retv = 0;
+ break;
+ case IPV6_RECVERR:
+ if (optlen < sizeof(int))
+ goto e_inval;
+ np->recverr = valbool;
+ if (!val)
+ skb_queue_purge(&sk->sk_error_queue);
+ retv = 0;
+ break;
+ case IPV6_FLOWINFO_SEND:
+ if (optlen < sizeof(int))
+ goto e_inval;
+ np->sndflow = valbool;
+ retv = 0;
+ break;
+ case IPV6_FLOWLABEL_MGR:
+ retv = ipv6_flowlabel_opt(sk, optval, optlen);
+ break;
+ case IPV6_IPSEC_POLICY:
+ case IPV6_XFRM_POLICY:
+ retv = -EPERM;
+ if (!capable(CAP_NET_ADMIN))
+ break;
+ retv = xfrm_user_policy(sk, optname, optval, optlen);
+ break;
+
+ case IPV6_ADDR_PREFERENCES:
+ {
+ unsigned int pref = 0;
+ unsigned int prefmask = ~0;
+
+ if (optlen < sizeof(int))
+ goto e_inval;
+
+ retv = -EINVAL;
+
+ /* check PUBLIC/TMP/PUBTMP_DEFAULT conflicts */
+ switch (val & (IPV6_PREFER_SRC_PUBLIC|
+ IPV6_PREFER_SRC_TMP|
+ IPV6_PREFER_SRC_PUBTMP_DEFAULT)) {
+ case IPV6_PREFER_SRC_PUBLIC:
+ pref |= IPV6_PREFER_SRC_PUBLIC;
+ break;
+ case IPV6_PREFER_SRC_TMP:
+ pref |= IPV6_PREFER_SRC_TMP;
+ break;
+ case IPV6_PREFER_SRC_PUBTMP_DEFAULT:
+ break;
+ case 0:
+ goto pref_skip_pubtmp;
+ default:
+ goto e_inval;
+ }
+
+ prefmask &= ~(IPV6_PREFER_SRC_PUBLIC|
+ IPV6_PREFER_SRC_TMP);
+pref_skip_pubtmp:
+
+ /* check HOME/COA conflicts */
+ switch (val & (IPV6_PREFER_SRC_HOME|IPV6_PREFER_SRC_COA)) {
+ case IPV6_PREFER_SRC_HOME:
+ break;
+ case IPV6_PREFER_SRC_COA:
+ pref |= IPV6_PREFER_SRC_COA;
+ case 0:
+ goto pref_skip_coa;
+ default:
+ goto e_inval;
+ }
+
+ prefmask &= ~IPV6_PREFER_SRC_COA;
+pref_skip_coa:
+
+ /* check CGA/NONCGA conflicts */
+ switch (val & (IPV6_PREFER_SRC_CGA|IPV6_PREFER_SRC_NONCGA)) {
+ case IPV6_PREFER_SRC_CGA:
+ case IPV6_PREFER_SRC_NONCGA:
+ case 0:
+ break;
+ default:
+ goto e_inval;
+ }
+
+ np->srcprefs = (np->srcprefs & prefmask) | pref;
+ retv = 0;
+
+ break;
+ }
+ case IPV6_MINHOPCOUNT:
+ if (optlen < sizeof(int))
+ goto e_inval;
+ if (val < 0 || val > 255)
+ goto e_inval;
+ np->min_hopcount = val;
+ break;
+ case IPV6_DONTFRAG:
+ np->dontfrag = valbool;
+ retv = 0;
+ break;
+ }
+
+ release_sock(sk);
+
+ return retv;
+
+e_inval:
+ release_sock(sk);
+ return -EINVAL;
+}
+
+int ipv6_setsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, unsigned int optlen)
+{
+ int err;
+
+ if (level == SOL_IP && sk->sk_type != SOCK_RAW)
+ return udp_prot.setsockopt(sk, level, optname, optval, optlen);
+
+ if (level != SOL_IPV6)
+ return -ENOPROTOOPT;
+
+ err = do_ipv6_setsockopt(sk, level, optname, optval, optlen);
+#ifdef CONFIG_NETFILTER
+ /* we need to exclude all possible ENOPROTOOPTs except default case */
+ if (err == -ENOPROTOOPT && optname != IPV6_IPSEC_POLICY &&
+ optname != IPV6_XFRM_POLICY) {
+ lock_sock(sk);
+ err = nf_setsockopt(sk, PF_INET6, optname, optval,
+ optlen);
+ release_sock(sk);
+ }
+#endif
+ return err;
+}
+
+EXPORT_SYMBOL(ipv6_setsockopt);
+
+#ifdef CONFIG_COMPAT
+int compat_ipv6_setsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, unsigned int optlen)
+{
+ int err;
+
+ if (level == SOL_IP && sk->sk_type != SOCK_RAW) {
+ if (udp_prot.compat_setsockopt != NULL)
+ return udp_prot.compat_setsockopt(sk, level, optname,
+ optval, optlen);
+ return udp_prot.setsockopt(sk, level, optname, optval, optlen);
+ }
+
+ if (level != SOL_IPV6)
+ return -ENOPROTOOPT;
+
+ if (optname >= MCAST_JOIN_GROUP && optname <= MCAST_MSFILTER)
+ return compat_mc_setsockopt(sk, level, optname, optval, optlen,
+ ipv6_setsockopt);
+
+ err = do_ipv6_setsockopt(sk, level, optname, optval, optlen);
+#ifdef CONFIG_NETFILTER
+ /* we need to exclude all possible ENOPROTOOPTs except default case */
+ if (err == -ENOPROTOOPT && optname != IPV6_IPSEC_POLICY &&
+ optname != IPV6_XFRM_POLICY) {
+ lock_sock(sk);
+ err = compat_nf_setsockopt(sk, PF_INET6, optname,
+ optval, optlen);
+ release_sock(sk);
+ }
+#endif
+ return err;
+}
+
+EXPORT_SYMBOL(compat_ipv6_setsockopt);
+#endif
+
+static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt,
+ int optname, char __user *optval, int len)
+{
+ struct ipv6_opt_hdr *hdr;
+
+ if (!opt)
+ return 0;
+
+ switch(optname) {
+ case IPV6_HOPOPTS:
+ hdr = opt->hopopt;
+ break;
+ case IPV6_RTHDRDSTOPTS:
+ hdr = opt->dst0opt;
+ break;
+ case IPV6_RTHDR:
+ hdr = (struct ipv6_opt_hdr *)opt->srcrt;
+ break;
+ case IPV6_DSTOPTS:
+ hdr = opt->dst1opt;
+ break;
+ default:
+ return -EINVAL; /* should not happen */
+ }
+
+ if (!hdr)
+ return 0;
+
+ len = min_t(unsigned int, len, ipv6_optlen(hdr));
+ if (copy_to_user(optval, hdr, len))
+ return -EFAULT;
+ return len;
+}
+
+static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int __user *optlen, unsigned flags)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ int len;
+ int val;
+
+ if (ip6_mroute_opt(optname))
+ return ip6_mroute_getsockopt(sk, optname, optval, optlen);
+
+ if (get_user(len, optlen))
+ return -EFAULT;
+ switch (optname) {
+ case IPV6_ADDRFORM:
+ if (sk->sk_protocol != IPPROTO_UDP &&
+ sk->sk_protocol != IPPROTO_UDPLITE &&
+ sk->sk_protocol != IPPROTO_TCP)
+ return -ENOPROTOOPT;
+ if (sk->sk_state != TCP_ESTABLISHED)
+ return -ENOTCONN;
+ val = sk->sk_family;
+ break;
+ case MCAST_MSFILTER:
+ {
+ struct group_filter gsf;
+ int err;
+
+ if (len < GROUP_FILTER_SIZE(0))
+ return -EINVAL;
+ if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0)))
+ return -EFAULT;
+ if (gsf.gf_group.ss_family != AF_INET6)
+ return -EADDRNOTAVAIL;
+ lock_sock(sk);
+ err = ip6_mc_msfget(sk, &gsf,
+ (struct group_filter __user *)optval, optlen);
+ release_sock(sk);
+ return err;
+ }
+
+ case IPV6_2292PKTOPTIONS:
+ {
+ struct msghdr msg;
+ struct sk_buff *skb;
+
+ if (sk->sk_type != SOCK_STREAM)
+ return -ENOPROTOOPT;
+
+ msg.msg_control = optval;
+ msg.msg_controllen = len;
+ msg.msg_flags = flags;
+
+ lock_sock(sk);
+ skb = np->pktoptions;
+ if (skb)
+ atomic_inc(&skb->users);
+ release_sock(sk);
+
+ if (skb) {
+ int err = datagram_recv_ctl(sk, &msg, skb);
+ kfree_skb(skb);
+ if (err)
+ return err;
+ } else {
+ if (np->rxopt.bits.rxinfo) {
+ struct in6_pktinfo src_info;
+ src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif :
+ np->sticky_pktinfo.ipi6_ifindex;
+ src_info.ipi6_addr = np->mcast_oif ? np->daddr : np->sticky_pktinfo.ipi6_addr;
+ put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
+ }
+ if (np->rxopt.bits.rxhlim) {
+ int hlim = np->mcast_hops;
+ put_cmsg(&msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim);
+ }
+ if (np->rxopt.bits.rxtclass) {
+ int tclass = np->rcv_tclass;
+ put_cmsg(&msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass);
+ }
+ if (np->rxopt.bits.rxoinfo) {
+ struct in6_pktinfo src_info;
+ src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif :
+ np->sticky_pktinfo.ipi6_ifindex;
+ src_info.ipi6_addr = np->mcast_oif ? np->daddr : np->sticky_pktinfo.ipi6_addr;
+ put_cmsg(&msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info);
+ }
+ if (np->rxopt.bits.rxohlim) {
+ int hlim = np->mcast_hops;
+ put_cmsg(&msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim);
+ }
+ }
+ len -= msg.msg_controllen;
+ return put_user(len, optlen);
+ }
+ case IPV6_MTU:
+ {
+ struct dst_entry *dst;
+
+ val = 0;
+ rcu_read_lock();
+ dst = __sk_dst_get(sk);
+ if (dst)
+ val = dst_mtu(dst);
+ rcu_read_unlock();
+ if (!val)
+ return -ENOTCONN;
+ break;
+ }
+
+ case IPV6_V6ONLY:
+ val = np->ipv6only;
+ break;
+
+ case IPV6_RECVPKTINFO:
+ val = np->rxopt.bits.rxinfo;
+ break;
+
+ case IPV6_2292PKTINFO:
+ val = np->rxopt.bits.rxoinfo;
+ break;
+
+ case IPV6_RECVHOPLIMIT:
+ val = np->rxopt.bits.rxhlim;
+ break;
+
+ case IPV6_2292HOPLIMIT:
+ val = np->rxopt.bits.rxohlim;
+ break;
+
+ case IPV6_RECVRTHDR:
+ val = np->rxopt.bits.srcrt;
+ break;
+
+ case IPV6_2292RTHDR:
+ val = np->rxopt.bits.osrcrt;
+ break;
+
+ case IPV6_HOPOPTS:
+ case IPV6_RTHDRDSTOPTS:
+ case IPV6_RTHDR:
+ case IPV6_DSTOPTS:
+ {
+
+ lock_sock(sk);
+ len = ipv6_getsockopt_sticky(sk, np->opt,
+ optname, optval, len);
+ release_sock(sk);
+ /* check if ipv6_getsockopt_sticky() returns err code */
+ if (len < 0)
+ return len;
+ return put_user(len, optlen);
+ }
+
+ case IPV6_RECVHOPOPTS:
+ val = np->rxopt.bits.hopopts;
+ break;
+
+ case IPV6_2292HOPOPTS:
+ val = np->rxopt.bits.ohopopts;
+ break;
+
+ case IPV6_RECVDSTOPTS:
+ val = np->rxopt.bits.dstopts;
+ break;
+
+ case IPV6_2292DSTOPTS:
+ val = np->rxopt.bits.odstopts;
+ break;
+
+ case IPV6_TCLASS:
+ val = np->tclass;
+ break;
+
+ case IPV6_RECVTCLASS:
+ val = np->rxopt.bits.rxtclass;
+ break;
+
+ case IPV6_FLOWINFO:
+ val = np->rxopt.bits.rxflow;
+ break;
+
+ case IPV6_RECVPATHMTU:
+ val = np->rxopt.bits.rxpmtu;
+ break;
+
+ case IPV6_PATHMTU:
+ {
+ struct dst_entry *dst;
+ struct ip6_mtuinfo mtuinfo;
+
+ if (len < sizeof(mtuinfo))
+ return -EINVAL;
+
+ len = sizeof(mtuinfo);
+ memset(&mtuinfo, 0, sizeof(mtuinfo));
+
+ rcu_read_lock();
+ dst = __sk_dst_get(sk);
+ if (dst)
+ mtuinfo.ip6m_mtu = dst_mtu(dst);
+ rcu_read_unlock();
+ if (!mtuinfo.ip6m_mtu)
+ return -ENOTCONN;
+
+ if (put_user(len, optlen))
+ return -EFAULT;
+ if (copy_to_user(optval, &mtuinfo, len))
+ return -EFAULT;
+
+ return 0;
+ break;
+ }
+
+ case IPV6_TRANSPARENT:
+ val = inet_sk(sk)->transparent;
+ break;
+
+ case IPV6_RECVORIGDSTADDR:
+ val = np->rxopt.bits.rxorigdstaddr;
+ break;
+
+ case IPV6_UNICAST_HOPS:
+ case IPV6_MULTICAST_HOPS:
+ {
+ struct dst_entry *dst;
+
+ if (optname == IPV6_UNICAST_HOPS)
+ val = np->hop_limit;
+ else
+ val = np->mcast_hops;
+
+ if (val < 0) {
+ rcu_read_lock();
+ dst = __sk_dst_get(sk);
+ if (dst)
+ val = ip6_dst_hoplimit(dst);
+ rcu_read_unlock();
+ }
+
+ if (val < 0)
+ val = sock_net(sk)->ipv6.devconf_all->hop_limit;
+ break;
+ }
+
+ case IPV6_MULTICAST_LOOP:
+ val = np->mc_loop;
+ break;
+
+ case IPV6_MULTICAST_IF:
+ val = np->mcast_oif;
+ break;
+
+ case IPV6_UNICAST_IF:
+ val = (__force int)htonl((__u32) np->ucast_oif);
+ break;
+
+ case IPV6_MTU_DISCOVER:
+ val = np->pmtudisc;
+ break;
+
+ case IPV6_RECVERR:
+ val = np->recverr;
+ break;
+
+ case IPV6_FLOWINFO_SEND:
+ val = np->sndflow;
+ break;
+
+ case IPV6_ADDR_PREFERENCES:
+ val = 0;
+
+ if (np->srcprefs & IPV6_PREFER_SRC_TMP)
+ val |= IPV6_PREFER_SRC_TMP;
+ else if (np->srcprefs & IPV6_PREFER_SRC_PUBLIC)
+ val |= IPV6_PREFER_SRC_PUBLIC;
+ else {
+ /* XXX: should we return system default? */
+ val |= IPV6_PREFER_SRC_PUBTMP_DEFAULT;
+ }
+
+ if (np->srcprefs & IPV6_PREFER_SRC_COA)
+ val |= IPV6_PREFER_SRC_COA;
+ else
+ val |= IPV6_PREFER_SRC_HOME;
+ break;
+
+ case IPV6_MINHOPCOUNT:
+ val = np->min_hopcount;
+ break;
+
+ case IPV6_DONTFRAG:
+ val = np->dontfrag;
+ break;
+
+ default:
+ return -ENOPROTOOPT;
+ }
+ len = min_t(unsigned int, sizeof(int), len);
+ if(put_user(len, optlen))
+ return -EFAULT;
+ if(copy_to_user(optval,&val,len))
+ return -EFAULT;
+ return 0;
+}
+
+int ipv6_getsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int __user *optlen)
+{
+ int err;
+
+ if (level == SOL_IP && sk->sk_type != SOCK_RAW)
+ return udp_prot.getsockopt(sk, level, optname, optval, optlen);
+
+ if(level != SOL_IPV6)
+ return -ENOPROTOOPT;
+
+ err = do_ipv6_getsockopt(sk, level, optname, optval, optlen, 0);
+#ifdef CONFIG_NETFILTER
+ /* we need to exclude all possible ENOPROTOOPTs except default case */
+ if (err == -ENOPROTOOPT && optname != IPV6_2292PKTOPTIONS) {
+ int len;
+
+ if (get_user(len, optlen))
+ return -EFAULT;
+
+ lock_sock(sk);
+ err = nf_getsockopt(sk, PF_INET6, optname, optval,
+ &len);
+ release_sock(sk);
+ if (err >= 0)
+ err = put_user(len, optlen);
+ }
+#endif
+ return err;
+}
+
+EXPORT_SYMBOL(ipv6_getsockopt);
+
+#ifdef CONFIG_COMPAT
+int compat_ipv6_getsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int __user *optlen)
+{
+ int err;
+
+ if (level == SOL_IP && sk->sk_type != SOCK_RAW) {
+ if (udp_prot.compat_getsockopt != NULL)
+ return udp_prot.compat_getsockopt(sk, level, optname,
+ optval, optlen);
+ return udp_prot.getsockopt(sk, level, optname, optval, optlen);
+ }
+
+ if (level != SOL_IPV6)
+ return -ENOPROTOOPT;
+
+ if (optname == MCAST_MSFILTER)
+ return compat_mc_getsockopt(sk, level, optname, optval, optlen,
+ ipv6_getsockopt);
+
+ err = do_ipv6_getsockopt(sk, level, optname, optval, optlen,
+ MSG_CMSG_COMPAT);
+#ifdef CONFIG_NETFILTER
+ /* we need to exclude all possible ENOPROTOOPTs except default case */
+ if (err == -ENOPROTOOPT && optname != IPV6_2292PKTOPTIONS) {
+ int len;
+
+ if (get_user(len, optlen))
+ return -EFAULT;
+
+ lock_sock(sk);
+ err = compat_nf_getsockopt(sk, PF_INET6,
+ optname, optval, &len);
+ release_sock(sk);
+ if (err >= 0)
+ err = put_user(len, optlen);
+ }
+#endif
+ return err;
+}
+
+EXPORT_SYMBOL(compat_ipv6_getsockopt);
+#endif
+
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
new file mode 100644
index 00000000..b2869cab
--- /dev/null
+++ b/net/ipv6/mcast.c
@@ -0,0 +1,2668 @@
+/*
+ * Multicast support for IPv6
+ * Linux INET6 implementation
+ *
+ * Authors:
+ * Pedro Roque <roque@di.fc.ul.pt>
+ *
+ * Based on linux/ipv4/igmp.c and linux/ipv4/ip_sockglue.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/* Changes:
+ *
+ * yoshfuji : fix format of router-alert option
+ * YOSHIFUJI Hideaki @USAGI:
+ * Fixed source address for MLD message based on
+ * <draft-ietf-magma-mld-source-05.txt>.
+ * YOSHIFUJI Hideaki @USAGI:
+ * - Ignore Queries for invalid addresses.
+ * - MLD for link-local addresses.
+ * David L Stevens <dlstevens@us.ibm.com>:
+ * - MLDv2 support
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/jiffies.h>
+#include <linux/times.h>
+#include <linux/net.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/route.h>
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <net/mld.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+
+#include <net/net_namespace.h>
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/if_inet6.h>
+#include <net/ndisc.h>
+#include <net/addrconf.h>
+#include <net/ip6_route.h>
+#include <net/inet_common.h>
+
+#include <net/ip6_checksum.h>
+
+/* Set to 3 to get tracing... */
+#define MCAST_DEBUG 2
+
+#if MCAST_DEBUG >= 3
+#define MDBG(x) printk x
+#else
+#define MDBG(x)
+#endif
+
+/* Ensure that we have struct in6_addr aligned on 32bit word. */
+static void *__mld2_query_bugs[] __attribute__((__unused__)) = {
+ BUILD_BUG_ON_NULL(offsetof(struct mld2_query, mld2q_srcs) % 4),
+ BUILD_BUG_ON_NULL(offsetof(struct mld2_report, mld2r_grec) % 4),
+ BUILD_BUG_ON_NULL(offsetof(struct mld2_grec, grec_mca) % 4)
+};
+
+static struct in6_addr mld2_all_mcr = MLD2_ALL_MCR_INIT;
+
+/* Big mc list lock for all the sockets */
+static DEFINE_SPINLOCK(ipv6_sk_mc_lock);
+
+static void igmp6_join_group(struct ifmcaddr6 *ma);
+static void igmp6_leave_group(struct ifmcaddr6 *ma);
+static void igmp6_timer_handler(unsigned long data);
+
+static void mld_gq_timer_expire(unsigned long data);
+static void mld_ifc_timer_expire(unsigned long data);
+static void mld_ifc_event(struct inet6_dev *idev);
+static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *pmc);
+static void mld_del_delrec(struct inet6_dev *idev, const struct in6_addr *addr);
+static void mld_clear_delrec(struct inet6_dev *idev);
+static int sf_setstate(struct ifmcaddr6 *pmc);
+static void sf_markstate(struct ifmcaddr6 *pmc);
+static void ip6_mc_clear_src(struct ifmcaddr6 *pmc);
+static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca,
+ int sfmode, int sfcount, const struct in6_addr *psfsrc,
+ int delta);
+static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca,
+ int sfmode, int sfcount, const struct in6_addr *psfsrc,
+ int delta);
+static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
+ struct inet6_dev *idev);
+
+
+#define IGMP6_UNSOLICITED_IVAL (10*HZ)
+#define MLD_QRV_DEFAULT 2
+
+#define MLD_V1_SEEN(idev) (dev_net((idev)->dev)->ipv6.devconf_all->force_mld_version == 1 || \
+ (idev)->cnf.force_mld_version == 1 || \
+ ((idev)->mc_v1_seen && \
+ time_before(jiffies, (idev)->mc_v1_seen)))
+
+#define IPV6_MLD_MAX_MSF 64
+
+int sysctl_mld_max_msf __read_mostly = IPV6_MLD_MAX_MSF;
+
+/*
+ * socket join on multicast group
+ */
+
+#define for_each_pmc_rcu(np, pmc) \
+ for (pmc = rcu_dereference(np->ipv6_mc_list); \
+ pmc != NULL; \
+ pmc = rcu_dereference(pmc->next))
+
+int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
+{
+ struct net_device *dev = NULL;
+ struct ipv6_mc_socklist *mc_lst;
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct net *net = sock_net(sk);
+ int err;
+
+ if (!ipv6_addr_is_multicast(addr))
+ return -EINVAL;
+
+ rcu_read_lock();
+ for_each_pmc_rcu(np, mc_lst) {
+ if ((ifindex == 0 || mc_lst->ifindex == ifindex) &&
+ ipv6_addr_equal(&mc_lst->addr, addr)) {
+ rcu_read_unlock();
+ return -EADDRINUSE;
+ }
+ }
+ rcu_read_unlock();
+
+ mc_lst = sock_kmalloc(sk, sizeof(struct ipv6_mc_socklist), GFP_KERNEL);
+
+ if (mc_lst == NULL)
+ return -ENOMEM;
+
+ mc_lst->next = NULL;
+ mc_lst->addr = *addr;
+
+ rcu_read_lock();
+ if (ifindex == 0) {
+ struct rt6_info *rt;
+ rt = rt6_lookup(net, addr, NULL, 0, 0);
+ if (rt) {
+ dev = rt->dst.dev;
+ dst_release(&rt->dst);
+ }
+ } else
+ dev = dev_get_by_index_rcu(net, ifindex);
+
+ if (dev == NULL) {
+ rcu_read_unlock();
+ sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
+ return -ENODEV;
+ }
+
+ mc_lst->ifindex = dev->ifindex;
+ mc_lst->sfmode = MCAST_EXCLUDE;
+ rwlock_init(&mc_lst->sflock);
+ mc_lst->sflist = NULL;
+
+ /*
+ * now add/increase the group membership on the device
+ */
+
+ err = ipv6_dev_mc_inc(dev, addr);
+
+ if (err) {
+ rcu_read_unlock();
+ sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
+ return err;
+ }
+
+ spin_lock(&ipv6_sk_mc_lock);
+ mc_lst->next = np->ipv6_mc_list;
+ rcu_assign_pointer(np->ipv6_mc_list, mc_lst);
+ spin_unlock(&ipv6_sk_mc_lock);
+
+ rcu_read_unlock();
+
+ return 0;
+}
+
+/*
+ * socket leave on multicast group
+ */
+int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6_mc_socklist *mc_lst;
+ struct ipv6_mc_socklist __rcu **lnk;
+ struct net *net = sock_net(sk);
+
+ spin_lock(&ipv6_sk_mc_lock);
+ for (lnk = &np->ipv6_mc_list;
+ (mc_lst = rcu_dereference_protected(*lnk,
+ lockdep_is_held(&ipv6_sk_mc_lock))) !=NULL ;
+ lnk = &mc_lst->next) {
+ if ((ifindex == 0 || mc_lst->ifindex == ifindex) &&
+ ipv6_addr_equal(&mc_lst->addr, addr)) {
+ struct net_device *dev;
+
+ *lnk = mc_lst->next;
+ spin_unlock(&ipv6_sk_mc_lock);
+
+ rcu_read_lock();
+ dev = dev_get_by_index_rcu(net, mc_lst->ifindex);
+ if (dev != NULL) {
+ struct inet6_dev *idev = __in6_dev_get(dev);
+
+ (void) ip6_mc_leave_src(sk, mc_lst, idev);
+ if (idev)
+ __ipv6_dev_mc_dec(idev, &mc_lst->addr);
+ } else
+ (void) ip6_mc_leave_src(sk, mc_lst, NULL);
+ rcu_read_unlock();
+ atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc);
+ kfree_rcu(mc_lst, rcu);
+ return 0;
+ }
+ }
+ spin_unlock(&ipv6_sk_mc_lock);
+
+ return -EADDRNOTAVAIL;
+}
+
+/* called with rcu_read_lock() */
+static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net,
+ const struct in6_addr *group,
+ int ifindex)
+{
+ struct net_device *dev = NULL;
+ struct inet6_dev *idev = NULL;
+
+ if (ifindex == 0) {
+ struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, 0);
+
+ if (rt) {
+ dev = rt->dst.dev;
+ dst_release(&rt->dst);
+ }
+ } else
+ dev = dev_get_by_index_rcu(net, ifindex);
+
+ if (!dev)
+ return NULL;
+ idev = __in6_dev_get(dev);
+ if (!idev)
+ return NULL;
+ read_lock_bh(&idev->lock);
+ if (idev->dead) {
+ read_unlock_bh(&idev->lock);
+ return NULL;
+ }
+ return idev;
+}
+
+void ipv6_sock_mc_close(struct sock *sk)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6_mc_socklist *mc_lst;
+ struct net *net = sock_net(sk);
+
+ spin_lock(&ipv6_sk_mc_lock);
+ while ((mc_lst = rcu_dereference_protected(np->ipv6_mc_list,
+ lockdep_is_held(&ipv6_sk_mc_lock))) != NULL) {
+ struct net_device *dev;
+
+ np->ipv6_mc_list = mc_lst->next;
+ spin_unlock(&ipv6_sk_mc_lock);
+
+ rcu_read_lock();
+ dev = dev_get_by_index_rcu(net, mc_lst->ifindex);
+ if (dev) {
+ struct inet6_dev *idev = __in6_dev_get(dev);
+
+ (void) ip6_mc_leave_src(sk, mc_lst, idev);
+ if (idev)
+ __ipv6_dev_mc_dec(idev, &mc_lst->addr);
+ } else
+ (void) ip6_mc_leave_src(sk, mc_lst, NULL);
+ rcu_read_unlock();
+
+ atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc);
+ kfree_rcu(mc_lst, rcu);
+
+ spin_lock(&ipv6_sk_mc_lock);
+ }
+ spin_unlock(&ipv6_sk_mc_lock);
+}
+
+int ip6_mc_source(int add, int omode, struct sock *sk,
+ struct group_source_req *pgsr)
+{
+ struct in6_addr *source, *group;
+ struct ipv6_mc_socklist *pmc;
+ struct inet6_dev *idev;
+ struct ipv6_pinfo *inet6 = inet6_sk(sk);
+ struct ip6_sf_socklist *psl;
+ struct net *net = sock_net(sk);
+ int i, j, rv;
+ int leavegroup = 0;
+ int pmclocked = 0;
+ int err;
+
+ source = &((struct sockaddr_in6 *)&pgsr->gsr_source)->sin6_addr;
+ group = &((struct sockaddr_in6 *)&pgsr->gsr_group)->sin6_addr;
+
+ if (!ipv6_addr_is_multicast(group))
+ return -EINVAL;
+
+ rcu_read_lock();
+ idev = ip6_mc_find_dev_rcu(net, group, pgsr->gsr_interface);
+ if (!idev) {
+ rcu_read_unlock();
+ return -ENODEV;
+ }
+
+ err = -EADDRNOTAVAIL;
+
+ for_each_pmc_rcu(inet6, pmc) {
+ if (pgsr->gsr_interface && pmc->ifindex != pgsr->gsr_interface)
+ continue;
+ if (ipv6_addr_equal(&pmc->addr, group))
+ break;
+ }
+ if (!pmc) { /* must have a prior join */
+ err = -EINVAL;
+ goto done;
+ }
+ /* if a source filter was set, must be the same mode as before */
+ if (pmc->sflist) {
+ if (pmc->sfmode != omode) {
+ err = -EINVAL;
+ goto done;
+ }
+ } else if (pmc->sfmode != omode) {
+ /* allow mode switches for empty-set filters */
+ ip6_mc_add_src(idev, group, omode, 0, NULL, 0);
+ ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0);
+ pmc->sfmode = omode;
+ }
+
+ write_lock(&pmc->sflock);
+ pmclocked = 1;
+
+ psl = pmc->sflist;
+ if (!add) {
+ if (!psl)
+ goto done; /* err = -EADDRNOTAVAIL */
+ rv = !0;
+ for (i=0; i<psl->sl_count; i++) {
+ rv = memcmp(&psl->sl_addr[i], source,
+ sizeof(struct in6_addr));
+ if (rv == 0)
+ break;
+ }
+ if (rv) /* source not found */
+ goto done; /* err = -EADDRNOTAVAIL */
+
+ /* special case - (INCLUDE, empty) == LEAVE_GROUP */
+ if (psl->sl_count == 1 && omode == MCAST_INCLUDE) {
+ leavegroup = 1;
+ goto done;
+ }
+
+ /* update the interface filter */
+ ip6_mc_del_src(idev, group, omode, 1, source, 1);
+
+ for (j=i+1; j<psl->sl_count; j++)
+ psl->sl_addr[j-1] = psl->sl_addr[j];
+ psl->sl_count--;
+ err = 0;
+ goto done;
+ }
+ /* else, add a new source to the filter */
+
+ if (psl && psl->sl_count >= sysctl_mld_max_msf) {
+ err = -ENOBUFS;
+ goto done;
+ }
+ if (!psl || psl->sl_count == psl->sl_max) {
+ struct ip6_sf_socklist *newpsl;
+ int count = IP6_SFBLOCK;
+
+ if (psl)
+ count += psl->sl_max;
+ newpsl = sock_kmalloc(sk, IP6_SFLSIZE(count), GFP_ATOMIC);
+ if (!newpsl) {
+ err = -ENOBUFS;
+ goto done;
+ }
+ newpsl->sl_max = count;
+ newpsl->sl_count = count - IP6_SFBLOCK;
+ if (psl) {
+ for (i=0; i<psl->sl_count; i++)
+ newpsl->sl_addr[i] = psl->sl_addr[i];
+ sock_kfree_s(sk, psl, IP6_SFLSIZE(psl->sl_max));
+ }
+ pmc->sflist = psl = newpsl;
+ }
+ rv = 1; /* > 0 for insert logic below if sl_count is 0 */
+ for (i=0; i<psl->sl_count; i++) {
+ rv = memcmp(&psl->sl_addr[i], source, sizeof(struct in6_addr));
+ if (rv == 0)
+ break;
+ }
+ if (rv == 0) /* address already there is an error */
+ goto done;
+ for (j=psl->sl_count-1; j>=i; j--)
+ psl->sl_addr[j+1] = psl->sl_addr[j];
+ psl->sl_addr[i] = *source;
+ psl->sl_count++;
+ err = 0;
+ /* update the interface list */
+ ip6_mc_add_src(idev, group, omode, 1, source, 1);
+done:
+ if (pmclocked)
+ write_unlock(&pmc->sflock);
+ read_unlock_bh(&idev->lock);
+ rcu_read_unlock();
+ if (leavegroup)
+ return ipv6_sock_mc_drop(sk, pgsr->gsr_interface, group);
+ return err;
+}
+
+int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
+{
+ const struct in6_addr *group;
+ struct ipv6_mc_socklist *pmc;
+ struct inet6_dev *idev;
+ struct ipv6_pinfo *inet6 = inet6_sk(sk);
+ struct ip6_sf_socklist *newpsl, *psl;
+ struct net *net = sock_net(sk);
+ int leavegroup = 0;
+ int i, err;
+
+ group = &((struct sockaddr_in6 *)&gsf->gf_group)->sin6_addr;
+
+ if (!ipv6_addr_is_multicast(group))
+ return -EINVAL;
+ if (gsf->gf_fmode != MCAST_INCLUDE &&
+ gsf->gf_fmode != MCAST_EXCLUDE)
+ return -EINVAL;
+
+ rcu_read_lock();
+ idev = ip6_mc_find_dev_rcu(net, group, gsf->gf_interface);
+
+ if (!idev) {
+ rcu_read_unlock();
+ return -ENODEV;
+ }
+
+ err = 0;
+
+ if (gsf->gf_fmode == MCAST_INCLUDE && gsf->gf_numsrc == 0) {
+ leavegroup = 1;
+ goto done;
+ }
+
+ for_each_pmc_rcu(inet6, pmc) {
+ if (pmc->ifindex != gsf->gf_interface)
+ continue;
+ if (ipv6_addr_equal(&pmc->addr, group))
+ break;
+ }
+ if (!pmc) { /* must have a prior join */
+ err = -EINVAL;
+ goto done;
+ }
+ if (gsf->gf_numsrc) {
+ newpsl = sock_kmalloc(sk, IP6_SFLSIZE(gsf->gf_numsrc),
+ GFP_ATOMIC);
+ if (!newpsl) {
+ err = -ENOBUFS;
+ goto done;
+ }
+ newpsl->sl_max = newpsl->sl_count = gsf->gf_numsrc;
+ for (i=0; i<newpsl->sl_count; ++i) {
+ struct sockaddr_in6 *psin6;
+
+ psin6 = (struct sockaddr_in6 *)&gsf->gf_slist[i];
+ newpsl->sl_addr[i] = psin6->sin6_addr;
+ }
+ err = ip6_mc_add_src(idev, group, gsf->gf_fmode,
+ newpsl->sl_count, newpsl->sl_addr, 0);
+ if (err) {
+ sock_kfree_s(sk, newpsl, IP6_SFLSIZE(newpsl->sl_max));
+ goto done;
+ }
+ } else {
+ newpsl = NULL;
+ (void) ip6_mc_add_src(idev, group, gsf->gf_fmode, 0, NULL, 0);
+ }
+
+ write_lock(&pmc->sflock);
+ psl = pmc->sflist;
+ if (psl) {
+ (void) ip6_mc_del_src(idev, group, pmc->sfmode,
+ psl->sl_count, psl->sl_addr, 0);
+ sock_kfree_s(sk, psl, IP6_SFLSIZE(psl->sl_max));
+ } else
+ (void) ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0);
+ pmc->sflist = newpsl;
+ pmc->sfmode = gsf->gf_fmode;
+ write_unlock(&pmc->sflock);
+ err = 0;
+done:
+ read_unlock_bh(&idev->lock);
+ rcu_read_unlock();
+ if (leavegroup)
+ err = ipv6_sock_mc_drop(sk, gsf->gf_interface, group);
+ return err;
+}
+
+int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
+ struct group_filter __user *optval, int __user *optlen)
+{
+ int err, i, count, copycount;
+ const struct in6_addr *group;
+ struct ipv6_mc_socklist *pmc;
+ struct inet6_dev *idev;
+ struct ipv6_pinfo *inet6 = inet6_sk(sk);
+ struct ip6_sf_socklist *psl;
+ struct net *net = sock_net(sk);
+
+ group = &((struct sockaddr_in6 *)&gsf->gf_group)->sin6_addr;
+
+ if (!ipv6_addr_is_multicast(group))
+ return -EINVAL;
+
+ rcu_read_lock();
+ idev = ip6_mc_find_dev_rcu(net, group, gsf->gf_interface);
+
+ if (!idev) {
+ rcu_read_unlock();
+ return -ENODEV;
+ }
+
+ err = -EADDRNOTAVAIL;
+ /*
+ * changes to the ipv6_mc_list require the socket lock and
+ * a read lock on ip6_sk_mc_lock. We have the socket lock,
+ * so reading the list is safe.
+ */
+
+ for_each_pmc_rcu(inet6, pmc) {
+ if (pmc->ifindex != gsf->gf_interface)
+ continue;
+ if (ipv6_addr_equal(group, &pmc->addr))
+ break;
+ }
+ if (!pmc) /* must have a prior join */
+ goto done;
+ gsf->gf_fmode = pmc->sfmode;
+ psl = pmc->sflist;
+ count = psl ? psl->sl_count : 0;
+ read_unlock_bh(&idev->lock);
+ rcu_read_unlock();
+
+ copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc;
+ gsf->gf_numsrc = count;
+ if (put_user(GROUP_FILTER_SIZE(copycount), optlen) ||
+ copy_to_user(optval, gsf, GROUP_FILTER_SIZE(0))) {
+ return -EFAULT;
+ }
+ /* changes to psl require the socket lock, a read lock on
+ * on ipv6_sk_mc_lock and a write lock on pmc->sflock. We
+ * have the socket lock, so reading here is safe.
+ */
+ for (i=0; i<copycount; i++) {
+ struct sockaddr_in6 *psin6;
+ struct sockaddr_storage ss;
+
+ psin6 = (struct sockaddr_in6 *)&ss;
+ memset(&ss, 0, sizeof(ss));
+ psin6->sin6_family = AF_INET6;
+ psin6->sin6_addr = psl->sl_addr[i];
+ if (copy_to_user(&optval->gf_slist[i], &ss, sizeof(ss)))
+ return -EFAULT;
+ }
+ return 0;
+done:
+ read_unlock_bh(&idev->lock);
+ rcu_read_unlock();
+ return err;
+}
+
+int inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr,
+ const struct in6_addr *src_addr)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6_mc_socklist *mc;
+ struct ip6_sf_socklist *psl;
+ int rv = 1;
+
+ rcu_read_lock();
+ for_each_pmc_rcu(np, mc) {
+ if (ipv6_addr_equal(&mc->addr, mc_addr))
+ break;
+ }
+ if (!mc) {
+ rcu_read_unlock();
+ return 1;
+ }
+ read_lock(&mc->sflock);
+ psl = mc->sflist;
+ if (!psl) {
+ rv = mc->sfmode == MCAST_EXCLUDE;
+ } else {
+ int i;
+
+ for (i=0; i<psl->sl_count; i++) {
+ if (ipv6_addr_equal(&psl->sl_addr[i], src_addr))
+ break;
+ }
+ if (mc->sfmode == MCAST_INCLUDE && i >= psl->sl_count)
+ rv = 0;
+ if (mc->sfmode == MCAST_EXCLUDE && i < psl->sl_count)
+ rv = 0;
+ }
+ read_unlock(&mc->sflock);
+ rcu_read_unlock();
+
+ return rv;
+}
+
+static void ma_put(struct ifmcaddr6 *mc)
+{
+ if (atomic_dec_and_test(&mc->mca_refcnt)) {
+ in6_dev_put(mc->idev);
+ kfree(mc);
+ }
+}
+
+static void igmp6_group_added(struct ifmcaddr6 *mc)
+{
+ struct net_device *dev = mc->idev->dev;
+ char buf[MAX_ADDR_LEN];
+
+ spin_lock_bh(&mc->mca_lock);
+ if (!(mc->mca_flags&MAF_LOADED)) {
+ mc->mca_flags |= MAF_LOADED;
+ if (ndisc_mc_map(&mc->mca_addr, buf, dev, 0) == 0)
+ dev_mc_add(dev, buf);
+ }
+ spin_unlock_bh(&mc->mca_lock);
+
+ if (!(dev->flags & IFF_UP) || (mc->mca_flags & MAF_NOREPORT))
+ return;
+
+ if (MLD_V1_SEEN(mc->idev)) {
+ igmp6_join_group(mc);
+ return;
+ }
+ /* else v2 */
+
+ mc->mca_crcount = mc->idev->mc_qrv;
+ mld_ifc_event(mc->idev);
+}
+
+static void igmp6_group_dropped(struct ifmcaddr6 *mc)
+{
+ struct net_device *dev = mc->idev->dev;
+ char buf[MAX_ADDR_LEN];
+
+ spin_lock_bh(&mc->mca_lock);
+ if (mc->mca_flags&MAF_LOADED) {
+ mc->mca_flags &= ~MAF_LOADED;
+ if (ndisc_mc_map(&mc->mca_addr, buf, dev, 0) == 0)
+ dev_mc_del(dev, buf);
+ }
+
+ if (mc->mca_flags & MAF_NOREPORT)
+ goto done;
+ spin_unlock_bh(&mc->mca_lock);
+
+ if (!mc->idev->dead)
+ igmp6_leave_group(mc);
+
+ spin_lock_bh(&mc->mca_lock);
+ if (del_timer(&mc->mca_timer))
+ atomic_dec(&mc->mca_refcnt);
+done:
+ ip6_mc_clear_src(mc);
+ spin_unlock_bh(&mc->mca_lock);
+}
+
+/*
+ * deleted ifmcaddr6 manipulation
+ */
+static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
+{
+ struct ifmcaddr6 *pmc;
+
+ /* this is an "ifmcaddr6" for convenience; only the fields below
+ * are actually used. In particular, the refcnt and users are not
+ * used for management of the delete list. Using the same structure
+ * for deleted items allows change reports to use common code with
+ * non-deleted or query-response MCA's.
+ */
+ pmc = kzalloc(sizeof(*pmc), GFP_ATOMIC);
+ if (!pmc)
+ return;
+
+ spin_lock_bh(&im->mca_lock);
+ spin_lock_init(&pmc->mca_lock);
+ pmc->idev = im->idev;
+ in6_dev_hold(idev);
+ pmc->mca_addr = im->mca_addr;
+ pmc->mca_crcount = idev->mc_qrv;
+ pmc->mca_sfmode = im->mca_sfmode;
+ if (pmc->mca_sfmode == MCAST_INCLUDE) {
+ struct ip6_sf_list *psf;
+
+ pmc->mca_tomb = im->mca_tomb;
+ pmc->mca_sources = im->mca_sources;
+ im->mca_tomb = im->mca_sources = NULL;
+ for (psf=pmc->mca_sources; psf; psf=psf->sf_next)
+ psf->sf_crcount = pmc->mca_crcount;
+ }
+ spin_unlock_bh(&im->mca_lock);
+
+ spin_lock_bh(&idev->mc_lock);
+ pmc->next = idev->mc_tomb;
+ idev->mc_tomb = pmc;
+ spin_unlock_bh(&idev->mc_lock);
+}
+
+static void mld_del_delrec(struct inet6_dev *idev, const struct in6_addr *pmca)
+{
+ struct ifmcaddr6 *pmc, *pmc_prev;
+ struct ip6_sf_list *psf, *psf_next;
+
+ spin_lock_bh(&idev->mc_lock);
+ pmc_prev = NULL;
+ for (pmc=idev->mc_tomb; pmc; pmc=pmc->next) {
+ if (ipv6_addr_equal(&pmc->mca_addr, pmca))
+ break;
+ pmc_prev = pmc;
+ }
+ if (pmc) {
+ if (pmc_prev)
+ pmc_prev->next = pmc->next;
+ else
+ idev->mc_tomb = pmc->next;
+ }
+ spin_unlock_bh(&idev->mc_lock);
+
+ if (pmc) {
+ for (psf=pmc->mca_tomb; psf; psf=psf_next) {
+ psf_next = psf->sf_next;
+ kfree(psf);
+ }
+ in6_dev_put(pmc->idev);
+ kfree(pmc);
+ }
+}
+
+static void mld_clear_delrec(struct inet6_dev *idev)
+{
+ struct ifmcaddr6 *pmc, *nextpmc;
+
+ spin_lock_bh(&idev->mc_lock);
+ pmc = idev->mc_tomb;
+ idev->mc_tomb = NULL;
+ spin_unlock_bh(&idev->mc_lock);
+
+ for (; pmc; pmc = nextpmc) {
+ nextpmc = pmc->next;
+ ip6_mc_clear_src(pmc);
+ in6_dev_put(pmc->idev);
+ kfree(pmc);
+ }
+
+ /* clear dead sources, too */
+ read_lock_bh(&idev->lock);
+ for (pmc=idev->mc_list; pmc; pmc=pmc->next) {
+ struct ip6_sf_list *psf, *psf_next;
+
+ spin_lock_bh(&pmc->mca_lock);
+ psf = pmc->mca_tomb;
+ pmc->mca_tomb = NULL;
+ spin_unlock_bh(&pmc->mca_lock);
+ for (; psf; psf=psf_next) {
+ psf_next = psf->sf_next;
+ kfree(psf);
+ }
+ }
+ read_unlock_bh(&idev->lock);
+}
+
+
+/*
+ * device multicast group inc (add if not found)
+ */
+int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr)
+{
+ struct ifmcaddr6 *mc;
+ struct inet6_dev *idev;
+
+ /* we need to take a reference on idev */
+ idev = in6_dev_get(dev);
+
+ if (idev == NULL)
+ return -EINVAL;
+
+ write_lock_bh(&idev->lock);
+ if (idev->dead) {
+ write_unlock_bh(&idev->lock);
+ in6_dev_put(idev);
+ return -ENODEV;
+ }
+
+ for (mc = idev->mc_list; mc; mc = mc->next) {
+ if (ipv6_addr_equal(&mc->mca_addr, addr)) {
+ mc->mca_users++;
+ write_unlock_bh(&idev->lock);
+ ip6_mc_add_src(idev, &mc->mca_addr, MCAST_EXCLUDE, 0,
+ NULL, 0);
+ in6_dev_put(idev);
+ return 0;
+ }
+ }
+
+ /*
+ * not found: create a new one.
+ */
+
+ mc = kzalloc(sizeof(struct ifmcaddr6), GFP_ATOMIC);
+
+ if (mc == NULL) {
+ write_unlock_bh(&idev->lock);
+ in6_dev_put(idev);
+ return -ENOMEM;
+ }
+
+ setup_timer(&mc->mca_timer, igmp6_timer_handler, (unsigned long)mc);
+
+ mc->mca_addr = *addr;
+ mc->idev = idev; /* (reference taken) */
+ mc->mca_users = 1;
+ /* mca_stamp should be updated upon changes */
+ mc->mca_cstamp = mc->mca_tstamp = jiffies;
+ atomic_set(&mc->mca_refcnt, 2);
+ spin_lock_init(&mc->mca_lock);
+
+ /* initial mode is (EX, empty) */
+ mc->mca_sfmode = MCAST_EXCLUDE;
+ mc->mca_sfcount[MCAST_EXCLUDE] = 1;
+
+ if (ipv6_addr_is_ll_all_nodes(&mc->mca_addr) ||
+ IPV6_ADDR_MC_SCOPE(&mc->mca_addr) < IPV6_ADDR_SCOPE_LINKLOCAL)
+ mc->mca_flags |= MAF_NOREPORT;
+
+ mc->next = idev->mc_list;
+ idev->mc_list = mc;
+ write_unlock_bh(&idev->lock);
+
+ mld_del_delrec(idev, &mc->mca_addr);
+ igmp6_group_added(mc);
+ ma_put(mc);
+ return 0;
+}
+
+/*
+ * device multicast group del
+ */
+int __ipv6_dev_mc_dec(struct inet6_dev *idev, const struct in6_addr *addr)
+{
+ struct ifmcaddr6 *ma, **map;
+
+ write_lock_bh(&idev->lock);
+ for (map = &idev->mc_list; (ma=*map) != NULL; map = &ma->next) {
+ if (ipv6_addr_equal(&ma->mca_addr, addr)) {
+ if (--ma->mca_users == 0) {
+ *map = ma->next;
+ write_unlock_bh(&idev->lock);
+
+ igmp6_group_dropped(ma);
+
+ ma_put(ma);
+ return 0;
+ }
+ write_unlock_bh(&idev->lock);
+ return 0;
+ }
+ }
+ write_unlock_bh(&idev->lock);
+
+ return -ENOENT;
+}
+
+int ipv6_dev_mc_dec(struct net_device *dev, const struct in6_addr *addr)
+{
+ struct inet6_dev *idev;
+ int err;
+
+ rcu_read_lock();
+
+ idev = __in6_dev_get(dev);
+ if (!idev)
+ err = -ENODEV;
+ else
+ err = __ipv6_dev_mc_dec(idev, addr);
+
+ rcu_read_unlock();
+ return err;
+}
+
+/*
+ * identify MLD packets for MLD filter exceptions
+ */
+int ipv6_is_mld(struct sk_buff *skb, int nexthdr)
+{
+ struct icmp6hdr *pic;
+
+ if (nexthdr != IPPROTO_ICMPV6)
+ return 0;
+
+ if (!pskb_may_pull(skb, sizeof(struct icmp6hdr)))
+ return 0;
+
+ pic = icmp6_hdr(skb);
+
+ switch (pic->icmp6_type) {
+ case ICMPV6_MGM_QUERY:
+ case ICMPV6_MGM_REPORT:
+ case ICMPV6_MGM_REDUCTION:
+ case ICMPV6_MLD2_REPORT:
+ return 1;
+ default:
+ break;
+ }
+ return 0;
+}
+
+/*
+ * check if the interface/address pair is valid
+ */
+int ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group,
+ const struct in6_addr *src_addr)
+{
+ struct inet6_dev *idev;
+ struct ifmcaddr6 *mc;
+ int rv = 0;
+
+ rcu_read_lock();
+ idev = __in6_dev_get(dev);
+ if (idev) {
+ read_lock_bh(&idev->lock);
+ for (mc = idev->mc_list; mc; mc=mc->next) {
+ if (ipv6_addr_equal(&mc->mca_addr, group))
+ break;
+ }
+ if (mc) {
+ if (src_addr && !ipv6_addr_any(src_addr)) {
+ struct ip6_sf_list *psf;
+
+ spin_lock_bh(&mc->mca_lock);
+ for (psf=mc->mca_sources;psf;psf=psf->sf_next) {
+ if (ipv6_addr_equal(&psf->sf_addr, src_addr))
+ break;
+ }
+ if (psf)
+ rv = psf->sf_count[MCAST_INCLUDE] ||
+ psf->sf_count[MCAST_EXCLUDE] !=
+ mc->mca_sfcount[MCAST_EXCLUDE];
+ else
+ rv = mc->mca_sfcount[MCAST_EXCLUDE] !=0;
+ spin_unlock_bh(&mc->mca_lock);
+ } else
+ rv = 1; /* don't filter unspecified source */
+ }
+ read_unlock_bh(&idev->lock);
+ }
+ rcu_read_unlock();
+ return rv;
+}
+
+static void mld_gq_start_timer(struct inet6_dev *idev)
+{
+ int tv = net_random() % idev->mc_maxdelay;
+
+ idev->mc_gq_running = 1;
+ if (!mod_timer(&idev->mc_gq_timer, jiffies+tv+2))
+ in6_dev_hold(idev);
+}
+
+static void mld_ifc_start_timer(struct inet6_dev *idev, int delay)
+{
+ int tv = net_random() % delay;
+
+ if (!mod_timer(&idev->mc_ifc_timer, jiffies+tv+2))
+ in6_dev_hold(idev);
+}
+
+/*
+ * IGMP handling (alias multicast ICMPv6 messages)
+ */
+
+static void igmp6_group_queried(struct ifmcaddr6 *ma, unsigned long resptime)
+{
+ unsigned long delay = resptime;
+
+ /* Do not start timer for these addresses */
+ if (ipv6_addr_is_ll_all_nodes(&ma->mca_addr) ||
+ IPV6_ADDR_MC_SCOPE(&ma->mca_addr) < IPV6_ADDR_SCOPE_LINKLOCAL)
+ return;
+
+ if (del_timer(&ma->mca_timer)) {
+ atomic_dec(&ma->mca_refcnt);
+ delay = ma->mca_timer.expires - jiffies;
+ }
+
+ if (delay >= resptime) {
+ if (resptime)
+ delay = net_random() % resptime;
+ else
+ delay = 1;
+ }
+ ma->mca_timer.expires = jiffies + delay;
+ if (!mod_timer(&ma->mca_timer, jiffies + delay))
+ atomic_inc(&ma->mca_refcnt);
+ ma->mca_flags |= MAF_TIMER_RUNNING;
+}
+
+/* mark EXCLUDE-mode sources */
+static int mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs,
+ const struct in6_addr *srcs)
+{
+ struct ip6_sf_list *psf;
+ int i, scount;
+
+ scount = 0;
+ for (psf=pmc->mca_sources; psf; psf=psf->sf_next) {
+ if (scount == nsrcs)
+ break;
+ for (i=0; i<nsrcs; i++) {
+ /* skip inactive filters */
+ if (psf->sf_count[MCAST_INCLUDE] ||
+ pmc->mca_sfcount[MCAST_EXCLUDE] !=
+ psf->sf_count[MCAST_EXCLUDE])
+ continue;
+ if (ipv6_addr_equal(&srcs[i], &psf->sf_addr)) {
+ scount++;
+ break;
+ }
+ }
+ }
+ pmc->mca_flags &= ~MAF_GSQUERY;
+ if (scount == nsrcs) /* all sources excluded */
+ return 0;
+ return 1;
+}
+
+static int mld_marksources(struct ifmcaddr6 *pmc, int nsrcs,
+ const struct in6_addr *srcs)
+{
+ struct ip6_sf_list *psf;
+ int i, scount;
+
+ if (pmc->mca_sfmode == MCAST_EXCLUDE)
+ return mld_xmarksources(pmc, nsrcs, srcs);
+
+ /* mark INCLUDE-mode sources */
+
+ scount = 0;
+ for (psf=pmc->mca_sources; psf; psf=psf->sf_next) {
+ if (scount == nsrcs)
+ break;
+ for (i=0; i<nsrcs; i++) {
+ if (ipv6_addr_equal(&srcs[i], &psf->sf_addr)) {
+ psf->sf_gsresp = 1;
+ scount++;
+ break;
+ }
+ }
+ }
+ if (!scount) {
+ pmc->mca_flags &= ~MAF_GSQUERY;
+ return 0;
+ }
+ pmc->mca_flags |= MAF_GSQUERY;
+ return 1;
+}
+
+/* called with rcu_read_lock() */
+int igmp6_event_query(struct sk_buff *skb)
+{
+ struct mld2_query *mlh2 = NULL;
+ struct ifmcaddr6 *ma;
+ const struct in6_addr *group;
+ unsigned long max_delay;
+ struct inet6_dev *idev;
+ struct mld_msg *mld;
+ int group_type;
+ int mark = 0;
+ int len;
+
+ if (!pskb_may_pull(skb, sizeof(struct in6_addr)))
+ return -EINVAL;
+
+ /* compute payload length excluding extension headers */
+ len = ntohs(ipv6_hdr(skb)->payload_len) + sizeof(struct ipv6hdr);
+ len -= skb_network_header_len(skb);
+
+ /* Drop queries with not link local source */
+ if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL))
+ return -EINVAL;
+
+ idev = __in6_dev_get(skb->dev);
+
+ if (idev == NULL)
+ return 0;
+
+ mld = (struct mld_msg *)icmp6_hdr(skb);
+ group = &mld->mld_mca;
+ group_type = ipv6_addr_type(group);
+
+ if (group_type != IPV6_ADDR_ANY &&
+ !(group_type&IPV6_ADDR_MULTICAST))
+ return -EINVAL;
+
+ if (len == 24) {
+ int switchback;
+ /* MLDv1 router present */
+
+ /* Translate milliseconds to jiffies */
+ max_delay = (ntohs(mld->mld_maxdelay)*HZ)/1000;
+
+ switchback = (idev->mc_qrv + 1) * max_delay;
+ idev->mc_v1_seen = jiffies + switchback;
+
+ /* cancel the interface change timer */
+ idev->mc_ifc_count = 0;
+ if (del_timer(&idev->mc_ifc_timer))
+ __in6_dev_put(idev);
+ /* clear deleted report items */
+ mld_clear_delrec(idev);
+ } else if (len >= 28) {
+ int srcs_offset = sizeof(struct mld2_query) -
+ sizeof(struct icmp6hdr);
+ if (!pskb_may_pull(skb, srcs_offset))
+ return -EINVAL;
+
+ mlh2 = (struct mld2_query *)skb_transport_header(skb);
+ max_delay = (MLDV2_MRC(ntohs(mlh2->mld2q_mrc))*HZ)/1000;
+ if (!max_delay)
+ max_delay = 1;
+ idev->mc_maxdelay = max_delay;
+ if (mlh2->mld2q_qrv)
+ idev->mc_qrv = mlh2->mld2q_qrv;
+ if (group_type == IPV6_ADDR_ANY) { /* general query */
+ if (mlh2->mld2q_nsrcs)
+ return -EINVAL; /* no sources allowed */
+
+ mld_gq_start_timer(idev);
+ return 0;
+ }
+ /* mark sources to include, if group & source-specific */
+ if (mlh2->mld2q_nsrcs != 0) {
+ if (!pskb_may_pull(skb, srcs_offset +
+ ntohs(mlh2->mld2q_nsrcs) * sizeof(struct in6_addr)))
+ return -EINVAL;
+
+ mlh2 = (struct mld2_query *)skb_transport_header(skb);
+ mark = 1;
+ }
+ } else
+ return -EINVAL;
+
+ read_lock_bh(&idev->lock);
+ if (group_type == IPV6_ADDR_ANY) {
+ for (ma = idev->mc_list; ma; ma=ma->next) {
+ spin_lock_bh(&ma->mca_lock);
+ igmp6_group_queried(ma, max_delay);
+ spin_unlock_bh(&ma->mca_lock);
+ }
+ } else {
+ for (ma = idev->mc_list; ma; ma=ma->next) {
+ if (!ipv6_addr_equal(group, &ma->mca_addr))
+ continue;
+ spin_lock_bh(&ma->mca_lock);
+ if (ma->mca_flags & MAF_TIMER_RUNNING) {
+ /* gsquery <- gsquery && mark */
+ if (!mark)
+ ma->mca_flags &= ~MAF_GSQUERY;
+ } else {
+ /* gsquery <- mark */
+ if (mark)
+ ma->mca_flags |= MAF_GSQUERY;
+ else
+ ma->mca_flags &= ~MAF_GSQUERY;
+ }
+ if (!(ma->mca_flags & MAF_GSQUERY) ||
+ mld_marksources(ma, ntohs(mlh2->mld2q_nsrcs), mlh2->mld2q_srcs))
+ igmp6_group_queried(ma, max_delay);
+ spin_unlock_bh(&ma->mca_lock);
+ break;
+ }
+ }
+ read_unlock_bh(&idev->lock);
+
+ return 0;
+}
+
+/* called with rcu_read_lock() */
+int igmp6_event_report(struct sk_buff *skb)
+{
+ struct ifmcaddr6 *ma;
+ struct inet6_dev *idev;
+ struct mld_msg *mld;
+ int addr_type;
+
+ /* Our own report looped back. Ignore it. */
+ if (skb->pkt_type == PACKET_LOOPBACK)
+ return 0;
+
+ /* send our report if the MC router may not have heard this report */
+ if (skb->pkt_type != PACKET_MULTICAST &&
+ skb->pkt_type != PACKET_BROADCAST)
+ return 0;
+
+ if (!pskb_may_pull(skb, sizeof(*mld) - sizeof(struct icmp6hdr)))
+ return -EINVAL;
+
+ mld = (struct mld_msg *)icmp6_hdr(skb);
+
+ /* Drop reports with not link local source */
+ addr_type = ipv6_addr_type(&ipv6_hdr(skb)->saddr);
+ if (addr_type != IPV6_ADDR_ANY &&
+ !(addr_type&IPV6_ADDR_LINKLOCAL))
+ return -EINVAL;
+
+ idev = __in6_dev_get(skb->dev);
+ if (idev == NULL)
+ return -ENODEV;
+
+ /*
+ * Cancel the timer for this group
+ */
+
+ read_lock_bh(&idev->lock);
+ for (ma = idev->mc_list; ma; ma=ma->next) {
+ if (ipv6_addr_equal(&ma->mca_addr, &mld->mld_mca)) {
+ spin_lock(&ma->mca_lock);
+ if (del_timer(&ma->mca_timer))
+ atomic_dec(&ma->mca_refcnt);
+ ma->mca_flags &= ~(MAF_LAST_REPORTER|MAF_TIMER_RUNNING);
+ spin_unlock(&ma->mca_lock);
+ break;
+ }
+ }
+ read_unlock_bh(&idev->lock);
+ return 0;
+}
+
+static int is_in(struct ifmcaddr6 *pmc, struct ip6_sf_list *psf, int type,
+ int gdeleted, int sdeleted)
+{
+ switch (type) {
+ case MLD2_MODE_IS_INCLUDE:
+ case MLD2_MODE_IS_EXCLUDE:
+ if (gdeleted || sdeleted)
+ return 0;
+ if (!((pmc->mca_flags & MAF_GSQUERY) && !psf->sf_gsresp)) {
+ if (pmc->mca_sfmode == MCAST_INCLUDE)
+ return 1;
+ /* don't include if this source is excluded
+ * in all filters
+ */
+ if (psf->sf_count[MCAST_INCLUDE])
+ return type == MLD2_MODE_IS_INCLUDE;
+ return pmc->mca_sfcount[MCAST_EXCLUDE] ==
+ psf->sf_count[MCAST_EXCLUDE];
+ }
+ return 0;
+ case MLD2_CHANGE_TO_INCLUDE:
+ if (gdeleted || sdeleted)
+ return 0;
+ return psf->sf_count[MCAST_INCLUDE] != 0;
+ case MLD2_CHANGE_TO_EXCLUDE:
+ if (gdeleted || sdeleted)
+ return 0;
+ if (pmc->mca_sfcount[MCAST_EXCLUDE] == 0 ||
+ psf->sf_count[MCAST_INCLUDE])
+ return 0;
+ return pmc->mca_sfcount[MCAST_EXCLUDE] ==
+ psf->sf_count[MCAST_EXCLUDE];
+ case MLD2_ALLOW_NEW_SOURCES:
+ if (gdeleted || !psf->sf_crcount)
+ return 0;
+ return (pmc->mca_sfmode == MCAST_INCLUDE) ^ sdeleted;
+ case MLD2_BLOCK_OLD_SOURCES:
+ if (pmc->mca_sfmode == MCAST_INCLUDE)
+ return gdeleted || (psf->sf_crcount && sdeleted);
+ return psf->sf_crcount && !gdeleted && !sdeleted;
+ }
+ return 0;
+}
+
+static int
+mld_scount(struct ifmcaddr6 *pmc, int type, int gdeleted, int sdeleted)
+{
+ struct ip6_sf_list *psf;
+ int scount = 0;
+
+ for (psf=pmc->mca_sources; psf; psf=psf->sf_next) {
+ if (!is_in(pmc, psf, type, gdeleted, sdeleted))
+ continue;
+ scount++;
+ }
+ return scount;
+}
+
+static struct sk_buff *mld_newpack(struct net_device *dev, int size)
+{
+ struct net *net = dev_net(dev);
+ struct sock *sk = net->ipv6.igmp_sk;
+ struct sk_buff *skb;
+ struct mld2_report *pmr;
+ struct in6_addr addr_buf;
+ const struct in6_addr *saddr;
+ int hlen = LL_RESERVED_SPACE(dev);
+ int tlen = dev->needed_tailroom;
+ int err;
+ u8 ra[8] = { IPPROTO_ICMPV6, 0,
+ IPV6_TLV_ROUTERALERT, 2, 0, 0,
+ IPV6_TLV_PADN, 0 };
+
+ /* we assume size > sizeof(ra) here */
+ size += hlen + tlen;
+ /* limit our allocations to order-0 page */
+ size = min_t(int, size, SKB_MAX_ORDER(0, 0));
+ skb = sock_alloc_send_skb(sk, size, 1, &err);
+
+ if (!skb)
+ return NULL;
+
+ skb_reserve(skb, hlen);
+
+ if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) {
+ /* <draft-ietf-magma-mld-source-05.txt>:
+ * use unspecified address as the source address
+ * when a valid link-local address is not available.
+ */
+ saddr = &in6addr_any;
+ } else
+ saddr = &addr_buf;
+
+ ip6_nd_hdr(sk, skb, dev, saddr, &mld2_all_mcr, NEXTHDR_HOP, 0);
+
+ memcpy(skb_put(skb, sizeof(ra)), ra, sizeof(ra));
+
+ skb_set_transport_header(skb, skb_tail_pointer(skb) - skb->data);
+ skb_put(skb, sizeof(*pmr));
+ pmr = (struct mld2_report *)skb_transport_header(skb);
+ pmr->mld2r_type = ICMPV6_MLD2_REPORT;
+ pmr->mld2r_resv1 = 0;
+ pmr->mld2r_cksum = 0;
+ pmr->mld2r_resv2 = 0;
+ pmr->mld2r_ngrec = 0;
+ return skb;
+}
+
+static void mld_sendpack(struct sk_buff *skb)
+{
+ struct ipv6hdr *pip6 = ipv6_hdr(skb);
+ struct mld2_report *pmr =
+ (struct mld2_report *)skb_transport_header(skb);
+ int payload_len, mldlen;
+ struct inet6_dev *idev;
+ struct net *net = dev_net(skb->dev);
+ int err;
+ struct flowi6 fl6;
+ struct dst_entry *dst;
+
+ rcu_read_lock();
+ idev = __in6_dev_get(skb->dev);
+ IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
+
+ payload_len = (skb->tail - skb->network_header) - sizeof(*pip6);
+ mldlen = skb->tail - skb->transport_header;
+ pip6->payload_len = htons(payload_len);
+
+ pmr->mld2r_cksum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen,
+ IPPROTO_ICMPV6,
+ csum_partial(skb_transport_header(skb),
+ mldlen, 0));
+
+ icmpv6_flow_init(net->ipv6.igmp_sk, &fl6, ICMPV6_MLD2_REPORT,
+ &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
+ skb->dev->ifindex);
+ dst = icmp6_dst_alloc(skb->dev, NULL, &fl6);
+
+ err = 0;
+ if (IS_ERR(dst)) {
+ err = PTR_ERR(dst);
+ dst = NULL;
+ }
+ skb_dst_set(skb, dst);
+ if (err)
+ goto err_out;
+
+ payload_len = skb->len;
+
+ err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev,
+ dst_output);
+out:
+ if (!err) {
+ ICMP6MSGOUT_INC_STATS_BH(net, idev, ICMPV6_MLD2_REPORT);
+ ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
+ IP6_UPD_PO_STATS_BH(net, idev, IPSTATS_MIB_OUTMCAST, payload_len);
+ } else
+ IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_OUTDISCARDS);
+
+ rcu_read_unlock();
+ return;
+
+err_out:
+ kfree_skb(skb);
+ goto out;
+}
+
+static int grec_size(struct ifmcaddr6 *pmc, int type, int gdel, int sdel)
+{
+ return sizeof(struct mld2_grec) + 16 * mld_scount(pmc,type,gdel,sdel);
+}
+
+static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc,
+ int type, struct mld2_grec **ppgr)
+{
+ struct net_device *dev = pmc->idev->dev;
+ struct mld2_report *pmr;
+ struct mld2_grec *pgr;
+
+ if (!skb)
+ skb = mld_newpack(dev, dev->mtu);
+ if (!skb)
+ return NULL;
+ pgr = (struct mld2_grec *)skb_put(skb, sizeof(struct mld2_grec));
+ pgr->grec_type = type;
+ pgr->grec_auxwords = 0;
+ pgr->grec_nsrcs = 0;
+ pgr->grec_mca = pmc->mca_addr; /* structure copy */
+ pmr = (struct mld2_report *)skb_transport_header(skb);
+ pmr->mld2r_ngrec = htons(ntohs(pmr->mld2r_ngrec)+1);
+ *ppgr = pgr;
+ return skb;
+}
+
+#define AVAILABLE(skb) ((skb) ? ((skb)->dev ? (skb)->dev->mtu - (skb)->len : \
+ skb_tailroom(skb)) : 0)
+
+static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
+ int type, int gdeleted, int sdeleted)
+{
+ struct net_device *dev = pmc->idev->dev;
+ struct mld2_report *pmr;
+ struct mld2_grec *pgr = NULL;
+ struct ip6_sf_list *psf, *psf_next, *psf_prev, **psf_list;
+ int scount, stotal, first, isquery, truncate;
+
+ if (pmc->mca_flags & MAF_NOREPORT)
+ return skb;
+
+ isquery = type == MLD2_MODE_IS_INCLUDE ||
+ type == MLD2_MODE_IS_EXCLUDE;
+ truncate = type == MLD2_MODE_IS_EXCLUDE ||
+ type == MLD2_CHANGE_TO_EXCLUDE;
+
+ stotal = scount = 0;
+
+ psf_list = sdeleted ? &pmc->mca_tomb : &pmc->mca_sources;
+
+ if (!*psf_list)
+ goto empty_source;
+
+ pmr = skb ? (struct mld2_report *)skb_transport_header(skb) : NULL;
+
+ /* EX and TO_EX get a fresh packet, if needed */
+ if (truncate) {
+ if (pmr && pmr->mld2r_ngrec &&
+ AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) {
+ if (skb)
+ mld_sendpack(skb);
+ skb = mld_newpack(dev, dev->mtu);
+ }
+ }
+ first = 1;
+ psf_prev = NULL;
+ for (psf=*psf_list; psf; psf=psf_next) {
+ struct in6_addr *psrc;
+
+ psf_next = psf->sf_next;
+
+ if (!is_in(pmc, psf, type, gdeleted, sdeleted)) {
+ psf_prev = psf;
+ continue;
+ }
+
+ /* clear marks on query responses */
+ if (isquery)
+ psf->sf_gsresp = 0;
+
+ if (AVAILABLE(skb) < sizeof(*psrc) +
+ first*sizeof(struct mld2_grec)) {
+ if (truncate && !first)
+ break; /* truncate these */
+ if (pgr)
+ pgr->grec_nsrcs = htons(scount);
+ if (skb)
+ mld_sendpack(skb);
+ skb = mld_newpack(dev, dev->mtu);
+ first = 1;
+ scount = 0;
+ }
+ if (first) {
+ skb = add_grhead(skb, pmc, type, &pgr);
+ first = 0;
+ }
+ if (!skb)
+ return NULL;
+ psrc = (struct in6_addr *)skb_put(skb, sizeof(*psrc));
+ *psrc = psf->sf_addr;
+ scount++; stotal++;
+ if ((type == MLD2_ALLOW_NEW_SOURCES ||
+ type == MLD2_BLOCK_OLD_SOURCES) && psf->sf_crcount) {
+ psf->sf_crcount--;
+ if ((sdeleted || gdeleted) && psf->sf_crcount == 0) {
+ if (psf_prev)
+ psf_prev->sf_next = psf->sf_next;
+ else
+ *psf_list = psf->sf_next;
+ kfree(psf);
+ continue;
+ }
+ }
+ psf_prev = psf;
+ }
+
+empty_source:
+ if (!stotal) {
+ if (type == MLD2_ALLOW_NEW_SOURCES ||
+ type == MLD2_BLOCK_OLD_SOURCES)
+ return skb;
+ if (pmc->mca_crcount || isquery) {
+ /* make sure we have room for group header */
+ if (skb && AVAILABLE(skb) < sizeof(struct mld2_grec)) {
+ mld_sendpack(skb);
+ skb = NULL; /* add_grhead will get a new one */
+ }
+ skb = add_grhead(skb, pmc, type, &pgr);
+ }
+ }
+ if (pgr)
+ pgr->grec_nsrcs = htons(scount);
+
+ if (isquery)
+ pmc->mca_flags &= ~MAF_GSQUERY; /* clear query state */
+ return skb;
+}
+
+static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc)
+{
+ struct sk_buff *skb = NULL;
+ int type;
+
+ if (!pmc) {
+ read_lock_bh(&idev->lock);
+ for (pmc=idev->mc_list; pmc; pmc=pmc->next) {
+ if (pmc->mca_flags & MAF_NOREPORT)
+ continue;
+ spin_lock_bh(&pmc->mca_lock);
+ if (pmc->mca_sfcount[MCAST_EXCLUDE])
+ type = MLD2_MODE_IS_EXCLUDE;
+ else
+ type = MLD2_MODE_IS_INCLUDE;
+ skb = add_grec(skb, pmc, type, 0, 0);
+ spin_unlock_bh(&pmc->mca_lock);
+ }
+ read_unlock_bh(&idev->lock);
+ } else {
+ spin_lock_bh(&pmc->mca_lock);
+ if (pmc->mca_sfcount[MCAST_EXCLUDE])
+ type = MLD2_MODE_IS_EXCLUDE;
+ else
+ type = MLD2_MODE_IS_INCLUDE;
+ skb = add_grec(skb, pmc, type, 0, 0);
+ spin_unlock_bh(&pmc->mca_lock);
+ }
+ if (skb)
+ mld_sendpack(skb);
+}
+
+/*
+ * remove zero-count source records from a source filter list
+ */
+static void mld_clear_zeros(struct ip6_sf_list **ppsf)
+{
+ struct ip6_sf_list *psf_prev, *psf_next, *psf;
+
+ psf_prev = NULL;
+ for (psf=*ppsf; psf; psf = psf_next) {
+ psf_next = psf->sf_next;
+ if (psf->sf_crcount == 0) {
+ if (psf_prev)
+ psf_prev->sf_next = psf->sf_next;
+ else
+ *ppsf = psf->sf_next;
+ kfree(psf);
+ } else
+ psf_prev = psf;
+ }
+}
+
+static void mld_send_cr(struct inet6_dev *idev)
+{
+ struct ifmcaddr6 *pmc, *pmc_prev, *pmc_next;
+ struct sk_buff *skb = NULL;
+ int type, dtype;
+
+ read_lock_bh(&idev->lock);
+ spin_lock(&idev->mc_lock);
+
+ /* deleted MCA's */
+ pmc_prev = NULL;
+ for (pmc=idev->mc_tomb; pmc; pmc=pmc_next) {
+ pmc_next = pmc->next;
+ if (pmc->mca_sfmode == MCAST_INCLUDE) {
+ type = MLD2_BLOCK_OLD_SOURCES;
+ dtype = MLD2_BLOCK_OLD_SOURCES;
+ skb = add_grec(skb, pmc, type, 1, 0);
+ skb = add_grec(skb, pmc, dtype, 1, 1);
+ }
+ if (pmc->mca_crcount) {
+ if (pmc->mca_sfmode == MCAST_EXCLUDE) {
+ type = MLD2_CHANGE_TO_INCLUDE;
+ skb = add_grec(skb, pmc, type, 1, 0);
+ }
+ pmc->mca_crcount--;
+ if (pmc->mca_crcount == 0) {
+ mld_clear_zeros(&pmc->mca_tomb);
+ mld_clear_zeros(&pmc->mca_sources);
+ }
+ }
+ if (pmc->mca_crcount == 0 && !pmc->mca_tomb &&
+ !pmc->mca_sources) {
+ if (pmc_prev)
+ pmc_prev->next = pmc_next;
+ else
+ idev->mc_tomb = pmc_next;
+ in6_dev_put(pmc->idev);
+ kfree(pmc);
+ } else
+ pmc_prev = pmc;
+ }
+ spin_unlock(&idev->mc_lock);
+
+ /* change recs */
+ for (pmc=idev->mc_list; pmc; pmc=pmc->next) {
+ spin_lock_bh(&pmc->mca_lock);
+ if (pmc->mca_sfcount[MCAST_EXCLUDE]) {
+ type = MLD2_BLOCK_OLD_SOURCES;
+ dtype = MLD2_ALLOW_NEW_SOURCES;
+ } else {
+ type = MLD2_ALLOW_NEW_SOURCES;
+ dtype = MLD2_BLOCK_OLD_SOURCES;
+ }
+ skb = add_grec(skb, pmc, type, 0, 0);
+ skb = add_grec(skb, pmc, dtype, 0, 1); /* deleted sources */
+
+ /* filter mode changes */
+ if (pmc->mca_crcount) {
+ if (pmc->mca_sfmode == MCAST_EXCLUDE)
+ type = MLD2_CHANGE_TO_EXCLUDE;
+ else
+ type = MLD2_CHANGE_TO_INCLUDE;
+ skb = add_grec(skb, pmc, type, 0, 0);
+ pmc->mca_crcount--;
+ }
+ spin_unlock_bh(&pmc->mca_lock);
+ }
+ read_unlock_bh(&idev->lock);
+ if (!skb)
+ return;
+ (void) mld_sendpack(skb);
+}
+
+static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
+{
+ struct net *net = dev_net(dev);
+ struct sock *sk = net->ipv6.igmp_sk;
+ struct inet6_dev *idev;
+ struct sk_buff *skb;
+ struct mld_msg *hdr;
+ const struct in6_addr *snd_addr, *saddr;
+ struct in6_addr addr_buf;
+ int hlen = LL_RESERVED_SPACE(dev);
+ int tlen = dev->needed_tailroom;
+ int err, len, payload_len, full_len;
+ u8 ra[8] = { IPPROTO_ICMPV6, 0,
+ IPV6_TLV_ROUTERALERT, 2, 0, 0,
+ IPV6_TLV_PADN, 0 };
+ struct flowi6 fl6;
+ struct dst_entry *dst;
+
+ if (type == ICMPV6_MGM_REDUCTION)
+ snd_addr = &in6addr_linklocal_allrouters;
+ else
+ snd_addr = addr;
+
+ len = sizeof(struct icmp6hdr) + sizeof(struct in6_addr);
+ payload_len = len + sizeof(ra);
+ full_len = sizeof(struct ipv6hdr) + payload_len;
+
+ rcu_read_lock();
+ IP6_UPD_PO_STATS(net, __in6_dev_get(dev),
+ IPSTATS_MIB_OUT, full_len);
+ rcu_read_unlock();
+
+ skb = sock_alloc_send_skb(sk, hlen + tlen + full_len, 1, &err);
+
+ if (skb == NULL) {
+ rcu_read_lock();
+ IP6_INC_STATS(net, __in6_dev_get(dev),
+ IPSTATS_MIB_OUTDISCARDS);
+ rcu_read_unlock();
+ return;
+ }
+
+ skb_reserve(skb, hlen);
+
+ if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) {
+ /* <draft-ietf-magma-mld-source-05.txt>:
+ * use unspecified address as the source address
+ * when a valid link-local address is not available.
+ */
+ saddr = &in6addr_any;
+ } else
+ saddr = &addr_buf;
+
+ ip6_nd_hdr(sk, skb, dev, saddr, snd_addr, NEXTHDR_HOP, payload_len);
+
+ memcpy(skb_put(skb, sizeof(ra)), ra, sizeof(ra));
+
+ hdr = (struct mld_msg *) skb_put(skb, sizeof(struct mld_msg));
+ memset(hdr, 0, sizeof(struct mld_msg));
+ hdr->mld_type = type;
+ hdr->mld_mca = *addr;
+
+ hdr->mld_cksum = csum_ipv6_magic(saddr, snd_addr, len,
+ IPPROTO_ICMPV6,
+ csum_partial(hdr, len, 0));
+
+ rcu_read_lock();
+ idev = __in6_dev_get(skb->dev);
+
+ icmpv6_flow_init(sk, &fl6, type,
+ &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
+ skb->dev->ifindex);
+ dst = icmp6_dst_alloc(skb->dev, NULL, &fl6);
+ if (IS_ERR(dst)) {
+ err = PTR_ERR(dst);
+ goto err_out;
+ }
+
+ skb_dst_set(skb, dst);
+ err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev,
+ dst_output);
+out:
+ if (!err) {
+ ICMP6MSGOUT_INC_STATS(net, idev, type);
+ ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
+ IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, full_len);
+ } else
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
+
+ rcu_read_unlock();
+ return;
+
+err_out:
+ kfree_skb(skb);
+ goto out;
+}
+
+static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode,
+ const struct in6_addr *psfsrc)
+{
+ struct ip6_sf_list *psf, *psf_prev;
+ int rv = 0;
+
+ psf_prev = NULL;
+ for (psf=pmc->mca_sources; psf; psf=psf->sf_next) {
+ if (ipv6_addr_equal(&psf->sf_addr, psfsrc))
+ break;
+ psf_prev = psf;
+ }
+ if (!psf || psf->sf_count[sfmode] == 0) {
+ /* source filter not found, or count wrong => bug */
+ return -ESRCH;
+ }
+ psf->sf_count[sfmode]--;
+ if (!psf->sf_count[MCAST_INCLUDE] && !psf->sf_count[MCAST_EXCLUDE]) {
+ struct inet6_dev *idev = pmc->idev;
+
+ /* no more filters for this source */
+ if (psf_prev)
+ psf_prev->sf_next = psf->sf_next;
+ else
+ pmc->mca_sources = psf->sf_next;
+ if (psf->sf_oldin && !(pmc->mca_flags & MAF_NOREPORT) &&
+ !MLD_V1_SEEN(idev)) {
+ psf->sf_crcount = idev->mc_qrv;
+ psf->sf_next = pmc->mca_tomb;
+ pmc->mca_tomb = psf;
+ rv = 1;
+ } else
+ kfree(psf);
+ }
+ return rv;
+}
+
+static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca,
+ int sfmode, int sfcount, const struct in6_addr *psfsrc,
+ int delta)
+{
+ struct ifmcaddr6 *pmc;
+ int changerec = 0;
+ int i, err;
+
+ if (!idev)
+ return -ENODEV;
+ read_lock_bh(&idev->lock);
+ for (pmc=idev->mc_list; pmc; pmc=pmc->next) {
+ if (ipv6_addr_equal(pmca, &pmc->mca_addr))
+ break;
+ }
+ if (!pmc) {
+ /* MCA not found?? bug */
+ read_unlock_bh(&idev->lock);
+ return -ESRCH;
+ }
+ spin_lock_bh(&pmc->mca_lock);
+ sf_markstate(pmc);
+ if (!delta) {
+ if (!pmc->mca_sfcount[sfmode]) {
+ spin_unlock_bh(&pmc->mca_lock);
+ read_unlock_bh(&idev->lock);
+ return -EINVAL;
+ }
+ pmc->mca_sfcount[sfmode]--;
+ }
+ err = 0;
+ for (i=0; i<sfcount; i++) {
+ int rv = ip6_mc_del1_src(pmc, sfmode, &psfsrc[i]);
+
+ changerec |= rv > 0;
+ if (!err && rv < 0)
+ err = rv;
+ }
+ if (pmc->mca_sfmode == MCAST_EXCLUDE &&
+ pmc->mca_sfcount[MCAST_EXCLUDE] == 0 &&
+ pmc->mca_sfcount[MCAST_INCLUDE]) {
+ struct ip6_sf_list *psf;
+
+ /* filter mode change */
+ pmc->mca_sfmode = MCAST_INCLUDE;
+ pmc->mca_crcount = idev->mc_qrv;
+ idev->mc_ifc_count = pmc->mca_crcount;
+ for (psf=pmc->mca_sources; psf; psf = psf->sf_next)
+ psf->sf_crcount = 0;
+ mld_ifc_event(pmc->idev);
+ } else if (sf_setstate(pmc) || changerec)
+ mld_ifc_event(pmc->idev);
+ spin_unlock_bh(&pmc->mca_lock);
+ read_unlock_bh(&idev->lock);
+ return err;
+}
+
+/*
+ * Add multicast single-source filter to the interface list
+ */
+static int ip6_mc_add1_src(struct ifmcaddr6 *pmc, int sfmode,
+ const struct in6_addr *psfsrc)
+{
+ struct ip6_sf_list *psf, *psf_prev;
+
+ psf_prev = NULL;
+ for (psf=pmc->mca_sources; psf; psf=psf->sf_next) {
+ if (ipv6_addr_equal(&psf->sf_addr, psfsrc))
+ break;
+ psf_prev = psf;
+ }
+ if (!psf) {
+ psf = kzalloc(sizeof(*psf), GFP_ATOMIC);
+ if (!psf)
+ return -ENOBUFS;
+
+ psf->sf_addr = *psfsrc;
+ if (psf_prev) {
+ psf_prev->sf_next = psf;
+ } else
+ pmc->mca_sources = psf;
+ }
+ psf->sf_count[sfmode]++;
+ return 0;
+}
+
+static void sf_markstate(struct ifmcaddr6 *pmc)
+{
+ struct ip6_sf_list *psf;
+ int mca_xcount = pmc->mca_sfcount[MCAST_EXCLUDE];
+
+ for (psf=pmc->mca_sources; psf; psf=psf->sf_next)
+ if (pmc->mca_sfcount[MCAST_EXCLUDE]) {
+ psf->sf_oldin = mca_xcount ==
+ psf->sf_count[MCAST_EXCLUDE] &&
+ !psf->sf_count[MCAST_INCLUDE];
+ } else
+ psf->sf_oldin = psf->sf_count[MCAST_INCLUDE] != 0;
+}
+
+static int sf_setstate(struct ifmcaddr6 *pmc)
+{
+ struct ip6_sf_list *psf, *dpsf;
+ int mca_xcount = pmc->mca_sfcount[MCAST_EXCLUDE];
+ int qrv = pmc->idev->mc_qrv;
+ int new_in, rv;
+
+ rv = 0;
+ for (psf=pmc->mca_sources; psf; psf=psf->sf_next) {
+ if (pmc->mca_sfcount[MCAST_EXCLUDE]) {
+ new_in = mca_xcount == psf->sf_count[MCAST_EXCLUDE] &&
+ !psf->sf_count[MCAST_INCLUDE];
+ } else
+ new_in = psf->sf_count[MCAST_INCLUDE] != 0;
+ if (new_in) {
+ if (!psf->sf_oldin) {
+ struct ip6_sf_list *prev = NULL;
+
+ for (dpsf=pmc->mca_tomb; dpsf;
+ dpsf=dpsf->sf_next) {
+ if (ipv6_addr_equal(&dpsf->sf_addr,
+ &psf->sf_addr))
+ break;
+ prev = dpsf;
+ }
+ if (dpsf) {
+ if (prev)
+ prev->sf_next = dpsf->sf_next;
+ else
+ pmc->mca_tomb = dpsf->sf_next;
+ kfree(dpsf);
+ }
+ psf->sf_crcount = qrv;
+ rv++;
+ }
+ } else if (psf->sf_oldin) {
+ psf->sf_crcount = 0;
+ /*
+ * add or update "delete" records if an active filter
+ * is now inactive
+ */
+ for (dpsf=pmc->mca_tomb; dpsf; dpsf=dpsf->sf_next)
+ if (ipv6_addr_equal(&dpsf->sf_addr,
+ &psf->sf_addr))
+ break;
+ if (!dpsf) {
+ dpsf = kmalloc(sizeof(*dpsf), GFP_ATOMIC);
+ if (!dpsf)
+ continue;
+ *dpsf = *psf;
+ /* pmc->mca_lock held by callers */
+ dpsf->sf_next = pmc->mca_tomb;
+ pmc->mca_tomb = dpsf;
+ }
+ dpsf->sf_crcount = qrv;
+ rv++;
+ }
+ }
+ return rv;
+}
+
+/*
+ * Add multicast source filter list to the interface list
+ */
+static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca,
+ int sfmode, int sfcount, const struct in6_addr *psfsrc,
+ int delta)
+{
+ struct ifmcaddr6 *pmc;
+ int isexclude;
+ int i, err;
+
+ if (!idev)
+ return -ENODEV;
+ read_lock_bh(&idev->lock);
+ for (pmc=idev->mc_list; pmc; pmc=pmc->next) {
+ if (ipv6_addr_equal(pmca, &pmc->mca_addr))
+ break;
+ }
+ if (!pmc) {
+ /* MCA not found?? bug */
+ read_unlock_bh(&idev->lock);
+ return -ESRCH;
+ }
+ spin_lock_bh(&pmc->mca_lock);
+
+ sf_markstate(pmc);
+ isexclude = pmc->mca_sfmode == MCAST_EXCLUDE;
+ if (!delta)
+ pmc->mca_sfcount[sfmode]++;
+ err = 0;
+ for (i=0; i<sfcount; i++) {
+ err = ip6_mc_add1_src(pmc, sfmode, &psfsrc[i]);
+ if (err)
+ break;
+ }
+ if (err) {
+ int j;
+
+ if (!delta)
+ pmc->mca_sfcount[sfmode]--;
+ for (j=0; j<i; j++)
+ ip6_mc_del1_src(pmc, sfmode, &psfsrc[j]);
+ } else if (isexclude != (pmc->mca_sfcount[MCAST_EXCLUDE] != 0)) {
+ struct ip6_sf_list *psf;
+
+ /* filter mode change */
+ if (pmc->mca_sfcount[MCAST_EXCLUDE])
+ pmc->mca_sfmode = MCAST_EXCLUDE;
+ else if (pmc->mca_sfcount[MCAST_INCLUDE])
+ pmc->mca_sfmode = MCAST_INCLUDE;
+ /* else no filters; keep old mode for reports */
+
+ pmc->mca_crcount = idev->mc_qrv;
+ idev->mc_ifc_count = pmc->mca_crcount;
+ for (psf=pmc->mca_sources; psf; psf = psf->sf_next)
+ psf->sf_crcount = 0;
+ mld_ifc_event(idev);
+ } else if (sf_setstate(pmc))
+ mld_ifc_event(idev);
+ spin_unlock_bh(&pmc->mca_lock);
+ read_unlock_bh(&idev->lock);
+ return err;
+}
+
+static void ip6_mc_clear_src(struct ifmcaddr6 *pmc)
+{
+ struct ip6_sf_list *psf, *nextpsf;
+
+ for (psf=pmc->mca_tomb; psf; psf=nextpsf) {
+ nextpsf = psf->sf_next;
+ kfree(psf);
+ }
+ pmc->mca_tomb = NULL;
+ for (psf=pmc->mca_sources; psf; psf=nextpsf) {
+ nextpsf = psf->sf_next;
+ kfree(psf);
+ }
+ pmc->mca_sources = NULL;
+ pmc->mca_sfmode = MCAST_EXCLUDE;
+ pmc->mca_sfcount[MCAST_INCLUDE] = 0;
+ pmc->mca_sfcount[MCAST_EXCLUDE] = 1;
+}
+
+
+static void igmp6_join_group(struct ifmcaddr6 *ma)
+{
+ unsigned long delay;
+
+ if (ma->mca_flags & MAF_NOREPORT)
+ return;
+
+ igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT);
+
+ delay = net_random() % IGMP6_UNSOLICITED_IVAL;
+
+ spin_lock_bh(&ma->mca_lock);
+ if (del_timer(&ma->mca_timer)) {
+ atomic_dec(&ma->mca_refcnt);
+ delay = ma->mca_timer.expires - jiffies;
+ }
+
+ if (!mod_timer(&ma->mca_timer, jiffies + delay))
+ atomic_inc(&ma->mca_refcnt);
+ ma->mca_flags |= MAF_TIMER_RUNNING | MAF_LAST_REPORTER;
+ spin_unlock_bh(&ma->mca_lock);
+}
+
+static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
+ struct inet6_dev *idev)
+{
+ int err;
+
+ /* callers have the socket lock and a write lock on ipv6_sk_mc_lock,
+ * so no other readers or writers of iml or its sflist
+ */
+ if (!iml->sflist) {
+ /* any-source empty exclude case */
+ return ip6_mc_del_src(idev, &iml->addr, iml->sfmode, 0, NULL, 0);
+ }
+ err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode,
+ iml->sflist->sl_count, iml->sflist->sl_addr, 0);
+ sock_kfree_s(sk, iml->sflist, IP6_SFLSIZE(iml->sflist->sl_max));
+ iml->sflist = NULL;
+ return err;
+}
+
+static void igmp6_leave_group(struct ifmcaddr6 *ma)
+{
+ if (MLD_V1_SEEN(ma->idev)) {
+ if (ma->mca_flags & MAF_LAST_REPORTER)
+ igmp6_send(&ma->mca_addr, ma->idev->dev,
+ ICMPV6_MGM_REDUCTION);
+ } else {
+ mld_add_delrec(ma->idev, ma);
+ mld_ifc_event(ma->idev);
+ }
+}
+
+static void mld_gq_timer_expire(unsigned long data)
+{
+ struct inet6_dev *idev = (struct inet6_dev *)data;
+
+ idev->mc_gq_running = 0;
+ mld_send_report(idev, NULL);
+ __in6_dev_put(idev);
+}
+
+static void mld_ifc_timer_expire(unsigned long data)
+{
+ struct inet6_dev *idev = (struct inet6_dev *)data;
+
+ mld_send_cr(idev);
+ if (idev->mc_ifc_count) {
+ idev->mc_ifc_count--;
+ if (idev->mc_ifc_count)
+ mld_ifc_start_timer(idev, idev->mc_maxdelay);
+ }
+ __in6_dev_put(idev);
+}
+
+static void mld_ifc_event(struct inet6_dev *idev)
+{
+ if (MLD_V1_SEEN(idev))
+ return;
+ idev->mc_ifc_count = idev->mc_qrv;
+ mld_ifc_start_timer(idev, 1);
+}
+
+
+static void igmp6_timer_handler(unsigned long data)
+{
+ struct ifmcaddr6 *ma = (struct ifmcaddr6 *) data;
+
+ if (MLD_V1_SEEN(ma->idev))
+ igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT);
+ else
+ mld_send_report(ma->idev, ma);
+
+ spin_lock(&ma->mca_lock);
+ ma->mca_flags |= MAF_LAST_REPORTER;
+ ma->mca_flags &= ~MAF_TIMER_RUNNING;
+ spin_unlock(&ma->mca_lock);
+ ma_put(ma);
+}
+
+/* Device changing type */
+
+void ipv6_mc_unmap(struct inet6_dev *idev)
+{
+ struct ifmcaddr6 *i;
+
+ /* Install multicast list, except for all-nodes (already installed) */
+
+ read_lock_bh(&idev->lock);
+ for (i = idev->mc_list; i; i = i->next)
+ igmp6_group_dropped(i);
+ read_unlock_bh(&idev->lock);
+}
+
+void ipv6_mc_remap(struct inet6_dev *idev)
+{
+ ipv6_mc_up(idev);
+}
+
+/* Device going down */
+
+void ipv6_mc_down(struct inet6_dev *idev)
+{
+ struct ifmcaddr6 *i;
+
+ /* Withdraw multicast list */
+
+ read_lock_bh(&idev->lock);
+ idev->mc_ifc_count = 0;
+ if (del_timer(&idev->mc_ifc_timer))
+ __in6_dev_put(idev);
+ idev->mc_gq_running = 0;
+ if (del_timer(&idev->mc_gq_timer))
+ __in6_dev_put(idev);
+
+ for (i = idev->mc_list; i; i=i->next)
+ igmp6_group_dropped(i);
+ read_unlock_bh(&idev->lock);
+
+ mld_clear_delrec(idev);
+}
+
+
+/* Device going up */
+
+void ipv6_mc_up(struct inet6_dev *idev)
+{
+ struct ifmcaddr6 *i;
+
+ /* Install multicast list, except for all-nodes (already installed) */
+
+ read_lock_bh(&idev->lock);
+ for (i = idev->mc_list; i; i=i->next)
+ igmp6_group_added(i);
+ read_unlock_bh(&idev->lock);
+}
+
+/* IPv6 device initialization. */
+
+void ipv6_mc_init_dev(struct inet6_dev *idev)
+{
+ write_lock_bh(&idev->lock);
+ spin_lock_init(&idev->mc_lock);
+ idev->mc_gq_running = 0;
+ setup_timer(&idev->mc_gq_timer, mld_gq_timer_expire,
+ (unsigned long)idev);
+ idev->mc_tomb = NULL;
+ idev->mc_ifc_count = 0;
+ setup_timer(&idev->mc_ifc_timer, mld_ifc_timer_expire,
+ (unsigned long)idev);
+ idev->mc_qrv = MLD_QRV_DEFAULT;
+ idev->mc_maxdelay = IGMP6_UNSOLICITED_IVAL;
+ idev->mc_v1_seen = 0;
+ write_unlock_bh(&idev->lock);
+}
+
+/*
+ * Device is about to be destroyed: clean up.
+ */
+
+void ipv6_mc_destroy_dev(struct inet6_dev *idev)
+{
+ struct ifmcaddr6 *i;
+
+ /* Deactivate timers */
+ ipv6_mc_down(idev);
+
+ /* Delete all-nodes address. */
+ /* We cannot call ipv6_dev_mc_dec() directly, our caller in
+ * addrconf.c has NULL'd out dev->ip6_ptr so in6_dev_get() will
+ * fail.
+ */
+ __ipv6_dev_mc_dec(idev, &in6addr_linklocal_allnodes);
+
+ if (idev->cnf.forwarding)
+ __ipv6_dev_mc_dec(idev, &in6addr_linklocal_allrouters);
+
+ write_lock_bh(&idev->lock);
+ while ((i = idev->mc_list) != NULL) {
+ idev->mc_list = i->next;
+ write_unlock_bh(&idev->lock);
+
+ igmp6_group_dropped(i);
+ ma_put(i);
+
+ write_lock_bh(&idev->lock);
+ }
+ write_unlock_bh(&idev->lock);
+}
+
+#ifdef CONFIG_PROC_FS
+struct igmp6_mc_iter_state {
+ struct seq_net_private p;
+ struct net_device *dev;
+ struct inet6_dev *idev;
+};
+
+#define igmp6_mc_seq_private(seq) ((struct igmp6_mc_iter_state *)(seq)->private)
+
+static inline struct ifmcaddr6 *igmp6_mc_get_first(struct seq_file *seq)
+{
+ struct ifmcaddr6 *im = NULL;
+ struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq);
+ struct net *net = seq_file_net(seq);
+
+ state->idev = NULL;
+ for_each_netdev_rcu(net, state->dev) {
+ struct inet6_dev *idev;
+ idev = __in6_dev_get(state->dev);
+ if (!idev)
+ continue;
+ read_lock_bh(&idev->lock);
+ im = idev->mc_list;
+ if (im) {
+ state->idev = idev;
+ break;
+ }
+ read_unlock_bh(&idev->lock);
+ }
+ return im;
+}
+
+static struct ifmcaddr6 *igmp6_mc_get_next(struct seq_file *seq, struct ifmcaddr6 *im)
+{
+ struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq);
+
+ im = im->next;
+ while (!im) {
+ if (likely(state->idev != NULL))
+ read_unlock_bh(&state->idev->lock);
+
+ state->dev = next_net_device_rcu(state->dev);
+ if (!state->dev) {
+ state->idev = NULL;
+ break;
+ }
+ state->idev = __in6_dev_get(state->dev);
+ if (!state->idev)
+ continue;
+ read_lock_bh(&state->idev->lock);
+ im = state->idev->mc_list;
+ }
+ return im;
+}
+
+static struct ifmcaddr6 *igmp6_mc_get_idx(struct seq_file *seq, loff_t pos)
+{
+ struct ifmcaddr6 *im = igmp6_mc_get_first(seq);
+ if (im)
+ while (pos && (im = igmp6_mc_get_next(seq, im)) != NULL)
+ --pos;
+ return pos ? NULL : im;
+}
+
+static void *igmp6_mc_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(RCU)
+{
+ rcu_read_lock();
+ return igmp6_mc_get_idx(seq, *pos);
+}
+
+static void *igmp6_mc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct ifmcaddr6 *im = igmp6_mc_get_next(seq, v);
+
+ ++*pos;
+ return im;
+}
+
+static void igmp6_mc_seq_stop(struct seq_file *seq, void *v)
+ __releases(RCU)
+{
+ struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq);
+
+ if (likely(state->idev != NULL)) {
+ read_unlock_bh(&state->idev->lock);
+ state->idev = NULL;
+ }
+ state->dev = NULL;
+ rcu_read_unlock();
+}
+
+static int igmp6_mc_seq_show(struct seq_file *seq, void *v)
+{
+ struct ifmcaddr6 *im = (struct ifmcaddr6 *)v;
+ struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq);
+
+ seq_printf(seq,
+ "%-4d %-15s %pi6 %5d %08X %ld\n",
+ state->dev->ifindex, state->dev->name,
+ &im->mca_addr,
+ im->mca_users, im->mca_flags,
+ (im->mca_flags&MAF_TIMER_RUNNING) ?
+ jiffies_to_clock_t(im->mca_timer.expires-jiffies) : 0);
+ return 0;
+}
+
+static const struct seq_operations igmp6_mc_seq_ops = {
+ .start = igmp6_mc_seq_start,
+ .next = igmp6_mc_seq_next,
+ .stop = igmp6_mc_seq_stop,
+ .show = igmp6_mc_seq_show,
+};
+
+static int igmp6_mc_seq_open(struct inode *inode, struct file *file)
+{
+ return seq_open_net(inode, file, &igmp6_mc_seq_ops,
+ sizeof(struct igmp6_mc_iter_state));
+}
+
+static const struct file_operations igmp6_mc_seq_fops = {
+ .owner = THIS_MODULE,
+ .open = igmp6_mc_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_net,
+};
+
+struct igmp6_mcf_iter_state {
+ struct seq_net_private p;
+ struct net_device *dev;
+ struct inet6_dev *idev;
+ struct ifmcaddr6 *im;
+};
+
+#define igmp6_mcf_seq_private(seq) ((struct igmp6_mcf_iter_state *)(seq)->private)
+
+static inline struct ip6_sf_list *igmp6_mcf_get_first(struct seq_file *seq)
+{
+ struct ip6_sf_list *psf = NULL;
+ struct ifmcaddr6 *im = NULL;
+ struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq);
+ struct net *net = seq_file_net(seq);
+
+ state->idev = NULL;
+ state->im = NULL;
+ for_each_netdev_rcu(net, state->dev) {
+ struct inet6_dev *idev;
+ idev = __in6_dev_get(state->dev);
+ if (unlikely(idev == NULL))
+ continue;
+ read_lock_bh(&idev->lock);
+ im = idev->mc_list;
+ if (likely(im != NULL)) {
+ spin_lock_bh(&im->mca_lock);
+ psf = im->mca_sources;
+ if (likely(psf != NULL)) {
+ state->im = im;
+ state->idev = idev;
+ break;
+ }
+ spin_unlock_bh(&im->mca_lock);
+ }
+ read_unlock_bh(&idev->lock);
+ }
+ return psf;
+}
+
+static struct ip6_sf_list *igmp6_mcf_get_next(struct seq_file *seq, struct ip6_sf_list *psf)
+{
+ struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq);
+
+ psf = psf->sf_next;
+ while (!psf) {
+ spin_unlock_bh(&state->im->mca_lock);
+ state->im = state->im->next;
+ while (!state->im) {
+ if (likely(state->idev != NULL))
+ read_unlock_bh(&state->idev->lock);
+
+ state->dev = next_net_device_rcu(state->dev);
+ if (!state->dev) {
+ state->idev = NULL;
+ goto out;
+ }
+ state->idev = __in6_dev_get(state->dev);
+ if (!state->idev)
+ continue;
+ read_lock_bh(&state->idev->lock);
+ state->im = state->idev->mc_list;
+ }
+ if (!state->im)
+ break;
+ spin_lock_bh(&state->im->mca_lock);
+ psf = state->im->mca_sources;
+ }
+out:
+ return psf;
+}
+
+static struct ip6_sf_list *igmp6_mcf_get_idx(struct seq_file *seq, loff_t pos)
+{
+ struct ip6_sf_list *psf = igmp6_mcf_get_first(seq);
+ if (psf)
+ while (pos && (psf = igmp6_mcf_get_next(seq, psf)) != NULL)
+ --pos;
+ return pos ? NULL : psf;
+}
+
+static void *igmp6_mcf_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(RCU)
+{
+ rcu_read_lock();
+ return *pos ? igmp6_mcf_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
+}
+
+static void *igmp6_mcf_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct ip6_sf_list *psf;
+ if (v == SEQ_START_TOKEN)
+ psf = igmp6_mcf_get_first(seq);
+ else
+ psf = igmp6_mcf_get_next(seq, v);
+ ++*pos;
+ return psf;
+}
+
+static void igmp6_mcf_seq_stop(struct seq_file *seq, void *v)
+ __releases(RCU)
+{
+ struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq);
+ if (likely(state->im != NULL)) {
+ spin_unlock_bh(&state->im->mca_lock);
+ state->im = NULL;
+ }
+ if (likely(state->idev != NULL)) {
+ read_unlock_bh(&state->idev->lock);
+ state->idev = NULL;
+ }
+ state->dev = NULL;
+ rcu_read_unlock();
+}
+
+static int igmp6_mcf_seq_show(struct seq_file *seq, void *v)
+{
+ struct ip6_sf_list *psf = (struct ip6_sf_list *)v;
+ struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq);
+
+ if (v == SEQ_START_TOKEN) {
+ seq_printf(seq,
+ "%3s %6s "
+ "%32s %32s %6s %6s\n", "Idx",
+ "Device", "Multicast Address",
+ "Source Address", "INC", "EXC");
+ } else {
+ seq_printf(seq,
+ "%3d %6.6s %pi6 %pi6 %6lu %6lu\n",
+ state->dev->ifindex, state->dev->name,
+ &state->im->mca_addr,
+ &psf->sf_addr,
+ psf->sf_count[MCAST_INCLUDE],
+ psf->sf_count[MCAST_EXCLUDE]);
+ }
+ return 0;
+}
+
+static const struct seq_operations igmp6_mcf_seq_ops = {
+ .start = igmp6_mcf_seq_start,
+ .next = igmp6_mcf_seq_next,
+ .stop = igmp6_mcf_seq_stop,
+ .show = igmp6_mcf_seq_show,
+};
+
+static int igmp6_mcf_seq_open(struct inode *inode, struct file *file)
+{
+ return seq_open_net(inode, file, &igmp6_mcf_seq_ops,
+ sizeof(struct igmp6_mcf_iter_state));
+}
+
+static const struct file_operations igmp6_mcf_seq_fops = {
+ .owner = THIS_MODULE,
+ .open = igmp6_mcf_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_net,
+};
+
+static int __net_init igmp6_proc_init(struct net *net)
+{
+ int err;
+
+ err = -ENOMEM;
+ if (!proc_net_fops_create(net, "igmp6", S_IRUGO, &igmp6_mc_seq_fops))
+ goto out;
+ if (!proc_net_fops_create(net, "mcfilter6", S_IRUGO,
+ &igmp6_mcf_seq_fops))
+ goto out_proc_net_igmp6;
+
+ err = 0;
+out:
+ return err;
+
+out_proc_net_igmp6:
+ proc_net_remove(net, "igmp6");
+ goto out;
+}
+
+static void __net_exit igmp6_proc_exit(struct net *net)
+{
+ proc_net_remove(net, "mcfilter6");
+ proc_net_remove(net, "igmp6");
+}
+#else
+static inline int igmp6_proc_init(struct net *net)
+{
+ return 0;
+}
+static inline void igmp6_proc_exit(struct net *net)
+{
+}
+#endif
+
+static int __net_init igmp6_net_init(struct net *net)
+{
+ int err;
+
+ err = inet_ctl_sock_create(&net->ipv6.igmp_sk, PF_INET6,
+ SOCK_RAW, IPPROTO_ICMPV6, net);
+ if (err < 0) {
+ printk(KERN_ERR
+ "Failed to initialize the IGMP6 control socket (err %d).\n",
+ err);
+ goto out;
+ }
+
+ inet6_sk(net->ipv6.igmp_sk)->hop_limit = 1;
+
+ err = igmp6_proc_init(net);
+ if (err)
+ goto out_sock_create;
+out:
+ return err;
+
+out_sock_create:
+ inet_ctl_sock_destroy(net->ipv6.igmp_sk);
+ goto out;
+}
+
+static void __net_exit igmp6_net_exit(struct net *net)
+{
+ inet_ctl_sock_destroy(net->ipv6.igmp_sk);
+ igmp6_proc_exit(net);
+}
+
+static struct pernet_operations igmp6_net_ops = {
+ .init = igmp6_net_init,
+ .exit = igmp6_net_exit,
+};
+
+int __init igmp6_init(void)
+{
+ return register_pernet_subsys(&igmp6_net_ops);
+}
+
+void igmp6_cleanup(void)
+{
+ unregister_pernet_subsys(&igmp6_net_ops);
+}
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
new file mode 100644
index 00000000..7e1e0fbf
--- /dev/null
+++ b/net/ipv6/mip6.c
@@ -0,0 +1,525 @@
+/*
+ * Copyright (C)2003-2006 Helsinki University of Technology
+ * Copyright (C)2003-2006 USAGI/WIDE Project
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+/*
+ * Authors:
+ * Noriaki TAKAMIYA @USAGI
+ * Masahide NAKAMURA @USAGI
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/time.h>
+#include <linux/ipv6.h>
+#include <linux/icmpv6.h>
+#include <net/sock.h>
+#include <net/ipv6.h>
+#include <net/ip6_checksum.h>
+#include <net/rawv6.h>
+#include <net/xfrm.h>
+#include <net/mip6.h>
+
+static inline unsigned int calc_padlen(unsigned int len, unsigned int n)
+{
+ return (n - len + 16) & 0x7;
+}
+
+static inline void *mip6_padn(__u8 *data, __u8 padlen)
+{
+ if (!data)
+ return NULL;
+ if (padlen == 1) {
+ data[0] = IPV6_TLV_PAD0;
+ } else if (padlen > 1) {
+ data[0] = IPV6_TLV_PADN;
+ data[1] = padlen - 2;
+ if (padlen > 2)
+ memset(data+2, 0, data[1]);
+ }
+ return data + padlen;
+}
+
+static inline void mip6_param_prob(struct sk_buff *skb, u8 code, int pos)
+{
+ icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos);
+}
+
+static int mip6_mh_len(int type)
+{
+ int len = 0;
+
+ switch (type) {
+ case IP6_MH_TYPE_BRR:
+ len = 0;
+ break;
+ case IP6_MH_TYPE_HOTI:
+ case IP6_MH_TYPE_COTI:
+ case IP6_MH_TYPE_BU:
+ case IP6_MH_TYPE_BACK:
+ len = 1;
+ break;
+ case IP6_MH_TYPE_HOT:
+ case IP6_MH_TYPE_COT:
+ case IP6_MH_TYPE_BERROR:
+ len = 2;
+ break;
+ }
+ return len;
+}
+
+static int mip6_mh_filter(struct sock *sk, struct sk_buff *skb)
+{
+ struct ip6_mh *mh;
+
+ if (!pskb_may_pull(skb, (skb_transport_offset(skb)) + 8) ||
+ !pskb_may_pull(skb, (skb_transport_offset(skb) +
+ ((skb_transport_header(skb)[1] + 1) << 3))))
+ return -1;
+
+ mh = (struct ip6_mh *)skb_transport_header(skb);
+
+ if (mh->ip6mh_hdrlen < mip6_mh_len(mh->ip6mh_type)) {
+ LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH message too short: %d vs >=%d\n",
+ mh->ip6mh_hdrlen, mip6_mh_len(mh->ip6mh_type));
+ mip6_param_prob(skb, 0, ((&mh->ip6mh_hdrlen) -
+ skb_network_header(skb)));
+ return -1;
+ }
+
+ if (mh->ip6mh_proto != IPPROTO_NONE) {
+ LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH invalid payload proto = %d\n",
+ mh->ip6mh_proto);
+ mip6_param_prob(skb, 0, ((&mh->ip6mh_proto) -
+ skb_network_header(skb)));
+ return -1;
+ }
+
+ return 0;
+}
+
+struct mip6_report_rate_limiter {
+ spinlock_t lock;
+ struct timeval stamp;
+ int iif;
+ struct in6_addr src;
+ struct in6_addr dst;
+};
+
+static struct mip6_report_rate_limiter mip6_report_rl = {
+ .lock = __SPIN_LOCK_UNLOCKED(mip6_report_rl.lock)
+};
+
+static int mip6_destopt_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
+ struct ipv6_destopt_hdr *destopt = (struct ipv6_destopt_hdr *)skb->data;
+ int err = destopt->nexthdr;
+
+ spin_lock(&x->lock);
+ if (!ipv6_addr_equal(&iph->saddr, (struct in6_addr *)x->coaddr) &&
+ !ipv6_addr_any((struct in6_addr *)x->coaddr))
+ err = -ENOENT;
+ spin_unlock(&x->lock);
+
+ return err;
+}
+
+/* Destination Option Header is inserted.
+ * IP Header's src address is replaced with Home Address Option in
+ * Destination Option Header.
+ */
+static int mip6_destopt_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+ struct ipv6hdr *iph;
+ struct ipv6_destopt_hdr *dstopt;
+ struct ipv6_destopt_hao *hao;
+ u8 nexthdr;
+ int len;
+
+ skb_push(skb, -skb_network_offset(skb));
+ iph = ipv6_hdr(skb);
+
+ nexthdr = *skb_mac_header(skb);
+ *skb_mac_header(skb) = IPPROTO_DSTOPTS;
+
+ dstopt = (struct ipv6_destopt_hdr *)skb_transport_header(skb);
+ dstopt->nexthdr = nexthdr;
+
+ hao = mip6_padn((char *)(dstopt + 1),
+ calc_padlen(sizeof(*dstopt), 6));
+
+ hao->type = IPV6_TLV_HAO;
+ BUILD_BUG_ON(sizeof(*hao) != 18);
+ hao->length = sizeof(*hao) - 2;
+
+ len = ((char *)hao - (char *)dstopt) + sizeof(*hao);
+
+ memcpy(&hao->addr, &iph->saddr, sizeof(hao->addr));
+ spin_lock_bh(&x->lock);
+ memcpy(&iph->saddr, x->coaddr, sizeof(iph->saddr));
+ spin_unlock_bh(&x->lock);
+
+ WARN_ON(len != x->props.header_len);
+ dstopt->hdrlen = (x->props.header_len >> 3) - 1;
+
+ return 0;
+}
+
+static inline int mip6_report_rl_allow(struct timeval *stamp,
+ const struct in6_addr *dst,
+ const struct in6_addr *src, int iif)
+{
+ int allow = 0;
+
+ spin_lock_bh(&mip6_report_rl.lock);
+ if (mip6_report_rl.stamp.tv_sec != stamp->tv_sec ||
+ mip6_report_rl.stamp.tv_usec != stamp->tv_usec ||
+ mip6_report_rl.iif != iif ||
+ !ipv6_addr_equal(&mip6_report_rl.src, src) ||
+ !ipv6_addr_equal(&mip6_report_rl.dst, dst)) {
+ mip6_report_rl.stamp.tv_sec = stamp->tv_sec;
+ mip6_report_rl.stamp.tv_usec = stamp->tv_usec;
+ mip6_report_rl.iif = iif;
+ mip6_report_rl.src = *src;
+ mip6_report_rl.dst = *dst;
+ allow = 1;
+ }
+ spin_unlock_bh(&mip6_report_rl.lock);
+ return allow;
+}
+
+static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb,
+ const struct flowi *fl)
+{
+ struct net *net = xs_net(x);
+ struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb;
+ const struct flowi6 *fl6 = &fl->u.ip6;
+ struct ipv6_destopt_hao *hao = NULL;
+ struct xfrm_selector sel;
+ int offset;
+ struct timeval stamp;
+ int err = 0;
+
+ if (unlikely(fl6->flowi6_proto == IPPROTO_MH &&
+ fl6->fl6_mh_type <= IP6_MH_TYPE_MAX))
+ goto out;
+
+ if (likely(opt->dsthao)) {
+ offset = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
+ if (likely(offset >= 0))
+ hao = (struct ipv6_destopt_hao *)
+ (skb_network_header(skb) + offset);
+ }
+
+ skb_get_timestamp(skb, &stamp);
+
+ if (!mip6_report_rl_allow(&stamp, &ipv6_hdr(skb)->daddr,
+ hao ? &hao->addr : &ipv6_hdr(skb)->saddr,
+ opt->iif))
+ goto out;
+
+ memset(&sel, 0, sizeof(sel));
+ memcpy(&sel.daddr, (xfrm_address_t *)&ipv6_hdr(skb)->daddr,
+ sizeof(sel.daddr));
+ sel.prefixlen_d = 128;
+ memcpy(&sel.saddr, (xfrm_address_t *)&ipv6_hdr(skb)->saddr,
+ sizeof(sel.saddr));
+ sel.prefixlen_s = 128;
+ sel.family = AF_INET6;
+ sel.proto = fl6->flowi6_proto;
+ sel.dport = xfrm_flowi_dport(fl, &fl6->uli);
+ if (sel.dport)
+ sel.dport_mask = htons(~0);
+ sel.sport = xfrm_flowi_sport(fl, &fl6->uli);
+ if (sel.sport)
+ sel.sport_mask = htons(~0);
+ sel.ifindex = fl6->flowi6_oif;
+
+ err = km_report(net, IPPROTO_DSTOPTS, &sel,
+ (hao ? (xfrm_address_t *)&hao->addr : NULL));
+
+ out:
+ return err;
+}
+
+static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb,
+ u8 **nexthdr)
+{
+ u16 offset = sizeof(struct ipv6hdr);
+ struct ipv6_opt_hdr *exthdr =
+ (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
+ const unsigned char *nh = skb_network_header(skb);
+ unsigned int packet_len = skb->tail - skb->network_header;
+ int found_rhdr = 0;
+
+ *nexthdr = &ipv6_hdr(skb)->nexthdr;
+
+ while (offset + 1 <= packet_len) {
+
+ switch (**nexthdr) {
+ case NEXTHDR_HOP:
+ break;
+ case NEXTHDR_ROUTING:
+ found_rhdr = 1;
+ break;
+ case NEXTHDR_DEST:
+ /*
+ * HAO MUST NOT appear more than once.
+ * XXX: It is better to try to find by the end of
+ * XXX: packet if HAO exists.
+ */
+ if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) {
+ LIMIT_NETDEBUG(KERN_WARNING "mip6: hao exists already, override\n");
+ return offset;
+ }
+
+ if (found_rhdr)
+ return offset;
+
+ break;
+ default:
+ return offset;
+ }
+
+ offset += ipv6_optlen(exthdr);
+ *nexthdr = &exthdr->nexthdr;
+ exthdr = (struct ipv6_opt_hdr *)(nh + offset);
+ }
+
+ return offset;
+}
+
+static int mip6_destopt_init_state(struct xfrm_state *x)
+{
+ if (x->id.spi) {
+ printk(KERN_INFO "%s: spi is not 0: %u\n", __func__,
+ x->id.spi);
+ return -EINVAL;
+ }
+ if (x->props.mode != XFRM_MODE_ROUTEOPTIMIZATION) {
+ printk(KERN_INFO "%s: state's mode is not %u: %u\n",
+ __func__, XFRM_MODE_ROUTEOPTIMIZATION, x->props.mode);
+ return -EINVAL;
+ }
+
+ x->props.header_len = sizeof(struct ipv6_destopt_hdr) +
+ calc_padlen(sizeof(struct ipv6_destopt_hdr), 6) +
+ sizeof(struct ipv6_destopt_hao);
+ WARN_ON(x->props.header_len != 24);
+
+ return 0;
+}
+
+/*
+ * Do nothing about destroying since it has no specific operation for
+ * destination options header unlike IPsec protocols.
+ */
+static void mip6_destopt_destroy(struct xfrm_state *x)
+{
+}
+
+static const struct xfrm_type mip6_destopt_type =
+{
+ .description = "MIP6DESTOPT",
+ .owner = THIS_MODULE,
+ .proto = IPPROTO_DSTOPTS,
+ .flags = XFRM_TYPE_NON_FRAGMENT | XFRM_TYPE_LOCAL_COADDR,
+ .init_state = mip6_destopt_init_state,
+ .destructor = mip6_destopt_destroy,
+ .input = mip6_destopt_input,
+ .output = mip6_destopt_output,
+ .reject = mip6_destopt_reject,
+ .hdr_offset = mip6_destopt_offset,
+};
+
+static int mip6_rthdr_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
+ struct rt2_hdr *rt2 = (struct rt2_hdr *)skb->data;
+ int err = rt2->rt_hdr.nexthdr;
+
+ spin_lock(&x->lock);
+ if (!ipv6_addr_equal(&iph->daddr, (struct in6_addr *)x->coaddr) &&
+ !ipv6_addr_any((struct in6_addr *)x->coaddr))
+ err = -ENOENT;
+ spin_unlock(&x->lock);
+
+ return err;
+}
+
+/* Routing Header type 2 is inserted.
+ * IP Header's dst address is replaced with Routing Header's Home Address.
+ */
+static int mip6_rthdr_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+ struct ipv6hdr *iph;
+ struct rt2_hdr *rt2;
+ u8 nexthdr;
+
+ skb_push(skb, -skb_network_offset(skb));
+ iph = ipv6_hdr(skb);
+
+ nexthdr = *skb_mac_header(skb);
+ *skb_mac_header(skb) = IPPROTO_ROUTING;
+
+ rt2 = (struct rt2_hdr *)skb_transport_header(skb);
+ rt2->rt_hdr.nexthdr = nexthdr;
+ rt2->rt_hdr.hdrlen = (x->props.header_len >> 3) - 1;
+ rt2->rt_hdr.type = IPV6_SRCRT_TYPE_2;
+ rt2->rt_hdr.segments_left = 1;
+ memset(&rt2->reserved, 0, sizeof(rt2->reserved));
+
+ WARN_ON(rt2->rt_hdr.hdrlen != 2);
+
+ memcpy(&rt2->addr, &iph->daddr, sizeof(rt2->addr));
+ spin_lock_bh(&x->lock);
+ memcpy(&iph->daddr, x->coaddr, sizeof(iph->daddr));
+ spin_unlock_bh(&x->lock);
+
+ return 0;
+}
+
+static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb,
+ u8 **nexthdr)
+{
+ u16 offset = sizeof(struct ipv6hdr);
+ struct ipv6_opt_hdr *exthdr =
+ (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
+ const unsigned char *nh = skb_network_header(skb);
+ unsigned int packet_len = skb->tail - skb->network_header;
+ int found_rhdr = 0;
+
+ *nexthdr = &ipv6_hdr(skb)->nexthdr;
+
+ while (offset + 1 <= packet_len) {
+
+ switch (**nexthdr) {
+ case NEXTHDR_HOP:
+ break;
+ case NEXTHDR_ROUTING:
+ if (offset + 3 <= packet_len) {
+ struct ipv6_rt_hdr *rt;
+ rt = (struct ipv6_rt_hdr *)(nh + offset);
+ if (rt->type != 0)
+ return offset;
+ }
+ found_rhdr = 1;
+ break;
+ case NEXTHDR_DEST:
+ if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
+ return offset;
+
+ if (found_rhdr)
+ return offset;
+
+ break;
+ default:
+ return offset;
+ }
+
+ offset += ipv6_optlen(exthdr);
+ *nexthdr = &exthdr->nexthdr;
+ exthdr = (struct ipv6_opt_hdr *)(nh + offset);
+ }
+
+ return offset;
+}
+
+static int mip6_rthdr_init_state(struct xfrm_state *x)
+{
+ if (x->id.spi) {
+ printk(KERN_INFO "%s: spi is not 0: %u\n", __func__,
+ x->id.spi);
+ return -EINVAL;
+ }
+ if (x->props.mode != XFRM_MODE_ROUTEOPTIMIZATION) {
+ printk(KERN_INFO "%s: state's mode is not %u: %u\n",
+ __func__, XFRM_MODE_ROUTEOPTIMIZATION, x->props.mode);
+ return -EINVAL;
+ }
+
+ x->props.header_len = sizeof(struct rt2_hdr);
+
+ return 0;
+}
+
+/*
+ * Do nothing about destroying since it has no specific operation for routing
+ * header type 2 unlike IPsec protocols.
+ */
+static void mip6_rthdr_destroy(struct xfrm_state *x)
+{
+}
+
+static const struct xfrm_type mip6_rthdr_type =
+{
+ .description = "MIP6RT",
+ .owner = THIS_MODULE,
+ .proto = IPPROTO_ROUTING,
+ .flags = XFRM_TYPE_NON_FRAGMENT | XFRM_TYPE_REMOTE_COADDR,
+ .init_state = mip6_rthdr_init_state,
+ .destructor = mip6_rthdr_destroy,
+ .input = mip6_rthdr_input,
+ .output = mip6_rthdr_output,
+ .hdr_offset = mip6_rthdr_offset,
+};
+
+static int __init mip6_init(void)
+{
+ printk(KERN_INFO "Mobile IPv6\n");
+
+ if (xfrm_register_type(&mip6_destopt_type, AF_INET6) < 0) {
+ printk(KERN_INFO "%s: can't add xfrm type(destopt)\n", __func__);
+ goto mip6_destopt_xfrm_fail;
+ }
+ if (xfrm_register_type(&mip6_rthdr_type, AF_INET6) < 0) {
+ printk(KERN_INFO "%s: can't add xfrm type(rthdr)\n", __func__);
+ goto mip6_rthdr_xfrm_fail;
+ }
+ if (rawv6_mh_filter_register(mip6_mh_filter) < 0) {
+ printk(KERN_INFO "%s: can't add rawv6 mh filter\n", __func__);
+ goto mip6_rawv6_mh_fail;
+ }
+
+
+ return 0;
+
+ mip6_rawv6_mh_fail:
+ xfrm_unregister_type(&mip6_rthdr_type, AF_INET6);
+ mip6_rthdr_xfrm_fail:
+ xfrm_unregister_type(&mip6_destopt_type, AF_INET6);
+ mip6_destopt_xfrm_fail:
+ return -EAGAIN;
+}
+
+static void __exit mip6_fini(void)
+{
+ if (rawv6_mh_filter_unregister(mip6_mh_filter) < 0)
+ printk(KERN_INFO "%s: can't remove rawv6 mh filter\n", __func__);
+ if (xfrm_unregister_type(&mip6_rthdr_type, AF_INET6) < 0)
+ printk(KERN_INFO "%s: can't remove xfrm type(rthdr)\n", __func__);
+ if (xfrm_unregister_type(&mip6_destopt_type, AF_INET6) < 0)
+ printk(KERN_INFO "%s: can't remove xfrm type(destopt)\n", __func__);
+}
+
+module_init(mip6_init);
+module_exit(mip6_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_DSTOPTS);
+MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_ROUTING);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
new file mode 100644
index 00000000..176b4693
--- /dev/null
+++ b/net/ipv6/ndisc.c
@@ -0,0 +1,1892 @@
+/*
+ * Neighbour Discovery for IPv6
+ * Linux INET6 implementation
+ *
+ * Authors:
+ * Pedro Roque <roque@di.fc.ul.pt>
+ * Mike Shaver <shaver@ingenia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/*
+ * Changes:
+ *
+ * Pierre Ynard : export userland ND options
+ * through netlink (RDNSS support)
+ * Lars Fenneberg : fixed MTU setting on receipt
+ * of an RA.
+ * Janos Farkas : kmalloc failure checks
+ * Alexey Kuznetsov : state machine reworked
+ * and moved to net/core.
+ * Pekka Savola : RFC2461 validation
+ * YOSHIFUJI Hideaki @USAGI : Verify ND options properly
+ */
+
+/* Set to 3 to get tracing... */
+#define ND_DEBUG 1
+
+#define ND_PRINTK(fmt, args...) do { if (net_ratelimit()) { printk(fmt, ## args); } } while(0)
+#define ND_NOPRINTK(x...) do { ; } while(0)
+#define ND_PRINTK0 ND_PRINTK
+#define ND_PRINTK1 ND_NOPRINTK
+#define ND_PRINTK2 ND_NOPRINTK
+#define ND_PRINTK3 ND_NOPRINTK
+#if ND_DEBUG >= 1
+#undef ND_PRINTK1
+#define ND_PRINTK1 ND_PRINTK
+#endif
+#if ND_DEBUG >= 2
+#undef ND_PRINTK2
+#define ND_PRINTK2 ND_PRINTK
+#endif
+#if ND_DEBUG >= 3
+#undef ND_PRINTK3
+#define ND_PRINTK3 ND_PRINTK
+#endif
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/sched.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <linux/route.h>
+#include <linux/init.h>
+#include <linux/rcupdate.h>
+#include <linux/slab.h>
+#ifdef CONFIG_SYSCTL
+#include <linux/sysctl.h>
+#endif
+
+#include <linux/if_addr.h>
+#include <linux/if_arp.h>
+#include <linux/ipv6.h>
+#include <linux/icmpv6.h>
+#include <linux/jhash.h>
+
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/ndisc.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
+#include <net/icmp.h>
+
+#include <net/netlink.h>
+#include <linux/rtnetlink.h>
+
+#include <net/flow.h>
+#include <net/ip6_checksum.h>
+#include <net/inet_common.h>
+#include <linux/proc_fs.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+
+static u32 ndisc_hash(const void *pkey,
+ const struct net_device *dev,
+ __u32 *hash_rnd);
+static int ndisc_constructor(struct neighbour *neigh);
+static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb);
+static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb);
+static int pndisc_constructor(struct pneigh_entry *n);
+static void pndisc_destructor(struct pneigh_entry *n);
+static void pndisc_redo(struct sk_buff *skb);
+
+static const struct neigh_ops ndisc_generic_ops = {
+ .family = AF_INET6,
+ .solicit = ndisc_solicit,
+ .error_report = ndisc_error_report,
+ .output = neigh_resolve_output,
+ .connected_output = neigh_connected_output,
+};
+
+static const struct neigh_ops ndisc_hh_ops = {
+ .family = AF_INET6,
+ .solicit = ndisc_solicit,
+ .error_report = ndisc_error_report,
+ .output = neigh_resolve_output,
+ .connected_output = neigh_resolve_output,
+};
+
+
+static const struct neigh_ops ndisc_direct_ops = {
+ .family = AF_INET6,
+ .output = neigh_direct_output,
+ .connected_output = neigh_direct_output,
+};
+
+struct neigh_table nd_tbl = {
+ .family = AF_INET6,
+ .key_len = sizeof(struct in6_addr),
+ .hash = ndisc_hash,
+ .constructor = ndisc_constructor,
+ .pconstructor = pndisc_constructor,
+ .pdestructor = pndisc_destructor,
+ .proxy_redo = pndisc_redo,
+ .id = "ndisc_cache",
+ .parms = {
+ .tbl = &nd_tbl,
+ .base_reachable_time = ND_REACHABLE_TIME,
+ .retrans_time = ND_RETRANS_TIMER,
+ .gc_staletime = 60 * HZ,
+ .reachable_time = ND_REACHABLE_TIME,
+ .delay_probe_time = 5 * HZ,
+ .queue_len_bytes = 64*1024,
+ .ucast_probes = 3,
+ .mcast_probes = 3,
+ .anycast_delay = 1 * HZ,
+ .proxy_delay = (8 * HZ) / 10,
+ .proxy_qlen = 64,
+ },
+ .gc_interval = 30 * HZ,
+ .gc_thresh1 = 128,
+ .gc_thresh2 = 512,
+ .gc_thresh3 = 1024,
+};
+
+/* ND options */
+struct ndisc_options {
+ struct nd_opt_hdr *nd_opt_array[__ND_OPT_ARRAY_MAX];
+#ifdef CONFIG_IPV6_ROUTE_INFO
+ struct nd_opt_hdr *nd_opts_ri;
+ struct nd_opt_hdr *nd_opts_ri_end;
+#endif
+ struct nd_opt_hdr *nd_useropts;
+ struct nd_opt_hdr *nd_useropts_end;
+};
+
+#define nd_opts_src_lladdr nd_opt_array[ND_OPT_SOURCE_LL_ADDR]
+#define nd_opts_tgt_lladdr nd_opt_array[ND_OPT_TARGET_LL_ADDR]
+#define nd_opts_pi nd_opt_array[ND_OPT_PREFIX_INFO]
+#define nd_opts_pi_end nd_opt_array[__ND_OPT_PREFIX_INFO_END]
+#define nd_opts_rh nd_opt_array[ND_OPT_REDIRECT_HDR]
+#define nd_opts_mtu nd_opt_array[ND_OPT_MTU]
+
+#define NDISC_OPT_SPACE(len) (((len)+2+7)&~7)
+
+/*
+ * Return the padding between the option length and the start of the
+ * link addr. Currently only IP-over-InfiniBand needs this, although
+ * if RFC 3831 IPv6-over-Fibre Channel is ever implemented it may
+ * also need a pad of 2.
+ */
+static int ndisc_addr_option_pad(unsigned short type)
+{
+ switch (type) {
+ case ARPHRD_INFINIBAND: return 2;
+ default: return 0;
+ }
+}
+
+static inline int ndisc_opt_addr_space(struct net_device *dev)
+{
+ return NDISC_OPT_SPACE(dev->addr_len + ndisc_addr_option_pad(dev->type));
+}
+
+static u8 *ndisc_fill_addr_option(u8 *opt, int type, void *data, int data_len,
+ unsigned short addr_type)
+{
+ int space = NDISC_OPT_SPACE(data_len);
+ int pad = ndisc_addr_option_pad(addr_type);
+
+ opt[0] = type;
+ opt[1] = space>>3;
+
+ memset(opt + 2, 0, pad);
+ opt += pad;
+ space -= pad;
+
+ memcpy(opt+2, data, data_len);
+ data_len += 2;
+ opt += data_len;
+ if ((space -= data_len) > 0)
+ memset(opt, 0, space);
+ return opt + space;
+}
+
+static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
+ struct nd_opt_hdr *end)
+{
+ int type;
+ if (!cur || !end || cur >= end)
+ return NULL;
+ type = cur->nd_opt_type;
+ do {
+ cur = ((void *)cur) + (cur->nd_opt_len << 3);
+ } while(cur < end && cur->nd_opt_type != type);
+ return cur <= end && cur->nd_opt_type == type ? cur : NULL;
+}
+
+static inline int ndisc_is_useropt(struct nd_opt_hdr *opt)
+{
+ return opt->nd_opt_type == ND_OPT_RDNSS;
+}
+
+static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur,
+ struct nd_opt_hdr *end)
+{
+ if (!cur || !end || cur >= end)
+ return NULL;
+ do {
+ cur = ((void *)cur) + (cur->nd_opt_len << 3);
+ } while(cur < end && !ndisc_is_useropt(cur));
+ return cur <= end && ndisc_is_useropt(cur) ? cur : NULL;
+}
+
+static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
+ struct ndisc_options *ndopts)
+{
+ struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)opt;
+
+ if (!nd_opt || opt_len < 0 || !ndopts)
+ return NULL;
+ memset(ndopts, 0, sizeof(*ndopts));
+ while (opt_len) {
+ int l;
+ if (opt_len < sizeof(struct nd_opt_hdr))
+ return NULL;
+ l = nd_opt->nd_opt_len << 3;
+ if (opt_len < l || l == 0)
+ return NULL;
+ switch (nd_opt->nd_opt_type) {
+ case ND_OPT_SOURCE_LL_ADDR:
+ case ND_OPT_TARGET_LL_ADDR:
+ case ND_OPT_MTU:
+ case ND_OPT_REDIRECT_HDR:
+ if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
+ ND_PRINTK2(KERN_WARNING
+ "%s(): duplicated ND6 option found: type=%d\n",
+ __func__,
+ nd_opt->nd_opt_type);
+ } else {
+ ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt;
+ }
+ break;
+ case ND_OPT_PREFIX_INFO:
+ ndopts->nd_opts_pi_end = nd_opt;
+ if (!ndopts->nd_opt_array[nd_opt->nd_opt_type])
+ ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt;
+ break;
+#ifdef CONFIG_IPV6_ROUTE_INFO
+ case ND_OPT_ROUTE_INFO:
+ ndopts->nd_opts_ri_end = nd_opt;
+ if (!ndopts->nd_opts_ri)
+ ndopts->nd_opts_ri = nd_opt;
+ break;
+#endif
+ default:
+ if (ndisc_is_useropt(nd_opt)) {
+ ndopts->nd_useropts_end = nd_opt;
+ if (!ndopts->nd_useropts)
+ ndopts->nd_useropts = nd_opt;
+ } else {
+ /*
+ * Unknown options must be silently ignored,
+ * to accommodate future extension to the
+ * protocol.
+ */
+ ND_PRINTK2(KERN_NOTICE
+ "%s(): ignored unsupported option; type=%d, len=%d\n",
+ __func__,
+ nd_opt->nd_opt_type, nd_opt->nd_opt_len);
+ }
+ }
+ opt_len -= l;
+ nd_opt = ((void *)nd_opt) + l;
+ }
+ return ndopts;
+}
+
+static inline u8 *ndisc_opt_addr_data(struct nd_opt_hdr *p,
+ struct net_device *dev)
+{
+ u8 *lladdr = (u8 *)(p + 1);
+ int lladdrlen = p->nd_opt_len << 3;
+ int prepad = ndisc_addr_option_pad(dev->type);
+ if (lladdrlen != NDISC_OPT_SPACE(dev->addr_len + prepad))
+ return NULL;
+ return lladdr + prepad;
+}
+
+int ndisc_mc_map(const struct in6_addr *addr, char *buf, struct net_device *dev, int dir)
+{
+ switch (dev->type) {
+ case ARPHRD_ETHER:
+ case ARPHRD_IEEE802: /* Not sure. Check it later. --ANK */
+ case ARPHRD_FDDI:
+ ipv6_eth_mc_map(addr, buf);
+ return 0;
+ case ARPHRD_IEEE802_TR:
+ ipv6_tr_mc_map(addr,buf);
+ return 0;
+ case ARPHRD_ARCNET:
+ ipv6_arcnet_mc_map(addr, buf);
+ return 0;
+ case ARPHRD_INFINIBAND:
+ ipv6_ib_mc_map(addr, dev->broadcast, buf);
+ return 0;
+ case ARPHRD_IPGRE:
+ return ipv6_ipgre_mc_map(addr, dev->broadcast, buf);
+ default:
+ if (dir) {
+ memcpy(buf, dev->broadcast, dev->addr_len);
+ return 0;
+ }
+ }
+ return -EINVAL;
+}
+
+EXPORT_SYMBOL(ndisc_mc_map);
+
+static u32 ndisc_hash(const void *pkey,
+ const struct net_device *dev,
+ __u32 *hash_rnd)
+{
+ return ndisc_hashfn(pkey, dev, hash_rnd);
+}
+
+static int ndisc_constructor(struct neighbour *neigh)
+{
+ struct in6_addr *addr = (struct in6_addr*)&neigh->primary_key;
+ struct net_device *dev = neigh->dev;
+ struct inet6_dev *in6_dev;
+ struct neigh_parms *parms;
+ int is_multicast = ipv6_addr_is_multicast(addr);
+
+ in6_dev = in6_dev_get(dev);
+ if (in6_dev == NULL) {
+ return -EINVAL;
+ }
+
+ parms = in6_dev->nd_parms;
+ __neigh_parms_put(neigh->parms);
+ neigh->parms = neigh_parms_clone(parms);
+
+ neigh->type = is_multicast ? RTN_MULTICAST : RTN_UNICAST;
+ if (!dev->header_ops) {
+ neigh->nud_state = NUD_NOARP;
+ neigh->ops = &ndisc_direct_ops;
+ neigh->output = neigh_direct_output;
+ } else {
+ if (is_multicast) {
+ neigh->nud_state = NUD_NOARP;
+ ndisc_mc_map(addr, neigh->ha, dev, 1);
+ } else if (dev->flags&(IFF_NOARP|IFF_LOOPBACK)) {
+ neigh->nud_state = NUD_NOARP;
+ memcpy(neigh->ha, dev->dev_addr, dev->addr_len);
+ if (dev->flags&IFF_LOOPBACK)
+ neigh->type = RTN_LOCAL;
+ } else if (dev->flags&IFF_POINTOPOINT) {
+ neigh->nud_state = NUD_NOARP;
+ memcpy(neigh->ha, dev->broadcast, dev->addr_len);
+ }
+ if (dev->header_ops->cache)
+ neigh->ops = &ndisc_hh_ops;
+ else
+ neigh->ops = &ndisc_generic_ops;
+ if (neigh->nud_state&NUD_VALID)
+ neigh->output = neigh->ops->connected_output;
+ else
+ neigh->output = neigh->ops->output;
+ }
+ in6_dev_put(in6_dev);
+ return 0;
+}
+
+static int pndisc_constructor(struct pneigh_entry *n)
+{
+ struct in6_addr *addr = (struct in6_addr*)&n->key;
+ struct in6_addr maddr;
+ struct net_device *dev = n->dev;
+
+ if (dev == NULL || __in6_dev_get(dev) == NULL)
+ return -EINVAL;
+ addrconf_addr_solict_mult(addr, &maddr);
+ ipv6_dev_mc_inc(dev, &maddr);
+ return 0;
+}
+
+static void pndisc_destructor(struct pneigh_entry *n)
+{
+ struct in6_addr *addr = (struct in6_addr*)&n->key;
+ struct in6_addr maddr;
+ struct net_device *dev = n->dev;
+
+ if (dev == NULL || __in6_dev_get(dev) == NULL)
+ return;
+ addrconf_addr_solict_mult(addr, &maddr);
+ ipv6_dev_mc_dec(dev, &maddr);
+}
+
+struct sk_buff *ndisc_build_skb(struct net_device *dev,
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr,
+ struct icmp6hdr *icmp6h,
+ const struct in6_addr *target,
+ int llinfo)
+{
+ struct net *net = dev_net(dev);
+ struct sock *sk = net->ipv6.ndisc_sk;
+ struct sk_buff *skb;
+ struct icmp6hdr *hdr;
+ int hlen = LL_RESERVED_SPACE(dev);
+ int tlen = dev->needed_tailroom;
+ int len;
+ int err;
+ u8 *opt;
+
+ if (!dev->addr_len)
+ llinfo = 0;
+
+ len = sizeof(struct icmp6hdr) + (target ? sizeof(*target) : 0);
+ if (llinfo)
+ len += ndisc_opt_addr_space(dev);
+
+ skb = sock_alloc_send_skb(sk,
+ (MAX_HEADER + sizeof(struct ipv6hdr) +
+ len + hlen + tlen),
+ 1, &err);
+ if (!skb) {
+ ND_PRINTK0(KERN_ERR
+ "ICMPv6 ND: %s() failed to allocate an skb, err=%d.\n",
+ __func__, err);
+ return NULL;
+ }
+
+ skb_reserve(skb, hlen);
+ ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);
+
+ skb->transport_header = skb->tail;
+ skb_put(skb, len);
+
+ hdr = (struct icmp6hdr *)skb_transport_header(skb);
+ memcpy(hdr, icmp6h, sizeof(*hdr));
+
+ opt = skb_transport_header(skb) + sizeof(struct icmp6hdr);
+ if (target) {
+ *(struct in6_addr *)opt = *target;
+ opt += sizeof(*target);
+ }
+
+ if (llinfo)
+ ndisc_fill_addr_option(opt, llinfo, dev->dev_addr,
+ dev->addr_len, dev->type);
+
+ hdr->icmp6_cksum = csum_ipv6_magic(saddr, daddr, len,
+ IPPROTO_ICMPV6,
+ csum_partial(hdr,
+ len, 0));
+
+ return skb;
+}
+
+EXPORT_SYMBOL(ndisc_build_skb);
+
+void ndisc_send_skb(struct sk_buff *skb,
+ struct net_device *dev,
+ struct neighbour *neigh,
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr,
+ struct icmp6hdr *icmp6h)
+{
+ struct flowi6 fl6;
+ struct dst_entry *dst;
+ struct net *net = dev_net(dev);
+ struct sock *sk = net->ipv6.ndisc_sk;
+ struct inet6_dev *idev;
+ int err;
+ u8 type;
+
+ type = icmp6h->icmp6_type;
+
+ icmpv6_flow_init(sk, &fl6, type, saddr, daddr, dev->ifindex);
+ dst = icmp6_dst_alloc(dev, neigh, &fl6);
+ if (IS_ERR(dst)) {
+ kfree_skb(skb);
+ return;
+ }
+
+ skb_dst_set(skb, dst);
+
+ rcu_read_lock();
+ idev = __in6_dev_get(dst->dev);
+ IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
+
+ err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
+ dst_output);
+ if (!err) {
+ ICMP6MSGOUT_INC_STATS(net, idev, type);
+ ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
+ }
+
+ rcu_read_unlock();
+}
+
+EXPORT_SYMBOL(ndisc_send_skb);
+
+/*
+ * Send a Neighbour Discover packet
+ */
+static void __ndisc_send(struct net_device *dev,
+ struct neighbour *neigh,
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr,
+ struct icmp6hdr *icmp6h, const struct in6_addr *target,
+ int llinfo)
+{
+ struct sk_buff *skb;
+
+ skb = ndisc_build_skb(dev, daddr, saddr, icmp6h, target, llinfo);
+ if (!skb)
+ return;
+
+ ndisc_send_skb(skb, dev, neigh, daddr, saddr, icmp6h);
+}
+
+static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
+ const struct in6_addr *daddr,
+ const struct in6_addr *solicited_addr,
+ int router, int solicited, int override, int inc_opt)
+{
+ struct in6_addr tmpaddr;
+ struct inet6_ifaddr *ifp;
+ const struct in6_addr *src_addr;
+ struct icmp6hdr icmp6h = {
+ .icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT,
+ };
+
+ /* for anycast or proxy, solicited_addr != src_addr */
+ ifp = ipv6_get_ifaddr(dev_net(dev), solicited_addr, dev, 1);
+ if (ifp) {
+ src_addr = solicited_addr;
+ if (ifp->flags & IFA_F_OPTIMISTIC)
+ override = 0;
+ inc_opt |= ifp->idev->cnf.force_tllao;
+ in6_ifa_put(ifp);
+ } else {
+ if (ipv6_dev_get_saddr(dev_net(dev), dev, daddr,
+ inet6_sk(dev_net(dev)->ipv6.ndisc_sk)->srcprefs,
+ &tmpaddr))
+ return;
+ src_addr = &tmpaddr;
+ }
+
+ icmp6h.icmp6_router = router;
+ icmp6h.icmp6_solicited = solicited;
+ icmp6h.icmp6_override = override;
+
+ __ndisc_send(dev, neigh, daddr, src_addr,
+ &icmp6h, solicited_addr,
+ inc_opt ? ND_OPT_TARGET_LL_ADDR : 0);
+}
+
+static void ndisc_send_unsol_na(struct net_device *dev)
+{
+ struct inet6_dev *idev;
+ struct inet6_ifaddr *ifa;
+ struct in6_addr mcaddr;
+
+ idev = in6_dev_get(dev);
+ if (!idev)
+ return;
+
+ read_lock_bh(&idev->lock);
+ list_for_each_entry(ifa, &idev->addr_list, if_list) {
+ addrconf_addr_solict_mult(&ifa->addr, &mcaddr);
+ ndisc_send_na(dev, NULL, &mcaddr, &ifa->addr,
+ /*router=*/ !!idev->cnf.forwarding,
+ /*solicited=*/ false, /*override=*/ true,
+ /*inc_opt=*/ true);
+ }
+ read_unlock_bh(&idev->lock);
+
+ in6_dev_put(idev);
+}
+
+void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
+ const struct in6_addr *solicit,
+ const struct in6_addr *daddr, const struct in6_addr *saddr)
+{
+ struct in6_addr addr_buf;
+ struct icmp6hdr icmp6h = {
+ .icmp6_type = NDISC_NEIGHBOUR_SOLICITATION,
+ };
+
+ if (saddr == NULL) {
+ if (ipv6_get_lladdr(dev, &addr_buf,
+ (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)))
+ return;
+ saddr = &addr_buf;
+ }
+
+ __ndisc_send(dev, neigh, daddr, saddr,
+ &icmp6h, solicit,
+ !ipv6_addr_any(saddr) ? ND_OPT_SOURCE_LL_ADDR : 0);
+}
+
+void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr,
+ const struct in6_addr *daddr)
+{
+ struct icmp6hdr icmp6h = {
+ .icmp6_type = NDISC_ROUTER_SOLICITATION,
+ };
+ int send_sllao = dev->addr_len;
+
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+ /*
+ * According to section 2.2 of RFC 4429, we must not
+ * send router solicitations with a sllao from
+ * optimistic addresses, but we may send the solicitation
+ * if we don't include the sllao. So here we check
+ * if our address is optimistic, and if so, we
+ * suppress the inclusion of the sllao.
+ */
+ if (send_sllao) {
+ struct inet6_ifaddr *ifp = ipv6_get_ifaddr(dev_net(dev), saddr,
+ dev, 1);
+ if (ifp) {
+ if (ifp->flags & IFA_F_OPTIMISTIC) {
+ send_sllao = 0;
+ }
+ in6_ifa_put(ifp);
+ } else {
+ send_sllao = 0;
+ }
+ }
+#endif
+ __ndisc_send(dev, NULL, daddr, saddr,
+ &icmp6h, NULL,
+ send_sllao ? ND_OPT_SOURCE_LL_ADDR : 0);
+}
+
+
+static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb)
+{
+ /*
+ * "The sender MUST return an ICMP
+ * destination unreachable"
+ */
+ dst_link_failure(skb);
+ kfree_skb(skb);
+}
+
+/* Called with locked neigh: either read or both */
+
+static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
+{
+ struct in6_addr *saddr = NULL;
+ struct in6_addr mcaddr;
+ struct net_device *dev = neigh->dev;
+ struct in6_addr *target = (struct in6_addr *)&neigh->primary_key;
+ int probes = atomic_read(&neigh->probes);
+
+ if (skb && ipv6_chk_addr(dev_net(dev), &ipv6_hdr(skb)->saddr, dev, 1))
+ saddr = &ipv6_hdr(skb)->saddr;
+
+ if ((probes -= neigh->parms->ucast_probes) < 0) {
+ if (!(neigh->nud_state & NUD_VALID)) {
+ ND_PRINTK1(KERN_DEBUG "%s(): trying to ucast probe in NUD_INVALID: %pI6\n",
+ __func__, target);
+ }
+ ndisc_send_ns(dev, neigh, target, target, saddr);
+ } else if ((probes -= neigh->parms->app_probes) < 0) {
+#ifdef CONFIG_ARPD
+ neigh_app_ns(neigh);
+#endif
+ } else {
+ addrconf_addr_solict_mult(target, &mcaddr);
+ ndisc_send_ns(dev, NULL, target, &mcaddr, saddr);
+ }
+}
+
+static int pndisc_is_router(const void *pkey,
+ struct net_device *dev)
+{
+ struct pneigh_entry *n;
+ int ret = -1;
+
+ read_lock_bh(&nd_tbl.lock);
+ n = __pneigh_lookup(&nd_tbl, dev_net(dev), pkey, dev);
+ if (n)
+ ret = !!(n->flags & NTF_ROUTER);
+ read_unlock_bh(&nd_tbl.lock);
+
+ return ret;
+}
+
+static void ndisc_recv_ns(struct sk_buff *skb)
+{
+ struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
+ const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
+ const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
+ u8 *lladdr = NULL;
+ u32 ndoptlen = skb->tail - (skb->transport_header +
+ offsetof(struct nd_msg, opt));
+ struct ndisc_options ndopts;
+ struct net_device *dev = skb->dev;
+ struct inet6_ifaddr *ifp;
+ struct inet6_dev *idev = NULL;
+ struct neighbour *neigh;
+ int dad = ipv6_addr_any(saddr);
+ int inc;
+ int is_router = -1;
+
+ if (ipv6_addr_is_multicast(&msg->target)) {
+ ND_PRINTK2(KERN_WARNING
+ "ICMPv6 NS: multicast target address");
+ return;
+ }
+
+ /*
+ * RFC2461 7.1.1:
+ * DAD has to be destined for solicited node multicast address.
+ */
+ if (dad &&
+ !(daddr->s6_addr32[0] == htonl(0xff020000) &&
+ daddr->s6_addr32[1] == htonl(0x00000000) &&
+ daddr->s6_addr32[2] == htonl(0x00000001) &&
+ daddr->s6_addr [12] == 0xff )) {
+ ND_PRINTK2(KERN_WARNING
+ "ICMPv6 NS: bad DAD packet (wrong destination)\n");
+ return;
+ }
+
+ if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) {
+ ND_PRINTK2(KERN_WARNING
+ "ICMPv6 NS: invalid ND options\n");
+ return;
+ }
+
+ if (ndopts.nd_opts_src_lladdr) {
+ lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr, dev);
+ if (!lladdr) {
+ ND_PRINTK2(KERN_WARNING
+ "ICMPv6 NS: invalid link-layer address length\n");
+ return;
+ }
+
+ /* RFC2461 7.1.1:
+ * If the IP source address is the unspecified address,
+ * there MUST NOT be source link-layer address option
+ * in the message.
+ */
+ if (dad) {
+ ND_PRINTK2(KERN_WARNING
+ "ICMPv6 NS: bad DAD packet (link-layer address option)\n");
+ return;
+ }
+ }
+
+ inc = ipv6_addr_is_multicast(daddr);
+
+ ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1);
+ if (ifp) {
+
+ if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) {
+ if (dad) {
+ if (dev->type == ARPHRD_IEEE802_TR) {
+ const unsigned char *sadr;
+ sadr = skb_mac_header(skb);
+ if (((sadr[8] ^ dev->dev_addr[0]) & 0x7f) == 0 &&
+ sadr[9] == dev->dev_addr[1] &&
+ sadr[10] == dev->dev_addr[2] &&
+ sadr[11] == dev->dev_addr[3] &&
+ sadr[12] == dev->dev_addr[4] &&
+ sadr[13] == dev->dev_addr[5]) {
+ /* looped-back to us */
+ goto out;
+ }
+ }
+
+ /*
+ * We are colliding with another node
+ * who is doing DAD
+ * so fail our DAD process
+ */
+ addrconf_dad_failure(ifp);
+ return;
+ } else {
+ /*
+ * This is not a dad solicitation.
+ * If we are an optimistic node,
+ * we should respond.
+ * Otherwise, we should ignore it.
+ */
+ if (!(ifp->flags & IFA_F_OPTIMISTIC))
+ goto out;
+ }
+ }
+
+ idev = ifp->idev;
+ } else {
+ struct net *net = dev_net(dev);
+
+ idev = in6_dev_get(dev);
+ if (!idev) {
+ /* XXX: count this drop? */
+ return;
+ }
+
+ if (ipv6_chk_acast_addr(net, dev, &msg->target) ||
+ (idev->cnf.forwarding &&
+ (net->ipv6.devconf_all->proxy_ndp || idev->cnf.proxy_ndp) &&
+ (is_router = pndisc_is_router(&msg->target, dev)) >= 0)) {
+ if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) &&
+ skb->pkt_type != PACKET_HOST &&
+ inc != 0 &&
+ idev->nd_parms->proxy_delay != 0) {
+ /*
+ * for anycast or proxy,
+ * sender should delay its response
+ * by a random time between 0 and
+ * MAX_ANYCAST_DELAY_TIME seconds.
+ * (RFC2461) -- yoshfuji
+ */
+ struct sk_buff *n = skb_clone(skb, GFP_ATOMIC);
+ if (n)
+ pneigh_enqueue(&nd_tbl, idev->nd_parms, n);
+ goto out;
+ }
+ } else
+ goto out;
+ }
+
+ if (is_router < 0)
+ is_router = !!idev->cnf.forwarding;
+
+ if (dad) {
+ ndisc_send_na(dev, NULL, &in6addr_linklocal_allnodes, &msg->target,
+ is_router, 0, (ifp != NULL), 1);
+ goto out;
+ }
+
+ if (inc)
+ NEIGH_CACHE_STAT_INC(&nd_tbl, rcv_probes_mcast);
+ else
+ NEIGH_CACHE_STAT_INC(&nd_tbl, rcv_probes_ucast);
+
+ /*
+ * update / create cache entry
+ * for the source address
+ */
+ neigh = __neigh_lookup(&nd_tbl, saddr, dev,
+ !inc || lladdr || !dev->addr_len);
+ if (neigh)
+ neigh_update(neigh, lladdr, NUD_STALE,
+ NEIGH_UPDATE_F_WEAK_OVERRIDE|
+ NEIGH_UPDATE_F_OVERRIDE);
+ if (neigh || !dev->header_ops) {
+ ndisc_send_na(dev, neigh, saddr, &msg->target,
+ is_router,
+ 1, (ifp != NULL && inc), inc);
+ if (neigh)
+ neigh_release(neigh);
+ }
+
+out:
+ if (ifp)
+ in6_ifa_put(ifp);
+ else
+ in6_dev_put(idev);
+}
+
+static void ndisc_recv_na(struct sk_buff *skb)
+{
+ struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
+ const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
+ const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
+ u8 *lladdr = NULL;
+ u32 ndoptlen = skb->tail - (skb->transport_header +
+ offsetof(struct nd_msg, opt));
+ struct ndisc_options ndopts;
+ struct net_device *dev = skb->dev;
+ struct inet6_ifaddr *ifp;
+ struct neighbour *neigh;
+
+ if (skb->len < sizeof(struct nd_msg)) {
+ ND_PRINTK2(KERN_WARNING
+ "ICMPv6 NA: packet too short\n");
+ return;
+ }
+
+ if (ipv6_addr_is_multicast(&msg->target)) {
+ ND_PRINTK2(KERN_WARNING
+ "ICMPv6 NA: target address is multicast.\n");
+ return;
+ }
+
+ if (ipv6_addr_is_multicast(daddr) &&
+ msg->icmph.icmp6_solicited) {
+ ND_PRINTK2(KERN_WARNING
+ "ICMPv6 NA: solicited NA is multicasted.\n");
+ return;
+ }
+
+ if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) {
+ ND_PRINTK2(KERN_WARNING
+ "ICMPv6 NS: invalid ND option\n");
+ return;
+ }
+ if (ndopts.nd_opts_tgt_lladdr) {
+ lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr, dev);
+ if (!lladdr) {
+ ND_PRINTK2(KERN_WARNING
+ "ICMPv6 NA: invalid link-layer address length\n");
+ return;
+ }
+ }
+ ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1);
+ if (ifp) {
+ if (skb->pkt_type != PACKET_LOOPBACK
+ && (ifp->flags & IFA_F_TENTATIVE)) {
+ addrconf_dad_failure(ifp);
+ return;
+ }
+ /* What should we make now? The advertisement
+ is invalid, but ndisc specs say nothing
+ about it. It could be misconfiguration, or
+ an smart proxy agent tries to help us :-)
+
+ We should not print the error if NA has been
+ received from loopback - it is just our own
+ unsolicited advertisement.
+ */
+ if (skb->pkt_type != PACKET_LOOPBACK)
+ ND_PRINTK1(KERN_WARNING
+ "ICMPv6 NA: someone advertises our address %pI6 on %s!\n",
+ &ifp->addr, ifp->idev->dev->name);
+ in6_ifa_put(ifp);
+ return;
+ }
+ neigh = neigh_lookup(&nd_tbl, &msg->target, dev);
+
+ if (neigh) {
+ u8 old_flags = neigh->flags;
+ struct net *net = dev_net(dev);
+
+ if (neigh->nud_state & NUD_FAILED)
+ goto out;
+
+ /*
+ * Don't update the neighbor cache entry on a proxy NA from
+ * ourselves because either the proxied node is off link or it
+ * has already sent a NA to us.
+ */
+ if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) &&
+ net->ipv6.devconf_all->forwarding && net->ipv6.devconf_all->proxy_ndp &&
+ pneigh_lookup(&nd_tbl, net, &msg->target, dev, 0)) {
+ /* XXX: idev->cnf.prixy_ndp */
+ goto out;
+ }
+
+ neigh_update(neigh, lladdr,
+ msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE,
+ NEIGH_UPDATE_F_WEAK_OVERRIDE|
+ (msg->icmph.icmp6_override ? NEIGH_UPDATE_F_OVERRIDE : 0)|
+ NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
+ (msg->icmph.icmp6_router ? NEIGH_UPDATE_F_ISROUTER : 0));
+
+ if ((old_flags & ~neigh->flags) & NTF_ROUTER) {
+ /*
+ * Change: router to host
+ */
+ struct rt6_info *rt;
+ rt = rt6_get_dflt_router(saddr, dev);
+ if (rt)
+ ip6_del_rt(rt);
+ }
+
+out:
+ neigh_release(neigh);
+ }
+}
+
+static void ndisc_recv_rs(struct sk_buff *skb)
+{
+ struct rs_msg *rs_msg = (struct rs_msg *)skb_transport_header(skb);
+ unsigned long ndoptlen = skb->len - sizeof(*rs_msg);
+ struct neighbour *neigh;
+ struct inet6_dev *idev;
+ const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
+ struct ndisc_options ndopts;
+ u8 *lladdr = NULL;
+
+ if (skb->len < sizeof(*rs_msg))
+ return;
+
+ idev = __in6_dev_get(skb->dev);
+ if (!idev) {
+ if (net_ratelimit())
+ ND_PRINTK1("ICMP6 RS: can't find in6 device\n");
+ return;
+ }
+
+ /* Don't accept RS if we're not in router mode */
+ if (!idev->cnf.forwarding)
+ goto out;
+
+ /*
+ * Don't update NCE if src = ::;
+ * this implies that the source node has no ip address assigned yet.
+ */
+ if (ipv6_addr_any(saddr))
+ goto out;
+
+ /* Parse ND options */
+ if (!ndisc_parse_options(rs_msg->opt, ndoptlen, &ndopts)) {
+ if (net_ratelimit())
+ ND_PRINTK2("ICMP6 NS: invalid ND option, ignored\n");
+ goto out;
+ }
+
+ if (ndopts.nd_opts_src_lladdr) {
+ lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr,
+ skb->dev);
+ if (!lladdr)
+ goto out;
+ }
+
+ neigh = __neigh_lookup(&nd_tbl, saddr, skb->dev, 1);
+ if (neigh) {
+ neigh_update(neigh, lladdr, NUD_STALE,
+ NEIGH_UPDATE_F_WEAK_OVERRIDE|
+ NEIGH_UPDATE_F_OVERRIDE|
+ NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
+ neigh_release(neigh);
+ }
+out:
+ return;
+}
+
+static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt)
+{
+ struct icmp6hdr *icmp6h = (struct icmp6hdr *)skb_transport_header(ra);
+ struct sk_buff *skb;
+ struct nlmsghdr *nlh;
+ struct nduseroptmsg *ndmsg;
+ struct net *net = dev_net(ra->dev);
+ int err;
+ int base_size = NLMSG_ALIGN(sizeof(struct nduseroptmsg)
+ + (opt->nd_opt_len << 3));
+ size_t msg_size = base_size + nla_total_size(sizeof(struct in6_addr));
+
+ skb = nlmsg_new(msg_size, GFP_ATOMIC);
+ if (skb == NULL) {
+ err = -ENOBUFS;
+ goto errout;
+ }
+
+ nlh = nlmsg_put(skb, 0, 0, RTM_NEWNDUSEROPT, base_size, 0);
+ if (nlh == NULL) {
+ goto nla_put_failure;
+ }
+
+ ndmsg = nlmsg_data(nlh);
+ ndmsg->nduseropt_family = AF_INET6;
+ ndmsg->nduseropt_ifindex = ra->dev->ifindex;
+ ndmsg->nduseropt_icmp_type = icmp6h->icmp6_type;
+ ndmsg->nduseropt_icmp_code = icmp6h->icmp6_code;
+ ndmsg->nduseropt_opts_len = opt->nd_opt_len << 3;
+
+ memcpy(ndmsg + 1, opt, opt->nd_opt_len << 3);
+
+ NLA_PUT(skb, NDUSEROPT_SRCADDR, sizeof(struct in6_addr),
+ &ipv6_hdr(ra)->saddr);
+ nlmsg_end(skb, nlh);
+
+ rtnl_notify(skb, net, 0, RTNLGRP_ND_USEROPT, NULL, GFP_ATOMIC);
+ return;
+
+nla_put_failure:
+ nlmsg_free(skb);
+ err = -EMSGSIZE;
+errout:
+ rtnl_set_sk_err(net, RTNLGRP_ND_USEROPT, err);
+}
+
+static inline int accept_ra(struct inet6_dev *in6_dev)
+{
+ /*
+ * If forwarding is enabled, RA are not accepted unless the special
+ * hybrid mode (accept_ra=2) is enabled.
+ */
+ if (in6_dev->cnf.forwarding && in6_dev->cnf.accept_ra < 2)
+ return 0;
+
+ return in6_dev->cnf.accept_ra;
+}
+
+static void ndisc_router_discovery(struct sk_buff *skb)
+{
+ struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb);
+ struct neighbour *neigh = NULL;
+ struct inet6_dev *in6_dev;
+ struct rt6_info *rt = NULL;
+ int lifetime;
+ struct ndisc_options ndopts;
+ int optlen;
+ unsigned int pref = 0;
+
+ __u8 * opt = (__u8 *)(ra_msg + 1);
+
+ optlen = (skb->tail - skb->transport_header) - sizeof(struct ra_msg);
+
+ if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
+ ND_PRINTK2(KERN_WARNING
+ "ICMPv6 RA: source address is not link-local.\n");
+ return;
+ }
+ if (optlen < 0) {
+ ND_PRINTK2(KERN_WARNING
+ "ICMPv6 RA: packet too short\n");
+ return;
+ }
+
+#ifdef CONFIG_IPV6_NDISC_NODETYPE
+ if (skb->ndisc_nodetype == NDISC_NODETYPE_HOST) {
+ ND_PRINTK2(KERN_WARNING
+ "ICMPv6 RA: from host or unauthorized router\n");
+ return;
+ }
+#endif
+
+ /*
+ * set the RA_RECV flag in the interface
+ */
+
+ in6_dev = __in6_dev_get(skb->dev);
+ if (in6_dev == NULL) {
+ ND_PRINTK0(KERN_ERR
+ "ICMPv6 RA: can't find inet6 device for %s.\n",
+ skb->dev->name);
+ return;
+ }
+
+ if (!ndisc_parse_options(opt, optlen, &ndopts)) {
+ ND_PRINTK2(KERN_WARNING
+ "ICMP6 RA: invalid ND options\n");
+ return;
+ }
+
+ if (!accept_ra(in6_dev))
+ goto skip_linkparms;
+
+#ifdef CONFIG_IPV6_NDISC_NODETYPE
+ /* skip link-specific parameters from interior routers */
+ if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT)
+ goto skip_linkparms;
+#endif
+
+ if (in6_dev->if_flags & IF_RS_SENT) {
+ /*
+ * flag that an RA was received after an RS was sent
+ * out on this interface.
+ */
+ in6_dev->if_flags |= IF_RA_RCVD;
+ }
+
+ /*
+ * Remember the managed/otherconf flags from most recently
+ * received RA message (RFC 2462) -- yoshfuji
+ */
+ in6_dev->if_flags = (in6_dev->if_flags & ~(IF_RA_MANAGED |
+ IF_RA_OTHERCONF)) |
+ (ra_msg->icmph.icmp6_addrconf_managed ?
+ IF_RA_MANAGED : 0) |
+ (ra_msg->icmph.icmp6_addrconf_other ?
+ IF_RA_OTHERCONF : 0);
+
+ if (!in6_dev->cnf.accept_ra_defrtr)
+ goto skip_defrtr;
+
+ if (ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, NULL, 0))
+ goto skip_defrtr;
+
+ lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime);
+
+#ifdef CONFIG_IPV6_ROUTER_PREF
+ pref = ra_msg->icmph.icmp6_router_pref;
+ /* 10b is handled as if it were 00b (medium) */
+ if (pref == ICMPV6_ROUTER_PREF_INVALID ||
+ !in6_dev->cnf.accept_ra_rtr_pref)
+ pref = ICMPV6_ROUTER_PREF_MEDIUM;
+#endif
+
+ rt = rt6_get_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev);
+
+ if (rt) {
+ neigh = dst_neigh_lookup(&rt->dst, &ipv6_hdr(skb)->saddr);
+ if (!neigh) {
+ ND_PRINTK0(KERN_ERR
+ "ICMPv6 RA: %s() got default router without neighbour.\n",
+ __func__);
+ dst_release(&rt->dst);
+ return;
+ }
+ }
+ if (rt && lifetime == 0) {
+ ip6_del_rt(rt);
+ rt = NULL;
+ }
+
+ if (rt == NULL && lifetime) {
+ ND_PRINTK3(KERN_DEBUG
+ "ICMPv6 RA: adding default router.\n");
+
+ rt = rt6_add_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev, pref);
+ if (rt == NULL) {
+ ND_PRINTK0(KERN_ERR
+ "ICMPv6 RA: %s() failed to add default route.\n",
+ __func__);
+ return;
+ }
+
+ neigh = dst_neigh_lookup(&rt->dst, &ipv6_hdr(skb)->saddr);
+ if (neigh == NULL) {
+ ND_PRINTK0(KERN_ERR
+ "ICMPv6 RA: %s() got default router without neighbour.\n",
+ __func__);
+ dst_release(&rt->dst);
+ return;
+ }
+ neigh->flags |= NTF_ROUTER;
+ } else if (rt) {
+ rt->rt6i_flags = (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
+ }
+
+ if (rt)
+ rt6_set_expires(rt, jiffies + (HZ * lifetime));
+ if (ra_msg->icmph.icmp6_hop_limit) {
+ in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit;
+ if (rt)
+ dst_metric_set(&rt->dst, RTAX_HOPLIMIT,
+ ra_msg->icmph.icmp6_hop_limit);
+ }
+
+skip_defrtr:
+
+ /*
+ * Update Reachable Time and Retrans Timer
+ */
+
+ if (in6_dev->nd_parms) {
+ unsigned long rtime = ntohl(ra_msg->retrans_timer);
+
+ if (rtime && rtime/1000 < MAX_SCHEDULE_TIMEOUT/HZ) {
+ rtime = (rtime*HZ)/1000;
+ if (rtime < HZ/10)
+ rtime = HZ/10;
+ in6_dev->nd_parms->retrans_time = rtime;
+ in6_dev->tstamp = jiffies;
+ inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);
+ }
+
+ rtime = ntohl(ra_msg->reachable_time);
+ if (rtime && rtime/1000 < MAX_SCHEDULE_TIMEOUT/(3*HZ)) {
+ rtime = (rtime*HZ)/1000;
+
+ if (rtime < HZ/10)
+ rtime = HZ/10;
+
+ if (rtime != in6_dev->nd_parms->base_reachable_time) {
+ in6_dev->nd_parms->base_reachable_time = rtime;
+ in6_dev->nd_parms->gc_staletime = 3 * rtime;
+ in6_dev->nd_parms->reachable_time = neigh_rand_reach_time(rtime);
+ in6_dev->tstamp = jiffies;
+ inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);
+ }
+ }
+ }
+
+skip_linkparms:
+
+ /*
+ * Process options.
+ */
+
+ if (!neigh)
+ neigh = __neigh_lookup(&nd_tbl, &ipv6_hdr(skb)->saddr,
+ skb->dev, 1);
+ if (neigh) {
+ u8 *lladdr = NULL;
+ if (ndopts.nd_opts_src_lladdr) {
+ lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr,
+ skb->dev);
+ if (!lladdr) {
+ ND_PRINTK2(KERN_WARNING
+ "ICMPv6 RA: invalid link-layer address length\n");
+ goto out;
+ }
+ }
+ neigh_update(neigh, lladdr, NUD_STALE,
+ NEIGH_UPDATE_F_WEAK_OVERRIDE|
+ NEIGH_UPDATE_F_OVERRIDE|
+ NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
+ NEIGH_UPDATE_F_ISROUTER);
+ }
+
+ if (!accept_ra(in6_dev))
+ goto out;
+
+#ifdef CONFIG_IPV6_ROUTE_INFO
+ if (ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, NULL, 0))
+ goto skip_routeinfo;
+
+ if (in6_dev->cnf.accept_ra_rtr_pref && ndopts.nd_opts_ri) {
+ struct nd_opt_hdr *p;
+ for (p = ndopts.nd_opts_ri;
+ p;
+ p = ndisc_next_option(p, ndopts.nd_opts_ri_end)) {
+ struct route_info *ri = (struct route_info *)p;
+#ifdef CONFIG_IPV6_NDISC_NODETYPE
+ if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT &&
+ ri->prefix_len == 0)
+ continue;
+#endif
+ if (ri->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen)
+ continue;
+ rt6_route_rcv(skb->dev, (u8*)p, (p->nd_opt_len) << 3,
+ &ipv6_hdr(skb)->saddr);
+ }
+ }
+
+skip_routeinfo:
+#endif
+
+#ifdef CONFIG_IPV6_NDISC_NODETYPE
+ /* skip link-specific ndopts from interior routers */
+ if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT)
+ goto out;
+#endif
+
+ if (in6_dev->cnf.accept_ra_pinfo && ndopts.nd_opts_pi) {
+ struct nd_opt_hdr *p;
+ for (p = ndopts.nd_opts_pi;
+ p;
+ p = ndisc_next_option(p, ndopts.nd_opts_pi_end)) {
+ addrconf_prefix_rcv(skb->dev, (u8 *)p,
+ (p->nd_opt_len) << 3,
+ ndopts.nd_opts_src_lladdr != NULL);
+ }
+ }
+
+ if (ndopts.nd_opts_mtu) {
+ __be32 n;
+ u32 mtu;
+
+ memcpy(&n, ((u8*)(ndopts.nd_opts_mtu+1))+2, sizeof(mtu));
+ mtu = ntohl(n);
+
+ if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) {
+ ND_PRINTK2(KERN_WARNING
+ "ICMPv6 RA: invalid mtu: %d\n",
+ mtu);
+ } else if (in6_dev->cnf.mtu6 != mtu) {
+ in6_dev->cnf.mtu6 = mtu;
+
+ if (rt)
+ dst_metric_set(&rt->dst, RTAX_MTU, mtu);
+
+ rt6_mtu_change(skb->dev, mtu);
+ }
+ }
+
+ if (ndopts.nd_useropts) {
+ struct nd_opt_hdr *p;
+ for (p = ndopts.nd_useropts;
+ p;
+ p = ndisc_next_useropt(p, ndopts.nd_useropts_end)) {
+ ndisc_ra_useropt(skb, p);
+ }
+ }
+
+ if (ndopts.nd_opts_tgt_lladdr || ndopts.nd_opts_rh) {
+ ND_PRINTK2(KERN_WARNING
+ "ICMPv6 RA: invalid RA options");
+ }
+out:
+ if (rt)
+ dst_release(&rt->dst);
+ if (neigh)
+ neigh_release(neigh);
+}
+
+static void ndisc_redirect_rcv(struct sk_buff *skb)
+{
+ struct inet6_dev *in6_dev;
+ struct icmp6hdr *icmph;
+ const struct in6_addr *dest;
+ const struct in6_addr *target; /* new first hop to destination */
+ struct neighbour *neigh;
+ int on_link = 0;
+ struct ndisc_options ndopts;
+ int optlen;
+ u8 *lladdr = NULL;
+
+#ifdef CONFIG_IPV6_NDISC_NODETYPE
+ switch (skb->ndisc_nodetype) {
+ case NDISC_NODETYPE_HOST:
+ case NDISC_NODETYPE_NODEFAULT:
+ ND_PRINTK2(KERN_WARNING
+ "ICMPv6 Redirect: from host or unauthorized router\n");
+ return;
+ }
+#endif
+
+ if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
+ ND_PRINTK2(KERN_WARNING
+ "ICMPv6 Redirect: source address is not link-local.\n");
+ return;
+ }
+
+ optlen = skb->tail - skb->transport_header;
+ optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
+
+ if (optlen < 0) {
+ ND_PRINTK2(KERN_WARNING
+ "ICMPv6 Redirect: packet too short\n");
+ return;
+ }
+
+ icmph = icmp6_hdr(skb);
+ target = (const struct in6_addr *) (icmph + 1);
+ dest = target + 1;
+
+ if (ipv6_addr_is_multicast(dest)) {
+ ND_PRINTK2(KERN_WARNING
+ "ICMPv6 Redirect: destination address is multicast.\n");
+ return;
+ }
+
+ if (ipv6_addr_equal(dest, target)) {
+ on_link = 1;
+ } else if (ipv6_addr_type(target) !=
+ (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
+ ND_PRINTK2(KERN_WARNING
+ "ICMPv6 Redirect: target address is not link-local unicast.\n");
+ return;
+ }
+
+ in6_dev = __in6_dev_get(skb->dev);
+ if (!in6_dev)
+ return;
+ if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
+ return;
+
+ /* RFC2461 8.1:
+ * The IP source address of the Redirect MUST be the same as the current
+ * first-hop router for the specified ICMP Destination Address.
+ */
+
+ if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
+ ND_PRINTK2(KERN_WARNING
+ "ICMPv6 Redirect: invalid ND options\n");
+ return;
+ }
+ if (ndopts.nd_opts_tgt_lladdr) {
+ lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
+ skb->dev);
+ if (!lladdr) {
+ ND_PRINTK2(KERN_WARNING
+ "ICMPv6 Redirect: invalid link-layer address length\n");
+ return;
+ }
+ }
+
+ neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
+ if (neigh) {
+ rt6_redirect(dest, &ipv6_hdr(skb)->daddr,
+ &ipv6_hdr(skb)->saddr, neigh, lladdr,
+ on_link);
+ neigh_release(neigh);
+ }
+}
+
+void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
+{
+ struct net_device *dev = skb->dev;
+ struct net *net = dev_net(dev);
+ struct sock *sk = net->ipv6.ndisc_sk;
+ int len = sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
+ struct sk_buff *buff;
+ struct icmp6hdr *icmph;
+ struct in6_addr saddr_buf;
+ struct in6_addr *addrp;
+ struct rt6_info *rt;
+ struct dst_entry *dst;
+ struct inet6_dev *idev;
+ struct flowi6 fl6;
+ u8 *opt;
+ int hlen, tlen;
+ int rd_len;
+ int err;
+ u8 ha_buf[MAX_ADDR_LEN], *ha = NULL;
+
+ if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) {
+ ND_PRINTK2(KERN_WARNING
+ "ICMPv6 Redirect: no link-local address on %s\n",
+ dev->name);
+ return;
+ }
+
+ if (!ipv6_addr_equal(&ipv6_hdr(skb)->daddr, target) &&
+ ipv6_addr_type(target) != (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
+ ND_PRINTK2(KERN_WARNING
+ "ICMPv6 Redirect: target address is not link-local unicast.\n");
+ return;
+ }
+
+ icmpv6_flow_init(sk, &fl6, NDISC_REDIRECT,
+ &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex);
+
+ dst = ip6_route_output(net, NULL, &fl6);
+ if (dst->error) {
+ dst_release(dst);
+ return;
+ }
+ dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
+ if (IS_ERR(dst))
+ return;
+
+ rt = (struct rt6_info *) dst;
+
+ if (rt->rt6i_flags & RTF_GATEWAY) {
+ ND_PRINTK2(KERN_WARNING
+ "ICMPv6 Redirect: destination is not a neighbour.\n");
+ goto release;
+ }
+ if (!rt->rt6i_peer)
+ rt6_bind_peer(rt, 1);
+ if (!inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
+ goto release;
+
+ if (dev->addr_len) {
+ struct neighbour *neigh = dst_neigh_lookup(skb_dst(skb), target);
+ if (!neigh) {
+ ND_PRINTK2(KERN_WARNING
+ "ICMPv6 Redirect: no neigh for target address\n");
+ goto release;
+ }
+
+ read_lock_bh(&neigh->lock);
+ if (neigh->nud_state & NUD_VALID) {
+ memcpy(ha_buf, neigh->ha, dev->addr_len);
+ read_unlock_bh(&neigh->lock);
+ ha = ha_buf;
+ len += ndisc_opt_addr_space(dev);
+ } else
+ read_unlock_bh(&neigh->lock);
+
+ neigh_release(neigh);
+ }
+
+ rd_len = min_t(unsigned int,
+ IPV6_MIN_MTU-sizeof(struct ipv6hdr)-len, skb->len + 8);
+ rd_len &= ~0x7;
+ len += rd_len;
+
+ hlen = LL_RESERVED_SPACE(dev);
+ tlen = dev->needed_tailroom;
+ buff = sock_alloc_send_skb(sk,
+ (MAX_HEADER + sizeof(struct ipv6hdr) +
+ len + hlen + tlen),
+ 1, &err);
+ if (buff == NULL) {
+ ND_PRINTK0(KERN_ERR
+ "ICMPv6 Redirect: %s() failed to allocate an skb, err=%d.\n",
+ __func__, err);
+ goto release;
+ }
+
+ skb_reserve(buff, hlen);
+ ip6_nd_hdr(sk, buff, dev, &saddr_buf, &ipv6_hdr(skb)->saddr,
+ IPPROTO_ICMPV6, len);
+
+ skb_set_transport_header(buff, skb_tail_pointer(buff) - buff->data);
+ skb_put(buff, len);
+ icmph = icmp6_hdr(buff);
+
+ memset(icmph, 0, sizeof(struct icmp6hdr));
+ icmph->icmp6_type = NDISC_REDIRECT;
+
+ /*
+ * copy target and destination addresses
+ */
+
+ addrp = (struct in6_addr *)(icmph + 1);
+ *addrp = *target;
+ addrp++;
+ *addrp = ipv6_hdr(skb)->daddr;
+
+ opt = (u8*) (addrp + 1);
+
+ /*
+ * include target_address option
+ */
+
+ if (ha)
+ opt = ndisc_fill_addr_option(opt, ND_OPT_TARGET_LL_ADDR, ha,
+ dev->addr_len, dev->type);
+
+ /*
+ * build redirect option and copy skb over to the new packet.
+ */
+
+ memset(opt, 0, 8);
+ *(opt++) = ND_OPT_REDIRECT_HDR;
+ *(opt++) = (rd_len >> 3);
+ opt += 6;
+
+ memcpy(opt, ipv6_hdr(skb), rd_len - 8);
+
+ icmph->icmp6_cksum = csum_ipv6_magic(&saddr_buf, &ipv6_hdr(skb)->saddr,
+ len, IPPROTO_ICMPV6,
+ csum_partial(icmph, len, 0));
+
+ skb_dst_set(buff, dst);
+ rcu_read_lock();
+ idev = __in6_dev_get(dst->dev);
+ IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
+ err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, buff, NULL, dst->dev,
+ dst_output);
+ if (!err) {
+ ICMP6MSGOUT_INC_STATS(net, idev, NDISC_REDIRECT);
+ ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
+ }
+
+ rcu_read_unlock();
+ return;
+
+release:
+ dst_release(dst);
+}
+
+static void pndisc_redo(struct sk_buff *skb)
+{
+ ndisc_recv_ns(skb);
+ kfree_skb(skb);
+}
+
+int ndisc_rcv(struct sk_buff *skb)
+{
+ struct nd_msg *msg;
+
+ if (!pskb_may_pull(skb, skb->len))
+ return 0;
+
+ msg = (struct nd_msg *)skb_transport_header(skb);
+
+ __skb_push(skb, skb->data - skb_transport_header(skb));
+
+ if (ipv6_hdr(skb)->hop_limit != 255) {
+ ND_PRINTK2(KERN_WARNING
+ "ICMPv6 NDISC: invalid hop-limit: %d\n",
+ ipv6_hdr(skb)->hop_limit);
+ return 0;
+ }
+
+ if (msg->icmph.icmp6_code != 0) {
+ ND_PRINTK2(KERN_WARNING
+ "ICMPv6 NDISC: invalid ICMPv6 code: %d\n",
+ msg->icmph.icmp6_code);
+ return 0;
+ }
+
+ memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
+
+ switch (msg->icmph.icmp6_type) {
+ case NDISC_NEIGHBOUR_SOLICITATION:
+ ndisc_recv_ns(skb);
+ break;
+
+ case NDISC_NEIGHBOUR_ADVERTISEMENT:
+ ndisc_recv_na(skb);
+ break;
+
+ case NDISC_ROUTER_SOLICITATION:
+ ndisc_recv_rs(skb);
+ break;
+
+ case NDISC_ROUTER_ADVERTISEMENT:
+ ndisc_router_discovery(skb);
+ break;
+
+ case NDISC_REDIRECT:
+ ndisc_redirect_rcv(skb);
+ break;
+ }
+
+ return 0;
+}
+
+static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+ struct net_device *dev = ptr;
+ struct net *net = dev_net(dev);
+
+ switch (event) {
+ case NETDEV_CHANGEADDR:
+ neigh_changeaddr(&nd_tbl, dev);
+ fib6_run_gc(~0UL, net);
+ break;
+ case NETDEV_DOWN:
+ neigh_ifdown(&nd_tbl, dev);
+ fib6_run_gc(~0UL, net);
+ break;
+ case NETDEV_NOTIFY_PEERS:
+ ndisc_send_unsol_na(dev);
+ break;
+ default:
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block ndisc_netdev_notifier = {
+ .notifier_call = ndisc_netdev_event,
+};
+
+#ifdef CONFIG_SYSCTL
+static void ndisc_warn_deprecated_sysctl(struct ctl_table *ctl,
+ const char *func, const char *dev_name)
+{
+ static char warncomm[TASK_COMM_LEN];
+ static int warned;
+ if (strcmp(warncomm, current->comm) && warned < 5) {
+ strcpy(warncomm, current->comm);
+ printk(KERN_WARNING
+ "process `%s' is using deprecated sysctl (%s) "
+ "net.ipv6.neigh.%s.%s; "
+ "Use net.ipv6.neigh.%s.%s_ms "
+ "instead.\n",
+ warncomm, func,
+ dev_name, ctl->procname,
+ dev_name, ctl->procname);
+ warned++;
+ }
+}
+
+int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct net_device *dev = ctl->extra1;
+ struct inet6_dev *idev;
+ int ret;
+
+ if ((strcmp(ctl->procname, "retrans_time") == 0) ||
+ (strcmp(ctl->procname, "base_reachable_time") == 0))
+ ndisc_warn_deprecated_sysctl(ctl, "syscall", dev ? dev->name : "default");
+
+ if (strcmp(ctl->procname, "retrans_time") == 0)
+ ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
+
+ else if (strcmp(ctl->procname, "base_reachable_time") == 0)
+ ret = proc_dointvec_jiffies(ctl, write,
+ buffer, lenp, ppos);
+
+ else if ((strcmp(ctl->procname, "retrans_time_ms") == 0) ||
+ (strcmp(ctl->procname, "base_reachable_time_ms") == 0))
+ ret = proc_dointvec_ms_jiffies(ctl, write,
+ buffer, lenp, ppos);
+ else
+ ret = -1;
+
+ if (write && ret == 0 && dev && (idev = in6_dev_get(dev)) != NULL) {
+ if (ctl->data == &idev->nd_parms->base_reachable_time)
+ idev->nd_parms->reachable_time = neigh_rand_reach_time(idev->nd_parms->base_reachable_time);
+ idev->tstamp = jiffies;
+ inet6_ifinfo_notify(RTM_NEWLINK, idev);
+ in6_dev_put(idev);
+ }
+ return ret;
+}
+
+
+#endif
+
+static int __net_init ndisc_net_init(struct net *net)
+{
+ struct ipv6_pinfo *np;
+ struct sock *sk;
+ int err;
+
+ err = inet_ctl_sock_create(&sk, PF_INET6,
+ SOCK_RAW, IPPROTO_ICMPV6, net);
+ if (err < 0) {
+ ND_PRINTK0(KERN_ERR
+ "ICMPv6 NDISC: Failed to initialize the control socket (err %d).\n",
+ err);
+ return err;
+ }
+
+ net->ipv6.ndisc_sk = sk;
+
+ np = inet6_sk(sk);
+ np->hop_limit = 255;
+ /* Do not loopback ndisc messages */
+ np->mc_loop = 0;
+
+ return 0;
+}
+
+static void __net_exit ndisc_net_exit(struct net *net)
+{
+ inet_ctl_sock_destroy(net->ipv6.ndisc_sk);
+}
+
+static struct pernet_operations ndisc_net_ops = {
+ .init = ndisc_net_init,
+ .exit = ndisc_net_exit,
+};
+
+int __init ndisc_init(void)
+{
+ int err;
+
+ err = register_pernet_subsys(&ndisc_net_ops);
+ if (err)
+ return err;
+ /*
+ * Initialize the neighbour table
+ */
+ neigh_table_init(&nd_tbl);
+
+#ifdef CONFIG_SYSCTL
+ err = neigh_sysctl_register(NULL, &nd_tbl.parms, "ipv6",
+ &ndisc_ifinfo_sysctl_change);
+ if (err)
+ goto out_unregister_pernet;
+#endif
+ err = register_netdevice_notifier(&ndisc_netdev_notifier);
+ if (err)
+ goto out_unregister_sysctl;
+out:
+ return err;
+
+out_unregister_sysctl:
+#ifdef CONFIG_SYSCTL
+ neigh_sysctl_unregister(&nd_tbl.parms);
+out_unregister_pernet:
+#endif
+ unregister_pernet_subsys(&ndisc_net_ops);
+ goto out;
+}
+
+void ndisc_cleanup(void)
+{
+ unregister_netdevice_notifier(&ndisc_netdev_notifier);
+#ifdef CONFIG_SYSCTL
+ neigh_sysctl_unregister(&nd_tbl.parms);
+#endif
+ neigh_table_clear(&nd_tbl);
+ unregister_pernet_subsys(&ndisc_net_ops);
+}
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
new file mode 100644
index 00000000..db31561c
--- /dev/null
+++ b/net/ipv6/netfilter.c
@@ -0,0 +1,196 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/ipv6.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/export.h>
+#include <net/dst.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/xfrm.h>
+#include <net/ip6_checksum.h>
+#include <net/netfilter/nf_queue.h>
+
+int ip6_route_me_harder(struct sk_buff *skb)
+{
+ struct net *net = dev_net(skb_dst(skb)->dev);
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
+ struct dst_entry *dst;
+ struct flowi6 fl6 = {
+ .flowi6_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0,
+ .flowi6_mark = skb->mark,
+ .daddr = iph->daddr,
+ .saddr = iph->saddr,
+ };
+
+ dst = ip6_route_output(net, skb->sk, &fl6);
+ if (dst->error) {
+ IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
+ LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n");
+ dst_release(dst);
+ return -EINVAL;
+ }
+
+ /* Drop old route. */
+ skb_dst_drop(skb);
+
+ skb_dst_set(skb, dst);
+
+#ifdef CONFIG_XFRM
+ if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
+ xfrm_decode_session(skb, flowi6_to_flowi(&fl6), AF_INET6) == 0) {
+ skb_dst_set(skb, NULL);
+ dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), skb->sk, 0);
+ if (IS_ERR(dst))
+ return -1;
+ skb_dst_set(skb, dst);
+ }
+#endif
+
+ return 0;
+}
+EXPORT_SYMBOL(ip6_route_me_harder);
+
+/*
+ * Extra routing may needed on local out, as the QUEUE target never
+ * returns control to the table.
+ */
+
+struct ip6_rt_info {
+ struct in6_addr daddr;
+ struct in6_addr saddr;
+ u_int32_t mark;
+};
+
+static void nf_ip6_saveroute(const struct sk_buff *skb,
+ struct nf_queue_entry *entry)
+{
+ struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);
+
+ if (entry->hook == NF_INET_LOCAL_OUT) {
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
+
+ rt_info->daddr = iph->daddr;
+ rt_info->saddr = iph->saddr;
+ rt_info->mark = skb->mark;
+ }
+}
+
+static int nf_ip6_reroute(struct sk_buff *skb,
+ const struct nf_queue_entry *entry)
+{
+ struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);
+
+ if (entry->hook == NF_INET_LOCAL_OUT) {
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
+ if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) ||
+ !ipv6_addr_equal(&iph->saddr, &rt_info->saddr) ||
+ skb->mark != rt_info->mark)
+ return ip6_route_me_harder(skb);
+ }
+ return 0;
+}
+
+static int nf_ip6_route(struct net *net, struct dst_entry **dst,
+ struct flowi *fl, bool strict)
+{
+ static const struct ipv6_pinfo fake_pinfo;
+ static const struct inet_sock fake_sk = {
+ /* makes ip6_route_output set RT6_LOOKUP_F_IFACE: */
+ .sk.sk_bound_dev_if = 1,
+ .pinet6 = (struct ipv6_pinfo *) &fake_pinfo,
+ };
+ const void *sk = strict ? &fake_sk : NULL;
+ struct dst_entry *result;
+ int err;
+
+ result = ip6_route_output(net, sk, &fl->u.ip6);
+ err = result->error;
+ if (err)
+ dst_release(result);
+ else
+ *dst = result;
+ return err;
+}
+
+__sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
+ unsigned int dataoff, u_int8_t protocol)
+{
+ const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ __sum16 csum = 0;
+
+ switch (skb->ip_summed) {
+ case CHECKSUM_COMPLETE:
+ if (hook != NF_INET_PRE_ROUTING && hook != NF_INET_LOCAL_IN)
+ break;
+ if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
+ skb->len - dataoff, protocol,
+ csum_sub(skb->csum,
+ skb_checksum(skb, 0,
+ dataoff, 0)))) {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ break;
+ }
+ /* fall through */
+ case CHECKSUM_NONE:
+ skb->csum = ~csum_unfold(
+ csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
+ skb->len - dataoff,
+ protocol,
+ csum_sub(0,
+ skb_checksum(skb, 0,
+ dataoff, 0))));
+ csum = __skb_checksum_complete(skb);
+ }
+ return csum;
+}
+EXPORT_SYMBOL(nf_ip6_checksum);
+
+static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook,
+ unsigned int dataoff, unsigned int len,
+ u_int8_t protocol)
+{
+ const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ __wsum hsum;
+ __sum16 csum = 0;
+
+ switch (skb->ip_summed) {
+ case CHECKSUM_COMPLETE:
+ if (len == skb->len - dataoff)
+ return nf_ip6_checksum(skb, hook, dataoff, protocol);
+ /* fall through */
+ case CHECKSUM_NONE:
+ hsum = skb_checksum(skb, 0, dataoff, 0);
+ skb->csum = ~csum_unfold(csum_ipv6_magic(&ip6h->saddr,
+ &ip6h->daddr,
+ skb->len - dataoff,
+ protocol,
+ csum_sub(0, hsum)));
+ skb->ip_summed = CHECKSUM_NONE;
+ return __skb_checksum_complete_head(skb, dataoff + len);
+ }
+ return csum;
+};
+
+static const struct nf_afinfo nf_ip6_afinfo = {
+ .family = AF_INET6,
+ .checksum = nf_ip6_checksum,
+ .checksum_partial = nf_ip6_checksum_partial,
+ .route = nf_ip6_route,
+ .saveroute = nf_ip6_saveroute,
+ .reroute = nf_ip6_reroute,
+ .route_key_size = sizeof(struct ip6_rt_info),
+};
+
+int __init ipv6_netfilter_init(void)
+{
+ return nf_register_afinfo(&nf_ip6_afinfo);
+}
+
+/* This can be called from inet6_init() on errors, so it cannot
+ * be marked __exit. -DaveM
+ */
+void ipv6_netfilter_fini(void)
+{
+ nf_unregister_afinfo(&nf_ip6_afinfo);
+}
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
new file mode 100644
index 00000000..acd7c9e1
--- /dev/null
+++ b/net/ipv6/netfilter/Kconfig
@@ -0,0 +1,224 @@
+#
+# IP netfilter configuration
+#
+
+menu "IPv6: Netfilter Configuration"
+ depends on INET && IPV6 && NETFILTER
+
+config NF_DEFRAG_IPV6
+ tristate
+ default n
+
+config NF_CONNTRACK_IPV6
+ tristate "IPv6 connection tracking support"
+ depends on INET && IPV6 && NF_CONNTRACK
+ default m if NETFILTER_ADVANCED=n
+ select NF_DEFRAG_IPV6
+ ---help---
+ Connection tracking keeps a record of what packets have passed
+ through your machine, in order to figure out how they are related
+ into connections.
+
+ This is IPv6 support on Layer 3 independent connection tracking.
+ Layer 3 independent connection tracking is experimental scheme
+ which generalize ip_conntrack to support other layer 3 protocols.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP6_NF_QUEUE
+ tristate "IP6 Userspace queueing via NETLINK (OBSOLETE)"
+ depends on INET && IPV6 && NETFILTER
+ depends on NETFILTER_ADVANCED
+ ---help---
+
+ This option adds a queue handler to the kernel for IPv6
+ packets which enables users to receive the filtered packets
+ with QUEUE target using libipq.
+
+ This option enables the old IPv6-only "ip6_queue" implementation
+ which has been obsoleted by the new "nfnetlink_queue" code (see
+ CONFIG_NETFILTER_NETLINK_QUEUE).
+
+ (C) Fernando Anton 2001
+ IPv64 Project - Work based in IPv64 draft by Arturo Azcorra.
+ Universidad Carlos III de Madrid
+ Universidad Politecnica de Alcala de Henares
+ email: <fanton@it.uc3m.es>.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP6_NF_IPTABLES
+ tristate "IP6 tables support (required for filtering)"
+ depends on INET && IPV6
+ select NETFILTER_XTABLES
+ default m if NETFILTER_ADVANCED=n
+ help
+ ip6tables is a general, extensible packet identification framework.
+ Currently only the packet filtering and packet mangling subsystem
+ for IPv6 use this, but connection tracking is going to follow.
+ Say 'Y' or 'M' here if you want to use either of those.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+if IP6_NF_IPTABLES
+
+# The simple matches.
+config IP6_NF_MATCH_AH
+ tristate '"ah" match support'
+ depends on NETFILTER_ADVANCED
+ help
+ This module allows one to match AH packets.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP6_NF_MATCH_EUI64
+ tristate '"eui64" address check'
+ depends on NETFILTER_ADVANCED
+ help
+ This module performs checking on the IPv6 source address
+ Compares the last 64 bits with the EUI64 (delivered
+ from the MAC address) address
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP6_NF_MATCH_FRAG
+ tristate '"frag" Fragmentation header match support'
+ depends on NETFILTER_ADVANCED
+ help
+ frag matching allows you to match packets based on the fragmentation
+ header of the packet.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP6_NF_MATCH_OPTS
+ tristate '"hbh" hop-by-hop and "dst" opts header match support'
+ depends on NETFILTER_ADVANCED
+ help
+ This allows one to match packets based on the hop-by-hop
+ and destination options headers of a packet.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP6_NF_MATCH_HL
+ tristate '"hl" hoplimit match support'
+ depends on NETFILTER_ADVANCED
+ select NETFILTER_XT_MATCH_HL
+ ---help---
+ This is a backwards-compat option for the user's convenience
+ (e.g. when running oldconfig). It selects
+ CONFIG_NETFILTER_XT_MATCH_HL.
+
+config IP6_NF_MATCH_IPV6HEADER
+ tristate '"ipv6header" IPv6 Extension Headers Match'
+ default m if NETFILTER_ADVANCED=n
+ help
+ This module allows one to match packets based upon
+ the ipv6 extension headers.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP6_NF_MATCH_MH
+ tristate '"mh" match support'
+ depends on NETFILTER_ADVANCED
+ help
+ This module allows one to match MH packets.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP6_NF_MATCH_RPFILTER
+ tristate '"rpfilter" reverse path filter match support'
+ depends on NETFILTER_ADVANCED
+ ---help---
+ This option allows you to match packets whose replies would
+ go out via the interface the packet came in.
+
+ To compile it as a module, choose M here. If unsure, say N.
+ The module will be called ip6t_rpfilter.
+
+config IP6_NF_MATCH_RT
+ tristate '"rt" Routing header match support'
+ depends on NETFILTER_ADVANCED
+ help
+ rt matching allows you to match packets based on the routing
+ header of the packet.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+# The targets
+config IP6_NF_TARGET_HL
+ tristate '"HL" hoplimit target support'
+ depends on NETFILTER_ADVANCED && IP6_NF_MANGLE
+ select NETFILTER_XT_TARGET_HL
+ ---help---
+ This is a backwards-compatible option for the user's convenience
+ (e.g. when running oldconfig). It selects
+ CONFIG_NETFILTER_XT_TARGET_HL.
+
+config IP6_NF_FILTER
+ tristate "Packet filtering"
+ default m if NETFILTER_ADVANCED=n
+ help
+ Packet filtering defines a table `filter', which has a series of
+ rules for simple packet filtering at local input, forwarding and
+ local output. See the man page for iptables(8).
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP6_NF_TARGET_REJECT
+ tristate "REJECT target support"
+ depends on IP6_NF_FILTER
+ default m if NETFILTER_ADVANCED=n
+ help
+ The REJECT target allows a filtering rule to specify that an ICMPv6
+ error should be issued in response to an incoming packet, rather
+ than silently being dropped.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP6_NF_TARGET_REJECT_SKERR
+ bool "Force socket error when rejecting with icmp*"
+ depends on IP6_NF_TARGET_REJECT
+ default n
+ help
+ This option enables turning a "--reject-with icmp*" into a matching
+ socket error also.
+ The REJECT target normally allows sending an ICMP message. But it
+ leaves the local socket unaware of any ingress rejects.
+
+ If unsure, say N.
+
+config IP6_NF_MANGLE
+ tristate "Packet mangling"
+ default m if NETFILTER_ADVANCED=n
+ help
+ This option adds a `mangle' table to iptables: see the man page for
+ iptables(8). This table is used for various packet alterations
+ which can effect how the packet is routed.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP6_NF_RAW
+ tristate 'raw table support (required for TRACE)'
+ help
+ This option adds a `raw' table to ip6tables. This table is the very
+ first in the netfilter framework and hooks in at the PREROUTING
+ and OUTPUT chains.
+
+ If you want to compile it as a module, say M here and read
+ <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
+
+# security table for MAC policy
+config IP6_NF_SECURITY
+ tristate "Security table"
+ depends on SECURITY
+ depends on NETFILTER_ADVANCED
+ help
+ This option adds a `security' table to iptables, for use
+ with Mandatory Access Control (MAC) policy.
+
+ If unsure, say N.
+
+endif # IP6_NF_IPTABLES
+
+endmenu
+
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
new file mode 100644
index 00000000..d4dfd0a2
--- /dev/null
+++ b/net/ipv6/netfilter/Makefile
@@ -0,0 +1,34 @@
+#
+# Makefile for the netfilter modules on top of IPv6.
+#
+
+# Link order matters here.
+obj-$(CONFIG_IP6_NF_IPTABLES) += ip6_tables.o
+obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o
+obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o
+obj-$(CONFIG_IP6_NF_QUEUE) += ip6_queue.o
+obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
+obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o
+
+# objects for l3 independent conntrack
+nf_conntrack_ipv6-y := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o
+
+# l3 independent conntrack
+obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o nf_defrag_ipv6.o
+
+# defrag
+nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
+obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o
+
+# matches
+obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
+obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o
+obj-$(CONFIG_IP6_NF_MATCH_FRAG) += ip6t_frag.o
+obj-$(CONFIG_IP6_NF_MATCH_IPV6HEADER) += ip6t_ipv6header.o
+obj-$(CONFIG_IP6_NF_MATCH_MH) += ip6t_mh.o
+obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o
+obj-$(CONFIG_IP6_NF_MATCH_RPFILTER) += ip6t_rpfilter.o
+obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o
+
+# targets
+obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
new file mode 100644
index 00000000..a34c9e4c
--- /dev/null
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -0,0 +1,641 @@
+/*
+ * This is a module which is used for queueing IPv6 packets and
+ * communicating with userspace via netlink.
+ *
+ * (C) 2001 Fernando Anton, this code is GPL.
+ * IPv64 Project - Work based in IPv64 draft by Arturo Azcorra.
+ * Universidad Carlos III de Madrid - Leganes (Madrid) - Spain
+ * Universidad Politecnica de Alcala de Henares - Alcala de H. (Madrid) - Spain
+ * email: fanton@it.uc3m.es
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+#include <linux/ipv6.h>
+#include <linux/notifier.h>
+#include <linux/netdevice.h>
+#include <linux/netfilter.h>
+#include <linux/netlink.h>
+#include <linux/spinlock.h>
+#include <linux/sysctl.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/netfilter/nf_queue.h>
+#include <linux/netfilter_ipv4/ip_queue.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+
+#define IPQ_QMAX_DEFAULT 1024
+#define IPQ_PROC_FS_NAME "ip6_queue"
+#define NET_IPQ_QMAX_NAME "ip6_queue_maxlen"
+
+typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long);
+
+static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE;
+static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;
+static DEFINE_SPINLOCK(queue_lock);
+static int peer_pid __read_mostly;
+static unsigned int copy_range __read_mostly;
+static unsigned int queue_total;
+static unsigned int queue_dropped = 0;
+static unsigned int queue_user_dropped = 0;
+static struct sock *ipqnl __read_mostly;
+static LIST_HEAD(queue_list);
+static DEFINE_MUTEX(ipqnl_mutex);
+
+static inline void
+__ipq_enqueue_entry(struct nf_queue_entry *entry)
+{
+ list_add_tail(&entry->list, &queue_list);
+ queue_total++;
+}
+
+static inline int
+__ipq_set_mode(unsigned char mode, unsigned int range)
+{
+ int status = 0;
+
+ switch(mode) {
+ case IPQ_COPY_NONE:
+ case IPQ_COPY_META:
+ copy_mode = mode;
+ copy_range = 0;
+ break;
+
+ case IPQ_COPY_PACKET:
+ if (range > 0xFFFF)
+ range = 0xFFFF;
+ copy_range = range;
+ copy_mode = mode;
+ break;
+
+ default:
+ status = -EINVAL;
+
+ }
+ return status;
+}
+
+static void __ipq_flush(ipq_cmpfn cmpfn, unsigned long data);
+
+static inline void
+__ipq_reset(void)
+{
+ peer_pid = 0;
+ net_disable_timestamp();
+ __ipq_set_mode(IPQ_COPY_NONE, 0);
+ __ipq_flush(NULL, 0);
+}
+
+static struct nf_queue_entry *
+ipq_find_dequeue_entry(unsigned long id)
+{
+ struct nf_queue_entry *entry = NULL, *i;
+
+ spin_lock_bh(&queue_lock);
+
+ list_for_each_entry(i, &queue_list, list) {
+ if ((unsigned long)i == id) {
+ entry = i;
+ break;
+ }
+ }
+
+ if (entry) {
+ list_del(&entry->list);
+ queue_total--;
+ }
+
+ spin_unlock_bh(&queue_lock);
+ return entry;
+}
+
+static void
+__ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
+{
+ struct nf_queue_entry *entry, *next;
+
+ list_for_each_entry_safe(entry, next, &queue_list, list) {
+ if (!cmpfn || cmpfn(entry, data)) {
+ list_del(&entry->list);
+ queue_total--;
+ nf_reinject(entry, NF_DROP);
+ }
+ }
+}
+
+static void
+ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
+{
+ spin_lock_bh(&queue_lock);
+ __ipq_flush(cmpfn, data);
+ spin_unlock_bh(&queue_lock);
+}
+
+static struct sk_buff *
+ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
+{
+ sk_buff_data_t old_tail;
+ size_t size = 0;
+ size_t data_len = 0;
+ struct sk_buff *skb;
+ struct ipq_packet_msg *pmsg;
+ struct nlmsghdr *nlh;
+ struct timeval tv;
+
+ switch (ACCESS_ONCE(copy_mode)) {
+ case IPQ_COPY_META:
+ case IPQ_COPY_NONE:
+ size = NLMSG_SPACE(sizeof(*pmsg));
+ break;
+
+ case IPQ_COPY_PACKET:
+ if (entry->skb->ip_summed == CHECKSUM_PARTIAL &&
+ (*errp = skb_checksum_help(entry->skb)))
+ return NULL;
+
+ data_len = ACCESS_ONCE(copy_range);
+ if (data_len == 0 || data_len > entry->skb->len)
+ data_len = entry->skb->len;
+
+ size = NLMSG_SPACE(sizeof(*pmsg) + data_len);
+ break;
+
+ default:
+ *errp = -EINVAL;
+ return NULL;
+ }
+
+ skb = alloc_skb(size, GFP_ATOMIC);
+ if (!skb)
+ goto nlmsg_failure;
+
+ old_tail = skb->tail;
+ nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh));
+ pmsg = NLMSG_DATA(nlh);
+ memset(pmsg, 0, sizeof(*pmsg));
+
+ pmsg->packet_id = (unsigned long )entry;
+ pmsg->data_len = data_len;
+ tv = ktime_to_timeval(entry->skb->tstamp);
+ pmsg->timestamp_sec = tv.tv_sec;
+ pmsg->timestamp_usec = tv.tv_usec;
+ pmsg->mark = entry->skb->mark;
+ pmsg->hook = entry->hook;
+ pmsg->hw_protocol = entry->skb->protocol;
+
+ if (entry->indev)
+ strcpy(pmsg->indev_name, entry->indev->name);
+ else
+ pmsg->indev_name[0] = '\0';
+
+ if (entry->outdev)
+ strcpy(pmsg->outdev_name, entry->outdev->name);
+ else
+ pmsg->outdev_name[0] = '\0';
+
+ if (entry->indev && entry->skb->dev &&
+ entry->skb->mac_header != entry->skb->network_header) {
+ pmsg->hw_type = entry->skb->dev->type;
+ pmsg->hw_addrlen = dev_parse_header(entry->skb, pmsg->hw_addr);
+ }
+
+ if (data_len)
+ if (skb_copy_bits(entry->skb, 0, pmsg->payload, data_len))
+ BUG();
+
+ nlh->nlmsg_len = skb->tail - old_tail;
+ return skb;
+
+nlmsg_failure:
+ kfree_skb(skb);
+ *errp = -EINVAL;
+ printk(KERN_ERR "ip6_queue: error creating packet message\n");
+ return NULL;
+}
+
+static int
+ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
+{
+ int status = -EINVAL;
+ struct sk_buff *nskb;
+
+ if (copy_mode == IPQ_COPY_NONE)
+ return -EAGAIN;
+
+ nskb = ipq_build_packet_message(entry, &status);
+ if (nskb == NULL)
+ return status;
+
+ spin_lock_bh(&queue_lock);
+
+ if (!peer_pid)
+ goto err_out_free_nskb;
+
+ if (queue_total >= queue_maxlen) {
+ queue_dropped++;
+ status = -ENOSPC;
+ if (net_ratelimit())
+ printk (KERN_WARNING "ip6_queue: fill at %d entries, "
+ "dropping packet(s). Dropped: %d\n", queue_total,
+ queue_dropped);
+ goto err_out_free_nskb;
+ }
+
+ /* netlink_unicast will either free the nskb or attach it to a socket */
+ status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT);
+ if (status < 0) {
+ queue_user_dropped++;
+ goto err_out_unlock;
+ }
+
+ __ipq_enqueue_entry(entry);
+
+ spin_unlock_bh(&queue_lock);
+ return status;
+
+err_out_free_nskb:
+ kfree_skb(nskb);
+
+err_out_unlock:
+ spin_unlock_bh(&queue_lock);
+ return status;
+}
+
+static int
+ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct nf_queue_entry *e)
+{
+ int diff;
+ struct ipv6hdr *user_iph = (struct ipv6hdr *)v->payload;
+ struct sk_buff *nskb;
+
+ if (v->data_len < sizeof(*user_iph))
+ return 0;
+ diff = v->data_len - e->skb->len;
+ if (diff < 0) {
+ if (pskb_trim(e->skb, v->data_len))
+ return -ENOMEM;
+ } else if (diff > 0) {
+ if (v->data_len > 0xFFFF)
+ return -EINVAL;
+ if (diff > skb_tailroom(e->skb)) {
+ nskb = skb_copy_expand(e->skb, skb_headroom(e->skb),
+ diff, GFP_ATOMIC);
+ if (!nskb) {
+ printk(KERN_WARNING "ip6_queue: OOM "
+ "in mangle, dropping packet\n");
+ return -ENOMEM;
+ }
+ kfree_skb(e->skb);
+ e->skb = nskb;
+ }
+ skb_put(e->skb, diff);
+ }
+ if (!skb_make_writable(e->skb, v->data_len))
+ return -ENOMEM;
+ skb_copy_to_linear_data(e->skb, v->payload, v->data_len);
+ e->skb->ip_summed = CHECKSUM_NONE;
+
+ return 0;
+}
+
+static int
+ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
+{
+ struct nf_queue_entry *entry;
+
+ if (vmsg->value > NF_MAX_VERDICT || vmsg->value == NF_STOLEN)
+ return -EINVAL;
+
+ entry = ipq_find_dequeue_entry(vmsg->id);
+ if (entry == NULL)
+ return -ENOENT;
+ else {
+ int verdict = vmsg->value;
+
+ if (vmsg->data_len && vmsg->data_len == len)
+ if (ipq_mangle_ipv6(vmsg, entry) < 0)
+ verdict = NF_DROP;
+
+ nf_reinject(entry, verdict);
+ return 0;
+ }
+}
+
+static int
+ipq_set_mode(unsigned char mode, unsigned int range)
+{
+ int status;
+
+ spin_lock_bh(&queue_lock);
+ status = __ipq_set_mode(mode, range);
+ spin_unlock_bh(&queue_lock);
+ return status;
+}
+
+static int
+ipq_receive_peer(struct ipq_peer_msg *pmsg,
+ unsigned char type, unsigned int len)
+{
+ int status = 0;
+
+ if (len < sizeof(*pmsg))
+ return -EINVAL;
+
+ switch (type) {
+ case IPQM_MODE:
+ status = ipq_set_mode(pmsg->msg.mode.value,
+ pmsg->msg.mode.range);
+ break;
+
+ case IPQM_VERDICT:
+ status = ipq_set_verdict(&pmsg->msg.verdict,
+ len - sizeof(*pmsg));
+ break;
+ default:
+ status = -EINVAL;
+ }
+ return status;
+}
+
+static int
+dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
+{
+ if (entry->indev)
+ if (entry->indev->ifindex == ifindex)
+ return 1;
+
+ if (entry->outdev)
+ if (entry->outdev->ifindex == ifindex)
+ return 1;
+#ifdef CONFIG_BRIDGE_NETFILTER
+ if (entry->skb->nf_bridge) {
+ if (entry->skb->nf_bridge->physindev &&
+ entry->skb->nf_bridge->physindev->ifindex == ifindex)
+ return 1;
+ if (entry->skb->nf_bridge->physoutdev &&
+ entry->skb->nf_bridge->physoutdev->ifindex == ifindex)
+ return 1;
+ }
+#endif
+ return 0;
+}
+
+static void
+ipq_dev_drop(int ifindex)
+{
+ ipq_flush(dev_cmp, ifindex);
+}
+
+#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
+
+static inline void
+__ipq_rcv_skb(struct sk_buff *skb)
+{
+ int status, type, pid, flags;
+ unsigned int nlmsglen, skblen;
+ struct nlmsghdr *nlh;
+ bool enable_timestamp = false;
+
+ skblen = skb->len;
+ if (skblen < sizeof(*nlh))
+ return;
+
+ nlh = nlmsg_hdr(skb);
+ nlmsglen = nlh->nlmsg_len;
+ if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen)
+ return;
+
+ pid = nlh->nlmsg_pid;
+ flags = nlh->nlmsg_flags;
+
+ if(pid <= 0 || !(flags & NLM_F_REQUEST) || flags & NLM_F_MULTI)
+ RCV_SKB_FAIL(-EINVAL);
+
+ if (flags & MSG_TRUNC)
+ RCV_SKB_FAIL(-ECOMM);
+
+ type = nlh->nlmsg_type;
+ if (type < NLMSG_NOOP || type >= IPQM_MAX)
+ RCV_SKB_FAIL(-EINVAL);
+
+ if (type <= IPQM_BASE)
+ return;
+
+ if (!capable(CAP_NET_ADMIN))
+ RCV_SKB_FAIL(-EPERM);
+
+ spin_lock_bh(&queue_lock);
+
+ if (peer_pid) {
+ if (peer_pid != pid) {
+ spin_unlock_bh(&queue_lock);
+ RCV_SKB_FAIL(-EBUSY);
+ }
+ } else {
+ enable_timestamp = true;
+ peer_pid = pid;
+ }
+
+ spin_unlock_bh(&queue_lock);
+ if (enable_timestamp)
+ net_enable_timestamp();
+
+ status = ipq_receive_peer(NLMSG_DATA(nlh), type,
+ nlmsglen - NLMSG_LENGTH(0));
+ if (status < 0)
+ RCV_SKB_FAIL(status);
+
+ if (flags & NLM_F_ACK)
+ netlink_ack(skb, nlh, 0);
+}
+
+static void
+ipq_rcv_skb(struct sk_buff *skb)
+{
+ mutex_lock(&ipqnl_mutex);
+ __ipq_rcv_skb(skb);
+ mutex_unlock(&ipqnl_mutex);
+}
+
+static int
+ipq_rcv_dev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = ptr;
+
+ if (!net_eq(dev_net(dev), &init_net))
+ return NOTIFY_DONE;
+
+ /* Drop any packets associated with the downed device */
+ if (event == NETDEV_DOWN)
+ ipq_dev_drop(dev->ifindex);
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block ipq_dev_notifier = {
+ .notifier_call = ipq_rcv_dev_event,
+};
+
+static int
+ipq_rcv_nl_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct netlink_notify *n = ptr;
+
+ if (event == NETLINK_URELEASE && n->protocol == NETLINK_IP6_FW) {
+ spin_lock_bh(&queue_lock);
+ if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid))
+ __ipq_reset();
+ spin_unlock_bh(&queue_lock);
+ }
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block ipq_nl_notifier = {
+ .notifier_call = ipq_rcv_nl_event,
+};
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table_header *ipq_sysctl_header;
+
+static ctl_table ipq_table[] = {
+ {
+ .procname = NET_IPQ_QMAX_NAME,
+ .data = &queue_maxlen,
+ .maxlen = sizeof(queue_maxlen),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ { }
+};
+#endif
+
+#ifdef CONFIG_PROC_FS
+static int ip6_queue_show(struct seq_file *m, void *v)
+{
+ spin_lock_bh(&queue_lock);
+
+ seq_printf(m,
+ "Peer PID : %d\n"
+ "Copy mode : %hu\n"
+ "Copy range : %u\n"
+ "Queue length : %u\n"
+ "Queue max. length : %u\n"
+ "Queue dropped : %u\n"
+ "Netfilter dropped : %u\n",
+ peer_pid,
+ copy_mode,
+ copy_range,
+ queue_total,
+ queue_maxlen,
+ queue_dropped,
+ queue_user_dropped);
+
+ spin_unlock_bh(&queue_lock);
+ return 0;
+}
+
+static int ip6_queue_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, ip6_queue_show, NULL);
+}
+
+static const struct file_operations ip6_queue_proc_fops = {
+ .open = ip6_queue_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+ .owner = THIS_MODULE,
+};
+#endif
+
+static const struct nf_queue_handler nfqh = {
+ .name = "ip6_queue",
+ .outfn = &ipq_enqueue_packet,
+};
+
+static int __init ip6_queue_init(void)
+{
+ int status = -ENOMEM;
+ struct proc_dir_entry *proc __maybe_unused;
+
+ netlink_register_notifier(&ipq_nl_notifier);
+ ipqnl = netlink_kernel_create(&init_net, NETLINK_IP6_FW, 0,
+ ipq_rcv_skb, NULL, THIS_MODULE);
+ if (ipqnl == NULL) {
+ printk(KERN_ERR "ip6_queue: failed to create netlink socket\n");
+ goto cleanup_netlink_notifier;
+ }
+
+#ifdef CONFIG_PROC_FS
+ proc = proc_create(IPQ_PROC_FS_NAME, 0, init_net.proc_net,
+ &ip6_queue_proc_fops);
+ if (!proc) {
+ printk(KERN_ERR "ip6_queue: failed to create proc entry\n");
+ goto cleanup_ipqnl;
+ }
+#endif
+ register_netdevice_notifier(&ipq_dev_notifier);
+#ifdef CONFIG_SYSCTL
+ ipq_sysctl_header = register_sysctl_paths(net_ipv6_ctl_path, ipq_table);
+#endif
+ status = nf_register_queue_handler(NFPROTO_IPV6, &nfqh);
+ if (status < 0) {
+ printk(KERN_ERR "ip6_queue: failed to register queue handler\n");
+ goto cleanup_sysctl;
+ }
+ return status;
+
+cleanup_sysctl:
+#ifdef CONFIG_SYSCTL
+ unregister_sysctl_table(ipq_sysctl_header);
+#endif
+ unregister_netdevice_notifier(&ipq_dev_notifier);
+ proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
+
+cleanup_ipqnl: __maybe_unused
+ netlink_kernel_release(ipqnl);
+ mutex_lock(&ipqnl_mutex);
+ mutex_unlock(&ipqnl_mutex);
+
+cleanup_netlink_notifier:
+ netlink_unregister_notifier(&ipq_nl_notifier);
+ return status;
+}
+
+static void __exit ip6_queue_fini(void)
+{
+ nf_unregister_queue_handlers(&nfqh);
+
+ ipq_flush(NULL, 0);
+
+#ifdef CONFIG_SYSCTL
+ unregister_sysctl_table(ipq_sysctl_header);
+#endif
+ unregister_netdevice_notifier(&ipq_dev_notifier);
+ proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
+
+ netlink_kernel_release(ipqnl);
+ mutex_lock(&ipqnl_mutex);
+ mutex_unlock(&ipqnl_mutex);
+
+ netlink_unregister_notifier(&ipq_nl_notifier);
+}
+
+MODULE_DESCRIPTION("IPv6 packet queue handler");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_IP6_FW);
+
+module_init(ip6_queue_init);
+module_exit(ip6_queue_fini);
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
new file mode 100644
index 00000000..e641f8fa
--- /dev/null
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -0,0 +1,2361 @@
+/*
+ * Packet matching code.
+ *
+ * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
+ * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/capability.h>
+#include <linux/in.h>
+#include <linux/skbuff.h>
+#include <linux/kmod.h>
+#include <linux/vmalloc.h>
+#include <linux/netdevice.h>
+#include <linux/module.h>
+#include <linux/poison.h>
+#include <linux/icmpv6.h>
+#include <net/ipv6.h>
+#include <net/compat.h>
+#include <asm/uaccess.h>
+#include <linux/mutex.h>
+#include <linux/proc_fs.h>
+#include <linux/err.h>
+#include <linux/cpumask.h>
+
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter/x_tables.h>
+#include <net/netfilter/nf_log.h>
+#include "../../netfilter/xt_repldata.h"
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("IPv6 packet filter");
+
+/*#define DEBUG_IP_FIREWALL*/
+/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
+/*#define DEBUG_IP_FIREWALL_USER*/
+
+#ifdef DEBUG_IP_FIREWALL
+#define dprintf(format, args...) pr_info(format , ## args)
+#else
+#define dprintf(format, args...)
+#endif
+
+#ifdef DEBUG_IP_FIREWALL_USER
+#define duprintf(format, args...) pr_info(format , ## args)
+#else
+#define duprintf(format, args...)
+#endif
+
+#ifdef CONFIG_NETFILTER_DEBUG
+#define IP_NF_ASSERT(x) WARN_ON(!(x))
+#else
+#define IP_NF_ASSERT(x)
+#endif
+
+#if 0
+/* All the better to debug you with... */
+#define static
+#define inline
+#endif
+
+void *ip6t_alloc_initial_table(const struct xt_table *info)
+{
+ return xt_alloc_initial_table(ip6t, IP6T);
+}
+EXPORT_SYMBOL_GPL(ip6t_alloc_initial_table);
+
+/*
+ We keep a set of rules for each CPU, so we can avoid write-locking
+ them in the softirq when updating the counters and therefore
+ only need to read-lock in the softirq; doing a write_lock_bh() in user
+ context stops packets coming through and allows user context to read
+ the counters or update the rules.
+
+ Hence the start of any table is given by get_table() below. */
+
+/* Returns whether matches rule or not. */
+/* Performance critical - called for every packet */
+static inline bool
+ip6_packet_match(const struct sk_buff *skb,
+ const char *indev,
+ const char *outdev,
+ const struct ip6t_ip6 *ip6info,
+ unsigned int *protoff,
+ int *fragoff, bool *hotdrop)
+{
+ unsigned long ret;
+ const struct ipv6hdr *ipv6 = ipv6_hdr(skb);
+
+#define FWINV(bool, invflg) ((bool) ^ !!(ip6info->invflags & (invflg)))
+
+ if (FWINV(ipv6_masked_addr_cmp(&ipv6->saddr, &ip6info->smsk,
+ &ip6info->src), IP6T_INV_SRCIP) ||
+ FWINV(ipv6_masked_addr_cmp(&ipv6->daddr, &ip6info->dmsk,
+ &ip6info->dst), IP6T_INV_DSTIP)) {
+ dprintf("Source or dest mismatch.\n");
+/*
+ dprintf("SRC: %u. Mask: %u. Target: %u.%s\n", ip->saddr,
+ ipinfo->smsk.s_addr, ipinfo->src.s_addr,
+ ipinfo->invflags & IP6T_INV_SRCIP ? " (INV)" : "");
+ dprintf("DST: %u. Mask: %u. Target: %u.%s\n", ip->daddr,
+ ipinfo->dmsk.s_addr, ipinfo->dst.s_addr,
+ ipinfo->invflags & IP6T_INV_DSTIP ? " (INV)" : "");*/
+ return false;
+ }
+
+ ret = ifname_compare_aligned(indev, ip6info->iniface, ip6info->iniface_mask);
+
+ if (FWINV(ret != 0, IP6T_INV_VIA_IN)) {
+ dprintf("VIA in mismatch (%s vs %s).%s\n",
+ indev, ip6info->iniface,
+ ip6info->invflags&IP6T_INV_VIA_IN ?" (INV)":"");
+ return false;
+ }
+
+ ret = ifname_compare_aligned(outdev, ip6info->outiface, ip6info->outiface_mask);
+
+ if (FWINV(ret != 0, IP6T_INV_VIA_OUT)) {
+ dprintf("VIA out mismatch (%s vs %s).%s\n",
+ outdev, ip6info->outiface,
+ ip6info->invflags&IP6T_INV_VIA_OUT ?" (INV)":"");
+ return false;
+ }
+
+/* ... might want to do something with class and flowlabel here ... */
+
+ /* look for the desired protocol header */
+ if((ip6info->flags & IP6T_F_PROTO)) {
+ int protohdr;
+ unsigned short _frag_off;
+
+ protohdr = ipv6_find_hdr(skb, protoff, -1, &_frag_off);
+ if (protohdr < 0) {
+ if (_frag_off == 0)
+ *hotdrop = true;
+ return false;
+ }
+ *fragoff = _frag_off;
+
+ dprintf("Packet protocol %hi ?= %s%hi.\n",
+ protohdr,
+ ip6info->invflags & IP6T_INV_PROTO ? "!":"",
+ ip6info->proto);
+
+ if (ip6info->proto == protohdr) {
+ if(ip6info->invflags & IP6T_INV_PROTO) {
+ return false;
+ }
+ return true;
+ }
+
+ /* We need match for the '-p all', too! */
+ if ((ip6info->proto != 0) &&
+ !(ip6info->invflags & IP6T_INV_PROTO))
+ return false;
+ }
+ return true;
+}
+
+/* should be ip6 safe */
+static bool
+ip6_checkentry(const struct ip6t_ip6 *ipv6)
+{
+ if (ipv6->flags & ~IP6T_F_MASK) {
+ duprintf("Unknown flag bits set: %08X\n",
+ ipv6->flags & ~IP6T_F_MASK);
+ return false;
+ }
+ if (ipv6->invflags & ~IP6T_INV_MASK) {
+ duprintf("Unknown invflag bits set: %08X\n",
+ ipv6->invflags & ~IP6T_INV_MASK);
+ return false;
+ }
+ return true;
+}
+
+static unsigned int
+ip6t_error(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ if (net_ratelimit())
+ pr_info("error: `%s'\n", (const char *)par->targinfo);
+
+ return NF_DROP;
+}
+
+static inline struct ip6t_entry *
+get_entry(const void *base, unsigned int offset)
+{
+ return (struct ip6t_entry *)(base + offset);
+}
+
+/* All zeroes == unconditional rule. */
+/* Mildly perf critical (only if packet tracing is on) */
+static inline bool unconditional(const struct ip6t_ip6 *ipv6)
+{
+ static const struct ip6t_ip6 uncond;
+
+ return memcmp(ipv6, &uncond, sizeof(uncond)) == 0;
+}
+
+static inline const struct xt_entry_target *
+ip6t_get_target_c(const struct ip6t_entry *e)
+{
+ return ip6t_get_target((struct ip6t_entry *)e);
+}
+
+#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
+ defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
+/* This cries for unification! */
+static const char *const hooknames[] = {
+ [NF_INET_PRE_ROUTING] = "PREROUTING",
+ [NF_INET_LOCAL_IN] = "INPUT",
+ [NF_INET_FORWARD] = "FORWARD",
+ [NF_INET_LOCAL_OUT] = "OUTPUT",
+ [NF_INET_POST_ROUTING] = "POSTROUTING",
+};
+
+enum nf_ip_trace_comments {
+ NF_IP6_TRACE_COMMENT_RULE,
+ NF_IP6_TRACE_COMMENT_RETURN,
+ NF_IP6_TRACE_COMMENT_POLICY,
+};
+
+static const char *const comments[] = {
+ [NF_IP6_TRACE_COMMENT_RULE] = "rule",
+ [NF_IP6_TRACE_COMMENT_RETURN] = "return",
+ [NF_IP6_TRACE_COMMENT_POLICY] = "policy",
+};
+
+static struct nf_loginfo trace_loginfo = {
+ .type = NF_LOG_TYPE_LOG,
+ .u = {
+ .log = {
+ .level = 4,
+ .logflags = NF_LOG_MASK,
+ },
+ },
+};
+
+/* Mildly perf critical (only if packet tracing is on) */
+static inline int
+get_chainname_rulenum(const struct ip6t_entry *s, const struct ip6t_entry *e,
+ const char *hookname, const char **chainname,
+ const char **comment, unsigned int *rulenum)
+{
+ const struct xt_standard_target *t = (void *)ip6t_get_target_c(s);
+
+ if (strcmp(t->target.u.kernel.target->name, XT_ERROR_TARGET) == 0) {
+ /* Head of user chain: ERROR target with chainname */
+ *chainname = t->target.data;
+ (*rulenum) = 0;
+ } else if (s == e) {
+ (*rulenum)++;
+
+ if (s->target_offset == sizeof(struct ip6t_entry) &&
+ strcmp(t->target.u.kernel.target->name,
+ XT_STANDARD_TARGET) == 0 &&
+ t->verdict < 0 &&
+ unconditional(&s->ipv6)) {
+ /* Tail of chains: STANDARD target (return/policy) */
+ *comment = *chainname == hookname
+ ? comments[NF_IP6_TRACE_COMMENT_POLICY]
+ : comments[NF_IP6_TRACE_COMMENT_RETURN];
+ }
+ return 1;
+ } else
+ (*rulenum)++;
+
+ return 0;
+}
+
+static void trace_packet(const struct sk_buff *skb,
+ unsigned int hook,
+ const struct net_device *in,
+ const struct net_device *out,
+ const char *tablename,
+ const struct xt_table_info *private,
+ const struct ip6t_entry *e)
+{
+ const void *table_base;
+ const struct ip6t_entry *root;
+ const char *hookname, *chainname, *comment;
+ const struct ip6t_entry *iter;
+ unsigned int rulenum = 0;
+
+ table_base = private->entries[smp_processor_id()];
+ root = get_entry(table_base, private->hook_entry[hook]);
+
+ hookname = chainname = hooknames[hook];
+ comment = comments[NF_IP6_TRACE_COMMENT_RULE];
+
+ xt_entry_foreach(iter, root, private->size - private->hook_entry[hook])
+ if (get_chainname_rulenum(iter, e, hookname,
+ &chainname, &comment, &rulenum) != 0)
+ break;
+
+ nf_log_packet(AF_INET6, hook, skb, in, out, &trace_loginfo,
+ "TRACE: %s:%s:%s:%u ",
+ tablename, chainname, comment, rulenum);
+}
+#endif
+
+static inline __pure struct ip6t_entry *
+ip6t_next_entry(const struct ip6t_entry *entry)
+{
+ return (void *)entry + entry->next_offset;
+}
+
+/* Returns one of the generic firewall policies, like NF_ACCEPT. */
+unsigned int
+ip6t_do_table(struct sk_buff *skb,
+ unsigned int hook,
+ const struct net_device *in,
+ const struct net_device *out,
+ struct xt_table *table)
+{
+ static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
+ /* Initializing verdict to NF_DROP keeps gcc happy. */
+ unsigned int verdict = NF_DROP;
+ const char *indev, *outdev;
+ const void *table_base;
+ struct ip6t_entry *e, **jumpstack;
+ unsigned int *stackptr, origptr, cpu;
+ const struct xt_table_info *private;
+ struct xt_action_param acpar;
+ unsigned int addend;
+
+ /* Initialization */
+ indev = in ? in->name : nulldevname;
+ outdev = out ? out->name : nulldevname;
+ /* We handle fragments by dealing with the first fragment as
+ * if it was a normal packet. All other fragments are treated
+ * normally, except that they will NEVER match rules that ask
+ * things we don't know, ie. tcp syn flag or ports). If the
+ * rule is also a fragment-specific rule, non-fragments won't
+ * match it. */
+ acpar.hotdrop = false;
+ acpar.in = in;
+ acpar.out = out;
+ acpar.family = NFPROTO_IPV6;
+ acpar.hooknum = hook;
+
+ IP_NF_ASSERT(table->valid_hooks & (1 << hook));
+
+ local_bh_disable();
+ addend = xt_write_recseq_begin();
+ private = table->private;
+ cpu = smp_processor_id();
+ table_base = private->entries[cpu];
+ jumpstack = (struct ip6t_entry **)private->jumpstack[cpu];
+ stackptr = per_cpu_ptr(private->stackptr, cpu);
+ origptr = *stackptr;
+
+ e = get_entry(table_base, private->hook_entry[hook]);
+
+ do {
+ const struct xt_entry_target *t;
+ const struct xt_entry_match *ematch;
+
+ IP_NF_ASSERT(e);
+ if (!ip6_packet_match(skb, indev, outdev, &e->ipv6,
+ &acpar.thoff, &acpar.fragoff, &acpar.hotdrop)) {
+ no_match:
+ e = ip6t_next_entry(e);
+ continue;
+ }
+
+ xt_ematch_foreach(ematch, e) {
+ acpar.match = ematch->u.kernel.match;
+ acpar.matchinfo = ematch->data;
+ if (!acpar.match->match(skb, &acpar))
+ goto no_match;
+ }
+
+ ADD_COUNTER(e->counters, skb->len, 1);
+
+ t = ip6t_get_target_c(e);
+ IP_NF_ASSERT(t->u.kernel.target);
+
+#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
+ defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
+ /* The packet is traced: log it */
+ if (unlikely(skb->nf_trace))
+ trace_packet(skb, hook, in, out,
+ table->name, private, e);
+#endif
+ /* Standard target? */
+ if (!t->u.kernel.target->target) {
+ int v;
+
+ v = ((struct xt_standard_target *)t)->verdict;
+ if (v < 0) {
+ /* Pop from stack? */
+ if (v != XT_RETURN) {
+ verdict = (unsigned)(-v) - 1;
+ break;
+ }
+ if (*stackptr <= origptr)
+ e = get_entry(table_base,
+ private->underflow[hook]);
+ else
+ e = ip6t_next_entry(jumpstack[--*stackptr]);
+ continue;
+ }
+ if (table_base + v != ip6t_next_entry(e) &&
+ !(e->ipv6.flags & IP6T_F_GOTO)) {
+ if (*stackptr >= private->stacksize) {
+ verdict = NF_DROP;
+ break;
+ }
+ jumpstack[(*stackptr)++] = e;
+ }
+
+ e = get_entry(table_base, v);
+ continue;
+ }
+
+ acpar.target = t->u.kernel.target;
+ acpar.targinfo = t->data;
+
+ verdict = t->u.kernel.target->target(skb, &acpar);
+ if (verdict == XT_CONTINUE)
+ e = ip6t_next_entry(e);
+ else
+ /* Verdict */
+ break;
+ } while (!acpar.hotdrop);
+
+ *stackptr = origptr;
+
+ xt_write_recseq_end(addend);
+ local_bh_enable();
+
+#ifdef DEBUG_ALLOW_ALL
+ return NF_ACCEPT;
+#else
+ if (acpar.hotdrop)
+ return NF_DROP;
+ else return verdict;
+#endif
+}
+
+/* Figures out from what hook each rule can be called: returns 0 if
+ there are loops. Puts hook bitmask in comefrom. */
+static int
+mark_source_chains(const struct xt_table_info *newinfo,
+ unsigned int valid_hooks, void *entry0)
+{
+ unsigned int hook;
+
+ /* No recursion; use packet counter to save back ptrs (reset
+ to 0 as we leave), and comefrom to save source hook bitmask */
+ for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) {
+ unsigned int pos = newinfo->hook_entry[hook];
+ struct ip6t_entry *e = (struct ip6t_entry *)(entry0 + pos);
+
+ if (!(valid_hooks & (1 << hook)))
+ continue;
+
+ /* Set initial back pointer. */
+ e->counters.pcnt = pos;
+
+ for (;;) {
+ const struct xt_standard_target *t
+ = (void *)ip6t_get_target_c(e);
+ int visited = e->comefrom & (1 << hook);
+
+ if (e->comefrom & (1 << NF_INET_NUMHOOKS)) {
+ pr_err("iptables: loop hook %u pos %u %08X.\n",
+ hook, pos, e->comefrom);
+ return 0;
+ }
+ e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS));
+
+ /* Unconditional return/END. */
+ if ((e->target_offset == sizeof(struct ip6t_entry) &&
+ (strcmp(t->target.u.user.name,
+ XT_STANDARD_TARGET) == 0) &&
+ t->verdict < 0 &&
+ unconditional(&e->ipv6)) || visited) {
+ unsigned int oldpos, size;
+
+ if ((strcmp(t->target.u.user.name,
+ XT_STANDARD_TARGET) == 0) &&
+ t->verdict < -NF_MAX_VERDICT - 1) {
+ duprintf("mark_source_chains: bad "
+ "negative verdict (%i)\n",
+ t->verdict);
+ return 0;
+ }
+
+ /* Return: backtrack through the last
+ big jump. */
+ do {
+ e->comefrom ^= (1<<NF_INET_NUMHOOKS);
+#ifdef DEBUG_IP_FIREWALL_USER
+ if (e->comefrom
+ & (1 << NF_INET_NUMHOOKS)) {
+ duprintf("Back unset "
+ "on hook %u "
+ "rule %u\n",
+ hook, pos);
+ }
+#endif
+ oldpos = pos;
+ pos = e->counters.pcnt;
+ e->counters.pcnt = 0;
+
+ /* We're at the start. */
+ if (pos == oldpos)
+ goto next;
+
+ e = (struct ip6t_entry *)
+ (entry0 + pos);
+ } while (oldpos == pos + e->next_offset);
+
+ /* Move along one */
+ size = e->next_offset;
+ e = (struct ip6t_entry *)
+ (entry0 + pos + size);
+ e->counters.pcnt = pos;
+ pos += size;
+ } else {
+ int newpos = t->verdict;
+
+ if (strcmp(t->target.u.user.name,
+ XT_STANDARD_TARGET) == 0 &&
+ newpos >= 0) {
+ if (newpos > newinfo->size -
+ sizeof(struct ip6t_entry)) {
+ duprintf("mark_source_chains: "
+ "bad verdict (%i)\n",
+ newpos);
+ return 0;
+ }
+ /* This a jump; chase it. */
+ duprintf("Jump rule %u -> %u\n",
+ pos, newpos);
+ } else {
+ /* ... this is a fallthru */
+ newpos = pos + e->next_offset;
+ }
+ e = (struct ip6t_entry *)
+ (entry0 + newpos);
+ e->counters.pcnt = pos;
+ pos = newpos;
+ }
+ }
+ next:
+ duprintf("Finished chain %u\n", hook);
+ }
+ return 1;
+}
+
+static void cleanup_match(struct xt_entry_match *m, struct net *net)
+{
+ struct xt_mtdtor_param par;
+
+ par.net = net;
+ par.match = m->u.kernel.match;
+ par.matchinfo = m->data;
+ par.family = NFPROTO_IPV6;
+ if (par.match->destroy != NULL)
+ par.match->destroy(&par);
+ module_put(par.match->me);
+}
+
+static int
+check_entry(const struct ip6t_entry *e, const char *name)
+{
+ const struct xt_entry_target *t;
+
+ if (!ip6_checkentry(&e->ipv6)) {
+ duprintf("ip_tables: ip check failed %p %s.\n", e, name);
+ return -EINVAL;
+ }
+
+ if (e->target_offset + sizeof(struct xt_entry_target) >
+ e->next_offset)
+ return -EINVAL;
+
+ t = ip6t_get_target_c(e);
+ if (e->target_offset + t->u.target_size > e->next_offset)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
+{
+ const struct ip6t_ip6 *ipv6 = par->entryinfo;
+ int ret;
+
+ par->match = m->u.kernel.match;
+ par->matchinfo = m->data;
+
+ ret = xt_check_match(par, m->u.match_size - sizeof(*m),
+ ipv6->proto, ipv6->invflags & IP6T_INV_PROTO);
+ if (ret < 0) {
+ duprintf("ip_tables: check failed for `%s'.\n",
+ par.match->name);
+ return ret;
+ }
+ return 0;
+}
+
+static int
+find_check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
+{
+ struct xt_match *match;
+ int ret;
+
+ match = xt_request_find_match(NFPROTO_IPV6, m->u.user.name,
+ m->u.user.revision);
+ if (IS_ERR(match)) {
+ duprintf("find_check_match: `%s' not found\n", m->u.user.name);
+ return PTR_ERR(match);
+ }
+ m->u.kernel.match = match;
+
+ ret = check_match(m, par);
+ if (ret)
+ goto err;
+
+ return 0;
+err:
+ module_put(m->u.kernel.match->me);
+ return ret;
+}
+
+static int check_target(struct ip6t_entry *e, struct net *net, const char *name)
+{
+ struct xt_entry_target *t = ip6t_get_target(e);
+ struct xt_tgchk_param par = {
+ .net = net,
+ .table = name,
+ .entryinfo = e,
+ .target = t->u.kernel.target,
+ .targinfo = t->data,
+ .hook_mask = e->comefrom,
+ .family = NFPROTO_IPV6,
+ };
+ int ret;
+
+ t = ip6t_get_target(e);
+ ret = xt_check_target(&par, t->u.target_size - sizeof(*t),
+ e->ipv6.proto, e->ipv6.invflags & IP6T_INV_PROTO);
+ if (ret < 0) {
+ duprintf("ip_tables: check failed for `%s'.\n",
+ t->u.kernel.target->name);
+ return ret;
+ }
+ return 0;
+}
+
+static int
+find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
+ unsigned int size)
+{
+ struct xt_entry_target *t;
+ struct xt_target *target;
+ int ret;
+ unsigned int j;
+ struct xt_mtchk_param mtpar;
+ struct xt_entry_match *ematch;
+
+ ret = check_entry(e, name);
+ if (ret)
+ return ret;
+
+ j = 0;
+ mtpar.net = net;
+ mtpar.table = name;
+ mtpar.entryinfo = &e->ipv6;
+ mtpar.hook_mask = e->comefrom;
+ mtpar.family = NFPROTO_IPV6;
+ xt_ematch_foreach(ematch, e) {
+ ret = find_check_match(ematch, &mtpar);
+ if (ret != 0)
+ goto cleanup_matches;
+ ++j;
+ }
+
+ t = ip6t_get_target(e);
+ target = xt_request_find_target(NFPROTO_IPV6, t->u.user.name,
+ t->u.user.revision);
+ if (IS_ERR(target)) {
+ duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
+ ret = PTR_ERR(target);
+ goto cleanup_matches;
+ }
+ t->u.kernel.target = target;
+
+ ret = check_target(e, net, name);
+ if (ret)
+ goto err;
+ return 0;
+ err:
+ module_put(t->u.kernel.target->me);
+ cleanup_matches:
+ xt_ematch_foreach(ematch, e) {
+ if (j-- == 0)
+ break;
+ cleanup_match(ematch, net);
+ }
+ return ret;
+}
+
+static bool check_underflow(const struct ip6t_entry *e)
+{
+ const struct xt_entry_target *t;
+ unsigned int verdict;
+
+ if (!unconditional(&e->ipv6))
+ return false;
+ t = ip6t_get_target_c(e);
+ if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0)
+ return false;
+ verdict = ((struct xt_standard_target *)t)->verdict;
+ verdict = -verdict - 1;
+ return verdict == NF_DROP || verdict == NF_ACCEPT;
+}
+
+static int
+check_entry_size_and_hooks(struct ip6t_entry *e,
+ struct xt_table_info *newinfo,
+ const unsigned char *base,
+ const unsigned char *limit,
+ const unsigned int *hook_entries,
+ const unsigned int *underflows,
+ unsigned int valid_hooks)
+{
+ unsigned int h;
+
+ if ((unsigned long)e % __alignof__(struct ip6t_entry) != 0 ||
+ (unsigned char *)e + sizeof(struct ip6t_entry) >= limit) {
+ duprintf("Bad offset %p\n", e);
+ return -EINVAL;
+ }
+
+ if (e->next_offset
+ < sizeof(struct ip6t_entry) + sizeof(struct xt_entry_target)) {
+ duprintf("checking: element %p size %u\n",
+ e, e->next_offset);
+ return -EINVAL;
+ }
+
+ /* Check hooks & underflows */
+ for (h = 0; h < NF_INET_NUMHOOKS; h++) {
+ if (!(valid_hooks & (1 << h)))
+ continue;
+ if ((unsigned char *)e - base == hook_entries[h])
+ newinfo->hook_entry[h] = hook_entries[h];
+ if ((unsigned char *)e - base == underflows[h]) {
+ if (!check_underflow(e)) {
+ pr_err("Underflows must be unconditional and "
+ "use the STANDARD target with "
+ "ACCEPT/DROP\n");
+ return -EINVAL;
+ }
+ newinfo->underflow[h] = underflows[h];
+ }
+ }
+
+ /* Clear counters and comefrom */
+ e->counters = ((struct xt_counters) { 0, 0 });
+ e->comefrom = 0;
+ return 0;
+}
+
+static void cleanup_entry(struct ip6t_entry *e, struct net *net)
+{
+ struct xt_tgdtor_param par;
+ struct xt_entry_target *t;
+ struct xt_entry_match *ematch;
+
+ /* Cleanup all matches */
+ xt_ematch_foreach(ematch, e)
+ cleanup_match(ematch, net);
+ t = ip6t_get_target(e);
+
+ par.net = net;
+ par.target = t->u.kernel.target;
+ par.targinfo = t->data;
+ par.family = NFPROTO_IPV6;
+ if (par.target->destroy != NULL)
+ par.target->destroy(&par);
+ module_put(par.target->me);
+}
+
+/* Checks and translates the user-supplied table segment (held in
+ newinfo) */
+static int
+translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
+ const struct ip6t_replace *repl)
+{
+ struct ip6t_entry *iter;
+ unsigned int i;
+ int ret = 0;
+
+ newinfo->size = repl->size;
+ newinfo->number = repl->num_entries;
+
+ /* Init all hooks to impossible value. */
+ for (i = 0; i < NF_INET_NUMHOOKS; i++) {
+ newinfo->hook_entry[i] = 0xFFFFFFFF;
+ newinfo->underflow[i] = 0xFFFFFFFF;
+ }
+
+ duprintf("translate_table: size %u\n", newinfo->size);
+ i = 0;
+ /* Walk through entries, checking offsets. */
+ xt_entry_foreach(iter, entry0, newinfo->size) {
+ ret = check_entry_size_and_hooks(iter, newinfo, entry0,
+ entry0 + repl->size,
+ repl->hook_entry,
+ repl->underflow,
+ repl->valid_hooks);
+ if (ret != 0)
+ return ret;
+ ++i;
+ if (strcmp(ip6t_get_target(iter)->u.user.name,
+ XT_ERROR_TARGET) == 0)
+ ++newinfo->stacksize;
+ }
+
+ if (i != repl->num_entries) {
+ duprintf("translate_table: %u not %u entries\n",
+ i, repl->num_entries);
+ return -EINVAL;
+ }
+
+ /* Check hooks all assigned */
+ for (i = 0; i < NF_INET_NUMHOOKS; i++) {
+ /* Only hooks which are valid */
+ if (!(repl->valid_hooks & (1 << i)))
+ continue;
+ if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
+ duprintf("Invalid hook entry %u %u\n",
+ i, repl->hook_entry[i]);
+ return -EINVAL;
+ }
+ if (newinfo->underflow[i] == 0xFFFFFFFF) {
+ duprintf("Invalid underflow %u %u\n",
+ i, repl->underflow[i]);
+ return -EINVAL;
+ }
+ }
+
+ if (!mark_source_chains(newinfo, repl->valid_hooks, entry0))
+ return -ELOOP;
+
+ /* Finally, each sanity check must pass */
+ i = 0;
+ xt_entry_foreach(iter, entry0, newinfo->size) {
+ ret = find_check_entry(iter, net, repl->name, repl->size);
+ if (ret != 0)
+ break;
+ ++i;
+ }
+
+ if (ret != 0) {
+ xt_entry_foreach(iter, entry0, newinfo->size) {
+ if (i-- == 0)
+ break;
+ cleanup_entry(iter, net);
+ }
+ return ret;
+ }
+
+ /* And one copy for every other CPU */
+ for_each_possible_cpu(i) {
+ if (newinfo->entries[i] && newinfo->entries[i] != entry0)
+ memcpy(newinfo->entries[i], entry0, newinfo->size);
+ }
+
+ return ret;
+}
+
+static void
+get_counters(const struct xt_table_info *t,
+ struct xt_counters counters[])
+{
+ struct ip6t_entry *iter;
+ unsigned int cpu;
+ unsigned int i;
+
+ for_each_possible_cpu(cpu) {
+ seqcount_t *s = &per_cpu(xt_recseq, cpu);
+
+ i = 0;
+ xt_entry_foreach(iter, t->entries[cpu], t->size) {
+ u64 bcnt, pcnt;
+ unsigned int start;
+
+ do {
+ start = read_seqcount_begin(s);
+ bcnt = iter->counters.bcnt;
+ pcnt = iter->counters.pcnt;
+ } while (read_seqcount_retry(s, start));
+
+ ADD_COUNTER(counters[i], bcnt, pcnt);
+ ++i;
+ }
+ }
+}
+
+static struct xt_counters *alloc_counters(const struct xt_table *table)
+{
+ unsigned int countersize;
+ struct xt_counters *counters;
+ const struct xt_table_info *private = table->private;
+
+ /* We need atomic snapshot of counters: rest doesn't change
+ (other than comefrom, which userspace doesn't care
+ about). */
+ countersize = sizeof(struct xt_counters) * private->number;
+ counters = vzalloc(countersize);
+
+ if (counters == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ get_counters(private, counters);
+
+ return counters;
+}
+
+static int
+copy_entries_to_user(unsigned int total_size,
+ const struct xt_table *table,
+ void __user *userptr)
+{
+ unsigned int off, num;
+ const struct ip6t_entry *e;
+ struct xt_counters *counters;
+ const struct xt_table_info *private = table->private;
+ int ret = 0;
+ const void *loc_cpu_entry;
+
+ counters = alloc_counters(table);
+ if (IS_ERR(counters))
+ return PTR_ERR(counters);
+
+ /* choose the copy that is on our node/cpu, ...
+ * This choice is lazy (because current thread is
+ * allowed to migrate to another cpu)
+ */
+ loc_cpu_entry = private->entries[raw_smp_processor_id()];
+ if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
+ ret = -EFAULT;
+ goto free_counters;
+ }
+
+ /* FIXME: use iterator macros --RR */
+ /* ... then go back and fix counters and names */
+ for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
+ unsigned int i;
+ const struct xt_entry_match *m;
+ const struct xt_entry_target *t;
+
+ e = (struct ip6t_entry *)(loc_cpu_entry + off);
+ if (copy_to_user(userptr + off
+ + offsetof(struct ip6t_entry, counters),
+ &counters[num],
+ sizeof(counters[num])) != 0) {
+ ret = -EFAULT;
+ goto free_counters;
+ }
+
+ for (i = sizeof(struct ip6t_entry);
+ i < e->target_offset;
+ i += m->u.match_size) {
+ m = (void *)e + i;
+
+ if (copy_to_user(userptr + off + i
+ + offsetof(struct xt_entry_match,
+ u.user.name),
+ m->u.kernel.match->name,
+ strlen(m->u.kernel.match->name)+1)
+ != 0) {
+ ret = -EFAULT;
+ goto free_counters;
+ }
+ }
+
+ t = ip6t_get_target_c(e);
+ if (copy_to_user(userptr + off + e->target_offset
+ + offsetof(struct xt_entry_target,
+ u.user.name),
+ t->u.kernel.target->name,
+ strlen(t->u.kernel.target->name)+1) != 0) {
+ ret = -EFAULT;
+ goto free_counters;
+ }
+ }
+
+ free_counters:
+ vfree(counters);
+ return ret;
+}
+
+#ifdef CONFIG_COMPAT
+static void compat_standard_from_user(void *dst, const void *src)
+{
+ int v = *(compat_int_t *)src;
+
+ if (v > 0)
+ v += xt_compat_calc_jump(AF_INET6, v);
+ memcpy(dst, &v, sizeof(v));
+}
+
+static int compat_standard_to_user(void __user *dst, const void *src)
+{
+ compat_int_t cv = *(int *)src;
+
+ if (cv > 0)
+ cv -= xt_compat_calc_jump(AF_INET6, cv);
+ return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0;
+}
+
+static int compat_calc_entry(const struct ip6t_entry *e,
+ const struct xt_table_info *info,
+ const void *base, struct xt_table_info *newinfo)
+{
+ const struct xt_entry_match *ematch;
+ const struct xt_entry_target *t;
+ unsigned int entry_offset;
+ int off, i, ret;
+
+ off = sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry);
+ entry_offset = (void *)e - base;
+ xt_ematch_foreach(ematch, e)
+ off += xt_compat_match_offset(ematch->u.kernel.match);
+ t = ip6t_get_target_c(e);
+ off += xt_compat_target_offset(t->u.kernel.target);
+ newinfo->size -= off;
+ ret = xt_compat_add_offset(AF_INET6, entry_offset, off);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < NF_INET_NUMHOOKS; i++) {
+ if (info->hook_entry[i] &&
+ (e < (struct ip6t_entry *)(base + info->hook_entry[i])))
+ newinfo->hook_entry[i] -= off;
+ if (info->underflow[i] &&
+ (e < (struct ip6t_entry *)(base + info->underflow[i])))
+ newinfo->underflow[i] -= off;
+ }
+ return 0;
+}
+
+static int compat_table_info(const struct xt_table_info *info,
+ struct xt_table_info *newinfo)
+{
+ struct ip6t_entry *iter;
+ void *loc_cpu_entry;
+ int ret;
+
+ if (!newinfo || !info)
+ return -EINVAL;
+
+ /* we dont care about newinfo->entries[] */
+ memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
+ newinfo->initial_entries = 0;
+ loc_cpu_entry = info->entries[raw_smp_processor_id()];
+ xt_compat_init_offsets(AF_INET6, info->number);
+ xt_entry_foreach(iter, loc_cpu_entry, info->size) {
+ ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
+ if (ret != 0)
+ return ret;
+ }
+ return 0;
+}
+#endif
+
+static int get_info(struct net *net, void __user *user,
+ const int *len, int compat)
+{
+ char name[XT_TABLE_MAXNAMELEN];
+ struct xt_table *t;
+ int ret;
+
+ if (*len != sizeof(struct ip6t_getinfo)) {
+ duprintf("length %u != %zu\n", *len,
+ sizeof(struct ip6t_getinfo));
+ return -EINVAL;
+ }
+
+ if (copy_from_user(name, user, sizeof(name)) != 0)
+ return -EFAULT;
+
+ name[XT_TABLE_MAXNAMELEN-1] = '\0';
+#ifdef CONFIG_COMPAT
+ if (compat)
+ xt_compat_lock(AF_INET6);
+#endif
+ t = try_then_request_module(xt_find_table_lock(net, AF_INET6, name),
+ "ip6table_%s", name);
+ if (t && !IS_ERR(t)) {
+ struct ip6t_getinfo info;
+ const struct xt_table_info *private = t->private;
+#ifdef CONFIG_COMPAT
+ struct xt_table_info tmp;
+
+ if (compat) {
+ ret = compat_table_info(private, &tmp);
+ xt_compat_flush_offsets(AF_INET6);
+ private = &tmp;
+ }
+#endif
+ memset(&info, 0, sizeof(info));
+ info.valid_hooks = t->valid_hooks;
+ memcpy(info.hook_entry, private->hook_entry,
+ sizeof(info.hook_entry));
+ memcpy(info.underflow, private->underflow,
+ sizeof(info.underflow));
+ info.num_entries = private->number;
+ info.size = private->size;
+ strcpy(info.name, name);
+
+ if (copy_to_user(user, &info, *len) != 0)
+ ret = -EFAULT;
+ else
+ ret = 0;
+
+ xt_table_unlock(t);
+ module_put(t->me);
+ } else
+ ret = t ? PTR_ERR(t) : -ENOENT;
+#ifdef CONFIG_COMPAT
+ if (compat)
+ xt_compat_unlock(AF_INET6);
+#endif
+ return ret;
+}
+
+static int
+get_entries(struct net *net, struct ip6t_get_entries __user *uptr,
+ const int *len)
+{
+ int ret;
+ struct ip6t_get_entries get;
+ struct xt_table *t;
+
+ if (*len < sizeof(get)) {
+ duprintf("get_entries: %u < %zu\n", *len, sizeof(get));
+ return -EINVAL;
+ }
+ if (copy_from_user(&get, uptr, sizeof(get)) != 0)
+ return -EFAULT;
+ if (*len != sizeof(struct ip6t_get_entries) + get.size) {
+ duprintf("get_entries: %u != %zu\n",
+ *len, sizeof(get) + get.size);
+ return -EINVAL;
+ }
+
+ t = xt_find_table_lock(net, AF_INET6, get.name);
+ if (t && !IS_ERR(t)) {
+ struct xt_table_info *private = t->private;
+ duprintf("t->private->number = %u\n", private->number);
+ if (get.size == private->size)
+ ret = copy_entries_to_user(private->size,
+ t, uptr->entrytable);
+ else {
+ duprintf("get_entries: I've got %u not %u!\n",
+ private->size, get.size);
+ ret = -EAGAIN;
+ }
+ module_put(t->me);
+ xt_table_unlock(t);
+ } else
+ ret = t ? PTR_ERR(t) : -ENOENT;
+
+ return ret;
+}
+
+static int
+__do_replace(struct net *net, const char *name, unsigned int valid_hooks,
+ struct xt_table_info *newinfo, unsigned int num_counters,
+ void __user *counters_ptr)
+{
+ int ret;
+ struct xt_table *t;
+ struct xt_table_info *oldinfo;
+ struct xt_counters *counters;
+ const void *loc_cpu_old_entry;
+ struct ip6t_entry *iter;
+
+ ret = 0;
+ counters = vzalloc(num_counters * sizeof(struct xt_counters));
+ if (!counters) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ t = try_then_request_module(xt_find_table_lock(net, AF_INET6, name),
+ "ip6table_%s", name);
+ if (!t || IS_ERR(t)) {
+ ret = t ? PTR_ERR(t) : -ENOENT;
+ goto free_newinfo_counters_untrans;
+ }
+
+ /* You lied! */
+ if (valid_hooks != t->valid_hooks) {
+ duprintf("Valid hook crap: %08X vs %08X\n",
+ valid_hooks, t->valid_hooks);
+ ret = -EINVAL;
+ goto put_module;
+ }
+
+ oldinfo = xt_replace_table(t, num_counters, newinfo, &ret);
+ if (!oldinfo)
+ goto put_module;
+
+ /* Update module usage count based on number of rules */
+ duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
+ oldinfo->number, oldinfo->initial_entries, newinfo->number);
+ if ((oldinfo->number > oldinfo->initial_entries) ||
+ (newinfo->number <= oldinfo->initial_entries))
+ module_put(t->me);
+ if ((oldinfo->number > oldinfo->initial_entries) &&
+ (newinfo->number <= oldinfo->initial_entries))
+ module_put(t->me);
+
+ /* Get the old counters, and synchronize with replace */
+ get_counters(oldinfo, counters);
+
+ /* Decrease module usage counts and free resource */
+ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
+ xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size)
+ cleanup_entry(iter, net);
+
+ xt_free_table_info(oldinfo);
+ if (copy_to_user(counters_ptr, counters,
+ sizeof(struct xt_counters) * num_counters) != 0)
+ ret = -EFAULT;
+ vfree(counters);
+ xt_table_unlock(t);
+ return ret;
+
+ put_module:
+ module_put(t->me);
+ xt_table_unlock(t);
+ free_newinfo_counters_untrans:
+ vfree(counters);
+ out:
+ return ret;
+}
+
+static int
+do_replace(struct net *net, const void __user *user, unsigned int len)
+{
+ int ret;
+ struct ip6t_replace tmp;
+ struct xt_table_info *newinfo;
+ void *loc_cpu_entry;
+ struct ip6t_entry *iter;
+
+ if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+ return -EFAULT;
+
+ /* overflow check */
+ if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
+ return -ENOMEM;
+ tmp.name[sizeof(tmp.name)-1] = 0;
+
+ newinfo = xt_alloc_table_info(tmp.size);
+ if (!newinfo)
+ return -ENOMEM;
+
+ /* choose the copy that is on our node/cpu */
+ loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+ if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
+ tmp.size) != 0) {
+ ret = -EFAULT;
+ goto free_newinfo;
+ }
+
+ ret = translate_table(net, newinfo, loc_cpu_entry, &tmp);
+ if (ret != 0)
+ goto free_newinfo;
+
+ duprintf("ip_tables: Translated table\n");
+
+ ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
+ tmp.num_counters, tmp.counters);
+ if (ret)
+ goto free_newinfo_untrans;
+ return 0;
+
+ free_newinfo_untrans:
+ xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
+ cleanup_entry(iter, net);
+ free_newinfo:
+ xt_free_table_info(newinfo);
+ return ret;
+}
+
+static int
+do_add_counters(struct net *net, const void __user *user, unsigned int len,
+ int compat)
+{
+ unsigned int i, curcpu;
+ struct xt_counters_info tmp;
+ struct xt_counters *paddc;
+ unsigned int num_counters;
+ char *name;
+ int size;
+ void *ptmp;
+ struct xt_table *t;
+ const struct xt_table_info *private;
+ int ret = 0;
+ const void *loc_cpu_entry;
+ struct ip6t_entry *iter;
+ unsigned int addend;
+#ifdef CONFIG_COMPAT
+ struct compat_xt_counters_info compat_tmp;
+
+ if (compat) {
+ ptmp = &compat_tmp;
+ size = sizeof(struct compat_xt_counters_info);
+ } else
+#endif
+ {
+ ptmp = &tmp;
+ size = sizeof(struct xt_counters_info);
+ }
+
+ if (copy_from_user(ptmp, user, size) != 0)
+ return -EFAULT;
+
+#ifdef CONFIG_COMPAT
+ if (compat) {
+ num_counters = compat_tmp.num_counters;
+ name = compat_tmp.name;
+ } else
+#endif
+ {
+ num_counters = tmp.num_counters;
+ name = tmp.name;
+ }
+
+ if (len != size + num_counters * sizeof(struct xt_counters))
+ return -EINVAL;
+
+ paddc = vmalloc(len - size);
+ if (!paddc)
+ return -ENOMEM;
+
+ if (copy_from_user(paddc, user + size, len - size) != 0) {
+ ret = -EFAULT;
+ goto free;
+ }
+
+ t = xt_find_table_lock(net, AF_INET6, name);
+ if (!t || IS_ERR(t)) {
+ ret = t ? PTR_ERR(t) : -ENOENT;
+ goto free;
+ }
+
+
+ local_bh_disable();
+ private = t->private;
+ if (private->number != num_counters) {
+ ret = -EINVAL;
+ goto unlock_up_free;
+ }
+
+ i = 0;
+ /* Choose the copy that is on our node */
+ curcpu = smp_processor_id();
+ addend = xt_write_recseq_begin();
+ loc_cpu_entry = private->entries[curcpu];
+ xt_entry_foreach(iter, loc_cpu_entry, private->size) {
+ ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt);
+ ++i;
+ }
+ xt_write_recseq_end(addend);
+
+ unlock_up_free:
+ local_bh_enable();
+ xt_table_unlock(t);
+ module_put(t->me);
+ free:
+ vfree(paddc);
+
+ return ret;
+}
+
+#ifdef CONFIG_COMPAT
+struct compat_ip6t_replace {
+ char name[XT_TABLE_MAXNAMELEN];
+ u32 valid_hooks;
+ u32 num_entries;
+ u32 size;
+ u32 hook_entry[NF_INET_NUMHOOKS];
+ u32 underflow[NF_INET_NUMHOOKS];
+ u32 num_counters;
+ compat_uptr_t counters; /* struct xt_counters * */
+ struct compat_ip6t_entry entries[0];
+};
+
+static int
+compat_copy_entry_to_user(struct ip6t_entry *e, void __user **dstptr,
+ unsigned int *size, struct xt_counters *counters,
+ unsigned int i)
+{
+ struct xt_entry_target *t;
+ struct compat_ip6t_entry __user *ce;
+ u_int16_t target_offset, next_offset;
+ compat_uint_t origsize;
+ const struct xt_entry_match *ematch;
+ int ret = 0;
+
+ origsize = *size;
+ ce = (struct compat_ip6t_entry __user *)*dstptr;
+ if (copy_to_user(ce, e, sizeof(struct ip6t_entry)) != 0 ||
+ copy_to_user(&ce->counters, &counters[i],
+ sizeof(counters[i])) != 0)
+ return -EFAULT;
+
+ *dstptr += sizeof(struct compat_ip6t_entry);
+ *size -= sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry);
+
+ xt_ematch_foreach(ematch, e) {
+ ret = xt_compat_match_to_user(ematch, dstptr, size);
+ if (ret != 0)
+ return ret;
+ }
+ target_offset = e->target_offset - (origsize - *size);
+ t = ip6t_get_target(e);
+ ret = xt_compat_target_to_user(t, dstptr, size);
+ if (ret)
+ return ret;
+ next_offset = e->next_offset - (origsize - *size);
+ if (put_user(target_offset, &ce->target_offset) != 0 ||
+ put_user(next_offset, &ce->next_offset) != 0)
+ return -EFAULT;
+ return 0;
+}
+
+static int
+compat_find_calc_match(struct xt_entry_match *m,
+ const char *name,
+ const struct ip6t_ip6 *ipv6,
+ unsigned int hookmask,
+ int *size)
+{
+ struct xt_match *match;
+
+ match = xt_request_find_match(NFPROTO_IPV6, m->u.user.name,
+ m->u.user.revision);
+ if (IS_ERR(match)) {
+ duprintf("compat_check_calc_match: `%s' not found\n",
+ m->u.user.name);
+ return PTR_ERR(match);
+ }
+ m->u.kernel.match = match;
+ *size += xt_compat_match_offset(match);
+ return 0;
+}
+
+static void compat_release_entry(struct compat_ip6t_entry *e)
+{
+ struct xt_entry_target *t;
+ struct xt_entry_match *ematch;
+
+ /* Cleanup all matches */
+ xt_ematch_foreach(ematch, e)
+ module_put(ematch->u.kernel.match->me);
+ t = compat_ip6t_get_target(e);
+ module_put(t->u.kernel.target->me);
+}
+
+static int
+check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e,
+ struct xt_table_info *newinfo,
+ unsigned int *size,
+ const unsigned char *base,
+ const unsigned char *limit,
+ const unsigned int *hook_entries,
+ const unsigned int *underflows,
+ const char *name)
+{
+ struct xt_entry_match *ematch;
+ struct xt_entry_target *t;
+ struct xt_target *target;
+ unsigned int entry_offset;
+ unsigned int j;
+ int ret, off, h;
+
+ duprintf("check_compat_entry_size_and_hooks %p\n", e);
+ if ((unsigned long)e % __alignof__(struct compat_ip6t_entry) != 0 ||
+ (unsigned char *)e + sizeof(struct compat_ip6t_entry) >= limit) {
+ duprintf("Bad offset %p, limit = %p\n", e, limit);
+ return -EINVAL;
+ }
+
+ if (e->next_offset < sizeof(struct compat_ip6t_entry) +
+ sizeof(struct compat_xt_entry_target)) {
+ duprintf("checking: element %p size %u\n",
+ e, e->next_offset);
+ return -EINVAL;
+ }
+
+ /* For purposes of check_entry casting the compat entry is fine */
+ ret = check_entry((struct ip6t_entry *)e, name);
+ if (ret)
+ return ret;
+
+ off = sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry);
+ entry_offset = (void *)e - (void *)base;
+ j = 0;
+ xt_ematch_foreach(ematch, e) {
+ ret = compat_find_calc_match(ematch, name,
+ &e->ipv6, e->comefrom, &off);
+ if (ret != 0)
+ goto release_matches;
+ ++j;
+ }
+
+ t = compat_ip6t_get_target(e);
+ target = xt_request_find_target(NFPROTO_IPV6, t->u.user.name,
+ t->u.user.revision);
+ if (IS_ERR(target)) {
+ duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
+ t->u.user.name);
+ ret = PTR_ERR(target);
+ goto release_matches;
+ }
+ t->u.kernel.target = target;
+
+ off += xt_compat_target_offset(target);
+ *size += off;
+ ret = xt_compat_add_offset(AF_INET6, entry_offset, off);
+ if (ret)
+ goto out;
+
+ /* Check hooks & underflows */
+ for (h = 0; h < NF_INET_NUMHOOKS; h++) {
+ if ((unsigned char *)e - base == hook_entries[h])
+ newinfo->hook_entry[h] = hook_entries[h];
+ if ((unsigned char *)e - base == underflows[h])
+ newinfo->underflow[h] = underflows[h];
+ }
+
+ /* Clear counters and comefrom */
+ memset(&e->counters, 0, sizeof(e->counters));
+ e->comefrom = 0;
+ return 0;
+
+out:
+ module_put(t->u.kernel.target->me);
+release_matches:
+ xt_ematch_foreach(ematch, e) {
+ if (j-- == 0)
+ break;
+ module_put(ematch->u.kernel.match->me);
+ }
+ return ret;
+}
+
+static int
+compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr,
+ unsigned int *size, const char *name,
+ struct xt_table_info *newinfo, unsigned char *base)
+{
+ struct xt_entry_target *t;
+ struct ip6t_entry *de;
+ unsigned int origsize;
+ int ret, h;
+ struct xt_entry_match *ematch;
+
+ ret = 0;
+ origsize = *size;
+ de = (struct ip6t_entry *)*dstptr;
+ memcpy(de, e, sizeof(struct ip6t_entry));
+ memcpy(&de->counters, &e->counters, sizeof(e->counters));
+
+ *dstptr += sizeof(struct ip6t_entry);
+ *size += sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry);
+
+ xt_ematch_foreach(ematch, e) {
+ ret = xt_compat_match_from_user(ematch, dstptr, size);
+ if (ret != 0)
+ return ret;
+ }
+ de->target_offset = e->target_offset - (origsize - *size);
+ t = compat_ip6t_get_target(e);
+ xt_compat_target_from_user(t, dstptr, size);
+
+ de->next_offset = e->next_offset - (origsize - *size);
+ for (h = 0; h < NF_INET_NUMHOOKS; h++) {
+ if ((unsigned char *)de - base < newinfo->hook_entry[h])
+ newinfo->hook_entry[h] -= origsize - *size;
+ if ((unsigned char *)de - base < newinfo->underflow[h])
+ newinfo->underflow[h] -= origsize - *size;
+ }
+ return ret;
+}
+
+static int compat_check_entry(struct ip6t_entry *e, struct net *net,
+ const char *name)
+{
+ unsigned int j;
+ int ret = 0;
+ struct xt_mtchk_param mtpar;
+ struct xt_entry_match *ematch;
+
+ j = 0;
+ mtpar.net = net;
+ mtpar.table = name;
+ mtpar.entryinfo = &e->ipv6;
+ mtpar.hook_mask = e->comefrom;
+ mtpar.family = NFPROTO_IPV6;
+ xt_ematch_foreach(ematch, e) {
+ ret = check_match(ematch, &mtpar);
+ if (ret != 0)
+ goto cleanup_matches;
+ ++j;
+ }
+
+ ret = check_target(e, net, name);
+ if (ret)
+ goto cleanup_matches;
+ return 0;
+
+ cleanup_matches:
+ xt_ematch_foreach(ematch, e) {
+ if (j-- == 0)
+ break;
+ cleanup_match(ematch, net);
+ }
+ return ret;
+}
+
+static int
+translate_compat_table(struct net *net,
+ const char *name,
+ unsigned int valid_hooks,
+ struct xt_table_info **pinfo,
+ void **pentry0,
+ unsigned int total_size,
+ unsigned int number,
+ unsigned int *hook_entries,
+ unsigned int *underflows)
+{
+ unsigned int i, j;
+ struct xt_table_info *newinfo, *info;
+ void *pos, *entry0, *entry1;
+ struct compat_ip6t_entry *iter0;
+ struct ip6t_entry *iter1;
+ unsigned int size;
+ int ret = 0;
+
+ info = *pinfo;
+ entry0 = *pentry0;
+ size = total_size;
+ info->number = number;
+
+ /* Init all hooks to impossible value. */
+ for (i = 0; i < NF_INET_NUMHOOKS; i++) {
+ info->hook_entry[i] = 0xFFFFFFFF;
+ info->underflow[i] = 0xFFFFFFFF;
+ }
+
+ duprintf("translate_compat_table: size %u\n", info->size);
+ j = 0;
+ xt_compat_lock(AF_INET6);
+ xt_compat_init_offsets(AF_INET6, number);
+ /* Walk through entries, checking offsets. */
+ xt_entry_foreach(iter0, entry0, total_size) {
+ ret = check_compat_entry_size_and_hooks(iter0, info, &size,
+ entry0,
+ entry0 + total_size,
+ hook_entries,
+ underflows,
+ name);
+ if (ret != 0)
+ goto out_unlock;
+ ++j;
+ }
+
+ ret = -EINVAL;
+ if (j != number) {
+ duprintf("translate_compat_table: %u not %u entries\n",
+ j, number);
+ goto out_unlock;
+ }
+
+ /* Check hooks all assigned */
+ for (i = 0; i < NF_INET_NUMHOOKS; i++) {
+ /* Only hooks which are valid */
+ if (!(valid_hooks & (1 << i)))
+ continue;
+ if (info->hook_entry[i] == 0xFFFFFFFF) {
+ duprintf("Invalid hook entry %u %u\n",
+ i, hook_entries[i]);
+ goto out_unlock;
+ }
+ if (info->underflow[i] == 0xFFFFFFFF) {
+ duprintf("Invalid underflow %u %u\n",
+ i, underflows[i]);
+ goto out_unlock;
+ }
+ }
+
+ ret = -ENOMEM;
+ newinfo = xt_alloc_table_info(size);
+ if (!newinfo)
+ goto out_unlock;
+
+ newinfo->number = number;
+ for (i = 0; i < NF_INET_NUMHOOKS; i++) {
+ newinfo->hook_entry[i] = info->hook_entry[i];
+ newinfo->underflow[i] = info->underflow[i];
+ }
+ entry1 = newinfo->entries[raw_smp_processor_id()];
+ pos = entry1;
+ size = total_size;
+ xt_entry_foreach(iter0, entry0, total_size) {
+ ret = compat_copy_entry_from_user(iter0, &pos, &size,
+ name, newinfo, entry1);
+ if (ret != 0)
+ break;
+ }
+ xt_compat_flush_offsets(AF_INET6);
+ xt_compat_unlock(AF_INET6);
+ if (ret)
+ goto free_newinfo;
+
+ ret = -ELOOP;
+ if (!mark_source_chains(newinfo, valid_hooks, entry1))
+ goto free_newinfo;
+
+ i = 0;
+ xt_entry_foreach(iter1, entry1, newinfo->size) {
+ ret = compat_check_entry(iter1, net, name);
+ if (ret != 0)
+ break;
+ ++i;
+ if (strcmp(ip6t_get_target(iter1)->u.user.name,
+ XT_ERROR_TARGET) == 0)
+ ++newinfo->stacksize;
+ }
+ if (ret) {
+ /*
+ * The first i matches need cleanup_entry (calls ->destroy)
+ * because they had called ->check already. The other j-i
+ * entries need only release.
+ */
+ int skip = i;
+ j -= i;
+ xt_entry_foreach(iter0, entry0, newinfo->size) {
+ if (skip-- > 0)
+ continue;
+ if (j-- == 0)
+ break;
+ compat_release_entry(iter0);
+ }
+ xt_entry_foreach(iter1, entry1, newinfo->size) {
+ if (i-- == 0)
+ break;
+ cleanup_entry(iter1, net);
+ }
+ xt_free_table_info(newinfo);
+ return ret;
+ }
+
+ /* And one copy for every other CPU */
+ for_each_possible_cpu(i)
+ if (newinfo->entries[i] && newinfo->entries[i] != entry1)
+ memcpy(newinfo->entries[i], entry1, newinfo->size);
+
+ *pinfo = newinfo;
+ *pentry0 = entry1;
+ xt_free_table_info(info);
+ return 0;
+
+free_newinfo:
+ xt_free_table_info(newinfo);
+out:
+ xt_entry_foreach(iter0, entry0, total_size) {
+ if (j-- == 0)
+ break;
+ compat_release_entry(iter0);
+ }
+ return ret;
+out_unlock:
+ xt_compat_flush_offsets(AF_INET6);
+ xt_compat_unlock(AF_INET6);
+ goto out;
+}
+
+static int
+compat_do_replace(struct net *net, void __user *user, unsigned int len)
+{
+ int ret;
+ struct compat_ip6t_replace tmp;
+ struct xt_table_info *newinfo;
+ void *loc_cpu_entry;
+ struct ip6t_entry *iter;
+
+ if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+ return -EFAULT;
+
+ /* overflow check */
+ if (tmp.size >= INT_MAX / num_possible_cpus())
+ return -ENOMEM;
+ if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
+ return -ENOMEM;
+ tmp.name[sizeof(tmp.name)-1] = 0;
+
+ newinfo = xt_alloc_table_info(tmp.size);
+ if (!newinfo)
+ return -ENOMEM;
+
+ /* choose the copy that is on our node/cpu */
+ loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+ if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
+ tmp.size) != 0) {
+ ret = -EFAULT;
+ goto free_newinfo;
+ }
+
+ ret = translate_compat_table(net, tmp.name, tmp.valid_hooks,
+ &newinfo, &loc_cpu_entry, tmp.size,
+ tmp.num_entries, tmp.hook_entry,
+ tmp.underflow);
+ if (ret != 0)
+ goto free_newinfo;
+
+ duprintf("compat_do_replace: Translated table\n");
+
+ ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
+ tmp.num_counters, compat_ptr(tmp.counters));
+ if (ret)
+ goto free_newinfo_untrans;
+ return 0;
+
+ free_newinfo_untrans:
+ xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
+ cleanup_entry(iter, net);
+ free_newinfo:
+ xt_free_table_info(newinfo);
+ return ret;
+}
+
+static int
+compat_do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user,
+ unsigned int len)
+{
+ int ret;
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ switch (cmd) {
+ case IP6T_SO_SET_REPLACE:
+ ret = compat_do_replace(sock_net(sk), user, len);
+ break;
+
+ case IP6T_SO_SET_ADD_COUNTERS:
+ ret = do_add_counters(sock_net(sk), user, len, 1);
+ break;
+
+ default:
+ duprintf("do_ip6t_set_ctl: unknown request %i\n", cmd);
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+struct compat_ip6t_get_entries {
+ char name[XT_TABLE_MAXNAMELEN];
+ compat_uint_t size;
+ struct compat_ip6t_entry entrytable[0];
+};
+
+static int
+compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
+ void __user *userptr)
+{
+ struct xt_counters *counters;
+ const struct xt_table_info *private = table->private;
+ void __user *pos;
+ unsigned int size;
+ int ret = 0;
+ const void *loc_cpu_entry;
+ unsigned int i = 0;
+ struct ip6t_entry *iter;
+
+ counters = alloc_counters(table);
+ if (IS_ERR(counters))
+ return PTR_ERR(counters);
+
+ /* choose the copy that is on our node/cpu, ...
+ * This choice is lazy (because current thread is
+ * allowed to migrate to another cpu)
+ */
+ loc_cpu_entry = private->entries[raw_smp_processor_id()];
+ pos = userptr;
+ size = total_size;
+ xt_entry_foreach(iter, loc_cpu_entry, total_size) {
+ ret = compat_copy_entry_to_user(iter, &pos,
+ &size, counters, i++);
+ if (ret != 0)
+ break;
+ }
+
+ vfree(counters);
+ return ret;
+}
+
+static int
+compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr,
+ int *len)
+{
+ int ret;
+ struct compat_ip6t_get_entries get;
+ struct xt_table *t;
+
+ if (*len < sizeof(get)) {
+ duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get));
+ return -EINVAL;
+ }
+
+ if (copy_from_user(&get, uptr, sizeof(get)) != 0)
+ return -EFAULT;
+
+ if (*len != sizeof(struct compat_ip6t_get_entries) + get.size) {
+ duprintf("compat_get_entries: %u != %zu\n",
+ *len, sizeof(get) + get.size);
+ return -EINVAL;
+ }
+
+ xt_compat_lock(AF_INET6);
+ t = xt_find_table_lock(net, AF_INET6, get.name);
+ if (t && !IS_ERR(t)) {
+ const struct xt_table_info *private = t->private;
+ struct xt_table_info info;
+ duprintf("t->private->number = %u\n", private->number);
+ ret = compat_table_info(private, &info);
+ if (!ret && get.size == info.size) {
+ ret = compat_copy_entries_to_user(private->size,
+ t, uptr->entrytable);
+ } else if (!ret) {
+ duprintf("compat_get_entries: I've got %u not %u!\n",
+ private->size, get.size);
+ ret = -EAGAIN;
+ }
+ xt_compat_flush_offsets(AF_INET6);
+ module_put(t->me);
+ xt_table_unlock(t);
+ } else
+ ret = t ? PTR_ERR(t) : -ENOENT;
+
+ xt_compat_unlock(AF_INET6);
+ return ret;
+}
+
+static int do_ip6t_get_ctl(struct sock *, int, void __user *, int *);
+
+static int
+compat_do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
+{
+ int ret;
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ switch (cmd) {
+ case IP6T_SO_GET_INFO:
+ ret = get_info(sock_net(sk), user, len, 1);
+ break;
+ case IP6T_SO_GET_ENTRIES:
+ ret = compat_get_entries(sock_net(sk), user, len);
+ break;
+ default:
+ ret = do_ip6t_get_ctl(sk, cmd, user, len);
+ }
+ return ret;
+}
+#endif
+
+static int
+do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
+{
+ int ret;
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ switch (cmd) {
+ case IP6T_SO_SET_REPLACE:
+ ret = do_replace(sock_net(sk), user, len);
+ break;
+
+ case IP6T_SO_SET_ADD_COUNTERS:
+ ret = do_add_counters(sock_net(sk), user, len, 0);
+ break;
+
+ default:
+ duprintf("do_ip6t_set_ctl: unknown request %i\n", cmd);
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+static int
+do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
+{
+ int ret;
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ switch (cmd) {
+ case IP6T_SO_GET_INFO:
+ ret = get_info(sock_net(sk), user, len, 0);
+ break;
+
+ case IP6T_SO_GET_ENTRIES:
+ ret = get_entries(sock_net(sk), user, len);
+ break;
+
+ case IP6T_SO_GET_REVISION_MATCH:
+ case IP6T_SO_GET_REVISION_TARGET: {
+ struct xt_get_revision rev;
+ int target;
+
+ if (*len != sizeof(rev)) {
+ ret = -EINVAL;
+ break;
+ }
+ if (copy_from_user(&rev, user, sizeof(rev)) != 0) {
+ ret = -EFAULT;
+ break;
+ }
+ rev.name[sizeof(rev.name)-1] = 0;
+
+ if (cmd == IP6T_SO_GET_REVISION_TARGET)
+ target = 1;
+ else
+ target = 0;
+
+ try_then_request_module(xt_find_revision(AF_INET6, rev.name,
+ rev.revision,
+ target, &ret),
+ "ip6t_%s", rev.name);
+ break;
+ }
+
+ default:
+ duprintf("do_ip6t_get_ctl: unknown request %i\n", cmd);
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+struct xt_table *ip6t_register_table(struct net *net,
+ const struct xt_table *table,
+ const struct ip6t_replace *repl)
+{
+ int ret;
+ struct xt_table_info *newinfo;
+ struct xt_table_info bootstrap = {0};
+ void *loc_cpu_entry;
+ struct xt_table *new_table;
+
+ newinfo = xt_alloc_table_info(repl->size);
+ if (!newinfo) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ /* choose the copy on our node/cpu, but dont care about preemption */
+ loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+ memcpy(loc_cpu_entry, repl->entries, repl->size);
+
+ ret = translate_table(net, newinfo, loc_cpu_entry, repl);
+ if (ret != 0)
+ goto out_free;
+
+ new_table = xt_register_table(net, table, &bootstrap, newinfo);
+ if (IS_ERR(new_table)) {
+ ret = PTR_ERR(new_table);
+ goto out_free;
+ }
+ return new_table;
+
+out_free:
+ xt_free_table_info(newinfo);
+out:
+ return ERR_PTR(ret);
+}
+
+void ip6t_unregister_table(struct net *net, struct xt_table *table)
+{
+ struct xt_table_info *private;
+ void *loc_cpu_entry;
+ struct module *table_owner = table->me;
+ struct ip6t_entry *iter;
+
+ private = xt_unregister_table(table);
+
+ /* Decrease module usage counts and free resources */
+ loc_cpu_entry = private->entries[raw_smp_processor_id()];
+ xt_entry_foreach(iter, loc_cpu_entry, private->size)
+ cleanup_entry(iter, net);
+ if (private->number > private->initial_entries)
+ module_put(table_owner);
+ xt_free_table_info(private);
+}
+
+/* Returns 1 if the type and code is matched by the range, 0 otherwise */
+static inline bool
+icmp6_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
+ u_int8_t type, u_int8_t code,
+ bool invert)
+{
+ return (type == test_type && code >= min_code && code <= max_code)
+ ^ invert;
+}
+
+static bool
+icmp6_match(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct icmp6hdr *ic;
+ struct icmp6hdr _icmph;
+ const struct ip6t_icmp *icmpinfo = par->matchinfo;
+
+ /* Must not be a fragment. */
+ if (par->fragoff != 0)
+ return false;
+
+ ic = skb_header_pointer(skb, par->thoff, sizeof(_icmph), &_icmph);
+ if (ic == NULL) {
+ /* We've been asked to examine this packet, and we
+ * can't. Hence, no choice but to drop.
+ */
+ duprintf("Dropping evil ICMP tinygram.\n");
+ par->hotdrop = true;
+ return false;
+ }
+
+ return icmp6_type_code_match(icmpinfo->type,
+ icmpinfo->code[0],
+ icmpinfo->code[1],
+ ic->icmp6_type, ic->icmp6_code,
+ !!(icmpinfo->invflags&IP6T_ICMP_INV));
+}
+
+/* Called when user tries to insert an entry of this type. */
+static int icmp6_checkentry(const struct xt_mtchk_param *par)
+{
+ const struct ip6t_icmp *icmpinfo = par->matchinfo;
+
+ /* Must specify no unknown invflags */
+ return (icmpinfo->invflags & ~IP6T_ICMP_INV) ? -EINVAL : 0;
+}
+
+/* The built-in targets: standard (NULL) and error. */
+static struct xt_target ip6t_builtin_tg[] __read_mostly = {
+ {
+ .name = XT_STANDARD_TARGET,
+ .targetsize = sizeof(int),
+ .family = NFPROTO_IPV6,
+#ifdef CONFIG_COMPAT
+ .compatsize = sizeof(compat_int_t),
+ .compat_from_user = compat_standard_from_user,
+ .compat_to_user = compat_standard_to_user,
+#endif
+ },
+ {
+ .name = XT_ERROR_TARGET,
+ .target = ip6t_error,
+ .targetsize = XT_FUNCTION_MAXNAMELEN,
+ .family = NFPROTO_IPV6,
+ },
+};
+
+static struct nf_sockopt_ops ip6t_sockopts = {
+ .pf = PF_INET6,
+ .set_optmin = IP6T_BASE_CTL,
+ .set_optmax = IP6T_SO_SET_MAX+1,
+ .set = do_ip6t_set_ctl,
+#ifdef CONFIG_COMPAT
+ .compat_set = compat_do_ip6t_set_ctl,
+#endif
+ .get_optmin = IP6T_BASE_CTL,
+ .get_optmax = IP6T_SO_GET_MAX+1,
+ .get = do_ip6t_get_ctl,
+#ifdef CONFIG_COMPAT
+ .compat_get = compat_do_ip6t_get_ctl,
+#endif
+ .owner = THIS_MODULE,
+};
+
+static struct xt_match ip6t_builtin_mt[] __read_mostly = {
+ {
+ .name = "icmp6",
+ .match = icmp6_match,
+ .matchsize = sizeof(struct ip6t_icmp),
+ .checkentry = icmp6_checkentry,
+ .proto = IPPROTO_ICMPV6,
+ .family = NFPROTO_IPV6,
+ },
+};
+
+static int __net_init ip6_tables_net_init(struct net *net)
+{
+ return xt_proto_init(net, NFPROTO_IPV6);
+}
+
+static void __net_exit ip6_tables_net_exit(struct net *net)
+{
+ xt_proto_fini(net, NFPROTO_IPV6);
+}
+
+static struct pernet_operations ip6_tables_net_ops = {
+ .init = ip6_tables_net_init,
+ .exit = ip6_tables_net_exit,
+};
+
+static int __init ip6_tables_init(void)
+{
+ int ret;
+
+ ret = register_pernet_subsys(&ip6_tables_net_ops);
+ if (ret < 0)
+ goto err1;
+
+ /* No one else will be downing sem now, so we won't sleep */
+ ret = xt_register_targets(ip6t_builtin_tg, ARRAY_SIZE(ip6t_builtin_tg));
+ if (ret < 0)
+ goto err2;
+ ret = xt_register_matches(ip6t_builtin_mt, ARRAY_SIZE(ip6t_builtin_mt));
+ if (ret < 0)
+ goto err4;
+
+ /* Register setsockopt */
+ ret = nf_register_sockopt(&ip6t_sockopts);
+ if (ret < 0)
+ goto err5;
+
+ pr_info("(C) 2000-2006 Netfilter Core Team\n");
+ return 0;
+
+err5:
+ xt_unregister_matches(ip6t_builtin_mt, ARRAY_SIZE(ip6t_builtin_mt));
+err4:
+ xt_unregister_targets(ip6t_builtin_tg, ARRAY_SIZE(ip6t_builtin_tg));
+err2:
+ unregister_pernet_subsys(&ip6_tables_net_ops);
+err1:
+ return ret;
+}
+
+static void __exit ip6_tables_fini(void)
+{
+ nf_unregister_sockopt(&ip6t_sockopts);
+
+ xt_unregister_matches(ip6t_builtin_mt, ARRAY_SIZE(ip6t_builtin_mt));
+ xt_unregister_targets(ip6t_builtin_tg, ARRAY_SIZE(ip6t_builtin_tg));
+ unregister_pernet_subsys(&ip6_tables_net_ops);
+}
+
+/*
+ * find the offset to specified header or the protocol number of last header
+ * if target < 0. "last header" is transport protocol header, ESP, or
+ * "No next header".
+ *
+ * If target header is found, its offset is set in *offset and return protocol
+ * number. Otherwise, return -ENOENT or -EBADMSG.
+ *
+ * If the first fragment doesn't contain the final protocol header or
+ * NEXTHDR_NONE it is considered invalid.
+ *
+ * Note that non-1st fragment is special case that "the protocol number
+ * of last header" is "next header" field in Fragment header. In this case,
+ * *offset is meaningless. If fragoff is not NULL, the fragment offset is
+ * stored in *fragoff; if it is NULL, return -EINVAL.
+ */
+int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
+ int target, unsigned short *fragoff)
+{
+ unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr);
+ u8 nexthdr = ipv6_hdr(skb)->nexthdr;
+ unsigned int len = skb->len - start;
+
+ if (fragoff)
+ *fragoff = 0;
+
+ while (nexthdr != target) {
+ struct ipv6_opt_hdr _hdr, *hp;
+ unsigned int hdrlen;
+
+ if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) {
+ if (target < 0)
+ break;
+ return -ENOENT;
+ }
+
+ hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr);
+ if (hp == NULL)
+ return -EBADMSG;
+ if (nexthdr == NEXTHDR_FRAGMENT) {
+ unsigned short _frag_off;
+ __be16 *fp;
+ fp = skb_header_pointer(skb,
+ start+offsetof(struct frag_hdr,
+ frag_off),
+ sizeof(_frag_off),
+ &_frag_off);
+ if (fp == NULL)
+ return -EBADMSG;
+
+ _frag_off = ntohs(*fp) & ~0x7;
+ if (_frag_off) {
+ if (target < 0 &&
+ ((!ipv6_ext_hdr(hp->nexthdr)) ||
+ hp->nexthdr == NEXTHDR_NONE)) {
+ if (fragoff) {
+ *fragoff = _frag_off;
+ return hp->nexthdr;
+ } else {
+ return -EINVAL;
+ }
+ }
+ return -ENOENT;
+ }
+ hdrlen = 8;
+ } else if (nexthdr == NEXTHDR_AUTH)
+ hdrlen = (hp->hdrlen + 2) << 2;
+ else
+ hdrlen = ipv6_optlen(hp);
+
+ nexthdr = hp->nexthdr;
+ len -= hdrlen;
+ start += hdrlen;
+ }
+
+ *offset = start;
+ return nexthdr;
+}
+
+EXPORT_SYMBOL(ip6t_register_table);
+EXPORT_SYMBOL(ip6t_unregister_table);
+EXPORT_SYMBOL(ip6t_do_table);
+EXPORT_SYMBOL(ipv6_find_hdr);
+
+module_init(ip6_tables_init);
+module_exit(ip6_tables_fini);
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
new file mode 100644
index 00000000..09155e34
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -0,0 +1,272 @@
+/*
+ * IP6 tables REJECT target module
+ * Linux INET6 implementation
+ *
+ * Copyright (C)2003 USAGI/WIDE Project
+ *
+ * Authors:
+ * Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
+ *
+ * Based on net/ipv4/netfilter/ipt_REJECT.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/gfp.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/icmpv6.h>
+#include <linux/netdevice.h>
+#include <net/ipv6.h>
+#include <net/tcp.h>
+#include <net/icmp.h>
+#include <net/ip6_checksum.h>
+#include <net/ip6_fib.h>
+#include <net/ip6_route.h>
+#include <net/flow.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter_ipv6/ip6t_REJECT.h>
+
+MODULE_AUTHOR("Yasuyuki KOZAKAI <yasuyuki.kozakai@toshiba.co.jp>");
+MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv6");
+MODULE_LICENSE("GPL");
+
+/* Send RST reply */
+static void send_reset(struct net *net, struct sk_buff *oldskb)
+{
+ struct sk_buff *nskb;
+ struct tcphdr otcph, *tcph;
+ unsigned int otcplen, hh_len;
+ int tcphoff, needs_ack;
+ const struct ipv6hdr *oip6h = ipv6_hdr(oldskb);
+ struct ipv6hdr *ip6h;
+#define DEFAULT_TOS_VALUE 0x0U
+ const __u8 tclass = DEFAULT_TOS_VALUE;
+ struct dst_entry *dst = NULL;
+ u8 proto;
+ __be16 frag_off;
+ struct flowi6 fl6;
+
+ if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) ||
+ (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) {
+ pr_debug("addr is not unicast.\n");
+ return;
+ }
+
+ proto = oip6h->nexthdr;
+ tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto, &frag_off);
+
+ if ((tcphoff < 0) || (tcphoff > oldskb->len)) {
+ pr_debug("Cannot get TCP header.\n");
+ return;
+ }
+
+ otcplen = oldskb->len - tcphoff;
+
+ /* IP header checks: fragment, too short. */
+ if (proto != IPPROTO_TCP || otcplen < sizeof(struct tcphdr)) {
+ pr_debug("proto(%d) != IPPROTO_TCP, "
+ "or too short. otcplen = %d\n",
+ proto, otcplen);
+ return;
+ }
+
+ if (skb_copy_bits(oldskb, tcphoff, &otcph, sizeof(struct tcphdr)))
+ BUG();
+
+ /* No RST for RST. */
+ if (otcph.rst) {
+ pr_debug("RST is set\n");
+ return;
+ }
+
+ /* Check checksum. */
+ if (csum_ipv6_magic(&oip6h->saddr, &oip6h->daddr, otcplen, IPPROTO_TCP,
+ skb_checksum(oldskb, tcphoff, otcplen, 0))) {
+ pr_debug("TCP checksum is invalid\n");
+ return;
+ }
+
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_proto = IPPROTO_TCP;
+ fl6.saddr = oip6h->daddr;
+ fl6.daddr = oip6h->saddr;
+ fl6.fl6_sport = otcph.dest;
+ fl6.fl6_dport = otcph.source;
+ security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6));
+ dst = ip6_route_output(net, NULL, &fl6);
+ if (dst == NULL || dst->error) {
+ dst_release(dst);
+ return;
+ }
+ dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
+ if (IS_ERR(dst))
+ return;
+
+ hh_len = (dst->dev->hard_header_len + 15)&~15;
+ nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr)
+ + sizeof(struct tcphdr) + dst->trailer_len,
+ GFP_ATOMIC);
+
+ if (!nskb) {
+ if (net_ratelimit())
+ pr_debug("cannot alloc skb\n");
+ dst_release(dst);
+ return;
+ }
+
+ skb_dst_set(nskb, dst);
+
+ skb_reserve(nskb, hh_len + dst->header_len);
+
+ skb_put(nskb, sizeof(struct ipv6hdr));
+ skb_reset_network_header(nskb);
+ ip6h = ipv6_hdr(nskb);
+ *(__be32 *)ip6h = htonl(0x60000000 | (tclass << 20));
+ ip6h->hop_limit = ip6_dst_hoplimit(dst);
+ ip6h->nexthdr = IPPROTO_TCP;
+ ip6h->saddr = oip6h->daddr;
+ ip6h->daddr = oip6h->saddr;
+
+ tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
+ /* Truncate to length (no data) */
+ tcph->doff = sizeof(struct tcphdr)/4;
+ tcph->source = otcph.dest;
+ tcph->dest = otcph.source;
+
+ if (otcph.ack) {
+ needs_ack = 0;
+ tcph->seq = otcph.ack_seq;
+ tcph->ack_seq = 0;
+ } else {
+ needs_ack = 1;
+ tcph->ack_seq = htonl(ntohl(otcph.seq) + otcph.syn + otcph.fin
+ + otcplen - (otcph.doff<<2));
+ tcph->seq = 0;
+ }
+
+ /* Reset flags */
+ ((u_int8_t *)tcph)[13] = 0;
+ tcph->rst = 1;
+ tcph->ack = needs_ack;
+ tcph->window = 0;
+ tcph->urg_ptr = 0;
+ tcph->check = 0;
+
+ /* Adjust TCP checksum */
+ tcph->check = csum_ipv6_magic(&ipv6_hdr(nskb)->saddr,
+ &ipv6_hdr(nskb)->daddr,
+ sizeof(struct tcphdr), IPPROTO_TCP,
+ csum_partial(tcph,
+ sizeof(struct tcphdr), 0));
+
+ nf_ct_attach(nskb, oldskb);
+
+ ip6_local_out(nskb);
+}
+
+static inline void
+send_unreach(struct net *net, struct sk_buff *skb_in, unsigned char code,
+ unsigned int hooknum)
+{
+ if (hooknum == NF_INET_LOCAL_OUT && skb_in->dev == NULL)
+ skb_in->dev = net->loopback_dev;
+
+ icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0);
+#ifdef CONFIG_IP6_NF_TARGET_REJECT_SKERR
+ if (skb_in->sk) {
+ icmpv6_err_convert(ICMPV6_DEST_UNREACH, code,
+ &skb_in->sk->sk_err);
+ skb_in->sk->sk_error_report(skb_in->sk);
+ pr_debug("ip6t_REJECT: sk_err=%d for skb=%p sk=%p\n",
+ skb_in->sk->sk_err, skb_in, skb_in->sk);
+ }
+#endif
+}
+
+static unsigned int
+reject_tg6(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct ip6t_reject_info *reject = par->targinfo;
+ struct net *net = dev_net((par->in != NULL) ? par->in : par->out);
+
+ pr_debug("%s: medium point\n", __func__);
+ switch (reject->with) {
+ case IP6T_ICMP6_NO_ROUTE:
+ send_unreach(net, skb, ICMPV6_NOROUTE, par->hooknum);
+ break;
+ case IP6T_ICMP6_ADM_PROHIBITED:
+ send_unreach(net, skb, ICMPV6_ADM_PROHIBITED, par->hooknum);
+ break;
+ case IP6T_ICMP6_NOT_NEIGHBOUR:
+ send_unreach(net, skb, ICMPV6_NOT_NEIGHBOUR, par->hooknum);
+ break;
+ case IP6T_ICMP6_ADDR_UNREACH:
+ send_unreach(net, skb, ICMPV6_ADDR_UNREACH, par->hooknum);
+ break;
+ case IP6T_ICMP6_PORT_UNREACH:
+ send_unreach(net, skb, ICMPV6_PORT_UNREACH, par->hooknum);
+ break;
+ case IP6T_ICMP6_ECHOREPLY:
+ /* Do nothing */
+ break;
+ case IP6T_TCP_RESET:
+ send_reset(net, skb);
+ break;
+ default:
+ if (net_ratelimit())
+ pr_info("case %u not handled yet\n", reject->with);
+ break;
+ }
+
+ return NF_DROP;
+}
+
+static int reject_tg6_check(const struct xt_tgchk_param *par)
+{
+ const struct ip6t_reject_info *rejinfo = par->targinfo;
+ const struct ip6t_entry *e = par->entryinfo;
+
+ if (rejinfo->with == IP6T_ICMP6_ECHOREPLY) {
+ pr_info("ECHOREPLY is not supported.\n");
+ return -EINVAL;
+ } else if (rejinfo->with == IP6T_TCP_RESET) {
+ /* Must specify that it's a TCP packet */
+ if (e->ipv6.proto != IPPROTO_TCP ||
+ (e->ipv6.invflags & XT_INV_PROTO)) {
+ pr_info("TCP_RESET illegal for non-tcp\n");
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
+static struct xt_target reject_tg6_reg __read_mostly = {
+ .name = "REJECT",
+ .family = NFPROTO_IPV6,
+ .target = reject_tg6,
+ .targetsize = sizeof(struct ip6t_reject_info),
+ .table = "filter",
+ .hooks = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_LOCAL_OUT),
+ .checkentry = reject_tg6_check,
+ .me = THIS_MODULE
+};
+
+static int __init reject_tg6_init(void)
+{
+ return xt_register_target(&reject_tg6_reg);
+}
+
+static void __exit reject_tg6_exit(void)
+{
+ xt_unregister_target(&reject_tg6_reg);
+}
+
+module_init(reject_tg6_init);
+module_exit(reject_tg6_exit);
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
new file mode 100644
index 00000000..89cccc5a
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -0,0 +1,121 @@
+/* Kernel module to match AH parameters. */
+
+/* (C) 2001-2002 Andras Kis-Szabo <kisza@sch.bme.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/types.h>
+#include <net/checksum.h>
+#include <net/ipv6.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter_ipv6/ip6t_ah.h>
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Xtables: IPv6 IPsec-AH match");
+MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
+
+/* Returns 1 if the spi is matched by the range, 0 otherwise */
+static inline bool
+spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
+{
+ bool r;
+
+ pr_debug("spi_match:%c 0x%x <= 0x%x <= 0x%x\n",
+ invert ? '!' : ' ', min, spi, max);
+ r = (spi >= min && spi <= max) ^ invert;
+ pr_debug(" result %s\n", r ? "PASS" : "FAILED");
+ return r;
+}
+
+static bool ah_mt6(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ struct ip_auth_hdr _ah;
+ const struct ip_auth_hdr *ah;
+ const struct ip6t_ah *ahinfo = par->matchinfo;
+ unsigned int ptr;
+ unsigned int hdrlen = 0;
+ int err;
+
+ err = ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH, NULL);
+ if (err < 0) {
+ if (err != -ENOENT)
+ par->hotdrop = true;
+ return false;
+ }
+
+ ah = skb_header_pointer(skb, ptr, sizeof(_ah), &_ah);
+ if (ah == NULL) {
+ par->hotdrop = true;
+ return false;
+ }
+
+ hdrlen = (ah->hdrlen + 2) << 2;
+
+ pr_debug("IPv6 AH LEN %u %u ", hdrlen, ah->hdrlen);
+ pr_debug("RES %04X ", ah->reserved);
+ pr_debug("SPI %u %08X\n", ntohl(ah->spi), ntohl(ah->spi));
+
+ pr_debug("IPv6 AH spi %02X ",
+ spi_match(ahinfo->spis[0], ahinfo->spis[1],
+ ntohl(ah->spi),
+ !!(ahinfo->invflags & IP6T_AH_INV_SPI)));
+ pr_debug("len %02X %04X %02X ",
+ ahinfo->hdrlen, hdrlen,
+ (!ahinfo->hdrlen ||
+ (ahinfo->hdrlen == hdrlen) ^
+ !!(ahinfo->invflags & IP6T_AH_INV_LEN)));
+ pr_debug("res %02X %04X %02X\n",
+ ahinfo->hdrres, ah->reserved,
+ !(ahinfo->hdrres && ah->reserved));
+
+ return (ah != NULL) &&
+ spi_match(ahinfo->spis[0], ahinfo->spis[1],
+ ntohl(ah->spi),
+ !!(ahinfo->invflags & IP6T_AH_INV_SPI)) &&
+ (!ahinfo->hdrlen ||
+ (ahinfo->hdrlen == hdrlen) ^
+ !!(ahinfo->invflags & IP6T_AH_INV_LEN)) &&
+ !(ahinfo->hdrres && ah->reserved);
+}
+
+static int ah_mt6_check(const struct xt_mtchk_param *par)
+{
+ const struct ip6t_ah *ahinfo = par->matchinfo;
+
+ if (ahinfo->invflags & ~IP6T_AH_INV_MASK) {
+ pr_debug("unknown flags %X\n", ahinfo->invflags);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static struct xt_match ah_mt6_reg __read_mostly = {
+ .name = "ah",
+ .family = NFPROTO_IPV6,
+ .match = ah_mt6,
+ .matchsize = sizeof(struct ip6t_ah),
+ .checkentry = ah_mt6_check,
+ .me = THIS_MODULE,
+};
+
+static int __init ah_mt6_init(void)
+{
+ return xt_register_match(&ah_mt6_reg);
+}
+
+static void __exit ah_mt6_exit(void)
+{
+ xt_unregister_match(&ah_mt6_reg);
+}
+
+module_init(ah_mt6_init);
+module_exit(ah_mt6_exit);
diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c
new file mode 100644
index 00000000..aab07069
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_eui64.c
@@ -0,0 +1,74 @@
+/* Kernel module to match EUI64 address parameters. */
+
+/* (C) 2001-2002 Andras Kis-Szabo <kisza@sch.bme.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ipv6.h>
+#include <linux/if_ether.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+
+MODULE_DESCRIPTION("Xtables: IPv6 EUI64 address match");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
+
+static bool
+eui64_mt6(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ unsigned char eui64[8];
+
+ if (!(skb_mac_header(skb) >= skb->head &&
+ skb_mac_header(skb) + ETH_HLEN <= skb->data) &&
+ par->fragoff != 0) {
+ par->hotdrop = true;
+ return false;
+ }
+
+ memset(eui64, 0, sizeof(eui64));
+
+ if (eth_hdr(skb)->h_proto == htons(ETH_P_IPV6)) {
+ if (ipv6_hdr(skb)->version == 0x6) {
+ memcpy(eui64, eth_hdr(skb)->h_source, 3);
+ memcpy(eui64 + 5, eth_hdr(skb)->h_source + 3, 3);
+ eui64[3] = 0xff;
+ eui64[4] = 0xfe;
+ eui64[0] ^= 0x02;
+
+ if (!memcmp(ipv6_hdr(skb)->saddr.s6_addr + 8, eui64,
+ sizeof(eui64)))
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static struct xt_match eui64_mt6_reg __read_mostly = {
+ .name = "eui64",
+ .family = NFPROTO_IPV6,
+ .match = eui64_mt6,
+ .matchsize = sizeof(int),
+ .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_FORWARD),
+ .me = THIS_MODULE,
+};
+
+static int __init eui64_mt6_init(void)
+{
+ return xt_register_match(&eui64_mt6_reg);
+}
+
+static void __exit eui64_mt6_exit(void)
+{
+ xt_unregister_match(&eui64_mt6_reg);
+}
+
+module_init(eui64_mt6_init);
+module_exit(eui64_mt6_exit);
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c
new file mode 100644
index 00000000..eda898fd
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_frag.c
@@ -0,0 +1,136 @@
+/* Kernel module to match FRAG parameters. */
+
+/* (C) 2001-2002 Andras Kis-Szabo <kisza@sch.bme.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ipv6.h>
+#include <linux/types.h>
+#include <net/checksum.h>
+#include <net/ipv6.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter_ipv6/ip6t_frag.h>
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Xtables: IPv6 fragment match");
+MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
+
+/* Returns 1 if the id is matched by the range, 0 otherwise */
+static inline bool
+id_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert)
+{
+ bool r;
+ pr_debug("id_match:%c 0x%x <= 0x%x <= 0x%x\n", invert ? '!' : ' ',
+ min, id, max);
+ r = (id >= min && id <= max) ^ invert;
+ pr_debug(" result %s\n", r ? "PASS" : "FAILED");
+ return r;
+}
+
+static bool
+frag_mt6(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ struct frag_hdr _frag;
+ const struct frag_hdr *fh;
+ const struct ip6t_frag *fraginfo = par->matchinfo;
+ unsigned int ptr;
+ int err;
+
+ err = ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT, NULL);
+ if (err < 0) {
+ if (err != -ENOENT)
+ par->hotdrop = true;
+ return false;
+ }
+
+ fh = skb_header_pointer(skb, ptr, sizeof(_frag), &_frag);
+ if (fh == NULL) {
+ par->hotdrop = true;
+ return false;
+ }
+
+ pr_debug("INFO %04X ", fh->frag_off);
+ pr_debug("OFFSET %04X ", ntohs(fh->frag_off) & ~0x7);
+ pr_debug("RES %02X %04X", fh->reserved, ntohs(fh->frag_off) & 0x6);
+ pr_debug("MF %04X ", fh->frag_off & htons(IP6_MF));
+ pr_debug("ID %u %08X\n", ntohl(fh->identification),
+ ntohl(fh->identification));
+
+ pr_debug("IPv6 FRAG id %02X ",
+ id_match(fraginfo->ids[0], fraginfo->ids[1],
+ ntohl(fh->identification),
+ !!(fraginfo->invflags & IP6T_FRAG_INV_IDS)));
+ pr_debug("res %02X %02X%04X %02X ",
+ fraginfo->flags & IP6T_FRAG_RES, fh->reserved,
+ ntohs(fh->frag_off) & 0x6,
+ !((fraginfo->flags & IP6T_FRAG_RES) &&
+ (fh->reserved || (ntohs(fh->frag_off) & 0x06))));
+ pr_debug("first %02X %02X %02X ",
+ fraginfo->flags & IP6T_FRAG_FST,
+ ntohs(fh->frag_off) & ~0x7,
+ !((fraginfo->flags & IP6T_FRAG_FST) &&
+ (ntohs(fh->frag_off) & ~0x7)));
+ pr_debug("mf %02X %02X %02X ",
+ fraginfo->flags & IP6T_FRAG_MF,
+ ntohs(fh->frag_off) & IP6_MF,
+ !((fraginfo->flags & IP6T_FRAG_MF) &&
+ !((ntohs(fh->frag_off) & IP6_MF))));
+ pr_debug("last %02X %02X %02X\n",
+ fraginfo->flags & IP6T_FRAG_NMF,
+ ntohs(fh->frag_off) & IP6_MF,
+ !((fraginfo->flags & IP6T_FRAG_NMF) &&
+ (ntohs(fh->frag_off) & IP6_MF)));
+
+ return (fh != NULL) &&
+ id_match(fraginfo->ids[0], fraginfo->ids[1],
+ ntohl(fh->identification),
+ !!(fraginfo->invflags & IP6T_FRAG_INV_IDS)) &&
+ !((fraginfo->flags & IP6T_FRAG_RES) &&
+ (fh->reserved || (ntohs(fh->frag_off) & 0x6))) &&
+ !((fraginfo->flags & IP6T_FRAG_FST) &&
+ (ntohs(fh->frag_off) & ~0x7)) &&
+ !((fraginfo->flags & IP6T_FRAG_MF) &&
+ !(ntohs(fh->frag_off) & IP6_MF)) &&
+ !((fraginfo->flags & IP6T_FRAG_NMF) &&
+ (ntohs(fh->frag_off) & IP6_MF));
+}
+
+static int frag_mt6_check(const struct xt_mtchk_param *par)
+{
+ const struct ip6t_frag *fraginfo = par->matchinfo;
+
+ if (fraginfo->invflags & ~IP6T_FRAG_INV_MASK) {
+ pr_debug("unknown flags %X\n", fraginfo->invflags);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static struct xt_match frag_mt6_reg __read_mostly = {
+ .name = "frag",
+ .family = NFPROTO_IPV6,
+ .match = frag_mt6,
+ .matchsize = sizeof(struct ip6t_frag),
+ .checkentry = frag_mt6_check,
+ .me = THIS_MODULE,
+};
+
+static int __init frag_mt6_init(void)
+{
+ return xt_register_match(&frag_mt6_reg);
+}
+
+static void __exit frag_mt6_exit(void)
+{
+ xt_unregister_match(&frag_mt6_reg);
+}
+
+module_init(frag_mt6_init);
+module_exit(frag_mt6_exit);
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
new file mode 100644
index 00000000..59df051e
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -0,0 +1,215 @@
+/* Kernel module to match Hop-by-Hop and Destination parameters. */
+
+/* (C) 2001-2002 Andras Kis-Szabo <kisza@sch.bme.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ipv6.h>
+#include <linux/types.h>
+#include <net/checksum.h>
+#include <net/ipv6.h>
+
+#include <asm/byteorder.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter_ipv6/ip6t_opts.h>
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Xtables: IPv6 Hop-By-Hop and Destination Header match");
+MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
+MODULE_ALIAS("ip6t_dst");
+
+/*
+ * (Type & 0xC0) >> 6
+ * 0 -> ignorable
+ * 1 -> must drop the packet
+ * 2 -> send ICMP PARM PROB regardless and drop packet
+ * 3 -> Send ICMP if not a multicast address and drop packet
+ * (Type & 0x20) >> 5
+ * 0 -> invariant
+ * 1 -> can change the routing
+ * (Type & 0x1F) Type
+ * 0 -> Pad1 (only 1 byte!)
+ * 1 -> PadN LENGTH info (total length = length + 2)
+ * C0 | 2 -> JUMBO 4 x x x x ( xxxx > 64k )
+ * 5 -> RTALERT 2 x x
+ */
+
+static struct xt_match hbh_mt6_reg[] __read_mostly;
+
+static bool
+hbh_mt6(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ struct ipv6_opt_hdr _optsh;
+ const struct ipv6_opt_hdr *oh;
+ const struct ip6t_opts *optinfo = par->matchinfo;
+ unsigned int temp;
+ unsigned int ptr;
+ unsigned int hdrlen = 0;
+ bool ret = false;
+ u8 _opttype;
+ u8 _optlen;
+ const u_int8_t *tp = NULL;
+ const u_int8_t *lp = NULL;
+ unsigned int optlen;
+ int err;
+
+ err = ipv6_find_hdr(skb, &ptr,
+ (par->match == &hbh_mt6_reg[0]) ?
+ NEXTHDR_HOP : NEXTHDR_DEST, NULL);
+ if (err < 0) {
+ if (err != -ENOENT)
+ par->hotdrop = true;
+ return false;
+ }
+
+ oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh);
+ if (oh == NULL) {
+ par->hotdrop = true;
+ return false;
+ }
+
+ hdrlen = ipv6_optlen(oh);
+ if (skb->len - ptr < hdrlen) {
+ /* Packet smaller than it's length field */
+ return false;
+ }
+
+ pr_debug("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen);
+
+ pr_debug("len %02X %04X %02X ",
+ optinfo->hdrlen, hdrlen,
+ (!(optinfo->flags & IP6T_OPTS_LEN) ||
+ ((optinfo->hdrlen == hdrlen) ^
+ !!(optinfo->invflags & IP6T_OPTS_INV_LEN))));
+
+ ret = (oh != NULL) &&
+ (!(optinfo->flags & IP6T_OPTS_LEN) ||
+ ((optinfo->hdrlen == hdrlen) ^
+ !!(optinfo->invflags & IP6T_OPTS_INV_LEN)));
+
+ ptr += 2;
+ hdrlen -= 2;
+ if (!(optinfo->flags & IP6T_OPTS_OPTS)) {
+ return ret;
+ } else {
+ pr_debug("Strict ");
+ pr_debug("#%d ", optinfo->optsnr);
+ for (temp = 0; temp < optinfo->optsnr; temp++) {
+ /* type field exists ? */
+ if (hdrlen < 1)
+ break;
+ tp = skb_header_pointer(skb, ptr, sizeof(_opttype),
+ &_opttype);
+ if (tp == NULL)
+ break;
+
+ /* Type check */
+ if (*tp != (optinfo->opts[temp] & 0xFF00) >> 8) {
+ pr_debug("Tbad %02X %02X\n", *tp,
+ (optinfo->opts[temp] & 0xFF00) >> 8);
+ return false;
+ } else {
+ pr_debug("Tok ");
+ }
+ /* Length check */
+ if (*tp) {
+ u16 spec_len;
+
+ /* length field exists ? */
+ if (hdrlen < 2)
+ break;
+ lp = skb_header_pointer(skb, ptr + 1,
+ sizeof(_optlen),
+ &_optlen);
+ if (lp == NULL)
+ break;
+ spec_len = optinfo->opts[temp] & 0x00FF;
+
+ if (spec_len != 0x00FF && spec_len != *lp) {
+ pr_debug("Lbad %02X %04X\n", *lp,
+ spec_len);
+ return false;
+ }
+ pr_debug("Lok ");
+ optlen = *lp + 2;
+ } else {
+ pr_debug("Pad1\n");
+ optlen = 1;
+ }
+
+ /* Step to the next */
+ pr_debug("len%04X\n", optlen);
+
+ if ((ptr > skb->len - optlen || hdrlen < optlen) &&
+ temp < optinfo->optsnr - 1) {
+ pr_debug("new pointer is too large!\n");
+ break;
+ }
+ ptr += optlen;
+ hdrlen -= optlen;
+ }
+ if (temp == optinfo->optsnr)
+ return ret;
+ else
+ return false;
+ }
+
+ return false;
+}
+
+static int hbh_mt6_check(const struct xt_mtchk_param *par)
+{
+ const struct ip6t_opts *optsinfo = par->matchinfo;
+
+ if (optsinfo->invflags & ~IP6T_OPTS_INV_MASK) {
+ pr_debug("unknown flags %X\n", optsinfo->invflags);
+ return -EINVAL;
+ }
+
+ if (optsinfo->flags & IP6T_OPTS_NSTRICT) {
+ pr_debug("Not strict - not implemented");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static struct xt_match hbh_mt6_reg[] __read_mostly = {
+ {
+ /* Note, hbh_mt6 relies on the order of hbh_mt6_reg */
+ .name = "hbh",
+ .family = NFPROTO_IPV6,
+ .match = hbh_mt6,
+ .matchsize = sizeof(struct ip6t_opts),
+ .checkentry = hbh_mt6_check,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "dst",
+ .family = NFPROTO_IPV6,
+ .match = hbh_mt6,
+ .matchsize = sizeof(struct ip6t_opts),
+ .checkentry = hbh_mt6_check,
+ .me = THIS_MODULE,
+ },
+};
+
+static int __init hbh_mt6_init(void)
+{
+ return xt_register_matches(hbh_mt6_reg, ARRAY_SIZE(hbh_mt6_reg));
+}
+
+static void __exit hbh_mt6_exit(void)
+{
+ xt_unregister_matches(hbh_mt6_reg, ARRAY_SIZE(hbh_mt6_reg));
+}
+
+module_init(hbh_mt6_init);
+module_exit(hbh_mt6_exit);
diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c
new file mode 100644
index 00000000..54bd9790
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_ipv6header.c
@@ -0,0 +1,154 @@
+/* ipv6header match - matches IPv6 packets based
+ on whether they contain certain headers */
+
+/* Original idea: Brad Chapman
+ * Rewritten by: Andras Kis-Szabo <kisza@sch.bme.hu> */
+
+/* (C) 2001-2002 Andras Kis-Szabo <kisza@sch.bme.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ipv6.h>
+#include <linux/types.h>
+#include <net/checksum.h>
+#include <net/ipv6.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter_ipv6/ip6t_ipv6header.h>
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Xtables: IPv6 header types match");
+MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
+
+static bool
+ipv6header_mt6(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct ip6t_ipv6header_info *info = par->matchinfo;
+ unsigned int temp;
+ int len;
+ u8 nexthdr;
+ unsigned int ptr;
+
+ /* Make sure this isn't an evil packet */
+
+ /* type of the 1st exthdr */
+ nexthdr = ipv6_hdr(skb)->nexthdr;
+ /* pointer to the 1st exthdr */
+ ptr = sizeof(struct ipv6hdr);
+ /* available length */
+ len = skb->len - ptr;
+ temp = 0;
+
+ while (ip6t_ext_hdr(nexthdr)) {
+ const struct ipv6_opt_hdr *hp;
+ struct ipv6_opt_hdr _hdr;
+ int hdrlen;
+
+ /* No more exthdr -> evaluate */
+ if (nexthdr == NEXTHDR_NONE) {
+ temp |= MASK_NONE;
+ break;
+ }
+ /* Is there enough space for the next ext header? */
+ if (len < (int)sizeof(struct ipv6_opt_hdr))
+ return false;
+ /* ESP -> evaluate */
+ if (nexthdr == NEXTHDR_ESP) {
+ temp |= MASK_ESP;
+ break;
+ }
+
+ hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
+ BUG_ON(hp == NULL);
+
+ /* Calculate the header length */
+ if (nexthdr == NEXTHDR_FRAGMENT)
+ hdrlen = 8;
+ else if (nexthdr == NEXTHDR_AUTH)
+ hdrlen = (hp->hdrlen + 2) << 2;
+ else
+ hdrlen = ipv6_optlen(hp);
+
+ /* set the flag */
+ switch (nexthdr) {
+ case NEXTHDR_HOP:
+ temp |= MASK_HOPOPTS;
+ break;
+ case NEXTHDR_ROUTING:
+ temp |= MASK_ROUTING;
+ break;
+ case NEXTHDR_FRAGMENT:
+ temp |= MASK_FRAGMENT;
+ break;
+ case NEXTHDR_AUTH:
+ temp |= MASK_AH;
+ break;
+ case NEXTHDR_DEST:
+ temp |= MASK_DSTOPTS;
+ break;
+ default:
+ return false;
+ break;
+ }
+
+ nexthdr = hp->nexthdr;
+ len -= hdrlen;
+ ptr += hdrlen;
+ if (ptr > skb->len)
+ break;
+ }
+
+ if (nexthdr != NEXTHDR_NONE && nexthdr != NEXTHDR_ESP)
+ temp |= MASK_PROTO;
+
+ if (info->modeflag)
+ return !((temp ^ info->matchflags ^ info->invflags)
+ & info->matchflags);
+ else {
+ if (info->invflags)
+ return temp != info->matchflags;
+ else
+ return temp == info->matchflags;
+ }
+}
+
+static int ipv6header_mt6_check(const struct xt_mtchk_param *par)
+{
+ const struct ip6t_ipv6header_info *info = par->matchinfo;
+
+ /* invflags is 0 or 0xff in hard mode */
+ if ((!info->modeflag) && info->invflags != 0x00 &&
+ info->invflags != 0xFF)
+ return -EINVAL;
+
+ return 0;
+}
+
+static struct xt_match ipv6header_mt6_reg __read_mostly = {
+ .name = "ipv6header",
+ .family = NFPROTO_IPV6,
+ .match = ipv6header_mt6,
+ .matchsize = sizeof(struct ip6t_ipv6header_info),
+ .checkentry = ipv6header_mt6_check,
+ .destroy = NULL,
+ .me = THIS_MODULE,
+};
+
+static int __init ipv6header_mt6_init(void)
+{
+ return xt_register_match(&ipv6header_mt6_reg);
+}
+
+static void __exit ipv6header_mt6_exit(void)
+{
+ xt_unregister_match(&ipv6header_mt6_reg);
+}
+
+module_init(ipv6header_mt6_init);
+module_exit(ipv6header_mt6_exit);
diff --git a/net/ipv6/netfilter/ip6t_mh.c b/net/ipv6/netfilter/ip6t_mh.c
new file mode 100644
index 00000000..0c90c66b
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_mh.c
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C)2006 USAGI/WIDE Project
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Author:
+ * Masahide NAKAMURA @USAGI <masahide.nakamura.cz@hitachi.com>
+ *
+ * Based on net/netfilter/xt_tcpudp.c
+ *
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/types.h>
+#include <linux/module.h>
+#include <net/ip.h>
+#include <linux/ipv6.h>
+#include <net/ipv6.h>
+#include <net/mip6.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv6/ip6t_mh.h>
+
+MODULE_DESCRIPTION("Xtables: IPv6 Mobility Header match");
+MODULE_LICENSE("GPL");
+
+/* Returns 1 if the type is matched by the range, 0 otherwise */
+static inline bool
+type_match(u_int8_t min, u_int8_t max, u_int8_t type, bool invert)
+{
+ return (type >= min && type <= max) ^ invert;
+}
+
+static bool mh_mt6(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ struct ip6_mh _mh;
+ const struct ip6_mh *mh;
+ const struct ip6t_mh *mhinfo = par->matchinfo;
+
+ /* Must not be a fragment. */
+ if (par->fragoff != 0)
+ return false;
+
+ mh = skb_header_pointer(skb, par->thoff, sizeof(_mh), &_mh);
+ if (mh == NULL) {
+ /* We've been asked to examine this packet, and we
+ can't. Hence, no choice but to drop. */
+ pr_debug("Dropping evil MH tinygram.\n");
+ par->hotdrop = true;
+ return false;
+ }
+
+ if (mh->ip6mh_proto != IPPROTO_NONE) {
+ pr_debug("Dropping invalid MH Payload Proto: %u\n",
+ mh->ip6mh_proto);
+ par->hotdrop = true;
+ return false;
+ }
+
+ return type_match(mhinfo->types[0], mhinfo->types[1], mh->ip6mh_type,
+ !!(mhinfo->invflags & IP6T_MH_INV_TYPE));
+}
+
+static int mh_mt6_check(const struct xt_mtchk_param *par)
+{
+ const struct ip6t_mh *mhinfo = par->matchinfo;
+
+ /* Must specify no unknown invflags */
+ return (mhinfo->invflags & ~IP6T_MH_INV_MASK) ? -EINVAL : 0;
+}
+
+static struct xt_match mh_mt6_reg __read_mostly = {
+ .name = "mh",
+ .family = NFPROTO_IPV6,
+ .checkentry = mh_mt6_check,
+ .match = mh_mt6,
+ .matchsize = sizeof(struct ip6t_mh),
+ .proto = IPPROTO_MH,
+ .me = THIS_MODULE,
+};
+
+static int __init mh_mt6_init(void)
+{
+ return xt_register_match(&mh_mt6_reg);
+}
+
+static void __exit mh_mt6_exit(void)
+{
+ xt_unregister_match(&mh_mt6_reg);
+}
+
+module_init(mh_mt6_init);
+module_exit(mh_mt6_exit);
diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c
new file mode 100644
index 00000000..5d1d8b04
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_rpfilter.c
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2011 Florian Westphal <fw@strlen.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/route.h>
+#include <net/ip6_fib.h>
+#include <net/ip6_route.h>
+
+#include <linux/netfilter/xt_rpfilter.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
+MODULE_DESCRIPTION("Xtables: IPv6 reverse path filter match");
+
+static bool rpfilter_addr_unicast(const struct in6_addr *addr)
+{
+ int addr_type = ipv6_addr_type(addr);
+ return addr_type & IPV6_ADDR_UNICAST;
+}
+
+static bool rpfilter_lookup_reverse6(const struct sk_buff *skb,
+ const struct net_device *dev, u8 flags)
+{
+ struct rt6_info *rt;
+ struct ipv6hdr *iph = ipv6_hdr(skb);
+ bool ret = false;
+ struct flowi6 fl6 = {
+ .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
+ .flowi6_proto = iph->nexthdr,
+ .daddr = iph->saddr,
+ };
+ int lookup_flags;
+
+ if (rpfilter_addr_unicast(&iph->daddr)) {
+ memcpy(&fl6.saddr, &iph->daddr, sizeof(struct in6_addr));
+ lookup_flags = RT6_LOOKUP_F_HAS_SADDR;
+ } else {
+ lookup_flags = 0;
+ }
+
+ fl6.flowi6_mark = flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0;
+ if ((flags & XT_RPFILTER_LOOSE) == 0) {
+ fl6.flowi6_oif = dev->ifindex;
+ lookup_flags |= RT6_LOOKUP_F_IFACE;
+ }
+
+ rt = (void *) ip6_route_lookup(dev_net(dev), &fl6, lookup_flags);
+ if (rt->dst.error)
+ goto out;
+
+ if (rt->rt6i_flags & (RTF_REJECT|RTF_ANYCAST))
+ goto out;
+
+ if (rt->rt6i_flags & RTF_LOCAL) {
+ ret = flags & XT_RPFILTER_ACCEPT_LOCAL;
+ goto out;
+ }
+
+ if (rt->rt6i_idev->dev == dev || (flags & XT_RPFILTER_LOOSE))
+ ret = true;
+ out:
+ dst_release(&rt->dst);
+ return ret;
+}
+
+static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_rpfilter_info *info = par->matchinfo;
+ int saddrtype;
+ struct ipv6hdr *iph;
+ bool invert = info->flags & XT_RPFILTER_INVERT;
+
+ if (par->in->flags & IFF_LOOPBACK)
+ return true ^ invert;
+
+ iph = ipv6_hdr(skb);
+ saddrtype = ipv6_addr_type(&iph->saddr);
+ if (unlikely(saddrtype == IPV6_ADDR_ANY))
+ return true ^ invert; /* not routable: forward path will drop it */
+
+ return rpfilter_lookup_reverse6(skb, par->in, info->flags) ^ invert;
+}
+
+static int rpfilter_check(const struct xt_mtchk_param *par)
+{
+ const struct xt_rpfilter_info *info = par->matchinfo;
+ unsigned int options = ~XT_RPFILTER_OPTION_MASK;
+
+ if (info->flags & options) {
+ pr_info("unknown options encountered");
+ return -EINVAL;
+ }
+
+ if (strcmp(par->table, "mangle") != 0 &&
+ strcmp(par->table, "raw") != 0) {
+ pr_info("match only valid in the \'raw\' "
+ "or \'mangle\' tables, not \'%s\'.\n", par->table);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static struct xt_match rpfilter_mt_reg __read_mostly = {
+ .name = "rpfilter",
+ .family = NFPROTO_IPV6,
+ .checkentry = rpfilter_check,
+ .match = rpfilter_mt,
+ .matchsize = sizeof(struct xt_rpfilter_info),
+ .hooks = (1 << NF_INET_PRE_ROUTING),
+ .me = THIS_MODULE
+};
+
+static int __init rpfilter_mt_init(void)
+{
+ return xt_register_match(&rpfilter_mt_reg);
+}
+
+static void __exit rpfilter_mt_exit(void)
+{
+ xt_unregister_match(&rpfilter_mt_reg);
+}
+
+module_init(rpfilter_mt_init);
+module_exit(rpfilter_mt_exit);
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
new file mode 100644
index 00000000..d8488c50
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -0,0 +1,225 @@
+/* Kernel module to match ROUTING parameters. */
+
+/* (C) 2001-2002 Andras Kis-Szabo <kisza@sch.bme.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ipv6.h>
+#include <linux/types.h>
+#include <net/checksum.h>
+#include <net/ipv6.h>
+
+#include <asm/byteorder.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter_ipv6/ip6t_rt.h>
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Xtables: IPv6 Routing Header match");
+MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
+
+/* Returns 1 if the id is matched by the range, 0 otherwise */
+static inline bool
+segsleft_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert)
+{
+ bool r;
+ pr_debug("segsleft_match:%c 0x%x <= 0x%x <= 0x%x\n",
+ invert ? '!' : ' ', min, id, max);
+ r = (id >= min && id <= max) ^ invert;
+ pr_debug(" result %s\n", r ? "PASS" : "FAILED");
+ return r;
+}
+
+static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ struct ipv6_rt_hdr _route;
+ const struct ipv6_rt_hdr *rh;
+ const struct ip6t_rt *rtinfo = par->matchinfo;
+ unsigned int temp;
+ unsigned int ptr;
+ unsigned int hdrlen = 0;
+ bool ret = false;
+ struct in6_addr _addr;
+ const struct in6_addr *ap;
+ int err;
+
+ err = ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING, NULL);
+ if (err < 0) {
+ if (err != -ENOENT)
+ par->hotdrop = true;
+ return false;
+ }
+
+ rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route);
+ if (rh == NULL) {
+ par->hotdrop = true;
+ return false;
+ }
+
+ hdrlen = ipv6_optlen(rh);
+ if (skb->len - ptr < hdrlen) {
+ /* Pcket smaller than its length field */
+ return false;
+ }
+
+ pr_debug("IPv6 RT LEN %u %u ", hdrlen, rh->hdrlen);
+ pr_debug("TYPE %04X ", rh->type);
+ pr_debug("SGS_LEFT %u %02X\n", rh->segments_left, rh->segments_left);
+
+ pr_debug("IPv6 RT segsleft %02X ",
+ segsleft_match(rtinfo->segsleft[0], rtinfo->segsleft[1],
+ rh->segments_left,
+ !!(rtinfo->invflags & IP6T_RT_INV_SGS)));
+ pr_debug("type %02X %02X %02X ",
+ rtinfo->rt_type, rh->type,
+ (!(rtinfo->flags & IP6T_RT_TYP) ||
+ ((rtinfo->rt_type == rh->type) ^
+ !!(rtinfo->invflags & IP6T_RT_INV_TYP))));
+ pr_debug("len %02X %04X %02X ",
+ rtinfo->hdrlen, hdrlen,
+ !(rtinfo->flags & IP6T_RT_LEN) ||
+ ((rtinfo->hdrlen == hdrlen) ^
+ !!(rtinfo->invflags & IP6T_RT_INV_LEN)));
+ pr_debug("res %02X %02X %02X ",
+ rtinfo->flags & IP6T_RT_RES,
+ ((const struct rt0_hdr *)rh)->reserved,
+ !((rtinfo->flags & IP6T_RT_RES) &&
+ (((const struct rt0_hdr *)rh)->reserved)));
+
+ ret = (rh != NULL) &&
+ (segsleft_match(rtinfo->segsleft[0], rtinfo->segsleft[1],
+ rh->segments_left,
+ !!(rtinfo->invflags & IP6T_RT_INV_SGS))) &&
+ (!(rtinfo->flags & IP6T_RT_LEN) ||
+ ((rtinfo->hdrlen == hdrlen) ^
+ !!(rtinfo->invflags & IP6T_RT_INV_LEN))) &&
+ (!(rtinfo->flags & IP6T_RT_TYP) ||
+ ((rtinfo->rt_type == rh->type) ^
+ !!(rtinfo->invflags & IP6T_RT_INV_TYP)));
+
+ if (ret && (rtinfo->flags & IP6T_RT_RES)) {
+ const u_int32_t *rp;
+ u_int32_t _reserved;
+ rp = skb_header_pointer(skb,
+ ptr + offsetof(struct rt0_hdr,
+ reserved),
+ sizeof(_reserved),
+ &_reserved);
+
+ ret = (*rp == 0);
+ }
+
+ pr_debug("#%d ", rtinfo->addrnr);
+ if (!(rtinfo->flags & IP6T_RT_FST)) {
+ return ret;
+ } else if (rtinfo->flags & IP6T_RT_FST_NSTRICT) {
+ pr_debug("Not strict ");
+ if (rtinfo->addrnr > (unsigned int)((hdrlen - 8) / 16)) {
+ pr_debug("There isn't enough space\n");
+ return false;
+ } else {
+ unsigned int i = 0;
+
+ pr_debug("#%d ", rtinfo->addrnr);
+ for (temp = 0;
+ temp < (unsigned int)((hdrlen - 8) / 16);
+ temp++) {
+ ap = skb_header_pointer(skb,
+ ptr
+ + sizeof(struct rt0_hdr)
+ + temp * sizeof(_addr),
+ sizeof(_addr),
+ &_addr);
+
+ BUG_ON(ap == NULL);
+
+ if (ipv6_addr_equal(ap, &rtinfo->addrs[i])) {
+ pr_debug("i=%d temp=%d;\n", i, temp);
+ i++;
+ }
+ if (i == rtinfo->addrnr)
+ break;
+ }
+ pr_debug("i=%d #%d\n", i, rtinfo->addrnr);
+ if (i == rtinfo->addrnr)
+ return ret;
+ else
+ return false;
+ }
+ } else {
+ pr_debug("Strict ");
+ if (rtinfo->addrnr > (unsigned int)((hdrlen - 8) / 16)) {
+ pr_debug("There isn't enough space\n");
+ return false;
+ } else {
+ pr_debug("#%d ", rtinfo->addrnr);
+ for (temp = 0; temp < rtinfo->addrnr; temp++) {
+ ap = skb_header_pointer(skb,
+ ptr
+ + sizeof(struct rt0_hdr)
+ + temp * sizeof(_addr),
+ sizeof(_addr),
+ &_addr);
+ BUG_ON(ap == NULL);
+
+ if (!ipv6_addr_equal(ap, &rtinfo->addrs[temp]))
+ break;
+ }
+ pr_debug("temp=%d #%d\n", temp, rtinfo->addrnr);
+ if (temp == rtinfo->addrnr &&
+ temp == (unsigned int)((hdrlen - 8) / 16))
+ return ret;
+ else
+ return false;
+ }
+ }
+
+ return false;
+}
+
+static int rt_mt6_check(const struct xt_mtchk_param *par)
+{
+ const struct ip6t_rt *rtinfo = par->matchinfo;
+
+ if (rtinfo->invflags & ~IP6T_RT_INV_MASK) {
+ pr_debug("unknown flags %X\n", rtinfo->invflags);
+ return -EINVAL;
+ }
+ if ((rtinfo->flags & (IP6T_RT_RES | IP6T_RT_FST_MASK)) &&
+ (!(rtinfo->flags & IP6T_RT_TYP) ||
+ (rtinfo->rt_type != 0) ||
+ (rtinfo->invflags & IP6T_RT_INV_TYP))) {
+ pr_debug("`--rt-type 0' required before `--rt-0-*'");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static struct xt_match rt_mt6_reg __read_mostly = {
+ .name = "rt",
+ .family = NFPROTO_IPV6,
+ .match = rt_mt6,
+ .matchsize = sizeof(struct ip6t_rt),
+ .checkentry = rt_mt6_check,
+ .me = THIS_MODULE,
+};
+
+static int __init rt_mt6_init(void)
+{
+ return xt_register_match(&rt_mt6_reg);
+}
+
+static void __exit rt_mt6_exit(void)
+{
+ xt_unregister_match(&rt_mt6_reg);
+}
+
+module_init(rt_mt6_init);
+module_exit(rt_mt6_exit);
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
new file mode 100644
index 00000000..325e59a0
--- /dev/null
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -0,0 +1,108 @@
+/*
+ * This is the 1999 rewrite of IP Firewalling, aiming for kernel 2.3.x.
+ *
+ * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
+ * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/slab.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("ip6tables filter table");
+
+#define FILTER_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \
+ (1 << NF_INET_FORWARD) | \
+ (1 << NF_INET_LOCAL_OUT))
+
+static const struct xt_table packet_filter = {
+ .name = "filter",
+ .valid_hooks = FILTER_VALID_HOOKS,
+ .me = THIS_MODULE,
+ .af = NFPROTO_IPV6,
+ .priority = NF_IP6_PRI_FILTER,
+};
+
+/* The work comes in here from netfilter.c. */
+static unsigned int
+ip6table_filter_hook(unsigned int hook, struct sk_buff *skb,
+ const struct net_device *in, const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ const struct net *net = dev_net((in != NULL) ? in : out);
+
+ return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_filter);
+}
+
+static struct nf_hook_ops *filter_ops __read_mostly;
+
+/* Default to forward because I got too much mail already. */
+static bool forward = true;
+module_param(forward, bool, 0000);
+
+static int __net_init ip6table_filter_net_init(struct net *net)
+{
+ struct ip6t_replace *repl;
+
+ repl = ip6t_alloc_initial_table(&packet_filter);
+ if (repl == NULL)
+ return -ENOMEM;
+ /* Entry 1 is the FORWARD hook */
+ ((struct ip6t_standard *)repl->entries)[1].target.verdict =
+ forward ? -NF_ACCEPT - 1 : -NF_DROP - 1;
+
+ net->ipv6.ip6table_filter =
+ ip6t_register_table(net, &packet_filter, repl);
+ kfree(repl);
+ if (IS_ERR(net->ipv6.ip6table_filter))
+ return PTR_ERR(net->ipv6.ip6table_filter);
+ return 0;
+}
+
+static void __net_exit ip6table_filter_net_exit(struct net *net)
+{
+ ip6t_unregister_table(net, net->ipv6.ip6table_filter);
+}
+
+static struct pernet_operations ip6table_filter_net_ops = {
+ .init = ip6table_filter_net_init,
+ .exit = ip6table_filter_net_exit,
+};
+
+static int __init ip6table_filter_init(void)
+{
+ int ret;
+
+ ret = register_pernet_subsys(&ip6table_filter_net_ops);
+ if (ret < 0)
+ return ret;
+
+ /* Register hooks */
+ filter_ops = xt_hook_link(&packet_filter, ip6table_filter_hook);
+ if (IS_ERR(filter_ops)) {
+ ret = PTR_ERR(filter_ops);
+ goto cleanup_table;
+ }
+
+ return ret;
+
+ cleanup_table:
+ unregister_pernet_subsys(&ip6table_filter_net_ops);
+ return ret;
+}
+
+static void __exit ip6table_filter_fini(void)
+{
+ xt_hook_unlink(&packet_filter, filter_ops);
+ unregister_pernet_subsys(&ip6table_filter_net_ops);
+}
+
+module_init(ip6table_filter_init);
+module_exit(ip6table_filter_fini);
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
new file mode 100644
index 00000000..00d19173
--- /dev/null
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -0,0 +1,145 @@
+/*
+ * IPv6 packet mangling table, a port of the IPv4 mangle table to IPv6
+ *
+ * Copyright (C) 2000-2001 by Harald Welte <laforge@gnumonks.org>
+ * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/slab.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("ip6tables mangle table");
+
+#define MANGLE_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \
+ (1 << NF_INET_LOCAL_IN) | \
+ (1 << NF_INET_FORWARD) | \
+ (1 << NF_INET_LOCAL_OUT) | \
+ (1 << NF_INET_POST_ROUTING))
+
+static const struct xt_table packet_mangler = {
+ .name = "mangle",
+ .valid_hooks = MANGLE_VALID_HOOKS,
+ .me = THIS_MODULE,
+ .af = NFPROTO_IPV6,
+ .priority = NF_IP6_PRI_MANGLE,
+};
+
+static unsigned int
+ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out)
+{
+ unsigned int ret;
+ struct in6_addr saddr, daddr;
+ u_int8_t hop_limit;
+ u_int32_t flowlabel, mark;
+
+#if 0
+ /* root is playing with raw sockets. */
+ if (skb->len < sizeof(struct iphdr) ||
+ ip_hdrlen(skb) < sizeof(struct iphdr)) {
+ if (net_ratelimit())
+ pr_warning("ip6t_hook: happy cracking.\n");
+ return NF_ACCEPT;
+ }
+#endif
+
+ /* save source/dest address, mark, hoplimit, flowlabel, priority, */
+ memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr));
+ memcpy(&daddr, &ipv6_hdr(skb)->daddr, sizeof(daddr));
+ mark = skb->mark;
+ hop_limit = ipv6_hdr(skb)->hop_limit;
+
+ /* flowlabel and prio (includes version, which shouldn't change either */
+ flowlabel = *((u_int32_t *)ipv6_hdr(skb));
+
+ ret = ip6t_do_table(skb, NF_INET_LOCAL_OUT, NULL, out,
+ dev_net(out)->ipv6.ip6table_mangle);
+
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) ||
+ memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) ||
+ skb->mark != mark ||
+ ipv6_hdr(skb)->hop_limit != hop_limit ||
+ flowlabel != *((u_int32_t *)ipv6_hdr(skb))))
+ return ip6_route_me_harder(skb) == 0 ? ret : NF_DROP;
+
+ return ret;
+}
+
+/* The work comes in here from netfilter.c. */
+static unsigned int
+ip6table_mangle_hook(unsigned int hook, struct sk_buff *skb,
+ const struct net_device *in, const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ if (hook == NF_INET_LOCAL_OUT)
+ return ip6t_mangle_out(skb, out);
+ if (hook == NF_INET_POST_ROUTING)
+ return ip6t_do_table(skb, hook, in, out,
+ dev_net(out)->ipv6.ip6table_mangle);
+ /* INPUT/FORWARD */
+ return ip6t_do_table(skb, hook, in, out,
+ dev_net(in)->ipv6.ip6table_mangle);
+}
+
+static struct nf_hook_ops *mangle_ops __read_mostly;
+static int __net_init ip6table_mangle_net_init(struct net *net)
+{
+ struct ip6t_replace *repl;
+
+ repl = ip6t_alloc_initial_table(&packet_mangler);
+ if (repl == NULL)
+ return -ENOMEM;
+ net->ipv6.ip6table_mangle =
+ ip6t_register_table(net, &packet_mangler, repl);
+ kfree(repl);
+ if (IS_ERR(net->ipv6.ip6table_mangle))
+ return PTR_ERR(net->ipv6.ip6table_mangle);
+ return 0;
+}
+
+static void __net_exit ip6table_mangle_net_exit(struct net *net)
+{
+ ip6t_unregister_table(net, net->ipv6.ip6table_mangle);
+}
+
+static struct pernet_operations ip6table_mangle_net_ops = {
+ .init = ip6table_mangle_net_init,
+ .exit = ip6table_mangle_net_exit,
+};
+
+static int __init ip6table_mangle_init(void)
+{
+ int ret;
+
+ ret = register_pernet_subsys(&ip6table_mangle_net_ops);
+ if (ret < 0)
+ return ret;
+
+ /* Register hooks */
+ mangle_ops = xt_hook_link(&packet_mangler, ip6table_mangle_hook);
+ if (IS_ERR(mangle_ops)) {
+ ret = PTR_ERR(mangle_ops);
+ goto cleanup_table;
+ }
+
+ return ret;
+
+ cleanup_table:
+ unregister_pernet_subsys(&ip6table_mangle_net_ops);
+ return ret;
+}
+
+static void __exit ip6table_mangle_fini(void)
+{
+ xt_hook_unlink(&packet_mangler, mangle_ops);
+ unregister_pernet_subsys(&ip6table_mangle_net_ops);
+}
+
+module_init(ip6table_mangle_init);
+module_exit(ip6table_mangle_fini);
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
new file mode 100644
index 00000000..5b9926a0
--- /dev/null
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -0,0 +1,88 @@
+/*
+ * IPv6 raw table, a port of the IPv4 raw table to IPv6
+ *
+ * Copyright (C) 2003 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ */
+#include <linux/module.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/slab.h>
+
+#define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT))
+
+static const struct xt_table packet_raw = {
+ .name = "raw",
+ .valid_hooks = RAW_VALID_HOOKS,
+ .me = THIS_MODULE,
+ .af = NFPROTO_IPV6,
+ .priority = NF_IP6_PRI_RAW,
+};
+
+/* The work comes in here from netfilter.c. */
+static unsigned int
+ip6table_raw_hook(unsigned int hook, struct sk_buff *skb,
+ const struct net_device *in, const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ const struct net *net = dev_net((in != NULL) ? in : out);
+
+ return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_raw);
+}
+
+static struct nf_hook_ops *rawtable_ops __read_mostly;
+
+static int __net_init ip6table_raw_net_init(struct net *net)
+{
+ struct ip6t_replace *repl;
+
+ repl = ip6t_alloc_initial_table(&packet_raw);
+ if (repl == NULL)
+ return -ENOMEM;
+ net->ipv6.ip6table_raw =
+ ip6t_register_table(net, &packet_raw, repl);
+ kfree(repl);
+ if (IS_ERR(net->ipv6.ip6table_raw))
+ return PTR_ERR(net->ipv6.ip6table_raw);
+ return 0;
+}
+
+static void __net_exit ip6table_raw_net_exit(struct net *net)
+{
+ ip6t_unregister_table(net, net->ipv6.ip6table_raw);
+}
+
+static struct pernet_operations ip6table_raw_net_ops = {
+ .init = ip6table_raw_net_init,
+ .exit = ip6table_raw_net_exit,
+};
+
+static int __init ip6table_raw_init(void)
+{
+ int ret;
+
+ ret = register_pernet_subsys(&ip6table_raw_net_ops);
+ if (ret < 0)
+ return ret;
+
+ /* Register hooks */
+ rawtable_ops = xt_hook_link(&packet_raw, ip6table_raw_hook);
+ if (IS_ERR(rawtable_ops)) {
+ ret = PTR_ERR(rawtable_ops);
+ goto cleanup_table;
+ }
+
+ return ret;
+
+ cleanup_table:
+ unregister_pernet_subsys(&ip6table_raw_net_ops);
+ return ret;
+}
+
+static void __exit ip6table_raw_fini(void)
+{
+ xt_hook_unlink(&packet_raw, rawtable_ops);
+ unregister_pernet_subsys(&ip6table_raw_net_ops);
+}
+
+module_init(ip6table_raw_init);
+module_exit(ip6table_raw_fini);
+MODULE_LICENSE("GPL");
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
new file mode 100644
index 00000000..91aa2b4d
--- /dev/null
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -0,0 +1,105 @@
+/*
+ * "security" table for IPv6
+ *
+ * This is for use by Mandatory Access Control (MAC) security models,
+ * which need to be able to manage security policy in separate context
+ * to DAC.
+ *
+ * Based on iptable_mangle.c
+ *
+ * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
+ * Copyright (C) 2000-2004 Netfilter Core Team <coreteam <at> netfilter.org>
+ * Copyright (C) 2008 Red Hat, Inc., James Morris <jmorris <at> redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/slab.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("James Morris <jmorris <at> redhat.com>");
+MODULE_DESCRIPTION("ip6tables security table, for MAC rules");
+
+#define SECURITY_VALID_HOOKS (1 << NF_INET_LOCAL_IN) | \
+ (1 << NF_INET_FORWARD) | \
+ (1 << NF_INET_LOCAL_OUT)
+
+static const struct xt_table security_table = {
+ .name = "security",
+ .valid_hooks = SECURITY_VALID_HOOKS,
+ .me = THIS_MODULE,
+ .af = NFPROTO_IPV6,
+ .priority = NF_IP6_PRI_SECURITY,
+};
+
+static unsigned int
+ip6table_security_hook(unsigned int hook, struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ const struct net *net = dev_net((in != NULL) ? in : out);
+
+ return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_security);
+}
+
+static struct nf_hook_ops *sectbl_ops __read_mostly;
+
+static int __net_init ip6table_security_net_init(struct net *net)
+{
+ struct ip6t_replace *repl;
+
+ repl = ip6t_alloc_initial_table(&security_table);
+ if (repl == NULL)
+ return -ENOMEM;
+ net->ipv6.ip6table_security =
+ ip6t_register_table(net, &security_table, repl);
+ kfree(repl);
+ if (IS_ERR(net->ipv6.ip6table_security))
+ return PTR_ERR(net->ipv6.ip6table_security);
+
+ return 0;
+}
+
+static void __net_exit ip6table_security_net_exit(struct net *net)
+{
+ ip6t_unregister_table(net, net->ipv6.ip6table_security);
+}
+
+static struct pernet_operations ip6table_security_net_ops = {
+ .init = ip6table_security_net_init,
+ .exit = ip6table_security_net_exit,
+};
+
+static int __init ip6table_security_init(void)
+{
+ int ret;
+
+ ret = register_pernet_subsys(&ip6table_security_net_ops);
+ if (ret < 0)
+ return ret;
+
+ sectbl_ops = xt_hook_link(&security_table, ip6table_security_hook);
+ if (IS_ERR(sectbl_ops)) {
+ ret = PTR_ERR(sectbl_ops);
+ goto cleanup_table;
+ }
+
+ return ret;
+
+cleanup_table:
+ unregister_pernet_subsys(&ip6table_security_net_ops);
+ return ret;
+}
+
+static void __exit ip6table_security_fini(void)
+{
+ xt_hook_unlink(&security_table, sectbl_ops);
+ unregister_pernet_subsys(&ip6table_security_net_ops);
+}
+
+module_init(ip6table_security_init);
+module_exit(ip6table_security_fini);
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
new file mode 100644
index 00000000..4111050a
--- /dev/null
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -0,0 +1,398 @@
+/*
+ * Copyright (C)2004 USAGI/WIDE Project
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Author:
+ * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ */
+
+#include <linux/types.h>
+#include <linux/ipv6.h>
+#include <linux/in6.h>
+#include <linux/netfilter.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/icmp.h>
+#include <net/ipv6.h>
+#include <net/inet_frag.h>
+
+#include <linux/netfilter_bridge.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_l3proto.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
+#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+#include <net/netfilter/nf_log.h>
+
+static bool ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
+ struct nf_conntrack_tuple *tuple)
+{
+ const u_int32_t *ap;
+ u_int32_t _addrs[8];
+
+ ap = skb_header_pointer(skb, nhoff + offsetof(struct ipv6hdr, saddr),
+ sizeof(_addrs), _addrs);
+ if (ap == NULL)
+ return false;
+
+ memcpy(tuple->src.u3.ip6, ap, sizeof(tuple->src.u3.ip6));
+ memcpy(tuple->dst.u3.ip6, ap + 4, sizeof(tuple->dst.u3.ip6));
+
+ return true;
+}
+
+static bool ipv6_invert_tuple(struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_tuple *orig)
+{
+ memcpy(tuple->src.u3.ip6, orig->dst.u3.ip6, sizeof(tuple->src.u3.ip6));
+ memcpy(tuple->dst.u3.ip6, orig->src.u3.ip6, sizeof(tuple->dst.u3.ip6));
+
+ return true;
+}
+
+static int ipv6_print_tuple(struct seq_file *s,
+ const struct nf_conntrack_tuple *tuple)
+{
+ return seq_printf(s, "src=%pI6 dst=%pI6 ",
+ tuple->src.u3.ip6, tuple->dst.u3.ip6);
+}
+
+/*
+ * Based on ipv6_skip_exthdr() in net/ipv6/exthdr.c
+ *
+ * This function parses (probably truncated) exthdr set "hdr"
+ * of length "len". "nexthdrp" initially points to some place,
+ * where type of the first header can be found.
+ *
+ * It skips all well-known exthdrs, and returns pointer to the start
+ * of unparsable area i.e. the first header with unknown type.
+ * if success, *nexthdr is updated by type/protocol of this header.
+ *
+ * NOTES: - it may return pointer pointing beyond end of packet,
+ * if the last recognized header is truncated in the middle.
+ * - if packet is truncated, so that all parsed headers are skipped,
+ * it returns -1.
+ * - if packet is fragmented, return pointer of the fragment header.
+ * - ESP is unparsable for now and considered like
+ * normal payload protocol.
+ * - Note also special handling of AUTH header. Thanks to IPsec wizards.
+ */
+
+static int nf_ct_ipv6_skip_exthdr(const struct sk_buff *skb, int start,
+ u8 *nexthdrp, int len)
+{
+ u8 nexthdr = *nexthdrp;
+
+ while (ipv6_ext_hdr(nexthdr)) {
+ struct ipv6_opt_hdr hdr;
+ int hdrlen;
+
+ if (len < (int)sizeof(struct ipv6_opt_hdr))
+ return -1;
+ if (nexthdr == NEXTHDR_NONE)
+ break;
+ if (nexthdr == NEXTHDR_FRAGMENT)
+ break;
+ if (skb_copy_bits(skb, start, &hdr, sizeof(hdr)))
+ BUG();
+ if (nexthdr == NEXTHDR_AUTH)
+ hdrlen = (hdr.hdrlen+2)<<2;
+ else
+ hdrlen = ipv6_optlen(&hdr);
+
+ nexthdr = hdr.nexthdr;
+ len -= hdrlen;
+ start += hdrlen;
+ }
+
+ *nexthdrp = nexthdr;
+ return start;
+}
+
+static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
+ unsigned int *dataoff, u_int8_t *protonum)
+{
+ unsigned int extoff = nhoff + sizeof(struct ipv6hdr);
+ unsigned char pnum;
+ int protoff;
+
+ if (skb_copy_bits(skb, nhoff + offsetof(struct ipv6hdr, nexthdr),
+ &pnum, sizeof(pnum)) != 0) {
+ pr_debug("ip6_conntrack_core: can't get nexthdr\n");
+ return -NF_ACCEPT;
+ }
+ protoff = nf_ct_ipv6_skip_exthdr(skb, extoff, &pnum, skb->len - extoff);
+ /*
+ * (protoff == skb->len) mean that the packet doesn't have no data
+ * except of IPv6 & ext headers. but it's tracked anyway. - YK
+ */
+ if ((protoff < 0) || (protoff > skb->len)) {
+ pr_debug("ip6_conntrack_core: can't find proto in pkt\n");
+ return -NF_ACCEPT;
+ }
+
+ *dataoff = protoff;
+ *protonum = pnum;
+ return NF_ACCEPT;
+}
+
+static unsigned int ipv6_confirm(unsigned int hooknum,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct nf_conn *ct;
+ const struct nf_conn_help *help;
+ const struct nf_conntrack_helper *helper;
+ enum ip_conntrack_info ctinfo;
+ unsigned int ret, protoff;
+ unsigned int extoff = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
+ unsigned char pnum = ipv6_hdr(skb)->nexthdr;
+
+
+ /* This is where we call the helper: as the packet goes out. */
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct || ctinfo == IP_CT_RELATED_REPLY)
+ goto out;
+
+ help = nfct_help(ct);
+ if (!help)
+ goto out;
+ /* rcu_read_lock()ed by nf_hook_slow */
+ helper = rcu_dereference(help->helper);
+ if (!helper)
+ goto out;
+
+ protoff = nf_ct_ipv6_skip_exthdr(skb, extoff, &pnum,
+ skb->len - extoff);
+ if (protoff > skb->len || pnum == NEXTHDR_FRAGMENT) {
+ pr_debug("proto header not found\n");
+ return NF_ACCEPT;
+ }
+
+ ret = helper->help(skb, protoff, ct, ctinfo);
+ if (ret != NF_ACCEPT) {
+ nf_log_packet(NFPROTO_IPV6, hooknum, skb, in, out, NULL,
+ "nf_ct_%s: dropping packet", helper->name);
+ return ret;
+ }
+out:
+ /* We've seen it coming out the other side: confirm it */
+ return nf_conntrack_confirm(skb);
+}
+
+static unsigned int __ipv6_conntrack_in(struct net *net,
+ unsigned int hooknum,
+ struct sk_buff *skb,
+ int (*okfn)(struct sk_buff *))
+{
+ struct sk_buff *reasm = skb->nfct_reasm;
+
+ /* This packet is fragmented and has reassembled packet. */
+ if (reasm) {
+ /* Reassembled packet isn't parsed yet ? */
+ if (!reasm->nfct) {
+ unsigned int ret;
+
+ ret = nf_conntrack_in(net, PF_INET6, hooknum, reasm);
+ if (ret != NF_ACCEPT)
+ return ret;
+ }
+ nf_conntrack_get(reasm->nfct);
+ skb->nfct = reasm->nfct;
+ skb->nfctinfo = reasm->nfctinfo;
+ return NF_ACCEPT;
+ }
+
+ return nf_conntrack_in(net, PF_INET6, hooknum, skb);
+}
+
+static unsigned int ipv6_conntrack_in(unsigned int hooknum,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ return __ipv6_conntrack_in(dev_net(in), hooknum, skb, okfn);
+}
+
+static unsigned int ipv6_conntrack_local(unsigned int hooknum,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ /* root is playing with raw sockets. */
+ if (skb->len < sizeof(struct ipv6hdr)) {
+ if (net_ratelimit())
+ pr_notice("ipv6_conntrack_local: packet too short\n");
+ return NF_ACCEPT;
+ }
+ return __ipv6_conntrack_in(dev_net(out), hooknum, skb, okfn);
+}
+
+static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
+ {
+ .hook = ipv6_conntrack_in,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_PRE_ROUTING,
+ .priority = NF_IP6_PRI_CONNTRACK,
+ },
+ {
+ .hook = ipv6_conntrack_local,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_LOCAL_OUT,
+ .priority = NF_IP6_PRI_CONNTRACK,
+ },
+ {
+ .hook = ipv6_confirm,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP6_PRI_LAST,
+ },
+ {
+ .hook = ipv6_confirm,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP6_PRI_LAST-1,
+ },
+};
+
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+static int ipv6_tuple_to_nlattr(struct sk_buff *skb,
+ const struct nf_conntrack_tuple *tuple)
+{
+ NLA_PUT(skb, CTA_IP_V6_SRC, sizeof(u_int32_t) * 4,
+ &tuple->src.u3.ip6);
+ NLA_PUT(skb, CTA_IP_V6_DST, sizeof(u_int32_t) * 4,
+ &tuple->dst.u3.ip6);
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static const struct nla_policy ipv6_nla_policy[CTA_IP_MAX+1] = {
+ [CTA_IP_V6_SRC] = { .len = sizeof(u_int32_t)*4 },
+ [CTA_IP_V6_DST] = { .len = sizeof(u_int32_t)*4 },
+};
+
+static int ipv6_nlattr_to_tuple(struct nlattr *tb[],
+ struct nf_conntrack_tuple *t)
+{
+ if (!tb[CTA_IP_V6_SRC] || !tb[CTA_IP_V6_DST])
+ return -EINVAL;
+
+ memcpy(&t->src.u3.ip6, nla_data(tb[CTA_IP_V6_SRC]),
+ sizeof(u_int32_t) * 4);
+ memcpy(&t->dst.u3.ip6, nla_data(tb[CTA_IP_V6_DST]),
+ sizeof(u_int32_t) * 4);
+
+ return 0;
+}
+
+static int ipv6_nlattr_tuple_size(void)
+{
+ return nla_policy_len(ipv6_nla_policy, CTA_IP_MAX + 1);
+}
+#endif
+
+struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = {
+ .l3proto = PF_INET6,
+ .name = "ipv6",
+ .pkt_to_tuple = ipv6_pkt_to_tuple,
+ .invert_tuple = ipv6_invert_tuple,
+ .print_tuple = ipv6_print_tuple,
+ .get_l4proto = ipv6_get_l4proto,
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+ .tuple_to_nlattr = ipv6_tuple_to_nlattr,
+ .nlattr_tuple_size = ipv6_nlattr_tuple_size,
+ .nlattr_to_tuple = ipv6_nlattr_to_tuple,
+ .nla_policy = ipv6_nla_policy,
+#endif
+ .me = THIS_MODULE,
+};
+
+MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET6));
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Yasuyuki KOZAKAI @USAGI <yasuyuki.kozakai@toshiba.co.jp>");
+
+static int __init nf_conntrack_l3proto_ipv6_init(void)
+{
+ int ret = 0;
+
+ need_conntrack();
+ nf_defrag_ipv6_enable();
+
+ ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_tcp6);
+ if (ret < 0) {
+ pr_err("nf_conntrack_ipv6: can't register tcp.\n");
+ return ret;
+ }
+
+ ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udp6);
+ if (ret < 0) {
+ pr_err("nf_conntrack_ipv6: can't register udp.\n");
+ goto cleanup_tcp;
+ }
+
+ ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_icmpv6);
+ if (ret < 0) {
+ pr_err("nf_conntrack_ipv6: can't register icmpv6.\n");
+ goto cleanup_udp;
+ }
+
+ ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv6);
+ if (ret < 0) {
+ pr_err("nf_conntrack_ipv6: can't register ipv6\n");
+ goto cleanup_icmpv6;
+ }
+
+ ret = nf_register_hooks(ipv6_conntrack_ops,
+ ARRAY_SIZE(ipv6_conntrack_ops));
+ if (ret < 0) {
+ pr_err("nf_conntrack_ipv6: can't register pre-routing defrag "
+ "hook.\n");
+ goto cleanup_ipv6;
+ }
+ return ret;
+
+ cleanup_ipv6:
+ nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv6);
+ cleanup_icmpv6:
+ nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmpv6);
+ cleanup_udp:
+ nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp6);
+ cleanup_tcp:
+ nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp6);
+ return ret;
+}
+
+static void __exit nf_conntrack_l3proto_ipv6_fini(void)
+{
+ synchronize_net();
+ nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops));
+ nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv6);
+ nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmpv6);
+ nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp6);
+ nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp6);
+}
+
+module_init(nf_conntrack_l3proto_ipv6_init);
+module_exit(nf_conntrack_l3proto_ipv6_fini);
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
new file mode 100644
index 00000000..92cc9f29
--- /dev/null
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -0,0 +1,362 @@
+/*
+ * Copyright (C)2003,2004 USAGI/WIDE Project
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Author:
+ * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ */
+
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/in6.h>
+#include <linux/icmpv6.h>
+#include <linux/ipv6.h>
+#include <net/ipv6.h>
+#include <net/ip6_checksum.h>
+#include <linux/seq_file.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/ipv6/nf_conntrack_icmpv6.h>
+#include <net/netfilter/nf_log.h>
+
+static unsigned int nf_ct_icmpv6_timeout __read_mostly = 30*HZ;
+
+static bool icmpv6_pkt_to_tuple(const struct sk_buff *skb,
+ unsigned int dataoff,
+ struct nf_conntrack_tuple *tuple)
+{
+ const struct icmp6hdr *hp;
+ struct icmp6hdr _hdr;
+
+ hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
+ if (hp == NULL)
+ return false;
+ tuple->dst.u.icmp.type = hp->icmp6_type;
+ tuple->src.u.icmp.id = hp->icmp6_identifier;
+ tuple->dst.u.icmp.code = hp->icmp6_code;
+
+ return true;
+}
+
+/* Add 1; spaces filled with 0. */
+static const u_int8_t invmap[] = {
+ [ICMPV6_ECHO_REQUEST - 128] = ICMPV6_ECHO_REPLY + 1,
+ [ICMPV6_ECHO_REPLY - 128] = ICMPV6_ECHO_REQUEST + 1,
+ [ICMPV6_NI_QUERY - 128] = ICMPV6_NI_REPLY + 1,
+ [ICMPV6_NI_REPLY - 128] = ICMPV6_NI_QUERY +1
+};
+
+static const u_int8_t noct_valid_new[] = {
+ [ICMPV6_MGM_QUERY - 130] = 1,
+ [ICMPV6_MGM_REPORT -130] = 1,
+ [ICMPV6_MGM_REDUCTION - 130] = 1,
+ [NDISC_ROUTER_SOLICITATION - 130] = 1,
+ [NDISC_ROUTER_ADVERTISEMENT - 130] = 1,
+ [NDISC_NEIGHBOUR_SOLICITATION - 130] = 1,
+ [NDISC_NEIGHBOUR_ADVERTISEMENT - 130] = 1,
+ [ICMPV6_MLD2_REPORT - 130] = 1
+};
+
+static bool icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_tuple *orig)
+{
+ int type = orig->dst.u.icmp.type - 128;
+ if (type < 0 || type >= sizeof(invmap) || !invmap[type])
+ return false;
+
+ tuple->src.u.icmp.id = orig->src.u.icmp.id;
+ tuple->dst.u.icmp.type = invmap[type] - 1;
+ tuple->dst.u.icmp.code = orig->dst.u.icmp.code;
+ return true;
+}
+
+/* Print out the per-protocol part of the tuple. */
+static int icmpv6_print_tuple(struct seq_file *s,
+ const struct nf_conntrack_tuple *tuple)
+{
+ return seq_printf(s, "type=%u code=%u id=%u ",
+ tuple->dst.u.icmp.type,
+ tuple->dst.u.icmp.code,
+ ntohs(tuple->src.u.icmp.id));
+}
+
+static unsigned int *icmpv6_get_timeouts(struct net *net)
+{
+ return &nf_ct_icmpv6_timeout;
+}
+
+/* Returns verdict for packet, or -1 for invalid. */
+static int icmpv6_packet(struct nf_conn *ct,
+ const struct sk_buff *skb,
+ unsigned int dataoff,
+ enum ip_conntrack_info ctinfo,
+ u_int8_t pf,
+ unsigned int hooknum,
+ unsigned int *timeout)
+{
+ /* Do not immediately delete the connection after the first
+ successful reply to avoid excessive conntrackd traffic
+ and also to handle correctly ICMP echo reply duplicates. */
+ nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
+
+ return NF_ACCEPT;
+}
+
+/* Called when a new connection for this protocol found. */
+static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb,
+ unsigned int dataoff, unsigned int *timeouts)
+{
+ static const u_int8_t valid_new[] = {
+ [ICMPV6_ECHO_REQUEST - 128] = 1,
+ [ICMPV6_NI_QUERY - 128] = 1
+ };
+ int type = ct->tuplehash[0].tuple.dst.u.icmp.type - 128;
+
+ if (type < 0 || type >= sizeof(valid_new) || !valid_new[type]) {
+ /* Can't create a new ICMPv6 `conn' with this. */
+ pr_debug("icmpv6: can't create new conn with type %u\n",
+ type + 128);
+ nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple);
+ if (LOG_INVALID(nf_ct_net(ct), IPPROTO_ICMPV6))
+ nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
+ "nf_ct_icmpv6: invalid new with type %d ",
+ type + 128);
+ return false;
+ }
+ return true;
+}
+
+static int
+icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
+ struct sk_buff *skb,
+ unsigned int icmp6off,
+ enum ip_conntrack_info *ctinfo,
+ unsigned int hooknum)
+{
+ struct nf_conntrack_tuple intuple, origtuple;
+ const struct nf_conntrack_tuple_hash *h;
+ const struct nf_conntrack_l4proto *inproto;
+ u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
+
+ NF_CT_ASSERT(skb->nfct == NULL);
+
+ /* Are they talking about one of our connections? */
+ if (!nf_ct_get_tuplepr(skb,
+ skb_network_offset(skb)
+ + sizeof(struct ipv6hdr)
+ + sizeof(struct icmp6hdr),
+ PF_INET6, &origtuple)) {
+ pr_debug("icmpv6_error: Can't get tuple\n");
+ return -NF_ACCEPT;
+ }
+
+ /* rcu_read_lock()ed by nf_hook_slow */
+ inproto = __nf_ct_l4proto_find(PF_INET6, origtuple.dst.protonum);
+
+ /* Ordinarily, we'd expect the inverted tupleproto, but it's
+ been preserved inside the ICMP. */
+ if (!nf_ct_invert_tuple(&intuple, &origtuple,
+ &nf_conntrack_l3proto_ipv6, inproto)) {
+ pr_debug("icmpv6_error: Can't invert tuple\n");
+ return -NF_ACCEPT;
+ }
+
+ *ctinfo = IP_CT_RELATED;
+
+ h = nf_conntrack_find_get(net, zone, &intuple);
+ if (!h) {
+ pr_debug("icmpv6_error: no match\n");
+ return -NF_ACCEPT;
+ } else {
+ if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
+ *ctinfo += IP_CT_IS_REPLY;
+ }
+
+ /* Update skb to refer to this connection */
+ skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general;
+ skb->nfctinfo = *ctinfo;
+ return NF_ACCEPT;
+}
+
+static int
+icmpv6_error(struct net *net, struct nf_conn *tmpl,
+ struct sk_buff *skb, unsigned int dataoff,
+ enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum)
+{
+ const struct icmp6hdr *icmp6h;
+ struct icmp6hdr _ih;
+ int type;
+
+ icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih);
+ if (icmp6h == NULL) {
+ if (LOG_INVALID(net, IPPROTO_ICMPV6))
+ nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
+ "nf_ct_icmpv6: short packet ");
+ return -NF_ACCEPT;
+ }
+
+ if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
+ nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) {
+ if (LOG_INVALID(net, IPPROTO_ICMPV6))
+ nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
+ "nf_ct_icmpv6: ICMPv6 checksum failed ");
+ return -NF_ACCEPT;
+ }
+
+ type = icmp6h->icmp6_type - 130;
+ if (type >= 0 && type < sizeof(noct_valid_new) &&
+ noct_valid_new[type]) {
+ skb->nfct = &nf_ct_untracked_get()->ct_general;
+ skb->nfctinfo = IP_CT_NEW;
+ nf_conntrack_get(skb->nfct);
+ return NF_ACCEPT;
+ }
+
+ /* is not error message ? */
+ if (icmp6h->icmp6_type >= 128)
+ return NF_ACCEPT;
+
+ return icmpv6_error_message(net, tmpl, skb, dataoff, ctinfo, hooknum);
+}
+
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+static int icmpv6_tuple_to_nlattr(struct sk_buff *skb,
+ const struct nf_conntrack_tuple *t)
+{
+ NLA_PUT_BE16(skb, CTA_PROTO_ICMPV6_ID, t->src.u.icmp.id);
+ NLA_PUT_U8(skb, CTA_PROTO_ICMPV6_TYPE, t->dst.u.icmp.type);
+ NLA_PUT_U8(skb, CTA_PROTO_ICMPV6_CODE, t->dst.u.icmp.code);
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static const struct nla_policy icmpv6_nla_policy[CTA_PROTO_MAX+1] = {
+ [CTA_PROTO_ICMPV6_TYPE] = { .type = NLA_U8 },
+ [CTA_PROTO_ICMPV6_CODE] = { .type = NLA_U8 },
+ [CTA_PROTO_ICMPV6_ID] = { .type = NLA_U16 },
+};
+
+static int icmpv6_nlattr_to_tuple(struct nlattr *tb[],
+ struct nf_conntrack_tuple *tuple)
+{
+ if (!tb[CTA_PROTO_ICMPV6_TYPE] ||
+ !tb[CTA_PROTO_ICMPV6_CODE] ||
+ !tb[CTA_PROTO_ICMPV6_ID])
+ return -EINVAL;
+
+ tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMPV6_TYPE]);
+ tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMPV6_CODE]);
+ tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMPV6_ID]);
+
+ if (tuple->dst.u.icmp.type < 128 ||
+ tuple->dst.u.icmp.type - 128 >= sizeof(invmap) ||
+ !invmap[tuple->dst.u.icmp.type - 128])
+ return -EINVAL;
+
+ return 0;
+}
+
+static int icmpv6_nlattr_tuple_size(void)
+{
+ return nla_policy_len(icmpv6_nla_policy, CTA_PROTO_MAX + 1);
+}
+#endif
+
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_cttimeout.h>
+
+static int icmpv6_timeout_nlattr_to_obj(struct nlattr *tb[], void *data)
+{
+ unsigned int *timeout = data;
+
+ if (tb[CTA_TIMEOUT_ICMPV6_TIMEOUT]) {
+ *timeout =
+ ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMPV6_TIMEOUT])) * HZ;
+ } else {
+ /* Set default ICMPv6 timeout. */
+ *timeout = nf_ct_icmpv6_timeout;
+ }
+ return 0;
+}
+
+static int
+icmpv6_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
+{
+ const unsigned int *timeout = data;
+
+ NLA_PUT_BE32(skb, CTA_TIMEOUT_ICMPV6_TIMEOUT, htonl(*timeout / HZ));
+
+ return 0;
+
+nla_put_failure:
+ return -ENOSPC;
+}
+
+static const struct nla_policy
+icmpv6_timeout_nla_policy[CTA_TIMEOUT_ICMPV6_MAX+1] = {
+ [CTA_TIMEOUT_ICMPV6_TIMEOUT] = { .type = NLA_U32 },
+};
+#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table_header *icmpv6_sysctl_header;
+static struct ctl_table icmpv6_sysctl_table[] = {
+ {
+ .procname = "nf_conntrack_icmpv6_timeout",
+ .data = &nf_ct_icmpv6_timeout,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ { }
+};
+#endif /* CONFIG_SYSCTL */
+
+struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly =
+{
+ .l3proto = PF_INET6,
+ .l4proto = IPPROTO_ICMPV6,
+ .name = "icmpv6",
+ .pkt_to_tuple = icmpv6_pkt_to_tuple,
+ .invert_tuple = icmpv6_invert_tuple,
+ .print_tuple = icmpv6_print_tuple,
+ .packet = icmpv6_packet,
+ .get_timeouts = icmpv6_get_timeouts,
+ .new = icmpv6_new,
+ .error = icmpv6_error,
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+ .tuple_to_nlattr = icmpv6_tuple_to_nlattr,
+ .nlattr_tuple_size = icmpv6_nlattr_tuple_size,
+ .nlattr_to_tuple = icmpv6_nlattr_to_tuple,
+ .nla_policy = icmpv6_nla_policy,
+#endif
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+ .ctnl_timeout = {
+ .nlattr_to_obj = icmpv6_timeout_nlattr_to_obj,
+ .obj_to_nlattr = icmpv6_timeout_obj_to_nlattr,
+ .nlattr_max = CTA_TIMEOUT_ICMP_MAX,
+ .obj_size = sizeof(unsigned int),
+ .nla_policy = icmpv6_timeout_nla_policy,
+ },
+#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+#ifdef CONFIG_SYSCTL
+ .ctl_table_header = &icmpv6_sysctl_header,
+ .ctl_table = icmpv6_sysctl_table,
+#endif
+};
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
new file mode 100644
index 00000000..38f00b02
--- /dev/null
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -0,0 +1,650 @@
+/*
+ * IPv6 fragment reassembly for connection tracking
+ *
+ * Copyright (C)2004 USAGI/WIDE Project
+ *
+ * Author:
+ * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ *
+ * Based on: net/ipv6/reassembly.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/jiffies.h>
+#include <linux/net.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/in6.h>
+#include <linux/ipv6.h>
+#include <linux/icmpv6.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+
+#include <net/sock.h>
+#include <net/snmp.h>
+#include <net/inet_frag.h>
+
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/transp_v6.h>
+#include <net/rawv6.h>
+#include <net/ndisc.h>
+#include <net/addrconf.h>
+#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
+#include <linux/sysctl.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+
+
+struct nf_ct_frag6_skb_cb
+{
+ struct inet6_skb_parm h;
+ int offset;
+ struct sk_buff *orig;
+};
+
+#define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb*)((skb)->cb))
+
+struct nf_ct_frag6_queue
+{
+ struct inet_frag_queue q;
+
+ __be32 id; /* fragment id */
+ u32 user;
+ struct in6_addr saddr;
+ struct in6_addr daddr;
+
+ unsigned int csum;
+ __u16 nhoffset;
+};
+
+static struct inet_frags nf_frags;
+static struct netns_frags nf_init_frags;
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table nf_ct_frag6_sysctl_table[] = {
+ {
+ .procname = "nf_conntrack_frag6_timeout",
+ .data = &nf_init_frags.timeout,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ {
+ .procname = "nf_conntrack_frag6_low_thresh",
+ .data = &nf_init_frags.low_thresh,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "nf_conntrack_frag6_high_thresh",
+ .data = &nf_init_frags.high_thresh,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ { }
+};
+
+static struct ctl_table_header *nf_ct_frag6_sysctl_header;
+#endif
+
+static unsigned int nf_hashfn(struct inet_frag_queue *q)
+{
+ const struct nf_ct_frag6_queue *nq;
+
+ nq = container_of(q, struct nf_ct_frag6_queue, q);
+ return inet6_hash_frag(nq->id, &nq->saddr, &nq->daddr, nf_frags.rnd);
+}
+
+static void nf_skb_free(struct sk_buff *skb)
+{
+ if (NFCT_FRAG6_CB(skb)->orig)
+ kfree_skb(NFCT_FRAG6_CB(skb)->orig);
+}
+
+/* Destruction primitives. */
+
+static __inline__ void fq_put(struct nf_ct_frag6_queue *fq)
+{
+ inet_frag_put(&fq->q, &nf_frags);
+}
+
+/* Kill fq entry. It is not destroyed immediately,
+ * because caller (and someone more) holds reference count.
+ */
+static __inline__ void fq_kill(struct nf_ct_frag6_queue *fq)
+{
+ inet_frag_kill(&fq->q, &nf_frags);
+}
+
+static void nf_ct_frag6_evictor(void)
+{
+ local_bh_disable();
+ inet_frag_evictor(&nf_init_frags, &nf_frags);
+ local_bh_enable();
+}
+
+static void nf_ct_frag6_expire(unsigned long data)
+{
+ struct nf_ct_frag6_queue *fq;
+
+ fq = container_of((struct inet_frag_queue *)data,
+ struct nf_ct_frag6_queue, q);
+
+ spin_lock(&fq->q.lock);
+
+ if (fq->q.last_in & INET_FRAG_COMPLETE)
+ goto out;
+
+ fq_kill(fq);
+
+out:
+ spin_unlock(&fq->q.lock);
+ fq_put(fq);
+}
+
+/* Creation primitives. */
+
+static __inline__ struct nf_ct_frag6_queue *
+fq_find(__be32 id, u32 user, struct in6_addr *src, struct in6_addr *dst)
+{
+ struct inet_frag_queue *q;
+ struct ip6_create_arg arg;
+ unsigned int hash;
+
+ arg.id = id;
+ arg.user = user;
+ arg.src = src;
+ arg.dst = dst;
+
+ read_lock_bh(&nf_frags.lock);
+ hash = inet6_hash_frag(id, src, dst, nf_frags.rnd);
+
+ q = inet_frag_find(&nf_init_frags, &nf_frags, &arg, hash);
+ local_bh_enable();
+ if (q == NULL)
+ goto oom;
+
+ return container_of(q, struct nf_ct_frag6_queue, q);
+
+oom:
+ return NULL;
+}
+
+
+static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
+ const struct frag_hdr *fhdr, int nhoff)
+{
+ struct sk_buff *prev, *next;
+ int offset, end;
+
+ if (fq->q.last_in & INET_FRAG_COMPLETE) {
+ pr_debug("Already completed\n");
+ goto err;
+ }
+
+ offset = ntohs(fhdr->frag_off) & ~0x7;
+ end = offset + (ntohs(ipv6_hdr(skb)->payload_len) -
+ ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
+
+ if ((unsigned int)end > IPV6_MAXPLEN) {
+ pr_debug("offset is too large.\n");
+ return -1;
+ }
+
+ if (skb->ip_summed == CHECKSUM_COMPLETE) {
+ const unsigned char *nh = skb_network_header(skb);
+ skb->csum = csum_sub(skb->csum,
+ csum_partial(nh, (u8 *)(fhdr + 1) - nh,
+ 0));
+ }
+
+ /* Is this the final fragment? */
+ if (!(fhdr->frag_off & htons(IP6_MF))) {
+ /* If we already have some bits beyond end
+ * or have different end, the segment is corrupted.
+ */
+ if (end < fq->q.len ||
+ ((fq->q.last_in & INET_FRAG_LAST_IN) && end != fq->q.len)) {
+ pr_debug("already received last fragment\n");
+ goto err;
+ }
+ fq->q.last_in |= INET_FRAG_LAST_IN;
+ fq->q.len = end;
+ } else {
+ /* Check if the fragment is rounded to 8 bytes.
+ * Required by the RFC.
+ */
+ if (end & 0x7) {
+ /* RFC2460 says always send parameter problem in
+ * this case. -DaveM
+ */
+ pr_debug("end of fragment not rounded to 8 bytes.\n");
+ return -1;
+ }
+ if (end > fq->q.len) {
+ /* Some bits beyond end -> corruption. */
+ if (fq->q.last_in & INET_FRAG_LAST_IN) {
+ pr_debug("last packet already reached.\n");
+ goto err;
+ }
+ fq->q.len = end;
+ }
+ }
+
+ if (end == offset)
+ goto err;
+
+ /* Point into the IP datagram 'data' part. */
+ if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data)) {
+ pr_debug("queue: message is too short.\n");
+ goto err;
+ }
+ if (pskb_trim_rcsum(skb, end - offset)) {
+ pr_debug("Can't trim\n");
+ goto err;
+ }
+
+ /* Find out which fragments are in front and at the back of us
+ * in the chain of fragments so far. We must know where to put
+ * this fragment, right?
+ */
+ prev = fq->q.fragments_tail;
+ if (!prev || NFCT_FRAG6_CB(prev)->offset < offset) {
+ next = NULL;
+ goto found;
+ }
+ prev = NULL;
+ for (next = fq->q.fragments; next != NULL; next = next->next) {
+ if (NFCT_FRAG6_CB(next)->offset >= offset)
+ break; /* bingo! */
+ prev = next;
+ }
+
+found:
+ /* RFC5722, Section 4:
+ * When reassembling an IPv6 datagram, if
+ * one or more its constituent fragments is determined to be an
+ * overlapping fragment, the entire datagram (and any constituent
+ * fragments, including those not yet received) MUST be silently
+ * discarded.
+ */
+
+ /* Check for overlap with preceding fragment. */
+ if (prev &&
+ (NFCT_FRAG6_CB(prev)->offset + prev->len) > offset)
+ goto discard_fq;
+
+ /* Look for overlap with succeeding segment. */
+ if (next && NFCT_FRAG6_CB(next)->offset < end)
+ goto discard_fq;
+
+ NFCT_FRAG6_CB(skb)->offset = offset;
+
+ /* Insert this fragment in the chain of fragments. */
+ skb->next = next;
+ if (!next)
+ fq->q.fragments_tail = skb;
+ if (prev)
+ prev->next = skb;
+ else
+ fq->q.fragments = skb;
+
+ skb->dev = NULL;
+ fq->q.stamp = skb->tstamp;
+ fq->q.meat += skb->len;
+ atomic_add(skb->truesize, &nf_init_frags.mem);
+
+ /* The first fragment.
+ * nhoffset is obtained from the first fragment, of course.
+ */
+ if (offset == 0) {
+ fq->nhoffset = nhoff;
+ fq->q.last_in |= INET_FRAG_FIRST_IN;
+ }
+ write_lock(&nf_frags.lock);
+ list_move_tail(&fq->q.lru_list, &nf_init_frags.lru_list);
+ write_unlock(&nf_frags.lock);
+ return 0;
+
+discard_fq:
+ fq_kill(fq);
+err:
+ return -1;
+}
+
+/*
+ * Check if this packet is complete.
+ * Returns NULL on failure by any reason, and pointer
+ * to current nexthdr field in reassembled frame.
+ *
+ * It is called with locked fq, and caller must check that
+ * queue is eligible for reassembly i.e. it is not COMPLETE,
+ * the last and the first frames arrived and all the bits are here.
+ */
+static struct sk_buff *
+nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
+{
+ struct sk_buff *fp, *op, *head = fq->q.fragments;
+ int payload_len;
+
+ fq_kill(fq);
+
+ WARN_ON(head == NULL);
+ WARN_ON(NFCT_FRAG6_CB(head)->offset != 0);
+
+ /* Unfragmented part is taken from the first segment. */
+ payload_len = ((head->data - skb_network_header(head)) -
+ sizeof(struct ipv6hdr) + fq->q.len -
+ sizeof(struct frag_hdr));
+ if (payload_len > IPV6_MAXPLEN) {
+ pr_debug("payload len is too large.\n");
+ goto out_oversize;
+ }
+
+ /* Head of list must not be cloned. */
+ if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) {
+ pr_debug("skb is cloned but can't expand head");
+ goto out_oom;
+ }
+
+ /* If the first fragment is fragmented itself, we split
+ * it to two chunks: the first with data and paged part
+ * and the second, holding only fragments. */
+ if (skb_has_frag_list(head)) {
+ struct sk_buff *clone;
+ int i, plen = 0;
+
+ clone = alloc_skb(0, GFP_ATOMIC);
+ if (clone == NULL)
+ goto out_oom;
+
+ clone->next = head->next;
+ head->next = clone;
+ skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
+ skb_frag_list_init(head);
+ for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
+ plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
+ clone->len = clone->data_len = head->data_len - plen;
+ head->data_len -= clone->len;
+ head->len -= clone->len;
+ clone->csum = 0;
+ clone->ip_summed = head->ip_summed;
+
+ NFCT_FRAG6_CB(clone)->orig = NULL;
+ atomic_add(clone->truesize, &nf_init_frags.mem);
+ }
+
+ /* We have to remove fragment header from datagram and to relocate
+ * header in order to calculate ICV correctly. */
+ skb_network_header(head)[fq->nhoffset] = skb_transport_header(head)[0];
+ memmove(head->head + sizeof(struct frag_hdr), head->head,
+ (head->data - head->head) - sizeof(struct frag_hdr));
+ head->mac_header += sizeof(struct frag_hdr);
+ head->network_header += sizeof(struct frag_hdr);
+
+ skb_shinfo(head)->frag_list = head->next;
+ skb_reset_transport_header(head);
+ skb_push(head, head->data - skb_network_header(head));
+
+ for (fp=head->next; fp; fp = fp->next) {
+ head->data_len += fp->len;
+ head->len += fp->len;
+ if (head->ip_summed != fp->ip_summed)
+ head->ip_summed = CHECKSUM_NONE;
+ else if (head->ip_summed == CHECKSUM_COMPLETE)
+ head->csum = csum_add(head->csum, fp->csum);
+ head->truesize += fp->truesize;
+ }
+ atomic_sub(head->truesize, &nf_init_frags.mem);
+
+ head->next = NULL;
+ head->dev = dev;
+ head->tstamp = fq->q.stamp;
+ ipv6_hdr(head)->payload_len = htons(payload_len);
+
+ /* Yes, and fold redundant checksum back. 8) */
+ if (head->ip_summed == CHECKSUM_COMPLETE)
+ head->csum = csum_partial(skb_network_header(head),
+ skb_network_header_len(head),
+ head->csum);
+
+ fq->q.fragments = NULL;
+ fq->q.fragments_tail = NULL;
+
+ /* all original skbs are linked into the NFCT_FRAG6_CB(head).orig */
+ fp = skb_shinfo(head)->frag_list;
+ if (fp && NFCT_FRAG6_CB(fp)->orig == NULL)
+ /* at above code, head skb is divided into two skbs. */
+ fp = fp->next;
+
+ op = NFCT_FRAG6_CB(head)->orig;
+ for (; fp; fp = fp->next) {
+ struct sk_buff *orig = NFCT_FRAG6_CB(fp)->orig;
+
+ op->next = orig;
+ op = orig;
+ NFCT_FRAG6_CB(fp)->orig = NULL;
+ }
+
+ return head;
+
+out_oversize:
+ if (net_ratelimit())
+ printk(KERN_DEBUG "nf_ct_frag6_reasm: payload len = %d\n", payload_len);
+ goto out_fail;
+out_oom:
+ if (net_ratelimit())
+ printk(KERN_DEBUG "nf_ct_frag6_reasm: no memory for reassembly\n");
+out_fail:
+ return NULL;
+}
+
+/*
+ * find the header just before Fragment Header.
+ *
+ * if success return 0 and set ...
+ * (*prevhdrp): the value of "Next Header Field" in the header
+ * just before Fragment Header.
+ * (*prevhoff): the offset of "Next Header Field" in the header
+ * just before Fragment Header.
+ * (*fhoff) : the offset of Fragment Header.
+ *
+ * Based on ipv6_skip_hdr() in net/ipv6/exthdr.c
+ *
+ */
+static int
+find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff)
+{
+ u8 nexthdr = ipv6_hdr(skb)->nexthdr;
+ const int netoff = skb_network_offset(skb);
+ u8 prev_nhoff = netoff + offsetof(struct ipv6hdr, nexthdr);
+ int start = netoff + sizeof(struct ipv6hdr);
+ int len = skb->len - start;
+ u8 prevhdr = NEXTHDR_IPV6;
+
+ while (nexthdr != NEXTHDR_FRAGMENT) {
+ struct ipv6_opt_hdr hdr;
+ int hdrlen;
+
+ if (!ipv6_ext_hdr(nexthdr)) {
+ return -1;
+ }
+ if (nexthdr == NEXTHDR_NONE) {
+ pr_debug("next header is none\n");
+ return -1;
+ }
+ if (len < (int)sizeof(struct ipv6_opt_hdr)) {
+ pr_debug("too short\n");
+ return -1;
+ }
+ if (skb_copy_bits(skb, start, &hdr, sizeof(hdr)))
+ BUG();
+ if (nexthdr == NEXTHDR_AUTH)
+ hdrlen = (hdr.hdrlen+2)<<2;
+ else
+ hdrlen = ipv6_optlen(&hdr);
+
+ prevhdr = nexthdr;
+ prev_nhoff = start;
+
+ nexthdr = hdr.nexthdr;
+ len -= hdrlen;
+ start += hdrlen;
+ }
+
+ if (len < 0)
+ return -1;
+
+ *prevhdrp = prevhdr;
+ *prevhoff = prev_nhoff;
+ *fhoff = start;
+
+ return 0;
+}
+
+struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
+{
+ struct sk_buff *clone;
+ struct net_device *dev = skb->dev;
+ struct frag_hdr *fhdr;
+ struct nf_ct_frag6_queue *fq;
+ struct ipv6hdr *hdr;
+ int fhoff, nhoff;
+ u8 prevhdr;
+ struct sk_buff *ret_skb = NULL;
+
+ /* Jumbo payload inhibits frag. header */
+ if (ipv6_hdr(skb)->payload_len == 0) {
+ pr_debug("payload len = 0\n");
+ return skb;
+ }
+
+ if (find_prev_fhdr(skb, &prevhdr, &nhoff, &fhoff) < 0)
+ return skb;
+
+ clone = skb_clone(skb, GFP_ATOMIC);
+ if (clone == NULL) {
+ pr_debug("Can't clone skb\n");
+ return skb;
+ }
+
+ NFCT_FRAG6_CB(clone)->orig = skb;
+
+ if (!pskb_may_pull(clone, fhoff + sizeof(*fhdr))) {
+ pr_debug("message is too short.\n");
+ goto ret_orig;
+ }
+
+ skb_set_transport_header(clone, fhoff);
+ hdr = ipv6_hdr(clone);
+ fhdr = (struct frag_hdr *)skb_transport_header(clone);
+
+ if (atomic_read(&nf_init_frags.mem) > nf_init_frags.high_thresh)
+ nf_ct_frag6_evictor();
+
+ fq = fq_find(fhdr->identification, user, &hdr->saddr, &hdr->daddr);
+ if (fq == NULL) {
+ pr_debug("Can't find and can't create new queue\n");
+ goto ret_orig;
+ }
+
+ spin_lock_bh(&fq->q.lock);
+
+ if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) {
+ spin_unlock_bh(&fq->q.lock);
+ pr_debug("Can't insert skb to queue\n");
+ fq_put(fq);
+ goto ret_orig;
+ }
+
+ if (fq->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
+ fq->q.meat == fq->q.len) {
+ ret_skb = nf_ct_frag6_reasm(fq, dev);
+ if (ret_skb == NULL)
+ pr_debug("Can't reassemble fragmented packets\n");
+ }
+ spin_unlock_bh(&fq->q.lock);
+
+ fq_put(fq);
+ return ret_skb;
+
+ret_orig:
+ kfree_skb(clone);
+ return skb;
+}
+
+void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
+ struct net_device *in, struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct sk_buff *s, *s2;
+
+ for (s = NFCT_FRAG6_CB(skb)->orig; s;) {
+ nf_conntrack_put_reasm(s->nfct_reasm);
+ nf_conntrack_get_reasm(skb);
+ s->nfct_reasm = skb;
+
+ s2 = s->next;
+ s->next = NULL;
+
+ NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, s, in, out, okfn,
+ NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
+ s = s2;
+ }
+ nf_conntrack_put_reasm(skb);
+}
+
+int nf_ct_frag6_init(void)
+{
+ nf_frags.hashfn = nf_hashfn;
+ nf_frags.constructor = ip6_frag_init;
+ nf_frags.destructor = NULL;
+ nf_frags.skb_free = nf_skb_free;
+ nf_frags.qsize = sizeof(struct nf_ct_frag6_queue);
+ nf_frags.match = ip6_frag_match;
+ nf_frags.frag_expire = nf_ct_frag6_expire;
+ nf_frags.secret_interval = 10 * 60 * HZ;
+ nf_init_frags.timeout = IPV6_FRAG_TIMEOUT;
+ nf_init_frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
+ nf_init_frags.low_thresh = IPV6_FRAG_LOW_THRESH;
+ inet_frags_init_net(&nf_init_frags);
+ inet_frags_init(&nf_frags);
+
+#ifdef CONFIG_SYSCTL
+ nf_ct_frag6_sysctl_header = register_sysctl_paths(nf_net_netfilter_sysctl_path,
+ nf_ct_frag6_sysctl_table);
+ if (!nf_ct_frag6_sysctl_header) {
+ inet_frags_fini(&nf_frags);
+ return -ENOMEM;
+ }
+#endif
+
+ return 0;
+}
+
+void nf_ct_frag6_cleanup(void)
+{
+#ifdef CONFIG_SYSCTL
+ unregister_sysctl_table(nf_ct_frag6_sysctl_header);
+ nf_ct_frag6_sysctl_header = NULL;
+#endif
+ inet_frags_fini(&nf_frags);
+
+ nf_init_frags.low_thresh = 0;
+ nf_ct_frag6_evictor();
+}
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
new file mode 100644
index 00000000..cdd6d045
--- /dev/null
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -0,0 +1,137 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/ipv6.h>
+#include <linux/in6.h>
+#include <linux/netfilter.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/icmp.h>
+#include <linux/sysctl.h>
+#include <net/ipv6.h>
+#include <net/inet_frag.h>
+
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter_bridge.h>
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_l3proto.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
+#endif
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+
+static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
+ struct sk_buff *skb)
+{
+ u16 zone = NF_CT_DEFAULT_ZONE;
+
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+ if (skb->nfct)
+ zone = nf_ct_zone((struct nf_conn *)skb->nfct);
+#endif
+
+#ifdef CONFIG_BRIDGE_NETFILTER
+ if (skb->nf_bridge &&
+ skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)
+ return IP6_DEFRAG_CONNTRACK_BRIDGE_IN + zone;
+#endif
+ if (hooknum == NF_INET_PRE_ROUTING)
+ return IP6_DEFRAG_CONNTRACK_IN + zone;
+ else
+ return IP6_DEFRAG_CONNTRACK_OUT + zone;
+
+}
+
+static unsigned int ipv6_defrag(unsigned int hooknum,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct sk_buff *reasm;
+
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+ /* Previously seen (loopback)? */
+ if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct))
+ return NF_ACCEPT;
+#endif
+
+ reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(hooknum, skb));
+ /* queued */
+ if (reasm == NULL)
+ return NF_STOLEN;
+
+ /* error occurred or not fragmented */
+ if (reasm == skb)
+ return NF_ACCEPT;
+
+ nf_ct_frag6_output(hooknum, reasm, (struct net_device *)in,
+ (struct net_device *)out, okfn);
+
+ return NF_STOLEN;
+}
+
+static struct nf_hook_ops ipv6_defrag_ops[] = {
+ {
+ .hook = ipv6_defrag,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_PRE_ROUTING,
+ .priority = NF_IP6_PRI_CONNTRACK_DEFRAG,
+ },
+ {
+ .hook = ipv6_defrag,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_LOCAL_OUT,
+ .priority = NF_IP6_PRI_CONNTRACK_DEFRAG,
+ },
+};
+
+static int __init nf_defrag_init(void)
+{
+ int ret = 0;
+
+ ret = nf_ct_frag6_init();
+ if (ret < 0) {
+ pr_err("nf_defrag_ipv6: can't initialize frag6.\n");
+ return ret;
+ }
+ ret = nf_register_hooks(ipv6_defrag_ops, ARRAY_SIZE(ipv6_defrag_ops));
+ if (ret < 0) {
+ pr_err("nf_defrag_ipv6: can't register hooks\n");
+ goto cleanup_frag6;
+ }
+ return ret;
+
+cleanup_frag6:
+ nf_ct_frag6_cleanup();
+ return ret;
+
+}
+
+static void __exit nf_defrag_fini(void)
+{
+ nf_unregister_hooks(ipv6_defrag_ops, ARRAY_SIZE(ipv6_defrag_ops));
+ nf_ct_frag6_cleanup();
+}
+
+void nf_defrag_ipv6_enable(void)
+{
+}
+EXPORT_SYMBOL_GPL(nf_defrag_ipv6_enable);
+
+module_init(nf_defrag_init);
+module_exit(nf_defrag_fini);
+
+MODULE_LICENSE("GPL");
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
new file mode 100755
index 00000000..f46e315b
--- /dev/null
+++ b/net/ipv6/ping.c
@@ -0,0 +1,222 @@
+/*
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * "Ping" sockets
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Based on ipv4/ping.c code.
+ *
+ * Authors: Lorenzo Colitti (IPv6 support)
+ * Vasiliy Kulikov / Openwall (IPv4 implementation, for Linux 2.6),
+ * Pavel Kankovsky (IPv4 implementation, for Linux 2.4.32)
+ *
+ */
+
+#include <net/addrconf.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/protocol.h>
+#include <net/udp.h>
+#include <net/transp_v6.h>
+#include <net/ping.h>
+#include <linux/module.h>
+
+struct proto pingv6_prot = {
+ .name = "PINGv6",
+ .owner = THIS_MODULE,
+ .init = ping_init_sock,
+ .close = ping_close,
+ .connect = ip6_datagram_connect,
+ .disconnect = udp_disconnect,
+ .setsockopt = ipv6_setsockopt,
+ .getsockopt = ipv6_getsockopt,
+ .sendmsg = ping_v6_sendmsg,
+ .recvmsg = ping_recvmsg,
+ .bind = ping_bind,
+ .backlog_rcv = ping_queue_rcv_skb,
+ .hash = ping_hash,
+ .unhash = ping_unhash,
+ .get_port = ping_get_port,
+ .obj_size = sizeof(struct raw6_sock),
+};
+EXPORT_SYMBOL_GPL(pingv6_prot);
+
+static struct inet_protosw pingv6_protosw = {
+ .type = SOCK_DGRAM,
+ .protocol = IPPROTO_ICMPV6,
+ .prot = &pingv6_prot,
+ .ops = &inet6_dgram_ops,
+ .no_check = UDP_CSUM_DEFAULT,
+ .flags = INET_PROTOSW_REUSE,
+};
+
+
+/* Compatibility glue so we can support IPv6 when it's compiled as a module */
+int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
+{
+ return -EAFNOSUPPORT;
+}
+int dummy_datagram_recv_ctl(struct sock *sk, struct msghdr *msg,
+ struct sk_buff *skb)
+{
+ return -EAFNOSUPPORT;
+}
+int dummy_icmpv6_err_convert(u8 type, u8 code, int *err)
+{
+ return -EAFNOSUPPORT;
+}
+void dummy_ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
+ __be16 port, u32 info, u8 *payload) {}
+int dummy_ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
+ struct net_device *dev, int strict)
+{
+ return 0;
+}
+
+int __init pingv6_init(void)
+{
+ pingv6_ops.ipv6_recv_error = ipv6_recv_error;
+ pingv6_ops.datagram_recv_ctl = datagram_recv_ctl;
+ pingv6_ops.icmpv6_err_convert = icmpv6_err_convert;
+ pingv6_ops.ipv6_icmp_error = ipv6_icmp_error;
+ pingv6_ops.ipv6_chk_addr = ipv6_chk_addr;
+ return inet6_register_protosw(&pingv6_protosw);
+}
+
+/* This never gets called because it's not possible to unload the ipv6 module,
+ * but just in case.
+ */
+void pingv6_exit(void)
+{
+ pingv6_ops.ipv6_recv_error = dummy_ipv6_recv_error;
+ pingv6_ops.datagram_recv_ctl = dummy_datagram_recv_ctl;
+ pingv6_ops.icmpv6_err_convert = dummy_icmpv6_err_convert;
+ pingv6_ops.ipv6_icmp_error = dummy_ipv6_icmp_error;
+ pingv6_ops.ipv6_chk_addr = dummy_ipv6_chk_addr;
+ inet6_unregister_protosw(&pingv6_protosw);
+}
+
+int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+ size_t len)
+{
+ struct inet_sock *inet = inet_sk(sk);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct icmp6hdr user_icmph;
+ int addr_type;
+ struct in6_addr *daddr;
+ int iif = 0;
+ struct flowi6 fl6;
+ int err;
+ int hlimit;
+ struct dst_entry *dst;
+ struct rt6_info *rt;
+ struct pingfakehdr pfh;
+
+ pr_debug("ping_v6_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num);
+
+ err = ping_common_sendmsg(AF_INET6, msg, len, &user_icmph,
+ sizeof(user_icmph));
+ if (err)
+ return err;
+
+ if (msg->msg_name) {
+ struct sockaddr_in6 *u = (struct sockaddr_in6 *) msg->msg_name;
+ if (msg->msg_namelen < sizeof(struct sockaddr_in6) ||
+ u->sin6_family != AF_INET6) {
+ return -EINVAL;
+ }
+ if (sk->sk_bound_dev_if &&
+ sk->sk_bound_dev_if != u->sin6_scope_id) {
+ return -EINVAL;
+ }
+ daddr = &(u->sin6_addr);
+ iif = u->sin6_scope_id;
+ } else {
+ if (sk->sk_state != TCP_ESTABLISHED)
+ return -EDESTADDRREQ;
+ daddr = &np->daddr;
+ }
+
+ if (!iif)
+ iif = sk->sk_bound_dev_if;
+
+ addr_type = ipv6_addr_type(daddr);
+ if (__ipv6_addr_needs_scope_id(addr_type) && !iif)
+ return -EINVAL;
+ if (addr_type & IPV6_ADDR_MAPPED)
+ return -EINVAL;
+
+ /* TODO: use ip6_datagram_send_ctl to get options from cmsg */
+
+ memset(&fl6, 0, sizeof(fl6));
+
+ fl6.flowi6_proto = IPPROTO_ICMPV6;
+ fl6.saddr = np->saddr;
+ fl6.daddr = *daddr;
+ fl6.fl6_icmp_type = user_icmph.icmp6_type;
+ fl6.fl6_icmp_code = user_icmph.icmp6_code;
+ security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
+
+ if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
+ fl6.flowi6_oif = np->mcast_oif;
+ else if (!fl6.flowi6_oif)
+ fl6.flowi6_oif = np->ucast_oif;
+
+ dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr, 1);
+ if (IS_ERR(dst))
+ return PTR_ERR(dst);
+ rt = (struct rt6_info *) dst;
+
+ np = inet6_sk(sk);
+ if (!np)
+ return -EBADF;
+
+ if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
+ fl6.flowi6_oif = np->mcast_oif;
+ else if (!fl6.flowi6_oif)
+ fl6.flowi6_oif = np->ucast_oif;
+
+ pfh.icmph.type = user_icmph.icmp6_type;
+ pfh.icmph.code = user_icmph.icmp6_code;
+ pfh.icmph.checksum = 0;
+ pfh.icmph.un.echo.id = inet->inet_sport;
+ pfh.icmph.un.echo.sequence = user_icmph.icmp6_sequence;
+ pfh.iov = msg->msg_iov;
+ pfh.wcheck = 0;
+ pfh.family = AF_INET6;
+
+ if (ipv6_addr_is_multicast(&fl6.daddr))
+ hlimit = np->mcast_hops;
+ else
+ hlimit = np->hop_limit;
+ if (hlimit < 0)
+ hlimit = ip6_dst_hoplimit(dst);
+
+ lock_sock(sk);
+ err = ip6_append_data(sk, ping_getfrag, &pfh, len,
+ 0, hlimit,
+ np->tclass, NULL, &fl6, rt,
+ MSG_DONTWAIT, np->dontfrag);
+
+ if (err) {
+ ICMP6_INC_STATS_BH(sock_net(sk), rt->rt6i_idev,
+ ICMP6_MIB_OUTERRORS);
+ ip6_flush_pending_frames(sk);
+ } else {
+ err = icmpv6_push_pending_frames(sk, &fl6,
+ (struct icmp6hdr *) &pfh.icmph,
+ len);
+ }
+ release_sock(sk);
+
+ if (err)
+ return err;
+
+ return len;
+}
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
new file mode 100644
index 00000000..da2e92d0
--- /dev/null
+++ b/net/ipv6/proc.c
@@ -0,0 +1,338 @@
+/*
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * This file implements the various access functions for the
+ * PROC file system. This is very similar to the IPv4 version,
+ * except it reports the sockets in the INET6 address family.
+ *
+ * Authors: David S. Miller (davem@caip.rutgers.edu)
+ * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/socket.h>
+#include <linux/net.h>
+#include <linux/ipv6.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/stddef.h>
+#include <linux/export.h>
+#include <net/net_namespace.h>
+#include <net/ip.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <net/transp_v6.h>
+#include <net/ipv6.h>
+
+static int sockstat6_seq_show(struct seq_file *seq, void *v)
+{
+ struct net *net = seq->private;
+
+ seq_printf(seq, "TCP6: inuse %d\n",
+ sock_prot_inuse_get(net, &tcpv6_prot));
+ seq_printf(seq, "UDP6: inuse %d\n",
+ sock_prot_inuse_get(net, &udpv6_prot));
+ seq_printf(seq, "UDPLITE6: inuse %d\n",
+ sock_prot_inuse_get(net, &udplitev6_prot));
+ seq_printf(seq, "RAW6: inuse %d\n",
+ sock_prot_inuse_get(net, &rawv6_prot));
+ seq_printf(seq, "FRAG6: inuse %d memory %d\n",
+ ip6_frag_nqueues(net), ip6_frag_mem(net));
+ return 0;
+}
+
+static int sockstat6_seq_open(struct inode *inode, struct file *file)
+{
+ return single_open_net(inode, file, sockstat6_seq_show);
+}
+
+static const struct file_operations sockstat6_seq_fops = {
+ .owner = THIS_MODULE,
+ .open = sockstat6_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release_net,
+};
+
+static const struct snmp_mib snmp6_ipstats_list[] = {
+/* ipv6 mib according to RFC 2465 */
+ SNMP_MIB_ITEM("Ip6InReceives", IPSTATS_MIB_INPKTS),
+ SNMP_MIB_ITEM("Ip6InHdrErrors", IPSTATS_MIB_INHDRERRORS),
+ SNMP_MIB_ITEM("Ip6InTooBigErrors", IPSTATS_MIB_INTOOBIGERRORS),
+ SNMP_MIB_ITEM("Ip6InNoRoutes", IPSTATS_MIB_INNOROUTES),
+ SNMP_MIB_ITEM("Ip6InAddrErrors", IPSTATS_MIB_INADDRERRORS),
+ SNMP_MIB_ITEM("Ip6InUnknownProtos", IPSTATS_MIB_INUNKNOWNPROTOS),
+ SNMP_MIB_ITEM("Ip6InTruncatedPkts", IPSTATS_MIB_INTRUNCATEDPKTS),
+ SNMP_MIB_ITEM("Ip6InDiscards", IPSTATS_MIB_INDISCARDS),
+ SNMP_MIB_ITEM("Ip6InDelivers", IPSTATS_MIB_INDELIVERS),
+ SNMP_MIB_ITEM("Ip6OutForwDatagrams", IPSTATS_MIB_OUTFORWDATAGRAMS),
+ SNMP_MIB_ITEM("Ip6OutRequests", IPSTATS_MIB_OUTPKTS),
+ SNMP_MIB_ITEM("Ip6OutDiscards", IPSTATS_MIB_OUTDISCARDS),
+ SNMP_MIB_ITEM("Ip6OutNoRoutes", IPSTATS_MIB_OUTNOROUTES),
+ SNMP_MIB_ITEM("Ip6ReasmTimeout", IPSTATS_MIB_REASMTIMEOUT),
+ SNMP_MIB_ITEM("Ip6ReasmReqds", IPSTATS_MIB_REASMREQDS),
+ SNMP_MIB_ITEM("Ip6ReasmOKs", IPSTATS_MIB_REASMOKS),
+ SNMP_MIB_ITEM("Ip6ReasmFails", IPSTATS_MIB_REASMFAILS),
+ SNMP_MIB_ITEM("Ip6FragOKs", IPSTATS_MIB_FRAGOKS),
+ SNMP_MIB_ITEM("Ip6FragFails", IPSTATS_MIB_FRAGFAILS),
+ SNMP_MIB_ITEM("Ip6FragCreates", IPSTATS_MIB_FRAGCREATES),
+ SNMP_MIB_ITEM("Ip6InMcastPkts", IPSTATS_MIB_INMCASTPKTS),
+ SNMP_MIB_ITEM("Ip6OutMcastPkts", IPSTATS_MIB_OUTMCASTPKTS),
+ SNMP_MIB_ITEM("Ip6InOctets", IPSTATS_MIB_INOCTETS),
+ SNMP_MIB_ITEM("Ip6OutOctets", IPSTATS_MIB_OUTOCTETS),
+ SNMP_MIB_ITEM("Ip6InMcastOctets", IPSTATS_MIB_INMCASTOCTETS),
+ SNMP_MIB_ITEM("Ip6OutMcastOctets", IPSTATS_MIB_OUTMCASTOCTETS),
+ SNMP_MIB_ITEM("Ip6InBcastOctets", IPSTATS_MIB_INBCASTOCTETS),
+ SNMP_MIB_ITEM("Ip6OutBcastOctets", IPSTATS_MIB_OUTBCASTOCTETS),
+ SNMP_MIB_SENTINEL
+};
+
+static const struct snmp_mib snmp6_icmp6_list[] = {
+/* icmpv6 mib according to RFC 2466 */
+ SNMP_MIB_ITEM("Icmp6InMsgs", ICMP6_MIB_INMSGS),
+ SNMP_MIB_ITEM("Icmp6InErrors", ICMP6_MIB_INERRORS),
+ SNMP_MIB_ITEM("Icmp6OutMsgs", ICMP6_MIB_OUTMSGS),
+ SNMP_MIB_ITEM("Icmp6OutErrors", ICMP6_MIB_OUTERRORS),
+ SNMP_MIB_SENTINEL
+};
+
+/* RFC 4293 v6 ICMPMsgStatsTable; named items for RFC 2466 compatibility */
+static const char *const icmp6type2name[256] = {
+ [ICMPV6_DEST_UNREACH] = "DestUnreachs",
+ [ICMPV6_PKT_TOOBIG] = "PktTooBigs",
+ [ICMPV6_TIME_EXCEED] = "TimeExcds",
+ [ICMPV6_PARAMPROB] = "ParmProblems",
+ [ICMPV6_ECHO_REQUEST] = "Echos",
+ [ICMPV6_ECHO_REPLY] = "EchoReplies",
+ [ICMPV6_MGM_QUERY] = "GroupMembQueries",
+ [ICMPV6_MGM_REPORT] = "GroupMembResponses",
+ [ICMPV6_MGM_REDUCTION] = "GroupMembReductions",
+ [ICMPV6_MLD2_REPORT] = "MLDv2Reports",
+ [NDISC_ROUTER_ADVERTISEMENT] = "RouterAdvertisements",
+ [NDISC_ROUTER_SOLICITATION] = "RouterSolicits",
+ [NDISC_NEIGHBOUR_ADVERTISEMENT] = "NeighborAdvertisements",
+ [NDISC_NEIGHBOUR_SOLICITATION] = "NeighborSolicits",
+ [NDISC_REDIRECT] = "Redirects",
+};
+
+
+static const struct snmp_mib snmp6_udp6_list[] = {
+ SNMP_MIB_ITEM("Udp6InDatagrams", UDP_MIB_INDATAGRAMS),
+ SNMP_MIB_ITEM("Udp6NoPorts", UDP_MIB_NOPORTS),
+ SNMP_MIB_ITEM("Udp6InErrors", UDP_MIB_INERRORS),
+ SNMP_MIB_ITEM("Udp6OutDatagrams", UDP_MIB_OUTDATAGRAMS),
+ SNMP_MIB_ITEM("Udp6RcvbufErrors", UDP_MIB_RCVBUFERRORS),
+ SNMP_MIB_ITEM("Udp6SndbufErrors", UDP_MIB_SNDBUFERRORS),
+ SNMP_MIB_SENTINEL
+};
+
+static const struct snmp_mib snmp6_udplite6_list[] = {
+ SNMP_MIB_ITEM("UdpLite6InDatagrams", UDP_MIB_INDATAGRAMS),
+ SNMP_MIB_ITEM("UdpLite6NoPorts", UDP_MIB_NOPORTS),
+ SNMP_MIB_ITEM("UdpLite6InErrors", UDP_MIB_INERRORS),
+ SNMP_MIB_ITEM("UdpLite6OutDatagrams", UDP_MIB_OUTDATAGRAMS),
+ SNMP_MIB_ITEM("UdpLite6RcvbufErrors", UDP_MIB_RCVBUFERRORS),
+ SNMP_MIB_ITEM("UdpLite6SndbufErrors", UDP_MIB_SNDBUFERRORS),
+ SNMP_MIB_SENTINEL
+};
+
+static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, atomic_long_t *smib)
+{
+ char name[32];
+ int i;
+
+ /* print by name -- deprecated items */
+ for (i = 0; i < ICMP6MSG_MIB_MAX; i++) {
+ int icmptype;
+ const char *p;
+
+ icmptype = i & 0xff;
+ p = icmp6type2name[icmptype];
+ if (!p) /* don't print un-named types here */
+ continue;
+ snprintf(name, sizeof(name), "Icmp6%s%s",
+ i & 0x100 ? "Out" : "In", p);
+ seq_printf(seq, "%-32s\t%lu\n", name,
+ atomic_long_read(smib + i));
+ }
+
+ /* print by number (nonzero only) - ICMPMsgStat format */
+ for (i = 0; i < ICMP6MSG_MIB_MAX; i++) {
+ unsigned long val;
+
+ val = atomic_long_read(smib + i);
+ if (!val)
+ continue;
+ snprintf(name, sizeof(name), "Icmp6%sType%u",
+ i & 0x100 ? "Out" : "In", i & 0xff);
+ seq_printf(seq, "%-32s\t%lu\n", name, val);
+ }
+}
+
+/* can be called either with percpu mib (pcpumib != NULL),
+ * or shared one (smib != NULL)
+ */
+static void snmp6_seq_show_item(struct seq_file *seq, void __percpu **pcpumib,
+ atomic_long_t *smib,
+ const struct snmp_mib *itemlist)
+{
+ int i;
+ unsigned long val;
+
+ for (i = 0; itemlist[i].name; i++) {
+ val = pcpumib ?
+ snmp_fold_field(pcpumib, itemlist[i].entry) :
+ atomic_long_read(smib + itemlist[i].entry);
+ seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name, val);
+ }
+}
+
+static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu **mib,
+ const struct snmp_mib *itemlist, size_t syncpoff)
+{
+ int i;
+
+ for (i = 0; itemlist[i].name; i++)
+ seq_printf(seq, "%-32s\t%llu\n", itemlist[i].name,
+ snmp_fold_field64(mib, itemlist[i].entry, syncpoff));
+}
+
+static int snmp6_seq_show(struct seq_file *seq, void *v)
+{
+ struct net *net = (struct net *)seq->private;
+
+ snmp6_seq_show_item64(seq, (void __percpu **)net->mib.ipv6_statistics,
+ snmp6_ipstats_list, offsetof(struct ipstats_mib, syncp));
+ snmp6_seq_show_item(seq, (void __percpu **)net->mib.icmpv6_statistics,
+ NULL, snmp6_icmp6_list);
+ snmp6_seq_show_icmpv6msg(seq, net->mib.icmpv6msg_statistics->mibs);
+ snmp6_seq_show_item(seq, (void __percpu **)net->mib.udp_stats_in6,
+ NULL, snmp6_udp6_list);
+ snmp6_seq_show_item(seq, (void __percpu **)net->mib.udplite_stats_in6,
+ NULL, snmp6_udplite6_list);
+ return 0;
+}
+
+static int snmp6_seq_open(struct inode *inode, struct file *file)
+{
+ return single_open_net(inode, file, snmp6_seq_show);
+}
+
+static const struct file_operations snmp6_seq_fops = {
+ .owner = THIS_MODULE,
+ .open = snmp6_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release_net,
+};
+
+static int snmp6_dev_seq_show(struct seq_file *seq, void *v)
+{
+ struct inet6_dev *idev = (struct inet6_dev *)seq->private;
+
+ seq_printf(seq, "%-32s\t%u\n", "ifIndex", idev->dev->ifindex);
+ snmp6_seq_show_item64(seq, (void __percpu **)idev->stats.ipv6,
+ snmp6_ipstats_list, offsetof(struct ipstats_mib, syncp));
+ snmp6_seq_show_item(seq, NULL, idev->stats.icmpv6dev->mibs,
+ snmp6_icmp6_list);
+ snmp6_seq_show_icmpv6msg(seq, idev->stats.icmpv6msgdev->mibs);
+ return 0;
+}
+
+static int snmp6_dev_seq_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, snmp6_dev_seq_show, PDE(inode)->data);
+}
+
+static const struct file_operations snmp6_dev_seq_fops = {
+ .owner = THIS_MODULE,
+ .open = snmp6_dev_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+int snmp6_register_dev(struct inet6_dev *idev)
+{
+ struct proc_dir_entry *p;
+ struct net *net;
+
+ if (!idev || !idev->dev)
+ return -EINVAL;
+
+ net = dev_net(idev->dev);
+ if (!net->mib.proc_net_devsnmp6)
+ return -ENOENT;
+
+ p = proc_create_data(idev->dev->name, S_IRUGO,
+ net->mib.proc_net_devsnmp6,
+ &snmp6_dev_seq_fops, idev);
+ if (!p)
+ return -ENOMEM;
+
+ idev->stats.proc_dir_entry = p;
+ return 0;
+}
+
+int snmp6_unregister_dev(struct inet6_dev *idev)
+{
+ struct net *net = dev_net(idev->dev);
+ if (!net->mib.proc_net_devsnmp6)
+ return -ENOENT;
+ if (!idev->stats.proc_dir_entry)
+ return -EINVAL;
+ remove_proc_entry(idev->stats.proc_dir_entry->name,
+ net->mib.proc_net_devsnmp6);
+ idev->stats.proc_dir_entry = NULL;
+ return 0;
+}
+
+static int __net_init ipv6_proc_init_net(struct net *net)
+{
+ if (!proc_net_fops_create(net, "sockstat6", S_IRUGO,
+ &sockstat6_seq_fops))
+ return -ENOMEM;
+
+ if (!proc_net_fops_create(net, "snmp6", S_IRUGO, &snmp6_seq_fops))
+ goto proc_snmp6_fail;
+
+ net->mib.proc_net_devsnmp6 = proc_mkdir("dev_snmp6", net->proc_net);
+ if (!net->mib.proc_net_devsnmp6)
+ goto proc_dev_snmp6_fail;
+ return 0;
+
+proc_snmp6_fail:
+ proc_net_remove(net, "sockstat6");
+proc_dev_snmp6_fail:
+ proc_net_remove(net, "dev_snmp6");
+ return -ENOMEM;
+}
+
+static void __net_exit ipv6_proc_exit_net(struct net *net)
+{
+ proc_net_remove(net, "sockstat6");
+ proc_net_remove(net, "dev_snmp6");
+ proc_net_remove(net, "snmp6");
+}
+
+static struct pernet_operations ipv6_proc_ops = {
+ .init = ipv6_proc_init_net,
+ .exit = ipv6_proc_exit_net,
+};
+
+int __init ipv6_misc_proc_init(void)
+{
+ return register_pernet_subsys(&ipv6_proc_ops);
+}
+
+void ipv6_misc_proc_exit(void)
+{
+ unregister_pernet_subsys(&ipv6_proc_ops);
+}
+
diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c
new file mode 100644
index 00000000..9a7978fd
--- /dev/null
+++ b/net/ipv6/protocol.c
@@ -0,0 +1,54 @@
+/*
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * PF_INET6 protocol dispatch tables.
+ *
+ * Authors: Pedro Roque <roque@di.fc.ul.pt>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/*
+ * Changes:
+ *
+ * Vince Laviano (vince@cs.stanford.edu) 16 May 2001
+ * - Removed unused variable 'inet6_protocol_base'
+ * - Modified inet6_del_protocol() to correctly maintain copy bit.
+ */
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/spinlock.h>
+#include <net/protocol.h>
+
+const struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS] __read_mostly;
+
+int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol)
+{
+ int hash = protocol & (MAX_INET_PROTOS - 1);
+
+ return !cmpxchg((const struct inet6_protocol **)&inet6_protos[hash],
+ NULL, prot) ? 0 : -1;
+}
+EXPORT_SYMBOL(inet6_add_protocol);
+
+/*
+ * Remove a protocol from the hash tables.
+ */
+
+int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char protocol)
+{
+ int ret, hash = protocol & (MAX_INET_PROTOS - 1);
+
+ ret = (cmpxchg((const struct inet6_protocol **)&inet6_protos[hash],
+ prot, NULL) == prot) ? 0 : -1;
+
+ synchronize_net();
+
+ return ret;
+}
+EXPORT_SYMBOL(inet6_del_protocol);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
new file mode 100644
index 00000000..5bddea77
--- /dev/null
+++ b/net/ipv6/raw.c
@@ -0,0 +1,1368 @@
+/*
+ * RAW sockets for IPv6
+ * Linux INET6 implementation
+ *
+ * Authors:
+ * Pedro Roque <roque@di.fc.ul.pt>
+ *
+ * Adapted from linux/net/ipv4/raw.c
+ *
+ * Fixes:
+ * Hideaki YOSHIFUJI : sin6_scope_id support
+ * YOSHIFUJI,H.@USAGI : raw checksum (RFC2292(bis) compliance)
+ * Kazunori MIYAZAWA @USAGI: change process style to use ip6_append_data
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/slab.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/icmpv6.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/skbuff.h>
+#include <linux/compat.h>
+#include <asm/uaccess.h>
+#include <asm/ioctls.h>
+
+#include <net/net_namespace.h>
+#include <net/ip.h>
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/ndisc.h>
+#include <net/protocol.h>
+#include <net/ip6_route.h>
+#include <net/ip6_checksum.h>
+#include <net/addrconf.h>
+#include <net/transp_v6.h>
+#include <net/udp.h>
+#include <net/inet_common.h>
+#include <net/tcp_states.h>
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#include <net/mip6.h>
+#endif
+#include <linux/mroute6.h>
+
+#include <net/raw.h>
+#include <net/rawv6.h>
+#include <net/xfrm.h>
+
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/export.h>
+
+static struct raw_hashinfo raw_v6_hashinfo = {
+ .lock = __RW_LOCK_UNLOCKED(raw_v6_hashinfo.lock),
+};
+
+static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
+ unsigned short num, const struct in6_addr *loc_addr,
+ const struct in6_addr *rmt_addr, int dif)
+{
+ struct hlist_node *node;
+ int is_multicast = ipv6_addr_is_multicast(loc_addr);
+
+ sk_for_each_from(sk, node)
+ if (inet_sk(sk)->inet_num == num) {
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ if (!net_eq(sock_net(sk), net))
+ continue;
+
+ if (!ipv6_addr_any(&np->daddr) &&
+ !ipv6_addr_equal(&np->daddr, rmt_addr))
+ continue;
+
+ if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
+ continue;
+
+ if (!ipv6_addr_any(&np->rcv_saddr)) {
+ if (ipv6_addr_equal(&np->rcv_saddr, loc_addr))
+ goto found;
+ if (is_multicast &&
+ inet6_mc_check(sk, loc_addr, rmt_addr))
+ goto found;
+ continue;
+ }
+ goto found;
+ }
+ sk = NULL;
+found:
+ return sk;
+}
+
+/*
+ * 0 - deliver
+ * 1 - block
+ */
+static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb)
+{
+ struct icmp6hdr *icmph;
+ struct raw6_sock *rp = raw6_sk(sk);
+
+ if (pskb_may_pull(skb, sizeof(struct icmp6hdr))) {
+ __u32 *data = &rp->filter.data[0];
+ int bit_nr;
+
+ icmph = (struct icmp6hdr *) skb->data;
+ bit_nr = icmph->icmp6_type;
+
+ return (data[bit_nr >> 5] & (1 << (bit_nr & 31))) != 0;
+ }
+ return 0;
+}
+
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+typedef int mh_filter_t(struct sock *sock, struct sk_buff *skb);
+
+static mh_filter_t __rcu *mh_filter __read_mostly;
+
+int rawv6_mh_filter_register(mh_filter_t filter)
+{
+ rcu_assign_pointer(mh_filter, filter);
+ return 0;
+}
+EXPORT_SYMBOL(rawv6_mh_filter_register);
+
+int rawv6_mh_filter_unregister(mh_filter_t filter)
+{
+ RCU_INIT_POINTER(mh_filter, NULL);
+ synchronize_rcu();
+ return 0;
+}
+EXPORT_SYMBOL(rawv6_mh_filter_unregister);
+
+#endif
+
+/*
+ * demultiplex raw sockets.
+ * (should consider queueing the skb in the sock receive_queue
+ * without calling rawv6.c)
+ *
+ * Caller owns SKB so we must make clones.
+ */
+static int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
+{
+ const struct in6_addr *saddr;
+ const struct in6_addr *daddr;
+ struct sock *sk;
+ int delivered = 0;
+ __u8 hash;
+ struct net *net;
+
+ saddr = &ipv6_hdr(skb)->saddr;
+ daddr = saddr + 1;
+
+ hash = nexthdr & (MAX_INET_PROTOS - 1);
+
+ read_lock(&raw_v6_hashinfo.lock);
+ sk = sk_head(&raw_v6_hashinfo.ht[hash]);
+
+ if (sk == NULL)
+ goto out;
+
+ net = dev_net(skb->dev);
+ sk = __raw_v6_lookup(net, sk, nexthdr, daddr, saddr, IP6CB(skb)->iif);
+
+ while (sk) {
+ int filtered;
+
+ delivered = 1;
+ switch (nexthdr) {
+ case IPPROTO_ICMPV6:
+ filtered = icmpv6_filter(sk, skb);
+ break;
+
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ case IPPROTO_MH:
+ {
+ /* XXX: To validate MH only once for each packet,
+ * this is placed here. It should be after checking
+ * xfrm policy, however it doesn't. The checking xfrm
+ * policy is placed in rawv6_rcv() because it is
+ * required for each socket.
+ */
+ mh_filter_t *filter;
+
+ filter = rcu_dereference(mh_filter);
+ filtered = filter ? (*filter)(sk, skb) : 0;
+ break;
+ }
+#endif
+ default:
+ filtered = 0;
+ break;
+ }
+
+ if (filtered < 0)
+ break;
+ if (filtered == 0) {
+ struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
+
+ /* Not releasing hash table! */
+ if (clone) {
+ nf_reset(clone);
+ rawv6_rcv(sk, clone);
+ }
+ }
+ sk = __raw_v6_lookup(net, sk_next(sk), nexthdr, daddr, saddr,
+ IP6CB(skb)->iif);
+ }
+out:
+ read_unlock(&raw_v6_hashinfo.lock);
+ return delivered;
+}
+
+int raw6_local_deliver(struct sk_buff *skb, int nexthdr)
+{
+ struct sock *raw_sk;
+
+ raw_sk = sk_head(&raw_v6_hashinfo.ht[nexthdr & (MAX_INET_PROTOS - 1)]);
+ if (raw_sk && !ipv6_raw_deliver(skb, nexthdr))
+ raw_sk = NULL;
+
+ return raw_sk != NULL;
+}
+
+/* This cleans up af_inet6 a bit. -DaveM */
+static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+{
+ struct inet_sock *inet = inet_sk(sk);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct sockaddr_in6 *addr = (struct sockaddr_in6 *) uaddr;
+ __be32 v4addr = 0;
+ int addr_type;
+ int err;
+
+ if (addr_len < SIN6_LEN_RFC2133)
+ return -EINVAL;
+ addr_type = ipv6_addr_type(&addr->sin6_addr);
+
+ /* Raw sockets are IPv6 only */
+ if (addr_type == IPV6_ADDR_MAPPED)
+ return -EADDRNOTAVAIL;
+
+ lock_sock(sk);
+
+ err = -EINVAL;
+ if (sk->sk_state != TCP_CLOSE)
+ goto out;
+
+ rcu_read_lock();
+ /* Check if the address belongs to the host. */
+ if (addr_type != IPV6_ADDR_ANY) {
+ struct net_device *dev = NULL;
+
+ if (addr_type & IPV6_ADDR_LINKLOCAL) {
+ if (addr_len >= sizeof(struct sockaddr_in6) &&
+ addr->sin6_scope_id) {
+ /* Override any existing binding, if another
+ * one is supplied by user.
+ */
+ sk->sk_bound_dev_if = addr->sin6_scope_id;
+ }
+
+ /* Binding to link-local address requires an interface */
+ if (!sk->sk_bound_dev_if)
+ goto out_unlock;
+
+ err = -ENODEV;
+ dev = dev_get_by_index_rcu(sock_net(sk),
+ sk->sk_bound_dev_if);
+ if (!dev)
+ goto out_unlock;
+ }
+
+ /* ipv4 addr of the socket is invalid. Only the
+ * unspecified and mapped address have a v4 equivalent.
+ */
+ v4addr = LOOPBACK4_IPV6;
+ if (!(addr_type & IPV6_ADDR_MULTICAST)) {
+ err = -EADDRNOTAVAIL;
+ if (!ipv6_chk_addr(sock_net(sk), &addr->sin6_addr,
+ dev, 0)) {
+ goto out_unlock;
+ }
+ }
+ }
+
+ inet->inet_rcv_saddr = inet->inet_saddr = v4addr;
+ np->rcv_saddr = addr->sin6_addr;
+ if (!(addr_type & IPV6_ADDR_MULTICAST))
+ np->saddr = addr->sin6_addr;
+ err = 0;
+out_unlock:
+ rcu_read_unlock();
+out:
+ release_sock(sk);
+ return err;
+}
+
+static void rawv6_err(struct sock *sk, struct sk_buff *skb,
+ struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+{
+ struct inet_sock *inet = inet_sk(sk);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ int err;
+ int harderr;
+
+ /* Report error on raw socket, if:
+ 1. User requested recverr.
+ 2. Socket is connected (otherwise the error indication
+ is useless without recverr and error is hard.
+ */
+ if (!np->recverr && sk->sk_state != TCP_ESTABLISHED)
+ return;
+
+ harderr = icmpv6_err_convert(type, code, &err);
+ if (type == ICMPV6_PKT_TOOBIG)
+ harderr = (np->pmtudisc == IPV6_PMTUDISC_DO);
+
+ if (np->recverr) {
+ u8 *payload = skb->data;
+ if (!inet->hdrincl)
+ payload += offset;
+ ipv6_icmp_error(sk, skb, err, 0, ntohl(info), payload);
+ }
+
+ if (np->recverr || harderr) {
+ sk->sk_err = err;
+ sk->sk_error_report(sk);
+ }
+}
+
+void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
+ u8 type, u8 code, int inner_offset, __be32 info)
+{
+ struct sock *sk;
+ int hash;
+ const struct in6_addr *saddr, *daddr;
+ struct net *net;
+
+ hash = nexthdr & (RAW_HTABLE_SIZE - 1);
+
+ read_lock(&raw_v6_hashinfo.lock);
+ sk = sk_head(&raw_v6_hashinfo.ht[hash]);
+ if (sk != NULL) {
+ /* Note: ipv6_hdr(skb) != skb->data */
+ const struct ipv6hdr *ip6h = (const struct ipv6hdr *)skb->data;
+ saddr = &ip6h->saddr;
+ daddr = &ip6h->daddr;
+ net = dev_net(skb->dev);
+
+ while ((sk = __raw_v6_lookup(net, sk, nexthdr, saddr, daddr,
+ IP6CB(skb)->iif))) {
+ rawv6_err(sk, skb, NULL, type, code,
+ inner_offset, info);
+ sk = sk_next(sk);
+ }
+ }
+ read_unlock(&raw_v6_hashinfo.lock);
+}
+
+static inline int rawv6_rcv_skb(struct sock *sk, struct sk_buff *skb)
+{
+ if ((raw6_sk(sk)->checksum || rcu_access_pointer(sk->sk_filter)) &&
+ skb_checksum_complete(skb)) {
+ atomic_inc(&sk->sk_drops);
+ kfree_skb(skb);
+ return NET_RX_DROP;
+ }
+
+ /* Charge it to the socket. */
+ skb_dst_drop(skb);
+ if (sock_queue_rcv_skb(sk, skb) < 0) {
+ kfree_skb(skb);
+ return NET_RX_DROP;
+ }
+
+ return 0;
+}
+
+/*
+ * This is next to useless...
+ * if we demultiplex in network layer we don't need the extra call
+ * just to queue the skb...
+ * maybe we could have the network decide upon a hint if it
+ * should call raw_rcv for demultiplexing
+ */
+int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
+{
+ struct inet_sock *inet = inet_sk(sk);
+ struct raw6_sock *rp = raw6_sk(sk);
+
+ if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
+ atomic_inc(&sk->sk_drops);
+ kfree_skb(skb);
+ return NET_RX_DROP;
+ }
+
+ if (!rp->checksum)
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+ if (skb->ip_summed == CHECKSUM_COMPLETE) {
+ skb_postpull_rcsum(skb, skb_network_header(skb),
+ skb_network_header_len(skb));
+ if (!csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+ &ipv6_hdr(skb)->daddr,
+ skb->len, inet->inet_num, skb->csum))
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ }
+ if (!skb_csum_unnecessary(skb))
+ skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+ &ipv6_hdr(skb)->daddr,
+ skb->len,
+ inet->inet_num, 0));
+
+ if (inet->hdrincl) {
+ if (skb_checksum_complete(skb)) {
+ atomic_inc(&sk->sk_drops);
+ kfree_skb(skb);
+ return NET_RX_DROP;
+ }
+ }
+
+ rawv6_rcv_skb(sk, skb);
+ return 0;
+}
+
+
+/*
+ * This should be easy, if there is something there
+ * we return it, otherwise we block.
+ */
+
+static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
+ struct msghdr *msg, size_t len,
+ int noblock, int flags, int *addr_len)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)msg->msg_name;
+ struct sk_buff *skb;
+ size_t copied;
+ int err;
+
+ if (flags & MSG_OOB)
+ return -EOPNOTSUPP;
+
+ if (addr_len)
+ *addr_len=sizeof(*sin6);
+
+ if (flags & MSG_ERRQUEUE)
+ return ipv6_recv_error(sk, msg, len);
+
+ if (np->rxpmtu && np->rxopt.bits.rxpmtu)
+ return ipv6_recv_rxpmtu(sk, msg, len);
+
+ skb = skb_recv_datagram(sk, flags, noblock, &err);
+ if (!skb)
+ goto out;
+
+ copied = skb->len;
+ if (copied > len) {
+ copied = len;
+ msg->msg_flags |= MSG_TRUNC;
+ }
+
+ if (skb_csum_unnecessary(skb)) {
+ err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+ } else if (msg->msg_flags&MSG_TRUNC) {
+ if (__skb_checksum_complete(skb))
+ goto csum_copy_err;
+ err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+ } else {
+ err = skb_copy_and_csum_datagram_iovec(skb, 0, msg->msg_iov);
+ if (err == -EINVAL)
+ goto csum_copy_err;
+ }
+ if (err)
+ goto out_free;
+
+ /* Copy the address. */
+ if (sin6) {
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_port = 0;
+ sin6->sin6_addr = ipv6_hdr(skb)->saddr;
+ sin6->sin6_flowinfo = 0;
+ sin6->sin6_scope_id = 0;
+ if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
+ sin6->sin6_scope_id = IP6CB(skb)->iif;
+ }
+
+ sock_recv_ts_and_drops(msg, sk, skb);
+
+ if (np->rxopt.all)
+ datagram_recv_ctl(sk, msg, skb);
+
+ err = copied;
+ if (flags & MSG_TRUNC)
+ err = skb->len;
+
+out_free:
+ skb_free_datagram(sk, skb);
+out:
+ return err;
+
+csum_copy_err:
+ skb_kill_datagram(sk, skb, flags);
+
+ /* Error for blocking case is chosen to masquerade
+ as some normal condition.
+ */
+ err = (flags&MSG_DONTWAIT) ? -EAGAIN : -EHOSTUNREACH;
+ goto out;
+}
+
+static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
+ struct raw6_sock *rp)
+{
+ struct sk_buff *skb;
+ int err = 0;
+ int offset;
+ int len;
+ int total_len;
+ __wsum tmp_csum;
+ __sum16 csum;
+
+ if (!rp->checksum)
+ goto send;
+
+ if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
+ goto out;
+
+ offset = rp->offset;
+ total_len = inet_sk(sk)->cork.base.length;
+ if (offset >= total_len - 1) {
+ err = -EINVAL;
+ ip6_flush_pending_frames(sk);
+ goto out;
+ }
+
+ /* should be check HW csum miyazawa */
+ if (skb_queue_len(&sk->sk_write_queue) == 1) {
+ /*
+ * Only one fragment on the socket.
+ */
+ tmp_csum = skb->csum;
+ } else {
+ struct sk_buff *csum_skb = NULL;
+ tmp_csum = 0;
+
+ skb_queue_walk(&sk->sk_write_queue, skb) {
+ tmp_csum = csum_add(tmp_csum, skb->csum);
+
+ if (csum_skb)
+ continue;
+
+ len = skb->len - skb_transport_offset(skb);
+ if (offset >= len) {
+ offset -= len;
+ continue;
+ }
+
+ csum_skb = skb;
+ }
+
+ skb = csum_skb;
+ }
+
+ offset += skb_transport_offset(skb);
+ if (skb_copy_bits(skb, offset, &csum, 2))
+ BUG();
+
+ /* in case cksum was not initialized */
+ if (unlikely(csum))
+ tmp_csum = csum_sub(tmp_csum, csum_unfold(csum));
+
+ csum = csum_ipv6_magic(&fl6->saddr, &fl6->daddr,
+ total_len, fl6->flowi6_proto, tmp_csum);
+
+ if (csum == 0 && fl6->flowi6_proto == IPPROTO_UDP)
+ csum = CSUM_MANGLED_0;
+
+ if (skb_store_bits(skb, offset, &csum, 2))
+ BUG();
+
+send:
+ err = ip6_push_pending_frames(sk);
+out:
+ return err;
+}
+
+static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
+ struct flowi6 *fl6, struct dst_entry **dstp,
+ unsigned int flags)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6hdr *iph;
+ struct sk_buff *skb;
+ int err;
+ struct rt6_info *rt = (struct rt6_info *)*dstp;
+ int hlen = LL_RESERVED_SPACE(rt->dst.dev);
+ int tlen = rt->dst.dev->needed_tailroom;
+
+ if (length > rt->dst.dev->mtu) {
+ ipv6_local_error(sk, EMSGSIZE, fl6, rt->dst.dev->mtu);
+ return -EMSGSIZE;
+ }
+ if (flags&MSG_PROBE)
+ goto out;
+
+ skb = sock_alloc_send_skb(sk,
+ length + hlen + tlen + 15,
+ flags & MSG_DONTWAIT, &err);
+ if (skb == NULL)
+ goto error;
+ skb_reserve(skb, hlen);
+
+ skb->priority = sk->sk_priority;
+ skb->mark = sk->sk_mark;
+ skb_dst_set(skb, &rt->dst);
+ *dstp = NULL;
+
+ skb_put(skb, length);
+ skb_reset_network_header(skb);
+ iph = ipv6_hdr(skb);
+
+ skb->ip_summed = CHECKSUM_NONE;
+
+ skb->transport_header = skb->network_header;
+ err = memcpy_fromiovecend((void *)iph, from, 0, length);
+ if (err)
+ goto error_fault;
+
+ IP6_UPD_PO_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
+ err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
+ rt->dst.dev, dst_output);
+ if (err > 0)
+ err = net_xmit_errno(err);
+ if (err)
+ goto error;
+out:
+ return 0;
+
+error_fault:
+ err = -EFAULT;
+ kfree_skb(skb);
+error:
+ IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
+ if (err == -ENOBUFS && !np->recverr)
+ err = 0;
+ return err;
+}
+
+static int rawv6_probe_proto_opt(struct flowi6 *fl6, struct msghdr *msg)
+{
+ struct iovec *iov;
+ u8 __user *type = NULL;
+ u8 __user *code = NULL;
+ u8 len = 0;
+ int probed = 0;
+ int i;
+
+ if (!msg->msg_iov)
+ return 0;
+
+ for (i = 0; i < msg->msg_iovlen; i++) {
+ iov = &msg->msg_iov[i];
+ if (!iov)
+ continue;
+
+ switch (fl6->flowi6_proto) {
+ case IPPROTO_ICMPV6:
+ /* check if one-byte field is readable or not. */
+ if (iov->iov_base && iov->iov_len < 1)
+ break;
+
+ if (!type) {
+ type = iov->iov_base;
+ /* check if code field is readable or not. */
+ if (iov->iov_len > 1)
+ code = type + 1;
+ } else if (!code)
+ code = iov->iov_base;
+
+ if (type && code) {
+ if (get_user(fl6->fl6_icmp_type, type) ||
+ get_user(fl6->fl6_icmp_code, code))
+ return -EFAULT;
+ probed = 1;
+ }
+ break;
+ case IPPROTO_MH:
+ if (iov->iov_base && iov->iov_len < 1)
+ break;
+ /* check if type field is readable or not. */
+ if (iov->iov_len > 2 - len) {
+ u8 __user *p = iov->iov_base;
+ if (get_user(fl6->fl6_mh_type, &p[2 - len]))
+ return -EFAULT;
+ probed = 1;
+ } else
+ len += iov->iov_len;
+
+ break;
+ default:
+ probed = 1;
+ break;
+ }
+ if (probed)
+ break;
+ }
+ return 0;
+}
+
+static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
+ struct msghdr *msg, size_t len)
+{
+ struct ipv6_txoptions opt_space;
+ struct sockaddr_in6 * sin6 = (struct sockaddr_in6 *) msg->msg_name;
+ struct in6_addr *daddr, *final_p, final;
+ struct inet_sock *inet = inet_sk(sk);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct raw6_sock *rp = raw6_sk(sk);
+ struct ipv6_txoptions *opt = NULL;
+ struct ip6_flowlabel *flowlabel = NULL;
+ struct dst_entry *dst = NULL;
+ struct flowi6 fl6;
+ int addr_len = msg->msg_namelen;
+ int hlimit = -1;
+ int tclass = -1;
+ int dontfrag = -1;
+ u16 proto;
+ int err;
+
+ /* Rough check on arithmetic overflow,
+ better check is made in ip6_append_data().
+ */
+ if (len > INT_MAX)
+ return -EMSGSIZE;
+
+ /* Mirror BSD error message compatibility */
+ if (msg->msg_flags & MSG_OOB)
+ return -EOPNOTSUPP;
+
+ /*
+ * Get and verify the address.
+ */
+ memset(&fl6, 0, sizeof(fl6));
+
+ fl6.flowi6_mark = sk->sk_mark;
+
+ if (sin6) {
+ if (addr_len < SIN6_LEN_RFC2133)
+ return -EINVAL;
+
+ if (sin6->sin6_family && sin6->sin6_family != AF_INET6)
+ return -EAFNOSUPPORT;
+
+ /* port is the proto value [0..255] carried in nexthdr */
+ proto = ntohs(sin6->sin6_port);
+
+ if (!proto)
+ proto = inet->inet_num;
+ else if (proto != inet->inet_num)
+ return -EINVAL;
+
+ if (proto > 255)
+ return -EINVAL;
+
+ daddr = &sin6->sin6_addr;
+ if (np->sndflow) {
+ fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
+ if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
+ flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
+ if (flowlabel == NULL)
+ return -EINVAL;
+ daddr = &flowlabel->dst;
+ }
+ }
+
+ /*
+ * Otherwise it will be difficult to maintain
+ * sk->sk_dst_cache.
+ */
+ if (sk->sk_state == TCP_ESTABLISHED &&
+ ipv6_addr_equal(daddr, &np->daddr))
+ daddr = &np->daddr;
+
+ if (addr_len >= sizeof(struct sockaddr_in6) &&
+ sin6->sin6_scope_id &&
+ ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL)
+ fl6.flowi6_oif = sin6->sin6_scope_id;
+ } else {
+ if (sk->sk_state != TCP_ESTABLISHED)
+ return -EDESTADDRREQ;
+
+ proto = inet->inet_num;
+ daddr = &np->daddr;
+ fl6.flowlabel = np->flow_label;
+ }
+
+ if (fl6.flowi6_oif == 0)
+ fl6.flowi6_oif = sk->sk_bound_dev_if;
+
+ if (msg->msg_controllen) {
+ opt = &opt_space;
+ memset(opt, 0, sizeof(struct ipv6_txoptions));
+ opt->tot_len = sizeof(struct ipv6_txoptions);
+
+ err = datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt,
+ &hlimit, &tclass, &dontfrag);
+ if (err < 0) {
+ fl6_sock_release(flowlabel);
+ return err;
+ }
+ if ((fl6.flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
+ flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
+ if (flowlabel == NULL)
+ return -EINVAL;
+ }
+ if (!(opt->opt_nflen|opt->opt_flen))
+ opt = NULL;
+ }
+ if (opt == NULL)
+ opt = np->opt;
+ if (flowlabel)
+ opt = fl6_merge_options(&opt_space, flowlabel, opt);
+ opt = ipv6_fixup_options(&opt_space, opt);
+
+ fl6.flowi6_proto = proto;
+ err = rawv6_probe_proto_opt(&fl6, msg);
+ if (err)
+ goto out;
+
+ if (!ipv6_addr_any(daddr))
+ fl6.daddr = *daddr;
+ else
+ fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */
+ if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr))
+ fl6.saddr = np->saddr;
+
+ final_p = fl6_update_dst(&fl6, opt, &final);
+
+ if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
+ fl6.flowi6_oif = np->mcast_oif;
+ else if (!fl6.flowi6_oif)
+ fl6.flowi6_oif = np->ucast_oif;
+ security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
+
+ dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true);
+ if (IS_ERR(dst)) {
+ err = PTR_ERR(dst);
+ goto out;
+ }
+ if (hlimit < 0) {
+ if (ipv6_addr_is_multicast(&fl6.daddr))
+ hlimit = np->mcast_hops;
+ else
+ hlimit = np->hop_limit;
+ if (hlimit < 0)
+ hlimit = ip6_dst_hoplimit(dst);
+ }
+
+ if (tclass < 0)
+ tclass = np->tclass;
+
+ if (dontfrag < 0)
+ dontfrag = np->dontfrag;
+
+ if (msg->msg_flags&MSG_CONFIRM)
+ goto do_confirm;
+
+back_from_confirm:
+ if (inet->hdrincl)
+ err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl6, &dst, msg->msg_flags);
+ else {
+ lock_sock(sk);
+ err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov,
+ len, 0, hlimit, tclass, opt, &fl6, (struct rt6_info*)dst,
+ msg->msg_flags, dontfrag);
+
+ if (err)
+ ip6_flush_pending_frames(sk);
+ else if (!(msg->msg_flags & MSG_MORE))
+ err = rawv6_push_pending_frames(sk, &fl6, rp);
+ release_sock(sk);
+ }
+done:
+ dst_release(dst);
+out:
+ fl6_sock_release(flowlabel);
+ return err<0?err:len;
+do_confirm:
+ dst_confirm(dst);
+ if (!(msg->msg_flags & MSG_PROBE) || len)
+ goto back_from_confirm;
+ err = 0;
+ goto done;
+}
+
+static int rawv6_seticmpfilter(struct sock *sk, int level, int optname,
+ char __user *optval, int optlen)
+{
+ switch (optname) {
+ case ICMPV6_FILTER:
+ if (optlen > sizeof(struct icmp6_filter))
+ optlen = sizeof(struct icmp6_filter);
+ if (copy_from_user(&raw6_sk(sk)->filter, optval, optlen))
+ return -EFAULT;
+ return 0;
+ default:
+ return -ENOPROTOOPT;
+ }
+
+ return 0;
+}
+
+static int rawv6_geticmpfilter(struct sock *sk, int level, int optname,
+ char __user *optval, int __user *optlen)
+{
+ int len;
+
+ switch (optname) {
+ case ICMPV6_FILTER:
+ if (get_user(len, optlen))
+ return -EFAULT;
+ if (len < 0)
+ return -EINVAL;
+ if (len > sizeof(struct icmp6_filter))
+ len = sizeof(struct icmp6_filter);
+ if (put_user(len, optlen))
+ return -EFAULT;
+ if (copy_to_user(optval, &raw6_sk(sk)->filter, len))
+ return -EFAULT;
+ return 0;
+ default:
+ return -ENOPROTOOPT;
+ }
+
+ return 0;
+}
+
+
+static int do_rawv6_setsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, unsigned int optlen)
+{
+ struct raw6_sock *rp = raw6_sk(sk);
+ int val;
+
+ if (get_user(val, (int __user *)optval))
+ return -EFAULT;
+
+ switch (optname) {
+ case IPV6_CHECKSUM:
+ if (inet_sk(sk)->inet_num == IPPROTO_ICMPV6 &&
+ level == IPPROTO_IPV6) {
+ /*
+ * RFC3542 tells that IPV6_CHECKSUM socket
+ * option in the IPPROTO_IPV6 level is not
+ * allowed on ICMPv6 sockets.
+ * If you want to set it, use IPPROTO_RAW
+ * level IPV6_CHECKSUM socket option
+ * (Linux extension).
+ */
+ return -EINVAL;
+ }
+
+ /* You may get strange result with a positive odd offset;
+ RFC2292bis agrees with me. */
+ if (val > 0 && (val&1))
+ return -EINVAL;
+ if (val < 0) {
+ rp->checksum = 0;
+ } else {
+ rp->checksum = 1;
+ rp->offset = val;
+ }
+
+ return 0;
+
+ default:
+ return -ENOPROTOOPT;
+ }
+}
+
+static int rawv6_setsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, unsigned int optlen)
+{
+ switch (level) {
+ case SOL_RAW:
+ break;
+
+ case SOL_ICMPV6:
+ if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
+ return -EOPNOTSUPP;
+ return rawv6_seticmpfilter(sk, level, optname, optval, optlen);
+ case SOL_IPV6:
+ if (optname == IPV6_CHECKSUM)
+ break;
+ default:
+ return ipv6_setsockopt(sk, level, optname, optval, optlen);
+ }
+
+ return do_rawv6_setsockopt(sk, level, optname, optval, optlen);
+}
+
+#ifdef CONFIG_COMPAT
+static int compat_rawv6_setsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, unsigned int optlen)
+{
+ switch (level) {
+ case SOL_RAW:
+ break;
+ case SOL_ICMPV6:
+ if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
+ return -EOPNOTSUPP;
+ return rawv6_seticmpfilter(sk, level, optname, optval, optlen);
+ case SOL_IPV6:
+ if (optname == IPV6_CHECKSUM)
+ break;
+ default:
+ return compat_ipv6_setsockopt(sk, level, optname,
+ optval, optlen);
+ }
+ return do_rawv6_setsockopt(sk, level, optname, optval, optlen);
+}
+#endif
+
+static int do_rawv6_getsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int __user *optlen)
+{
+ struct raw6_sock *rp = raw6_sk(sk);
+ int val, len;
+
+ if (get_user(len,optlen))
+ return -EFAULT;
+
+ switch (optname) {
+ case IPV6_CHECKSUM:
+ /*
+ * We allow getsockopt() for IPPROTO_IPV6-level
+ * IPV6_CHECKSUM socket option on ICMPv6 sockets
+ * since RFC3542 is silent about it.
+ */
+ if (rp->checksum == 0)
+ val = -1;
+ else
+ val = rp->offset;
+ break;
+
+ default:
+ return -ENOPROTOOPT;
+ }
+
+ len = min_t(unsigned int, sizeof(int), len);
+
+ if (put_user(len, optlen))
+ return -EFAULT;
+ if (copy_to_user(optval,&val,len))
+ return -EFAULT;
+ return 0;
+}
+
+static int rawv6_getsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int __user *optlen)
+{
+ switch (level) {
+ case SOL_RAW:
+ break;
+
+ case SOL_ICMPV6:
+ if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
+ return -EOPNOTSUPP;
+ return rawv6_geticmpfilter(sk, level, optname, optval, optlen);
+ case SOL_IPV6:
+ if (optname == IPV6_CHECKSUM)
+ break;
+ default:
+ return ipv6_getsockopt(sk, level, optname, optval, optlen);
+ }
+
+ return do_rawv6_getsockopt(sk, level, optname, optval, optlen);
+}
+
+#ifdef CONFIG_COMPAT
+static int compat_rawv6_getsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int __user *optlen)
+{
+ switch (level) {
+ case SOL_RAW:
+ break;
+ case SOL_ICMPV6:
+ if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
+ return -EOPNOTSUPP;
+ return rawv6_geticmpfilter(sk, level, optname, optval, optlen);
+ case SOL_IPV6:
+ if (optname == IPV6_CHECKSUM)
+ break;
+ default:
+ return compat_ipv6_getsockopt(sk, level, optname,
+ optval, optlen);
+ }
+ return do_rawv6_getsockopt(sk, level, optname, optval, optlen);
+}
+#endif
+
+static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg)
+{
+ switch (cmd) {
+ case SIOCOUTQ: {
+ int amount = sk_wmem_alloc_get(sk);
+
+ return put_user(amount, (int __user *)arg);
+ }
+ case SIOCINQ: {
+ struct sk_buff *skb;
+ int amount = 0;
+
+ spin_lock_bh(&sk->sk_receive_queue.lock);
+ skb = skb_peek(&sk->sk_receive_queue);
+ if (skb != NULL)
+ amount = skb->tail - skb->transport_header;
+ spin_unlock_bh(&sk->sk_receive_queue.lock);
+ return put_user(amount, (int __user *)arg);
+ }
+
+ default:
+#ifdef CONFIG_IPV6_MROUTE
+ return ip6mr_ioctl(sk, cmd, (void __user *)arg);
+#else
+ return -ENOIOCTLCMD;
+#endif
+ }
+}
+
+#ifdef CONFIG_COMPAT
+static int compat_rawv6_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg)
+{
+ switch (cmd) {
+ case SIOCOUTQ:
+ case SIOCINQ:
+ return -ENOIOCTLCMD;
+ default:
+#ifdef CONFIG_IPV6_MROUTE
+ return ip6mr_compat_ioctl(sk, cmd, compat_ptr(arg));
+#else
+ return -ENOIOCTLCMD;
+#endif
+ }
+}
+#endif
+
+static void rawv6_close(struct sock *sk, long timeout)
+{
+ if (inet_sk(sk)->inet_num == IPPROTO_RAW)
+ ip6_ra_control(sk, -1);
+ ip6mr_sk_done(sk);
+ sk_common_release(sk);
+}
+
+static void raw6_destroy(struct sock *sk)
+{
+ lock_sock(sk);
+ ip6_flush_pending_frames(sk);
+ release_sock(sk);
+
+ inet6_destroy_sock(sk);
+}
+
+static int rawv6_init_sk(struct sock *sk)
+{
+ struct raw6_sock *rp = raw6_sk(sk);
+
+ switch (inet_sk(sk)->inet_num) {
+ case IPPROTO_ICMPV6:
+ rp->checksum = 1;
+ rp->offset = 2;
+ break;
+ case IPPROTO_MH:
+ rp->checksum = 1;
+ rp->offset = 4;
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+struct proto rawv6_prot = {
+ .name = "RAWv6",
+ .owner = THIS_MODULE,
+ .close = rawv6_close,
+ .destroy = raw6_destroy,
+ .connect = ip6_datagram_connect,
+ .disconnect = udp_disconnect,
+ .ioctl = rawv6_ioctl,
+ .init = rawv6_init_sk,
+ .setsockopt = rawv6_setsockopt,
+ .getsockopt = rawv6_getsockopt,
+ .sendmsg = rawv6_sendmsg,
+ .recvmsg = rawv6_recvmsg,
+ .bind = rawv6_bind,
+ .backlog_rcv = rawv6_rcv_skb,
+ .hash = raw_hash_sk,
+ .unhash = raw_unhash_sk,
+ .obj_size = sizeof(struct raw6_sock),
+ .h.raw_hash = &raw_v6_hashinfo,
+#ifdef CONFIG_COMPAT
+ .compat_setsockopt = compat_rawv6_setsockopt,
+ .compat_getsockopt = compat_rawv6_getsockopt,
+ .compat_ioctl = compat_rawv6_ioctl,
+#endif
+};
+
+#ifdef CONFIG_PROC_FS
+static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
+{
+ struct ipv6_pinfo *np = inet6_sk(sp);
+ const struct in6_addr *dest, *src;
+ __u16 destp, srcp;
+
+ dest = &np->daddr;
+ src = &np->rcv_saddr;
+ destp = 0;
+ srcp = inet_sk(sp)->inet_num;
+ seq_printf(seq,
+ "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
+ "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d\n",
+ i,
+ src->s6_addr32[0], src->s6_addr32[1],
+ src->s6_addr32[2], src->s6_addr32[3], srcp,
+ dest->s6_addr32[0], dest->s6_addr32[1],
+ dest->s6_addr32[2], dest->s6_addr32[3], destp,
+ sp->sk_state,
+ sk_wmem_alloc_get(sp),
+ sk_rmem_alloc_get(sp),
+ 0, 0L, 0,
+ sock_i_uid(sp), 0,
+ sock_i_ino(sp),
+ atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops));
+}
+
+static int raw6_seq_show(struct seq_file *seq, void *v)
+{
+ if (v == SEQ_START_TOKEN)
+ seq_printf(seq,
+ " sl "
+ "local_address "
+ "remote_address "
+ "st tx_queue rx_queue tr tm->when retrnsmt"
+ " uid timeout inode ref pointer drops\n");
+ else
+ raw6_sock_seq_show(seq, v, raw_seq_private(seq)->bucket);
+ return 0;
+}
+
+static const struct seq_operations raw6_seq_ops = {
+ .start = raw_seq_start,
+ .next = raw_seq_next,
+ .stop = raw_seq_stop,
+ .show = raw6_seq_show,
+};
+
+static int raw6_seq_open(struct inode *inode, struct file *file)
+{
+ return raw_seq_open(inode, file, &raw_v6_hashinfo, &raw6_seq_ops);
+}
+
+static const struct file_operations raw6_seq_fops = {
+ .owner = THIS_MODULE,
+ .open = raw6_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_net,
+};
+
+static int __net_init raw6_init_net(struct net *net)
+{
+ if (!proc_net_fops_create(net, "raw6", S_IRUGO, &raw6_seq_fops))
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void __net_exit raw6_exit_net(struct net *net)
+{
+ proc_net_remove(net, "raw6");
+}
+
+static struct pernet_operations raw6_net_ops = {
+ .init = raw6_init_net,
+ .exit = raw6_exit_net,
+};
+
+int __init raw6_proc_init(void)
+{
+ return register_pernet_subsys(&raw6_net_ops);
+}
+
+void raw6_proc_exit(void)
+{
+ unregister_pernet_subsys(&raw6_net_ops);
+}
+#endif /* CONFIG_PROC_FS */
+
+/* Same as inet6_dgram_ops, sans udp_poll. */
+static const struct proto_ops inet6_sockraw_ops = {
+ .family = PF_INET6,
+ .owner = THIS_MODULE,
+ .release = inet6_release,
+ .bind = inet6_bind,
+ .connect = inet_dgram_connect, /* ok */
+ .socketpair = sock_no_socketpair, /* a do nothing */
+ .accept = sock_no_accept, /* a do nothing */
+ .getname = inet6_getname,
+ .poll = datagram_poll, /* ok */
+ .ioctl = inet6_ioctl, /* must change */
+ .listen = sock_no_listen, /* ok */
+ .shutdown = inet_shutdown, /* ok */
+ .setsockopt = sock_common_setsockopt, /* ok */
+ .getsockopt = sock_common_getsockopt, /* ok */
+ .sendmsg = inet_sendmsg, /* ok */
+ .recvmsg = sock_common_recvmsg, /* ok */
+ .mmap = sock_no_mmap,
+ .sendpage = sock_no_sendpage,
+#ifdef CONFIG_COMPAT
+ .compat_setsockopt = compat_sock_common_setsockopt,
+ .compat_getsockopt = compat_sock_common_getsockopt,
+#endif
+};
+
+static struct inet_protosw rawv6_protosw = {
+ .type = SOCK_RAW,
+ .protocol = IPPROTO_IP, /* wild card */
+ .prot = &rawv6_prot,
+ .ops = &inet6_sockraw_ops,
+ .no_check = UDP_CSUM_DEFAULT,
+ .flags = INET_PROTOSW_REUSE,
+};
+
+int __init rawv6_init(void)
+{
+ int ret;
+
+ ret = inet6_register_protosw(&rawv6_protosw);
+ if (ret)
+ goto out;
+out:
+ return ret;
+}
+
+void rawv6_exit(void)
+{
+ inet6_unregister_protosw(&rawv6_protosw);
+}
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
new file mode 100644
index 00000000..9447bd69
--- /dev/null
+++ b/net/ipv6/reassembly.c
@@ -0,0 +1,769 @@
+/*
+ * IPv6 fragment reassembly
+ * Linux INET6 implementation
+ *
+ * Authors:
+ * Pedro Roque <roque@di.fc.ul.pt>
+ *
+ * Based on: net/ipv4/ip_fragment.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/*
+ * Fixes:
+ * Andi Kleen Make it work with multiple hosts.
+ * More RFC compliance.
+ *
+ * Horst von Brand Add missing #include <linux/string.h>
+ * Alexey Kuznetsov SMP races, threading, cleanup.
+ * Patrick McHardy LRU queue of frag heads for evictor.
+ * Mitsuru KANDA @USAGI Register inet6_protocol{}.
+ * David Stevens and
+ * YOSHIFUJI,H. @USAGI Always remove fragment header to
+ * calculate ICV correctly.
+ */
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/jiffies.h>
+#include <linux/net.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/in6.h>
+#include <linux/ipv6.h>
+#include <linux/icmpv6.h>
+#include <linux/random.h>
+#include <linux/jhash.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/protocol.h>
+#include <net/transp_v6.h>
+#include <net/rawv6.h>
+#include <net/ndisc.h>
+#include <net/addrconf.h>
+#include <net/inet_frag.h>
+
+struct ip6frag_skb_cb
+{
+ struct inet6_skb_parm h;
+ int offset;
+};
+
+#define FRAG6_CB(skb) ((struct ip6frag_skb_cb*)((skb)->cb))
+
+
+/*
+ * Equivalent of ipv4 struct ipq
+ */
+
+struct frag_queue
+{
+ struct inet_frag_queue q;
+
+ __be32 id; /* fragment id */
+ u32 user;
+ struct in6_addr saddr;
+ struct in6_addr daddr;
+
+ int iif;
+ unsigned int csum;
+ __u16 nhoffset;
+};
+
+static struct inet_frags ip6_frags;
+
+int ip6_frag_nqueues(struct net *net)
+{
+ return net->ipv6.frags.nqueues;
+}
+
+int ip6_frag_mem(struct net *net)
+{
+ return atomic_read(&net->ipv6.frags.mem);
+}
+
+static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
+ struct net_device *dev);
+
+/*
+ * callers should be careful not to use the hash value outside the ipfrag_lock
+ * as doing so could race with ipfrag_hash_rnd being recalculated.
+ */
+unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr,
+ const struct in6_addr *daddr, u32 rnd)
+{
+ u32 c;
+
+ c = jhash_3words((__force u32)saddr->s6_addr32[0],
+ (__force u32)saddr->s6_addr32[1],
+ (__force u32)saddr->s6_addr32[2],
+ rnd);
+
+ c = jhash_3words((__force u32)saddr->s6_addr32[3],
+ (__force u32)daddr->s6_addr32[0],
+ (__force u32)daddr->s6_addr32[1],
+ c);
+
+ c = jhash_3words((__force u32)daddr->s6_addr32[2],
+ (__force u32)daddr->s6_addr32[3],
+ (__force u32)id,
+ c);
+
+ return c & (INETFRAGS_HASHSZ - 1);
+}
+EXPORT_SYMBOL_GPL(inet6_hash_frag);
+
+static unsigned int ip6_hashfn(struct inet_frag_queue *q)
+{
+ struct frag_queue *fq;
+
+ fq = container_of(q, struct frag_queue, q);
+ return inet6_hash_frag(fq->id, &fq->saddr, &fq->daddr, ip6_frags.rnd);
+}
+
+int ip6_frag_match(struct inet_frag_queue *q, void *a)
+{
+ struct frag_queue *fq;
+ struct ip6_create_arg *arg = a;
+
+ fq = container_of(q, struct frag_queue, q);
+ return (fq->id == arg->id && fq->user == arg->user &&
+ ipv6_addr_equal(&fq->saddr, arg->src) &&
+ ipv6_addr_equal(&fq->daddr, arg->dst));
+}
+EXPORT_SYMBOL(ip6_frag_match);
+
+void ip6_frag_init(struct inet_frag_queue *q, void *a)
+{
+ struct frag_queue *fq = container_of(q, struct frag_queue, q);
+ struct ip6_create_arg *arg = a;
+
+ fq->id = arg->id;
+ fq->user = arg->user;
+ fq->saddr = *arg->src;
+ fq->daddr = *arg->dst;
+}
+EXPORT_SYMBOL(ip6_frag_init);
+
+/* Destruction primitives. */
+
+static __inline__ void fq_put(struct frag_queue *fq)
+{
+ inet_frag_put(&fq->q, &ip6_frags);
+}
+
+/* Kill fq entry. It is not destroyed immediately,
+ * because caller (and someone more) holds reference count.
+ */
+static __inline__ void fq_kill(struct frag_queue *fq)
+{
+ inet_frag_kill(&fq->q, &ip6_frags);
+}
+
+static void ip6_evictor(struct net *net, struct inet6_dev *idev)
+{
+ int evicted;
+
+ evicted = inet_frag_evictor(&net->ipv6.frags, &ip6_frags);
+ if (evicted)
+ IP6_ADD_STATS_BH(net, idev, IPSTATS_MIB_REASMFAILS, evicted);
+}
+
+static void ip6_frag_expire(unsigned long data)
+{
+ struct frag_queue *fq;
+ struct net_device *dev = NULL;
+ struct net *net;
+
+ fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
+
+ spin_lock(&fq->q.lock);
+
+ if (fq->q.last_in & INET_FRAG_COMPLETE)
+ goto out;
+
+ fq_kill(fq);
+
+ net = container_of(fq->q.net, struct net, ipv6.frags);
+ rcu_read_lock();
+ dev = dev_get_by_index_rcu(net, fq->iif);
+ if (!dev)
+ goto out_rcu_unlock;
+
+ IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
+ IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
+
+ /* Don't send error if the first segment did not arrive. */
+ if (!(fq->q.last_in & INET_FRAG_FIRST_IN) || !fq->q.fragments)
+ goto out_rcu_unlock;
+
+ /*
+ But use as source device on which LAST ARRIVED
+ segment was received. And do not use fq->dev
+ pointer directly, device might already disappeared.
+ */
+ fq->q.fragments->dev = dev;
+ icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0);
+out_rcu_unlock:
+ rcu_read_unlock();
+out:
+ spin_unlock(&fq->q.lock);
+ fq_put(fq);
+}
+
+static __inline__ struct frag_queue *
+fq_find(struct net *net, __be32 id, const struct in6_addr *src, const struct in6_addr *dst)
+{
+ struct inet_frag_queue *q;
+ struct ip6_create_arg arg;
+ unsigned int hash;
+
+ arg.id = id;
+ arg.user = IP6_DEFRAG_LOCAL_DELIVER;
+ arg.src = src;
+ arg.dst = dst;
+
+ read_lock(&ip6_frags.lock);
+ hash = inet6_hash_frag(id, src, dst, ip6_frags.rnd);
+
+ q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash);
+ if (q == NULL)
+ return NULL;
+
+ return container_of(q, struct frag_queue, q);
+}
+
+static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
+ struct frag_hdr *fhdr, int nhoff)
+{
+ struct sk_buff *prev, *next;
+ struct net_device *dev;
+ int offset, end;
+ struct net *net = dev_net(skb_dst(skb)->dev);
+
+ if (fq->q.last_in & INET_FRAG_COMPLETE)
+ goto err;
+
+ offset = ntohs(fhdr->frag_off) & ~0x7;
+ end = offset + (ntohs(ipv6_hdr(skb)->payload_len) -
+ ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
+
+ if ((unsigned int)end > IPV6_MAXPLEN) {
+ IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_INHDRERRORS);
+ icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
+ ((u8 *)&fhdr->frag_off -
+ skb_network_header(skb)));
+ return -1;
+ }
+
+ if (skb->ip_summed == CHECKSUM_COMPLETE) {
+ const unsigned char *nh = skb_network_header(skb);
+ skb->csum = csum_sub(skb->csum,
+ csum_partial(nh, (u8 *)(fhdr + 1) - nh,
+ 0));
+ }
+
+ /* Is this the final fragment? */
+ if (!(fhdr->frag_off & htons(IP6_MF))) {
+ /* If we already have some bits beyond end
+ * or have different end, the segment is corrupted.
+ */
+ if (end < fq->q.len ||
+ ((fq->q.last_in & INET_FRAG_LAST_IN) && end != fq->q.len))
+ goto err;
+ fq->q.last_in |= INET_FRAG_LAST_IN;
+ fq->q.len = end;
+ } else {
+ /* Check if the fragment is rounded to 8 bytes.
+ * Required by the RFC.
+ */
+ if (end & 0x7) {
+ /* RFC2460 says always send parameter problem in
+ * this case. -DaveM
+ */
+ IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_INHDRERRORS);
+ icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
+ offsetof(struct ipv6hdr, payload_len));
+ return -1;
+ }
+ if (end > fq->q.len) {
+ /* Some bits beyond end -> corruption. */
+ if (fq->q.last_in & INET_FRAG_LAST_IN)
+ goto err;
+ fq->q.len = end;
+ }
+ }
+
+ if (end == offset)
+ goto err;
+
+ /* Point into the IP datagram 'data' part. */
+ if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data))
+ goto err;
+
+ if (pskb_trim_rcsum(skb, end - offset))
+ goto err;
+
+ /* Find out which fragments are in front and at the back of us
+ * in the chain of fragments so far. We must know where to put
+ * this fragment, right?
+ */
+ prev = fq->q.fragments_tail;
+ if (!prev || FRAG6_CB(prev)->offset < offset) {
+ next = NULL;
+ goto found;
+ }
+ prev = NULL;
+ for(next = fq->q.fragments; next != NULL; next = next->next) {
+ if (FRAG6_CB(next)->offset >= offset)
+ break; /* bingo! */
+ prev = next;
+ }
+
+found:
+ /* RFC5722, Section 4, amended by Errata ID : 3089
+ * When reassembling an IPv6 datagram, if
+ * one or more its constituent fragments is determined to be an
+ * overlapping fragment, the entire datagram (and any constituent
+ * fragments) MUST be silently discarded.
+ */
+
+ /* Check for overlap with preceding fragment. */
+ if (prev &&
+ (FRAG6_CB(prev)->offset + prev->len) > offset)
+ goto discard_fq;
+
+ /* Look for overlap with succeeding segment. */
+ if (next && FRAG6_CB(next)->offset < end)
+ goto discard_fq;
+
+ FRAG6_CB(skb)->offset = offset;
+
+ /* Insert this fragment in the chain of fragments. */
+ skb->next = next;
+ if (!next)
+ fq->q.fragments_tail = skb;
+ if (prev)
+ prev->next = skb;
+ else
+ fq->q.fragments = skb;
+
+ dev = skb->dev;
+ if (dev) {
+ fq->iif = dev->ifindex;
+ skb->dev = NULL;
+ }
+ fq->q.stamp = skb->tstamp;
+ fq->q.meat += skb->len;
+ atomic_add(skb->truesize, &fq->q.net->mem);
+
+ /* The first fragment.
+ * nhoffset is obtained from the first fragment, of course.
+ */
+ if (offset == 0) {
+ fq->nhoffset = nhoff;
+ fq->q.last_in |= INET_FRAG_FIRST_IN;
+ }
+
+ if (fq->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
+ fq->q.meat == fq->q.len)
+ return ip6_frag_reasm(fq, prev, dev);
+
+ write_lock(&ip6_frags.lock);
+ list_move_tail(&fq->q.lru_list, &fq->q.net->lru_list);
+ write_unlock(&ip6_frags.lock);
+ return -1;
+
+discard_fq:
+ fq_kill(fq);
+err:
+ IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_REASMFAILS);
+ kfree_skb(skb);
+ return -1;
+}
+
+/*
+ * Check if this packet is complete.
+ * Returns NULL on failure by any reason, and pointer
+ * to current nexthdr field in reassembled frame.
+ *
+ * It is called with locked fq, and caller must check that
+ * queue is eligible for reassembly i.e. it is not COMPLETE,
+ * the last and the first frames arrived and all the bits are here.
+ */
+static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
+ struct net_device *dev)
+{
+ struct net *net = container_of(fq->q.net, struct net, ipv6.frags);
+ struct sk_buff *fp, *head = fq->q.fragments;
+ int payload_len;
+ unsigned int nhoff;
+
+ fq_kill(fq);
+
+ /* Make the one we just received the head. */
+ if (prev) {
+ head = prev->next;
+ fp = skb_clone(head, GFP_ATOMIC);
+
+ if (!fp)
+ goto out_oom;
+
+ fp->next = head->next;
+ if (!fp->next)
+ fq->q.fragments_tail = fp;
+ prev->next = fp;
+
+ skb_morph(head, fq->q.fragments);
+ head->next = fq->q.fragments->next;
+
+ kfree_skb(fq->q.fragments);
+ fq->q.fragments = head;
+ }
+
+ WARN_ON(head == NULL);
+ WARN_ON(FRAG6_CB(head)->offset != 0);
+
+ /* Unfragmented part is taken from the first segment. */
+ payload_len = ((head->data - skb_network_header(head)) -
+ sizeof(struct ipv6hdr) + fq->q.len -
+ sizeof(struct frag_hdr));
+ if (payload_len > IPV6_MAXPLEN)
+ goto out_oversize;
+
+ /* Head of list must not be cloned. */
+ if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC))
+ goto out_oom;
+
+ /* If the first fragment is fragmented itself, we split
+ * it to two chunks: the first with data and paged part
+ * and the second, holding only fragments. */
+ if (skb_has_frag_list(head)) {
+ struct sk_buff *clone;
+ int i, plen = 0;
+
+ if ((clone = alloc_skb(0, GFP_ATOMIC)) == NULL)
+ goto out_oom;
+ clone->next = head->next;
+ head->next = clone;
+ skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
+ skb_frag_list_init(head);
+ for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
+ plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
+ clone->len = clone->data_len = head->data_len - plen;
+ head->data_len -= clone->len;
+ head->len -= clone->len;
+ clone->csum = 0;
+ clone->ip_summed = head->ip_summed;
+ atomic_add(clone->truesize, &fq->q.net->mem);
+ }
+
+ /* We have to remove fragment header from datagram and to relocate
+ * header in order to calculate ICV correctly. */
+ nhoff = fq->nhoffset;
+ skb_network_header(head)[nhoff] = skb_transport_header(head)[0];
+ memmove(head->head + sizeof(struct frag_hdr), head->head,
+ (head->data - head->head) - sizeof(struct frag_hdr));
+ head->mac_header += sizeof(struct frag_hdr);
+ head->network_header += sizeof(struct frag_hdr);
+
+ skb_shinfo(head)->frag_list = head->next;
+ skb_reset_transport_header(head);
+ skb_push(head, head->data - skb_network_header(head));
+
+ for (fp=head->next; fp; fp = fp->next) {
+ head->data_len += fp->len;
+ head->len += fp->len;
+ if (head->ip_summed != fp->ip_summed)
+ head->ip_summed = CHECKSUM_NONE;
+ else if (head->ip_summed == CHECKSUM_COMPLETE)
+ head->csum = csum_add(head->csum, fp->csum);
+ head->truesize += fp->truesize;
+ }
+ atomic_sub(head->truesize, &fq->q.net->mem);
+
+ head->next = NULL;
+ head->dev = dev;
+ head->tstamp = fq->q.stamp;
+ ipv6_hdr(head)->payload_len = htons(payload_len);
+ IP6CB(head)->nhoff = nhoff;
+
+ /* Yes, and fold redundant checksum back. 8) */
+ if (head->ip_summed == CHECKSUM_COMPLETE)
+ head->csum = csum_partial(skb_network_header(head),
+ skb_network_header_len(head),
+ head->csum);
+
+ rcu_read_lock();
+ IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
+ rcu_read_unlock();
+ fq->q.fragments = NULL;
+ fq->q.fragments_tail = NULL;
+ return 1;
+
+out_oversize:
+ if (net_ratelimit())
+ printk(KERN_DEBUG "ip6_frag_reasm: payload len = %d\n", payload_len);
+ goto out_fail;
+out_oom:
+ if (net_ratelimit())
+ printk(KERN_DEBUG "ip6_frag_reasm: no memory for reassembly\n");
+out_fail:
+ rcu_read_lock();
+ IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
+ rcu_read_unlock();
+ return -1;
+}
+
+static int ipv6_frag_rcv(struct sk_buff *skb)
+{
+ struct frag_hdr *fhdr;
+ struct frag_queue *fq;
+ const struct ipv6hdr *hdr = ipv6_hdr(skb);
+ struct net *net = dev_net(skb_dst(skb)->dev);
+
+ IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS);
+
+ /* Jumbo payload inhibits frag. header */
+ if (hdr->payload_len==0)
+ goto fail_hdr;
+
+ if (!pskb_may_pull(skb, (skb_transport_offset(skb) +
+ sizeof(struct frag_hdr))))
+ goto fail_hdr;
+
+ hdr = ipv6_hdr(skb);
+ fhdr = (struct frag_hdr *)skb_transport_header(skb);
+
+ if (!(fhdr->frag_off & htons(0xFFF9))) {
+ /* It is not a fragmented frame */
+ skb->transport_header += sizeof(struct frag_hdr);
+ IP6_INC_STATS_BH(net,
+ ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMOKS);
+
+ IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb);
+ return 1;
+ }
+
+ if (atomic_read(&net->ipv6.frags.mem) > net->ipv6.frags.high_thresh)
+ ip6_evictor(net, ip6_dst_idev(skb_dst(skb)));
+
+ fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr);
+ if (fq != NULL) {
+ int ret;
+
+ spin_lock(&fq->q.lock);
+
+ ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff);
+
+ spin_unlock(&fq->q.lock);
+ fq_put(fq);
+ return ret;
+ }
+
+ IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS);
+ kfree_skb(skb);
+ return -1;
+
+fail_hdr:
+ IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INHDRERRORS);
+ icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb_network_header_len(skb));
+ return -1;
+}
+
+static const struct inet6_protocol frag_protocol =
+{
+ .handler = ipv6_frag_rcv,
+ .flags = INET6_PROTO_NOPOLICY,
+};
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table ip6_frags_ns_ctl_table[] = {
+ {
+ .procname = "ip6frag_high_thresh",
+ .data = &init_net.ipv6.frags.high_thresh,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "ip6frag_low_thresh",
+ .data = &init_net.ipv6.frags.low_thresh,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "ip6frag_time",
+ .data = &init_net.ipv6.frags.timeout,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ { }
+};
+
+static struct ctl_table ip6_frags_ctl_table[] = {
+ {
+ .procname = "ip6frag_secret_interval",
+ .data = &ip6_frags.secret_interval,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ { }
+};
+
+static int __net_init ip6_frags_ns_sysctl_register(struct net *net)
+{
+ struct ctl_table *table;
+ struct ctl_table_header *hdr;
+
+ table = ip6_frags_ns_ctl_table;
+ if (!net_eq(net, &init_net)) {
+ table = kmemdup(table, sizeof(ip6_frags_ns_ctl_table), GFP_KERNEL);
+ if (table == NULL)
+ goto err_alloc;
+
+ table[0].data = &net->ipv6.frags.high_thresh;
+ table[1].data = &net->ipv6.frags.low_thresh;
+ table[2].data = &net->ipv6.frags.timeout;
+ }
+
+ hdr = register_net_sysctl_table(net, net_ipv6_ctl_path, table);
+ if (hdr == NULL)
+ goto err_reg;
+
+ net->ipv6.sysctl.frags_hdr = hdr;
+ return 0;
+
+err_reg:
+ if (!net_eq(net, &init_net))
+ kfree(table);
+err_alloc:
+ return -ENOMEM;
+}
+
+static void __net_exit ip6_frags_ns_sysctl_unregister(struct net *net)
+{
+ struct ctl_table *table;
+
+ table = net->ipv6.sysctl.frags_hdr->ctl_table_arg;
+ unregister_net_sysctl_table(net->ipv6.sysctl.frags_hdr);
+ if (!net_eq(net, &init_net))
+ kfree(table);
+}
+
+static struct ctl_table_header *ip6_ctl_header;
+
+static int ip6_frags_sysctl_register(void)
+{
+ ip6_ctl_header = register_net_sysctl_rotable(net_ipv6_ctl_path,
+ ip6_frags_ctl_table);
+ return ip6_ctl_header == NULL ? -ENOMEM : 0;
+}
+
+static void ip6_frags_sysctl_unregister(void)
+{
+ unregister_net_sysctl_table(ip6_ctl_header);
+}
+#else
+static inline int ip6_frags_ns_sysctl_register(struct net *net)
+{
+ return 0;
+}
+
+static inline void ip6_frags_ns_sysctl_unregister(struct net *net)
+{
+}
+
+static inline int ip6_frags_sysctl_register(void)
+{
+ return 0;
+}
+
+static inline void ip6_frags_sysctl_unregister(void)
+{
+}
+#endif
+
+static int __net_init ipv6_frags_init_net(struct net *net)
+{
+ net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
+ net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
+ net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT;
+
+ inet_frags_init_net(&net->ipv6.frags);
+
+ return ip6_frags_ns_sysctl_register(net);
+}
+
+static void __net_exit ipv6_frags_exit_net(struct net *net)
+{
+ ip6_frags_ns_sysctl_unregister(net);
+ inet_frags_exit_net(&net->ipv6.frags, &ip6_frags);
+}
+
+static struct pernet_operations ip6_frags_ops = {
+ .init = ipv6_frags_init_net,
+ .exit = ipv6_frags_exit_net,
+};
+
+int __init ipv6_frag_init(void)
+{
+ int ret;
+
+ ret = inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT);
+ if (ret)
+ goto out;
+
+ ret = ip6_frags_sysctl_register();
+ if (ret)
+ goto err_sysctl;
+
+ ret = register_pernet_subsys(&ip6_frags_ops);
+ if (ret)
+ goto err_pernet;
+
+ ip6_frags.hashfn = ip6_hashfn;
+ ip6_frags.constructor = ip6_frag_init;
+ ip6_frags.destructor = NULL;
+ ip6_frags.skb_free = NULL;
+ ip6_frags.qsize = sizeof(struct frag_queue);
+ ip6_frags.match = ip6_frag_match;
+ ip6_frags.frag_expire = ip6_frag_expire;
+ ip6_frags.secret_interval = 10 * 60 * HZ;
+ inet_frags_init(&ip6_frags);
+out:
+ return ret;
+
+err_pernet:
+ ip6_frags_sysctl_unregister();
+err_sysctl:
+ inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT);
+ goto out;
+}
+
+void ipv6_frag_exit(void)
+{
+ inet_frags_fini(&ip6_frags);
+ ip6_frags_sysctl_unregister();
+ unregister_pernet_subsys(&ip6_frags_ops);
+ inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT);
+}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
new file mode 100644
index 00000000..c4920ca8
--- /dev/null
+++ b/net/ipv6/route.c
@@ -0,0 +1,3104 @@
+/*
+ * Linux INET6 implementation
+ * FIB front-end.
+ *
+ * Authors:
+ * Pedro Roque <roque@di.fc.ul.pt>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/* Changes:
+ *
+ * YOSHIFUJI Hideaki @USAGI
+ * reworked default router selection.
+ * - respect outgoing interface
+ * - select from (probably) reachable routers (i.e.
+ * routers in REACHABLE, STALE, DELAY or PROBE states).
+ * - always select the same router if it is (probably)
+ * reachable. otherwise, round-robin the list.
+ * Ville Nuorvala
+ * Fixed routing subtrees.
+ */
+
+#include <linux/capability.h>
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/times.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/route.h>
+#include <linux/netdevice.h>
+#include <linux/in6.h>
+#include <linux/mroute6.h>
+#include <linux/init.h>
+#include <linux/if_arp.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/nsproxy.h>
+#include <linux/slab.h>
+#include <net/net_namespace.h>
+#include <net/snmp.h>
+#include <net/ipv6.h>
+#include <net/ip6_fib.h>
+#include <net/ip6_route.h>
+#include <net/ndisc.h>
+#include <net/addrconf.h>
+#include <net/tcp.h>
+#include <linux/rtnetlink.h>
+#include <net/dst.h>
+#include <net/xfrm.h>
+#include <net/netevent.h>
+#include <net/netlink.h>
+
+#include <asm/uaccess.h>
+
+#ifdef CONFIG_SYSCTL
+#include <linux/sysctl.h>
+#endif
+
+static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
+ const struct in6_addr *dest);
+static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
+static unsigned int ip6_default_advmss(const struct dst_entry *dst);
+static unsigned int ip6_mtu(const struct dst_entry *dst);
+static struct dst_entry *ip6_negative_advice(struct dst_entry *);
+static void ip6_dst_destroy(struct dst_entry *);
+static void ip6_dst_ifdown(struct dst_entry *,
+ struct net_device *dev, int how);
+static int ip6_dst_gc(struct dst_ops *ops);
+
+static int ip6_pkt_discard(struct sk_buff *skb);
+static int ip6_pkt_discard_out(struct sk_buff *skb);
+static void ip6_link_failure(struct sk_buff *skb);
+static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
+
+#ifdef CONFIG_IPV6_ROUTE_INFO
+static struct rt6_info *rt6_add_route_info(struct net *net,
+ const struct in6_addr *prefix, int prefixlen,
+ const struct in6_addr *gwaddr, int ifindex,
+ unsigned pref);
+static struct rt6_info *rt6_get_route_info(struct net *net,
+ const struct in6_addr *prefix, int prefixlen,
+ const struct in6_addr *gwaddr, int ifindex);
+#endif
+
+static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
+{
+ struct rt6_info *rt = (struct rt6_info *) dst;
+ struct inet_peer *peer;
+ u32 *p = NULL;
+
+ if (!(rt->dst.flags & DST_HOST))
+ return NULL;
+
+ if (!rt->rt6i_peer)
+ rt6_bind_peer(rt, 1);
+
+ peer = rt->rt6i_peer;
+ if (peer) {
+ u32 *old_p = __DST_METRICS_PTR(old);
+ unsigned long prev, new;
+
+ p = peer->metrics;
+ if (inet_metrics_new(peer))
+ memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
+
+ new = (unsigned long) p;
+ prev = cmpxchg(&dst->_metrics, old, new);
+
+ if (prev != old) {
+ p = __DST_METRICS_PTR(prev);
+ if (prev & DST_METRICS_READ_ONLY)
+ p = NULL;
+ }
+ }
+ return p;
+}
+
+static inline const void *choose_neigh_daddr(struct rt6_info *rt, const void *daddr)
+{
+ struct in6_addr *p = &rt->rt6i_gateway;
+
+ if (!ipv6_addr_any(p))
+ return (const void *) p;
+ return daddr;
+}
+
+static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
+{
+ struct rt6_info *rt = (struct rt6_info *) dst;
+ struct neighbour *n;
+
+ daddr = choose_neigh_daddr(rt, daddr);
+ n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
+ if (n)
+ return n;
+ return neigh_create(&nd_tbl, daddr, dst->dev);
+}
+
+static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
+{
+ struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
+ if (!n) {
+ n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
+ if (IS_ERR(n))
+ return PTR_ERR(n);
+ }
+ dst_set_neighbour(&rt->dst, n);
+
+ return 0;
+}
+
+static struct dst_ops ip6_dst_ops_template = {
+ .family = AF_INET6,
+ .protocol = cpu_to_be16(ETH_P_IPV6),
+ .gc = ip6_dst_gc,
+ .gc_thresh = 1024,
+ .check = ip6_dst_check,
+ .default_advmss = ip6_default_advmss,
+ .mtu = ip6_mtu,
+ .cow_metrics = ipv6_cow_metrics,
+ .destroy = ip6_dst_destroy,
+ .ifdown = ip6_dst_ifdown,
+ .negative_advice = ip6_negative_advice,
+ .link_failure = ip6_link_failure,
+ .update_pmtu = ip6_rt_update_pmtu,
+ .local_out = __ip6_local_out,
+ .neigh_lookup = ip6_neigh_lookup,
+};
+
+static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
+{
+ unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
+
+ return mtu ? : dst->dev->mtu;
+}
+
+static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
+{
+}
+
+static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
+ unsigned long old)
+{
+ return NULL;
+}
+
+static struct dst_ops ip6_dst_blackhole_ops = {
+ .family = AF_INET6,
+ .protocol = cpu_to_be16(ETH_P_IPV6),
+ .destroy = ip6_dst_destroy,
+ .check = ip6_dst_check,
+ .mtu = ip6_blackhole_mtu,
+ .default_advmss = ip6_default_advmss,
+ .update_pmtu = ip6_rt_blackhole_update_pmtu,
+ .cow_metrics = ip6_rt_blackhole_cow_metrics,
+ .neigh_lookup = ip6_neigh_lookup,
+};
+
+static const u32 ip6_template_metrics[RTAX_MAX] = {
+ [RTAX_HOPLIMIT - 1] = 255,
+};
+
+static struct rt6_info ip6_null_entry_template = {
+ .dst = {
+ .__refcnt = ATOMIC_INIT(1),
+ .__use = 1,
+ .obsolete = -1,
+ .error = -ENETUNREACH,
+ .input = ip6_pkt_discard,
+ .output = ip6_pkt_discard_out,
+ },
+ .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
+ .rt6i_protocol = RTPROT_KERNEL,
+ .rt6i_metric = ~(u32) 0,
+ .rt6i_ref = ATOMIC_INIT(1),
+};
+
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+
+static int ip6_pkt_prohibit(struct sk_buff *skb);
+static int ip6_pkt_prohibit_out(struct sk_buff *skb);
+
+static struct rt6_info ip6_prohibit_entry_template = {
+ .dst = {
+ .__refcnt = ATOMIC_INIT(1),
+ .__use = 1,
+ .obsolete = -1,
+ .error = -EACCES,
+ .input = ip6_pkt_prohibit,
+ .output = ip6_pkt_prohibit_out,
+ },
+ .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
+ .rt6i_protocol = RTPROT_KERNEL,
+ .rt6i_metric = ~(u32) 0,
+ .rt6i_ref = ATOMIC_INIT(1),
+};
+
+static struct rt6_info ip6_blk_hole_entry_template = {
+ .dst = {
+ .__refcnt = ATOMIC_INIT(1),
+ .__use = 1,
+ .obsolete = -1,
+ .error = -EINVAL,
+ .input = dst_discard,
+ .output = dst_discard,
+ },
+ .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
+ .rt6i_protocol = RTPROT_KERNEL,
+ .rt6i_metric = ~(u32) 0,
+ .rt6i_ref = ATOMIC_INIT(1),
+};
+
+#endif
+
+/* allocate dst with ip6_dst_ops */
+static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
+ struct net_device *dev,
+ int flags)
+{
+ struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
+
+ if (rt)
+ memset(&rt->rt6i_table, 0,
+ sizeof(*rt) - sizeof(struct dst_entry));
+
+ return rt;
+}
+
+static void ip6_dst_destroy(struct dst_entry *dst)
+{
+ struct rt6_info *rt = (struct rt6_info *)dst;
+ struct inet6_dev *idev = rt->rt6i_idev;
+ struct inet_peer *peer = rt->rt6i_peer;
+
+ if (!(rt->dst.flags & DST_HOST))
+ dst_destroy_metrics_generic(dst);
+
+ if (idev) {
+ rt->rt6i_idev = NULL;
+ in6_dev_put(idev);
+ }
+
+ if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
+ dst_release(dst->from);
+
+ if (peer) {
+ rt->rt6i_peer = NULL;
+ inet_putpeer(peer);
+ }
+}
+
+static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
+
+static u32 rt6_peer_genid(void)
+{
+ return atomic_read(&__rt6_peer_genid);
+}
+
+void rt6_bind_peer(struct rt6_info *rt, int create)
+{
+ struct inet_peer *peer;
+
+ peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
+ if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
+ inet_putpeer(peer);
+ else
+ rt->rt6i_peer_genid = rt6_peer_genid();
+}
+
+static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
+ int how)
+{
+ struct rt6_info *rt = (struct rt6_info *)dst;
+ struct inet6_dev *idev = rt->rt6i_idev;
+ struct net_device *loopback_dev =
+ dev_net(dev)->loopback_dev;
+
+ if (dev != loopback_dev && idev && idev->dev == dev) {
+ struct inet6_dev *loopback_idev =
+ in6_dev_get(loopback_dev);
+ if (loopback_idev) {
+ rt->rt6i_idev = loopback_idev;
+ in6_dev_put(idev);
+ }
+ }
+}
+
+static __inline__ int rt6_check_expired(const struct rt6_info *rt)
+{
+ struct rt6_info *ort = NULL;
+
+ if (rt->rt6i_flags & RTF_EXPIRES) {
+ if (time_after(jiffies, rt->dst.expires))
+ return 1;
+ } else if (rt->dst.from) {
+ ort = (struct rt6_info *) rt->dst.from;
+ return (ort->rt6i_flags & RTF_EXPIRES) &&
+ time_after(jiffies, ort->dst.expires);
+ }
+ return 0;
+}
+
+static inline int rt6_need_strict(const struct in6_addr *daddr)
+{
+ return ipv6_addr_type(daddr) &
+ (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
+}
+
+/*
+ * Route lookup. Any table->tb6_lock is implied.
+ */
+
+static inline struct rt6_info *rt6_device_match(struct net *net,
+ struct rt6_info *rt,
+ const struct in6_addr *saddr,
+ int oif,
+ int flags)
+{
+ struct rt6_info *local = NULL;
+ struct rt6_info *sprt;
+
+ if (!oif && ipv6_addr_any(saddr))
+ goto out;
+
+ for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
+ struct net_device *dev = sprt->dst.dev;
+
+ if (oif) {
+ if (dev->ifindex == oif)
+ return sprt;
+ if (dev->flags & IFF_LOOPBACK) {
+ if (!sprt->rt6i_idev ||
+ sprt->rt6i_idev->dev->ifindex != oif) {
+ if (flags & RT6_LOOKUP_F_IFACE && oif)
+ continue;
+ if (local && (!oif ||
+ local->rt6i_idev->dev->ifindex == oif))
+ continue;
+ }
+ local = sprt;
+ }
+ } else {
+ if (ipv6_chk_addr(net, saddr, dev,
+ flags & RT6_LOOKUP_F_IFACE))
+ return sprt;
+ }
+ }
+
+ if (oif) {
+ if (local)
+ return local;
+
+ if (flags & RT6_LOOKUP_F_IFACE)
+ return net->ipv6.ip6_null_entry;
+ }
+out:
+ return rt;
+}
+
+#ifdef CONFIG_IPV6_ROUTER_PREF
+static void rt6_probe(struct rt6_info *rt)
+{
+ struct neighbour *neigh;
+ /*
+ * Okay, this does not seem to be appropriate
+ * for now, however, we need to check if it
+ * is really so; aka Router Reachability Probing.
+ *
+ * Router Reachability Probe MUST be rate-limited
+ * to no more than one per minute.
+ */
+ rcu_read_lock();
+ neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL;
+ if (!neigh || (neigh->nud_state & NUD_VALID))
+ goto out;
+ read_lock_bh(&neigh->lock);
+ if (!(neigh->nud_state & NUD_VALID) &&
+ time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
+ struct in6_addr mcaddr;
+ struct in6_addr *target;
+
+ neigh->updated = jiffies;
+ read_unlock_bh(&neigh->lock);
+
+ target = (struct in6_addr *)&neigh->primary_key;
+ addrconf_addr_solict_mult(target, &mcaddr);
+ ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
+ } else {
+ read_unlock_bh(&neigh->lock);
+ }
+out:
+ rcu_read_unlock();
+}
+#else
+static inline void rt6_probe(struct rt6_info *rt)
+{
+}
+#endif
+
+/*
+ * Default Router Selection (RFC 2461 6.3.6)
+ */
+static inline int rt6_check_dev(struct rt6_info *rt, int oif)
+{
+ struct net_device *dev = rt->dst.dev;
+ if (!oif || dev->ifindex == oif)
+ return 2;
+ if ((dev->flags & IFF_LOOPBACK) &&
+ rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
+ return 1;
+ return 0;
+}
+
+static inline int rt6_check_neigh(struct rt6_info *rt)
+{
+ struct neighbour *neigh;
+ int m;
+
+ rcu_read_lock();
+ neigh = dst_get_neighbour_noref(&rt->dst);
+ if (rt->rt6i_flags & RTF_NONEXTHOP ||
+ !(rt->rt6i_flags & RTF_GATEWAY))
+ m = 1;
+ else if (neigh) {
+ read_lock_bh(&neigh->lock);
+ if (neigh->nud_state & NUD_VALID)
+ m = 2;
+#ifdef CONFIG_IPV6_ROUTER_PREF
+ else if (neigh->nud_state & NUD_FAILED)
+ m = 0;
+#endif
+ else
+ m = 1;
+ read_unlock_bh(&neigh->lock);
+ } else
+ m = 0;
+ rcu_read_unlock();
+ return m;
+}
+
+static int rt6_score_route(struct rt6_info *rt, int oif,
+ int strict)
+{
+ int m, n;
+
+ m = rt6_check_dev(rt, oif);
+ if (!m && (strict & RT6_LOOKUP_F_IFACE))
+ return -1;
+#ifdef CONFIG_IPV6_ROUTER_PREF
+ m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
+#endif
+ n = rt6_check_neigh(rt);
+ if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
+ return -1;
+ return m;
+}
+
+static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
+ int *mpri, struct rt6_info *match)
+{
+ int m;
+
+ if (rt6_check_expired(rt))
+ goto out;
+
+ m = rt6_score_route(rt, oif, strict);
+ if (m < 0)
+ goto out;
+
+ if (m > *mpri) {
+ if (strict & RT6_LOOKUP_F_REACHABLE)
+ rt6_probe(match);
+ *mpri = m;
+ match = rt;
+ } else if (strict & RT6_LOOKUP_F_REACHABLE) {
+ rt6_probe(rt);
+ }
+
+out:
+ return match;
+}
+
+static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
+ struct rt6_info *rr_head,
+ u32 metric, int oif, int strict)
+{
+ struct rt6_info *rt, *match;
+ int mpri = -1;
+
+ match = NULL;
+ for (rt = rr_head; rt && rt->rt6i_metric == metric;
+ rt = rt->dst.rt6_next)
+ match = find_match(rt, oif, strict, &mpri, match);
+ for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
+ rt = rt->dst.rt6_next)
+ match = find_match(rt, oif, strict, &mpri, match);
+
+ return match;
+}
+
+static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
+{
+ struct rt6_info *match, *rt0;
+ struct net *net;
+
+ rt0 = fn->rr_ptr;
+ if (!rt0)
+ fn->rr_ptr = rt0 = fn->leaf;
+
+ match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
+
+ if (!match &&
+ (strict & RT6_LOOKUP_F_REACHABLE)) {
+ struct rt6_info *next = rt0->dst.rt6_next;
+
+ /* no entries matched; do round-robin */
+ if (!next || next->rt6i_metric != rt0->rt6i_metric)
+ next = fn->leaf;
+
+ if (next != rt0)
+ fn->rr_ptr = next;
+ }
+
+ net = dev_net(rt0->dst.dev);
+ return match ? match : net->ipv6.ip6_null_entry;
+}
+
+#ifdef CONFIG_IPV6_ROUTE_INFO
+int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
+ const struct in6_addr *gwaddr)
+{
+ struct net *net = dev_net(dev);
+ struct route_info *rinfo = (struct route_info *) opt;
+ struct in6_addr prefix_buf, *prefix;
+ unsigned int pref;
+ unsigned long lifetime;
+ struct rt6_info *rt;
+
+ if (len < sizeof(struct route_info)) {
+ return -EINVAL;
+ }
+
+ /* Sanity check for prefix_len and length */
+ if (rinfo->length > 3) {
+ return -EINVAL;
+ } else if (rinfo->prefix_len > 128) {
+ return -EINVAL;
+ } else if (rinfo->prefix_len > 64) {
+ if (rinfo->length < 2) {
+ return -EINVAL;
+ }
+ } else if (rinfo->prefix_len > 0) {
+ if (rinfo->length < 1) {
+ return -EINVAL;
+ }
+ }
+
+ pref = rinfo->route_pref;
+ if (pref == ICMPV6_ROUTER_PREF_INVALID)
+ return -EINVAL;
+
+ lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
+
+ if (rinfo->length == 3)
+ prefix = (struct in6_addr *)rinfo->prefix;
+ else {
+ /* this function is safe */
+ ipv6_addr_prefix(&prefix_buf,
+ (struct in6_addr *)rinfo->prefix,
+ rinfo->prefix_len);
+ prefix = &prefix_buf;
+ }
+
+ rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
+ dev->ifindex);
+
+ if (rt && !lifetime) {
+ ip6_del_rt(rt);
+ rt = NULL;
+ }
+
+ if (!rt && lifetime)
+ rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
+ pref);
+ else if (rt)
+ rt->rt6i_flags = RTF_ROUTEINFO |
+ (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
+
+ if (rt) {
+ if (!addrconf_finite_timeout(lifetime))
+ rt6_clean_expires(rt);
+ else
+ rt6_set_expires(rt, jiffies + HZ * lifetime);
+
+ dst_release(&rt->dst);
+ }
+ return 0;
+}
+#endif
+
+#define BACKTRACK(__net, saddr) \
+do { \
+ if (rt == __net->ipv6.ip6_null_entry) { \
+ struct fib6_node *pn; \
+ while (1) { \
+ if (fn->fn_flags & RTN_TL_ROOT) \
+ goto out; \
+ pn = fn->parent; \
+ if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
+ fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
+ else \
+ fn = pn; \
+ if (fn->fn_flags & RTN_RTINFO) \
+ goto restart; \
+ } \
+ } \
+} while (0)
+
+static struct rt6_info *ip6_pol_route_lookup(struct net *net,
+ struct fib6_table *table,
+ struct flowi6 *fl6, int flags)
+{
+ struct fib6_node *fn;
+ struct rt6_info *rt;
+
+ read_lock_bh(&table->tb6_lock);
+ fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
+restart:
+ rt = fn->leaf;
+ rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
+ BACKTRACK(net, &fl6->saddr);
+out:
+ dst_use(&rt->dst, jiffies);
+ read_unlock_bh(&table->tb6_lock);
+ return rt;
+
+}
+
+struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
+ int flags)
+{
+ return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
+}
+EXPORT_SYMBOL_GPL(ip6_route_lookup);
+
+struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
+ const struct in6_addr *saddr, int oif, int strict)
+{
+ struct flowi6 fl6 = {
+ .flowi6_oif = oif,
+ .daddr = *daddr,
+ };
+ struct dst_entry *dst;
+ int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
+
+ if (saddr) {
+ memcpy(&fl6.saddr, saddr, sizeof(*saddr));
+ flags |= RT6_LOOKUP_F_HAS_SADDR;
+ }
+
+ dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
+ if (dst->error == 0)
+ return (struct rt6_info *) dst;
+
+ dst_release(dst);
+
+ return NULL;
+}
+
+EXPORT_SYMBOL(rt6_lookup);
+
+/* ip6_ins_rt is called with FREE table->tb6_lock.
+ It takes new route entry, the addition fails by any reason the
+ route is freed. In any case, if caller does not hold it, it may
+ be destroyed.
+ */
+
+static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
+{
+ int err;
+ struct fib6_table *table;
+
+ table = rt->rt6i_table;
+ write_lock_bh(&table->tb6_lock);
+ err = fib6_add(&table->tb6_root, rt, info);
+ write_unlock_bh(&table->tb6_lock);
+
+ return err;
+}
+
+int ip6_ins_rt(struct rt6_info *rt)
+{
+ struct nl_info info = {
+ .nl_net = dev_net(rt->dst.dev),
+ };
+ return __ip6_ins_rt(rt, &info);
+}
+
+static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr)
+{
+ struct rt6_info *rt;
+
+ /*
+ * Clone the route.
+ */
+
+ rt = ip6_rt_copy(ort, daddr);
+
+ if (rt) {
+ int attempts = !in_softirq();
+
+ if (!(rt->rt6i_flags & RTF_GATEWAY)) {
+ if (ort->rt6i_dst.plen != 128 &&
+ ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
+ rt->rt6i_flags |= RTF_ANYCAST;
+ rt->rt6i_gateway = *daddr;
+ }
+
+ rt->rt6i_flags |= RTF_CACHE;
+
+#ifdef CONFIG_IPV6_SUBTREES
+ if (rt->rt6i_src.plen && saddr) {
+ rt->rt6i_src.addr = *saddr;
+ rt->rt6i_src.plen = 128;
+ }
+#endif
+
+ retry:
+ if (rt6_bind_neighbour(rt, rt->dst.dev)) {
+ struct net *net = dev_net(rt->dst.dev);
+ int saved_rt_min_interval =
+ net->ipv6.sysctl.ip6_rt_gc_min_interval;
+ int saved_rt_elasticity =
+ net->ipv6.sysctl.ip6_rt_gc_elasticity;
+
+ if (attempts-- > 0) {
+ net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
+ net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
+
+ ip6_dst_gc(&net->ipv6.ip6_dst_ops);
+
+ net->ipv6.sysctl.ip6_rt_gc_elasticity =
+ saved_rt_elasticity;
+ net->ipv6.sysctl.ip6_rt_gc_min_interval =
+ saved_rt_min_interval;
+ goto retry;
+ }
+
+ if (net_ratelimit())
+ printk(KERN_WARNING
+ "ipv6: Neighbour table overflow.\n");
+ dst_free(&rt->dst);
+ return NULL;
+ }
+ }
+
+ return rt;
+}
+
+static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
+ const struct in6_addr *daddr)
+{
+ struct rt6_info *rt = ip6_rt_copy(ort, daddr);
+
+ if (rt) {
+ rt->rt6i_flags |= RTF_CACHE;
+ dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst)));
+ }
+ return rt;
+}
+
+static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
+ struct flowi6 *fl6, int flags)
+{
+ struct fib6_node *fn;
+ struct rt6_info *rt, *nrt;
+ int strict = 0;
+ int attempts = 3;
+ int err;
+ int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
+
+ strict |= flags & RT6_LOOKUP_F_IFACE;
+
+relookup:
+ read_lock_bh(&table->tb6_lock);
+
+restart_2:
+ fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
+
+restart:
+ rt = rt6_select(fn, oif, strict | reachable);
+
+ BACKTRACK(net, &fl6->saddr);
+ if (rt == net->ipv6.ip6_null_entry ||
+ rt->rt6i_flags & RTF_CACHE)
+ goto out;
+
+ dst_hold(&rt->dst);
+ read_unlock_bh(&table->tb6_lock);
+
+ if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
+ nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
+ else if (!(rt->dst.flags & DST_HOST))
+ nrt = rt6_alloc_clone(rt, &fl6->daddr);
+ else
+ goto out2;
+
+ dst_release(&rt->dst);
+ rt = nrt ? : net->ipv6.ip6_null_entry;
+
+ dst_hold(&rt->dst);
+ if (nrt) {
+ err = ip6_ins_rt(nrt);
+ if (!err)
+ goto out2;
+ }
+
+ if (--attempts <= 0)
+ goto out2;
+
+ /*
+ * Race condition! In the gap, when table->tb6_lock was
+ * released someone could insert this route. Relookup.
+ */
+ dst_release(&rt->dst);
+ goto relookup;
+
+out:
+ if (reachable) {
+ reachable = 0;
+ goto restart_2;
+ }
+ dst_hold(&rt->dst);
+ read_unlock_bh(&table->tb6_lock);
+out2:
+ rt->dst.lastuse = jiffies;
+ rt->dst.__use++;
+
+ return rt;
+}
+
+static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
+ struct flowi6 *fl6, int flags)
+{
+ return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
+}
+
+static struct dst_entry *ip6_route_input_lookup(struct net *net,
+ struct net_device *dev,
+ struct flowi6 *fl6, int flags)
+{
+ if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
+ flags |= RT6_LOOKUP_F_IFACE;
+
+ return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
+}
+
+void ip6_route_input(struct sk_buff *skb)
+{
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
+ struct net *net = dev_net(skb->dev);
+ int flags = RT6_LOOKUP_F_HAS_SADDR;
+ struct flowi6 fl6 = {
+ .flowi6_iif = skb->dev->ifindex,
+ .daddr = iph->daddr,
+ .saddr = iph->saddr,
+ .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
+ .flowi6_mark = skb->mark,
+ .flowi6_proto = iph->nexthdr,
+ };
+
+ skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
+}
+
+static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
+ struct flowi6 *fl6, int flags)
+{
+ return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
+}
+
+struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
+ struct flowi6 *fl6)
+{
+ int flags = 0;
+
+ if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
+ flags |= RT6_LOOKUP_F_IFACE;
+
+ if (!ipv6_addr_any(&fl6->saddr))
+ flags |= RT6_LOOKUP_F_HAS_SADDR;
+ else if (sk)
+ flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
+
+ return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
+}
+
+EXPORT_SYMBOL(ip6_route_output);
+
+struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
+{
+ struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
+ struct dst_entry *new = NULL;
+
+ rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
+ if (rt) {
+ memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
+
+ new = &rt->dst;
+
+ new->__use = 1;
+ new->input = dst_discard;
+ new->output = dst_discard;
+
+ if (dst_metrics_read_only(&ort->dst))
+ new->_metrics = ort->dst._metrics;
+ else
+ dst_copy_metrics(new, &ort->dst);
+ rt->rt6i_idev = ort->rt6i_idev;
+ if (rt->rt6i_idev)
+ in6_dev_hold(rt->rt6i_idev);
+
+ rt->rt6i_gateway = ort->rt6i_gateway;
+ rt->rt6i_flags = ort->rt6i_flags;
+ rt6_clean_expires(rt);
+ rt->rt6i_metric = 0;
+
+ memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
+#ifdef CONFIG_IPV6_SUBTREES
+ memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
+#endif
+
+ dst_free(new);
+ }
+
+ dst_release(dst_orig);
+ return new ? new : ERR_PTR(-ENOMEM);
+}
+
+/*
+ * Destination cache support functions
+ */
+
+static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
+{
+ struct rt6_info *rt;
+
+ rt = (struct rt6_info *) dst;
+
+ if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
+ if (rt->rt6i_peer_genid != rt6_peer_genid()) {
+ if (!rt->rt6i_peer)
+ rt6_bind_peer(rt, 0);
+ rt->rt6i_peer_genid = rt6_peer_genid();
+ }
+ return dst;
+ }
+ return NULL;
+}
+
+static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
+{
+ struct rt6_info *rt = (struct rt6_info *) dst;
+
+ if (rt) {
+ if (rt->rt6i_flags & RTF_CACHE) {
+ if (rt6_check_expired(rt)) {
+ ip6_del_rt(rt);
+ dst = NULL;
+ }
+ } else {
+ dst_release(dst);
+ dst = NULL;
+ }
+ }
+ return dst;
+}
+
+static void ip6_link_failure(struct sk_buff *skb)
+{
+ struct rt6_info *rt;
+
+ icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
+
+ rt = (struct rt6_info *) skb_dst(skb);
+ if (rt) {
+ if (rt->rt6i_flags & RTF_CACHE)
+ rt6_update_expires(rt, 0);
+ else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
+ rt->rt6i_node->fn_sernum = -1;
+ }
+}
+
+static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
+{
+ struct rt6_info *rt6 = (struct rt6_info*)dst;
+
+ if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
+ rt6->rt6i_flags |= RTF_MODIFIED;
+ if (mtu < IPV6_MIN_MTU) {
+ u32 features = dst_metric(dst, RTAX_FEATURES);
+ mtu = IPV6_MIN_MTU;
+ features |= RTAX_FEATURE_ALLFRAG;
+ dst_metric_set(dst, RTAX_FEATURES, features);
+ }
+ dst_metric_set(dst, RTAX_MTU, mtu);
+ }
+}
+
+static unsigned int ip6_default_advmss(const struct dst_entry *dst)
+{
+ struct net_device *dev = dst->dev;
+ unsigned int mtu = dst_mtu(dst);
+ struct net *net = dev_net(dev);
+
+ mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
+
+ if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
+ mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
+
+ /*
+ * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
+ * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
+ * IPV6_MAXPLEN is also valid and means: "any MSS,
+ * rely only on pmtu discovery"
+ */
+ if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
+ mtu = IPV6_MAXPLEN;
+ return mtu;
+}
+
+static unsigned int ip6_mtu(const struct dst_entry *dst)
+{
+ struct inet6_dev *idev;
+ unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
+
+ if (mtu)
+ return mtu;
+
+ mtu = IPV6_MIN_MTU;
+
+ rcu_read_lock();
+ idev = __in6_dev_get(dst->dev);
+ if (idev)
+ mtu = idev->cnf.mtu6;
+ rcu_read_unlock();
+
+ return mtu;
+}
+
+static struct dst_entry *icmp6_dst_gc_list;
+static DEFINE_SPINLOCK(icmp6_dst_lock);
+
+struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
+ struct neighbour *neigh,
+ struct flowi6 *fl6)
+{
+ struct dst_entry *dst;
+ struct rt6_info *rt;
+ struct inet6_dev *idev = in6_dev_get(dev);
+ struct net *net = dev_net(dev);
+
+ if (unlikely(!idev))
+ return ERR_PTR(-ENODEV);
+
+ rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
+ if (unlikely(!rt)) {
+ in6_dev_put(idev);
+ dst = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+
+ if (neigh)
+ neigh_hold(neigh);
+ else {
+ neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr);
+ if (IS_ERR(neigh)) {
+ in6_dev_put(idev);
+ dst_free(&rt->dst);
+ return ERR_CAST(neigh);
+ }
+ }
+
+ rt->dst.flags |= DST_HOST;
+ rt->dst.output = ip6_output;
+ dst_set_neighbour(&rt->dst, neigh);
+ atomic_set(&rt->dst.__refcnt, 1);
+ rt->rt6i_dst.addr = fl6->daddr;
+ rt->rt6i_dst.plen = 128;
+ rt->rt6i_idev = idev;
+ dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
+
+ spin_lock_bh(&icmp6_dst_lock);
+ rt->dst.next = icmp6_dst_gc_list;
+ icmp6_dst_gc_list = &rt->dst;
+ spin_unlock_bh(&icmp6_dst_lock);
+
+ fib6_force_start_gc(net);
+
+ dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
+
+out:
+ return dst;
+}
+
+int icmp6_dst_gc(void)
+{
+ struct dst_entry *dst, **pprev;
+ int more = 0;
+
+ spin_lock_bh(&icmp6_dst_lock);
+ pprev = &icmp6_dst_gc_list;
+
+ while ((dst = *pprev) != NULL) {
+ if (!atomic_read(&dst->__refcnt)) {
+ *pprev = dst->next;
+ dst_free(dst);
+ } else {
+ pprev = &dst->next;
+ ++more;
+ }
+ }
+
+ spin_unlock_bh(&icmp6_dst_lock);
+
+ return more;
+}
+
+static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
+ void *arg)
+{
+ struct dst_entry *dst, **pprev;
+
+ spin_lock_bh(&icmp6_dst_lock);
+ pprev = &icmp6_dst_gc_list;
+ while ((dst = *pprev) != NULL) {
+ struct rt6_info *rt = (struct rt6_info *) dst;
+ if (func(rt, arg)) {
+ *pprev = dst->next;
+ dst_free(dst);
+ } else {
+ pprev = &dst->next;
+ }
+ }
+ spin_unlock_bh(&icmp6_dst_lock);
+}
+
+static int ip6_dst_gc(struct dst_ops *ops)
+{
+ unsigned long now = jiffies;
+ struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
+ int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
+ int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
+ int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
+ int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
+ unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
+ int entries;
+
+ entries = dst_entries_get_fast(ops);
+ if (time_after(rt_last_gc + rt_min_interval, now) &&
+ entries <= rt_max_size)
+ goto out;
+
+ net->ipv6.ip6_rt_gc_expire++;
+ fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
+ net->ipv6.ip6_rt_last_gc = now;
+ entries = dst_entries_get_slow(ops);
+ if (entries < ops->gc_thresh)
+ net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
+out:
+ net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
+ return entries > rt_max_size;
+}
+
+/* Clean host part of a prefix. Not necessary in radix tree,
+ but results in cleaner routing tables.
+
+ Remove it only when all the things will work!
+ */
+
+int ip6_dst_hoplimit(struct dst_entry *dst)
+{
+ int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
+ if (hoplimit == 0) {
+ struct net_device *dev = dst->dev;
+ struct inet6_dev *idev;
+
+ rcu_read_lock();
+ idev = __in6_dev_get(dev);
+ if (idev)
+ hoplimit = idev->cnf.hop_limit;
+ else
+ hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
+ rcu_read_unlock();
+ }
+ return hoplimit;
+}
+EXPORT_SYMBOL(ip6_dst_hoplimit);
+
+/*
+ *
+ */
+
+int ip6_route_add(struct fib6_config *cfg)
+{
+ int err;
+ struct net *net = cfg->fc_nlinfo.nl_net;
+ struct rt6_info *rt = NULL;
+ struct net_device *dev = NULL;
+ struct inet6_dev *idev = NULL;
+ struct fib6_table *table;
+ int addr_type;
+
+ if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
+ return -EINVAL;
+#ifndef CONFIG_IPV6_SUBTREES
+ if (cfg->fc_src_len)
+ return -EINVAL;
+#endif
+ if (cfg->fc_ifindex) {
+ err = -ENODEV;
+ dev = dev_get_by_index(net, cfg->fc_ifindex);
+ if (!dev)
+ goto out;
+ idev = in6_dev_get(dev);
+ if (!idev)
+ goto out;
+ }
+
+ if (cfg->fc_metric == 0)
+ cfg->fc_metric = IP6_RT_PRIO_USER;
+
+ err = -ENOBUFS;
+ if (cfg->fc_nlinfo.nlh &&
+ !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
+ table = fib6_get_table(net, cfg->fc_table);
+ if (!table) {
+ printk(KERN_WARNING "IPv6: NLM_F_CREATE should be specified when creating new route\n");
+ table = fib6_new_table(net, cfg->fc_table);
+ }
+ } else {
+ table = fib6_new_table(net, cfg->fc_table);
+ }
+
+ if (!table)
+ goto out;
+
+ rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
+
+ if (!rt) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ rt->dst.obsolete = -1;
+
+ if (cfg->fc_flags & RTF_EXPIRES)
+ rt6_set_expires(rt, jiffies +
+ clock_t_to_jiffies(cfg->fc_expires));
+ else
+ rt6_clean_expires(rt);
+
+ if (cfg->fc_protocol == RTPROT_UNSPEC)
+ cfg->fc_protocol = RTPROT_BOOT;
+ rt->rt6i_protocol = cfg->fc_protocol;
+
+ addr_type = ipv6_addr_type(&cfg->fc_dst);
+
+ if (addr_type & IPV6_ADDR_MULTICAST)
+ rt->dst.input = ip6_mc_input;
+ else if (cfg->fc_flags & RTF_LOCAL)
+ rt->dst.input = ip6_input;
+ else
+ rt->dst.input = ip6_forward;
+
+ rt->dst.output = ip6_output;
+
+ ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
+ rt->rt6i_dst.plen = cfg->fc_dst_len;
+ if (rt->rt6i_dst.plen == 128)
+ rt->dst.flags |= DST_HOST;
+
+ if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
+ u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
+ if (!metrics) {
+ err = -ENOMEM;
+ goto out;
+ }
+ dst_init_metrics(&rt->dst, metrics, 0);
+ }
+#ifdef CONFIG_IPV6_SUBTREES
+ ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
+ rt->rt6i_src.plen = cfg->fc_src_len;
+#endif
+
+ rt->rt6i_metric = cfg->fc_metric;
+
+ /* We cannot add true routes via loopback here,
+ they would result in kernel looping; promote them to reject routes
+ */
+ if ((cfg->fc_flags & RTF_REJECT) ||
+ (dev && (dev->flags & IFF_LOOPBACK) &&
+ !(addr_type & IPV6_ADDR_LOOPBACK) &&
+ !(cfg->fc_flags & RTF_LOCAL))) {
+ /* hold loopback dev/idev if we haven't done so. */
+ if (dev != net->loopback_dev) {
+ if (dev) {
+ dev_put(dev);
+ in6_dev_put(idev);
+ }
+ dev = net->loopback_dev;
+ dev_hold(dev);
+ idev = in6_dev_get(dev);
+ if (!idev) {
+ err = -ENODEV;
+ goto out;
+ }
+ }
+ rt->dst.output = ip6_pkt_discard_out;
+ rt->dst.input = ip6_pkt_discard;
+ rt->dst.error = -ENETUNREACH;
+ rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
+ goto install_route;
+ }
+
+ if (cfg->fc_flags & RTF_GATEWAY) {
+ const struct in6_addr *gw_addr;
+ int gwa_type;
+
+ gw_addr = &cfg->fc_gateway;
+ rt->rt6i_gateway = *gw_addr;
+ gwa_type = ipv6_addr_type(gw_addr);
+
+ if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
+ struct rt6_info *grt;
+
+ /* IPv6 strictly inhibits using not link-local
+ addresses as nexthop address.
+ Otherwise, router will not able to send redirects.
+ It is very good, but in some (rare!) circumstances
+ (SIT, PtP, NBMA NOARP links) it is handy to allow
+ some exceptions. --ANK
+ */
+ err = -EINVAL;
+ if (!(gwa_type & IPV6_ADDR_UNICAST))
+ goto out;
+
+ grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
+
+ err = -EHOSTUNREACH;
+ if (!grt)
+ goto out;
+ if (dev) {
+ if (dev != grt->dst.dev) {
+ dst_release(&grt->dst);
+ goto out;
+ }
+ } else {
+ dev = grt->dst.dev;
+ idev = grt->rt6i_idev;
+ dev_hold(dev);
+ in6_dev_hold(grt->rt6i_idev);
+ }
+ if (!(grt->rt6i_flags & RTF_GATEWAY))
+ err = 0;
+ dst_release(&grt->dst);
+
+ if (err)
+ goto out;
+ }
+ err = -EINVAL;
+ if (!dev || (dev->flags & IFF_LOOPBACK))
+ goto out;
+ }
+
+ err = -ENODEV;
+ if (!dev)
+ goto out;
+
+ if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
+ if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
+ err = -EINVAL;
+ goto out;
+ }
+ rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
+ rt->rt6i_prefsrc.plen = 128;
+ } else
+ rt->rt6i_prefsrc.plen = 0;
+
+ if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
+ err = rt6_bind_neighbour(rt, dev);
+ if (err)
+ goto out;
+ }
+
+ rt->rt6i_flags = cfg->fc_flags;
+
+install_route:
+ if (cfg->fc_mx) {
+ struct nlattr *nla;
+ int remaining;
+
+ nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
+ int type = nla_type(nla);
+
+ if (type) {
+ if (type > RTAX_MAX) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ dst_metric_set(&rt->dst, type, nla_get_u32(nla));
+ }
+ }
+ }
+
+ rt->dst.dev = dev;
+ rt->rt6i_idev = idev;
+ rt->rt6i_table = table;
+
+ cfg->fc_nlinfo.nl_net = dev_net(dev);
+
+ return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
+
+out:
+ if (dev)
+ dev_put(dev);
+ if (idev)
+ in6_dev_put(idev);
+ if (rt)
+ dst_free(&rt->dst);
+ return err;
+}
+
+static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
+{
+ int err;
+ struct fib6_table *table;
+ struct net *net = dev_net(rt->dst.dev);
+
+ if (rt == net->ipv6.ip6_null_entry)
+ return -ENOENT;
+
+ table = rt->rt6i_table;
+ write_lock_bh(&table->tb6_lock);
+
+ err = fib6_del(rt, info);
+ dst_release(&rt->dst);
+
+ write_unlock_bh(&table->tb6_lock);
+
+ return err;
+}
+
+int ip6_del_rt(struct rt6_info *rt)
+{
+ struct nl_info info = {
+ .nl_net = dev_net(rt->dst.dev),
+ };
+ return __ip6_del_rt(rt, &info);
+}
+
+static int ip6_route_del(struct fib6_config *cfg)
+{
+ struct fib6_table *table;
+ struct fib6_node *fn;
+ struct rt6_info *rt;
+ int err = -ESRCH;
+
+ table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
+ if (!table)
+ return err;
+
+ read_lock_bh(&table->tb6_lock);
+
+ fn = fib6_locate(&table->tb6_root,
+ &cfg->fc_dst, cfg->fc_dst_len,
+ &cfg->fc_src, cfg->fc_src_len);
+
+ if (fn) {
+ for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
+ if (cfg->fc_ifindex &&
+ (!rt->dst.dev ||
+ rt->dst.dev->ifindex != cfg->fc_ifindex))
+ continue;
+ if (cfg->fc_flags & RTF_GATEWAY &&
+ !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
+ continue;
+ if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
+ continue;
+ dst_hold(&rt->dst);
+ read_unlock_bh(&table->tb6_lock);
+
+ return __ip6_del_rt(rt, &cfg->fc_nlinfo);
+ }
+ }
+ read_unlock_bh(&table->tb6_lock);
+
+ return err;
+}
+
+/*
+ * Handle redirects
+ */
+struct ip6rd_flowi {
+ struct flowi6 fl6;
+ struct in6_addr gateway;
+};
+
+static struct rt6_info *__ip6_route_redirect(struct net *net,
+ struct fib6_table *table,
+ struct flowi6 *fl6,
+ int flags)
+{
+ struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
+ struct rt6_info *rt;
+ struct fib6_node *fn;
+
+ /*
+ * Get the "current" route for this destination and
+ * check if the redirect has come from approriate router.
+ *
+ * RFC 2461 specifies that redirects should only be
+ * accepted if they come from the nexthop to the target.
+ * Due to the way the routes are chosen, this notion
+ * is a bit fuzzy and one might need to check all possible
+ * routes.
+ */
+
+ read_lock_bh(&table->tb6_lock);
+ fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
+restart:
+ for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
+ /*
+ * Current route is on-link; redirect is always invalid.
+ *
+ * Seems, previous statement is not true. It could
+ * be node, which looks for us as on-link (f.e. proxy ndisc)
+ * But then router serving it might decide, that we should
+ * know truth 8)8) --ANK (980726).
+ */
+ if (rt6_check_expired(rt))
+ continue;
+ if (!(rt->rt6i_flags & RTF_GATEWAY))
+ continue;
+ if (fl6->flowi6_oif != rt->dst.dev->ifindex)
+ continue;
+ if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
+ continue;
+ break;
+ }
+
+ if (!rt)
+ rt = net->ipv6.ip6_null_entry;
+ BACKTRACK(net, &fl6->saddr);
+out:
+ dst_hold(&rt->dst);
+
+ read_unlock_bh(&table->tb6_lock);
+
+ return rt;
+};
+
+static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
+ const struct in6_addr *src,
+ const struct in6_addr *gateway,
+ struct net_device *dev)
+{
+ int flags = RT6_LOOKUP_F_HAS_SADDR;
+ struct net *net = dev_net(dev);
+ struct ip6rd_flowi rdfl = {
+ .fl6 = {
+ .flowi6_oif = dev->ifindex,
+ .daddr = *dest,
+ .saddr = *src,
+ },
+ };
+
+ rdfl.gateway = *gateway;
+
+ if (rt6_need_strict(dest))
+ flags |= RT6_LOOKUP_F_IFACE;
+
+ return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
+ flags, __ip6_route_redirect);
+}
+
+void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
+ const struct in6_addr *saddr,
+ struct neighbour *neigh, u8 *lladdr, int on_link)
+{
+ struct rt6_info *rt, *nrt = NULL;
+ struct netevent_redirect netevent;
+ struct net *net = dev_net(neigh->dev);
+
+ rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
+
+ if (rt == net->ipv6.ip6_null_entry) {
+ if (net_ratelimit())
+ printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
+ "for redirect target\n");
+ goto out;
+ }
+
+ /*
+ * We have finally decided to accept it.
+ */
+
+ neigh_update(neigh, lladdr, NUD_STALE,
+ NEIGH_UPDATE_F_WEAK_OVERRIDE|
+ NEIGH_UPDATE_F_OVERRIDE|
+ (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
+ NEIGH_UPDATE_F_ISROUTER))
+ );
+
+ /*
+ * Redirect received -> path was valid.
+ * Look, redirects are sent only in response to data packets,
+ * so that this nexthop apparently is reachable. --ANK
+ */
+ dst_confirm(&rt->dst);
+
+ /* Duplicate redirect: silently ignore. */
+ if (neigh == dst_get_neighbour_noref_raw(&rt->dst))
+ goto out;
+
+ nrt = ip6_rt_copy(rt, dest);
+ if (!nrt)
+ goto out;
+
+ nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
+ if (on_link)
+ nrt->rt6i_flags &= ~RTF_GATEWAY;
+
+ nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
+ dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
+
+ if (ip6_ins_rt(nrt))
+ goto out;
+
+ netevent.old = &rt->dst;
+ netevent.new = &nrt->dst;
+ call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
+
+ if (rt->rt6i_flags & RTF_CACHE) {
+ ip6_del_rt(rt);
+ return;
+ }
+
+out:
+ dst_release(&rt->dst);
+}
+
+/*
+ * Handle ICMP "packet too big" messages
+ * i.e. Path MTU discovery
+ */
+
+static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
+ struct net *net, u32 pmtu, int ifindex)
+{
+ struct rt6_info *rt, *nrt;
+ int allfrag = 0;
+again:
+ rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
+ if (!rt)
+ return;
+
+ if (rt6_check_expired(rt)) {
+ ip6_del_rt(rt);
+ goto again;
+ }
+
+ if (pmtu >= dst_mtu(&rt->dst))
+ goto out;
+
+ if (pmtu < IPV6_MIN_MTU) {
+ /*
+ * According to RFC2460, PMTU is set to the IPv6 Minimum Link
+ * MTU (1280) and a fragment header should always be included
+ * after a node receiving Too Big message reporting PMTU is
+ * less than the IPv6 Minimum Link MTU.
+ */
+ pmtu = IPV6_MIN_MTU;
+ allfrag = 1;
+ }
+
+ /* New mtu received -> path was valid.
+ They are sent only in response to data packets,
+ so that this nexthop apparently is reachable. --ANK
+ */
+ dst_confirm(&rt->dst);
+
+ /* Host route. If it is static, it would be better
+ not to override it, but add new one, so that
+ when cache entry will expire old pmtu
+ would return automatically.
+ */
+ if (rt->rt6i_flags & RTF_CACHE) {
+ dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
+ if (allfrag) {
+ u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
+ features |= RTAX_FEATURE_ALLFRAG;
+ dst_metric_set(&rt->dst, RTAX_FEATURES, features);
+ }
+ rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
+ rt->rt6i_flags |= RTF_MODIFIED;
+ goto out;
+ }
+
+ /* Network route.
+ Two cases are possible:
+ 1. It is connected route. Action: COW
+ 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
+ */
+ if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
+ nrt = rt6_alloc_cow(rt, daddr, saddr);
+ else
+ nrt = rt6_alloc_clone(rt, daddr);
+
+ if (nrt) {
+ dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
+ if (allfrag) {
+ u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
+ features |= RTAX_FEATURE_ALLFRAG;
+ dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
+ }
+
+ /* According to RFC 1981, detecting PMTU increase shouldn't be
+ * happened within 5 mins, the recommended timer is 10 mins.
+ * Here this route expiration time is set to ip6_rt_mtu_expires
+ * which is 10 mins. After 10 mins the decreased pmtu is expired
+ * and detecting PMTU increase will be automatically happened.
+ */
+ rt6_update_expires(nrt, net->ipv6.sysctl.ip6_rt_mtu_expires);
+ nrt->rt6i_flags |= RTF_DYNAMIC;
+ ip6_ins_rt(nrt);
+ }
+out:
+ dst_release(&rt->dst);
+}
+
+void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
+ struct net_device *dev, u32 pmtu)
+{
+ struct net *net = dev_net(dev);
+
+ /*
+ * RFC 1981 states that a node "MUST reduce the size of the packets it
+ * is sending along the path" that caused the Packet Too Big message.
+ * Since it's not possible in the general case to determine which
+ * interface was used to send the original packet, we update the MTU
+ * on the interface that will be used to send future packets. We also
+ * update the MTU on the interface that received the Packet Too Big in
+ * case the original packet was forced out that interface with
+ * SO_BINDTODEVICE or similar. This is the next best thing to the
+ * correct behaviour, which would be to update the MTU on all
+ * interfaces.
+ */
+ rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
+ rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
+}
+
+/*
+ * Misc support functions
+ */
+
+static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
+ const struct in6_addr *dest)
+{
+ struct net *net = dev_net(ort->dst.dev);
+ struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
+ ort->dst.dev, 0);
+
+ if (rt) {
+ rt->dst.input = ort->dst.input;
+ rt->dst.output = ort->dst.output;
+ rt->dst.flags |= DST_HOST;
+
+ rt->rt6i_dst.addr = *dest;
+ rt->rt6i_dst.plen = 128;
+ dst_copy_metrics(&rt->dst, &ort->dst);
+ rt->dst.error = ort->dst.error;
+ rt->rt6i_idev = ort->rt6i_idev;
+ if (rt->rt6i_idev)
+ in6_dev_hold(rt->rt6i_idev);
+ rt->dst.lastuse = jiffies;
+
+ rt->rt6i_gateway = ort->rt6i_gateway;
+ rt->rt6i_flags = ort->rt6i_flags;
+ if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
+ (RTF_DEFAULT | RTF_ADDRCONF))
+ rt6_set_from(rt, ort);
+ else
+ rt6_clean_expires(rt);
+ rt->rt6i_metric = 0;
+
+#ifdef CONFIG_IPV6_SUBTREES
+ memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
+#endif
+ memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
+ rt->rt6i_table = ort->rt6i_table;
+ }
+ return rt;
+}
+
+#ifdef CONFIG_IPV6_ROUTE_INFO
+static struct rt6_info *rt6_get_route_info(struct net *net,
+ const struct in6_addr *prefix, int prefixlen,
+ const struct in6_addr *gwaddr, int ifindex)
+{
+ struct fib6_node *fn;
+ struct rt6_info *rt = NULL;
+ struct fib6_table *table;
+
+ table = fib6_get_table(net, RT6_TABLE_INFO);
+ if (!table)
+ return NULL;
+
+ write_lock_bh(&table->tb6_lock);
+ fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
+ if (!fn)
+ goto out;
+
+ for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
+ if (rt->dst.dev->ifindex != ifindex)
+ continue;
+ if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
+ continue;
+ if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
+ continue;
+ dst_hold(&rt->dst);
+ break;
+ }
+out:
+ write_unlock_bh(&table->tb6_lock);
+ return rt;
+}
+
+static struct rt6_info *rt6_add_route_info(struct net *net,
+ const struct in6_addr *prefix, int prefixlen,
+ const struct in6_addr *gwaddr, int ifindex,
+ unsigned pref)
+{
+ struct fib6_config cfg = {
+ .fc_table = RT6_TABLE_INFO,
+ .fc_metric = IP6_RT_PRIO_USER,
+ .fc_ifindex = ifindex,
+ .fc_dst_len = prefixlen,
+ .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
+ RTF_UP | RTF_PREF(pref),
+ .fc_nlinfo.pid = 0,
+ .fc_nlinfo.nlh = NULL,
+ .fc_nlinfo.nl_net = net,
+ };
+
+ cfg.fc_dst = *prefix;
+ cfg.fc_gateway = *gwaddr;
+
+ /* We should treat it as a default route if prefix length is 0. */
+ if (!prefixlen)
+ cfg.fc_flags |= RTF_DEFAULT;
+
+ ip6_route_add(&cfg);
+
+ return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
+}
+#endif
+
+struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
+{
+ struct rt6_info *rt;
+ struct fib6_table *table;
+
+ table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
+ if (!table)
+ return NULL;
+
+ write_lock_bh(&table->tb6_lock);
+ for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
+ if (dev == rt->dst.dev &&
+ ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
+ ipv6_addr_equal(&rt->rt6i_gateway, addr))
+ break;
+ }
+ if (rt)
+ dst_hold(&rt->dst);
+ write_unlock_bh(&table->tb6_lock);
+ return rt;
+}
+
+struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
+ struct net_device *dev,
+ unsigned int pref)
+{
+ struct fib6_config cfg = {
+ .fc_table = RT6_TABLE_DFLT,
+ .fc_metric = IP6_RT_PRIO_USER,
+ .fc_ifindex = dev->ifindex,
+ .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
+ RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
+ .fc_nlinfo.pid = 0,
+ .fc_nlinfo.nlh = NULL,
+ .fc_nlinfo.nl_net = dev_net(dev),
+ };
+
+ cfg.fc_gateway = *gwaddr;
+
+ ip6_route_add(&cfg);
+
+ return rt6_get_dflt_router(gwaddr, dev);
+}
+
+void rt6_purge_dflt_routers(struct net *net)
+{
+ struct rt6_info *rt;
+ struct fib6_table *table;
+
+ /* NOTE: Keep consistent with rt6_get_dflt_router */
+ table = fib6_get_table(net, RT6_TABLE_DFLT);
+ if (!table)
+ return;
+
+restart:
+ read_lock_bh(&table->tb6_lock);
+ for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
+ if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
+ dst_hold(&rt->dst);
+ read_unlock_bh(&table->tb6_lock);
+ ip6_del_rt(rt);
+ goto restart;
+ }
+ }
+ read_unlock_bh(&table->tb6_lock);
+}
+
+static void rtmsg_to_fib6_config(struct net *net,
+ struct in6_rtmsg *rtmsg,
+ struct fib6_config *cfg)
+{
+ memset(cfg, 0, sizeof(*cfg));
+
+ cfg->fc_table = RT6_TABLE_MAIN;
+ cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
+ cfg->fc_metric = rtmsg->rtmsg_metric;
+ cfg->fc_expires = rtmsg->rtmsg_info;
+ cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
+ cfg->fc_src_len = rtmsg->rtmsg_src_len;
+ cfg->fc_flags = rtmsg->rtmsg_flags;
+
+ cfg->fc_nlinfo.nl_net = net;
+
+ cfg->fc_dst = rtmsg->rtmsg_dst;
+ cfg->fc_src = rtmsg->rtmsg_src;
+ cfg->fc_gateway = rtmsg->rtmsg_gateway;
+}
+
+int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
+{
+ struct fib6_config cfg;
+ struct in6_rtmsg rtmsg;
+ int err;
+
+ switch(cmd) {
+ case SIOCADDRT: /* Add a route */
+ case SIOCDELRT: /* Delete a route */
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+ err = copy_from_user(&rtmsg, arg,
+ sizeof(struct in6_rtmsg));
+ if (err)
+ return -EFAULT;
+
+ rtmsg_to_fib6_config(net, &rtmsg, &cfg);
+
+ rtnl_lock();
+ switch (cmd) {
+ case SIOCADDRT:
+ err = ip6_route_add(&cfg);
+ break;
+ case SIOCDELRT:
+ err = ip6_route_del(&cfg);
+ break;
+ default:
+ err = -EINVAL;
+ }
+ rtnl_unlock();
+
+ return err;
+ }
+
+ return -EINVAL;
+}
+
+/*
+ * Drop the packet on the floor
+ */
+
+static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
+{
+ int type;
+ struct dst_entry *dst = skb_dst(skb);
+ switch (ipstats_mib_noroutes) {
+ case IPSTATS_MIB_INNOROUTES:
+ type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
+ if (type == IPV6_ADDR_ANY) {
+ IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
+ IPSTATS_MIB_INADDRERRORS);
+ break;
+ }
+ /* FALLTHROUGH */
+ case IPSTATS_MIB_OUTNOROUTES:
+ IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
+ ipstats_mib_noroutes);
+ break;
+ }
+ icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
+ kfree_skb(skb);
+ return 0;
+}
+
+static int ip6_pkt_discard(struct sk_buff *skb)
+{
+ return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
+}
+
+static int ip6_pkt_discard_out(struct sk_buff *skb)
+{
+ skb->dev = skb_dst(skb)->dev;
+ return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
+}
+
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+
+static int ip6_pkt_prohibit(struct sk_buff *skb)
+{
+ return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
+}
+
+static int ip6_pkt_prohibit_out(struct sk_buff *skb)
+{
+ skb->dev = skb_dst(skb)->dev;
+ return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
+}
+
+#endif
+
+/*
+ * Allocate a dst for local (unicast / anycast) address.
+ */
+
+struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
+ const struct in6_addr *addr,
+ bool anycast)
+{
+ struct net *net = dev_net(idev->dev);
+ struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
+ net->loopback_dev, 0);
+ int err;
+
+ if (!rt) {
+ if (net_ratelimit())
+ pr_warning("IPv6: Maximum number of routes reached,"
+ " consider increasing route/max_size.\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ in6_dev_hold(idev);
+
+ rt->dst.flags |= DST_HOST;
+ rt->dst.input = ip6_input;
+ rt->dst.output = ip6_output;
+ rt->rt6i_idev = idev;
+ rt->dst.obsolete = -1;
+
+ rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
+ if (anycast)
+ rt->rt6i_flags |= RTF_ANYCAST;
+ else
+ rt->rt6i_flags |= RTF_LOCAL;
+ err = rt6_bind_neighbour(rt, rt->dst.dev);
+ if (err) {
+ dst_free(&rt->dst);
+ return ERR_PTR(err);
+ }
+
+ rt->rt6i_dst.addr = *addr;
+ rt->rt6i_dst.plen = 128;
+ rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
+
+ atomic_set(&rt->dst.__refcnt, 1);
+
+ return rt;
+}
+
+int ip6_route_get_saddr(struct net *net,
+ struct rt6_info *rt,
+ const struct in6_addr *daddr,
+ unsigned int prefs,
+ struct in6_addr *saddr)
+{
+ struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
+ int err = 0;
+ if (rt->rt6i_prefsrc.plen)
+ *saddr = rt->rt6i_prefsrc.addr;
+ else
+ err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
+ daddr, prefs, saddr);
+ return err;
+}
+
+/* remove deleted ip from prefsrc entries */
+struct arg_dev_net_ip {
+ struct net_device *dev;
+ struct net *net;
+ struct in6_addr *addr;
+};
+
+static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
+{
+ struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
+ struct net *net = ((struct arg_dev_net_ip *)arg)->net;
+ struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
+
+ if (((void *)rt->dst.dev == dev || !dev) &&
+ rt != net->ipv6.ip6_null_entry &&
+ ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
+ /* remove prefsrc entry */
+ rt->rt6i_prefsrc.plen = 0;
+ }
+ return 0;
+}
+
+void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
+{
+ struct net *net = dev_net(ifp->idev->dev);
+ struct arg_dev_net_ip adni = {
+ .dev = ifp->idev->dev,
+ .net = net,
+ .addr = &ifp->addr,
+ };
+ fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
+}
+
+struct arg_dev_net {
+ struct net_device *dev;
+ struct net *net;
+};
+
+static int fib6_ifdown(struct rt6_info *rt, void *arg)
+{
+ const struct arg_dev_net *adn = arg;
+ const struct net_device *dev = adn->dev;
+
+ if ((rt->dst.dev == dev || !dev) &&
+ rt != adn->net->ipv6.ip6_null_entry)
+ return -1;
+
+ return 0;
+}
+
+void rt6_ifdown(struct net *net, struct net_device *dev)
+{
+ struct arg_dev_net adn = {
+ .dev = dev,
+ .net = net,
+ };
+
+ fib6_clean_all(net, fib6_ifdown, 0, &adn);
+ icmp6_clean_all(fib6_ifdown, &adn);
+}
+
+struct rt6_mtu_change_arg
+{
+ struct net_device *dev;
+ unsigned mtu;
+};
+
+static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
+{
+ struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
+ struct inet6_dev *idev;
+
+ /* In IPv6 pmtu discovery is not optional,
+ so that RTAX_MTU lock cannot disable it.
+ We still use this lock to block changes
+ caused by addrconf/ndisc.
+ */
+
+ idev = __in6_dev_get(arg->dev);
+ if (!idev)
+ return 0;
+
+ /* For administrative MTU increase, there is no way to discover
+ IPv6 PMTU increase, so PMTU increase should be updated here.
+ Since RFC 1981 doesn't include administrative MTU increase
+ update PMTU increase is a MUST. (i.e. jumbo frame)
+ */
+ /*
+ If new MTU is less than route PMTU, this new MTU will be the
+ lowest MTU in the path, update the route PMTU to reflect PMTU
+ decreases; if new MTU is greater than route PMTU, and the
+ old MTU is the lowest MTU in the path, update the route PMTU
+ to reflect the increase. In this case if the other nodes' MTU
+ also have the lowest MTU, TOO BIG MESSAGE will be lead to
+ PMTU discouvery.
+ */
+ if (rt->dst.dev == arg->dev &&
+ !dst_metric_locked(&rt->dst, RTAX_MTU) &&
+ (dst_mtu(&rt->dst) >= arg->mtu ||
+ (dst_mtu(&rt->dst) < arg->mtu &&
+ dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
+ dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
+ }
+ return 0;
+}
+
+void rt6_mtu_change(struct net_device *dev, unsigned mtu)
+{
+ struct rt6_mtu_change_arg arg = {
+ .dev = dev,
+ .mtu = mtu,
+ };
+
+ fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
+}
+
+static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
+ [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
+ [RTA_OIF] = { .type = NLA_U32 },
+ [RTA_IIF] = { .type = NLA_U32 },
+ [RTA_PRIORITY] = { .type = NLA_U32 },
+ [RTA_METRICS] = { .type = NLA_NESTED },
+};
+
+static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct fib6_config *cfg)
+{
+ struct rtmsg *rtm;
+ struct nlattr *tb[RTA_MAX+1];
+ int err;
+
+ err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
+ if (err < 0)
+ goto errout;
+
+ err = -EINVAL;
+ rtm = nlmsg_data(nlh);
+ memset(cfg, 0, sizeof(*cfg));
+
+ cfg->fc_table = rtm->rtm_table;
+ cfg->fc_dst_len = rtm->rtm_dst_len;
+ cfg->fc_src_len = rtm->rtm_src_len;
+ cfg->fc_flags = RTF_UP;
+ cfg->fc_protocol = rtm->rtm_protocol;
+
+ if (rtm->rtm_type == RTN_UNREACHABLE)
+ cfg->fc_flags |= RTF_REJECT;
+
+ if (rtm->rtm_type == RTN_LOCAL)
+ cfg->fc_flags |= RTF_LOCAL;
+
+ cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
+ cfg->fc_nlinfo.nlh = nlh;
+ cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
+
+ if (tb[RTA_GATEWAY]) {
+ nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
+ cfg->fc_flags |= RTF_GATEWAY;
+ }
+
+ if (tb[RTA_DST]) {
+ int plen = (rtm->rtm_dst_len + 7) >> 3;
+
+ if (nla_len(tb[RTA_DST]) < plen)
+ goto errout;
+
+ nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
+ }
+
+ if (tb[RTA_SRC]) {
+ int plen = (rtm->rtm_src_len + 7) >> 3;
+
+ if (nla_len(tb[RTA_SRC]) < plen)
+ goto errout;
+
+ nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
+ }
+
+ if (tb[RTA_PREFSRC])
+ nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
+
+ if (tb[RTA_OIF])
+ cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
+
+ if (tb[RTA_PRIORITY])
+ cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
+
+ if (tb[RTA_METRICS]) {
+ cfg->fc_mx = nla_data(tb[RTA_METRICS]);
+ cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
+ }
+
+ if (tb[RTA_TABLE])
+ cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
+
+ err = 0;
+errout:
+ return err;
+}
+
+static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+{
+ struct fib6_config cfg;
+ int err;
+
+ err = rtm_to_fib6_config(skb, nlh, &cfg);
+ if (err < 0)
+ return err;
+
+ return ip6_route_del(&cfg);
+}
+
+static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+{
+ struct fib6_config cfg;
+ int err;
+
+ err = rtm_to_fib6_config(skb, nlh, &cfg);
+ if (err < 0)
+ return err;
+
+ return ip6_route_add(&cfg);
+}
+
+static inline size_t rt6_nlmsg_size(void)
+{
+ return NLMSG_ALIGN(sizeof(struct rtmsg))
+ + nla_total_size(16) /* RTA_SRC */
+ + nla_total_size(16) /* RTA_DST */
+ + nla_total_size(16) /* RTA_GATEWAY */
+ + nla_total_size(16) /* RTA_PREFSRC */
+ + nla_total_size(4) /* RTA_TABLE */
+ + nla_total_size(4) /* RTA_IIF */
+ + nla_total_size(4) /* RTA_OIF */
+ + nla_total_size(4) /* RTA_PRIORITY */
+ + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
+ + nla_total_size(sizeof(struct rta_cacheinfo));
+}
+
+static int rt6_fill_node(struct net *net,
+ struct sk_buff *skb, struct rt6_info *rt,
+ struct in6_addr *dst, struct in6_addr *src,
+ int iif, int type, u32 pid, u32 seq,
+ int prefix, int nowait, unsigned int flags)
+{
+ const struct inet_peer *peer;
+ struct rtmsg *rtm;
+ struct nlmsghdr *nlh;
+ long expires;
+ u32 table;
+ struct neighbour *n;
+ u32 ts, tsage;
+
+ if (prefix) { /* user wants prefix routes only */
+ if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
+ /* success since this is not a prefix route */
+ return 1;
+ }
+ }
+
+ nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ rtm = nlmsg_data(nlh);
+ rtm->rtm_family = AF_INET6;
+ rtm->rtm_dst_len = rt->rt6i_dst.plen;
+ rtm->rtm_src_len = rt->rt6i_src.plen;
+ rtm->rtm_tos = 0;
+ if (rt->rt6i_table)
+ table = rt->rt6i_table->tb6_id;
+ else
+ table = RT6_TABLE_UNSPEC;
+ rtm->rtm_table = table;
+ NLA_PUT_U32(skb, RTA_TABLE, table);
+ if (rt->rt6i_flags & RTF_REJECT)
+ rtm->rtm_type = RTN_UNREACHABLE;
+ else if (rt->rt6i_flags & RTF_LOCAL)
+ rtm->rtm_type = RTN_LOCAL;
+ else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
+ rtm->rtm_type = RTN_LOCAL;
+ else
+ rtm->rtm_type = RTN_UNICAST;
+ rtm->rtm_flags = 0;
+ rtm->rtm_scope = RT_SCOPE_UNIVERSE;
+ rtm->rtm_protocol = rt->rt6i_protocol;
+ if (rt->rt6i_flags & RTF_DYNAMIC)
+ rtm->rtm_protocol = RTPROT_REDIRECT;
+ else if (rt->rt6i_flags & RTF_ADDRCONF)
+ rtm->rtm_protocol = RTPROT_KERNEL;
+ else if (rt->rt6i_flags & RTF_DEFAULT)
+ rtm->rtm_protocol = RTPROT_RA;
+
+ if (rt->rt6i_flags & RTF_CACHE)
+ rtm->rtm_flags |= RTM_F_CLONED;
+
+ if (dst) {
+ NLA_PUT(skb, RTA_DST, 16, dst);
+ rtm->rtm_dst_len = 128;
+ } else if (rtm->rtm_dst_len)
+ NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
+#ifdef CONFIG_IPV6_SUBTREES
+ if (src) {
+ NLA_PUT(skb, RTA_SRC, 16, src);
+ rtm->rtm_src_len = 128;
+ } else if (rtm->rtm_src_len)
+ NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
+#endif
+ if (iif) {
+#ifdef CONFIG_IPV6_MROUTE
+ if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
+ int err = ip6mr_get_route(net, skb, rtm, nowait);
+ if (err <= 0) {
+ if (!nowait) {
+ if (err == 0)
+ return 0;
+ goto nla_put_failure;
+ } else {
+ if (err == -EMSGSIZE)
+ goto nla_put_failure;
+ }
+ }
+ } else
+#endif
+ NLA_PUT_U32(skb, RTA_IIF, iif);
+ } else if (dst) {
+ struct in6_addr saddr_buf;
+ if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
+ NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
+ }
+
+ if (rt->rt6i_prefsrc.plen) {
+ struct in6_addr saddr_buf;
+ saddr_buf = rt->rt6i_prefsrc.addr;
+ NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
+ }
+
+ if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
+ goto nla_put_failure;
+
+ rcu_read_lock();
+ n = dst_get_neighbour_noref(&rt->dst);
+ if (n) {
+ if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
+ rcu_read_unlock();
+ goto nla_put_failure;
+ }
+ }
+ rcu_read_unlock();
+
+ if (rt->dst.dev)
+ NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex);
+
+ NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
+
+ if (!(rt->rt6i_flags & RTF_EXPIRES))
+ expires = 0;
+ else if (rt->dst.expires - jiffies < INT_MAX)
+ expires = rt->dst.expires - jiffies;
+ else
+ expires = INT_MAX;
+
+ peer = rt->rt6i_peer;
+ ts = tsage = 0;
+ if (peer && peer->tcp_ts_stamp) {
+ ts = peer->tcp_ts;
+ tsage = get_seconds() - peer->tcp_ts_stamp;
+ }
+
+ if (rtnl_put_cacheinfo(skb, &rt->dst, 0, ts, tsage,
+ expires, rt->dst.error) < 0)
+ goto nla_put_failure;
+
+ return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
+}
+
+int rt6_dump_route(struct rt6_info *rt, void *p_arg)
+{
+ struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
+ int prefix;
+
+ if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
+ struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
+ prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
+ } else
+ prefix = 0;
+
+ return rt6_fill_node(arg->net,
+ arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
+ NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
+ prefix, 0, NLM_F_MULTI);
+}
+
+static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
+{
+ struct net *net = sock_net(in_skb->sk);
+ struct nlattr *tb[RTA_MAX+1];
+ struct rt6_info *rt;
+ struct sk_buff *skb;
+ struct rtmsg *rtm;
+ struct flowi6 fl6;
+ int err, iif = 0, oif = 0;
+
+ err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
+ if (err < 0)
+ goto errout;
+
+ err = -EINVAL;
+ memset(&fl6, 0, sizeof(fl6));
+
+ if (tb[RTA_SRC]) {
+ if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
+ goto errout;
+
+ fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
+ }
+
+ if (tb[RTA_DST]) {
+ if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
+ goto errout;
+
+ fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
+ }
+
+ if (tb[RTA_IIF])
+ iif = nla_get_u32(tb[RTA_IIF]);
+
+ if (tb[RTA_OIF])
+ oif = nla_get_u32(tb[RTA_OIF]);
+
+ if (iif) {
+ struct net_device *dev;
+ int flags = 0;
+
+ dev = __dev_get_by_index(net, iif);
+ if (!dev) {
+ err = -ENODEV;
+ goto errout;
+ }
+
+ fl6.flowi6_iif = iif;
+
+ if (!ipv6_addr_any(&fl6.saddr))
+ flags |= RT6_LOOKUP_F_HAS_SADDR;
+
+ rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
+ flags);
+ } else {
+ fl6.flowi6_oif = oif;
+
+ rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
+ }
+
+ skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb) {
+ err = -ENOBUFS;
+ goto errout;
+ }
+
+ /* Reserve room for dummy headers, this skb can pass
+ through good chunk of routing engine.
+ */
+ skb_reset_mac_header(skb);
+ skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
+
+ skb_dst_set(skb, &rt->dst);
+
+ err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
+ RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
+ nlh->nlmsg_seq, 0, 0, 0);
+ if (err < 0) {
+ kfree_skb(skb);
+ goto errout;
+ }
+
+ err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
+errout:
+ return err;
+}
+
+void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
+{
+ struct sk_buff *skb;
+ struct net *net = info->nl_net;
+ u32 seq;
+ int err;
+
+ err = -ENOBUFS;
+ seq = info->nlh ? info->nlh->nlmsg_seq : 0;
+
+ skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
+ if (!skb)
+ goto errout;
+
+ err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
+ event, info->pid, seq, 0, 0, 0);
+ if (err < 0) {
+ /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
+ WARN_ON(err == -EMSGSIZE);
+ kfree_skb(skb);
+ goto errout;
+ }
+ rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
+ info->nlh, gfp_any());
+ return;
+errout:
+ if (err < 0)
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
+}
+
+static int ip6_route_dev_notify(struct notifier_block *this,
+ unsigned long event, void *data)
+{
+ struct net_device *dev = (struct net_device *)data;
+ struct net *net = dev_net(dev);
+
+ if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
+ net->ipv6.ip6_null_entry->dst.dev = dev;
+ net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+ net->ipv6.ip6_prohibit_entry->dst.dev = dev;
+ net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
+ net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
+ net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
+#endif
+ }
+
+ return NOTIFY_OK;
+}
+
+/*
+ * /proc
+ */
+
+#ifdef CONFIG_PROC_FS
+
+struct rt6_proc_arg
+{
+ char *buffer;
+ int offset;
+ int length;
+ int skip;
+ int len;
+};
+
+static int rt6_info_route(struct rt6_info *rt, void *p_arg)
+{
+ struct seq_file *m = p_arg;
+ struct neighbour *n;
+
+ seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
+
+#ifdef CONFIG_IPV6_SUBTREES
+ seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
+#else
+ seq_puts(m, "00000000000000000000000000000000 00 ");
+#endif
+ rcu_read_lock();
+ n = dst_get_neighbour_noref(&rt->dst);
+ if (n) {
+ seq_printf(m, "%pi6", n->primary_key);
+ } else {
+ seq_puts(m, "00000000000000000000000000000000");
+ }
+ rcu_read_unlock();
+ seq_printf(m, " %08x %08x %08x %08x %8s\n",
+ rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
+ rt->dst.__use, rt->rt6i_flags,
+ rt->dst.dev ? rt->dst.dev->name : "");
+ return 0;
+}
+
+static int ipv6_route_show(struct seq_file *m, void *v)
+{
+ struct net *net = (struct net *)m->private;
+ fib6_clean_all_ro(net, rt6_info_route, 0, m);
+ return 0;
+}
+
+static int ipv6_route_open(struct inode *inode, struct file *file)
+{
+ return single_open_net(inode, file, ipv6_route_show);
+}
+
+static const struct file_operations ipv6_route_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = ipv6_route_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release_net,
+};
+
+static int rt6_stats_seq_show(struct seq_file *seq, void *v)
+{
+ struct net *net = (struct net *)seq->private;
+ seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
+ net->ipv6.rt6_stats->fib_nodes,
+ net->ipv6.rt6_stats->fib_route_nodes,
+ net->ipv6.rt6_stats->fib_rt_alloc,
+ net->ipv6.rt6_stats->fib_rt_entries,
+ net->ipv6.rt6_stats->fib_rt_cache,
+ dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
+ net->ipv6.rt6_stats->fib_discarded_routes);
+
+ return 0;
+}
+
+static int rt6_stats_seq_open(struct inode *inode, struct file *file)
+{
+ return single_open_net(inode, file, rt6_stats_seq_show);
+}
+
+static const struct file_operations rt6_stats_seq_fops = {
+ .owner = THIS_MODULE,
+ .open = rt6_stats_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release_net,
+};
+#endif /* CONFIG_PROC_FS */
+
+#ifdef CONFIG_SYSCTL
+
+static
+int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct net *net;
+ int delay;
+ if (!write)
+ return -EINVAL;
+
+ net = (struct net *)ctl->extra1;
+ delay = net->ipv6.sysctl.flush_delay;
+ proc_dointvec(ctl, write, buffer, lenp, ppos);
+ fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
+ return 0;
+}
+
+ctl_table ipv6_route_table_template[] = {
+ {
+ .procname = "flush",
+ .data = &init_net.ipv6.sysctl.flush_delay,
+ .maxlen = sizeof(int),
+ .mode = 0200,
+ .proc_handler = ipv6_sysctl_rtcache_flush
+ },
+ {
+ .procname = "gc_thresh",
+ .data = &ip6_dst_ops_template.gc_thresh,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "max_size",
+ .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "gc_min_interval",
+ .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ {
+ .procname = "gc_timeout",
+ .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ {
+ .procname = "gc_interval",
+ .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ {
+ .procname = "gc_elasticity",
+ .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "mtu_expires",
+ .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ {
+ .procname = "min_adv_mss",
+ .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "gc_min_interval_ms",
+ .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_ms_jiffies,
+ },
+ { }
+};
+
+struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
+{
+ struct ctl_table *table;
+
+ table = kmemdup(ipv6_route_table_template,
+ sizeof(ipv6_route_table_template),
+ GFP_KERNEL);
+
+ if (table) {
+ table[0].data = &net->ipv6.sysctl.flush_delay;
+ table[0].extra1 = net;
+ table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
+ table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
+ table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
+ table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
+ table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
+ table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
+ table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
+ table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
+ table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
+ }
+
+ return table;
+}
+#endif
+
+static int __net_init ip6_route_net_init(struct net *net)
+{
+ int ret = -ENOMEM;
+
+ memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
+ sizeof(net->ipv6.ip6_dst_ops));
+
+ if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
+ goto out_ip6_dst_ops;
+
+ net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
+ sizeof(*net->ipv6.ip6_null_entry),
+ GFP_KERNEL);
+ if (!net->ipv6.ip6_null_entry)
+ goto out_ip6_dst_entries;
+ net->ipv6.ip6_null_entry->dst.path =
+ (struct dst_entry *)net->ipv6.ip6_null_entry;
+ net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
+ dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
+ ip6_template_metrics, true);
+
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+ net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
+ sizeof(*net->ipv6.ip6_prohibit_entry),
+ GFP_KERNEL);
+ if (!net->ipv6.ip6_prohibit_entry)
+ goto out_ip6_null_entry;
+ net->ipv6.ip6_prohibit_entry->dst.path =
+ (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
+ net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
+ dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
+ ip6_template_metrics, true);
+
+ net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
+ sizeof(*net->ipv6.ip6_blk_hole_entry),
+ GFP_KERNEL);
+ if (!net->ipv6.ip6_blk_hole_entry)
+ goto out_ip6_prohibit_entry;
+ net->ipv6.ip6_blk_hole_entry->dst.path =
+ (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
+ net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
+ dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
+ ip6_template_metrics, true);
+#endif
+
+ net->ipv6.sysctl.flush_delay = 0;
+ net->ipv6.sysctl.ip6_rt_max_size = 4096;
+ net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
+ net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
+ net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
+ net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
+ net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
+ net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
+
+ net->ipv6.ip6_rt_gc_expire = 30*HZ;
+
+ ret = 0;
+out:
+ return ret;
+
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+out_ip6_prohibit_entry:
+ kfree(net->ipv6.ip6_prohibit_entry);
+out_ip6_null_entry:
+ kfree(net->ipv6.ip6_null_entry);
+#endif
+out_ip6_dst_entries:
+ dst_entries_destroy(&net->ipv6.ip6_dst_ops);
+out_ip6_dst_ops:
+ goto out;
+}
+
+static void __net_exit ip6_route_net_exit(struct net *net)
+{
+ kfree(net->ipv6.ip6_null_entry);
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+ kfree(net->ipv6.ip6_prohibit_entry);
+ kfree(net->ipv6.ip6_blk_hole_entry);
+#endif
+ dst_entries_destroy(&net->ipv6.ip6_dst_ops);
+}
+
+static int __net_init ip6_route_net_init_late(struct net *net)
+{
+#ifdef CONFIG_PROC_FS
+ proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
+ proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
+#endif
+ return 0;
+}
+
+static void __net_exit ip6_route_net_exit_late(struct net *net)
+{
+#ifdef CONFIG_PROC_FS
+ proc_net_remove(net, "ipv6_route");
+ proc_net_remove(net, "rt6_stats");
+#endif
+}
+
+static struct pernet_operations ip6_route_net_ops = {
+ .init = ip6_route_net_init,
+ .exit = ip6_route_net_exit,
+};
+
+static struct pernet_operations ip6_route_net_late_ops = {
+ .init = ip6_route_net_init_late,
+ .exit = ip6_route_net_exit_late,
+};
+
+static struct notifier_block ip6_route_dev_notifier = {
+ .notifier_call = ip6_route_dev_notify,
+ .priority = 0,
+};
+
+int __init ip6_route_init(void)
+{
+ int ret;
+
+ ret = -ENOMEM;
+ ip6_dst_ops_template.kmem_cachep =
+ kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!ip6_dst_ops_template.kmem_cachep)
+ goto out;
+
+ ret = dst_entries_init(&ip6_dst_blackhole_ops);
+ if (ret)
+ goto out_kmem_cache;
+
+ ret = register_pernet_subsys(&ip6_route_net_ops);
+ if (ret)
+ goto out_dst_entries;
+
+ ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
+
+ /* Registering of the loopback is done before this portion of code,
+ * the loopback reference in rt6_info will not be taken, do it
+ * manually for init_net */
+ init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
+ init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
+ #ifdef CONFIG_IPV6_MULTIPLE_TABLES
+ init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
+ init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
+ init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
+ init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
+ #endif
+ ret = fib6_init();
+ if (ret)
+ goto out_register_subsys;
+
+ ret = xfrm6_init();
+ if (ret)
+ goto out_fib6_init;
+
+ ret = fib6_rules_init();
+ if (ret)
+ goto xfrm6_init;
+
+ ret = register_pernet_subsys(&ip6_route_net_late_ops);
+ if (ret)
+ goto fib6_rules_init;
+
+ ret = -ENOBUFS;
+ if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
+ __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
+ __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
+ goto out_register_late_subsys;
+
+ ret = register_netdevice_notifier(&ip6_route_dev_notifier);
+ if (ret)
+ goto out_register_late_subsys;
+
+out:
+ return ret;
+
+out_register_late_subsys:
+ unregister_pernet_subsys(&ip6_route_net_late_ops);
+fib6_rules_init:
+ fib6_rules_cleanup();
+xfrm6_init:
+ xfrm6_fini();
+out_fib6_init:
+ fib6_gc_cleanup();
+out_register_subsys:
+ unregister_pernet_subsys(&ip6_route_net_ops);
+out_dst_entries:
+ dst_entries_destroy(&ip6_dst_blackhole_ops);
+out_kmem_cache:
+ kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
+ goto out;
+}
+
+void ip6_route_cleanup(void)
+{
+ unregister_netdevice_notifier(&ip6_route_dev_notifier);
+ unregister_pernet_subsys(&ip6_route_net_late_ops);
+ fib6_rules_cleanup();
+ xfrm6_fini();
+ fib6_gc_cleanup();
+ unregister_pernet_subsys(&ip6_route_net_ops);
+ dst_entries_destroy(&ip6_dst_blackhole_ops);
+ kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
+}
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
new file mode 100644
index 00000000..c4ffd174
--- /dev/null
+++ b/net/ipv6/sit.c
@@ -0,0 +1,1306 @@
+/*
+ * IPv6 over IPv4 tunnel device - Simple Internet Transition (SIT)
+ * Linux INET6 implementation
+ *
+ * Authors:
+ * Pedro Roque <roque@di.fc.ul.pt>
+ * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ * Roger Venning <r.venning@telstra.com>: 6to4 support
+ * Nate Thompson <nate@thebog.net>: 6to4 support
+ * Fred Templin <fred.l.templin@boeing.com>: isatap support
+ */
+
+#include <linux/module.h>
+#include <linux/capability.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/icmp.h>
+#include <linux/slab.h>
+#include <asm/uaccess.h>
+#include <linux/init.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/if_ether.h>
+
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/transp_v6.h>
+#include <net/ip6_fib.h>
+#include <net/ip6_route.h>
+#include <net/ndisc.h>
+#include <net/addrconf.h>
+#include <net/ip.h>
+#include <net/udp.h>
+#include <net/icmp.h>
+#include <net/ipip.h>
+#include <net/inet_ecn.h>
+#include <net/xfrm.h>
+#include <net/dsfield.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+
+/*
+ This version of net/ipv6/sit.c is cloned of net/ipv4/ip_gre.c
+
+ For comments look at net/ipv4/ip_gre.c --ANK
+ */
+
+#define HASH_SIZE 16
+#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
+
+static int ipip6_tunnel_init(struct net_device *dev);
+static void ipip6_tunnel_setup(struct net_device *dev);
+static void ipip6_dev_free(struct net_device *dev);
+
+static int sit_net_id __read_mostly;
+struct sit_net {
+ struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
+ struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
+ struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
+ struct ip_tunnel __rcu *tunnels_wc[1];
+ struct ip_tunnel __rcu **tunnels[4];
+
+ struct net_device *fb_tunnel_dev;
+};
+
+/*
+ * Locking : hash tables are protected by RCU and RTNL
+ */
+
+#define for_each_ip_tunnel_rcu(start) \
+ for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
+
+/* often modified stats are per cpu, other are shared (netdev->stats) */
+struct pcpu_tstats {
+ unsigned long rx_packets;
+ unsigned long rx_bytes;
+ unsigned long tx_packets;
+ unsigned long tx_bytes;
+} __attribute__((aligned(4*sizeof(unsigned long))));
+
+static struct net_device_stats *ipip6_get_stats(struct net_device *dev)
+{
+ struct pcpu_tstats sum = { 0 };
+ int i;
+
+ for_each_possible_cpu(i) {
+ const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
+
+ sum.rx_packets += tstats->rx_packets;
+ sum.rx_bytes += tstats->rx_bytes;
+ sum.tx_packets += tstats->tx_packets;
+ sum.tx_bytes += tstats->tx_bytes;
+ }
+ dev->stats.rx_packets = sum.rx_packets;
+ dev->stats.rx_bytes = sum.rx_bytes;
+ dev->stats.tx_packets = sum.tx_packets;
+ dev->stats.tx_bytes = sum.tx_bytes;
+ return &dev->stats;
+}
+/*
+ * Must be invoked with rcu_read_lock
+ */
+static struct ip_tunnel * ipip6_tunnel_lookup(struct net *net,
+ struct net_device *dev, __be32 remote, __be32 local)
+{
+ unsigned int h0 = HASH(remote);
+ unsigned int h1 = HASH(local);
+ struct ip_tunnel *t;
+ struct sit_net *sitn = net_generic(net, sit_net_id);
+
+ for_each_ip_tunnel_rcu(sitn->tunnels_r_l[h0 ^ h1]) {
+ if (local == t->parms.iph.saddr &&
+ remote == t->parms.iph.daddr &&
+ (!dev || !t->parms.link || dev->iflink == t->parms.link) &&
+ (t->dev->flags & IFF_UP))
+ return t;
+ }
+ for_each_ip_tunnel_rcu(sitn->tunnels_r[h0]) {
+ if (remote == t->parms.iph.daddr &&
+ (!dev || !t->parms.link || dev->iflink == t->parms.link) &&
+ (t->dev->flags & IFF_UP))
+ return t;
+ }
+ for_each_ip_tunnel_rcu(sitn->tunnels_l[h1]) {
+ if (local == t->parms.iph.saddr &&
+ (!dev || !t->parms.link || dev->iflink == t->parms.link) &&
+ (t->dev->flags & IFF_UP))
+ return t;
+ }
+ t = rcu_dereference(sitn->tunnels_wc[0]);
+ if ((t != NULL) && (t->dev->flags & IFF_UP))
+ return t;
+ return NULL;
+}
+
+static struct ip_tunnel __rcu **__ipip6_bucket(struct sit_net *sitn,
+ struct ip_tunnel_parm *parms)
+{
+ __be32 remote = parms->iph.daddr;
+ __be32 local = parms->iph.saddr;
+ unsigned int h = 0;
+ int prio = 0;
+
+ if (remote) {
+ prio |= 2;
+ h ^= HASH(remote);
+ }
+ if (local) {
+ prio |= 1;
+ h ^= HASH(local);
+ }
+ return &sitn->tunnels[prio][h];
+}
+
+static inline struct ip_tunnel __rcu **ipip6_bucket(struct sit_net *sitn,
+ struct ip_tunnel *t)
+{
+ return __ipip6_bucket(sitn, &t->parms);
+}
+
+static void ipip6_tunnel_unlink(struct sit_net *sitn, struct ip_tunnel *t)
+{
+ struct ip_tunnel __rcu **tp;
+ struct ip_tunnel *iter;
+
+ for (tp = ipip6_bucket(sitn, t);
+ (iter = rtnl_dereference(*tp)) != NULL;
+ tp = &iter->next) {
+ if (t == iter) {
+ rcu_assign_pointer(*tp, t->next);
+ break;
+ }
+ }
+}
+
+static void ipip6_tunnel_link(struct sit_net *sitn, struct ip_tunnel *t)
+{
+ struct ip_tunnel __rcu **tp = ipip6_bucket(sitn, t);
+
+ rcu_assign_pointer(t->next, rtnl_dereference(*tp));
+ rcu_assign_pointer(*tp, t);
+}
+
+static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn)
+{
+#ifdef CONFIG_IPV6_SIT_6RD
+ struct ip_tunnel *t = netdev_priv(dev);
+
+ if (t->dev == sitn->fb_tunnel_dev) {
+ ipv6_addr_set(&t->ip6rd.prefix, htonl(0x20020000), 0, 0, 0);
+ t->ip6rd.relay_prefix = 0;
+ t->ip6rd.prefixlen = 16;
+ t->ip6rd.relay_prefixlen = 0;
+ } else {
+ struct ip_tunnel *t0 = netdev_priv(sitn->fb_tunnel_dev);
+ memcpy(&t->ip6rd, &t0->ip6rd, sizeof(t->ip6rd));
+ }
+#endif
+}
+
+static struct ip_tunnel *ipip6_tunnel_locate(struct net *net,
+ struct ip_tunnel_parm *parms, int create)
+{
+ __be32 remote = parms->iph.daddr;
+ __be32 local = parms->iph.saddr;
+ struct ip_tunnel *t, *nt;
+ struct ip_tunnel __rcu **tp;
+ struct net_device *dev;
+ char name[IFNAMSIZ];
+ struct sit_net *sitn = net_generic(net, sit_net_id);
+
+ for (tp = __ipip6_bucket(sitn, parms);
+ (t = rtnl_dereference(*tp)) != NULL;
+ tp = &t->next) {
+ if (local == t->parms.iph.saddr &&
+ remote == t->parms.iph.daddr &&
+ parms->link == t->parms.link) {
+ if (create)
+ return NULL;
+ else
+ return t;
+ }
+ }
+ if (!create)
+ goto failed;
+
+ if (parms->name[0])
+ strlcpy(name, parms->name, IFNAMSIZ);
+ else
+ strcpy(name, "sit%d");
+
+ dev = alloc_netdev(sizeof(*t), name, ipip6_tunnel_setup);
+ if (dev == NULL)
+ return NULL;
+
+ dev_net_set(dev, net);
+
+ nt = netdev_priv(dev);
+
+ nt->parms = *parms;
+ if (ipip6_tunnel_init(dev) < 0)
+ goto failed_free;
+ ipip6_tunnel_clone_6rd(dev, sitn);
+
+ if (parms->i_flags & SIT_ISATAP)
+ dev->priv_flags |= IFF_ISATAP;
+
+ if (register_netdevice(dev) < 0)
+ goto failed_free;
+
+ strcpy(nt->parms.name, dev->name);
+
+ dev_hold(dev);
+
+ ipip6_tunnel_link(sitn, nt);
+ return nt;
+
+failed_free:
+ ipip6_dev_free(dev);
+failed:
+ return NULL;
+}
+
+#define for_each_prl_rcu(start) \
+ for (prl = rcu_dereference(start); \
+ prl; \
+ prl = rcu_dereference(prl->next))
+
+static struct ip_tunnel_prl_entry *
+__ipip6_tunnel_locate_prl(struct ip_tunnel *t, __be32 addr)
+{
+ struct ip_tunnel_prl_entry *prl;
+
+ for_each_prl_rcu(t->prl)
+ if (prl->addr == addr)
+ break;
+ return prl;
+
+}
+
+static int ipip6_tunnel_get_prl(struct ip_tunnel *t,
+ struct ip_tunnel_prl __user *a)
+{
+ struct ip_tunnel_prl kprl, *kp;
+ struct ip_tunnel_prl_entry *prl;
+ unsigned int cmax, c = 0, ca, len;
+ int ret = 0;
+
+ if (copy_from_user(&kprl, a, sizeof(kprl)))
+ return -EFAULT;
+ cmax = kprl.datalen / sizeof(kprl);
+ if (cmax > 1 && kprl.addr != htonl(INADDR_ANY))
+ cmax = 1;
+
+ /* For simple GET or for root users,
+ * we try harder to allocate.
+ */
+ kp = (cmax <= 1 || capable(CAP_NET_ADMIN)) ?
+ kcalloc(cmax, sizeof(*kp), GFP_KERNEL) :
+ NULL;
+
+ rcu_read_lock();
+
+ ca = t->prl_count < cmax ? t->prl_count : cmax;
+
+ if (!kp) {
+ /* We don't try hard to allocate much memory for
+ * non-root users.
+ * For root users, retry allocating enough memory for
+ * the answer.
+ */
+ kp = kcalloc(ca, sizeof(*kp), GFP_ATOMIC);
+ if (!kp) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+
+ c = 0;
+ for_each_prl_rcu(t->prl) {
+ if (c >= cmax)
+ break;
+ if (kprl.addr != htonl(INADDR_ANY) && prl->addr != kprl.addr)
+ continue;
+ kp[c].addr = prl->addr;
+ kp[c].flags = prl->flags;
+ c++;
+ if (kprl.addr != htonl(INADDR_ANY))
+ break;
+ }
+out:
+ rcu_read_unlock();
+
+ len = sizeof(*kp) * c;
+ ret = 0;
+ if ((len && copy_to_user(a + 1, kp, len)) || put_user(len, &a->datalen))
+ ret = -EFAULT;
+
+ kfree(kp);
+
+ return ret;
+}
+
+static int
+ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg)
+{
+ struct ip_tunnel_prl_entry *p;
+ int err = 0;
+
+ if (a->addr == htonl(INADDR_ANY))
+ return -EINVAL;
+
+ ASSERT_RTNL();
+
+ for (p = rtnl_dereference(t->prl); p; p = rtnl_dereference(p->next)) {
+ if (p->addr == a->addr) {
+ if (chg) {
+ p->flags = a->flags;
+ goto out;
+ }
+ err = -EEXIST;
+ goto out;
+ }
+ }
+
+ if (chg) {
+ err = -ENXIO;
+ goto out;
+ }
+
+ p = kzalloc(sizeof(struct ip_tunnel_prl_entry), GFP_KERNEL);
+ if (!p) {
+ err = -ENOBUFS;
+ goto out;
+ }
+
+ p->next = t->prl;
+ p->addr = a->addr;
+ p->flags = a->flags;
+ t->prl_count++;
+ rcu_assign_pointer(t->prl, p);
+out:
+ return err;
+}
+
+static void prl_list_destroy_rcu(struct rcu_head *head)
+{
+ struct ip_tunnel_prl_entry *p, *n;
+
+ p = container_of(head, struct ip_tunnel_prl_entry, rcu_head);
+ do {
+ n = rcu_dereference_protected(p->next, 1);
+ kfree(p);
+ p = n;
+ } while (p);
+}
+
+static int
+ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a)
+{
+ struct ip_tunnel_prl_entry *x;
+ struct ip_tunnel_prl_entry __rcu **p;
+ int err = 0;
+
+ ASSERT_RTNL();
+
+ if (a && a->addr != htonl(INADDR_ANY)) {
+ for (p = &t->prl;
+ (x = rtnl_dereference(*p)) != NULL;
+ p = &x->next) {
+ if (x->addr == a->addr) {
+ *p = x->next;
+ kfree_rcu(x, rcu_head);
+ t->prl_count--;
+ goto out;
+ }
+ }
+ err = -ENXIO;
+ } else {
+ x = rtnl_dereference(t->prl);
+ if (x) {
+ t->prl_count = 0;
+ call_rcu(&x->rcu_head, prl_list_destroy_rcu);
+ t->prl = NULL;
+ }
+ }
+out:
+ return err;
+}
+
+static int
+isatap_chksrc(struct sk_buff *skb, const struct iphdr *iph, struct ip_tunnel *t)
+{
+ struct ip_tunnel_prl_entry *p;
+ int ok = 1;
+
+ rcu_read_lock();
+ p = __ipip6_tunnel_locate_prl(t, iph->saddr);
+ if (p) {
+ if (p->flags & PRL_DEFAULT)
+ skb->ndisc_nodetype = NDISC_NODETYPE_DEFAULT;
+ else
+ skb->ndisc_nodetype = NDISC_NODETYPE_NODEFAULT;
+ } else {
+ const struct in6_addr *addr6 = &ipv6_hdr(skb)->saddr;
+
+ if (ipv6_addr_is_isatap(addr6) &&
+ (addr6->s6_addr32[3] == iph->saddr) &&
+ ipv6_chk_prefix(addr6, t->dev))
+ skb->ndisc_nodetype = NDISC_NODETYPE_HOST;
+ else
+ ok = 0;
+ }
+ rcu_read_unlock();
+ return ok;
+}
+
+static void ipip6_tunnel_uninit(struct net_device *dev)
+{
+ struct net *net = dev_net(dev);
+ struct sit_net *sitn = net_generic(net, sit_net_id);
+
+ if (dev == sitn->fb_tunnel_dev) {
+ RCU_INIT_POINTER(sitn->tunnels_wc[0], NULL);
+ } else {
+ ipip6_tunnel_unlink(sitn, netdev_priv(dev));
+ ipip6_tunnel_del_prl(netdev_priv(dev), NULL);
+ }
+ dev_put(dev);
+}
+
+
+static int ipip6_err(struct sk_buff *skb, u32 info)
+{
+
+/* All the routers (except for Linux) return only
+ 8 bytes of packet payload. It means, that precise relaying of
+ ICMP in the real Internet is absolutely infeasible.
+ */
+ const struct iphdr *iph = (const struct iphdr *)skb->data;
+ const int type = icmp_hdr(skb)->type;
+ const int code = icmp_hdr(skb)->code;
+ struct ip_tunnel *t;
+ int err;
+
+ switch (type) {
+ default:
+ case ICMP_PARAMETERPROB:
+ return 0;
+
+ case ICMP_DEST_UNREACH:
+ switch (code) {
+ case ICMP_SR_FAILED:
+ case ICMP_PORT_UNREACH:
+ /* Impossible event. */
+ return 0;
+ case ICMP_FRAG_NEEDED:
+ /* Soft state for pmtu is maintained by IP core. */
+ return 0;
+ default:
+ /* All others are translated to HOST_UNREACH.
+ rfc2003 contains "deep thoughts" about NET_UNREACH,
+ I believe they are just ether pollution. --ANK
+ */
+ break;
+ }
+ break;
+ case ICMP_TIME_EXCEEDED:
+ if (code != ICMP_EXC_TTL)
+ return 0;
+ break;
+ }
+
+ err = -ENOENT;
+
+ rcu_read_lock();
+ t = ipip6_tunnel_lookup(dev_net(skb->dev),
+ skb->dev,
+ iph->daddr,
+ iph->saddr);
+ if (t == NULL || t->parms.iph.daddr == 0)
+ goto out;
+
+ err = 0;
+ if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
+ goto out;
+
+ if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
+ t->err_count++;
+ else
+ t->err_count = 1;
+ t->err_time = jiffies;
+out:
+ rcu_read_unlock();
+ return err;
+}
+
+static inline void ipip6_ecn_decapsulate(const struct iphdr *iph, struct sk_buff *skb)
+{
+ if (INET_ECN_is_ce(iph->tos))
+ IP6_ECN_set_ce(ipv6_hdr(skb));
+}
+
+static int ipip6_rcv(struct sk_buff *skb)
+{
+ const struct iphdr *iph;
+ struct ip_tunnel *tunnel;
+
+ if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
+ goto out;
+
+ iph = ip_hdr(skb);
+
+ rcu_read_lock();
+ tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
+ iph->saddr, iph->daddr);
+ if (tunnel != NULL) {
+ struct pcpu_tstats *tstats;
+
+ secpath_reset(skb);
+ skb->mac_header = skb->network_header;
+ skb_reset_network_header(skb);
+ IPCB(skb)->flags = 0;
+ skb->protocol = htons(ETH_P_IPV6);
+ skb->pkt_type = PACKET_HOST;
+
+ if ((tunnel->dev->priv_flags & IFF_ISATAP) &&
+ !isatap_chksrc(skb, iph, tunnel)) {
+ tunnel->dev->stats.rx_errors++;
+ rcu_read_unlock();
+ kfree_skb(skb);
+ return 0;
+ }
+
+ tstats = this_cpu_ptr(tunnel->dev->tstats);
+ tstats->rx_packets++;
+ tstats->rx_bytes += skb->len;
+
+ __skb_tunnel_rx(skb, tunnel->dev);
+
+ ipip6_ecn_decapsulate(iph, skb);
+
+ netif_rx(skb);
+
+ rcu_read_unlock();
+ return 0;
+ }
+
+ /* no tunnel matched, let upstream know, ipsec may handle it */
+ rcu_read_unlock();
+ return 1;
+out:
+ kfree_skb(skb);
+ return 0;
+}
+
+/*
+ * Returns the embedded IPv4 address if the IPv6 address
+ * comes from 6rd / 6to4 (RFC 3056) addr space.
+ */
+static inline
+__be32 try_6rd(const struct in6_addr *v6dst, struct ip_tunnel *tunnel)
+{
+ __be32 dst = 0;
+
+#ifdef CONFIG_IPV6_SIT_6RD
+ if (ipv6_prefix_equal(v6dst, &tunnel->ip6rd.prefix,
+ tunnel->ip6rd.prefixlen)) {
+ unsigned int pbw0, pbi0;
+ int pbi1;
+ u32 d;
+
+ pbw0 = tunnel->ip6rd.prefixlen >> 5;
+ pbi0 = tunnel->ip6rd.prefixlen & 0x1f;
+
+ d = (ntohl(v6dst->s6_addr32[pbw0]) << pbi0) >>
+ tunnel->ip6rd.relay_prefixlen;
+
+ pbi1 = pbi0 - tunnel->ip6rd.relay_prefixlen;
+ if (pbi1 > 0)
+ d |= ntohl(v6dst->s6_addr32[pbw0 + 1]) >>
+ (32 - pbi1);
+
+ dst = tunnel->ip6rd.relay_prefix | htonl(d);
+ }
+#else
+ if (v6dst->s6_addr16[0] == htons(0x2002)) {
+ /* 6to4 v6 addr has 16 bits prefix, 32 v4addr, 16 SLA, ... */
+ memcpy(&dst, &v6dst->s6_addr16[1], 4);
+ }
+#endif
+ return dst;
+}
+
+/*
+ * This function assumes it is being called from dev_queue_xmit()
+ * and that skb is filled properly by that function.
+ */
+
+static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ struct pcpu_tstats *tstats;
+ const struct iphdr *tiph = &tunnel->parms.iph;
+ const struct ipv6hdr *iph6 = ipv6_hdr(skb);
+ u8 tos = tunnel->parms.iph.tos;
+ __be16 df = tiph->frag_off;
+ struct rtable *rt; /* Route to the other host */
+ struct net_device *tdev; /* Device to other host */
+ struct iphdr *iph; /* Our new IP header */
+ unsigned int max_headroom; /* The extra header space needed */
+ __be32 dst = tiph->daddr;
+ struct flowi4 fl4;
+ int mtu;
+ const struct in6_addr *addr6;
+ int addr_type;
+
+ if (skb->protocol != htons(ETH_P_IPV6))
+ goto tx_error;
+
+ if (tos == 1)
+ tos = ipv6_get_dsfield(iph6);
+
+ /* ISATAP (RFC4214) - must come before 6to4 */
+ if (dev->priv_flags & IFF_ISATAP) {
+ struct neighbour *neigh = NULL;
+ bool do_tx_error = false;
+
+ if (skb_dst(skb))
+ neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr);
+
+ if (neigh == NULL) {
+ if (net_ratelimit())
+ printk(KERN_DEBUG "sit: nexthop == NULL\n");
+ goto tx_error;
+ }
+
+ addr6 = (const struct in6_addr*)&neigh->primary_key;
+ addr_type = ipv6_addr_type(addr6);
+
+ if ((addr_type & IPV6_ADDR_UNICAST) &&
+ ipv6_addr_is_isatap(addr6))
+ dst = addr6->s6_addr32[3];
+ else
+ do_tx_error = true;
+
+ neigh_release(neigh);
+ if (do_tx_error)
+ goto tx_error;
+ }
+
+ if (!dst)
+ dst = try_6rd(&iph6->daddr, tunnel);
+
+ if (!dst) {
+ struct neighbour *neigh = NULL;
+ bool do_tx_error = false;
+
+ if (skb_dst(skb))
+ neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr);
+
+ if (neigh == NULL) {
+ if (net_ratelimit())
+ printk(KERN_DEBUG "sit: nexthop == NULL\n");
+ goto tx_error;
+ }
+
+ addr6 = (const struct in6_addr*)&neigh->primary_key;
+ addr_type = ipv6_addr_type(addr6);
+
+ if (addr_type == IPV6_ADDR_ANY) {
+ addr6 = &ipv6_hdr(skb)->daddr;
+ addr_type = ipv6_addr_type(addr6);
+ }
+
+ if ((addr_type & IPV6_ADDR_COMPATv4) != 0)
+ dst = addr6->s6_addr32[3];
+ else
+ do_tx_error = true;
+
+ neigh_release(neigh);
+ if (do_tx_error)
+ goto tx_error;
+ }
+
+ rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
+ dst, tiph->saddr,
+ 0, 0,
+ IPPROTO_IPV6, RT_TOS(tos),
+ tunnel->parms.link);
+ if (IS_ERR(rt)) {
+ dev->stats.tx_carrier_errors++;
+ goto tx_error_icmp;
+ }
+ if (rt->rt_type != RTN_UNICAST) {
+ ip_rt_put(rt);
+ dev->stats.tx_carrier_errors++;
+ goto tx_error_icmp;
+ }
+ tdev = rt->dst.dev;
+
+ if (tdev == dev) {
+ ip_rt_put(rt);
+ dev->stats.collisions++;
+ goto tx_error;
+ }
+
+ if (df) {
+ mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
+
+ if (mtu < 68) {
+ dev->stats.collisions++;
+ ip_rt_put(rt);
+ goto tx_error;
+ }
+
+ if (mtu < IPV6_MIN_MTU) {
+ mtu = IPV6_MIN_MTU;
+ df = 0;
+ }
+
+ if (tunnel->parms.iph.daddr && skb_dst(skb))
+ skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
+
+ if (skb->len > mtu) {
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ ip_rt_put(rt);
+ goto tx_error;
+ }
+ }
+
+ if (tunnel->err_count > 0) {
+ if (time_before(jiffies,
+ tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
+ tunnel->err_count--;
+ dst_link_failure(skb);
+ } else
+ tunnel->err_count = 0;
+ }
+
+ /*
+ * Okay, now see if we can stuff it in the buffer as-is.
+ */
+ max_headroom = LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr);
+
+ if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
+ (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
+ struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
+ if (!new_skb) {
+ ip_rt_put(rt);
+ dev->stats.tx_dropped++;
+ dev_kfree_skb(skb);
+ return NETDEV_TX_OK;
+ }
+ if (skb->sk)
+ skb_set_owner_w(new_skb, skb->sk);
+ dev_kfree_skb(skb);
+ skb = new_skb;
+ iph6 = ipv6_hdr(skb);
+ }
+
+ skb->transport_header = skb->network_header;
+ skb_push(skb, sizeof(struct iphdr));
+ skb_reset_network_header(skb);
+ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+ IPCB(skb)->flags = 0;
+ skb_dst_drop(skb);
+ skb_dst_set(skb, &rt->dst);
+
+ /*
+ * Push down and install the IPIP header.
+ */
+
+ iph = ip_hdr(skb);
+ iph->version = 4;
+ iph->ihl = sizeof(struct iphdr)>>2;
+ iph->frag_off = df;
+ iph->protocol = IPPROTO_IPV6;
+ iph->tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6));
+ iph->daddr = fl4.daddr;
+ iph->saddr = fl4.saddr;
+
+ if ((iph->ttl = tiph->ttl) == 0)
+ iph->ttl = iph6->hop_limit;
+
+ nf_reset(skb);
+ tstats = this_cpu_ptr(dev->tstats);
+ __IPTUNNEL_XMIT(tstats, &dev->stats);
+ return NETDEV_TX_OK;
+
+tx_error_icmp:
+ dst_link_failure(skb);
+tx_error:
+ dev->stats.tx_errors++;
+ dev_kfree_skb(skb);
+ return NETDEV_TX_OK;
+}
+
+static void ipip6_tunnel_bind_dev(struct net_device *dev)
+{
+ struct net_device *tdev = NULL;
+ struct ip_tunnel *tunnel;
+ const struct iphdr *iph;
+ struct flowi4 fl4;
+
+ tunnel = netdev_priv(dev);
+ iph = &tunnel->parms.iph;
+
+ if (iph->daddr) {
+ struct rtable *rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
+ iph->daddr, iph->saddr,
+ 0, 0,
+ IPPROTO_IPV6,
+ RT_TOS(iph->tos),
+ tunnel->parms.link);
+
+ if (!IS_ERR(rt)) {
+ tdev = rt->dst.dev;
+ ip_rt_put(rt);
+ }
+ dev->flags |= IFF_POINTOPOINT;
+ }
+
+ if (!tdev && tunnel->parms.link)
+ tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
+
+ if (tdev) {
+ dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
+ dev->mtu = tdev->mtu - sizeof(struct iphdr);
+ if (dev->mtu < IPV6_MIN_MTU)
+ dev->mtu = IPV6_MIN_MTU;
+ }
+ dev->iflink = tunnel->parms.link;
+}
+
+static int
+ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+ int err = 0;
+ struct ip_tunnel_parm p;
+ struct ip_tunnel_prl prl;
+ struct ip_tunnel *t;
+ struct net *net = dev_net(dev);
+ struct sit_net *sitn = net_generic(net, sit_net_id);
+#ifdef CONFIG_IPV6_SIT_6RD
+ struct ip_tunnel_6rd ip6rd;
+#endif
+
+ switch (cmd) {
+ case SIOCGETTUNNEL:
+#ifdef CONFIG_IPV6_SIT_6RD
+ case SIOCGET6RD:
+#endif
+ t = NULL;
+ if (dev == sitn->fb_tunnel_dev) {
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
+ err = -EFAULT;
+ break;
+ }
+ t = ipip6_tunnel_locate(net, &p, 0);
+ }
+ if (t == NULL)
+ t = netdev_priv(dev);
+
+ err = -EFAULT;
+ if (cmd == SIOCGETTUNNEL) {
+ memcpy(&p, &t->parms, sizeof(p));
+ if (copy_to_user(ifr->ifr_ifru.ifru_data, &p,
+ sizeof(p)))
+ goto done;
+#ifdef CONFIG_IPV6_SIT_6RD
+ } else {
+ ip6rd.prefix = t->ip6rd.prefix;
+ ip6rd.relay_prefix = t->ip6rd.relay_prefix;
+ ip6rd.prefixlen = t->ip6rd.prefixlen;
+ ip6rd.relay_prefixlen = t->ip6rd.relay_prefixlen;
+ if (copy_to_user(ifr->ifr_ifru.ifru_data, &ip6rd,
+ sizeof(ip6rd)))
+ goto done;
+#endif
+ }
+ err = 0;
+ break;
+
+ case SIOCADDTUNNEL:
+ case SIOCCHGTUNNEL:
+ err = -EPERM;
+ if (!capable(CAP_NET_ADMIN))
+ goto done;
+
+ err = -EFAULT;
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ goto done;
+
+ err = -EINVAL;
+ if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPV6 ||
+ p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
+ goto done;
+ if (p.iph.ttl)
+ p.iph.frag_off |= htons(IP_DF);
+
+ t = ipip6_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
+
+ if (dev != sitn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
+ if (t != NULL) {
+ if (t->dev != dev) {
+ err = -EEXIST;
+ break;
+ }
+ } else {
+ if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
+ (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
+ err = -EINVAL;
+ break;
+ }
+ t = netdev_priv(dev);
+ ipip6_tunnel_unlink(sitn, t);
+ synchronize_net();
+ t->parms.iph.saddr = p.iph.saddr;
+ t->parms.iph.daddr = p.iph.daddr;
+ memcpy(dev->dev_addr, &p.iph.saddr, 4);
+ memcpy(dev->broadcast, &p.iph.daddr, 4);
+ ipip6_tunnel_link(sitn, t);
+ netdev_state_change(dev);
+ }
+ }
+
+ if (t) {
+ err = 0;
+ if (cmd == SIOCCHGTUNNEL) {
+ t->parms.iph.ttl = p.iph.ttl;
+ t->parms.iph.tos = p.iph.tos;
+ if (t->parms.link != p.link) {
+ t->parms.link = p.link;
+ ipip6_tunnel_bind_dev(dev);
+ netdev_state_change(dev);
+ }
+ }
+ if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
+ err = -EFAULT;
+ } else
+ err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
+ break;
+
+ case SIOCDELTUNNEL:
+ err = -EPERM;
+ if (!capable(CAP_NET_ADMIN))
+ goto done;
+
+ if (dev == sitn->fb_tunnel_dev) {
+ err = -EFAULT;
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ goto done;
+ err = -ENOENT;
+ if ((t = ipip6_tunnel_locate(net, &p, 0)) == NULL)
+ goto done;
+ err = -EPERM;
+ if (t == netdev_priv(sitn->fb_tunnel_dev))
+ goto done;
+ dev = t->dev;
+ }
+ unregister_netdevice(dev);
+ err = 0;
+ break;
+
+ case SIOCGETPRL:
+ err = -EINVAL;
+ if (dev == sitn->fb_tunnel_dev)
+ goto done;
+ err = -ENOENT;
+ if (!(t = netdev_priv(dev)))
+ goto done;
+ err = ipip6_tunnel_get_prl(t, ifr->ifr_ifru.ifru_data);
+ break;
+
+ case SIOCADDPRL:
+ case SIOCDELPRL:
+ case SIOCCHGPRL:
+ err = -EPERM;
+ if (!capable(CAP_NET_ADMIN))
+ goto done;
+ err = -EINVAL;
+ if (dev == sitn->fb_tunnel_dev)
+ goto done;
+ err = -EFAULT;
+ if (copy_from_user(&prl, ifr->ifr_ifru.ifru_data, sizeof(prl)))
+ goto done;
+ err = -ENOENT;
+ if (!(t = netdev_priv(dev)))
+ goto done;
+
+ switch (cmd) {
+ case SIOCDELPRL:
+ err = ipip6_tunnel_del_prl(t, &prl);
+ break;
+ case SIOCADDPRL:
+ case SIOCCHGPRL:
+ err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL);
+ break;
+ }
+ netdev_state_change(dev);
+ break;
+
+#ifdef CONFIG_IPV6_SIT_6RD
+ case SIOCADD6RD:
+ case SIOCCHG6RD:
+ case SIOCDEL6RD:
+ err = -EPERM;
+ if (!capable(CAP_NET_ADMIN))
+ goto done;
+
+ err = -EFAULT;
+ if (copy_from_user(&ip6rd, ifr->ifr_ifru.ifru_data,
+ sizeof(ip6rd)))
+ goto done;
+
+ t = netdev_priv(dev);
+
+ if (cmd != SIOCDEL6RD) {
+ struct in6_addr prefix;
+ __be32 relay_prefix;
+
+ err = -EINVAL;
+ if (ip6rd.relay_prefixlen > 32 ||
+ ip6rd.prefixlen + (32 - ip6rd.relay_prefixlen) > 64)
+ goto done;
+
+ ipv6_addr_prefix(&prefix, &ip6rd.prefix,
+ ip6rd.prefixlen);
+ if (!ipv6_addr_equal(&prefix, &ip6rd.prefix))
+ goto done;
+ if (ip6rd.relay_prefixlen)
+ relay_prefix = ip6rd.relay_prefix &
+ htonl(0xffffffffUL <<
+ (32 - ip6rd.relay_prefixlen));
+ else
+ relay_prefix = 0;
+ if (relay_prefix != ip6rd.relay_prefix)
+ goto done;
+
+ t->ip6rd.prefix = prefix;
+ t->ip6rd.relay_prefix = relay_prefix;
+ t->ip6rd.prefixlen = ip6rd.prefixlen;
+ t->ip6rd.relay_prefixlen = ip6rd.relay_prefixlen;
+ } else
+ ipip6_tunnel_clone_6rd(dev, sitn);
+
+ err = 0;
+ break;
+#endif
+
+ default:
+ err = -EINVAL;
+ }
+
+done:
+ return err;
+}
+
+static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu)
+{
+ if (new_mtu < IPV6_MIN_MTU || new_mtu > 0xFFF8 - sizeof(struct iphdr))
+ return -EINVAL;
+ dev->mtu = new_mtu;
+ return 0;
+}
+
+static const struct net_device_ops ipip6_netdev_ops = {
+ .ndo_uninit = ipip6_tunnel_uninit,
+ .ndo_start_xmit = ipip6_tunnel_xmit,
+ .ndo_do_ioctl = ipip6_tunnel_ioctl,
+ .ndo_change_mtu = ipip6_tunnel_change_mtu,
+ .ndo_get_stats = ipip6_get_stats,
+};
+
+static void ipip6_dev_free(struct net_device *dev)
+{
+ free_percpu(dev->tstats);
+ free_netdev(dev);
+}
+
+static void ipip6_tunnel_setup(struct net_device *dev)
+{
+ dev->netdev_ops = &ipip6_netdev_ops;
+ dev->destructor = ipip6_dev_free;
+
+ dev->type = ARPHRD_SIT;
+ dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
+ dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr);
+ dev->flags = IFF_NOARP;
+ dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+ dev->iflink = 0;
+ dev->addr_len = 4;
+ dev->features |= NETIF_F_NETNS_LOCAL;
+ dev->features |= NETIF_F_LLTX;
+}
+
+static int ipip6_tunnel_init(struct net_device *dev)
+{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+
+ tunnel->dev = dev;
+
+ memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
+ memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
+
+ ipip6_tunnel_bind_dev(dev);
+ dev->tstats = alloc_percpu(struct pcpu_tstats);
+ if (!dev->tstats)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static int __net_init ipip6_fb_tunnel_init(struct net_device *dev)
+{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ struct iphdr *iph = &tunnel->parms.iph;
+ struct net *net = dev_net(dev);
+ struct sit_net *sitn = net_generic(net, sit_net_id);
+
+ tunnel->dev = dev;
+ strcpy(tunnel->parms.name, dev->name);
+
+ iph->version = 4;
+ iph->protocol = IPPROTO_IPV6;
+ iph->ihl = 5;
+ iph->ttl = 64;
+
+ dev->tstats = alloc_percpu(struct pcpu_tstats);
+ if (!dev->tstats)
+ return -ENOMEM;
+ dev_hold(dev);
+ rcu_assign_pointer(sitn->tunnels_wc[0], tunnel);
+ return 0;
+}
+
+static struct xfrm_tunnel sit_handler __read_mostly = {
+ .handler = ipip6_rcv,
+ .err_handler = ipip6_err,
+ .priority = 1,
+};
+
+static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_head *head)
+{
+ int prio;
+
+ for (prio = 1; prio < 4; prio++) {
+ int h;
+ for (h = 0; h < HASH_SIZE; h++) {
+ struct ip_tunnel *t;
+
+ t = rtnl_dereference(sitn->tunnels[prio][h]);
+ while (t != NULL) {
+ unregister_netdevice_queue(t->dev, head);
+ t = rtnl_dereference(t->next);
+ }
+ }
+ }
+}
+
+static int __net_init sit_init_net(struct net *net)
+{
+ struct sit_net *sitn = net_generic(net, sit_net_id);
+ struct ip_tunnel *t;
+ int err;
+
+ sitn->tunnels[0] = sitn->tunnels_wc;
+ sitn->tunnels[1] = sitn->tunnels_l;
+ sitn->tunnels[2] = sitn->tunnels_r;
+ sitn->tunnels[3] = sitn->tunnels_r_l;
+
+ sitn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "sit0",
+ ipip6_tunnel_setup);
+ if (!sitn->fb_tunnel_dev) {
+ err = -ENOMEM;
+ goto err_alloc_dev;
+ }
+ dev_net_set(sitn->fb_tunnel_dev, net);
+
+ err = ipip6_fb_tunnel_init(sitn->fb_tunnel_dev);
+ if (err)
+ goto err_dev_free;
+
+ ipip6_tunnel_clone_6rd(sitn->fb_tunnel_dev, sitn);
+
+ if ((err = register_netdev(sitn->fb_tunnel_dev)))
+ goto err_reg_dev;
+
+ t = netdev_priv(sitn->fb_tunnel_dev);
+
+ strcpy(t->parms.name, sitn->fb_tunnel_dev->name);
+ return 0;
+
+err_reg_dev:
+ dev_put(sitn->fb_tunnel_dev);
+err_dev_free:
+ ipip6_dev_free(sitn->fb_tunnel_dev);
+err_alloc_dev:
+ return err;
+}
+
+static void __net_exit sit_exit_net(struct net *net)
+{
+ struct sit_net *sitn = net_generic(net, sit_net_id);
+ LIST_HEAD(list);
+
+ rtnl_lock();
+ sit_destroy_tunnels(sitn, &list);
+ unregister_netdevice_queue(sitn->fb_tunnel_dev, &list);
+ unregister_netdevice_many(&list);
+ rtnl_unlock();
+}
+
+static struct pernet_operations sit_net_ops = {
+ .init = sit_init_net,
+ .exit = sit_exit_net,
+ .id = &sit_net_id,
+ .size = sizeof(struct sit_net),
+};
+
+static void __exit sit_cleanup(void)
+{
+ xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
+
+ unregister_pernet_device(&sit_net_ops);
+ rcu_barrier(); /* Wait for completion of call_rcu()'s */
+}
+
+static int __init sit_init(void)
+{
+ int err;
+
+ printk(KERN_INFO "IPv6 over IPv4 tunneling driver\n");
+
+ err = register_pernet_device(&sit_net_ops);
+ if (err < 0)
+ return err;
+ err = xfrm4_tunnel_register(&sit_handler, AF_INET6);
+ if (err < 0) {
+ unregister_pernet_device(&sit_net_ops);
+ printk(KERN_INFO "sit init: Can't add protocol\n");
+ }
+ return err;
+}
+
+module_init(sit_init);
+module_exit(sit_cleanup);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NETDEV("sit0");
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
new file mode 100644
index 00000000..8e951d8d
--- /dev/null
+++ b/net/ipv6/syncookies.c
@@ -0,0 +1,269 @@
+/*
+ * IPv6 Syncookies implementation for the Linux kernel
+ *
+ * Authors:
+ * Glenn Griffin <ggriffin.kernel@gmail.com>
+ *
+ * Based on IPv4 implementation by Andi Kleen
+ * linux/net/ipv4/syncookies.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/tcp.h>
+#include <linux/random.h>
+#include <linux/cryptohash.h>
+#include <linux/kernel.h>
+#include <net/ipv6.h>
+#include <net/tcp.h>
+
+extern int sysctl_tcp_syncookies;
+extern __u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS];
+
+#define COOKIEBITS 24 /* Upper bits store count */
+#define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1)
+
+/* Table must be sorted. */
+static __u16 const msstab[] = {
+ 64,
+ 512,
+ 536,
+ 1280 - 60,
+ 1480 - 60,
+ 1500 - 60,
+ 4460 - 60,
+ 9000 - 60,
+};
+
+/*
+ * This (misnamed) value is the age of syncookie which is permitted.
+ * Its ideal value should be dependent on TCP_TIMEOUT_INIT and
+ * sysctl_tcp_retries1. It's a rather complicated formula (exponential
+ * backoff) to compute at runtime so it's currently hardcoded here.
+ */
+#define COUNTER_TRIES 4
+
+static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
+ struct request_sock *req,
+ struct dst_entry *dst)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ struct sock *child;
+
+ child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst);
+ if (child)
+ inet_csk_reqsk_queue_add(sk, req, child);
+ else
+ reqsk_free(req);
+
+ return child;
+}
+
+static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS],
+ ipv6_cookie_scratch);
+
+static u32 cookie_hash(const struct in6_addr *saddr, const struct in6_addr *daddr,
+ __be16 sport, __be16 dport, u32 count, int c)
+{
+ __u32 *tmp = __get_cpu_var(ipv6_cookie_scratch);
+
+ /*
+ * we have 320 bits of information to hash, copy in the remaining
+ * 192 bits required for sha_transform, from the syncookie_secret
+ * and overwrite the digest with the secret
+ */
+ memcpy(tmp + 10, syncookie_secret[c], 44);
+ memcpy(tmp, saddr, 16);
+ memcpy(tmp + 4, daddr, 16);
+ tmp[8] = ((__force u32)sport << 16) + (__force u32)dport;
+ tmp[9] = count;
+ sha_transform(tmp + 16, (__u8 *)tmp, tmp + 16 + 5);
+
+ return tmp[17];
+}
+
+static __u32 secure_tcp_syn_cookie(const struct in6_addr *saddr,
+ const struct in6_addr *daddr,
+ __be16 sport, __be16 dport, __u32 sseq,
+ __u32 count, __u32 data)
+{
+ return (cookie_hash(saddr, daddr, sport, dport, 0, 0) +
+ sseq + (count << COOKIEBITS) +
+ ((cookie_hash(saddr, daddr, sport, dport, count, 1) + data)
+ & COOKIEMASK));
+}
+
+static __u32 check_tcp_syn_cookie(__u32 cookie, const struct in6_addr *saddr,
+ const struct in6_addr *daddr, __be16 sport,
+ __be16 dport, __u32 sseq, __u32 count,
+ __u32 maxdiff)
+{
+ __u32 diff;
+
+ cookie -= cookie_hash(saddr, daddr, sport, dport, 0, 0) + sseq;
+
+ diff = (count - (cookie >> COOKIEBITS)) & ((__u32) -1 >> COOKIEBITS);
+ if (diff >= maxdiff)
+ return (__u32)-1;
+
+ return (cookie -
+ cookie_hash(saddr, daddr, sport, dport, count - diff, 1))
+ & COOKIEMASK;
+}
+
+__u32 cookie_v6_init_sequence(struct sock *sk, const struct sk_buff *skb, __u16 *mssp)
+{
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
+ const struct tcphdr *th = tcp_hdr(skb);
+ int mssind;
+ const __u16 mss = *mssp;
+
+ tcp_synq_overflow(sk);
+
+ for (mssind = ARRAY_SIZE(msstab) - 1; mssind ; mssind--)
+ if (mss >= msstab[mssind])
+ break;
+
+ *mssp = msstab[mssind];
+
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
+
+ return secure_tcp_syn_cookie(&iph->saddr, &iph->daddr, th->source,
+ th->dest, ntohl(th->seq),
+ jiffies / (HZ * 60), mssind);
+}
+
+static inline int cookie_check(const struct sk_buff *skb, __u32 cookie)
+{
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
+ const struct tcphdr *th = tcp_hdr(skb);
+ __u32 seq = ntohl(th->seq) - 1;
+ __u32 mssind = check_tcp_syn_cookie(cookie, &iph->saddr, &iph->daddr,
+ th->source, th->dest, seq,
+ jiffies / (HZ * 60), COUNTER_TRIES);
+
+ return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0;
+}
+
+struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
+{
+ struct tcp_options_received tcp_opt;
+ const u8 *hash_location;
+ struct inet_request_sock *ireq;
+ struct inet6_request_sock *ireq6;
+ struct tcp_request_sock *treq;
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+ const struct tcphdr *th = tcp_hdr(skb);
+ __u32 cookie = ntohl(th->ack_seq) - 1;
+ struct sock *ret = sk;
+ struct request_sock *req;
+ int mss;
+ struct dst_entry *dst;
+ __u8 rcv_wscale;
+ bool ecn_ok = false;
+
+ if (!sysctl_tcp_syncookies || !th->ack || th->rst)
+ goto out;
+
+ if (tcp_synq_no_recent_overflow(sk) ||
+ (mss = cookie_check(skb, cookie)) == 0) {
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED);
+ goto out;
+ }
+
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV);
+
+ /* check for timestamp cookie support */
+ memset(&tcp_opt, 0, sizeof(tcp_opt));
+ tcp_parse_options(skb, &tcp_opt, &hash_location, 0);
+
+ if (!cookie_check_timestamp(&tcp_opt, &ecn_ok))
+ goto out;
+
+ ret = NULL;
+ req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
+ if (!req)
+ goto out;
+
+ ireq = inet_rsk(req);
+ ireq6 = inet6_rsk(req);
+ treq = tcp_rsk(req);
+
+ if (security_inet_conn_request(sk, skb, req))
+ goto out_free;
+
+ req->mss = mss;
+ ireq->rmt_port = th->source;
+ ireq->loc_port = th->dest;
+ ireq6->rmt_addr = ipv6_hdr(skb)->saddr;
+ ireq6->loc_addr = ipv6_hdr(skb)->daddr;
+ if (ipv6_opt_accepted(sk, skb) ||
+ np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
+ np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
+ atomic_inc(&skb->users);
+ ireq6->pktopts = skb;
+ }
+
+ ireq6->iif = sk->sk_bound_dev_if;
+ /* So that link locals have meaning */
+ if (!sk->sk_bound_dev_if &&
+ ipv6_addr_type(&ireq6->rmt_addr) & IPV6_ADDR_LINKLOCAL)
+ ireq6->iif = inet6_iif(skb);
+
+ req->expires = 0UL;
+ req->retrans = 0;
+ ireq->ecn_ok = ecn_ok;
+ ireq->snd_wscale = tcp_opt.snd_wscale;
+ ireq->sack_ok = tcp_opt.sack_ok;
+ ireq->wscale_ok = tcp_opt.wscale_ok;
+ ireq->tstamp_ok = tcp_opt.saw_tstamp;
+ req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
+ treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0;
+ treq->rcv_isn = ntohl(th->seq) - 1;
+ treq->snt_isn = cookie;
+
+ /*
+ * We need to lookup the dst_entry to get the correct window size.
+ * This is taken from tcp_v6_syn_recv_sock. Somebody please enlighten
+ * me if there is a preferred way.
+ */
+ {
+ struct in6_addr *final_p, final;
+ struct flowi6 fl6;
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_proto = IPPROTO_TCP;
+ fl6.daddr = ireq6->rmt_addr;
+ final_p = fl6_update_dst(&fl6, np->opt, &final);
+ fl6.saddr = ireq6->loc_addr;
+ fl6.flowi6_oif = sk->sk_bound_dev_if;
+ fl6.flowi6_mark = sk->sk_mark;
+ fl6.fl6_dport = inet_rsk(req)->rmt_port;
+ fl6.fl6_sport = inet_sk(sk)->inet_sport;
+ security_req_classify_flow(req, flowi6_to_flowi(&fl6));
+
+ dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
+ if (IS_ERR(dst))
+ goto out_free;
+ }
+
+ req->window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW);
+ tcp_select_initial_window(tcp_full_space(sk), req->mss,
+ &req->rcv_wnd, &req->window_clamp,
+ ireq->wscale_ok, &rcv_wscale,
+ dst_metric(dst, RTAX_INITRWND));
+
+ ireq->rcv_wscale = rcv_wscale;
+
+ ret = get_cookie_sock(sk, skb, req, dst);
+out:
+ return ret;
+out_free:
+ reqsk_free(req);
+ return NULL;
+}
+
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
new file mode 100644
index 00000000..166a57c4
--- /dev/null
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -0,0 +1,177 @@
+/*
+ * sysctl_net_ipv6.c: sysctl interface to net IPV6 subsystem.
+ *
+ * Changes:
+ * YOSHIFUJI Hideaki @USAGI: added icmp sysctl table.
+ */
+
+#include <linux/mm.h>
+#include <linux/sysctl.h>
+#include <linux/in6.h>
+#include <linux/ipv6.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <net/ndisc.h>
+#include <net/ipv6.h>
+#include <net/addrconf.h>
+#include <net/inet_frag.h>
+
+static struct ctl_table empty[1];
+
+static ctl_table ipv6_static_skeleton[] = {
+ {
+ .procname = "neigh",
+ .maxlen = 0,
+ .mode = 0555,
+ .child = empty,
+ },
+ { }
+};
+
+static ctl_table ipv6_table_template[] = {
+ {
+ .procname = "route",
+ .maxlen = 0,
+ .mode = 0555,
+ .child = ipv6_route_table_template
+ },
+ {
+ .procname = "icmp",
+ .maxlen = 0,
+ .mode = 0555,
+ .child = ipv6_icmp_table_template
+ },
+ {
+ .procname = "bindv6only",
+ .data = &init_net.ipv6.sysctl.bindv6only,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ { }
+};
+
+static ctl_table ipv6_rotable[] = {
+ {
+ .procname = "mld_max_msf",
+ .data = &sysctl_mld_max_msf,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ { }
+};
+
+struct ctl_path net_ipv6_ctl_path[] = {
+ { .procname = "net", },
+ { .procname = "ipv6", },
+ { },
+};
+EXPORT_SYMBOL_GPL(net_ipv6_ctl_path);
+
+static int __net_init ipv6_sysctl_net_init(struct net *net)
+{
+ struct ctl_table *ipv6_table;
+ struct ctl_table *ipv6_route_table;
+ struct ctl_table *ipv6_icmp_table;
+ int err;
+
+ err = -ENOMEM;
+ ipv6_table = kmemdup(ipv6_table_template, sizeof(ipv6_table_template),
+ GFP_KERNEL);
+ if (!ipv6_table)
+ goto out;
+
+ ipv6_route_table = ipv6_route_sysctl_init(net);
+ if (!ipv6_route_table)
+ goto out_ipv6_table;
+ ipv6_table[0].child = ipv6_route_table;
+
+ ipv6_icmp_table = ipv6_icmp_sysctl_init(net);
+ if (!ipv6_icmp_table)
+ goto out_ipv6_route_table;
+ ipv6_table[1].child = ipv6_icmp_table;
+
+ ipv6_table[2].data = &net->ipv6.sysctl.bindv6only;
+
+ net->ipv6.sysctl.table = register_net_sysctl_table(net, net_ipv6_ctl_path,
+ ipv6_table);
+ if (!net->ipv6.sysctl.table)
+ goto out_ipv6_icmp_table;
+
+ err = 0;
+out:
+ return err;
+
+out_ipv6_icmp_table:
+ kfree(ipv6_icmp_table);
+out_ipv6_route_table:
+ kfree(ipv6_route_table);
+out_ipv6_table:
+ kfree(ipv6_table);
+ goto out;
+}
+
+static void __net_exit ipv6_sysctl_net_exit(struct net *net)
+{
+ struct ctl_table *ipv6_table;
+ struct ctl_table *ipv6_route_table;
+ struct ctl_table *ipv6_icmp_table;
+
+ ipv6_table = net->ipv6.sysctl.table->ctl_table_arg;
+ ipv6_route_table = ipv6_table[0].child;
+ ipv6_icmp_table = ipv6_table[1].child;
+
+ unregister_net_sysctl_table(net->ipv6.sysctl.table);
+
+ kfree(ipv6_table);
+ kfree(ipv6_route_table);
+ kfree(ipv6_icmp_table);
+}
+
+static struct pernet_operations ipv6_sysctl_net_ops = {
+ .init = ipv6_sysctl_net_init,
+ .exit = ipv6_sysctl_net_exit,
+};
+
+static struct ctl_table_header *ip6_header;
+
+int ipv6_sysctl_register(void)
+{
+ int err = -ENOMEM;
+
+ ip6_header = register_net_sysctl_rotable(net_ipv6_ctl_path, ipv6_rotable);
+ if (ip6_header == NULL)
+ goto out;
+
+ err = register_pernet_subsys(&ipv6_sysctl_net_ops);
+ if (err)
+ goto err_pernet;
+out:
+ return err;
+
+err_pernet:
+ unregister_net_sysctl_table(ip6_header);
+ goto out;
+}
+
+void ipv6_sysctl_unregister(void)
+{
+ unregister_net_sysctl_table(ip6_header);
+ unregister_pernet_subsys(&ipv6_sysctl_net_ops);
+}
+
+static struct ctl_table_header *ip6_base;
+
+int ipv6_static_sysctl_register(void)
+{
+ ip6_base = register_sysctl_paths(net_ipv6_ctl_path, ipv6_static_skeleton);
+ if (ip6_base == NULL)
+ return -ENOMEM;
+ return 0;
+}
+
+void ipv6_static_sysctl_unregister(void)
+{
+ unregister_net_sysctl_table(ip6_base);
+}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
new file mode 100644
index 00000000..98256cf7
--- /dev/null
+++ b/net/ipv6/tcp_ipv6.c
@@ -0,0 +1,2196 @@
+/*
+ * TCP over IPv6
+ * Linux INET6 implementation
+ *
+ * Authors:
+ * Pedro Roque <roque@di.fc.ul.pt>
+ *
+ * Based on:
+ * linux/net/ipv4/tcp.c
+ * linux/net/ipv4/tcp_input.c
+ * linux/net/ipv4/tcp_output.c
+ *
+ * Fixes:
+ * Hideaki YOSHIFUJI : sin6_scope_id support
+ * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
+ * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
+ * a single port at the same time.
+ * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/bottom_half.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/jiffies.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <linux/init.h>
+#include <linux/jhash.h>
+#include <linux/ipsec.h>
+#include <linux/times.h>
+#include <linux/slab.h>
+
+#include <linux/ipv6.h>
+#include <linux/icmpv6.h>
+#include <linux/random.h>
+
+#include <net/tcp.h>
+#include <net/ndisc.h>
+#include <net/inet6_hashtables.h>
+#include <net/inet6_connection_sock.h>
+#include <net/ipv6.h>
+#include <net/transp_v6.h>
+#include <net/addrconf.h>
+#include <net/ip6_route.h>
+#include <net/ip6_checksum.h>
+#include <net/inet_ecn.h>
+#include <net/protocol.h>
+#include <net/xfrm.h>
+#include <net/snmp.h>
+#include <net/dsfield.h>
+#include <net/timewait_sock.h>
+#include <net/netdma.h>
+#include <net/inet_common.h>
+#include <net/secure_seq.h>
+#include <net/tcp_memcontrol.h>
+
+#include <asm/uaccess.h>
+
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+#include <linux/crypto.h>
+#include <linux/scatterlist.h>
+
+static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb);
+static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
+ struct request_sock *req);
+
+static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
+static void __tcp_v6_send_check(struct sk_buff *skb,
+ const struct in6_addr *saddr,
+ const struct in6_addr *daddr);
+
+static const struct inet_connection_sock_af_ops ipv6_mapped;
+static const struct inet_connection_sock_af_ops ipv6_specific;
+#ifdef CONFIG_TCP_MD5SIG
+static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
+static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
+#else
+static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
+ const struct in6_addr *addr)
+{
+ return NULL;
+}
+#endif
+
+static void tcp_v6_hash(struct sock *sk)
+{
+ if (sk->sk_state != TCP_CLOSE) {
+ if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) {
+ tcp_prot.hash(sk);
+ return;
+ }
+ local_bh_disable();
+ __inet6_hash(sk, NULL);
+ local_bh_enable();
+ }
+}
+
+static __inline__ __sum16 tcp_v6_check(int len,
+ const struct in6_addr *saddr,
+ const struct in6_addr *daddr,
+ __wsum base)
+{
+ return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
+}
+
+static __u32 tcp_v6_init_sequence(const struct sk_buff *skb)
+{
+ return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
+ ipv6_hdr(skb)->saddr.s6_addr32,
+ tcp_hdr(skb)->dest,
+ tcp_hdr(skb)->source);
+}
+
+static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
+ int addr_len)
+{
+ struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
+ struct inet_sock *inet = inet_sk(sk);
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct in6_addr *saddr = NULL, *final_p, final;
+ struct rt6_info *rt;
+ struct flowi6 fl6;
+ struct dst_entry *dst;
+ int addr_type;
+ int err;
+
+ if (addr_len < SIN6_LEN_RFC2133)
+ return -EINVAL;
+
+ if (usin->sin6_family != AF_INET6)
+ return -EAFNOSUPPORT;
+
+ memset(&fl6, 0, sizeof(fl6));
+
+ if (np->sndflow) {
+ fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
+ IP6_ECN_flow_init(fl6.flowlabel);
+ if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
+ struct ip6_flowlabel *flowlabel;
+ flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
+ if (flowlabel == NULL)
+ return -EINVAL;
+ usin->sin6_addr = flowlabel->dst;
+ fl6_sock_release(flowlabel);
+ }
+ }
+
+ /*
+ * connect() to INADDR_ANY means loopback (BSD'ism).
+ */
+
+ if(ipv6_addr_any(&usin->sin6_addr))
+ usin->sin6_addr.s6_addr[15] = 0x1;
+
+ addr_type = ipv6_addr_type(&usin->sin6_addr);
+
+ if(addr_type & IPV6_ADDR_MULTICAST)
+ return -ENETUNREACH;
+
+ if (addr_type&IPV6_ADDR_LINKLOCAL) {
+ if (addr_len >= sizeof(struct sockaddr_in6) &&
+ usin->sin6_scope_id) {
+ /* If interface is set while binding, indices
+ * must coincide.
+ */
+ if (sk->sk_bound_dev_if &&
+ sk->sk_bound_dev_if != usin->sin6_scope_id)
+ return -EINVAL;
+
+ sk->sk_bound_dev_if = usin->sin6_scope_id;
+ }
+
+ /* Connect to link-local address requires an interface */
+ if (!sk->sk_bound_dev_if)
+ return -EINVAL;
+ }
+
+ if (tp->rx_opt.ts_recent_stamp &&
+ !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
+ tp->rx_opt.ts_recent = 0;
+ tp->rx_opt.ts_recent_stamp = 0;
+ tp->write_seq = 0;
+ }
+
+ np->daddr = usin->sin6_addr;
+ np->flow_label = fl6.flowlabel;
+
+ /*
+ * TCP over IPv4
+ */
+
+ if (addr_type == IPV6_ADDR_MAPPED) {
+ u32 exthdrlen = icsk->icsk_ext_hdr_len;
+ struct sockaddr_in sin;
+
+ SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
+
+ if (__ipv6_only_sock(sk))
+ return -ENETUNREACH;
+
+ sin.sin_family = AF_INET;
+ sin.sin_port = usin->sin6_port;
+ sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
+
+ icsk->icsk_af_ops = &ipv6_mapped;
+ sk->sk_backlog_rcv = tcp_v4_do_rcv;
+#ifdef CONFIG_TCP_MD5SIG
+ tp->af_specific = &tcp_sock_ipv6_mapped_specific;
+#endif
+
+ err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
+
+ if (err) {
+ icsk->icsk_ext_hdr_len = exthdrlen;
+ icsk->icsk_af_ops = &ipv6_specific;
+ sk->sk_backlog_rcv = tcp_v6_do_rcv;
+#ifdef CONFIG_TCP_MD5SIG
+ tp->af_specific = &tcp_sock_ipv6_specific;
+#endif
+ goto failure;
+ } else {
+ ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
+ ipv6_addr_set_v4mapped(inet->inet_rcv_saddr,
+ &np->rcv_saddr);
+ }
+
+ return err;
+ }
+
+ if (!ipv6_addr_any(&np->rcv_saddr))
+ saddr = &np->rcv_saddr;
+
+ fl6.flowi6_proto = IPPROTO_TCP;
+ fl6.daddr = np->daddr;
+ fl6.saddr = saddr ? *saddr : np->saddr;
+ fl6.flowi6_oif = sk->sk_bound_dev_if;
+ fl6.flowi6_mark = sk->sk_mark;
+ fl6.fl6_dport = usin->sin6_port;
+ fl6.fl6_sport = inet->inet_sport;
+
+ final_p = fl6_update_dst(&fl6, np->opt, &final);
+
+ security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
+
+ dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true);
+ if (IS_ERR(dst)) {
+ err = PTR_ERR(dst);
+ goto failure;
+ }
+
+ if (saddr == NULL) {
+ saddr = &fl6.saddr;
+ np->rcv_saddr = *saddr;
+ }
+
+ /* set the source address */
+ np->saddr = *saddr;
+ inet->inet_rcv_saddr = LOOPBACK4_IPV6;
+
+ sk->sk_gso_type = SKB_GSO_TCPV6;
+ __ip6_dst_store(sk, dst, NULL, NULL);
+
+ rt = (struct rt6_info *) dst;
+ if (tcp_death_row.sysctl_tw_recycle &&
+ !tp->rx_opt.ts_recent_stamp &&
+ ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr)) {
+ struct inet_peer *peer = rt6_get_peer(rt);
+ /*
+ * VJ's idea. We save last timestamp seen from
+ * the destination in peer table, when entering state
+ * TIME-WAIT * and initialize rx_opt.ts_recent from it,
+ * when trying new connection.
+ */
+ if (peer) {
+ inet_peer_refcheck(peer);
+ if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
+ tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
+ tp->rx_opt.ts_recent = peer->tcp_ts;
+ }
+ }
+ }
+
+ icsk->icsk_ext_hdr_len = 0;
+ if (np->opt)
+ icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
+ np->opt->opt_nflen);
+
+ tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
+
+ inet->inet_dport = usin->sin6_port;
+
+ tcp_set_state(sk, TCP_SYN_SENT);
+ err = inet6_hash_connect(&tcp_death_row, sk);
+ if (err)
+ goto late_failure;
+
+ if (!tp->write_seq)
+ tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
+ np->daddr.s6_addr32,
+ inet->inet_sport,
+ inet->inet_dport);
+
+ err = tcp_connect(sk);
+ if (err)
+ goto late_failure;
+
+ return 0;
+
+late_failure:
+ tcp_set_state(sk, TCP_CLOSE);
+ __sk_dst_reset(sk);
+failure:
+ inet->inet_dport = 0;
+ sk->sk_route_caps = 0;
+ return err;
+}
+
+static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+{
+ const struct ipv6hdr *hdr = (const struct ipv6hdr*)skb->data;
+ const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
+ struct ipv6_pinfo *np;
+ struct sock *sk;
+ int err;
+ struct tcp_sock *tp;
+ __u32 seq;
+ struct net *net = dev_net(skb->dev);
+
+ sk = inet6_lookup(net, &tcp_hashinfo, &hdr->daddr,
+ th->dest, &hdr->saddr, th->source, skb->dev->ifindex);
+
+ if (sk == NULL) {
+ ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
+ ICMP6_MIB_INERRORS);
+ return;
+ }
+
+ if (sk->sk_state == TCP_TIME_WAIT) {
+ inet_twsk_put(inet_twsk(sk));
+ return;
+ }
+
+ bh_lock_sock(sk);
+ if (sock_owned_by_user(sk))
+ NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
+
+ if (sk->sk_state == TCP_CLOSE)
+ goto out;
+
+ if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) {
+ NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
+ goto out;
+ }
+
+ tp = tcp_sk(sk);
+ seq = ntohl(th->seq);
+ if (sk->sk_state != TCP_LISTEN &&
+ !between(seq, tp->snd_una, tp->snd_nxt)) {
+ NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
+ goto out;
+ }
+
+ np = inet6_sk(sk);
+
+ if (type == ICMPV6_PKT_TOOBIG) {
+ struct dst_entry *dst;
+
+ if (sock_owned_by_user(sk))
+ goto out;
+ if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
+ goto out;
+
+ /* icmp should have updated the destination cache entry */
+ dst = __sk_dst_check(sk, np->dst_cookie);
+
+ if (dst == NULL) {
+ struct inet_sock *inet = inet_sk(sk);
+ struct flowi6 fl6;
+
+ /* BUGGG_FUTURE: Again, it is not clear how
+ to handle rthdr case. Ignore this complexity
+ for now.
+ */
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_proto = IPPROTO_TCP;
+ fl6.daddr = np->daddr;
+ fl6.saddr = np->saddr;
+ fl6.flowi6_oif = sk->sk_bound_dev_if;
+ fl6.flowi6_mark = sk->sk_mark;
+ fl6.fl6_dport = inet->inet_dport;
+ fl6.fl6_sport = inet->inet_sport;
+ security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
+
+ dst = ip6_dst_lookup_flow(sk, &fl6, NULL, false);
+ if (IS_ERR(dst)) {
+ sk->sk_err_soft = -PTR_ERR(dst);
+ goto out;
+ }
+
+ } else
+ dst_hold(dst);
+
+ if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
+ tcp_sync_mss(sk, dst_mtu(dst));
+ tcp_simple_retransmit(sk);
+ } /* else let the usual retransmit timer handle it */
+ dst_release(dst);
+ goto out;
+ }
+
+ icmpv6_err_convert(type, code, &err);
+
+ /* Might be for an request_sock */
+ switch (sk->sk_state) {
+ struct request_sock *req, **prev;
+ case TCP_LISTEN:
+ if (sock_owned_by_user(sk))
+ goto out;
+
+ req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
+ &hdr->saddr, inet6_iif(skb));
+ if (!req)
+ goto out;
+
+ /* ICMPs are not backlogged, hence we cannot get
+ * an established socket here.
+ */
+ WARN_ON(req->sk != NULL);
+
+ if (seq != tcp_rsk(req)->snt_isn) {
+ NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
+ goto out;
+ }
+
+ inet_csk_reqsk_queue_drop(sk, req, prev);
+ goto out;
+
+ case TCP_SYN_SENT:
+ case TCP_SYN_RECV: /* Cannot happen.
+ It can, it SYNs are crossed. --ANK */
+ if (!sock_owned_by_user(sk)) {
+ sk->sk_err = err;
+ sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
+
+ tcp_done(sk);
+ } else
+ sk->sk_err_soft = err;
+ goto out;
+ }
+
+ if (!sock_owned_by_user(sk) && np->recverr) {
+ sk->sk_err = err;
+ sk->sk_error_report(sk);
+ } else
+ sk->sk_err_soft = err;
+
+out:
+ bh_unlock_sock(sk);
+ sock_put(sk);
+}
+
+
+static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
+ struct request_values *rvp)
+{
+ struct inet6_request_sock *treq = inet6_rsk(req);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct sk_buff * skb;
+ struct ipv6_txoptions *opt = NULL;
+ struct in6_addr * final_p, final;
+ struct flowi6 fl6;
+ struct dst_entry *dst;
+ int err;
+
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_proto = IPPROTO_TCP;
+ fl6.daddr = treq->rmt_addr;
+ fl6.saddr = treq->loc_addr;
+ fl6.flowlabel = 0;
+ fl6.flowi6_oif = treq->iif;
+ fl6.flowi6_mark = sk->sk_mark;
+ fl6.fl6_dport = inet_rsk(req)->rmt_port;
+ fl6.fl6_sport = inet_rsk(req)->loc_port;
+ security_req_classify_flow(req, flowi6_to_flowi(&fl6));
+
+ opt = np->opt;
+ final_p = fl6_update_dst(&fl6, opt, &final);
+
+ dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
+ if (IS_ERR(dst)) {
+ err = PTR_ERR(dst);
+ dst = NULL;
+ goto done;
+ }
+ skb = tcp_make_synack(sk, dst, req, rvp);
+ err = -ENOMEM;
+ if (skb) {
+ __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr);
+
+ fl6.daddr = treq->rmt_addr;
+ err = ip6_xmit(sk, skb, &fl6, opt, np->tclass);
+ err = net_xmit_eval(err);
+ }
+
+done:
+ if (opt && opt != np->opt)
+ sock_kfree_s(sk, opt, opt->tot_len);
+ dst_release(dst);
+ return err;
+}
+
+static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req,
+ struct request_values *rvp)
+{
+ TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
+ return tcp_v6_send_synack(sk, req, rvp);
+}
+
+static void tcp_v6_reqsk_destructor(struct request_sock *req)
+{
+ kfree_skb(inet6_rsk(req)->pktopts);
+}
+
+#ifdef CONFIG_TCP_MD5SIG
+static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
+ const struct in6_addr *addr)
+{
+ return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
+}
+
+static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk,
+ struct sock *addr_sk)
+{
+ return tcp_v6_md5_do_lookup(sk, &inet6_sk(addr_sk)->daddr);
+}
+
+static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk,
+ struct request_sock *req)
+{
+ return tcp_v6_md5_do_lookup(sk, &inet6_rsk(req)->rmt_addr);
+}
+
+static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval,
+ int optlen)
+{
+ struct tcp_md5sig cmd;
+ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
+
+ if (optlen < sizeof(cmd))
+ return -EINVAL;
+
+ if (copy_from_user(&cmd, optval, sizeof(cmd)))
+ return -EFAULT;
+
+ if (sin6->sin6_family != AF_INET6)
+ return -EINVAL;
+
+ if (!cmd.tcpm_keylen) {
+ if (ipv6_addr_v4mapped(&sin6->sin6_addr))
+ return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
+ AF_INET);
+ return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
+ AF_INET6);
+ }
+
+ if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
+ return -EINVAL;
+
+ if (ipv6_addr_v4mapped(&sin6->sin6_addr))
+ return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
+ AF_INET, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
+
+ return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
+ AF_INET6, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
+}
+
+static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr, int nbytes)
+{
+ struct tcp6_pseudohdr *bp;
+ struct scatterlist sg;
+
+ bp = &hp->md5_blk.ip6;
+ /* 1. TCP pseudo-header (RFC2460) */
+ bp->saddr = *saddr;
+ bp->daddr = *daddr;
+ bp->protocol = cpu_to_be32(IPPROTO_TCP);
+ bp->len = cpu_to_be32(nbytes);
+
+ sg_init_one(&sg, bp, sizeof(*bp));
+ return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
+}
+
+static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
+ const struct in6_addr *daddr, struct in6_addr *saddr,
+ const struct tcphdr *th)
+{
+ struct tcp_md5sig_pool *hp;
+ struct hash_desc *desc;
+
+ hp = tcp_get_md5sig_pool();
+ if (!hp)
+ goto clear_hash_noput;
+ desc = &hp->md5_desc;
+
+ if (crypto_hash_init(desc))
+ goto clear_hash;
+ if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
+ goto clear_hash;
+ if (tcp_md5_hash_header(hp, th))
+ goto clear_hash;
+ if (tcp_md5_hash_key(hp, key))
+ goto clear_hash;
+ if (crypto_hash_final(desc, md5_hash))
+ goto clear_hash;
+
+ tcp_put_md5sig_pool();
+ return 0;
+
+clear_hash:
+ tcp_put_md5sig_pool();
+clear_hash_noput:
+ memset(md5_hash, 0, 16);
+ return 1;
+}
+
+static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
+ const struct sock *sk,
+ const struct request_sock *req,
+ const struct sk_buff *skb)
+{
+ const struct in6_addr *saddr, *daddr;
+ struct tcp_md5sig_pool *hp;
+ struct hash_desc *desc;
+ const struct tcphdr *th = tcp_hdr(skb);
+
+ if (sk) {
+ saddr = &inet6_sk(sk)->saddr;
+ daddr = &inet6_sk(sk)->daddr;
+ } else if (req) {
+ saddr = &inet6_rsk(req)->loc_addr;
+ daddr = &inet6_rsk(req)->rmt_addr;
+ } else {
+ const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ saddr = &ip6h->saddr;
+ daddr = &ip6h->daddr;
+ }
+
+ hp = tcp_get_md5sig_pool();
+ if (!hp)
+ goto clear_hash_noput;
+ desc = &hp->md5_desc;
+
+ if (crypto_hash_init(desc))
+ goto clear_hash;
+
+ if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
+ goto clear_hash;
+ if (tcp_md5_hash_header(hp, th))
+ goto clear_hash;
+ if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
+ goto clear_hash;
+ if (tcp_md5_hash_key(hp, key))
+ goto clear_hash;
+ if (crypto_hash_final(desc, md5_hash))
+ goto clear_hash;
+
+ tcp_put_md5sig_pool();
+ return 0;
+
+clear_hash:
+ tcp_put_md5sig_pool();
+clear_hash_noput:
+ memset(md5_hash, 0, 16);
+ return 1;
+}
+
+static int tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
+{
+ const __u8 *hash_location = NULL;
+ struct tcp_md5sig_key *hash_expected;
+ const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ const struct tcphdr *th = tcp_hdr(skb);
+ int genhash;
+ u8 newhash[16];
+
+ hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
+ hash_location = tcp_parse_md5sig_option(th);
+
+ /* We've parsed the options - do we have a hash? */
+ if (!hash_expected && !hash_location)
+ return 0;
+
+ if (hash_expected && !hash_location) {
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
+ return 1;
+ }
+
+ if (!hash_expected && hash_location) {
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
+ return 1;
+ }
+
+ /* check the signature */
+ genhash = tcp_v6_md5_hash_skb(newhash,
+ hash_expected,
+ NULL, NULL, skb);
+
+ if (genhash || memcmp(hash_location, newhash, 16) != 0) {
+ if (net_ratelimit()) {
+ printk(KERN_INFO "MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
+ genhash ? "failed" : "mismatch",
+ &ip6h->saddr, ntohs(th->source),
+ &ip6h->daddr, ntohs(th->dest));
+ }
+ return 1;
+ }
+ return 0;
+}
+#endif
+
+struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
+ .family = AF_INET6,
+ .obj_size = sizeof(struct tcp6_request_sock),
+ .rtx_syn_ack = tcp_v6_rtx_synack,
+ .send_ack = tcp_v6_reqsk_send_ack,
+ .destructor = tcp_v6_reqsk_destructor,
+ .send_reset = tcp_v6_send_reset,
+ .syn_ack_timeout = tcp_syn_ack_timeout,
+};
+
+#ifdef CONFIG_TCP_MD5SIG
+static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
+ .md5_lookup = tcp_v6_reqsk_md5_lookup,
+ .calc_md5_hash = tcp_v6_md5_hash_skb,
+};
+#endif
+
+static void __tcp_v6_send_check(struct sk_buff *skb,
+ const struct in6_addr *saddr, const struct in6_addr *daddr)
+{
+ struct tcphdr *th = tcp_hdr(skb);
+
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ th->check = ~tcp_v6_check(skb->len, saddr, daddr, 0);
+ skb->csum_start = skb_transport_header(skb) - skb->head;
+ skb->csum_offset = offsetof(struct tcphdr, check);
+ } else {
+ th->check = tcp_v6_check(skb->len, saddr, daddr,
+ csum_partial(th, th->doff << 2,
+ skb->csum));
+ }
+}
+
+static void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ __tcp_v6_send_check(skb, &np->saddr, &np->daddr);
+}
+
+static int tcp_v6_gso_send_check(struct sk_buff *skb)
+{
+ const struct ipv6hdr *ipv6h;
+ struct tcphdr *th;
+
+ if (!pskb_may_pull(skb, sizeof(*th)))
+ return -EINVAL;
+
+ ipv6h = ipv6_hdr(skb);
+ th = tcp_hdr(skb);
+
+ th->check = 0;
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ __tcp_v6_send_check(skb, &ipv6h->saddr, &ipv6h->daddr);
+ return 0;
+}
+
+static struct sk_buff **tcp6_gro_receive(struct sk_buff **head,
+ struct sk_buff *skb)
+{
+ const struct ipv6hdr *iph = skb_gro_network_header(skb);
+
+ switch (skb->ip_summed) {
+ case CHECKSUM_COMPLETE:
+ if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr,
+ skb->csum)) {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ break;
+ }
+
+ /* fall through */
+ case CHECKSUM_NONE:
+ NAPI_GRO_CB(skb)->flush = 1;
+ return NULL;
+ }
+
+ return tcp_gro_receive(head, skb);
+}
+
+static int tcp6_gro_complete(struct sk_buff *skb)
+{
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
+ struct tcphdr *th = tcp_hdr(skb);
+
+ th->check = ~tcp_v6_check(skb->len - skb_transport_offset(skb),
+ &iph->saddr, &iph->daddr, 0);
+ skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
+
+ return tcp_gro_complete(skb);
+}
+
+static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
+ u32 ts, struct tcp_md5sig_key *key, int rst, u8 tclass)
+{
+ const struct tcphdr *th = tcp_hdr(skb);
+ struct tcphdr *t1;
+ struct sk_buff *buff;
+ struct flowi6 fl6;
+ struct net *net = dev_net(skb_dst(skb)->dev);
+ struct sock *ctl_sk = net->ipv6.tcp_sk;
+ unsigned int tot_len = sizeof(struct tcphdr);
+ struct dst_entry *dst;
+ __be32 *topt;
+
+ if (ts)
+ tot_len += TCPOLEN_TSTAMP_ALIGNED;
+#ifdef CONFIG_TCP_MD5SIG
+ if (key)
+ tot_len += TCPOLEN_MD5SIG_ALIGNED;
+#endif
+
+ buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
+ GFP_ATOMIC);
+ if (buff == NULL)
+ return;
+
+ skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
+
+ t1 = (struct tcphdr *) skb_push(buff, tot_len);
+ skb_reset_transport_header(buff);
+
+ /* Swap the send and the receive. */
+ memset(t1, 0, sizeof(*t1));
+ t1->dest = th->source;
+ t1->source = th->dest;
+ t1->doff = tot_len / 4;
+ t1->seq = htonl(seq);
+ t1->ack_seq = htonl(ack);
+ t1->ack = !rst || !th->ack;
+ t1->rst = rst;
+ t1->window = htons(win);
+
+ topt = (__be32 *)(t1 + 1);
+
+ if (ts) {
+ *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
+ (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
+ *topt++ = htonl(tcp_time_stamp);
+ *topt++ = htonl(ts);
+ }
+
+#ifdef CONFIG_TCP_MD5SIG
+ if (key) {
+ *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
+ (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
+ tcp_v6_md5_hash_hdr((__u8 *)topt, key,
+ &ipv6_hdr(skb)->saddr,
+ &ipv6_hdr(skb)->daddr, t1);
+ }
+#endif
+
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.daddr = ipv6_hdr(skb)->saddr;
+ fl6.saddr = ipv6_hdr(skb)->daddr;
+
+ buff->ip_summed = CHECKSUM_PARTIAL;
+ buff->csum = 0;
+
+ __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
+
+ fl6.flowi6_proto = IPPROTO_TCP;
+ fl6.flowi6_oif = inet6_iif(skb);
+ fl6.fl6_dport = t1->dest;
+ fl6.fl6_sport = t1->source;
+ security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
+
+ /* Pass a socket to ip6_dst_lookup either it is for RST
+ * Underlying function will use this to retrieve the network
+ * namespace
+ */
+ dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL, false);
+ if (!IS_ERR(dst)) {
+ skb_dst_set(buff, dst);
+ ip6_xmit(ctl_sk, buff, &fl6, NULL, tclass);
+ TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
+ if (rst)
+ TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
+ return;
+ }
+
+ kfree_skb(buff);
+}
+
+static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
+{
+ const struct tcphdr *th = tcp_hdr(skb);
+ u32 seq = 0, ack_seq = 0;
+ struct tcp_md5sig_key *key = NULL;
+#ifdef CONFIG_TCP_MD5SIG
+ const __u8 *hash_location = NULL;
+ struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ unsigned char newhash[16];
+ int genhash;
+ struct sock *sk1 = NULL;
+#endif
+
+ if (th->rst)
+ return;
+
+ if (!ipv6_unicast_destination(skb))
+ return;
+
+#ifdef CONFIG_TCP_MD5SIG
+ hash_location = tcp_parse_md5sig_option(th);
+ if (!sk && hash_location) {
+ /*
+ * active side is lost. Try to find listening socket through
+ * source port, and then find md5 key through listening socket.
+ * we are not loose security here:
+ * Incoming packet is checked with md5 hash with finding key,
+ * no RST generated if md5 hash doesn't match.
+ */
+ sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
+ &tcp_hashinfo, &ipv6h->daddr,
+ ntohs(th->source), inet6_iif(skb));
+ if (!sk1)
+ return;
+
+ rcu_read_lock();
+ key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
+ if (!key)
+ goto release_sk1;
+
+ genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, NULL, skb);
+ if (genhash || memcmp(hash_location, newhash, 16) != 0)
+ goto release_sk1;
+ } else {
+ key = sk ? tcp_v6_md5_do_lookup(sk, &ipv6h->saddr) : NULL;
+ }
+#endif
+
+ if (th->ack)
+ seq = ntohl(th->ack_seq);
+ else
+ ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
+ (th->doff << 2);
+
+ tcp_v6_send_response(skb, seq, ack_seq, 0, 0, key, 1, 0);
+
+#ifdef CONFIG_TCP_MD5SIG
+release_sk1:
+ if (sk1) {
+ rcu_read_unlock();
+ sock_put(sk1);
+ }
+#endif
+}
+
+static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts,
+ struct tcp_md5sig_key *key, u8 tclass)
+{
+ tcp_v6_send_response(skb, seq, ack, win, ts, key, 0, tclass);
+}
+
+static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
+{
+ struct inet_timewait_sock *tw = inet_twsk(sk);
+ struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
+
+ tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
+ tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
+ tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw),
+ tw->tw_tclass);
+
+ inet_twsk_put(tw);
+}
+
+static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
+ struct request_sock *req)
+{
+ tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent,
+ tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 0);
+}
+
+
+static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
+{
+ struct request_sock *req, **prev;
+ const struct tcphdr *th = tcp_hdr(skb);
+ struct sock *nsk;
+
+ /* Find possible connection requests. */
+ req = inet6_csk_search_req(sk, &prev, th->source,
+ &ipv6_hdr(skb)->saddr,
+ &ipv6_hdr(skb)->daddr, inet6_iif(skb));
+ if (req)
+ return tcp_check_req(sk, skb, req, prev);
+
+ nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo,
+ &ipv6_hdr(skb)->saddr, th->source,
+ &ipv6_hdr(skb)->daddr, ntohs(th->dest), inet6_iif(skb));
+
+ if (nsk) {
+ if (nsk->sk_state != TCP_TIME_WAIT) {
+ bh_lock_sock(nsk);
+ return nsk;
+ }
+ inet_twsk_put(inet_twsk(nsk));
+ return NULL;
+ }
+
+#ifdef CONFIG_SYN_COOKIES
+ if (!th->syn)
+ sk = cookie_v6_check(sk, skb);
+#endif
+ return sk;
+}
+
+/* FIXME: this is substantially similar to the ipv4 code.
+ * Can some kind of merge be done? -- erics
+ */
+static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
+{
+ struct tcp_extend_values tmp_ext;
+ struct tcp_options_received tmp_opt;
+ const u8 *hash_location;
+ struct request_sock *req;
+ struct inet6_request_sock *treq;
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+ __u32 isn = TCP_SKB_CB(skb)->when;
+ struct dst_entry *dst = NULL;
+ int want_cookie = 0;
+
+ if (skb->protocol == htons(ETH_P_IP))
+ return tcp_v4_conn_request(sk, skb);
+
+ if (!ipv6_unicast_destination(skb))
+ goto drop;
+
+ if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
+ want_cookie = tcp_syn_flood_action(sk, skb, "TCPv6");
+ if (!want_cookie)
+ goto drop;
+ }
+
+ if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
+ goto drop;
+
+ req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
+ if (req == NULL)
+ goto drop;
+
+#ifdef CONFIG_TCP_MD5SIG
+ tcp_rsk(req)->af_specific = &tcp_request_sock_ipv6_ops;
+#endif
+
+ tcp_clear_options(&tmp_opt);
+ tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
+ tmp_opt.user_mss = tp->rx_opt.user_mss;
+ tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
+
+ if (tmp_opt.cookie_plus > 0 &&
+ tmp_opt.saw_tstamp &&
+ !tp->rx_opt.cookie_out_never &&
+ (sysctl_tcp_cookie_size > 0 ||
+ (tp->cookie_values != NULL &&
+ tp->cookie_values->cookie_desired > 0))) {
+ u8 *c;
+ u32 *d;
+ u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
+ int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
+
+ if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
+ goto drop_and_free;
+
+ /* Secret recipe starts with IP addresses */
+ d = (__force u32 *)&ipv6_hdr(skb)->daddr.s6_addr32[0];
+ *mess++ ^= *d++;
+ *mess++ ^= *d++;
+ *mess++ ^= *d++;
+ *mess++ ^= *d++;
+ d = (__force u32 *)&ipv6_hdr(skb)->saddr.s6_addr32[0];
+ *mess++ ^= *d++;
+ *mess++ ^= *d++;
+ *mess++ ^= *d++;
+ *mess++ ^= *d++;
+
+ /* plus variable length Initiator Cookie */
+ c = (u8 *)mess;
+ while (l-- > 0)
+ *c++ ^= *hash_location++;
+
+ want_cookie = 0; /* not our kind of cookie */
+ tmp_ext.cookie_out_never = 0; /* false */
+ tmp_ext.cookie_plus = tmp_opt.cookie_plus;
+ } else if (!tp->rx_opt.cookie_in_always) {
+ /* redundant indications, but ensure initialization. */
+ tmp_ext.cookie_out_never = 1; /* true */
+ tmp_ext.cookie_plus = 0;
+ } else {
+ goto drop_and_free;
+ }
+ tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
+
+ if (want_cookie && !tmp_opt.saw_tstamp)
+ tcp_clear_options(&tmp_opt);
+
+ tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
+ tcp_openreq_init(req, &tmp_opt, skb);
+
+ treq = inet6_rsk(req);
+ treq->rmt_addr = ipv6_hdr(skb)->saddr;
+ treq->loc_addr = ipv6_hdr(skb)->daddr;
+ if (!want_cookie || tmp_opt.tstamp_ok)
+ TCP_ECN_create_request(req, tcp_hdr(skb));
+
+ treq->iif = sk->sk_bound_dev_if;
+
+ /* So that link locals have meaning */
+ if (!sk->sk_bound_dev_if &&
+ ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
+ treq->iif = inet6_iif(skb);
+
+ if (!isn) {
+ struct inet_peer *peer = NULL;
+
+ if (ipv6_opt_accepted(sk, skb) ||
+ np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
+ np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
+ atomic_inc(&skb->users);
+ treq->pktopts = skb;
+ }
+
+ if (want_cookie) {
+ isn = cookie_v6_init_sequence(sk, skb, &req->mss);
+ req->cookie_ts = tmp_opt.tstamp_ok;
+ goto have_isn;
+ }
+
+ /* VJ's idea. We save last timestamp seen
+ * from the destination in peer table, when entering
+ * state TIME-WAIT, and check against it before
+ * accepting new connection request.
+ *
+ * If "isn" is not zero, this request hit alive
+ * timewait bucket, so that all the necessary checks
+ * are made in the function processing timewait state.
+ */
+ if (tmp_opt.saw_tstamp &&
+ tcp_death_row.sysctl_tw_recycle &&
+ (dst = inet6_csk_route_req(sk, req)) != NULL &&
+ (peer = rt6_get_peer((struct rt6_info *)dst)) != NULL &&
+ ipv6_addr_equal((struct in6_addr *)peer->daddr.addr.a6,
+ &treq->rmt_addr)) {
+ inet_peer_refcheck(peer);
+ if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
+ (s32)(peer->tcp_ts - req->ts_recent) >
+ TCP_PAWS_WINDOW) {
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
+ goto drop_and_release;
+ }
+ }
+ /* Kill the following clause, if you dislike this way. */
+ else if (!sysctl_tcp_syncookies &&
+ (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
+ (sysctl_max_syn_backlog >> 2)) &&
+ (!peer || !peer->tcp_ts_stamp) &&
+ (!dst || !dst_metric(dst, RTAX_RTT))) {
+ /* Without syncookies last quarter of
+ * backlog is filled with destinations,
+ * proven to be alive.
+ * It means that we continue to communicate
+ * to destinations, already remembered
+ * to the moment of synflood.
+ */
+ LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI6/%u\n",
+ &treq->rmt_addr, ntohs(tcp_hdr(skb)->source));
+ goto drop_and_release;
+ }
+
+ isn = tcp_v6_init_sequence(skb);
+ }
+have_isn:
+ tcp_rsk(req)->snt_isn = isn;
+ tcp_rsk(req)->snt_synack = tcp_time_stamp;
+
+ security_inet_conn_request(sk, skb, req);
+
+ if (tcp_v6_send_synack(sk, req,
+ (struct request_values *)&tmp_ext) ||
+ want_cookie)
+ goto drop_and_free;
+
+ inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+ return 0;
+
+drop_and_release:
+ dst_release(dst);
+drop_and_free:
+ reqsk_free(req);
+drop:
+ return 0; /* don't send reset */
+}
+
+static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
+ struct request_sock *req,
+ struct dst_entry *dst)
+{
+ struct inet6_request_sock *treq;
+ struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
+ struct tcp6_sock *newtcp6sk;
+ struct inet_sock *newinet;
+ struct tcp_sock *newtp;
+ struct sock *newsk;
+ struct ipv6_txoptions *opt;
+#ifdef CONFIG_TCP_MD5SIG
+ struct tcp_md5sig_key *key;
+#endif
+
+ if (skb->protocol == htons(ETH_P_IP)) {
+ /*
+ * v6 mapped
+ */
+
+ newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
+
+ if (newsk == NULL)
+ return NULL;
+
+ newtcp6sk = (struct tcp6_sock *)newsk;
+ inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
+
+ newinet = inet_sk(newsk);
+ newnp = inet6_sk(newsk);
+ newtp = tcp_sk(newsk);
+
+ memcpy(newnp, np, sizeof(struct ipv6_pinfo));
+
+ ipv6_addr_set_v4mapped(newinet->inet_daddr, &newnp->daddr);
+
+ ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr);
+
+ newnp->rcv_saddr = newnp->saddr;
+
+ inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
+ newsk->sk_backlog_rcv = tcp_v4_do_rcv;
+#ifdef CONFIG_TCP_MD5SIG
+ newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
+#endif
+
+ newnp->ipv6_ac_list = NULL;
+ newnp->ipv6_fl_list = NULL;
+ newnp->pktoptions = NULL;
+ newnp->opt = NULL;
+ newnp->mcast_oif = inet6_iif(skb);
+ newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
+ newnp->rcv_tclass = ipv6_tclass(ipv6_hdr(skb));
+
+ /*
+ * No need to charge this sock to the relevant IPv6 refcnt debug socks count
+ * here, tcp_create_openreq_child now does this for us, see the comment in
+ * that function for the gory details. -acme
+ */
+
+ /* It is tricky place. Until this moment IPv4 tcp
+ worked with IPv6 icsk.icsk_af_ops.
+ Sync it now.
+ */
+ tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
+
+ return newsk;
+ }
+
+ treq = inet6_rsk(req);
+ opt = np->opt;
+
+ if (sk_acceptq_is_full(sk))
+ goto out_overflow;
+
+ if (!dst) {
+ dst = inet6_csk_route_req(sk, req);
+ if (!dst)
+ goto out;
+ }
+
+ newsk = tcp_create_openreq_child(sk, req, skb);
+ if (newsk == NULL)
+ goto out_nonewsk;
+
+ /*
+ * No need to charge this sock to the relevant IPv6 refcnt debug socks
+ * count here, tcp_create_openreq_child now does this for us, see the
+ * comment in that function for the gory details. -acme
+ */
+
+ newsk->sk_gso_type = SKB_GSO_TCPV6;
+ __ip6_dst_store(newsk, dst, NULL, NULL);
+
+ newtcp6sk = (struct tcp6_sock *)newsk;
+ inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
+
+ newtp = tcp_sk(newsk);
+ newinet = inet_sk(newsk);
+ newnp = inet6_sk(newsk);
+
+ memcpy(newnp, np, sizeof(struct ipv6_pinfo));
+
+ newnp->daddr = treq->rmt_addr;
+ newnp->saddr = treq->loc_addr;
+ newnp->rcv_saddr = treq->loc_addr;
+ newsk->sk_bound_dev_if = treq->iif;
+
+ /* Now IPv6 options...
+
+ First: no IPv4 options.
+ */
+ newinet->inet_opt = NULL;
+ newnp->ipv6_ac_list = NULL;
+ newnp->ipv6_fl_list = NULL;
+
+ /* Clone RX bits */
+ newnp->rxopt.all = np->rxopt.all;
+
+ /* Clone pktoptions received with SYN */
+ newnp->pktoptions = NULL;
+ if (treq->pktopts != NULL) {
+ newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
+ kfree_skb(treq->pktopts);
+ treq->pktopts = NULL;
+ if (newnp->pktoptions)
+ skb_set_owner_r(newnp->pktoptions, newsk);
+ }
+ newnp->opt = NULL;
+ newnp->mcast_oif = inet6_iif(skb);
+ newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
+ newnp->rcv_tclass = ipv6_tclass(ipv6_hdr(skb));
+
+ /* Clone native IPv6 options from listening socket (if any)
+
+ Yes, keeping reference count would be much more clever,
+ but we make one more one thing there: reattach optmem
+ to newsk.
+ */
+ if (opt) {
+ newnp->opt = ipv6_dup_options(newsk, opt);
+ if (opt != np->opt)
+ sock_kfree_s(sk, opt, opt->tot_len);
+ }
+
+ inet_csk(newsk)->icsk_ext_hdr_len = 0;
+ if (newnp->opt)
+ inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
+ newnp->opt->opt_flen);
+
+ tcp_mtup_init(newsk);
+ tcp_sync_mss(newsk, dst_mtu(dst));
+ newtp->advmss = dst_metric_advmss(dst);
+ if (tcp_sk(sk)->rx_opt.user_mss &&
+ tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
+ newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
+
+ tcp_initialize_rcv_mss(newsk);
+ if (tcp_rsk(req)->snt_synack)
+ tcp_valid_rtt_meas(newsk,
+ tcp_time_stamp - tcp_rsk(req)->snt_synack);
+ newtp->total_retrans = req->retrans;
+
+ newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
+ newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
+
+#ifdef CONFIG_TCP_MD5SIG
+ /* Copy over the MD5 key from the original socket */
+ if ((key = tcp_v6_md5_do_lookup(sk, &newnp->daddr)) != NULL) {
+ /* We're using one, so create a matching key
+ * on the newsk structure. If we fail to get
+ * memory, then we end up not copying the key
+ * across. Shucks.
+ */
+ tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newnp->daddr,
+ AF_INET6, key->key, key->keylen, GFP_ATOMIC);
+ }
+#endif
+
+ if (__inet_inherit_port(sk, newsk) < 0) {
+ sock_put(newsk);
+ goto out;
+ }
+ __inet6_hash(newsk, NULL);
+
+ return newsk;
+
+out_overflow:
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
+out_nonewsk:
+ if (opt && opt != np->opt)
+ sock_kfree_s(sk, opt, opt->tot_len);
+ dst_release(dst);
+out:
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
+ return NULL;
+}
+
+static __sum16 tcp_v6_checksum_init(struct sk_buff *skb)
+{
+ if (skb->ip_summed == CHECKSUM_COMPLETE) {
+ if (!tcp_v6_check(skb->len, &ipv6_hdr(skb)->saddr,
+ &ipv6_hdr(skb)->daddr, skb->csum)) {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ return 0;
+ }
+ }
+
+ skb->csum = ~csum_unfold(tcp_v6_check(skb->len,
+ &ipv6_hdr(skb)->saddr,
+ &ipv6_hdr(skb)->daddr, 0));
+
+ if (skb->len <= 76) {
+ return __skb_checksum_complete(skb);
+ }
+ return 0;
+}
+
+/* The socket must have it's spinlock held when we get
+ * here.
+ *
+ * We have a potential double-lock case here, so even when
+ * doing backlog processing we use the BH locking scheme.
+ * This is because we cannot sleep with the original spinlock
+ * held.
+ */
+static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct tcp_sock *tp;
+ struct sk_buff *opt_skb = NULL;
+
+ /* Imagine: socket is IPv6. IPv4 packet arrives,
+ goes to IPv4 receive handler and backlogged.
+ From backlog it always goes here. Kerboom...
+ Fortunately, tcp_rcv_established and rcv_established
+ handle them correctly, but it is not case with
+ tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
+ */
+
+ if (skb->protocol == htons(ETH_P_IP))
+ return tcp_v4_do_rcv(sk, skb);
+
+#ifdef CONFIG_TCP_MD5SIG
+ if (tcp_v6_inbound_md5_hash (sk, skb))
+ goto discard;
+#endif
+
+ if (sk_filter(sk, skb))
+ goto discard;
+
+ /*
+ * socket locking is here for SMP purposes as backlog rcv
+ * is currently called with bh processing disabled.
+ */
+
+ /* Do Stevens' IPV6_PKTOPTIONS.
+
+ Yes, guys, it is the only place in our code, where we
+ may make it not affecting IPv4.
+ The rest of code is protocol independent,
+ and I do not like idea to uglify IPv4.
+
+ Actually, all the idea behind IPV6_PKTOPTIONS
+ looks not very well thought. For now we latch
+ options, received in the last packet, enqueued
+ by tcp. Feel free to propose better solution.
+ --ANK (980728)
+ */
+ if (np->rxopt.all)
+ opt_skb = skb_clone(skb, GFP_ATOMIC);
+
+ if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
+ sock_rps_save_rxhash(sk, skb);
+ if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len))
+ goto reset;
+ if (opt_skb)
+ goto ipv6_pktoptions;
+ return 0;
+ }
+
+ if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
+ goto csum_err;
+
+ if (sk->sk_state == TCP_LISTEN) {
+ struct sock *nsk = tcp_v6_hnd_req(sk, skb);
+ if (!nsk)
+ goto discard;
+
+ /*
+ * Queue it on the new socket if the new socket is active,
+ * otherwise we just shortcircuit this and continue with
+ * the new socket..
+ */
+ if(nsk != sk) {
+ sock_rps_save_rxhash(nsk, skb);
+ if (tcp_child_process(sk, nsk, skb))
+ goto reset;
+ if (opt_skb)
+ __kfree_skb(opt_skb);
+ return 0;
+ }
+ } else
+ sock_rps_save_rxhash(sk, skb);
+
+ if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len))
+ goto reset;
+ if (opt_skb)
+ goto ipv6_pktoptions;
+ return 0;
+
+reset:
+ tcp_v6_send_reset(sk, skb);
+discard:
+ if (opt_skb)
+ __kfree_skb(opt_skb);
+ kfree_skb(skb);
+ return 0;
+csum_err:
+ TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
+ goto discard;
+
+
+ipv6_pktoptions:
+ /* Do you ask, what is it?
+
+ 1. skb was enqueued by tcp.
+ 2. skb is added to tail of read queue, rather than out of order.
+ 3. socket is not in passive state.
+ 4. Finally, it really contains options, which user wants to receive.
+ */
+ tp = tcp_sk(sk);
+ if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
+ !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
+ if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
+ np->mcast_oif = inet6_iif(opt_skb);
+ if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
+ np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
+ if (np->rxopt.bits.rxtclass)
+ np->rcv_tclass = ipv6_tclass(ipv6_hdr(skb));
+ if (ipv6_opt_accepted(sk, opt_skb)) {
+ skb_set_owner_r(opt_skb, sk);
+ opt_skb = xchg(&np->pktoptions, opt_skb);
+ } else {
+ __kfree_skb(opt_skb);
+ opt_skb = xchg(&np->pktoptions, NULL);
+ }
+ }
+
+ kfree_skb(opt_skb);
+ return 0;
+}
+
+static int tcp_v6_rcv(struct sk_buff *skb)
+{
+ const struct tcphdr *th;
+ const struct ipv6hdr *hdr;
+ struct sock *sk;
+ int ret;
+ struct net *net = dev_net(skb->dev);
+
+ if (skb->pkt_type != PACKET_HOST)
+ goto discard_it;
+
+ /*
+ * Count it even if it's bad.
+ */
+ TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
+
+ if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
+ goto discard_it;
+
+ th = tcp_hdr(skb);
+
+ if (th->doff < sizeof(struct tcphdr)/4)
+ goto bad_packet;
+ if (!pskb_may_pull(skb, th->doff*4))
+ goto discard_it;
+
+ if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb))
+ goto bad_packet;
+
+ th = tcp_hdr(skb);
+ hdr = ipv6_hdr(skb);
+ TCP_SKB_CB(skb)->seq = ntohl(th->seq);
+ TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
+ skb->len - th->doff*4);
+ TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
+ TCP_SKB_CB(skb)->when = 0;
+ TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
+ TCP_SKB_CB(skb)->sacked = 0;
+
+ sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
+ if (!sk)
+ goto no_tcp_socket;
+
+process:
+ if (sk->sk_state == TCP_TIME_WAIT)
+ goto do_time_wait;
+
+ if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) {
+ NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
+ goto discard_and_relse;
+ }
+
+ if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
+ goto discard_and_relse;
+
+ if (sk_filter(sk, skb))
+ goto discard_and_relse;
+
+ skb->dev = NULL;
+
+ bh_lock_sock_nested(sk);
+ ret = 0;
+ if (!sock_owned_by_user(sk)) {
+#ifdef CONFIG_NET_DMA
+ struct tcp_sock *tp = tcp_sk(sk);
+ if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
+ tp->ucopy.dma_chan = net_dma_find_channel();
+ if (tp->ucopy.dma_chan)
+ ret = tcp_v6_do_rcv(sk, skb);
+ else
+#endif
+ {
+ if (!tcp_prequeue(sk, skb))
+ ret = tcp_v6_do_rcv(sk, skb);
+ }
+ } else if (unlikely(sk_add_backlog(sk, skb))) {
+ bh_unlock_sock(sk);
+ NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
+ goto discard_and_relse;
+ }
+ bh_unlock_sock(sk);
+
+ sock_put(sk);
+ return ret ? -1 : 0;
+
+no_tcp_socket:
+ if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
+ goto discard_it;
+
+ if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
+bad_packet:
+ TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
+ } else {
+ tcp_v6_send_reset(NULL, skb);
+ }
+
+discard_it:
+
+ /*
+ * Discard frame
+ */
+
+ kfree_skb(skb);
+ return 0;
+
+discard_and_relse:
+ sock_put(sk);
+ goto discard_it;
+
+do_time_wait:
+ if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+ inet_twsk_put(inet_twsk(sk));
+ goto discard_it;
+ }
+
+ if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
+ TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
+ inet_twsk_put(inet_twsk(sk));
+ goto discard_it;
+ }
+
+ switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
+ case TCP_TW_SYN:
+ {
+ struct sock *sk2;
+
+ sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
+ &ipv6_hdr(skb)->daddr,
+ ntohs(th->dest), inet6_iif(skb));
+ if (sk2 != NULL) {
+ struct inet_timewait_sock *tw = inet_twsk(sk);
+ inet_twsk_deschedule(tw, &tcp_death_row);
+ inet_twsk_put(tw);
+ sk = sk2;
+ goto process;
+ }
+ /* Fall through to ACK */
+ }
+ case TCP_TW_ACK:
+ tcp_v6_timewait_ack(sk, skb);
+ break;
+ case TCP_TW_RST:
+ goto no_tcp_socket;
+ case TCP_TW_SUCCESS:;
+ }
+ goto discard_it;
+}
+
+static struct inet_peer *tcp_v6_get_peer(struct sock *sk, bool *release_it)
+{
+ struct rt6_info *rt = (struct rt6_info *) __sk_dst_get(sk);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct inet_peer *peer;
+
+ if (!rt ||
+ !ipv6_addr_equal(&np->daddr, &rt->rt6i_dst.addr)) {
+ peer = inet_getpeer_v6(&np->daddr, 1);
+ *release_it = true;
+ } else {
+ if (!rt->rt6i_peer)
+ rt6_bind_peer(rt, 1);
+ peer = rt->rt6i_peer;
+ *release_it = false;
+ }
+
+ return peer;
+}
+
+static void *tcp_v6_tw_get_peer(struct sock *sk)
+{
+ const struct inet6_timewait_sock *tw6 = inet6_twsk(sk);
+ const struct inet_timewait_sock *tw = inet_twsk(sk);
+
+ if (tw->tw_family == AF_INET)
+ return tcp_v4_tw_get_peer(sk);
+
+ return inet_getpeer_v6(&tw6->tw_v6_daddr, 1);
+}
+
+static struct timewait_sock_ops tcp6_timewait_sock_ops = {
+ .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
+ .twsk_unique = tcp_twsk_unique,
+ .twsk_destructor= tcp_twsk_destructor,
+ .twsk_getpeer = tcp_v6_tw_get_peer,
+};
+
+static const struct inet_connection_sock_af_ops ipv6_specific = {
+ .queue_xmit = inet6_csk_xmit,
+ .send_check = tcp_v6_send_check,
+ .rebuild_header = inet6_sk_rebuild_header,
+ .conn_request = tcp_v6_conn_request,
+ .syn_recv_sock = tcp_v6_syn_recv_sock,
+ .get_peer = tcp_v6_get_peer,
+ .net_header_len = sizeof(struct ipv6hdr),
+ .setsockopt = ipv6_setsockopt,
+ .getsockopt = ipv6_getsockopt,
+ .addr2sockaddr = inet6_csk_addr2sockaddr,
+ .sockaddr_len = sizeof(struct sockaddr_in6),
+ .bind_conflict = inet6_csk_bind_conflict,
+#ifdef CONFIG_COMPAT
+ .compat_setsockopt = compat_ipv6_setsockopt,
+ .compat_getsockopt = compat_ipv6_getsockopt,
+#endif
+};
+
+#ifdef CONFIG_TCP_MD5SIG
+static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
+ .md5_lookup = tcp_v6_md5_lookup,
+ .calc_md5_hash = tcp_v6_md5_hash_skb,
+ .md5_parse = tcp_v6_parse_md5_keys,
+};
+#endif
+
+/*
+ * TCP over IPv4 via INET6 API
+ */
+
+static const struct inet_connection_sock_af_ops ipv6_mapped = {
+ .queue_xmit = ip_queue_xmit,
+ .send_check = tcp_v4_send_check,
+ .rebuild_header = inet_sk_rebuild_header,
+ .conn_request = tcp_v6_conn_request,
+ .syn_recv_sock = tcp_v6_syn_recv_sock,
+ .get_peer = tcp_v4_get_peer,
+ .net_header_len = sizeof(struct iphdr),
+ .setsockopt = ipv6_setsockopt,
+ .getsockopt = ipv6_getsockopt,
+ .addr2sockaddr = inet6_csk_addr2sockaddr,
+ .sockaddr_len = sizeof(struct sockaddr_in6),
+ .bind_conflict = inet6_csk_bind_conflict,
+#ifdef CONFIG_COMPAT
+ .compat_setsockopt = compat_ipv6_setsockopt,
+ .compat_getsockopt = compat_ipv6_getsockopt,
+#endif
+};
+
+#ifdef CONFIG_TCP_MD5SIG
+static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
+ .md5_lookup = tcp_v4_md5_lookup,
+ .calc_md5_hash = tcp_v4_md5_hash_skb,
+ .md5_parse = tcp_v6_parse_md5_keys,
+};
+#endif
+
+/* NOTE: A lot of things set to zero explicitly by call to
+ * sk_alloc() so need not be done here.
+ */
+static int tcp_v6_init_sock(struct sock *sk)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ skb_queue_head_init(&tp->out_of_order_queue);
+ tcp_init_xmit_timers(sk);
+ tcp_prequeue_init(tp);
+
+ icsk->icsk_rto = TCP_TIMEOUT_INIT;
+ tp->mdev = TCP_TIMEOUT_INIT;
+
+ /* So many TCP implementations out there (incorrectly) count the
+ * initial SYN frame in their delayed-ACK and congestion control
+ * algorithms that we must have the following bandaid to talk
+ * efficiently to them. -DaveM
+ */
+ tp->snd_cwnd = 2;
+
+ /* See draft-stevens-tcpca-spec-01 for discussion of the
+ * initialization of these values.
+ */
+ tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
+ tp->snd_cwnd_clamp = ~0;
+ tp->mss_cache = TCP_MSS_DEFAULT;
+
+ tp->reordering = sysctl_tcp_reordering;
+
+ sk->sk_state = TCP_CLOSE;
+
+ icsk->icsk_af_ops = &ipv6_specific;
+ icsk->icsk_ca_ops = &tcp_init_congestion_ops;
+ icsk->icsk_sync_mss = tcp_sync_mss;
+ sk->sk_write_space = sk_stream_write_space;
+ sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
+
+#ifdef CONFIG_TCP_MD5SIG
+ tp->af_specific = &tcp_sock_ipv6_specific;
+#endif
+
+ /* TCP Cookie Transactions */
+ if (sysctl_tcp_cookie_size > 0) {
+ /* Default, cookies without s_data_payload. */
+ tp->cookie_values =
+ kzalloc(sizeof(*tp->cookie_values),
+ sk->sk_allocation);
+ if (tp->cookie_values != NULL)
+ kref_init(&tp->cookie_values->kref);
+ }
+ /* Presumed zeroed, in order of appearance:
+ * cookie_in_always, cookie_out_never,
+ * s_data_constant, s_data_in, s_data_out
+ */
+ sk->sk_sndbuf = sysctl_tcp_wmem[1];
+ sk->sk_rcvbuf = sysctl_tcp_rmem[1];
+
+ local_bh_disable();
+ sock_update_memcg(sk);
+ sk_sockets_allocated_inc(sk);
+ local_bh_enable();
+
+ return 0;
+}
+
+static void tcp_v6_destroy_sock(struct sock *sk)
+{
+ tcp_v4_destroy_sock(sk);
+ inet6_destroy_sock(sk);
+}
+
+#ifdef CONFIG_PROC_FS
+/* Proc filesystem TCPv6 sock list dumping. */
+static void get_openreq6(struct seq_file *seq,
+ const struct sock *sk, struct request_sock *req, int i, int uid)
+{
+ int ttd = req->expires - jiffies;
+ const struct in6_addr *src = &inet6_rsk(req)->loc_addr;
+ const struct in6_addr *dest = &inet6_rsk(req)->rmt_addr;
+
+ if (ttd < 0)
+ ttd = 0;
+
+ seq_printf(seq,
+ "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
+ "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
+ i,
+ src->s6_addr32[0], src->s6_addr32[1],
+ src->s6_addr32[2], src->s6_addr32[3],
+ ntohs(inet_rsk(req)->loc_port),
+ dest->s6_addr32[0], dest->s6_addr32[1],
+ dest->s6_addr32[2], dest->s6_addr32[3],
+ ntohs(inet_rsk(req)->rmt_port),
+ TCP_SYN_RECV,
+ 0,0, /* could print option size, but that is af dependent. */
+ 1, /* timers active (only the expire timer) */
+ jiffies_to_clock_t(ttd),
+ req->retrans,
+ uid,
+ 0, /* non standard timer */
+ 0, /* open_requests have no inode */
+ 0, req);
+}
+
+static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
+{
+ const struct in6_addr *dest, *src;
+ __u16 destp, srcp;
+ int timer_active;
+ unsigned long timer_expires;
+ const struct inet_sock *inet = inet_sk(sp);
+ const struct tcp_sock *tp = tcp_sk(sp);
+ const struct inet_connection_sock *icsk = inet_csk(sp);
+ const struct ipv6_pinfo *np = inet6_sk(sp);
+
+ dest = &np->daddr;
+ src = &np->rcv_saddr;
+ destp = ntohs(inet->inet_dport);
+ srcp = ntohs(inet->inet_sport);
+
+ if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
+ timer_active = 1;
+ timer_expires = icsk->icsk_timeout;
+ } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
+ timer_active = 4;
+ timer_expires = icsk->icsk_timeout;
+ } else if (timer_pending(&sp->sk_timer)) {
+ timer_active = 2;
+ timer_expires = sp->sk_timer.expires;
+ } else {
+ timer_active = 0;
+ timer_expires = jiffies;
+ }
+
+ seq_printf(seq,
+ "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
+ "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %lu %lu %u %u %d\n",
+ i,
+ src->s6_addr32[0], src->s6_addr32[1],
+ src->s6_addr32[2], src->s6_addr32[3], srcp,
+ dest->s6_addr32[0], dest->s6_addr32[1],
+ dest->s6_addr32[2], dest->s6_addr32[3], destp,
+ sp->sk_state,
+ tp->write_seq-tp->snd_una,
+ (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq),
+ timer_active,
+ jiffies_to_clock_t(timer_expires - jiffies),
+ icsk->icsk_retransmits,
+ sock_i_uid(sp),
+ icsk->icsk_probes_out,
+ sock_i_ino(sp),
+ atomic_read(&sp->sk_refcnt), sp,
+ jiffies_to_clock_t(icsk->icsk_rto),
+ jiffies_to_clock_t(icsk->icsk_ack.ato),
+ (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong,
+ tp->snd_cwnd,
+ tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh
+ );
+}
+
+static void get_timewait6_sock(struct seq_file *seq,
+ struct inet_timewait_sock *tw, int i)
+{
+ const struct in6_addr *dest, *src;
+ __u16 destp, srcp;
+ const struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
+ int ttd = tw->tw_ttd - jiffies;
+
+ if (ttd < 0)
+ ttd = 0;
+
+ dest = &tw6->tw_v6_daddr;
+ src = &tw6->tw_v6_rcv_saddr;
+ destp = ntohs(tw->tw_dport);
+ srcp = ntohs(tw->tw_sport);
+
+ seq_printf(seq,
+ "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
+ "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
+ i,
+ src->s6_addr32[0], src->s6_addr32[1],
+ src->s6_addr32[2], src->s6_addr32[3], srcp,
+ dest->s6_addr32[0], dest->s6_addr32[1],
+ dest->s6_addr32[2], dest->s6_addr32[3], destp,
+ tw->tw_substate, 0, 0,
+ 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
+ atomic_read(&tw->tw_refcnt), tw);
+}
+
+static int tcp6_seq_show(struct seq_file *seq, void *v)
+{
+ struct tcp_iter_state *st;
+
+ if (v == SEQ_START_TOKEN) {
+ seq_puts(seq,
+ " sl "
+ "local_address "
+ "remote_address "
+ "st tx_queue rx_queue tr tm->when retrnsmt"
+ " uid timeout inode\n");
+ goto out;
+ }
+ st = seq->private;
+
+ switch (st->state) {
+ case TCP_SEQ_STATE_LISTENING:
+ case TCP_SEQ_STATE_ESTABLISHED:
+ get_tcp6_sock(seq, v, st->num);
+ break;
+ case TCP_SEQ_STATE_OPENREQ:
+ get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
+ break;
+ case TCP_SEQ_STATE_TIME_WAIT:
+ get_timewait6_sock(seq, v, st->num);
+ break;
+ }
+out:
+ return 0;
+}
+
+static const struct file_operations tcp6_afinfo_seq_fops = {
+ .owner = THIS_MODULE,
+ .open = tcp_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_net
+};
+
+static struct tcp_seq_afinfo tcp6_seq_afinfo = {
+ .name = "tcp6",
+ .family = AF_INET6,
+ .seq_fops = &tcp6_afinfo_seq_fops,
+ .seq_ops = {
+ .show = tcp6_seq_show,
+ },
+};
+
+int __net_init tcp6_proc_init(struct net *net)
+{
+ return tcp_proc_register(net, &tcp6_seq_afinfo);
+}
+
+void tcp6_proc_exit(struct net *net)
+{
+ tcp_proc_unregister(net, &tcp6_seq_afinfo);
+}
+#endif
+
+struct proto tcpv6_prot = {
+ .name = "TCPv6",
+ .owner = THIS_MODULE,
+ .close = tcp_close,
+ .connect = tcp_v6_connect,
+ .disconnect = tcp_disconnect,
+ .accept = inet_csk_accept,
+ .ioctl = tcp_ioctl,
+ .init = tcp_v6_init_sock,
+ .destroy = tcp_v6_destroy_sock,
+ .shutdown = tcp_shutdown,
+ .setsockopt = tcp_setsockopt,
+ .getsockopt = tcp_getsockopt,
+ .recvmsg = tcp_recvmsg,
+ .sendmsg = tcp_sendmsg,
+ .sendpage = tcp_sendpage,
+ .backlog_rcv = tcp_v6_do_rcv,
+ .hash = tcp_v6_hash,
+ .unhash = inet_unhash,
+ .get_port = inet_csk_get_port,
+ .enter_memory_pressure = tcp_enter_memory_pressure,
+ .sockets_allocated = &tcp_sockets_allocated,
+ .memory_allocated = &tcp_memory_allocated,
+ .memory_pressure = &tcp_memory_pressure,
+ .orphan_count = &tcp_orphan_count,
+ .sysctl_wmem = sysctl_tcp_wmem,
+ .sysctl_rmem = sysctl_tcp_rmem,
+ .max_header = MAX_TCP_HEADER,
+ .obj_size = sizeof(struct tcp6_sock),
+ .slab_flags = SLAB_DESTROY_BY_RCU,
+ .twsk_prot = &tcp6_timewait_sock_ops,
+ .rsk_prot = &tcp6_request_sock_ops,
+ .h.hashinfo = &tcp_hashinfo,
+ .no_autobind = true,
+#ifdef CONFIG_COMPAT
+ .compat_setsockopt = compat_tcp_setsockopt,
+ .compat_getsockopt = compat_tcp_getsockopt,
+#endif
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+ .proto_cgroup = tcp_proto_cgroup,
+#endif
+};
+
+static const struct inet6_protocol tcpv6_protocol = {
+ .handler = tcp_v6_rcv,
+ .err_handler = tcp_v6_err,
+ .gso_send_check = tcp_v6_gso_send_check,
+ .gso_segment = tcp_tso_segment,
+ .gro_receive = tcp6_gro_receive,
+ .gro_complete = tcp6_gro_complete,
+ .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
+};
+
+static struct inet_protosw tcpv6_protosw = {
+ .type = SOCK_STREAM,
+ .protocol = IPPROTO_TCP,
+ .prot = &tcpv6_prot,
+ .ops = &inet6_stream_ops,
+ .no_check = 0,
+ .flags = INET_PROTOSW_PERMANENT |
+ INET_PROTOSW_ICSK,
+};
+
+static int __net_init tcpv6_net_init(struct net *net)
+{
+ return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
+ SOCK_RAW, IPPROTO_TCP, net);
+}
+
+static void __net_exit tcpv6_net_exit(struct net *net)
+{
+ inet_ctl_sock_destroy(net->ipv6.tcp_sk);
+}
+
+static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
+{
+ inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET6);
+}
+
+static struct pernet_operations tcpv6_net_ops = {
+ .init = tcpv6_net_init,
+ .exit = tcpv6_net_exit,
+ .exit_batch = tcpv6_net_exit_batch,
+};
+
+int __init tcpv6_init(void)
+{
+ int ret;
+
+ ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
+ if (ret)
+ goto out;
+
+ /* register inet6 protocol */
+ ret = inet6_register_protosw(&tcpv6_protosw);
+ if (ret)
+ goto out_tcpv6_protocol;
+
+ ret = register_pernet_subsys(&tcpv6_net_ops);
+ if (ret)
+ goto out_tcpv6_protosw;
+out:
+ return ret;
+
+out_tcpv6_protocol:
+ inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
+out_tcpv6_protosw:
+ inet6_unregister_protosw(&tcpv6_protosw);
+ goto out;
+}
+
+void tcpv6_exit(void)
+{
+ unregister_pernet_subsys(&tcpv6_net_ops);
+ inet6_unregister_protosw(&tcpv6_protosw);
+ inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
+}
diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c
new file mode 100644
index 00000000..4f3cec12
--- /dev/null
+++ b/net/ipv6/tunnel6.c
@@ -0,0 +1,184 @@
+/*
+ * Copyright (C)2003,2004 USAGI/WIDE Project
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Authors Mitsuru KANDA <mk@linux-ipv6.org>
+ * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
+ */
+
+#include <linux/icmpv6.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/xfrm.h>
+
+static struct xfrm6_tunnel __rcu *tunnel6_handlers __read_mostly;
+static struct xfrm6_tunnel __rcu *tunnel46_handlers __read_mostly;
+static DEFINE_MUTEX(tunnel6_mutex);
+
+int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family)
+{
+ struct xfrm6_tunnel __rcu **pprev;
+ struct xfrm6_tunnel *t;
+ int ret = -EEXIST;
+ int priority = handler->priority;
+
+ mutex_lock(&tunnel6_mutex);
+
+ for (pprev = (family == AF_INET6) ? &tunnel6_handlers : &tunnel46_handlers;
+ (t = rcu_dereference_protected(*pprev,
+ lockdep_is_held(&tunnel6_mutex))) != NULL;
+ pprev = &t->next) {
+ if (t->priority > priority)
+ break;
+ if (t->priority == priority)
+ goto err;
+ }
+
+ handler->next = *pprev;
+ rcu_assign_pointer(*pprev, handler);
+
+ ret = 0;
+
+err:
+ mutex_unlock(&tunnel6_mutex);
+
+ return ret;
+}
+
+EXPORT_SYMBOL(xfrm6_tunnel_register);
+
+int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family)
+{
+ struct xfrm6_tunnel __rcu **pprev;
+ struct xfrm6_tunnel *t;
+ int ret = -ENOENT;
+
+ mutex_lock(&tunnel6_mutex);
+
+ for (pprev = (family == AF_INET6) ? &tunnel6_handlers : &tunnel46_handlers;
+ (t = rcu_dereference_protected(*pprev,
+ lockdep_is_held(&tunnel6_mutex))) != NULL;
+ pprev = &t->next) {
+ if (t == handler) {
+ *pprev = handler->next;
+ ret = 0;
+ break;
+ }
+ }
+
+ mutex_unlock(&tunnel6_mutex);
+
+ synchronize_net();
+
+ return ret;
+}
+
+EXPORT_SYMBOL(xfrm6_tunnel_deregister);
+
+#define for_each_tunnel_rcu(head, handler) \
+ for (handler = rcu_dereference(head); \
+ handler != NULL; \
+ handler = rcu_dereference(handler->next)) \
+
+static int tunnel6_rcv(struct sk_buff *skb)
+{
+ struct xfrm6_tunnel *handler;
+
+ if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
+ goto drop;
+
+ for_each_tunnel_rcu(tunnel6_handlers, handler)
+ if (!handler->handler(skb))
+ return 0;
+
+ icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
+
+drop:
+ kfree_skb(skb);
+ return 0;
+}
+
+static int tunnel46_rcv(struct sk_buff *skb)
+{
+ struct xfrm6_tunnel *handler;
+
+ if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+ goto drop;
+
+ for_each_tunnel_rcu(tunnel46_handlers, handler)
+ if (!handler->handler(skb))
+ return 0;
+
+ icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
+
+drop:
+ kfree_skb(skb);
+ return 0;
+}
+
+static void tunnel6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+{
+ struct xfrm6_tunnel *handler;
+
+ for_each_tunnel_rcu(tunnel6_handlers, handler)
+ if (!handler->err_handler(skb, opt, type, code, offset, info))
+ break;
+}
+
+static const struct inet6_protocol tunnel6_protocol = {
+ .handler = tunnel6_rcv,
+ .err_handler = tunnel6_err,
+ .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
+};
+
+static const struct inet6_protocol tunnel46_protocol = {
+ .handler = tunnel46_rcv,
+ .err_handler = tunnel6_err,
+ .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
+};
+
+static int __init tunnel6_init(void)
+{
+ if (inet6_add_protocol(&tunnel6_protocol, IPPROTO_IPV6)) {
+ printk(KERN_ERR "tunnel6 init(): can't add protocol\n");
+ return -EAGAIN;
+ }
+ if (inet6_add_protocol(&tunnel46_protocol, IPPROTO_IPIP)) {
+ printk(KERN_ERR "tunnel6 init(): can't add protocol\n");
+ inet6_del_protocol(&tunnel6_protocol, IPPROTO_IPV6);
+ return -EAGAIN;
+ }
+ return 0;
+}
+
+static void __exit tunnel6_fini(void)
+{
+ if (inet6_del_protocol(&tunnel46_protocol, IPPROTO_IPIP))
+ printk(KERN_ERR "tunnel6 close: can't remove protocol\n");
+ if (inet6_del_protocol(&tunnel6_protocol, IPPROTO_IPV6))
+ printk(KERN_ERR "tunnel6 close: can't remove protocol\n");
+}
+
+module_init(tunnel6_init);
+module_exit(tunnel6_fini);
+MODULE_LICENSE("GPL");
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
new file mode 100644
index 00000000..37b0699e
--- /dev/null
+++ b/net/ipv6/udp.c
@@ -0,0 +1,1526 @@
+/*
+ * UDP over IPv6
+ * Linux INET6 implementation
+ *
+ * Authors:
+ * Pedro Roque <roque@di.fc.ul.pt>
+ *
+ * Based on linux/ipv4/udp.c
+ *
+ * Fixes:
+ * Hideaki YOSHIFUJI : sin6_scope_id support
+ * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
+ * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
+ * a single port at the same time.
+ * Kazunori MIYAZAWA @USAGI: change process style to use ip6_append_data
+ * YOSHIFUJI Hideaki @USAGI: convert /proc/net/udp6 to seq_file.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/ipv6.h>
+#include <linux/icmpv6.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <asm/uaccess.h>
+
+#include <net/ndisc.h>
+#include <net/protocol.h>
+#include <net/transp_v6.h>
+#include <net/ip6_route.h>
+#include <net/raw.h>
+#include <net/tcp_states.h>
+#include <net/ip6_checksum.h>
+#include <net/xfrm.h>
+
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include "udp_impl.h"
+
+int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
+{
+ const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr;
+ const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
+ __be32 sk1_rcv_saddr = sk_rcv_saddr(sk);
+ __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2);
+ int sk_ipv6only = ipv6_only_sock(sk);
+ int sk2_ipv6only = inet_v6_ipv6only(sk2);
+ int addr_type = ipv6_addr_type(sk_rcv_saddr6);
+ int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
+
+ /* if both are mapped, treat as IPv4 */
+ if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED)
+ return (!sk2_ipv6only &&
+ (!sk1_rcv_saddr || !sk2_rcv_saddr ||
+ sk1_rcv_saddr == sk2_rcv_saddr));
+
+ if (addr_type2 == IPV6_ADDR_ANY &&
+ !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
+ return 1;
+
+ if (addr_type == IPV6_ADDR_ANY &&
+ !(sk_ipv6only && addr_type2 == IPV6_ADDR_MAPPED))
+ return 1;
+
+ if (sk2_rcv_saddr6 &&
+ ipv6_addr_equal(sk_rcv_saddr6, sk2_rcv_saddr6))
+ return 1;
+
+ return 0;
+}
+
+static unsigned int udp6_portaddr_hash(struct net *net,
+ const struct in6_addr *addr6,
+ unsigned int port)
+{
+ unsigned int hash, mix = net_hash_mix(net);
+
+ if (ipv6_addr_any(addr6))
+ hash = jhash_1word(0, mix);
+ else if (ipv6_addr_v4mapped(addr6))
+ hash = jhash_1word((__force u32)addr6->s6_addr32[3], mix);
+ else
+ hash = jhash2((__force u32 *)addr6->s6_addr32, 4, mix);
+
+ return hash ^ port;
+}
+
+
+int udp_v6_get_port(struct sock *sk, unsigned short snum)
+{
+ unsigned int hash2_nulladdr =
+ udp6_portaddr_hash(sock_net(sk), &in6addr_any, snum);
+ unsigned int hash2_partial =
+ udp6_portaddr_hash(sock_net(sk), &inet6_sk(sk)->rcv_saddr, 0);
+
+ /* precompute partial secondary hash */
+ udp_sk(sk)->udp_portaddr_hash = hash2_partial;
+ return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal, hash2_nulladdr);
+}
+
+static void udp_v6_rehash(struct sock *sk)
+{
+ u16 new_hash = udp6_portaddr_hash(sock_net(sk),
+ &inet6_sk(sk)->rcv_saddr,
+ inet_sk(sk)->inet_num);
+
+ udp_lib_rehash(sk, new_hash);
+}
+
+static inline int compute_score(struct sock *sk, struct net *net,
+ unsigned short hnum,
+ const struct in6_addr *saddr, __be16 sport,
+ const struct in6_addr *daddr, __be16 dport,
+ int dif)
+{
+ int score = -1;
+
+ if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
+ sk->sk_family == PF_INET6) {
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct inet_sock *inet = inet_sk(sk);
+
+ score = 0;
+ if (inet->inet_dport) {
+ if (inet->inet_dport != sport)
+ return -1;
+ score++;
+ }
+ if (!ipv6_addr_any(&np->rcv_saddr)) {
+ if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
+ return -1;
+ score++;
+ }
+ if (!ipv6_addr_any(&np->daddr)) {
+ if (!ipv6_addr_equal(&np->daddr, saddr))
+ return -1;
+ score++;
+ }
+ if (sk->sk_bound_dev_if) {
+ if (sk->sk_bound_dev_if != dif)
+ return -1;
+ score++;
+ }
+ }
+ return score;
+}
+
+#define SCORE2_MAX (1 + 1 + 1)
+static inline int compute_score2(struct sock *sk, struct net *net,
+ const struct in6_addr *saddr, __be16 sport,
+ const struct in6_addr *daddr, unsigned short hnum,
+ int dif)
+{
+ int score = -1;
+
+ if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
+ sk->sk_family == PF_INET6) {
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct inet_sock *inet = inet_sk(sk);
+
+ if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
+ return -1;
+ score = 0;
+ if (inet->inet_dport) {
+ if (inet->inet_dport != sport)
+ return -1;
+ score++;
+ }
+ if (!ipv6_addr_any(&np->daddr)) {
+ if (!ipv6_addr_equal(&np->daddr, saddr))
+ return -1;
+ score++;
+ }
+ if (sk->sk_bound_dev_if) {
+ if (sk->sk_bound_dev_if != dif)
+ return -1;
+ score++;
+ }
+ }
+ return score;
+}
+
+
+/* called with read_rcu_lock() */
+static struct sock *udp6_lib_lookup2(struct net *net,
+ const struct in6_addr *saddr, __be16 sport,
+ const struct in6_addr *daddr, unsigned int hnum, int dif,
+ struct udp_hslot *hslot2, unsigned int slot2)
+{
+ struct sock *sk, *result;
+ struct hlist_nulls_node *node;
+ int score, badness;
+
+begin:
+ result = NULL;
+ badness = -1;
+ udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) {
+ score = compute_score2(sk, net, saddr, sport,
+ daddr, hnum, dif);
+ if (score > badness) {
+ result = sk;
+ badness = score;
+ if (score == SCORE2_MAX)
+ goto exact_match;
+ }
+ }
+ /*
+ * if the nulls value we got at the end of this lookup is
+ * not the expected one, we must restart lookup.
+ * We probably met an item that was moved to another chain.
+ */
+ if (get_nulls_value(node) != slot2)
+ goto begin;
+
+ if (result) {
+exact_match:
+ if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
+ result = NULL;
+ else if (unlikely(compute_score2(result, net, saddr, sport,
+ daddr, hnum, dif) < badness)) {
+ sock_put(result);
+ goto begin;
+ }
+ }
+ return result;
+}
+
+struct sock *__udp6_lib_lookup(struct net *net,
+ const struct in6_addr *saddr, __be16 sport,
+ const struct in6_addr *daddr, __be16 dport,
+ int dif, struct udp_table *udptable)
+{
+ struct sock *sk, *result;
+ struct hlist_nulls_node *node;
+ unsigned short hnum = ntohs(dport);
+ unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
+ struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
+ int score, badness;
+
+ rcu_read_lock();
+ if (hslot->count > 10) {
+ hash2 = udp6_portaddr_hash(net, daddr, hnum);
+ slot2 = hash2 & udptable->mask;
+ hslot2 = &udptable->hash2[slot2];
+ if (hslot->count < hslot2->count)
+ goto begin;
+
+ result = udp6_lib_lookup2(net, saddr, sport,
+ daddr, hnum, dif,
+ hslot2, slot2);
+ if (!result) {
+ hash2 = udp6_portaddr_hash(net, &in6addr_any, hnum);
+ slot2 = hash2 & udptable->mask;
+ hslot2 = &udptable->hash2[slot2];
+ if (hslot->count < hslot2->count)
+ goto begin;
+
+ result = udp6_lib_lookup2(net, saddr, sport,
+ &in6addr_any, hnum, dif,
+ hslot2, slot2);
+ }
+ rcu_read_unlock();
+ return result;
+ }
+begin:
+ result = NULL;
+ badness = -1;
+ sk_nulls_for_each_rcu(sk, node, &hslot->head) {
+ score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif);
+ if (score > badness) {
+ result = sk;
+ badness = score;
+ }
+ }
+ /*
+ * if the nulls value we got at the end of this lookup is
+ * not the expected one, we must restart lookup.
+ * We probably met an item that was moved to another chain.
+ */
+ if (get_nulls_value(node) != slot)
+ goto begin;
+
+ if (result) {
+ if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
+ result = NULL;
+ else if (unlikely(compute_score(result, net, hnum, saddr, sport,
+ daddr, dport, dif) < badness)) {
+ sock_put(result);
+ goto begin;
+ }
+ }
+ rcu_read_unlock();
+ return result;
+}
+EXPORT_SYMBOL_GPL(__udp6_lib_lookup);
+
+static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
+ __be16 sport, __be16 dport,
+ struct udp_table *udptable)
+{
+ struct sock *sk;
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
+
+ if (unlikely(sk = skb_steal_sock(skb)))
+ return sk;
+ return __udp6_lib_lookup(dev_net(skb_dst(skb)->dev), &iph->saddr, sport,
+ &iph->daddr, dport, inet6_iif(skb),
+ udptable);
+}
+
+struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport,
+ const struct in6_addr *daddr, __be16 dport, int dif)
+{
+ return __udp6_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table);
+}
+EXPORT_SYMBOL_GPL(udp6_lib_lookup);
+
+
+/*
+ * This should be easy, if there is something there we
+ * return it, otherwise we block.
+ */
+
+int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
+ struct msghdr *msg, size_t len,
+ int noblock, int flags, int *addr_len)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct inet_sock *inet = inet_sk(sk);
+ struct sk_buff *skb;
+ unsigned int ulen, copied;
+ int peeked, off = 0;
+ int err;
+ int is_udplite = IS_UDPLITE(sk);
+ int is_udp4;
+ bool slow;
+
+ if (addr_len)
+ *addr_len=sizeof(struct sockaddr_in6);
+
+ if (flags & MSG_ERRQUEUE)
+ return ipv6_recv_error(sk, msg, len);
+
+ if (np->rxpmtu && np->rxopt.bits.rxpmtu)
+ return ipv6_recv_rxpmtu(sk, msg, len);
+
+try_again:
+ skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
+ &peeked, &off, &err);
+ if (!skb)
+ goto out;
+
+ ulen = skb->len - sizeof(struct udphdr);
+ copied = len;
+ if (copied > ulen)
+ copied = ulen;
+ else if (copied < ulen)
+ msg->msg_flags |= MSG_TRUNC;
+
+ is_udp4 = (skb->protocol == htons(ETH_P_IP));
+
+ /*
+ * If checksum is needed at all, try to do it while copying the
+ * data. If the data is truncated, or if we only want a partial
+ * coverage checksum (UDP-Lite), do it before the copy.
+ */
+
+ if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) {
+ if (udp_lib_checksum_complete(skb))
+ goto csum_copy_err;
+ }
+
+ if (skb_csum_unnecessary(skb))
+ err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
+ msg->msg_iov, copied );
+ else {
+ err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);
+ if (err == -EINVAL)
+ goto csum_copy_err;
+ }
+ if (err)
+ goto out_free;
+
+ if (!peeked) {
+ if (is_udp4)
+ UDP_INC_STATS_USER(sock_net(sk),
+ UDP_MIB_INDATAGRAMS, is_udplite);
+ else
+ UDP6_INC_STATS_USER(sock_net(sk),
+ UDP_MIB_INDATAGRAMS, is_udplite);
+ }
+
+ sock_recv_ts_and_drops(msg, sk, skb);
+
+ /* Copy the address. */
+ if (msg->msg_name) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *) msg->msg_name;
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_port = udp_hdr(skb)->source;
+ sin6->sin6_flowinfo = 0;
+ sin6->sin6_scope_id = 0;
+
+ if (is_udp4)
+ ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr,
+ &sin6->sin6_addr);
+ else {
+ sin6->sin6_addr = ipv6_hdr(skb)->saddr;
+ if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
+ sin6->sin6_scope_id = IP6CB(skb)->iif;
+ }
+
+ }
+ if (is_udp4) {
+ if (inet->cmsg_flags)
+ ip_cmsg_recv(msg, skb);
+ } else {
+ if (np->rxopt.all)
+ datagram_recv_ctl(sk, msg, skb);
+ }
+
+ err = copied;
+ if (flags & MSG_TRUNC)
+ err = ulen;
+
+out_free:
+ skb_free_datagram_locked(sk, skb);
+out:
+ return err;
+
+csum_copy_err:
+ slow = lock_sock_fast(sk);
+ if (!skb_kill_datagram(sk, skb, flags)) {
+ if (is_udp4)
+ UDP_INC_STATS_USER(sock_net(sk),
+ UDP_MIB_INERRORS, is_udplite);
+ else
+ UDP6_INC_STATS_USER(sock_net(sk),
+ UDP_MIB_INERRORS, is_udplite);
+ }
+ unlock_sock_fast(sk, slow);
+
+ if (noblock)
+ return -EAGAIN;
+
+ /* starting over for a new packet */
+ msg->msg_flags &= ~MSG_TRUNC;
+ goto try_again;
+}
+
+void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info,
+ struct udp_table *udptable)
+{
+ struct ipv6_pinfo *np;
+ const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
+ const struct in6_addr *saddr = &hdr->saddr;
+ const struct in6_addr *daddr = &hdr->daddr;
+ struct udphdr *uh = (struct udphdr*)(skb->data+offset);
+ struct sock *sk;
+ int err;
+
+ sk = __udp6_lib_lookup(dev_net(skb->dev), daddr, uh->dest,
+ saddr, uh->source, inet6_iif(skb), udptable);
+ if (sk == NULL)
+ return;
+
+ np = inet6_sk(sk);
+
+ if (!icmpv6_err_convert(type, code, &err) && !np->recverr)
+ goto out;
+
+ if (sk->sk_state != TCP_ESTABLISHED && !np->recverr)
+ goto out;
+
+ if (np->recverr)
+ ipv6_icmp_error(sk, skb, err, uh->dest, ntohl(info), (u8 *)(uh+1));
+
+ sk->sk_err = err;
+ sk->sk_error_report(sk);
+out:
+ sock_put(sk);
+}
+
+static __inline__ void udpv6_err(struct sk_buff *skb,
+ struct inet6_skb_parm *opt, u8 type,
+ u8 code, int offset, __be32 info )
+{
+ __udp6_lib_err(skb, opt, type, code, offset, info, &udp_table);
+}
+
+int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
+{
+ struct udp_sock *up = udp_sk(sk);
+ int rc;
+ int is_udplite = IS_UDPLITE(sk);
+
+ if (!ipv6_addr_any(&inet6_sk(sk)->daddr))
+ sock_rps_save_rxhash(sk, skb);
+
+ if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
+ goto drop;
+
+ /*
+ * UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c).
+ */
+ if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) {
+
+ if (up->pcrlen == 0) { /* full coverage was set */
+ LIMIT_NETDEBUG(KERN_WARNING "UDPLITE6: partial coverage"
+ " %d while full coverage %d requested\n",
+ UDP_SKB_CB(skb)->cscov, skb->len);
+ goto drop;
+ }
+ if (UDP_SKB_CB(skb)->cscov < up->pcrlen) {
+ LIMIT_NETDEBUG(KERN_WARNING "UDPLITE6: coverage %d "
+ "too small, need min %d\n",
+ UDP_SKB_CB(skb)->cscov, up->pcrlen);
+ goto drop;
+ }
+ }
+
+ if (rcu_access_pointer(sk->sk_filter)) {
+ if (udp_lib_checksum_complete(skb))
+ goto drop;
+ }
+
+ skb_dst_drop(skb);
+ rc = sock_queue_rcv_skb(sk, skb);
+ if (rc < 0) {
+ /* Note that an ENOMEM error is charged twice */
+ if (rc == -ENOMEM)
+ UDP6_INC_STATS_BH(sock_net(sk),
+ UDP_MIB_RCVBUFERRORS, is_udplite);
+ goto drop_no_sk_drops_inc;
+ }
+
+ return 0;
+drop:
+ atomic_inc(&sk->sk_drops);
+drop_no_sk_drops_inc:
+ UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
+ kfree_skb(skb);
+ return -1;
+}
+
+static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk,
+ __be16 loc_port, const struct in6_addr *loc_addr,
+ __be16 rmt_port, const struct in6_addr *rmt_addr,
+ int dif)
+{
+ struct hlist_nulls_node *node;
+ struct sock *s = sk;
+ unsigned short num = ntohs(loc_port);
+
+ sk_nulls_for_each_from(s, node) {
+ struct inet_sock *inet = inet_sk(s);
+
+ if (!net_eq(sock_net(s), net))
+ continue;
+
+ if (udp_sk(s)->udp_port_hash == num &&
+ s->sk_family == PF_INET6) {
+ struct ipv6_pinfo *np = inet6_sk(s);
+ if (inet->inet_dport) {
+ if (inet->inet_dport != rmt_port)
+ continue;
+ }
+ if (!ipv6_addr_any(&np->daddr) &&
+ !ipv6_addr_equal(&np->daddr, rmt_addr))
+ continue;
+
+ if (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)
+ continue;
+
+ if (!ipv6_addr_any(&np->rcv_saddr)) {
+ if (!ipv6_addr_equal(&np->rcv_saddr, loc_addr))
+ continue;
+ }
+ if (!inet6_mc_check(s, loc_addr, rmt_addr))
+ continue;
+ return s;
+ }
+ }
+ return NULL;
+}
+
+static void flush_stack(struct sock **stack, unsigned int count,
+ struct sk_buff *skb, unsigned int final)
+{
+ unsigned int i;
+ struct sock *sk;
+ struct sk_buff *skb1;
+
+ for (i = 0; i < count; i++) {
+ skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC);
+
+ sk = stack[i];
+ if (skb1) {
+ if (sk_rcvqueues_full(sk, skb1)) {
+ kfree_skb(skb1);
+ goto drop;
+ }
+ bh_lock_sock(sk);
+ if (!sock_owned_by_user(sk))
+ udpv6_queue_rcv_skb(sk, skb1);
+ else if (sk_add_backlog(sk, skb1)) {
+ kfree_skb(skb1);
+ bh_unlock_sock(sk);
+ goto drop;
+ }
+ bh_unlock_sock(sk);
+ continue;
+ }
+drop:
+ atomic_inc(&sk->sk_drops);
+ UDP6_INC_STATS_BH(sock_net(sk),
+ UDP_MIB_RCVBUFERRORS, IS_UDPLITE(sk));
+ UDP6_INC_STATS_BH(sock_net(sk),
+ UDP_MIB_INERRORS, IS_UDPLITE(sk));
+ }
+}
+/*
+ * Note: called only from the BH handler context,
+ * so we don't need to lock the hashes.
+ */
+static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
+ const struct in6_addr *saddr, const struct in6_addr *daddr,
+ struct udp_table *udptable)
+{
+ struct sock *sk, *stack[256 / sizeof(struct sock *)];
+ const struct udphdr *uh = udp_hdr(skb);
+ struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest));
+ int dif;
+ unsigned int i, count = 0;
+
+ spin_lock(&hslot->lock);
+ sk = sk_nulls_head(&hslot->head);
+ dif = inet6_iif(skb);
+ sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);
+ while (sk) {
+ stack[count++] = sk;
+ sk = udp_v6_mcast_next(net, sk_nulls_next(sk), uh->dest, daddr,
+ uh->source, saddr, dif);
+ if (unlikely(count == ARRAY_SIZE(stack))) {
+ if (!sk)
+ break;
+ flush_stack(stack, count, skb, ~0);
+ count = 0;
+ }
+ }
+ /*
+ * before releasing the lock, we must take reference on sockets
+ */
+ for (i = 0; i < count; i++)
+ sock_hold(stack[i]);
+
+ spin_unlock(&hslot->lock);
+
+ if (count) {
+ flush_stack(stack, count, skb, count - 1);
+
+ for (i = 0; i < count; i++)
+ sock_put(stack[i]);
+ } else {
+ kfree_skb(skb);
+ }
+ return 0;
+}
+
+static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh,
+ int proto)
+{
+ int err;
+
+ UDP_SKB_CB(skb)->partial_cov = 0;
+ UDP_SKB_CB(skb)->cscov = skb->len;
+
+ if (proto == IPPROTO_UDPLITE) {
+ err = udplite_checksum_init(skb, uh);
+ if (err)
+ return err;
+ }
+
+ if (uh->check == 0) {
+ /* RFC 2460 section 8.1 says that we SHOULD log
+ this error. Well, it is reasonable.
+ */
+ LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0\n");
+ return 1;
+ }
+ if (skb->ip_summed == CHECKSUM_COMPLETE &&
+ !csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
+ skb->len, proto, skb->csum))
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+ if (!skb_csum_unnecessary(skb))
+ skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+ &ipv6_hdr(skb)->daddr,
+ skb->len, proto, 0));
+
+ return 0;
+}
+
+int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
+ int proto)
+{
+ struct net *net = dev_net(skb->dev);
+ struct sock *sk;
+ struct udphdr *uh;
+ const struct in6_addr *saddr, *daddr;
+ u32 ulen = 0;
+
+ if (!pskb_may_pull(skb, sizeof(struct udphdr)))
+ goto discard;
+
+ saddr = &ipv6_hdr(skb)->saddr;
+ daddr = &ipv6_hdr(skb)->daddr;
+ uh = udp_hdr(skb);
+
+ ulen = ntohs(uh->len);
+ if (ulen > skb->len)
+ goto short_packet;
+
+ if (proto == IPPROTO_UDP) {
+ /* UDP validates ulen. */
+
+ /* Check for jumbo payload */
+ if (ulen == 0)
+ ulen = skb->len;
+
+ if (ulen < sizeof(*uh))
+ goto short_packet;
+
+ if (ulen < skb->len) {
+ if (pskb_trim_rcsum(skb, ulen))
+ goto short_packet;
+ saddr = &ipv6_hdr(skb)->saddr;
+ daddr = &ipv6_hdr(skb)->daddr;
+ uh = udp_hdr(skb);
+ }
+ }
+
+ if (udp6_csum_init(skb, uh, proto))
+ goto discard;
+
+ /*
+ * Multicast receive code
+ */
+ if (ipv6_addr_is_multicast(daddr))
+ return __udp6_lib_mcast_deliver(net, skb,
+ saddr, daddr, udptable);
+
+ /* Unicast */
+
+ /*
+ * check socket cache ... must talk to Alan about his plans
+ * for sock caches... i'll skip this for now.
+ */
+ sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
+
+ if (sk == NULL) {
+ if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
+ goto discard;
+
+ if (udp_lib_checksum_complete(skb))
+ goto discard;
+ UDP6_INC_STATS_BH(net, UDP_MIB_NOPORTS,
+ proto == IPPROTO_UDPLITE);
+
+ icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
+
+ kfree_skb(skb);
+ return 0;
+ }
+
+ /* deliver */
+
+ if (sk_rcvqueues_full(sk, skb)) {
+ sock_put(sk);
+ goto discard;
+ }
+ bh_lock_sock(sk);
+ if (!sock_owned_by_user(sk))
+ udpv6_queue_rcv_skb(sk, skb);
+ else if (sk_add_backlog(sk, skb)) {
+ atomic_inc(&sk->sk_drops);
+ bh_unlock_sock(sk);
+ sock_put(sk);
+ goto discard;
+ }
+ bh_unlock_sock(sk);
+ sock_put(sk);
+ return 0;
+
+short_packet:
+ LIMIT_NETDEBUG(KERN_DEBUG "UDP%sv6: short packet: From [%pI6c]:%u %d/%d to [%pI6c]:%u\n",
+ proto == IPPROTO_UDPLITE ? "-Lite" : "",
+ saddr,
+ ntohs(uh->source),
+ ulen,
+ skb->len,
+ daddr,
+ ntohs(uh->dest));
+
+discard:
+ UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
+ kfree_skb(skb);
+ return 0;
+}
+
+static __inline__ int udpv6_rcv(struct sk_buff *skb)
+{
+ return __udp6_lib_rcv(skb, &udp_table, IPPROTO_UDP);
+}
+
+/*
+ * Throw away all pending data and cancel the corking. Socket is locked.
+ */
+static void udp_v6_flush_pending_frames(struct sock *sk)
+{
+ struct udp_sock *up = udp_sk(sk);
+
+ if (up->pending == AF_INET)
+ udp_flush_pending_frames(sk);
+ else if (up->pending) {
+ up->len = 0;
+ up->pending = 0;
+ ip6_flush_pending_frames(sk);
+ }
+}
+
+/**
+ * udp6_hwcsum_outgoing - handle outgoing HW checksumming
+ * @sk: socket we are sending on
+ * @skb: sk_buff containing the filled-in UDP header
+ * (checksum field must be zeroed out)
+ */
+static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
+ const struct in6_addr *saddr,
+ const struct in6_addr *daddr, int len)
+{
+ unsigned int offset;
+ struct udphdr *uh = udp_hdr(skb);
+ __wsum csum = 0;
+
+ if (skb_queue_len(&sk->sk_write_queue) == 1) {
+ /* Only one fragment on the socket. */
+ skb->csum_start = skb_transport_header(skb) - skb->head;
+ skb->csum_offset = offsetof(struct udphdr, check);
+ uh->check = ~csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP, 0);
+ } else {
+ /*
+ * HW-checksum won't work as there are two or more
+ * fragments on the socket so that all csums of sk_buffs
+ * should be together
+ */
+ offset = skb_transport_offset(skb);
+ skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
+
+ skb->ip_summed = CHECKSUM_NONE;
+
+ skb_queue_walk(&sk->sk_write_queue, skb) {
+ csum = csum_add(csum, skb->csum);
+ }
+
+ uh->check = csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP,
+ csum);
+ if (uh->check == 0)
+ uh->check = CSUM_MANGLED_0;
+ }
+}
+
+/*
+ * Sending
+ */
+
+static int udp_v6_push_pending_frames(struct sock *sk)
+{
+ struct sk_buff *skb;
+ struct udphdr *uh;
+ struct udp_sock *up = udp_sk(sk);
+ struct inet_sock *inet = inet_sk(sk);
+ struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
+ int err = 0;
+ int is_udplite = IS_UDPLITE(sk);
+ __wsum csum = 0;
+
+ /* Grab the skbuff where UDP header space exists. */
+ if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
+ goto out;
+
+ /*
+ * Create a UDP header
+ */
+ uh = udp_hdr(skb);
+ uh->source = fl6->fl6_sport;
+ uh->dest = fl6->fl6_dport;
+ uh->len = htons(up->len);
+ uh->check = 0;
+
+ if (is_udplite)
+ csum = udplite_csum_outgoing(sk, skb);
+ else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
+ udp6_hwcsum_outgoing(sk, skb, &fl6->saddr, &fl6->daddr,
+ up->len);
+ goto send;
+ } else
+ csum = udp_csum_outgoing(sk, skb);
+
+ /* add protocol-dependent pseudo-header */
+ uh->check = csum_ipv6_magic(&fl6->saddr, &fl6->daddr,
+ up->len, fl6->flowi6_proto, csum);
+ if (uh->check == 0)
+ uh->check = CSUM_MANGLED_0;
+
+send:
+ err = ip6_push_pending_frames(sk);
+ if (err) {
+ if (err == -ENOBUFS && !inet6_sk(sk)->recverr) {
+ UDP6_INC_STATS_USER(sock_net(sk),
+ UDP_MIB_SNDBUFERRORS, is_udplite);
+ err = 0;
+ }
+ } else
+ UDP6_INC_STATS_USER(sock_net(sk),
+ UDP_MIB_OUTDATAGRAMS, is_udplite);
+out:
+ up->len = 0;
+ up->pending = 0;
+ return err;
+}
+
+int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
+ struct msghdr *msg, size_t len)
+{
+ struct ipv6_txoptions opt_space;
+ struct udp_sock *up = udp_sk(sk);
+ struct inet_sock *inet = inet_sk(sk);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) msg->msg_name;
+ struct in6_addr *daddr, *final_p, final;
+ struct ipv6_txoptions *opt = NULL;
+ struct ip6_flowlabel *flowlabel = NULL;
+ struct flowi6 fl6;
+ struct dst_entry *dst;
+ int addr_len = msg->msg_namelen;
+ int ulen = len;
+ int hlimit = -1;
+ int tclass = -1;
+ int dontfrag = -1;
+ int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
+ int err;
+ int connected = 0;
+ int is_udplite = IS_UDPLITE(sk);
+ int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
+
+ /* destination address check */
+ if (sin6) {
+ if (addr_len < offsetof(struct sockaddr, sa_data))
+ return -EINVAL;
+
+ switch (sin6->sin6_family) {
+ case AF_INET6:
+ if (addr_len < SIN6_LEN_RFC2133)
+ return -EINVAL;
+ daddr = &sin6->sin6_addr;
+ break;
+ case AF_INET:
+ goto do_udp_sendmsg;
+ case AF_UNSPEC:
+ msg->msg_name = sin6 = NULL;
+ msg->msg_namelen = addr_len = 0;
+ daddr = NULL;
+ break;
+ default:
+ return -EINVAL;
+ }
+ } else if (!up->pending) {
+ if (sk->sk_state != TCP_ESTABLISHED)
+ return -EDESTADDRREQ;
+ daddr = &np->daddr;
+ } else
+ daddr = NULL;
+
+ if (daddr) {
+ if (ipv6_addr_v4mapped(daddr)) {
+ struct sockaddr_in sin;
+ sin.sin_family = AF_INET;
+ sin.sin_port = sin6 ? sin6->sin6_port : inet->inet_dport;
+ sin.sin_addr.s_addr = daddr->s6_addr32[3];
+ msg->msg_name = &sin;
+ msg->msg_namelen = sizeof(sin);
+do_udp_sendmsg:
+ if (__ipv6_only_sock(sk))
+ return -ENETUNREACH;
+ return udp_sendmsg(iocb, sk, msg, len);
+ }
+ }
+
+ if (up->pending == AF_INET)
+ return udp_sendmsg(iocb, sk, msg, len);
+
+ /* Rough check on arithmetic overflow,
+ better check is made in ip6_append_data().
+ */
+ if (len > INT_MAX - sizeof(struct udphdr))
+ return -EMSGSIZE;
+
+ if (up->pending) {
+ /*
+ * There are pending frames.
+ * The socket lock must be held while it's corked.
+ */
+ lock_sock(sk);
+ if (likely(up->pending)) {
+ if (unlikely(up->pending != AF_INET6)) {
+ release_sock(sk);
+ return -EAFNOSUPPORT;
+ }
+ dst = NULL;
+ goto do_append_data;
+ }
+ release_sock(sk);
+ }
+ ulen += sizeof(struct udphdr);
+
+ memset(&fl6, 0, sizeof(fl6));
+
+ if (sin6) {
+ if (sin6->sin6_port == 0)
+ return -EINVAL;
+
+ fl6.fl6_dport = sin6->sin6_port;
+ daddr = &sin6->sin6_addr;
+
+ if (np->sndflow) {
+ fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
+ if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
+ flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
+ if (flowlabel == NULL)
+ return -EINVAL;
+ daddr = &flowlabel->dst;
+ }
+ }
+
+ /*
+ * Otherwise it will be difficult to maintain
+ * sk->sk_dst_cache.
+ */
+ if (sk->sk_state == TCP_ESTABLISHED &&
+ ipv6_addr_equal(daddr, &np->daddr))
+ daddr = &np->daddr;
+
+ if (addr_len >= sizeof(struct sockaddr_in6) &&
+ sin6->sin6_scope_id &&
+ ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL)
+ fl6.flowi6_oif = sin6->sin6_scope_id;
+ } else {
+ if (sk->sk_state != TCP_ESTABLISHED)
+ return -EDESTADDRREQ;
+
+ fl6.fl6_dport = inet->inet_dport;
+ daddr = &np->daddr;
+ fl6.flowlabel = np->flow_label;
+ connected = 1;
+ }
+
+ if (!fl6.flowi6_oif)
+ fl6.flowi6_oif = sk->sk_bound_dev_if;
+
+ if (!fl6.flowi6_oif)
+ fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
+
+ fl6.flowi6_mark = sk->sk_mark;
+
+ if (msg->msg_controllen) {
+ opt = &opt_space;
+ memset(opt, 0, sizeof(struct ipv6_txoptions));
+ opt->tot_len = sizeof(*opt);
+
+ err = datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt,
+ &hlimit, &tclass, &dontfrag);
+ if (err < 0) {
+ fl6_sock_release(flowlabel);
+ return err;
+ }
+ if ((fl6.flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
+ flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
+ if (flowlabel == NULL)
+ return -EINVAL;
+ }
+ if (!(opt->opt_nflen|opt->opt_flen))
+ opt = NULL;
+ connected = 0;
+ }
+ if (opt == NULL)
+ opt = np->opt;
+ if (flowlabel)
+ opt = fl6_merge_options(&opt_space, flowlabel, opt);
+ opt = ipv6_fixup_options(&opt_space, opt);
+
+ fl6.flowi6_proto = sk->sk_protocol;
+ if (!ipv6_addr_any(daddr))
+ fl6.daddr = *daddr;
+ else
+ fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */
+ if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr))
+ fl6.saddr = np->saddr;
+ fl6.fl6_sport = inet->inet_sport;
+
+ final_p = fl6_update_dst(&fl6, opt, &final);
+ if (final_p)
+ connected = 0;
+
+ if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) {
+ fl6.flowi6_oif = np->mcast_oif;
+ connected = 0;
+ } else if (!fl6.flowi6_oif)
+ fl6.flowi6_oif = np->ucast_oif;
+
+ security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
+
+ dst = ip6_sk_dst_lookup_flow(sk, &fl6, final_p, true);
+ if (IS_ERR(dst)) {
+ err = PTR_ERR(dst);
+ dst = NULL;
+ goto out;
+ }
+
+ if (hlimit < 0) {
+ if (ipv6_addr_is_multicast(&fl6.daddr))
+ hlimit = np->mcast_hops;
+ else
+ hlimit = np->hop_limit;
+ if (hlimit < 0)
+ hlimit = ip6_dst_hoplimit(dst);
+ }
+
+ if (tclass < 0)
+ tclass = np->tclass;
+
+ if (dontfrag < 0)
+ dontfrag = np->dontfrag;
+
+ if (msg->msg_flags&MSG_CONFIRM)
+ goto do_confirm;
+back_from_confirm:
+
+ lock_sock(sk);
+ if (unlikely(up->pending)) {
+ /* The socket is already corked while preparing it. */
+ /* ... which is an evident application bug. --ANK */
+ release_sock(sk);
+
+ LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ up->pending = AF_INET6;
+
+do_append_data:
+ up->len += ulen;
+ getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
+ err = ip6_append_data(sk, getfrag, msg->msg_iov, ulen,
+ sizeof(struct udphdr), hlimit, tclass, opt, &fl6,
+ (struct rt6_info*)dst,
+ corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, dontfrag);
+ if (err)
+ udp_v6_flush_pending_frames(sk);
+ else if (!corkreq)
+ err = udp_v6_push_pending_frames(sk);
+ else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
+ up->pending = 0;
+
+ if (dst) {
+ if (connected) {
+ ip6_dst_store(sk, dst,
+ ipv6_addr_equal(&fl6.daddr, &np->daddr) ?
+ &np->daddr : NULL,
+#ifdef CONFIG_IPV6_SUBTREES
+ ipv6_addr_equal(&fl6.saddr, &np->saddr) ?
+ &np->saddr :
+#endif
+ NULL);
+ } else {
+ dst_release(dst);
+ }
+ dst = NULL;
+ }
+
+ if (err > 0)
+ err = np->recverr ? net_xmit_errno(err) : 0;
+ release_sock(sk);
+out:
+ dst_release(dst);
+ fl6_sock_release(flowlabel);
+ if (!err)
+ return len;
+ /*
+ * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting
+ * ENOBUFS might not be good (it's not tunable per se), but otherwise
+ * we don't have a good statistic (IpOutDiscards but it can be too many
+ * things). We could add another new stat but at least for now that
+ * seems like overkill.
+ */
+ if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
+ UDP6_INC_STATS_USER(sock_net(sk),
+ UDP_MIB_SNDBUFERRORS, is_udplite);
+ }
+ return err;
+
+do_confirm:
+ dst_confirm(dst);
+ if (!(msg->msg_flags&MSG_PROBE) || len)
+ goto back_from_confirm;
+ err = 0;
+ goto out;
+}
+
+void udpv6_destroy_sock(struct sock *sk)
+{
+ lock_sock(sk);
+ udp_v6_flush_pending_frames(sk);
+ release_sock(sk);
+
+ inet6_destroy_sock(sk);
+}
+
+/*
+ * Socket option code for UDP
+ */
+int udpv6_setsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, unsigned int optlen)
+{
+ if (level == SOL_UDP || level == SOL_UDPLITE)
+ return udp_lib_setsockopt(sk, level, optname, optval, optlen,
+ udp_v6_push_pending_frames);
+ return ipv6_setsockopt(sk, level, optname, optval, optlen);
+}
+
+#ifdef CONFIG_COMPAT
+int compat_udpv6_setsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, unsigned int optlen)
+{
+ if (level == SOL_UDP || level == SOL_UDPLITE)
+ return udp_lib_setsockopt(sk, level, optname, optval, optlen,
+ udp_v6_push_pending_frames);
+ return compat_ipv6_setsockopt(sk, level, optname, optval, optlen);
+}
+#endif
+
+int udpv6_getsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int __user *optlen)
+{
+ if (level == SOL_UDP || level == SOL_UDPLITE)
+ return udp_lib_getsockopt(sk, level, optname, optval, optlen);
+ return ipv6_getsockopt(sk, level, optname, optval, optlen);
+}
+
+#ifdef CONFIG_COMPAT
+int compat_udpv6_getsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int __user *optlen)
+{
+ if (level == SOL_UDP || level == SOL_UDPLITE)
+ return udp_lib_getsockopt(sk, level, optname, optval, optlen);
+ return compat_ipv6_getsockopt(sk, level, optname, optval, optlen);
+}
+#endif
+
+static int udp6_ufo_send_check(struct sk_buff *skb)
+{
+ const struct ipv6hdr *ipv6h;
+ struct udphdr *uh;
+
+ if (!pskb_may_pull(skb, sizeof(*uh)))
+ return -EINVAL;
+
+ ipv6h = ipv6_hdr(skb);
+ uh = udp_hdr(skb);
+
+ uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
+ IPPROTO_UDP, 0);
+ skb->csum_start = skb_transport_header(skb) - skb->head;
+ skb->csum_offset = offsetof(struct udphdr, check);
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ return 0;
+}
+
+static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
+ netdev_features_t features)
+{
+ struct sk_buff *segs = ERR_PTR(-EINVAL);
+ unsigned int mss;
+ unsigned int unfrag_ip6hlen, unfrag_len;
+ struct frag_hdr *fptr;
+ u8 *mac_start, *prevhdr;
+ u8 nexthdr;
+ u8 frag_hdr_sz = sizeof(struct frag_hdr);
+ int offset;
+ __wsum csum;
+
+ mss = skb_shinfo(skb)->gso_size;
+ if (unlikely(skb->len <= mss))
+ goto out;
+
+ if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
+ /* Packet is from an untrusted source, reset gso_segs. */
+ int type = skb_shinfo(skb)->gso_type;
+
+ if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) ||
+ !(type & (SKB_GSO_UDP))))
+ goto out;
+
+ skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);
+
+ segs = NULL;
+ goto out;
+ }
+
+ /* Do software UFO. Complete and fill in the UDP checksum as HW cannot
+ * do checksum of UDP packets sent as multiple IP fragments.
+ */
+ offset = skb_checksum_start_offset(skb);
+ csum = skb_checksum(skb, offset, skb->len- offset, 0);
+ offset += skb->csum_offset;
+ *(__sum16 *)(skb->data + offset) = csum_fold(csum);
+ skb->ip_summed = CHECKSUM_NONE;
+
+ /* Check if there is enough headroom to insert fragment header. */
+ if ((skb_mac_header(skb) < skb->head + frag_hdr_sz) &&
+ pskb_expand_head(skb, frag_hdr_sz, 0, GFP_ATOMIC))
+ goto out;
+
+ /* Find the unfragmentable header and shift it left by frag_hdr_sz
+ * bytes to insert fragment header.
+ */
+ unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
+ nexthdr = *prevhdr;
+ *prevhdr = NEXTHDR_FRAGMENT;
+ unfrag_len = skb_network_header(skb) - skb_mac_header(skb) +
+ unfrag_ip6hlen;
+ mac_start = skb_mac_header(skb);
+ memmove(mac_start-frag_hdr_sz, mac_start, unfrag_len);
+
+ skb->mac_header -= frag_hdr_sz;
+ skb->network_header -= frag_hdr_sz;
+
+ fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen);
+ fptr->nexthdr = nexthdr;
+ fptr->reserved = 0;
+ ipv6_select_ident(fptr, (struct rt6_info *)skb_dst(skb));
+
+ /* Fragment the skb. ipv6 header and the remaining fields of the
+ * fragment header are updated in ipv6_gso_segment()
+ */
+ segs = skb_segment(skb, features);
+
+out:
+ return segs;
+}
+
+static const struct inet6_protocol udpv6_protocol = {
+ .handler = udpv6_rcv,
+ .err_handler = udpv6_err,
+ .gso_send_check = udp6_ufo_send_check,
+ .gso_segment = udp6_ufo_fragment,
+ .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
+};
+
+/* ------------------------------------------------------------------------ */
+#ifdef CONFIG_PROC_FS
+
+static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket)
+{
+ struct inet_sock *inet = inet_sk(sp);
+ struct ipv6_pinfo *np = inet6_sk(sp);
+ const struct in6_addr *dest, *src;
+ __u16 destp, srcp;
+
+ dest = &np->daddr;
+ src = &np->rcv_saddr;
+ destp = ntohs(inet->inet_dport);
+ srcp = ntohs(inet->inet_sport);
+ seq_printf(seq,
+ "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
+ "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d\n",
+ bucket,
+ src->s6_addr32[0], src->s6_addr32[1],
+ src->s6_addr32[2], src->s6_addr32[3], srcp,
+ dest->s6_addr32[0], dest->s6_addr32[1],
+ dest->s6_addr32[2], dest->s6_addr32[3], destp,
+ sp->sk_state,
+ sk_wmem_alloc_get(sp),
+ sk_rmem_alloc_get(sp),
+ 0, 0L, 0,
+ sock_i_uid(sp), 0,
+ sock_i_ino(sp),
+ atomic_read(&sp->sk_refcnt), sp,
+ atomic_read(&sp->sk_drops));
+}
+
+int udp6_seq_show(struct seq_file *seq, void *v)
+{
+ if (v == SEQ_START_TOKEN)
+ seq_printf(seq,
+ " sl "
+ "local_address "
+ "remote_address "
+ "st tx_queue rx_queue tr tm->when retrnsmt"
+ " uid timeout inode ref pointer drops\n");
+ else
+ udp6_sock_seq_show(seq, v, ((struct udp_iter_state *)seq->private)->bucket);
+ return 0;
+}
+
+static const struct file_operations udp6_afinfo_seq_fops = {
+ .owner = THIS_MODULE,
+ .open = udp_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_net
+};
+
+static struct udp_seq_afinfo udp6_seq_afinfo = {
+ .name = "udp6",
+ .family = AF_INET6,
+ .udp_table = &udp_table,
+ .seq_fops = &udp6_afinfo_seq_fops,
+ .seq_ops = {
+ .show = udp6_seq_show,
+ },
+};
+
+int __net_init udp6_proc_init(struct net *net)
+{
+ return udp_proc_register(net, &udp6_seq_afinfo);
+}
+
+void udp6_proc_exit(struct net *net) {
+ udp_proc_unregister(net, &udp6_seq_afinfo);
+}
+#endif /* CONFIG_PROC_FS */
+
+/* ------------------------------------------------------------------------ */
+
+struct proto udpv6_prot = {
+ .name = "UDPv6",
+ .owner = THIS_MODULE,
+ .close = udp_lib_close,
+ .connect = ip6_datagram_connect,
+ .disconnect = udp_disconnect,
+ .ioctl = udp_ioctl,
+ .destroy = udpv6_destroy_sock,
+ .setsockopt = udpv6_setsockopt,
+ .getsockopt = udpv6_getsockopt,
+ .sendmsg = udpv6_sendmsg,
+ .recvmsg = udpv6_recvmsg,
+ .backlog_rcv = udpv6_queue_rcv_skb,
+ .hash = udp_lib_hash,
+ .unhash = udp_lib_unhash,
+ .rehash = udp_v6_rehash,
+ .get_port = udp_v6_get_port,
+ .memory_allocated = &udp_memory_allocated,
+ .sysctl_mem = sysctl_udp_mem,
+ .sysctl_wmem = &sysctl_udp_wmem_min,
+ .sysctl_rmem = &sysctl_udp_rmem_min,
+ .obj_size = sizeof(struct udp6_sock),
+ .slab_flags = SLAB_DESTROY_BY_RCU,
+ .h.udp_table = &udp_table,
+#ifdef CONFIG_COMPAT
+ .compat_setsockopt = compat_udpv6_setsockopt,
+ .compat_getsockopt = compat_udpv6_getsockopt,
+#endif
+ .clear_sk = sk_prot_clear_portaddr_nulls,
+};
+
+static struct inet_protosw udpv6_protosw = {
+ .type = SOCK_DGRAM,
+ .protocol = IPPROTO_UDP,
+ .prot = &udpv6_prot,
+ .ops = &inet6_dgram_ops,
+ .no_check = UDP_CSUM_DEFAULT,
+ .flags = INET_PROTOSW_PERMANENT,
+};
+
+
+int __init udpv6_init(void)
+{
+ int ret;
+
+ ret = inet6_add_protocol(&udpv6_protocol, IPPROTO_UDP);
+ if (ret)
+ goto out;
+
+ ret = inet6_register_protosw(&udpv6_protosw);
+ if (ret)
+ goto out_udpv6_protocol;
+out:
+ return ret;
+
+out_udpv6_protocol:
+ inet6_del_protocol(&udpv6_protocol, IPPROTO_UDP);
+ goto out;
+}
+
+void udpv6_exit(void)
+{
+ inet6_unregister_protosw(&udpv6_protosw);
+ inet6_del_protocol(&udpv6_protocol, IPPROTO_UDP);
+}
diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
new file mode 100644
index 00000000..d7571046
--- /dev/null
+++ b/net/ipv6/udp_impl.h
@@ -0,0 +1,37 @@
+#ifndef _UDP6_IMPL_H
+#define _UDP6_IMPL_H
+#include <net/udp.h>
+#include <net/udplite.h>
+#include <net/protocol.h>
+#include <net/addrconf.h>
+#include <net/inet_common.h>
+#include <net/transp_v6.h>
+
+extern int __udp6_lib_rcv(struct sk_buff *, struct udp_table *, int );
+extern void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *,
+ u8 , u8 , int , __be32 , struct udp_table *);
+
+extern int udp_v6_get_port(struct sock *sk, unsigned short snum);
+
+extern int udpv6_getsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int __user *optlen);
+extern int udpv6_setsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, unsigned int optlen);
+#ifdef CONFIG_COMPAT
+extern int compat_udpv6_setsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, unsigned int optlen);
+extern int compat_udpv6_getsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int __user *optlen);
+#endif
+extern int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
+ struct msghdr *msg, size_t len);
+extern int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
+ struct msghdr *msg, size_t len,
+ int noblock, int flags, int *addr_len);
+extern int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb);
+extern void udpv6_destroy_sock(struct sock *sk);
+
+#ifdef CONFIG_PROC_FS
+extern int udp6_seq_show(struct seq_file *seq, void *v);
+#endif
+#endif /* _UDP6_IMPL_H */
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
new file mode 100644
index 00000000..1d08e21d
--- /dev/null
+++ b/net/ipv6/udplite.c
@@ -0,0 +1,140 @@
+/*
+ * UDPLITEv6 An implementation of the UDP-Lite protocol over IPv6.
+ * See also net/ipv4/udplite.c
+ *
+ * Authors: Gerrit Renker <gerrit@erg.abdn.ac.uk>
+ *
+ * Changes:
+ * Fixes:
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/export.h>
+#include "udp_impl.h"
+
+static int udplitev6_rcv(struct sk_buff *skb)
+{
+ return __udp6_lib_rcv(skb, &udplite_table, IPPROTO_UDPLITE);
+}
+
+static void udplitev6_err(struct sk_buff *skb,
+ struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+{
+ __udp6_lib_err(skb, opt, type, code, offset, info, &udplite_table);
+}
+
+static const struct inet6_protocol udplitev6_protocol = {
+ .handler = udplitev6_rcv,
+ .err_handler = udplitev6_err,
+ .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
+};
+
+struct proto udplitev6_prot = {
+ .name = "UDPLITEv6",
+ .owner = THIS_MODULE,
+ .close = udp_lib_close,
+ .connect = ip6_datagram_connect,
+ .disconnect = udp_disconnect,
+ .ioctl = udp_ioctl,
+ .init = udplite_sk_init,
+ .destroy = udpv6_destroy_sock,
+ .setsockopt = udpv6_setsockopt,
+ .getsockopt = udpv6_getsockopt,
+ .sendmsg = udpv6_sendmsg,
+ .recvmsg = udpv6_recvmsg,
+ .backlog_rcv = udpv6_queue_rcv_skb,
+ .hash = udp_lib_hash,
+ .unhash = udp_lib_unhash,
+ .get_port = udp_v6_get_port,
+ .obj_size = sizeof(struct udp6_sock),
+ .slab_flags = SLAB_DESTROY_BY_RCU,
+ .h.udp_table = &udplite_table,
+#ifdef CONFIG_COMPAT
+ .compat_setsockopt = compat_udpv6_setsockopt,
+ .compat_getsockopt = compat_udpv6_getsockopt,
+#endif
+ .clear_sk = sk_prot_clear_portaddr_nulls,
+};
+
+static struct inet_protosw udplite6_protosw = {
+ .type = SOCK_DGRAM,
+ .protocol = IPPROTO_UDPLITE,
+ .prot = &udplitev6_prot,
+ .ops = &inet6_dgram_ops,
+ .no_check = 0,
+ .flags = INET_PROTOSW_PERMANENT,
+};
+
+int __init udplitev6_init(void)
+{
+ int ret;
+
+ ret = inet6_add_protocol(&udplitev6_protocol, IPPROTO_UDPLITE);
+ if (ret)
+ goto out;
+
+ ret = inet6_register_protosw(&udplite6_protosw);
+ if (ret)
+ goto out_udplitev6_protocol;
+out:
+ return ret;
+
+out_udplitev6_protocol:
+ inet6_del_protocol(&udplitev6_protocol, IPPROTO_UDPLITE);
+ goto out;
+}
+
+void udplitev6_exit(void)
+{
+ inet6_unregister_protosw(&udplite6_protosw);
+ inet6_del_protocol(&udplitev6_protocol, IPPROTO_UDPLITE);
+}
+
+#ifdef CONFIG_PROC_FS
+
+static const struct file_operations udplite6_afinfo_seq_fops = {
+ .owner = THIS_MODULE,
+ .open = udp_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_net
+};
+
+static struct udp_seq_afinfo udplite6_seq_afinfo = {
+ .name = "udplite6",
+ .family = AF_INET6,
+ .udp_table = &udplite_table,
+ .seq_fops = &udplite6_afinfo_seq_fops,
+ .seq_ops = {
+ .show = udp6_seq_show,
+ },
+};
+
+static int __net_init udplite6_proc_init_net(struct net *net)
+{
+ return udp_proc_register(net, &udplite6_seq_afinfo);
+}
+
+static void __net_exit udplite6_proc_exit_net(struct net *net)
+{
+ udp_proc_unregister(net, &udplite6_seq_afinfo);
+}
+
+static struct pernet_operations udplite6_net_ops = {
+ .init = udplite6_proc_init_net,
+ .exit = udplite6_proc_exit_net,
+};
+
+int __init udplite6_proc_init(void)
+{
+ return register_pernet_subsys(&udplite6_net_ops);
+}
+
+void udplite6_proc_exit(void)
+{
+ unregister_pernet_subsys(&udplite6_net_ops);
+}
+#endif
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
new file mode 100644
index 00000000..f8c3cf84
--- /dev/null
+++ b/net/ipv6/xfrm6_input.c
@@ -0,0 +1,146 @@
+/*
+ * xfrm6_input.c: based on net/ipv4/xfrm4_input.c
+ *
+ * Authors:
+ * Mitsuru KANDA @USAGI
+ * Kazunori MIYAZAWA @USAGI
+ * Kunihiro Ishiguro <kunihiro@ipinfusion.com>
+ * YOSHIFUJI Hideaki @USAGI
+ * IPv6 support
+ */
+
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/ipv6.h>
+#include <net/xfrm.h>
+
+int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+ return xfrm6_extract_header(skb);
+}
+
+int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi)
+{
+ XFRM_SPI_SKB_CB(skb)->family = AF_INET6;
+ XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr);
+ return xfrm_input(skb, nexthdr, spi, 0);
+}
+EXPORT_SYMBOL(xfrm6_rcv_spi);
+
+int xfrm6_transport_finish(struct sk_buff *skb, int async)
+{
+ skb_network_header(skb)[IP6CB(skb)->nhoff] =
+ XFRM_MODE_SKB_CB(skb)->protocol;
+
+#ifndef CONFIG_NETFILTER
+ if (!async)
+ return 1;
+#endif
+
+ ipv6_hdr(skb)->payload_len = htons(skb->len);
+ __skb_push(skb, skb->data - skb_network_header(skb));
+
+ NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, skb, skb->dev, NULL,
+ ip6_rcv_finish);
+ return -1;
+}
+
+int xfrm6_rcv(struct sk_buff *skb)
+{
+ return xfrm6_rcv_spi(skb, skb_network_header(skb)[IP6CB(skb)->nhoff],
+ 0);
+}
+
+EXPORT_SYMBOL(xfrm6_rcv);
+
+int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
+ xfrm_address_t *saddr, u8 proto)
+{
+ struct net *net = dev_net(skb->dev);
+ struct xfrm_state *x = NULL;
+ int i = 0;
+
+ /* Allocate new secpath or COW existing one. */
+ if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) {
+ struct sec_path *sp;
+
+ sp = secpath_dup(skb->sp);
+ if (!sp) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR);
+ goto drop;
+ }
+ if (skb->sp)
+ secpath_put(skb->sp);
+ skb->sp = sp;
+ }
+
+ if (1 + skb->sp->len == XFRM_MAX_DEPTH) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
+ goto drop;
+ }
+
+ for (i = 0; i < 3; i++) {
+ xfrm_address_t *dst, *src;
+
+ switch (i) {
+ case 0:
+ dst = daddr;
+ src = saddr;
+ break;
+ case 1:
+ /* lookup state with wild-card source address */
+ dst = daddr;
+ src = (xfrm_address_t *)&in6addr_any;
+ break;
+ default:
+ /* lookup state with wild-card addresses */
+ dst = (xfrm_address_t *)&in6addr_any;
+ src = (xfrm_address_t *)&in6addr_any;
+ break;
+ }
+
+ x = xfrm_state_lookup_byaddr(net, skb->mark, dst, src, proto, AF_INET6);
+ if (!x)
+ continue;
+
+ spin_lock(&x->lock);
+
+ if ((!i || (x->props.flags & XFRM_STATE_WILDRECV)) &&
+ likely(x->km.state == XFRM_STATE_VALID) &&
+ !xfrm_state_check_expire(x)) {
+ spin_unlock(&x->lock);
+ if (x->type->input(x, skb) > 0) {
+ /* found a valid state */
+ break;
+ }
+ } else
+ spin_unlock(&x->lock);
+
+ xfrm_state_put(x);
+ x = NULL;
+ }
+
+ if (!x) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES);
+ xfrm_audit_state_notfound_simple(skb, AF_INET6);
+ goto drop;
+ }
+
+ skb->sp->xvec[skb->sp->len++] = x;
+
+ spin_lock(&x->lock);
+
+ x->curlft.bytes += skb->len;
+ x->curlft.packets++;
+
+ spin_unlock(&x->lock);
+
+ return 1;
+
+drop:
+ return -1;
+}
+
+EXPORT_SYMBOL(xfrm6_input_addr);
diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c
new file mode 100644
index 00000000..9949a356
--- /dev/null
+++ b/net/ipv6/xfrm6_mode_beet.c
@@ -0,0 +1,131 @@
+/*
+ * xfrm6_mode_beet.c - BEET mode encapsulation for IPv6.
+ *
+ * Copyright (c) 2006 Diego Beltrami <diego.beltrami@gmail.com>
+ * Miika Komu <miika@iki.fi>
+ * Herbert Xu <herbert@gondor.apana.org.au>
+ * Abhinav Pathak <abhinav.pathak@hiit.fi>
+ * Jeff Ahrenholz <ahrenholz@gmail.com>
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/stringify.h>
+#include <net/dsfield.h>
+#include <net/dst.h>
+#include <net/inet_ecn.h>
+#include <net/ipv6.h>
+#include <net/xfrm.h>
+
+static void xfrm6_beet_make_header(struct sk_buff *skb)
+{
+ struct ipv6hdr *iph = ipv6_hdr(skb);
+
+ iph->version = 6;
+
+ memcpy(iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl,
+ sizeof(iph->flow_lbl));
+ iph->nexthdr = XFRM_MODE_SKB_CB(skb)->protocol;
+
+ ipv6_change_dsfield(iph, 0, XFRM_MODE_SKB_CB(skb)->tos);
+ iph->hop_limit = XFRM_MODE_SKB_CB(skb)->ttl;
+}
+
+/* Add encapsulation header.
+ *
+ * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt.
+ */
+static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+ struct ipv6hdr *top_iph;
+ struct ip_beet_phdr *ph;
+ int optlen, hdr_len;
+
+ hdr_len = 0;
+ optlen = XFRM_MODE_SKB_CB(skb)->optlen;
+ if (unlikely(optlen))
+ hdr_len += IPV4_BEET_PHMAXLEN - (optlen & 4);
+
+ skb_set_network_header(skb, -x->props.header_len - hdr_len);
+ if (x->sel.family != AF_INET6)
+ skb->network_header += IPV4_BEET_PHMAXLEN;
+ skb->mac_header = skb->network_header +
+ offsetof(struct ipv6hdr, nexthdr);
+ skb->transport_header = skb->network_header + sizeof(*top_iph);
+ ph = (struct ip_beet_phdr *)__skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl-hdr_len);
+
+ xfrm6_beet_make_header(skb);
+
+ top_iph = ipv6_hdr(skb);
+ if (unlikely(optlen)) {
+
+ BUG_ON(optlen < 0);
+
+ ph->padlen = 4 - (optlen & 4);
+ ph->hdrlen = optlen / 8;
+ ph->nexthdr = top_iph->nexthdr;
+ if (ph->padlen)
+ memset(ph + 1, IPOPT_NOP, ph->padlen);
+
+ top_iph->nexthdr = IPPROTO_BEETPH;
+ }
+
+ top_iph->saddr = *(struct in6_addr *)&x->props.saddr;
+ top_iph->daddr = *(struct in6_addr *)&x->id.daddr;
+ return 0;
+}
+
+static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+ struct ipv6hdr *ip6h;
+ int size = sizeof(struct ipv6hdr);
+ int err;
+
+ err = skb_cow_head(skb, size + skb->mac_len);
+ if (err)
+ goto out;
+
+ __skb_push(skb, size);
+ skb_reset_network_header(skb);
+ skb_mac_header_rebuild(skb);
+
+ xfrm6_beet_make_header(skb);
+
+ ip6h = ipv6_hdr(skb);
+ ip6h->payload_len = htons(skb->len - size);
+ ip6h->daddr = *(struct in6_addr *)&x->sel.daddr.a6;
+ ip6h->saddr = *(struct in6_addr *)&x->sel.saddr.a6;
+ err = 0;
+out:
+ return err;
+}
+
+static struct xfrm_mode xfrm6_beet_mode = {
+ .input2 = xfrm6_beet_input,
+ .input = xfrm_prepare_input,
+ .output2 = xfrm6_beet_output,
+ .output = xfrm6_prepare_output,
+ .owner = THIS_MODULE,
+ .encap = XFRM_MODE_BEET,
+ .flags = XFRM_MODE_FLAG_TUNNEL,
+};
+
+static int __init xfrm6_beet_init(void)
+{
+ return xfrm_register_mode(&xfrm6_beet_mode, AF_INET6);
+}
+
+static void __exit xfrm6_beet_exit(void)
+{
+ int err;
+
+ err = xfrm_unregister_mode(&xfrm6_beet_mode, AF_INET6);
+ BUG_ON(err);
+}
+
+module_init(xfrm6_beet_init);
+module_exit(xfrm6_beet_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_BEET);
diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c
new file mode 100644
index 00000000..63d5d493
--- /dev/null
+++ b/net/ipv6/xfrm6_mode_ro.c
@@ -0,0 +1,84 @@
+/*
+ * xfrm6_mode_ro.c - Route optimization mode for IPv6.
+ *
+ * Copyright (C)2003-2006 Helsinki University of Technology
+ * Copyright (C)2003-2006 USAGI/WIDE Project
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+/*
+ * Authors:
+ * Noriaki TAKAMIYA @USAGI
+ * Masahide NAKAMURA @USAGI
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <linux/stringify.h>
+#include <linux/time.h>
+#include <net/ipv6.h>
+#include <net/xfrm.h>
+
+/* Add route optimization header space.
+ *
+ * The IP header and mutable extension headers will be moved forward to make
+ * space for the route optimization header.
+ */
+static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+ struct ipv6hdr *iph;
+ u8 *prevhdr;
+ int hdr_len;
+
+ iph = ipv6_hdr(skb);
+
+ hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
+ skb_set_mac_header(skb, (prevhdr - x->props.header_len) - skb->data);
+ skb_set_network_header(skb, -x->props.header_len);
+ skb->transport_header = skb->network_header + hdr_len;
+ __skb_pull(skb, hdr_len);
+ memmove(ipv6_hdr(skb), iph, hdr_len);
+
+ x->lastused = get_seconds();
+
+ return 0;
+}
+
+static struct xfrm_mode xfrm6_ro_mode = {
+ .output = xfrm6_ro_output,
+ .owner = THIS_MODULE,
+ .encap = XFRM_MODE_ROUTEOPTIMIZATION,
+};
+
+static int __init xfrm6_ro_init(void)
+{
+ return xfrm_register_mode(&xfrm6_ro_mode, AF_INET6);
+}
+
+static void __exit xfrm6_ro_exit(void)
+{
+ int err;
+
+ err = xfrm_unregister_mode(&xfrm6_ro_mode, AF_INET6);
+ BUG_ON(err);
+}
+
+module_init(xfrm6_ro_init);
+module_exit(xfrm6_ro_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_ROUTEOPTIMIZATION);
diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c
new file mode 100644
index 00000000..4e344105
--- /dev/null
+++ b/net/ipv6/xfrm6_mode_transport.c
@@ -0,0 +1,85 @@
+/*
+ * xfrm6_mode_transport.c - Transport mode encapsulation for IPv6.
+ *
+ * Copyright (C) 2002 USAGI/WIDE Project
+ * Copyright (c) 2004-2006 Herbert Xu <herbert@gondor.apana.org.au>
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/stringify.h>
+#include <net/dst.h>
+#include <net/ipv6.h>
+#include <net/xfrm.h>
+
+/* Add encapsulation header.
+ *
+ * The IP header and mutable extension headers will be moved forward to make
+ * space for the encapsulation header.
+ */
+static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+ struct ipv6hdr *iph;
+ u8 *prevhdr;
+ int hdr_len;
+
+ iph = ipv6_hdr(skb);
+
+ hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
+ skb_set_mac_header(skb, (prevhdr - x->props.header_len) - skb->data);
+ skb_set_network_header(skb, -x->props.header_len);
+ skb->transport_header = skb->network_header + hdr_len;
+ __skb_pull(skb, hdr_len);
+ memmove(ipv6_hdr(skb), iph, hdr_len);
+ return 0;
+}
+
+/* Remove encapsulation header.
+ *
+ * The IP header will be moved over the top of the encapsulation header.
+ *
+ * On entry, skb->h shall point to where the IP header should be and skb->nh
+ * shall be set to where the IP header currently is. skb->data shall point
+ * to the start of the payload.
+ */
+static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+ int ihl = skb->data - skb_transport_header(skb);
+
+ if (skb->transport_header != skb->network_header) {
+ memmove(skb_transport_header(skb),
+ skb_network_header(skb), ihl);
+ skb->network_header = skb->transport_header;
+ }
+ ipv6_hdr(skb)->payload_len = htons(skb->len + ihl -
+ sizeof(struct ipv6hdr));
+ skb_reset_transport_header(skb);
+ return 0;
+}
+
+static struct xfrm_mode xfrm6_transport_mode = {
+ .input = xfrm6_transport_input,
+ .output = xfrm6_transport_output,
+ .owner = THIS_MODULE,
+ .encap = XFRM_MODE_TRANSPORT,
+};
+
+static int __init xfrm6_transport_init(void)
+{
+ return xfrm_register_mode(&xfrm6_transport_mode, AF_INET6);
+}
+
+static void __exit xfrm6_transport_exit(void)
+{
+ int err;
+
+ err = xfrm_unregister_mode(&xfrm6_transport_mode, AF_INET6);
+ BUG_ON(err);
+}
+
+module_init(xfrm6_transport_init);
+module_exit(xfrm6_transport_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_TRANSPORT);
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
new file mode 100644
index 00000000..9f2095b1
--- /dev/null
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -0,0 +1,117 @@
+/*
+ * xfrm6_mode_tunnel.c - Tunnel mode encapsulation for IPv6.
+ *
+ * Copyright (C) 2002 USAGI/WIDE Project
+ * Copyright (c) 2004-2006 Herbert Xu <herbert@gondor.apana.org.au>
+ */
+
+#include <linux/gfp.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/stringify.h>
+#include <net/dsfield.h>
+#include <net/dst.h>
+#include <net/inet_ecn.h>
+#include <net/ip6_route.h>
+#include <net/ipv6.h>
+#include <net/xfrm.h>
+
+static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
+{
+ const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
+ struct ipv6hdr *inner_iph = ipipv6_hdr(skb);
+
+ if (INET_ECN_is_ce(ipv6_get_dsfield(outer_iph)))
+ IP6_ECN_set_ce(inner_iph);
+}
+
+/* Add encapsulation header.
+ *
+ * The top IP header will be constructed per RFC 2401.
+ */
+static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb_dst(skb);
+ struct ipv6hdr *top_iph;
+ int dsfield;
+
+ skb_set_network_header(skb, -x->props.header_len);
+ skb->mac_header = skb->network_header +
+ offsetof(struct ipv6hdr, nexthdr);
+ skb->transport_header = skb->network_header + sizeof(*top_iph);
+ top_iph = ipv6_hdr(skb);
+
+ top_iph->version = 6;
+
+ memcpy(top_iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl,
+ sizeof(top_iph->flow_lbl));
+ top_iph->nexthdr = xfrm_af2proto(skb_dst(skb)->ops->family);
+
+ dsfield = XFRM_MODE_SKB_CB(skb)->tos;
+ dsfield = INET_ECN_encapsulate(dsfield, dsfield);
+ if (x->props.flags & XFRM_STATE_NOECN)
+ dsfield &= ~INET_ECN_MASK;
+ ipv6_change_dsfield(top_iph, 0, dsfield);
+ top_iph->hop_limit = ip6_dst_hoplimit(dst->child);
+ top_iph->saddr = *(struct in6_addr *)&x->props.saddr;
+ top_iph->daddr = *(struct in6_addr *)&x->id.daddr;
+ return 0;
+}
+
+static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+ int err = -EINVAL;
+
+ if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPV6)
+ goto out;
+ if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
+ goto out;
+
+ if (skb_cloned(skb) &&
+ (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
+ goto out;
+
+ if (x->props.flags & XFRM_STATE_DECAP_DSCP)
+ ipv6_copy_dscp(ipv6_get_dsfield(ipv6_hdr(skb)),
+ ipipv6_hdr(skb));
+ if (!(x->props.flags & XFRM_STATE_NOECN))
+ ipip6_ecn_decapsulate(skb);
+
+ skb_reset_network_header(skb);
+ skb_mac_header_rebuild(skb);
+
+ err = 0;
+
+out:
+ return err;
+}
+
+static struct xfrm_mode xfrm6_tunnel_mode = {
+ .input2 = xfrm6_mode_tunnel_input,
+ .input = xfrm_prepare_input,
+ .output2 = xfrm6_mode_tunnel_output,
+ .output = xfrm6_prepare_output,
+ .owner = THIS_MODULE,
+ .encap = XFRM_MODE_TUNNEL,
+ .flags = XFRM_MODE_FLAG_TUNNEL,
+};
+
+static int __init xfrm6_mode_tunnel_init(void)
+{
+ return xfrm_register_mode(&xfrm6_tunnel_mode, AF_INET6);
+}
+
+static void __exit xfrm6_mode_tunnel_exit(void)
+{
+ int err;
+
+ err = xfrm_unregister_mode(&xfrm6_tunnel_mode, AF_INET6);
+ BUG_ON(err);
+}
+
+module_init(xfrm6_mode_tunnel_init);
+module_exit(xfrm6_mode_tunnel_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_TUNNEL);
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
new file mode 100644
index 00000000..8755a307
--- /dev/null
+++ b/net/ipv6/xfrm6_output.c
@@ -0,0 +1,161 @@
+/*
+ * xfrm6_output.c - Common IPsec encapsulation code for IPv6.
+ * Copyright (C) 2002 USAGI/WIDE Project
+ * Copyright (c) 2004 Herbert Xu <herbert@gondor.apana.org.au>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/if_ether.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/icmpv6.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/dst.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/xfrm.h>
+
+int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb,
+ u8 **prevhdr)
+{
+ return ip6_find_1stfragopt(skb, prevhdr);
+}
+
+EXPORT_SYMBOL(xfrm6_find_1stfragopt);
+
+static int xfrm6_local_dontfrag(struct sk_buff *skb)
+{
+ int proto;
+ struct sock *sk = skb->sk;
+
+ if (sk) {
+ proto = sk->sk_protocol;
+
+ if (proto == IPPROTO_UDP || proto == IPPROTO_RAW)
+ return inet6_sk(sk)->dontfrag;
+ }
+
+ return 0;
+}
+
+static void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu)
+{
+ struct flowi6 fl6;
+ struct sock *sk = skb->sk;
+
+ fl6.flowi6_oif = sk->sk_bound_dev_if;
+ fl6.daddr = ipv6_hdr(skb)->daddr;
+
+ ipv6_local_rxpmtu(sk, &fl6, mtu);
+}
+
+static void xfrm6_local_error(struct sk_buff *skb, u32 mtu)
+{
+ struct flowi6 fl6;
+ struct sock *sk = skb->sk;
+
+ fl6.fl6_dport = inet_sk(sk)->inet_dport;
+ fl6.daddr = ipv6_hdr(skb)->daddr;
+
+ ipv6_local_error(sk, EMSGSIZE, &fl6, mtu);
+}
+
+static int xfrm6_tunnel_check_size(struct sk_buff *skb)
+{
+ int mtu, ret = 0;
+ struct dst_entry *dst = skb_dst(skb);
+
+ mtu = dst_mtu(dst);
+ if (mtu < IPV6_MIN_MTU)
+ mtu = IPV6_MIN_MTU;
+
+ if (!skb->local_df && skb->len > mtu) {
+ skb->dev = dst->dev;
+
+ if (xfrm6_local_dontfrag(skb))
+ xfrm6_local_rxpmtu(skb, mtu);
+ else if (skb->sk)
+ xfrm6_local_error(skb, mtu);
+ else
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ ret = -EMSGSIZE;
+ }
+
+ return ret;
+}
+
+int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+ int err;
+
+ err = xfrm6_tunnel_check_size(skb);
+ if (err)
+ return err;
+
+ XFRM_MODE_SKB_CB(skb)->protocol = ipv6_hdr(skb)->nexthdr;
+
+ return xfrm6_extract_header(skb);
+}
+
+int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+ int err;
+
+ err = xfrm_inner_extract_output(x, skb);
+ if (err)
+ return err;
+
+ memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+#ifdef CONFIG_NETFILTER
+ IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
+#endif
+
+ skb->protocol = htons(ETH_P_IPV6);
+ skb->local_df = 1;
+
+ return x->outer_mode->output2(x, skb);
+}
+EXPORT_SYMBOL(xfrm6_prepare_output);
+
+int xfrm6_output_finish(struct sk_buff *skb)
+{
+#ifdef CONFIG_NETFILTER
+ IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
+#endif
+
+ skb->protocol = htons(ETH_P_IPV6);
+ return xfrm_output(skb);
+}
+
+static int __xfrm6_output(struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb_dst(skb);
+ struct xfrm_state *x = dst->xfrm;
+ int mtu = ip6_skb_dst_mtu(skb);
+
+ if (skb->len > mtu && xfrm6_local_dontfrag(skb)) {
+ xfrm6_local_rxpmtu(skb, mtu);
+ return -EMSGSIZE;
+ } else if (!skb->local_df && skb->len > mtu && skb->sk) {
+ xfrm6_local_error(skb, mtu);
+ return -EMSGSIZE;
+ }
+
+ if (x->props.mode == XFRM_MODE_TUNNEL &&
+ ((skb->len > mtu && !skb_is_gso(skb)) ||
+ dst_allfrag(skb_dst(skb)))) {
+ return ip6_fragment(skb, x->outer_mode->afinfo->output_finish);
+ }
+ return x->outer_mode->afinfo->output_finish(skb);
+}
+
+int xfrm6_output(struct sk_buff *skb)
+{
+ return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL,
+ skb_dst(skb)->dev, __xfrm6_output);
+}
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
new file mode 100644
index 00000000..8ea65e03
--- /dev/null
+++ b/net/ipv6/xfrm6_policy.c
@@ -0,0 +1,357 @@
+/*
+ * xfrm6_policy.c: based on xfrm4_policy.c
+ *
+ * Authors:
+ * Mitsuru KANDA @USAGI
+ * Kazunori MIYAZAWA @USAGI
+ * Kunihiro Ishiguro <kunihiro@ipinfusion.com>
+ * IPv6 support
+ * YOSHIFUJI Hideaki
+ * Split up af-specific portion
+ *
+ */
+
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <net/addrconf.h>
+#include <net/dst.h>
+#include <net/xfrm.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+#include <net/mip6.h>
+#endif
+
+static struct xfrm_policy_afinfo xfrm6_policy_afinfo;
+
+static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos,
+ const xfrm_address_t *saddr,
+ const xfrm_address_t *daddr)
+{
+ struct flowi6 fl6;
+ struct dst_entry *dst;
+ int err;
+
+ memset(&fl6, 0, sizeof(fl6));
+ memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr));
+ if (saddr)
+ memcpy(&fl6.saddr, saddr, sizeof(fl6.saddr));
+
+ dst = ip6_route_output(net, NULL, &fl6);
+
+ err = dst->error;
+ if (dst->error) {
+ dst_release(dst);
+ dst = ERR_PTR(err);
+ }
+
+ return dst;
+}
+
+static int xfrm6_get_saddr(struct net *net,
+ xfrm_address_t *saddr, xfrm_address_t *daddr)
+{
+ struct dst_entry *dst;
+ struct net_device *dev;
+
+ dst = xfrm6_dst_lookup(net, 0, NULL, daddr);
+ if (IS_ERR(dst))
+ return -EHOSTUNREACH;
+
+ dev = ip6_dst_idev(dst)->dev;
+ ipv6_dev_get_saddr(dev_net(dev), dev,
+ (struct in6_addr *)&daddr->a6, 0,
+ (struct in6_addr *)&saddr->a6);
+ dst_release(dst);
+ return 0;
+}
+
+static int xfrm6_get_tos(const struct flowi *fl)
+{
+ return 0;
+}
+
+static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst,
+ int nfheader_len)
+{
+ if (dst->ops->family == AF_INET6) {
+ struct rt6_info *rt = (struct rt6_info*)dst;
+ if (rt->rt6i_node)
+ path->path_cookie = rt->rt6i_node->fn_sernum;
+ }
+
+ path->u.rt6.rt6i_nfheader_len = nfheader_len;
+
+ return 0;
+}
+
+static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
+ const struct flowi *fl)
+{
+ struct rt6_info *rt = (struct rt6_info*)xdst->route;
+
+ xdst->u.dst.dev = dev;
+ dev_hold(dev);
+
+ xdst->u.rt6.rt6i_idev = in6_dev_get(dev);
+ if (!xdst->u.rt6.rt6i_idev)
+ return -ENODEV;
+
+ xdst->u.rt6.rt6i_peer = rt->rt6i_peer;
+ if (rt->rt6i_peer)
+ atomic_inc(&rt->rt6i_peer->refcnt);
+
+ /* Sheit... I remember I did this right. Apparently,
+ * it was magically lost, so this code needs audit */
+ xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST |
+ RTF_LOCAL);
+ xdst->u.rt6.rt6i_metric = rt->rt6i_metric;
+ xdst->u.rt6.rt6i_node = rt->rt6i_node;
+ if (rt->rt6i_node)
+ xdst->route_cookie = rt->rt6i_node->fn_sernum;
+ xdst->u.rt6.rt6i_gateway = rt->rt6i_gateway;
+ xdst->u.rt6.rt6i_dst = rt->rt6i_dst;
+ xdst->u.rt6.rt6i_src = rt->rt6i_src;
+
+ return 0;
+}
+
+static inline void
+_decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
+{
+ struct flowi6 *fl6 = &fl->u.ip6;
+ int onlyproto = 0;
+ u16 offset = skb_network_header_len(skb);
+ const struct ipv6hdr *hdr = ipv6_hdr(skb);
+ struct ipv6_opt_hdr *exthdr;
+ const unsigned char *nh = skb_network_header(skb);
+ u8 nexthdr = nh[IP6CB(skb)->nhoff];
+
+ memset(fl6, 0, sizeof(struct flowi6));
+ fl6->flowi6_mark = skb->mark;
+
+ fl6->daddr = reverse ? hdr->saddr : hdr->daddr;
+ fl6->saddr = reverse ? hdr->daddr : hdr->saddr;
+
+ while (nh + offset + 1 < skb->data ||
+ pskb_may_pull(skb, nh + offset + 1 - skb->data)) {
+ nh = skb_network_header(skb);
+ exthdr = (struct ipv6_opt_hdr *)(nh + offset);
+
+ switch (nexthdr) {
+ case NEXTHDR_FRAGMENT:
+ onlyproto = 1;
+ case NEXTHDR_ROUTING:
+ case NEXTHDR_HOP:
+ case NEXTHDR_DEST:
+ offset += ipv6_optlen(exthdr);
+ nexthdr = exthdr->nexthdr;
+ exthdr = (struct ipv6_opt_hdr *)(nh + offset);
+ break;
+
+ case IPPROTO_UDP:
+ case IPPROTO_UDPLITE:
+ case IPPROTO_TCP:
+ case IPPROTO_SCTP:
+ case IPPROTO_DCCP:
+ if (!onlyproto && (nh + offset + 4 < skb->data ||
+ pskb_may_pull(skb, nh + offset + 4 - skb->data))) {
+ __be16 *ports = (__be16 *)exthdr;
+
+ fl6->fl6_sport = ports[!!reverse];
+ fl6->fl6_dport = ports[!reverse];
+ }
+ fl6->flowi6_proto = nexthdr;
+ return;
+
+ case IPPROTO_ICMPV6:
+ if (!onlyproto && pskb_may_pull(skb, nh + offset + 2 - skb->data)) {
+ u8 *icmp = (u8 *)exthdr;
+
+ fl6->fl6_icmp_type = icmp[0];
+ fl6->fl6_icmp_code = icmp[1];
+ }
+ fl6->flowi6_proto = nexthdr;
+ return;
+
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ case IPPROTO_MH:
+ if (!onlyproto && pskb_may_pull(skb, nh + offset + 3 - skb->data)) {
+ struct ip6_mh *mh;
+ mh = (struct ip6_mh *)exthdr;
+
+ fl6->fl6_mh_type = mh->ip6mh_type;
+ }
+ fl6->flowi6_proto = nexthdr;
+ return;
+#endif
+
+ /* XXX Why are there these headers? */
+ case IPPROTO_AH:
+ case IPPROTO_ESP:
+ case IPPROTO_COMP:
+ default:
+ fl6->fl6_ipsec_spi = 0;
+ fl6->flowi6_proto = nexthdr;
+ return;
+ }
+ }
+}
+
+static inline int xfrm6_garbage_collect(struct dst_ops *ops)
+{
+ struct net *net = container_of(ops, struct net, xfrm.xfrm6_dst_ops);
+
+ xfrm6_policy_afinfo.garbage_collect(net);
+ return dst_entries_get_fast(ops) > ops->gc_thresh * 2;
+}
+
+static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu)
+{
+ struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+ struct dst_entry *path = xdst->route;
+
+ path->ops->update_pmtu(path, mtu);
+}
+
+static void xfrm6_dst_destroy(struct dst_entry *dst)
+{
+ struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+
+ if (likely(xdst->u.rt6.rt6i_idev))
+ in6_dev_put(xdst->u.rt6.rt6i_idev);
+ dst_destroy_metrics_generic(dst);
+ if (likely(xdst->u.rt6.rt6i_peer))
+ inet_putpeer(xdst->u.rt6.rt6i_peer);
+ xfrm_dst_destroy(xdst);
+}
+
+static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
+ int unregister)
+{
+ struct xfrm_dst *xdst;
+
+ if (!unregister)
+ return;
+
+ xdst = (struct xfrm_dst *)dst;
+ if (xdst->u.rt6.rt6i_idev->dev == dev) {
+ struct inet6_dev *loopback_idev =
+ in6_dev_get(dev_net(dev)->loopback_dev);
+ BUG_ON(!loopback_idev);
+
+ do {
+ in6_dev_put(xdst->u.rt6.rt6i_idev);
+ xdst->u.rt6.rt6i_idev = loopback_idev;
+ in6_dev_hold(loopback_idev);
+ xdst = (struct xfrm_dst *)xdst->u.dst.child;
+ } while (xdst->u.dst.xfrm);
+
+ __in6_dev_put(loopback_idev);
+ }
+
+ xfrm_dst_ifdown(dst, dev);
+}
+
+static struct dst_ops xfrm6_dst_ops = {
+ .family = AF_INET6,
+ .protocol = cpu_to_be16(ETH_P_IPV6),
+ .gc = xfrm6_garbage_collect,
+ .update_pmtu = xfrm6_update_pmtu,
+ .cow_metrics = dst_cow_metrics_generic,
+ .destroy = xfrm6_dst_destroy,
+ .ifdown = xfrm6_dst_ifdown,
+ .local_out = __ip6_local_out,
+ .gc_thresh = 1024,
+};
+
+static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
+ .family = AF_INET6,
+ .dst_ops = &xfrm6_dst_ops,
+ .dst_lookup = xfrm6_dst_lookup,
+ .get_saddr = xfrm6_get_saddr,
+ .decode_session = _decode_session6,
+ .get_tos = xfrm6_get_tos,
+ .init_path = xfrm6_init_path,
+ .fill_dst = xfrm6_fill_dst,
+ .blackhole_route = ip6_blackhole_route,
+};
+
+static int __init xfrm6_policy_init(void)
+{
+ return xfrm_policy_register_afinfo(&xfrm6_policy_afinfo);
+}
+
+static void xfrm6_policy_fini(void)
+{
+ xfrm_policy_unregister_afinfo(&xfrm6_policy_afinfo);
+}
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table xfrm6_policy_table[] = {
+ {
+ .procname = "xfrm6_gc_thresh",
+ .data = &init_net.xfrm.xfrm6_dst_ops.gc_thresh,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ { }
+};
+
+static struct ctl_table_header *sysctl_hdr;
+#endif
+
+int __init xfrm6_init(void)
+{
+ int ret;
+ unsigned int gc_thresh;
+
+ /*
+ * We need a good default value for the xfrm6 gc threshold.
+ * In ipv4 we set it to the route hash table size * 8, which
+ * is half the size of the maximaum route cache for ipv4. It
+ * would be good to do the same thing for v6, except the table is
+ * constructed differently here. Here each table for a net namespace
+ * can have FIB_TABLE_HASHSZ entries, so lets go with the same
+ * computation that we used for ipv4 here. Also, lets keep the initial
+ * gc_thresh to a minimum of 1024, since, the ipv6 route cache defaults
+ * to that as a minimum as well
+ */
+ gc_thresh = FIB6_TABLE_HASHSZ * 8;
+ xfrm6_dst_ops.gc_thresh = (gc_thresh < 1024) ? 1024 : gc_thresh;
+ dst_entries_init(&xfrm6_dst_ops);
+
+ ret = xfrm6_policy_init();
+ if (ret) {
+ dst_entries_destroy(&xfrm6_dst_ops);
+ goto out;
+ }
+ ret = xfrm6_state_init();
+ if (ret)
+ goto out_policy;
+
+#ifdef CONFIG_SYSCTL
+ sysctl_hdr = register_net_sysctl_table(&init_net, net_ipv6_ctl_path,
+ xfrm6_policy_table);
+#endif
+out:
+ return ret;
+out_policy:
+ xfrm6_policy_fini();
+ goto out;
+}
+
+void xfrm6_fini(void)
+{
+#ifdef CONFIG_SYSCTL
+ if (sysctl_hdr)
+ unregister_net_sysctl_table(sysctl_hdr);
+#endif
+ //xfrm6_input_fini();
+ xfrm6_policy_fini();
+ xfrm6_state_fini();
+ dst_entries_destroy(&xfrm6_dst_ops);
+}
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
new file mode 100644
index 00000000..3f2f7c4a
--- /dev/null
+++ b/net/ipv6/xfrm6_state.c
@@ -0,0 +1,197 @@
+/*
+ * xfrm6_state.c: based on xfrm4_state.c
+ *
+ * Authors:
+ * Mitsuru KANDA @USAGI
+ * Kazunori MIYAZAWA @USAGI
+ * Kunihiro Ishiguro <kunihiro@ipinfusion.com>
+ * IPv6 support
+ * YOSHIFUJI Hideaki @USAGI
+ * Split up af-specific portion
+ *
+ */
+
+#include <net/xfrm.h>
+#include <linux/pfkeyv2.h>
+#include <linux/ipsec.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/export.h>
+#include <net/dsfield.h>
+#include <net/ipv6.h>
+#include <net/addrconf.h>
+
+static void
+__xfrm6_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl)
+{
+ const struct flowi6 *fl6 = &fl->u.ip6;
+
+ /* Initialize temporary selector matching only
+ * to current session. */
+ *(struct in6_addr *)&sel->daddr = fl6->daddr;
+ *(struct in6_addr *)&sel->saddr = fl6->saddr;
+ sel->dport = xfrm_flowi_dport(fl, &fl6->uli);
+ sel->dport_mask = htons(0xffff);
+ sel->sport = xfrm_flowi_sport(fl, &fl6->uli);
+ sel->sport_mask = htons(0xffff);
+ sel->family = AF_INET6;
+ sel->prefixlen_d = 128;
+ sel->prefixlen_s = 128;
+ sel->proto = fl6->flowi6_proto;
+ sel->ifindex = fl6->flowi6_oif;
+}
+
+static void
+xfrm6_init_temprop(struct xfrm_state *x, const struct xfrm_tmpl *tmpl,
+ const xfrm_address_t *daddr, const xfrm_address_t *saddr)
+{
+ x->id = tmpl->id;
+ if (ipv6_addr_any((struct in6_addr*)&x->id.daddr))
+ memcpy(&x->id.daddr, daddr, sizeof(x->sel.daddr));
+ memcpy(&x->props.saddr, &tmpl->saddr, sizeof(x->props.saddr));
+ if (ipv6_addr_any((struct in6_addr*)&x->props.saddr))
+ memcpy(&x->props.saddr, saddr, sizeof(x->props.saddr));
+ x->props.mode = tmpl->mode;
+ x->props.reqid = tmpl->reqid;
+ x->props.family = AF_INET6;
+}
+
+/* distribution counting sort function for xfrm_state and xfrm_tmpl */
+static int
+__xfrm6_sort(void **dst, void **src, int n, int (*cmp)(void *p), int maxclass)
+{
+ int i;
+ int class[XFRM_MAX_DEPTH];
+ int count[maxclass];
+
+ memset(count, 0, sizeof(count));
+
+ for (i = 0; i < n; i++) {
+ int c;
+ class[i] = c = cmp(src[i]);
+ count[c]++;
+ }
+
+ for (i = 2; i < maxclass; i++)
+ count[i] += count[i - 1];
+
+ for (i = 0; i < n; i++) {
+ dst[count[class[i] - 1]++] = src[i];
+ src[i] = NULL;
+ }
+
+ return 0;
+}
+
+/*
+ * Rule for xfrm_state:
+ *
+ * rule 1: select IPsec transport except AH
+ * rule 2: select MIPv6 RO or inbound trigger
+ * rule 3: select IPsec transport AH
+ * rule 4: select IPsec tunnel
+ * rule 5: others
+ */
+static int __xfrm6_state_sort_cmp(void *p)
+{
+ struct xfrm_state *v = p;
+
+ switch (v->props.mode) {
+ case XFRM_MODE_TRANSPORT:
+ if (v->id.proto != IPPROTO_AH)
+ return 1;
+ else
+ return 3;
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ case XFRM_MODE_ROUTEOPTIMIZATION:
+ case XFRM_MODE_IN_TRIGGER:
+ return 2;
+#endif
+ case XFRM_MODE_TUNNEL:
+ case XFRM_MODE_BEET:
+ return 4;
+ }
+ return 5;
+}
+
+static int
+__xfrm6_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n)
+{
+ return __xfrm6_sort((void **)dst, (void **)src, n,
+ __xfrm6_state_sort_cmp, 6);
+}
+
+/*
+ * Rule for xfrm_tmpl:
+ *
+ * rule 1: select IPsec transport
+ * rule 2: select MIPv6 RO or inbound trigger
+ * rule 3: select IPsec tunnel
+ * rule 4: others
+ */
+static int __xfrm6_tmpl_sort_cmp(void *p)
+{
+ struct xfrm_tmpl *v = p;
+ switch (v->mode) {
+ case XFRM_MODE_TRANSPORT:
+ return 1;
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ case XFRM_MODE_ROUTEOPTIMIZATION:
+ case XFRM_MODE_IN_TRIGGER:
+ return 2;
+#endif
+ case XFRM_MODE_TUNNEL:
+ case XFRM_MODE_BEET:
+ return 3;
+ }
+ return 4;
+}
+
+static int
+__xfrm6_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n)
+{
+ return __xfrm6_sort((void **)dst, (void **)src, n,
+ __xfrm6_tmpl_sort_cmp, 5);
+}
+
+int xfrm6_extract_header(struct sk_buff *skb)
+{
+ struct ipv6hdr *iph = ipv6_hdr(skb);
+
+ XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph);
+ XFRM_MODE_SKB_CB(skb)->id = 0;
+ XFRM_MODE_SKB_CB(skb)->frag_off = htons(IP_DF);
+ XFRM_MODE_SKB_CB(skb)->tos = ipv6_get_dsfield(iph);
+ XFRM_MODE_SKB_CB(skb)->ttl = iph->hop_limit;
+ XFRM_MODE_SKB_CB(skb)->optlen = 0;
+ memcpy(XFRM_MODE_SKB_CB(skb)->flow_lbl, iph->flow_lbl,
+ sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl));
+
+ return 0;
+}
+
+static struct xfrm_state_afinfo xfrm6_state_afinfo = {
+ .family = AF_INET6,
+ .proto = IPPROTO_IPV6,
+ .eth_proto = htons(ETH_P_IPV6),
+ .owner = THIS_MODULE,
+ .init_tempsel = __xfrm6_init_tempsel,
+ .init_temprop = xfrm6_init_temprop,
+ .tmpl_sort = __xfrm6_tmpl_sort,
+ .state_sort = __xfrm6_state_sort,
+ .output = xfrm6_output,
+ .output_finish = xfrm6_output_finish,
+ .extract_input = xfrm6_extract_input,
+ .extract_output = xfrm6_extract_output,
+ .transport_finish = xfrm6_transport_finish,
+};
+
+int __init xfrm6_state_init(void)
+{
+ return xfrm_state_register_afinfo(&xfrm6_state_afinfo);
+}
+
+void xfrm6_state_fini(void)
+{
+ xfrm_state_unregister_afinfo(&xfrm6_state_afinfo);
+}
+
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
new file mode 100644
index 00000000..4fe1db12
--- /dev/null
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -0,0 +1,402 @@
+/*
+ * Copyright (C)2003,2004 USAGI/WIDE Project
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Authors Mitsuru KANDA <mk@linux-ipv6.org>
+ * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
+ *
+ * Based on net/ipv4/xfrm4_tunnel.c
+ *
+ */
+#include <linux/module.h>
+#include <linux/xfrm.h>
+#include <linux/slab.h>
+#include <linux/rculist.h>
+#include <net/ip.h>
+#include <net/xfrm.h>
+#include <net/ipv6.h>
+#include <linux/ipv6.h>
+#include <linux/icmpv6.h>
+#include <linux/mutex.h>
+#include <net/netns/generic.h>
+
+#define XFRM6_TUNNEL_SPI_BYADDR_HSIZE 256
+#define XFRM6_TUNNEL_SPI_BYSPI_HSIZE 256
+
+#define XFRM6_TUNNEL_SPI_MIN 1
+#define XFRM6_TUNNEL_SPI_MAX 0xffffffff
+
+struct xfrm6_tunnel_net {
+ struct hlist_head spi_byaddr[XFRM6_TUNNEL_SPI_BYADDR_HSIZE];
+ struct hlist_head spi_byspi[XFRM6_TUNNEL_SPI_BYSPI_HSIZE];
+ u32 spi;
+};
+
+static int xfrm6_tunnel_net_id __read_mostly;
+static inline struct xfrm6_tunnel_net *xfrm6_tunnel_pernet(struct net *net)
+{
+ return net_generic(net, xfrm6_tunnel_net_id);
+}
+
+/*
+ * xfrm_tunnel_spi things are for allocating unique id ("spi")
+ * per xfrm_address_t.
+ */
+struct xfrm6_tunnel_spi {
+ struct hlist_node list_byaddr;
+ struct hlist_node list_byspi;
+ xfrm_address_t addr;
+ u32 spi;
+ atomic_t refcnt;
+ struct rcu_head rcu_head;
+};
+
+static DEFINE_SPINLOCK(xfrm6_tunnel_spi_lock);
+
+static struct kmem_cache *xfrm6_tunnel_spi_kmem __read_mostly;
+
+static inline unsigned xfrm6_tunnel_spi_hash_byaddr(const xfrm_address_t *addr)
+{
+ unsigned h;
+
+ h = (__force u32)(addr->a6[0] ^ addr->a6[1] ^ addr->a6[2] ^ addr->a6[3]);
+ h ^= h >> 16;
+ h ^= h >> 8;
+ h &= XFRM6_TUNNEL_SPI_BYADDR_HSIZE - 1;
+
+ return h;
+}
+
+static inline unsigned xfrm6_tunnel_spi_hash_byspi(u32 spi)
+{
+ return spi % XFRM6_TUNNEL_SPI_BYSPI_HSIZE;
+}
+
+static struct xfrm6_tunnel_spi *__xfrm6_tunnel_spi_lookup(struct net *net, const xfrm_address_t *saddr)
+{
+ struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net);
+ struct xfrm6_tunnel_spi *x6spi;
+ struct hlist_node *pos;
+
+ hlist_for_each_entry_rcu(x6spi, pos,
+ &xfrm6_tn->spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)],
+ list_byaddr) {
+ if (memcmp(&x6spi->addr, saddr, sizeof(x6spi->addr)) == 0)
+ return x6spi;
+ }
+
+ return NULL;
+}
+
+__be32 xfrm6_tunnel_spi_lookup(struct net *net, const xfrm_address_t *saddr)
+{
+ struct xfrm6_tunnel_spi *x6spi;
+ u32 spi;
+
+ rcu_read_lock_bh();
+ x6spi = __xfrm6_tunnel_spi_lookup(net, saddr);
+ spi = x6spi ? x6spi->spi : 0;
+ rcu_read_unlock_bh();
+ return htonl(spi);
+}
+
+EXPORT_SYMBOL(xfrm6_tunnel_spi_lookup);
+
+static int __xfrm6_tunnel_spi_check(struct net *net, u32 spi)
+{
+ struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net);
+ struct xfrm6_tunnel_spi *x6spi;
+ int index = xfrm6_tunnel_spi_hash_byspi(spi);
+ struct hlist_node *pos;
+
+ hlist_for_each_entry(x6spi, pos,
+ &xfrm6_tn->spi_byspi[index],
+ list_byspi) {
+ if (x6spi->spi == spi)
+ return -1;
+ }
+ return index;
+}
+
+static u32 __xfrm6_tunnel_alloc_spi(struct net *net, xfrm_address_t *saddr)
+{
+ struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net);
+ u32 spi;
+ struct xfrm6_tunnel_spi *x6spi;
+ int index;
+
+ if (xfrm6_tn->spi < XFRM6_TUNNEL_SPI_MIN ||
+ xfrm6_tn->spi >= XFRM6_TUNNEL_SPI_MAX)
+ xfrm6_tn->spi = XFRM6_TUNNEL_SPI_MIN;
+ else
+ xfrm6_tn->spi++;
+
+ for (spi = xfrm6_tn->spi; spi <= XFRM6_TUNNEL_SPI_MAX; spi++) {
+ index = __xfrm6_tunnel_spi_check(net, spi);
+ if (index >= 0)
+ goto alloc_spi;
+ }
+ for (spi = XFRM6_TUNNEL_SPI_MIN; spi < xfrm6_tn->spi; spi++) {
+ index = __xfrm6_tunnel_spi_check(net, spi);
+ if (index >= 0)
+ goto alloc_spi;
+ }
+ spi = 0;
+ goto out;
+alloc_spi:
+ xfrm6_tn->spi = spi;
+ x6spi = kmem_cache_alloc(xfrm6_tunnel_spi_kmem, GFP_ATOMIC);
+ if (!x6spi)
+ goto out;
+
+ memcpy(&x6spi->addr, saddr, sizeof(x6spi->addr));
+ x6spi->spi = spi;
+ atomic_set(&x6spi->refcnt, 1);
+
+ hlist_add_head_rcu(&x6spi->list_byspi, &xfrm6_tn->spi_byspi[index]);
+
+ index = xfrm6_tunnel_spi_hash_byaddr(saddr);
+ hlist_add_head_rcu(&x6spi->list_byaddr, &xfrm6_tn->spi_byaddr[index]);
+out:
+ return spi;
+}
+
+__be32 xfrm6_tunnel_alloc_spi(struct net *net, xfrm_address_t *saddr)
+{
+ struct xfrm6_tunnel_spi *x6spi;
+ u32 spi;
+
+ spin_lock_bh(&xfrm6_tunnel_spi_lock);
+ x6spi = __xfrm6_tunnel_spi_lookup(net, saddr);
+ if (x6spi) {
+ atomic_inc(&x6spi->refcnt);
+ spi = x6spi->spi;
+ } else
+ spi = __xfrm6_tunnel_alloc_spi(net, saddr);
+ spin_unlock_bh(&xfrm6_tunnel_spi_lock);
+
+ return htonl(spi);
+}
+
+EXPORT_SYMBOL(xfrm6_tunnel_alloc_spi);
+
+static void x6spi_destroy_rcu(struct rcu_head *head)
+{
+ kmem_cache_free(xfrm6_tunnel_spi_kmem,
+ container_of(head, struct xfrm6_tunnel_spi, rcu_head));
+}
+
+static void xfrm6_tunnel_free_spi(struct net *net, xfrm_address_t *saddr)
+{
+ struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net);
+ struct xfrm6_tunnel_spi *x6spi;
+ struct hlist_node *pos, *n;
+
+ spin_lock_bh(&xfrm6_tunnel_spi_lock);
+
+ hlist_for_each_entry_safe(x6spi, pos, n,
+ &xfrm6_tn->spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)],
+ list_byaddr)
+ {
+ if (memcmp(&x6spi->addr, saddr, sizeof(x6spi->addr)) == 0) {
+ if (atomic_dec_and_test(&x6spi->refcnt)) {
+ hlist_del_rcu(&x6spi->list_byaddr);
+ hlist_del_rcu(&x6spi->list_byspi);
+ call_rcu(&x6spi->rcu_head, x6spi_destroy_rcu);
+ break;
+ }
+ }
+ }
+ spin_unlock_bh(&xfrm6_tunnel_spi_lock);
+}
+
+static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+ skb_push(skb, -skb_network_offset(skb));
+ return 0;
+}
+
+static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+ return skb_network_header(skb)[IP6CB(skb)->nhoff];
+}
+
+static int xfrm6_tunnel_rcv(struct sk_buff *skb)
+{
+ struct net *net = dev_net(skb->dev);
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
+ __be32 spi;
+
+ spi = xfrm6_tunnel_spi_lookup(net, (const xfrm_address_t *)&iph->saddr);
+ return xfrm6_rcv_spi(skb, IPPROTO_IPV6, spi);
+}
+
+static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+{
+ /* xfrm6_tunnel native err handling */
+ switch (type) {
+ case ICMPV6_DEST_UNREACH:
+ switch (code) {
+ case ICMPV6_NOROUTE:
+ case ICMPV6_ADM_PROHIBITED:
+ case ICMPV6_NOT_NEIGHBOUR:
+ case ICMPV6_ADDR_UNREACH:
+ case ICMPV6_PORT_UNREACH:
+ default:
+ break;
+ }
+ break;
+ case ICMPV6_PKT_TOOBIG:
+ break;
+ case ICMPV6_TIME_EXCEED:
+ switch (code) {
+ case ICMPV6_EXC_HOPLIMIT:
+ break;
+ case ICMPV6_EXC_FRAGTIME:
+ default:
+ break;
+ }
+ break;
+ case ICMPV6_PARAMPROB:
+ switch (code) {
+ case ICMPV6_HDR_FIELD: break;
+ case ICMPV6_UNK_NEXTHDR: break;
+ case ICMPV6_UNK_OPTION: break;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int xfrm6_tunnel_init_state(struct xfrm_state *x)
+{
+ if (x->props.mode != XFRM_MODE_TUNNEL)
+ return -EINVAL;
+
+ if (x->encap)
+ return -EINVAL;
+
+ x->props.header_len = sizeof(struct ipv6hdr);
+
+ return 0;
+}
+
+static void xfrm6_tunnel_destroy(struct xfrm_state *x)
+{
+ struct net *net = xs_net(x);
+
+ xfrm6_tunnel_free_spi(net, (xfrm_address_t *)&x->props.saddr);
+}
+
+static const struct xfrm_type xfrm6_tunnel_type = {
+ .description = "IP6IP6",
+ .owner = THIS_MODULE,
+ .proto = IPPROTO_IPV6,
+ .init_state = xfrm6_tunnel_init_state,
+ .destructor = xfrm6_tunnel_destroy,
+ .input = xfrm6_tunnel_input,
+ .output = xfrm6_tunnel_output,
+};
+
+static struct xfrm6_tunnel xfrm6_tunnel_handler __read_mostly = {
+ .handler = xfrm6_tunnel_rcv,
+ .err_handler = xfrm6_tunnel_err,
+ .priority = 2,
+};
+
+static struct xfrm6_tunnel xfrm46_tunnel_handler __read_mostly = {
+ .handler = xfrm6_tunnel_rcv,
+ .err_handler = xfrm6_tunnel_err,
+ .priority = 2,
+};
+
+static int __net_init xfrm6_tunnel_net_init(struct net *net)
+{
+ struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net);
+ unsigned int i;
+
+ for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++)
+ INIT_HLIST_HEAD(&xfrm6_tn->spi_byaddr[i]);
+ for (i = 0; i < XFRM6_TUNNEL_SPI_BYSPI_HSIZE; i++)
+ INIT_HLIST_HEAD(&xfrm6_tn->spi_byspi[i]);
+ xfrm6_tn->spi = 0;
+
+ return 0;
+}
+
+static void __net_exit xfrm6_tunnel_net_exit(struct net *net)
+{
+}
+
+static struct pernet_operations xfrm6_tunnel_net_ops = {
+ .init = xfrm6_tunnel_net_init,
+ .exit = xfrm6_tunnel_net_exit,
+ .id = &xfrm6_tunnel_net_id,
+ .size = sizeof(struct xfrm6_tunnel_net),
+};
+
+static int __init xfrm6_tunnel_init(void)
+{
+ int rv;
+
+ xfrm6_tunnel_spi_kmem = kmem_cache_create("xfrm6_tunnel_spi",
+ sizeof(struct xfrm6_tunnel_spi),
+ 0, SLAB_HWCACHE_ALIGN,
+ NULL);
+ if (!xfrm6_tunnel_spi_kmem)
+ return -ENOMEM;
+ rv = register_pernet_subsys(&xfrm6_tunnel_net_ops);
+ if (rv < 0)
+ goto out_pernet;
+ rv = xfrm_register_type(&xfrm6_tunnel_type, AF_INET6);
+ if (rv < 0)
+ goto out_type;
+ rv = xfrm6_tunnel_register(&xfrm6_tunnel_handler, AF_INET6);
+ if (rv < 0)
+ goto out_xfrm6;
+ rv = xfrm6_tunnel_register(&xfrm46_tunnel_handler, AF_INET);
+ if (rv < 0)
+ goto out_xfrm46;
+ return 0;
+
+out_xfrm46:
+ xfrm6_tunnel_deregister(&xfrm6_tunnel_handler, AF_INET6);
+out_xfrm6:
+ xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6);
+out_type:
+ unregister_pernet_subsys(&xfrm6_tunnel_net_ops);
+out_pernet:
+ kmem_cache_destroy(xfrm6_tunnel_spi_kmem);
+ return rv;
+}
+
+static void __exit xfrm6_tunnel_fini(void)
+{
+ xfrm6_tunnel_deregister(&xfrm46_tunnel_handler, AF_INET);
+ xfrm6_tunnel_deregister(&xfrm6_tunnel_handler, AF_INET6);
+ xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6);
+ unregister_pernet_subsys(&xfrm6_tunnel_net_ops);
+ kmem_cache_destroy(xfrm6_tunnel_spi_kmem);
+}
+
+module_init(xfrm6_tunnel_init);
+module_exit(xfrm6_tunnel_fini);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_IPV6);