37 files changed, 33972 insertions, 0 deletions
diff --git a/net/core/Makefile b/net/core/Makefile
new file mode 100644
index 00000000..674641b1
--- /dev/null
+++ b/net/core/Makefile
@@ -0,0 +1,23 @@
+#
+# Makefile for the Linux networking core.
+#
+
+obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \
+	 gen_stats.o gen_estimator.o net_namespace.o secure_seq.o flow_dissector.o
+
+obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
+
+obj-y		     += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
+			neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
+			sock_diag.o
+
+obj-$(CONFIG_XFRM) += flow.o
+obj-y += net-sysfs.o
+obj-$(CONFIG_NET_PKTGEN) += pktgen.o
+obj-$(CONFIG_NETPOLL) += netpoll.o
+obj-$(CONFIG_NET_DMA) += user_dma.o
+obj-$(CONFIG_FIB_RULES) += fib_rules.o
+obj-$(CONFIG_TRACEPOINTS) += net-traces.o
+obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o
+obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o
+obj-$(CONFIG_NETPRIO_CGROUP) += netprio_cgroup.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
new file mode 100644
index 00000000..e4fbfd6e
--- /dev/null
+++ b/net/core/datagram.c
@@ -0,0 +1,780 @@
+/*
+ *	SUCS NET3:
+ *
+ *	Generic datagram handling routines. These are generic for all
+ *	protocols. Possibly a generic IP version on top of these would
+ *	make sense. Not tonight however 8-).
+ *	This is used because UDP, RAW, PACKET, DDP, IPX, AX.25 and
+ *	NetROM layer all have identical poll code and mostly
+ *	identical recvmsg() code. So we share it here. The poll was
+ *	shared before but buried in udp.c so I moved it.
+ *
+ *	Authors:	Alan Cox <alan@lxorguk.ukuu.org.uk>. (datagram_poll() from old
+ *						     udp.c code)
+ *
+ *	Fixes:
+ *		Alan Cox	:	NULL return from skb_peek_copy()
+ *					understood
+ *		Alan Cox	:	Rewrote skb_read_datagram to avoid the
+ *					skb_peek_copy stuff.
+ *		Alan Cox	:	Added support for SOCK_SEQPACKET.
+ *					IPX can no longer use the SO_TYPE hack
+ *					but AX.25 now works right, and SPX is
+ *					feasible.
+ *		Alan Cox	:	Fixed write poll of non IP protocol
+ *					crash.
+ *		Florian  La Roche:	Changed for my new skbuff handling.
+ *		Darryl Miles	:	Fixed non-blocking SOCK_SEQPACKET.
+ *		Linus Torvalds	:	BSD semantic fixes.
+ *		Alan Cox	:	Datagram iovec handling
+ *		Darryl Miles	:	Fixed non-blocking SOCK_STREAM.
+ *		Alan Cox	:	POSIXisms
+ *		Pete Wyckoff    :       Unconnected accept() fix.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <asm/uaccess.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/poll.h>
+#include <linux/highmem.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+
+#include <net/protocol.h>
+#include <linux/skbuff.h>
+
+#include <net/checksum.h>
+#include <net/sock.h>
+#include <net/tcp_states.h>
+#include <trace/events/skb.h>
+
+/*
+ *	Is a socket 'connection oriented' ?
+ */
+static inline int connection_based(struct sock *sk)
+{
+	return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM;
+}
+
+static int receiver_wake_function(wait_queue_t *wait, unsigned mode, int sync,
+				  void *key)
+{
+	unsigned long bits = (unsigned long)key;
+
+	/*
+	 * Avoid a wakeup if event not interesting for us
+	 */
+	if (bits && !(bits & (POLLIN | POLLERR)))
+		return 0;
+	return autoremove_wake_function(wait, mode, sync, key);
+}
+/*
+ * Wait for a packet..
+ */
+static int wait_for_packet(struct sock *sk, int *err, long *timeo_p)
+{
+	int error;
+	DEFINE_WAIT_FUNC(wait, receiver_wake_function);
+
+	prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+
+	/* Socket errors? */
+	error = sock_error(sk);
+	if (error)
+		goto out_err;
+
+	if (!skb_queue_empty(&sk->sk_receive_queue))
+		goto out;
+
+	/* Socket shut down? */
+	if (sk->sk_shutdown & RCV_SHUTDOWN)
+		goto out_noerr;
+
+	/* Sequenced packets can come disconnected.
+	 * If so we report the problem
+	 */
+	error = -ENOTCONN;
+	if (connection_based(sk) &&
+	    !(sk->sk_state == TCP_ESTABLISHED || sk->sk_state == TCP_LISTEN))
+		goto out_err;
+
+	/* handle signals */
+	if (signal_pending(current))
+		goto interrupted;
+
+	error = 0;
+	*timeo_p = schedule_timeout(*timeo_p);
+out:
+	finish_wait(sk_sleep(sk), &wait);
+	return error;
+interrupted:
+	error = sock_intr_errno(*timeo_p);
+out_err:
+	*err = error;
+	goto out;
+out_noerr:
+	*err = 0;
+	error = 1;
+	goto out;
+}
+
+/**
+ *	__skb_recv_datagram - Receive a datagram skbuff
+ *	@sk: socket
+ *	@flags: MSG_ flags
+ *	@off: an offset in bytes to peek skb from. Returns an offset
+ *	      within an skb where data actually starts
+ *	@peeked: returns non-zero if this packet has been seen before
+ *	@err: error code returned
+ *
+ *	Get a datagram skbuff, understands the peeking, nonblocking wakeups
+ *	and possible races. This replaces identical code in packet, raw and
+ *	udp, as well as the IPX AX.25 and Appletalk. It also finally fixes
+ *	the long standing peek and read race for datagram sockets. If you
+ *	alter this routine remember it must be re-entrant.
+ *
+ *	This function will lock the socket if a skb is returned, so the caller
+ *	needs to unlock the socket in that case (usually by calling
+ *	skb_free_datagram)
+ *
+ *	* It does not lock socket since today. This function is
+ *	* free of race conditions. This measure should/can improve
+ *	* significantly datagram socket latencies at high loads,
+ *	* when data copying to user space takes lots of time.
+ *	* (BTW I've just killed the last cli() in IP/IPv6/core/netlink/packet
+ *	*  8) Great win.)
+ *	*			                    --ANK (980729)
+ *
+ *	The order of the tests when we find no data waiting are specified
+ *	quite explicitly by POSIX 1003.1g, don't change them without having
+ *	the standard around please.
+ */
+struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags,
+				    int *peeked, int *off, int *err)
+{
+	struct sk_buff *skb;
+	long timeo;
+	/*
+	 * Caller is allowed not to check sk->sk_err before skb_recv_datagram()
+	 */
+	int error = sock_error(sk);
+
+	if (error)
+		goto no_packet;
+
+	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+
+	do {
+		/* Again only user level code calls this function, so nothing
+		 * interrupt level will suddenly eat the receive_queue.
+		 *
+		 * Look at current nfs client by the way...
+		 * However, this function was correct in any case. 8)
+		 */
+		unsigned long cpu_flags;
+		struct sk_buff_head *queue = &sk->sk_receive_queue;
+
+		spin_lock_irqsave(&queue->lock, cpu_flags);
+		skb_queue_walk(queue, skb) {
+			*peeked = skb->peeked;
+			if (flags & MSG_PEEK) {
+				if (*off >= skb->len) {
+					*off -= skb->len;
+					continue;
+				}
+				skb->peeked = 1;
+				atomic_inc(&skb->users);
+			} else
+				__skb_unlink(skb, queue);
+
+			spin_unlock_irqrestore(&queue->lock, cpu_flags);
+			return skb;
+		}
+		spin_unlock_irqrestore(&queue->lock, cpu_flags);
+
+		/* User doesn't want to wait */
+		error = -EAGAIN;
+		if (!timeo)
+			goto no_packet;
+
+	} while (!wait_for_packet(sk, err, &timeo));
+
+	return NULL;
+
+no_packet:
+	*err = error;
+	return NULL;
+}
+EXPORT_SYMBOL(__skb_recv_datagram);
+
+struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
+				  int noblock, int *err)
+{
+	int peeked, off = 0;
+
+	return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
+				   &peeked, &off, err);
+}
+EXPORT_SYMBOL(skb_recv_datagram);
+
+void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
+{
+	consume_skb(skb);
+	sk_mem_reclaim_partial(sk);
+}
+EXPORT_SYMBOL(skb_free_datagram);
+
+void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb)
+{
+	bool slow;
+
+	if (likely(atomic_read(&skb->users) == 1))
+		smp_rmb();
+	else if (likely(!atomic_dec_and_test(&skb->users)))
+		return;
+
+	slow = lock_sock_fast(sk);
+	skb_orphan(skb);
+	sk_mem_reclaim_partial(sk);
+	unlock_sock_fast(sk, slow);
+
+	/* skb is now orphaned, can be freed outside of locked section */
+	trace_kfree_skb(skb, skb_free_datagram_locked);
+	__kfree_skb(skb);
+}
+EXPORT_SYMBOL(skb_free_datagram_locked);
+
+/**
+ *	skb_kill_datagram - Free a datagram skbuff forcibly
+ *	@sk: socket
+ *	@skb: datagram skbuff
+ *	@flags: MSG_ flags
+ *
+ *	This function frees a datagram skbuff that was received by
+ *	skb_recv_datagram.  The flags argument must match the one
+ *	used for skb_recv_datagram.
+ *
+ *	If the MSG_PEEK flag is set, and the packet is still on the
+ *	receive queue of the socket, it will be taken off the queue
+ *	before it is freed.
+ *
+ *	This function currently only disables BH when acquiring the
+ *	sk_receive_queue lock.  Therefore it must not be used in a
+ *	context where that lock is acquired in an IRQ context.
+ *
+ *	It returns 0 if the packet was removed by us.
+ */
+
+int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
+{
+	int err = 0;
+
+	if (flags & MSG_PEEK) {
+		err = -ENOENT;
+		spin_lock_bh(&sk->sk_receive_queue.lock);
+		if (skb == skb_peek(&sk->sk_receive_queue)) {
+			__skb_unlink(skb, &sk->sk_receive_queue);
+			atomic_dec(&skb->users);
+			err = 0;
+		}
+		spin_unlock_bh(&sk->sk_receive_queue.lock);
+	}
+
+	kfree_skb(skb);
+	atomic_inc(&sk->sk_drops);
+	sk_mem_reclaim_partial(sk);
+
+	return err;
+}
+EXPORT_SYMBOL(skb_kill_datagram);
+
+/**
+ *	skb_copy_datagram_iovec - Copy a datagram to an iovec.
+ *	@skb: buffer to copy
+ *	@offset: offset in the buffer to start copying from
+ *	@to: io vector to copy to
+ *	@len: amount of data to copy from buffer to iovec
+ *
+ *	Note: the iovec is modified during the copy.
+ */
+int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
+			    struct iovec *to, int len)
+{
+	int start = skb_headlen(skb);
+	int i, copy = start - offset;
+	struct sk_buff *frag_iter;
+
+	trace_skb_copy_datagram_iovec(skb, len);
+
+	/* Copy header. */
+	if (copy > 0) {
+		if (copy > len)
+			copy = len;
+		if (memcpy_toiovec(to, skb->data + offset, copy))
+			goto fault;
+		if ((len -= copy) == 0)
+			return 0;
+		offset += copy;
+	}
+
+	/* Copy paged appendix. Hmm... why does this look so complicated? */
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+		WARN_ON(start > offset + len);
+
+		end = start + skb_frag_size(frag);
+		if ((copy = end - offset) > 0) {
+			int err;
+			u8  *vaddr;
+			struct page *page = skb_frag_page(frag);
+
+			if (copy > len)
+				copy = len;
+			vaddr = kmap(page);
+			err = memcpy_toiovec(to, vaddr + frag->page_offset +
+					     offset - start, copy);
+			kunmap(page);
+			if (err)
+				goto fault;
+			if (!(len -= copy))
+				return 0;
+			offset += copy;
+		}
+		start = end;
+	}
+
+	skb_walk_frags(skb, frag_iter) {
+		int end;
+
+		WARN_ON(start > offset + len);
+
+		end = start + frag_iter->len;
+		if ((copy = end - offset) > 0) {
+			if (copy > len)
+				copy = len;
+			if (skb_copy_datagram_iovec(frag_iter,
+						    offset - start,
+						    to, copy))
+				goto fault;
+			if ((len -= copy) == 0)
+				return 0;
+			offset += copy;
+		}
+		start = end;
+	}
+	if (!len)
+		return 0;
+
+fault:
+	return -EFAULT;
+}
+EXPORT_SYMBOL(skb_copy_datagram_iovec);
+
+/**
+ *	skb_copy_datagram_const_iovec - Copy a datagram to an iovec.
+ *	@skb: buffer to copy
+ *	@offset: offset in the buffer to start copying from
+ *	@to: io vector to copy to
+ *	@to_offset: offset in the io vector to start copying to
+ *	@len: amount of data to copy from buffer to iovec
+ *
+ *	Returns 0 or -EFAULT.
+ *	Note: the iovec is not modified during the copy.
+ */
+int skb_copy_datagram_const_iovec(const struct sk_buff *skb, int offset,
+				  const struct iovec *to, int to_offset,
+				  int len)
+{
+	int start = skb_headlen(skb);
+	int i, copy = start - offset;
+	struct sk_buff *frag_iter;
+
+	/* Copy header. */
+	if (copy > 0) {
+		if (copy > len)
+			copy = len;
+		if (memcpy_toiovecend(to, skb->data + offset, to_offset, copy))
+			goto fault;
+		if ((len -= copy) == 0)
+			return 0;
+		offset += copy;
+		to_offset += copy;
+	}
+
+	/* Copy paged appendix. Hmm... why does this look so complicated? */
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+		WARN_ON(start > offset + len);
+
+		end = start + skb_frag_size(frag);
+		if ((copy = end - offset) > 0) {
+			int err;
+			u8  *vaddr;
+			struct page *page = skb_frag_page(frag);
+
+			if (copy > len)
+				copy = len;
+			vaddr = kmap(page);
+			err = memcpy_toiovecend(to, vaddr + frag->page_offset +
+						offset - start, to_offset, copy);
+			kunmap(page);
+			if (err)
+				goto fault;
+			if (!(len -= copy))
+				return 0;
+			offset += copy;
+			to_offset += copy;
+		}
+		start = end;
+	}
+
+	skb_walk_frags(skb, frag_iter) {
+		int end;
+
+		WARN_ON(start > offset + len);
+
+		end = start + frag_iter->len;
+		if ((copy = end - offset) > 0) {
+			if (copy > len)
+				copy = len;
+			if (skb_copy_datagram_const_iovec(frag_iter,
+							  offset - start,
+							  to, to_offset,
+							  copy))
+				goto fault;
+			if ((len -= copy) == 0)
+				return 0;
+			offset += copy;
+			to_offset += copy;
+		}
+		start = end;
+	}
+	if (!len)
+		return 0;
+
+fault:
+	return -EFAULT;
+}
+EXPORT_SYMBOL(skb_copy_datagram_const_iovec);
+
+/**
+ *	skb_copy_datagram_from_iovec - Copy a datagram from an iovec.
+ *	@skb: buffer to copy
+ *	@offset: offset in the buffer to start copying to
+ *	@from: io vector to copy to
+ *	@from_offset: offset in the io vector to start copying from
+ *	@len: amount of data to copy to buffer from iovec
+ *
+ *	Returns 0 or -EFAULT.
+ *	Note: the iovec is not modified during the copy.
+ */
+int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset,
+				 const struct iovec *from, int from_offset,
+				 int len)
+{
+	int start = skb_headlen(skb);
+	int i, copy = start - offset;
+	struct sk_buff *frag_iter;
+
+	/* Copy header. */
+	if (copy > 0) {
+		if (copy > len)
+			copy = len;
+		if (memcpy_fromiovecend(skb->data + offset, from, from_offset,
+					copy))
+			goto fault;
+		if ((len -= copy) == 0)
+			return 0;
+		offset += copy;
+		from_offset += copy;
+	}
+
+	/* Copy paged appendix. Hmm... why does this look so complicated? */
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+		WARN_ON(start > offset + len);
+
+		end = start + skb_frag_size(frag);
+		if ((copy = end - offset) > 0) {
+			int err;
+			u8  *vaddr;
+			struct page *page = skb_frag_page(frag);
+
+			if (copy > len)
+				copy = len;
+			vaddr = kmap(page);
+			err = memcpy_fromiovecend(vaddr + frag->page_offset +
+						  offset - start,
+						  from, from_offset, copy);
+			kunmap(page);
+			if (err)
+				goto fault;
+
+			if (!(len -= copy))
+				return 0;
+			offset += copy;
+			from_offset += copy;
+		}
+		start = end;
+	}
+
+	skb_walk_frags(skb, frag_iter) {
+		int end;
+
+		WARN_ON(start > offset + len);
+
+		end = start + frag_iter->len;
+		if ((copy = end - offset) > 0) {
+			if (copy > len)
+				copy = len;
+			if (skb_copy_datagram_from_iovec(frag_iter,
+							 offset - start,
+							 from,
+							 from_offset,
+							 copy))
+				goto fault;
+			if ((len -= copy) == 0)
+				return 0;
+			offset += copy;
+			from_offset += copy;
+		}
+		start = end;
+	}
+	if (!len)
+		return 0;
+
+fault:
+	return -EFAULT;
+}
+EXPORT_SYMBOL(skb_copy_datagram_from_iovec);
+
+static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
+				      u8 __user *to, int len,
+				      __wsum *csump)
+{
+	int start = skb_headlen(skb);
+	int i, copy = start - offset;
+	struct sk_buff *frag_iter;
+	int pos = 0;
+
+	/* Copy header. */
+	if (copy > 0) {
+		int err = 0;
+		if (copy > len)
+			copy = len;
+		*csump = csum_and_copy_to_user(skb->data + offset, to, copy,
+					       *csump, &err);
+		if (err)
+			goto fault;
+		if ((len -= copy) == 0)
+			return 0;
+		offset += copy;
+		to += copy;
+		pos = copy;
+	}
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+		WARN_ON(start > offset + len);
+
+		end = start + skb_frag_size(frag);
+		if ((copy = end - offset) > 0) {
+			__wsum csum2;
+			int err = 0;
+			u8  *vaddr;
+			struct page *page = skb_frag_page(frag);
+
+			if (copy > len)
+				copy = len;
+			vaddr = kmap(page);
+			csum2 = csum_and_copy_to_user(vaddr +
+							frag->page_offset +
+							offset - start,
+						      to, copy, 0, &err);
+			kunmap(page);
+			if (err)
+				goto fault;
+			*csump = csum_block_add(*csump, csum2, pos);
+			if (!(len -= copy))
+				return 0;
+			offset += copy;
+			to += copy;
+			pos += copy;
+		}
+		start = end;
+	}
+
+	skb_walk_frags(skb, frag_iter) {
+		int end;
+
+		WARN_ON(start > offset + len);
+
+		end = start + frag_iter->len;
+		if ((copy = end - offset) > 0) {
+			__wsum csum2 = 0;
+			if (copy > len)
+				copy = len;
+			if (skb_copy_and_csum_datagram(frag_iter,
+						       offset - start,
+						       to, copy,
+						       &csum2))
+				goto fault;
+			*csump = csum_block_add(*csump, csum2, pos);
+			if ((len -= copy) == 0)
+				return 0;
+			offset += copy;
+			to += copy;
+			pos += copy;
+		}
+		start = end;
+	}
+	if (!len)
+		return 0;
+
+fault:
+	return -EFAULT;
+}
+
+__sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
+{
+	__sum16 sum;
+
+	sum = csum_fold(skb_checksum(skb, 0, len, skb->csum));
+	if (likely(!sum)) {
+		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
+			netdev_rx_csum_fault(skb->dev);
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	}
+	return sum;
+}
+EXPORT_SYMBOL(__skb_checksum_complete_head);
+
+__sum16 __skb_checksum_complete(struct sk_buff *skb)
+{
+	return __skb_checksum_complete_head(skb, skb->len);
+}
+EXPORT_SYMBOL(__skb_checksum_complete);
+
+/**
+ *	skb_copy_and_csum_datagram_iovec - Copy and checkum skb to user iovec.
+ *	@skb: skbuff
+ *	@hlen: hardware length
+ *	@iov: io vector
+ *
+ *	Caller _must_ check that skb will fit to this iovec.
+ *
+ *	Returns: 0       - success.
+ *		 -EINVAL - checksum failure.
+ *		 -EFAULT - fault during copy. Beware, in this case iovec
+ *			   can be modified!
+ */
+int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb,
+				     int hlen, struct iovec *iov)
+{
+	__wsum csum;
+	int chunk = skb->len - hlen;
+
+	if (!chunk)
+		return 0;
+
+	/* Skip filled elements.
+	 * Pretty silly, look at memcpy_toiovec, though 8)
+	 */
+	while (!iov->iov_len)
+		iov++;
+
+	if (iov->iov_len < chunk) {
+		if (__skb_checksum_complete(skb))
+			goto csum_error;
+		if (skb_copy_datagram_iovec(skb, hlen, iov, chunk))
+			goto fault;
+	} else {
+		csum = csum_partial(skb->data, hlen, skb->csum);
+		if (skb_copy_and_csum_datagram(skb, hlen, iov->iov_base,
+					       chunk, &csum))
+			goto fault;
+		if (csum_fold(csum))
+			goto csum_error;
+		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
+			netdev_rx_csum_fault(skb->dev);
+		iov->iov_len -= chunk;
+		iov->iov_base += chunk;
+	}
+	return 0;
+csum_error:
+	return -EINVAL;
+fault:
+	return -EFAULT;
+}
+EXPORT_SYMBOL(skb_copy_and_csum_datagram_iovec);
+
+/**
+ * 	datagram_poll - generic datagram poll
+ *	@file: file struct
+ *	@sock: socket
+ *	@wait: poll table
+ *
+ *	Datagram poll: Again totally generic. This also handles
+ *	sequenced packet sockets providing the socket receive queue
+ *	is only ever holding data ready to receive.
+ *
+ *	Note: when you _don't_ use this routine for this protocol,
+ *	and you use a different write policy from sock_writeable()
+ *	then please supply your own write_space callback.
+ */
+unsigned int datagram_poll(struct file *file, struct socket *sock,
+			   poll_table *wait)
+{
+	struct sock *sk = sock->sk;
+	unsigned int mask;
+
+	sock_poll_wait(file, sk_sleep(sk), wait);
+	mask = 0;
+
+	/* exceptional events? */
+	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+		mask |= POLLERR;
+	if (sk->sk_shutdown & RCV_SHUTDOWN)
+		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
+	if (sk->sk_shutdown == SHUTDOWN_MASK)
+		mask |= POLLHUP;
+
+	/* readable? */
+	if (!skb_queue_empty(&sk->sk_receive_queue))
+		mask |= POLLIN | POLLRDNORM;
+
+	/* Connection-based need to check for termination and startup */
+	if (connection_based(sk)) {
+		if (sk->sk_state == TCP_CLOSE)
+			mask |= POLLHUP;
+		/* connection hasn't started yet? */
+		if (sk->sk_state == TCP_SYN_SENT)
+			return mask;
+	}
+
+	/* writable? */
+	if (sock_writeable(sk))
+		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+	else
+		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+
+	return mask;
+}
+EXPORT_SYMBOL(datagram_poll);
diff --git a/net/core/dev.c b/net/core/dev.c
new file mode 100644
index 00000000..fa0c2b80
--- /dev/null
+++ b/net/core/dev.c
@@ -0,0 +1,6554 @@
+/*
+ * 	NET3	Protocol independent device support routines.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ *	Derived from the non IP parts of dev.c 1.0.19
+ * 		Authors:	Ross Biro
+ *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *				Mark Evans, <evansmp@uhura.aston.ac.uk>
+ *
+ *	Additional Authors:
+ *		Florian la Roche <rzsfl@rz.uni-sb.de>
+ *		Alan Cox <gw4pts@gw4pts.ampr.org>
+ *		David Hinds <dahinds@users.sourceforge.net>
+ *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
+ *		Adam Sulmicki <adam@cfar.umd.edu>
+ *              Pekka Riikonen <priikone@poesidon.pspt.fi>
+ *
+ *	Changes:
+ *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
+ *              			to 2 if register_netdev gets called
+ *              			before net_dev_init & also removed a
+ *              			few lines of code in the process.
+ *		Alan Cox	:	device private ioctl copies fields back.
+ *		Alan Cox	:	Transmit queue code does relevant
+ *					stunts to keep the queue safe.
+ *		Alan Cox	:	Fixed double lock.
+ *		Alan Cox	:	Fixed promisc NULL pointer trap
+ *		????????	:	Support the full private ioctl range
+ *		Alan Cox	:	Moved ioctl permission check into
+ *					drivers
+ *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
+ *		Alan Cox	:	100 backlog just doesn't cut it when
+ *					you start doing multicast video 8)
+ *		Alan Cox	:	Rewrote net_bh and list manager.
+ *		Alan Cox	: 	Fix ETH_P_ALL echoback lengths.
+ *		Alan Cox	:	Took out transmit every packet pass
+ *					Saved a few bytes in the ioctl handler
+ *		Alan Cox	:	Network driver sets packet type before
+ *					calling netif_rx. Saves a function
+ *					call a packet.
+ *		Alan Cox	:	Hashed net_bh()
+ *		Richard Kooijman:	Timestamp fixes.
+ *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
+ *		Alan Cox	:	Device lock protection.
+ *		Alan Cox	: 	Fixed nasty side effect of device close
+ *					changes.
+ *		Rudi Cilibrasi	:	Pass the right thing to
+ *					set_mac_address()
+ *		Dave Miller	:	32bit quantity for the device lock to
+ *					make it work out on a Sparc.
+ *		Bjorn Ekwall	:	Added KERNELD hack.
+ *		Alan Cox	:	Cleaned up the backlog initialise.
+ *		Craig Metz	:	SIOCGIFCONF fix if space for under
+ *					1 device.
+ *	    Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
+ *					is no device open function.
+ *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
+ *	    Michael Chastain	:	Fix signed/unsigned for SIOCGIFCONF
+ *		Cyrus Durgin	:	Cleaned for KMOD
+ *		Adam Sulmicki   :	Bug Fix : Network Device Unload
+ *					A network device unload needs to purge
+ *					the backlog queue.
+ *	Paul Rusty Russell	:	SIOCSIFNAME
+ *              Pekka Riikonen  :	Netdev boot-time settings code
+ *              Andrew Morton   :       Make unregister_netdevice wait
+ *              			indefinitely on dev->refcnt
+ * 		J Hadi Salim	:	- Backlog queue sampling
+ *				        - netif_rx() feedback
+ */
+
+#include <asm/uaccess.h>
+#include <linux/bitops.h>
+#include <linux/capability.h>
+#include <linux/cpu.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/hash.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/mutex.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/notifier.h>
+#include <linux/skbuff.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+#include <linux/rtnetlink.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/stat.h>
+#include <net/dst.h>
+#include <net/pkt_sched.h>
+#include <net/checksum.h>
+#include <net/xfrm.h>
+#include <linux/highmem.h>
+#include <linux/init.h>
+#include <linux/kmod.h>
+#include <linux/module.h>
+#include <linux/netpoll.h>
+#include <linux/rcupdate.h>
+#include <linux/delay.h>
+#include <net/wext.h>
+#include <net/iw_handler.h>
+#include <asm/current.h>
+#include <linux/audit.h>
+#include <linux/dmaengine.h>
+#include <linux/err.h>
+#include <linux/ctype.h>
+#include <linux/if_arp.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <net/ip.h>
+#include <linux/ipv6.h>
+#include <linux/in.h>
+#include <linux/jhash.h>
+#include <linux/random.h>
+#include <trace/events/napi.h>
+#include <trace/events/net.h>
+#include <trace/events/skb.h>
+#include <linux/pci.h>
+#include <linux/inetdevice.h>
+#include <linux/cpu_rmap.h>
+#include <linux/net_tstamp.h>
+#include <linux/static_key.h>
+#include <net/flow_keys.h>
+
+#include "net-sysfs.h"
+
+/* Instead of increasing this, you should create a hash table. */
+#define MAX_GRO_SKBS 8
+
+/* This should be increased if a protocol with a bigger head is added. */
+#define GRO_MAX_HEAD (MAX_HEADER + 128)
+
+/*
+ *	The list of packet types we will receive (as opposed to discard)
+ *	and the routines to invoke.
+ *
+ *	Why 16. Because with 16 the only overlap we get on a hash of the
+ *	low nibble of the protocol value is RARP/SNAP/X.25.
+ *
+ *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
+ *             sure which should go first, but I bet it won't make much
+ *             difference if we are running VLANs.  The good news is that
+ *             this protocol won't be in the list unless compiled in, so
+ *             the average user (w/out VLANs) will not be adversely affected.
+ *             --BLG
+ *
+ *		0800	IP
+ *		8100    802.1Q VLAN
+ *		0001	802.3
+ *		0002	AX.25
+ *		0004	802.2
+ *		8035	RARP
+ *		0005	SNAP
+ *		0805	X.25
+ *		0806	ARP
+ *		8137	IPX
+ *		0009	Localtalk
+ *		86DD	IPv6
+ */
+
+#define PTYPE_HASH_SIZE	(16)
+#define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)
+
+static DEFINE_SPINLOCK(ptype_lock);
+static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
+static struct list_head ptype_all __read_mostly;	/* Taps */
+
+/*
+ * The @dev_base_head list is protected by @dev_base_lock and the rtnl
+ * semaphore.
+ *
+ * Pure readers hold dev_base_lock for reading, or rcu_read_lock()
+ *
+ * Writers must hold the rtnl semaphore while they loop through the
+ * dev_base_head list, and hold dev_base_lock for writing when they do the
+ * actual updates.  This allows pure readers to access the list even
+ * while a writer is preparing to update it.
+ *
+ * To put it another way, dev_base_lock is held for writing only to
+ * protect against pure readers; the rtnl semaphore provides the
+ * protection against other writers.
+ *
+ * See, for example usages, register_netdevice() and
+ * unregister_netdevice(), which must be called with the rtnl
+ * semaphore held.
+ */
+DEFINE_RWLOCK(dev_base_lock);
+EXPORT_SYMBOL(dev_base_lock);
+
+static inline void dev_base_seq_inc(struct net *net)
+{
+	while (++net->dev_base_seq == 0);
+}
+
+static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
+{
+	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
+	return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
+}
+
+static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
+{
+	return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
+}
+
+static inline void rps_lock(struct softnet_data *sd)
+{
+#ifdef CONFIG_RPS
+	spin_lock(&sd->input_pkt_queue.lock);
+#endif
+}
+
+static inline void rps_unlock(struct softnet_data *sd)
+{
+#ifdef CONFIG_RPS
+	spin_unlock(&sd->input_pkt_queue.lock);
+#endif
+}
+
+/* Device list insertion */
+static int list_netdevice(struct net_device *dev)
+{
+	struct net *net = dev_net(dev);
+
+	ASSERT_RTNL();
+
+	write_lock_bh(&dev_base_lock);
+	list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
+	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
+	hlist_add_head_rcu(&dev->index_hlist,
+			   dev_index_hash(net, dev->ifindex));
+	write_unlock_bh(&dev_base_lock);
+
+	dev_base_seq_inc(net);
+
+	return 0;
+}
+
+/* Device list removal
+ * caller must respect a RCU grace period before freeing/reusing dev
+ */
+static void unlist_netdevice(struct net_device *dev)
+{
+	ASSERT_RTNL();
+
+	/* Unlink dev from the device chain */
+	write_lock_bh(&dev_base_lock);
+	list_del_rcu(&dev->dev_list);
+	hlist_del_rcu(&dev->name_hlist);
+	hlist_del_rcu(&dev->index_hlist);
+	write_unlock_bh(&dev_base_lock);
+
+	dev_base_seq_inc(dev_net(dev));
+}
+
+/*
+ *	Our notifier list
+ */
+
+static RAW_NOTIFIER_HEAD(netdev_chain);
+
+/*
+ *	Device drivers call our routines to queue packets here. We empty the
+ *	queue in the local softnet handler.
+ */
+
+DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
+EXPORT_PER_CPU_SYMBOL(softnet_data);
+
+#ifdef CONFIG_LOCKDEP
+/*
+ * register_netdevice() inits txq->_xmit_lock and sets lockdep class
+ * according to dev->type
+ */
+static const unsigned short netdev_lock_type[] =
+	{ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
+	 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
+	 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
+	 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
+	 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
+	 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
+	 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
+	 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
+	 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
+	 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
+	 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
+	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
+	 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
+	 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET,
+	 ARPHRD_PHONET_PIPE, ARPHRD_IEEE802154,
+	 ARPHRD_VOID, ARPHRD_NONE};
+
+static const char *const netdev_lock_name[] =
+	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
+	 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
+	 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
+	 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
+	 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
+	 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
+	 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
+	 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
+	 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
+	 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
+	 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
+	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
+	 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
+	 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET",
+	 "_xmit_PHONET_PIPE", "_xmit_IEEE802154",
+	 "_xmit_VOID", "_xmit_NONE"};
+
+static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
+static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
+
+static inline unsigned short netdev_lock_pos(unsigned short dev_type)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
+		if (netdev_lock_type[i] == dev_type)
+			return i;
+	/* the last key is used by default */
+	return ARRAY_SIZE(netdev_lock_type) - 1;
+}
+
+static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
+						 unsigned short dev_type)
+{
+	int i;
+
+	i = netdev_lock_pos(dev_type);
+	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
+				   netdev_lock_name[i]);
+}
+
+static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
+{
+	int i;
+
+	i = netdev_lock_pos(dev->type);
+	lockdep_set_class_and_name(&dev->addr_list_lock,
+				   &netdev_addr_lock_key[i],
+				   netdev_lock_name[i]);
+}
+#else
+static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
+						 unsigned short dev_type)
+{
+}
+static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
+{
+}
+#endif
+
+/*******************************************************************************
+
+		Protocol management and registration routines
+
+*******************************************************************************/
+
+/*
+ *	Add a protocol ID to the list. Now that the input handler is
+ *	smarter we can dispense with all the messy stuff that used to be
+ *	here.
+ *
+ *	BEWARE!!! Protocol handlers, mangling input packets,
+ *	MUST BE last in hash buckets and checking protocol handlers
+ *	MUST start from promiscuous ptype_all chain in net_bh.
+ *	It is true now, do not change it.
+ *	Explanation follows: if protocol handler, mangling packet, will
+ *	be the first on list, it is not able to sense, that packet
+ *	is cloned and should be copied-on-write, so that it will
+ *	change it and subsequent readers will get broken packet.
+ *							--ANK (980803)
+ */
+
+static inline struct list_head *ptype_head(const struct packet_type *pt)
+{
+	if (pt->type == htons(ETH_P_ALL))
+		return &ptype_all;
+	else
+		return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
+}
+
+/**
+ *	dev_add_pack - add packet handler
+ *	@pt: packet type declaration
+ *
+ *	Add a protocol handler to the networking stack. The passed &packet_type
+ *	is linked into kernel lists and may not be freed until it has been
+ *	removed from the kernel lists.
+ *
+ *	This call does not sleep therefore it can not
+ *	guarantee all CPU's that are in middle of receiving packets
+ *	will see the new packet type (until the next received packet).
+ */
+
+void dev_add_pack(struct packet_type *pt)
+{
+	struct list_head *head = ptype_head(pt);
+
+	spin_lock(&ptype_lock);
+	list_add_rcu(&pt->list, head);
+	spin_unlock(&ptype_lock);
+}
+EXPORT_SYMBOL(dev_add_pack);
+
+/**
+ *	__dev_remove_pack	 - remove packet handler
+ *	@pt: packet type declaration
+ *
+ *	Remove a protocol handler that was previously added to the kernel
+ *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
+ *	from the kernel lists and can be freed or reused once this function
+ *	returns.
+ *
+ *      The packet type might still be in use by receivers
+ *	and must not be freed until after all the CPU's have gone
+ *	through a quiescent state.
+ */
+void __dev_remove_pack(struct packet_type *pt)
+{
+	struct list_head *head = ptype_head(pt);
+	struct packet_type *pt1;
+
+	spin_lock(&ptype_lock);
+
+	list_for_each_entry(pt1, head, list) {
+		if (pt == pt1) {
+			list_del_rcu(&pt->list);
+			goto out;
+		}
+	}
+
+	pr_warn("dev_remove_pack: %p not found\n", pt);
+out:
+	spin_unlock(&ptype_lock);
+}
+EXPORT_SYMBOL(__dev_remove_pack);
+
+/**
+ *	dev_remove_pack	 - remove packet handler
+ *	@pt: packet type declaration
+ *
+ *	Remove a protocol handler that was previously added to the kernel
+ *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
+ *	from the kernel lists and can be freed or reused once this function
+ *	returns.
+ *
+ *	This call sleeps to guarantee that no CPU is looking at the packet
+ *	type after return.
+ */
+void dev_remove_pack(struct packet_type *pt)
+{
+	__dev_remove_pack(pt);
+
+	synchronize_net();
+}
+EXPORT_SYMBOL(dev_remove_pack);
+
+/******************************************************************************
+
+		      Device Boot-time Settings Routines
+
+*******************************************************************************/
+
+/* Boot time configuration table */
+static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
+
+/**
+ *	netdev_boot_setup_add	- add new setup entry
+ *	@name: name of the device
+ *	@map: configured settings for the device
+ *
+ *	Adds new setup entry to the dev_boot_setup list.  The function
+ *	returns 0 on error and 1 on success.  This is a generic routine to
+ *	all netdevices.
+ */
+static int netdev_boot_setup_add(char *name, struct ifmap *map)
+{
+	struct netdev_boot_setup *s;
+	int i;
+
+	s = dev_boot_setup;
+	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
+		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
+			memset(s[i].name, 0, sizeof(s[i].name));
+			strlcpy(s[i].name, name, IFNAMSIZ);
+			memcpy(&s[i].map, map, sizeof(s[i].map));
+			break;
+		}
+	}
+
+	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
+}
+
+/**
+ *	netdev_boot_setup_check	- check boot time settings
+ *	@dev: the netdevice
+ *
+ * 	Check boot time settings for the device.
+ *	The found settings are set for the device to be used
+ *	later in the device probing.
+ *	Returns 0 if no settings found, 1 if they are.
+ */
+int netdev_boot_setup_check(struct net_device *dev)
+{
+	struct netdev_boot_setup *s = dev_boot_setup;
+	int i;
+
+	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
+		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
+		    !strcmp(dev->name, s[i].name)) {
+			dev->irq 	= s[i].map.irq;
+			dev->base_addr 	= s[i].map.base_addr;
+			dev->mem_start 	= s[i].map.mem_start;
+			dev->mem_end 	= s[i].map.mem_end;
+			return 1;
+		}
+	}
+	return 0;
+}
+EXPORT_SYMBOL(netdev_boot_setup_check);
+
+
+/**
+ *	netdev_boot_base	- get address from boot time settings
+ *	@prefix: prefix for network device
+ *	@unit: id for network device
+ *
+ * 	Check boot time settings for the base address of device.
+ *	The found settings are set for the device to be used
+ *	later in the device probing.
+ *	Returns 0 if no settings found.
+ */
+unsigned long netdev_boot_base(const char *prefix, int unit)
+{
+	const struct netdev_boot_setup *s = dev_boot_setup;
+	char name[IFNAMSIZ];
+	int i;
+
+	sprintf(name, "%s%d", prefix, unit);
+
+	/*
+	 * If device already registered then return base of 1
+	 * to indicate not to probe for this interface
+	 */
+	if (__dev_get_by_name(&init_net, name))
+		return 1;
+
+	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
+		if (!strcmp(name, s[i].name))
+			return s[i].map.base_addr;
+	return 0;
+}
+
+/*
+ * Saves at boot time configured settings for any netdevice.
+ */
+int __init netdev_boot_setup(char *str)
+{
+	int ints[5];
+	struct ifmap map;
+
+	str = get_options(str, ARRAY_SIZE(ints), ints);
+	if (!str || !*str)
+		return 0;
+
+	/* Save settings */
+	memset(&map, 0, sizeof(map));
+	if (ints[0] > 0)
+		map.irq = ints[1];
+	if (ints[0] > 1)
+		map.base_addr = ints[2];
+	if (ints[0] > 2)
+		map.mem_start = ints[3];
+	if (ints[0] > 3)
+		map.mem_end = ints[4];
+
+	/* Add new entry to the list */
+	return netdev_boot_setup_add(str, &map);
+}
+
+__setup("netdev=", netdev_boot_setup);
+
+/*******************************************************************************
+
+			    Device Interface Subroutines
+
+*******************************************************************************/
+
+/**
+ *	__dev_get_by_name	- find a device by its name
+ *	@net: the applicable net namespace
+ *	@name: name to find
+ *
+ *	Find an interface by name. Must be called under RTNL semaphore
+ *	or @dev_base_lock. If the name is found a pointer to the device
+ *	is returned. If the name is not found then %NULL is returned. The
+ *	reference counters are not incremented so the caller must be
+ *	careful with locks.
+ */
+
+struct net_device *__dev_get_by_name(struct net *net, const char *name)
+{
+	struct hlist_node *p;
+	struct net_device *dev;
+	struct hlist_head *head = dev_name_hash(net, name);
+
+	hlist_for_each_entry(dev, p, head, name_hlist)
+		if (!strncmp(dev->name, name, IFNAMSIZ))
+			return dev;
+
+	return NULL;
+}
+EXPORT_SYMBOL(__dev_get_by_name);
+
+/**
+ *	dev_get_by_name_rcu	- find a device by its name
+ *	@net: the applicable net namespace
+ *	@name: name to find
+ *
+ *	Find an interface by name.
+ *	If the name is found a pointer to the device is returned.
+ * 	If the name is not found then %NULL is returned.
+ *	The reference counters are not incremented so the caller must be
+ *	careful with locks. The caller must hold RCU lock.
+ */
+
+struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
+{
+	struct hlist_node *p;
+	struct net_device *dev;
+	struct hlist_head *head = dev_name_hash(net, name);
+
+	hlist_for_each_entry_rcu(dev, p, head, name_hlist)
+		if (!strncmp(dev->name, name, IFNAMSIZ))
+			return dev;
+
+	return NULL;
+}
+EXPORT_SYMBOL(dev_get_by_name_rcu);
+
+/**
+ *	dev_get_by_name		- find a device by its name
+ *	@net: the applicable net namespace
+ *	@name: name to find
+ *
+ *	Find an interface by name. This can be called from any
+ *	context and does its own locking. The returned handle has
+ *	the usage count incremented and the caller must use dev_put() to
+ *	release it when it is no longer needed. %NULL is returned if no
+ *	matching device is found.
+ */
+
+struct net_device *dev_get_by_name(struct net *net, const char *name)
+{
+	struct net_device *dev;
+
+	rcu_read_lock();
+	dev = dev_get_by_name_rcu(net, name);
+	if (dev)
+		dev_hold(dev);
+	rcu_read_unlock();
+	return dev;
+}
+EXPORT_SYMBOL(dev_get_by_name);
+
+/**
+ *	__dev_get_by_index - find a device by its ifindex
+ *	@net: the applicable net namespace
+ *	@ifindex: index of device
+ *
+ *	Search for an interface by index. Returns %NULL if the device
+ *	is not found or a pointer to the device. The device has not
+ *	had its reference counter increased so the caller must be careful
+ *	about locking. The caller must hold either the RTNL semaphore
+ *	or @dev_base_lock.
+ */
+
+struct net_device *__dev_get_by_index(struct net *net, int ifindex)
+{
+	struct hlist_node *p;
+	struct net_device *dev;
+	struct hlist_head *head = dev_index_hash(net, ifindex);
+
+	hlist_for_each_entry(dev, p, head, index_hlist)
+		if (dev->ifindex == ifindex)
+			return dev;
+
+	return NULL;
+}
+EXPORT_SYMBOL(__dev_get_by_index);
+
+/**
+ *	dev_get_by_index_rcu - find a device by its ifindex
+ *	@net: the applicable net namespace
+ *	@ifindex: index of device
+ *
+ *	Search for an interface by index. Returns %NULL if the device
+ *	is not found or a pointer to the device. The device has not
+ *	had its reference counter increased so the caller must be careful
+ *	about locking. The caller must hold RCU lock.
+ */
+
+struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
+{
+	struct hlist_node *p;
+	struct net_device *dev;
+	struct hlist_head *head = dev_index_hash(net, ifindex);
+
+	hlist_for_each_entry_rcu(dev, p, head, index_hlist)
+		if (dev->ifindex == ifindex)
+			return dev;
+
+	return NULL;
+}
+EXPORT_SYMBOL(dev_get_by_index_rcu);
+
+
+/**
+ *	dev_get_by_index - find a device by its ifindex
+ *	@net: the applicable net namespace
+ *	@ifindex: index of device
+ *
+ *	Search for an interface by index. Returns NULL if the device
+ *	is not found or a pointer to the device. The device returned has
+ *	had a reference added and the pointer is safe until the user calls
+ *	dev_put to indicate they have finished with it.
+ */
+
+struct net_device *dev_get_by_index(struct net *net, int ifindex)
+{
+	struct net_device *dev;
+
+	rcu_read_lock();
+	dev = dev_get_by_index_rcu(net, ifindex);
+	if (dev)
+		dev_hold(dev);
+	rcu_read_unlock();
+	return dev;
+}
+EXPORT_SYMBOL(dev_get_by_index);
+
+/**
+ *	dev_getbyhwaddr_rcu - find a device by its hardware address
+ *	@net: the applicable net namespace
+ *	@type: media type of device
+ *	@ha: hardware address
+ *
+ *	Search for an interface by MAC address. Returns NULL if the device
+ *	is not found or a pointer to the device.
+ *	The caller must hold RCU or RTNL.
+ *	The returned device has not had its ref count increased
+ *	and the caller must therefore be careful about locking
+ *
+ */
+
+struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
+				       const char *ha)
+{
+	struct net_device *dev;
+
+	for_each_netdev_rcu(net, dev)
+		if (dev->type == type &&
+		    !memcmp(dev->dev_addr, ha, dev->addr_len))
+			return dev;
+
+	return NULL;
+}
+EXPORT_SYMBOL(dev_getbyhwaddr_rcu);
+
+struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
+{
+	struct net_device *dev;
+
+	ASSERT_RTNL();
+	for_each_netdev(net, dev)
+		if (dev->type == type)
+			return dev;
+
+	return NULL;
+}
+EXPORT_SYMBOL(__dev_getfirstbyhwtype);
+
+struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
+{
+	struct net_device *dev, *ret = NULL;
+
+	rcu_read_lock();
+	for_each_netdev_rcu(net, dev)
+		if (dev->type == type) {
+			dev_hold(dev);
+			ret = dev;
+			break;
+		}
+	rcu_read_unlock();
+	return ret;
+}
+EXPORT_SYMBOL(dev_getfirstbyhwtype);
+
+/**
+ *	dev_get_by_flags_rcu - find any device with given flags
+ *	@net: the applicable net namespace
+ *	@if_flags: IFF_* values
+ *	@mask: bitmask of bits in if_flags to check
+ *
+ *	Search for any interface with the given flags. Returns NULL if a device
+ *	is not found or a pointer to the device. Must be called inside
+ *	rcu_read_lock(), and result refcount is unchanged.
+ */
+
+struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags,
+				    unsigned short mask)
+{
+	struct net_device *dev, *ret;
+
+	ret = NULL;
+	for_each_netdev_rcu(net, dev) {
+		if (((dev->flags ^ if_flags) & mask) == 0) {
+			ret = dev;
+			break;
+		}
+	}
+	return ret;
+}
+EXPORT_SYMBOL(dev_get_by_flags_rcu);
+
+/**
+ *	dev_valid_name - check if name is okay for network device
+ *	@name: name string
+ *
+ *	Network device names need to be valid file names to
+ *	to allow sysfs to work.  We also disallow any kind of
+ *	whitespace.
+ */
+bool dev_valid_name(const char *name)
+{
+	if (*name == '\0')
+		return false;
+	if (strlen(name) >= IFNAMSIZ)
+		return false;
+	if (!strcmp(name, ".") || !strcmp(name, ".."))
+		return false;
+
+	while (*name) {
+		if (*name == '/' || isspace(*name))
+			return false;
+		name++;
+	}
+	return true;
+}
+EXPORT_SYMBOL(dev_valid_name);
+
+/**
+ *	__dev_alloc_name - allocate a name for a device
+ *	@net: network namespace to allocate the device name in
+ *	@name: name format string
+ *	@buf:  scratch buffer and result name string
+ *
+ *	Passed a format string - eg "lt%d" it will try and find a suitable
+ *	id. It scans list of devices to build up a free map, then chooses
+ *	the first empty slot. The caller must hold the dev_base or rtnl lock
+ *	while allocating the name and adding the device in order to avoid
+ *	duplicates.
+ *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
+ *	Returns the number of the unit assigned or a negative errno code.
+ */
+
+static int __dev_alloc_name(struct net *net, const char *name, char *buf)
+{
+	int i = 0;
+	const char *p;
+	const int max_netdevices = 8*PAGE_SIZE;
+	unsigned long *inuse;
+	struct net_device *d;
+
+	p = strnchr(name, IFNAMSIZ-1, '%');
+	if (p) {
+		/*
+		 * Verify the string as this thing may have come from
+		 * the user.  There must be either one "%d" and no other "%"
+		 * characters.
+		 */
+		if (p[1] != 'd' || strchr(p + 2, '%'))
+			return -EINVAL;
+
+		/* Use one page as a bit array of possible slots */
+		inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
+		if (!inuse)
+			return -ENOMEM;
+
+		for_each_netdev(net, d) {
+			if (!sscanf(d->name, name, &i))
+				continue;
+			if (i < 0 || i >= max_netdevices)
+				continue;
+
+			/*  avoid cases where sscanf is not exact inverse of printf */
+			snprintf(buf, IFNAMSIZ, name, i);
+			if (!strncmp(buf, d->name, IFNAMSIZ))
+				set_bit(i, inuse);
+		}
+
+		i = find_first_zero_bit(inuse, max_netdevices);
+		free_page((unsigned long) inuse);
+	}
+
+	if (buf != name)
+		snprintf(buf, IFNAMSIZ, name, i);
+	if (!__dev_get_by_name(net, buf))
+		return i;
+
+	/* It is possible to run out of possible slots
+	 * when the name is long and there isn't enough space left
+	 * for the digits, or if all bits are used.
+	 */
+	return -ENFILE;
+}
+
+/**
+ *	dev_alloc_name - allocate a name for a device
+ *	@dev: device
+ *	@name: name format string
+ *
+ *	Passed a format string - eg "lt%d" it will try and find a suitable
+ *	id. It scans list of devices to build up a free map, then chooses
+ *	the first empty slot. The caller must hold the dev_base or rtnl lock
+ *	while allocating the name and adding the device in order to avoid
+ *	duplicates.
+ *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
+ *	Returns the number of the unit assigned or a negative errno code.
+ */
+
+int dev_alloc_name(struct net_device *dev, const char *name)
+{
+	char buf[IFNAMSIZ];
+	struct net *net;
+	int ret;
+
+	BUG_ON(!dev_net(dev));
+	net = dev_net(dev);
+	ret = __dev_alloc_name(net, name, buf);
+	if (ret >= 0)
+		strlcpy(dev->name, buf, IFNAMSIZ);
+	return ret;
+}
+EXPORT_SYMBOL(dev_alloc_name);
+
+static int dev_get_valid_name(struct net_device *dev, const char *name)
+{
+	struct net *net;
+
+	BUG_ON(!dev_net(dev));
+	net = dev_net(dev);
+
+	if (!dev_valid_name(name))
+		return -EINVAL;
+
+	if (strchr(name, '%'))
+		return dev_alloc_name(dev, name);
+	else if (__dev_get_by_name(net, name))
+		return -EEXIST;
+	else if (dev->name != name)
+		strlcpy(dev->name, name, IFNAMSIZ);
+
+	return 0;
+}
+
+/**
+ *	dev_change_name - change name of a device
+ *	@dev: device
+ *	@newname: name (or format string) must be at least IFNAMSIZ
+ *
+ *	Change name of a device, can pass format strings "eth%d".
+ *	for wildcarding.
+ */
+int dev_change_name(struct net_device *dev, const char *newname)
+{
+	char oldname[IFNAMSIZ];
+	int err = 0;
+	int ret;
+	struct net *net;
+
+	ASSERT_RTNL();
+	BUG_ON(!dev_net(dev));
+
+	net = dev_net(dev);
+	if (dev->flags & IFF_UP)
+		return -EBUSY;
+
+	if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
+		return 0;
+
+	memcpy(oldname, dev->name, IFNAMSIZ);
+
+	err = dev_get_valid_name(dev, newname);
+	if (err < 0)
+		return err;
+
+rollback:
+	ret = device_rename(&dev->dev, dev->name);
+	if (ret) {
+		memcpy(dev->name, oldname, IFNAMSIZ);
+		return ret;
+	}
+
+	write_lock_bh(&dev_base_lock);
+	hlist_del_rcu(&dev->name_hlist);
+	write_unlock_bh(&dev_base_lock);
+
+	synchronize_rcu();
+
+	write_lock_bh(&dev_base_lock);
+	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
+	write_unlock_bh(&dev_base_lock);
+
+	ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
+	ret = notifier_to_errno(ret);
+
+	if (ret) {
+		/* err >= 0 after dev_alloc_name() or stores the first errno */
+		if (err >= 0) {
+			err = ret;
+			memcpy(dev->name, oldname, IFNAMSIZ);
+			goto rollback;
+		} else {
+			pr_err("%s: name change rollback failed: %d\n",
+			       dev->name, ret);
+		}
+	}
+
+	return err;
+}
+
+/**
+ *	dev_set_alias - change ifalias of a device
+ *	@dev: device
+ *	@alias: name up to IFALIASZ
+ *	@len: limit of bytes to copy from info
+ *
+ *	Set ifalias for a device,
+ */
+int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
+{
+	ASSERT_RTNL();
+
+	if (len >= IFALIASZ)
+		return -EINVAL;
+
+	if (!len) {
+		if (dev->ifalias) {
+			kfree(dev->ifalias);
+			dev->ifalias = NULL;
+		}
+		return 0;
+	}
+
+	dev->ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
+	if (!dev->ifalias)
+		return -ENOMEM;
+
+	strlcpy(dev->ifalias, alias, len+1);
+	return len;
+}
+
+
+/**
+ *	netdev_features_change - device changes features
+ *	@dev: device to cause notification
+ *
+ *	Called to indicate a device has changed features.
+ */
+void netdev_features_change(struct net_device *dev)
+{
+	call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
+}
+EXPORT_SYMBOL(netdev_features_change);
+
+/**
+ *	netdev_state_change - device changes state
+ *	@dev: device to cause notification
+ *
+ *	Called to indicate a device has changed state. This function calls
+ *	the notifier chains for netdev_chain and sends a NEWLINK message
+ *	to the routing socket.
+ */
+void netdev_state_change(struct net_device *dev)
+{
+	if (dev->flags & IFF_UP) {
+		call_netdevice_notifiers(NETDEV_CHANGE, dev);
+		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
+	}
+}
+EXPORT_SYMBOL(netdev_state_change);
+
+int netdev_bonding_change(struct net_device *dev, unsigned long event)
+{
+	return call_netdevice_notifiers(event, dev);
+}
+EXPORT_SYMBOL(netdev_bonding_change);
+
+/**
+ *	dev_load 	- load a network module
+ *	@net: the applicable net namespace
+ *	@name: name of interface
+ *
+ *	If a network interface is not present and the process has suitable
+ *	privileges this function loads the module. If module loading is not
+ *	available in this kernel then it becomes a nop.
+ */
+
+void dev_load(struct net *net, const char *name)
+{
+	struct net_device *dev;
+	int no_module;
+
+	rcu_read_lock();
+	dev = dev_get_by_name_rcu(net, name);
+	rcu_read_unlock();
+
+	no_module = !dev;
+	if (no_module && capable(CAP_NET_ADMIN))
+		no_module = request_module("netdev-%s", name);
+	if (no_module && capable(CAP_SYS_MODULE)) {
+		if (!request_module("%s", name))
+			pr_err("Loading kernel module for a network device with CAP_SYS_MODULE (deprecated).  Use CAP_NET_ADMIN and alias netdev-%s instead.\n",
+			       name);
+	}
+}
+EXPORT_SYMBOL(dev_load);
+
+static int __dev_open(struct net_device *dev)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+	int ret;
+
+	ASSERT_RTNL();
+
+	if (!netif_device_present(dev))
+		return -ENODEV;
+
+	ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
+	ret = notifier_to_errno(ret);
+	if (ret)
+		return ret;
+
+	set_bit(__LINK_STATE_START, &dev->state);
+
+	if (ops->ndo_validate_addr)
+		ret = ops->ndo_validate_addr(dev);
+
+	if (!ret && ops->ndo_open)
+		ret = ops->ndo_open(dev);
+
+	if (ret)
+		clear_bit(__LINK_STATE_START, &dev->state);
+	else {
+		dev->flags |= IFF_UP;
+		net_dmaengine_get();
+		dev_set_rx_mode(dev);
+		dev_activate(dev);
+	}
+
+	return ret;
+}
+
+/**
+ *	dev_open	- prepare an interface for use.
+ *	@dev:	device to open
+ *
+ *	Takes a device from down to up state. The device's private open
+ *	function is invoked and then the multicast lists are loaded. Finally
+ *	the device is moved into the up state and a %NETDEV_UP message is
+ *	sent to the netdev notifier chain.
+ *
+ *	Calling this function on an active interface is a nop. On a failure
+ *	a negative errno code is returned.
+ */
+int dev_open(struct net_device *dev)
+{
+	int ret;
+
+	if (dev->flags & IFF_UP)
+		return 0;
+
+	ret = __dev_open(dev);
+	if (ret < 0)
+		return ret;
+
+	rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
+	call_netdevice_notifiers(NETDEV_UP, dev);
+
+	return ret;
+}
+EXPORT_SYMBOL(dev_open);
+
+static int __dev_close_many(struct list_head *head)
+{
+	struct net_device *dev;
+
+	ASSERT_RTNL();
+	might_sleep();
+
+	list_for_each_entry(dev, head, unreg_list) {
+		call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
+
+		clear_bit(__LINK_STATE_START, &dev->state);
+
+		/* Synchronize to scheduled poll. We cannot touch poll list, it
+		 * can be even on different cpu. So just clear netif_running().
+		 *
+		 * dev->stop() will invoke napi_disable() on all of it's
+		 * napi_struct instances on this device.
+		 */
+		smp_mb__after_clear_bit(); /* Commit netif_running(). */
+	}
+
+	dev_deactivate_many(head);
+
+	list_for_each_entry(dev, head, unreg_list) {
+		const struct net_device_ops *ops = dev->netdev_ops;
+
+		/*
+		 *	Call the device specific close. This cannot fail.
+		 *	Only if device is UP
+		 *
+		 *	We allow it to be called even after a DETACH hot-plug
+		 *	event.
+		 */
+		if (ops->ndo_stop)
+			ops->ndo_stop(dev);
+
+		dev->flags &= ~IFF_UP;
+		net_dmaengine_put();
+	}
+
+	return 0;
+}
+
+static int __dev_close(struct net_device *dev)
+{
+	int retval;
+	LIST_HEAD(single);
+
+	list_add(&dev->unreg_list, &single);
+	retval = __dev_close_many(&single);
+	list_del(&single);
+	return retval;
+}
+
+static int dev_close_many(struct list_head *head)
+{
+	struct net_device *dev, *tmp;
+	LIST_HEAD(tmp_list);
+
+	list_for_each_entry_safe(dev, tmp, head, unreg_list)
+		if (!(dev->flags & IFF_UP))
+			list_move(&dev->unreg_list, &tmp_list);
+
+	__dev_close_many(head);
+
+	list_for_each_entry(dev, head, unreg_list) {
+		rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
+		call_netdevice_notifiers(NETDEV_DOWN, dev);
+	}
+
+	/* rollback_registered_many needs the complete original list */
+	list_splice(&tmp_list, head);
+	return 0;
+}
+
+/**
+ *	dev_close - shutdown an interface.
+ *	@dev: device to shutdown
+ *
+ *	This function moves an active device into down state. A
+ *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
+ *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
+ *	chain.
+ */
+int dev_close(struct net_device *dev)
+{
+	if (dev->flags & IFF_UP) {
+		LIST_HEAD(single);
+
+		list_add(&dev->unreg_list, &single);
+		dev_close_many(&single);
+		list_del(&single);
+	}
+	return 0;
+}
+EXPORT_SYMBOL(dev_close);
+
+
+/**
+ *	dev_disable_lro - disable Large Receive Offload on a device
+ *	@dev: device
+ *
+ *	Disable Large Receive Offload (LRO) on a net device.  Must be
+ *	called under RTNL.  This is needed if received packets may be
+ *	forwarded to another interface.
+ */
+void dev_disable_lro(struct net_device *dev)
+{
+	/*
+	 * If we're trying to disable lro on a vlan device
+	 * use the underlying physical device instead
+	 */
+	if (is_vlan_dev(dev))
+		dev = vlan_dev_real_dev(dev);
+
+	dev->wanted_features &= ~NETIF_F_LRO;
+	netdev_update_features(dev);
+
+	if (unlikely(dev->features & NETIF_F_LRO))
+		netdev_WARN(dev, "failed to disable LRO!\n");
+}
+EXPORT_SYMBOL(dev_disable_lro);
+
+
+static int dev_boot_phase = 1;
+
+/**
+ *	register_netdevice_notifier - register a network notifier block
+ *	@nb: notifier
+ *
+ *	Register a notifier to be called when network device events occur.
+ *	The notifier passed is linked into the kernel structures and must
+ *	not be reused until it has been unregistered. A negative errno code
+ *	is returned on a failure.
+ *
+ * 	When registered all registration and up events are replayed
+ *	to the new notifier to allow device to have a race free
+ *	view of the network device list.
+ */
+
+int register_netdevice_notifier(struct notifier_block *nb)
+{
+	struct net_device *dev;
+	struct net_device *last;
+	struct net *net;
+	int err;
+
+	rtnl_lock();
+	err = raw_notifier_chain_register(&netdev_chain, nb);
+	if (err)
+		goto unlock;
+	if (dev_boot_phase)
+		goto unlock;
+	for_each_net(net) {
+		for_each_netdev(net, dev) {
+			err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
+			err = notifier_to_errno(err);
+			if (err)
+				goto rollback;
+
+			if (!(dev->flags & IFF_UP))
+				continue;
+
+			nb->notifier_call(nb, NETDEV_UP, dev);
+		}
+	}
+
+unlock:
+	rtnl_unlock();
+	return err;
+
+rollback:
+	last = dev;
+	for_each_net(net) {
+		for_each_netdev(net, dev) {
+			if (dev == last)
+				goto outroll;
+
+			if (dev->flags & IFF_UP) {
+				nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
+				nb->notifier_call(nb, NETDEV_DOWN, dev);
+			}
+			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
+			nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev);
+		}
+	}
+
+outroll:
+	raw_notifier_chain_unregister(&netdev_chain, nb);
+	goto unlock;
+}
+EXPORT_SYMBOL(register_netdevice_notifier);
+
+/**
+ *	unregister_netdevice_notifier - unregister a network notifier block
+ *	@nb: notifier
+ *
+ *	Unregister a notifier previously registered by
+ *	register_netdevice_notifier(). The notifier is unlinked into the
+ *	kernel structures and may then be reused. A negative errno code
+ *	is returned on a failure.
+ *
+ * 	After unregistering unregister and down device events are synthesized
+ *	for all devices on the device list to the removed notifier to remove
+ *	the need for special case cleanup code.
+ */
+
+int unregister_netdevice_notifier(struct notifier_block *nb)
+{
+	struct net_device *dev;
+	struct net *net;
+	int err;
+
+	rtnl_lock();
+	err = raw_notifier_chain_unregister(&netdev_chain, nb);
+	if (err)
+		goto unlock;
+
+	for_each_net(net) {
+		for_each_netdev(net, dev) {
+			if (dev->flags & IFF_UP) {
+				nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
+				nb->notifier_call(nb, NETDEV_DOWN, dev);
+			}
+			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
+			nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev);
+		}
+	}
+unlock:
+	rtnl_unlock();
+	return err;
+}
+EXPORT_SYMBOL(unregister_netdevice_notifier);
+
+/**
+ *	call_netdevice_notifiers - call all network notifier blocks
+ *      @val: value passed unmodified to notifier function
+ *      @dev: net_device pointer passed unmodified to notifier function
+ *
+ *	Call all network notifier blocks.  Parameters and return value
+ *	are as for raw_notifier_call_chain().
+ */
+
+int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
+{
+	ASSERT_RTNL();
+	return raw_notifier_call_chain(&netdev_chain, val, dev);
+}
+EXPORT_SYMBOL(call_netdevice_notifiers);
+
+static struct static_key netstamp_needed __read_mostly;
+#ifdef HAVE_JUMP_LABEL
+/* We are not allowed to call static_key_slow_dec() from irq context
+ * If net_disable_timestamp() is called from irq context, defer the
+ * static_key_slow_dec() calls.
+ */
+static atomic_t netstamp_needed_deferred;
+#endif
+
+void net_enable_timestamp(void)
+{
+#ifdef HAVE_JUMP_LABEL
+	int deferred = atomic_xchg(&netstamp_needed_deferred, 0);
+
+	if (deferred) {
+		while (--deferred)
+			static_key_slow_dec(&netstamp_needed);
+		return;
+	}
+#endif
+	WARN_ON(in_interrupt());
+	static_key_slow_inc(&netstamp_needed);
+}
+EXPORT_SYMBOL(net_enable_timestamp);
+
+void net_disable_timestamp(void)
+{
+#ifdef HAVE_JUMP_LABEL
+	if (in_interrupt()) {
+		atomic_inc(&netstamp_needed_deferred);
+		return;
+	}
+#endif
+	static_key_slow_dec(&netstamp_needed);
+}
+EXPORT_SYMBOL(net_disable_timestamp);
+
+static inline void net_timestamp_set(struct sk_buff *skb)
+{
+	skb->tstamp.tv64 = 0;
+	if (static_key_false(&netstamp_needed))
+		__net_timestamp(skb);
+}
+
+#define net_timestamp_check(COND, SKB)			\
+	if (static_key_false(&netstamp_needed)) {		\
+		if ((COND) && !(SKB)->tstamp.tv64)	\
+			__net_timestamp(SKB);		\
+	}						\
+
+static int net_hwtstamp_validate(struct ifreq *ifr)
+{
+	struct hwtstamp_config cfg;
+	enum hwtstamp_tx_types tx_type;
+	enum hwtstamp_rx_filters rx_filter;
+	int tx_type_valid = 0;
+	int rx_filter_valid = 0;
+
+	if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
+		return -EFAULT;
+
+	if (cfg.flags) /* reserved for future extensions */
+		return -EINVAL;
+
+	tx_type = cfg.tx_type;
+	rx_filter = cfg.rx_filter;
+
+	switch (tx_type) {
+	case HWTSTAMP_TX_OFF:
+	case HWTSTAMP_TX_ON:
+	case HWTSTAMP_TX_ONESTEP_SYNC:
+		tx_type_valid = 1;
+		break;
+	}
+
+	switch (rx_filter) {
+	case HWTSTAMP_FILTER_NONE:
+	case HWTSTAMP_FILTER_ALL:
+	case HWTSTAMP_FILTER_SOME:
+	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+		rx_filter_valid = 1;
+		break;
+	}
+
+	if (!tx_type_valid || !rx_filter_valid)
+		return -ERANGE;
+
+	return 0;
+}
+
+static inline bool is_skb_forwardable(struct net_device *dev,
+				      struct sk_buff *skb)
+{
+	unsigned int len;
+
+	if (!(dev->flags & IFF_UP))
+		return false;
+
+	len = dev->mtu + dev->hard_header_len + VLAN_HLEN;
+	if (skb->len <= len)
+		return true;
+
+	/* if TSO is enabled, we don't care about the length as the packet
+	 * could be forwarded without being segmented before
+	 */
+	if (skb_is_gso(skb))
+		return true;
+
+	return false;
+}
+
+/**
+ * dev_forward_skb - loopback an skb to another netif
+ *
+ * @dev: destination network device
+ * @skb: buffer to forward
+ *
+ * return values:
+ *	NET_RX_SUCCESS	(no congestion)
+ *	NET_RX_DROP     (packet was dropped, but freed)
+ *
+ * dev_forward_skb can be used for injecting an skb from the
+ * start_xmit function of one device into the receive queue
+ * of another device.
+ *
+ * The receiving device may be in another namespace, so
+ * we have to clear all information in the skb that could
+ * impact namespace isolation.
+ */
+int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
+{
+	if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
+		if (skb_copy_ubufs(skb, GFP_ATOMIC)) {
+			atomic_long_inc(&dev->rx_dropped);
+			kfree_skb(skb);
+			return NET_RX_DROP;
+		}
+	}
+
+	skb_orphan(skb);
+	nf_reset(skb);
+
+	if (unlikely(!is_skb_forwardable(dev, skb))) {
+		atomic_long_inc(&dev->rx_dropped);
+		kfree_skb(skb);
+		return NET_RX_DROP;
+	}
+	skb->skb_iif = 0;
+	skb->dev = dev;
+	skb_dst_drop(skb);
+	skb->tstamp.tv64 = 0;
+	skb->pkt_type = PACKET_HOST;
+	skb->protocol = eth_type_trans(skb, dev);
+	skb->mark = 0;
+	secpath_reset(skb);
+	nf_reset(skb);
+	return netif_rx(skb);
+}
+EXPORT_SYMBOL_GPL(dev_forward_skb);
+
+static inline int deliver_skb(struct sk_buff *skb,
+			      struct packet_type *pt_prev,
+			      struct net_device *orig_dev)
+{
+	atomic_inc(&skb->users);
+	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
+}
+
+/*
+ *	Support routine. Sends outgoing frames to any network
+ *	taps currently in use.
+ */
+
+static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct packet_type *ptype;
+	struct sk_buff *skb2 = NULL;
+	struct packet_type *pt_prev = NULL;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(ptype, &ptype_all, list) {
+		/* Never send packets back to the socket
+		 * they originated from - MvS (miquels@drinkel.ow.org)
+		 */
+		if ((ptype->dev == dev || !ptype->dev) &&
+		    (ptype->af_packet_priv == NULL ||
+		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
+			if (pt_prev) {
+				deliver_skb(skb2, pt_prev, skb->dev);
+				pt_prev = ptype;
+				continue;
+			}
+
+			skb2 = skb_clone(skb, GFP_ATOMIC);
+			if (!skb2)
+				break;
+
+			net_timestamp_set(skb2);
+
+			/* skb->nh should be correctly
+			   set by sender, so that the second statement is
+			   just protection against buggy protocols.
+			 */
+			skb_reset_mac_header(skb2);
+
+			if (skb_network_header(skb2) < skb2->data ||
+			    skb2->network_header > skb2->tail) {
+				if (net_ratelimit())
+					pr_crit("protocol %04x is buggy, dev %s\n",
+						ntohs(skb2->protocol),
+						dev->name);
+				skb_reset_network_header(skb2);
+			}
+
+			skb2->transport_header = skb2->network_header;
+			skb2->pkt_type = PACKET_OUTGOING;
+			pt_prev = ptype;
+		}
+	}
+	if (pt_prev)
+		pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
+	rcu_read_unlock();
+}
+
+/* netif_setup_tc - Handle tc mappings on real_num_tx_queues change
+ * @dev: Network device
+ * @txq: number of queues available
+ *
+ * If real_num_tx_queues is changed the tc mappings may no longer be
+ * valid. To resolve this verify the tc mapping remains valid and if
+ * not NULL the mapping. With no priorities mapping to this
+ * offset/count pair it will no longer be used. In the worst case TC0
+ * is invalid nothing can be done so disable priority mappings. If is
+ * expected that drivers will fix this mapping if they can before
+ * calling netif_set_real_num_tx_queues.
+ */
+static void netif_setup_tc(struct net_device *dev, unsigned int txq)
+{
+	int i;
+	struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
+
+	/* If TC0 is invalidated disable TC mapping */
+	if (tc->offset + tc->count > txq) {
+		pr_warn("Number of in use tx queues changed invalidating tc mappings. Priority traffic classification disabled!\n");
+		dev->num_tc = 0;
+		return;
+	}
+
+	/* Invalidated prio to tc mappings set to TC0 */
+	for (i = 1; i < TC_BITMASK + 1; i++) {
+		int q = netdev_get_prio_tc_map(dev, i);
+
+		tc = &dev->tc_to_txq[q];
+		if (tc->offset + tc->count > txq) {
+			pr_warn("Number of in use tx queues changed. Priority %i to tc mapping %i is no longer valid. Setting map to 0\n",
+				i, q);
+			netdev_set_prio_tc_map(dev, i, 0);
+		}
+	}
+}
+
+/*
+ * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
+ * greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
+ */
+int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
+{
+	int rc;
+
+	if (txq < 1 || txq > dev->num_tx_queues)
+		return -EINVAL;
+
+	if (dev->reg_state == NETREG_REGISTERED ||
+	    dev->reg_state == NETREG_UNREGISTERING) {
+		ASSERT_RTNL();
+
+		rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues,
+						  txq);
+		if (rc)
+			return rc;
+
+		if (dev->num_tc)
+			netif_setup_tc(dev, txq);
+
+		if (txq < dev->real_num_tx_queues)
+			qdisc_reset_all_tx_gt(dev, txq);
+	}
+
+	dev->real_num_tx_queues = txq;
+	return 0;
+}
+EXPORT_SYMBOL(netif_set_real_num_tx_queues);
+
+#ifdef CONFIG_RPS
+/**
+ *	netif_set_real_num_rx_queues - set actual number of RX queues used
+ *	@dev: Network device
+ *	@rxq: Actual number of RX queues
+ *
+ *	This must be called either with the rtnl_lock held or before
+ *	registration of the net device.  Returns 0 on success, or a
+ *	negative error code.  If called before registration, it always
+ *	succeeds.
+ */
+int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
+{
+	int rc;
+
+	if (rxq < 1 || rxq > dev->num_rx_queues)
+		return -EINVAL;
+
+	if (dev->reg_state == NETREG_REGISTERED) {
+		ASSERT_RTNL();
+
+		rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues,
+						  rxq);
+		if (rc)
+			return rc;
+	}
+
+	dev->real_num_rx_queues = rxq;
+	return 0;
+}
+EXPORT_SYMBOL(netif_set_real_num_rx_queues);
+#endif
+
+static inline void __netif_reschedule(struct Qdisc *q)
+{
+	struct softnet_data *sd;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	sd = &__get_cpu_var(softnet_data);
+	q->next_sched = NULL;
+	*sd->output_queue_tailp = q;
+	sd->output_queue_tailp = &q->next_sched;
+	raise_softirq_irqoff(NET_TX_SOFTIRQ);
+	local_irq_restore(flags);
+}
+
+void __netif_schedule(struct Qdisc *q)
+{
+	if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
+		__netif_reschedule(q);
+}
+EXPORT_SYMBOL(__netif_schedule);
+
+void dev_kfree_skb_irq(struct sk_buff *skb)
+{
+	if (atomic_dec_and_test(&skb->users)) {
+		struct softnet_data *sd;
+		unsigned long flags;
+
+		local_irq_save(flags);
+		sd = &__get_cpu_var(softnet_data);
+		skb->next = sd->completion_queue;
+		sd->completion_queue = skb;
+		raise_softirq_irqoff(NET_TX_SOFTIRQ);
+		local_irq_restore(flags);
+	}
+}
+EXPORT_SYMBOL(dev_kfree_skb_irq);
+
+void dev_kfree_skb_any(struct sk_buff *skb)
+{
+	if (in_irq() || irqs_disabled())
+		dev_kfree_skb_irq(skb);
+	else
+		dev_kfree_skb(skb);
+}
+EXPORT_SYMBOL(dev_kfree_skb_any);
+
+
+/**
+ * netif_device_detach - mark device as removed
+ * @dev: network device
+ *
+ * Mark device as removed from system and therefore no longer available.
+ */
+void netif_device_detach(struct net_device *dev)
+{
+	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
+	    netif_running(dev)) {
+		netif_tx_stop_all_queues(dev);
+	}
+}
+EXPORT_SYMBOL(netif_device_detach);
+
+/**
+ * netif_device_attach - mark device as attached
+ * @dev: network device
+ *
+ * Mark device as attached from system and restart if needed.
+ */
+void netif_device_attach(struct net_device *dev)
+{
+	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
+	    netif_running(dev)) {
+		netif_tx_wake_all_queues(dev);
+		__netdev_watchdog_up(dev);
+	}
+}
+EXPORT_SYMBOL(netif_device_attach);
+
+static void skb_warn_bad_offload(const struct sk_buff *skb)
+{
+	static const netdev_features_t null_features = 0;
+	struct net_device *dev = skb->dev;
+	const char *driver = "";
+
+	if (dev && dev->dev.parent)
+		driver = dev_driver_string(dev->dev.parent);
+
+	WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d gso_size=%d "
+	     "gso_type=%d ip_summed=%d\n",
+	     driver, dev ? &dev->features : &null_features,
+	     skb->sk ? &skb->sk->sk_route_caps : &null_features,
+	     skb->len, skb->data_len, skb_shinfo(skb)->gso_size,
+	     skb_shinfo(skb)->gso_type, skb->ip_summed);
+}
+
+/*
+ * Invalidate hardware checksum when packet is to be mangled, and
+ * complete checksum manually on outgoing path.
+ */
+int skb_checksum_help(struct sk_buff *skb)
+{
+	__wsum csum;
+	int ret = 0, offset;
+
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
+		goto out_set_summed;
+
+	if (unlikely(skb_shinfo(skb)->gso_size)) {
+		skb_warn_bad_offload(skb);
+		return -EINVAL;
+	}
+
+	offset = skb_checksum_start_offset(skb);
+	BUG_ON(offset >= skb_headlen(skb));
+	csum = skb_checksum(skb, offset, skb->len - offset, 0);
+
+	offset += skb->csum_offset;
+	BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
+
+	if (skb_cloned(skb) &&
+	    !skb_clone_writable(skb, offset + sizeof(__sum16))) {
+		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+		if (ret)
+			goto out;
+	}
+
+	*(__sum16 *)(skb->data + offset) = csum_fold(csum);
+out_set_summed:
+	skb->ip_summed = CHECKSUM_NONE;
+out:
+	return ret;
+}
+EXPORT_SYMBOL(skb_checksum_help);
+
+/**
+ *	skb_gso_segment - Perform segmentation on skb.
+ *	@skb: buffer to segment
+ *	@features: features for the output path (see dev->features)
+ *
+ *	This function segments the given skb and returns a list of segments.
+ *
+ *	It may return NULL if the skb requires no segmentation.  This is
+ *	only possible when GSO is used for verifying header integrity.
+ */
+struct sk_buff *skb_gso_segment(struct sk_buff *skb,
+	netdev_features_t features)
+{
+	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
+	struct packet_type *ptype;
+	__be16 type = skb->protocol;
+	int vlan_depth = ETH_HLEN;
+	int err;
+
+	while (type == htons(ETH_P_8021Q)) {
+		struct vlan_hdr *vh;
+
+		if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
+			return ERR_PTR(-EINVAL);
+
+		vh = (struct vlan_hdr *)(skb->data + vlan_depth);
+		type = vh->h_vlan_encapsulated_proto;
+		vlan_depth += VLAN_HLEN;
+	}
+
+	skb_reset_mac_header(skb);
+	skb->mac_len = skb->network_header - skb->mac_header;
+	__skb_pull(skb, skb->mac_len);
+
+	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
+		skb_warn_bad_offload(skb);
+
+		if (skb_header_cloned(skb) &&
+		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
+			return ERR_PTR(err);
+	}
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(ptype,
+			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
+		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
+			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
+				err = ptype->gso_send_check(skb);
+				segs = ERR_PTR(err);
+				if (err || skb_gso_ok(skb, features))
+					break;
+				__skb_push(skb, (skb->data -
+						 skb_network_header(skb)));
+			}
+			segs = ptype->gso_segment(skb, features);
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	__skb_push(skb, skb->data - skb_mac_header(skb));
+
+	return segs;
+}
+EXPORT_SYMBOL(skb_gso_segment);
+
+/* Take action when hardware reception checksum errors are detected. */
+#ifdef CONFIG_BUG
+void netdev_rx_csum_fault(struct net_device *dev)
+{
+	if (net_ratelimit()) {
+		pr_err("%s: hw csum failure\n", dev ? dev->name : "<unknown>");
+		dump_stack();
+	}
+}
+EXPORT_SYMBOL(netdev_rx_csum_fault);
+#endif
+
+/* Actually, we should eliminate this check as soon as we know, that:
+ * 1. IOMMU is present and allows to map all the memory.
+ * 2. No high memory really exists on this machine.
+ */
+
+static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
+{
+#ifdef CONFIG_HIGHMEM
+	int i;
+	if (!(dev->features & NETIF_F_HIGHDMA)) {
+		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+			if (PageHighMem(skb_frag_page(frag)))
+				return 1;
+		}
+	}
+
+	if (PCI_DMA_BUS_IS_PHYS) {
+		struct device *pdev = dev->dev.parent;
+
+		if (!pdev)
+			return 0;
+		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+			dma_addr_t addr = page_to_phys(skb_frag_page(frag));
+			if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask)
+				return 1;
+		}
+	}
+#endif
+	return 0;
+}
+
+struct dev_gso_cb {
+	void (*destructor)(struct sk_buff *skb);
+};
+
+#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
+
+static void dev_gso_skb_destructor(struct sk_buff *skb)
+{
+	struct dev_gso_cb *cb;
+
+	do {
+		struct sk_buff *nskb = skb->next;
+
+		skb->next = nskb->next;
+		nskb->next = NULL;
+		kfree_skb(nskb);
+	} while (skb->next);
+
+	cb = DEV_GSO_CB(skb);
+	if (cb->destructor)
+		cb->destructor(skb);
+}
+
+/**
+ *	dev_gso_segment - Perform emulated hardware segmentation on skb.
+ *	@skb: buffer to segment
+ *	@features: device features as applicable to this skb
+ *
+ *	This function segments the given skb and stores the list of segments
+ *	in skb->next.
+ */
+static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
+{
+	struct sk_buff *segs;
+
+	segs = skb_gso_segment(skb, features);
+
+	/* Verifying header integrity only. */
+	if (!segs)
+		return 0;
+
+	if (IS_ERR(segs))
+		return PTR_ERR(segs);
+
+	skb->next = segs;
+	DEV_GSO_CB(skb)->destructor = skb->destructor;
+	skb->destructor = dev_gso_skb_destructor;
+
+	return 0;
+}
+
+static bool can_checksum_protocol(netdev_features_t features, __be16 protocol)
+{
+	return ((features & NETIF_F_GEN_CSUM) ||
+		((features & NETIF_F_V4_CSUM) &&
+		 protocol == htons(ETH_P_IP)) ||
+		((features & NETIF_F_V6_CSUM) &&
+		 protocol == htons(ETH_P_IPV6)) ||
+		((features & NETIF_F_FCOE_CRC) &&
+		 protocol == htons(ETH_P_FCOE)));
+}
+
+static netdev_features_t harmonize_features(struct sk_buff *skb,
+	__be16 protocol, netdev_features_t features)
+{
+	if (!can_checksum_protocol(features, protocol)) {
+		features &= ~NETIF_F_ALL_CSUM;
+		features &= ~NETIF_F_SG;
+	} else if (illegal_highdma(skb->dev, skb)) {
+		features &= ~NETIF_F_SG;
+	}
+
+	return features;
+}
+
+netdev_features_t netif_skb_features(struct sk_buff *skb)
+{
+	__be16 protocol = skb->protocol;
+	netdev_features_t features = skb->dev->features;
+
+	if (protocol == htons(ETH_P_8021Q)) {
+		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
+		protocol = veh->h_vlan_encapsulated_proto;
+	} else if (!vlan_tx_tag_present(skb)) {
+		return harmonize_features(skb, protocol, features);
+	}
+
+	features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_TX);
+
+	if (protocol != htons(ETH_P_8021Q)) {
+		return harmonize_features(skb, protocol, features);
+	} else {
+		features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST |
+				NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_TX;
+		return harmonize_features(skb, protocol, features);
+	}
+}
+EXPORT_SYMBOL(netif_skb_features);
+
+/*
+ * Returns true if either:
+ *	1. skb has frag_list and the device doesn't support FRAGLIST, or
+ *	2. skb is fragmented and the device does not support SG, or if
+ *	   at least one of fragments is in highmem and device does not
+ *	   support DMA from it.
+ */
+static inline int skb_needs_linearize(struct sk_buff *skb,
+				      int features)
+{
+	return skb_is_nonlinear(skb) &&
+			((skb_has_frag_list(skb) &&
+				!(features & NETIF_F_FRAGLIST)) ||
+			(skb_shinfo(skb)->nr_frags &&
+				!(features & NETIF_F_SG)));
+}
+
+int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
+			struct netdev_queue *txq)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+	int rc = NETDEV_TX_OK;
+	unsigned int skb_len;
+
+	if (likely(!skb->next)) {
+		netdev_features_t features;
+
+		/*
+		 * If device doesn't need skb->dst, release it right now while
+		 * its hot in this cpu cache
+		 */
+		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
+			skb_dst_drop(skb);
+
+		if (!list_empty(&ptype_all))
+			dev_queue_xmit_nit(skb, dev);
+
+		features = netif_skb_features(skb);
+
+		if (vlan_tx_tag_present(skb) &&
+		    !(features & NETIF_F_HW_VLAN_TX)) {
+			skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
+			if (unlikely(!skb))
+				goto out;
+
+			skb->vlan_tci = 0;
+		}
+
+		if (netif_needs_gso(skb, features)) {
+			if (unlikely(dev_gso_segment(skb, features)))
+				goto out_kfree_skb;
+			if (skb->next)
+				goto gso;
+		} else {
+			if (skb_needs_linearize(skb, features) &&
+			    __skb_linearize(skb))
+				goto out_kfree_skb;
+
+			/* If packet is not checksummed and device does not
+			 * support checksumming for this protocol, complete
+			 * checksumming here.
+			 */
+			if (skb->ip_summed == CHECKSUM_PARTIAL) {
+				skb_set_transport_header(skb,
+					skb_checksum_start_offset(skb));
+				if (!(features & NETIF_F_ALL_CSUM) &&
+				     skb_checksum_help(skb))
+					goto out_kfree_skb;
+			}
+		}
+
+		skb_len = skb->len;
+		rc = ops->ndo_start_xmit(skb, dev);
+		trace_net_dev_xmit(skb, rc, dev, skb_len);
+		if (rc == NETDEV_TX_OK)
+			txq_trans_update(txq);
+		return rc;
+	}
+
+gso:
+	do {
+		struct sk_buff *nskb = skb->next;
+
+		skb->next = nskb->next;
+		nskb->next = NULL;
+
+		/*
+		 * If device doesn't need nskb->dst, release it right now while
+		 * its hot in this cpu cache
+		 */
+		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
+			skb_dst_drop(nskb);
+
+		skb_len = nskb->len;
+		rc = ops->ndo_start_xmit(nskb, dev);
+		trace_net_dev_xmit(nskb, rc, dev, skb_len);
+		if (unlikely(rc != NETDEV_TX_OK)) {
+			if (rc & ~NETDEV_TX_MASK)
+				goto out_kfree_gso_skb;
+			nskb->next = skb->next;
+			skb->next = nskb;
+			return rc;
+		}
+		txq_trans_update(txq);
+		if (unlikely(netif_xmit_stopped(txq) && skb->next))
+			return NETDEV_TX_BUSY;
+	} while (skb->next);
+
+out_kfree_gso_skb:
+	if (likely(skb->next == NULL))
+		skb->destructor = DEV_GSO_CB(skb)->destructor;
+out_kfree_skb:
+	kfree_skb(skb);
+out:
+	return rc;
+}
+
+static u32 hashrnd __read_mostly;
+
+/*
+ * Returns a Tx hash based on the given packet descriptor a Tx queues' number
+ * to be used as a distribution range.
+ */
+u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
+		  unsigned int num_tx_queues)
+{
+	u32 hash;
+	u16 qoffset = 0;
+	u16 qcount = num_tx_queues;
+
+	if (skb_rx_queue_recorded(skb)) {
+		hash = skb_get_rx_queue(skb);
+		while (unlikely(hash >= num_tx_queues))
+			hash -= num_tx_queues;
+		return hash;
+	}
+
+	if (dev->num_tc) {
+		u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
+		qoffset = dev->tc_to_txq[tc].offset;
+		qcount = dev->tc_to_txq[tc].count;
+	}
+
+	if (skb->sk && skb->sk->sk_hash)
+		hash = skb->sk->sk_hash;
+	else
+		hash = (__force u16) skb->protocol;
+	hash = jhash_1word(hash, hashrnd);
+
+	return (u16) (((u64) hash * qcount) >> 32) + qoffset;
+}
+EXPORT_SYMBOL(__skb_tx_hash);
+
+static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
+{
+	if (unlikely(queue_index >= dev->real_num_tx_queues)) {
+		if (net_ratelimit()) {
+			pr_warn("%s selects TX queue %d, but real number of TX queues is %d\n",
+				dev->name, queue_index,
+				dev->real_num_tx_queues);
+		}
+		return 0;
+	}
+	return queue_index;
+}
+
+static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
+{
+#ifdef CONFIG_XPS
+	struct xps_dev_maps *dev_maps;
+	struct xps_map *map;
+	int queue_index = -1;
+
+	rcu_read_lock();
+	dev_maps = rcu_dereference(dev->xps_maps);
+	if (dev_maps) {
+		map = rcu_dereference(
+		    dev_maps->cpu_map[raw_smp_processor_id()]);
+		if (map) {
+			if (map->len == 1)
+				queue_index = map->queues[0];
+			else {
+				u32 hash;
+				if (skb->sk && skb->sk->sk_hash)
+					hash = skb->sk->sk_hash;
+				else
+					hash = (__force u16) skb->protocol ^
+					    skb->rxhash;
+				hash = jhash_1word(hash, hashrnd);
+				queue_index = map->queues[
+				    ((u64)hash * map->len) >> 32];
+			}
+			if (unlikely(queue_index >= dev->real_num_tx_queues))
+				queue_index = -1;
+		}
+	}
+	rcu_read_unlock();
+
+	return queue_index;
+#else
+	return -1;
+#endif
+}
+
+static struct netdev_queue *dev_pick_tx(struct net_device *dev,
+					struct sk_buff *skb)
+{
+	int queue_index;
+	const struct net_device_ops *ops = dev->netdev_ops;
+
+	if (dev->real_num_tx_queues == 1)
+		queue_index = 0;
+	else if (ops->ndo_select_queue) {
+		queue_index = ops->ndo_select_queue(dev, skb);
+		queue_index = dev_cap_txqueue(dev, queue_index);
+	} else {
+		struct sock *sk = skb->sk;
+		queue_index = sk_tx_queue_get(sk);
+
+		if (queue_index < 0 || skb->ooo_okay ||
+		    queue_index >= dev->real_num_tx_queues) {
+			int old_index = queue_index;
+
+			queue_index = get_xps_queue(dev, skb);
+			if (queue_index < 0)
+				queue_index = skb_tx_hash(dev, skb);
+
+			if (queue_index != old_index && sk) {
+				struct dst_entry *dst =
+				    rcu_dereference_check(sk->sk_dst_cache, 1);
+
+				if (dst && skb_dst(skb) == dst)
+					sk_tx_queue_set(sk, queue_index);
+			}
+		}
+	}
+
+	skb_set_queue_mapping(skb, queue_index);
+	return netdev_get_tx_queue(dev, queue_index);
+}
+
+static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
+				 struct net_device *dev,
+				 struct netdev_queue *txq)
+{
+	spinlock_t *root_lock = qdisc_lock(q);
+	bool contended;
+	int rc;
+
+	qdisc_skb_cb(skb)->pkt_len = skb->len;
+	qdisc_calculate_pkt_len(skb, q);
+	/*
+	 * Heuristic to force contended enqueues to serialize on a
+	 * separate lock before trying to get qdisc main lock.
+	 * This permits __QDISC_STATE_RUNNING owner to get the lock more often
+	 * and dequeue packets faster.
+	 */
+	contended = qdisc_is_running(q);
+	if (unlikely(contended))
+		spin_lock(&q->busylock);
+
+	spin_lock(root_lock);
+	if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
+		kfree_skb(skb);
+		rc = NET_XMIT_DROP;
+	} else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
+		   qdisc_run_begin(q)) {
+		/*
+		 * This is a work-conserving queue; there are no old skbs
+		 * waiting to be sent out; and the qdisc is not running -
+		 * xmit the skb directly.
+		 */
+		if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
+			skb_dst_force(skb);
+
+		qdisc_bstats_update(q, skb);
+
+		if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
+			if (unlikely(contended)) {
+				spin_unlock(&q->busylock);
+				contended = false;
+			}
+			__qdisc_run(q);
+		} else
+			qdisc_run_end(q);
+
+		rc = NET_XMIT_SUCCESS;
+	} else {
+		skb_dst_force(skb);
+		rc = q->enqueue(skb, q) & NET_XMIT_MASK;
+		if (qdisc_run_begin(q)) {
+			if (unlikely(contended)) {
+				spin_unlock(&q->busylock);
+				contended = false;
+			}
+			__qdisc_run(q);
+		}
+	}
+	spin_unlock(root_lock);
+	if (unlikely(contended))
+		spin_unlock(&q->busylock);
+	return rc;
+}
+
+#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
+static void skb_update_prio(struct sk_buff *skb)
+{
+	struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap);
+
+	if ((!skb->priority) && (skb->sk) && map)
+		skb->priority = map->priomap[skb->sk->sk_cgrp_prioidx];
+}
+#else
+#define skb_update_prio(skb)
+#endif
+
+static DEFINE_PER_CPU(int, xmit_recursion);
+#define RECURSION_LIMIT 10
+
+/**
+ *	dev_queue_xmit - transmit a buffer
+ *	@skb: buffer to transmit
+ *
+ *	Queue a buffer for transmission to a network device. The caller must
+ *	have set the device and priority and built the buffer before calling
+ *	this function. The function can be called from an interrupt.
+ *
+ *	A negative errno code is returned on a failure. A success does not
+ *	guarantee the frame will be transmitted as it may be dropped due
+ *	to congestion or traffic shaping.
+ *
+ * -----------------------------------------------------------------------------------
+ *      I notice this method can also return errors from the queue disciplines,
+ *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
+ *      be positive.
+ *
+ *      Regardless of the return value, the skb is consumed, so it is currently
+ *      difficult to retry a send to this method.  (You can bump the ref count
+ *      before sending to hold a reference for retry if you are careful.)
+ *
+ *      When calling this method, interrupts MUST be enabled.  This is because
+ *      the BH enable code must have IRQs enabled so that it will not deadlock.
+ *          --BLG
+ */
+int dev_queue_xmit(struct sk_buff *skb)
+{
+	struct net_device *dev = skb->dev;
+	struct netdev_queue *txq;
+	struct Qdisc *q;
+	int rc = -ENOMEM;
+
+	/* Disable soft irqs for various locks below. Also
+	 * stops preemption for RCU.
+	 */
+	rcu_read_lock_bh();
+
+	skb_update_prio(skb);
+
+	txq = dev_pick_tx(dev, skb);
+	q = rcu_dereference_bh(txq->qdisc);
+
+#ifdef CONFIG_NET_CLS_ACT
+	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
+#endif
+	trace_net_dev_queue(skb);
+	if (q->enqueue) {
+		rc = __dev_xmit_skb(skb, q, dev, txq);
+		goto out;
+	}
+
+	/* The device has no queue. Common case for software devices:
+	   loopback, all the sorts of tunnels...
+
+	   Really, it is unlikely that netif_tx_lock protection is necessary
+	   here.  (f.e. loopback and IP tunnels are clean ignoring statistics
+	   counters.)
+	   However, it is possible, that they rely on protection
+	   made by us here.
+
+	   Check this and shot the lock. It is not prone from deadlocks.
+	   Either shot noqueue qdisc, it is even simpler 8)
+	 */
+	if (dev->flags & IFF_UP) {
+		int cpu = smp_processor_id(); /* ok because BHs are off */
+
+		if (txq->xmit_lock_owner != cpu) {
+
+			if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
+				goto recursion_alert;
+
+			HARD_TX_LOCK(dev, txq, cpu);
+
+			if (!netif_xmit_stopped(txq)) {
+				__this_cpu_inc(xmit_recursion);
+				rc = dev_hard_start_xmit(skb, dev, txq);
+				__this_cpu_dec(xmit_recursion);
+				if (dev_xmit_complete(rc)) {
+					HARD_TX_UNLOCK(dev, txq);
+					goto out;
+				}
+			}
+			HARD_TX_UNLOCK(dev, txq);
+			if (net_ratelimit())
+				pr_crit("Virtual device %s asks to queue packet!\n",
+					dev->name);
+		} else {
+			/* Recursion is detected! It is possible,
+			 * unfortunately
+			 */
+recursion_alert:
+			if (net_ratelimit())
+				pr_crit("Dead loop on virtual device %s, fix it urgently!\n",
+					dev->name);
+		}
+	}
+
+	rc = -ENETDOWN;
+	rcu_read_unlock_bh();
+
+	kfree_skb(skb);
+	return rc;
+out:
+	rcu_read_unlock_bh();
+	return rc;
+}
+EXPORT_SYMBOL(dev_queue_xmit);
+
+
+/*=======================================================================
+			Receiver routines
+  =======================================================================*/
+
+int netdev_max_backlog __read_mostly = 1000;
+int netdev_tstamp_prequeue __read_mostly = 1;
+int netdev_budget __read_mostly = 300;
+int weight_p __read_mostly = 64;            /* old backlog weight */
+
+/* Called with irq disabled */
+static inline void ____napi_schedule(struct softnet_data *sd,
+				     struct napi_struct *napi)
+{
+	list_add_tail(&napi->poll_list, &sd->poll_list);
+	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
+}
+
+/*
+ * __skb_get_rxhash: calculate a flow hash based on src/dst addresses
+ * and src/dst port numbers.  Sets rxhash in skb to non-zero hash value
+ * on success, zero indicates no valid hash.  Also, sets l4_rxhash in skb
+ * if hash is a canonical 4-tuple hash over transport ports.
+ */
+void __skb_get_rxhash(struct sk_buff *skb)
+{
+	struct flow_keys keys;
+	u32 hash;
+
+	if (!skb_flow_dissect(skb, &keys))
+		return;
+
+	if (keys.ports) {
+		if ((__force u16)keys.port16[1] < (__force u16)keys.port16[0])
+			swap(keys.port16[0], keys.port16[1]);
+		skb->l4_rxhash = 1;
+	}
+
+	/* get a consistent hash (same value on both flow directions) */
+	if ((__force u32)keys.dst < (__force u32)keys.src)
+		swap(keys.dst, keys.src);
+
+	hash = jhash_3words((__force u32)keys.dst,
+			    (__force u32)keys.src,
+			    (__force u32)keys.ports, hashrnd);
+	if (!hash)
+		hash = 1;
+
+	skb->rxhash = hash;
+}
+EXPORT_SYMBOL(__skb_get_rxhash);
+
+#ifdef CONFIG_RPS
+
+/* One global table that all flow-based protocols share. */
+struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
+EXPORT_SYMBOL(rps_sock_flow_table);
+
+struct static_key rps_needed __read_mostly;
+
+static struct rps_dev_flow *
+set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
+	    struct rps_dev_flow *rflow, u16 next_cpu)
+{
+	if (next_cpu != RPS_NO_CPU) {
+#ifdef CONFIG_RFS_ACCEL
+		struct netdev_rx_queue *rxqueue;
+		struct rps_dev_flow_table *flow_table;
+		struct rps_dev_flow *old_rflow;
+		u32 flow_id;
+		u16 rxq_index;
+		int rc;
+
+		/* Should we steer this flow to a different hardware queue? */
+		if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap ||
+		    !(dev->features & NETIF_F_NTUPLE))
+			goto out;
+		rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu);
+		if (rxq_index == skb_get_rx_queue(skb))
+			goto out;
+
+		rxqueue = dev->_rx + rxq_index;
+		flow_table = rcu_dereference(rxqueue->rps_flow_table);
+		if (!flow_table)
+			goto out;
+		flow_id = skb->rxhash & flow_table->mask;
+		rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb,
+							rxq_index, flow_id);
+		if (rc < 0)
+			goto out;
+		old_rflow = rflow;
+		rflow = &flow_table->flows[flow_id];
+		rflow->filter = rc;
+		if (old_rflow->filter == rflow->filter)
+			old_rflow->filter = RPS_NO_FILTER;
+	out:
+#endif
+		rflow->last_qtail =
+			per_cpu(softnet_data, next_cpu).input_queue_head;
+	}
+
+	rflow->cpu = next_cpu;
+	return rflow;
+}
+
+/*
+ * get_rps_cpu is called from netif_receive_skb and returns the target
+ * CPU from the RPS map of the receiving queue for a given skb.
+ * rcu_read_lock must be held on entry.
+ */
+static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
+		       struct rps_dev_flow **rflowp)
+{
+	struct netdev_rx_queue *rxqueue;
+	struct rps_map *map;
+	struct rps_dev_flow_table *flow_table;
+	struct rps_sock_flow_table *sock_flow_table;
+	int cpu = -1;
+	u16 tcpu;
+
+	if (skb_rx_queue_recorded(skb)) {
+		u16 index = skb_get_rx_queue(skb);
+		if (unlikely(index >= dev->real_num_rx_queues)) {
+			WARN_ONCE(dev->real_num_rx_queues > 1,
+				  "%s received packet on queue %u, but number "
+				  "of RX queues is %u\n",
+				  dev->name, index, dev->real_num_rx_queues);
+			goto done;
+		}
+		rxqueue = dev->_rx + index;
+	} else
+		rxqueue = dev->_rx;
+
+	map = rcu_dereference(rxqueue->rps_map);
+	if (map) {
+		if (map->len == 1 &&
+		    !rcu_access_pointer(rxqueue->rps_flow_table)) {
+			tcpu = map->cpus[0];
+			if (cpu_online(tcpu))
+				cpu = tcpu;
+			goto done;
+		}
+	} else if (!rcu_access_pointer(rxqueue->rps_flow_table)) {
+		goto done;
+	}
+
+	skb_reset_network_header(skb);
+	if (!skb_get_rxhash(skb))
+		goto done;
+
+	flow_table = rcu_dereference(rxqueue->rps_flow_table);
+	sock_flow_table = rcu_dereference(rps_sock_flow_table);
+	if (flow_table && sock_flow_table) {
+		u16 next_cpu;
+		struct rps_dev_flow *rflow;
+
+		rflow = &flow_table->flows[skb->rxhash & flow_table->mask];
+		tcpu = rflow->cpu;
+
+		next_cpu = sock_flow_table->ents[skb->rxhash &
+		    sock_flow_table->mask];
+
+		/*
+		 * If the desired CPU (where last recvmsg was done) is
+		 * different from current CPU (one in the rx-queue flow
+		 * table entry), switch if one of the following holds:
+		 *   - Current CPU is unset (equal to RPS_NO_CPU).
+		 *   - Current CPU is offline.
+		 *   - The current CPU's queue tail has advanced beyond the
+		 *     last packet that was enqueued using this table entry.
+		 *     This guarantees that all previous packets for the flow
+		 *     have been dequeued, thus preserving in order delivery.
+		 */
+		if (unlikely(tcpu != next_cpu) &&
+		    (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
+		     ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
+		      rflow->last_qtail)) >= 0))
+			rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
+
+		if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
+			*rflowp = rflow;
+			cpu = tcpu;
+			goto done;
+		}
+	}
+
+	if (map) {
+		tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
+
+		if (cpu_online(tcpu)) {
+			cpu = tcpu;
+			goto done;
+		}
+	}
+
+done:
+	return cpu;
+}
+
+#ifdef CONFIG_RFS_ACCEL
+
+/**
+ * rps_may_expire_flow - check whether an RFS hardware filter may be removed
+ * @dev: Device on which the filter was set
+ * @rxq_index: RX queue index
+ * @flow_id: Flow ID passed to ndo_rx_flow_steer()
+ * @filter_id: Filter ID returned by ndo_rx_flow_steer()
+ *
+ * Drivers that implement ndo_rx_flow_steer() should periodically call
+ * this function for each installed filter and remove the filters for
+ * which it returns %true.
+ */
+bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
+			 u32 flow_id, u16 filter_id)
+{
+	struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index;
+	struct rps_dev_flow_table *flow_table;
+	struct rps_dev_flow *rflow;
+	bool expire = true;
+	int cpu;
+
+	rcu_read_lock();
+	flow_table = rcu_dereference(rxqueue->rps_flow_table);
+	if (flow_table && flow_id <= flow_table->mask) {
+		rflow = &flow_table->flows[flow_id];
+		cpu = ACCESS_ONCE(rflow->cpu);
+		if (rflow->filter == filter_id && cpu != RPS_NO_CPU &&
+		    ((int)(per_cpu(softnet_data, cpu).input_queue_head -
+			   rflow->last_qtail) <
+		     (int)(10 * flow_table->mask)))
+			expire = false;
+	}
+	rcu_read_unlock();
+	return expire;
+}
+EXPORT_SYMBOL(rps_may_expire_flow);
+
+#endif /* CONFIG_RFS_ACCEL */
+
+/* Called from hardirq (IPI) context */
+static void rps_trigger_softirq(void *data)
+{
+	struct softnet_data *sd = data;
+
+	____napi_schedule(sd, &sd->backlog);
+	sd->received_rps++;
+}
+
+#endif /* CONFIG_RPS */
+
+/*
+ * Check if this softnet_data structure is another cpu one
+ * If yes, queue it to our IPI list and return 1
+ * If no, return 0
+ */
+static int rps_ipi_queued(struct softnet_data *sd)
+{
+#ifdef CONFIG_RPS
+	struct softnet_data *mysd = &__get_cpu_var(softnet_data);
+
+	if (sd != mysd) {
+		sd->rps_ipi_next = mysd->rps_ipi_list;
+		mysd->rps_ipi_list = sd;
+
+		__raise_softirq_irqoff(NET_RX_SOFTIRQ);
+		return 1;
+	}
+#endif /* CONFIG_RPS */
+	return 0;
+}
+
+/*
+ * enqueue_to_backlog is called to queue an skb to a per CPU backlog
+ * queue (may be a remote CPU queue).
+ */
+static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
+			      unsigned int *qtail)
+{
+	struct softnet_data *sd;
+	unsigned long flags;
+
+	sd = &per_cpu(softnet_data, cpu);
+
+	local_irq_save(flags);
+
+	rps_lock(sd);
+	if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) {
+		if (skb_queue_len(&sd->input_pkt_queue)) {
+enqueue:
+			__skb_queue_tail(&sd->input_pkt_queue, skb);
+			input_queue_tail_incr_save(sd, qtail);
+			rps_unlock(sd);
+			local_irq_restore(flags);
+			return NET_RX_SUCCESS;
+		}
+
+		/* Schedule NAPI for backlog device
+		 * We can use non atomic operation since we own the queue lock
+		 */
+		if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) {
+			if (!rps_ipi_queued(sd))
+				____napi_schedule(sd, &sd->backlog);
+		}
+		goto enqueue;
+	}
+
+	sd->dropped++;
+	rps_unlock(sd);
+
+	local_irq_restore(flags);
+
+	atomic_long_inc(&skb->dev->rx_dropped);
+	kfree_skb(skb);
+	return NET_RX_DROP;
+}
+
+/**
+ *	netif_rx	-	post buffer to the network code
+ *	@skb: buffer to post
+ *
+ *	This function receives a packet from a device driver and queues it for
+ *	the upper (protocol) levels to process.  It always succeeds. The buffer
+ *	may be dropped during processing for congestion control or by the
+ *	protocol layers.
+ *
+ *	return values:
+ *	NET_RX_SUCCESS	(no congestion)
+ *	NET_RX_DROP     (packet was dropped)
+ *
+ */
+
+int netif_rx(struct sk_buff *skb)
+{
+	int ret;
+
+	/* if netpoll wants it, pretend we never saw it */
+	if (netpoll_rx(skb))
+		return NET_RX_DROP;
+
+	net_timestamp_check(netdev_tstamp_prequeue, skb);
+
+	trace_netif_rx(skb);
+#ifdef CONFIG_RPS
+	if (static_key_false(&rps_needed)) {
+		struct rps_dev_flow voidflow, *rflow = &voidflow;
+		int cpu;
+
+		preempt_disable();
+		rcu_read_lock();
+
+		cpu = get_rps_cpu(skb->dev, skb, &rflow);
+		if (cpu < 0)
+			cpu = smp_processor_id();
+
+		ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
+
+		rcu_read_unlock();
+		preempt_enable();
+	} else
+#endif
+	{
+		unsigned int qtail;
+		ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
+		put_cpu();
+	}
+	return ret;
+}
+EXPORT_SYMBOL(netif_rx);
+
+int netif_rx_ni(struct sk_buff *skb)
+{
+	int err;
+
+	preempt_disable();
+	err = netif_rx(skb);
+	if (local_softirq_pending())
+		do_softirq();
+	preempt_enable();
+
+	return err;
+}
+EXPORT_SYMBOL(netif_rx_ni);
+
+static void net_tx_action(struct softirq_action *h)
+{
+	struct softnet_data *sd = &__get_cpu_var(softnet_data);
+
+	if (sd->completion_queue) {
+		struct sk_buff *clist;
+
+		local_irq_disable();
+		clist = sd->completion_queue;
+		sd->completion_queue = NULL;
+		local_irq_enable();
+
+		while (clist) {
+			struct sk_buff *skb = clist;
+			clist = clist->next;
+
+			WARN_ON(atomic_read(&skb->users));
+			trace_kfree_skb(skb, net_tx_action);
+			__kfree_skb(skb);
+		}
+	}
+
+	if (sd->output_queue) {
+		struct Qdisc *head;
+
+		local_irq_disable();
+		head = sd->output_queue;
+		sd->output_queue = NULL;
+		sd->output_queue_tailp = &sd->output_queue;
+		local_irq_enable();
+
+		while (head) {
+			struct Qdisc *q = head;
+			spinlock_t *root_lock;
+
+			head = head->next_sched;
+
+			root_lock = qdisc_lock(q);
+			if (spin_trylock(root_lock)) {
+				smp_mb__before_clear_bit();
+				clear_bit(__QDISC_STATE_SCHED,
+					  &q->state);
+				qdisc_run(q);
+				spin_unlock(root_lock);
+			} else {
+				if (!test_bit(__QDISC_STATE_DEACTIVATED,
+					      &q->state)) {
+					__netif_reschedule(q);
+				} else {
+					smp_mb__before_clear_bit();
+					clear_bit(__QDISC_STATE_SCHED,
+						  &q->state);
+				}
+			}
+		}
+	}
+}
+
+#if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
+    (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
+/* This hook is defined here for ATM LANE */
+int (*br_fdb_test_addr_hook)(struct net_device *dev,
+			     unsigned char *addr) __read_mostly;
+EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
+#endif
+
+#ifdef CONFIG_NET_CLS_ACT
+/* TODO: Maybe we should just force sch_ingress to be compiled in
+ * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
+ * a compare and 2 stores extra right now if we dont have it on
+ * but have CONFIG_NET_CLS_ACT
+ * NOTE: This doesn't stop any functionality; if you dont have
+ * the ingress scheduler, you just can't add policies on ingress.
+ *
+ */
+static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq)
+{
+	struct net_device *dev = skb->dev;
+	u32 ttl = G_TC_RTTL(skb->tc_verd);
+	int result = TC_ACT_OK;
+	struct Qdisc *q;
+
+	if (unlikely(MAX_RED_LOOP < ttl++)) {
+		if (net_ratelimit())
+			pr_warn("Redir loop detected Dropping packet (%d->%d)\n",
+				skb->skb_iif, dev->ifindex);
+		return TC_ACT_SHOT;
+	}
+
+	skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
+	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
+
+	q = rxq->qdisc;
+	if (q != &noop_qdisc) {
+		spin_lock(qdisc_lock(q));
+		if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
+			result = qdisc_enqueue_root(skb, q);
+		spin_unlock(qdisc_lock(q));
+	}
+
+	return result;
+}
+
+static inline struct sk_buff *handle_ing(struct sk_buff *skb,
+					 struct packet_type **pt_prev,
+					 int *ret, struct net_device *orig_dev)
+{
+	struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue);
+
+	if (!rxq || rxq->qdisc == &noop_qdisc)
+		goto out;
+
+	if (*pt_prev) {
+		*ret = deliver_skb(skb, *pt_prev, orig_dev);
+		*pt_prev = NULL;
+	}
+
+	switch (ing_filter(skb, rxq)) {
+	case TC_ACT_SHOT:
+	case TC_ACT_STOLEN:
+		kfree_skb(skb);
+		return NULL;
+	}
+
+out:
+	skb->tc_verd = 0;
+	return skb;
+}
+#endif
+
+/**
+ *	netdev_rx_handler_register - register receive handler
+ *	@dev: device to register a handler for
+ *	@rx_handler: receive handler to register
+ *	@rx_handler_data: data pointer that is used by rx handler
+ *
+ *	Register a receive hander for a device. This handler will then be
+ *	called from __netif_receive_skb. A negative errno code is returned
+ *	on a failure.
+ *
+ *	The caller must hold the rtnl_mutex.
+ *
+ *	For a general description of rx_handler, see enum rx_handler_result.
+ */
+int netdev_rx_handler_register(struct net_device *dev,
+			       rx_handler_func_t *rx_handler,
+			       void *rx_handler_data)
+{
+	ASSERT_RTNL();
+
+	if (dev->rx_handler)
+		return -EBUSY;
+
+	rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
+	rcu_assign_pointer(dev->rx_handler, rx_handler);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
+
+/**
+ *	netdev_rx_handler_unregister - unregister receive handler
+ *	@dev: device to unregister a handler from
+ *
+ *	Unregister a receive hander from a device.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+void netdev_rx_handler_unregister(struct net_device *dev)
+{
+
+	ASSERT_RTNL();
+	RCU_INIT_POINTER(dev->rx_handler, NULL);
+	RCU_INIT_POINTER(dev->rx_handler_data, NULL);
+}
+EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
+
+static int __netif_receive_skb(struct sk_buff *skb)
+{
+	struct packet_type *ptype, *pt_prev;
+	rx_handler_func_t *rx_handler;
+	struct net_device *orig_dev;
+	struct net_device *null_or_dev;
+	bool deliver_exact = false;
+	int ret = NET_RX_DROP;
+	__be16 type;
+
+	net_timestamp_check(!netdev_tstamp_prequeue, skb);
+
+	trace_netif_receive_skb(skb);
+
+	/* if we've gotten here through NAPI, check netpoll */
+	if (netpoll_receive_skb(skb))
+		return NET_RX_DROP;
+
+	if (!skb->skb_iif)
+		skb->skb_iif = skb->dev->ifindex;
+	orig_dev = skb->dev;
+
+	skb_reset_network_header(skb);
+	skb_reset_transport_header(skb);
+	skb_reset_mac_len(skb);
+
+	pt_prev = NULL;
+
+	rcu_read_lock();
+
+another_round:
+
+	__this_cpu_inc(softnet_data.processed);
+
+	if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
+		skb = vlan_untag(skb);
+		if (unlikely(!skb))
+			goto out;
+	}
+
+#ifdef CONFIG_NET_CLS_ACT
+	if (skb->tc_verd & TC_NCLS) {
+		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
+		goto ncls;
+	}
+#endif
+
+	list_for_each_entry_rcu(ptype, &ptype_all, list) {
+		if (!ptype->dev || ptype->dev == skb->dev) {
+			if (pt_prev)
+				ret = deliver_skb(skb, pt_prev, orig_dev);
+			pt_prev = ptype;
+		}
+	}
+
+#ifdef CONFIG_NET_CLS_ACT
+	skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
+	if (!skb)
+		goto out;
+ncls:
+#endif
+
+	rx_handler = rcu_dereference(skb->dev->rx_handler);
+	if (vlan_tx_tag_present(skb)) {
+		if (pt_prev) {
+			ret = deliver_skb(skb, pt_prev, orig_dev);
+			pt_prev = NULL;
+		}
+		if (vlan_do_receive(&skb, !rx_handler))
+			goto another_round;
+		else if (unlikely(!skb))
+			goto out;
+	}
+
+	if (rx_handler) {
+		if (pt_prev) {
+			ret = deliver_skb(skb, pt_prev, orig_dev);
+			pt_prev = NULL;
+		}
+		switch (rx_handler(&skb)) {
+		case RX_HANDLER_CONSUMED:
+			goto out;
+		case RX_HANDLER_ANOTHER:
+			goto another_round;
+		case RX_HANDLER_EXACT:
+			deliver_exact = true;
+		case RX_HANDLER_PASS:
+			break;
+		default:
+			BUG();
+		}
+	}
+
+	/* deliver only exact match when indicated */
+	null_or_dev = deliver_exact ? skb->dev : NULL;
+
+	type = skb->protocol;
+	list_for_each_entry_rcu(ptype,
+			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
+		if (ptype->type == type &&
+		    (ptype->dev == null_or_dev || ptype->dev == skb->dev ||
+		     ptype->dev == orig_dev)) {
+			if (pt_prev)
+				ret = deliver_skb(skb, pt_prev, orig_dev);
+			pt_prev = ptype;
+		}
+	}
+
+	if (pt_prev) {
+		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
+	} else {
+		atomic_long_inc(&skb->dev->rx_dropped);
+		kfree_skb(skb);
+		/* Jamal, now you will not able to escape explaining
+		 * me how you were going to use this. :-)
+		 */
+		ret = NET_RX_DROP;
+	}
+
+out:
+	rcu_read_unlock();
+	return ret;
+}
+
+/**
+ *	netif_receive_skb - process receive buffer from network
+ *	@skb: buffer to process
+ *
+ *	netif_receive_skb() is the main receive data processing function.
+ *	It always succeeds. The buffer may be dropped during processing
+ *	for congestion control or by the protocol layers.
+ *
+ *	This function may only be called from softirq context and interrupts
+ *	should be enabled.
+ *
+ *	Return values (usually ignored):
+ *	NET_RX_SUCCESS: no congestion
+ *	NET_RX_DROP: packet was dropped
+ */
+int netif_receive_skb(struct sk_buff *skb)
+{
+	net_timestamp_check(netdev_tstamp_prequeue, skb);
+
+	if (skb_defer_rx_timestamp(skb))
+		return NET_RX_SUCCESS;
+
+#ifdef CONFIG_RPS
+	if (static_key_false(&rps_needed)) {
+		struct rps_dev_flow voidflow, *rflow = &voidflow;
+		int cpu, ret;
+
+		rcu_read_lock();
+
+		cpu = get_rps_cpu(skb->dev, skb, &rflow);
+
+		if (cpu >= 0) {
+			ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
+			rcu_read_unlock();
+			return ret;
+		}
+		rcu_read_unlock();
+	}
+#endif
+	return __netif_receive_skb(skb);
+}
+EXPORT_SYMBOL(netif_receive_skb);
+
+/* Network device is going away, flush any packets still pending
+ * Called with irqs disabled.
+ */
+static void flush_backlog(void *arg)
+{
+	struct net_device *dev = arg;
+	struct softnet_data *sd = &__get_cpu_var(softnet_data);
+	struct sk_buff *skb, *tmp;
+
+	rps_lock(sd);
+	skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
+		if (skb->dev == dev) {
+			__skb_unlink(skb, &sd->input_pkt_queue);
+			kfree_skb(skb);
+			input_queue_head_incr(sd);
+		}
+	}
+	rps_unlock(sd);
+
+	skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
+		if (skb->dev == dev) {
+			__skb_unlink(skb, &sd->process_queue);
+			kfree_skb(skb);
+			input_queue_head_incr(sd);
+		}
+	}
+}
+
+static int napi_gro_complete(struct sk_buff *skb)
+{
+	struct packet_type *ptype;
+	__be16 type = skb->protocol;
+	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
+	int err = -ENOENT;
+
+	if (NAPI_GRO_CB(skb)->count == 1) {
+		skb_shinfo(skb)->gso_size = 0;
+		goto out;
+	}
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(ptype, head, list) {
+		if (ptype->type != type || ptype->dev || !ptype->gro_complete)
+			continue;
+
+		err = ptype->gro_complete(skb);
+		break;
+	}
+	rcu_read_unlock();
+
+	if (err) {
+		WARN_ON(&ptype->list == head);
+		kfree_skb(skb);
+		return NET_RX_SUCCESS;
+	}
+
+out:
+	return netif_receive_skb(skb);
+}
+
+inline void napi_gro_flush(struct napi_struct *napi)
+{
+	struct sk_buff *skb, *next;
+
+	for (skb = napi->gro_list; skb; skb = next) {
+		next = skb->next;
+		skb->next = NULL;
+		napi_gro_complete(skb);
+	}
+
+	napi->gro_count = 0;
+	napi->gro_list = NULL;
+}
+EXPORT_SYMBOL(napi_gro_flush);
+
+enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+{
+	struct sk_buff **pp = NULL;
+	struct packet_type *ptype;
+	__be16 type = skb->protocol;
+	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
+	int same_flow;
+	int mac_len;
+	enum gro_result ret;
+
+	if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb))
+		goto normal;
+
+	if (skb_is_gso(skb) || skb_has_frag_list(skb))
+		goto normal;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(ptype, head, list) {
+		if (ptype->type != type || ptype->dev || !ptype->gro_receive)
+			continue;
+
+		skb_set_network_header(skb, skb_gro_offset(skb));
+		mac_len = skb->network_header - skb->mac_header;
+		skb->mac_len = mac_len;
+		NAPI_GRO_CB(skb)->same_flow = 0;
+		NAPI_GRO_CB(skb)->flush = 0;
+		NAPI_GRO_CB(skb)->free = 0;
+
+		pp = ptype->gro_receive(&napi->gro_list, skb);
+		break;
+	}
+	rcu_read_unlock();
+
+	if (&ptype->list == head)
+		goto normal;
+
+	same_flow = NAPI_GRO_CB(skb)->same_flow;
+	ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
+
+	if (pp) {
+		struct sk_buff *nskb = *pp;
+
+		*pp = nskb->next;
+		nskb->next = NULL;
+		napi_gro_complete(nskb);
+		napi->gro_count--;
+	}
+
+	if (same_flow)
+		goto ok;
+
+	if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS)
+		goto normal;
+
+	napi->gro_count++;
+	NAPI_GRO_CB(skb)->count = 1;
+	skb_shinfo(skb)->gso_size = skb_gro_len(skb);
+	skb->next = napi->gro_list;
+	napi->gro_list = skb;
+	ret = GRO_HELD;
+
+pull:
+	if (skb_headlen(skb) < skb_gro_offset(skb)) {
+		int grow = skb_gro_offset(skb) - skb_headlen(skb);
+
+		BUG_ON(skb->end - skb->tail < grow);
+
+		memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
+
+		skb->tail += grow;
+		skb->data_len -= grow;
+
+		skb_shinfo(skb)->frags[0].page_offset += grow;
+		skb_frag_size_sub(&skb_shinfo(skb)->frags[0], grow);
+
+		if (unlikely(!skb_frag_size(&skb_shinfo(skb)->frags[0]))) {
+			skb_frag_unref(skb, 0);
+			memmove(skb_shinfo(skb)->frags,
+				skb_shinfo(skb)->frags + 1,
+				--skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
+		}
+	}
+
+ok:
+	return ret;
+
+normal:
+	ret = GRO_NORMAL;
+	goto pull;
+}
+EXPORT_SYMBOL(dev_gro_receive);
+
+static inline gro_result_t
+__napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+{
+	struct sk_buff *p;
+	unsigned int maclen = skb->dev->hard_header_len;
+
+	for (p = napi->gro_list; p; p = p->next) {
+		unsigned long diffs;
+
+		diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
+		diffs |= p->vlan_tci ^ skb->vlan_tci;
+		if (maclen == ETH_HLEN)
+			diffs |= compare_ether_header(skb_mac_header(p),
+						      skb_gro_mac_header(skb));
+		else if (!diffs)
+			diffs = memcmp(skb_mac_header(p),
+				       skb_gro_mac_header(skb),
+				       maclen);
+		NAPI_GRO_CB(p)->same_flow = !diffs;
+		NAPI_GRO_CB(p)->flush = 0;
+	}
+
+	return dev_gro_receive(napi, skb);
+}
+
+gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
+{
+	switch (ret) {
+	case GRO_NORMAL:
+		if (netif_receive_skb(skb))
+			ret = GRO_DROP;
+		break;
+
+	case GRO_DROP:
+	case GRO_MERGED_FREE:
+		kfree_skb(skb);
+		break;
+
+	case GRO_HELD:
+	case GRO_MERGED:
+		break;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(napi_skb_finish);
+
+void skb_gro_reset_offset(struct sk_buff *skb)
+{
+	NAPI_GRO_CB(skb)->data_offset = 0;
+	NAPI_GRO_CB(skb)->frag0 = NULL;
+	NAPI_GRO_CB(skb)->frag0_len = 0;
+
+	if (skb->mac_header == skb->tail &&
+	    !PageHighMem(skb_frag_page(&skb_shinfo(skb)->frags[0]))) {
+		NAPI_GRO_CB(skb)->frag0 =
+			skb_frag_address(&skb_shinfo(skb)->frags[0]);
+		NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(&skb_shinfo(skb)->frags[0]);
+	}
+}
+EXPORT_SYMBOL(skb_gro_reset_offset);
+
+gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+{
+	skb_gro_reset_offset(skb);
+
+	return napi_skb_finish(__napi_gro_receive(napi, skb), skb);
+}
+EXPORT_SYMBOL(napi_gro_receive);
+
+static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
+{
+	__skb_pull(skb, skb_headlen(skb));
+	/* restore the reserve we had after netdev_alloc_skb_ip_align() */
+	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb));
+	skb->vlan_tci = 0;
+	skb->dev = napi->dev;
+	skb->skb_iif = 0;
+
+	napi->skb = skb;
+}
+
+struct sk_buff *napi_get_frags(struct napi_struct *napi)
+{
+	struct sk_buff *skb = napi->skb;
+
+	if (!skb) {
+		skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD);
+		if (skb)
+			napi->skb = skb;
+	}
+	return skb;
+}
+EXPORT_SYMBOL(napi_get_frags);
+
+gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
+			       gro_result_t ret)
+{
+	switch (ret) {
+	case GRO_NORMAL:
+	case GRO_HELD:
+		skb->protocol = eth_type_trans(skb, skb->dev);
+
+		if (ret == GRO_HELD)
+			skb_gro_pull(skb, -ETH_HLEN);
+		else if (netif_receive_skb(skb))
+			ret = GRO_DROP;
+		break;
+
+	case GRO_DROP:
+	case GRO_MERGED_FREE:
+		napi_reuse_skb(napi, skb);
+		break;
+
+	case GRO_MERGED:
+		break;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(napi_frags_finish);
+
+struct sk_buff *napi_frags_skb(struct napi_struct *napi)
+{
+	struct sk_buff *skb = napi->skb;
+	struct ethhdr *eth;
+	unsigned int hlen;
+	unsigned int off;
+
+	napi->skb = NULL;
+
+	skb_reset_mac_header(skb);
+	skb_gro_reset_offset(skb);
+
+	off = skb_gro_offset(skb);
+	hlen = off + sizeof(*eth);
+	eth = skb_gro_header_fast(skb, off);
+	if (skb_gro_header_hard(skb, hlen)) {
+		eth = skb_gro_header_slow(skb, hlen, off);
+		if (unlikely(!eth)) {
+			napi_reuse_skb(napi, skb);
+			skb = NULL;
+			goto out;
+		}
+	}
+
+	skb_gro_pull(skb, sizeof(*eth));
+
+	/*
+	 * This works because the only protocols we care about don't require
+	 * special handling.  We'll fix it up properly at the end.
+	 */
+	skb->protocol = eth->h_proto;
+
+out:
+	return skb;
+}
+EXPORT_SYMBOL(napi_frags_skb);
+
+gro_result_t napi_gro_frags(struct napi_struct *napi)
+{
+	struct sk_buff *skb = napi_frags_skb(napi);
+
+	if (!skb)
+		return GRO_DROP;
+
+	return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb));
+}
+EXPORT_SYMBOL(napi_gro_frags);
+
+/*
+ * net_rps_action sends any pending IPI's for rps.
+ * Note: called with local irq disabled, but exits with local irq enabled.
+ */
+static void net_rps_action_and_irq_enable(struct softnet_data *sd)
+{
+#ifdef CONFIG_RPS
+	struct softnet_data *remsd = sd->rps_ipi_list;
+
+	if (remsd) {
+		sd->rps_ipi_list = NULL;
+
+		local_irq_enable();
+
+		/* Send pending IPI's to kick RPS processing on remote cpus. */
+		while (remsd) {
+			struct softnet_data *next = remsd->rps_ipi_next;
+
+			if (cpu_online(remsd->cpu))
+				__smp_call_function_single(remsd->cpu,
+							   &remsd->csd, 0);
+			remsd = next;
+		}
+	} else
+#endif
+		local_irq_enable();
+}
+
+static int process_backlog(struct napi_struct *napi, int quota)
+{
+	int work = 0;
+	struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
+
+#ifdef CONFIG_RPS
+	/* Check if we have pending ipi, its better to send them now,
+	 * not waiting net_rx_action() end.
+	 */
+	if (sd->rps_ipi_list) {
+		local_irq_disable();
+		net_rps_action_and_irq_enable(sd);
+	}
+#endif
+	napi->weight = weight_p;
+	local_irq_disable();
+	while (work < quota) {
+		struct sk_buff *skb;
+		unsigned int qlen;
+
+		while ((skb = __skb_dequeue(&sd->process_queue))) {
+			local_irq_enable();
+			__netif_receive_skb(skb);
+			local_irq_disable();
+			input_queue_head_incr(sd);
+			if (++work >= quota) {
+				local_irq_enable();
+				return work;
+			}
+		}
+
+		rps_lock(sd);
+		qlen = skb_queue_len(&sd->input_pkt_queue);
+		if (qlen)
+			skb_queue_splice_tail_init(&sd->input_pkt_queue,
+						   &sd->process_queue);
+
+		if (qlen < quota - work) {
+			/*
+			 * Inline a custom version of __napi_complete().
+			 * only current cpu owns and manipulates this napi,
+			 * and NAPI_STATE_SCHED is the only possible flag set on backlog.
+			 * we can use a plain write instead of clear_bit(),
+			 * and we dont need an smp_mb() memory barrier.
+			 */
+			list_del(&napi->poll_list);
+			napi->state = 0;
+
+			quota = work + qlen;
+		}
+		rps_unlock(sd);
+	}
+	local_irq_enable();
+
+	return work;
+}
+
+/**
+ * __napi_schedule - schedule for receive
+ * @n: entry to schedule
+ *
+ * The entry's receive function will be scheduled to run
+ */
+void __napi_schedule(struct napi_struct *n)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	____napi_schedule(&__get_cpu_var(softnet_data), n);
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL(__napi_schedule);
+
+void __napi_complete(struct napi_struct *n)
+{
+	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
+	BUG_ON(n->gro_list);
+
+	list_del(&n->poll_list);
+	smp_mb__before_clear_bit();
+	clear_bit(NAPI_STATE_SCHED, &n->state);
+}
+EXPORT_SYMBOL(__napi_complete);
+
+void napi_complete(struct napi_struct *n)
+{
+	unsigned long flags;
+
+	/*
+	 * don't let napi dequeue from the cpu poll list
+	 * just in case its running on a different cpu
+	 */
+	if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
+		return;
+
+	napi_gro_flush(n);
+	local_irq_save(flags);
+	__napi_complete(n);
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL(napi_complete);
+
+void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
+		    int (*poll)(struct napi_struct *, int), int weight)
+{
+	INIT_LIST_HEAD(&napi->poll_list);
+	napi->gro_count = 0;
+	napi->gro_list = NULL;
+	napi->skb = NULL;
+	napi->poll = poll;
+	napi->weight = weight;
+	list_add(&napi->dev_list, &dev->napi_list);
+	napi->dev = dev;
+#ifdef CONFIG_NETPOLL
+	spin_lock_init(&napi->poll_lock);
+	napi->poll_owner = -1;
+#endif
+	set_bit(NAPI_STATE_SCHED, &napi->state);
+}
+EXPORT_SYMBOL(netif_napi_add);
+
+void netif_napi_del(struct napi_struct *napi)
+{
+	struct sk_buff *skb, *next;
+
+	list_del_init(&napi->dev_list);
+	napi_free_frags(napi);
+
+	for (skb = napi->gro_list; skb; skb = next) {
+		next = skb->next;
+		skb->next = NULL;
+		kfree_skb(skb);
+	}
+
+	napi->gro_list = NULL;
+	napi->gro_count = 0;
+}
+EXPORT_SYMBOL(netif_napi_del);
+
+static void net_rx_action(struct softirq_action *h)
+{
+	struct softnet_data *sd = &__get_cpu_var(softnet_data);
+	unsigned long time_limit = jiffies + 2;
+	int budget = netdev_budget;
+	void *have;
+
+	local_irq_disable();
+
+	while (!list_empty(&sd->poll_list)) {
+		struct napi_struct *n;
+		int work, weight;
+
+		/* If softirq window is exhuasted then punt.
+		 * Allow this to run for 2 jiffies since which will allow
+		 * an average latency of 1.5/HZ.
+		 */
+		if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
+			goto softnet_break;
+
+		local_irq_enable();
+
+		/* Even though interrupts have been re-enabled, this
+		 * access is safe because interrupts can only add new
+		 * entries to the tail of this list, and only ->poll()
+		 * calls can remove this head entry from the list.
+		 */
+		n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list);
+
+		have = netpoll_poll_lock(n);
+
+		weight = n->weight;
+
+		/* This NAPI_STATE_SCHED test is for avoiding a race
+		 * with netpoll's poll_napi().  Only the entity which
+		 * obtains the lock and sees NAPI_STATE_SCHED set will
+		 * actually make the ->poll() call.  Therefore we avoid
+		 * accidentally calling ->poll() when NAPI is not scheduled.
+		 */
+		work = 0;
+		if (test_bit(NAPI_STATE_SCHED, &n->state)) {
+			work = n->poll(n, weight);
+			trace_napi_poll(n);
+		}
+
+		WARN_ON_ONCE(work > weight);
+
+		budget -= work;
+
+		local_irq_disable();
+
+		/* Drivers must not modify the NAPI state if they
+		 * consume the entire weight.  In such cases this code
+		 * still "owns" the NAPI instance and therefore can
+		 * move the instance around on the list at-will.
+		 */
+		if (unlikely(work == weight)) {
+			if (unlikely(napi_disable_pending(n))) {
+				local_irq_enable();
+				napi_complete(n);
+				local_irq_disable();
+			} else
+				list_move_tail(&n->poll_list, &sd->poll_list);
+		}
+
+		netpoll_poll_unlock(have);
+	}
+out:
+	net_rps_action_and_irq_enable(sd);
+
+#ifdef CONFIG_NET_DMA
+	/*
+	 * There may not be any more sk_buffs coming right now, so push
+	 * any pending DMA copies to hardware
+	 */
+	dma_issue_pending_all();
+#endif
+
+	return;
+
+softnet_break:
+	sd->time_squeeze++;
+	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
+	goto out;
+}
+
+static gifconf_func_t *gifconf_list[NPROTO];
+
+/**
+ *	register_gifconf	-	register a SIOCGIF handler
+ *	@family: Address family
+ *	@gifconf: Function handler
+ *
+ *	Register protocol dependent address dumping routines. The handler
+ *	that is passed must not be freed or reused until it has been replaced
+ *	by another handler.
+ */
+int register_gifconf(unsigned int family, gifconf_func_t *gifconf)
+{
+	if (family >= NPROTO)
+		return -EINVAL;
+	gifconf_list[family] = gifconf;
+	return 0;
+}
+EXPORT_SYMBOL(register_gifconf);
+
+
+/*
+ *	Map an interface index to its name (SIOCGIFNAME)
+ */
+
+/*
+ *	We need this ioctl for efficient implementation of the
+ *	if_indextoname() function required by the IPv6 API.  Without
+ *	it, we would have to search all the interfaces to find a
+ *	match.  --pb
+ */
+
+static int dev_ifname(struct net *net, struct ifreq __user *arg)
+{
+	struct net_device *dev;
+	struct ifreq ifr;
+
+	/*
+	 *	Fetch the caller's info block.
+	 */
+
+	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
+		return -EFAULT;
+
+	rcu_read_lock();
+	dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex);
+	if (!dev) {
+		rcu_read_unlock();
+		return -ENODEV;
+	}
+
+	strcpy(ifr.ifr_name, dev->name);
+	rcu_read_unlock();
+
+	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
+		return -EFAULT;
+	return 0;
+}
+
+/*
+ *	Perform a SIOCGIFCONF call. This structure will change
+ *	size eventually, and there is nothing I can do about it.
+ *	Thus we will need a 'compatibility mode'.
+ */
+
+static int dev_ifconf(struct net *net, char __user *arg)
+{
+	struct ifconf ifc;
+	struct net_device *dev;
+	char __user *pos;
+	int len;
+	int total;
+	int i;
+
+	/*
+	 *	Fetch the caller's info block.
+	 */
+
+	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
+		return -EFAULT;
+
+	pos = ifc.ifc_buf;
+	len = ifc.ifc_len;
+
+	/*
+	 *	Loop over the interfaces, and write an info block for each.
+	 */
+
+	total = 0;
+	for_each_netdev(net, dev) {
+		for (i = 0; i < NPROTO; i++) {
+			if (gifconf_list[i]) {
+				int done;
+				if (!pos)
+					done = gifconf_list[i](dev, NULL, 0);
+				else
+					done = gifconf_list[i](dev, pos + total,
+							       len - total);
+				if (done < 0)
+					return -EFAULT;
+				total += done;
+			}
+		}
+	}
+
+	/*
+	 *	All done.  Write the updated control block back to the caller.
+	 */
+	ifc.ifc_len = total;
+
+	/*
+	 * 	Both BSD and Solaris return 0 here, so we do too.
+	 */
+	return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
+}
+
+#ifdef CONFIG_PROC_FS
+
+#define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1)
+
+#define get_bucket(x) ((x) >> BUCKET_SPACE)
+#define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1))
+#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
+
+static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff_t *pos)
+{
+	struct net *net = seq_file_net(seq);
+	struct net_device *dev;
+	struct hlist_node *p;
+	struct hlist_head *h;
+	unsigned int count = 0, offset = get_offset(*pos);
+
+	h = &net->dev_name_head[get_bucket(*pos)];
+	hlist_for_each_entry_rcu(dev, p, h, name_hlist) {
+		if (++count == offset)
+			return dev;
+	}
+
+	return NULL;
+}
+
+static inline struct net_device *dev_from_bucket(struct seq_file *seq, loff_t *pos)
+{
+	struct net_device *dev;
+	unsigned int bucket;
+
+	do {
+		dev = dev_from_same_bucket(seq, pos);
+		if (dev)
+			return dev;
+
+		bucket = get_bucket(*pos) + 1;
+		*pos = set_bucket_offset(bucket, 1);
+	} while (bucket < NETDEV_HASHENTRIES);
+
+	return NULL;
+}
+
+/*
+ *	This is invoked by the /proc filesystem handler to display a device
+ *	in detail.
+ */
+void *dev_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(RCU)
+{
+	rcu_read_lock();
+	if (!*pos)
+		return SEQ_START_TOKEN;
+
+	if (get_bucket(*pos) >= NETDEV_HASHENTRIES)
+		return NULL;
+
+	return dev_from_bucket(seq, pos);
+}
+
+void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	++*pos;
+	return dev_from_bucket(seq, pos);
+}
+
+void dev_seq_stop(struct seq_file *seq, void *v)
+	__releases(RCU)
+{
+	rcu_read_unlock();
+}
+
+static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
+{
+	struct rtnl_link_stats64 temp;
+	const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
+
+	seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu "
+		   "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n",
+		   dev->name, stats->rx_bytes, stats->rx_packets,
+		   stats->rx_errors,
+		   stats->rx_dropped + stats->rx_missed_errors,
+		   stats->rx_fifo_errors,
+		   stats->rx_length_errors + stats->rx_over_errors +
+		    stats->rx_crc_errors + stats->rx_frame_errors,
+		   stats->rx_compressed, stats->multicast,
+		   stats->tx_bytes, stats->tx_packets,
+		   stats->tx_errors, stats->tx_dropped,
+		   stats->tx_fifo_errors, stats->collisions,
+		   stats->tx_carrier_errors +
+		    stats->tx_aborted_errors +
+		    stats->tx_window_errors +
+		    stats->tx_heartbeat_errors,
+		   stats->tx_compressed);
+}
+
+/*
+ *	Called from the PROCfs module. This now uses the new arbitrary sized
+ *	/proc/net interface to create /proc/net/dev
+ */
+static int dev_seq_show(struct seq_file *seq, void *v)
+{
+	if (v == SEQ_START_TOKEN)
+		seq_puts(seq, "Inter-|   Receive                            "
+			      "                    |  Transmit\n"
+			      " face |bytes    packets errs drop fifo frame "
+			      "compressed multicast|bytes    packets errs "
+			      "drop fifo colls carrier compressed\n");
+	else
+		dev_seq_printf_stats(seq, v);
+	return 0;
+}
+
+static struct softnet_data *softnet_get_online(loff_t *pos)
+{
+	struct softnet_data *sd = NULL;
+
+	while (*pos < nr_cpu_ids)
+		if (cpu_online(*pos)) {
+			sd = &per_cpu(softnet_data, *pos);
+			break;
+		} else
+			++*pos;
+	return sd;
+}
+
+static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	return softnet_get_online(pos);
+}
+
+static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	++*pos;
+	return softnet_get_online(pos);
+}
+
+static void softnet_seq_stop(struct seq_file *seq, void *v)
+{
+}
+
+static int softnet_seq_show(struct seq_file *seq, void *v)
+{
+	struct softnet_data *sd = v;
+
+	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
+		   sd->processed, sd->dropped, sd->time_squeeze, 0,
+		   0, 0, 0, 0, /* was fastroute */
+		   sd->cpu_collision, sd->received_rps);
+	return 0;
+}
+
+static const struct seq_operations dev_seq_ops = {
+	.start = dev_seq_start,
+	.next  = dev_seq_next,
+	.stop  = dev_seq_stop,
+	.show  = dev_seq_show,
+};
+
+static int dev_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open_net(inode, file, &dev_seq_ops,
+			    sizeof(struct seq_net_private));
+}
+
+static const struct file_operations dev_seq_fops = {
+	.owner	 = THIS_MODULE,
+	.open    = dev_seq_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release_net,
+};
+
+static const struct seq_operations softnet_seq_ops = {
+	.start = softnet_seq_start,
+	.next  = softnet_seq_next,
+	.stop  = softnet_seq_stop,
+	.show  = softnet_seq_show,
+};
+
+static int softnet_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &softnet_seq_ops);
+}
+
+static const struct file_operations softnet_seq_fops = {
+	.owner	 = THIS_MODULE,
+	.open    = softnet_seq_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release,
+};
+
+static void *ptype_get_idx(loff_t pos)
+{
+	struct packet_type *pt = NULL;
+	loff_t i = 0;
+	int t;
+
+	list_for_each_entry_rcu(pt, &ptype_all, list) {
+		if (i == pos)
+			return pt;
+		++i;
+	}
+
+	for (t = 0; t < PTYPE_HASH_SIZE; t++) {
+		list_for_each_entry_rcu(pt, &ptype_base[t], list) {
+			if (i == pos)
+				return pt;
+			++i;
+		}
+	}
+	return NULL;
+}
+
+static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(RCU)
+{
+	rcu_read_lock();
+	return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
+}
+
+static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct packet_type *pt;
+	struct list_head *nxt;
+	int hash;
+
+	++*pos;
+	if (v == SEQ_START_TOKEN)
+		return ptype_get_idx(0);
+
+	pt = v;
+	nxt = pt->list.next;
+	if (pt->type == htons(ETH_P_ALL)) {
+		if (nxt != &ptype_all)
+			goto found;
+		hash = 0;
+		nxt = ptype_base[0].next;
+	} else
+		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
+
+	while (nxt == &ptype_base[hash]) {
+		if (++hash >= PTYPE_HASH_SIZE)
+			return NULL;
+		nxt = ptype_base[hash].next;
+	}
+found:
+	return list_entry(nxt, struct packet_type, list);
+}
+
+static void ptype_seq_stop(struct seq_file *seq, void *v)
+	__releases(RCU)
+{
+	rcu_read_unlock();
+}
+
+static int ptype_seq_show(struct seq_file *seq, void *v)
+{
+	struct packet_type *pt = v;
+
+	if (v == SEQ_START_TOKEN)
+		seq_puts(seq, "Type Device      Function\n");
+	else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
+		if (pt->type == htons(ETH_P_ALL))
+			seq_puts(seq, "ALL ");
+		else
+			seq_printf(seq, "%04x", ntohs(pt->type));
+
+		seq_printf(seq, " %-8s %pF\n",
+			   pt->dev ? pt->dev->name : "", pt->func);
+	}
+
+	return 0;
+}
+
+static const struct seq_operations ptype_seq_ops = {
+	.start = ptype_seq_start,
+	.next  = ptype_seq_next,
+	.stop  = ptype_seq_stop,
+	.show  = ptype_seq_show,
+};
+
+static int ptype_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open_net(inode, file, &ptype_seq_ops,
+			sizeof(struct seq_net_private));
+}
+
+static const struct file_operations ptype_seq_fops = {
+	.owner	 = THIS_MODULE,
+	.open    = ptype_seq_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release_net,
+};
+
+
+static int __net_init dev_proc_net_init(struct net *net)
+{
+	int rc = -ENOMEM;
+
+	if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
+		goto out;
+	if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
+		goto out_dev;
+	if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
+		goto out_softnet;
+
+	if (wext_proc_init(net))
+		goto out_ptype;
+	rc = 0;
+out:
+	return rc;
+out_ptype:
+	proc_net_remove(net, "ptype");
+out_softnet:
+	proc_net_remove(net, "softnet_stat");
+out_dev:
+	proc_net_remove(net, "dev");
+	goto out;
+}
+
+static void __net_exit dev_proc_net_exit(struct net *net)
+{
+	wext_proc_exit(net);
+
+	proc_net_remove(net, "ptype");
+	proc_net_remove(net, "softnet_stat");
+	proc_net_remove(net, "dev");
+}
+
+static struct pernet_operations __net_initdata dev_proc_ops = {
+	.init = dev_proc_net_init,
+	.exit = dev_proc_net_exit,
+};
+
+static int __init dev_proc_init(void)
+{
+	return register_pernet_subsys(&dev_proc_ops);
+}
+#else
+#define dev_proc_init() 0
+#endif	/* CONFIG_PROC_FS */
+
+
+/**
+ *	netdev_set_master	-	set up master pointer
+ *	@slave: slave device
+ *	@master: new master device
+ *
+ *	Changes the master device of the slave. Pass %NULL to break the
+ *	bonding. The caller must hold the RTNL semaphore. On a failure
+ *	a negative errno code is returned. On success the reference counts
+ *	are adjusted and the function returns zero.
+ */
+int netdev_set_master(struct net_device *slave, struct net_device *master)
+{
+	struct net_device *old = slave->master;
+
+	ASSERT_RTNL();
+
+	if (master) {
+		if (old)
+			return -EBUSY;
+		dev_hold(master);
+	}
+
+	slave->master = master;
+
+	if (old)
+		dev_put(old);
+	return 0;
+}
+EXPORT_SYMBOL(netdev_set_master);
+
+/**
+ *	netdev_set_bond_master	-	set up bonding master/slave pair
+ *	@slave: slave device
+ *	@master: new master device
+ *
+ *	Changes the master device of the slave. Pass %NULL to break the
+ *	bonding. The caller must hold the RTNL semaphore. On a failure
+ *	a negative errno code is returned. On success %RTM_NEWLINK is sent
+ *	to the routing socket and the function returns zero.
+ */
+int netdev_set_bond_master(struct net_device *slave, struct net_device *master)
+{
+	int err;
+
+	ASSERT_RTNL();
+
+	err = netdev_set_master(slave, master);
+	if (err)
+		return err;
+	if (master)
+		slave->flags |= IFF_SLAVE;
+	else
+		slave->flags &= ~IFF_SLAVE;
+
+	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
+	return 0;
+}
+EXPORT_SYMBOL(netdev_set_bond_master);
+
+static void dev_change_rx_flags(struct net_device *dev, int flags)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+
+	if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
+		ops->ndo_change_rx_flags(dev, flags);
+}
+
+static int __dev_set_promiscuity(struct net_device *dev, int inc)
+{
+	unsigned int old_flags = dev->flags;
+	uid_t uid;
+	gid_t gid;
+
+	ASSERT_RTNL();
+
+	dev->flags |= IFF_PROMISC;
+	dev->promiscuity += inc;
+	if (dev->promiscuity == 0) {
+		/*
+		 * Avoid overflow.
+		 * If inc causes overflow, untouch promisc and return error.
+		 */
+		if (inc < 0)
+			dev->flags &= ~IFF_PROMISC;
+		else {
+			dev->promiscuity -= inc;
+			pr_warn("%s: promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken.\n",
+				dev->name);
+			return -EOVERFLOW;
+		}
+	}
+	if (dev->flags != old_flags) {
+		pr_info("device %s %s promiscuous mode\n",
+			dev->name,
+			dev->flags & IFF_PROMISC ? "entered" : "left");
+		if (audit_enabled) {
+			current_uid_gid(&uid, &gid);
+			audit_log(current->audit_context, GFP_ATOMIC,
+				AUDIT_ANOM_PROMISCUOUS,
+				"dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
+				dev->name, (dev->flags & IFF_PROMISC),
+				(old_flags & IFF_PROMISC),
+				audit_get_loginuid(current),
+				uid, gid,
+				audit_get_sessionid(current));
+		}
+
+		dev_change_rx_flags(dev, IFF_PROMISC);
+	}
+	return 0;
+}
+
+/**
+ *	dev_set_promiscuity	- update promiscuity count on a device
+ *	@dev: device
+ *	@inc: modifier
+ *
+ *	Add or remove promiscuity from a device. While the count in the device
+ *	remains above zero the interface remains promiscuous. Once it hits zero
+ *	the device reverts back to normal filtering operation. A negative inc
+ *	value is used to drop promiscuity on the device.
+ *	Return 0 if successful or a negative errno code on error.
+ */
+int dev_set_promiscuity(struct net_device *dev, int inc)
+{
+	unsigned int old_flags = dev->flags;
+	int err;
+
+	err = __dev_set_promiscuity(dev, inc);
+	if (err < 0)
+		return err;
+	if (dev->flags != old_flags)
+		dev_set_rx_mode(dev);
+	return err;
+}
+EXPORT_SYMBOL(dev_set_promiscuity);
+
+/**
+ *	dev_set_allmulti	- update allmulti count on a device
+ *	@dev: device
+ *	@inc: modifier
+ *
+ *	Add or remove reception of all multicast frames to a device. While the
+ *	count in the device remains above zero the interface remains listening
+ *	to all interfaces. Once it hits zero the device reverts back to normal
+ *	filtering operation. A negative @inc value is used to drop the counter
+ *	when releasing a resource needing all multicasts.
+ *	Return 0 if successful or a negative errno code on error.
+ */
+
+int dev_set_allmulti(struct net_device *dev, int inc)
+{
+	unsigned int old_flags = dev->flags;
+
+	ASSERT_RTNL();
+
+	dev->flags |= IFF_ALLMULTI;
+	dev->allmulti += inc;
+	if (dev->allmulti == 0) {
+		/*
+		 * Avoid overflow.
+		 * If inc causes overflow, untouch allmulti and return error.
+		 */
+		if (inc < 0)
+			dev->flags &= ~IFF_ALLMULTI;
+		else {
+			dev->allmulti -= inc;
+			pr_warn("%s: allmulti touches roof, set allmulti failed. allmulti feature of device might be broken.\n",
+				dev->name);
+			return -EOVERFLOW;
+		}
+	}
+	if (dev->flags ^ old_flags) {
+		dev_change_rx_flags(dev, IFF_ALLMULTI);
+		dev_set_rx_mode(dev);
+	}
+	return 0;
+}
+EXPORT_SYMBOL(dev_set_allmulti);
+
+/*
+ *	Upload unicast and multicast address lists to device and
+ *	configure RX filtering. When the device doesn't support unicast
+ *	filtering it is put in promiscuous mode while unicast addresses
+ *	are present.
+ */
+void __dev_set_rx_mode(struct net_device *dev)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+
+	/* dev_open will call this function so the list will stay sane. */
+	if (!(dev->flags&IFF_UP))
+		return;
+
+	if (!netif_device_present(dev))
+		return;
+
+	if (!(dev->priv_flags & IFF_UNICAST_FLT)) {
+		/* Unicast addresses changes may only happen under the rtnl,
+		 * therefore calling __dev_set_promiscuity here is safe.
+		 */
+		if (!netdev_uc_empty(dev) && !dev->uc_promisc) {
+			__dev_set_promiscuity(dev, 1);
+			dev->uc_promisc = true;
+		} else if (netdev_uc_empty(dev) && dev->uc_promisc) {
+			__dev_set_promiscuity(dev, -1);
+			dev->uc_promisc = false;
+		}
+	}
+
+	if (ops->ndo_set_rx_mode)
+		ops->ndo_set_rx_mode(dev);
+}
+
+void dev_set_rx_mode(struct net_device *dev)
+{
+	netif_addr_lock_bh(dev);
+	__dev_set_rx_mode(dev);
+	netif_addr_unlock_bh(dev);
+}
+
+/**
+ *	dev_get_flags - get flags reported to userspace
+ *	@dev: device
+ *
+ *	Get the combination of flag bits exported through APIs to userspace.
+ */
+unsigned dev_get_flags(const struct net_device *dev)
+{
+	unsigned flags;
+
+	flags = (dev->flags & ~(IFF_PROMISC |
+				IFF_ALLMULTI |
+				IFF_RUNNING |
+				IFF_LOWER_UP |
+				IFF_DORMANT)) |
+		(dev->gflags & (IFF_PROMISC |
+				IFF_ALLMULTI));
+
+	if (netif_running(dev)) {
+		if (netif_oper_up(dev))
+			flags |= IFF_RUNNING;
+		if (netif_carrier_ok(dev))
+			flags |= IFF_LOWER_UP;
+		if (netif_dormant(dev))
+			flags |= IFF_DORMANT;
+	}
+
+	return flags;
+}
+EXPORT_SYMBOL(dev_get_flags);
+
+int __dev_change_flags(struct net_device *dev, unsigned int flags)
+{
+	unsigned int old_flags = dev->flags;
+	int ret;
+
+	ASSERT_RTNL();
+
+	/*
+	 *	Set the flags on our device.
+	 */
+
+	dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
+			       IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
+			       IFF_AUTOMEDIA)) |
+		     (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
+				    IFF_ALLMULTI));
+
+	/*
+	 *	Load in the correct multicast list now the flags have changed.
+	 */
+
+	if ((old_flags ^ flags) & IFF_MULTICAST)
+		dev_change_rx_flags(dev, IFF_MULTICAST);
+
+	dev_set_rx_mode(dev);
+
+	/*
+	 *	Have we downed the interface. We handle IFF_UP ourselves
+	 *	according to user attempts to set it, rather than blindly
+	 *	setting it.
+	 */
+
+	ret = 0;
+	if ((old_flags ^ flags) & IFF_UP) {	/* Bit is different  ? */
+		ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev);
+
+		if (!ret)
+			dev_set_rx_mode(dev);
+	}
+
+	if ((flags ^ dev->gflags) & IFF_PROMISC) {
+		int inc = (flags & IFF_PROMISC) ? 1 : -1;
+
+		dev->gflags ^= IFF_PROMISC;
+		dev_set_promiscuity(dev, inc);
+	}
+
+	/* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
+	   is important. Some (broken) drivers set IFF_PROMISC, when
+	   IFF_ALLMULTI is requested not asking us and not reporting.
+	 */
+	if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
+		int inc = (flags & IFF_ALLMULTI) ? 1 : -1;
+
+		dev->gflags ^= IFF_ALLMULTI;
+		dev_set_allmulti(dev, inc);
+	}
+
+	return ret;
+}
+
+void __dev_notify_flags(struct net_device *dev, unsigned int old_flags)
+{
+	unsigned int changes = dev->flags ^ old_flags;
+
+	if (changes & IFF_UP) {
+		if (dev->flags & IFF_UP)
+			call_netdevice_notifiers(NETDEV_UP, dev);
+		else
+			call_netdevice_notifiers(NETDEV_DOWN, dev);
+	}
+
+	if (dev->flags & IFF_UP &&
+	    (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE)))
+		call_netdevice_notifiers(NETDEV_CHANGE, dev);
+}
+
+/**
+ *	dev_change_flags - change device settings
+ *	@dev: device
+ *	@flags: device state flags
+ *
+ *	Change settings on device based state flags. The flags are
+ *	in the userspace exported format.
+ */
+int dev_change_flags(struct net_device *dev, unsigned int flags)
+{
+	int ret;
+	unsigned int changes, old_flags = dev->flags;
+
+	ret = __dev_change_flags(dev, flags);
+	if (ret < 0)
+		return ret;
+
+	changes = old_flags ^ dev->flags;
+	if (changes)
+		rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
+
+	__dev_notify_flags(dev, old_flags);
+	return ret;
+}
+EXPORT_SYMBOL(dev_change_flags);
+
+/**
+ *	dev_set_mtu - Change maximum transfer unit
+ *	@dev: device
+ *	@new_mtu: new transfer unit
+ *
+ *	Change the maximum transfer size of the network device.
+ */
+int dev_set_mtu(struct net_device *dev, int new_mtu)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+	int err;
+
+	if (new_mtu == dev->mtu)
+		return 0;
+
+	/*	MTU must be positive.	 */
+	if (new_mtu < 0)
+		return -EINVAL;
+
+	if (!netif_device_present(dev))
+		return -ENODEV;
+
+	err = 0;
+	if (ops->ndo_change_mtu)
+		err = ops->ndo_change_mtu(dev, new_mtu);
+	else
+		dev->mtu = new_mtu;
+
+	if (!err && dev->flags & IFF_UP)
+		call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
+	return err;
+}
+EXPORT_SYMBOL(dev_set_mtu);
+
+/**
+ *	dev_set_group - Change group this device belongs to
+ *	@dev: device
+ *	@new_group: group this device should belong to
+ */
+void dev_set_group(struct net_device *dev, int new_group)
+{
+	dev->group = new_group;
+}
+EXPORT_SYMBOL(dev_set_group);
+
+/**
+ *	dev_set_mac_address - Change Media Access Control Address
+ *	@dev: device
+ *	@sa: new address
+ *
+ *	Change the hardware (MAC) address of the device
+ */
+int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+	int err;
+
+	if (!ops->ndo_set_mac_address)
+		return -EOPNOTSUPP;
+	if (sa->sa_family != dev->type)
+		return -EINVAL;
+	if (!netif_device_present(dev))
+		return -ENODEV;
+	err = ops->ndo_set_mac_address(dev, sa);
+	if (!err)
+		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+	return err;
+}
+EXPORT_SYMBOL(dev_set_mac_address);
+
+/*
+ *	Perform the SIOCxIFxxx calls, inside rcu_read_lock()
+ */
+static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
+{
+	int err;
+	struct net_device *dev = dev_get_by_name_rcu(net, ifr->ifr_name);
+
+	if (!dev)
+		return -ENODEV;
+
+	switch (cmd) {
+	case SIOCGIFFLAGS:	/* Get interface flags */
+		ifr->ifr_flags = (short) dev_get_flags(dev);
+		return 0;
+
+	case SIOCGIFMETRIC:	/* Get the metric on the interface
+				   (currently unused) */
+		ifr->ifr_metric = 0;
+		return 0;
+
+	case SIOCGIFMTU:	/* Get the MTU of a device */
+		ifr->ifr_mtu = dev->mtu;
+		return 0;
+
+	case SIOCGIFHWADDR:
+		if (!dev->addr_len)
+			memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
+		else
+			memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
+			       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
+		ifr->ifr_hwaddr.sa_family = dev->type;
+		return 0;
+
+	case SIOCGIFSLAVE:
+		err = -EINVAL;
+		break;
+
+	case SIOCGIFMAP:
+		ifr->ifr_map.mem_start = dev->mem_start;
+		ifr->ifr_map.mem_end   = dev->mem_end;
+		ifr->ifr_map.base_addr = dev->base_addr;
+		ifr->ifr_map.irq       = dev->irq;
+		ifr->ifr_map.dma       = dev->dma;
+		ifr->ifr_map.port      = dev->if_port;
+		return 0;
+
+	case SIOCGIFINDEX:
+		ifr->ifr_ifindex = dev->ifindex;
+		return 0;
+
+	case SIOCGIFTXQLEN:
+		ifr->ifr_qlen = dev->tx_queue_len;
+		return 0;
+
+	default:
+		/* dev_ioctl() should ensure this case
+		 * is never reached
+		 */
+		WARN_ON(1);
+		err = -ENOTTY;
+		break;
+
+	}
+	return err;
+}
+
+/*
+ *	Perform the SIOCxIFxxx calls, inside rtnl_lock()
+ */
+static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
+{
+	int err;
+	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
+	const struct net_device_ops *ops;
+
+	if (!dev)
+		return -ENODEV;
+
+	ops = dev->netdev_ops;
+
+	switch (cmd) {
+	case SIOCSIFFLAGS:	/* Set interface flags */
+		return dev_change_flags(dev, ifr->ifr_flags);
+
+	case SIOCSIFMETRIC:	/* Set the metric on the interface
+				   (currently unused) */
+		return -EOPNOTSUPP;
+
+	case SIOCSIFMTU:	/* Set the MTU of a device */
+		return dev_set_mtu(dev, ifr->ifr_mtu);
+
+	case SIOCSIFHWADDR:
+		return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
+
+	case SIOCSIFHWBROADCAST:
+		if (ifr->ifr_hwaddr.sa_family != dev->type)
+			return -EINVAL;
+		memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
+		       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
+		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+		return 0;
+
+	case SIOCSIFMAP:
+		if (ops->ndo_set_config) {
+			if (!netif_device_present(dev))
+				return -ENODEV;
+			return ops->ndo_set_config(dev, &ifr->ifr_map);
+		}
+		return -EOPNOTSUPP;
+
+	case SIOCADDMULTI:
+		if (!ops->ndo_set_rx_mode ||
+		    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
+			return -EINVAL;
+		if (!netif_device_present(dev))
+			return -ENODEV;
+		return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data);
+
+	case SIOCDELMULTI:
+		if (!ops->ndo_set_rx_mode ||
+		    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
+			return -EINVAL;
+		if (!netif_device_present(dev))
+			return -ENODEV;
+		return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data);
+
+	case SIOCSIFTXQLEN:
+		if (ifr->ifr_qlen < 0)
+			return -EINVAL;
+		dev->tx_queue_len = ifr->ifr_qlen;
+		return 0;
+
+	case SIOCSIFNAME:
+		ifr->ifr_newname[IFNAMSIZ-1] = '\0';
+		return dev_change_name(dev, ifr->ifr_newname);
+
+	case SIOCSHWTSTAMP:
+		err = net_hwtstamp_validate(ifr);
+		if (err)
+			return err;
+		/* fall through */
+
+	/*
+	 *	Unknown or private ioctl
+	 */
+	default:
+		if ((cmd >= SIOCDEVPRIVATE &&
+		    cmd <= SIOCDEVPRIVATE + 15) ||
+		    cmd == SIOCBONDENSLAVE ||
+		    cmd == SIOCBONDRELEASE ||
+		    cmd == SIOCBONDSETHWADDR ||
+		    cmd == SIOCBONDSLAVEINFOQUERY ||
+		    cmd == SIOCBONDINFOQUERY ||
+		    cmd == SIOCBONDCHANGEACTIVE ||
+		    cmd == SIOCGMIIPHY ||
+		    cmd == SIOCGMIIREG ||
+		    cmd == SIOCSMIIREG ||
+		    cmd == SIOCBRADDIF ||
+		    cmd == SIOCBRDELIF ||
+		    cmd == SIOCSHWTSTAMP ||
+		    cmd == SIOCWANDEV) {
+			err = -EOPNOTSUPP;
+			if (ops->ndo_do_ioctl) {
+				if (netif_device_present(dev))
+					err = ops->ndo_do_ioctl(dev, ifr, cmd);
+				else
+					err = -ENODEV;
+			}
+		} else
+			err = -EINVAL;
+
+	}
+	return err;
+}
+
+/*
+ *	This function handles all "interface"-type I/O control requests. The actual
+ *	'doing' part of this is dev_ifsioc above.
+ */
+
+/**
+ *	dev_ioctl	-	network device ioctl
+ *	@net: the applicable net namespace
+ *	@cmd: command to issue
+ *	@arg: pointer to a struct ifreq in user space
+ *
+ *	Issue ioctl functions to devices. This is normally called by the
+ *	user space syscall interfaces but can sometimes be useful for
+ *	other purposes. The return value is the return from the syscall if
+ *	positive or a negative errno code on error.
+ */
+
+int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
+{
+	struct ifreq ifr;
+	int ret;
+	char *colon;
+
+	/* One special case: SIOCGIFCONF takes ifconf argument
+	   and requires shared lock, because it sleeps writing
+	   to user space.
+	 */
+
+	if (cmd == SIOCGIFCONF) {
+		rtnl_lock();
+		ret = dev_ifconf(net, (char __user *) arg);
+		rtnl_unlock();
+		return ret;
+	}
+	if (cmd == SIOCGIFNAME)
+		return dev_ifname(net, (struct ifreq __user *)arg);
+
+	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
+		return -EFAULT;
+
+	ifr.ifr_name[IFNAMSIZ-1] = 0;
+
+	colon = strchr(ifr.ifr_name, ':');
+	if (colon)
+		*colon = 0;
+
+	/*
+	 *	See which interface the caller is talking about.
+	 */
+
+	switch (cmd) {
+	/*
+	 *	These ioctl calls:
+	 *	- can be done by all.
+	 *	- atomic and do not require locking.
+	 *	- return a value
+	 */
+	case SIOCGIFFLAGS:
+	case SIOCGIFMETRIC:
+	case SIOCGIFMTU:
+	case SIOCGIFHWADDR:
+	case SIOCGIFSLAVE:
+	case SIOCGIFMAP:
+	case SIOCGIFINDEX:
+	case SIOCGIFTXQLEN:
+		dev_load(net, ifr.ifr_name);
+		rcu_read_lock();
+		ret = dev_ifsioc_locked(net, &ifr, cmd);
+		rcu_read_unlock();
+		if (!ret) {
+			if (colon)
+				*colon = ':';
+			if (copy_to_user(arg, &ifr,
+					 sizeof(struct ifreq)))
+				ret = -EFAULT;
+		}
+		return ret;
+
+	case SIOCETHTOOL:
+		dev_load(net, ifr.ifr_name);
+		rtnl_lock();
+		ret = dev_ethtool(net, &ifr);
+		rtnl_unlock();
+		if (!ret) {
+			if (colon)
+				*colon = ':';
+			if (copy_to_user(arg, &ifr,
+					 sizeof(struct ifreq)))
+				ret = -EFAULT;
+		}
+		return ret;
+
+	/*
+	 *	These ioctl calls:
+	 *	- require superuser power.
+	 *	- require strict serialization.
+	 *	- return a value
+	 */
+	case SIOCGMIIPHY:
+	case SIOCGMIIREG:
+	case SIOCSIFNAME:
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+		dev_load(net, ifr.ifr_name);
+		rtnl_lock();
+		ret = dev_ifsioc(net, &ifr, cmd);
+		rtnl_unlock();
+		if (!ret) {
+			if (colon)
+				*colon = ':';
+			if (copy_to_user(arg, &ifr,
+					 sizeof(struct ifreq)))
+				ret = -EFAULT;
+		}
+		return ret;
+
+	/*
+	 *	These ioctl calls:
+	 *	- require superuser power.
+	 *	- require strict serialization.
+	 *	- do not return a value
+	 */
+	case SIOCSIFFLAGS:
+	case SIOCSIFMETRIC:
+	case SIOCSIFMTU:
+	case SIOCSIFMAP:
+	case SIOCSIFHWADDR:
+	case SIOCSIFSLAVE:
+	case SIOCADDMULTI:
+	case SIOCDELMULTI:
+	case SIOCSIFHWBROADCAST:
+	case SIOCSIFTXQLEN:
+	case SIOCSMIIREG:
+	case SIOCBONDENSLAVE:
+	case SIOCBONDRELEASE:
+	case SIOCBONDSETHWADDR:
+	case SIOCBONDCHANGEACTIVE:
+	case SIOCBRADDIF:
+	case SIOCBRDELIF:
+	case SIOCSHWTSTAMP:
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+		/* fall through */
+	case SIOCBONDSLAVEINFOQUERY:
+	case SIOCBONDINFOQUERY:
+		dev_load(net, ifr.ifr_name);
+		rtnl_lock();
+		ret = dev_ifsioc(net, &ifr, cmd);
+		rtnl_unlock();
+		return ret;
+
+	case SIOCGIFMEM:
+		/* Get the per device memory space. We can add this but
+		 * currently do not support it */
+	case SIOCSIFMEM:
+		/* Set the per device memory buffer space.
+		 * Not applicable in our case */
+	case SIOCSIFLINK:
+		return -ENOTTY;
+
+	/*
+	 *	Unknown or private ioctl.
+	 */
+	default:
+		if (cmd == SIOCWANDEV ||
+		    (cmd >= SIOCDEVPRIVATE &&
+		     cmd <= SIOCDEVPRIVATE + 15)) {
+			dev_load(net, ifr.ifr_name);
+			rtnl_lock();
+			ret = dev_ifsioc(net, &ifr, cmd);
+			rtnl_unlock();
+			if (!ret && copy_to_user(arg, &ifr,
+						 sizeof(struct ifreq)))
+				ret = -EFAULT;
+			return ret;
+		}
+		/* Take care of Wireless Extensions */
+		if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
+			return wext_handle_ioctl(net, &ifr, cmd, arg);
+		return -ENOTTY;
+	}
+}
+
+
+/**
+ *	dev_new_index	-	allocate an ifindex
+ *	@net: the applicable net namespace
+ *
+ *	Returns a suitable unique value for a new device interface
+ *	number.  The caller must hold the rtnl semaphore or the
+ *	dev_base_lock to be sure it remains unique.
+ */
+static int dev_new_index(struct net *net)
+{
+	static int ifindex;
+	for (;;) {
+		if (++ifindex <= 0)
+			ifindex = 1;
+		if (!__dev_get_by_index(net, ifindex))
+			return ifindex;
+	}
+}
+
+/* Delayed registration/unregisteration */
+static LIST_HEAD(net_todo_list);
+
+static void net_set_todo(struct net_device *dev)
+{
+	list_add_tail(&dev->todo_list, &net_todo_list);
+}
+
+static void rollback_registered_many(struct list_head *head)
+{
+	struct net_device *dev, *tmp;
+
+	BUG_ON(dev_boot_phase);
+	ASSERT_RTNL();
+
+	list_for_each_entry_safe(dev, tmp, head, unreg_list) {
+		/* Some devices call without registering
+		 * for initialization unwind. Remove those
+		 * devices and proceed with the remaining.
+		 */
+		if (dev->reg_state == NETREG_UNINITIALIZED) {
+			pr_debug("unregister_netdevice: device %s/%p never was registered\n",
+				 dev->name, dev);
+
+			WARN_ON(1);
+			list_del(&dev->unreg_list);
+			continue;
+		}
+		dev->dismantle = true;
+		BUG_ON(dev->reg_state != NETREG_REGISTERED);
+	}
+
+	/* If device is running, close it first. */
+	dev_close_many(head);
+
+	list_for_each_entry(dev, head, unreg_list) {
+		/* And unlink it from device chain. */
+		unlist_netdevice(dev);
+
+		dev->reg_state = NETREG_UNREGISTERING;
+	}
+
+	synchronize_net();
+
+	list_for_each_entry(dev, head, unreg_list) {
+		/* Shutdown queueing discipline. */
+		dev_shutdown(dev);
+
+
+		/* Notify protocols, that we are about to destroy
+		   this device. They should clean all the things.
+		*/
+		call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
+
+		if (!dev->rtnl_link_ops ||
+		    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
+			rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
+
+		/*
+		 *	Flush the unicast and multicast chains
+		 */
+		dev_uc_flush(dev);
+		dev_mc_flush(dev);
+
+		if (dev->netdev_ops->ndo_uninit)
+			dev->netdev_ops->ndo_uninit(dev);
+
+		/* Notifier chain MUST detach us from master device. */
+		WARN_ON(dev->master);
+
+		/* Remove entries from kobject tree */
+		netdev_unregister_kobject(dev);
+	}
+
+	/* Process any work delayed until the end of the batch */
+	dev = list_first_entry(head, struct net_device, unreg_list);
+	call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
+
+	synchronize_net();
+
+	list_for_each_entry(dev, head, unreg_list)
+		dev_put(dev);
+}
+
+static void rollback_registered(struct net_device *dev)
+{
+	LIST_HEAD(single);
+
+	list_add(&dev->unreg_list, &single);
+	rollback_registered_many(&single);
+	list_del(&single);
+}
+
+static netdev_features_t netdev_fix_features(struct net_device *dev,
+	netdev_features_t features)
+{
+	/* Fix illegal checksum combinations */
+	if ((features & NETIF_F_HW_CSUM) &&
+	    (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
+		netdev_warn(dev, "mixed HW and IP checksum settings.\n");
+		features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
+	}
+
+	/* Fix illegal SG+CSUM combinations. */
+	if ((features & NETIF_F_SG) &&
+	    !(features & NETIF_F_ALL_CSUM)) {
+		netdev_dbg(dev,
+			"Dropping NETIF_F_SG since no checksum feature.\n");
+		features &= ~NETIF_F_SG;
+	}
+
+	/* TSO requires that SG is present as well. */
+	if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) {
+		netdev_dbg(dev, "Dropping TSO features since no SG feature.\n");
+		features &= ~NETIF_F_ALL_TSO;
+	}
+
+	/* TSO ECN requires that TSO is present as well. */
+	if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN)
+		features &= ~NETIF_F_TSO_ECN;
+
+	/* Software GSO depends on SG. */
+	if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) {
+		netdev_dbg(dev, "Dropping NETIF_F_GSO since no SG feature.\n");
+		features &= ~NETIF_F_GSO;
+	}
+
+	/* UFO needs SG and checksumming */
+	if (features & NETIF_F_UFO) {
+		/* maybe split UFO into V4 and V6? */
+		if (!((features & NETIF_F_GEN_CSUM) ||
+		    (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
+			    == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
+			netdev_dbg(dev,
+				"Dropping NETIF_F_UFO since no checksum offload features.\n");
+			features &= ~NETIF_F_UFO;
+		}
+
+		if (!(features & NETIF_F_SG)) {
+			netdev_dbg(dev,
+				"Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n");
+			features &= ~NETIF_F_UFO;
+		}
+	}
+
+	return features;
+}
+
+int __netdev_update_features(struct net_device *dev)
+{
+	netdev_features_t features;
+	int err = 0;
+
+	ASSERT_RTNL();
+
+	features = netdev_get_wanted_features(dev);
+
+	if (dev->netdev_ops->ndo_fix_features)
+		features = dev->netdev_ops->ndo_fix_features(dev, features);
+
+	/* driver might be less strict about feature dependencies */
+	features = netdev_fix_features(dev, features);
+
+	if (dev->features == features)
+		return 0;
+
+	netdev_dbg(dev, "Features changed: %pNF -> %pNF\n",
+		&dev->features, &features);
+
+	if (dev->netdev_ops->ndo_set_features)
+		err = dev->netdev_ops->ndo_set_features(dev, features);
+
+	if (unlikely(err < 0)) {
+		netdev_err(dev,
+			"set_features() failed (%d); wanted %pNF, left %pNF\n",
+			err, &features, &dev->features);
+		return -1;
+	}
+
+	if (!err)
+		dev->features = features;
+
+	return 1;
+}
+
+/**
+ *	netdev_update_features - recalculate device features
+ *	@dev: the device to check
+ *
+ *	Recalculate dev->features set and send notifications if it
+ *	has changed. Should be called after driver or hardware dependent
+ *	conditions might have changed that influence the features.
+ */
+void netdev_update_features(struct net_device *dev)
+{
+	if (__netdev_update_features(dev))
+		netdev_features_change(dev);
+}
+EXPORT_SYMBOL(netdev_update_features);
+
+/**
+ *	netdev_change_features - recalculate device features
+ *	@dev: the device to check
+ *
+ *	Recalculate dev->features set and send notifications even
+ *	if they have not changed. Should be called instead of
+ *	netdev_update_features() if also dev->vlan_features might
+ *	have changed to allow the changes to be propagated to stacked
+ *	VLAN devices.
+ */
+void netdev_change_features(struct net_device *dev)
+{
+	__netdev_update_features(dev);
+	netdev_features_change(dev);
+}
+EXPORT_SYMBOL(netdev_change_features);
+
+/**
+ *	netif_stacked_transfer_operstate -	transfer operstate
+ *	@rootdev: the root or lower level device to transfer state from
+ *	@dev: the device to transfer operstate to
+ *
+ *	Transfer operational state from root to device. This is normally
+ *	called when a stacking relationship exists between the root
+ *	device and the device(a leaf device).
+ */
+void netif_stacked_transfer_operstate(const struct net_device *rootdev,
+					struct net_device *dev)
+{
+	if (rootdev->operstate == IF_OPER_DORMANT)
+		netif_dormant_on(dev);
+	else
+		netif_dormant_off(dev);
+
+	if (netif_carrier_ok(rootdev)) {
+		if (!netif_carrier_ok(dev))
+			netif_carrier_on(dev);
+	} else {
+		if (netif_carrier_ok(dev))
+			netif_carrier_off(dev);
+	}
+}
+EXPORT_SYMBOL(netif_stacked_transfer_operstate);
+
+#ifdef CONFIG_RPS
+static int netif_alloc_rx_queues(struct net_device *dev)
+{
+	unsigned int i, count = dev->num_rx_queues;
+	struct netdev_rx_queue *rx;
+
+	BUG_ON(count < 1);
+
+	rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
+	if (!rx) {
+		pr_err("netdev: Unable to allocate %u rx queues\n", count);
+		return -ENOMEM;
+	}
+	dev->_rx = rx;
+
+	for (i = 0; i < count; i++)
+		rx[i].dev = dev;
+	return 0;
+}
+#endif
+
+static void netdev_init_one_queue(struct net_device *dev,
+				  struct netdev_queue *queue, void *_unused)
+{
+	/* Initialize queue lock */
+	spin_lock_init(&queue->_xmit_lock);
+	netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
+	queue->xmit_lock_owner = -1;
+	netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
+	queue->dev = dev;
+#ifdef CONFIG_BQL
+	dql_init(&queue->dql, HZ);
+#endif
+}
+
+static int netif_alloc_netdev_queues(struct net_device *dev)
+{
+	unsigned int count = dev->num_tx_queues;
+	struct netdev_queue *tx;
+
+	BUG_ON(count < 1);
+
+	tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL);
+	if (!tx) {
+		pr_err("netdev: Unable to allocate %u tx queues\n", count);
+		return -ENOMEM;
+	}
+	dev->_tx = tx;
+
+	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
+	spin_lock_init(&dev->tx_global_lock);
+
+	return 0;
+}
+
+/**
+ *	register_netdevice	- register a network device
+ *	@dev: device to register
+ *
+ *	Take a completed network device structure and add it to the kernel
+ *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
+ *	chain. 0 is returned on success. A negative errno code is returned
+ *	on a failure to set up the device, or if the name is a duplicate.
+ *
+ *	Callers must hold the rtnl semaphore. You may want
+ *	register_netdev() instead of this.
+ *
+ *	BUGS:
+ *	The locking appears insufficient to guarantee two parallel registers
+ *	will not get the same name.
+ */
+
+int register_netdevice(struct net_device *dev)
+{
+	int ret;
+	struct net *net = dev_net(dev);
+
+	BUG_ON(dev_boot_phase);
+	ASSERT_RTNL();
+
+	might_sleep();
+
+	/* When net_device's are persistent, this will be fatal. */
+	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
+	BUG_ON(!net);
+
+	spin_lock_init(&dev->addr_list_lock);
+	netdev_set_addr_lockdep_class(dev);
+
+	dev->iflink = -1;
+
+	ret = dev_get_valid_name(dev, dev->name);
+	if (ret < 0)
+		goto out;
+
+	/* Init, if this function is available */
+	if (dev->netdev_ops->ndo_init) {
+		ret = dev->netdev_ops->ndo_init(dev);
+		if (ret) {
+			if (ret > 0)
+				ret = -EIO;
+			goto out;
+		}
+	}
+
+	dev->ifindex = dev_new_index(net);
+	if (dev->iflink == -1)
+		dev->iflink = dev->ifindex;
+
+	/* Transfer changeable features to wanted_features and enable
+	 * software offloads (GSO and GRO).
+	 */
+	dev->hw_features |= NETIF_F_SOFT_FEATURES;
+	dev->features |= NETIF_F_SOFT_FEATURES;
+	dev->wanted_features = dev->features & dev->hw_features;
+
+	/* Turn on no cache copy if HW is doing checksum */
+	if (!(dev->flags & IFF_LOOPBACK)) {
+		dev->hw_features |= NETIF_F_NOCACHE_COPY;
+		if (dev->features & NETIF_F_ALL_CSUM) {
+			dev->wanted_features |= NETIF_F_NOCACHE_COPY;
+			dev->features |= NETIF_F_NOCACHE_COPY;
+		}
+	}
+
+	/* Make NETIF_F_HIGHDMA inheritable to VLAN devices.
+	 */
+	dev->vlan_features |= NETIF_F_HIGHDMA;
+
+	ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
+	ret = notifier_to_errno(ret);
+	if (ret)
+		goto err_uninit;
+
+	ret = netdev_register_kobject(dev);
+	if (ret)
+		goto err_uninit;
+	dev->reg_state = NETREG_REGISTERED;
+
+	__netdev_update_features(dev);
+
+	/*
+	 *	Default initial state at registry is that the
+	 *	device is present.
+	 */
+
+	set_bit(__LINK_STATE_PRESENT, &dev->state);
+
+	dev_init_scheduler(dev);
+	dev_hold(dev);
+	list_netdevice(dev);
+
+	/* Notify protocols, that a new device appeared. */
+	ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
+	ret = notifier_to_errno(ret);
+	if (ret) {
+		rollback_registered(dev);
+		dev->reg_state = NETREG_UNREGISTERED;
+	}
+	/*
+	 *	Prevent userspace races by waiting until the network
+	 *	device is fully setup before sending notifications.
+	 */
+	if (!dev->rtnl_link_ops ||
+	    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
+		rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
+
+out:
+	return ret;
+
+err_uninit:
+	if (dev->netdev_ops->ndo_uninit)
+		dev->netdev_ops->ndo_uninit(dev);
+	goto out;
+}
+EXPORT_SYMBOL(register_netdevice);
+
+/**
+ *	init_dummy_netdev	- init a dummy network device for NAPI
+ *	@dev: device to init
+ *
+ *	This takes a network device structure and initialize the minimum
+ *	amount of fields so it can be used to schedule NAPI polls without
+ *	registering a full blown interface. This is to be used by drivers
+ *	that need to tie several hardware interfaces to a single NAPI
+ *	poll scheduler due to HW limitations.
+ */
+int init_dummy_netdev(struct net_device *dev)
+{
+	/* Clear everything. Note we don't initialize spinlocks
+	 * are they aren't supposed to be taken by any of the
+	 * NAPI code and this dummy netdev is supposed to be
+	 * only ever used for NAPI polls
+	 */
+	memset(dev, 0, sizeof(struct net_device));
+
+	/* make sure we BUG if trying to hit standard
+	 * register/unregister code path
+	 */
+	dev->reg_state = NETREG_DUMMY;
+
+	/* NAPI wants this */
+	INIT_LIST_HEAD(&dev->napi_list);
+
+	/* a dummy interface is started by default */
+	set_bit(__LINK_STATE_PRESENT, &dev->state);
+	set_bit(__LINK_STATE_START, &dev->state);
+
+	/* Note : We dont allocate pcpu_refcnt for dummy devices,
+	 * because users of this 'device' dont need to change
+	 * its refcount.
+	 */
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(init_dummy_netdev);
+
+
+/**
+ *	register_netdev	- register a network device
+ *	@dev: device to register
+ *
+ *	Take a completed network device structure and add it to the kernel
+ *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
+ *	chain. 0 is returned on success. A negative errno code is returned
+ *	on a failure to set up the device, or if the name is a duplicate.
+ *
+ *	This is a wrapper around register_netdevice that takes the rtnl semaphore
+ *	and expands the device name if you passed a format string to
+ *	alloc_netdev.
+ */
+int register_netdev(struct net_device *dev)
+{
+	int err;
+
+	rtnl_lock();
+	err = register_netdevice(dev);
+	rtnl_unlock();
+	return err;
+}
+EXPORT_SYMBOL(register_netdev);
+
+int netdev_refcnt_read(const struct net_device *dev)
+{
+	int i, refcnt = 0;
+
+	for_each_possible_cpu(i)
+		refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i);
+	return refcnt;
+}
+EXPORT_SYMBOL(netdev_refcnt_read);
+
+/*
+ * netdev_wait_allrefs - wait until all references are gone.
+ *
+ * This is called when unregistering network devices.
+ *
+ * Any protocol or device that holds a reference should register
+ * for netdevice notification, and cleanup and put back the
+ * reference if they receive an UNREGISTER event.
+ * We can get stuck here if buggy protocols don't correctly
+ * call dev_put.
+ */
+static void netdev_wait_allrefs(struct net_device *dev)
+{
+	unsigned long rebroadcast_time, warning_time;
+	int refcnt;
+
+	linkwatch_forget_dev(dev);
+
+	rebroadcast_time = warning_time = jiffies;
+	refcnt = netdev_refcnt_read(dev);
+
+	while (refcnt != 0) {
+		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
+			rtnl_lock();
+
+			/* Rebroadcast unregister notification */
+			call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
+			/* don't resend NETDEV_UNREGISTER_BATCH, _BATCH users
+			 * should have already handle it the first time */
+
+			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
+				     &dev->state)) {
+				/* We must not have linkwatch events
+				 * pending on unregister. If this
+				 * happens, we simply run the queue
+				 * unscheduled, resulting in a noop
+				 * for this device.
+				 */
+				linkwatch_run_queue();
+			}
+
+			__rtnl_unlock();
+
+			rebroadcast_time = jiffies;
+		}
+
+		msleep(250);
+
+		refcnt = netdev_refcnt_read(dev);
+
+		if (time_after(jiffies, warning_time + 10 * HZ)) {
+			pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
+				 dev->name, refcnt);
+			warning_time = jiffies;
+		}
+	}
+}
+
+/* The sequence is:
+ *
+ *	rtnl_lock();
+ *	...
+ *	register_netdevice(x1);
+ *	register_netdevice(x2);
+ *	...
+ *	unregister_netdevice(y1);
+ *	unregister_netdevice(y2);
+ *      ...
+ *	rtnl_unlock();
+ *	free_netdev(y1);
+ *	free_netdev(y2);
+ *
+ * We are invoked by rtnl_unlock().
+ * This allows us to deal with problems:
+ * 1) We can delete sysfs objects which invoke hotplug
+ *    without deadlocking with linkwatch via keventd.
+ * 2) Since we run with the RTNL semaphore not held, we can sleep
+ *    safely in order to wait for the netdev refcnt to drop to zero.
+ *
+ * We must not return until all unregister events added during
+ * the interval the lock was held have been completed.
+ */
+void netdev_run_todo(void)
+{
+	struct list_head list;
+
+	/* Snapshot list, allow later requests */
+	list_replace_init(&net_todo_list, &list);
+
+	__rtnl_unlock();
+
+	/* Wait for rcu callbacks to finish before attempting to drain
+	 * the device list.  This usually avoids a 250ms wait.
+	 */
+	if (!list_empty(&list))
+		rcu_barrier();
+
+	while (!list_empty(&list)) {
+		struct net_device *dev
+			= list_first_entry(&list, struct net_device, todo_list);
+		list_del(&dev->todo_list);
+
+		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
+			pr_err("network todo '%s' but state %d\n",
+			       dev->name, dev->reg_state);
+			dump_stack();
+			continue;
+		}
+
+		dev->reg_state = NETREG_UNREGISTERED;
+
+		on_each_cpu(flush_backlog, dev, 1);
+
+		netdev_wait_allrefs(dev);
+
+		/* paranoia */
+		BUG_ON(netdev_refcnt_read(dev));
+		WARN_ON(rcu_access_pointer(dev->ip_ptr));
+		WARN_ON(rcu_access_pointer(dev->ip6_ptr));
+		WARN_ON(dev->dn_ptr);
+
+		if (dev->destructor)
+			dev->destructor(dev);
+
+		/* Free network device */
+		kobject_put(&dev->dev.kobj);
+	}
+}
+
+/* Convert net_device_stats to rtnl_link_stats64.  They have the same
+ * fields in the same order, with only the type differing.
+ */
+void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
+			     const struct net_device_stats *netdev_stats)
+{
+#if BITS_PER_LONG == 64
+	BUILD_BUG_ON(sizeof(*stats64) != sizeof(*netdev_stats));
+	memcpy(stats64, netdev_stats, sizeof(*stats64));
+#else
+	size_t i, n = sizeof(*stats64) / sizeof(u64);
+	const unsigned long *src = (const unsigned long *)netdev_stats;
+	u64 *dst = (u64 *)stats64;
+
+	BUILD_BUG_ON(sizeof(*netdev_stats) / sizeof(unsigned long) !=
+		     sizeof(*stats64) / sizeof(u64));
+	for (i = 0; i < n; i++)
+		dst[i] = src[i];
+#endif
+}
+EXPORT_SYMBOL(netdev_stats_to_stats64);
+
+/**
+ *	dev_get_stats	- get network device statistics
+ *	@dev: device to get statistics from
+ *	@storage: place to store stats
+ *
+ *	Get network statistics from device. Return @storage.
+ *	The device driver may provide its own method by setting
+ *	dev->netdev_ops->get_stats64 or dev->netdev_ops->get_stats;
+ *	otherwise the internal statistics structure is used.
+ */
+struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
+					struct rtnl_link_stats64 *storage)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+
+	if (ops->ndo_get_stats64) {
+		memset(storage, 0, sizeof(*storage));
+		ops->ndo_get_stats64(dev, storage);
+	} else if (ops->ndo_get_stats) {
+		netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev));
+	} else {
+		netdev_stats_to_stats64(storage, &dev->stats);
+	}
+	storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
+	return storage;
+}
+EXPORT_SYMBOL(dev_get_stats);
+
+struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
+{
+	struct netdev_queue *queue = dev_ingress_queue(dev);
+
+#ifdef CONFIG_NET_CLS_ACT
+	if (queue)
+		return queue;
+	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
+	if (!queue)
+		return NULL;
+	netdev_init_one_queue(dev, queue, NULL);
+	queue->qdisc = &noop_qdisc;
+	queue->qdisc_sleeping = &noop_qdisc;
+	rcu_assign_pointer(dev->ingress_queue, queue);
+#endif
+	return queue;
+}
+
+/**
+ *	alloc_netdev_mqs - allocate network device
+ *	@sizeof_priv:	size of private data to allocate space for
+ *	@name:		device name format string
+ *	@setup:		callback to initialize device
+ *	@txqs:		the number of TX subqueues to allocate
+ *	@rxqs:		the number of RX subqueues to allocate
+ *
+ *	Allocates a struct net_device with private data area for driver use
+ *	and performs basic initialization.  Also allocates subquue structs
+ *	for each queue on the device.
+ */
+struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
+		void (*setup)(struct net_device *),
+		unsigned int txqs, unsigned int rxqs)
+{
+	struct net_device *dev;
+	size_t alloc_size;
+	struct net_device *p;
+
+	BUG_ON(strlen(name) >= sizeof(dev->name));
+
+	if (txqs < 1) {
+		pr_err("alloc_netdev: Unable to allocate device with zero queues\n");
+		return NULL;
+	}
+
+#ifdef CONFIG_RPS
+	if (rxqs < 1) {
+		pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n");
+		return NULL;
+	}
+#endif
+
+	alloc_size = sizeof(struct net_device);
+	if (sizeof_priv) {
+		/* ensure 32-byte alignment of private area */
+		alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
+		alloc_size += sizeof_priv;
+	}
+	/* ensure 32-byte alignment of whole construct */
+	alloc_size += NETDEV_ALIGN - 1;
+
+	p = kzalloc(alloc_size, GFP_KERNEL);
+	if (!p) {
+		pr_err("alloc_netdev: Unable to allocate device\n");
+		return NULL;
+	}
+
+	dev = PTR_ALIGN(p, NETDEV_ALIGN);
+	dev->padded = (char *)dev - (char *)p;
+
+	dev->pcpu_refcnt = alloc_percpu(int);
+	if (!dev->pcpu_refcnt)
+		goto free_p;
+
+	if (dev_addr_init(dev))
+		goto free_pcpu;
+
+	dev_mc_init(dev);
+	dev_uc_init(dev);
+
+	dev_net_set(dev, &init_net);
+
+	dev->gso_max_size = GSO_MAX_SIZE;
+
+	INIT_LIST_HEAD(&dev->napi_list);
+	INIT_LIST_HEAD(&dev->unreg_list);
+	INIT_LIST_HEAD(&dev->link_watch_list);
+	dev->priv_flags = IFF_XMIT_DST_RELEASE;
+	setup(dev);
+
+	dev->num_tx_queues = txqs;
+	dev->real_num_tx_queues = txqs;
+	if (netif_alloc_netdev_queues(dev))
+		goto free_all;
+
+#ifdef CONFIG_RPS
+	dev->num_rx_queues = rxqs;
+	dev->real_num_rx_queues = rxqs;
+	if (netif_alloc_rx_queues(dev))
+		goto free_all;
+#endif
+
+	strcpy(dev->name, name);
+	dev->group = INIT_NETDEV_GROUP;
+	return dev;
+
+free_all:
+	free_netdev(dev);
+	return NULL;
+
+free_pcpu:
+	free_percpu(dev->pcpu_refcnt);
+	kfree(dev->_tx);
+#ifdef CONFIG_RPS
+	kfree(dev->_rx);
+#endif
+
+free_p:
+	kfree(p);
+	return NULL;
+}
+EXPORT_SYMBOL(alloc_netdev_mqs);
+
+/**
+ *	free_netdev - free network device
+ *	@dev: device
+ *
+ *	This function does the last stage of destroying an allocated device
+ * 	interface. The reference to the device object is released.
+ *	If this is the last reference then it will be freed.
+ */
+void free_netdev(struct net_device *dev)
+{
+	struct napi_struct *p, *n;
+
+	release_net(dev_net(dev));
+
+	kfree(dev->_tx);
+#ifdef CONFIG_RPS
+	kfree(dev->_rx);
+#endif
+
+	kfree(rcu_dereference_protected(dev->ingress_queue, 1));
+
+	/* Flush device addresses */
+	dev_addr_flush(dev);
+
+	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
+		netif_napi_del(p);
+
+	free_percpu(dev->pcpu_refcnt);
+	dev->pcpu_refcnt = NULL;
+
+	/*  Compatibility with error handling in drivers */
+	if (dev->reg_state == NETREG_UNINITIALIZED) {
+		kfree((char *)dev - dev->padded);
+		return;
+	}
+
+	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
+	dev->reg_state = NETREG_RELEASED;
+
+	/* will free via device release */
+	put_device(&dev->dev);
+}
+EXPORT_SYMBOL(free_netdev);
+
+/**
+ *	synchronize_net -  Synchronize with packet receive processing
+ *
+ *	Wait for packets currently being received to be done.
+ *	Does not block later packets from starting.
+ */
+void synchronize_net(void)
+{
+	might_sleep();
+	if (rtnl_is_locked())
+		synchronize_rcu_expedited();
+	else
+		synchronize_rcu();
+}
+EXPORT_SYMBOL(synchronize_net);
+
+/**
+ *	unregister_netdevice_queue - remove device from the kernel
+ *	@dev: device
+ *	@head: list
+ *
+ *	This function shuts down a device interface and removes it
+ *	from the kernel tables.
+ *	If head not NULL, device is queued to be unregistered later.
+ *
+ *	Callers must hold the rtnl semaphore.  You may want
+ *	unregister_netdev() instead of this.
+ */
+
+void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
+{
+	ASSERT_RTNL();
+
+	if (head) {
+		list_move_tail(&dev->unreg_list, head);
+	} else {
+		rollback_registered(dev);
+		/* Finish processing unregister after unlock */
+		net_set_todo(dev);
+	}
+}
+EXPORT_SYMBOL(unregister_netdevice_queue);
+
+/**
+ *	unregister_netdevice_many - unregister many devices
+ *	@head: list of devices
+ */
+void unregister_netdevice_many(struct list_head *head)
+{
+	struct net_device *dev;
+
+	if (!list_empty(head)) {
+		rollback_registered_many(head);
+		list_for_each_entry(dev, head, unreg_list)
+			net_set_todo(dev);
+	}
+}
+EXPORT_SYMBOL(unregister_netdevice_many);
+
+/**
+ *	unregister_netdev - remove device from the kernel
+ *	@dev: device
+ *
+ *	This function shuts down a device interface and removes it
+ *	from the kernel tables.
+ *
+ *	This is just a wrapper for unregister_netdevice that takes
+ *	the rtnl semaphore.  In general you want to use this and not
+ *	unregister_netdevice.
+ */
+void unregister_netdev(struct net_device *dev)
+{
+	rtnl_lock();
+	unregister_netdevice(dev);
+	rtnl_unlock();
+}
+EXPORT_SYMBOL(unregister_netdev);
+
+/**
+ *	dev_change_net_namespace - move device to different nethost namespace
+ *	@dev: device
+ *	@net: network namespace
+ *	@pat: If not NULL name pattern to try if the current device name
+ *	      is already taken in the destination network namespace.
+ *
+ *	This function shuts down a device interface and moves it
+ *	to a new network namespace. On success 0 is returned, on
+ *	a failure a netagive errno code is returned.
+ *
+ *	Callers must hold the rtnl semaphore.
+ */
+
+int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
+{
+	int err;
+
+	ASSERT_RTNL();
+
+	/* Don't allow namespace local devices to be moved. */
+	err = -EINVAL;
+	if (dev->features & NETIF_F_NETNS_LOCAL)
+		goto out;
+
+	/* Ensure the device has been registrered */
+	err = -EINVAL;
+	if (dev->reg_state != NETREG_REGISTERED)
+		goto out;
+
+	/* Get out if there is nothing todo */
+	err = 0;
+	if (net_eq(dev_net(dev), net))
+		goto out;
+
+	/* Pick the destination device name, and ensure
+	 * we can use it in the destination network namespace.
+	 */
+	err = -EEXIST;
+	if (__dev_get_by_name(net, dev->name)) {
+		/* We get here if we can't use the current device name */
+		if (!pat)
+			goto out;
+		if (dev_get_valid_name(dev, pat) < 0)
+			goto out;
+	}
+
+	/*
+	 * And now a mini version of register_netdevice unregister_netdevice.
+	 */
+
+	/* If device is running close it first. */
+	dev_close(dev);
+
+	/* And unlink it from device chain */
+	err = -ENODEV;
+	unlist_netdevice(dev);
+
+	synchronize_net();
+
+	/* Shutdown queueing discipline. */
+	dev_shutdown(dev);
+
+	/* Notify protocols, that we are about to destroy
+	   this device. They should clean all the things.
+
+	   Note that dev->reg_state stays at NETREG_REGISTERED.
+	   This is wanted because this way 8021q and macvlan know
+	   the device is just moving and can keep their slaves up.
+	*/
+	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
+	call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
+	rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
+
+	/*
+	 *	Flush the unicast and multicast chains
+	 */
+	dev_uc_flush(dev);
+	dev_mc_flush(dev);
+
+	/* Actually switch the network namespace */
+	dev_net_set(dev, net);
+
+	/* If there is an ifindex conflict assign a new one */
+	if (__dev_get_by_index(net, dev->ifindex)) {
+		int iflink = (dev->iflink == dev->ifindex);
+		dev->ifindex = dev_new_index(net);
+		if (iflink)
+			dev->iflink = dev->ifindex;
+	}
+
+	/* Fixup kobjects */
+	err = device_rename(&dev->dev, dev->name);
+	WARN_ON(err);
+
+	/* Add the device back in the hashes */
+	list_netdevice(dev);
+
+	/* Notify protocols, that a new device appeared. */
+	call_netdevice_notifiers(NETDEV_REGISTER, dev);
+
+	/*
+	 *	Prevent userspace races by waiting until the network
+	 *	device is fully setup before sending notifications.
+	 */
+	rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
+
+	synchronize_net();
+	err = 0;
+out:
+	return err;
+}
+EXPORT_SYMBOL_GPL(dev_change_net_namespace);
+
+static int dev_cpu_callback(struct notifier_block *nfb,
+			    unsigned long action,
+			    void *ocpu)
+{
+	struct sk_buff **list_skb;
+	struct sk_buff *skb;
+	unsigned int cpu, oldcpu = (unsigned long)ocpu;
+	struct softnet_data *sd, *oldsd;
+
+	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
+		return NOTIFY_OK;
+
+	local_irq_disable();
+	cpu = smp_processor_id();
+	sd = &per_cpu(softnet_data, cpu);
+	oldsd = &per_cpu(softnet_data, oldcpu);
+
+	/* Find end of our completion_queue. */
+	list_skb = &sd->completion_queue;
+	while (*list_skb)
+		list_skb = &(*list_skb)->next;
+	/* Append completion queue from offline CPU. */
+	*list_skb = oldsd->completion_queue;
+	oldsd->completion_queue = NULL;
+
+	/* Append output queue from offline CPU. */
+	if (oldsd->output_queue) {
+		*sd->output_queue_tailp = oldsd->output_queue;
+		sd->output_queue_tailp = oldsd->output_queue_tailp;
+		oldsd->output_queue = NULL;
+		oldsd->output_queue_tailp = &oldsd->output_queue;
+	}
+	/* Append NAPI poll list from offline CPU. */
+	if (!list_empty(&oldsd->poll_list)) {
+		list_splice_init(&oldsd->poll_list, &sd->poll_list);
+		raise_softirq_irqoff(NET_RX_SOFTIRQ);
+	}
+
+	raise_softirq_irqoff(NET_TX_SOFTIRQ);
+	local_irq_enable();
+
+	/* Process offline CPU's input_pkt_queue */
+	while ((skb = __skb_dequeue(&oldsd->process_queue))) {
+		netif_rx(skb);
+		input_queue_head_incr(oldsd);
+	}
+	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
+		netif_rx(skb);
+		input_queue_head_incr(oldsd);
+	}
+
+	return NOTIFY_OK;
+}
+
+
+/**
+ *	netdev_increment_features - increment feature set by one
+ *	@all: current feature set
+ *	@one: new feature set
+ *	@mask: mask feature set
+ *
+ *	Computes a new feature set after adding a device with feature set
+ *	@one to the master device with current feature set @all.  Will not
+ *	enable anything that is off in @mask. Returns the new feature set.
+ */
+netdev_features_t netdev_increment_features(netdev_features_t all,
+	netdev_features_t one, netdev_features_t mask)
+{
+	if (mask & NETIF_F_GEN_CSUM)
+		mask |= NETIF_F_ALL_CSUM;
+	mask |= NETIF_F_VLAN_CHALLENGED;
+
+	all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask;
+	all &= one | ~NETIF_F_ALL_FOR_ALL;
+
+	/* If one device supports hw checksumming, set for all. */
+	if (all & NETIF_F_GEN_CSUM)
+		all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM);
+
+	return all;
+}
+EXPORT_SYMBOL(netdev_increment_features);
+
+static struct hlist_head *netdev_create_hash(void)
+{
+	int i;
+	struct hlist_head *hash;
+
+	hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
+	if (hash != NULL)
+		for (i = 0; i < NETDEV_HASHENTRIES; i++)
+			INIT_HLIST_HEAD(&hash[i]);
+
+	return hash;
+}
+
+/* Initialize per network namespace state */
+static int __net_init netdev_init(struct net *net)
+{
+	INIT_LIST_HEAD(&net->dev_base_head);
+
+	net->dev_name_head = netdev_create_hash();
+	if (net->dev_name_head == NULL)
+		goto err_name;
+
+	net->dev_index_head = netdev_create_hash();
+	if (net->dev_index_head == NULL)
+		goto err_idx;
+
+	return 0;
+
+err_idx:
+	kfree(net->dev_name_head);
+err_name:
+	return -ENOMEM;
+}
+
+/**
+ *	netdev_drivername - network driver for the device
+ *	@dev: network device
+ *
+ *	Determine network driver for device.
+ */
+const char *netdev_drivername(const struct net_device *dev)
+{
+	const struct device_driver *driver;
+	const struct device *parent;
+	const char *empty = "";
+
+	parent = dev->dev.parent;
+	if (!parent)
+		return empty;
+
+	driver = parent->driver;
+	if (driver && driver->name)
+		return driver->name;
+	return empty;
+}
+
+int __netdev_printk(const char *level, const struct net_device *dev,
+			   struct va_format *vaf)
+{
+	int r;
+
+	if (dev && dev->dev.parent)
+		r = dev_printk(level, dev->dev.parent, "%s: %pV",
+			       netdev_name(dev), vaf);
+	else if (dev)
+		r = printk("%s%s: %pV", level, netdev_name(dev), vaf);
+	else
+		r = printk("%s(NULL net_device): %pV", level, vaf);
+
+	return r;
+}
+EXPORT_SYMBOL(__netdev_printk);
+
+int netdev_printk(const char *level, const struct net_device *dev,
+		  const char *format, ...)
+{
+	struct va_format vaf;
+	va_list args;
+	int r;
+
+	va_start(args, format);
+
+	vaf.fmt = format;
+	vaf.va = &args;
+
+	r = __netdev_printk(level, dev, &vaf);
+	va_end(args);
+
+	return r;
+}
+EXPORT_SYMBOL(netdev_printk);
+
+#define define_netdev_printk_level(func, level)			\
+int func(const struct net_device *dev, const char *fmt, ...)	\
+{								\
+	int r;							\
+	struct va_format vaf;					\
+	va_list args;						\
+								\
+	va_start(args, fmt);					\
+								\
+	vaf.fmt = fmt;						\
+	vaf.va = &args;						\
+								\
+	r = __netdev_printk(level, dev, &vaf);			\
+	va_end(args);						\
+								\
+	return r;						\
+}								\
+EXPORT_SYMBOL(func);
+
+define_netdev_printk_level(netdev_emerg, KERN_EMERG);
+define_netdev_printk_level(netdev_alert, KERN_ALERT);
+define_netdev_printk_level(netdev_crit, KERN_CRIT);
+define_netdev_printk_level(netdev_err, KERN_ERR);
+define_netdev_printk_level(netdev_warn, KERN_WARNING);
+define_netdev_printk_level(netdev_notice, KERN_NOTICE);
+define_netdev_printk_level(netdev_info, KERN_INFO);
+
+static void __net_exit netdev_exit(struct net *net)
+{
+	kfree(net->dev_name_head);
+	kfree(net->dev_index_head);
+}
+
+static struct pernet_operations __net_initdata netdev_net_ops = {
+	.init = netdev_init,
+	.exit = netdev_exit,
+};
+
+static void __net_exit default_device_exit(struct net *net)
+{
+	struct net_device *dev, *aux;
+	/*
+	 * Push all migratable network devices back to the
+	 * initial network namespace
+	 */
+	rtnl_lock();
+	for_each_netdev_safe(net, dev, aux) {
+		int err;
+		char fb_name[IFNAMSIZ];
+
+		/* Ignore unmoveable devices (i.e. loopback) */
+		if (dev->features & NETIF_F_NETNS_LOCAL)
+			continue;
+
+		/* Leave virtual devices for the generic cleanup */
+		if (dev->rtnl_link_ops)
+			continue;
+
+		/* Push remaining network devices to init_net */
+		snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
+		err = dev_change_net_namespace(dev, &init_net, fb_name);
+		if (err) {
+			pr_emerg("%s: failed to move %s to init_net: %d\n",
+				 __func__, dev->name, err);
+			BUG();
+		}
+	}
+	rtnl_unlock();
+}
+
+static void __net_exit default_device_exit_batch(struct list_head *net_list)
+{
+	/* At exit all network devices most be removed from a network
+	 * namespace.  Do this in the reverse order of registration.
+	 * Do this across as many network namespaces as possible to
+	 * improve batching efficiency.
+	 */
+	struct net_device *dev;
+	struct net *net;
+	LIST_HEAD(dev_kill_list);
+
+	rtnl_lock();
+	list_for_each_entry(net, net_list, exit_list) {
+		for_each_netdev_reverse(net, dev) {
+			if (dev->rtnl_link_ops)
+				dev->rtnl_link_ops->dellink(dev, &dev_kill_list);
+			else
+				unregister_netdevice_queue(dev, &dev_kill_list);
+		}
+	}
+	unregister_netdevice_many(&dev_kill_list);
+	list_del(&dev_kill_list);
+	rtnl_unlock();
+}
+
+static struct pernet_operations __net_initdata default_device_ops = {
+	.exit = default_device_exit,
+	.exit_batch = default_device_exit_batch,
+};
+
+/*
+ *	Initialize the DEV module. At boot time this walks the device list and
+ *	unhooks any devices that fail to initialise (normally hardware not
+ *	present) and leaves us with a valid list of present and active devices.
+ *
+ */
+
+/*
+ *       This is called single threaded during boot, so no need
+ *       to take the rtnl semaphore.
+ */
+static int __init net_dev_init(void)
+{
+	int i, rc = -ENOMEM;
+
+	BUG_ON(!dev_boot_phase);
+
+	if (dev_proc_init())
+		goto out;
+
+	if (netdev_kobject_init())
+		goto out;
+
+	INIT_LIST_HEAD(&ptype_all);
+	for (i = 0; i < PTYPE_HASH_SIZE; i++)
+		INIT_LIST_HEAD(&ptype_base[i]);
+
+	if (register_pernet_subsys(&netdev_net_ops))
+		goto out;
+
+	/*
+	 *	Initialise the packet receive queues.
+	 */
+
+	for_each_possible_cpu(i) {
+		struct softnet_data *sd = &per_cpu(softnet_data, i);
+
+		memset(sd, 0, sizeof(*sd));
+		skb_queue_head_init(&sd->input_pkt_queue);
+		skb_queue_head_init(&sd->process_queue);
+		sd->completion_queue = NULL;
+		INIT_LIST_HEAD(&sd->poll_list);
+		sd->output_queue = NULL;
+		sd->output_queue_tailp = &sd->output_queue;
+#ifdef CONFIG_RPS
+		sd->csd.func = rps_trigger_softirq;
+		sd->csd.info = sd;
+		sd->csd.flags = 0;
+		sd->cpu = i;
+#endif
+
+		sd->backlog.poll = process_backlog;
+		sd->backlog.weight = weight_p;
+		sd->backlog.gro_list = NULL;
+		sd->backlog.gro_count = 0;
+	}
+
+	dev_boot_phase = 0;
+
+	/* The loopback device is special if any other network devices
+	 * is present in a network namespace the loopback device must
+	 * be present. Since we now dynamically allocate and free the
+	 * loopback device ensure this invariant is maintained by
+	 * keeping the loopback device as the first device on the
+	 * list of network devices.  Ensuring the loopback devices
+	 * is the first device that appears and the last network device
+	 * that disappears.
+	 */
+	if (register_pernet_device(&loopback_net_ops))
+		goto out;
+
+	if (register_pernet_device(&default_device_ops))
+		goto out;
+
+	open_softirq(NET_TX_SOFTIRQ, net_tx_action);
+	open_softirq(NET_RX_SOFTIRQ, net_rx_action);
+
+	hotcpu_notifier(dev_cpu_callback, 0);
+	dst_init();
+	dev_mcast_init();
+	rc = 0;
+out:
+	return rc;
+}
+
+subsys_initcall(net_dev_init);
+
+static int __init initialize_hashrnd(void)
+{
+	get_random_bytes(&hashrnd, sizeof(hashrnd));
+	return 0;
+}
+
+late_initcall_sync(initialize_hashrnd);
+EXPORT_SYMBOL(dev_change_name);
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
new file mode 100644
index 00000000..626698f0
--- /dev/null
+++ b/net/core/dev_addr_lists.c
@@ -0,0 +1,734 @@
+/*
+ * net/core/dev_addr_lists.c - Functions for handling net device lists
+ * Copyright (c) 2010 Jiri Pirko <jpirko@redhat.com>
+ *
+ * This file contains functions for working with unicast, multicast and device
+ * addresses lists.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/export.h>
+#include <linux/list.h>
+#include <linux/proc_fs.h>
+
+/*
+ * General list handling functions
+ */
+
+static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
+			    unsigned char *addr, int addr_len,
+			    unsigned char addr_type, bool global)
+{
+	struct netdev_hw_addr *ha;
+	int alloc_size;
+
+	if (addr_len > MAX_ADDR_LEN)
+		return -EINVAL;
+
+	list_for_each_entry(ha, &list->list, list) {
+		if (!memcmp(ha->addr, addr, addr_len) &&
+		    ha->type == addr_type) {
+			if (global) {
+				/* check if addr is already used as global */
+				if (ha->global_use)
+					return 0;
+				else
+					ha->global_use = true;
+			}
+			ha->refcount++;
+			return 0;
+		}
+	}
+
+
+	alloc_size = sizeof(*ha);
+	if (alloc_size < L1_CACHE_BYTES)
+		alloc_size = L1_CACHE_BYTES;
+	ha = kmalloc(alloc_size, GFP_ATOMIC);
+	if (!ha)
+		return -ENOMEM;
+	memcpy(ha->addr, addr, addr_len);
+	ha->type = addr_type;
+	ha->refcount = 1;
+	ha->global_use = global;
+	ha->synced = false;
+	list_add_tail_rcu(&ha->list, &list->list);
+	list->count++;
+	return 0;
+}
+
+static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr,
+			 int addr_len, unsigned char addr_type)
+{
+	return __hw_addr_add_ex(list, addr, addr_len, addr_type, false);
+}
+
+static int __hw_addr_del_ex(struct netdev_hw_addr_list *list,
+			    unsigned char *addr, int addr_len,
+			    unsigned char addr_type, bool global)
+{
+	struct netdev_hw_addr *ha;
+
+	list_for_each_entry(ha, &list->list, list) {
+		if (!memcmp(ha->addr, addr, addr_len) &&
+		    (ha->type == addr_type || !addr_type)) {
+			if (global) {
+				if (!ha->global_use)
+					break;
+				else
+					ha->global_use = false;
+			}
+			if (--ha->refcount)
+				return 0;
+			list_del_rcu(&ha->list);
+			kfree_rcu(ha, rcu_head);
+			list->count--;
+			return 0;
+		}
+	}
+	return -ENOENT;
+}
+
+static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr,
+			 int addr_len, unsigned char addr_type)
+{
+	return __hw_addr_del_ex(list, addr, addr_len, addr_type, false);
+}
+
+int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
+			   struct netdev_hw_addr_list *from_list,
+			   int addr_len, unsigned char addr_type)
+{
+	int err;
+	struct netdev_hw_addr *ha, *ha2;
+	unsigned char type;
+
+	list_for_each_entry(ha, &from_list->list, list) {
+		type = addr_type ? addr_type : ha->type;
+		err = __hw_addr_add(to_list, ha->addr, addr_len, type);
+		if (err)
+			goto unroll;
+	}
+	return 0;
+
+unroll:
+	list_for_each_entry(ha2, &from_list->list, list) {
+		if (ha2 == ha)
+			break;
+		type = addr_type ? addr_type : ha2->type;
+		__hw_addr_del(to_list, ha2->addr, addr_len, type);
+	}
+	return err;
+}
+EXPORT_SYMBOL(__hw_addr_add_multiple);
+
+void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
+			    struct netdev_hw_addr_list *from_list,
+			    int addr_len, unsigned char addr_type)
+{
+	struct netdev_hw_addr *ha;
+	unsigned char type;
+
+	list_for_each_entry(ha, &from_list->list, list) {
+		type = addr_type ? addr_type : ha->type;
+		__hw_addr_del(to_list, ha->addr, addr_len, type);
+	}
+}
+EXPORT_SYMBOL(__hw_addr_del_multiple);
+
+int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
+		   struct netdev_hw_addr_list *from_list,
+		   int addr_len)
+{
+	int err = 0;
+	struct netdev_hw_addr *ha, *tmp;
+
+	list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
+		if (!ha->synced) {
+			err = __hw_addr_add(to_list, ha->addr,
+					    addr_len, ha->type);
+			if (err)
+				break;
+			ha->synced = true;
+			ha->refcount++;
+		} else if (ha->refcount == 1) {
+			__hw_addr_del(to_list, ha->addr, addr_len, ha->type);
+			__hw_addr_del(from_list, ha->addr, addr_len, ha->type);
+		}
+	}
+	return err;
+}
+EXPORT_SYMBOL(__hw_addr_sync);
+
+void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
+		      struct netdev_hw_addr_list *from_list,
+		      int addr_len)
+{
+	struct netdev_hw_addr *ha, *tmp;
+
+	list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
+		if (ha->synced) {
+			__hw_addr_del(to_list, ha->addr,
+				      addr_len, ha->type);
+			ha->synced = false;
+			__hw_addr_del(from_list, ha->addr,
+				      addr_len, ha->type);
+		}
+	}
+}
+EXPORT_SYMBOL(__hw_addr_unsync);
+
+void __hw_addr_flush(struct netdev_hw_addr_list *list)
+{
+	struct netdev_hw_addr *ha, *tmp;
+
+	list_for_each_entry_safe(ha, tmp, &list->list, list) {
+		list_del_rcu(&ha->list);
+		kfree_rcu(ha, rcu_head);
+	}
+	list->count = 0;
+}
+EXPORT_SYMBOL(__hw_addr_flush);
+
+void __hw_addr_init(struct netdev_hw_addr_list *list)
+{
+	INIT_LIST_HEAD(&list->list);
+	list->count = 0;
+}
+EXPORT_SYMBOL(__hw_addr_init);
+
+/*
+ * Device addresses handling functions
+ */
+
+/**
+ *	dev_addr_flush - Flush device address list
+ *	@dev: device
+ *
+ *	Flush device address list and reset ->dev_addr.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+void dev_addr_flush(struct net_device *dev)
+{
+	/* rtnl_mutex must be held here */
+
+	__hw_addr_flush(&dev->dev_addrs);
+	dev->dev_addr = NULL;
+}
+EXPORT_SYMBOL(dev_addr_flush);
+
+/**
+ *	dev_addr_init - Init device address list
+ *	@dev: device
+ *
+ *	Init device address list and create the first element,
+ *	used by ->dev_addr.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+int dev_addr_init(struct net_device *dev)
+{
+	unsigned char addr[MAX_ADDR_LEN];
+	struct netdev_hw_addr *ha;
+	int err;
+
+	/* rtnl_mutex must be held here */
+
+	__hw_addr_init(&dev->dev_addrs);
+	memset(addr, 0, sizeof(addr));
+	err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr),
+			    NETDEV_HW_ADDR_T_LAN);
+	if (!err) {
+		/*
+		 * Get the first (previously created) address from the list
+		 * and set dev_addr pointer to this location.
+		 */
+		ha = list_first_entry(&dev->dev_addrs.list,
+				      struct netdev_hw_addr, list);
+		dev->dev_addr = ha->addr;
+	}
+	return err;
+}
+EXPORT_SYMBOL(dev_addr_init);
+
+/**
+ *	dev_addr_add - Add a device address
+ *	@dev: device
+ *	@addr: address to add
+ *	@addr_type: address type
+ *
+ *	Add a device address to the device or increase the reference count if
+ *	it already exists.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+int dev_addr_add(struct net_device *dev, unsigned char *addr,
+		 unsigned char addr_type)
+{
+	int err;
+
+	ASSERT_RTNL();
+
+	err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type);
+	if (!err)
+		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+	return err;
+}
+EXPORT_SYMBOL(dev_addr_add);
+
+/**
+ *	dev_addr_del - Release a device address.
+ *	@dev: device
+ *	@addr: address to delete
+ *	@addr_type: address type
+ *
+ *	Release reference to a device address and remove it from the device
+ *	if the reference count drops to zero.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+int dev_addr_del(struct net_device *dev, unsigned char *addr,
+		 unsigned char addr_type)
+{
+	int err;
+	struct netdev_hw_addr *ha;
+
+	ASSERT_RTNL();
+
+	/*
+	 * We can not remove the first address from the list because
+	 * dev->dev_addr points to that.
+	 */
+	ha = list_first_entry(&dev->dev_addrs.list,
+			      struct netdev_hw_addr, list);
+	if (ha->addr == dev->dev_addr && ha->refcount == 1)
+		return -ENOENT;
+
+	err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len,
+			    addr_type);
+	if (!err)
+		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+	return err;
+}
+EXPORT_SYMBOL(dev_addr_del);
+
+/**
+ *	dev_addr_add_multiple - Add device addresses from another device
+ *	@to_dev: device to which addresses will be added
+ *	@from_dev: device from which addresses will be added
+ *	@addr_type: address type - 0 means type will be used from from_dev
+ *
+ *	Add device addresses of the one device to another.
+ **
+ *	The caller must hold the rtnl_mutex.
+ */
+int dev_addr_add_multiple(struct net_device *to_dev,
+			  struct net_device *from_dev,
+			  unsigned char addr_type)
+{
+	int err;
+
+	ASSERT_RTNL();
+
+	if (from_dev->addr_len != to_dev->addr_len)
+		return -EINVAL;
+	err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
+				     to_dev->addr_len, addr_type);
+	if (!err)
+		call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
+	return err;
+}
+EXPORT_SYMBOL(dev_addr_add_multiple);
+
+/**
+ *	dev_addr_del_multiple - Delete device addresses by another device
+ *	@to_dev: device where the addresses will be deleted
+ *	@from_dev: device supplying the addresses to be deleted
+ *	@addr_type: address type - 0 means type will be used from from_dev
+ *
+ *	Deletes addresses in to device by the list of addresses in from device.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+int dev_addr_del_multiple(struct net_device *to_dev,
+			  struct net_device *from_dev,
+			  unsigned char addr_type)
+{
+	ASSERT_RTNL();
+
+	if (from_dev->addr_len != to_dev->addr_len)
+		return -EINVAL;
+	__hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
+			       to_dev->addr_len, addr_type);
+	call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
+	return 0;
+}
+EXPORT_SYMBOL(dev_addr_del_multiple);
+
+/*
+ * Unicast list handling functions
+ */
+
+/**
+ *	dev_uc_add - Add a secondary unicast address
+ *	@dev: device
+ *	@addr: address to add
+ *
+ *	Add a secondary unicast address to the device or increase
+ *	the reference count if it already exists.
+ */
+int dev_uc_add(struct net_device *dev, unsigned char *addr)
+{
+	int err;
+
+	netif_addr_lock_bh(dev);
+	err = __hw_addr_add(&dev->uc, addr, dev->addr_len,
+			    NETDEV_HW_ADDR_T_UNICAST);
+	if (!err)
+		__dev_set_rx_mode(dev);
+	netif_addr_unlock_bh(dev);
+	return err;
+}
+EXPORT_SYMBOL(dev_uc_add);
+
+/**
+ *	dev_uc_del - Release secondary unicast address.
+ *	@dev: device
+ *	@addr: address to delete
+ *
+ *	Release reference to a secondary unicast address and remove it
+ *	from the device if the reference count drops to zero.
+ */
+int dev_uc_del(struct net_device *dev, unsigned char *addr)
+{
+	int err;
+
+	netif_addr_lock_bh(dev);
+	err = __hw_addr_del(&dev->uc, addr, dev->addr_len,
+			    NETDEV_HW_ADDR_T_UNICAST);
+	if (!err)
+		__dev_set_rx_mode(dev);
+	netif_addr_unlock_bh(dev);
+	return err;
+}
+EXPORT_SYMBOL(dev_uc_del);
+
+/**
+ *	dev_uc_sync - Synchronize device's unicast list to another device
+ *	@to: destination device
+ *	@from: source device
+ *
+ *	Add newly added addresses to the destination device and release
+ *	addresses that have no users left. The source device must be
+ *	locked by netif_addr_lock_bh.
+ *
+ *	This function is intended to be called from the dev->set_rx_mode
+ *	function of layered software devices.
+ */
+int dev_uc_sync(struct net_device *to, struct net_device *from)
+{
+	int err = 0;
+
+	if (to->addr_len != from->addr_len)
+		return -EINVAL;
+
+	netif_addr_lock_nested(to);
+	err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len);
+	if (!err)
+		__dev_set_rx_mode(to);
+	netif_addr_unlock(to);
+	return err;
+}
+EXPORT_SYMBOL(dev_uc_sync);
+
+/**
+ *	dev_uc_unsync - Remove synchronized addresses from the destination device
+ *	@to: destination device
+ *	@from: source device
+ *
+ *	Remove all addresses that were added to the destination device by
+ *	dev_uc_sync(). This function is intended to be called from the
+ *	dev->stop function of layered software devices.
+ */
+void dev_uc_unsync(struct net_device *to, struct net_device *from)
+{
+	if (to->addr_len != from->addr_len)
+		return;
+
+	netif_addr_lock_bh(from);
+	netif_addr_lock_nested(to);
+	__hw_addr_unsync(&to->uc, &from->uc, to->addr_len);
+	__dev_set_rx_mode(to);
+	netif_addr_unlock(to);
+	netif_addr_unlock_bh(from);
+}
+EXPORT_SYMBOL(dev_uc_unsync);
+
+/**
+ *	dev_uc_flush - Flush unicast addresses
+ *	@dev: device
+ *
+ *	Flush unicast addresses.
+ */
+void dev_uc_flush(struct net_device *dev)
+{
+	netif_addr_lock_bh(dev);
+	__hw_addr_flush(&dev->uc);
+	netif_addr_unlock_bh(dev);
+}
+EXPORT_SYMBOL(dev_uc_flush);
+
+/**
+ *	dev_uc_flush - Init unicast address list
+ *	@dev: device
+ *
+ *	Init unicast address list.
+ */
+void dev_uc_init(struct net_device *dev)
+{
+	__hw_addr_init(&dev->uc);
+}
+EXPORT_SYMBOL(dev_uc_init);
+
+/*
+ * Multicast list handling functions
+ */
+
+static int __dev_mc_add(struct net_device *dev, unsigned char *addr,
+			bool global)
+{
+	int err;
+
+	netif_addr_lock_bh(dev);
+	err = __hw_addr_add_ex(&dev->mc, addr, dev->addr_len,
+			       NETDEV_HW_ADDR_T_MULTICAST, global);
+	if (!err)
+		__dev_set_rx_mode(dev);
+	netif_addr_unlock_bh(dev);
+	return err;
+}
+/**
+ *	dev_mc_add - Add a multicast address
+ *	@dev: device
+ *	@addr: address to add
+ *
+ *	Add a multicast address to the device or increase
+ *	the reference count if it already exists.
+ */
+int dev_mc_add(struct net_device *dev, unsigned char *addr)
+{
+	return __dev_mc_add(dev, addr, false);
+}
+EXPORT_SYMBOL(dev_mc_add);
+
+/**
+ *	dev_mc_add_global - Add a global multicast address
+ *	@dev: device
+ *	@addr: address to add
+ *
+ *	Add a global multicast address to the device.
+ */
+int dev_mc_add_global(struct net_device *dev, unsigned char *addr)
+{
+	return __dev_mc_add(dev, addr, true);
+}
+EXPORT_SYMBOL(dev_mc_add_global);
+
+static int __dev_mc_del(struct net_device *dev, unsigned char *addr,
+			bool global)
+{
+	int err;
+
+	netif_addr_lock_bh(dev);
+	err = __hw_addr_del_ex(&dev->mc, addr, dev->addr_len,
+			       NETDEV_HW_ADDR_T_MULTICAST, global);
+	if (!err)
+		__dev_set_rx_mode(dev);
+	netif_addr_unlock_bh(dev);
+	return err;
+}
+
+/**
+ *	dev_mc_del - Delete a multicast address.
+ *	@dev: device
+ *	@addr: address to delete
+ *
+ *	Release reference to a multicast address and remove it
+ *	from the device if the reference count drops to zero.
+ */
+int dev_mc_del(struct net_device *dev, unsigned char *addr)
+{
+	return __dev_mc_del(dev, addr, false);
+}
+EXPORT_SYMBOL(dev_mc_del);
+
+/**
+ *	dev_mc_del_global - Delete a global multicast address.
+ *	@dev: device
+ *	@addr: address to delete
+ *
+ *	Release reference to a multicast address and remove it
+ *	from the device if the reference count drops to zero.
+ */
+int dev_mc_del_global(struct net_device *dev, unsigned char *addr)
+{
+	return __dev_mc_del(dev, addr, true);
+}
+EXPORT_SYMBOL(dev_mc_del_global);
+
+/**
+ *	dev_mc_sync - Synchronize device's unicast list to another device
+ *	@to: destination device
+ *	@from: source device
+ *
+ *	Add newly added addresses to the destination device and release
+ *	addresses that have no users left. The source device must be
+ *	locked by netif_addr_lock_bh.
+ *
+ *	This function is intended to be called from the ndo_set_rx_mode
+ *	function of layered software devices.
+ */
+int dev_mc_sync(struct net_device *to, struct net_device *from)
+{
+	int err = 0;
+
+	if (to->addr_len != from->addr_len)
+		return -EINVAL;
+
+	netif_addr_lock_nested(to);
+	err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len);
+	if (!err)
+		__dev_set_rx_mode(to);
+	netif_addr_unlock(to);
+	return err;
+}
+EXPORT_SYMBOL(dev_mc_sync);
+
+/**
+ *	dev_mc_unsync - Remove synchronized addresses from the destination device
+ *	@to: destination device
+ *	@from: source device
+ *
+ *	Remove all addresses that were added to the destination device by
+ *	dev_mc_sync(). This function is intended to be called from the
+ *	dev->stop function of layered software devices.
+ */
+void dev_mc_unsync(struct net_device *to, struct net_device *from)
+{
+	if (to->addr_len != from->addr_len)
+		return;
+
+	netif_addr_lock_bh(from);
+	netif_addr_lock_nested(to);
+	__hw_addr_unsync(&to->mc, &from->mc, to->addr_len);
+	__dev_set_rx_mode(to);
+	netif_addr_unlock(to);
+	netif_addr_unlock_bh(from);
+}
+EXPORT_SYMBOL(dev_mc_unsync);
+
+/**
+ *	dev_mc_flush - Flush multicast addresses
+ *	@dev: device
+ *
+ *	Flush multicast addresses.
+ */
+void dev_mc_flush(struct net_device *dev)
+{
+	netif_addr_lock_bh(dev);
+	__hw_addr_flush(&dev->mc);
+	netif_addr_unlock_bh(dev);
+}
+EXPORT_SYMBOL(dev_mc_flush);
+
+/**
+ *	dev_mc_flush - Init multicast address list
+ *	@dev: device
+ *
+ *	Init multicast address list.
+ */
+void dev_mc_init(struct net_device *dev)
+{
+	__hw_addr_init(&dev->mc);
+}
+EXPORT_SYMBOL(dev_mc_init);
+
+#ifdef CONFIG_PROC_FS
+#include <linux/seq_file.h>
+
+static int dev_mc_seq_show(struct seq_file *seq, void *v)
+{
+	struct netdev_hw_addr *ha;
+	struct net_device *dev = v;
+
+	if (v == SEQ_START_TOKEN)
+		return 0;
+
+	netif_addr_lock_bh(dev);
+	netdev_for_each_mc_addr(ha, dev) {
+		int i;
+
+		seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex,
+			   dev->name, ha->refcount, ha->global_use);
+
+		for (i = 0; i < dev->addr_len; i++)
+			seq_printf(seq, "%02x", ha->addr[i]);
+
+		seq_putc(seq, '\n');
+	}
+	netif_addr_unlock_bh(dev);
+	return 0;
+}
+
+static const struct seq_operations dev_mc_seq_ops = {
+	.start = dev_seq_start,
+	.next  = dev_seq_next,
+	.stop  = dev_seq_stop,
+	.show  = dev_mc_seq_show,
+};
+
+static int dev_mc_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open_net(inode, file, &dev_mc_seq_ops,
+			    sizeof(struct seq_net_private));
+}
+
+static const struct file_operations dev_mc_seq_fops = {
+	.owner	 = THIS_MODULE,
+	.open    = dev_mc_seq_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release_net,
+};
+
+#endif
+
+static int __net_init dev_mc_net_init(struct net *net)
+{
+	if (!proc_net_fops_create(net, "dev_mcast", 0, &dev_mc_seq_fops))
+		return -ENOMEM;
+	return 0;
+}
+
+static void __net_exit dev_mc_net_exit(struct net *net)
+{
+	proc_net_remove(net, "dev_mcast");
+}
+
+static struct pernet_operations __net_initdata dev_mc_net_ops = {
+	.init = dev_mc_net_init,
+	.exit = dev_mc_net_exit,
+};
+
+void __init dev_mcast_init(void)
+{
+	register_pernet_subsys(&dev_mc_net_ops);
+}
+
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
new file mode 100644
index 00000000..b856f87e
--- /dev/null
+++ b/net/core/drop_monitor.c
@@ -0,0 +1,390 @@
+/*
+ * Monitoring code for network dropped packet alerts
+ *
+ * Copyright (C) 2009 Neil Horman <nhorman@tuxdriver.com>
+ */
+
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/string.h>
+#include <linux/if_arp.h>
+#include <linux/inetdevice.h>
+#include <linux/inet.h>
+#include <linux/interrupt.h>
+#include <linux/netpoll.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include <linux/netlink.h>
+#include <linux/net_dropmon.h>
+#include <linux/percpu.h>
+#include <linux/timer.h>
+#include <linux/bitops.h>
+#include <linux/slab.h>
+#include <net/genetlink.h>
+#include <net/netevent.h>
+
+#include <trace/events/skb.h>
+#include <trace/events/napi.h>
+
+#include <asm/unaligned.h>
+
+#define TRACE_ON 1
+#define TRACE_OFF 0
+
+/*
+ * Globals, our netlink socket pointer
+ * and the work handle that will send up
+ * netlink alerts
+ */
+static int trace_state = TRACE_OFF;
+static DEFINE_MUTEX(trace_state_mutex);
+
+struct per_cpu_dm_data {
+	spinlock_t		lock;
+	struct sk_buff		*skb;
+	struct work_struct	dm_alert_work;
+	struct timer_list	send_timer;
+};
+
+struct dm_hw_stat_delta {
+	struct net_device *dev;
+	unsigned long last_rx;
+	struct list_head list;
+	struct rcu_head rcu;
+	unsigned long last_drop_val;
+};
+
+static struct genl_family net_drop_monitor_family = {
+	.id             = GENL_ID_GENERATE,
+	.hdrsize        = 0,
+	.name           = "NET_DM",
+	.version        = 2,
+	.maxattr        = NET_DM_CMD_MAX,
+};
+
+static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data);
+
+static int dm_hit_limit = 64;
+static int dm_delay = 1;
+static unsigned long dm_hw_check_delta = 2*HZ;
+static LIST_HEAD(hw_stats_list);
+
+static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data)
+{
+	size_t al;
+	struct net_dm_alert_msg *msg;
+	struct nlattr *nla;
+	struct sk_buff *skb;
+	unsigned long flags;
+
+	al = sizeof(struct net_dm_alert_msg);
+	al += dm_hit_limit * sizeof(struct net_dm_drop_point);
+	al += sizeof(struct nlattr);
+
+	skb = genlmsg_new(al, GFP_KERNEL);
+
+	if (skb) {
+		genlmsg_put(skb, 0, 0, &net_drop_monitor_family,
+				0, NET_DM_CMD_ALERT);
+		nla = nla_reserve(skb, NLA_UNSPEC,
+				  sizeof(struct net_dm_alert_msg));
+		msg = nla_data(nla);
+		memset(msg, 0, al);
+	} else {
+		mod_timer(&data->send_timer, jiffies + HZ / 10);
+	}
+
+	spin_lock_irqsave(&data->lock, flags);
+	swap(data->skb, skb);
+	spin_unlock_irqrestore(&data->lock, flags);
+
+	return skb;
+}
+
+static void send_dm_alert(struct work_struct *work)
+{
+	struct sk_buff *skb;
+	struct per_cpu_dm_data *data;
+
+	data = container_of(work, struct per_cpu_dm_data, dm_alert_work);
+
+	skb = reset_per_cpu_data(data);
+
+	if (skb)
+		genlmsg_multicast(skb, 0, NET_DM_GRP_ALERT, GFP_KERNEL);
+}
+
+/*
+ * This is the timer function to delay the sending of an alert
+ * in the event that more drops will arrive during the
+ * hysteresis period.
+ */
+static void sched_send_work(unsigned long _data)
+{
+	struct per_cpu_dm_data *data = (struct per_cpu_dm_data *)_data;
+
+	schedule_work(&data->dm_alert_work);
+}
+
+static void trace_drop_common(struct sk_buff *skb, void *location)
+{
+	struct net_dm_alert_msg *msg;
+	struct nlmsghdr *nlh;
+	struct nlattr *nla;
+	int i;
+	struct sk_buff *dskb;
+	struct per_cpu_dm_data *data;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	data = &__get_cpu_var(dm_cpu_data);
+	spin_lock(&data->lock);
+	dskb = data->skb;
+
+	if (!dskb)
+		goto out;
+
+	nlh = (struct nlmsghdr *)dskb->data;
+	nla = genlmsg_data(nlmsg_data(nlh));
+	msg = nla_data(nla);
+	for (i = 0; i < msg->entries; i++) {
+		if (!memcmp(&location, msg->points[i].pc, sizeof(void *))) {
+			msg->points[i].count++;
+			goto out;
+		}
+	}
+	if (msg->entries == dm_hit_limit)
+		goto out;
+	/*
+	 * We need to create a new entry
+	 */
+	__nla_reserve_nohdr(dskb, sizeof(struct net_dm_drop_point));
+	nla->nla_len += NLA_ALIGN(sizeof(struct net_dm_drop_point));
+	memcpy(msg->points[msg->entries].pc, &location, sizeof(void *));
+	msg->points[msg->entries].count = 1;
+	msg->entries++;
+
+	if (!timer_pending(&data->send_timer)) {
+		data->send_timer.expires = jiffies + dm_delay * HZ;
+		add_timer(&data->send_timer);
+	}
+
+out:
+	spin_unlock_irqrestore(&data->lock, flags);
+}
+
+static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, void *location)
+{
+	trace_drop_common(skb, location);
+}
+
+static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi)
+{
+	struct dm_hw_stat_delta *new_stat;
+
+	/*
+	 * Don't check napi structures with no associated device
+	 */
+	if (!napi->dev)
+		return;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(new_stat, &hw_stats_list, list) {
+		/*
+		 * only add a note to our monitor buffer if:
+		 * 1) this is the dev we received on
+		 * 2) its after the last_rx delta
+		 * 3) our rx_dropped count has gone up
+		 */
+		if ((new_stat->dev == napi->dev)  &&
+		    (time_after(jiffies, new_stat->last_rx + dm_hw_check_delta)) &&
+		    (napi->dev->stats.rx_dropped != new_stat->last_drop_val)) {
+			trace_drop_common(NULL, NULL);
+			new_stat->last_drop_val = napi->dev->stats.rx_dropped;
+			new_stat->last_rx = jiffies;
+			break;
+		}
+	}
+	rcu_read_unlock();
+}
+
+static int set_all_monitor_traces(int state)
+{
+	int rc = 0;
+	struct dm_hw_stat_delta *new_stat = NULL;
+	struct dm_hw_stat_delta *temp;
+
+	mutex_lock(&trace_state_mutex);
+
+	if (state == trace_state) {
+		rc = -EAGAIN;
+		goto out_unlock;
+	}
+
+	switch (state) {
+	case TRACE_ON:
+		rc |= register_trace_kfree_skb(trace_kfree_skb_hit, NULL);
+		rc |= register_trace_napi_poll(trace_napi_poll_hit, NULL);
+		break;
+	case TRACE_OFF:
+		rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit, NULL);
+		rc |= unregister_trace_napi_poll(trace_napi_poll_hit, NULL);
+
+		tracepoint_synchronize_unregister();
+
+		/*
+		 * Clean the device list
+		 */
+		list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) {
+			if (new_stat->dev == NULL) {
+				list_del_rcu(&new_stat->list);
+				kfree_rcu(new_stat, rcu);
+			}
+		}
+		break;
+	default:
+		rc = 1;
+		break;
+	}
+
+	if (!rc)
+		trace_state = state;
+	else
+		rc = -EINPROGRESS;
+
+out_unlock:
+	mutex_unlock(&trace_state_mutex);
+
+	return rc;
+}
+
+
+static int net_dm_cmd_config(struct sk_buff *skb,
+			struct genl_info *info)
+{
+	return -ENOTSUPP;
+}
+
+static int net_dm_cmd_trace(struct sk_buff *skb,
+			struct genl_info *info)
+{
+	switch (info->genlhdr->cmd) {
+	case NET_DM_CMD_START:
+		return set_all_monitor_traces(TRACE_ON);
+		break;
+	case NET_DM_CMD_STOP:
+		return set_all_monitor_traces(TRACE_OFF);
+		break;
+	}
+
+	return -ENOTSUPP;
+}
+
+static int dropmon_net_event(struct notifier_block *ev_block,
+			unsigned long event, void *ptr)
+{
+	struct net_device *dev = ptr;
+	struct dm_hw_stat_delta *new_stat = NULL;
+	struct dm_hw_stat_delta *tmp;
+
+	switch (event) {
+	case NETDEV_REGISTER:
+		new_stat = kzalloc(sizeof(struct dm_hw_stat_delta), GFP_KERNEL);
+
+		if (!new_stat)
+			goto out;
+
+		new_stat->dev = dev;
+		new_stat->last_rx = jiffies;
+		mutex_lock(&trace_state_mutex);
+		list_add_rcu(&new_stat->list, &hw_stats_list);
+		mutex_unlock(&trace_state_mutex);
+		break;
+	case NETDEV_UNREGISTER:
+		mutex_lock(&trace_state_mutex);
+		list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) {
+			if (new_stat->dev == dev) {
+				new_stat->dev = NULL;
+				if (trace_state == TRACE_OFF) {
+					list_del_rcu(&new_stat->list);
+					kfree_rcu(new_stat, rcu);
+					break;
+				}
+			}
+		}
+		mutex_unlock(&trace_state_mutex);
+		break;
+	}
+out:
+	return NOTIFY_DONE;
+}
+
+static struct genl_ops dropmon_ops[] = {
+	{
+		.cmd = NET_DM_CMD_CONFIG,
+		.doit = net_dm_cmd_config,
+	},
+	{
+		.cmd = NET_DM_CMD_START,
+		.doit = net_dm_cmd_trace,
+	},
+	{
+		.cmd = NET_DM_CMD_STOP,
+		.doit = net_dm_cmd_trace,
+	},
+};
+
+static struct notifier_block dropmon_net_notifier = {
+	.notifier_call = dropmon_net_event
+};
+
+static int __init init_net_drop_monitor(void)
+{
+	struct per_cpu_dm_data *data;
+	int cpu, rc;
+
+	printk(KERN_INFO "Initializing network drop monitor service\n");
+
+	if (sizeof(void *) > 8) {
+		printk(KERN_ERR "Unable to store program counters on this arch, Drop monitor failed\n");
+		return -ENOSPC;
+	}
+
+	rc = genl_register_family_with_ops(&net_drop_monitor_family,
+					   dropmon_ops,
+					   ARRAY_SIZE(dropmon_ops));
+	if (rc) {
+		printk(KERN_ERR "Could not create drop monitor netlink family\n");
+		return rc;
+	}
+
+	rc = register_netdevice_notifier(&dropmon_net_notifier);
+	if (rc < 0) {
+		printk(KERN_CRIT "Failed to register netdevice notifier\n");
+		goto out_unreg;
+	}
+
+	rc = 0;
+
+	for_each_present_cpu(cpu) {
+		data = &per_cpu(dm_cpu_data, cpu);
+		INIT_WORK(&data->dm_alert_work, send_dm_alert);
+		init_timer(&data->send_timer);
+		data->send_timer.data = (unsigned long)data;
+		data->send_timer.function = sched_send_work;
+		spin_lock_init(&data->lock);
+		reset_per_cpu_data(data);
+	}
+
+
+	goto out;
+
+out_unreg:
+	genl_unregister_family(&net_drop_monitor_family);
+out:
+	return rc;
+}
+
+late_initcall(init_net_drop_monitor);
diff --git a/net/core/dst.c b/net/core/dst.c
new file mode 100644
index 00000000..43d94ced
--- /dev/null
+++ b/net/core/dst.c
@@ -0,0 +1,419 @@
+/*
+ * net/core/dst.c	Protocol independent destination cache.
+ *
+ * Authors:		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ */
+
+#include <linux/bitops.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/workqueue.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <net/net_namespace.h>
+#include <linux/sched.h>
+#include <linux/prefetch.h>
+
+#include <net/dst.h>
+
+/*
+ * Theory of operations:
+ * 1) We use a list, protected by a spinlock, to add
+ *    new entries from both BH and non-BH context.
+ * 2) In order to keep spinlock held for a small delay,
+ *    we use a second list where are stored long lived
+ *    entries, that are handled by the garbage collect thread
+ *    fired by a workqueue.
+ * 3) This list is guarded by a mutex,
+ *    so that the gc_task and dst_dev_event() can be synchronized.
+ */
+
+/*
+ * We want to keep lock & list close together
+ * to dirty as few cache lines as possible in __dst_free().
+ * As this is not a very strong hint, we dont force an alignment on SMP.
+ */
+static struct {
+	spinlock_t		lock;
+	struct dst_entry	*list;
+	unsigned long		timer_inc;
+	unsigned long		timer_expires;
+} dst_garbage = {
+	.lock = __SPIN_LOCK_UNLOCKED(dst_garbage.lock),
+	.timer_inc = DST_GC_MAX,
+};
+static void dst_gc_task(struct work_struct *work);
+static void ___dst_free(struct dst_entry *dst);
+
+static DECLARE_DELAYED_WORK(dst_gc_work, dst_gc_task);
+
+static DEFINE_MUTEX(dst_gc_mutex);
+/*
+ * long lived entries are maintained in this list, guarded by dst_gc_mutex
+ */
+static struct dst_entry         *dst_busy_list;
+
+static void dst_gc_task(struct work_struct *work)
+{
+	int    delayed = 0;
+	int    work_performed = 0;
+	unsigned long expires = ~0L;
+	struct dst_entry *dst, *next, head;
+	struct dst_entry *last = &head;
+
+	mutex_lock(&dst_gc_mutex);
+	next = dst_busy_list;
+
+loop:
+	while ((dst = next) != NULL) {
+		next = dst->next;
+		prefetch(&next->next);
+		cond_resched();
+		if (likely(atomic_read(&dst->__refcnt))) {
+			last->next = dst;
+			last = dst;
+			delayed++;
+			continue;
+		}
+		work_performed++;
+
+		dst = dst_destroy(dst);
+		if (dst) {
+			/* NOHASH and still referenced. Unless it is already
+			 * on gc list, invalidate it and add to gc list.
+			 *
+			 * Note: this is temporary. Actually, NOHASH dst's
+			 * must be obsoleted when parent is obsoleted.
+			 * But we do not have state "obsoleted, but
+			 * referenced by parent", so it is right.
+			 */
+			if (dst->obsolete > 1)
+				continue;
+
+			___dst_free(dst);
+			dst->next = next;
+			next = dst;
+		}
+	}
+
+	spin_lock_bh(&dst_garbage.lock);
+	next = dst_garbage.list;
+	if (next) {
+		dst_garbage.list = NULL;
+		spin_unlock_bh(&dst_garbage.lock);
+		goto loop;
+	}
+	last->next = NULL;
+	dst_busy_list = head.next;
+	if (!dst_busy_list)
+		dst_garbage.timer_inc = DST_GC_MAX;
+	else {
+		/*
+		 * if we freed less than 1/10 of delayed entries,
+		 * we can sleep longer.
+		 */
+		if (work_performed <= delayed/10) {
+			dst_garbage.timer_expires += dst_garbage.timer_inc;
+			if (dst_garbage.timer_expires > DST_GC_MAX)
+				dst_garbage.timer_expires = DST_GC_MAX;
+			dst_garbage.timer_inc += DST_GC_INC;
+		} else {
+			dst_garbage.timer_inc = DST_GC_INC;
+			dst_garbage.timer_expires = DST_GC_MIN;
+		}
+		expires = dst_garbage.timer_expires;
+		/*
+		 * if the next desired timer is more than 4 seconds in the
+		 * future then round the timer to whole seconds
+		 */
+		if (expires > 4*HZ)
+			expires = round_jiffies_relative(expires);
+		schedule_delayed_work(&dst_gc_work, expires);
+	}
+
+	spin_unlock_bh(&dst_garbage.lock);
+	mutex_unlock(&dst_gc_mutex);
+}
+
+int dst_discard(struct sk_buff *skb)
+{
+	kfree_skb(skb);
+	return 0;
+}
+EXPORT_SYMBOL(dst_discard);
+
+const u32 dst_default_metrics[RTAX_MAX];
+
+void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
+		int initial_ref, int initial_obsolete, int flags)
+{
+	struct dst_entry *dst;
+
+	if (ops->gc && dst_entries_get_fast(ops) > ops->gc_thresh) {
+		if (ops->gc(ops))
+			return NULL;
+	}
+	dst = kmem_cache_alloc(ops->kmem_cachep, GFP_ATOMIC);
+	if (!dst)
+		return NULL;
+	dst->child = NULL;
+	dst->dev = dev;
+	if (dev)
+		dev_hold(dev);
+	dst->ops = ops;
+	dst_init_metrics(dst, dst_default_metrics, true);
+	dst->expires = 0UL;
+	dst->path = dst;
+	RCU_INIT_POINTER(dst->_neighbour, NULL);
+#ifdef CONFIG_XFRM
+	dst->xfrm = NULL;
+#endif
+	dst->input = dst_discard;
+	dst->output = dst_discard;
+	dst->error = 0;
+	dst->obsolete = initial_obsolete;
+	dst->header_len = 0;
+	dst->trailer_len = 0;
+#ifdef CONFIG_IP_ROUTE_CLASSID
+	dst->tclassid = 0;
+#endif
+	atomic_set(&dst->__refcnt, initial_ref);
+	dst->__use = 0;
+	dst->lastuse = jiffies;
+	dst->flags = flags;
+	dst->next = NULL;
+	if (!(flags & DST_NOCOUNT))
+		dst_entries_add(ops, 1);
+	return dst;
+}
+EXPORT_SYMBOL(dst_alloc);
+
+static void ___dst_free(struct dst_entry *dst)
+{
+	/* The first case (dev==NULL) is required, when
+	   protocol module is unloaded.
+	 */
+	if (dst->dev == NULL || !(dst->dev->flags&IFF_UP))
+		dst->input = dst->output = dst_discard;
+	dst->obsolete = 2;
+}
+
+void __dst_free(struct dst_entry *dst)
+{
+	spin_lock_bh(&dst_garbage.lock);
+	___dst_free(dst);
+	dst->next = dst_garbage.list;
+	dst_garbage.list = dst;
+	if (dst_garbage.timer_inc > DST_GC_INC) {
+		dst_garbage.timer_inc = DST_GC_INC;
+		dst_garbage.timer_expires = DST_GC_MIN;
+		cancel_delayed_work(&dst_gc_work);
+		schedule_delayed_work(&dst_gc_work, dst_garbage.timer_expires);
+	}
+	spin_unlock_bh(&dst_garbage.lock);
+}
+EXPORT_SYMBOL(__dst_free);
+
+struct dst_entry *dst_destroy(struct dst_entry * dst)
+{
+	struct dst_entry *child;
+	struct neighbour *neigh;
+
+	smp_rmb();
+
+again:
+	neigh = rcu_dereference_protected(dst->_neighbour, 1);
+	child = dst->child;
+
+	if (neigh) {
+		RCU_INIT_POINTER(dst->_neighbour, NULL);
+		neigh_release(neigh);
+	}
+
+	if (!(dst->flags & DST_NOCOUNT))
+		dst_entries_add(dst->ops, -1);
+
+	if (dst->ops->destroy)
+		dst->ops->destroy(dst);
+	if (dst->dev)
+		dev_put(dst->dev);
+	kmem_cache_free(dst->ops->kmem_cachep, dst);
+
+	dst = child;
+	if (dst) {
+		int nohash = dst->flags & DST_NOHASH;
+
+		if (atomic_dec_and_test(&dst->__refcnt)) {
+			/* We were real parent of this dst, so kill child. */
+			if (nohash)
+				goto again;
+		} else {
+			/* Child is still referenced, return it for freeing. */
+			if (nohash)
+				return dst;
+			/* Child is still in his hash table */
+		}
+	}
+	return NULL;
+}
+EXPORT_SYMBOL(dst_destroy);
+
+void dst_release(struct dst_entry *dst)
+{
+	if (dst) {
+		int newrefcnt;
+
+		newrefcnt = atomic_dec_return(&dst->__refcnt);
+		WARN_ON(newrefcnt < 0);
+		if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt) {
+			dst = dst_destroy(dst);
+			if (dst)
+				__dst_free(dst);
+		}
+	}
+}
+EXPORT_SYMBOL(dst_release);
+
+u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old)
+{
+	u32 *p = kmalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC);
+
+	if (p) {
+		u32 *old_p = __DST_METRICS_PTR(old);
+		unsigned long prev, new;
+
+		memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
+
+		new = (unsigned long) p;
+		prev = cmpxchg(&dst->_metrics, old, new);
+
+		if (prev != old) {
+			kfree(p);
+			p = __DST_METRICS_PTR(prev);
+			if (prev & DST_METRICS_READ_ONLY)
+				p = NULL;
+		}
+	}
+	return p;
+}
+EXPORT_SYMBOL(dst_cow_metrics_generic);
+
+/* Caller asserts that dst_metrics_read_only(dst) is false.  */
+void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old)
+{
+	unsigned long prev, new;
+
+	new = ((unsigned long) dst_default_metrics) | DST_METRICS_READ_ONLY;
+	prev = cmpxchg(&dst->_metrics, old, new);
+	if (prev == old)
+		kfree(__DST_METRICS_PTR(old));
+}
+EXPORT_SYMBOL(__dst_destroy_metrics_generic);
+
+/**
+ * skb_dst_set_noref - sets skb dst, without a reference
+ * @skb: buffer
+ * @dst: dst entry
+ *
+ * Sets skb dst, assuming a reference was not taken on dst
+ * skb_dst_drop() should not dst_release() this dst
+ */
+void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst)
+{
+	WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
+	/* If dst not in cache, we must take a reference, because
+	 * dst_release() will destroy dst as soon as its refcount becomes zero
+	 */
+	if (unlikely(dst->flags & DST_NOCACHE)) {
+		dst_hold(dst);
+		skb_dst_set(skb, dst);
+	} else {
+		skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF;
+	}
+}
+EXPORT_SYMBOL(skb_dst_set_noref);
+
+/* Dirty hack. We did it in 2.2 (in __dst_free),
+ * we have _very_ good reasons not to repeat
+ * this mistake in 2.3, but we have no choice
+ * now. _It_ _is_ _explicit_ _deliberate_
+ * _race_ _condition_.
+ *
+ * Commented and originally written by Alexey.
+ */
+static void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
+		       int unregister)
+{
+	if (dst->ops->ifdown)
+		dst->ops->ifdown(dst, dev, unregister);
+
+	if (dev != dst->dev)
+		return;
+
+	if (!unregister) {
+		dst->input = dst->output = dst_discard;
+	} else {
+		struct neighbour *neigh;
+
+		dst->dev = dev_net(dst->dev)->loopback_dev;
+		dev_hold(dst->dev);
+		dev_put(dev);
+		rcu_read_lock();
+		neigh = dst_get_neighbour_noref(dst);
+		if (neigh && neigh->dev == dev) {
+			neigh->dev = dst->dev;
+			dev_hold(dst->dev);
+			dev_put(dev);
+		}
+		rcu_read_unlock();
+	}
+}
+
+static int dst_dev_event(struct notifier_block *this, unsigned long event,
+			 void *ptr)
+{
+	struct net_device *dev = ptr;
+	struct dst_entry *dst, *last = NULL;
+
+	switch (event) {
+	case NETDEV_UNREGISTER:
+	case NETDEV_DOWN:
+		mutex_lock(&dst_gc_mutex);
+		for (dst = dst_busy_list; dst; dst = dst->next) {
+			last = dst;
+			dst_ifdown(dst, dev, event != NETDEV_DOWN);
+		}
+
+		spin_lock_bh(&dst_garbage.lock);
+		dst = dst_garbage.list;
+		dst_garbage.list = NULL;
+		spin_unlock_bh(&dst_garbage.lock);
+
+		if (last)
+			last->next = dst;
+		else
+			dst_busy_list = dst;
+		for (; dst; dst = dst->next)
+			dst_ifdown(dst, dev, event != NETDEV_DOWN);
+		mutex_unlock(&dst_gc_mutex);
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block dst_dev_notifier = {
+	.notifier_call	= dst_dev_event,
+	.priority = -10, /* must be called after other network notifiers */
+};
+
+void __init dst_init(void)
+{
+	register_netdevice_notifier(&dst_dev_notifier);
+}
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
new file mode 100644
index 00000000..6d6d7d25
--- /dev/null
+++ b/net/core/ethtool.c
@@ -0,0 +1,1510 @@
+/*
+ * net/core/ethtool.c - Ethtool ioctl handler
+ * Copyright (c) 2003 Matthew Wilcox <matthew@wil.cx>
+ *
+ * This file is where we call all the ethtool_ops commands to get
+ * the information ethtool needs.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/capability.h>
+#include <linux/errno.h>
+#include <linux/ethtool.h>
+#include <linux/netdevice.h>
+#include <linux/bitops.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
+#include <linux/rtnetlink.h>
+#include <linux/sched.h>
+
+/*
+ * Some useful ethtool_ops methods that're device independent.
+ * If we find that all drivers want to do the same thing here,
+ * we can turn these into dev_() function calls.
+ */
+
+u32 ethtool_op_get_link(struct net_device *dev)
+{
+	return netif_carrier_ok(dev) ? 1 : 0;
+}
+EXPORT_SYMBOL(ethtool_op_get_link);
+
+/* Handlers for each ethtool command */
+
+#define ETHTOOL_DEV_FEATURE_WORDS	((NETDEV_FEATURE_COUNT + 31) / 32)
+
+static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = {
+	[NETIF_F_SG_BIT] =               "tx-scatter-gather",
+	[NETIF_F_IP_CSUM_BIT] =          "tx-checksum-ipv4",
+	[NETIF_F_HW_CSUM_BIT] =          "tx-checksum-ip-generic",
+	[NETIF_F_IPV6_CSUM_BIT] =        "tx-checksum-ipv6",
+	[NETIF_F_HIGHDMA_BIT] =          "highdma",
+	[NETIF_F_FRAGLIST_BIT] =         "tx-scatter-gather-fraglist",
+	[NETIF_F_HW_VLAN_TX_BIT] =       "tx-vlan-hw-insert",
+
+	[NETIF_F_HW_VLAN_RX_BIT] =       "rx-vlan-hw-parse",
+	[NETIF_F_HW_VLAN_FILTER_BIT] =   "rx-vlan-filter",
+	[NETIF_F_VLAN_CHALLENGED_BIT] =  "vlan-challenged",
+	[NETIF_F_GSO_BIT] =              "tx-generic-segmentation",
+	[NETIF_F_LLTX_BIT] =             "tx-lockless",
+	[NETIF_F_NETNS_LOCAL_BIT] =      "netns-local",
+	[NETIF_F_GRO_BIT] =              "rx-gro",
+	[NETIF_F_LRO_BIT] =              "rx-lro",
+
+	[NETIF_F_TSO_BIT] =              "tx-tcp-segmentation",
+	[NETIF_F_UFO_BIT] =              "tx-udp-fragmentation",
+	[NETIF_F_GSO_ROBUST_BIT] =       "tx-gso-robust",
+	[NETIF_F_TSO_ECN_BIT] =          "tx-tcp-ecn-segmentation",
+	[NETIF_F_TSO6_BIT] =             "tx-tcp6-segmentation",
+	[NETIF_F_FSO_BIT] =              "tx-fcoe-segmentation",
+
+	[NETIF_F_FCOE_CRC_BIT] =         "tx-checksum-fcoe-crc",
+	[NETIF_F_SCTP_CSUM_BIT] =        "tx-checksum-sctp",
+	[NETIF_F_FCOE_MTU_BIT] =         "fcoe-mtu",
+	[NETIF_F_NTUPLE_BIT] =           "rx-ntuple-filter",
+	[NETIF_F_RXHASH_BIT] =           "rx-hashing",
+	[NETIF_F_RXCSUM_BIT] =           "rx-checksum",
+	[NETIF_F_NOCACHE_COPY_BIT] =     "tx-nocache-copy",
+	[NETIF_F_LOOPBACK_BIT] =         "loopback",
+	[NETIF_F_RXFCS_BIT] =            "rx-fcs",
+	[NETIF_F_RXALL_BIT] =            "rx-all",
+};
+
+static int ethtool_get_features(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_gfeatures cmd = {
+		.cmd = ETHTOOL_GFEATURES,
+		.size = ETHTOOL_DEV_FEATURE_WORDS,
+	};
+	struct ethtool_get_features_block features[ETHTOOL_DEV_FEATURE_WORDS];
+	u32 __user *sizeaddr;
+	u32 copy_size;
+	int i;
+
+	/* in case feature bits run out again */
+	BUILD_BUG_ON(ETHTOOL_DEV_FEATURE_WORDS * sizeof(u32) > sizeof(netdev_features_t));
+
+	for (i = 0; i < ETHTOOL_DEV_FEATURE_WORDS; ++i) {
+		features[i].available = (u32)(dev->hw_features >> (32 * i));
+		features[i].requested = (u32)(dev->wanted_features >> (32 * i));
+		features[i].active = (u32)(dev->features >> (32 * i));
+		features[i].never_changed =
+			(u32)(NETIF_F_NEVER_CHANGE >> (32 * i));
+	}
+
+	sizeaddr = useraddr + offsetof(struct ethtool_gfeatures, size);
+	if (get_user(copy_size, sizeaddr))
+		return -EFAULT;
+
+	if (copy_size > ETHTOOL_DEV_FEATURE_WORDS)
+		copy_size = ETHTOOL_DEV_FEATURE_WORDS;
+
+	if (copy_to_user(useraddr, &cmd, sizeof(cmd)))
+		return -EFAULT;
+	useraddr += sizeof(cmd);
+	if (copy_to_user(useraddr, features, copy_size * sizeof(*features)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int ethtool_set_features(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_sfeatures cmd;
+	struct ethtool_set_features_block features[ETHTOOL_DEV_FEATURE_WORDS];
+	netdev_features_t wanted = 0, valid = 0;
+	int i, ret = 0;
+
+	if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
+		return -EFAULT;
+	useraddr += sizeof(cmd);
+
+	if (cmd.size != ETHTOOL_DEV_FEATURE_WORDS)
+		return -EINVAL;
+
+	if (copy_from_user(features, useraddr, sizeof(features)))
+		return -EFAULT;
+
+	for (i = 0; i < ETHTOOL_DEV_FEATURE_WORDS; ++i) {
+		valid |= (netdev_features_t)features[i].valid << (32 * i);
+		wanted |= (netdev_features_t)features[i].requested << (32 * i);
+	}
+
+	if (valid & ~NETIF_F_ETHTOOL_BITS)
+		return -EINVAL;
+
+	if (valid & ~dev->hw_features) {
+		valid &= dev->hw_features;
+		ret |= ETHTOOL_F_UNSUPPORTED;
+	}
+
+	dev->wanted_features &= ~valid;
+	dev->wanted_features |= wanted & valid;
+	__netdev_update_features(dev);
+
+	if ((dev->wanted_features ^ dev->features) & valid)
+		ret |= ETHTOOL_F_WISH;
+
+	return ret;
+}
+
+static int __ethtool_get_sset_count(struct net_device *dev, int sset)
+{
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+
+	if (sset == ETH_SS_FEATURES)
+		return ARRAY_SIZE(netdev_features_strings);
+
+	if (ops && ops->get_sset_count && ops->get_strings)
+		return ops->get_sset_count(dev, sset);
+	else
+		return -EOPNOTSUPP;
+}
+
+static void __ethtool_get_strings(struct net_device *dev,
+	u32 stringset, u8 *data)
+{
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+
+	if (stringset == ETH_SS_FEATURES)
+		memcpy(data, netdev_features_strings,
+			sizeof(netdev_features_strings));
+	else
+		/* ops->get_strings is valid because checked earlier */
+		ops->get_strings(dev, stringset, data);
+}
+
+static netdev_features_t ethtool_get_feature_mask(u32 eth_cmd)
+{
+	/* feature masks of legacy discrete ethtool ops */
+
+	switch (eth_cmd) {
+	case ETHTOOL_GTXCSUM:
+	case ETHTOOL_STXCSUM:
+		return NETIF_F_ALL_CSUM | NETIF_F_SCTP_CSUM;
+	case ETHTOOL_GRXCSUM:
+	case ETHTOOL_SRXCSUM:
+		return NETIF_F_RXCSUM;
+	case ETHTOOL_GSG:
+	case ETHTOOL_SSG:
+		return NETIF_F_SG;
+	case ETHTOOL_GTSO:
+	case ETHTOOL_STSO:
+		return NETIF_F_ALL_TSO;
+	case ETHTOOL_GUFO:
+	case ETHTOOL_SUFO:
+		return NETIF_F_UFO;
+	case ETHTOOL_GGSO:
+	case ETHTOOL_SGSO:
+		return NETIF_F_GSO;
+	case ETHTOOL_GGRO:
+	case ETHTOOL_SGRO:
+		return NETIF_F_GRO;
+	default:
+		BUG();
+	}
+}
+
+static int ethtool_get_one_feature(struct net_device *dev,
+	char __user *useraddr, u32 ethcmd)
+{
+	netdev_features_t mask = ethtool_get_feature_mask(ethcmd);
+	struct ethtool_value edata = {
+		.cmd = ethcmd,
+		.data = !!(dev->features & mask),
+	};
+
+	if (copy_to_user(useraddr, &edata, sizeof(edata)))
+		return -EFAULT;
+	return 0;
+}
+
+static int ethtool_set_one_feature(struct net_device *dev,
+	void __user *useraddr, u32 ethcmd)
+{
+	struct ethtool_value edata;
+	netdev_features_t mask;
+
+	if (copy_from_user(&edata, useraddr, sizeof(edata)))
+		return -EFAULT;
+
+	mask = ethtool_get_feature_mask(ethcmd);
+	mask &= dev->hw_features;
+	if (!mask)
+		return -EOPNOTSUPP;
+
+	if (edata.data)
+		dev->wanted_features |= mask;
+	else
+		dev->wanted_features &= ~mask;
+
+	__netdev_update_features(dev);
+
+	return 0;
+}
+
+#define ETH_ALL_FLAGS    (ETH_FLAG_LRO | ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN | \
+			  ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH)
+#define ETH_ALL_FEATURES (NETIF_F_LRO | NETIF_F_HW_VLAN_RX | \
+			  NETIF_F_HW_VLAN_TX | NETIF_F_NTUPLE | NETIF_F_RXHASH)
+
+static u32 __ethtool_get_flags(struct net_device *dev)
+{
+	u32 flags = 0;
+
+	if (dev->features & NETIF_F_LRO)	flags |= ETH_FLAG_LRO;
+	if (dev->features & NETIF_F_HW_VLAN_RX)	flags |= ETH_FLAG_RXVLAN;
+	if (dev->features & NETIF_F_HW_VLAN_TX)	flags |= ETH_FLAG_TXVLAN;
+	if (dev->features & NETIF_F_NTUPLE)	flags |= ETH_FLAG_NTUPLE;
+	if (dev->features & NETIF_F_RXHASH)	flags |= ETH_FLAG_RXHASH;
+
+	return flags;
+}
+
+static int __ethtool_set_flags(struct net_device *dev, u32 data)
+{
+	netdev_features_t features = 0, changed;
+
+	if (data & ~ETH_ALL_FLAGS)
+		return -EINVAL;
+
+	if (data & ETH_FLAG_LRO)	features |= NETIF_F_LRO;
+	if (data & ETH_FLAG_RXVLAN)	features |= NETIF_F_HW_VLAN_RX;
+	if (data & ETH_FLAG_TXVLAN)	features |= NETIF_F_HW_VLAN_TX;
+	if (data & ETH_FLAG_NTUPLE)	features |= NETIF_F_NTUPLE;
+	if (data & ETH_FLAG_RXHASH)	features |= NETIF_F_RXHASH;
+
+	/* allow changing only bits set in hw_features */
+	changed = (features ^ dev->features) & ETH_ALL_FEATURES;
+	if (changed & ~dev->hw_features)
+		return (changed & dev->hw_features) ? -EINVAL : -EOPNOTSUPP;
+
+	dev->wanted_features =
+		(dev->wanted_features & ~changed) | (features & changed);
+
+	__netdev_update_features(dev);
+
+	return 0;
+}
+
+int __ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+{
+	ASSERT_RTNL();
+
+	if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings)
+		return -EOPNOTSUPP;
+
+	memset(cmd, 0, sizeof(struct ethtool_cmd));
+	cmd->cmd = ETHTOOL_GSET;
+	return dev->ethtool_ops->get_settings(dev, cmd);
+}
+EXPORT_SYMBOL(__ethtool_get_settings);
+
+static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
+{
+	int err;
+	struct ethtool_cmd cmd;
+
+	err = __ethtool_get_settings(dev, &cmd);
+	if (err < 0)
+		return err;
+
+	if (copy_to_user(useraddr, &cmd, sizeof(cmd)))
+		return -EFAULT;
+	return 0;
+}
+
+static int ethtool_set_settings(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_cmd cmd;
+
+	if (!dev->ethtool_ops->set_settings)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
+		return -EFAULT;
+
+	return dev->ethtool_ops->set_settings(dev, &cmd);
+}
+
+static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
+						  void __user *useraddr)
+{
+	struct ethtool_drvinfo info;
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+
+	memset(&info, 0, sizeof(info));
+	info.cmd = ETHTOOL_GDRVINFO;
+	if (ops && ops->get_drvinfo) {
+		ops->get_drvinfo(dev, &info);
+	} else if (dev->dev.parent && dev->dev.parent->driver) {
+		strlcpy(info.bus_info, dev_name(dev->dev.parent),
+			sizeof(info.bus_info));
+		strlcpy(info.driver, dev->dev.parent->driver->name,
+			sizeof(info.driver));
+	} else {
+		return -EOPNOTSUPP;
+	}
+
+	/*
+	 * this method of obtaining string set info is deprecated;
+	 * Use ETHTOOL_GSSET_INFO instead.
+	 */
+	if (ops && ops->get_sset_count) {
+		int rc;
+
+		rc = ops->get_sset_count(dev, ETH_SS_TEST);
+		if (rc >= 0)
+			info.testinfo_len = rc;
+		rc = ops->get_sset_count(dev, ETH_SS_STATS);
+		if (rc >= 0)
+			info.n_stats = rc;
+		rc = ops->get_sset_count(dev, ETH_SS_PRIV_FLAGS);
+		if (rc >= 0)
+			info.n_priv_flags = rc;
+	}
+	if (ops && ops->get_regs_len)
+		info.regdump_len = ops->get_regs_len(dev);
+	if (ops && ops->get_eeprom_len)
+		info.eedump_len = ops->get_eeprom_len(dev);
+
+	if (copy_to_user(useraddr, &info, sizeof(info)))
+		return -EFAULT;
+	return 0;
+}
+
+static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev,
+						    void __user *useraddr)
+{
+	struct ethtool_sset_info info;
+	u64 sset_mask;
+	int i, idx = 0, n_bits = 0, ret, rc;
+	u32 *info_buf = NULL;
+
+	if (copy_from_user(&info, useraddr, sizeof(info)))
+		return -EFAULT;
+
+	/* store copy of mask, because we zero struct later on */
+	sset_mask = info.sset_mask;
+	if (!sset_mask)
+		return 0;
+
+	/* calculate size of return buffer */
+	n_bits = hweight64(sset_mask);
+
+	memset(&info, 0, sizeof(info));
+	info.cmd = ETHTOOL_GSSET_INFO;
+
+	info_buf = kzalloc(n_bits * sizeof(u32), GFP_USER);
+	if (!info_buf)
+		return -ENOMEM;
+
+	/*
+	 * fill return buffer based on input bitmask and successful
+	 * get_sset_count return
+	 */
+	for (i = 0; i < 64; i++) {
+		if (!(sset_mask & (1ULL << i)))
+			continue;
+
+		rc = __ethtool_get_sset_count(dev, i);
+		if (rc >= 0) {
+			info.sset_mask |= (1ULL << i);
+			info_buf[idx++] = rc;
+		}
+	}
+
+	ret = -EFAULT;
+	if (copy_to_user(useraddr, &info, sizeof(info)))
+		goto out;
+
+	useraddr += offsetof(struct ethtool_sset_info, data);
+	if (copy_to_user(useraddr, info_buf, idx * sizeof(u32)))
+		goto out;
+
+	ret = 0;
+
+out:
+	kfree(info_buf);
+	return ret;
+}
+
+static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
+						u32 cmd, void __user *useraddr)
+{
+	struct ethtool_rxnfc info;
+	size_t info_size = sizeof(info);
+	int rc;
+
+	if (!dev->ethtool_ops->set_rxnfc)
+		return -EOPNOTSUPP;
+
+	/* struct ethtool_rxnfc was originally defined for
+	 * ETHTOOL_{G,S}RXFH with only the cmd, flow_type and data
+	 * members.  User-space might still be using that
+	 * definition. */
+	if (cmd == ETHTOOL_SRXFH)
+		info_size = (offsetof(struct ethtool_rxnfc, data) +
+			     sizeof(info.data));
+
+	if (copy_from_user(&info, useraddr, info_size))
+		return -EFAULT;
+
+	rc = dev->ethtool_ops->set_rxnfc(dev, &info);
+	if (rc)
+		return rc;
+
+	if (cmd == ETHTOOL_SRXCLSRLINS &&
+	    copy_to_user(useraddr, &info, info_size))
+		return -EFAULT;
+
+	return 0;
+}
+
+static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
+						u32 cmd, void __user *useraddr)
+{
+	struct ethtool_rxnfc info;
+	size_t info_size = sizeof(info);
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+	int ret;
+	void *rule_buf = NULL;
+
+	if (!ops->get_rxnfc)
+		return -EOPNOTSUPP;
+
+	/* struct ethtool_rxnfc was originally defined for
+	 * ETHTOOL_{G,S}RXFH with only the cmd, flow_type and data
+	 * members.  User-space might still be using that
+	 * definition. */
+	if (cmd == ETHTOOL_GRXFH)
+		info_size = (offsetof(struct ethtool_rxnfc, data) +
+			     sizeof(info.data));
+
+	if (copy_from_user(&info, useraddr, info_size))
+		return -EFAULT;
+
+	if (info.cmd == ETHTOOL_GRXCLSRLALL) {
+		if (info.rule_cnt > 0) {
+			if (info.rule_cnt <= KMALLOC_MAX_SIZE / sizeof(u32))
+				rule_buf = kzalloc(info.rule_cnt * sizeof(u32),
+						   GFP_USER);
+			if (!rule_buf)
+				return -ENOMEM;
+		}
+	}
+
+	ret = ops->get_rxnfc(dev, &info, rule_buf);
+	if (ret < 0)
+		goto err_out;
+
+	ret = -EFAULT;
+	if (copy_to_user(useraddr, &info, info_size))
+		goto err_out;
+
+	if (rule_buf) {
+		useraddr += offsetof(struct ethtool_rxnfc, rule_locs);
+		if (copy_to_user(useraddr, rule_buf,
+				 info.rule_cnt * sizeof(u32)))
+			goto err_out;
+	}
+	ret = 0;
+
+err_out:
+	kfree(rule_buf);
+
+	return ret;
+}
+
+static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev,
+						     void __user *useraddr)
+{
+	u32 user_size, dev_size;
+	u32 *indir;
+	int ret;
+
+	if (!dev->ethtool_ops->get_rxfh_indir_size ||
+	    !dev->ethtool_ops->get_rxfh_indir)
+		return -EOPNOTSUPP;
+	dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev);
+	if (dev_size == 0)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&user_size,
+			   useraddr + offsetof(struct ethtool_rxfh_indir, size),
+			   sizeof(user_size)))
+		return -EFAULT;
+
+	if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh_indir, size),
+			 &dev_size, sizeof(dev_size)))
+		return -EFAULT;
+
+	/* If the user buffer size is 0, this is just a query for the
+	 * device table size.  Otherwise, if it's smaller than the
+	 * device table size it's an error.
+	 */
+	if (user_size < dev_size)
+		return user_size == 0 ? 0 : -EINVAL;
+
+	indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER);
+	if (!indir)
+		return -ENOMEM;
+
+	ret = dev->ethtool_ops->get_rxfh_indir(dev, indir);
+	if (ret)
+		goto out;
+
+	if (copy_to_user(useraddr +
+			 offsetof(struct ethtool_rxfh_indir, ring_index[0]),
+			 indir, dev_size * sizeof(indir[0])))
+		ret = -EFAULT;
+
+out:
+	kfree(indir);
+	return ret;
+}
+
+static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
+						     void __user *useraddr)
+{
+	struct ethtool_rxnfc rx_rings;
+	u32 user_size, dev_size, i;
+	u32 *indir;
+	int ret;
+
+	if (!dev->ethtool_ops->get_rxfh_indir_size ||
+	    !dev->ethtool_ops->set_rxfh_indir ||
+	    !dev->ethtool_ops->get_rxnfc)
+		return -EOPNOTSUPP;
+	dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev);
+	if (dev_size == 0)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&user_size,
+			   useraddr + offsetof(struct ethtool_rxfh_indir, size),
+			   sizeof(user_size)))
+		return -EFAULT;
+
+	if (user_size != 0 && user_size != dev_size)
+		return -EINVAL;
+
+	indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER);
+	if (!indir)
+		return -ENOMEM;
+
+	rx_rings.cmd = ETHTOOL_GRXRINGS;
+	ret = dev->ethtool_ops->get_rxnfc(dev, &rx_rings, NULL);
+	if (ret)
+		goto out;
+
+	if (user_size == 0) {
+		for (i = 0; i < dev_size; i++)
+			indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data);
+	} else {
+		if (copy_from_user(indir,
+				  useraddr +
+				  offsetof(struct ethtool_rxfh_indir,
+					   ring_index[0]),
+				  dev_size * sizeof(indir[0]))) {
+			ret = -EFAULT;
+			goto out;
+		}
+
+		/* Validate ring indices */
+		for (i = 0; i < dev_size; i++) {
+			if (indir[i] >= rx_rings.data) {
+				ret = -EINVAL;
+				goto out;
+			}
+		}
+	}
+
+	ret = dev->ethtool_ops->set_rxfh_indir(dev, indir);
+
+out:
+	kfree(indir);
+	return ret;
+}
+
+static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_regs regs;
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+	void *regbuf;
+	int reglen, ret;
+
+	if (!ops->get_regs || !ops->get_regs_len)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&regs, useraddr, sizeof(regs)))
+		return -EFAULT;
+
+	reglen = ops->get_regs_len(dev);
+	if (regs.len > reglen)
+		regs.len = reglen;
+
+	regbuf = vzalloc(reglen);
+	if (reglen && !regbuf)
+		return -ENOMEM;
+
+	ops->get_regs(dev, &regs, regbuf);
+
+	ret = -EFAULT;
+	if (copy_to_user(useraddr, &regs, sizeof(regs)))
+		goto out;
+	useraddr += offsetof(struct ethtool_regs, data);
+	if (regbuf && copy_to_user(useraddr, regbuf, regs.len))
+		goto out;
+	ret = 0;
+
+ out:
+	vfree(regbuf);
+	return ret;
+}
+
+static int ethtool_reset(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_value reset;
+	int ret;
+
+	if (!dev->ethtool_ops->reset)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&reset, useraddr, sizeof(reset)))
+		return -EFAULT;
+
+	ret = dev->ethtool_ops->reset(dev, &reset.data);
+	if (ret)
+		return ret;
+
+	if (copy_to_user(useraddr, &reset, sizeof(reset)))
+		return -EFAULT;
+	return 0;
+}
+
+static int ethtool_get_wol(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL };
+
+	if (!dev->ethtool_ops->get_wol)
+		return -EOPNOTSUPP;
+
+	dev->ethtool_ops->get_wol(dev, &wol);
+
+	if (copy_to_user(useraddr, &wol, sizeof(wol)))
+		return -EFAULT;
+	return 0;
+}
+
+static int ethtool_set_wol(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_wolinfo wol;
+
+	if (!dev->ethtool_ops->set_wol)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&wol, useraddr, sizeof(wol)))
+		return -EFAULT;
+
+	return dev->ethtool_ops->set_wol(dev, &wol);
+}
+
+static int ethtool_nway_reset(struct net_device *dev)
+{
+	if (!dev->ethtool_ops->nway_reset)
+		return -EOPNOTSUPP;
+
+	return dev->ethtool_ops->nway_reset(dev);
+}
+
+static int ethtool_get_link(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_value edata = { .cmd = ETHTOOL_GLINK };
+
+	if (!dev->ethtool_ops->get_link)
+		return -EOPNOTSUPP;
+
+	edata.data = netif_running(dev) && dev->ethtool_ops->get_link(dev);
+
+	if (copy_to_user(useraddr, &edata, sizeof(edata)))
+		return -EFAULT;
+	return 0;
+}
+
+static int ethtool_get_eeprom(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_eeprom eeprom;
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+	void __user *userbuf = useraddr + sizeof(eeprom);
+	u32 bytes_remaining;
+	u8 *data;
+	int ret = 0;
+
+	if (!ops->get_eeprom || !ops->get_eeprom_len)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&eeprom, useraddr, sizeof(eeprom)))
+		return -EFAULT;
+
+	/* Check for wrap and zero */
+	if (eeprom.offset + eeprom.len <= eeprom.offset)
+		return -EINVAL;
+
+	/* Check for exceeding total eeprom len */
+	if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev))
+		return -EINVAL;
+
+	data = kmalloc(PAGE_SIZE, GFP_USER);
+	if (!data)
+		return -ENOMEM;
+
+	bytes_remaining = eeprom.len;
+	while (bytes_remaining > 0) {
+		eeprom.len = min(bytes_remaining, (u32)PAGE_SIZE);
+
+		ret = ops->get_eeprom(dev, &eeprom, data);
+		if (ret)
+			break;
+		if (copy_to_user(userbuf, data, eeprom.len)) {
+			ret = -EFAULT;
+			break;
+		}
+		userbuf += eeprom.len;
+		eeprom.offset += eeprom.len;
+		bytes_remaining -= eeprom.len;
+	}
+
+	eeprom.len = userbuf - (useraddr + sizeof(eeprom));
+	eeprom.offset -= eeprom.len;
+	if (copy_to_user(useraddr, &eeprom, sizeof(eeprom)))
+		ret = -EFAULT;
+
+	kfree(data);
+	return ret;
+}
+
+static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_eeprom eeprom;
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+	void __user *userbuf = useraddr + sizeof(eeprom);
+	u32 bytes_remaining;
+	u8 *data;
+	int ret = 0;
+
+	if (!ops->set_eeprom || !ops->get_eeprom_len)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&eeprom, useraddr, sizeof(eeprom)))
+		return -EFAULT;
+
+	/* Check for wrap and zero */
+	if (eeprom.offset + eeprom.len <= eeprom.offset)
+		return -EINVAL;
+
+	/* Check for exceeding total eeprom len */
+	if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev))
+		return -EINVAL;
+
+	data = kmalloc(PAGE_SIZE, GFP_USER);
+	if (!data)
+		return -ENOMEM;
+
+	bytes_remaining = eeprom.len;
+	while (bytes_remaining > 0) {
+		eeprom.len = min(bytes_remaining, (u32)PAGE_SIZE);
+
+		if (copy_from_user(data, userbuf, eeprom.len)) {
+			ret = -EFAULT;
+			break;
+		}
+		ret = ops->set_eeprom(dev, &eeprom, data);
+		if (ret)
+			break;
+		userbuf += eeprom.len;
+		eeprom.offset += eeprom.len;
+		bytes_remaining -= eeprom.len;
+	}
+
+	kfree(data);
+	return ret;
+}
+
+static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev,
+						   void __user *useraddr)
+{
+	struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE };
+
+	if (!dev->ethtool_ops->get_coalesce)
+		return -EOPNOTSUPP;
+
+	dev->ethtool_ops->get_coalesce(dev, &coalesce);
+
+	if (copy_to_user(useraddr, &coalesce, sizeof(coalesce)))
+		return -EFAULT;
+	return 0;
+}
+
+static noinline_for_stack int ethtool_set_coalesce(struct net_device *dev,
+						   void __user *useraddr)
+{
+	struct ethtool_coalesce coalesce;
+
+	if (!dev->ethtool_ops->set_coalesce)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&coalesce, useraddr, sizeof(coalesce)))
+		return -EFAULT;
+
+	return dev->ethtool_ops->set_coalesce(dev, &coalesce);
+}
+
+static int ethtool_get_ringparam(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_ringparam ringparam = { .cmd = ETHTOOL_GRINGPARAM };
+
+	if (!dev->ethtool_ops->get_ringparam)
+		return -EOPNOTSUPP;
+
+	dev->ethtool_ops->get_ringparam(dev, &ringparam);
+
+	if (copy_to_user(useraddr, &ringparam, sizeof(ringparam)))
+		return -EFAULT;
+	return 0;
+}
+
+static int ethtool_set_ringparam(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_ringparam ringparam;
+
+	if (!dev->ethtool_ops->set_ringparam)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&ringparam, useraddr, sizeof(ringparam)))
+		return -EFAULT;
+
+	return dev->ethtool_ops->set_ringparam(dev, &ringparam);
+}
+
+static noinline_for_stack int ethtool_get_channels(struct net_device *dev,
+						   void __user *useraddr)
+{
+	struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS };
+
+	if (!dev->ethtool_ops->get_channels)
+		return -EOPNOTSUPP;
+
+	dev->ethtool_ops->get_channels(dev, &channels);
+
+	if (copy_to_user(useraddr, &channels, sizeof(channels)))
+		return -EFAULT;
+	return 0;
+}
+
+static noinline_for_stack int ethtool_set_channels(struct net_device *dev,
+						   void __user *useraddr)
+{
+	struct ethtool_channels channels;
+
+	if (!dev->ethtool_ops->set_channels)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&channels, useraddr, sizeof(channels)))
+		return -EFAULT;
+
+	return dev->ethtool_ops->set_channels(dev, &channels);
+}
+
+static int ethtool_get_pauseparam(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_pauseparam pauseparam = { ETHTOOL_GPAUSEPARAM };
+
+	if (!dev->ethtool_ops->get_pauseparam)
+		return -EOPNOTSUPP;
+
+	dev->ethtool_ops->get_pauseparam(dev, &pauseparam);
+
+	if (copy_to_user(useraddr, &pauseparam, sizeof(pauseparam)))
+		return -EFAULT;
+	return 0;
+}
+
+static int ethtool_set_pauseparam(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_pauseparam pauseparam;
+
+	if (!dev->ethtool_ops->set_pauseparam)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&pauseparam, useraddr, sizeof(pauseparam)))
+		return -EFAULT;
+
+	return dev->ethtool_ops->set_pauseparam(dev, &pauseparam);
+}
+
+static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_test test;
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+	u64 *data;
+	int ret, test_len;
+
+	if (!ops->self_test || !ops->get_sset_count)
+		return -EOPNOTSUPP;
+
+	test_len = ops->get_sset_count(dev, ETH_SS_TEST);
+	if (test_len < 0)
+		return test_len;
+	WARN_ON(test_len == 0);
+
+	if (copy_from_user(&test, useraddr, sizeof(test)))
+		return -EFAULT;
+
+	test.len = test_len;
+	data = kmalloc(test_len * sizeof(u64), GFP_USER);
+	if (!data)
+		return -ENOMEM;
+
+	ops->self_test(dev, &test, data);
+
+	ret = -EFAULT;
+	if (copy_to_user(useraddr, &test, sizeof(test)))
+		goto out;
+	useraddr += sizeof(test);
+	if (copy_to_user(useraddr, data, test.len * sizeof(u64)))
+		goto out;
+	ret = 0;
+
+ out:
+	kfree(data);
+	return ret;
+}
+
+static int ethtool_get_strings(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_gstrings gstrings;
+	u8 *data;
+	int ret;
+
+	if (copy_from_user(&gstrings, useraddr, sizeof(gstrings)))
+		return -EFAULT;
+
+	ret = __ethtool_get_sset_count(dev, gstrings.string_set);
+	if (ret < 0)
+		return ret;
+
+	gstrings.len = ret;
+
+	data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER);
+	if (!data)
+		return -ENOMEM;
+
+	__ethtool_get_strings(dev, gstrings.string_set, data);
+
+	ret = -EFAULT;
+	if (copy_to_user(useraddr, &gstrings, sizeof(gstrings)))
+		goto out;
+	useraddr += sizeof(gstrings);
+	if (copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN))
+		goto out;
+	ret = 0;
+
+out:
+	kfree(data);
+	return ret;
+}
+
+static int ethtool_phys_id(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_value id;
+	static bool busy;
+	int rc;
+
+	if (!dev->ethtool_ops->set_phys_id)
+		return -EOPNOTSUPP;
+
+	if (busy)
+		return -EBUSY;
+
+	if (copy_from_user(&id, useraddr, sizeof(id)))
+		return -EFAULT;
+
+	rc = dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_ACTIVE);
+	if (rc < 0)
+		return rc;
+
+	/* Drop the RTNL lock while waiting, but prevent reentry or
+	 * removal of the device.
+	 */
+	busy = true;
+	dev_hold(dev);
+	rtnl_unlock();
+
+	if (rc == 0) {
+		/* Driver will handle this itself */
+		schedule_timeout_interruptible(
+			id.data ? (id.data * HZ) : MAX_SCHEDULE_TIMEOUT);
+	} else {
+		/* Driver expects to be called at twice the frequency in rc */
+		int n = rc * 2, i, interval = HZ / n;
+
+		/* Count down seconds */
+		do {
+			/* Count down iterations per second */
+			i = n;
+			do {
+				rtnl_lock();
+				rc = dev->ethtool_ops->set_phys_id(dev,
+				    (i & 1) ? ETHTOOL_ID_OFF : ETHTOOL_ID_ON);
+				rtnl_unlock();
+				if (rc)
+					break;
+				schedule_timeout_interruptible(interval);
+			} while (!signal_pending(current) && --i != 0);
+		} while (!signal_pending(current) &&
+			 (id.data == 0 || --id.data != 0));
+	}
+
+	rtnl_lock();
+	dev_put(dev);
+	busy = false;
+
+	(void)dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_INACTIVE);
+	return rc;
+}
+
+static int ethtool_get_stats(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_stats stats;
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+	u64 *data;
+	int ret, n_stats;
+
+	if (!ops->get_ethtool_stats || !ops->get_sset_count)
+		return -EOPNOTSUPP;
+
+	n_stats = ops->get_sset_count(dev, ETH_SS_STATS);
+	if (n_stats < 0)
+		return n_stats;
+	WARN_ON(n_stats == 0);
+
+	if (copy_from_user(&stats, useraddr, sizeof(stats)))
+		return -EFAULT;
+
+	stats.n_stats = n_stats;
+	data = kmalloc(n_stats * sizeof(u64), GFP_USER);
+	if (!data)
+		return -ENOMEM;
+
+	ops->get_ethtool_stats(dev, &stats, data);
+
+	ret = -EFAULT;
+	if (copy_to_user(useraddr, &stats, sizeof(stats)))
+		goto out;
+	useraddr += sizeof(stats);
+	if (copy_to_user(useraddr, data, stats.n_stats * sizeof(u64)))
+		goto out;
+	ret = 0;
+
+ out:
+	kfree(data);
+	return ret;
+}
+
+static int ethtool_get_perm_addr(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_perm_addr epaddr;
+
+	if (copy_from_user(&epaddr, useraddr, sizeof(epaddr)))
+		return -EFAULT;
+
+	if (epaddr.size < dev->addr_len)
+		return -ETOOSMALL;
+	epaddr.size = dev->addr_len;
+
+	if (copy_to_user(useraddr, &epaddr, sizeof(epaddr)))
+		return -EFAULT;
+	useraddr += sizeof(epaddr);
+	if (copy_to_user(useraddr, dev->perm_addr, epaddr.size))
+		return -EFAULT;
+	return 0;
+}
+
+static int ethtool_get_value(struct net_device *dev, char __user *useraddr,
+			     u32 cmd, u32 (*actor)(struct net_device *))
+{
+	struct ethtool_value edata = { .cmd = cmd };
+
+	if (!actor)
+		return -EOPNOTSUPP;
+
+	edata.data = actor(dev);
+
+	if (copy_to_user(useraddr, &edata, sizeof(edata)))
+		return -EFAULT;
+	return 0;
+}
+
+static int ethtool_set_value_void(struct net_device *dev, char __user *useraddr,
+			     void (*actor)(struct net_device *, u32))
+{
+	struct ethtool_value edata;
+
+	if (!actor)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&edata, useraddr, sizeof(edata)))
+		return -EFAULT;
+
+	actor(dev, edata.data);
+	return 0;
+}
+
+static int ethtool_set_value(struct net_device *dev, char __user *useraddr,
+			     int (*actor)(struct net_device *, u32))
+{
+	struct ethtool_value edata;
+
+	if (!actor)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&edata, useraddr, sizeof(edata)))
+		return -EFAULT;
+
+	return actor(dev, edata.data);
+}
+
+static noinline_for_stack int ethtool_flash_device(struct net_device *dev,
+						   char __user *useraddr)
+{
+	struct ethtool_flash efl;
+
+	if (copy_from_user(&efl, useraddr, sizeof(efl)))
+		return -EFAULT;
+
+	if (!dev->ethtool_ops->flash_device)
+		return -EOPNOTSUPP;
+
+	efl.data[ETHTOOL_FLASH_MAX_FILENAME - 1] = 0;
+
+	return dev->ethtool_ops->flash_device(dev, &efl);
+}
+
+static int ethtool_set_dump(struct net_device *dev,
+			void __user *useraddr)
+{
+	struct ethtool_dump dump;
+
+	if (!dev->ethtool_ops->set_dump)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&dump, useraddr, sizeof(dump)))
+		return -EFAULT;
+
+	return dev->ethtool_ops->set_dump(dev, &dump);
+}
+
+static int ethtool_get_dump_flag(struct net_device *dev,
+				void __user *useraddr)
+{
+	int ret;
+	struct ethtool_dump dump;
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+
+	if (!dev->ethtool_ops->get_dump_flag)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&dump, useraddr, sizeof(dump)))
+		return -EFAULT;
+
+	ret = ops->get_dump_flag(dev, &dump);
+	if (ret)
+		return ret;
+
+	if (copy_to_user(useraddr, &dump, sizeof(dump)))
+		return -EFAULT;
+	return 0;
+}
+
+static int ethtool_get_dump_data(struct net_device *dev,
+				void __user *useraddr)
+{
+	int ret;
+	__u32 len;
+	struct ethtool_dump dump, tmp;
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+	void *data = NULL;
+
+	if (!dev->ethtool_ops->get_dump_data ||
+		!dev->ethtool_ops->get_dump_flag)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&dump, useraddr, sizeof(dump)))
+		return -EFAULT;
+
+	memset(&tmp, 0, sizeof(tmp));
+	tmp.cmd = ETHTOOL_GET_DUMP_FLAG;
+	ret = ops->get_dump_flag(dev, &tmp);
+	if (ret)
+		return ret;
+
+	len = (tmp.len > dump.len) ? dump.len : tmp.len;
+	if (!len)
+		return -EFAULT;
+
+	data = vzalloc(tmp.len);
+	if (!data)
+		return -ENOMEM;
+	ret = ops->get_dump_data(dev, &dump, data);
+	if (ret)
+		goto out;
+
+	if (copy_to_user(useraddr, &dump, sizeof(dump))) {
+		ret = -EFAULT;
+		goto out;
+	}
+	useraddr += offsetof(struct ethtool_dump, data);
+	if (copy_to_user(useraddr, data, len))
+		ret = -EFAULT;
+out:
+	vfree(data);
+	return ret;
+}
+
+/* The main entry point in this file.  Called from net/core/dev.c */
+
+int dev_ethtool(struct net *net, struct ifreq *ifr)
+{
+	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
+	void __user *useraddr = ifr->ifr_data;
+	u32 ethcmd;
+	int rc;
+	u32 old_features;
+
+	if (!dev || !netif_device_present(dev))
+		return -ENODEV;
+
+	if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd)))
+		return -EFAULT;
+
+	if (!dev->ethtool_ops) {
+		/* ETHTOOL_GDRVINFO does not require any driver support.
+		 * It is also unprivileged and does not change anything,
+		 * so we can take a shortcut to it. */
+		if (ethcmd == ETHTOOL_GDRVINFO)
+			return ethtool_get_drvinfo(dev, useraddr);
+		else
+			return -EOPNOTSUPP;
+	}
+
+	/* Allow some commands to be done by anyone */
+	switch (ethcmd) {
+	case ETHTOOL_GSET:
+	case ETHTOOL_GDRVINFO:
+	case ETHTOOL_GMSGLVL:
+	case ETHTOOL_GCOALESCE:
+	case ETHTOOL_GRINGPARAM:
+	case ETHTOOL_GPAUSEPARAM:
+	case ETHTOOL_GRXCSUM:
+	case ETHTOOL_GTXCSUM:
+	case ETHTOOL_GSG:
+	case ETHTOOL_GSSET_INFO:
+	case ETHTOOL_GSTRINGS:
+	case ETHTOOL_GTSO:
+	case ETHTOOL_GPERMADDR:
+	case ETHTOOL_GUFO:
+	case ETHTOOL_GGSO:
+	case ETHTOOL_GGRO:
+	case ETHTOOL_GFLAGS:
+	case ETHTOOL_GPFLAGS:
+	case ETHTOOL_GRXFH:
+	case ETHTOOL_GRXRINGS:
+	case ETHTOOL_GRXCLSRLCNT:
+	case ETHTOOL_GRXCLSRULE:
+	case ETHTOOL_GRXCLSRLALL:
+	case ETHTOOL_GFEATURES:
+		break;
+	default:
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+	}
+
+	if (dev->ethtool_ops->begin) {
+		rc = dev->ethtool_ops->begin(dev);
+		if (rc  < 0)
+			return rc;
+	}
+	old_features = dev->features;
+
+	switch (ethcmd) {
+	case ETHTOOL_GSET:
+		rc = ethtool_get_settings(dev, useraddr);
+		break;
+	case ETHTOOL_SSET:
+		rc = ethtool_set_settings(dev, useraddr);
+		break;
+	case ETHTOOL_GDRVINFO:
+		rc = ethtool_get_drvinfo(dev, useraddr);
+		break;
+	case ETHTOOL_GREGS:
+		rc = ethtool_get_regs(dev, useraddr);
+		break;
+	case ETHTOOL_GWOL:
+		rc = ethtool_get_wol(dev, useraddr);
+		break;
+	case ETHTOOL_SWOL:
+		rc = ethtool_set_wol(dev, useraddr);
+		break;
+	case ETHTOOL_GMSGLVL:
+		rc = ethtool_get_value(dev, useraddr, ethcmd,
+				       dev->ethtool_ops->get_msglevel);
+		break;
+	case ETHTOOL_SMSGLVL:
+		rc = ethtool_set_value_void(dev, useraddr,
+				       dev->ethtool_ops->set_msglevel);
+		break;
+	case ETHTOOL_NWAY_RST:
+		rc = ethtool_nway_reset(dev);
+		break;
+	case ETHTOOL_GLINK:
+		rc = ethtool_get_link(dev, useraddr);
+		break;
+	case ETHTOOL_GEEPROM:
+		rc = ethtool_get_eeprom(dev, useraddr);
+		break;
+	case ETHTOOL_SEEPROM:
+		rc = ethtool_set_eeprom(dev, useraddr);
+		break;
+	case ETHTOOL_GCOALESCE:
+		rc = ethtool_get_coalesce(dev, useraddr);
+		break;
+	case ETHTOOL_SCOALESCE:
+		rc = ethtool_set_coalesce(dev, useraddr);
+		break;
+	case ETHTOOL_GRINGPARAM:
+		rc = ethtool_get_ringparam(dev, useraddr);
+		break;
+	case ETHTOOL_SRINGPARAM:
+		rc = ethtool_set_ringparam(dev, useraddr);
+		break;
+	case ETHTOOL_GPAUSEPARAM:
+		rc = ethtool_get_pauseparam(dev, useraddr);
+		break;
+	case ETHTOOL_SPAUSEPARAM:
+		rc = ethtool_set_pauseparam(dev, useraddr);
+		break;
+	case ETHTOOL_TEST:
+		rc = ethtool_self_test(dev, useraddr);
+		break;
+	case ETHTOOL_GSTRINGS:
+		rc = ethtool_get_strings(dev, useraddr);
+		break;
+	case ETHTOOL_PHYS_ID:
+		rc = ethtool_phys_id(dev, useraddr);
+		break;
+	case ETHTOOL_GSTATS:
+		rc = ethtool_get_stats(dev, useraddr);
+		break;
+	case ETHTOOL_GPERMADDR:
+		rc = ethtool_get_perm_addr(dev, useraddr);
+		break;
+	case ETHTOOL_GFLAGS:
+		rc = ethtool_get_value(dev, useraddr, ethcmd,
+					__ethtool_get_flags);
+		break;
+	case ETHTOOL_SFLAGS:
+		rc = ethtool_set_value(dev, useraddr, __ethtool_set_flags);
+		break;
+	case ETHTOOL_GPFLAGS:
+		rc = ethtool_get_value(dev, useraddr, ethcmd,
+				       dev->ethtool_ops->get_priv_flags);
+		break;
+	case ETHTOOL_SPFLAGS:
+		rc = ethtool_set_value(dev, useraddr,
+				       dev->ethtool_ops->set_priv_flags);
+		break;
+	case ETHTOOL_GRXFH:
+	case ETHTOOL_GRXRINGS:
+	case ETHTOOL_GRXCLSRLCNT:
+	case ETHTOOL_GRXCLSRULE:
+	case ETHTOOL_GRXCLSRLALL:
+		rc = ethtool_get_rxnfc(dev, ethcmd, useraddr);
+		break;
+	case ETHTOOL_SRXFH:
+	case ETHTOOL_SRXCLSRLDEL:
+	case ETHTOOL_SRXCLSRLINS:
+		rc = ethtool_set_rxnfc(dev, ethcmd, useraddr);
+		break;
+	case ETHTOOL_FLASHDEV:
+		rc = ethtool_flash_device(dev, useraddr);
+		break;
+	case ETHTOOL_RESET:
+		rc = ethtool_reset(dev, useraddr);
+		break;
+	case ETHTOOL_GSSET_INFO:
+		rc = ethtool_get_sset_info(dev, useraddr);
+		break;
+	case ETHTOOL_GRXFHINDIR:
+		rc = ethtool_get_rxfh_indir(dev, useraddr);
+		break;
+	case ETHTOOL_SRXFHINDIR:
+		rc = ethtool_set_rxfh_indir(dev, useraddr);
+		break;
+	case ETHTOOL_GFEATURES:
+		rc = ethtool_get_features(dev, useraddr);
+		break;
+	case ETHTOOL_SFEATURES:
+		rc = ethtool_set_features(dev, useraddr);
+		break;
+	case ETHTOOL_GTXCSUM:
+	case ETHTOOL_GRXCSUM:
+	case ETHTOOL_GSG:
+	case ETHTOOL_GTSO:
+	case ETHTOOL_GUFO:
+	case ETHTOOL_GGSO:
+	case ETHTOOL_GGRO:
+		rc = ethtool_get_one_feature(dev, useraddr, ethcmd);
+		break;
+	case ETHTOOL_STXCSUM:
+	case ETHTOOL_SRXCSUM:
+	case ETHTOOL_SSG:
+	case ETHTOOL_STSO:
+	case ETHTOOL_SUFO:
+	case ETHTOOL_SGSO:
+	case ETHTOOL_SGRO:
+		rc = ethtool_set_one_feature(dev, useraddr, ethcmd);
+		break;
+	case ETHTOOL_GCHANNELS:
+		rc = ethtool_get_channels(dev, useraddr);
+		break;
+	case ETHTOOL_SCHANNELS:
+		rc = ethtool_set_channels(dev, useraddr);
+		break;
+	case ETHTOOL_SET_DUMP:
+		rc = ethtool_set_dump(dev, useraddr);
+		break;
+	case ETHTOOL_GET_DUMP_FLAG:
+		rc = ethtool_get_dump_flag(dev, useraddr);
+		break;
+	case ETHTOOL_GET_DUMP_DATA:
+		rc = ethtool_get_dump_data(dev, useraddr);
+		break;
+	default:
+		rc = -EOPNOTSUPP;
+	}
+
+	if (dev->ethtool_ops->complete)
+		dev->ethtool_ops->complete(dev);
+
+	if (old_features != dev->features)
+		netdev_features_change(dev);
+
+	return rc;
+}
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
new file mode 100644
index 00000000..c02e63c9
--- /dev/null
+++ b/net/core/fib_rules.c
@@ -0,0 +1,770 @@
+/*
+ * net/core/fib_rules.c		Generic Routing Rules
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License as
+ *	published by the Free Software Foundation, version 2.
+ *
+ * Authors:	Thomas Graf <tgraf@suug.ch>
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+#include <net/fib_rules.h>
+
+int fib_default_rule_add(struct fib_rules_ops *ops,
+			 u32 pref, u32 table, u32 flags)
+{
+	struct fib_rule *r;
+
+	r = kzalloc(ops->rule_size, GFP_KERNEL);
+	if (r == NULL)
+		return -ENOMEM;
+
+	atomic_set(&r->refcnt, 1);
+	r->action = FR_ACT_TO_TBL;
+	r->pref = pref;
+	r->table = table;
+	r->flags = flags;
+	r->fr_net = hold_net(ops->fro_net);
+
+	/* The lock is not required here, the list in unreacheable
+	 * at the moment this function is called */
+	list_add_tail(&r->list, &ops->rules_list);
+	return 0;
+}
+EXPORT_SYMBOL(fib_default_rule_add);
+
+u32 fib_default_rule_pref(struct fib_rules_ops *ops)
+{
+	struct list_head *pos;
+	struct fib_rule *rule;
+
+	if (!list_empty(&ops->rules_list)) {
+		pos = ops->rules_list.next;
+		if (pos->next != &ops->rules_list) {
+			rule = list_entry(pos->next, struct fib_rule, list);
+			if (rule->pref)
+				return rule->pref - 1;
+		}
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(fib_default_rule_pref);
+
+static void notify_rule_change(int event, struct fib_rule *rule,
+			       struct fib_rules_ops *ops, struct nlmsghdr *nlh,
+			       u32 pid);
+
+static struct fib_rules_ops *lookup_rules_ops(struct net *net, int family)
+{
+	struct fib_rules_ops *ops;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(ops, &net->rules_ops, list) {
+		if (ops->family == family) {
+			if (!try_module_get(ops->owner))
+				ops = NULL;
+			rcu_read_unlock();
+			return ops;
+		}
+	}
+	rcu_read_unlock();
+
+	return NULL;
+}
+
+static void rules_ops_put(struct fib_rules_ops *ops)
+{
+	if (ops)
+		module_put(ops->owner);
+}
+
+static void flush_route_cache(struct fib_rules_ops *ops)
+{
+	if (ops->flush_cache)
+		ops->flush_cache(ops);
+}
+
+static int __fib_rules_register(struct fib_rules_ops *ops)
+{
+	int err = -EEXIST;
+	struct fib_rules_ops *o;
+	struct net *net;
+
+	net = ops->fro_net;
+
+	if (ops->rule_size < sizeof(struct fib_rule))
+		return -EINVAL;
+
+	if (ops->match == NULL || ops->configure == NULL ||
+	    ops->compare == NULL || ops->fill == NULL ||
+	    ops->action == NULL)
+		return -EINVAL;
+
+	spin_lock(&net->rules_mod_lock);
+	list_for_each_entry(o, &net->rules_ops, list)
+		if (ops->family == o->family)
+			goto errout;
+
+	hold_net(net);
+	list_add_tail_rcu(&ops->list, &net->rules_ops);
+	err = 0;
+errout:
+	spin_unlock(&net->rules_mod_lock);
+
+	return err;
+}
+
+struct fib_rules_ops *
+fib_rules_register(const struct fib_rules_ops *tmpl, struct net *net)
+{
+	struct fib_rules_ops *ops;
+	int err;
+
+	ops = kmemdup(tmpl, sizeof(*ops), GFP_KERNEL);
+	if (ops == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&ops->rules_list);
+	ops->fro_net = net;
+
+	err = __fib_rules_register(ops);
+	if (err) {
+		kfree(ops);
+		ops = ERR_PTR(err);
+	}
+
+	return ops;
+}
+EXPORT_SYMBOL_GPL(fib_rules_register);
+
+static void fib_rules_cleanup_ops(struct fib_rules_ops *ops)
+{
+	struct fib_rule *rule, *tmp;
+
+	list_for_each_entry_safe(rule, tmp, &ops->rules_list, list) {
+		list_del_rcu(&rule->list);
+		fib_rule_put(rule);
+	}
+}
+
+static void fib_rules_put_rcu(struct rcu_head *head)
+{
+	struct fib_rules_ops *ops = container_of(head, struct fib_rules_ops, rcu);
+	struct net *net = ops->fro_net;
+
+	release_net(net);
+	kfree(ops);
+}
+
+void fib_rules_unregister(struct fib_rules_ops *ops)
+{
+	struct net *net = ops->fro_net;
+
+	spin_lock(&net->rules_mod_lock);
+	list_del_rcu(&ops->list);
+	fib_rules_cleanup_ops(ops);
+	spin_unlock(&net->rules_mod_lock);
+
+	call_rcu(&ops->rcu, fib_rules_put_rcu);
+}
+EXPORT_SYMBOL_GPL(fib_rules_unregister);
+
+static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
+			  struct flowi *fl, int flags)
+{
+	int ret = 0;
+
+	if (rule->iifindex && (rule->iifindex != fl->flowi_iif))
+		goto out;
+
+	if (rule->oifindex && (rule->oifindex != fl->flowi_oif))
+		goto out;
+
+	if ((rule->mark ^ fl->flowi_mark) & rule->mark_mask)
+		goto out;
+
+	ret = ops->match(rule, fl, flags);
+out:
+	return (rule->flags & FIB_RULE_INVERT) ? !ret : ret;
+}
+
+int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl,
+		     int flags, struct fib_lookup_arg *arg)
+{
+	struct fib_rule *rule;
+	int err;
+
+	rcu_read_lock();
+
+	list_for_each_entry_rcu(rule, &ops->rules_list, list) {
+jumped:
+		if (!fib_rule_match(rule, ops, fl, flags))
+			continue;
+
+		if (rule->action == FR_ACT_GOTO) {
+			struct fib_rule *target;
+
+			target = rcu_dereference(rule->ctarget);
+			if (target == NULL) {
+				continue;
+			} else {
+				rule = target;
+				goto jumped;
+			}
+		} else if (rule->action == FR_ACT_NOP)
+			continue;
+		else
+			err = ops->action(rule, fl, flags, arg);
+
+		if (err != -EAGAIN) {
+			if ((arg->flags & FIB_LOOKUP_NOREF) ||
+			    likely(atomic_inc_not_zero(&rule->refcnt))) {
+				arg->rule = rule;
+				goto out;
+			}
+			break;
+		}
+	}
+
+	err = -ESRCH;
+out:
+	rcu_read_unlock();
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(fib_rules_lookup);
+
+static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb,
+			    struct fib_rules_ops *ops)
+{
+	int err = -EINVAL;
+
+	if (frh->src_len)
+		if (tb[FRA_SRC] == NULL ||
+		    frh->src_len > (ops->addr_size * 8) ||
+		    nla_len(tb[FRA_SRC]) != ops->addr_size)
+			goto errout;
+
+	if (frh->dst_len)
+		if (tb[FRA_DST] == NULL ||
+		    frh->dst_len > (ops->addr_size * 8) ||
+		    nla_len(tb[FRA_DST]) != ops->addr_size)
+			goto errout;
+
+	err = 0;
+errout:
+	return err;
+}
+
+static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+{
+	struct net *net = sock_net(skb->sk);
+	struct fib_rule_hdr *frh = nlmsg_data(nlh);
+	struct fib_rules_ops *ops = NULL;
+	struct fib_rule *rule, *r, *last = NULL;
+	struct nlattr *tb[FRA_MAX+1];
+	int err = -EINVAL, unresolved = 0;
+
+	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
+		goto errout;
+
+	ops = lookup_rules_ops(net, frh->family);
+	if (ops == NULL) {
+		err = -EAFNOSUPPORT;
+		goto errout;
+	}
+
+	err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy);
+	if (err < 0)
+		goto errout;
+
+	err = validate_rulemsg(frh, tb, ops);
+	if (err < 0)
+		goto errout;
+
+	rule = kzalloc(ops->rule_size, GFP_KERNEL);
+	if (rule == NULL) {
+		err = -ENOMEM;
+		goto errout;
+	}
+	rule->fr_net = hold_net(net);
+
+	if (tb[FRA_PRIORITY])
+		rule->pref = nla_get_u32(tb[FRA_PRIORITY]);
+
+	if (tb[FRA_IIFNAME]) {
+		struct net_device *dev;
+
+		rule->iifindex = -1;
+		nla_strlcpy(rule->iifname, tb[FRA_IIFNAME], IFNAMSIZ);
+		dev = __dev_get_by_name(net, rule->iifname);
+		if (dev)
+			rule->iifindex = dev->ifindex;
+	}
+
+	if (tb[FRA_OIFNAME]) {
+		struct net_device *dev;
+
+		rule->oifindex = -1;
+		nla_strlcpy(rule->oifname, tb[FRA_OIFNAME], IFNAMSIZ);
+		dev = __dev_get_by_name(net, rule->oifname);
+		if (dev)
+			rule->oifindex = dev->ifindex;
+	}
+
+	if (tb[FRA_FWMARK]) {
+		rule->mark = nla_get_u32(tb[FRA_FWMARK]);
+		if (rule->mark)
+			/* compatibility: if the mark value is non-zero all bits
+			 * are compared unless a mask is explicitly specified.
+			 */
+			rule->mark_mask = 0xFFFFFFFF;
+	}
+
+	if (tb[FRA_FWMASK])
+		rule->mark_mask = nla_get_u32(tb[FRA_FWMASK]);
+
+	rule->action = frh->action;
+	rule->flags = frh->flags;
+	rule->table = frh_get_table(frh, tb);
+
+	if (!tb[FRA_PRIORITY] && ops->default_pref)
+		rule->pref = ops->default_pref(ops);
+
+	err = -EINVAL;
+	if (tb[FRA_GOTO]) {
+		if (rule->action != FR_ACT_GOTO)
+			goto errout_free;
+
+		rule->target = nla_get_u32(tb[FRA_GOTO]);
+		/* Backward jumps are prohibited to avoid endless loops */
+		if (rule->target <= rule->pref)
+			goto errout_free;
+
+		list_for_each_entry(r, &ops->rules_list, list) {
+			if (r->pref == rule->target) {
+				RCU_INIT_POINTER(rule->ctarget, r);
+				break;
+			}
+		}
+
+		if (rcu_dereference_protected(rule->ctarget, 1) == NULL)
+			unresolved = 1;
+	} else if (rule->action == FR_ACT_GOTO)
+		goto errout_free;
+
+	err = ops->configure(rule, skb, frh, tb);
+	if (err < 0)
+		goto errout_free;
+
+	list_for_each_entry(r, &ops->rules_list, list) {
+		if (r->pref > rule->pref)
+			break;
+		last = r;
+	}
+
+	fib_rule_get(rule);
+
+	if (last)
+		list_add_rcu(&rule->list, &last->list);
+	else
+		list_add_rcu(&rule->list, &ops->rules_list);
+
+	if (ops->unresolved_rules) {
+		/*
+		 * There are unresolved goto rules in the list, check if
+		 * any of them are pointing to this new rule.
+		 */
+		list_for_each_entry(r, &ops->rules_list, list) {
+			if (r->action == FR_ACT_GOTO &&
+			    r->target == rule->pref &&
+			    rtnl_dereference(r->ctarget) == NULL) {
+				rcu_assign_pointer(r->ctarget, rule);
+				if (--ops->unresolved_rules == 0)
+					break;
+			}
+		}
+	}
+
+	if (rule->action == FR_ACT_GOTO)
+		ops->nr_goto_rules++;
+
+	if (unresolved)
+		ops->unresolved_rules++;
+
+	notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid);
+	flush_route_cache(ops);
+	rules_ops_put(ops);
+	return 0;
+
+errout_free:
+	release_net(rule->fr_net);
+	kfree(rule);
+errout:
+	rules_ops_put(ops);
+	return err;
+}
+
+static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+{
+	struct net *net = sock_net(skb->sk);
+	struct fib_rule_hdr *frh = nlmsg_data(nlh);
+	struct fib_rules_ops *ops = NULL;
+	struct fib_rule *rule, *tmp;
+	struct nlattr *tb[FRA_MAX+1];
+	int err = -EINVAL;
+
+	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
+		goto errout;
+
+	ops = lookup_rules_ops(net, frh->family);
+	if (ops == NULL) {
+		err = -EAFNOSUPPORT;
+		goto errout;
+	}
+
+	err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy);
+	if (err < 0)
+		goto errout;
+
+	err = validate_rulemsg(frh, tb, ops);
+	if (err < 0)
+		goto errout;
+
+	list_for_each_entry(rule, &ops->rules_list, list) {
+		if (frh->action && (frh->action != rule->action))
+			continue;
+
+		if (frh->table && (frh_get_table(frh, tb) != rule->table))
+			continue;
+
+		if (tb[FRA_PRIORITY] &&
+		    (rule->pref != nla_get_u32(tb[FRA_PRIORITY])))
+			continue;
+
+		if (tb[FRA_IIFNAME] &&
+		    nla_strcmp(tb[FRA_IIFNAME], rule->iifname))
+			continue;
+
+		if (tb[FRA_OIFNAME] &&
+		    nla_strcmp(tb[FRA_OIFNAME], rule->oifname))
+			continue;
+
+		if (tb[FRA_FWMARK] &&
+		    (rule->mark != nla_get_u32(tb[FRA_FWMARK])))
+			continue;
+
+		if (tb[FRA_FWMASK] &&
+		    (rule->mark_mask != nla_get_u32(tb[FRA_FWMASK])))
+			continue;
+
+		if (!ops->compare(rule, frh, tb))
+			continue;
+
+		if (rule->flags & FIB_RULE_PERMANENT) {
+			err = -EPERM;
+			goto errout;
+		}
+
+		list_del_rcu(&rule->list);
+
+		if (rule->action == FR_ACT_GOTO) {
+			ops->nr_goto_rules--;
+			if (rtnl_dereference(rule->ctarget) == NULL)
+				ops->unresolved_rules--;
+		}
+
+		/*
+		 * Check if this rule is a target to any of them. If so,
+		 * disable them. As this operation is eventually very
+		 * expensive, it is only performed if goto rules have
+		 * actually been added.
+		 */
+		if (ops->nr_goto_rules > 0) {
+			list_for_each_entry(tmp, &ops->rules_list, list) {
+				if (rtnl_dereference(tmp->ctarget) == rule) {
+					RCU_INIT_POINTER(tmp->ctarget, NULL);
+					ops->unresolved_rules++;
+				}
+			}
+		}
+
+		notify_rule_change(RTM_DELRULE, rule, ops, nlh,
+				   NETLINK_CB(skb).pid);
+		fib_rule_put(rule);
+		flush_route_cache(ops);
+		rules_ops_put(ops);
+		return 0;
+	}
+
+	err = -ENOENT;
+errout:
+	rules_ops_put(ops);
+	return err;
+}
+
+static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
+					 struct fib_rule *rule)
+{
+	size_t payload = NLMSG_ALIGN(sizeof(struct fib_rule_hdr))
+			 + nla_total_size(IFNAMSIZ) /* FRA_IIFNAME */
+			 + nla_total_size(IFNAMSIZ) /* FRA_OIFNAME */
+			 + nla_total_size(4) /* FRA_PRIORITY */
+			 + nla_total_size(4) /* FRA_TABLE */
+			 + nla_total_size(4) /* FRA_FWMARK */
+			 + nla_total_size(4); /* FRA_FWMASK */
+
+	if (ops->nlmsg_payload)
+		payload += ops->nlmsg_payload(rule);
+
+	return payload;
+}
+
+static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
+			    u32 pid, u32 seq, int type, int flags,
+			    struct fib_rules_ops *ops)
+{
+	struct nlmsghdr *nlh;
+	struct fib_rule_hdr *frh;
+
+	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*frh), flags);
+	if (nlh == NULL)
+		return -EMSGSIZE;
+
+	frh = nlmsg_data(nlh);
+	frh->family = ops->family;
+	frh->table = rule->table;
+	NLA_PUT_U32(skb, FRA_TABLE, rule->table);
+	frh->res1 = 0;
+	frh->res2 = 0;
+	frh->action = rule->action;
+	frh->flags = rule->flags;
+
+	if (rule->action == FR_ACT_GOTO &&
+	    rcu_access_pointer(rule->ctarget) == NULL)
+		frh->flags |= FIB_RULE_UNRESOLVED;
+
+	if (rule->iifname[0]) {
+		NLA_PUT_STRING(skb, FRA_IIFNAME, rule->iifname);
+
+		if (rule->iifindex == -1)
+			frh->flags |= FIB_RULE_IIF_DETACHED;
+	}
+
+	if (rule->oifname[0]) {
+		NLA_PUT_STRING(skb, FRA_OIFNAME, rule->oifname);
+
+		if (rule->oifindex == -1)
+			frh->flags |= FIB_RULE_OIF_DETACHED;
+	}
+
+	if (rule->pref)
+		NLA_PUT_U32(skb, FRA_PRIORITY, rule->pref);
+
+	if (rule->mark)
+		NLA_PUT_U32(skb, FRA_FWMARK, rule->mark);
+
+	if (rule->mark_mask || rule->mark)
+		NLA_PUT_U32(skb, FRA_FWMASK, rule->mark_mask);
+
+	if (rule->target)
+		NLA_PUT_U32(skb, FRA_GOTO, rule->target);
+
+	if (ops->fill(rule, skb, frh) < 0)
+		goto nla_put_failure;
+
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	nlmsg_cancel(skb, nlh);
+	return -EMSGSIZE;
+}
+
+static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb,
+		      struct fib_rules_ops *ops)
+{
+	int idx = 0;
+	struct fib_rule *rule;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(rule, &ops->rules_list, list) {
+		if (idx < cb->args[1])
+			goto skip;
+
+		if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).pid,
+				     cb->nlh->nlmsg_seq, RTM_NEWRULE,
+				     NLM_F_MULTI, ops) < 0)
+			break;
+skip:
+		idx++;
+	}
+	rcu_read_unlock();
+	cb->args[1] = idx;
+	rules_ops_put(ops);
+
+	return skb->len;
+}
+
+static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct net *net = sock_net(skb->sk);
+	struct fib_rules_ops *ops;
+	int idx = 0, family;
+
+	family = rtnl_msg_family(cb->nlh);
+	if (family != AF_UNSPEC) {
+		/* Protocol specific dump request */
+		ops = lookup_rules_ops(net, family);
+		if (ops == NULL)
+			return -EAFNOSUPPORT;
+
+		return dump_rules(skb, cb, ops);
+	}
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(ops, &net->rules_ops, list) {
+		if (idx < cb->args[0] || !try_module_get(ops->owner))
+			goto skip;
+
+		if (dump_rules(skb, cb, ops) < 0)
+			break;
+
+		cb->args[1] = 0;
+skip:
+		idx++;
+	}
+	rcu_read_unlock();
+	cb->args[0] = idx;
+
+	return skb->len;
+}
+
+static void notify_rule_change(int event, struct fib_rule *rule,
+			       struct fib_rules_ops *ops, struct nlmsghdr *nlh,
+			       u32 pid)
+{
+	struct net *net;
+	struct sk_buff *skb;
+	int err = -ENOBUFS;
+
+	net = ops->fro_net;
+	skb = nlmsg_new(fib_rule_nlmsg_size(ops, rule), GFP_KERNEL);
+	if (skb == NULL)
+		goto errout;
+
+	err = fib_nl_fill_rule(skb, rule, pid, nlh->nlmsg_seq, event, 0, ops);
+	if (err < 0) {
+		/* -EMSGSIZE implies BUG in fib_rule_nlmsg_size() */
+		WARN_ON(err == -EMSGSIZE);
+		kfree_skb(skb);
+		goto errout;
+	}
+
+	rtnl_notify(skb, net, pid, ops->nlgroup, nlh, GFP_KERNEL);
+	return;
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(net, ops->nlgroup, err);
+}
+
+static void attach_rules(struct list_head *rules, struct net_device *dev)
+{
+	struct fib_rule *rule;
+
+	list_for_each_entry(rule, rules, list) {
+		if (rule->iifindex == -1 &&
+		    strcmp(dev->name, rule->iifname) == 0)
+			rule->iifindex = dev->ifindex;
+		if (rule->oifindex == -1 &&
+		    strcmp(dev->name, rule->oifname) == 0)
+			rule->oifindex = dev->ifindex;
+	}
+}
+
+static void detach_rules(struct list_head *rules, struct net_device *dev)
+{
+	struct fib_rule *rule;
+
+	list_for_each_entry(rule, rules, list) {
+		if (rule->iifindex == dev->ifindex)
+			rule->iifindex = -1;
+		if (rule->oifindex == dev->ifindex)
+			rule->oifindex = -1;
+	}
+}
+
+
+static int fib_rules_event(struct notifier_block *this, unsigned long event,
+			    void *ptr)
+{
+	struct net_device *dev = ptr;
+	struct net *net = dev_net(dev);
+	struct fib_rules_ops *ops;
+
+	ASSERT_RTNL();
+
+	switch (event) {
+	case NETDEV_REGISTER:
+		list_for_each_entry(ops, &net->rules_ops, list)
+			attach_rules(&ops->rules_list, dev);
+		break;
+
+	case NETDEV_UNREGISTER:
+		list_for_each_entry(ops, &net->rules_ops, list)
+			detach_rules(&ops->rules_list, dev);
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block fib_rules_notifier = {
+	.notifier_call = fib_rules_event,
+};
+
+static int __net_init fib_rules_net_init(struct net *net)
+{
+	INIT_LIST_HEAD(&net->rules_ops);
+	spin_lock_init(&net->rules_mod_lock);
+	return 0;
+}
+
+static struct pernet_operations fib_rules_net_ops = {
+	.init = fib_rules_net_init,
+};
+
+static int __init fib_rules_init(void)
+{
+	int err;
+	rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL, NULL);
+	rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL, NULL);
+	rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule, NULL);
+
+	err = register_pernet_subsys(&fib_rules_net_ops);
+	if (err < 0)
+		goto fail;
+
+	err = register_netdevice_notifier(&fib_rules_notifier);
+	if (err < 0)
+		goto fail_unregister;
+
+	return 0;
+
+fail_unregister:
+	unregister_pernet_subsys(&fib_rules_net_ops);
+fail:
+	rtnl_unregister(PF_UNSPEC, RTM_NEWRULE);
+	rtnl_unregister(PF_UNSPEC, RTM_DELRULE);
+	rtnl_unregister(PF_UNSPEC, RTM_GETRULE);
+	return err;
+}
+
+subsys_initcall(fib_rules_init);
diff --git a/net/core/filter.c b/net/core/filter.c
new file mode 100644
index 00000000..6f755cca
--- /dev/null
+++ b/net/core/filter.c
@@ -0,0 +1,656 @@
+/*
+ * Linux Socket Filter - Kernel level socket filtering
+ *
+ * Author:
+ *     Jay Schulist <jschlst@samba.org>
+ *
+ * Based on the design of:
+ *     - The Berkeley Packet Filter
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Andi Kleen - Fix a few bad bugs and races.
+ * Kris Katterjohn - Added many additional checks in sk_chk_filter()
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/fcntl.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/if_packet.h>
+#include <linux/gfp.h>
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <net/netlink.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <linux/errno.h>
+#include <linux/timer.h>
+#include <asm/uaccess.h>
+#include <asm/unaligned.h>
+#include <linux/filter.h>
+#include <linux/reciprocal_div.h>
+#include <linux/ratelimit.h>
+
+/* No hurry in this branch
+ *
+ * Exported for the bpf jit load helper.
+ */
+void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, unsigned int size)
+{
+	u8 *ptr = NULL;
+
+	if (k >= SKF_NET_OFF)
+		ptr = skb_network_header(skb) + k - SKF_NET_OFF;
+	else if (k >= SKF_LL_OFF)
+		ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
+
+	if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb))
+		return ptr;
+	return NULL;
+}
+
+static inline void *load_pointer(const struct sk_buff *skb, int k,
+				 unsigned int size, void *buffer)
+{
+	if (k >= 0)
+		return skb_header_pointer(skb, k, size, buffer);
+	return bpf_internal_load_pointer_neg_helper(skb, k, size);
+}
+
+/**
+ *	sk_filter - run a packet through a socket filter
+ *	@sk: sock associated with &sk_buff
+ *	@skb: buffer to filter
+ *
+ * Run the filter code and then cut skb->data to correct size returned by
+ * sk_run_filter. If pkt_len is 0 we toss packet. If skb->len is smaller
+ * than pkt_len we keep whole skb->data. This is the socket level
+ * wrapper to sk_run_filter. It returns 0 if the packet should
+ * be accepted or -EPERM if the packet should be tossed.
+ *
+ */
+int sk_filter(struct sock *sk, struct sk_buff *skb)
+{
+	int err;
+	struct sk_filter *filter;
+
+	err = security_sock_rcv_skb(sk, skb);
+	if (err)
+		return err;
+
+	rcu_read_lock();
+	filter = rcu_dereference(sk->sk_filter);
+	if (filter) {
+		unsigned int pkt_len = SK_RUN_FILTER(filter, skb);
+
+		err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
+	}
+	rcu_read_unlock();
+
+	return err;
+}
+EXPORT_SYMBOL(sk_filter);
+
+/**
+ *	sk_run_filter - run a filter on a socket
+ *	@skb: buffer to run the filter on
+ *	@fentry: filter to apply
+ *
+ * Decode and apply filter instructions to the skb->data.
+ * Return length to keep, 0 for none. @skb is the data we are
+ * filtering, @filter is the array of filter instructions.
+ * Because all jumps are guaranteed to be before last instruction,
+ * and last instruction guaranteed to be a RET, we dont need to check
+ * flen. (We used to pass to this function the length of filter)
+ */
+unsigned int sk_run_filter(const struct sk_buff *skb,
+			   const struct sock_filter *fentry)
+{
+	void *ptr;
+	u32 A = 0;			/* Accumulator */
+	u32 X = 0;			/* Index Register */
+	u32 mem[BPF_MEMWORDS];		/* Scratch Memory Store */
+	u32 tmp;
+	int k;
+
+	/*
+	 * Process array of filter instructions.
+	 */
+	for (;; fentry++) {
+#if defined(CONFIG_X86_32)
+#define	K (fentry->k)
+#else
+		const u32 K = fentry->k;
+#endif
+
+		switch (fentry->code) {
+		case BPF_S_ALU_ADD_X:
+			A += X;
+			continue;
+		case BPF_S_ALU_ADD_K:
+			A += K;
+			continue;
+		case BPF_S_ALU_SUB_X:
+			A -= X;
+			continue;
+		case BPF_S_ALU_SUB_K:
+			A -= K;
+			continue;
+		case BPF_S_ALU_MUL_X:
+			A *= X;
+			continue;
+		case BPF_S_ALU_MUL_K:
+			A *= K;
+			continue;
+		case BPF_S_ALU_DIV_X:
+			if (X == 0)
+				return 0;
+			A /= X;
+			continue;
+		case BPF_S_ALU_DIV_K:
+			A = reciprocal_divide(A, K);
+			continue;
+		case BPF_S_ALU_AND_X:
+			A &= X;
+			continue;
+		case BPF_S_ALU_AND_K:
+			A &= K;
+			continue;
+		case BPF_S_ALU_OR_X:
+			A |= X;
+			continue;
+		case BPF_S_ALU_OR_K:
+			A |= K;
+			continue;
+		case BPF_S_ALU_LSH_X:
+			A <<= X;
+			continue;
+		case BPF_S_ALU_LSH_K:
+			A <<= K;
+			continue;
+		case BPF_S_ALU_RSH_X:
+			A >>= X;
+			continue;
+		case BPF_S_ALU_RSH_K:
+			A >>= K;
+			continue;
+		case BPF_S_ALU_NEG:
+			A = -A;
+			continue;
+		case BPF_S_JMP_JA:
+			fentry += K;
+			continue;
+		case BPF_S_JMP_JGT_K:
+			fentry += (A > K) ? fentry->jt : fentry->jf;
+			continue;
+		case BPF_S_JMP_JGE_K:
+			fentry += (A >= K) ? fentry->jt : fentry->jf;
+			continue;
+		case BPF_S_JMP_JEQ_K:
+			fentry += (A == K) ? fentry->jt : fentry->jf;
+			continue;
+		case BPF_S_JMP_JSET_K:
+			fentry += (A & K) ? fentry->jt : fentry->jf;
+			continue;
+		case BPF_S_JMP_JGT_X:
+			fentry += (A > X) ? fentry->jt : fentry->jf;
+			continue;
+		case BPF_S_JMP_JGE_X:
+			fentry += (A >= X) ? fentry->jt : fentry->jf;
+			continue;
+		case BPF_S_JMP_JEQ_X:
+			fentry += (A == X) ? fentry->jt : fentry->jf;
+			continue;
+		case BPF_S_JMP_JSET_X:
+			fentry += (A & X) ? fentry->jt : fentry->jf;
+			continue;
+		case BPF_S_LD_W_ABS:
+			k = K;
+load_w:
+			ptr = load_pointer(skb, k, 4, &tmp);
+			if (ptr != NULL) {
+				A = get_unaligned_be32(ptr);
+				continue;
+			}
+			return 0;
+		case BPF_S_LD_H_ABS:
+			k = K;
+load_h:
+			ptr = load_pointer(skb, k, 2, &tmp);
+			if (ptr != NULL) {
+				A = get_unaligned_be16(ptr);
+				continue;
+			}
+			return 0;
+		case BPF_S_LD_B_ABS:
+			k = K;
+load_b:
+			ptr = load_pointer(skb, k, 1, &tmp);
+			if (ptr != NULL) {
+				A = *(u8 *)ptr;
+				continue;
+			}
+			return 0;
+		case BPF_S_LD_W_LEN:
+			A = skb->len;
+			continue;
+		case BPF_S_LDX_W_LEN:
+			X = skb->len;
+			continue;
+		case BPF_S_LD_W_IND:
+			k = X + K;
+			goto load_w;
+		case BPF_S_LD_H_IND:
+			k = X + K;
+			goto load_h;
+		case BPF_S_LD_B_IND:
+			k = X + K;
+			goto load_b;
+		case BPF_S_LDX_B_MSH:
+			ptr = load_pointer(skb, K, 1, &tmp);
+			if (ptr != NULL) {
+				X = (*(u8 *)ptr & 0xf) << 2;
+				continue;
+			}
+			return 0;
+		case BPF_S_LD_IMM:
+			A = K;
+			continue;
+		case BPF_S_LDX_IMM:
+			X = K;
+			continue;
+		case BPF_S_LD_MEM:
+			A = mem[K];
+			continue;
+		case BPF_S_LDX_MEM:
+			X = mem[K];
+			continue;
+		case BPF_S_MISC_TAX:
+			X = A;
+			continue;
+		case BPF_S_MISC_TXA:
+			A = X;
+			continue;
+		case BPF_S_RET_K:
+			return K;
+		case BPF_S_RET_A:
+			return A;
+		case BPF_S_ST:
+			mem[K] = A;
+			continue;
+		case BPF_S_STX:
+			mem[K] = X;
+			continue;
+		case BPF_S_ANC_PROTOCOL:
+			A = ntohs(skb->protocol);
+			continue;
+		case BPF_S_ANC_PKTTYPE:
+			A = skb->pkt_type;
+			continue;
+		case BPF_S_ANC_IFINDEX:
+			if (!skb->dev)
+				return 0;
+			A = skb->dev->ifindex;
+			continue;
+		case BPF_S_ANC_MARK:
+			A = skb->mark;
+			continue;
+		case BPF_S_ANC_QUEUE:
+			A = skb->queue_mapping;
+			continue;
+		case BPF_S_ANC_HATYPE:
+			if (!skb->dev)
+				return 0;
+			A = skb->dev->type;
+			continue;
+		case BPF_S_ANC_RXHASH:
+			A = skb->rxhash;
+			continue;
+		case BPF_S_ANC_CPU:
+			A = raw_smp_processor_id();
+			continue;
+		case BPF_S_ANC_NLATTR: {
+			struct nlattr *nla;
+
+			if (skb_is_nonlinear(skb))
+				return 0;
+			if (A > skb->len - sizeof(struct nlattr))
+				return 0;
+
+			nla = nla_find((struct nlattr *)&skb->data[A],
+				       skb->len - A, X);
+			if (nla)
+				A = (void *)nla - (void *)skb->data;
+			else
+				A = 0;
+			continue;
+		}
+		case BPF_S_ANC_NLATTR_NEST: {
+			struct nlattr *nla;
+
+			if (skb_is_nonlinear(skb))
+				return 0;
+			if (A > skb->len - sizeof(struct nlattr))
+				return 0;
+
+			nla = (struct nlattr *)&skb->data[A];
+			if (nla->nla_len > A - skb->len)
+				return 0;
+
+			nla = nla_find_nested(nla, X);
+			if (nla)
+				A = (void *)nla - (void *)skb->data;
+			else
+				A = 0;
+			continue;
+		}
+		default:
+			WARN_RATELIMIT(1, "Unknown code:%u jt:%u tf:%u k:%u\n",
+				       fentry->code, fentry->jt,
+				       fentry->jf, fentry->k);
+			return 0;
+		}
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(sk_run_filter);
+
+/*
+ * Security :
+ * A BPF program is able to use 16 cells of memory to store intermediate
+ * values (check u32 mem[BPF_MEMWORDS] in sk_run_filter())
+ * As we dont want to clear mem[] array for each packet going through
+ * sk_run_filter(), we check that filter loaded by user never try to read
+ * a cell if not previously written, and we check all branches to be sure
+ * a malicious user doesn't try to abuse us.
+ */
+static int check_load_and_stores(struct sock_filter *filter, int flen)
+{
+	u16 *masks, memvalid = 0; /* one bit per cell, 16 cells */
+	int pc, ret = 0;
+
+	BUILD_BUG_ON(BPF_MEMWORDS > 16);
+	masks = kmalloc(flen * sizeof(*masks), GFP_KERNEL);
+	if (!masks)
+		return -ENOMEM;
+	memset(masks, 0xff, flen * sizeof(*masks));
+
+	for (pc = 0; pc < flen; pc++) {
+		memvalid &= masks[pc];
+
+		switch (filter[pc].code) {
+		case BPF_S_ST:
+		case BPF_S_STX:
+			memvalid |= (1 << filter[pc].k);
+			break;
+		case BPF_S_LD_MEM:
+		case BPF_S_LDX_MEM:
+			if (!(memvalid & (1 << filter[pc].k))) {
+				ret = -EINVAL;
+				goto error;
+			}
+			break;
+		case BPF_S_JMP_JA:
+			/* a jump must set masks on target */
+			masks[pc + 1 + filter[pc].k] &= memvalid;
+			memvalid = ~0;
+			break;
+		case BPF_S_JMP_JEQ_K:
+		case BPF_S_JMP_JEQ_X:
+		case BPF_S_JMP_JGE_K:
+		case BPF_S_JMP_JGE_X:
+		case BPF_S_JMP_JGT_K:
+		case BPF_S_JMP_JGT_X:
+		case BPF_S_JMP_JSET_X:
+		case BPF_S_JMP_JSET_K:
+			/* a jump must set masks on targets */
+			masks[pc + 1 + filter[pc].jt] &= memvalid;
+			masks[pc + 1 + filter[pc].jf] &= memvalid;
+			memvalid = ~0;
+			break;
+		}
+	}
+error:
+	kfree(masks);
+	return ret;
+}
+
+/**
+ *	sk_chk_filter - verify socket filter code
+ *	@filter: filter to verify
+ *	@flen: length of filter
+ *
+ * Check the user's filter code. If we let some ugly
+ * filter code slip through kaboom! The filter must contain
+ * no references or jumps that are out of range, no illegal
+ * instructions, and must end with a RET instruction.
+ *
+ * All jumps are forward as they are not signed.
+ *
+ * Returns 0 if the rule set is legal or -EINVAL if not.
+ */
+int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
+{
+	/*
+	 * Valid instructions are initialized to non-0.
+	 * Invalid instructions are initialized to 0.
+	 */
+	static const u8 codes[] = {
+		[BPF_ALU|BPF_ADD|BPF_K]  = BPF_S_ALU_ADD_K,
+		[BPF_ALU|BPF_ADD|BPF_X]  = BPF_S_ALU_ADD_X,
+		[BPF_ALU|BPF_SUB|BPF_K]  = BPF_S_ALU_SUB_K,
+		[BPF_ALU|BPF_SUB|BPF_X]  = BPF_S_ALU_SUB_X,
+		[BPF_ALU|BPF_MUL|BPF_K]  = BPF_S_ALU_MUL_K,
+		[BPF_ALU|BPF_MUL|BPF_X]  = BPF_S_ALU_MUL_X,
+		[BPF_ALU|BPF_DIV|BPF_X]  = BPF_S_ALU_DIV_X,
+		[BPF_ALU|BPF_AND|BPF_K]  = BPF_S_ALU_AND_K,
+		[BPF_ALU|BPF_AND|BPF_X]  = BPF_S_ALU_AND_X,
+		[BPF_ALU|BPF_OR|BPF_K]   = BPF_S_ALU_OR_K,
+		[BPF_ALU|BPF_OR|BPF_X]   = BPF_S_ALU_OR_X,
+		[BPF_ALU|BPF_LSH|BPF_K]  = BPF_S_ALU_LSH_K,
+		[BPF_ALU|BPF_LSH|BPF_X]  = BPF_S_ALU_LSH_X,
+		[BPF_ALU|BPF_RSH|BPF_K]  = BPF_S_ALU_RSH_K,
+		[BPF_ALU|BPF_RSH|BPF_X]  = BPF_S_ALU_RSH_X,
+		[BPF_ALU|BPF_NEG]        = BPF_S_ALU_NEG,
+		[BPF_LD|BPF_W|BPF_ABS]   = BPF_S_LD_W_ABS,
+		[BPF_LD|BPF_H|BPF_ABS]   = BPF_S_LD_H_ABS,
+		[BPF_LD|BPF_B|BPF_ABS]   = BPF_S_LD_B_ABS,
+		[BPF_LD|BPF_W|BPF_LEN]   = BPF_S_LD_W_LEN,
+		[BPF_LD|BPF_W|BPF_IND]   = BPF_S_LD_W_IND,
+		[BPF_LD|BPF_H|BPF_IND]   = BPF_S_LD_H_IND,
+		[BPF_LD|BPF_B|BPF_IND]   = BPF_S_LD_B_IND,
+		[BPF_LD|BPF_IMM]         = BPF_S_LD_IMM,
+		[BPF_LDX|BPF_W|BPF_LEN]  = BPF_S_LDX_W_LEN,
+		[BPF_LDX|BPF_B|BPF_MSH]  = BPF_S_LDX_B_MSH,
+		[BPF_LDX|BPF_IMM]        = BPF_S_LDX_IMM,
+		[BPF_MISC|BPF_TAX]       = BPF_S_MISC_TAX,
+		[BPF_MISC|BPF_TXA]       = BPF_S_MISC_TXA,
+		[BPF_RET|BPF_K]          = BPF_S_RET_K,
+		[BPF_RET|BPF_A]          = BPF_S_RET_A,
+		[BPF_ALU|BPF_DIV|BPF_K]  = BPF_S_ALU_DIV_K,
+		[BPF_LD|BPF_MEM]         = BPF_S_LD_MEM,
+		[BPF_LDX|BPF_MEM]        = BPF_S_LDX_MEM,
+		[BPF_ST]                 = BPF_S_ST,
+		[BPF_STX]                = BPF_S_STX,
+		[BPF_JMP|BPF_JA]         = BPF_S_JMP_JA,
+		[BPF_JMP|BPF_JEQ|BPF_K]  = BPF_S_JMP_JEQ_K,
+		[BPF_JMP|BPF_JEQ|BPF_X]  = BPF_S_JMP_JEQ_X,
+		[BPF_JMP|BPF_JGE|BPF_K]  = BPF_S_JMP_JGE_K,
+		[BPF_JMP|BPF_JGE|BPF_X]  = BPF_S_JMP_JGE_X,
+		[BPF_JMP|BPF_JGT|BPF_K]  = BPF_S_JMP_JGT_K,
+		[BPF_JMP|BPF_JGT|BPF_X]  = BPF_S_JMP_JGT_X,
+		[BPF_JMP|BPF_JSET|BPF_K] = BPF_S_JMP_JSET_K,
+		[BPF_JMP|BPF_JSET|BPF_X] = BPF_S_JMP_JSET_X,
+	};
+	int pc;
+
+	if (flen == 0 || flen > BPF_MAXINSNS)
+		return -EINVAL;
+
+	/* check the filter code now */
+	for (pc = 0; pc < flen; pc++) {
+		struct sock_filter *ftest = &filter[pc];
+		u16 code = ftest->code;
+
+		if (code >= ARRAY_SIZE(codes))
+			return -EINVAL;
+		code = codes[code];
+		if (!code)
+			return -EINVAL;
+		/* Some instructions need special checks */
+		switch (code) {
+		case BPF_S_ALU_DIV_K:
+			/* check for division by zero */
+			if (ftest->k == 0)
+				return -EINVAL;
+			ftest->k = reciprocal_value(ftest->k);
+			break;
+		case BPF_S_LD_MEM:
+		case BPF_S_LDX_MEM:
+		case BPF_S_ST:
+		case BPF_S_STX:
+			/* check for invalid memory addresses */
+			if (ftest->k >= BPF_MEMWORDS)
+				return -EINVAL;
+			break;
+		case BPF_S_JMP_JA:
+			/*
+			 * Note, the large ftest->k might cause loops.
+			 * Compare this with conditional jumps below,
+			 * where offsets are limited. --ANK (981016)
+			 */
+			if (ftest->k >= (unsigned)(flen-pc-1))
+				return -EINVAL;
+			break;
+		case BPF_S_JMP_JEQ_K:
+		case BPF_S_JMP_JEQ_X:
+		case BPF_S_JMP_JGE_K:
+		case BPF_S_JMP_JGE_X:
+		case BPF_S_JMP_JGT_K:
+		case BPF_S_JMP_JGT_X:
+		case BPF_S_JMP_JSET_X:
+		case BPF_S_JMP_JSET_K:
+			/* for conditionals both must be safe */
+			if (pc + ftest->jt + 1 >= flen ||
+			    pc + ftest->jf + 1 >= flen)
+				return -EINVAL;
+			break;
+		case BPF_S_LD_W_ABS:
+		case BPF_S_LD_H_ABS:
+		case BPF_S_LD_B_ABS:
+#define ANCILLARY(CODE) case SKF_AD_OFF + SKF_AD_##CODE:	\
+				code = BPF_S_ANC_##CODE;	\
+				break
+			switch (ftest->k) {
+			ANCILLARY(PROTOCOL);
+			ANCILLARY(PKTTYPE);
+			ANCILLARY(IFINDEX);
+			ANCILLARY(NLATTR);
+			ANCILLARY(NLATTR_NEST);
+			ANCILLARY(MARK);
+			ANCILLARY(QUEUE);
+			ANCILLARY(HATYPE);
+			ANCILLARY(RXHASH);
+			ANCILLARY(CPU);
+			}
+		}
+		ftest->code = code;
+	}
+
+	/* last instruction must be a RET code */
+	switch (filter[flen - 1].code) {
+	case BPF_S_RET_K:
+	case BPF_S_RET_A:
+		return check_load_and_stores(filter, flen);
+	}
+	return -EINVAL;
+}
+EXPORT_SYMBOL(sk_chk_filter);
+
+/**
+ * 	sk_filter_release_rcu - Release a socket filter by rcu_head
+ *	@rcu: rcu_head that contains the sk_filter to free
+ */
+void sk_filter_release_rcu(struct rcu_head *rcu)
+{
+	struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
+
+	bpf_jit_free(fp);
+	kfree(fp);
+}
+EXPORT_SYMBOL(sk_filter_release_rcu);
+
+/**
+ *	sk_attach_filter - attach a socket filter
+ *	@fprog: the filter program
+ *	@sk: the socket to use
+ *
+ * Attach the user's filter code. We first run some sanity checks on
+ * it to make sure it does not explode on us later. If an error
+ * occurs or there is insufficient memory for the filter a negative
+ * errno code is returned. On success the return is zero.
+ */
+int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
+{
+	struct sk_filter *fp, *old_fp;
+	unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
+	int err;
+
+	/* Make sure new filter is there and in the right amounts. */
+	if (fprog->filter == NULL)
+		return -EINVAL;
+
+	fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL);
+	if (!fp)
+		return -ENOMEM;
+	if (copy_from_user(fp->insns, fprog->filter, fsize)) {
+		sock_kfree_s(sk, fp, fsize+sizeof(*fp));
+		return -EFAULT;
+	}
+
+	atomic_set(&fp->refcnt, 1);
+	fp->len = fprog->len;
+	fp->bpf_func = sk_run_filter;
+
+	err = sk_chk_filter(fp->insns, fp->len);
+	if (err) {
+		sk_filter_uncharge(sk, fp);
+		return err;
+	}
+
+	bpf_jit_compile(fp);
+
+	old_fp = rcu_dereference_protected(sk->sk_filter,
+					   sock_owned_by_user(sk));
+	rcu_assign_pointer(sk->sk_filter, fp);
+
+	if (old_fp)
+		sk_filter_uncharge(sk, old_fp);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(sk_attach_filter);
+
+int sk_detach_filter(struct sock *sk)
+{
+	int ret = -ENOENT;
+	struct sk_filter *filter;
+
+	filter = rcu_dereference_protected(sk->sk_filter,
+					   sock_owned_by_user(sk));
+	if (filter) {
+		RCU_INIT_POINTER(sk->sk_filter, NULL);
+		sk_filter_uncharge(sk, filter);
+		ret = 0;
+	}
+	return ret;
+}
+EXPORT_SYMBOL_GPL(sk_detach_filter);
diff --git a/net/core/flow.c b/net/core/flow.c
new file mode 100644
index 00000000..e318c7e9
--- /dev/null
+++ b/net/core/flow.c
@@ -0,0 +1,464 @@
+/* flow.c: Generic flow cache.
+ *
+ * Copyright (C) 2003 Alexey N. Kuznetsov (kuznet@ms2.inr.ac.ru)
+ * Copyright (C) 2003 David S. Miller (davem@redhat.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/jhash.h>
+#include <linux/interrupt.h>
+#include <linux/mm.h>
+#include <linux/random.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/completion.h>
+#include <linux/percpu.h>
+#include <linux/bitops.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/mutex.h>
+#include <net/flow.h>
+#include <linux/atomic.h>
+#include <linux/security.h>
+
+struct flow_cache_entry {
+	union {
+		struct hlist_node	hlist;
+		struct list_head	gc_list;
+	} u;
+	struct net			*net;
+	u16				family;
+	u8				dir;
+	u32				genid;
+	struct flowi			key;
+	struct flow_cache_object	*object;
+};
+
+struct flow_cache_percpu {
+	struct hlist_head		*hash_table;
+	int				hash_count;
+	u32				hash_rnd;
+	int				hash_rnd_recalc;
+	struct tasklet_struct		flush_tasklet;
+};
+
+struct flow_flush_info {
+	struct flow_cache		*cache;
+	atomic_t			cpuleft;
+	struct completion		completion;
+};
+
+struct flow_cache {
+	u32				hash_shift;
+	struct flow_cache_percpu __percpu *percpu;
+	struct notifier_block		hotcpu_notifier;
+	int				low_watermark;
+	int				high_watermark;
+	struct timer_list		rnd_timer;
+};
+
+atomic_t flow_cache_genid = ATOMIC_INIT(0);
+EXPORT_SYMBOL(flow_cache_genid);
+static struct flow_cache flow_cache_global;
+static struct kmem_cache *flow_cachep __read_mostly;
+
+static DEFINE_SPINLOCK(flow_cache_gc_lock);
+static LIST_HEAD(flow_cache_gc_list);
+
+#define flow_cache_hash_size(cache)	(1 << (cache)->hash_shift)
+#define FLOW_HASH_RND_PERIOD		(10 * 60 * HZ)
+
+static void flow_cache_new_hashrnd(unsigned long arg)
+{
+	struct flow_cache *fc = (void *) arg;
+	int i;
+
+	for_each_possible_cpu(i)
+		per_cpu_ptr(fc->percpu, i)->hash_rnd_recalc = 1;
+
+	fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
+	add_timer(&fc->rnd_timer);
+}
+
+static int flow_entry_valid(struct flow_cache_entry *fle)
+{
+	if (atomic_read(&flow_cache_genid) != fle->genid)
+		return 0;
+	if (fle->object && !fle->object->ops->check(fle->object))
+		return 0;
+	return 1;
+}
+
+static void flow_entry_kill(struct flow_cache_entry *fle)
+{
+	if (fle->object)
+		fle->object->ops->delete(fle->object);
+	kmem_cache_free(flow_cachep, fle);
+}
+
+static void flow_cache_gc_task(struct work_struct *work)
+{
+	struct list_head gc_list;
+	struct flow_cache_entry *fce, *n;
+
+	INIT_LIST_HEAD(&gc_list);
+	spin_lock_bh(&flow_cache_gc_lock);
+	list_splice_tail_init(&flow_cache_gc_list, &gc_list);
+	spin_unlock_bh(&flow_cache_gc_lock);
+
+	list_for_each_entry_safe(fce, n, &gc_list, u.gc_list)
+		flow_entry_kill(fce);
+}
+static DECLARE_WORK(flow_cache_gc_work, flow_cache_gc_task);
+
+static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp,
+				     int deleted, struct list_head *gc_list)
+{
+	if (deleted) {
+		fcp->hash_count -= deleted;
+		spin_lock_bh(&flow_cache_gc_lock);
+		list_splice_tail(gc_list, &flow_cache_gc_list);
+		spin_unlock_bh(&flow_cache_gc_lock);
+		schedule_work(&flow_cache_gc_work);
+	}
+}
+
+static void __flow_cache_shrink(struct flow_cache *fc,
+				struct flow_cache_percpu *fcp,
+				int shrink_to)
+{
+	struct flow_cache_entry *fle;
+	struct hlist_node *entry, *tmp;
+	LIST_HEAD(gc_list);
+	int i, deleted = 0;
+
+	for (i = 0; i < flow_cache_hash_size(fc); i++) {
+		int saved = 0;
+
+		hlist_for_each_entry_safe(fle, entry, tmp,
+					  &fcp->hash_table[i], u.hlist) {
+			if (saved < shrink_to &&
+			    flow_entry_valid(fle)) {
+				saved++;
+			} else {
+				deleted++;
+				hlist_del(&fle->u.hlist);
+				list_add_tail(&fle->u.gc_list, &gc_list);
+			}
+		}
+	}
+
+	flow_cache_queue_garbage(fcp, deleted, &gc_list);
+}
+
+static void flow_cache_shrink(struct flow_cache *fc,
+			      struct flow_cache_percpu *fcp)
+{
+	int shrink_to = fc->low_watermark / flow_cache_hash_size(fc);
+
+	__flow_cache_shrink(fc, fcp, shrink_to);
+}
+
+static void flow_new_hash_rnd(struct flow_cache *fc,
+			      struct flow_cache_percpu *fcp)
+{
+	get_random_bytes(&fcp->hash_rnd, sizeof(u32));
+	fcp->hash_rnd_recalc = 0;
+	__flow_cache_shrink(fc, fcp, 0);
+}
+
+static u32 flow_hash_code(struct flow_cache *fc,
+			  struct flow_cache_percpu *fcp,
+			  const struct flowi *key,
+			  size_t keysize)
+{
+	const u32 *k = (const u32 *) key;
+	const u32 length = keysize * sizeof(flow_compare_t) / sizeof(u32);
+
+	return jhash2(k, length, fcp->hash_rnd)
+		& (flow_cache_hash_size(fc) - 1);
+}
+
+/* I hear what you're saying, use memcmp.  But memcmp cannot make
+ * important assumptions that we can here, such as alignment.
+ */
+static int flow_key_compare(const struct flowi *key1, const struct flowi *key2,
+			    size_t keysize)
+{
+	const flow_compare_t *k1, *k1_lim, *k2;
+
+	k1 = (const flow_compare_t *) key1;
+	k1_lim = k1 + keysize;
+
+	k2 = (const flow_compare_t *) key2;
+
+	do {
+		if (*k1++ != *k2++)
+			return 1;
+	} while (k1 < k1_lim);
+
+	return 0;
+}
+
+struct flow_cache_object *
+flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
+		  flow_resolve_t resolver, void *ctx)
+{
+	struct flow_cache *fc = &flow_cache_global;
+	struct flow_cache_percpu *fcp;
+	struct flow_cache_entry *fle, *tfle;
+	struct hlist_node *entry;
+	struct flow_cache_object *flo;
+	size_t keysize;
+	unsigned int hash;
+
+	local_bh_disable();
+	fcp = this_cpu_ptr(fc->percpu);
+
+	fle = NULL;
+	flo = NULL;
+
+	keysize = flow_key_size(family);
+	if (!keysize)
+		goto nocache;
+
+	/* Packet really early in init?  Making flow_cache_init a
+	 * pre-smp initcall would solve this.  --RR */
+	if (!fcp->hash_table)
+		goto nocache;
+
+	if (fcp->hash_rnd_recalc)
+		flow_new_hash_rnd(fc, fcp);
+
+	hash = flow_hash_code(fc, fcp, key, keysize);
+	hlist_for_each_entry(tfle, entry, &fcp->hash_table[hash], u.hlist) {
+		if (tfle->net == net &&
+		    tfle->family == family &&
+		    tfle->dir == dir &&
+		    flow_key_compare(key, &tfle->key, keysize) == 0) {
+			fle = tfle;
+			break;
+		}
+	}
+
+	if (unlikely(!fle)) {
+		if (fcp->hash_count > fc->high_watermark)
+			flow_cache_shrink(fc, fcp);
+
+		fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC);
+		if (fle) {
+			fle->net = net;
+			fle->family = family;
+			fle->dir = dir;
+			memcpy(&fle->key, key, keysize * sizeof(flow_compare_t));
+			fle->object = NULL;
+			hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]);
+			fcp->hash_count++;
+		}
+	} else if (likely(fle->genid == atomic_read(&flow_cache_genid))) {
+		flo = fle->object;
+		if (!flo)
+			goto ret_object;
+		flo = flo->ops->get(flo);
+		if (flo)
+			goto ret_object;
+	} else if (fle->object) {
+	        flo = fle->object;
+	        flo->ops->delete(flo);
+	        fle->object = NULL;
+	}
+
+nocache:
+	flo = NULL;
+	if (fle) {
+		flo = fle->object;
+		fle->object = NULL;
+	}
+	flo = resolver(net, key, family, dir, flo, ctx);
+	if (fle) {
+		fle->genid = atomic_read(&flow_cache_genid);
+		if (!IS_ERR(flo))
+			fle->object = flo;
+		else
+			fle->genid--;
+	} else {
+		if (flo && !IS_ERR(flo))
+			flo->ops->delete(flo);
+	}
+ret_object:
+	local_bh_enable();
+	return flo;
+}
+EXPORT_SYMBOL(flow_cache_lookup);
+
+static void flow_cache_flush_tasklet(unsigned long data)
+{
+	struct flow_flush_info *info = (void *)data;
+	struct flow_cache *fc = info->cache;
+	struct flow_cache_percpu *fcp;
+	struct flow_cache_entry *fle;
+	struct hlist_node *entry, *tmp;
+	LIST_HEAD(gc_list);
+	int i, deleted = 0;
+
+	fcp = this_cpu_ptr(fc->percpu);
+	for (i = 0; i < flow_cache_hash_size(fc); i++) {
+		hlist_for_each_entry_safe(fle, entry, tmp,
+					  &fcp->hash_table[i], u.hlist) {
+			if (flow_entry_valid(fle))
+				continue;
+
+			deleted++;
+			hlist_del(&fle->u.hlist);
+			list_add_tail(&fle->u.gc_list, &gc_list);
+		}
+	}
+
+	flow_cache_queue_garbage(fcp, deleted, &gc_list);
+
+	if (atomic_dec_and_test(&info->cpuleft))
+		complete(&info->completion);
+}
+
+static void flow_cache_flush_per_cpu(void *data)
+{
+	struct flow_flush_info *info = data;
+	int cpu;
+	struct tasklet_struct *tasklet;
+
+	cpu = smp_processor_id();
+	tasklet = &per_cpu_ptr(info->cache->percpu, cpu)->flush_tasklet;
+	tasklet->data = (unsigned long)info;
+	tasklet_schedule(tasklet);
+}
+
+void flow_cache_flush(void)
+{
+	struct flow_flush_info info;
+	static DEFINE_MUTEX(flow_flush_sem);
+
+	/* Don't want cpus going down or up during this. */
+	get_online_cpus();
+	mutex_lock(&flow_flush_sem);
+	info.cache = &flow_cache_global;
+	atomic_set(&info.cpuleft, num_online_cpus());
+	init_completion(&info.completion);
+
+	local_bh_disable();
+	smp_call_function(flow_cache_flush_per_cpu, &info, 0);
+	flow_cache_flush_tasklet((unsigned long)&info);
+	local_bh_enable();
+
+	wait_for_completion(&info.completion);
+	mutex_unlock(&flow_flush_sem);
+	put_online_cpus();
+}
+
+static void flow_cache_flush_task(struct work_struct *work)
+{
+	flow_cache_flush();
+}
+
+static DECLARE_WORK(flow_cache_flush_work, flow_cache_flush_task);
+
+void flow_cache_flush_deferred(void)
+{
+	schedule_work(&flow_cache_flush_work);
+}
+
+static int __cpuinit flow_cache_cpu_prepare(struct flow_cache *fc, int cpu)
+{
+	struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
+	size_t sz = sizeof(struct hlist_head) * flow_cache_hash_size(fc);
+
+	if (!fcp->hash_table) {
+		fcp->hash_table = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu));
+		if (!fcp->hash_table) {
+			pr_err("NET: failed to allocate flow cache sz %zu\n", sz);
+			return -ENOMEM;
+		}
+		fcp->hash_rnd_recalc = 1;
+		fcp->hash_count = 0;
+		tasklet_init(&fcp->flush_tasklet, flow_cache_flush_tasklet, 0);
+	}
+	return 0;
+}
+
+static int __cpuinit flow_cache_cpu(struct notifier_block *nfb,
+			  unsigned long action,
+			  void *hcpu)
+{
+	struct flow_cache *fc = container_of(nfb, struct flow_cache, hotcpu_notifier);
+	int res, cpu = (unsigned long) hcpu;
+	struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
+
+	switch (action) {
+	case CPU_UP_PREPARE:
+	case CPU_UP_PREPARE_FROZEN:
+		res = flow_cache_cpu_prepare(fc, cpu);
+		if (res)
+			return notifier_from_errno(res);
+		break;
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
+		__flow_cache_shrink(fc, fcp, 0);
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static int __init flow_cache_init(struct flow_cache *fc)
+{
+	int i;
+
+	fc->hash_shift = 10;
+	fc->low_watermark = 2 * flow_cache_hash_size(fc);
+	fc->high_watermark = 4 * flow_cache_hash_size(fc);
+
+	fc->percpu = alloc_percpu(struct flow_cache_percpu);
+	if (!fc->percpu)
+		return -ENOMEM;
+
+	for_each_online_cpu(i) {
+		if (flow_cache_cpu_prepare(fc, i))
+			goto err;
+	}
+	fc->hotcpu_notifier = (struct notifier_block){
+		.notifier_call = flow_cache_cpu,
+	};
+	register_hotcpu_notifier(&fc->hotcpu_notifier);
+
+	setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd,
+		    (unsigned long) fc);
+	fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
+	add_timer(&fc->rnd_timer);
+
+	return 0;
+
+err:
+	for_each_possible_cpu(i) {
+		struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, i);
+		kfree(fcp->hash_table);
+		fcp->hash_table = NULL;
+	}
+
+	free_percpu(fc->percpu);
+	fc->percpu = NULL;
+
+	return -ENOMEM;
+}
+
+static int __init flow_cache_init_global(void)
+{
+	flow_cachep = kmem_cache_create("flow_cache",
+					sizeof(struct flow_cache_entry),
+					0, SLAB_PANIC, NULL);
+
+	return flow_cache_init(&flow_cache_global);
+}
+
+module_init(flow_cache_init_global);
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
new file mode 100644
index 00000000..a225089d
--- /dev/null
+++ b/net/core/flow_dissector.c
@@ -0,0 +1,144 @@
+#include <linux/skbuff.h>
+#include <linux/export.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/if_vlan.h>
+#include <net/ip.h>
+#include <linux/if_tunnel.h>
+#include <linux/if_pppox.h>
+#include <linux/ppp_defs.h>
+#include <net/flow_keys.h>
+
+/* copy saddr & daddr, possibly using 64bit load/store
+ * Equivalent to :	flow->src = iph->saddr;
+ *			flow->dst = iph->daddr;
+ */
+static void iph_to_flow_copy_addrs(struct flow_keys *flow, const struct iphdr *iph)
+{
+	BUILD_BUG_ON(offsetof(typeof(*flow), dst) !=
+		     offsetof(typeof(*flow), src) + sizeof(flow->src));
+	memcpy(&flow->src, &iph->saddr, sizeof(flow->src) + sizeof(flow->dst));
+}
+
+bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow)
+{
+	int poff, nhoff = skb_network_offset(skb);
+	u8 ip_proto;
+	__be16 proto = skb->protocol;
+
+	memset(flow, 0, sizeof(*flow));
+
+again:
+	switch (proto) {
+	case __constant_htons(ETH_P_IP): {
+		const struct iphdr *iph;
+		struct iphdr _iph;
+ip:
+		iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
+		if (!iph)
+			return false;
+
+		if (ip_is_fragment(iph))
+			ip_proto = 0;
+		else
+			ip_proto = iph->protocol;
+		iph_to_flow_copy_addrs(flow, iph);
+		nhoff += iph->ihl * 4;
+		break;
+	}
+	case __constant_htons(ETH_P_IPV6): {
+		const struct ipv6hdr *iph;
+		struct ipv6hdr _iph;
+ipv6:
+		iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
+		if (!iph)
+			return false;
+
+		ip_proto = iph->nexthdr;
+		flow->src = iph->saddr.s6_addr32[3];
+		flow->dst = iph->daddr.s6_addr32[3];
+		nhoff += sizeof(struct ipv6hdr);
+		break;
+	}
+	case __constant_htons(ETH_P_8021Q): {
+		const struct vlan_hdr *vlan;
+		struct vlan_hdr _vlan;
+
+		vlan = skb_header_pointer(skb, nhoff, sizeof(_vlan), &_vlan);
+		if (!vlan)
+			return false;
+
+		proto = vlan->h_vlan_encapsulated_proto;
+		nhoff += sizeof(*vlan);
+		goto again;
+	}
+	case __constant_htons(ETH_P_PPP_SES): {
+		struct {
+			struct pppoe_hdr hdr;
+			__be16 proto;
+		} *hdr, _hdr;
+		hdr = skb_header_pointer(skb, nhoff, sizeof(_hdr), &_hdr);
+		if (!hdr)
+			return false;
+		proto = hdr->proto;
+		nhoff += PPPOE_SES_HLEN;
+		switch (proto) {
+		case __constant_htons(PPP_IP):
+			goto ip;
+		case __constant_htons(PPP_IPV6):
+			goto ipv6;
+		default:
+			return false;
+		}
+	}
+	default:
+		return false;
+	}
+
+	switch (ip_proto) {
+	case IPPROTO_GRE: {
+		struct gre_hdr {
+			__be16 flags;
+			__be16 proto;
+		} *hdr, _hdr;
+
+		hdr = skb_header_pointer(skb, nhoff, sizeof(_hdr), &_hdr);
+		if (!hdr)
+			return false;
+		/*
+		 * Only look inside GRE if version zero and no
+		 * routing
+		 */
+		if (!(hdr->flags & (GRE_VERSION|GRE_ROUTING))) {
+			proto = hdr->proto;
+			nhoff += 4;
+			if (hdr->flags & GRE_CSUM)
+				nhoff += 4;
+			if (hdr->flags & GRE_KEY)
+				nhoff += 4;
+			if (hdr->flags & GRE_SEQ)
+				nhoff += 4;
+			goto again;
+		}
+		break;
+	}
+	case IPPROTO_IPIP:
+		goto again;
+	default:
+		break;
+	}
+
+	flow->ip_proto = ip_proto;
+	poff = proto_ports_offset(ip_proto);
+	if (poff >= 0) {
+		__be32 *ports, _ports;
+
+		nhoff += poff;
+		ports = skb_header_pointer(skb, nhoff, sizeof(_ports), &_ports);
+		if (ports)
+			flow->ports = *ports;
+	}
+
+	return true;
+}
+EXPORT_SYMBOL(skb_flow_dissect);
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
new file mode 100644
index 00000000..d9d198aa
--- /dev/null
+++ b/net/core/gen_estimator.c
@@ -0,0 +1,321 @@
+/*
+ * net/sched/gen_estimator.c	Simple rate estimator.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ * Changes:
+ *              Jamal Hadi Salim - moved it to net/core and reshulfed
+ *              names to make it usable in general net subsystem.
+ */
+
+#include <asm/uaccess.h>
+#include <linux/bitops.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/init.h>
+#include <linux/rbtree.h>
+#include <linux/slab.h>
+#include <net/sock.h>
+#include <net/gen_stats.h>
+
+/*
+   This code is NOT intended to be used for statistics collection,
+   its purpose is to provide a base for statistical multiplexing
+   for controlled load service.
+   If you need only statistics, run a user level daemon which
+   periodically reads byte counters.
+
+   Unfortunately, rate estimation is not a very easy task.
+   F.e. I did not find a simple way to estimate the current peak rate
+   and even failed to formulate the problem 8)8)
+
+   So I preferred not to built an estimator into the scheduler,
+   but run this task separately.
+   Ideally, it should be kernel thread(s), but for now it runs
+   from timers, which puts apparent top bounds on the number of rated
+   flows, has minimal overhead on small, but is enough
+   to handle controlled load service, sets of aggregates.
+
+   We measure rate over A=(1<<interval) seconds and evaluate EWMA:
+
+   avrate = avrate*(1-W) + rate*W
+
+   where W is chosen as negative power of 2: W = 2^(-ewma_log)
+
+   The resulting time constant is:
+
+   T = A/(-ln(1-W))
+
+
+   NOTES.
+
+   * avbps is scaled by 2^5, avpps is scaled by 2^10.
+   * both values are reported as 32 bit unsigned values. bps can
+     overflow for fast links : max speed being 34360Mbit/sec
+   * Minimal interval is HZ/4=250msec (it is the greatest common divisor
+     for HZ=100 and HZ=1024 8)), maximal interval
+     is (HZ*2^EST_MAX_INTERVAL)/4 = 8sec. Shorter intervals
+     are too expensive, longer ones can be implemented
+     at user level painlessly.
+ */
+
+#define EST_MAX_INTERVAL	5
+
+struct gen_estimator
+{
+	struct list_head	list;
+	struct gnet_stats_basic_packed	*bstats;
+	struct gnet_stats_rate_est	*rate_est;
+	spinlock_t		*stats_lock;
+	int			ewma_log;
+	u64			last_bytes;
+	u64			avbps;
+	u32			last_packets;
+	u32			avpps;
+	struct rcu_head		e_rcu;
+	struct rb_node		node;
+};
+
+struct gen_estimator_head
+{
+	struct timer_list	timer;
+	struct list_head	list;
+};
+
+static struct gen_estimator_head elist[EST_MAX_INTERVAL+1];
+
+/* Protects against NULL dereference */
+static DEFINE_RWLOCK(est_lock);
+
+/* Protects against soft lockup during large deletion */
+static struct rb_root est_root = RB_ROOT;
+static DEFINE_SPINLOCK(est_tree_lock);
+
+static void est_timer(unsigned long arg)
+{
+	int idx = (int)arg;
+	struct gen_estimator *e;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(e, &elist[idx].list, list) {
+		u64 nbytes;
+		u64 brate;
+		u32 npackets;
+		u32 rate;
+
+		spin_lock(e->stats_lock);
+		read_lock(&est_lock);
+		if (e->bstats == NULL)
+			goto skip;
+
+		nbytes = e->bstats->bytes;
+		npackets = e->bstats->packets;
+		brate = (nbytes - e->last_bytes)<<(7 - idx);
+		e->last_bytes = nbytes;
+		e->avbps += (brate >> e->ewma_log) - (e->avbps >> e->ewma_log);
+		e->rate_est->bps = (e->avbps+0xF)>>5;
+
+		rate = (npackets - e->last_packets)<<(12 - idx);
+		e->last_packets = npackets;
+		e->avpps += (rate >> e->ewma_log) - (e->avpps >> e->ewma_log);
+		e->rate_est->pps = (e->avpps+0x1FF)>>10;
+skip:
+		read_unlock(&est_lock);
+		spin_unlock(e->stats_lock);
+	}
+
+	if (!list_empty(&elist[idx].list))
+		mod_timer(&elist[idx].timer, jiffies + ((HZ/4) << idx));
+	rcu_read_unlock();
+}
+
+static void gen_add_node(struct gen_estimator *est)
+{
+	struct rb_node **p = &est_root.rb_node, *parent = NULL;
+
+	while (*p) {
+		struct gen_estimator *e;
+
+		parent = *p;
+		e = rb_entry(parent, struct gen_estimator, node);
+
+		if (est->bstats > e->bstats)
+			p = &parent->rb_right;
+		else
+			p = &parent->rb_left;
+	}
+	rb_link_node(&est->node, parent, p);
+	rb_insert_color(&est->node, &est_root);
+}
+
+static
+struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats,
+				    const struct gnet_stats_rate_est *rate_est)
+{
+	struct rb_node *p = est_root.rb_node;
+
+	while (p) {
+		struct gen_estimator *e;
+
+		e = rb_entry(p, struct gen_estimator, node);
+
+		if (bstats > e->bstats)
+			p = p->rb_right;
+		else if (bstats < e->bstats || rate_est != e->rate_est)
+			p = p->rb_left;
+		else
+			return e;
+	}
+	return NULL;
+}
+
+/**
+ * gen_new_estimator - create a new rate estimator
+ * @bstats: basic statistics
+ * @rate_est: rate estimator statistics
+ * @stats_lock: statistics lock
+ * @opt: rate estimator configuration TLV
+ *
+ * Creates a new rate estimator with &bstats as source and &rate_est
+ * as destination. A new timer with the interval specified in the
+ * configuration TLV is created. Upon each interval, the latest statistics
+ * will be read from &bstats and the estimated rate will be stored in
+ * &rate_est with the statistics lock grabed during this period.
+ *
+ * Returns 0 on success or a negative error code.
+ *
+ */
+int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
+		      struct gnet_stats_rate_est *rate_est,
+		      spinlock_t *stats_lock,
+		      struct nlattr *opt)
+{
+	struct gen_estimator *est;
+	struct gnet_estimator *parm = nla_data(opt);
+	int idx;
+
+	if (nla_len(opt) < sizeof(*parm))
+		return -EINVAL;
+
+	if (parm->interval < -2 || parm->interval > 3)
+		return -EINVAL;
+
+	est = kzalloc(sizeof(*est), GFP_KERNEL);
+	if (est == NULL)
+		return -ENOBUFS;
+
+	idx = parm->interval + 2;
+	est->bstats = bstats;
+	est->rate_est = rate_est;
+	est->stats_lock = stats_lock;
+	est->ewma_log = parm->ewma_log;
+	est->last_bytes = bstats->bytes;
+	est->avbps = rate_est->bps<<5;
+	est->last_packets = bstats->packets;
+	est->avpps = rate_est->pps<<10;
+
+	spin_lock_bh(&est_tree_lock);
+	if (!elist[idx].timer.function) {
+		INIT_LIST_HEAD(&elist[idx].list);
+		setup_timer(&elist[idx].timer, est_timer, idx);
+	}
+
+	if (list_empty(&elist[idx].list))
+		mod_timer(&elist[idx].timer, jiffies + ((HZ/4) << idx));
+
+	list_add_rcu(&est->list, &elist[idx].list);
+	gen_add_node(est);
+	spin_unlock_bh(&est_tree_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL(gen_new_estimator);
+
+/**
+ * gen_kill_estimator - remove a rate estimator
+ * @bstats: basic statistics
+ * @rate_est: rate estimator statistics
+ *
+ * Removes the rate estimator specified by &bstats and &rate_est.
+ *
+ * Note : Caller should respect an RCU grace period before freeing stats_lock
+ */
+void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
+			struct gnet_stats_rate_est *rate_est)
+{
+	struct gen_estimator *e;
+
+	spin_lock_bh(&est_tree_lock);
+	while ((e = gen_find_node(bstats, rate_est))) {
+		rb_erase(&e->node, &est_root);
+
+		write_lock(&est_lock);
+		e->bstats = NULL;
+		write_unlock(&est_lock);
+
+		list_del_rcu(&e->list);
+		kfree_rcu(e, e_rcu);
+	}
+	spin_unlock_bh(&est_tree_lock);
+}
+EXPORT_SYMBOL(gen_kill_estimator);
+
+/**
+ * gen_replace_estimator - replace rate estimator configuration
+ * @bstats: basic statistics
+ * @rate_est: rate estimator statistics
+ * @stats_lock: statistics lock
+ * @opt: rate estimator configuration TLV
+ *
+ * Replaces the configuration of a rate estimator by calling
+ * gen_kill_estimator() and gen_new_estimator().
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
+			  struct gnet_stats_rate_est *rate_est,
+			  spinlock_t *stats_lock, struct nlattr *opt)
+{
+	gen_kill_estimator(bstats, rate_est);
+	return gen_new_estimator(bstats, rate_est, stats_lock, opt);
+}
+EXPORT_SYMBOL(gen_replace_estimator);
+
+/**
+ * gen_estimator_active - test if estimator is currently in use
+ * @bstats: basic statistics
+ * @rate_est: rate estimator statistics
+ *
+ * Returns true if estimator is active, and false if not.
+ */
+bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats,
+			  const struct gnet_stats_rate_est *rate_est)
+{
+	bool res;
+
+	ASSERT_RTNL();
+
+	spin_lock_bh(&est_tree_lock);
+	res = gen_find_node(bstats, rate_est) != NULL;
+	spin_unlock_bh(&est_tree_lock);
+
+	return res;
+}
+EXPORT_SYMBOL(gen_estimator_active);
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
new file mode 100644
index 00000000..0452eb27
--- /dev/null
+++ b/net/core/gen_stats.c
@@ -0,0 +1,250 @@
+/*
+ * net/core/gen_stats.c
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:  Thomas Graf <tgraf@suug.ch>
+ *           Jamal Hadi Salim
+ *           Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ * See Documentation/networking/gen_stats.txt
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/socket.h>
+#include <linux/rtnetlink.h>
+#include <linux/gen_stats.h>
+#include <net/netlink.h>
+#include <net/gen_stats.h>
+
+
+static inline int
+gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size)
+{
+	NLA_PUT(d->skb, type, size, buf);
+	return 0;
+
+nla_put_failure:
+	spin_unlock_bh(d->lock);
+	return -1;
+}
+
+/**
+ * gnet_stats_start_copy_compat - start dumping procedure in compatibility mode
+ * @skb: socket buffer to put statistics TLVs into
+ * @type: TLV type for top level statistic TLV
+ * @tc_stats_type: TLV type for backward compatibility struct tc_stats TLV
+ * @xstats_type: TLV type for backward compatibility xstats TLV
+ * @lock: statistics lock
+ * @d: dumping handle
+ *
+ * Initializes the dumping handle, grabs the statistic lock and appends
+ * an empty TLV header to the socket buffer for use a container for all
+ * other statistic TLVS.
+ *
+ * The dumping handle is marked to be in backward compatibility mode telling
+ * all gnet_stats_copy_XXX() functions to fill a local copy of struct tc_stats.
+ *
+ * Returns 0 on success or -1 if the room in the socket buffer was not sufficient.
+ */
+int
+gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type,
+	int xstats_type, spinlock_t *lock, struct gnet_dump *d)
+	__acquires(lock)
+{
+	memset(d, 0, sizeof(*d));
+
+	spin_lock_bh(lock);
+	d->lock = lock;
+	if (type)
+		d->tail = (struct nlattr *)skb_tail_pointer(skb);
+	d->skb = skb;
+	d->compat_tc_stats = tc_stats_type;
+	d->compat_xstats = xstats_type;
+
+	if (d->tail)
+		return gnet_stats_copy(d, type, NULL, 0);
+
+	return 0;
+}
+EXPORT_SYMBOL(gnet_stats_start_copy_compat);
+
+/**
+ * gnet_stats_start_copy_compat - start dumping procedure in compatibility mode
+ * @skb: socket buffer to put statistics TLVs into
+ * @type: TLV type for top level statistic TLV
+ * @lock: statistics lock
+ * @d: dumping handle
+ *
+ * Initializes the dumping handle, grabs the statistic lock and appends
+ * an empty TLV header to the socket buffer for use a container for all
+ * other statistic TLVS.
+ *
+ * Returns 0 on success or -1 if the room in the socket buffer was not sufficient.
+ */
+int
+gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock,
+	struct gnet_dump *d)
+{
+	return gnet_stats_start_copy_compat(skb, type, 0, 0, lock, d);
+}
+EXPORT_SYMBOL(gnet_stats_start_copy);
+
+/**
+ * gnet_stats_copy_basic - copy basic statistics into statistic TLV
+ * @d: dumping handle
+ * @b: basic statistics
+ *
+ * Appends the basic statistics to the top level TLV created by
+ * gnet_stats_start_copy().
+ *
+ * Returns 0 on success or -1 with the statistic lock released
+ * if the room in the socket buffer was not sufficient.
+ */
+int
+gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic_packed *b)
+{
+	if (d->compat_tc_stats) {
+		d->tc_stats.bytes = b->bytes;
+		d->tc_stats.packets = b->packets;
+	}
+
+	if (d->tail) {
+		struct gnet_stats_basic sb;
+
+		memset(&sb, 0, sizeof(sb));
+		sb.bytes = b->bytes;
+		sb.packets = b->packets;
+		return gnet_stats_copy(d, TCA_STATS_BASIC, &sb, sizeof(sb));
+	}
+	return 0;
+}
+EXPORT_SYMBOL(gnet_stats_copy_basic);
+
+/**
+ * gnet_stats_copy_rate_est - copy rate estimator statistics into statistics TLV
+ * @d: dumping handle
+ * @b: basic statistics
+ * @r: rate estimator statistics
+ *
+ * Appends the rate estimator statistics to the top level TLV created by
+ * gnet_stats_start_copy().
+ *
+ * Returns 0 on success or -1 with the statistic lock released
+ * if the room in the socket buffer was not sufficient.
+ */
+int
+gnet_stats_copy_rate_est(struct gnet_dump *d,
+			 const struct gnet_stats_basic_packed *b,
+			 struct gnet_stats_rate_est *r)
+{
+	if (b && !gen_estimator_active(b, r))
+		return 0;
+
+	if (d->compat_tc_stats) {
+		d->tc_stats.bps = r->bps;
+		d->tc_stats.pps = r->pps;
+	}
+
+	if (d->tail)
+		return gnet_stats_copy(d, TCA_STATS_RATE_EST, r, sizeof(*r));
+
+	return 0;
+}
+EXPORT_SYMBOL(gnet_stats_copy_rate_est);
+
+/**
+ * gnet_stats_copy_queue - copy queue statistics into statistics TLV
+ * @d: dumping handle
+ * @q: queue statistics
+ *
+ * Appends the queue statistics to the top level TLV created by
+ * gnet_stats_start_copy().
+ *
+ * Returns 0 on success or -1 with the statistic lock released
+ * if the room in the socket buffer was not sufficient.
+ */
+int
+gnet_stats_copy_queue(struct gnet_dump *d, struct gnet_stats_queue *q)
+{
+	if (d->compat_tc_stats) {
+		d->tc_stats.drops = q->drops;
+		d->tc_stats.qlen = q->qlen;
+		d->tc_stats.backlog = q->backlog;
+		d->tc_stats.overlimits = q->overlimits;
+	}
+
+	if (d->tail)
+		return gnet_stats_copy(d, TCA_STATS_QUEUE, q, sizeof(*q));
+
+	return 0;
+}
+EXPORT_SYMBOL(gnet_stats_copy_queue);
+
+/**
+ * gnet_stats_copy_app - copy application specific statistics into statistics TLV
+ * @d: dumping handle
+ * @st: application specific statistics data
+ * @len: length of data
+ *
+ * Appends the application sepecific statistics to the top level TLV created by
+ * gnet_stats_start_copy() and remembers the data for XSTATS if the dumping
+ * handle is in backward compatibility mode.
+ *
+ * Returns 0 on success or -1 with the statistic lock released
+ * if the room in the socket buffer was not sufficient.
+ */
+int
+gnet_stats_copy_app(struct gnet_dump *d, void *st, int len)
+{
+	if (d->compat_xstats) {
+		d->xstats = st;
+		d->xstats_len = len;
+	}
+
+	if (d->tail)
+		return gnet_stats_copy(d, TCA_STATS_APP, st, len);
+
+	return 0;
+}
+EXPORT_SYMBOL(gnet_stats_copy_app);
+
+/**
+ * gnet_stats_finish_copy - finish dumping procedure
+ * @d: dumping handle
+ *
+ * Corrects the length of the top level TLV to include all TLVs added
+ * by gnet_stats_copy_XXX() calls. Adds the backward compatibility TLVs
+ * if gnet_stats_start_copy_compat() was used and releases the statistics
+ * lock.
+ *
+ * Returns 0 on success or -1 with the statistic lock released
+ * if the room in the socket buffer was not sufficient.
+ */
+int
+gnet_stats_finish_copy(struct gnet_dump *d)
+{
+	if (d->tail)
+		d->tail->nla_len = skb_tail_pointer(d->skb) - (u8 *)d->tail;
+
+	if (d->compat_tc_stats)
+		if (gnet_stats_copy(d, d->compat_tc_stats, &d->tc_stats,
+			sizeof(d->tc_stats)) < 0)
+			return -1;
+
+	if (d->compat_xstats && d->xstats) {
+		if (gnet_stats_copy(d, d->compat_xstats, d->xstats,
+			d->xstats_len) < 0)
+			return -1;
+	}
+
+	spin_unlock_bh(d->lock);
+	return 0;
+}
+EXPORT_SYMBOL(gnet_stats_finish_copy);
diff --git a/net/core/iovec.c b/net/core/iovec.c
new file mode 100644
index 00000000..7e7aeb01
--- /dev/null
+++ b/net/core/iovec.c
@@ -0,0 +1,264 @@
+/*
+ *	iovec manipulation routines.
+ *
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ *	Fixes:
+ *		Andrew Lunn	:	Errors in iovec copying.
+ *		Pedro Roque	:	Added memcpy_fromiovecend and
+ *					csum_..._fromiovecend.
+ *		Andi Kleen	:	fixed error handling for 2.1
+ *		Alexey Kuznetsov:	2.1 optimisations
+ *		Andi Kleen	:	Fix csum*fromiovecend for IPv6.
+ */
+
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <asm/uaccess.h>
+#include <asm/byteorder.h>
+#include <net/checksum.h>
+#include <net/sock.h>
+
+/*
+ *	Verify iovec. The caller must ensure that the iovec is big enough
+ *	to hold the message iovec.
+ *
+ *	Save time not doing access_ok. copy_*_user will make this work
+ *	in any case.
+ */
+
+int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr_storage *address, int mode)
+{
+	int size, ct, err;
+
+	if (m->msg_namelen) {
+		if (mode == VERIFY_READ) {
+			void __user *namep;
+			namep = (void __user __force *) m->msg_name;
+			err = move_addr_to_kernel(namep, m->msg_namelen,
+						  address);
+			if (err < 0)
+				return err;
+		}
+		m->msg_name = address;
+	} else {
+		m->msg_name = NULL;
+	}
+
+	size = m->msg_iovlen * sizeof(struct iovec);
+	if (copy_from_user(iov, (void __user __force *) m->msg_iov, size))
+		return -EFAULT;
+
+	m->msg_iov = iov;
+	err = 0;
+
+	for (ct = 0; ct < m->msg_iovlen; ct++) {
+		size_t len = iov[ct].iov_len;
+
+		if (len > INT_MAX - err) {
+			len = INT_MAX - err;
+			iov[ct].iov_len = len;
+		}
+		err += len;
+	}
+
+	return err;
+}
+
+/*
+ *	Copy kernel to iovec. Returns -EFAULT on error.
+ *
+ *	Note: this modifies the original iovec.
+ */
+
+int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len)
+{
+	while (len > 0) {
+		if (iov->iov_len) {
+			int copy = min_t(unsigned int, iov->iov_len, len);
+			if (copy_to_user(iov->iov_base, kdata, copy))
+				return -EFAULT;
+			kdata += copy;
+			len -= copy;
+			iov->iov_len -= copy;
+			iov->iov_base += copy;
+		}
+		iov++;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(memcpy_toiovec);
+
+/*
+ *	Copy kernel to iovec. Returns -EFAULT on error.
+ */
+
+int memcpy_toiovecend(const struct iovec *iov, unsigned char *kdata,
+		      int offset, int len)
+{
+	int copy;
+	for (; len > 0; ++iov) {
+		/* Skip over the finished iovecs */
+		if (unlikely(offset >= iov->iov_len)) {
+			offset -= iov->iov_len;
+			continue;
+		}
+		copy = min_t(unsigned int, iov->iov_len - offset, len);
+		if (copy_to_user(iov->iov_base + offset, kdata, copy))
+			return -EFAULT;
+		offset = 0;
+		kdata += copy;
+		len -= copy;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(memcpy_toiovecend);
+
+/*
+ *	Copy iovec to kernel. Returns -EFAULT on error.
+ *
+ *	Note: this modifies the original iovec.
+ */
+
+int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len)
+{
+	while (len > 0) {
+		if (iov->iov_len) {
+			int copy = min_t(unsigned int, len, iov->iov_len);
+			if (copy_from_user(kdata, iov->iov_base, copy))
+				return -EFAULT;
+			len -= copy;
+			kdata += copy;
+			iov->iov_base += copy;
+			iov->iov_len -= copy;
+		}
+		iov++;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(memcpy_fromiovec);
+
+/*
+ *	Copy iovec from kernel. Returns -EFAULT on error.
+ */
+
+int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov,
+			int offset, int len)
+{
+	/* Skip over the finished iovecs */
+	while (offset >= iov->iov_len) {
+		offset -= iov->iov_len;
+		iov++;
+	}
+
+	while (len > 0) {
+		u8 __user *base = iov->iov_base + offset;
+		int copy = min_t(unsigned int, len, iov->iov_len - offset);
+
+		offset = 0;
+		if (copy_from_user(kdata, base, copy))
+			return -EFAULT;
+		len -= copy;
+		kdata += copy;
+		iov++;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(memcpy_fromiovecend);
+
+/*
+ *	And now for the all-in-one: copy and checksum from a user iovec
+ *	directly to a datagram
+ *	Calls to csum_partial but the last must be in 32 bit chunks
+ *
+ *	ip_build_xmit must ensure that when fragmenting only the last
+ *	call to this function will be unaligned also.
+ */
+int csum_partial_copy_fromiovecend(unsigned char *kdata, struct iovec *iov,
+				 int offset, unsigned int len, __wsum *csump)
+{
+	__wsum csum = *csump;
+	int partial_cnt = 0, err = 0;
+
+	/* Skip over the finished iovecs */
+	while (offset >= iov->iov_len) {
+		offset -= iov->iov_len;
+		iov++;
+	}
+
+	while (len > 0) {
+		u8 __user *base = iov->iov_base + offset;
+		int copy = min_t(unsigned int, len, iov->iov_len - offset);
+
+		offset = 0;
+
+		/* There is a remnant from previous iov. */
+		if (partial_cnt) {
+			int par_len = 4 - partial_cnt;
+
+			/* iov component is too short ... */
+			if (par_len > copy) {
+				if (copy_from_user(kdata, base, copy))
+					goto out_fault;
+				kdata += copy;
+				base += copy;
+				partial_cnt += copy;
+				len -= copy;
+				iov++;
+				if (len)
+					continue;
+				*csump = csum_partial(kdata - partial_cnt,
+							 partial_cnt, csum);
+				goto out;
+			}
+			if (copy_from_user(kdata, base, par_len))
+				goto out_fault;
+			csum = csum_partial(kdata - partial_cnt, 4, csum);
+			kdata += par_len;
+			base  += par_len;
+			copy  -= par_len;
+			len   -= par_len;
+			partial_cnt = 0;
+		}
+
+		if (len > copy) {
+			partial_cnt = copy % 4;
+			if (partial_cnt) {
+				copy -= partial_cnt;
+				if (copy_from_user(kdata + copy, base + copy,
+						partial_cnt))
+					goto out_fault;
+			}
+		}
+
+		if (copy) {
+			csum = csum_and_copy_from_user(base, kdata, copy,
+							csum, &err);
+			if (err)
+				goto out;
+		}
+		len   -= copy + partial_cnt;
+		kdata += copy + partial_cnt;
+		iov++;
+	}
+	*csump = csum;
+out:
+	return err;
+
+out_fault:
+	err = -EFAULT;
+	goto out;
+}
+EXPORT_SYMBOL(csum_partial_copy_fromiovecend);
diff --git a/net/core/kmap_skb.h b/net/core/kmap_skb.h
new file mode 100644
index 00000000..52d0a445
--- /dev/null
+++ b/net/core/kmap_skb.h
@@ -0,0 +1,19 @@
+#include <linux/highmem.h>
+
+static inline void *kmap_skb_frag(const skb_frag_t *frag)
+{
+#ifdef CONFIG_HIGHMEM
+	BUG_ON(in_irq());
+
+	local_bh_disable();
+#endif
+	return kmap_atomic(skb_frag_page(frag));
+}
+
+static inline void kunmap_skb_frag(void *vaddr)
+{
+	kunmap_atomic(vaddr);
+#ifdef CONFIG_HIGHMEM
+	local_bh_enable();
+#endif
+}
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
new file mode 100644
index 00000000..c3519c6d
--- /dev/null
+++ b/net/core/link_watch.c
@@ -0,0 +1,251 @@
+/*
+ * Linux network device link state notification
+ *
+ * Author:
+ *     Stefan Rompf <sux@loplof.de>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/if.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+#include <linux/rtnetlink.h>
+#include <linux/jiffies.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+#include <linux/bitops.h>
+#include <asm/types.h>
+
+
+enum lw_bits {
+	LW_URGENT = 0,
+};
+
+static unsigned long linkwatch_flags;
+static unsigned long linkwatch_nextevent;
+
+static void linkwatch_event(struct work_struct *dummy);
+static DECLARE_DELAYED_WORK(linkwatch_work, linkwatch_event);
+
+static LIST_HEAD(lweventlist);
+static DEFINE_SPINLOCK(lweventlist_lock);
+
+static unsigned char default_operstate(const struct net_device *dev)
+{
+	if (!netif_carrier_ok(dev))
+		return (dev->ifindex != dev->iflink ?
+			IF_OPER_LOWERLAYERDOWN : IF_OPER_DOWN);
+
+	if (netif_dormant(dev))
+		return IF_OPER_DORMANT;
+
+	return IF_OPER_UP;
+}
+
+
+static void rfc2863_policy(struct net_device *dev)
+{
+	unsigned char operstate = default_operstate(dev);
+
+	if (operstate == dev->operstate)
+		return;
+
+	write_lock_bh(&dev_base_lock);
+
+	switch(dev->link_mode) {
+	case IF_LINK_MODE_DORMANT:
+		if (operstate == IF_OPER_UP)
+			operstate = IF_OPER_DORMANT;
+		break;
+
+	case IF_LINK_MODE_DEFAULT:
+	default:
+		break;
+	}
+
+	dev->operstate = operstate;
+
+	write_unlock_bh(&dev_base_lock);
+}
+
+
+static bool linkwatch_urgent_event(struct net_device *dev)
+{
+	if (!netif_running(dev))
+		return false;
+
+	if (dev->ifindex != dev->iflink)
+		return true;
+
+	return netif_carrier_ok(dev) &&	qdisc_tx_changing(dev);
+}
+
+
+static void linkwatch_add_event(struct net_device *dev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&lweventlist_lock, flags);
+	if (list_empty(&dev->link_watch_list)) {
+		list_add_tail(&dev->link_watch_list, &lweventlist);
+		dev_hold(dev);
+	}
+	spin_unlock_irqrestore(&lweventlist_lock, flags);
+}
+
+
+static void linkwatch_schedule_work(int urgent)
+{
+	unsigned long delay = linkwatch_nextevent - jiffies;
+
+	if (test_bit(LW_URGENT, &linkwatch_flags))
+		return;
+
+	/* Minimise down-time: drop delay for up event. */
+	if (urgent) {
+		if (test_and_set_bit(LW_URGENT, &linkwatch_flags))
+			return;
+		delay = 0;
+	}
+
+	/* If we wrap around we'll delay it by at most HZ. */
+	if (delay > HZ)
+		delay = 0;
+
+	/*
+	 * This is true if we've scheduled it immeditately or if we don't
+	 * need an immediate execution and it's already pending.
+	 */
+	if (schedule_delayed_work(&linkwatch_work, delay) == !delay)
+		return;
+
+	/* Don't bother if there is nothing urgent. */
+	if (!test_bit(LW_URGENT, &linkwatch_flags))
+		return;
+
+	/* It's already running which is good enough. */
+	if (!__cancel_delayed_work(&linkwatch_work))
+		return;
+
+	/* Otherwise we reschedule it again for immediate execution. */
+	schedule_delayed_work(&linkwatch_work, 0);
+}
+
+
+static void linkwatch_do_dev(struct net_device *dev)
+{
+	/*
+	 * Make sure the above read is complete since it can be
+	 * rewritten as soon as we clear the bit below.
+	 */
+	smp_mb__before_clear_bit();
+
+	/* We are about to handle this device,
+	 * so new events can be accepted
+	 */
+	clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state);
+
+	rfc2863_policy(dev);
+	if (dev->flags & IFF_UP) {
+		if (netif_carrier_ok(dev))
+			dev_activate(dev);
+		else
+			dev_deactivate(dev);
+
+		netdev_state_change(dev);
+	}
+	dev_put(dev);
+}
+
+static void __linkwatch_run_queue(int urgent_only)
+{
+	struct net_device *dev;
+	LIST_HEAD(wrk);
+
+	/*
+	 * Limit the number of linkwatch events to one
+	 * per second so that a runaway driver does not
+	 * cause a storm of messages on the netlink
+	 * socket.  This limit does not apply to up events
+	 * while the device qdisc is down.
+	 */
+	if (!urgent_only)
+		linkwatch_nextevent = jiffies + HZ;
+	/* Limit wrap-around effect on delay. */
+	else if (time_after(linkwatch_nextevent, jiffies + HZ))
+		linkwatch_nextevent = jiffies;
+
+	clear_bit(LW_URGENT, &linkwatch_flags);
+
+	spin_lock_irq(&lweventlist_lock);
+	list_splice_init(&lweventlist, &wrk);
+
+	while (!list_empty(&wrk)) {
+
+		dev = list_first_entry(&wrk, struct net_device, link_watch_list);
+		list_del_init(&dev->link_watch_list);
+
+		if (urgent_only && !linkwatch_urgent_event(dev)) {
+			list_add_tail(&dev->link_watch_list, &lweventlist);
+			continue;
+		}
+		spin_unlock_irq(&lweventlist_lock);
+		linkwatch_do_dev(dev);
+		spin_lock_irq(&lweventlist_lock);
+	}
+
+	if (!list_empty(&lweventlist))
+		linkwatch_schedule_work(0);
+	spin_unlock_irq(&lweventlist_lock);
+}
+
+void linkwatch_forget_dev(struct net_device *dev)
+{
+	unsigned long flags;
+	int clean = 0;
+
+	spin_lock_irqsave(&lweventlist_lock, flags);
+	if (!list_empty(&dev->link_watch_list)) {
+		list_del_init(&dev->link_watch_list);
+		clean = 1;
+	}
+	spin_unlock_irqrestore(&lweventlist_lock, flags);
+	if (clean)
+		linkwatch_do_dev(dev);
+}
+
+
+/* Must be called with the rtnl semaphore held */
+void linkwatch_run_queue(void)
+{
+	__linkwatch_run_queue(0);
+}
+
+
+static void linkwatch_event(struct work_struct *dummy)
+{
+	rtnl_lock();
+	__linkwatch_run_queue(time_after(linkwatch_nextevent, jiffies));
+	rtnl_unlock();
+}
+
+
+void linkwatch_fire_event(struct net_device *dev)
+{
+	bool urgent = linkwatch_urgent_event(dev);
+
+	if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) {
+		linkwatch_add_event(dev);
+	} else if (!urgent)
+		return;
+
+	linkwatch_schedule_work(urgent);
+}
+EXPORT_SYMBOL(linkwatch_fire_event);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
new file mode 100644
index 00000000..73b90351
--- /dev/null
+++ b/net/core/neighbour.c
@@ -0,0 +1,3037 @@
+/*
+ *	Generic address resolution entity
+ *
+ *	Authors:
+ *	Pedro Roque		<roque@di.fc.ul.pt>
+ *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ *
+ *	Fixes:
+ *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
+ *	Harald Welte		Add neighbour cache statistics like rtstat
+ */
+
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/socket.h>
+#include <linux/netdevice.h>
+#include <linux/proc_fs.h>
+#ifdef CONFIG_SYSCTL
+#include <linux/sysctl.h>
+#endif
+#include <linux/times.h>
+#include <net/net_namespace.h>
+#include <net/neighbour.h>
+#include <net/dst.h>
+#include <net/sock.h>
+#include <net/netevent.h>
+#include <net/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/random.h>
+#include <linux/string.h>
+#include <linux/log2.h>
+
+#define NEIGH_DEBUG 1
+
+#define NEIGH_PRINTK(x...) printk(x)
+#define NEIGH_NOPRINTK(x...) do { ; } while(0)
+#define NEIGH_PRINTK1 NEIGH_NOPRINTK
+#define NEIGH_PRINTK2 NEIGH_NOPRINTK
+
+#if NEIGH_DEBUG >= 1
+#undef NEIGH_PRINTK1
+#define NEIGH_PRINTK1 NEIGH_PRINTK
+#endif
+#if NEIGH_DEBUG >= 2
+#undef NEIGH_PRINTK2
+#define NEIGH_PRINTK2 NEIGH_PRINTK
+#endif
+
+#define PNEIGH_HASHMASK		0xF
+
+static void neigh_timer_handler(unsigned long arg);
+static void __neigh_notify(struct neighbour *n, int type, int flags);
+static void neigh_update_notify(struct neighbour *neigh);
+static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
+
+static struct neigh_table *neigh_tables;
+#ifdef CONFIG_PROC_FS
+static const struct file_operations neigh_stat_seq_fops;
+#endif
+
+/*
+   Neighbour hash table buckets are protected with rwlock tbl->lock.
+
+   - All the scans/updates to hash buckets MUST be made under this lock.
+   - NOTHING clever should be made under this lock: no callbacks
+     to protocol backends, no attempts to send something to network.
+     It will result in deadlocks, if backend/driver wants to use neighbour
+     cache.
+   - If the entry requires some non-trivial actions, increase
+     its reference count and release table lock.
+
+   Neighbour entries are protected:
+   - with reference count.
+   - with rwlock neigh->lock
+
+   Reference count prevents destruction.
+
+   neigh->lock mainly serializes ll address data and its validity state.
+   However, the same lock is used to protect another entry fields:
+    - timer
+    - resolution queue
+
+   Again, nothing clever shall be made under neigh->lock,
+   the most complicated procedure, which we allow is dev->hard_header.
+   It is supposed, that dev->hard_header is simplistic and does
+   not make callbacks to neighbour tables.
+
+   The last lock is neigh_tbl_lock. It is pure SMP lock, protecting
+   list of neighbour tables. This list is used only in process context,
+ */
+
+static DEFINE_RWLOCK(neigh_tbl_lock);
+
+static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
+{
+	kfree_skb(skb);
+	return -ENETDOWN;
+}
+
+static void neigh_cleanup_and_release(struct neighbour *neigh)
+{
+	if (neigh->parms->neigh_cleanup)
+		neigh->parms->neigh_cleanup(neigh);
+
+	__neigh_notify(neigh, RTM_DELNEIGH, 0);
+	neigh_release(neigh);
+}
+
+/*
+ * It is random distribution in the interval (1/2)*base...(3/2)*base.
+ * It corresponds to default IPv6 settings and is not overridable,
+ * because it is really reasonable choice.
+ */
+
+unsigned long neigh_rand_reach_time(unsigned long base)
+{
+	return base ? (net_random() % base) + (base >> 1) : 0;
+}
+EXPORT_SYMBOL(neigh_rand_reach_time);
+
+
+static int neigh_forced_gc(struct neigh_table *tbl)
+{
+	int shrunk = 0;
+	int i;
+	struct neigh_hash_table *nht;
+
+	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
+
+	write_lock_bh(&tbl->lock);
+	nht = rcu_dereference_protected(tbl->nht,
+					lockdep_is_held(&tbl->lock));
+	for (i = 0; i < (1 << nht->hash_shift); i++) {
+		struct neighbour *n;
+		struct neighbour __rcu **np;
+
+		np = &nht->hash_buckets[i];
+		while ((n = rcu_dereference_protected(*np,
+					lockdep_is_held(&tbl->lock))) != NULL) {
+			/* Neighbour record may be discarded if:
+			 * - nobody refers to it.
+			 * - it is not permanent
+			 */
+			write_lock(&n->lock);
+			if (atomic_read(&n->refcnt) == 1 &&
+			    !(n->nud_state & NUD_PERMANENT)) {
+				rcu_assign_pointer(*np,
+					rcu_dereference_protected(n->next,
+						  lockdep_is_held(&tbl->lock)));
+				n->dead = 1;
+				shrunk	= 1;
+				write_unlock(&n->lock);
+				neigh_cleanup_and_release(n);
+				continue;
+			}
+			write_unlock(&n->lock);
+			np = &n->next;
+		}
+	}
+
+	tbl->last_flush = jiffies;
+
+	write_unlock_bh(&tbl->lock);
+
+	return shrunk;
+}
+
+static void neigh_add_timer(struct neighbour *n, unsigned long when)
+{
+	neigh_hold(n);
+	if (unlikely(mod_timer(&n->timer, when))) {
+		printk("NEIGH: BUG, double timer add, state is %x\n",
+		       n->nud_state);
+		dump_stack();
+	}
+}
+
+static int neigh_del_timer(struct neighbour *n)
+{
+	if ((n->nud_state & NUD_IN_TIMER) &&
+	    del_timer(&n->timer)) {
+		neigh_release(n);
+		return 1;
+	}
+	return 0;
+}
+
+static void pneigh_queue_purge(struct sk_buff_head *list)
+{
+	struct sk_buff *skb;
+
+	while ((skb = skb_dequeue(list)) != NULL) {
+		dev_put(skb->dev);
+		kfree_skb(skb);
+	}
+}
+
+static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
+{
+	int i;
+	struct neigh_hash_table *nht;
+
+	nht = rcu_dereference_protected(tbl->nht,
+					lockdep_is_held(&tbl->lock));
+
+	for (i = 0; i < (1 << nht->hash_shift); i++) {
+		struct neighbour *n;
+		struct neighbour __rcu **np = &nht->hash_buckets[i];
+
+		while ((n = rcu_dereference_protected(*np,
+					lockdep_is_held(&tbl->lock))) != NULL) {
+			if (dev && n->dev != dev) {
+				np = &n->next;
+				continue;
+			}
+			rcu_assign_pointer(*np,
+				   rcu_dereference_protected(n->next,
+						lockdep_is_held(&tbl->lock)));
+			write_lock(&n->lock);
+			neigh_del_timer(n);
+			n->dead = 1;
+
+			if (atomic_read(&n->refcnt) != 1) {
+				/* The most unpleasant situation.
+				   We must destroy neighbour entry,
+				   but someone still uses it.
+
+				   The destroy will be delayed until
+				   the last user releases us, but
+				   we must kill timers etc. and move
+				   it to safe state.
+				 */
+				skb_queue_purge(&n->arp_queue);
+				n->arp_queue_len_bytes = 0;
+				n->output = neigh_blackhole;
+				if (n->nud_state & NUD_VALID)
+					n->nud_state = NUD_NOARP;
+				else
+					n->nud_state = NUD_NONE;
+				NEIGH_PRINTK2("neigh %p is stray.\n", n);
+			}
+			write_unlock(&n->lock);
+			neigh_cleanup_and_release(n);
+		}
+	}
+}
+
+void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
+{
+	write_lock_bh(&tbl->lock);
+	neigh_flush_dev(tbl, dev);
+	write_unlock_bh(&tbl->lock);
+}
+EXPORT_SYMBOL(neigh_changeaddr);
+
+int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
+{
+	write_lock_bh(&tbl->lock);
+	neigh_flush_dev(tbl, dev);
+	pneigh_ifdown(tbl, dev);
+	write_unlock_bh(&tbl->lock);
+
+	del_timer_sync(&tbl->proxy_timer);
+	pneigh_queue_purge(&tbl->proxy_queue);
+	return 0;
+}
+EXPORT_SYMBOL(neigh_ifdown);
+
+static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
+{
+	struct neighbour *n = NULL;
+	unsigned long now = jiffies;
+	int entries;
+
+	entries = atomic_inc_return(&tbl->entries) - 1;
+	if (entries >= tbl->gc_thresh3 ||
+	    (entries >= tbl->gc_thresh2 &&
+	     time_after(now, tbl->last_flush + 5 * HZ))) {
+		if (!neigh_forced_gc(tbl) &&
+		    entries >= tbl->gc_thresh3)
+			goto out_entries;
+	}
+
+	if (tbl->entry_size)
+		n = kzalloc(tbl->entry_size, GFP_ATOMIC);
+	else {
+		int sz = sizeof(*n) + tbl->key_len;
+
+		sz = ALIGN(sz, NEIGH_PRIV_ALIGN);
+		sz += dev->neigh_priv_len;
+		n = kzalloc(sz, GFP_ATOMIC);
+	}
+	if (!n)
+		goto out_entries;
+
+	skb_queue_head_init(&n->arp_queue);
+	rwlock_init(&n->lock);
+	seqlock_init(&n->ha_lock);
+	n->updated	  = n->used = now;
+	n->nud_state	  = NUD_NONE;
+	n->output	  = neigh_blackhole;
+	seqlock_init(&n->hh.hh_lock);
+	n->parms	  = neigh_parms_clone(&tbl->parms);
+	setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);
+
+	NEIGH_CACHE_STAT_INC(tbl, allocs);
+	n->tbl		  = tbl;
+	atomic_set(&n->refcnt, 1);
+	n->dead		  = 1;
+out:
+	return n;
+
+out_entries:
+	atomic_dec(&tbl->entries);
+	goto out;
+}
+
+static void neigh_get_hash_rnd(u32 *x)
+{
+	get_random_bytes(x, sizeof(*x));
+	*x |= 1;
+}
+
+static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
+{
+	size_t size = (1 << shift) * sizeof(struct neighbour *);
+	struct neigh_hash_table *ret;
+	struct neighbour __rcu **buckets;
+	int i;
+
+	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
+	if (!ret)
+		return NULL;
+	if (size <= PAGE_SIZE)
+		buckets = kzalloc(size, GFP_ATOMIC);
+	else
+		buckets = (struct neighbour __rcu **)
+			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
+					   get_order(size));
+	if (!buckets) {
+		kfree(ret);
+		return NULL;
+	}
+	ret->hash_buckets = buckets;
+	ret->hash_shift = shift;
+	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
+		neigh_get_hash_rnd(&ret->hash_rnd[i]);
+	return ret;
+}
+
+static void neigh_hash_free_rcu(struct rcu_head *head)
+{
+	struct neigh_hash_table *nht = container_of(head,
+						    struct neigh_hash_table,
+						    rcu);
+	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
+	struct neighbour __rcu **buckets = nht->hash_buckets;
+
+	if (size <= PAGE_SIZE)
+		kfree(buckets);
+	else
+		free_pages((unsigned long)buckets, get_order(size));
+	kfree(nht);
+}
+
+static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
+						unsigned long new_shift)
+{
+	unsigned int i, hash;
+	struct neigh_hash_table *new_nht, *old_nht;
+
+	NEIGH_CACHE_STAT_INC(tbl, hash_grows);
+
+	old_nht = rcu_dereference_protected(tbl->nht,
+					    lockdep_is_held(&tbl->lock));
+	new_nht = neigh_hash_alloc(new_shift);
+	if (!new_nht)
+		return old_nht;
+
+	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
+		struct neighbour *n, *next;
+
+		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
+						   lockdep_is_held(&tbl->lock));
+		     n != NULL;
+		     n = next) {
+			hash = tbl->hash(n->primary_key, n->dev,
+					 new_nht->hash_rnd);
+
+			hash >>= (32 - new_nht->hash_shift);
+			next = rcu_dereference_protected(n->next,
+						lockdep_is_held(&tbl->lock));
+
+			rcu_assign_pointer(n->next,
+					   rcu_dereference_protected(
+						new_nht->hash_buckets[hash],
+						lockdep_is_held(&tbl->lock)));
+			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
+		}
+	}
+
+	rcu_assign_pointer(tbl->nht, new_nht);
+	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
+	return new_nht;
+}
+
+struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
+			       struct net_device *dev)
+{
+	struct neighbour *n;
+	int key_len = tbl->key_len;
+	u32 hash_val;
+	struct neigh_hash_table *nht;
+
+	NEIGH_CACHE_STAT_INC(tbl, lookups);
+
+	rcu_read_lock_bh();
+	nht = rcu_dereference_bh(tbl->nht);
+	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
+
+	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
+	     n != NULL;
+	     n = rcu_dereference_bh(n->next)) {
+		if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
+			if (!atomic_inc_not_zero(&n->refcnt))
+				n = NULL;
+			NEIGH_CACHE_STAT_INC(tbl, hits);
+			break;
+		}
+	}
+
+	rcu_read_unlock_bh();
+	return n;
+}
+EXPORT_SYMBOL(neigh_lookup);
+
+struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
+				     const void *pkey)
+{
+	struct neighbour *n;
+	int key_len = tbl->key_len;
+	u32 hash_val;
+	struct neigh_hash_table *nht;
+
+	NEIGH_CACHE_STAT_INC(tbl, lookups);
+
+	rcu_read_lock_bh();
+	nht = rcu_dereference_bh(tbl->nht);
+	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
+
+	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
+	     n != NULL;
+	     n = rcu_dereference_bh(n->next)) {
+		if (!memcmp(n->primary_key, pkey, key_len) &&
+		    net_eq(dev_net(n->dev), net)) {
+			if (!atomic_inc_not_zero(&n->refcnt))
+				n = NULL;
+			NEIGH_CACHE_STAT_INC(tbl, hits);
+			break;
+		}
+	}
+
+	rcu_read_unlock_bh();
+	return n;
+}
+EXPORT_SYMBOL(neigh_lookup_nodev);
+
+struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
+			       struct net_device *dev)
+{
+	u32 hash_val;
+	int key_len = tbl->key_len;
+	int error;
+	struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
+	struct neigh_hash_table *nht;
+
+	if (!n) {
+		rc = ERR_PTR(-ENOBUFS);
+		goto out;
+	}
+
+	memcpy(n->primary_key, pkey, key_len);
+	n->dev = dev;
+	dev_hold(dev);
+
+	/* Protocol specific setup. */
+	if (tbl->constructor &&	(error = tbl->constructor(n)) < 0) {
+		rc = ERR_PTR(error);
+		goto out_neigh_release;
+	}
+
+	if (dev->netdev_ops->ndo_neigh_construct) {
+		error = dev->netdev_ops->ndo_neigh_construct(n);
+		if (error < 0) {
+			rc = ERR_PTR(error);
+			goto out_neigh_release;
+		}
+	}
+
+	/* Device specific setup. */
+	if (n->parms->neigh_setup &&
+	    (error = n->parms->neigh_setup(n)) < 0) {
+		rc = ERR_PTR(error);
+		goto out_neigh_release;
+	}
+
+	n->confirmed = jiffies - (n->parms->base_reachable_time << 1);
+
+	write_lock_bh(&tbl->lock);
+	nht = rcu_dereference_protected(tbl->nht,
+					lockdep_is_held(&tbl->lock));
+
+	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
+		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
+
+	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
+
+	if (n->parms->dead) {
+		rc = ERR_PTR(-EINVAL);
+		goto out_tbl_unlock;
+	}
+
+	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
+					    lockdep_is_held(&tbl->lock));
+	     n1 != NULL;
+	     n1 = rcu_dereference_protected(n1->next,
+			lockdep_is_held(&tbl->lock))) {
+		if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
+			neigh_hold(n1);
+			rc = n1;
+			goto out_tbl_unlock;
+		}
+	}
+
+	n->dead = 0;
+	neigh_hold(n);
+	rcu_assign_pointer(n->next,
+			   rcu_dereference_protected(nht->hash_buckets[hash_val],
+						     lockdep_is_held(&tbl->lock)));
+	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
+	write_unlock_bh(&tbl->lock);
+	NEIGH_PRINTK2("neigh %p is created.\n", n);
+	rc = n;
+out:
+	return rc;
+out_tbl_unlock:
+	write_unlock_bh(&tbl->lock);
+out_neigh_release:
+	neigh_release(n);
+	goto out;
+}
+EXPORT_SYMBOL(neigh_create);
+
+static u32 pneigh_hash(const void *pkey, int key_len)
+{
+	u32 hash_val = *(u32 *)(pkey + key_len - 4);
+	hash_val ^= (hash_val >> 16);
+	hash_val ^= hash_val >> 8;
+	hash_val ^= hash_val >> 4;
+	hash_val &= PNEIGH_HASHMASK;
+	return hash_val;
+}
+
+static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
+					      struct net *net,
+					      const void *pkey,
+					      int key_len,
+					      struct net_device *dev)
+{
+	while (n) {
+		if (!memcmp(n->key, pkey, key_len) &&
+		    net_eq(pneigh_net(n), net) &&
+		    (n->dev == dev || !n->dev))
+			return n;
+		n = n->next;
+	}
+	return NULL;
+}
+
+struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
+		struct net *net, const void *pkey, struct net_device *dev)
+{
+	int key_len = tbl->key_len;
+	u32 hash_val = pneigh_hash(pkey, key_len);
+
+	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
+				 net, pkey, key_len, dev);
+}
+EXPORT_SYMBOL_GPL(__pneigh_lookup);
+
+struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
+				    struct net *net, const void *pkey,
+				    struct net_device *dev, int creat)
+{
+	struct pneigh_entry *n;
+	int key_len = tbl->key_len;
+	u32 hash_val = pneigh_hash(pkey, key_len);
+
+	read_lock_bh(&tbl->lock);
+	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
+			      net, pkey, key_len, dev);
+	read_unlock_bh(&tbl->lock);
+
+	if (n || !creat)
+		goto out;
+
+	ASSERT_RTNL();
+
+	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
+	if (!n)
+		goto out;
+
+	write_pnet(&n->net, hold_net(net));
+	memcpy(n->key, pkey, key_len);
+	n->dev = dev;
+	if (dev)
+		dev_hold(dev);
+
+	if (tbl->pconstructor && tbl->pconstructor(n)) {
+		if (dev)
+			dev_put(dev);
+		release_net(net);
+		kfree(n);
+		n = NULL;
+		goto out;
+	}
+
+	write_lock_bh(&tbl->lock);
+	n->next = tbl->phash_buckets[hash_val];
+	tbl->phash_buckets[hash_val] = n;
+	write_unlock_bh(&tbl->lock);
+out:
+	return n;
+}
+EXPORT_SYMBOL(pneigh_lookup);
+
+
+int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
+		  struct net_device *dev)
+{
+	struct pneigh_entry *n, **np;
+	int key_len = tbl->key_len;
+	u32 hash_val = pneigh_hash(pkey, key_len);
+
+	write_lock_bh(&tbl->lock);
+	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
+	     np = &n->next) {
+		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
+		    net_eq(pneigh_net(n), net)) {
+			*np = n->next;
+			write_unlock_bh(&tbl->lock);
+			if (tbl->pdestructor)
+				tbl->pdestructor(n);
+			if (n->dev)
+				dev_put(n->dev);
+			release_net(pneigh_net(n));
+			kfree(n);
+			return 0;
+		}
+	}
+	write_unlock_bh(&tbl->lock);
+	return -ENOENT;
+}
+
+static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
+{
+	struct pneigh_entry *n, **np;
+	u32 h;
+
+	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
+		np = &tbl->phash_buckets[h];
+		while ((n = *np) != NULL) {
+			if (!dev || n->dev == dev) {
+				*np = n->next;
+				if (tbl->pdestructor)
+					tbl->pdestructor(n);
+				if (n->dev)
+					dev_put(n->dev);
+				release_net(pneigh_net(n));
+				kfree(n);
+				continue;
+			}
+			np = &n->next;
+		}
+	}
+	return -ENOENT;
+}
+
+static void neigh_parms_destroy(struct neigh_parms *parms);
+
+static inline void neigh_parms_put(struct neigh_parms *parms)
+{
+	if (atomic_dec_and_test(&parms->refcnt))
+		neigh_parms_destroy(parms);
+}
+
+/*
+ *	neighbour must already be out of the table;
+ *
+ */
+void neigh_destroy(struct neighbour *neigh)
+{
+	struct net_device *dev = neigh->dev;
+
+	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
+
+	if (!neigh->dead) {
+		printk(KERN_WARNING
+		       "Destroying alive neighbour %p\n", neigh);
+		dump_stack();
+		return;
+	}
+
+	if (neigh_del_timer(neigh))
+		printk(KERN_WARNING "Impossible event.\n");
+
+	skb_queue_purge(&neigh->arp_queue);
+	neigh->arp_queue_len_bytes = 0;
+
+	if (dev->netdev_ops->ndo_neigh_destroy)
+		dev->netdev_ops->ndo_neigh_destroy(neigh);
+
+	dev_put(dev);
+	neigh_parms_put(neigh->parms);
+
+	NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);
+
+	atomic_dec(&neigh->tbl->entries);
+	kfree_rcu(neigh, rcu);
+}
+EXPORT_SYMBOL(neigh_destroy);
+
+/* Neighbour state is suspicious;
+   disable fast path.
+
+   Called with write_locked neigh.
+ */
+static void neigh_suspect(struct neighbour *neigh)
+{
+	NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
+
+	neigh->output = neigh->ops->output;
+}
+
+/* Neighbour state is OK;
+   enable fast path.
+
+   Called with write_locked neigh.
+ */
+static void neigh_connect(struct neighbour *neigh)
+{
+	NEIGH_PRINTK2("neigh %p is connected.\n", neigh);
+
+	neigh->output = neigh->ops->connected_output;
+}
+
+static void neigh_periodic_work(struct work_struct *work)
+{
+	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
+	struct neighbour *n;
+	struct neighbour __rcu **np;
+	unsigned int i;
+	struct neigh_hash_table *nht;
+
+	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
+
+	write_lock_bh(&tbl->lock);
+	nht = rcu_dereference_protected(tbl->nht,
+					lockdep_is_held(&tbl->lock));
+
+	/*
+	 *	periodically recompute ReachableTime from random function
+	 */
+
+	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
+		struct neigh_parms *p;
+		tbl->last_rand = jiffies;
+		for (p = &tbl->parms; p; p = p->next)
+			p->reachable_time =
+				neigh_rand_reach_time(p->base_reachable_time);
+	}
+
+	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
+		np = &nht->hash_buckets[i];
+
+		while ((n = rcu_dereference_protected(*np,
+				lockdep_is_held(&tbl->lock))) != NULL) {
+			unsigned int state;
+
+			write_lock(&n->lock);
+
+			state = n->nud_state;
+			if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
+				write_unlock(&n->lock);
+				goto next_elt;
+			}
+
+			if (time_before(n->used, n->confirmed))
+				n->used = n->confirmed;
+
+			if (atomic_read(&n->refcnt) == 1 &&
+			    (state == NUD_FAILED ||
+			     time_after(jiffies, n->used + n->parms->gc_staletime))) {
+				*np = n->next;
+				n->dead = 1;
+				write_unlock(&n->lock);
+				neigh_cleanup_and_release(n);
+				continue;
+			}
+			write_unlock(&n->lock);
+
+next_elt:
+			np = &n->next;
+		}
+		/*
+		 * It's fine to release lock here, even if hash table
+		 * grows while we are preempted.
+		 */
+		write_unlock_bh(&tbl->lock);
+		cond_resched();
+		write_lock_bh(&tbl->lock);
+		nht = rcu_dereference_protected(tbl->nht,
+						lockdep_is_held(&tbl->lock));
+	}
+	/* Cycle through all hash buckets every base_reachable_time/2 ticks.
+	 * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
+	 * base_reachable_time.
+	 */
+	schedule_delayed_work(&tbl->gc_work,
+			      tbl->parms.base_reachable_time >> 1);
+	write_unlock_bh(&tbl->lock);
+}
+
+static __inline__ int neigh_max_probes(struct neighbour *n)
+{
+	struct neigh_parms *p = n->parms;
+	return (n->nud_state & NUD_PROBE) ?
+		p->ucast_probes :
+		p->ucast_probes + p->app_probes + p->mcast_probes;
+}
+
+static void neigh_invalidate(struct neighbour *neigh)
+	__releases(neigh->lock)
+	__acquires(neigh->lock)
+{
+	struct sk_buff *skb;
+
+	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
+	NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
+	neigh->updated = jiffies;
+
+	/* It is very thin place. report_unreachable is very complicated
+	   routine. Particularly, it can hit the same neighbour entry!
+
+	   So that, we try to be accurate and avoid dead loop. --ANK
+	 */
+	while (neigh->nud_state == NUD_FAILED &&
+	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
+		write_unlock(&neigh->lock);
+		neigh->ops->error_report(neigh, skb);
+		write_lock(&neigh->lock);
+	}
+	skb_queue_purge(&neigh->arp_queue);
+	neigh->arp_queue_len_bytes = 0;
+}
+
+static void neigh_probe(struct neighbour *neigh)
+	__releases(neigh->lock)
+{
+	struct sk_buff *skb = skb_peek(&neigh->arp_queue);
+	/* keep skb alive even if arp_queue overflows */
+	if (skb)
+		skb = skb_copy(skb, GFP_ATOMIC);
+	write_unlock(&neigh->lock);
+	neigh->ops->solicit(neigh, skb);
+	atomic_inc(&neigh->probes);
+	kfree_skb(skb);
+}
+
+/* Called when a timer expires for a neighbour entry. */
+
+static void neigh_timer_handler(unsigned long arg)
+{
+	unsigned long now, next;
+	struct neighbour *neigh = (struct neighbour *)arg;
+	unsigned state;
+	int notify = 0;
+
+	write_lock(&neigh->lock);
+
+	state = neigh->nud_state;
+	now = jiffies;
+	next = now + HZ;
+
+	if (!(state & NUD_IN_TIMER))
+		goto out;
+
+	if (state & NUD_REACHABLE) {
+		if (time_before_eq(now,
+				   neigh->confirmed + neigh->parms->reachable_time)) {
+			NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
+			next = neigh->confirmed + neigh->parms->reachable_time;
+		} else if (time_before_eq(now,
+					  neigh->used + neigh->parms->delay_probe_time)) {
+			NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
+			neigh->nud_state = NUD_DELAY;
+			neigh->updated = jiffies;
+			neigh_suspect(neigh);
+			next = now + neigh->parms->delay_probe_time;
+		} else {
+			NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
+			neigh->nud_state = NUD_STALE;
+			neigh->updated = jiffies;
+			neigh_suspect(neigh);
+			notify = 1;
+		}
+	} else if (state & NUD_DELAY) {
+		if (time_before_eq(now,
+				   neigh->confirmed + neigh->parms->delay_probe_time)) {
+			NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
+			neigh->nud_state = NUD_REACHABLE;
+			neigh->updated = jiffies;
+			neigh_connect(neigh);
+			notify = 1;
+			next = neigh->confirmed + neigh->parms->reachable_time;
+		} else {
+			NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
+			neigh->nud_state = NUD_PROBE;
+			neigh->updated = jiffies;
+			atomic_set(&neigh->probes, 0);
+			next = now + neigh->parms->retrans_time;
+		}
+	} else {
+		/* NUD_PROBE|NUD_INCOMPLETE */
+		next = now + neigh->parms->retrans_time;
+	}
+
+	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
+	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
+		neigh->nud_state = NUD_FAILED;
+		notify = 1;
+		neigh_invalidate(neigh);
+	}
+
+	if (neigh->nud_state & NUD_IN_TIMER) {
+		if (time_before(next, jiffies + HZ/2))
+			next = jiffies + HZ/2;
+		if (!mod_timer(&neigh->timer, next))
+			neigh_hold(neigh);
+	}
+	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
+		neigh_probe(neigh);
+	} else {
+out:
+		write_unlock(&neigh->lock);
+	}
+
+	if (notify)
+		neigh_update_notify(neigh);
+
+	neigh_release(neigh);
+}
+
+int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
+{
+	int rc;
+	bool immediate_probe = false;
+
+	write_lock_bh(&neigh->lock);
+
+	rc = 0;
+	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
+		goto out_unlock_bh;
+
+	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
+		if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
+			unsigned long next, now = jiffies;
+
+			atomic_set(&neigh->probes, neigh->parms->ucast_probes);
+			neigh->nud_state     = NUD_INCOMPLETE;
+			neigh->updated = now;
+			next = now + max(neigh->parms->retrans_time, HZ/2);
+			neigh_add_timer(neigh, next);
+			immediate_probe = true;
+		} else {
+			neigh->nud_state = NUD_FAILED;
+			neigh->updated = jiffies;
+			write_unlock_bh(&neigh->lock);
+
+			kfree_skb(skb);
+			return 1;
+		}
+	} else if (neigh->nud_state & NUD_STALE) {
+		NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
+		neigh->nud_state = NUD_DELAY;
+		neigh->updated = jiffies;
+		neigh_add_timer(neigh,
+				jiffies + neigh->parms->delay_probe_time);
+	}
+
+	if (neigh->nud_state == NUD_INCOMPLETE) {
+		if (skb) {
+			while (neigh->arp_queue_len_bytes + skb->truesize >
+			       neigh->parms->queue_len_bytes) {
+				struct sk_buff *buff;
+
+				buff = __skb_dequeue(&neigh->arp_queue);
+				if (!buff)
+					break;
+				neigh->arp_queue_len_bytes -= buff->truesize;
+				kfree_skb(buff);
+				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
+			}
+			skb_dst_force(skb);
+			__skb_queue_tail(&neigh->arp_queue, skb);
+			neigh->arp_queue_len_bytes += skb->truesize;
+		}
+		rc = 1;
+	}
+out_unlock_bh:
+	if (immediate_probe)
+		neigh_probe(neigh);
+	else
+		write_unlock(&neigh->lock);
+	local_bh_enable();
+	return rc;
+}
+EXPORT_SYMBOL(__neigh_event_send);
+
+static void neigh_update_hhs(struct neighbour *neigh)
+{
+	struct hh_cache *hh;
+	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
+		= NULL;
+
+	if (neigh->dev->header_ops)
+		update = neigh->dev->header_ops->cache_update;
+
+	if (update) {
+		hh = &neigh->hh;
+		if (hh->hh_len) {
+			write_seqlock_bh(&hh->hh_lock);
+			update(hh, neigh->dev, neigh->ha);
+			write_sequnlock_bh(&hh->hh_lock);
+		}
+	}
+}
+
+
+
+/* Generic update routine.
+   -- lladdr is new lladdr or NULL, if it is not supplied.
+   -- new    is new state.
+   -- flags
+	NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
+				if it is different.
+	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
+				lladdr instead of overriding it
+				if it is different.
+				It also allows to retain current state
+				if lladdr is unchanged.
+	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.
+
+	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
+				NTF_ROUTER flag.
+	NEIGH_UPDATE_F_ISROUTER	indicates if the neighbour is known as
+				a router.
+
+   Caller MUST hold reference count on the entry.
+ */
+
+int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
+		 u32 flags)
+{
+	u8 old;
+	int err;
+	int notify = 0;
+	struct net_device *dev;
+	int update_isrouter = 0;
+
+	write_lock_bh(&neigh->lock);
+
+	dev    = neigh->dev;
+	old    = neigh->nud_state;
+	err    = -EPERM;
+
+	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
+	    (old & (NUD_NOARP | NUD_PERMANENT)))
+		goto out;
+
+	if (!(new & NUD_VALID)) {
+		neigh_del_timer(neigh);
+		if (old & NUD_CONNECTED)
+			neigh_suspect(neigh);
+		neigh->nud_state = new;
+		err = 0;
+		notify = old & NUD_VALID;
+		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
+		    (new & NUD_FAILED)) {
+			neigh_invalidate(neigh);
+			notify = 1;
+		}
+		goto out;
+	}
+
+	/* Compare new lladdr with cached one */
+	if (!dev->addr_len) {
+		/* First case: device needs no address. */
+		lladdr = neigh->ha;
+	} else if (lladdr) {
+		/* The second case: if something is already cached
+		   and a new address is proposed:
+		   - compare new & old
+		   - if they are different, check override flag
+		 */
+		if ((old & NUD_VALID) &&
+		    !memcmp(lladdr, neigh->ha, dev->addr_len))
+			lladdr = neigh->ha;
+	} else {
+		/* No address is supplied; if we know something,
+		   use it, otherwise discard the request.
+		 */
+		err = -EINVAL;
+		if (!(old & NUD_VALID))
+			goto out;
+		lladdr = neigh->ha;
+	}
+
+	if (new & NUD_CONNECTED)
+		neigh->confirmed = jiffies;
+	neigh->updated = jiffies;
+
+	/* If entry was valid and address is not changed,
+	   do not change entry state, if new one is STALE.
+	 */
+	err = 0;
+	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
+	if (old & NUD_VALID) {
+		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
+			update_isrouter = 0;
+			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
+			    (old & NUD_CONNECTED)) {
+				lladdr = neigh->ha;
+				new = NUD_STALE;
+			} else
+				goto out;
+		} else {
+			if (lladdr == neigh->ha && new == NUD_STALE &&
+			    ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
+			     (old & NUD_CONNECTED))
+			    )
+				new = old;
+		}
+	}
+
+	if (new != old) {
+		neigh_del_timer(neigh);
+		if (new & NUD_IN_TIMER)
+			neigh_add_timer(neigh, (jiffies +
+						((new & NUD_REACHABLE) ?
+						 neigh->parms->reachable_time :
+						 0)));
+		neigh->nud_state = new;
+	}
+
+	if (lladdr != neigh->ha) {
+		write_seqlock(&neigh->ha_lock);
+		memcpy(&neigh->ha, lladdr, dev->addr_len);
+		write_sequnlock(&neigh->ha_lock);
+		neigh_update_hhs(neigh);
+		if (!(new & NUD_CONNECTED))
+			neigh->confirmed = jiffies -
+				      (neigh->parms->base_reachable_time << 1);
+		notify = 1;
+	}
+	if (new == old)
+		goto out;
+	if (new & NUD_CONNECTED)
+		neigh_connect(neigh);
+	else
+		neigh_suspect(neigh);
+	if (!(old & NUD_VALID)) {
+		struct sk_buff *skb;
+
+		/* Again: avoid dead loop if something went wrong */
+
+		while (neigh->nud_state & NUD_VALID &&
+		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
+			struct dst_entry *dst = skb_dst(skb);
+			struct neighbour *n2, *n1 = neigh;
+			write_unlock_bh(&neigh->lock);
+
+			rcu_read_lock();
+			/* On shaper/eql skb->dst->neighbour != neigh :( */
+			if (dst && (n2 = dst_get_neighbour_noref(dst)) != NULL)
+				n1 = n2;
+			n1->output(n1, skb);
+			rcu_read_unlock();
+
+			write_lock_bh(&neigh->lock);
+		}
+		skb_queue_purge(&neigh->arp_queue);
+		neigh->arp_queue_len_bytes = 0;
+	}
+out:
+	if (update_isrouter) {
+		neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
+			(neigh->flags | NTF_ROUTER) :
+			(neigh->flags & ~NTF_ROUTER);
+	}
+	write_unlock_bh(&neigh->lock);
+
+	if (notify)
+		neigh_update_notify(neigh);
+
+	return err;
+}
+EXPORT_SYMBOL(neigh_update);
+
+struct neighbour *neigh_event_ns(struct neigh_table *tbl,
+				 u8 *lladdr, void *saddr,
+				 struct net_device *dev)
+{
+	struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
+						 lladdr || !dev->addr_len);
+	if (neigh)
+		neigh_update(neigh, lladdr, NUD_STALE,
+			     NEIGH_UPDATE_F_OVERRIDE);
+	return neigh;
+}
+EXPORT_SYMBOL(neigh_event_ns);
+
+/* called with read_lock_bh(&n->lock); */
+static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
+{
+	struct net_device *dev = dst->dev;
+	__be16 prot = dst->ops->protocol;
+	struct hh_cache	*hh = &n->hh;
+
+	write_lock_bh(&n->lock);
+
+	/* Only one thread can come in here and initialize the
+	 * hh_cache entry.
+	 */
+	if (!hh->hh_len)
+		dev->header_ops->cache(n, hh, prot);
+
+	write_unlock_bh(&n->lock);
+}
+
+/* This function can be used in contexts, where only old dev_queue_xmit
+ * worked, f.e. if you want to override normal output path (eql, shaper),
+ * but resolution is not made yet.
+ */
+
+int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
+{
+	struct net_device *dev = skb->dev;
+
+	__skb_pull(skb, skb_network_offset(skb));
+
+	if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
+			    skb->len) < 0 &&
+	    dev->header_ops->rebuild(skb))
+		return 0;
+
+	return dev_queue_xmit(skb);
+}
+EXPORT_SYMBOL(neigh_compat_output);
+
+/* Slow and careful. */
+
+int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb_dst(skb);
+	int rc = 0;
+
+	if (!dst)
+		goto discard;
+
+	__skb_pull(skb, skb_network_offset(skb));
+
+	if (!neigh_event_send(neigh, skb)) {
+		int err;
+		struct net_device *dev = neigh->dev;
+		unsigned int seq;
+
+		if (dev->header_ops->cache && !neigh->hh.hh_len)
+			neigh_hh_init(neigh, dst);
+
+		do {
+			seq = read_seqbegin(&neigh->ha_lock);
+			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
+					      neigh->ha, NULL, skb->len);
+		} while (read_seqretry(&neigh->ha_lock, seq));
+
+		if (err >= 0)
+			rc = dev_queue_xmit(skb);
+		else
+			goto out_kfree_skb;
+	}
+out:
+	return rc;
+discard:
+	NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
+		      dst, neigh);
+out_kfree_skb:
+	rc = -EINVAL;
+	kfree_skb(skb);
+	goto out;
+}
+EXPORT_SYMBOL(neigh_resolve_output);
+
+/* As fast as possible without hh cache */
+
+int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
+{
+	struct net_device *dev = neigh->dev;
+	unsigned int seq;
+	int err;
+
+	__skb_pull(skb, skb_network_offset(skb));
+
+	do {
+		seq = read_seqbegin(&neigh->ha_lock);
+		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
+				      neigh->ha, NULL, skb->len);
+	} while (read_seqretry(&neigh->ha_lock, seq));
+
+	if (err >= 0)
+		err = dev_queue_xmit(skb);
+	else {
+		err = -EINVAL;
+		kfree_skb(skb);
+	}
+	return err;
+}
+EXPORT_SYMBOL(neigh_connected_output);
+
+int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
+{
+	return dev_queue_xmit(skb);
+}
+EXPORT_SYMBOL(neigh_direct_output);
+
+static void neigh_proxy_process(unsigned long arg)
+{
+	struct neigh_table *tbl = (struct neigh_table *)arg;
+	long sched_next = 0;
+	unsigned long now = jiffies;
+	struct sk_buff *skb, *n;
+
+	spin_lock(&tbl->proxy_queue.lock);
+
+	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
+		long tdif = NEIGH_CB(skb)->sched_next - now;
+
+		if (tdif <= 0) {
+			struct net_device *dev = skb->dev;
+
+			__skb_unlink(skb, &tbl->proxy_queue);
+			if (tbl->proxy_redo && netif_running(dev)) {
+				rcu_read_lock();
+				tbl->proxy_redo(skb);
+				rcu_read_unlock();
+			} else {
+				kfree_skb(skb);
+			}
+
+			dev_put(dev);
+		} else if (!sched_next || tdif < sched_next)
+			sched_next = tdif;
+	}
+	del_timer(&tbl->proxy_timer);
+	if (sched_next)
+		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
+	spin_unlock(&tbl->proxy_queue.lock);
+}
+
+void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
+		    struct sk_buff *skb)
+{
+	unsigned long now = jiffies;
+	unsigned long sched_next = now + (net_random() % p->proxy_delay);
+
+	if (tbl->proxy_queue.qlen > p->proxy_qlen) {
+		kfree_skb(skb);
+		return;
+	}
+
+	NEIGH_CB(skb)->sched_next = sched_next;
+	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
+
+	spin_lock(&tbl->proxy_queue.lock);
+	if (del_timer(&tbl->proxy_timer)) {
+		if (time_before(tbl->proxy_timer.expires, sched_next))
+			sched_next = tbl->proxy_timer.expires;
+	}
+	skb_dst_drop(skb);
+	dev_hold(skb->dev);
+	__skb_queue_tail(&tbl->proxy_queue, skb);
+	mod_timer(&tbl->proxy_timer, sched_next);
+	spin_unlock(&tbl->proxy_queue.lock);
+}
+EXPORT_SYMBOL(pneigh_enqueue);
+
+static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
+						      struct net *net, int ifindex)
+{
+	struct neigh_parms *p;
+
+	for (p = &tbl->parms; p; p = p->next) {
+		if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
+		    (!p->dev && !ifindex))
+			return p;
+	}
+
+	return NULL;
+}
+
+struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
+				      struct neigh_table *tbl)
+{
+	struct neigh_parms *p, *ref;
+	struct net *net = dev_net(dev);
+	const struct net_device_ops *ops = dev->netdev_ops;
+
+	ref = lookup_neigh_parms(tbl, net, 0);
+	if (!ref)
+		return NULL;
+
+	p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
+	if (p) {
+		p->tbl		  = tbl;
+		atomic_set(&p->refcnt, 1);
+		p->reachable_time =
+				neigh_rand_reach_time(p->base_reachable_time);
+
+		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
+			kfree(p);
+			return NULL;
+		}
+
+		dev_hold(dev);
+		p->dev = dev;
+		write_pnet(&p->net, hold_net(net));
+		p->sysctl_table = NULL;
+		write_lock_bh(&tbl->lock);
+		p->next		= tbl->parms.next;
+		tbl->parms.next = p;
+		write_unlock_bh(&tbl->lock);
+	}
+	return p;
+}
+EXPORT_SYMBOL(neigh_parms_alloc);
+
+static void neigh_rcu_free_parms(struct rcu_head *head)
+{
+	struct neigh_parms *parms =
+		container_of(head, struct neigh_parms, rcu_head);
+
+	neigh_parms_put(parms);
+}
+
+void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
+{
+	struct neigh_parms **p;
+
+	if (!parms || parms == &tbl->parms)
+		return;
+	write_lock_bh(&tbl->lock);
+	for (p = &tbl->parms.next; *p; p = &(*p)->next) {
+		if (*p == parms) {
+			*p = parms->next;
+			parms->dead = 1;
+			write_unlock_bh(&tbl->lock);
+			if (parms->dev)
+				dev_put(parms->dev);
+			call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
+			return;
+		}
+	}
+	write_unlock_bh(&tbl->lock);
+	NEIGH_PRINTK1("neigh_parms_release: not found\n");
+}
+EXPORT_SYMBOL(neigh_parms_release);
+
+static void neigh_parms_destroy(struct neigh_parms *parms)
+{
+	release_net(neigh_parms_net(parms));
+	kfree(parms);
+}
+
+static struct lock_class_key neigh_table_proxy_queue_class;
+
+void neigh_table_init_no_netlink(struct neigh_table *tbl)
+{
+	unsigned long now = jiffies;
+	unsigned long phsize;
+
+	write_pnet(&tbl->parms.net, &init_net);
+	atomic_set(&tbl->parms.refcnt, 1);
+	tbl->parms.reachable_time =
+			  neigh_rand_reach_time(tbl->parms.base_reachable_time);
+
+	tbl->stats = alloc_percpu(struct neigh_statistics);
+	if (!tbl->stats)
+		panic("cannot create neighbour cache statistics");
+
+#ifdef CONFIG_PROC_FS
+	if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
+			      &neigh_stat_seq_fops, tbl))
+		panic("cannot create neighbour proc dir entry");
+#endif
+
+	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));
+
+	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
+	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
+
+	if (!tbl->nht || !tbl->phash_buckets)
+		panic("cannot allocate neighbour cache hashes");
+
+	rwlock_init(&tbl->lock);
+	INIT_DELAYED_WORK_DEFERRABLE(&tbl->gc_work, neigh_periodic_work);
+	schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
+	setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
+	skb_queue_head_init_class(&tbl->proxy_queue,
+			&neigh_table_proxy_queue_class);
+
+	tbl->last_flush = now;
+	tbl->last_rand	= now + tbl->parms.reachable_time * 20;
+}
+EXPORT_SYMBOL(neigh_table_init_no_netlink);
+
+void neigh_table_init(struct neigh_table *tbl)
+{
+	struct neigh_table *tmp;
+
+	neigh_table_init_no_netlink(tbl);
+	write_lock(&neigh_tbl_lock);
+	for (tmp = neigh_tables; tmp; tmp = tmp->next) {
+		if (tmp->family == tbl->family)
+			break;
+	}
+	tbl->next	= neigh_tables;
+	neigh_tables	= tbl;
+	write_unlock(&neigh_tbl_lock);
+
+	if (unlikely(tmp)) {
+		printk(KERN_ERR "NEIGH: Registering multiple tables for "
+		       "family %d\n", tbl->family);
+		dump_stack();
+	}
+}
+EXPORT_SYMBOL(neigh_table_init);
+
+int neigh_table_clear(struct neigh_table *tbl)
+{
+	struct neigh_table **tp;
+
+	/* It is not clean... Fix it to unload IPv6 module safely */
+	cancel_delayed_work_sync(&tbl->gc_work);
+	del_timer_sync(&tbl->proxy_timer);
+	pneigh_queue_purge(&tbl->proxy_queue);
+	neigh_ifdown(tbl, NULL);
+	if (atomic_read(&tbl->entries))
+		printk(KERN_CRIT "neighbour leakage\n");
+	write_lock(&neigh_tbl_lock);
+	for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
+		if (*tp == tbl) {
+			*tp = tbl->next;
+			break;
+		}
+	}
+	write_unlock(&neigh_tbl_lock);
+
+	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
+		 neigh_hash_free_rcu);
+	tbl->nht = NULL;
+
+	kfree(tbl->phash_buckets);
+	tbl->phash_buckets = NULL;
+
+	remove_proc_entry(tbl->id, init_net.proc_net_stat);
+
+	free_percpu(tbl->stats);
+	tbl->stats = NULL;
+
+	return 0;
+}
+EXPORT_SYMBOL(neigh_table_clear);
+
+static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+	struct net *net = sock_net(skb->sk);
+	struct ndmsg *ndm;
+	struct nlattr *dst_attr;
+	struct neigh_table *tbl;
+	struct net_device *dev = NULL;
+	int err = -EINVAL;
+
+	ASSERT_RTNL();
+	if (nlmsg_len(nlh) < sizeof(*ndm))
+		goto out;
+
+	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
+	if (dst_attr == NULL)
+		goto out;
+
+	ndm = nlmsg_data(nlh);
+	if (ndm->ndm_ifindex) {
+		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
+		if (dev == NULL) {
+			err = -ENODEV;
+			goto out;
+		}
+	}
+
+	read_lock(&neigh_tbl_lock);
+	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
+		struct neighbour *neigh;
+
+		if (tbl->family != ndm->ndm_family)
+			continue;
+		read_unlock(&neigh_tbl_lock);
+
+		if (nla_len(dst_attr) < tbl->key_len)
+			goto out;
+
+		if (ndm->ndm_flags & NTF_PROXY) {
+			err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
+			goto out;
+		}
+
+		if (dev == NULL)
+			goto out;
+
+		neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
+		if (neigh == NULL) {
+			err = -ENOENT;
+			goto out;
+		}
+
+		err = neigh_update(neigh, NULL, NUD_FAILED,
+				   NEIGH_UPDATE_F_OVERRIDE |
+				   NEIGH_UPDATE_F_ADMIN);
+		neigh_release(neigh);
+		goto out;
+	}
+	read_unlock(&neigh_tbl_lock);
+	err = -EAFNOSUPPORT;
+
+out:
+	return err;
+}
+
+static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+	struct net *net = sock_net(skb->sk);
+	struct ndmsg *ndm;
+	struct nlattr *tb[NDA_MAX+1];
+	struct neigh_table *tbl;
+	struct net_device *dev = NULL;
+	int err;
+
+	ASSERT_RTNL();
+	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
+	if (err < 0)
+		goto out;
+
+	err = -EINVAL;
+	if (tb[NDA_DST] == NULL)
+		goto out;
+
+	ndm = nlmsg_data(nlh);
+	if (ndm->ndm_ifindex) {
+		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
+		if (dev == NULL) {
+			err = -ENODEV;
+			goto out;
+		}
+
+		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
+			goto out;
+	}
+
+	read_lock(&neigh_tbl_lock);
+	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
+		int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
+		struct neighbour *neigh;
+		void *dst, *lladdr;
+
+		if (tbl->family != ndm->ndm_family)
+			continue;
+		read_unlock(&neigh_tbl_lock);
+
+		if (nla_len(tb[NDA_DST]) < tbl->key_len)
+			goto out;
+		dst = nla_data(tb[NDA_DST]);
+		lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
+
+		if (ndm->ndm_flags & NTF_PROXY) {
+			struct pneigh_entry *pn;
+
+			err = -ENOBUFS;
+			pn = pneigh_lookup(tbl, net, dst, dev, 1);
+			if (pn) {
+				pn->flags = ndm->ndm_flags;
+				err = 0;
+			}
+			goto out;
+		}
+
+		if (dev == NULL)
+			goto out;
+
+		neigh = neigh_lookup(tbl, dst, dev);
+		if (neigh == NULL) {
+			if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
+				err = -ENOENT;
+				goto out;
+			}
+
+			neigh = __neigh_lookup_errno(tbl, dst, dev);
+			if (IS_ERR(neigh)) {
+				err = PTR_ERR(neigh);
+				goto out;
+			}
+		} else {
+			if (nlh->nlmsg_flags & NLM_F_EXCL) {
+				err = -EEXIST;
+				neigh_release(neigh);
+				goto out;
+			}
+
+			if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
+				flags &= ~NEIGH_UPDATE_F_OVERRIDE;
+		}
+
+		if (ndm->ndm_flags & NTF_USE) {
+			neigh_event_send(neigh, NULL);
+			err = 0;
+		} else
+			err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
+		neigh_release(neigh);
+		goto out;
+	}
+
+	read_unlock(&neigh_tbl_lock);
+	err = -EAFNOSUPPORT;
+out:
+	return err;
+}
+
+static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
+{
+	struct nlattr *nest;
+
+	nest = nla_nest_start(skb, NDTA_PARMS);
+	if (nest == NULL)
+		return -ENOBUFS;
+
+	if (parms->dev)
+		NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex);
+
+	NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt));
+	NLA_PUT_U32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes);
+	/* approximative value for deprecated QUEUE_LEN (in packets) */
+	NLA_PUT_U32(skb, NDTPA_QUEUE_LEN,
+		    DIV_ROUND_UP(parms->queue_len_bytes,
+				 SKB_TRUESIZE(ETH_FRAME_LEN)));
+	NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen);
+	NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes);
+	NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes);
+	NLA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes);
+	NLA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time);
+	NLA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME,
+		      parms->base_reachable_time);
+	NLA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime);
+	NLA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time);
+	NLA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time);
+	NLA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay);
+	NLA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay);
+	NLA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime);
+
+	return nla_nest_end(skb, nest);
+
+nla_put_failure:
+	nla_nest_cancel(skb, nest);
+	return -EMSGSIZE;
+}
+
+static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
+			      u32 pid, u32 seq, int type, int flags)
+{
+	struct nlmsghdr *nlh;
+	struct ndtmsg *ndtmsg;
+
+	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
+	if (nlh == NULL)
+		return -EMSGSIZE;
+
+	ndtmsg = nlmsg_data(nlh);
+
+	read_lock_bh(&tbl->lock);
+	ndtmsg->ndtm_family = tbl->family;
+	ndtmsg->ndtm_pad1   = 0;
+	ndtmsg->ndtm_pad2   = 0;
+
+	NLA_PUT_STRING(skb, NDTA_NAME, tbl->id);
+	NLA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval);
+	NLA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1);
+	NLA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2);
+	NLA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3);
+
+	{
+		unsigned long now = jiffies;
+		unsigned int flush_delta = now - tbl->last_flush;
+		unsigned int rand_delta = now - tbl->last_rand;
+		struct neigh_hash_table *nht;
+		struct ndt_config ndc = {
+			.ndtc_key_len		= tbl->key_len,
+			.ndtc_entry_size	= tbl->entry_size,
+			.ndtc_entries		= atomic_read(&tbl->entries),
+			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
+			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
+			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
+		};
+
+		rcu_read_lock_bh();
+		nht = rcu_dereference_bh(tbl->nht);
+		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
+		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
+		rcu_read_unlock_bh();
+
+		NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
+	}
+
+	{
+		int cpu;
+		struct ndt_stats ndst;
+
+		memset(&ndst, 0, sizeof(ndst));
+
+		for_each_possible_cpu(cpu) {
+			struct neigh_statistics	*st;
+
+			st = per_cpu_ptr(tbl->stats, cpu);
+			ndst.ndts_allocs		+= st->allocs;
+			ndst.ndts_destroys		+= st->destroys;
+			ndst.ndts_hash_grows		+= st->hash_grows;
+			ndst.ndts_res_failed		+= st->res_failed;
+			ndst.ndts_lookups		+= st->lookups;
+			ndst.ndts_hits			+= st->hits;
+			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
+			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
+			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
+			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
+		}
+
+		NLA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst);
+	}
+
+	BUG_ON(tbl->parms.dev);
+	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
+		goto nla_put_failure;
+
+	read_unlock_bh(&tbl->lock);
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	read_unlock_bh(&tbl->lock);
+	nlmsg_cancel(skb, nlh);
+	return -EMSGSIZE;
+}
+
+static int neightbl_fill_param_info(struct sk_buff *skb,
+				    struct neigh_table *tbl,
+				    struct neigh_parms *parms,
+				    u32 pid, u32 seq, int type,
+				    unsigned int flags)
+{
+	struct ndtmsg *ndtmsg;
+	struct nlmsghdr *nlh;
+
+	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
+	if (nlh == NULL)
+		return -EMSGSIZE;
+
+	ndtmsg = nlmsg_data(nlh);
+
+	read_lock_bh(&tbl->lock);
+	ndtmsg->ndtm_family = tbl->family;
+	ndtmsg->ndtm_pad1   = 0;
+	ndtmsg->ndtm_pad2   = 0;
+
+	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
+	    neightbl_fill_parms(skb, parms) < 0)
+		goto errout;
+
+	read_unlock_bh(&tbl->lock);
+	return nlmsg_end(skb, nlh);
+errout:
+	read_unlock_bh(&tbl->lock);
+	nlmsg_cancel(skb, nlh);
+	return -EMSGSIZE;
+}
+
+static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
+	[NDTA_NAME]		= { .type = NLA_STRING },
+	[NDTA_THRESH1]		= { .type = NLA_U32 },
+	[NDTA_THRESH2]		= { .type = NLA_U32 },
+	[NDTA_THRESH3]		= { .type = NLA_U32 },
+	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
+	[NDTA_PARMS]		= { .type = NLA_NESTED },
+};
+
+static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
+	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
+	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
+	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
+	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
+	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
+	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
+	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
+	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
+	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
+	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
+	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
+	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
+	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
+};
+
+static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+	struct net *net = sock_net(skb->sk);
+	struct neigh_table *tbl;
+	struct ndtmsg *ndtmsg;
+	struct nlattr *tb[NDTA_MAX+1];
+	int err;
+
+	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
+			  nl_neightbl_policy);
+	if (err < 0)
+		goto errout;
+
+	if (tb[NDTA_NAME] == NULL) {
+		err = -EINVAL;
+		goto errout;
+	}
+
+	ndtmsg = nlmsg_data(nlh);
+	read_lock(&neigh_tbl_lock);
+	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
+		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
+			continue;
+
+		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
+			break;
+	}
+
+	if (tbl == NULL) {
+		err = -ENOENT;
+		goto errout_locked;
+	}
+
+	/*
+	 * We acquire tbl->lock to be nice to the periodic timers and
+	 * make sure they always see a consistent set of values.
+	 */
+	write_lock_bh(&tbl->lock);
+
+	if (tb[NDTA_PARMS]) {
+		struct nlattr *tbp[NDTPA_MAX+1];
+		struct neigh_parms *p;
+		int i, ifindex = 0;
+
+		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
+				       nl_ntbl_parm_policy);
+		if (err < 0)
+			goto errout_tbl_lock;
+
+		if (tbp[NDTPA_IFINDEX])
+			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
+
+		p = lookup_neigh_parms(tbl, net, ifindex);
+		if (p == NULL) {
+			err = -ENOENT;
+			goto errout_tbl_lock;
+		}
+
+		for (i = 1; i <= NDTPA_MAX; i++) {
+			if (tbp[i] == NULL)
+				continue;
+
+			switch (i) {
+			case NDTPA_QUEUE_LEN:
+				p->queue_len_bytes = nla_get_u32(tbp[i]) *
+						     SKB_TRUESIZE(ETH_FRAME_LEN);
+				break;
+			case NDTPA_QUEUE_LENBYTES:
+				p->queue_len_bytes = nla_get_u32(tbp[i]);
+				break;
+			case NDTPA_PROXY_QLEN:
+				p->proxy_qlen = nla_get_u32(tbp[i]);
+				break;
+			case NDTPA_APP_PROBES:
+				p->app_probes = nla_get_u32(tbp[i]);
+				break;
+			case NDTPA_UCAST_PROBES:
+				p->ucast_probes = nla_get_u32(tbp[i]);
+				break;
+			case NDTPA_MCAST_PROBES:
+				p->mcast_probes = nla_get_u32(tbp[i]);
+				break;
+			case NDTPA_BASE_REACHABLE_TIME:
+				p->base_reachable_time = nla_get_msecs(tbp[i]);
+				break;
+			case NDTPA_GC_STALETIME:
+				p->gc_staletime = nla_get_msecs(tbp[i]);
+				break;
+			case NDTPA_DELAY_PROBE_TIME:
+				p->delay_probe_time = nla_get_msecs(tbp[i]);
+				break;
+			case NDTPA_RETRANS_TIME:
+				p->retrans_time = nla_get_msecs(tbp[i]);
+				break;
+			case NDTPA_ANYCAST_DELAY:
+				p->anycast_delay = nla_get_msecs(tbp[i]);
+				break;
+			case NDTPA_PROXY_DELAY:
+				p->proxy_delay = nla_get_msecs(tbp[i]);
+				break;
+			case NDTPA_LOCKTIME:
+				p->locktime = nla_get_msecs(tbp[i]);
+				break;
+			}
+		}
+	}
+
+	if (tb[NDTA_THRESH1])
+		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
+
+	if (tb[NDTA_THRESH2])
+		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
+
+	if (tb[NDTA_THRESH3])
+		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
+
+	if (tb[NDTA_GC_INTERVAL])
+		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
+
+	err = 0;
+
+errout_tbl_lock:
+	write_unlock_bh(&tbl->lock);
+errout_locked:
+	read_unlock(&neigh_tbl_lock);
+errout:
+	return err;
+}
+
+static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct net *net = sock_net(skb->sk);
+	int family, tidx, nidx = 0;
+	int tbl_skip = cb->args[0];
+	int neigh_skip = cb->args[1];
+	struct neigh_table *tbl;
+
+	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
+
+	read_lock(&neigh_tbl_lock);
+	for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
+		struct neigh_parms *p;
+
+		if (tidx < tbl_skip || (family && tbl->family != family))
+			continue;
+
+		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid,
+				       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
+				       NLM_F_MULTI) <= 0)
+			break;
+
+		for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
+			if (!net_eq(neigh_parms_net(p), net))
+				continue;
+
+			if (nidx < neigh_skip)
+				goto next;
+
+			if (neightbl_fill_param_info(skb, tbl, p,
+						     NETLINK_CB(cb->skb).pid,
+						     cb->nlh->nlmsg_seq,
+						     RTM_NEWNEIGHTBL,
+						     NLM_F_MULTI) <= 0)
+				goto out;
+		next:
+			nidx++;
+		}
+
+		neigh_skip = 0;
+	}
+out:
+	read_unlock(&neigh_tbl_lock);
+	cb->args[0] = tidx;
+	cb->args[1] = nidx;
+
+	return skb->len;
+}
+
+static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
+			   u32 pid, u32 seq, int type, unsigned int flags)
+{
+	unsigned long now = jiffies;
+	struct nda_cacheinfo ci;
+	struct nlmsghdr *nlh;
+	struct ndmsg *ndm;
+
+	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
+	if (nlh == NULL)
+		return -EMSGSIZE;
+
+	ndm = nlmsg_data(nlh);
+	ndm->ndm_family	 = neigh->ops->family;
+	ndm->ndm_pad1    = 0;
+	ndm->ndm_pad2    = 0;
+	ndm->ndm_flags	 = neigh->flags;
+	ndm->ndm_type	 = neigh->type;
+	ndm->ndm_ifindex = neigh->dev->ifindex;
+
+	NLA_PUT(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key);
+
+	read_lock_bh(&neigh->lock);
+	ndm->ndm_state	 = neigh->nud_state;
+	if (neigh->nud_state & NUD_VALID) {
+		char haddr[MAX_ADDR_LEN];
+
+		neigh_ha_snapshot(haddr, neigh, neigh->dev);
+		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
+			read_unlock_bh(&neigh->lock);
+			goto nla_put_failure;
+		}
+	}
+
+	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
+	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
+	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
+	ci.ndm_refcnt	 = atomic_read(&neigh->refcnt) - 1;
+	read_unlock_bh(&neigh->lock);
+
+	NLA_PUT_U32(skb, NDA_PROBES, atomic_read(&neigh->probes));
+	NLA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);
+
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	nlmsg_cancel(skb, nlh);
+	return -EMSGSIZE;
+}
+
+static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
+			    u32 pid, u32 seq, int type, unsigned int flags,
+			    struct neigh_table *tbl)
+{
+	struct nlmsghdr *nlh;
+	struct ndmsg *ndm;
+
+	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
+	if (nlh == NULL)
+		return -EMSGSIZE;
+
+	ndm = nlmsg_data(nlh);
+	ndm->ndm_family	 = tbl->family;
+	ndm->ndm_pad1    = 0;
+	ndm->ndm_pad2    = 0;
+	ndm->ndm_flags	 = pn->flags | NTF_PROXY;
+	ndm->ndm_type	 = NDA_DST;
+	ndm->ndm_ifindex = pn->dev->ifindex;
+	ndm->ndm_state	 = NUD_NONE;
+
+	NLA_PUT(skb, NDA_DST, tbl->key_len, pn->key);
+
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	nlmsg_cancel(skb, nlh);
+	return -EMSGSIZE;
+}
+
+static void neigh_update_notify(struct neighbour *neigh)
+{
+	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
+	__neigh_notify(neigh, RTM_NEWNEIGH, 0);
+}
+
+static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
+			    struct netlink_callback *cb)
+{
+	struct net *net = sock_net(skb->sk);
+	struct neighbour *n;
+	int rc, h, s_h = cb->args[1];
+	int idx, s_idx = idx = cb->args[2];
+	struct neigh_hash_table *nht;
+
+	rcu_read_lock_bh();
+	nht = rcu_dereference_bh(tbl->nht);
+
+	for (h = s_h; h < (1 << nht->hash_shift); h++) {
+		if (h > s_h)
+			s_idx = 0;
+		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
+		     n != NULL;
+		     n = rcu_dereference_bh(n->next)) {
+			if (!net_eq(dev_net(n->dev), net))
+				continue;
+			if (idx < s_idx)
+				goto next;
+			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
+					    cb->nlh->nlmsg_seq,
+					    RTM_NEWNEIGH,
+					    NLM_F_MULTI) <= 0) {
+				rc = -1;
+				goto out;
+			}
+next:
+			idx++;
+		}
+	}
+	rc = skb->len;
+out:
+	rcu_read_unlock_bh();
+	cb->args[1] = h;
+	cb->args[2] = idx;
+	return rc;
+}
+
+static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
+			     struct netlink_callback *cb)
+{
+	struct pneigh_entry *n;
+	struct net *net = sock_net(skb->sk);
+	int rc, h, s_h = cb->args[3];
+	int idx, s_idx = idx = cb->args[4];
+
+	read_lock_bh(&tbl->lock);
+
+	for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
+		if (h > s_h)
+			s_idx = 0;
+		for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
+			if (dev_net(n->dev) != net)
+				continue;
+			if (idx < s_idx)
+				goto next;
+			if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
+					    cb->nlh->nlmsg_seq,
+					    RTM_NEWNEIGH,
+					    NLM_F_MULTI, tbl) <= 0) {
+				read_unlock_bh(&tbl->lock);
+				rc = -1;
+				goto out;
+			}
+		next:
+			idx++;
+		}
+	}
+
+	read_unlock_bh(&tbl->lock);
+	rc = skb->len;
+out:
+	cb->args[3] = h;
+	cb->args[4] = idx;
+	return rc;
+
+}
+
+static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct neigh_table *tbl;
+	int t, family, s_t;
+	int proxy = 0;
+	int err;
+
+	read_lock(&neigh_tbl_lock);
+	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
+
+	/* check for full ndmsg structure presence, family member is
+	 * the same for both structures
+	 */
+	if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
+	    ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
+		proxy = 1;
+
+	s_t = cb->args[0];
+
+	for (tbl = neigh_tables, t = 0; tbl;
+	     tbl = tbl->next, t++) {
+		if (t < s_t || (family && tbl->family != family))
+			continue;
+		if (t > s_t)
+			memset(&cb->args[1], 0, sizeof(cb->args) -
+						sizeof(cb->args[0]));
+		if (proxy)
+			err = pneigh_dump_table(tbl, skb, cb);
+		else
+			err = neigh_dump_table(tbl, skb, cb);
+		if (err < 0)
+			break;
+	}
+	read_unlock(&neigh_tbl_lock);
+
+	cb->args[0] = t;
+	return skb->len;
+}
+
+void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
+{
+	int chain;
+	struct neigh_hash_table *nht;
+
+	rcu_read_lock_bh();
+	nht = rcu_dereference_bh(tbl->nht);
+
+	read_lock(&tbl->lock); /* avoid resizes */
+	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
+		struct neighbour *n;
+
+		for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
+		     n != NULL;
+		     n = rcu_dereference_bh(n->next))
+			cb(n, cookie);
+	}
+	read_unlock(&tbl->lock);
+	rcu_read_unlock_bh();
+}
+EXPORT_SYMBOL(neigh_for_each);
+
+/* The tbl->lock must be held as a writer and BH disabled. */
+void __neigh_for_each_release(struct neigh_table *tbl,
+			      int (*cb)(struct neighbour *))
+{
+	int chain;
+	struct neigh_hash_table *nht;
+
+	nht = rcu_dereference_protected(tbl->nht,
+					lockdep_is_held(&tbl->lock));
+	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
+		struct neighbour *n;
+		struct neighbour __rcu **np;
+
+		np = &nht->hash_buckets[chain];
+		while ((n = rcu_dereference_protected(*np,
+					lockdep_is_held(&tbl->lock))) != NULL) {
+			int release;
+
+			write_lock(&n->lock);
+			release = cb(n);
+			if (release) {
+				rcu_assign_pointer(*np,
+					rcu_dereference_protected(n->next,
+						lockdep_is_held(&tbl->lock)));
+				n->dead = 1;
+			} else
+				np = &n->next;
+			write_unlock(&n->lock);
+			if (release)
+				neigh_cleanup_and_release(n);
+		}
+	}
+}
+EXPORT_SYMBOL(__neigh_for_each_release);
+
+#ifdef CONFIG_PROC_FS
+
+static struct neighbour *neigh_get_first(struct seq_file *seq)
+{
+	struct neigh_seq_state *state = seq->private;
+	struct net *net = seq_file_net(seq);
+	struct neigh_hash_table *nht = state->nht;
+	struct neighbour *n = NULL;
+	int bucket = state->bucket;
+
+	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
+	for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
+		n = rcu_dereference_bh(nht->hash_buckets[bucket]);
+
+		while (n) {
+			if (!net_eq(dev_net(n->dev), net))
+				goto next;
+			if (state->neigh_sub_iter) {
+				loff_t fakep = 0;
+				void *v;
+
+				v = state->neigh_sub_iter(state, n, &fakep);
+				if (!v)
+					goto next;
+			}
+			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
+				break;
+			if (n->nud_state & ~NUD_NOARP)
+				break;
+next:
+			n = rcu_dereference_bh(n->next);
+		}
+
+		if (n)
+			break;
+	}
+	state->bucket = bucket;
+
+	return n;
+}
+
+static struct neighbour *neigh_get_next(struct seq_file *seq,
+					struct neighbour *n,
+					loff_t *pos)
+{
+	struct neigh_seq_state *state = seq->private;
+	struct net *net = seq_file_net(seq);
+	struct neigh_hash_table *nht = state->nht;
+
+	if (state->neigh_sub_iter) {
+		void *v = state->neigh_sub_iter(state, n, pos);
+		if (v)
+			return n;
+	}
+	n = rcu_dereference_bh(n->next);
+
+	while (1) {
+		while (n) {
+			if (!net_eq(dev_net(n->dev), net))
+				goto next;
+			if (state->neigh_sub_iter) {
+				void *v = state->neigh_sub_iter(state, n, pos);
+				if (v)
+					return n;
+				goto next;
+			}
+			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
+				break;
+
+			if (n->nud_state & ~NUD_NOARP)
+				break;
+next:
+			n = rcu_dereference_bh(n->next);
+		}
+
+		if (n)
+			break;
+
+		if (++state->bucket >= (1 << nht->hash_shift))
+			break;
+
+		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
+	}
+
+	if (n && pos)
+		--(*pos);
+	return n;
+}
+
+static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
+{
+	struct neighbour *n = neigh_get_first(seq);
+
+	if (n) {
+		--(*pos);
+		while (*pos) {
+			n = neigh_get_next(seq, n, pos);
+			if (!n)
+				break;
+		}
+	}
+	return *pos ? NULL : n;
+}
+
+static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
+{
+	struct neigh_seq_state *state = seq->private;
+	struct net *net = seq_file_net(seq);
+	struct neigh_table *tbl = state->tbl;
+	struct pneigh_entry *pn = NULL;
+	int bucket = state->bucket;
+
+	state->flags |= NEIGH_SEQ_IS_PNEIGH;
+	for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
+		pn = tbl->phash_buckets[bucket];
+		while (pn && !net_eq(pneigh_net(pn), net))
+			pn = pn->next;
+		if (pn)
+			break;
+	}
+	state->bucket = bucket;
+
+	return pn;
+}
+
+static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
+					    struct pneigh_entry *pn,
+					    loff_t *pos)
+{
+	struct neigh_seq_state *state = seq->private;
+	struct net *net = seq_file_net(seq);
+	struct neigh_table *tbl = state->tbl;
+
+	do {
+		pn = pn->next;
+	} while (pn && !net_eq(pneigh_net(pn), net));
+
+	while (!pn) {
+		if (++state->bucket > PNEIGH_HASHMASK)
+			break;
+		pn = tbl->phash_buckets[state->bucket];
+		while (pn && !net_eq(pneigh_net(pn), net))
+			pn = pn->next;
+		if (pn)
+			break;
+	}
+
+	if (pn && pos)
+		--(*pos);
+
+	return pn;
+}
+
+static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
+{
+	struct pneigh_entry *pn = pneigh_get_first(seq);
+
+	if (pn) {
+		--(*pos);
+		while (*pos) {
+			pn = pneigh_get_next(seq, pn, pos);
+			if (!pn)
+				break;
+		}
+	}
+	return *pos ? NULL : pn;
+}
+
+static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
+{
+	struct neigh_seq_state *state = seq->private;
+	void *rc;
+	loff_t idxpos = *pos;
+
+	rc = neigh_get_idx(seq, &idxpos);
+	if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
+		rc = pneigh_get_idx(seq, &idxpos);
+
+	return rc;
+}
+
+void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
+	__acquires(rcu_bh)
+{
+	struct neigh_seq_state *state = seq->private;
+
+	state->tbl = tbl;
+	state->bucket = 0;
+	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
+
+	rcu_read_lock_bh();
+	state->nht = rcu_dereference_bh(tbl->nht);
+
+	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
+}
+EXPORT_SYMBOL(neigh_seq_start);
+
+void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct neigh_seq_state *state;
+	void *rc;
+
+	if (v == SEQ_START_TOKEN) {
+		rc = neigh_get_first(seq);
+		goto out;
+	}
+
+	state = seq->private;
+	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
+		rc = neigh_get_next(seq, v, NULL);
+		if (rc)
+			goto out;
+		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
+			rc = pneigh_get_first(seq);
+	} else {
+		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
+		rc = pneigh_get_next(seq, v, NULL);
+	}
+out:
+	++(*pos);
+	return rc;
+}
+EXPORT_SYMBOL(neigh_seq_next);
+
+void neigh_seq_stop(struct seq_file *seq, void *v)
+	__releases(rcu_bh)
+{
+	rcu_read_unlock_bh();
+}
+EXPORT_SYMBOL(neigh_seq_stop);
+
+/* statistics via seq_file */
+
+static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct neigh_table *tbl = seq->private;
+	int cpu;
+
+	if (*pos == 0)
+		return SEQ_START_TOKEN;
+
+	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
+		if (!cpu_possible(cpu))
+			continue;
+		*pos = cpu+1;
+		return per_cpu_ptr(tbl->stats, cpu);
+	}
+	return NULL;
+}
+
+static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct neigh_table *tbl = seq->private;
+	int cpu;
+
+	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
+		if (!cpu_possible(cpu))
+			continue;
+		*pos = cpu+1;
+		return per_cpu_ptr(tbl->stats, cpu);
+	}
+	return NULL;
+}
+
+static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
+{
+
+}
+
+static int neigh_stat_seq_show(struct seq_file *seq, void *v)
+{
+	struct neigh_table *tbl = seq->private;
+	struct neigh_statistics *st = v;
+
+	if (v == SEQ_START_TOKEN) {
+		seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
+		return 0;
+	}
+
+	seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
+			"%08lx %08lx  %08lx %08lx %08lx\n",
+		   atomic_read(&tbl->entries),
+
+		   st->allocs,
+		   st->destroys,
+		   st->hash_grows,
+
+		   st->lookups,
+		   st->hits,
+
+		   st->res_failed,
+
+		   st->rcv_probes_mcast,
+		   st->rcv_probes_ucast,
+
+		   st->periodic_gc_runs,
+		   st->forced_gc_runs,
+		   st->unres_discards
+		   );
+
+	return 0;
+}
+
+static const struct seq_operations neigh_stat_seq_ops = {
+	.start	= neigh_stat_seq_start,
+	.next	= neigh_stat_seq_next,
+	.stop	= neigh_stat_seq_stop,
+	.show	= neigh_stat_seq_show,
+};
+
+static int neigh_stat_seq_open(struct inode *inode, struct file *file)
+{
+	int ret = seq_open(file, &neigh_stat_seq_ops);
+
+	if (!ret) {
+		struct seq_file *sf = file->private_data;
+		sf->private = PDE(inode)->data;
+	}
+	return ret;
+};
+
+static const struct file_operations neigh_stat_seq_fops = {
+	.owner	 = THIS_MODULE,
+	.open 	 = neigh_stat_seq_open,
+	.read	 = seq_read,
+	.llseek	 = seq_lseek,
+	.release = seq_release,
+};
+
+#endif /* CONFIG_PROC_FS */
+
+static inline size_t neigh_nlmsg_size(void)
+{
+	return NLMSG_ALIGN(sizeof(struct ndmsg))
+	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
+	       + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
+	       + nla_total_size(sizeof(struct nda_cacheinfo))
+	       + nla_total_size(4); /* NDA_PROBES */
+}
+
+static void __neigh_notify(struct neighbour *n, int type, int flags)
+{
+	struct net *net = dev_net(n->dev);
+	struct sk_buff *skb;
+	int err = -ENOBUFS;
+
+	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
+	if (skb == NULL)
+		goto errout;
+
+	err = neigh_fill_info(skb, n, 0, 0, type, flags);
+	if (err < 0) {
+		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
+		WARN_ON(err == -EMSGSIZE);
+		kfree_skb(skb);
+		goto errout;
+	}
+	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
+	return;
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
+}
+
+#ifdef CONFIG_ARPD
+void neigh_app_ns(struct neighbour *n)
+{
+	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
+}
+EXPORT_SYMBOL(neigh_app_ns);
+#endif /* CONFIG_ARPD */
+
+#ifdef CONFIG_SYSCTL
+
+static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer,
+			   size_t *lenp, loff_t *ppos)
+{
+	int size, ret;
+	ctl_table tmp = *ctl;
+
+	tmp.data = &size;
+	size = DIV_ROUND_UP(*(int *)ctl->data, SKB_TRUESIZE(ETH_FRAME_LEN));
+	ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
+	if (write && !ret)
+		*(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
+	return ret;
+}
+
+enum {
+	NEIGH_VAR_MCAST_PROBE,
+	NEIGH_VAR_UCAST_PROBE,
+	NEIGH_VAR_APP_PROBE,
+	NEIGH_VAR_RETRANS_TIME,
+	NEIGH_VAR_BASE_REACHABLE_TIME,
+	NEIGH_VAR_DELAY_PROBE_TIME,
+	NEIGH_VAR_GC_STALETIME,
+	NEIGH_VAR_QUEUE_LEN,
+	NEIGH_VAR_QUEUE_LEN_BYTES,
+	NEIGH_VAR_PROXY_QLEN,
+	NEIGH_VAR_ANYCAST_DELAY,
+	NEIGH_VAR_PROXY_DELAY,
+	NEIGH_VAR_LOCKTIME,
+	NEIGH_VAR_RETRANS_TIME_MS,
+	NEIGH_VAR_BASE_REACHABLE_TIME_MS,
+	NEIGH_VAR_GC_INTERVAL,
+	NEIGH_VAR_GC_THRESH1,
+	NEIGH_VAR_GC_THRESH2,
+	NEIGH_VAR_GC_THRESH3,
+	NEIGH_VAR_MAX
+};
+
+static struct neigh_sysctl_table {
+	struct ctl_table_header *sysctl_header;
+	struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
+	char *dev_name;
+} neigh_sysctl_template __read_mostly = {
+	.neigh_vars = {
+		[NEIGH_VAR_MCAST_PROBE] = {
+			.procname	= "mcast_solicit",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
+		},
+		[NEIGH_VAR_UCAST_PROBE] = {
+			.procname	= "ucast_solicit",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
+		},
+		[NEIGH_VAR_APP_PROBE] = {
+			.procname	= "app_solicit",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
+		},
+		[NEIGH_VAR_RETRANS_TIME] = {
+			.procname	= "retrans_time",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec_userhz_jiffies,
+		},
+		[NEIGH_VAR_BASE_REACHABLE_TIME] = {
+			.procname	= "base_reachable_time",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec_jiffies,
+		},
+		[NEIGH_VAR_DELAY_PROBE_TIME] = {
+			.procname	= "delay_first_probe_time",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec_jiffies,
+		},
+		[NEIGH_VAR_GC_STALETIME] = {
+			.procname	= "gc_stale_time",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec_jiffies,
+		},
+		[NEIGH_VAR_QUEUE_LEN] = {
+			.procname	= "unres_qlen",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_unres_qlen,
+		},
+		[NEIGH_VAR_QUEUE_LEN_BYTES] = {
+			.procname	= "unres_qlen_bytes",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
+		},
+		[NEIGH_VAR_PROXY_QLEN] = {
+			.procname	= "proxy_qlen",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
+		},
+		[NEIGH_VAR_ANYCAST_DELAY] = {
+			.procname	= "anycast_delay",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec_userhz_jiffies,
+		},
+		[NEIGH_VAR_PROXY_DELAY] = {
+			.procname	= "proxy_delay",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec_userhz_jiffies,
+		},
+		[NEIGH_VAR_LOCKTIME] = {
+			.procname	= "locktime",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec_userhz_jiffies,
+		},
+		[NEIGH_VAR_RETRANS_TIME_MS] = {
+			.procname	= "retrans_time_ms",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec_ms_jiffies,
+		},
+		[NEIGH_VAR_BASE_REACHABLE_TIME_MS] = {
+			.procname	= "base_reachable_time_ms",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec_ms_jiffies,
+		},
+		[NEIGH_VAR_GC_INTERVAL] = {
+			.procname	= "gc_interval",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec_jiffies,
+		},
+		[NEIGH_VAR_GC_THRESH1] = {
+			.procname	= "gc_thresh1",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
+		},
+		[NEIGH_VAR_GC_THRESH2] = {
+			.procname	= "gc_thresh2",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
+		},
+		[NEIGH_VAR_GC_THRESH3] = {
+			.procname	= "gc_thresh3",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
+		},
+		{},
+	},
+};
+
+int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
+			  char *p_name, proc_handler *handler)
+{
+	struct neigh_sysctl_table *t;
+	const char *dev_name_source = NULL;
+
+#define NEIGH_CTL_PATH_ROOT	0
+#define NEIGH_CTL_PATH_PROTO	1
+#define NEIGH_CTL_PATH_NEIGH	2
+#define NEIGH_CTL_PATH_DEV	3
+
+	struct ctl_path neigh_path[] = {
+		{ .procname = "net",	 },
+		{ .procname = "proto",	 },
+		{ .procname = "neigh",	 },
+		{ .procname = "default", },
+		{ },
+	};
+
+	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
+	if (!t)
+		goto err;
+
+	t->neigh_vars[NEIGH_VAR_MCAST_PROBE].data  = &p->mcast_probes;
+	t->neigh_vars[NEIGH_VAR_UCAST_PROBE].data  = &p->ucast_probes;
+	t->neigh_vars[NEIGH_VAR_APP_PROBE].data  = &p->app_probes;
+	t->neigh_vars[NEIGH_VAR_RETRANS_TIME].data  = &p->retrans_time;
+	t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].data  = &p->base_reachable_time;
+	t->neigh_vars[NEIGH_VAR_DELAY_PROBE_TIME].data  = &p->delay_probe_time;
+	t->neigh_vars[NEIGH_VAR_GC_STALETIME].data  = &p->gc_staletime;
+	t->neigh_vars[NEIGH_VAR_QUEUE_LEN].data  = &p->queue_len_bytes;
+	t->neigh_vars[NEIGH_VAR_QUEUE_LEN_BYTES].data  = &p->queue_len_bytes;
+	t->neigh_vars[NEIGH_VAR_PROXY_QLEN].data  = &p->proxy_qlen;
+	t->neigh_vars[NEIGH_VAR_ANYCAST_DELAY].data  = &p->anycast_delay;
+	t->neigh_vars[NEIGH_VAR_PROXY_DELAY].data = &p->proxy_delay;
+	t->neigh_vars[NEIGH_VAR_LOCKTIME].data = &p->locktime;
+	t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].data  = &p->retrans_time;
+	t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].data  = &p->base_reachable_time;
+
+	if (dev) {
+		dev_name_source = dev->name;
+		/* Terminate the table early */
+		memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
+		       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
+	} else {
+		dev_name_source = neigh_path[NEIGH_CTL_PATH_DEV].procname;
+		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1);
+		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1;
+		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2;
+		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3;
+	}
+
+
+	if (handler) {
+		/* RetransTime */
+		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
+		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].extra1 = dev;
+		/* ReachableTime */
+		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
+		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].extra1 = dev;
+		/* RetransTime (in milliseconds)*/
+		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
+		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].extra1 = dev;
+		/* ReachableTime (in milliseconds) */
+		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
+		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev;
+	}
+
+	t->dev_name = kstrdup(dev_name_source, GFP_KERNEL);
+	if (!t->dev_name)
+		goto free;
+
+	neigh_path[NEIGH_CTL_PATH_DEV].procname = t->dev_name;
+	neigh_path[NEIGH_CTL_PATH_PROTO].procname = p_name;
+
+	t->sysctl_header =
+		register_net_sysctl_table(neigh_parms_net(p), neigh_path, t->neigh_vars);
+	if (!t->sysctl_header)
+		goto free_procname;
+
+	p->sysctl_table = t;
+	return 0;
+
+free_procname:
+	kfree(t->dev_name);
+free:
+	kfree(t);
+err:
+	return -ENOBUFS;
+}
+EXPORT_SYMBOL(neigh_sysctl_register);
+
+void neigh_sysctl_unregister(struct neigh_parms *p)
+{
+	if (p->sysctl_table) {
+		struct neigh_sysctl_table *t = p->sysctl_table;
+		p->sysctl_table = NULL;
+		unregister_sysctl_table(t->sysctl_header);
+		kfree(t->dev_name);
+		kfree(t);
+	}
+}
+EXPORT_SYMBOL(neigh_sysctl_unregister);
+
+#endif	/* CONFIG_SYSCTL */
+
+static int __init neigh_init(void)
+{
+	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
+	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
+	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);
+
+	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
+		      NULL);
+	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);
+
+	return 0;
+}
+
+subsys_initcall(neigh_init);
+
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
new file mode 100644
index 00000000..49558623
--- /dev/null
+++ b/net/core/net-sysfs.c
@@ -0,0 +1,1506 @@
+/*
+ * net-sysfs.c - network device class and attributes
+ *
+ * Copyright (c) 2003 Stephen Hemminger <shemminger@osdl.org>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/capability.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/slab.h>
+#include <linux/nsproxy.h>
+#include <net/sock.h>
+#include <net/net_namespace.h>
+#include <linux/rtnetlink.h>
+#include <linux/wireless.h>
+#include <linux/vmalloc.h>
+#include <linux/export.h>
+#include <linux/jiffies.h>
+#include <net/wext.h>
+
+#include "net-sysfs.h"
+
+#ifdef CONFIG_SYSFS
+static const char fmt_hex[] = "%#x\n";
+static const char fmt_long_hex[] = "%#lx\n";
+static const char fmt_dec[] = "%d\n";
+static const char fmt_udec[] = "%u\n";
+static const char fmt_ulong[] = "%lu\n";
+static const char fmt_u64[] = "%llu\n";
+
+static inline int dev_isalive(const struct net_device *dev)
+{
+	return dev->reg_state <= NETREG_REGISTERED;
+}
+
+/* use same locking rules as GIF* ioctl's */
+static ssize_t netdev_show(const struct device *dev,
+			   struct device_attribute *attr, char *buf,
+			   ssize_t (*format)(const struct net_device *, char *))
+{
+	struct net_device *net = to_net_dev(dev);
+	ssize_t ret = -EINVAL;
+
+	read_lock(&dev_base_lock);
+	if (dev_isalive(net))
+		ret = (*format)(net, buf);
+	read_unlock(&dev_base_lock);
+
+	return ret;
+}
+
+/* generate a show function for simple field */
+#define NETDEVICE_SHOW(field, format_string)				\
+static ssize_t format_##field(const struct net_device *net, char *buf)	\
+{									\
+	return sprintf(buf, format_string, net->field);			\
+}									\
+static ssize_t show_##field(struct device *dev,				\
+			    struct device_attribute *attr, char *buf)	\
+{									\
+	return netdev_show(dev, attr, buf, format_##field);		\
+}
+
+
+/* use same locking and permission rules as SIF* ioctl's */
+static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
+			    const char *buf, size_t len,
+			    int (*set)(struct net_device *, unsigned long))
+{
+	struct net_device *net = to_net_dev(dev);
+	char *endp;
+	unsigned long new;
+	int ret = -EINVAL;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	new = simple_strtoul(buf, &endp, 0);
+	if (endp == buf)
+		goto err;
+
+	if (!rtnl_trylock())
+		return restart_syscall();
+
+	if (dev_isalive(net)) {
+		if ((ret = (*set)(net, new)) == 0)
+			ret = len;
+	}
+	rtnl_unlock();
+ err:
+	return ret;
+}
+
+NETDEVICE_SHOW(dev_id, fmt_hex);
+NETDEVICE_SHOW(addr_assign_type, fmt_dec);
+NETDEVICE_SHOW(addr_len, fmt_dec);
+NETDEVICE_SHOW(iflink, fmt_dec);
+NETDEVICE_SHOW(ifindex, fmt_dec);
+NETDEVICE_SHOW(type, fmt_dec);
+NETDEVICE_SHOW(link_mode, fmt_dec);
+
+/* use same locking rules as GIFHWADDR ioctl's */
+static ssize_t show_address(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	struct net_device *net = to_net_dev(dev);
+	ssize_t ret = -EINVAL;
+
+	read_lock(&dev_base_lock);
+	if (dev_isalive(net))
+		ret = sysfs_format_mac(buf, net->dev_addr, net->addr_len);
+	read_unlock(&dev_base_lock);
+	return ret;
+}
+
+static ssize_t show_broadcast(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	struct net_device *net = to_net_dev(dev);
+	if (dev_isalive(net))
+		return sysfs_format_mac(buf, net->broadcast, net->addr_len);
+	return -EINVAL;
+}
+
+static ssize_t show_carrier(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	struct net_device *netdev = to_net_dev(dev);
+	if (netif_running(netdev)) {
+		return sprintf(buf, fmt_dec, !!netif_carrier_ok(netdev));
+	}
+	return -EINVAL;
+}
+
+static ssize_t show_speed(struct device *dev,
+			  struct device_attribute *attr, char *buf)
+{
+	struct net_device *netdev = to_net_dev(dev);
+	int ret = -EINVAL;
+
+	if (!rtnl_trylock())
+		return restart_syscall();
+
+	if (netif_running(netdev)) {
+		struct ethtool_cmd cmd;
+		if (!__ethtool_get_settings(netdev, &cmd))
+			ret = sprintf(buf, fmt_udec, ethtool_cmd_speed(&cmd));
+	}
+	rtnl_unlock();
+	return ret;
+}
+
+static ssize_t show_duplex(struct device *dev,
+			   struct device_attribute *attr, char *buf)
+{
+	struct net_device *netdev = to_net_dev(dev);
+	int ret = -EINVAL;
+
+	if (!rtnl_trylock())
+		return restart_syscall();
+
+	if (netif_running(netdev)) {
+		struct ethtool_cmd cmd;
+		if (!__ethtool_get_settings(netdev, &cmd))
+			ret = sprintf(buf, "%s\n",
+				      cmd.duplex ? "full" : "half");
+	}
+	rtnl_unlock();
+	return ret;
+}
+
+static ssize_t show_dormant(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	struct net_device *netdev = to_net_dev(dev);
+
+	if (netif_running(netdev))
+		return sprintf(buf, fmt_dec, !!netif_dormant(netdev));
+
+	return -EINVAL;
+}
+
+static const char *const operstates[] = {
+	"unknown",
+	"notpresent", /* currently unused */
+	"down",
+	"lowerlayerdown",
+	"testing", /* currently unused */
+	"dormant",
+	"up"
+};
+
+static ssize_t show_operstate(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	const struct net_device *netdev = to_net_dev(dev);
+	unsigned char operstate;
+
+	read_lock(&dev_base_lock);
+	operstate = netdev->operstate;
+	if (!netif_running(netdev))
+		operstate = IF_OPER_DOWN;
+	read_unlock(&dev_base_lock);
+
+	if (operstate >= ARRAY_SIZE(operstates))
+		return -EINVAL; /* should not happen */
+
+	return sprintf(buf, "%s\n", operstates[operstate]);
+}
+
+/* read-write attributes */
+NETDEVICE_SHOW(mtu, fmt_dec);
+
+static int change_mtu(struct net_device *net, unsigned long new_mtu)
+{
+	return dev_set_mtu(net, (int) new_mtu);
+}
+
+static ssize_t store_mtu(struct device *dev, struct device_attribute *attr,
+			 const char *buf, size_t len)
+{
+	return netdev_store(dev, attr, buf, len, change_mtu);
+}
+
+NETDEVICE_SHOW(flags, fmt_hex);
+
+static int change_flags(struct net_device *net, unsigned long new_flags)
+{
+	return dev_change_flags(net, (unsigned) new_flags);
+}
+
+static ssize_t store_flags(struct device *dev, struct device_attribute *attr,
+			   const char *buf, size_t len)
+{
+	return netdev_store(dev, attr, buf, len, change_flags);
+}
+
+NETDEVICE_SHOW(tx_queue_len, fmt_ulong);
+
+static int change_tx_queue_len(struct net_device *net, unsigned long new_len)
+{
+	net->tx_queue_len = new_len;
+	return 0;
+}
+
+static ssize_t store_tx_queue_len(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t len)
+{
+	return netdev_store(dev, attr, buf, len, change_tx_queue_len);
+}
+
+static ssize_t store_ifalias(struct device *dev, struct device_attribute *attr,
+			     const char *buf, size_t len)
+{
+	struct net_device *netdev = to_net_dev(dev);
+	size_t count = len;
+	ssize_t ret;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	/* ignore trailing newline */
+	if (len >  0 && buf[len - 1] == '\n')
+		--count;
+
+	if (!rtnl_trylock())
+		return restart_syscall();
+	ret = dev_set_alias(netdev, buf, count);
+	rtnl_unlock();
+
+	return ret < 0 ? ret : len;
+}
+
+static ssize_t show_ifalias(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	const struct net_device *netdev = to_net_dev(dev);
+	ssize_t ret = 0;
+
+	if (!rtnl_trylock())
+		return restart_syscall();
+	if (netdev->ifalias)
+		ret = sprintf(buf, "%s\n", netdev->ifalias);
+	rtnl_unlock();
+	return ret;
+}
+
+NETDEVICE_SHOW(group, fmt_dec);
+
+static int change_group(struct net_device *net, unsigned long new_group)
+{
+	dev_set_group(net, (int) new_group);
+	return 0;
+}
+
+static ssize_t store_group(struct device *dev, struct device_attribute *attr,
+			 const char *buf, size_t len)
+{
+	return netdev_store(dev, attr, buf, len, change_group);
+}
+
+static struct device_attribute net_class_attributes[] = {
+	__ATTR(addr_assign_type, S_IRUGO, show_addr_assign_type, NULL),
+	__ATTR(addr_len, S_IRUGO, show_addr_len, NULL),
+	__ATTR(dev_id, S_IRUGO, show_dev_id, NULL),
+	__ATTR(ifalias, S_IRUGO | S_IWUSR, show_ifalias, store_ifalias),
+	__ATTR(iflink, S_IRUGO, show_iflink, NULL),
+	__ATTR(ifindex, S_IRUGO, show_ifindex, NULL),
+	__ATTR(type, S_IRUGO, show_type, NULL),
+	__ATTR(link_mode, S_IRUGO, show_link_mode, NULL),
+	__ATTR(address, S_IRUGO, show_address, NULL),
+	__ATTR(broadcast, S_IRUGO, show_broadcast, NULL),
+	__ATTR(carrier, S_IRUGO, show_carrier, NULL),
+	__ATTR(speed, S_IRUGO, show_speed, NULL),
+	__ATTR(duplex, S_IRUGO, show_duplex, NULL),
+	__ATTR(dormant, S_IRUGO, show_dormant, NULL),
+	__ATTR(operstate, S_IRUGO, show_operstate, NULL),
+	__ATTR(mtu, S_IRUGO | S_IWUSR, show_mtu, store_mtu),
+	__ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags),
+	__ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len,
+	       store_tx_queue_len),
+	__ATTR(netdev_group, S_IRUGO | S_IWUSR, show_group, store_group),
+	{}
+};
+
+/* Show a given an attribute in the statistics group */
+static ssize_t netstat_show(const struct device *d,
+			    struct device_attribute *attr, char *buf,
+			    unsigned long offset)
+{
+	struct net_device *dev = to_net_dev(d);
+	ssize_t ret = -EINVAL;
+
+	WARN_ON(offset > sizeof(struct rtnl_link_stats64) ||
+			offset % sizeof(u64) != 0);
+
+	read_lock(&dev_base_lock);
+	if (dev_isalive(dev)) {
+		struct rtnl_link_stats64 temp;
+		const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
+
+		ret = sprintf(buf, fmt_u64, *(u64 *)(((u8 *) stats) + offset));
+	}
+	read_unlock(&dev_base_lock);
+	return ret;
+}
+
+/* generate a read-only statistics attribute */
+#define NETSTAT_ENTRY(name)						\
+static ssize_t show_##name(struct device *d,				\
+			   struct device_attribute *attr, char *buf) 	\
+{									\
+	return netstat_show(d, attr, buf,				\
+			    offsetof(struct rtnl_link_stats64, name));	\
+}									\
+static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
+
+NETSTAT_ENTRY(rx_packets);
+NETSTAT_ENTRY(tx_packets);
+NETSTAT_ENTRY(rx_bytes);
+NETSTAT_ENTRY(tx_bytes);
+NETSTAT_ENTRY(rx_errors);
+NETSTAT_ENTRY(tx_errors);
+NETSTAT_ENTRY(rx_dropped);
+NETSTAT_ENTRY(tx_dropped);
+NETSTAT_ENTRY(multicast);
+NETSTAT_ENTRY(collisions);
+NETSTAT_ENTRY(rx_length_errors);
+NETSTAT_ENTRY(rx_over_errors);
+NETSTAT_ENTRY(rx_crc_errors);
+NETSTAT_ENTRY(rx_frame_errors);
+NETSTAT_ENTRY(rx_fifo_errors);
+NETSTAT_ENTRY(rx_missed_errors);
+NETSTAT_ENTRY(tx_aborted_errors);
+NETSTAT_ENTRY(tx_carrier_errors);
+NETSTAT_ENTRY(tx_fifo_errors);
+NETSTAT_ENTRY(tx_heartbeat_errors);
+NETSTAT_ENTRY(tx_window_errors);
+NETSTAT_ENTRY(rx_compressed);
+NETSTAT_ENTRY(tx_compressed);
+
+static struct attribute *netstat_attrs[] = {
+	&dev_attr_rx_packets.attr,
+	&dev_attr_tx_packets.attr,
+	&dev_attr_rx_bytes.attr,
+	&dev_attr_tx_bytes.attr,
+	&dev_attr_rx_errors.attr,
+	&dev_attr_tx_errors.attr,
+	&dev_attr_rx_dropped.attr,
+	&dev_attr_tx_dropped.attr,
+	&dev_attr_multicast.attr,
+	&dev_attr_collisions.attr,
+	&dev_attr_rx_length_errors.attr,
+	&dev_attr_rx_over_errors.attr,
+	&dev_attr_rx_crc_errors.attr,
+	&dev_attr_rx_frame_errors.attr,
+	&dev_attr_rx_fifo_errors.attr,
+	&dev_attr_rx_missed_errors.attr,
+	&dev_attr_tx_aborted_errors.attr,
+	&dev_attr_tx_carrier_errors.attr,
+	&dev_attr_tx_fifo_errors.attr,
+	&dev_attr_tx_heartbeat_errors.attr,
+	&dev_attr_tx_window_errors.attr,
+	&dev_attr_rx_compressed.attr,
+	&dev_attr_tx_compressed.attr,
+	NULL
+};
+
+
+static struct attribute_group netstat_group = {
+	.name  = "statistics",
+	.attrs  = netstat_attrs,
+};
+
+#ifdef CONFIG_WIRELESS_EXT_SYSFS
+/* helper function that does all the locking etc for wireless stats */
+static ssize_t wireless_show(struct device *d, char *buf,
+			     ssize_t (*format)(const struct iw_statistics *,
+					       char *))
+{
+	struct net_device *dev = to_net_dev(d);
+	const struct iw_statistics *iw;
+	ssize_t ret = -EINVAL;
+
+	if (!rtnl_trylock())
+		return restart_syscall();
+	if (dev_isalive(dev)) {
+		iw = get_wireless_stats(dev);
+		if (iw)
+			ret = (*format)(iw, buf);
+	}
+	rtnl_unlock();
+
+	return ret;
+}
+
+/* show function template for wireless fields */
+#define WIRELESS_SHOW(name, field, format_string)			\
+static ssize_t format_iw_##name(const struct iw_statistics *iw, char *buf) \
+{									\
+	return sprintf(buf, format_string, iw->field);			\
+}									\
+static ssize_t show_iw_##name(struct device *d,				\
+			      struct device_attribute *attr, char *buf)	\
+{									\
+	return wireless_show(d, buf, format_iw_##name);			\
+}									\
+static DEVICE_ATTR(name, S_IRUGO, show_iw_##name, NULL)
+
+WIRELESS_SHOW(status, status, fmt_hex);
+WIRELESS_SHOW(link, qual.qual, fmt_dec);
+WIRELESS_SHOW(level, qual.level, fmt_dec);
+WIRELESS_SHOW(noise, qual.noise, fmt_dec);
+WIRELESS_SHOW(nwid, discard.nwid, fmt_dec);
+WIRELESS_SHOW(crypt, discard.code, fmt_dec);
+WIRELESS_SHOW(fragment, discard.fragment, fmt_dec);
+WIRELESS_SHOW(misc, discard.misc, fmt_dec);
+WIRELESS_SHOW(retries, discard.retries, fmt_dec);
+WIRELESS_SHOW(beacon, miss.beacon, fmt_dec);
+
+static struct attribute *wireless_attrs[] = {
+	&dev_attr_status.attr,
+	&dev_attr_link.attr,
+	&dev_attr_level.attr,
+	&dev_attr_noise.attr,
+	&dev_attr_nwid.attr,
+	&dev_attr_crypt.attr,
+	&dev_attr_fragment.attr,
+	&dev_attr_retries.attr,
+	&dev_attr_misc.attr,
+	&dev_attr_beacon.attr,
+	NULL
+};
+
+static struct attribute_group wireless_group = {
+	.name = "wireless",
+	.attrs = wireless_attrs,
+};
+#endif
+#endif /* CONFIG_SYSFS */
+
+#ifdef CONFIG_RPS
+/*
+ * RX queue sysfs structures and functions.
+ */
+struct rx_queue_attribute {
+	struct attribute attr;
+	ssize_t (*show)(struct netdev_rx_queue *queue,
+	    struct rx_queue_attribute *attr, char *buf);
+	ssize_t (*store)(struct netdev_rx_queue *queue,
+	    struct rx_queue_attribute *attr, const char *buf, size_t len);
+};
+#define to_rx_queue_attr(_attr) container_of(_attr,		\
+    struct rx_queue_attribute, attr)
+
+#define to_rx_queue(obj) container_of(obj, struct netdev_rx_queue, kobj)
+
+static ssize_t rx_queue_attr_show(struct kobject *kobj, struct attribute *attr,
+				  char *buf)
+{
+	struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
+	struct netdev_rx_queue *queue = to_rx_queue(kobj);
+
+	if (!attribute->show)
+		return -EIO;
+
+	return attribute->show(queue, attribute, buf);
+}
+
+static ssize_t rx_queue_attr_store(struct kobject *kobj, struct attribute *attr,
+				   const char *buf, size_t count)
+{
+	struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
+	struct netdev_rx_queue *queue = to_rx_queue(kobj);
+
+	if (!attribute->store)
+		return -EIO;
+
+	return attribute->store(queue, attribute, buf, count);
+}
+
+static const struct sysfs_ops rx_queue_sysfs_ops = {
+	.show = rx_queue_attr_show,
+	.store = rx_queue_attr_store,
+};
+
+static ssize_t show_rps_map(struct netdev_rx_queue *queue,
+			    struct rx_queue_attribute *attribute, char *buf)
+{
+	struct rps_map *map;
+	cpumask_var_t mask;
+	size_t len = 0;
+	int i;
+
+	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	rcu_read_lock();
+	map = rcu_dereference(queue->rps_map);
+	if (map)
+		for (i = 0; i < map->len; i++)
+			cpumask_set_cpu(map->cpus[i], mask);
+
+	len += cpumask_scnprintf(buf + len, PAGE_SIZE, mask);
+	if (PAGE_SIZE - len < 3) {
+		rcu_read_unlock();
+		free_cpumask_var(mask);
+		return -EINVAL;
+	}
+	rcu_read_unlock();
+
+	free_cpumask_var(mask);
+	len += sprintf(buf + len, "\n");
+	return len;
+}
+
+static ssize_t store_rps_map(struct netdev_rx_queue *queue,
+		      struct rx_queue_attribute *attribute,
+		      const char *buf, size_t len)
+{
+	struct rps_map *old_map, *map;
+	cpumask_var_t mask;
+	int err, cpu, i;
+	static DEFINE_SPINLOCK(rps_map_lock);
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits);
+	if (err) {
+		free_cpumask_var(mask);
+		return err;
+	}
+
+	map = kzalloc(max_t(unsigned,
+	    RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES),
+	    GFP_KERNEL);
+	if (!map) {
+		free_cpumask_var(mask);
+		return -ENOMEM;
+	}
+
+	i = 0;
+	for_each_cpu_and(cpu, mask, cpu_online_mask)
+		map->cpus[i++] = cpu;
+
+	if (i)
+		map->len = i;
+	else {
+		kfree(map);
+		map = NULL;
+	}
+
+	spin_lock(&rps_map_lock);
+	old_map = rcu_dereference_protected(queue->rps_map,
+					    lockdep_is_held(&rps_map_lock));
+	rcu_assign_pointer(queue->rps_map, map);
+	spin_unlock(&rps_map_lock);
+
+	if (map)
+		static_key_slow_inc(&rps_needed);
+	if (old_map) {
+		kfree_rcu(old_map, rcu);
+		static_key_slow_dec(&rps_needed);
+	}
+	free_cpumask_var(mask);
+	return len;
+}
+
+static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
+					   struct rx_queue_attribute *attr,
+					   char *buf)
+{
+	struct rps_dev_flow_table *flow_table;
+	unsigned long val = 0;
+
+	rcu_read_lock();
+	flow_table = rcu_dereference(queue->rps_flow_table);
+	if (flow_table)
+		val = (unsigned long)flow_table->mask + 1;
+	rcu_read_unlock();
+
+	return sprintf(buf, "%lu\n", val);
+}
+
+static void rps_dev_flow_table_release_work(struct work_struct *work)
+{
+	struct rps_dev_flow_table *table = container_of(work,
+	    struct rps_dev_flow_table, free_work);
+
+	vfree(table);
+}
+
+static void rps_dev_flow_table_release(struct rcu_head *rcu)
+{
+	struct rps_dev_flow_table *table = container_of(rcu,
+	    struct rps_dev_flow_table, rcu);
+
+	INIT_WORK(&table->free_work, rps_dev_flow_table_release_work);
+	schedule_work(&table->free_work);
+}
+
+static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
+				     struct rx_queue_attribute *attr,
+				     const char *buf, size_t len)
+{
+	unsigned long mask, count;
+	struct rps_dev_flow_table *table, *old_table;
+	static DEFINE_SPINLOCK(rps_dev_flow_lock);
+	int rc;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	rc = kstrtoul(buf, 0, &count);
+	if (rc < 0)
+		return rc;
+
+	if (count) {
+		mask = count - 1;
+		/* mask = roundup_pow_of_two(count) - 1;
+		 * without overflows...
+		 */
+		while ((mask | (mask >> 1)) != mask)
+			mask |= (mask >> 1);
+		/* On 64 bit arches, must check mask fits in table->mask (u32),
+		 * and on 32bit arches, must check RPS_DEV_FLOW_TABLE_SIZE(mask + 1)
+		 * doesnt overflow.
+		 */
+#if BITS_PER_LONG > 32
+		if (mask > (unsigned long)(u32)mask)
+			return -EINVAL;
+#else
+		if (mask > (ULONG_MAX - RPS_DEV_FLOW_TABLE_SIZE(1))
+				/ sizeof(struct rps_dev_flow)) {
+			/* Enforce a limit to prevent overflow */
+			return -EINVAL;
+		}
+#endif
+		table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(mask + 1));
+		if (!table)
+			return -ENOMEM;
+
+		table->mask = mask;
+		for (count = 0; count <= mask; count++)
+			table->flows[count].cpu = RPS_NO_CPU;
+	} else
+		table = NULL;
+
+	spin_lock(&rps_dev_flow_lock);
+	old_table = rcu_dereference_protected(queue->rps_flow_table,
+					      lockdep_is_held(&rps_dev_flow_lock));
+	rcu_assign_pointer(queue->rps_flow_table, table);
+	spin_unlock(&rps_dev_flow_lock);
+
+	if (old_table)
+		call_rcu(&old_table->rcu, rps_dev_flow_table_release);
+
+	return len;
+}
+
+static struct rx_queue_attribute rps_cpus_attribute =
+	__ATTR(rps_cpus, S_IRUGO | S_IWUSR, show_rps_map, store_rps_map);
+
+
+static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute =
+	__ATTR(rps_flow_cnt, S_IRUGO | S_IWUSR,
+	    show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt);
+
+static struct attribute *rx_queue_default_attrs[] = {
+	&rps_cpus_attribute.attr,
+	&rps_dev_flow_table_cnt_attribute.attr,
+	NULL
+};
+
+static void rx_queue_release(struct kobject *kobj)
+{
+	struct netdev_rx_queue *queue = to_rx_queue(kobj);
+	struct rps_map *map;
+	struct rps_dev_flow_table *flow_table;
+
+
+	map = rcu_dereference_protected(queue->rps_map, 1);
+	if (map) {
+		RCU_INIT_POINTER(queue->rps_map, NULL);
+		kfree_rcu(map, rcu);
+	}
+
+	flow_table = rcu_dereference_protected(queue->rps_flow_table, 1);
+	if (flow_table) {
+		RCU_INIT_POINTER(queue->rps_flow_table, NULL);
+		call_rcu(&flow_table->rcu, rps_dev_flow_table_release);
+	}
+
+	memset(kobj, 0, sizeof(*kobj));
+	dev_put(queue->dev);
+}
+
+static struct kobj_type rx_queue_ktype = {
+	.sysfs_ops = &rx_queue_sysfs_ops,
+	.release = rx_queue_release,
+	.default_attrs = rx_queue_default_attrs,
+};
+
+static int rx_queue_add_kobject(struct net_device *net, int index)
+{
+	struct netdev_rx_queue *queue = net->_rx + index;
+	struct kobject *kobj = &queue->kobj;
+	int error = 0;
+
+	kobj->kset = net->queues_kset;
+	error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL,
+	    "rx-%u", index);
+	if (error) {
+		kobject_put(kobj);
+		return error;
+	}
+
+	kobject_uevent(kobj, KOBJ_ADD);
+	dev_hold(queue->dev);
+
+	return error;
+}
+#endif /* CONFIG_RPS */
+
+int
+net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
+{
+#ifdef CONFIG_RPS
+	int i;
+	int error = 0;
+
+	for (i = old_num; i < new_num; i++) {
+		error = rx_queue_add_kobject(net, i);
+		if (error) {
+			new_num = old_num;
+			break;
+		}
+	}
+
+	while (--i >= new_num)
+		kobject_put(&net->_rx[i].kobj);
+
+	return error;
+#else
+	return 0;
+#endif
+}
+
+#ifdef CONFIG_SYSFS
+/*
+ * netdev_queue sysfs structures and functions.
+ */
+struct netdev_queue_attribute {
+	struct attribute attr;
+	ssize_t (*show)(struct netdev_queue *queue,
+	    struct netdev_queue_attribute *attr, char *buf);
+	ssize_t (*store)(struct netdev_queue *queue,
+	    struct netdev_queue_attribute *attr, const char *buf, size_t len);
+};
+#define to_netdev_queue_attr(_attr) container_of(_attr,		\
+    struct netdev_queue_attribute, attr)
+
+#define to_netdev_queue(obj) container_of(obj, struct netdev_queue, kobj)
+
+static ssize_t netdev_queue_attr_show(struct kobject *kobj,
+				      struct attribute *attr, char *buf)
+{
+	struct netdev_queue_attribute *attribute = to_netdev_queue_attr(attr);
+	struct netdev_queue *queue = to_netdev_queue(kobj);
+
+	if (!attribute->show)
+		return -EIO;
+
+	return attribute->show(queue, attribute, buf);
+}
+
+static ssize_t netdev_queue_attr_store(struct kobject *kobj,
+				       struct attribute *attr,
+				       const char *buf, size_t count)
+{
+	struct netdev_queue_attribute *attribute = to_netdev_queue_attr(attr);
+	struct netdev_queue *queue = to_netdev_queue(kobj);
+
+	if (!attribute->store)
+		return -EIO;
+
+	return attribute->store(queue, attribute, buf, count);
+}
+
+static const struct sysfs_ops netdev_queue_sysfs_ops = {
+	.show = netdev_queue_attr_show,
+	.store = netdev_queue_attr_store,
+};
+
+static ssize_t show_trans_timeout(struct netdev_queue *queue,
+				  struct netdev_queue_attribute *attribute,
+				  char *buf)
+{
+	unsigned long trans_timeout;
+
+	spin_lock_irq(&queue->_xmit_lock);
+	trans_timeout = queue->trans_timeout;
+	spin_unlock_irq(&queue->_xmit_lock);
+
+	return sprintf(buf, "%lu", trans_timeout);
+}
+
+static struct netdev_queue_attribute queue_trans_timeout =
+	__ATTR(tx_timeout, S_IRUGO, show_trans_timeout, NULL);
+
+#ifdef CONFIG_BQL
+/*
+ * Byte queue limits sysfs structures and functions.
+ */
+static ssize_t bql_show(char *buf, unsigned int value)
+{
+	return sprintf(buf, "%u\n", value);
+}
+
+static ssize_t bql_set(const char *buf, const size_t count,
+		       unsigned int *pvalue)
+{
+	unsigned int value;
+	int err;
+
+	if (!strcmp(buf, "max") || !strcmp(buf, "max\n"))
+		value = DQL_MAX_LIMIT;
+	else {
+		err = kstrtouint(buf, 10, &value);
+		if (err < 0)
+			return err;
+		if (value > DQL_MAX_LIMIT)
+			return -EINVAL;
+	}
+
+	*pvalue = value;
+
+	return count;
+}
+
+static ssize_t bql_show_hold_time(struct netdev_queue *queue,
+				  struct netdev_queue_attribute *attr,
+				  char *buf)
+{
+	struct dql *dql = &queue->dql;
+
+	return sprintf(buf, "%u\n", jiffies_to_msecs(dql->slack_hold_time));
+}
+
+static ssize_t bql_set_hold_time(struct netdev_queue *queue,
+				 struct netdev_queue_attribute *attribute,
+				 const char *buf, size_t len)
+{
+	struct dql *dql = &queue->dql;
+	unsigned value;
+	int err;
+
+	err = kstrtouint(buf, 10, &value);
+	if (err < 0)
+		return err;
+
+	dql->slack_hold_time = msecs_to_jiffies(value);
+
+	return len;
+}
+
+static struct netdev_queue_attribute bql_hold_time_attribute =
+	__ATTR(hold_time, S_IRUGO | S_IWUSR, bql_show_hold_time,
+	    bql_set_hold_time);
+
+static ssize_t bql_show_inflight(struct netdev_queue *queue,
+				 struct netdev_queue_attribute *attr,
+				 char *buf)
+{
+	struct dql *dql = &queue->dql;
+
+	return sprintf(buf, "%u\n", dql->num_queued - dql->num_completed);
+}
+
+static struct netdev_queue_attribute bql_inflight_attribute =
+	__ATTR(inflight, S_IRUGO, bql_show_inflight, NULL);
+
+#define BQL_ATTR(NAME, FIELD)						\
+static ssize_t bql_show_ ## NAME(struct netdev_queue *queue,		\
+				 struct netdev_queue_attribute *attr,	\
+				 char *buf)				\
+{									\
+	return bql_show(buf, queue->dql.FIELD);				\
+}									\
+									\
+static ssize_t bql_set_ ## NAME(struct netdev_queue *queue,		\
+				struct netdev_queue_attribute *attr,	\
+				const char *buf, size_t len)		\
+{									\
+	return bql_set(buf, len, &queue->dql.FIELD);			\
+}									\
+									\
+static struct netdev_queue_attribute bql_ ## NAME ## _attribute =	\
+	__ATTR(NAME, S_IRUGO | S_IWUSR, bql_show_ ## NAME,		\
+	    bql_set_ ## NAME);
+
+BQL_ATTR(limit, limit)
+BQL_ATTR(limit_max, max_limit)
+BQL_ATTR(limit_min, min_limit)
+
+static struct attribute *dql_attrs[] = {
+	&bql_limit_attribute.attr,
+	&bql_limit_max_attribute.attr,
+	&bql_limit_min_attribute.attr,
+	&bql_hold_time_attribute.attr,
+	&bql_inflight_attribute.attr,
+	NULL
+};
+
+static struct attribute_group dql_group = {
+	.name  = "byte_queue_limits",
+	.attrs  = dql_attrs,
+};
+#endif /* CONFIG_BQL */
+
+#ifdef CONFIG_XPS
+static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue)
+{
+	struct net_device *dev = queue->dev;
+	int i;
+
+	for (i = 0; i < dev->num_tx_queues; i++)
+		if (queue == &dev->_tx[i])
+			break;
+
+	BUG_ON(i >= dev->num_tx_queues);
+
+	return i;
+}
+
+
+static ssize_t show_xps_map(struct netdev_queue *queue,
+			    struct netdev_queue_attribute *attribute, char *buf)
+{
+	struct net_device *dev = queue->dev;
+	struct xps_dev_maps *dev_maps;
+	cpumask_var_t mask;
+	unsigned long index;
+	size_t len = 0;
+	int i;
+
+	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	index = get_netdev_queue_index(queue);
+
+	rcu_read_lock();
+	dev_maps = rcu_dereference(dev->xps_maps);
+	if (dev_maps) {
+		for_each_possible_cpu(i) {
+			struct xps_map *map =
+			    rcu_dereference(dev_maps->cpu_map[i]);
+			if (map) {
+				int j;
+				for (j = 0; j < map->len; j++) {
+					if (map->queues[j] == index) {
+						cpumask_set_cpu(i, mask);
+						break;
+					}
+				}
+			}
+		}
+	}
+	rcu_read_unlock();
+
+	len += cpumask_scnprintf(buf + len, PAGE_SIZE, mask);
+	if (PAGE_SIZE - len < 3) {
+		free_cpumask_var(mask);
+		return -EINVAL;
+	}
+
+	free_cpumask_var(mask);
+	len += sprintf(buf + len, "\n");
+	return len;
+}
+
+static DEFINE_MUTEX(xps_map_mutex);
+#define xmap_dereference(P)		\
+	rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
+
+static void xps_queue_release(struct netdev_queue *queue)
+{
+	struct net_device *dev = queue->dev;
+	struct xps_dev_maps *dev_maps;
+	struct xps_map *map;
+	unsigned long index;
+	int i, pos, nonempty = 0;
+
+	index = get_netdev_queue_index(queue);
+
+	mutex_lock(&xps_map_mutex);
+	dev_maps = xmap_dereference(dev->xps_maps);
+
+	if (dev_maps) {
+		for_each_possible_cpu(i) {
+			map = xmap_dereference(dev_maps->cpu_map[i]);
+			if (!map)
+				continue;
+
+			for (pos = 0; pos < map->len; pos++)
+				if (map->queues[pos] == index)
+					break;
+
+			if (pos < map->len) {
+				if (map->len > 1)
+					map->queues[pos] =
+					    map->queues[--map->len];
+				else {
+					RCU_INIT_POINTER(dev_maps->cpu_map[i],
+					    NULL);
+					kfree_rcu(map, rcu);
+					map = NULL;
+				}
+			}
+			if (map)
+				nonempty = 1;
+		}
+
+		if (!nonempty) {
+			RCU_INIT_POINTER(dev->xps_maps, NULL);
+			kfree_rcu(dev_maps, rcu);
+		}
+	}
+	mutex_unlock(&xps_map_mutex);
+}
+
+static ssize_t store_xps_map(struct netdev_queue *queue,
+		      struct netdev_queue_attribute *attribute,
+		      const char *buf, size_t len)
+{
+	struct net_device *dev = queue->dev;
+	cpumask_var_t mask;
+	int err, i, cpu, pos, map_len, alloc_len, need_set;
+	unsigned long index;
+	struct xps_map *map, *new_map;
+	struct xps_dev_maps *dev_maps, *new_dev_maps;
+	int nonempty = 0;
+	int numa_node_id = -2;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	index = get_netdev_queue_index(queue);
+
+	err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits);
+	if (err) {
+		free_cpumask_var(mask);
+		return err;
+	}
+
+	new_dev_maps = kzalloc(max_t(unsigned,
+	    XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES), GFP_KERNEL);
+	if (!new_dev_maps) {
+		free_cpumask_var(mask);
+		return -ENOMEM;
+	}
+
+	mutex_lock(&xps_map_mutex);
+
+	dev_maps = xmap_dereference(dev->xps_maps);
+
+	for_each_possible_cpu(cpu) {
+		map = dev_maps ?
+			xmap_dereference(dev_maps->cpu_map[cpu]) : NULL;
+		new_map = map;
+		if (map) {
+			for (pos = 0; pos < map->len; pos++)
+				if (map->queues[pos] == index)
+					break;
+			map_len = map->len;
+			alloc_len = map->alloc_len;
+		} else
+			pos = map_len = alloc_len = 0;
+
+		need_set = cpumask_test_cpu(cpu, mask) && cpu_online(cpu);
+#ifdef CONFIG_NUMA
+		if (need_set) {
+			if (numa_node_id == -2)
+				numa_node_id = cpu_to_node(cpu);
+			else if (numa_node_id != cpu_to_node(cpu))
+				numa_node_id = -1;
+		}
+#endif
+		if (need_set && pos >= map_len) {
+			/* Need to add queue to this CPU's map */
+			if (map_len >= alloc_len) {
+				alloc_len = alloc_len ?
+				    2 * alloc_len : XPS_MIN_MAP_ALLOC;
+				new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len),
+						       GFP_KERNEL,
+						       cpu_to_node(cpu));
+				if (!new_map)
+					goto error;
+				new_map->alloc_len = alloc_len;
+				for (i = 0; i < map_len; i++)
+					new_map->queues[i] = map->queues[i];
+				new_map->len = map_len;
+			}
+			new_map->queues[new_map->len++] = index;
+		} else if (!need_set && pos < map_len) {
+			/* Need to remove queue from this CPU's map */
+			if (map_len > 1)
+				new_map->queues[pos] =
+				    new_map->queues[--new_map->len];
+			else
+				new_map = NULL;
+		}
+		RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], new_map);
+	}
+
+	/* Cleanup old maps */
+	for_each_possible_cpu(cpu) {
+		map = dev_maps ?
+			xmap_dereference(dev_maps->cpu_map[cpu]) : NULL;
+		if (map && xmap_dereference(new_dev_maps->cpu_map[cpu]) != map)
+			kfree_rcu(map, rcu);
+		if (new_dev_maps->cpu_map[cpu])
+			nonempty = 1;
+	}
+
+	if (nonempty) {
+		rcu_assign_pointer(dev->xps_maps, new_dev_maps);
+	} else {
+		kfree(new_dev_maps);
+		RCU_INIT_POINTER(dev->xps_maps, NULL);
+	}
+
+	if (dev_maps)
+		kfree_rcu(dev_maps, rcu);
+
+	netdev_queue_numa_node_write(queue, (numa_node_id >= 0) ? numa_node_id :
+					    NUMA_NO_NODE);
+
+	mutex_unlock(&xps_map_mutex);
+
+	free_cpumask_var(mask);
+	return len;
+
+error:
+	mutex_unlock(&xps_map_mutex);
+
+	if (new_dev_maps)
+		for_each_possible_cpu(i)
+			kfree(rcu_dereference_protected(
+				new_dev_maps->cpu_map[i],
+				1));
+	kfree(new_dev_maps);
+	free_cpumask_var(mask);
+	return -ENOMEM;
+}
+
+static struct netdev_queue_attribute xps_cpus_attribute =
+    __ATTR(xps_cpus, S_IRUGO | S_IWUSR, show_xps_map, store_xps_map);
+#endif /* CONFIG_XPS */
+
+static struct attribute *netdev_queue_default_attrs[] = {
+	&queue_trans_timeout.attr,
+#ifdef CONFIG_XPS
+	&xps_cpus_attribute.attr,
+#endif
+	NULL
+};
+
+static void netdev_queue_release(struct kobject *kobj)
+{
+	struct netdev_queue *queue = to_netdev_queue(kobj);
+
+#ifdef CONFIG_XPS
+	xps_queue_release(queue);
+#endif
+
+	memset(kobj, 0, sizeof(*kobj));
+	dev_put(queue->dev);
+}
+
+static struct kobj_type netdev_queue_ktype = {
+	.sysfs_ops = &netdev_queue_sysfs_ops,
+	.release = netdev_queue_release,
+	.default_attrs = netdev_queue_default_attrs,
+};
+
+static int netdev_queue_add_kobject(struct net_device *net, int index)
+{
+	struct netdev_queue *queue = net->_tx + index;
+	struct kobject *kobj = &queue->kobj;
+	int error = 0;
+
+	kobj->kset = net->queues_kset;
+	error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL,
+	    "tx-%u", index);
+	if (error)
+		goto exit;
+
+#ifdef CONFIG_BQL
+	error = sysfs_create_group(kobj, &dql_group);
+	if (error)
+		goto exit;
+#endif
+
+	kobject_uevent(kobj, KOBJ_ADD);
+	dev_hold(queue->dev);
+
+	return 0;
+exit:
+	kobject_put(kobj);
+	return error;
+}
+#endif /* CONFIG_SYSFS */
+
+int
+netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
+{
+#ifdef CONFIG_SYSFS
+	int i;
+	int error = 0;
+
+	for (i = old_num; i < new_num; i++) {
+		error = netdev_queue_add_kobject(net, i);
+		if (error) {
+			new_num = old_num;
+			break;
+		}
+	}
+
+	while (--i >= new_num) {
+		struct netdev_queue *queue = net->_tx + i;
+
+#ifdef CONFIG_BQL
+		sysfs_remove_group(&queue->kobj, &dql_group);
+#endif
+		kobject_put(&queue->kobj);
+	}
+
+	return error;
+#else
+	return 0;
+#endif /* CONFIG_SYSFS */
+}
+
+static int register_queue_kobjects(struct net_device *net)
+{
+	int error = 0, txq = 0, rxq = 0, real_rx = 0, real_tx = 0;
+
+#ifdef CONFIG_SYSFS
+	net->queues_kset = kset_create_and_add("queues",
+	    NULL, &net->dev.kobj);
+	if (!net->queues_kset)
+		return -ENOMEM;
+#endif
+
+#ifdef CONFIG_RPS
+	real_rx = net->real_num_rx_queues;
+#endif
+	real_tx = net->real_num_tx_queues;
+
+	error = net_rx_queue_update_kobjects(net, 0, real_rx);
+	if (error)
+		goto error;
+	rxq = real_rx;
+
+	error = netdev_queue_update_kobjects(net, 0, real_tx);
+	if (error)
+		goto error;
+	txq = real_tx;
+
+	return 0;
+
+error:
+	netdev_queue_update_kobjects(net, txq, 0);
+	net_rx_queue_update_kobjects(net, rxq, 0);
+	return error;
+}
+
+static void remove_queue_kobjects(struct net_device *net)
+{
+	int real_rx = 0, real_tx = 0;
+
+#ifdef CONFIG_RPS
+	real_rx = net->real_num_rx_queues;
+#endif
+	real_tx = net->real_num_tx_queues;
+
+	net_rx_queue_update_kobjects(net, real_rx, 0);
+	netdev_queue_update_kobjects(net, real_tx, 0);
+#ifdef CONFIG_SYSFS
+	kset_unregister(net->queues_kset);
+#endif
+}
+
+static void *net_grab_current_ns(void)
+{
+	struct net *ns = current->nsproxy->net_ns;
+#ifdef CONFIG_NET_NS
+	if (ns)
+		atomic_inc(&ns->passive);
+#endif
+	return ns;
+}
+
+static const void *net_initial_ns(void)
+{
+	return &init_net;
+}
+
+static const void *net_netlink_ns(struct sock *sk)
+{
+	return sock_net(sk);
+}
+
+struct kobj_ns_type_operations net_ns_type_operations = {
+	.type = KOBJ_NS_TYPE_NET,
+	.grab_current_ns = net_grab_current_ns,
+	.netlink_ns = net_netlink_ns,
+	.initial_ns = net_initial_ns,
+	.drop_ns = net_drop_ns,
+};
+EXPORT_SYMBOL_GPL(net_ns_type_operations);
+
+#ifdef CONFIG_HOTPLUG
+static int netdev_uevent(struct device *d, struct kobj_uevent_env *env)
+{
+	struct net_device *dev = to_net_dev(d);
+	int retval;
+
+	/* pass interface to uevent. */
+	retval = add_uevent_var(env, "INTERFACE=%s", dev->name);
+	if (retval)
+		goto exit;
+
+	/* pass ifindex to uevent.
+	 * ifindex is useful as it won't change (interface name may change)
+	 * and is what RtNetlink uses natively. */
+	retval = add_uevent_var(env, "IFINDEX=%d", dev->ifindex);
+
+exit:
+	return retval;
+}
+#endif
+
+/*
+ *	netdev_release -- destroy and free a dead device.
+ *	Called when last reference to device kobject is gone.
+ */
+static void netdev_release(struct device *d)
+{
+	struct net_device *dev = to_net_dev(d);
+
+	BUG_ON(dev->reg_state != NETREG_RELEASED);
+
+	kfree(dev->ifalias);
+	kfree((char *)dev - dev->padded);
+}
+
+static const void *net_namespace(struct device *d)
+{
+	struct net_device *dev;
+	dev = container_of(d, struct net_device, dev);
+	return dev_net(dev);
+}
+
+static struct class net_class = {
+	.name = "net",
+	.dev_release = netdev_release,
+#ifdef CONFIG_SYSFS
+	.dev_attrs = net_class_attributes,
+#endif /* CONFIG_SYSFS */
+#ifdef CONFIG_HOTPLUG
+	.dev_uevent = netdev_uevent,
+#endif
+	.ns_type = &net_ns_type_operations,
+	.namespace = net_namespace,
+};
+
+/* Delete sysfs entries but hold kobject reference until after all
+ * netdev references are gone.
+ */
+void netdev_unregister_kobject(struct net_device * net)
+{
+	struct device *dev = &(net->dev);
+
+	kobject_get(&dev->kobj);
+
+	remove_queue_kobjects(net);
+
+	device_del(dev);
+}
+
+/* Create sysfs entries for network device. */
+int netdev_register_kobject(struct net_device *net)
+{
+	struct device *dev = &(net->dev);
+	const struct attribute_group **groups = net->sysfs_groups;
+	int error = 0;
+
+	device_initialize(dev);
+	dev->class = &net_class;
+	dev->platform_data = net;
+	dev->groups = groups;
+
+	dev_set_name(dev, "%s", net->name);
+
+#ifdef CONFIG_SYSFS
+	/* Allow for a device specific group */
+	if (*groups)
+		groups++;
+
+	*groups++ = &netstat_group;
+#ifdef CONFIG_WIRELESS_EXT_SYSFS
+	if (net->ieee80211_ptr)
+		*groups++ = &wireless_group;
+#ifdef CONFIG_WIRELESS_EXT
+	else if (net->wireless_handlers)
+		*groups++ = &wireless_group;
+#endif
+#endif
+#endif /* CONFIG_SYSFS */
+
+	error = device_add(dev);
+	if (error)
+		return error;
+
+	error = register_queue_kobjects(net);
+	if (error) {
+		device_del(dev);
+		return error;
+	}
+
+	return error;
+}
+
+int netdev_class_create_file(struct class_attribute *class_attr)
+{
+	return class_create_file(&net_class, class_attr);
+}
+EXPORT_SYMBOL(netdev_class_create_file);
+
+void netdev_class_remove_file(struct class_attribute *class_attr)
+{
+	class_remove_file(&net_class, class_attr);
+}
+EXPORT_SYMBOL(netdev_class_remove_file);
+
+int netdev_kobject_init(void)
+{
+	kobj_ns_type_register(&net_ns_type_operations);
+	return class_register(&net_class);
+}
diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h
new file mode 100644
index 00000000..bd7751ec
--- /dev/null
+++ b/net/core/net-sysfs.h
@@ -0,0 +1,11 @@
+#ifndef __NET_SYSFS_H__
+#define __NET_SYSFS_H__
+
+int netdev_kobject_init(void);
+int netdev_register_kobject(struct net_device *);
+void netdev_unregister_kobject(struct net_device *);
+int net_rx_queue_update_kobjects(struct net_device *, int old_num, int new_num);
+int netdev_queue_update_kobjects(struct net_device *net,
+				 int old_num, int new_num);
+
+#endif
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
new file mode 100644
index 00000000..ba3c0120
--- /dev/null
+++ b/net/core/net-traces.c
@@ -0,0 +1,37 @@
+/*
+ * consolidates trace point definitions
+ *
+ * Copyright (C) 2009 Neil Horman <nhorman@tuxdriver.com>
+ */
+
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/string.h>
+#include <linux/if_arp.h>
+#include <linux/inetdevice.h>
+#include <linux/inet.h>
+#include <linux/interrupt.h>
+#include <linux/export.h>
+#include <linux/netpoll.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/rcupdate.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include <linux/netlink.h>
+#include <linux/net_dropmon.h>
+#include <linux/slab.h>
+
+#include <asm/unaligned.h>
+#include <asm/bitops.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/skb.h>
+#include <trace/events/net.h>
+#include <trace/events/napi.h>
+#include <trace/events/sock.h>
+#include <trace/events/udp.h>
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(napi_poll);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
new file mode 100644
index 00000000..31a5ae51
--- /dev/null
+++ b/net/core/net_namespace.c
@@ -0,0 +1,640 @@
+#include <linux/workqueue.h>
+#include <linux/rtnetlink.h>
+#include <linux/cache.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+#include <linux/idr.h>
+#include <linux/rculist.h>
+#include <linux/nsproxy.h>
+#include <linux/proc_fs.h>
+#include <linux/file.h>
+#include <linux/export.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+
+/*
+ *	Our network namespace constructor/destructor lists
+ */
+
+static LIST_HEAD(pernet_list);
+static struct list_head *first_device = &pernet_list;
+static DEFINE_MUTEX(net_mutex);
+
+LIST_HEAD(net_namespace_list);
+EXPORT_SYMBOL_GPL(net_namespace_list);
+
+struct net init_net;
+EXPORT_SYMBOL(init_net);
+
+#define INITIAL_NET_GEN_PTRS	13 /* +1 for len +2 for rcu_head */
+
+static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;
+
+static struct net_generic *net_alloc_generic(void)
+{
+	struct net_generic *ng;
+	size_t generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]);
+
+	ng = kzalloc(generic_size, GFP_KERNEL);
+	if (ng)
+		ng->len = max_gen_ptrs;
+
+	return ng;
+}
+
+static int net_assign_generic(struct net *net, int id, void *data)
+{
+	struct net_generic *ng, *old_ng;
+
+	BUG_ON(!mutex_is_locked(&net_mutex));
+	BUG_ON(id == 0);
+
+	old_ng = rcu_dereference_protected(net->gen,
+					   lockdep_is_held(&net_mutex));
+	ng = old_ng;
+	if (old_ng->len >= id)
+		goto assign;
+
+	ng = net_alloc_generic();
+	if (ng == NULL)
+		return -ENOMEM;
+
+	/*
+	 * Some synchronisation notes:
+	 *
+	 * The net_generic explores the net->gen array inside rcu
+	 * read section. Besides once set the net->gen->ptr[x]
+	 * pointer never changes (see rules in netns/generic.h).
+	 *
+	 * That said, we simply duplicate this array and schedule
+	 * the old copy for kfree after a grace period.
+	 */
+
+	memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*));
+
+	rcu_assign_pointer(net->gen, ng);
+	kfree_rcu(old_ng, rcu);
+assign:
+	ng->ptr[id - 1] = data;
+	return 0;
+}
+
+static int ops_init(const struct pernet_operations *ops, struct net *net)
+{
+	int err = -ENOMEM;
+	void *data = NULL;
+
+	if (ops->id && ops->size) {
+		data = kzalloc(ops->size, GFP_KERNEL);
+		if (!data)
+			goto out;
+
+		err = net_assign_generic(net, *ops->id, data);
+		if (err)
+			goto cleanup;
+	}
+	err = 0;
+	if (ops->init)
+		err = ops->init(net);
+	if (!err)
+		return 0;
+
+cleanup:
+	kfree(data);
+
+out:
+	return err;
+}
+
+static void ops_free(const struct pernet_operations *ops, struct net *net)
+{
+	if (ops->id && ops->size) {
+		int id = *ops->id;
+		kfree(net_generic(net, id));
+	}
+}
+
+static void ops_exit_list(const struct pernet_operations *ops,
+			  struct list_head *net_exit_list)
+{
+	struct net *net;
+	if (ops->exit) {
+		list_for_each_entry(net, net_exit_list, exit_list)
+			ops->exit(net);
+	}
+	if (ops->exit_batch)
+		ops->exit_batch(net_exit_list);
+}
+
+static void ops_free_list(const struct pernet_operations *ops,
+			  struct list_head *net_exit_list)
+{
+	struct net *net;
+	if (ops->size && ops->id) {
+		list_for_each_entry(net, net_exit_list, exit_list)
+			ops_free(ops, net);
+	}
+}
+
+/*
+ * setup_net runs the initializers for the network namespace object.
+ */
+static __net_init int setup_net(struct net *net)
+{
+	/* Must be called with net_mutex held */
+	const struct pernet_operations *ops, *saved_ops;
+	int error = 0;
+	LIST_HEAD(net_exit_list);
+
+	atomic_set(&net->count, 1);
+	atomic_set(&net->passive, 1);
+	net->dev_base_seq = 1;
+
+#ifdef NETNS_REFCNT_DEBUG
+	atomic_set(&net->use_count, 0);
+#endif
+
+	list_for_each_entry(ops, &pernet_list, list) {
+		error = ops_init(ops, net);
+		if (error < 0)
+			goto out_undo;
+	}
+out:
+	return error;
+
+out_undo:
+	/* Walk through the list backwards calling the exit functions
+	 * for the pernet modules whose init functions did not fail.
+	 */
+	list_add(&net->exit_list, &net_exit_list);
+	saved_ops = ops;
+	list_for_each_entry_continue_reverse(ops, &pernet_list, list)
+		ops_exit_list(ops, &net_exit_list);
+
+	ops = saved_ops;
+	list_for_each_entry_continue_reverse(ops, &pernet_list, list)
+		ops_free_list(ops, &net_exit_list);
+
+	rcu_barrier();
+	goto out;
+}
+
+
+#ifdef CONFIG_NET_NS
+static struct kmem_cache *net_cachep;
+static struct workqueue_struct *netns_wq;
+
+static struct net *net_alloc(void)
+{
+	struct net *net = NULL;
+	struct net_generic *ng;
+
+	ng = net_alloc_generic();
+	if (!ng)
+		goto out;
+
+	net = kmem_cache_zalloc(net_cachep, GFP_KERNEL);
+	if (!net)
+		goto out_free;
+
+	rcu_assign_pointer(net->gen, ng);
+out:
+	return net;
+
+out_free:
+	kfree(ng);
+	goto out;
+}
+
+static void net_free(struct net *net)
+{
+#ifdef NETNS_REFCNT_DEBUG
+	if (unlikely(atomic_read(&net->use_count) != 0)) {
+		printk(KERN_EMERG "network namespace not free! Usage: %d\n",
+			atomic_read(&net->use_count));
+		return;
+	}
+#endif
+	kfree(net->gen);
+	kmem_cache_free(net_cachep, net);
+}
+
+void net_drop_ns(void *p)
+{
+	struct net *ns = p;
+	if (ns && atomic_dec_and_test(&ns->passive))
+		net_free(ns);
+}
+
+struct net *copy_net_ns(unsigned long flags, struct net *old_net)
+{
+	struct net *net;
+	int rv;
+
+	if (!(flags & CLONE_NEWNET))
+		return get_net(old_net);
+
+	net = net_alloc();
+	if (!net)
+		return ERR_PTR(-ENOMEM);
+	mutex_lock(&net_mutex);
+	rv = setup_net(net);
+	if (rv == 0) {
+		rtnl_lock();
+		list_add_tail_rcu(&net->list, &net_namespace_list);
+		rtnl_unlock();
+	}
+	mutex_unlock(&net_mutex);
+	if (rv < 0) {
+		net_drop_ns(net);
+		return ERR_PTR(rv);
+	}
+	return net;
+}
+
+static DEFINE_SPINLOCK(cleanup_list_lock);
+static LIST_HEAD(cleanup_list);  /* Must hold cleanup_list_lock to touch */
+
+static void cleanup_net(struct work_struct *work)
+{
+	const struct pernet_operations *ops;
+	struct net *net, *tmp;
+	LIST_HEAD(net_kill_list);
+	LIST_HEAD(net_exit_list);
+
+	/* Atomically snapshot the list of namespaces to cleanup */
+	spin_lock_irq(&cleanup_list_lock);
+	list_replace_init(&cleanup_list, &net_kill_list);
+	spin_unlock_irq(&cleanup_list_lock);
+
+	mutex_lock(&net_mutex);
+
+	/* Don't let anyone else find us. */
+	rtnl_lock();
+	list_for_each_entry(net, &net_kill_list, cleanup_list) {
+		list_del_rcu(&net->list);
+		list_add_tail(&net->exit_list, &net_exit_list);
+	}
+	rtnl_unlock();
+
+	/*
+	 * Another CPU might be rcu-iterating the list, wait for it.
+	 * This needs to be before calling the exit() notifiers, so
+	 * the rcu_barrier() below isn't sufficient alone.
+	 */
+	synchronize_rcu();
+
+	/* Run all of the network namespace exit methods */
+	list_for_each_entry_reverse(ops, &pernet_list, list)
+		ops_exit_list(ops, &net_exit_list);
+
+	/* Free the net generic variables */
+	list_for_each_entry_reverse(ops, &pernet_list, list)
+		ops_free_list(ops, &net_exit_list);
+
+	mutex_unlock(&net_mutex);
+
+	/* Ensure there are no outstanding rcu callbacks using this
+	 * network namespace.
+	 */
+	rcu_barrier();
+
+	/* Finally it is safe to free my network namespace structure */
+	list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
+		list_del_init(&net->exit_list);
+		net_drop_ns(net);
+	}
+}
+static DECLARE_WORK(net_cleanup_work, cleanup_net);
+
+void __put_net(struct net *net)
+{
+	/* Cleanup the network namespace in process context */
+	unsigned long flags;
+
+	spin_lock_irqsave(&cleanup_list_lock, flags);
+	list_add(&net->cleanup_list, &cleanup_list);
+	spin_unlock_irqrestore(&cleanup_list_lock, flags);
+
+	queue_work(netns_wq, &net_cleanup_work);
+}
+EXPORT_SYMBOL_GPL(__put_net);
+
+struct net *get_net_ns_by_fd(int fd)
+{
+	struct proc_inode *ei;
+	struct file *file;
+	struct net *net;
+
+	file = proc_ns_fget(fd);
+	if (IS_ERR(file))
+		return ERR_CAST(file);
+
+	ei = PROC_I(file->f_dentry->d_inode);
+	if (ei->ns_ops == &netns_operations)
+		net = get_net(ei->ns);
+	else
+		net = ERR_PTR(-EINVAL);
+
+	fput(file);
+	return net;
+}
+
+#else
+struct net *copy_net_ns(unsigned long flags, struct net *old_net)
+{
+	if (flags & CLONE_NEWNET)
+		return ERR_PTR(-EINVAL);
+	return old_net;
+}
+
+struct net *get_net_ns_by_fd(int fd)
+{
+	return ERR_PTR(-EINVAL);
+}
+#endif
+
+struct net *get_net_ns_by_pid(pid_t pid)
+{
+	struct task_struct *tsk;
+	struct net *net;
+
+	/* Lookup the network namespace */
+	net = ERR_PTR(-ESRCH);
+	rcu_read_lock();
+	tsk = find_task_by_vpid(pid);
+	if (tsk) {
+		struct nsproxy *nsproxy;
+		nsproxy = task_nsproxy(tsk);
+		if (nsproxy)
+			net = get_net(nsproxy->net_ns);
+	}
+	rcu_read_unlock();
+	return net;
+}
+EXPORT_SYMBOL_GPL(get_net_ns_by_pid);
+
+static int __init net_ns_init(void)
+{
+	struct net_generic *ng;
+
+#ifdef CONFIG_NET_NS
+	net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
+					SMP_CACHE_BYTES,
+					SLAB_PANIC, NULL);
+
+	/* Create workqueue for cleanup */
+	netns_wq = create_singlethread_workqueue("netns");
+	if (!netns_wq)
+		panic("Could not create netns workq");
+#endif
+
+	ng = net_alloc_generic();
+	if (!ng)
+		panic("Could not allocate generic netns");
+
+	rcu_assign_pointer(init_net.gen, ng);
+
+	mutex_lock(&net_mutex);
+	if (setup_net(&init_net))
+		panic("Could not setup the initial network namespace");
+
+	rtnl_lock();
+	list_add_tail_rcu(&init_net.list, &net_namespace_list);
+	rtnl_unlock();
+
+	mutex_unlock(&net_mutex);
+
+	return 0;
+}
+
+pure_initcall(net_ns_init);
+
+#ifdef CONFIG_NET_NS
+static int __register_pernet_operations(struct list_head *list,
+					struct pernet_operations *ops)
+{
+	struct net *net;
+	int error;
+	LIST_HEAD(net_exit_list);
+
+	list_add_tail(&ops->list, list);
+	if (ops->init || (ops->id && ops->size)) {
+		for_each_net(net) {
+			error = ops_init(ops, net);
+			if (error)
+				goto out_undo;
+			list_add_tail(&net->exit_list, &net_exit_list);
+		}
+	}
+	return 0;
+
+out_undo:
+	/* If I have an error cleanup all namespaces I initialized */
+	list_del(&ops->list);
+	ops_exit_list(ops, &net_exit_list);
+	ops_free_list(ops, &net_exit_list);
+	return error;
+}
+
+static void __unregister_pernet_operations(struct pernet_operations *ops)
+{
+	struct net *net;
+	LIST_HEAD(net_exit_list);
+
+	list_del(&ops->list);
+	for_each_net(net)
+		list_add_tail(&net->exit_list, &net_exit_list);
+	ops_exit_list(ops, &net_exit_list);
+	ops_free_list(ops, &net_exit_list);
+}
+
+#else
+
+static int __register_pernet_operations(struct list_head *list,
+					struct pernet_operations *ops)
+{
+	return ops_init(ops, &init_net);
+}
+
+static void __unregister_pernet_operations(struct pernet_operations *ops)
+{
+	LIST_HEAD(net_exit_list);
+	list_add(&init_net.exit_list, &net_exit_list);
+	ops_exit_list(ops, &net_exit_list);
+	ops_free_list(ops, &net_exit_list);
+}
+
+#endif /* CONFIG_NET_NS */
+
+static DEFINE_IDA(net_generic_ids);
+
+static int register_pernet_operations(struct list_head *list,
+				      struct pernet_operations *ops)
+{
+	int error;
+
+	if (ops->id) {
+again:
+		error = ida_get_new_above(&net_generic_ids, 1, ops->id);
+		if (error < 0) {
+			if (error == -EAGAIN) {
+				ida_pre_get(&net_generic_ids, GFP_KERNEL);
+				goto again;
+			}
+			return error;
+		}
+		max_gen_ptrs = max_t(unsigned int, max_gen_ptrs, *ops->id);
+	}
+	error = __register_pernet_operations(list, ops);
+	if (error) {
+		rcu_barrier();
+		if (ops->id)
+			ida_remove(&net_generic_ids, *ops->id);
+	}
+
+	return error;
+}
+
+static void unregister_pernet_operations(struct pernet_operations *ops)
+{
+	
+	__unregister_pernet_operations(ops);
+	rcu_barrier();
+	if (ops->id)
+		ida_remove(&net_generic_ids, *ops->id);
+}
+
+/**
+ *      register_pernet_subsys - register a network namespace subsystem
+ *	@ops:  pernet operations structure for the subsystem
+ *
+ *	Register a subsystem which has init and exit functions
+ *	that are called when network namespaces are created and
+ *	destroyed respectively.
+ *
+ *	When registered all network namespace init functions are
+ *	called for every existing network namespace.  Allowing kernel
+ *	modules to have a race free view of the set of network namespaces.
+ *
+ *	When a new network namespace is created all of the init
+ *	methods are called in the order in which they were registered.
+ *
+ *	When a network namespace is destroyed all of the exit methods
+ *	are called in the reverse of the order with which they were
+ *	registered.
+ */
+int register_pernet_subsys(struct pernet_operations *ops)
+{
+	int error;
+	mutex_lock(&net_mutex);
+	error =  register_pernet_operations(first_device, ops);
+	mutex_unlock(&net_mutex);
+	return error;
+}
+EXPORT_SYMBOL_GPL(register_pernet_subsys);
+
+/**
+ *      unregister_pernet_subsys - unregister a network namespace subsystem
+ *	@ops: pernet operations structure to manipulate
+ *
+ *	Remove the pernet operations structure from the list to be
+ *	used when network namespaces are created or destroyed.  In
+ *	addition run the exit method for all existing network
+ *	namespaces.
+ */
+void unregister_pernet_subsys(struct pernet_operations *ops)
+{
+	mutex_lock(&net_mutex);
+	unregister_pernet_operations(ops);
+	mutex_unlock(&net_mutex);
+}
+EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
+
+/**
+ *      register_pernet_device - register a network namespace device
+ *	@ops:  pernet operations structure for the subsystem
+ *
+ *	Register a device which has init and exit functions
+ *	that are called when network namespaces are created and
+ *	destroyed respectively.
+ *
+ *	When registered all network namespace init functions are
+ *	called for every existing network namespace.  Allowing kernel
+ *	modules to have a race free view of the set of network namespaces.
+ *
+ *	When a new network namespace is created all of the init
+ *	methods are called in the order in which they were registered.
+ *
+ *	When a network namespace is destroyed all of the exit methods
+ *	are called in the reverse of the order with which they were
+ *	registered.
+ */
+int register_pernet_device(struct pernet_operations *ops)
+{
+	int error;
+	mutex_lock(&net_mutex);
+	error = register_pernet_operations(&pernet_list, ops);
+	if (!error && (first_device == &pernet_list))
+		first_device = &ops->list;
+	mutex_unlock(&net_mutex);
+	return error;
+}
+EXPORT_SYMBOL_GPL(register_pernet_device);
+
+/**
+ *      unregister_pernet_device - unregister a network namespace netdevice
+ *	@ops: pernet operations structure to manipulate
+ *
+ *	Remove the pernet operations structure from the list to be
+ *	used when network namespaces are created or destroyed.  In
+ *	addition run the exit method for all existing network
+ *	namespaces.
+ */
+void unregister_pernet_device(struct pernet_operations *ops)
+{
+	mutex_lock(&net_mutex);
+	if (&ops->list == first_device)
+		first_device = first_device->next;
+	unregister_pernet_operations(ops);
+	mutex_unlock(&net_mutex);
+}
+EXPORT_SYMBOL_GPL(unregister_pernet_device);
+
+#ifdef CONFIG_NET_NS
+static void *netns_get(struct task_struct *task)
+{
+	struct net *net = NULL;
+	struct nsproxy *nsproxy;
+
+	rcu_read_lock();
+	nsproxy = task_nsproxy(task);
+	if (nsproxy)
+		net = get_net(nsproxy->net_ns);
+	rcu_read_unlock();
+
+	return net;
+}
+
+static void netns_put(void *ns)
+{
+	put_net(ns);
+}
+
+static int netns_install(struct nsproxy *nsproxy, void *ns)
+{
+	put_net(nsproxy->net_ns);
+	nsproxy->net_ns = get_net(ns);
+	return 0;
+}
+
+const struct proc_ns_operations netns_operations = {
+	.name		= "net",
+	.type		= CLONE_NEWNET,
+	.get		= netns_get,
+	.put		= netns_put,
+	.install	= netns_install,
+};
+#endif
diff --git a/net/core/netevent.c b/net/core/netevent.c
new file mode 100644
index 00000000..f17ccd29
--- /dev/null
+++ b/net/core/netevent.c
@@ -0,0 +1,70 @@
+/*
+ *	Network event notifiers
+ *
+ *	Authors:
+ *      Tom Tucker             <tom@opengridcomputing.com>
+ *      Steve Wise             <swise@opengridcomputing.com>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ *
+ *	Fixes:
+ */
+
+#include <linux/rtnetlink.h>
+#include <linux/notifier.h>
+#include <linux/export.h>
+#include <net/netevent.h>
+
+static ATOMIC_NOTIFIER_HEAD(netevent_notif_chain);
+
+/**
+ *	register_netevent_notifier - register a netevent notifier block
+ *	@nb: notifier
+ *
+ *	Register a notifier to be called when a netevent occurs.
+ *	The notifier passed is linked into the kernel structures and must
+ *	not be reused until it has been unregistered. A negative errno code
+ *	is returned on a failure.
+ */
+int register_netevent_notifier(struct notifier_block *nb)
+{
+	int err;
+
+	err = atomic_notifier_chain_register(&netevent_notif_chain, nb);
+	return err;
+}
+EXPORT_SYMBOL_GPL(register_netevent_notifier);
+
+/**
+ *	netevent_unregister_notifier - unregister a netevent notifier block
+ *	@nb: notifier
+ *
+ *	Unregister a notifier previously registered by
+ *	register_neigh_notifier(). The notifier is unlinked into the
+ *	kernel structures and may then be reused. A negative errno code
+ *	is returned on a failure.
+ */
+
+int unregister_netevent_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_unregister(&netevent_notif_chain, nb);
+}
+EXPORT_SYMBOL_GPL(unregister_netevent_notifier);
+
+/**
+ *	call_netevent_notifiers - call all netevent notifier blocks
+ *      @val: value passed unmodified to notifier function
+ *      @v:   pointer passed unmodified to notifier function
+ *
+ *	Call all neighbour notifier blocks.  Parameters and return value
+ *	are as for notifier_call_chain().
+ */
+
+int call_netevent_notifiers(unsigned long val, void *v)
+{
+	return atomic_notifier_call_chain(&netevent_notif_chain, val, v);
+}
+EXPORT_SYMBOL_GPL(call_netevent_notifiers);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
new file mode 100644
index 00000000..f9f40b93
--- /dev/null
+++ b/net/core/netpoll.c
@@ -0,0 +1,948 @@
+/*
+ * Common framework for low-level network console, dump, and debugger code
+ *
+ * Sep 8 2003  Matt Mackall <mpm@selenic.com>
+ *
+ * based on the netconsole code from:
+ *
+ * Copyright (C) 2001  Ingo Molnar <mingo@redhat.com>
+ * Copyright (C) 2002  Red Hat, Inc.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/moduleparam.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/string.h>
+#include <linux/if_arp.h>
+#include <linux/inetdevice.h>
+#include <linux/inet.h>
+#include <linux/interrupt.h>
+#include <linux/netpoll.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/rcupdate.h>
+#include <linux/workqueue.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <asm/unaligned.h>
+#include <trace/events/napi.h>
+
+/*
+ * We maintain a small pool of fully-sized skbs, to make sure the
+ * message gets out even in extreme OOM situations.
+ */
+
+#define MAX_UDP_CHUNK 1460
+#define MAX_SKBS 32
+
+static struct sk_buff_head skb_pool;
+
+static atomic_t trapped;
+
+#define USEC_PER_POLL	50
+#define NETPOLL_RX_ENABLED  1
+#define NETPOLL_RX_DROP     2
+
+#define MAX_SKB_SIZE							\
+	(sizeof(struct ethhdr) +					\
+	 sizeof(struct iphdr) +						\
+	 sizeof(struct udphdr) +					\
+	 MAX_UDP_CHUNK)
+
+static void zap_completion_queue(void);
+static void arp_reply(struct sk_buff *skb);
+
+static unsigned int carrier_timeout = 4;
+module_param(carrier_timeout, uint, 0644);
+
+#define np_info(np, fmt, ...)				\
+	pr_info("%s: " fmt, np->name, ##__VA_ARGS__)
+#define np_err(np, fmt, ...)				\
+	pr_err("%s: " fmt, np->name, ##__VA_ARGS__)
+#define np_notice(np, fmt, ...)				\
+	pr_notice("%s: " fmt, np->name, ##__VA_ARGS__)
+
+static void queue_process(struct work_struct *work)
+{
+	struct netpoll_info *npinfo =
+		container_of(work, struct netpoll_info, tx_work.work);
+	struct sk_buff *skb;
+	unsigned long flags;
+
+	while ((skb = skb_dequeue(&npinfo->txq))) {
+		struct net_device *dev = skb->dev;
+		const struct net_device_ops *ops = dev->netdev_ops;
+		struct netdev_queue *txq;
+
+		if (!netif_device_present(dev) || !netif_running(dev)) {
+			__kfree_skb(skb);
+			continue;
+		}
+
+		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
+
+		local_irq_save(flags);
+		__netif_tx_lock(txq, smp_processor_id());
+		if (netif_xmit_frozen_or_stopped(txq) ||
+		    ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) {
+			skb_queue_head(&npinfo->txq, skb);
+			__netif_tx_unlock(txq);
+			local_irq_restore(flags);
+
+			schedule_delayed_work(&npinfo->tx_work, HZ/10);
+			return;
+		}
+		__netif_tx_unlock(txq);
+		local_irq_restore(flags);
+	}
+}
+
+static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh,
+			    unsigned short ulen, __be32 saddr, __be32 daddr)
+{
+	__wsum psum;
+
+	if (uh->check == 0 || skb_csum_unnecessary(skb))
+		return 0;
+
+	psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
+
+	if (skb->ip_summed == CHECKSUM_COMPLETE &&
+	    !csum_fold(csum_add(psum, skb->csum)))
+		return 0;
+
+	skb->csum = psum;
+
+	return __skb_checksum_complete(skb);
+}
+
+/*
+ * Check whether delayed processing was scheduled for our NIC. If so,
+ * we attempt to grab the poll lock and use ->poll() to pump the card.
+ * If this fails, either we've recursed in ->poll() or it's already
+ * running on another CPU.
+ *
+ * Note: we don't mask interrupts with this lock because we're using
+ * trylock here and interrupts are already disabled in the softirq
+ * case. Further, we test the poll_owner to avoid recursion on UP
+ * systems where the lock doesn't exist.
+ *
+ * In cases where there is bi-directional communications, reading only
+ * one message at a time can lead to packets being dropped by the
+ * network adapter, forcing superfluous retries and possibly timeouts.
+ * Thus, we set our budget to greater than 1.
+ */
+static int poll_one_napi(struct netpoll_info *npinfo,
+			 struct napi_struct *napi, int budget)
+{
+	int work;
+
+	/* net_rx_action's ->poll() invocations and our's are
+	 * synchronized by this test which is only made while
+	 * holding the napi->poll_lock.
+	 */
+	if (!test_bit(NAPI_STATE_SCHED, &napi->state))
+		return budget;
+
+	npinfo->rx_flags |= NETPOLL_RX_DROP;
+	atomic_inc(&trapped);
+	set_bit(NAPI_STATE_NPSVC, &napi->state);
+
+	work = napi->poll(napi, budget);
+	trace_napi_poll(napi);
+
+	clear_bit(NAPI_STATE_NPSVC, &napi->state);
+	atomic_dec(&trapped);
+	npinfo->rx_flags &= ~NETPOLL_RX_DROP;
+
+	return budget - work;
+}
+
+static void poll_napi(struct net_device *dev)
+{
+	struct napi_struct *napi;
+	int budget = 16;
+
+	list_for_each_entry(napi, &dev->napi_list, dev_list) {
+		if (napi->poll_owner != smp_processor_id() &&
+		    spin_trylock(&napi->poll_lock)) {
+			budget = poll_one_napi(dev->npinfo, napi, budget);
+			spin_unlock(&napi->poll_lock);
+
+			if (!budget)
+				break;
+		}
+	}
+}
+
+static void service_arp_queue(struct netpoll_info *npi)
+{
+	if (npi) {
+		struct sk_buff *skb;
+
+		while ((skb = skb_dequeue(&npi->arp_tx)))
+			arp_reply(skb);
+	}
+}
+
+static void netpoll_poll_dev(struct net_device *dev)
+{
+	const struct net_device_ops *ops;
+
+	if (!dev || !netif_running(dev))
+		return;
+
+	ops = dev->netdev_ops;
+	if (!ops->ndo_poll_controller)
+		return;
+
+	/* Process pending work on NIC */
+	ops->ndo_poll_controller(dev);
+
+	poll_napi(dev);
+
+	if (dev->flags & IFF_SLAVE) {
+		if (dev->npinfo) {
+			struct net_device *bond_dev = dev->master;
+			struct sk_buff *skb;
+			while ((skb = skb_dequeue(&dev->npinfo->arp_tx))) {
+				skb->dev = bond_dev;
+				skb_queue_tail(&bond_dev->npinfo->arp_tx, skb);
+			}
+		}
+	}
+
+	service_arp_queue(dev->npinfo);
+
+	zap_completion_queue();
+}
+
+static void refill_skbs(void)
+{
+	struct sk_buff *skb;
+	unsigned long flags;
+
+	spin_lock_irqsave(&skb_pool.lock, flags);
+	while (skb_pool.qlen < MAX_SKBS) {
+		skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC);
+		if (!skb)
+			break;
+
+		__skb_queue_tail(&skb_pool, skb);
+	}
+	spin_unlock_irqrestore(&skb_pool.lock, flags);
+}
+
+static void zap_completion_queue(void)
+{
+	unsigned long flags;
+	struct softnet_data *sd = &get_cpu_var(softnet_data);
+
+	if (sd->completion_queue) {
+		struct sk_buff *clist;
+
+		local_irq_save(flags);
+		clist = sd->completion_queue;
+		sd->completion_queue = NULL;
+		local_irq_restore(flags);
+
+		while (clist != NULL) {
+			struct sk_buff *skb = clist;
+			clist = clist->next;
+			if (skb->destructor) {
+				atomic_inc(&skb->users);
+				dev_kfree_skb_any(skb); /* put this one back */
+			} else {
+				__kfree_skb(skb);
+			}
+		}
+	}
+
+	put_cpu_var(softnet_data);
+}
+
+static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve)
+{
+	int count = 0;
+	struct sk_buff *skb;
+
+	zap_completion_queue();
+	refill_skbs();
+repeat:
+
+	skb = alloc_skb(len, GFP_ATOMIC);
+	if (!skb)
+		skb = skb_dequeue(&skb_pool);
+
+	if (!skb) {
+		if (++count < 10) {
+			netpoll_poll_dev(np->dev);
+			goto repeat;
+		}
+		return NULL;
+	}
+
+	atomic_set(&skb->users, 1);
+	skb_reserve(skb, reserve);
+	return skb;
+}
+
+static int netpoll_owner_active(struct net_device *dev)
+{
+	struct napi_struct *napi;
+
+	list_for_each_entry(napi, &dev->napi_list, dev_list) {
+		if (napi->poll_owner == smp_processor_id())
+			return 1;
+	}
+	return 0;
+}
+
+void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
+			     struct net_device *dev)
+{
+	int status = NETDEV_TX_BUSY;
+	unsigned long tries;
+	const struct net_device_ops *ops = dev->netdev_ops;
+	/* It is up to the caller to keep npinfo alive. */
+	struct netpoll_info *npinfo = np->dev->npinfo;
+
+	if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
+		__kfree_skb(skb);
+		return;
+	}
+
+	/* don't get messages out of order, and no recursion */
+	if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) {
+		struct netdev_queue *txq;
+		unsigned long flags;
+
+		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
+
+		local_irq_save(flags);
+		/* try until next clock tick */
+		for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
+		     tries > 0; --tries) {
+			if (__netif_tx_trylock(txq)) {
+				if (!netif_xmit_stopped(txq)) {
+					status = ops->ndo_start_xmit(skb, dev);
+					if (status == NETDEV_TX_OK)
+						txq_trans_update(txq);
+				}
+				__netif_tx_unlock(txq);
+
+				if (status == NETDEV_TX_OK)
+					break;
+
+			}
+
+			/* tickle device maybe there is some cleanup */
+			netpoll_poll_dev(np->dev);
+
+			udelay(USEC_PER_POLL);
+		}
+
+		WARN_ONCE(!irqs_disabled(),
+			"netpoll_send_skb(): %s enabled interrupts in poll (%pF)\n",
+			dev->name, ops->ndo_start_xmit);
+
+		local_irq_restore(flags);
+	}
+
+	if (status != NETDEV_TX_OK) {
+		skb_queue_tail(&npinfo->txq, skb);
+		schedule_delayed_work(&npinfo->tx_work,0);
+	}
+}
+EXPORT_SYMBOL(netpoll_send_skb_on_dev);
+
+void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
+{
+	int total_len, ip_len, udp_len;
+	struct sk_buff *skb;
+	struct udphdr *udph;
+	struct iphdr *iph;
+	struct ethhdr *eth;
+
+	udp_len = len + sizeof(*udph);
+	ip_len = udp_len + sizeof(*iph);
+	total_len = ip_len + LL_RESERVED_SPACE(np->dev);
+
+	skb = find_skb(np, total_len + np->dev->needed_tailroom,
+		       total_len - len);
+	if (!skb)
+		return;
+
+	skb_copy_to_linear_data(skb, msg, len);
+	skb_put(skb, len);
+
+	skb_push(skb, sizeof(*udph));
+	skb_reset_transport_header(skb);
+	udph = udp_hdr(skb);
+	udph->source = htons(np->local_port);
+	udph->dest = htons(np->remote_port);
+	udph->len = htons(udp_len);
+	udph->check = 0;
+	udph->check = csum_tcpudp_magic(np->local_ip,
+					np->remote_ip,
+					udp_len, IPPROTO_UDP,
+					csum_partial(udph, udp_len, 0));
+	if (udph->check == 0)
+		udph->check = CSUM_MANGLED_0;
+
+	skb_push(skb, sizeof(*iph));
+	skb_reset_network_header(skb);
+	iph = ip_hdr(skb);
+
+	/* iph->version = 4; iph->ihl = 5; */
+	put_unaligned(0x45, (unsigned char *)iph);
+	iph->tos      = 0;
+	put_unaligned(htons(ip_len), &(iph->tot_len));
+	iph->id       = 0;
+	iph->frag_off = 0;
+	iph->ttl      = 64;
+	iph->protocol = IPPROTO_UDP;
+	iph->check    = 0;
+	put_unaligned(np->local_ip, &(iph->saddr));
+	put_unaligned(np->remote_ip, &(iph->daddr));
+	iph->check    = ip_fast_csum((unsigned char *)iph, iph->ihl);
+
+	eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
+	skb_reset_mac_header(skb);
+	skb->protocol = eth->h_proto = htons(ETH_P_IP);
+	memcpy(eth->h_source, np->dev->dev_addr, ETH_ALEN);
+	memcpy(eth->h_dest, np->remote_mac, ETH_ALEN);
+
+	skb->dev = np->dev;
+
+	netpoll_send_skb(np, skb);
+}
+EXPORT_SYMBOL(netpoll_send_udp);
+
+static void arp_reply(struct sk_buff *skb)
+{
+	struct netpoll_info *npinfo = skb->dev->npinfo;
+	struct arphdr *arp;
+	unsigned char *arp_ptr;
+	int size, type = ARPOP_REPLY, ptype = ETH_P_ARP;
+	__be32 sip, tip;
+	unsigned char *sha;
+	struct sk_buff *send_skb;
+	struct netpoll *np, *tmp;
+	unsigned long flags;
+	int hlen, tlen;
+	int hits = 0;
+
+	if (list_empty(&npinfo->rx_np))
+		return;
+
+	/* Before checking the packet, we do some early
+	   inspection whether this is interesting at all */
+	spin_lock_irqsave(&npinfo->rx_lock, flags);
+	list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
+		if (np->dev == skb->dev)
+			hits++;
+	}
+	spin_unlock_irqrestore(&npinfo->rx_lock, flags);
+
+	/* No netpoll struct is using this dev */
+	if (!hits)
+		return;
+
+	/* No arp on this interface */
+	if (skb->dev->flags & IFF_NOARP)
+		return;
+
+	if (!pskb_may_pull(skb, arp_hdr_len(skb->dev)))
+		return;
+
+	skb_reset_network_header(skb);
+	skb_reset_transport_header(skb);
+	arp = arp_hdr(skb);
+
+	if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
+	     arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
+	    arp->ar_pro != htons(ETH_P_IP) ||
+	    arp->ar_op != htons(ARPOP_REQUEST))
+		return;
+
+	arp_ptr = (unsigned char *)(arp+1);
+	/* save the location of the src hw addr */
+	sha = arp_ptr;
+	arp_ptr += skb->dev->addr_len;
+	memcpy(&sip, arp_ptr, 4);
+	arp_ptr += 4;
+	/* If we actually cared about dst hw addr,
+	   it would get copied here */
+	arp_ptr += skb->dev->addr_len;
+	memcpy(&tip, arp_ptr, 4);
+
+	/* Should we ignore arp? */
+	if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
+		return;
+
+	size = arp_hdr_len(skb->dev);
+
+	spin_lock_irqsave(&npinfo->rx_lock, flags);
+	list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
+		if (tip != np->local_ip)
+			continue;
+
+		hlen = LL_RESERVED_SPACE(np->dev);
+		tlen = np->dev->needed_tailroom;
+		send_skb = find_skb(np, size + hlen + tlen, hlen);
+		if (!send_skb)
+			continue;
+
+		skb_reset_network_header(send_skb);
+		arp = (struct arphdr *) skb_put(send_skb, size);
+		send_skb->dev = skb->dev;
+		send_skb->protocol = htons(ETH_P_ARP);
+
+		/* Fill the device header for the ARP frame */
+		if (dev_hard_header(send_skb, skb->dev, ptype,
+				    sha, np->dev->dev_addr,
+				    send_skb->len) < 0) {
+			kfree_skb(send_skb);
+			continue;
+		}
+
+		/*
+		 * Fill out the arp protocol part.
+		 *
+		 * we only support ethernet device type,
+		 * which (according to RFC 1390) should
+		 * always equal 1 (Ethernet).
+		 */
+
+		arp->ar_hrd = htons(np->dev->type);
+		arp->ar_pro = htons(ETH_P_IP);
+		arp->ar_hln = np->dev->addr_len;
+		arp->ar_pln = 4;
+		arp->ar_op = htons(type);
+
+		arp_ptr = (unsigned char *)(arp + 1);
+		memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len);
+		arp_ptr += np->dev->addr_len;
+		memcpy(arp_ptr, &tip, 4);
+		arp_ptr += 4;
+		memcpy(arp_ptr, sha, np->dev->addr_len);
+		arp_ptr += np->dev->addr_len;
+		memcpy(arp_ptr, &sip, 4);
+
+		netpoll_send_skb(np, send_skb);
+
+		/* If there are several rx_hooks for the same address,
+		   we're fine by sending a single reply */
+		break;
+	}
+	spin_unlock_irqrestore(&npinfo->rx_lock, flags);
+}
+
+int __netpoll_rx(struct sk_buff *skb)
+{
+	int proto, len, ulen;
+	int hits = 0;
+	const struct iphdr *iph;
+	struct udphdr *uh;
+	struct netpoll_info *npinfo = skb->dev->npinfo;
+	struct netpoll *np, *tmp;
+
+	if (list_empty(&npinfo->rx_np))
+		goto out;
+
+	if (skb->dev->type != ARPHRD_ETHER)
+		goto out;
+
+	/* check if netpoll clients need ARP */
+	if (skb->protocol == htons(ETH_P_ARP) &&
+	    atomic_read(&trapped)) {
+		skb_queue_tail(&npinfo->arp_tx, skb);
+		return 1;
+	}
+
+	proto = ntohs(eth_hdr(skb)->h_proto);
+	if (proto != ETH_P_IP)
+		goto out;
+	if (skb->pkt_type == PACKET_OTHERHOST)
+		goto out;
+	if (skb_shared(skb))
+		goto out;
+
+	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+		goto out;
+	iph = (struct iphdr *)skb->data;
+	if (iph->ihl < 5 || iph->version != 4)
+		goto out;
+	if (!pskb_may_pull(skb, iph->ihl*4))
+		goto out;
+	iph = (struct iphdr *)skb->data;
+	if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
+		goto out;
+
+	len = ntohs(iph->tot_len);
+	if (skb->len < len || len < iph->ihl*4)
+		goto out;
+
+	/*
+	 * Our transport medium may have padded the buffer out.
+	 * Now We trim to the true length of the frame.
+	 */
+	if (pskb_trim_rcsum(skb, len))
+		goto out;
+
+	iph = (struct iphdr *)skb->data;
+	if (iph->protocol != IPPROTO_UDP)
+		goto out;
+
+	len -= iph->ihl*4;
+	uh = (struct udphdr *)(((char *)iph) + iph->ihl*4);
+	ulen = ntohs(uh->len);
+
+	if (ulen != len)
+		goto out;
+	if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr))
+		goto out;
+
+	list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
+		if (np->local_ip && np->local_ip != iph->daddr)
+			continue;
+		if (np->remote_ip && np->remote_ip != iph->saddr)
+			continue;
+		if (np->local_port && np->local_port != ntohs(uh->dest))
+			continue;
+
+		np->rx_hook(np, ntohs(uh->source),
+			       (char *)(uh+1),
+			       ulen - sizeof(struct udphdr));
+		hits++;
+	}
+
+	if (!hits)
+		goto out;
+
+	kfree_skb(skb);
+	return 1;
+
+out:
+	if (atomic_read(&trapped)) {
+		kfree_skb(skb);
+		return 1;
+	}
+
+	return 0;
+}
+
+void netpoll_print_options(struct netpoll *np)
+{
+	np_info(np, "local port %d\n", np->local_port);
+	np_info(np, "local IP %pI4\n", &np->local_ip);
+	np_info(np, "interface '%s'\n", np->dev_name);
+	np_info(np, "remote port %d\n", np->remote_port);
+	np_info(np, "remote IP %pI4\n", &np->remote_ip);
+	np_info(np, "remote ethernet address %pM\n", np->remote_mac);
+}
+EXPORT_SYMBOL(netpoll_print_options);
+
+int netpoll_parse_options(struct netpoll *np, char *opt)
+{
+	char *cur=opt, *delim;
+
+	if (*cur != '@') {
+		if ((delim = strchr(cur, '@')) == NULL)
+			goto parse_failed;
+		*delim = 0;
+		np->local_port = simple_strtol(cur, NULL, 10);
+		cur = delim;
+	}
+	cur++;
+
+	if (*cur != '/') {
+		if ((delim = strchr(cur, '/')) == NULL)
+			goto parse_failed;
+		*delim = 0;
+		np->local_ip = in_aton(cur);
+		cur = delim;
+	}
+	cur++;
+
+	if (*cur != ',') {
+		/* parse out dev name */
+		if ((delim = strchr(cur, ',')) == NULL)
+			goto parse_failed;
+		*delim = 0;
+		strlcpy(np->dev_name, cur, sizeof(np->dev_name));
+		cur = delim;
+	}
+	cur++;
+
+	if (*cur != '@') {
+		/* dst port */
+		if ((delim = strchr(cur, '@')) == NULL)
+			goto parse_failed;
+		*delim = 0;
+		if (*cur == ' ' || *cur == '\t')
+			np_info(np, "warning: whitespace is not allowed\n");
+		np->remote_port = simple_strtol(cur, NULL, 10);
+		cur = delim;
+	}
+	cur++;
+
+	/* dst ip */
+	if ((delim = strchr(cur, '/')) == NULL)
+		goto parse_failed;
+	*delim = 0;
+	np->remote_ip = in_aton(cur);
+	cur = delim + 1;
+
+	if (*cur != 0) {
+		/* MAC address */
+		if (!mac_pton(cur, np->remote_mac))
+			goto parse_failed;
+	}
+
+	netpoll_print_options(np);
+
+	return 0;
+
+ parse_failed:
+	np_info(np, "couldn't parse config at '%s'!\n", cur);
+	return -1;
+}
+EXPORT_SYMBOL(netpoll_parse_options);
+
+int __netpoll_setup(struct netpoll *np)
+{
+	struct net_device *ndev = np->dev;
+	struct netpoll_info *npinfo;
+	const struct net_device_ops *ops;
+	unsigned long flags;
+	int err;
+
+	if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) ||
+	    !ndev->netdev_ops->ndo_poll_controller) {
+		np_err(np, "%s doesn't support polling, aborting\n",
+		       np->dev_name);
+		err = -ENOTSUPP;
+		goto out;
+	}
+
+	if (!ndev->npinfo) {
+		npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL);
+		if (!npinfo) {
+			err = -ENOMEM;
+			goto out;
+		}
+
+		npinfo->rx_flags = 0;
+		INIT_LIST_HEAD(&npinfo->rx_np);
+
+		spin_lock_init(&npinfo->rx_lock);
+		skb_queue_head_init(&npinfo->arp_tx);
+		skb_queue_head_init(&npinfo->txq);
+		INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);
+
+		atomic_set(&npinfo->refcnt, 1);
+
+		ops = np->dev->netdev_ops;
+		if (ops->ndo_netpoll_setup) {
+			err = ops->ndo_netpoll_setup(ndev, npinfo);
+			if (err)
+				goto free_npinfo;
+		}
+	} else {
+		npinfo = ndev->npinfo;
+		atomic_inc(&npinfo->refcnt);
+	}
+
+	npinfo->netpoll = np;
+
+	if (np->rx_hook) {
+		spin_lock_irqsave(&npinfo->rx_lock, flags);
+		npinfo->rx_flags |= NETPOLL_RX_ENABLED;
+		list_add_tail(&np->rx, &npinfo->rx_np);
+		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
+	}
+
+	/* last thing to do is link it to the net device structure */
+	rcu_assign_pointer(ndev->npinfo, npinfo);
+
+	return 0;
+
+free_npinfo:
+	kfree(npinfo);
+out:
+	return err;
+}
+EXPORT_SYMBOL_GPL(__netpoll_setup);
+
+int netpoll_setup(struct netpoll *np)
+{
+	struct net_device *ndev = NULL;
+	struct in_device *in_dev;
+	int err;
+
+	if (np->dev_name)
+		ndev = dev_get_by_name(&init_net, np->dev_name);
+	if (!ndev) {
+		np_err(np, "%s doesn't exist, aborting\n", np->dev_name);
+		return -ENODEV;
+	}
+
+	if (ndev->master) {
+		np_err(np, "%s is a slave device, aborting\n", np->dev_name);
+		err = -EBUSY;
+		goto put;
+	}
+
+	if (!netif_running(ndev)) {
+		unsigned long atmost, atleast;
+
+		np_info(np, "device %s not up yet, forcing it\n", np->dev_name);
+
+		rtnl_lock();
+		err = dev_open(ndev);
+		rtnl_unlock();
+
+		if (err) {
+			np_err(np, "failed to open %s\n", ndev->name);
+			goto put;
+		}
+
+		atleast = jiffies + HZ/10;
+		atmost = jiffies + carrier_timeout * HZ;
+		while (!netif_carrier_ok(ndev)) {
+			if (time_after(jiffies, atmost)) {
+				np_notice(np, "timeout waiting for carrier\n");
+				break;
+			}
+			msleep(1);
+		}
+
+		/* If carrier appears to come up instantly, we don't
+		 * trust it and pause so that we don't pump all our
+		 * queued console messages into the bitbucket.
+		 */
+
+		if (time_before(jiffies, atleast)) {
+			np_notice(np, "carrier detect appears untrustworthy, waiting 4 seconds\n");
+			msleep(4000);
+		}
+	}
+
+	if (!np->local_ip) {
+		rcu_read_lock();
+		in_dev = __in_dev_get_rcu(ndev);
+
+		if (!in_dev || !in_dev->ifa_list) {
+			rcu_read_unlock();
+			np_err(np, "no IP address for %s, aborting\n",
+			       np->dev_name);
+			err = -EDESTADDRREQ;
+			goto put;
+		}
+
+		np->local_ip = in_dev->ifa_list->ifa_local;
+		rcu_read_unlock();
+		np_info(np, "local IP %pI4\n", &np->local_ip);
+	}
+
+	np->dev = ndev;
+
+	/* fill up the skb queue */
+	refill_skbs();
+
+	rtnl_lock();
+	err = __netpoll_setup(np);
+	rtnl_unlock();
+
+	if (err)
+		goto put;
+
+	return 0;
+
+put:
+	dev_put(ndev);
+	return err;
+}
+EXPORT_SYMBOL(netpoll_setup);
+
+static int __init netpoll_init(void)
+{
+	skb_queue_head_init(&skb_pool);
+	return 0;
+}
+core_initcall(netpoll_init);
+
+void __netpoll_cleanup(struct netpoll *np)
+{
+	struct netpoll_info *npinfo;
+	unsigned long flags;
+
+	npinfo = np->dev->npinfo;
+	if (!npinfo)
+		return;
+
+	if (!list_empty(&npinfo->rx_np)) {
+		spin_lock_irqsave(&npinfo->rx_lock, flags);
+		list_del(&np->rx);
+		if (list_empty(&npinfo->rx_np))
+			npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
+		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
+	}
+
+	if (atomic_dec_and_test(&npinfo->refcnt)) {
+		const struct net_device_ops *ops;
+
+		ops = np->dev->netdev_ops;
+		if (ops->ndo_netpoll_cleanup)
+			ops->ndo_netpoll_cleanup(np->dev);
+
+		RCU_INIT_POINTER(np->dev->npinfo, NULL);
+
+		/* avoid racing with NAPI reading npinfo */
+		synchronize_rcu_bh();
+
+		skb_queue_purge(&npinfo->arp_tx);
+		skb_queue_purge(&npinfo->txq);
+		cancel_delayed_work_sync(&npinfo->tx_work);
+
+		/* clean after last, unfinished work */
+		__skb_queue_purge(&npinfo->txq);
+		kfree(npinfo);
+	}
+}
+EXPORT_SYMBOL_GPL(__netpoll_cleanup);
+
+void netpoll_cleanup(struct netpoll *np)
+{
+	if (!np->dev)
+		return;
+
+	rtnl_lock();
+	__netpoll_cleanup(np);
+	rtnl_unlock();
+
+	dev_put(np->dev);
+	np->dev = NULL;
+}
+EXPORT_SYMBOL(netpoll_cleanup);
+
+int netpoll_trap(void)
+{
+	return atomic_read(&trapped);
+}
+EXPORT_SYMBOL(netpoll_trap);
+
+void netpoll_set_trap(int trap)
+{
+	if (trap)
+		atomic_inc(&trapped);
+	else
+		atomic_dec(&trapped);
+}
+EXPORT_SYMBOL(netpoll_set_trap);
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
new file mode 100644
index 00000000..ba6900f7
--- /dev/null
+++ b/net/core/netprio_cgroup.c
@@ -0,0 +1,337 @@
+/*
+ * net/core/netprio_cgroup.c	Priority Control Group
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Neil Horman <nhorman@tuxdriver.com>
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <linux/cgroup.h>
+#include <linux/rcupdate.h>
+#include <linux/atomic.h>
+#include <net/rtnetlink.h>
+#include <net/pkt_cls.h>
+#include <net/sock.h>
+#include <net/netprio_cgroup.h>
+
+static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp);
+static void cgrp_destroy(struct cgroup *cgrp);
+static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp);
+
+struct cgroup_subsys net_prio_subsys = {
+	.name		= "net_prio",
+	.create		= cgrp_create,
+	.destroy	= cgrp_destroy,
+	.populate	= cgrp_populate,
+#ifdef CONFIG_NETPRIO_CGROUP
+	.subsys_id	= net_prio_subsys_id,
+#endif
+	.module		= THIS_MODULE
+};
+
+#define PRIOIDX_SZ 128
+
+static unsigned long prioidx_map[PRIOIDX_SZ];
+static DEFINE_SPINLOCK(prioidx_map_lock);
+static atomic_t max_prioidx = ATOMIC_INIT(0);
+
+static inline struct cgroup_netprio_state *cgrp_netprio_state(struct cgroup *cgrp)
+{
+	return container_of(cgroup_subsys_state(cgrp, net_prio_subsys_id),
+			    struct cgroup_netprio_state, css);
+}
+
+static int get_prioidx(u32 *prio)
+{
+	unsigned long flags;
+	u32 prioidx;
+
+	spin_lock_irqsave(&prioidx_map_lock, flags);
+	prioidx = find_first_zero_bit(prioidx_map, sizeof(unsigned long) * PRIOIDX_SZ);
+	if (prioidx == sizeof(unsigned long) * PRIOIDX_SZ) {
+		spin_unlock_irqrestore(&prioidx_map_lock, flags);
+		return -ENOSPC;
+	}
+	set_bit(prioidx, prioidx_map);
+	spin_unlock_irqrestore(&prioidx_map_lock, flags);
+	atomic_set(&max_prioidx, prioidx);
+	*prio = prioidx;
+	return 0;
+}
+
+static void put_prioidx(u32 idx)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&prioidx_map_lock, flags);
+	clear_bit(idx, prioidx_map);
+	spin_unlock_irqrestore(&prioidx_map_lock, flags);
+}
+
+static void extend_netdev_table(struct net_device *dev, u32 new_len)
+{
+	size_t new_size = sizeof(struct netprio_map) +
+			   ((sizeof(u32) * new_len));
+	struct netprio_map *new_priomap = kzalloc(new_size, GFP_KERNEL);
+	struct netprio_map *old_priomap;
+	int i;
+
+	old_priomap  = rtnl_dereference(dev->priomap);
+
+	if (!new_priomap) {
+		printk(KERN_WARNING "Unable to alloc new priomap!\n");
+		return;
+	}
+
+	for (i = 0;
+	     old_priomap && (i < old_priomap->priomap_len);
+	     i++)
+		new_priomap->priomap[i] = old_priomap->priomap[i];
+
+	new_priomap->priomap_len = new_len;
+
+	rcu_assign_pointer(dev->priomap, new_priomap);
+	if (old_priomap)
+		kfree_rcu(old_priomap, rcu);
+}
+
+static void update_netdev_tables(void)
+{
+	struct net_device *dev;
+	u32 max_len = atomic_read(&max_prioidx) + 1;
+	struct netprio_map *map;
+
+	rtnl_lock();
+	for_each_netdev(&init_net, dev) {
+		map = rtnl_dereference(dev->priomap);
+		if ((!map) ||
+		    (map->priomap_len < max_len))
+			extend_netdev_table(dev, max_len);
+	}
+	rtnl_unlock();
+}
+
+static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp)
+{
+	struct cgroup_netprio_state *cs;
+	int ret;
+
+	cs = kzalloc(sizeof(*cs), GFP_KERNEL);
+	if (!cs)
+		return ERR_PTR(-ENOMEM);
+
+	if (cgrp->parent && cgrp_netprio_state(cgrp->parent)->prioidx) {
+		kfree(cs);
+		return ERR_PTR(-EINVAL);
+	}
+
+	ret = get_prioidx(&cs->prioidx);
+	if (ret != 0) {
+		printk(KERN_WARNING "No space in priority index array\n");
+		kfree(cs);
+		return ERR_PTR(ret);
+	}
+
+	return &cs->css;
+}
+
+static void cgrp_destroy(struct cgroup *cgrp)
+{
+	struct cgroup_netprio_state *cs;
+	struct net_device *dev;
+	struct netprio_map *map;
+
+	cs = cgrp_netprio_state(cgrp);
+	rtnl_lock();
+	for_each_netdev(&init_net, dev) {
+		map = rtnl_dereference(dev->priomap);
+		if (map)
+			map->priomap[cs->prioidx] = 0;
+	}
+	rtnl_unlock();
+	put_prioidx(cs->prioidx);
+	kfree(cs);
+}
+
+static u64 read_prioidx(struct cgroup *cgrp, struct cftype *cft)
+{
+	return (u64)cgrp_netprio_state(cgrp)->prioidx;
+}
+
+static int read_priomap(struct cgroup *cont, struct cftype *cft,
+			struct cgroup_map_cb *cb)
+{
+	struct net_device *dev;
+	u32 prioidx = cgrp_netprio_state(cont)->prioidx;
+	u32 priority;
+	struct netprio_map *map;
+
+	rcu_read_lock();
+	for_each_netdev_rcu(&init_net, dev) {
+		map = rcu_dereference(dev->priomap);
+		priority = map ? map->priomap[prioidx] : 0;
+		cb->fill(cb, dev->name, priority);
+	}
+	rcu_read_unlock();
+	return 0;
+}
+
+static int write_priomap(struct cgroup *cgrp, struct cftype *cft,
+			 const char *buffer)
+{
+	char *devname = kstrdup(buffer, GFP_KERNEL);
+	int ret = -EINVAL;
+	u32 prioidx = cgrp_netprio_state(cgrp)->prioidx;
+	unsigned long priority;
+	char *priostr;
+	struct net_device *dev;
+	struct netprio_map *map;
+
+	if (!devname)
+		return -ENOMEM;
+
+	/*
+	 * Minimally sized valid priomap string
+	 */
+	if (strlen(devname) < 3)
+		goto out_free_devname;
+
+	priostr = strstr(devname, " ");
+	if (!priostr)
+		goto out_free_devname;
+
+	/*
+	 *Separate the devname from the associated priority
+	 *and advance the priostr poitner to the priority value
+	 */
+	*priostr = '\0';
+	priostr++;
+
+	/*
+	 * If the priostr points to NULL, we're at the end of the passed
+	 * in string, and its not a valid write
+	 */
+	if (*priostr == '\0')
+		goto out_free_devname;
+
+	ret = kstrtoul(priostr, 10, &priority);
+	if (ret < 0)
+		goto out_free_devname;
+
+	ret = -ENODEV;
+
+	dev = dev_get_by_name(&init_net, devname);
+	if (!dev)
+		goto out_free_devname;
+
+	update_netdev_tables();
+	ret = 0;
+	rcu_read_lock();
+	map = rcu_dereference(dev->priomap);
+	if (map)
+		map->priomap[prioidx] = priority;
+	rcu_read_unlock();
+	dev_put(dev);
+
+out_free_devname:
+	kfree(devname);
+	return ret;
+}
+
+static struct cftype ss_files[] = {
+	{
+		.name = "prioidx",
+		.read_u64 = read_prioidx,
+	},
+	{
+		.name = "ifpriomap",
+		.read_map = read_priomap,
+		.write_string = write_priomap,
+	},
+};
+
+static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
+{
+	return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files));
+}
+
+static int netprio_device_event(struct notifier_block *unused,
+				unsigned long event, void *ptr)
+{
+	struct net_device *dev = ptr;
+	struct netprio_map *old;
+
+	/*
+	 * Note this is called with rtnl_lock held so we have update side
+	 * protection on our rcu assignments
+	 */
+
+	switch (event) {
+	case NETDEV_UNREGISTER:
+		old = rtnl_dereference(dev->priomap);
+		RCU_INIT_POINTER(dev->priomap, NULL);
+		if (old)
+			kfree_rcu(old, rcu);
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block netprio_device_notifier = {
+	.notifier_call = netprio_device_event
+};
+
+static int __init init_cgroup_netprio(void)
+{
+	int ret;
+
+	ret = cgroup_load_subsys(&net_prio_subsys);
+	if (ret)
+		goto out;
+#ifndef CONFIG_NETPRIO_CGROUP
+	smp_wmb();
+	net_prio_subsys_id = net_prio_subsys.subsys_id;
+#endif
+
+	register_netdevice_notifier(&netprio_device_notifier);
+
+out:
+	return ret;
+}
+
+static void __exit exit_cgroup_netprio(void)
+{
+	struct netprio_map *old;
+	struct net_device *dev;
+
+	unregister_netdevice_notifier(&netprio_device_notifier);
+
+	cgroup_unload_subsys(&net_prio_subsys);
+
+#ifndef CONFIG_NETPRIO_CGROUP
+	net_prio_subsys_id = -1;
+	synchronize_rcu();
+#endif
+
+	rtnl_lock();
+	for_each_netdev(&init_net, dev) {
+		old = rtnl_dereference(dev->priomap);
+		RCU_INIT_POINTER(dev->priomap, NULL);
+		if (old)
+			kfree_rcu(old, rcu);
+	}
+	rtnl_unlock();
+}
+
+module_init(init_cgroup_netprio);
+module_exit(exit_cgroup_netprio);
+MODULE_LICENSE("GPL v2");
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
new file mode 100644
index 00000000..b81369b6
--- /dev/null
+++ b/net/core/pktgen.c
@@ -0,0 +1,3796 @@
+/*
+ * Authors:
+ * Copyright 2001, 2002 by Robert Olsson <robert.olsson@its.uu.se>
+ *                             Uppsala University and
+ *                             Swedish University of Agricultural Sciences
+ *
+ * Alexey Kuznetsov  <kuznet@ms2.inr.ac.ru>
+ * Ben Greear <greearb@candelatech.com>
+ * Jens Låås <jens.laas@data.slu.se>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ *
+ * A tool for loading the network with preconfigurated packets.
+ * The tool is implemented as a linux module.  Parameters are output
+ * device, delay (to hard_xmit), number of packets, and whether
+ * to use multiple SKBs or just the same one.
+ * pktgen uses the installed interface's output routine.
+ *
+ * Additional hacking by:
+ *
+ * Jens.Laas@data.slu.se
+ * Improved by ANK. 010120.
+ * Improved by ANK even more. 010212.
+ * MAC address typo fixed. 010417 --ro
+ * Integrated.  020301 --DaveM
+ * Added multiskb option 020301 --DaveM
+ * Scaling of results. 020417--sigurdur@linpro.no
+ * Significant re-work of the module:
+ *   *  Convert to threaded model to more efficiently be able to transmit
+ *       and receive on multiple interfaces at once.
+ *   *  Converted many counters to __u64 to allow longer runs.
+ *   *  Allow configuration of ranges, like min/max IP address, MACs,
+ *       and UDP-ports, for both source and destination, and can
+ *       set to use a random distribution or sequentially walk the range.
+ *   *  Can now change most values after starting.
+ *   *  Place 12-byte packet in UDP payload with magic number,
+ *       sequence number, and timestamp.
+ *   *  Add receiver code that detects dropped pkts, re-ordered pkts, and
+ *       latencies (with micro-second) precision.
+ *   *  Add IOCTL interface to easily get counters & configuration.
+ *   --Ben Greear <greearb@candelatech.com>
+ *
+ * Renamed multiskb to clone_skb and cleaned up sending core for two distinct
+ * skb modes. A clone_skb=0 mode for Ben "ranges" work and a clone_skb != 0
+ * as a "fastpath" with a configurable number of clones after alloc's.
+ * clone_skb=0 means all packets are allocated this also means ranges time
+ * stamps etc can be used. clone_skb=100 means 1 malloc is followed by 100
+ * clones.
+ *
+ * Also moved to /proc/net/pktgen/
+ * --ro
+ *
+ * Sept 10:  Fixed threading/locking.  Lots of bone-headed and more clever
+ *    mistakes.  Also merged in DaveM's patch in the -pre6 patch.
+ * --Ben Greear <greearb@candelatech.com>
+ *
+ * Integrated to 2.5.x 021029 --Lucio Maciel (luciomaciel@zipmail.com.br)
+ *
+ *
+ * 021124 Finished major redesign and rewrite for new functionality.
+ * See Documentation/networking/pktgen.txt for how to use this.
+ *
+ * The new operation:
+ * For each CPU one thread/process is created at start. This process checks
+ * for running devices in the if_list and sends packets until count is 0 it
+ * also the thread checks the thread->control which is used for inter-process
+ * communication. controlling process "posts" operations to the threads this
+ * way. The if_lock should be possible to remove when add/rem_device is merged
+ * into this too.
+ *
+ * By design there should only be *one* "controlling" process. In practice
+ * multiple write accesses gives unpredictable result. Understood by "write"
+ * to /proc gives result code thats should be read be the "writer".
+ * For practical use this should be no problem.
+ *
+ * Note when adding devices to a specific CPU there good idea to also assign
+ * /proc/irq/XX/smp_affinity so TX-interrupts gets bound to the same CPU.
+ * --ro
+ *
+ * Fix refcount off by one if first packet fails, potential null deref,
+ * memleak 030710- KJP
+ *
+ * First "ranges" functionality for ipv6 030726 --ro
+ *
+ * Included flow support. 030802 ANK.
+ *
+ * Fixed unaligned access on IA-64 Grant Grundler <grundler@parisc-linux.org>
+ *
+ * Remove if fix from added Harald Welte <laforge@netfilter.org> 040419
+ * ia64 compilation fix from  Aron Griffis <aron@hp.com> 040604
+ *
+ * New xmit() return, do_div and misc clean up by Stephen Hemminger
+ * <shemminger@osdl.org> 040923
+ *
+ * Randy Dunlap fixed u64 printk compiler waring
+ *
+ * Remove FCS from BW calculation.  Lennert Buytenhek <buytenh@wantstofly.org>
+ * New time handling. Lennert Buytenhek <buytenh@wantstofly.org> 041213
+ *
+ * Corrections from Nikolai Malykh (nmalykh@bilim.com)
+ * Removed unused flags F_SET_SRCMAC & F_SET_SRCIP 041230
+ *
+ * interruptible_sleep_on_timeout() replaced Nishanth Aravamudan <nacc@us.ibm.com>
+ * 050103
+ *
+ * MPLS support by Steven Whitehouse <steve@chygwyn.com>
+ *
+ * 802.1Q/Q-in-Q support by Francesco Fondelli (FF) <francesco.fondelli@gmail.com>
+ *
+ * Fixed src_mac command to set source mac of packet to value specified in
+ * command by Adit Ranadive <adit.262@gmail.com>
+ *
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/sys.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/kernel.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/unistd.h>
+#include <linux/string.h>
+#include <linux/ptrace.h>
+#include <linux/errno.h>
+#include <linux/ioport.h>
+#include <linux/interrupt.h>
+#include <linux/capability.h>
+#include <linux/hrtimer.h>
+#include <linux/freezer.h>
+#include <linux/delay.h>
+#include <linux/timer.h>
+#include <linux/list.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/inet.h>
+#include <linux/inetdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/if_arp.h>
+#include <linux/if_vlan.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/udp.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/wait.h>
+#include <linux/etherdevice.h>
+#include <linux/kthread.h>
+#include <linux/prefetch.h>
+#include <net/net_namespace.h>
+#include <net/checksum.h>
+#include <net/ipv6.h>
+#include <net/addrconf.h>
+#ifdef CONFIG_XFRM
+#include <net/xfrm.h>
+#endif
+#include <asm/byteorder.h>
+#include <linux/rcupdate.h>
+#include <linux/bitops.h>
+#include <linux/io.h>
+#include <linux/timex.h>
+#include <linux/uaccess.h>
+#include <asm/dma.h>
+#include <asm/div64.h>		/* do_div */
+
+#define VERSION	"2.74"
+#define IP_NAME_SZ 32
+#define MAX_MPLS_LABELS 16 /* This is the max label stack depth */
+#define MPLS_STACK_BOTTOM htonl(0x00000100)
+
+#define func_enter() pr_debug("entering %s\n", __func__);
+
+/* Device flag bits */
+#define F_IPSRC_RND   (1<<0)	/* IP-Src Random  */
+#define F_IPDST_RND   (1<<1)	/* IP-Dst Random  */
+#define F_UDPSRC_RND  (1<<2)	/* UDP-Src Random */
+#define F_UDPDST_RND  (1<<3)	/* UDP-Dst Random */
+#define F_MACSRC_RND  (1<<4)	/* MAC-Src Random */
+#define F_MACDST_RND  (1<<5)	/* MAC-Dst Random */
+#define F_TXSIZE_RND  (1<<6)	/* Transmit size is random */
+#define F_IPV6        (1<<7)	/* Interface in IPV6 Mode */
+#define F_MPLS_RND    (1<<8)	/* Random MPLS labels */
+#define F_VID_RND     (1<<9)	/* Random VLAN ID */
+#define F_SVID_RND    (1<<10)	/* Random SVLAN ID */
+#define F_FLOW_SEQ    (1<<11)	/* Sequential flows */
+#define F_IPSEC_ON    (1<<12)	/* ipsec on for flows */
+#define F_QUEUE_MAP_RND (1<<13)	/* queue map Random */
+#define F_QUEUE_MAP_CPU (1<<14)	/* queue map mirrors smp_processor_id() */
+#define F_NODE          (1<<15)	/* Node memory alloc*/
+
+/* Thread control flag bits */
+#define T_STOP        (1<<0)	/* Stop run */
+#define T_RUN         (1<<1)	/* Start run */
+#define T_REMDEVALL   (1<<2)	/* Remove all devs */
+#define T_REMDEV      (1<<3)	/* Remove one dev */
+
+/* If lock -- can be removed after some work */
+#define   if_lock(t)           spin_lock(&(t->if_lock));
+#define   if_unlock(t)           spin_unlock(&(t->if_lock));
+
+/* Used to help with determining the pkts on receive */
+#define PKTGEN_MAGIC 0xbe9be955
+#define PG_PROC_DIR "pktgen"
+#define PGCTRL	    "pgctrl"
+static struct proc_dir_entry *pg_proc_dir;
+
+#define MAX_CFLOWS  65536
+
+#define VLAN_TAG_SIZE(x) ((x)->vlan_id == 0xffff ? 0 : 4)
+#define SVLAN_TAG_SIZE(x) ((x)->svlan_id == 0xffff ? 0 : 4)
+
+struct flow_state {
+	__be32 cur_daddr;
+	int count;
+#ifdef CONFIG_XFRM
+	struct xfrm_state *x;
+#endif
+	__u32 flags;
+};
+
+/* flow flag bits */
+#define F_INIT   (1<<0)		/* flow has been initialized */
+
+struct pktgen_dev {
+	/*
+	 * Try to keep frequent/infrequent used vars. separated.
+	 */
+	struct proc_dir_entry *entry;	/* proc file */
+	struct pktgen_thread *pg_thread;/* the owner */
+	struct list_head list;		/* chaining in the thread's run-queue */
+
+	int running;		/* if false, the test will stop */
+
+	/* If min != max, then we will either do a linear iteration, or
+	 * we will do a random selection from within the range.
+	 */
+	__u32 flags;
+	int removal_mark;	/* non-zero => the device is marked for
+				 * removal by worker thread */
+
+	int min_pkt_size;	/* = ETH_ZLEN; */
+	int max_pkt_size;	/* = ETH_ZLEN; */
+	int pkt_overhead;	/* overhead for MPLS, VLANs, IPSEC etc */
+	int nfrags;
+	struct page *page;
+	u64 delay;		/* nano-seconds */
+
+	__u64 count;		/* Default No packets to send */
+	__u64 sofar;		/* How many pkts we've sent so far */
+	__u64 tx_bytes;		/* How many bytes we've transmitted */
+	__u64 errors;		/* Errors when trying to transmit, */
+
+	/* runtime counters relating to clone_skb */
+
+	__u64 allocated_skbs;
+	__u32 clone_count;
+	int last_ok;		/* Was last skb sent?
+				 * Or a failed transmit of some sort?
+				 * This will keep sequence numbers in order
+				 */
+	ktime_t next_tx;
+	ktime_t started_at;
+	ktime_t stopped_at;
+	u64	idle_acc;	/* nano-seconds */
+
+	__u32 seq_num;
+
+	int clone_skb;		/*
+				 * Use multiple SKBs during packet gen.
+				 * If this number is greater than 1, then
+				 * that many copies of the same packet will be
+				 * sent before a new packet is allocated.
+				 * If you want to send 1024 identical packets
+				 * before creating a new packet,
+				 * set clone_skb to 1024.
+				 */
+
+	char dst_min[IP_NAME_SZ];	/* IP, ie 1.2.3.4 */
+	char dst_max[IP_NAME_SZ];	/* IP, ie 1.2.3.4 */
+	char src_min[IP_NAME_SZ];	/* IP, ie 1.2.3.4 */
+	char src_max[IP_NAME_SZ];	/* IP, ie 1.2.3.4 */
+
+	struct in6_addr in6_saddr;
+	struct in6_addr in6_daddr;
+	struct in6_addr cur_in6_daddr;
+	struct in6_addr cur_in6_saddr;
+	/* For ranges */
+	struct in6_addr min_in6_daddr;
+	struct in6_addr max_in6_daddr;
+	struct in6_addr min_in6_saddr;
+	struct in6_addr max_in6_saddr;
+
+	/* If we're doing ranges, random or incremental, then this
+	 * defines the min/max for those ranges.
+	 */
+	__be32 saddr_min;	/* inclusive, source IP address */
+	__be32 saddr_max;	/* exclusive, source IP address */
+	__be32 daddr_min;	/* inclusive, dest IP address */
+	__be32 daddr_max;	/* exclusive, dest IP address */
+
+	__u16 udp_src_min;	/* inclusive, source UDP port */
+	__u16 udp_src_max;	/* exclusive, source UDP port */
+	__u16 udp_dst_min;	/* inclusive, dest UDP port */
+	__u16 udp_dst_max;	/* exclusive, dest UDP port */
+
+	/* DSCP + ECN */
+	__u8 tos;            /* six MSB of (former) IPv4 TOS
+				are for dscp codepoint */
+	__u8 traffic_class;  /* ditto for the (former) Traffic Class in IPv6
+				(see RFC 3260, sec. 4) */
+
+	/* MPLS */
+	unsigned nr_labels;	/* Depth of stack, 0 = no MPLS */
+	__be32 labels[MAX_MPLS_LABELS];
+
+	/* VLAN/SVLAN (802.1Q/Q-in-Q) */
+	__u8  vlan_p;
+	__u8  vlan_cfi;
+	__u16 vlan_id;  /* 0xffff means no vlan tag */
+
+	__u8  svlan_p;
+	__u8  svlan_cfi;
+	__u16 svlan_id; /* 0xffff means no svlan tag */
+
+	__u32 src_mac_count;	/* How many MACs to iterate through */
+	__u32 dst_mac_count;	/* How many MACs to iterate through */
+
+	unsigned char dst_mac[ETH_ALEN];
+	unsigned char src_mac[ETH_ALEN];
+
+	__u32 cur_dst_mac_offset;
+	__u32 cur_src_mac_offset;
+	__be32 cur_saddr;
+	__be32 cur_daddr;
+	__u16 ip_id;
+	__u16 cur_udp_dst;
+	__u16 cur_udp_src;
+	__u16 cur_queue_map;
+	__u32 cur_pkt_size;
+	__u32 last_pkt_size;
+
+	__u8 hh[14];
+	/* = {
+	   0x00, 0x80, 0xC8, 0x79, 0xB3, 0xCB,
+
+	   We fill in SRC address later
+	   0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	   0x08, 0x00
+	   };
+	 */
+	__u16 pad;		/* pad out the hh struct to an even 16 bytes */
+
+	struct sk_buff *skb;	/* skb we are to transmit next, used for when we
+				 * are transmitting the same one multiple times
+				 */
+	struct net_device *odev; /* The out-going device.
+				  * Note that the device should have it's
+				  * pg_info pointer pointing back to this
+				  * device.
+				  * Set when the user specifies the out-going
+				  * device name (not when the inject is
+				  * started as it used to do.)
+				  */
+	char odevname[32];
+	struct flow_state *flows;
+	unsigned cflows;	/* Concurrent flows (config) */
+	unsigned lflow;		/* Flow length  (config) */
+	unsigned nflows;	/* accumulated flows (stats) */
+	unsigned curfl;		/* current sequenced flow (state)*/
+
+	u16 queue_map_min;
+	u16 queue_map_max;
+	__u32 skb_priority;	/* skb priority field */
+	int node;               /* Memory node */
+
+#ifdef CONFIG_XFRM
+	__u8	ipsmode;		/* IPSEC mode (config) */
+	__u8	ipsproto;		/* IPSEC type (config) */
+#endif
+	char result[512];
+};
+
+struct pktgen_hdr {
+	__be32 pgh_magic;
+	__be32 seq_num;
+	__be32 tv_sec;
+	__be32 tv_usec;
+};
+
+static bool pktgen_exiting __read_mostly;
+
+struct pktgen_thread {
+	spinlock_t if_lock;		/* for list of devices */
+	struct list_head if_list;	/* All device here */
+	struct list_head th_list;
+	struct task_struct *tsk;
+	char result[512];
+
+	/* Field for thread to receive "posted" events terminate,
+	   stop ifs etc. */
+
+	u32 control;
+	int cpu;
+
+	wait_queue_head_t queue;
+	struct completion start_done;
+};
+
+#define REMOVE 1
+#define FIND   0
+
+static inline ktime_t ktime_now(void)
+{
+	struct timespec ts;
+	ktime_get_ts(&ts);
+
+	return timespec_to_ktime(ts);
+}
+
+/* This works even if 32 bit because of careful byte order choice */
+static inline int ktime_lt(const ktime_t cmp1, const ktime_t cmp2)
+{
+	return cmp1.tv64 < cmp2.tv64;
+}
+
+static const char version[] =
+	"Packet Generator for packet performance testing. "
+	"Version: " VERSION "\n";
+
+static int pktgen_remove_device(struct pktgen_thread *t, struct pktgen_dev *i);
+static int pktgen_add_device(struct pktgen_thread *t, const char *ifname);
+static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t,
+					  const char *ifname, bool exact);
+static int pktgen_device_event(struct notifier_block *, unsigned long, void *);
+static void pktgen_run_all_threads(void);
+static void pktgen_reset_all_threads(void);
+static void pktgen_stop_all_threads_ifs(void);
+
+static void pktgen_stop(struct pktgen_thread *t);
+static void pktgen_clear_counters(struct pktgen_dev *pkt_dev);
+
+static unsigned int scan_ip6(const char *s, char ip[16]);
+
+/* Module parameters, defaults. */
+static int pg_count_d __read_mostly = 1000;
+static int pg_delay_d __read_mostly;
+static int pg_clone_skb_d  __read_mostly;
+static int debug  __read_mostly;
+
+static DEFINE_MUTEX(pktgen_thread_lock);
+static LIST_HEAD(pktgen_threads);
+
+static struct notifier_block pktgen_notifier_block = {
+	.notifier_call = pktgen_device_event,
+};
+
+/*
+ * /proc handling functions
+ *
+ */
+
+static int pgctrl_show(struct seq_file *seq, void *v)
+{
+	seq_puts(seq, version);
+	return 0;
+}
+
+static ssize_t pgctrl_write(struct file *file, const char __user *buf,
+			    size_t count, loff_t *ppos)
+{
+	int err = 0;
+	char data[128];
+
+	if (!capable(CAP_NET_ADMIN)) {
+		err = -EPERM;
+		goto out;
+	}
+
+	if (count > sizeof(data))
+		count = sizeof(data);
+
+	if (copy_from_user(data, buf, count)) {
+		err = -EFAULT;
+		goto out;
+	}
+	data[count - 1] = 0;	/* Make string */
+
+	if (!strcmp(data, "stop"))
+		pktgen_stop_all_threads_ifs();
+
+	else if (!strcmp(data, "start"))
+		pktgen_run_all_threads();
+
+	else if (!strcmp(data, "reset"))
+		pktgen_reset_all_threads();
+
+	else
+		pr_warning("Unknown command: %s\n", data);
+
+	err = count;
+
+out:
+	return err;
+}
+
+static int pgctrl_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, pgctrl_show, PDE(inode)->data);
+}
+
+static const struct file_operations pktgen_fops = {
+	.owner   = THIS_MODULE,
+	.open    = pgctrl_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.write   = pgctrl_write,
+	.release = single_release,
+};
+
+static int pktgen_if_show(struct seq_file *seq, void *v)
+{
+	const struct pktgen_dev *pkt_dev = seq->private;
+	ktime_t stopped;
+	u64 idle;
+
+	seq_printf(seq,
+		   "Params: count %llu  min_pkt_size: %u  max_pkt_size: %u\n",
+		   (unsigned long long)pkt_dev->count, pkt_dev->min_pkt_size,
+		   pkt_dev->max_pkt_size);
+
+	seq_printf(seq,
+		   "     frags: %d  delay: %llu  clone_skb: %d  ifname: %s\n",
+		   pkt_dev->nfrags, (unsigned long long) pkt_dev->delay,
+		   pkt_dev->clone_skb, pkt_dev->odevname);
+
+	seq_printf(seq, "     flows: %u flowlen: %u\n", pkt_dev->cflows,
+		   pkt_dev->lflow);
+
+	seq_printf(seq,
+		   "     queue_map_min: %u  queue_map_max: %u\n",
+		   pkt_dev->queue_map_min,
+		   pkt_dev->queue_map_max);
+
+	if (pkt_dev->skb_priority)
+		seq_printf(seq, "     skb_priority: %u\n",
+			   pkt_dev->skb_priority);
+
+	if (pkt_dev->flags & F_IPV6) {
+		seq_printf(seq,
+			   "     saddr: %pI6c  min_saddr: %pI6c  max_saddr: %pI6c\n"
+			   "     daddr: %pI6c  min_daddr: %pI6c  max_daddr: %pI6c\n",
+			   &pkt_dev->in6_saddr,
+			   &pkt_dev->min_in6_saddr, &pkt_dev->max_in6_saddr,
+			   &pkt_dev->in6_daddr,
+			   &pkt_dev->min_in6_daddr, &pkt_dev->max_in6_daddr);
+	} else {
+		seq_printf(seq,
+			   "     dst_min: %s  dst_max: %s\n",
+			   pkt_dev->dst_min, pkt_dev->dst_max);
+		seq_printf(seq,
+			   "        src_min: %s  src_max: %s\n",
+			   pkt_dev->src_min, pkt_dev->src_max);
+	}
+
+	seq_puts(seq, "     src_mac: ");
+
+	seq_printf(seq, "%pM ",
+		   is_zero_ether_addr(pkt_dev->src_mac) ?
+			     pkt_dev->odev->dev_addr : pkt_dev->src_mac);
+
+	seq_printf(seq, "dst_mac: ");
+	seq_printf(seq, "%pM\n", pkt_dev->dst_mac);
+
+	seq_printf(seq,
+		   "     udp_src_min: %d  udp_src_max: %d"
+		   "  udp_dst_min: %d  udp_dst_max: %d\n",
+		   pkt_dev->udp_src_min, pkt_dev->udp_src_max,
+		   pkt_dev->udp_dst_min, pkt_dev->udp_dst_max);
+
+	seq_printf(seq,
+		   "     src_mac_count: %d  dst_mac_count: %d\n",
+		   pkt_dev->src_mac_count, pkt_dev->dst_mac_count);
+
+	if (pkt_dev->nr_labels) {
+		unsigned i;
+		seq_printf(seq, "     mpls: ");
+		for (i = 0; i < pkt_dev->nr_labels; i++)
+			seq_printf(seq, "%08x%s", ntohl(pkt_dev->labels[i]),
+				   i == pkt_dev->nr_labels-1 ? "\n" : ", ");
+	}
+
+	if (pkt_dev->vlan_id != 0xffff)
+		seq_printf(seq, "     vlan_id: %u  vlan_p: %u  vlan_cfi: %u\n",
+			   pkt_dev->vlan_id, pkt_dev->vlan_p,
+			   pkt_dev->vlan_cfi);
+
+	if (pkt_dev->svlan_id != 0xffff)
+		seq_printf(seq, "     svlan_id: %u  vlan_p: %u  vlan_cfi: %u\n",
+			   pkt_dev->svlan_id, pkt_dev->svlan_p,
+			   pkt_dev->svlan_cfi);
+
+	if (pkt_dev->tos)
+		seq_printf(seq, "     tos: 0x%02x\n", pkt_dev->tos);
+
+	if (pkt_dev->traffic_class)
+		seq_printf(seq, "     traffic_class: 0x%02x\n", pkt_dev->traffic_class);
+
+	if (pkt_dev->node >= 0)
+		seq_printf(seq, "     node: %d\n", pkt_dev->node);
+
+	seq_printf(seq, "     Flags: ");
+
+	if (pkt_dev->flags & F_IPV6)
+		seq_printf(seq, "IPV6  ");
+
+	if (pkt_dev->flags & F_IPSRC_RND)
+		seq_printf(seq, "IPSRC_RND  ");
+
+	if (pkt_dev->flags & F_IPDST_RND)
+		seq_printf(seq, "IPDST_RND  ");
+
+	if (pkt_dev->flags & F_TXSIZE_RND)
+		seq_printf(seq, "TXSIZE_RND  ");
+
+	if (pkt_dev->flags & F_UDPSRC_RND)
+		seq_printf(seq, "UDPSRC_RND  ");
+
+	if (pkt_dev->flags & F_UDPDST_RND)
+		seq_printf(seq, "UDPDST_RND  ");
+
+	if (pkt_dev->flags & F_MPLS_RND)
+		seq_printf(seq,  "MPLS_RND  ");
+
+	if (pkt_dev->flags & F_QUEUE_MAP_RND)
+		seq_printf(seq,  "QUEUE_MAP_RND  ");
+
+	if (pkt_dev->flags & F_QUEUE_MAP_CPU)
+		seq_printf(seq,  "QUEUE_MAP_CPU  ");
+
+	if (pkt_dev->cflows) {
+		if (pkt_dev->flags & F_FLOW_SEQ)
+			seq_printf(seq,  "FLOW_SEQ  "); /*in sequence flows*/
+		else
+			seq_printf(seq,  "FLOW_RND  ");
+	}
+
+#ifdef CONFIG_XFRM
+	if (pkt_dev->flags & F_IPSEC_ON)
+		seq_printf(seq,  "IPSEC  ");
+#endif
+
+	if (pkt_dev->flags & F_MACSRC_RND)
+		seq_printf(seq, "MACSRC_RND  ");
+
+	if (pkt_dev->flags & F_MACDST_RND)
+		seq_printf(seq, "MACDST_RND  ");
+
+	if (pkt_dev->flags & F_VID_RND)
+		seq_printf(seq, "VID_RND  ");
+
+	if (pkt_dev->flags & F_SVID_RND)
+		seq_printf(seq, "SVID_RND  ");
+
+	if (pkt_dev->flags & F_NODE)
+		seq_printf(seq, "NODE_ALLOC  ");
+
+	seq_puts(seq, "\n");
+
+	/* not really stopped, more like last-running-at */
+	stopped = pkt_dev->running ? ktime_now() : pkt_dev->stopped_at;
+	idle = pkt_dev->idle_acc;
+	do_div(idle, NSEC_PER_USEC);
+
+	seq_printf(seq,
+		   "Current:\n     pkts-sofar: %llu  errors: %llu\n",
+		   (unsigned long long)pkt_dev->sofar,
+		   (unsigned long long)pkt_dev->errors);
+
+	seq_printf(seq,
+		   "     started: %lluus  stopped: %lluus idle: %lluus\n",
+		   (unsigned long long) ktime_to_us(pkt_dev->started_at),
+		   (unsigned long long) ktime_to_us(stopped),
+		   (unsigned long long) idle);
+
+	seq_printf(seq,
+		   "     seq_num: %d  cur_dst_mac_offset: %d  cur_src_mac_offset: %d\n",
+		   pkt_dev->seq_num, pkt_dev->cur_dst_mac_offset,
+		   pkt_dev->cur_src_mac_offset);
+
+	if (pkt_dev->flags & F_IPV6) {
+		seq_printf(seq, "     cur_saddr: %pI6c  cur_daddr: %pI6c\n",
+				&pkt_dev->cur_in6_saddr,
+				&pkt_dev->cur_in6_daddr);
+	} else
+		seq_printf(seq, "     cur_saddr: 0x%x  cur_daddr: 0x%x\n",
+			   pkt_dev->cur_saddr, pkt_dev->cur_daddr);
+
+	seq_printf(seq, "     cur_udp_dst: %d  cur_udp_src: %d\n",
+		   pkt_dev->cur_udp_dst, pkt_dev->cur_udp_src);
+
+	seq_printf(seq, "     cur_queue_map: %u\n", pkt_dev->cur_queue_map);
+
+	seq_printf(seq, "     flows: %u\n", pkt_dev->nflows);
+
+	if (pkt_dev->result[0])
+		seq_printf(seq, "Result: %s\n", pkt_dev->result);
+	else
+		seq_printf(seq, "Result: Idle\n");
+
+	return 0;
+}
+
+
+static int hex32_arg(const char __user *user_buffer, unsigned long maxlen,
+		     __u32 *num)
+{
+	int i = 0;
+	*num = 0;
+
+	for (; i < maxlen; i++) {
+		int value;
+		char c;
+		*num <<= 4;
+		if (get_user(c, &user_buffer[i]))
+			return -EFAULT;
+		value = hex_to_bin(c);
+		if (value >= 0)
+			*num |= value;
+		else
+			break;
+	}
+	return i;
+}
+
+static int count_trail_chars(const char __user * user_buffer,
+			     unsigned int maxlen)
+{
+	int i;
+
+	for (i = 0; i < maxlen; i++) {
+		char c;
+		if (get_user(c, &user_buffer[i]))
+			return -EFAULT;
+		switch (c) {
+		case '\"':
+		case '\n':
+		case '\r':
+		case '\t':
+		case ' ':
+		case '=':
+			break;
+		default:
+			goto done;
+		}
+	}
+done:
+	return i;
+}
+
+static long num_arg(const char __user *user_buffer, unsigned long maxlen,
+				unsigned long *num)
+{
+	int i;
+	*num = 0;
+
+	for (i = 0; i < maxlen; i++) {
+		char c;
+		if (get_user(c, &user_buffer[i]))
+			return -EFAULT;
+		if ((c >= '0') && (c <= '9')) {
+			*num *= 10;
+			*num += c - '0';
+		} else
+			break;
+	}
+	return i;
+}
+
+static int strn_len(const char __user * user_buffer, unsigned int maxlen)
+{
+	int i;
+
+	for (i = 0; i < maxlen; i++) {
+		char c;
+		if (get_user(c, &user_buffer[i]))
+			return -EFAULT;
+		switch (c) {
+		case '\"':
+		case '\n':
+		case '\r':
+		case '\t':
+		case ' ':
+			goto done_str;
+			break;
+		default:
+			break;
+		}
+	}
+done_str:
+	return i;
+}
+
+static ssize_t get_labels(const char __user *buffer, struct pktgen_dev *pkt_dev)
+{
+	unsigned n = 0;
+	char c;
+	ssize_t i = 0;
+	int len;
+
+	pkt_dev->nr_labels = 0;
+	do {
+		__u32 tmp;
+		len = hex32_arg(&buffer[i], 8, &tmp);
+		if (len <= 0)
+			return len;
+		pkt_dev->labels[n] = htonl(tmp);
+		if (pkt_dev->labels[n] & MPLS_STACK_BOTTOM)
+			pkt_dev->flags |= F_MPLS_RND;
+		i += len;
+		if (get_user(c, &buffer[i]))
+			return -EFAULT;
+		i++;
+		n++;
+		if (n >= MAX_MPLS_LABELS)
+			return -E2BIG;
+	} while (c == ',');
+
+	pkt_dev->nr_labels = n;
+	return i;
+}
+
+static ssize_t pktgen_if_write(struct file *file,
+			       const char __user * user_buffer, size_t count,
+			       loff_t * offset)
+{
+	struct seq_file *seq = file->private_data;
+	struct pktgen_dev *pkt_dev = seq->private;
+	int i, max, len;
+	char name[16], valstr[32];
+	unsigned long value = 0;
+	char *pg_result = NULL;
+	int tmp = 0;
+	char buf[128];
+
+	pg_result = &(pkt_dev->result[0]);
+
+	if (count < 1) {
+		pr_warning("wrong command format\n");
+		return -EINVAL;
+	}
+
+	max = count;
+	tmp = count_trail_chars(user_buffer, max);
+	if (tmp < 0) {
+		pr_warning("illegal format\n");
+		return tmp;
+	}
+	i = tmp;
+
+	/* Read variable name */
+
+	len = strn_len(&user_buffer[i], sizeof(name) - 1);
+	if (len < 0)
+		return len;
+
+	memset(name, 0, sizeof(name));
+	if (copy_from_user(name, &user_buffer[i], len))
+		return -EFAULT;
+	i += len;
+
+	max = count - i;
+	len = count_trail_chars(&user_buffer[i], max);
+	if (len < 0)
+		return len;
+
+	i += len;
+
+	if (debug) {
+		size_t copy = min_t(size_t, count, 1023);
+		char tb[copy + 1];
+		if (copy_from_user(tb, user_buffer, copy))
+			return -EFAULT;
+		tb[copy] = 0;
+		printk(KERN_DEBUG "pktgen: %s,%lu  buffer -:%s:-\n", name,
+		       (unsigned long)count, tb);
+	}
+
+	if (!strcmp(name, "min_pkt_size")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+		if (len < 0)
+			return len;
+
+		i += len;
+		if (value < 14 + 20 + 8)
+			value = 14 + 20 + 8;
+		if (value != pkt_dev->min_pkt_size) {
+			pkt_dev->min_pkt_size = value;
+			pkt_dev->cur_pkt_size = value;
+		}
+		sprintf(pg_result, "OK: min_pkt_size=%u",
+			pkt_dev->min_pkt_size);
+		return count;
+	}
+
+	if (!strcmp(name, "max_pkt_size")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+		if (len < 0)
+			return len;
+
+		i += len;
+		if (value < 14 + 20 + 8)
+			value = 14 + 20 + 8;
+		if (value != pkt_dev->max_pkt_size) {
+			pkt_dev->max_pkt_size = value;
+			pkt_dev->cur_pkt_size = value;
+		}
+		sprintf(pg_result, "OK: max_pkt_size=%u",
+			pkt_dev->max_pkt_size);
+		return count;
+	}
+
+	/* Shortcut for min = max */
+
+	if (!strcmp(name, "pkt_size")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+		if (len < 0)
+			return len;
+
+		i += len;
+		if (value < 14 + 20 + 8)
+			value = 14 + 20 + 8;
+		if (value != pkt_dev->min_pkt_size) {
+			pkt_dev->min_pkt_size = value;
+			pkt_dev->max_pkt_size = value;
+			pkt_dev->cur_pkt_size = value;
+		}
+		sprintf(pg_result, "OK: pkt_size=%u", pkt_dev->min_pkt_size);
+		return count;
+	}
+
+	if (!strcmp(name, "debug")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+		if (len < 0)
+			return len;
+
+		i += len;
+		debug = value;
+		sprintf(pg_result, "OK: debug=%u", debug);
+		return count;
+	}
+
+	if (!strcmp(name, "frags")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+		if (len < 0)
+			return len;
+
+		i += len;
+		pkt_dev->nfrags = value;
+		sprintf(pg_result, "OK: frags=%u", pkt_dev->nfrags);
+		return count;
+	}
+	if (!strcmp(name, "delay")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+		if (len < 0)
+			return len;
+
+		i += len;
+		if (value == 0x7FFFFFFF)
+			pkt_dev->delay = ULLONG_MAX;
+		else
+			pkt_dev->delay = (u64)value;
+
+		sprintf(pg_result, "OK: delay=%llu",
+			(unsigned long long) pkt_dev->delay);
+		return count;
+	}
+	if (!strcmp(name, "rate")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+		if (len < 0)
+			return len;
+
+		i += len;
+		if (!value)
+			return len;
+		pkt_dev->delay = pkt_dev->min_pkt_size*8*NSEC_PER_USEC/value;
+		if (debug)
+			pr_info("Delay set at: %llu ns\n", pkt_dev->delay);
+
+		sprintf(pg_result, "OK: rate=%lu", value);
+		return count;
+	}
+	if (!strcmp(name, "ratep")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+		if (len < 0)
+			return len;
+
+		i += len;
+		if (!value)
+			return len;
+		pkt_dev->delay = NSEC_PER_SEC/value;
+		if (debug)
+			pr_info("Delay set at: %llu ns\n", pkt_dev->delay);
+
+		sprintf(pg_result, "OK: rate=%lu", value);
+		return count;
+	}
+	if (!strcmp(name, "udp_src_min")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+		if (len < 0)
+			return len;
+
+		i += len;
+		if (value != pkt_dev->udp_src_min) {
+			pkt_dev->udp_src_min = value;
+			pkt_dev->cur_udp_src = value;
+		}
+		sprintf(pg_result, "OK: udp_src_min=%u", pkt_dev->udp_src_min);
+		return count;
+	}
+	if (!strcmp(name, "udp_dst_min")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+		if (len < 0)
+			return len;
+
+		i += len;
+		if (value != pkt_dev->udp_dst_min) {
+			pkt_dev->udp_dst_min = value;
+			pkt_dev->cur_udp_dst = value;
+		}
+		sprintf(pg_result, "OK: udp_dst_min=%u", pkt_dev->udp_dst_min);
+		return count;
+	}
+	if (!strcmp(name, "udp_src_max")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+		if (len < 0)
+			return len;
+
+		i += len;
+		if (value != pkt_dev->udp_src_max) {
+			pkt_dev->udp_src_max = value;
+			pkt_dev->cur_udp_src = value;
+		}
+		sprintf(pg_result, "OK: udp_src_max=%u", pkt_dev->udp_src_max);
+		return count;
+	}
+	if (!strcmp(name, "udp_dst_max")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+		if (len < 0)
+			return len;
+
+		i += len;
+		if (value != pkt_dev->udp_dst_max) {
+			pkt_dev->udp_dst_max = value;
+			pkt_dev->cur_udp_dst = value;
+		}
+		sprintf(pg_result, "OK: udp_dst_max=%u", pkt_dev->udp_dst_max);
+		return count;
+	}
+	if (!strcmp(name, "clone_skb")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+		if (len < 0)
+			return len;
+		if ((value > 0) &&
+		    (!(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING)))
+			return -ENOTSUPP;
+		i += len;
+		pkt_dev->clone_skb = value;
+
+		sprintf(pg_result, "OK: clone_skb=%d", pkt_dev->clone_skb);
+		return count;
+	}
+	if (!strcmp(name, "count")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+		if (len < 0)
+			return len;
+
+		i += len;
+		pkt_dev->count = value;
+		sprintf(pg_result, "OK: count=%llu",
+			(unsigned long long)pkt_dev->count);
+		return count;
+	}
+	if (!strcmp(name, "src_mac_count")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+		if (len < 0)
+			return len;
+
+		i += len;
+		if (pkt_dev->src_mac_count != value) {
+			pkt_dev->src_mac_count = value;
+			pkt_dev->cur_src_mac_offset = 0;
+		}
+		sprintf(pg_result, "OK: src_mac_count=%d",
+			pkt_dev->src_mac_count);
+		return count;
+	}
+	if (!strcmp(name, "dst_mac_count")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+		if (len < 0)
+			return len;
+
+		i += len;
+		if (pkt_dev->dst_mac_count != value) {
+			pkt_dev->dst_mac_count = value;
+			pkt_dev->cur_dst_mac_offset = 0;
+		}
+		sprintf(pg_result, "OK: dst_mac_count=%d",
+			pkt_dev->dst_mac_count);
+		return count;
+	}
+	if (!strcmp(name, "node")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+		if (len < 0)
+			return len;
+
+		i += len;
+
+		if (node_possible(value)) {
+			pkt_dev->node = value;
+			sprintf(pg_result, "OK: node=%d", pkt_dev->node);
+			if (pkt_dev->page) {
+				put_page(pkt_dev->page);
+				pkt_dev->page = NULL;
+			}
+		}
+		else
+			sprintf(pg_result, "ERROR: node not possible");
+		return count;
+	}
+	if (!strcmp(name, "flag")) {
+		char f[32];
+		memset(f, 0, 32);
+		len = strn_len(&user_buffer[i], sizeof(f) - 1);
+		if (len < 0)
+			return len;
+
+		if (copy_from_user(f, &user_buffer[i], len))
+			return -EFAULT;
+		i += len;
+		if (strcmp(f, "IPSRC_RND") == 0)
+			pkt_dev->flags |= F_IPSRC_RND;
+
+		else if (strcmp(f, "!IPSRC_RND") == 0)
+			pkt_dev->flags &= ~F_IPSRC_RND;
+
+		else if (strcmp(f, "TXSIZE_RND") == 0)
+			pkt_dev->flags |= F_TXSIZE_RND;
+
+		else if (strcmp(f, "!TXSIZE_RND") == 0)
+			pkt_dev->flags &= ~F_TXSIZE_RND;
+
+		else if (strcmp(f, "IPDST_RND") == 0)
+			pkt_dev->flags |= F_IPDST_RND;
+
+		else if (strcmp(f, "!IPDST_RND") == 0)
+			pkt_dev->flags &= ~F_IPDST_RND;
+
+		else if (strcmp(f, "UDPSRC_RND") == 0)
+			pkt_dev->flags |= F_UDPSRC_RND;
+
+		else if (strcmp(f, "!UDPSRC_RND") == 0)
+			pkt_dev->flags &= ~F_UDPSRC_RND;
+
+		else if (strcmp(f, "UDPDST_RND") == 0)
+			pkt_dev->flags |= F_UDPDST_RND;
+
+		else if (strcmp(f, "!UDPDST_RND") == 0)
+			pkt_dev->flags &= ~F_UDPDST_RND;
+
+		else if (strcmp(f, "MACSRC_RND") == 0)
+			pkt_dev->flags |= F_MACSRC_RND;
+
+		else if (strcmp(f, "!MACSRC_RND") == 0)
+			pkt_dev->flags &= ~F_MACSRC_RND;
+
+		else if (strcmp(f, "MACDST_RND") == 0)
+			pkt_dev->flags |= F_MACDST_RND;
+
+		else if (strcmp(f, "!MACDST_RND") == 0)
+			pkt_dev->flags &= ~F_MACDST_RND;
+
+		else if (strcmp(f, "MPLS_RND") == 0)
+			pkt_dev->flags |= F_MPLS_RND;
+
+		else if (strcmp(f, "!MPLS_RND") == 0)
+			pkt_dev->flags &= ~F_MPLS_RND;
+
+		else if (strcmp(f, "VID_RND") == 0)
+			pkt_dev->flags |= F_VID_RND;
+
+		else if (strcmp(f, "!VID_RND") == 0)
+			pkt_dev->flags &= ~F_VID_RND;
+
+		else if (strcmp(f, "SVID_RND") == 0)
+			pkt_dev->flags |= F_SVID_RND;
+
+		else if (strcmp(f, "!SVID_RND") == 0)
+			pkt_dev->flags &= ~F_SVID_RND;
+
+		else if (strcmp(f, "FLOW_SEQ") == 0)
+			pkt_dev->flags |= F_FLOW_SEQ;
+
+		else if (strcmp(f, "QUEUE_MAP_RND") == 0)
+			pkt_dev->flags |= F_QUEUE_MAP_RND;
+
+		else if (strcmp(f, "!QUEUE_MAP_RND") == 0)
+			pkt_dev->flags &= ~F_QUEUE_MAP_RND;
+
+		else if (strcmp(f, "QUEUE_MAP_CPU") == 0)
+			pkt_dev->flags |= F_QUEUE_MAP_CPU;
+
+		else if (strcmp(f, "!QUEUE_MAP_CPU") == 0)
+			pkt_dev->flags &= ~F_QUEUE_MAP_CPU;
+#ifdef CONFIG_XFRM
+		else if (strcmp(f, "IPSEC") == 0)
+			pkt_dev->flags |= F_IPSEC_ON;
+#endif
+
+		else if (strcmp(f, "!IPV6") == 0)
+			pkt_dev->flags &= ~F_IPV6;
+
+		else if (strcmp(f, "NODE_ALLOC") == 0)
+			pkt_dev->flags |= F_NODE;
+
+		else if (strcmp(f, "!NODE_ALLOC") == 0)
+			pkt_dev->flags &= ~F_NODE;
+
+		else {
+			sprintf(pg_result,
+				"Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s",
+				f,
+				"IPSRC_RND, IPDST_RND, UDPSRC_RND, UDPDST_RND, "
+				"MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, IPSEC, NODE_ALLOC\n");
+			return count;
+		}
+		sprintf(pg_result, "OK: flags=0x%x", pkt_dev->flags);
+		return count;
+	}
+	if (!strcmp(name, "dst_min") || !strcmp(name, "dst")) {
+		len = strn_len(&user_buffer[i], sizeof(pkt_dev->dst_min) - 1);
+		if (len < 0)
+			return len;
+
+		if (copy_from_user(buf, &user_buffer[i], len))
+			return -EFAULT;
+		buf[len] = 0;
+		if (strcmp(buf, pkt_dev->dst_min) != 0) {
+			memset(pkt_dev->dst_min, 0, sizeof(pkt_dev->dst_min));
+			strncpy(pkt_dev->dst_min, buf, len);
+			pkt_dev->daddr_min = in_aton(pkt_dev->dst_min);
+			pkt_dev->cur_daddr = pkt_dev->daddr_min;
+		}
+		if (debug)
+			printk(KERN_DEBUG "pktgen: dst_min set to: %s\n",
+			       pkt_dev->dst_min);
+		i += len;
+		sprintf(pg_result, "OK: dst_min=%s", pkt_dev->dst_min);
+		return count;
+	}
+	if (!strcmp(name, "dst_max")) {
+		len = strn_len(&user_buffer[i], sizeof(pkt_dev->dst_max) - 1);
+		if (len < 0)
+			return len;
+
+
+		if (copy_from_user(buf, &user_buffer[i], len))
+			return -EFAULT;
+
+		buf[len] = 0;
+		if (strcmp(buf, pkt_dev->dst_max) != 0) {
+			memset(pkt_dev->dst_max, 0, sizeof(pkt_dev->dst_max));
+			strncpy(pkt_dev->dst_max, buf, len);
+			pkt_dev->daddr_max = in_aton(pkt_dev->dst_max);
+			pkt_dev->cur_daddr = pkt_dev->daddr_max;
+		}
+		if (debug)
+			printk(KERN_DEBUG "pktgen: dst_max set to: %s\n",
+			       pkt_dev->dst_max);
+		i += len;
+		sprintf(pg_result, "OK: dst_max=%s", pkt_dev->dst_max);
+		return count;
+	}
+	if (!strcmp(name, "dst6")) {
+		len = strn_len(&user_buffer[i], sizeof(buf) - 1);
+		if (len < 0)
+			return len;
+
+		pkt_dev->flags |= F_IPV6;
+
+		if (copy_from_user(buf, &user_buffer[i], len))
+			return -EFAULT;
+		buf[len] = 0;
+
+		scan_ip6(buf, pkt_dev->in6_daddr.s6_addr);
+		snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->in6_daddr);
+
+		pkt_dev->cur_in6_daddr = pkt_dev->in6_daddr;
+
+		if (debug)
+			printk(KERN_DEBUG "pktgen: dst6 set to: %s\n", buf);
+
+		i += len;
+		sprintf(pg_result, "OK: dst6=%s", buf);
+		return count;
+	}
+	if (!strcmp(name, "dst6_min")) {
+		len = strn_len(&user_buffer[i], sizeof(buf) - 1);
+		if (len < 0)
+			return len;
+
+		pkt_dev->flags |= F_IPV6;
+
+		if (copy_from_user(buf, &user_buffer[i], len))
+			return -EFAULT;
+		buf[len] = 0;
+
+		scan_ip6(buf, pkt_dev->min_in6_daddr.s6_addr);
+		snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->min_in6_daddr);
+
+		pkt_dev->cur_in6_daddr = pkt_dev->min_in6_daddr;
+		if (debug)
+			printk(KERN_DEBUG "pktgen: dst6_min set to: %s\n", buf);
+
+		i += len;
+		sprintf(pg_result, "OK: dst6_min=%s", buf);
+		return count;
+	}
+	if (!strcmp(name, "dst6_max")) {
+		len = strn_len(&user_buffer[i], sizeof(buf) - 1);
+		if (len < 0)
+			return len;
+
+		pkt_dev->flags |= F_IPV6;
+
+		if (copy_from_user(buf, &user_buffer[i], len))
+			return -EFAULT;
+		buf[len] = 0;
+
+		scan_ip6(buf, pkt_dev->max_in6_daddr.s6_addr);
+		snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->max_in6_daddr);
+
+		if (debug)
+			printk(KERN_DEBUG "pktgen: dst6_max set to: %s\n", buf);
+
+		i += len;
+		sprintf(pg_result, "OK: dst6_max=%s", buf);
+		return count;
+	}
+	if (!strcmp(name, "src6")) {
+		len = strn_len(&user_buffer[i], sizeof(buf) - 1);
+		if (len < 0)
+			return len;
+
+		pkt_dev->flags |= F_IPV6;
+
+		if (copy_from_user(buf, &user_buffer[i], len))
+			return -EFAULT;
+		buf[len] = 0;
+
+		scan_ip6(buf, pkt_dev->in6_saddr.s6_addr);
+		snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->in6_saddr);
+
+		pkt_dev->cur_in6_saddr = pkt_dev->in6_saddr;
+
+		if (debug)
+			printk(KERN_DEBUG "pktgen: src6 set to: %s\n", buf);
+
+		i += len;
+		sprintf(pg_result, "OK: src6=%s", buf);
+		return count;
+	}
+	if (!strcmp(name, "src_min")) {
+		len = strn_len(&user_buffer[i], sizeof(pkt_dev->src_min) - 1);
+		if (len < 0)
+			return len;
+
+		if (copy_from_user(buf, &user_buffer[i], len))
+			return -EFAULT;
+		buf[len] = 0;
+		if (strcmp(buf, pkt_dev->src_min) != 0) {
+			memset(pkt_dev->src_min, 0, sizeof(pkt_dev->src_min));
+			strncpy(pkt_dev->src_min, buf, len);
+			pkt_dev->saddr_min = in_aton(pkt_dev->src_min);
+			pkt_dev->cur_saddr = pkt_dev->saddr_min;
+		}
+		if (debug)
+			printk(KERN_DEBUG "pktgen: src_min set to: %s\n",
+			       pkt_dev->src_min);
+		i += len;
+		sprintf(pg_result, "OK: src_min=%s", pkt_dev->src_min);
+		return count;
+	}
+	if (!strcmp(name, "src_max")) {
+		len = strn_len(&user_buffer[i], sizeof(pkt_dev->src_max) - 1);
+		if (len < 0)
+			return len;
+
+		if (copy_from_user(buf, &user_buffer[i], len))
+			return -EFAULT;
+		buf[len] = 0;
+		if (strcmp(buf, pkt_dev->src_max) != 0) {
+			memset(pkt_dev->src_max, 0, sizeof(pkt_dev->src_max));
+			strncpy(pkt_dev->src_max, buf, len);
+			pkt_dev->saddr_max = in_aton(pkt_dev->src_max);
+			pkt_dev->cur_saddr = pkt_dev->saddr_max;
+		}
+		if (debug)
+			printk(KERN_DEBUG "pktgen: src_max set to: %s\n",
+			       pkt_dev->src_max);
+		i += len;
+		sprintf(pg_result, "OK: src_max=%s", pkt_dev->src_max);
+		return count;
+	}
+	if (!strcmp(name, "dst_mac")) {
+		len = strn_len(&user_buffer[i], sizeof(valstr) - 1);
+		if (len < 0)
+			return len;
+
+		memset(valstr, 0, sizeof(valstr));
+		if (copy_from_user(valstr, &user_buffer[i], len))
+			return -EFAULT;
+
+		if (!mac_pton(valstr, pkt_dev->dst_mac))
+			return -EINVAL;
+		/* Set up Dest MAC */
+		memcpy(&pkt_dev->hh[0], pkt_dev->dst_mac, ETH_ALEN);
+
+		sprintf(pg_result, "OK: dstmac %pM", pkt_dev->dst_mac);
+		return count;
+	}
+	if (!strcmp(name, "src_mac")) {
+		len = strn_len(&user_buffer[i], sizeof(valstr) - 1);
+		if (len < 0)
+			return len;
+
+		memset(valstr, 0, sizeof(valstr));
+		if (copy_from_user(valstr, &user_buffer[i], len))
+			return -EFAULT;
+
+		if (!mac_pton(valstr, pkt_dev->src_mac))
+			return -EINVAL;
+		/* Set up Src MAC */
+		memcpy(&pkt_dev->hh[6], pkt_dev->src_mac, ETH_ALEN);
+
+		sprintf(pg_result, "OK: srcmac %pM", pkt_dev->src_mac);
+		return count;
+	}
+
+	if (!strcmp(name, "clear_counters")) {
+		pktgen_clear_counters(pkt_dev);
+		sprintf(pg_result, "OK: Clearing counters.\n");
+		return count;
+	}
+
+	if (!strcmp(name, "flows")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+		if (len < 0)
+			return len;
+
+		i += len;
+		if (value > MAX_CFLOWS)
+			value = MAX_CFLOWS;
+
+		pkt_dev->cflows = value;
+		sprintf(pg_result, "OK: flows=%u", pkt_dev->cflows);
+		return count;
+	}
+
+	if (!strcmp(name, "flowlen")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+		if (len < 0)
+			return len;
+
+		i += len;
+		pkt_dev->lflow = value;
+		sprintf(pg_result, "OK: flowlen=%u", pkt_dev->lflow);
+		return count;
+	}
+
+	if (!strcmp(name, "queue_map_min")) {
+		len = num_arg(&user_buffer[i], 5, &value);
+		if (len < 0)
+			return len;
+
+		i += len;
+		pkt_dev->queue_map_min = value;
+		sprintf(pg_result, "OK: queue_map_min=%u", pkt_dev->queue_map_min);
+		return count;
+	}
+
+	if (!strcmp(name, "queue_map_max")) {
+		len = num_arg(&user_buffer[i], 5, &value);
+		if (len < 0)
+			return len;
+
+		i += len;
+		pkt_dev->queue_map_max = value;
+		sprintf(pg_result, "OK: queue_map_max=%u", pkt_dev->queue_map_max);
+		return count;
+	}
+
+	if (!strcmp(name, "mpls")) {
+		unsigned n, cnt;
+
+		len = get_labels(&user_buffer[i], pkt_dev);
+		if (len < 0)
+			return len;
+		i += len;
+		cnt = sprintf(pg_result, "OK: mpls=");
+		for (n = 0; n < pkt_dev->nr_labels; n++)
+			cnt += sprintf(pg_result + cnt,
+				       "%08x%s", ntohl(pkt_dev->labels[n]),
+				       n == pkt_dev->nr_labels-1 ? "" : ",");
+
+		if (pkt_dev->nr_labels && pkt_dev->vlan_id != 0xffff) {
+			pkt_dev->vlan_id = 0xffff; /* turn off VLAN/SVLAN */
+			pkt_dev->svlan_id = 0xffff;
+
+			if (debug)
+				printk(KERN_DEBUG "pktgen: VLAN/SVLAN auto turned off\n");
+		}
+		return count;
+	}
+
+	if (!strcmp(name, "vlan_id")) {
+		len = num_arg(&user_buffer[i], 4, &value);
+		if (len < 0)
+			return len;
+
+		i += len;
+		if (value <= 4095) {
+			pkt_dev->vlan_id = value;  /* turn on VLAN */
+
+			if (debug)
+				printk(KERN_DEBUG "pktgen: VLAN turned on\n");
+
+			if (debug && pkt_dev->nr_labels)
+				printk(KERN_DEBUG "pktgen: MPLS auto turned off\n");
+
+			pkt_dev->nr_labels = 0;    /* turn off MPLS */
+			sprintf(pg_result, "OK: vlan_id=%u", pkt_dev->vlan_id);
+		} else {
+			pkt_dev->vlan_id = 0xffff; /* turn off VLAN/SVLAN */
+			pkt_dev->svlan_id = 0xffff;
+
+			if (debug)
+				printk(KERN_DEBUG "pktgen: VLAN/SVLAN turned off\n");
+		}
+		return count;
+	}
+
+	if (!strcmp(name, "vlan_p")) {
+		len = num_arg(&user_buffer[i], 1, &value);
+		if (len < 0)
+			return len;
+
+		i += len;
+		if ((value <= 7) && (pkt_dev->vlan_id != 0xffff)) {
+			pkt_dev->vlan_p = value;
+			sprintf(pg_result, "OK: vlan_p=%u", pkt_dev->vlan_p);
+		} else {
+			sprintf(pg_result, "ERROR: vlan_p must be 0-7");
+		}
+		return count;
+	}
+
+	if (!strcmp(name, "vlan_cfi")) {
+		len = num_arg(&user_buffer[i], 1, &value);
+		if (len < 0)
+			return len;
+
+		i += len;
+		if ((value <= 1) && (pkt_dev->vlan_id != 0xffff)) {
+			pkt_dev->vlan_cfi = value;
+			sprintf(pg_result, "OK: vlan_cfi=%u", pkt_dev->vlan_cfi);
+		} else {
+			sprintf(pg_result, "ERROR: vlan_cfi must be 0-1");
+		}
+		return count;
+	}
+
+	if (!strcmp(name, "svlan_id")) {
+		len = num_arg(&user_buffer[i], 4, &value);
+		if (len < 0)
+			return len;
+
+		i += len;
+		if ((value <= 4095) && ((pkt_dev->vlan_id != 0xffff))) {
+			pkt_dev->svlan_id = value;  /* turn on SVLAN */
+
+			if (debug)
+				printk(KERN_DEBUG "pktgen: SVLAN turned on\n");
+
+			if (debug && pkt_dev->nr_labels)
+				printk(KERN_DEBUG "pktgen: MPLS auto turned off\n");
+
+			pkt_dev->nr_labels = 0;    /* turn off MPLS */
+			sprintf(pg_result, "OK: svlan_id=%u", pkt_dev->svlan_id);
+		} else {
+			pkt_dev->vlan_id = 0xffff; /* turn off VLAN/SVLAN */
+			pkt_dev->svlan_id = 0xffff;
+
+			if (debug)
+				printk(KERN_DEBUG "pktgen: VLAN/SVLAN turned off\n");
+		}
+		return count;
+	}
+
+	if (!strcmp(name, "svlan_p")) {
+		len = num_arg(&user_buffer[i], 1, &value);
+		if (len < 0)
+			return len;
+
+		i += len;
+		if ((value <= 7) && (pkt_dev->svlan_id != 0xffff)) {
+			pkt_dev->svlan_p = value;
+			sprintf(pg_result, "OK: svlan_p=%u", pkt_dev->svlan_p);
+		} else {
+			sprintf(pg_result, "ERROR: svlan_p must be 0-7");
+		}
+		return count;
+	}
+
+	if (!strcmp(name, "svlan_cfi")) {
+		len = num_arg(&user_buffer[i], 1, &value);
+		if (len < 0)
+			return len;
+
+		i += len;
+		if ((value <= 1) && (pkt_dev->svlan_id != 0xffff)) {
+			pkt_dev->svlan_cfi = value;
+			sprintf(pg_result, "OK: svlan_cfi=%u", pkt_dev->svlan_cfi);
+		} else {
+			sprintf(pg_result, "ERROR: svlan_cfi must be 0-1");
+		}
+		return count;
+	}
+
+	if (!strcmp(name, "tos")) {
+		__u32 tmp_value = 0;
+		len = hex32_arg(&user_buffer[i], 2, &tmp_value);
+		if (len < 0)
+			return len;
+
+		i += len;
+		if (len == 2) {
+			pkt_dev->tos = tmp_value;
+			sprintf(pg_result, "OK: tos=0x%02x", pkt_dev->tos);
+		} else {
+			sprintf(pg_result, "ERROR: tos must be 00-ff");
+		}
+		return count;
+	}
+
+	if (!strcmp(name, "traffic_class")) {
+		__u32 tmp_value = 0;
+		len = hex32_arg(&user_buffer[i], 2, &tmp_value);
+		if (len < 0)
+			return len;
+
+		i += len;
+		if (len == 2) {
+			pkt_dev->traffic_class = tmp_value;
+			sprintf(pg_result, "OK: traffic_class=0x%02x", pkt_dev->traffic_class);
+		} else {
+			sprintf(pg_result, "ERROR: traffic_class must be 00-ff");
+		}
+		return count;
+	}
+
+	if (!strcmp(name, "skb_priority")) {
+		len = num_arg(&user_buffer[i], 9, &value);
+		if (len < 0)
+			return len;
+
+		i += len;
+		pkt_dev->skb_priority = value;
+		sprintf(pg_result, "OK: skb_priority=%i",
+			pkt_dev->skb_priority);
+		return count;
+	}
+
+	sprintf(pkt_dev->result, "No such parameter \"%s\"", name);
+	return -EINVAL;
+}
+
+static int pktgen_if_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, pktgen_if_show, PDE(inode)->data);
+}
+
+static const struct file_operations pktgen_if_fops = {
+	.owner   = THIS_MODULE,
+	.open    = pktgen_if_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.write   = pktgen_if_write,
+	.release = single_release,
+};
+
+static int pktgen_thread_show(struct seq_file *seq, void *v)
+{
+	struct pktgen_thread *t = seq->private;
+	const struct pktgen_dev *pkt_dev;
+
+	BUG_ON(!t);
+
+	seq_printf(seq, "Running: ");
+
+	if_lock(t);
+	list_for_each_entry(pkt_dev, &t->if_list, list)
+		if (pkt_dev->running)
+			seq_printf(seq, "%s ", pkt_dev->odevname);
+
+	seq_printf(seq, "\nStopped: ");
+
+	list_for_each_entry(pkt_dev, &t->if_list, list)
+		if (!pkt_dev->running)
+			seq_printf(seq, "%s ", pkt_dev->odevname);
+
+	if (t->result[0])
+		seq_printf(seq, "\nResult: %s\n", t->result);
+	else
+		seq_printf(seq, "\nResult: NA\n");
+
+	if_unlock(t);
+
+	return 0;
+}
+
+static ssize_t pktgen_thread_write(struct file *file,
+				   const char __user * user_buffer,
+				   size_t count, loff_t * offset)
+{
+	struct seq_file *seq = file->private_data;
+	struct pktgen_thread *t = seq->private;
+	int i, max, len, ret;
+	char name[40];
+	char *pg_result;
+
+	if (count < 1) {
+		//      sprintf(pg_result, "Wrong command format");
+		return -EINVAL;
+	}
+
+	max = count;
+	len = count_trail_chars(user_buffer, max);
+	if (len < 0)
+		return len;
+
+	i = len;
+
+	/* Read variable name */
+
+	len = strn_len(&user_buffer[i], sizeof(name) - 1);
+	if (len < 0)
+		return len;
+
+	memset(name, 0, sizeof(name));
+	if (copy_from_user(name, &user_buffer[i], len))
+		return -EFAULT;
+	i += len;
+
+	max = count - i;
+	len = count_trail_chars(&user_buffer[i], max);
+	if (len < 0)
+		return len;
+
+	i += len;
+
+	if (debug)
+		printk(KERN_DEBUG "pktgen: t=%s, count=%lu\n",
+		       name, (unsigned long)count);
+
+	if (!t) {
+		pr_err("ERROR: No thread\n");
+		ret = -EINVAL;
+		goto out;
+	}
+
+	pg_result = &(t->result[0]);
+
+	if (!strcmp(name, "add_device")) {
+		char f[32];
+		memset(f, 0, 32);
+		len = strn_len(&user_buffer[i], sizeof(f) - 1);
+		if (len < 0) {
+			ret = len;
+			goto out;
+		}
+		if (copy_from_user(f, &user_buffer[i], len))
+			return -EFAULT;
+		i += len;
+		mutex_lock(&pktgen_thread_lock);
+		pktgen_add_device(t, f);
+		mutex_unlock(&pktgen_thread_lock);
+		ret = count;
+		sprintf(pg_result, "OK: add_device=%s", f);
+		goto out;
+	}
+
+	if (!strcmp(name, "rem_device_all")) {
+		mutex_lock(&pktgen_thread_lock);
+		t->control |= T_REMDEVALL;
+		mutex_unlock(&pktgen_thread_lock);
+		schedule_timeout_interruptible(msecs_to_jiffies(125));	/* Propagate thread->control  */
+		ret = count;
+		sprintf(pg_result, "OK: rem_device_all");
+		goto out;
+	}
+
+	if (!strcmp(name, "max_before_softirq")) {
+		sprintf(pg_result, "OK: Note! max_before_softirq is obsoleted -- Do not use");
+		ret = count;
+		goto out;
+	}
+
+	ret = -EINVAL;
+out:
+	return ret;
+}
+
+static int pktgen_thread_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, pktgen_thread_show, PDE(inode)->data);
+}
+
+static const struct file_operations pktgen_thread_fops = {
+	.owner   = THIS_MODULE,
+	.open    = pktgen_thread_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.write   = pktgen_thread_write,
+	.release = single_release,
+};
+
+/* Think find or remove for NN */
+static struct pktgen_dev *__pktgen_NN_threads(const char *ifname, int remove)
+{
+	struct pktgen_thread *t;
+	struct pktgen_dev *pkt_dev = NULL;
+	bool exact = (remove == FIND);
+
+	list_for_each_entry(t, &pktgen_threads, th_list) {
+		pkt_dev = pktgen_find_dev(t, ifname, exact);
+		if (pkt_dev) {
+			if (remove) {
+				if_lock(t);
+				pkt_dev->removal_mark = 1;
+				t->control |= T_REMDEV;
+				if_unlock(t);
+			}
+			break;
+		}
+	}
+	return pkt_dev;
+}
+
+/*
+ * mark a device for removal
+ */
+static void pktgen_mark_device(const char *ifname)
+{
+	struct pktgen_dev *pkt_dev = NULL;
+	const int max_tries = 10, msec_per_try = 125;
+	int i = 0;
+
+	mutex_lock(&pktgen_thread_lock);
+	pr_debug("%s: marking %s for removal\n", __func__, ifname);
+
+	while (1) {
+
+		pkt_dev = __pktgen_NN_threads(ifname, REMOVE);
+		if (pkt_dev == NULL)
+			break;	/* success */
+
+		mutex_unlock(&pktgen_thread_lock);
+		pr_debug("%s: waiting for %s to disappear....\n",
+			 __func__, ifname);
+		schedule_timeout_interruptible(msecs_to_jiffies(msec_per_try));
+		mutex_lock(&pktgen_thread_lock);
+
+		if (++i >= max_tries) {
+			pr_err("%s: timed out after waiting %d msec for device %s to be removed\n",
+			       __func__, msec_per_try * i, ifname);
+			break;
+		}
+
+	}
+
+	mutex_unlock(&pktgen_thread_lock);
+}
+
+static void pktgen_change_name(struct net_device *dev)
+{
+	struct pktgen_thread *t;
+
+	list_for_each_entry(t, &pktgen_threads, th_list) {
+		struct pktgen_dev *pkt_dev;
+
+		list_for_each_entry(pkt_dev, &t->if_list, list) {
+			if (pkt_dev->odev != dev)
+				continue;
+
+			remove_proc_entry(pkt_dev->entry->name, pg_proc_dir);
+
+			pkt_dev->entry = proc_create_data(dev->name, 0600,
+							  pg_proc_dir,
+							  &pktgen_if_fops,
+							  pkt_dev);
+			if (!pkt_dev->entry)
+				pr_err("can't move proc entry for '%s'\n",
+				       dev->name);
+			break;
+		}
+	}
+}
+
+static int pktgen_device_event(struct notifier_block *unused,
+			       unsigned long event, void *ptr)
+{
+	struct net_device *dev = ptr;
+
+	if (!net_eq(dev_net(dev), &init_net) || pktgen_exiting)
+		return NOTIFY_DONE;
+
+	/* It is OK that we do not hold the group lock right now,
+	 * as we run under the RTNL lock.
+	 */
+
+	switch (event) {
+	case NETDEV_CHANGENAME:
+		pktgen_change_name(dev);
+		break;
+
+	case NETDEV_UNREGISTER:
+		pktgen_mark_device(dev->name);
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static struct net_device *pktgen_dev_get_by_name(struct pktgen_dev *pkt_dev,
+						 const char *ifname)
+{
+	char b[IFNAMSIZ+5];
+	int i;
+
+	for (i = 0; ifname[i] != '@'; i++) {
+		if (i == IFNAMSIZ)
+			break;
+
+		b[i] = ifname[i];
+	}
+	b[i] = 0;
+
+	return dev_get_by_name(&init_net, b);
+}
+
+
+/* Associate pktgen_dev with a device. */
+
+static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname)
+{
+	struct net_device *odev;
+	int err;
+
+	/* Clean old setups */
+	if (pkt_dev->odev) {
+		dev_put(pkt_dev->odev);
+		pkt_dev->odev = NULL;
+	}
+
+	odev = pktgen_dev_get_by_name(pkt_dev, ifname);
+	if (!odev) {
+		pr_err("no such netdevice: \"%s\"\n", ifname);
+		return -ENODEV;
+	}
+
+	if (odev->type != ARPHRD_ETHER) {
+		pr_err("not an ethernet device: \"%s\"\n", ifname);
+		err = -EINVAL;
+	} else if (!netif_running(odev)) {
+		pr_err("device is down: \"%s\"\n", ifname);
+		err = -ENETDOWN;
+	} else {
+		pkt_dev->odev = odev;
+		return 0;
+	}
+
+	dev_put(odev);
+	return err;
+}
+
+/* Read pkt_dev from the interface and set up internal pktgen_dev
+ * structure to have the right information to create/send packets
+ */
+static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
+{
+	int ntxq;
+
+	if (!pkt_dev->odev) {
+		pr_err("ERROR: pkt_dev->odev == NULL in setup_inject\n");
+		sprintf(pkt_dev->result,
+			"ERROR: pkt_dev->odev == NULL in setup_inject.\n");
+		return;
+	}
+
+	/* make sure that we don't pick a non-existing transmit queue */
+	ntxq = pkt_dev->odev->real_num_tx_queues;
+
+	if (ntxq <= pkt_dev->queue_map_min) {
+		pr_warning("WARNING: Requested queue_map_min (zero-based) (%d) exceeds valid range [0 - %d] for (%d) queues on %s, resetting\n",
+			   pkt_dev->queue_map_min, (ntxq ?: 1) - 1, ntxq,
+			   pkt_dev->odevname);
+		pkt_dev->queue_map_min = (ntxq ?: 1) - 1;
+	}
+	if (pkt_dev->queue_map_max >= ntxq) {
+		pr_warning("WARNING: Requested queue_map_max (zero-based) (%d) exceeds valid range [0 - %d] for (%d) queues on %s, resetting\n",
+			   pkt_dev->queue_map_max, (ntxq ?: 1) - 1, ntxq,
+			   pkt_dev->odevname);
+		pkt_dev->queue_map_max = (ntxq ?: 1) - 1;
+	}
+
+	/* Default to the interface's mac if not explicitly set. */
+
+	if (is_zero_ether_addr(pkt_dev->src_mac))
+		memcpy(&(pkt_dev->hh[6]), pkt_dev->odev->dev_addr, ETH_ALEN);
+
+	/* Set up Dest MAC */
+	memcpy(&(pkt_dev->hh[0]), pkt_dev->dst_mac, ETH_ALEN);
+
+	/* Set up pkt size */
+	pkt_dev->cur_pkt_size = pkt_dev->min_pkt_size;
+
+	if (pkt_dev->flags & F_IPV6) {
+		/*
+		 * Skip this automatic address setting until locks or functions
+		 * gets exported
+		 */
+
+#ifdef NOTNOW
+		int i, set = 0, err = 1;
+		struct inet6_dev *idev;
+
+		for (i = 0; i < IN6_ADDR_HSIZE; i++)
+			if (pkt_dev->cur_in6_saddr.s6_addr[i]) {
+				set = 1;
+				break;
+			}
+
+		if (!set) {
+
+			/*
+			 * Use linklevel address if unconfigured.
+			 *
+			 * use ipv6_get_lladdr if/when it's get exported
+			 */
+
+			rcu_read_lock();
+			idev = __in6_dev_get(pkt_dev->odev);
+			if (idev) {
+				struct inet6_ifaddr *ifp;
+
+				read_lock_bh(&idev->lock);
+				for (ifp = idev->addr_list; ifp;
+				     ifp = ifp->if_next) {
+					if (ifp->scope == IFA_LINK &&
+					    !(ifp->flags & IFA_F_TENTATIVE)) {
+						pkt_dev->cur_in6_saddr = ifp->addr;
+						err = 0;
+						break;
+					}
+				}
+				read_unlock_bh(&idev->lock);
+			}
+			rcu_read_unlock();
+			if (err)
+				pr_err("ERROR: IPv6 link address not available\n");
+		}
+#endif
+	} else {
+		pkt_dev->saddr_min = 0;
+		pkt_dev->saddr_max = 0;
+		if (strlen(pkt_dev->src_min) == 0) {
+
+			struct in_device *in_dev;
+
+			rcu_read_lock();
+			in_dev = __in_dev_get_rcu(pkt_dev->odev);
+			if (in_dev) {
+				if (in_dev->ifa_list) {
+					pkt_dev->saddr_min =
+					    in_dev->ifa_list->ifa_address;
+					pkt_dev->saddr_max = pkt_dev->saddr_min;
+				}
+			}
+			rcu_read_unlock();
+		} else {
+			pkt_dev->saddr_min = in_aton(pkt_dev->src_min);
+			pkt_dev->saddr_max = in_aton(pkt_dev->src_max);
+		}
+
+		pkt_dev->daddr_min = in_aton(pkt_dev->dst_min);
+		pkt_dev->daddr_max = in_aton(pkt_dev->dst_max);
+	}
+	/* Initialize current values. */
+	pkt_dev->cur_dst_mac_offset = 0;
+	pkt_dev->cur_src_mac_offset = 0;
+	pkt_dev->cur_saddr = pkt_dev->saddr_min;
+	pkt_dev->cur_daddr = pkt_dev->daddr_min;
+	pkt_dev->cur_udp_dst = pkt_dev->udp_dst_min;
+	pkt_dev->cur_udp_src = pkt_dev->udp_src_min;
+	pkt_dev->nflows = 0;
+}
+
+
+static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
+{
+	ktime_t start_time, end_time;
+	s64 remaining;
+	struct hrtimer_sleeper t;
+
+	hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	hrtimer_set_expires(&t.timer, spin_until);
+
+	remaining = ktime_to_ns(hrtimer_expires_remaining(&t.timer));
+	if (remaining <= 0) {
+		pkt_dev->next_tx = ktime_add_ns(spin_until, pkt_dev->delay);
+		return;
+	}
+
+	start_time = ktime_now();
+	if (remaining < 100000) {
+		/* for small delays (<100us), just loop until limit is reached */
+		do {
+			end_time = ktime_now();
+		} while (ktime_lt(end_time, spin_until));
+	} else {
+		/* see do_nanosleep */
+		hrtimer_init_sleeper(&t, current);
+		do {
+			set_current_state(TASK_INTERRUPTIBLE);
+			hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS);
+			if (!hrtimer_active(&t.timer))
+				t.task = NULL;
+
+			if (likely(t.task))
+				schedule();
+
+			hrtimer_cancel(&t.timer);
+		} while (t.task && pkt_dev->running && !signal_pending(current));
+		__set_current_state(TASK_RUNNING);
+		end_time = ktime_now();
+	}
+
+	pkt_dev->idle_acc += ktime_to_ns(ktime_sub(end_time, start_time));
+	pkt_dev->next_tx = ktime_add_ns(spin_until, pkt_dev->delay);
+}
+
+static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev)
+{
+	pkt_dev->pkt_overhead = 0;
+	pkt_dev->pkt_overhead += pkt_dev->nr_labels*sizeof(u32);
+	pkt_dev->pkt_overhead += VLAN_TAG_SIZE(pkt_dev);
+	pkt_dev->pkt_overhead += SVLAN_TAG_SIZE(pkt_dev);
+}
+
+static inline int f_seen(const struct pktgen_dev *pkt_dev, int flow)
+{
+	return !!(pkt_dev->flows[flow].flags & F_INIT);
+}
+
+static inline int f_pick(struct pktgen_dev *pkt_dev)
+{
+	int flow = pkt_dev->curfl;
+
+	if (pkt_dev->flags & F_FLOW_SEQ) {
+		if (pkt_dev->flows[flow].count >= pkt_dev->lflow) {
+			/* reset time */
+			pkt_dev->flows[flow].count = 0;
+			pkt_dev->flows[flow].flags = 0;
+			pkt_dev->curfl += 1;
+			if (pkt_dev->curfl >= pkt_dev->cflows)
+				pkt_dev->curfl = 0; /*reset */
+		}
+	} else {
+		flow = random32() % pkt_dev->cflows;
+		pkt_dev->curfl = flow;
+
+		if (pkt_dev->flows[flow].count > pkt_dev->lflow) {
+			pkt_dev->flows[flow].count = 0;
+			pkt_dev->flows[flow].flags = 0;
+		}
+	}
+
+	return pkt_dev->curfl;
+}
+
+
+#ifdef CONFIG_XFRM
+/* If there was already an IPSEC SA, we keep it as is, else
+ * we go look for it ...
+*/
+#define DUMMY_MARK 0
+static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow)
+{
+	struct xfrm_state *x = pkt_dev->flows[flow].x;
+	if (!x) {
+		/*slow path: we dont already have xfrm_state*/
+		x = xfrm_stateonly_find(&init_net, DUMMY_MARK,
+					(xfrm_address_t *)&pkt_dev->cur_daddr,
+					(xfrm_address_t *)&pkt_dev->cur_saddr,
+					AF_INET,
+					pkt_dev->ipsmode,
+					pkt_dev->ipsproto, 0);
+		if (x) {
+			pkt_dev->flows[flow].x = x;
+			set_pkt_overhead(pkt_dev);
+			pkt_dev->pkt_overhead += x->props.header_len;
+		}
+
+	}
+}
+#endif
+static void set_cur_queue_map(struct pktgen_dev *pkt_dev)
+{
+
+	if (pkt_dev->flags & F_QUEUE_MAP_CPU)
+		pkt_dev->cur_queue_map = smp_processor_id();
+
+	else if (pkt_dev->queue_map_min <= pkt_dev->queue_map_max) {
+		__u16 t;
+		if (pkt_dev->flags & F_QUEUE_MAP_RND) {
+			t = random32() %
+				(pkt_dev->queue_map_max -
+				 pkt_dev->queue_map_min + 1)
+				+ pkt_dev->queue_map_min;
+		} else {
+			t = pkt_dev->cur_queue_map + 1;
+			if (t > pkt_dev->queue_map_max)
+				t = pkt_dev->queue_map_min;
+		}
+		pkt_dev->cur_queue_map = t;
+	}
+	pkt_dev->cur_queue_map  = pkt_dev->cur_queue_map % pkt_dev->odev->real_num_tx_queues;
+}
+
+/* Increment/randomize headers according to flags and current values
+ * for IP src/dest, UDP src/dst port, MAC-Addr src/dst
+ */
+static void mod_cur_headers(struct pktgen_dev *pkt_dev)
+{
+	__u32 imn;
+	__u32 imx;
+	int flow = 0;
+
+	if (pkt_dev->cflows)
+		flow = f_pick(pkt_dev);
+
+	/*  Deal with source MAC */
+	if (pkt_dev->src_mac_count > 1) {
+		__u32 mc;
+		__u32 tmp;
+
+		if (pkt_dev->flags & F_MACSRC_RND)
+			mc = random32() % pkt_dev->src_mac_count;
+		else {
+			mc = pkt_dev->cur_src_mac_offset++;
+			if (pkt_dev->cur_src_mac_offset >=
+			    pkt_dev->src_mac_count)
+				pkt_dev->cur_src_mac_offset = 0;
+		}
+
+		tmp = pkt_dev->src_mac[5] + (mc & 0xFF);
+		pkt_dev->hh[11] = tmp;
+		tmp = (pkt_dev->src_mac[4] + ((mc >> 8) & 0xFF) + (tmp >> 8));
+		pkt_dev->hh[10] = tmp;
+		tmp = (pkt_dev->src_mac[3] + ((mc >> 16) & 0xFF) + (tmp >> 8));
+		pkt_dev->hh[9] = tmp;
+		tmp = (pkt_dev->src_mac[2] + ((mc >> 24) & 0xFF) + (tmp >> 8));
+		pkt_dev->hh[8] = tmp;
+		tmp = (pkt_dev->src_mac[1] + (tmp >> 8));
+		pkt_dev->hh[7] = tmp;
+	}
+
+	/*  Deal with Destination MAC */
+	if (pkt_dev->dst_mac_count > 1) {
+		__u32 mc;
+		__u32 tmp;
+
+		if (pkt_dev->flags & F_MACDST_RND)
+			mc = random32() % pkt_dev->dst_mac_count;
+
+		else {
+			mc = pkt_dev->cur_dst_mac_offset++;
+			if (pkt_dev->cur_dst_mac_offset >=
+			    pkt_dev->dst_mac_count) {
+				pkt_dev->cur_dst_mac_offset = 0;
+			}
+		}
+
+		tmp = pkt_dev->dst_mac[5] + (mc & 0xFF);
+		pkt_dev->hh[5] = tmp;
+		tmp = (pkt_dev->dst_mac[4] + ((mc >> 8) & 0xFF) + (tmp >> 8));
+		pkt_dev->hh[4] = tmp;
+		tmp = (pkt_dev->dst_mac[3] + ((mc >> 16) & 0xFF) + (tmp >> 8));
+		pkt_dev->hh[3] = tmp;
+		tmp = (pkt_dev->dst_mac[2] + ((mc >> 24) & 0xFF) + (tmp >> 8));
+		pkt_dev->hh[2] = tmp;
+		tmp = (pkt_dev->dst_mac[1] + (tmp >> 8));
+		pkt_dev->hh[1] = tmp;
+	}
+
+	if (pkt_dev->flags & F_MPLS_RND) {
+		unsigned i;
+		for (i = 0; i < pkt_dev->nr_labels; i++)
+			if (pkt_dev->labels[i] & MPLS_STACK_BOTTOM)
+				pkt_dev->labels[i] = MPLS_STACK_BOTTOM |
+					     ((__force __be32)random32() &
+						      htonl(0x000fffff));
+	}
+
+	if ((pkt_dev->flags & F_VID_RND) && (pkt_dev->vlan_id != 0xffff)) {
+		pkt_dev->vlan_id = random32() & (4096-1);
+	}
+
+	if ((pkt_dev->flags & F_SVID_RND) && (pkt_dev->svlan_id != 0xffff)) {
+		pkt_dev->svlan_id = random32() & (4096 - 1);
+	}
+
+	if (pkt_dev->udp_src_min < pkt_dev->udp_src_max) {
+		if (pkt_dev->flags & F_UDPSRC_RND)
+			pkt_dev->cur_udp_src = random32() %
+				(pkt_dev->udp_src_max - pkt_dev->udp_src_min)
+				+ pkt_dev->udp_src_min;
+
+		else {
+			pkt_dev->cur_udp_src++;
+			if (pkt_dev->cur_udp_src >= pkt_dev->udp_src_max)
+				pkt_dev->cur_udp_src = pkt_dev->udp_src_min;
+		}
+	}
+
+	if (pkt_dev->udp_dst_min < pkt_dev->udp_dst_max) {
+		if (pkt_dev->flags & F_UDPDST_RND) {
+			pkt_dev->cur_udp_dst = random32() %
+				(pkt_dev->udp_dst_max - pkt_dev->udp_dst_min)
+				+ pkt_dev->udp_dst_min;
+		} else {
+			pkt_dev->cur_udp_dst++;
+			if (pkt_dev->cur_udp_dst >= pkt_dev->udp_dst_max)
+				pkt_dev->cur_udp_dst = pkt_dev->udp_dst_min;
+		}
+	}
+
+	if (!(pkt_dev->flags & F_IPV6)) {
+
+		imn = ntohl(pkt_dev->saddr_min);
+		imx = ntohl(pkt_dev->saddr_max);
+		if (imn < imx) {
+			__u32 t;
+			if (pkt_dev->flags & F_IPSRC_RND)
+				t = random32() % (imx - imn) + imn;
+			else {
+				t = ntohl(pkt_dev->cur_saddr);
+				t++;
+				if (t > imx)
+					t = imn;
+
+			}
+			pkt_dev->cur_saddr = htonl(t);
+		}
+
+		if (pkt_dev->cflows && f_seen(pkt_dev, flow)) {
+			pkt_dev->cur_daddr = pkt_dev->flows[flow].cur_daddr;
+		} else {
+			imn = ntohl(pkt_dev->daddr_min);
+			imx = ntohl(pkt_dev->daddr_max);
+			if (imn < imx) {
+				__u32 t;
+				__be32 s;
+				if (pkt_dev->flags & F_IPDST_RND) {
+
+					t = random32() % (imx - imn) + imn;
+					s = htonl(t);
+
+					while (ipv4_is_loopback(s) ||
+					       ipv4_is_multicast(s) ||
+					       ipv4_is_lbcast(s) ||
+					       ipv4_is_zeronet(s) ||
+					       ipv4_is_local_multicast(s)) {
+						t = random32() % (imx - imn) + imn;
+						s = htonl(t);
+					}
+					pkt_dev->cur_daddr = s;
+				} else {
+					t = ntohl(pkt_dev->cur_daddr);
+					t++;
+					if (t > imx) {
+						t = imn;
+					}
+					pkt_dev->cur_daddr = htonl(t);
+				}
+			}
+			if (pkt_dev->cflows) {
+				pkt_dev->flows[flow].flags |= F_INIT;
+				pkt_dev->flows[flow].cur_daddr =
+				    pkt_dev->cur_daddr;
+#ifdef CONFIG_XFRM
+				if (pkt_dev->flags & F_IPSEC_ON)
+					get_ipsec_sa(pkt_dev, flow);
+#endif
+				pkt_dev->nflows++;
+			}
+		}
+	} else {		/* IPV6 * */
+
+		if (pkt_dev->min_in6_daddr.s6_addr32[0] == 0 &&
+		    pkt_dev->min_in6_daddr.s6_addr32[1] == 0 &&
+		    pkt_dev->min_in6_daddr.s6_addr32[2] == 0 &&
+		    pkt_dev->min_in6_daddr.s6_addr32[3] == 0) ;
+		else {
+			int i;
+
+			/* Only random destinations yet */
+
+			for (i = 0; i < 4; i++) {
+				pkt_dev->cur_in6_daddr.s6_addr32[i] =
+				    (((__force __be32)random32() |
+				      pkt_dev->min_in6_daddr.s6_addr32[i]) &
+				     pkt_dev->max_in6_daddr.s6_addr32[i]);
+			}
+		}
+	}
+
+	if (pkt_dev->min_pkt_size < pkt_dev->max_pkt_size) {
+		__u32 t;
+		if (pkt_dev->flags & F_TXSIZE_RND) {
+			t = random32() %
+				(pkt_dev->max_pkt_size - pkt_dev->min_pkt_size)
+				+ pkt_dev->min_pkt_size;
+		} else {
+			t = pkt_dev->cur_pkt_size + 1;
+			if (t > pkt_dev->max_pkt_size)
+				t = pkt_dev->min_pkt_size;
+		}
+		pkt_dev->cur_pkt_size = t;
+	}
+
+	set_cur_queue_map(pkt_dev);
+
+	pkt_dev->flows[flow].count++;
+}
+
+
+#ifdef CONFIG_XFRM
+static int pktgen_output_ipsec(struct sk_buff *skb, struct pktgen_dev *pkt_dev)
+{
+	struct xfrm_state *x = pkt_dev->flows[pkt_dev->curfl].x;
+	int err = 0;
+
+	if (!x)
+		return 0;
+	/* XXX: we dont support tunnel mode for now until
+	 * we resolve the dst issue */
+	if (x->props.mode != XFRM_MODE_TRANSPORT)
+		return 0;
+
+	spin_lock(&x->lock);
+
+	err = x->outer_mode->output(x, skb);
+	if (err)
+		goto error;
+	err = x->type->output(x, skb);
+	if (err)
+		goto error;
+
+	x->curlft.bytes += skb->len;
+	x->curlft.packets++;
+error:
+	spin_unlock(&x->lock);
+	return err;
+}
+
+static void free_SAs(struct pktgen_dev *pkt_dev)
+{
+	if (pkt_dev->cflows) {
+		/* let go of the SAs if we have them */
+		int i;
+		for (i = 0; i < pkt_dev->cflows; i++) {
+			struct xfrm_state *x = pkt_dev->flows[i].x;
+			if (x) {
+				xfrm_state_put(x);
+				pkt_dev->flows[i].x = NULL;
+			}
+		}
+	}
+}
+
+static int process_ipsec(struct pktgen_dev *pkt_dev,
+			      struct sk_buff *skb, __be16 protocol)
+{
+	if (pkt_dev->flags & F_IPSEC_ON) {
+		struct xfrm_state *x = pkt_dev->flows[pkt_dev->curfl].x;
+		int nhead = 0;
+		if (x) {
+			int ret;
+			__u8 *eth;
+			nhead = x->props.header_len - skb_headroom(skb);
+			if (nhead > 0) {
+				ret = pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
+				if (ret < 0) {
+					pr_err("Error expanding ipsec packet %d\n",
+					       ret);
+					goto err;
+				}
+			}
+
+			/* ipsec is not expecting ll header */
+			skb_pull(skb, ETH_HLEN);
+			ret = pktgen_output_ipsec(skb, pkt_dev);
+			if (ret) {
+				pr_err("Error creating ipsec packet %d\n", ret);
+				goto err;
+			}
+			/* restore ll */
+			eth = (__u8 *) skb_push(skb, ETH_HLEN);
+			memcpy(eth, pkt_dev->hh, 12);
+			*(u16 *) &eth[12] = protocol;
+		}
+	}
+	return 1;
+err:
+	kfree_skb(skb);
+	return 0;
+}
+#endif
+
+static void mpls_push(__be32 *mpls, struct pktgen_dev *pkt_dev)
+{
+	unsigned i;
+	for (i = 0; i < pkt_dev->nr_labels; i++)
+		*mpls++ = pkt_dev->labels[i] & ~MPLS_STACK_BOTTOM;
+
+	mpls--;
+	*mpls |= MPLS_STACK_BOTTOM;
+}
+
+static inline __be16 build_tci(unsigned int id, unsigned int cfi,
+			       unsigned int prio)
+{
+	return htons(id | (cfi << 12) | (prio << 13));
+}
+
+static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb,
+				int datalen)
+{
+	struct timeval timestamp;
+	struct pktgen_hdr *pgh;
+
+	pgh = (struct pktgen_hdr *)skb_put(skb, sizeof(*pgh));
+	datalen -= sizeof(*pgh);
+
+	if (pkt_dev->nfrags <= 0) {
+		memset(skb_put(skb, datalen), 0, datalen);
+	} else {
+		int frags = pkt_dev->nfrags;
+		int i, len;
+		int frag_len;
+
+
+		if (frags > MAX_SKB_FRAGS)
+			frags = MAX_SKB_FRAGS;
+		len = datalen - frags * PAGE_SIZE;
+		if (len > 0) {
+			memset(skb_put(skb, len), 0, len);
+			datalen = frags * PAGE_SIZE;
+		}
+
+		i = 0;
+		frag_len = (datalen/frags) < PAGE_SIZE ?
+			   (datalen/frags) : PAGE_SIZE;
+		while (datalen > 0) {
+			if (unlikely(!pkt_dev->page)) {
+				int node = numa_node_id();
+
+				if (pkt_dev->node >= 0 && (pkt_dev->flags & F_NODE))
+					node = pkt_dev->node;
+				pkt_dev->page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
+				if (!pkt_dev->page)
+					break;
+			}
+			get_page(pkt_dev->page);
+			skb_frag_set_page(skb, i, pkt_dev->page);
+			skb_shinfo(skb)->frags[i].page_offset = 0;
+			/*last fragment, fill rest of data*/
+			if (i == (frags - 1))
+				skb_frag_size_set(&skb_shinfo(skb)->frags[i],
+				    (datalen < PAGE_SIZE ? datalen : PAGE_SIZE));
+			else
+				skb_frag_size_set(&skb_shinfo(skb)->frags[i], frag_len);
+			datalen -= skb_frag_size(&skb_shinfo(skb)->frags[i]);
+			skb->len += skb_frag_size(&skb_shinfo(skb)->frags[i]);
+			skb->data_len += skb_frag_size(&skb_shinfo(skb)->frags[i]);
+			i++;
+			skb_shinfo(skb)->nr_frags = i;
+		}
+	}
+
+	/* Stamp the time, and sequence number,
+	 * convert them to network byte order
+	 */
+	pgh->pgh_magic = htonl(PKTGEN_MAGIC);
+	pgh->seq_num = htonl(pkt_dev->seq_num);
+
+	do_gettimeofday(&timestamp);
+	pgh->tv_sec = htonl(timestamp.tv_sec);
+	pgh->tv_usec = htonl(timestamp.tv_usec);
+}
+
+static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
+					struct pktgen_dev *pkt_dev)
+{
+	struct sk_buff *skb = NULL;
+	__u8 *eth;
+	struct udphdr *udph;
+	int datalen, iplen;
+	struct iphdr *iph;
+	__be16 protocol = htons(ETH_P_IP);
+	__be32 *mpls;
+	__be16 *vlan_tci = NULL;                 /* Encapsulates priority and VLAN ID */
+	__be16 *vlan_encapsulated_proto = NULL;  /* packet type ID field (or len) for VLAN tag */
+	__be16 *svlan_tci = NULL;                /* Encapsulates priority and SVLAN ID */
+	__be16 *svlan_encapsulated_proto = NULL; /* packet type ID field (or len) for SVLAN tag */
+	u16 queue_map;
+
+	if (pkt_dev->nr_labels)
+		protocol = htons(ETH_P_MPLS_UC);
+
+	if (pkt_dev->vlan_id != 0xffff)
+		protocol = htons(ETH_P_8021Q);
+
+	/* Update any of the values, used when we're incrementing various
+	 * fields.
+	 */
+	mod_cur_headers(pkt_dev);
+	queue_map = pkt_dev->cur_queue_map;
+
+	datalen = (odev->hard_header_len + 16) & ~0xf;
+
+	if (pkt_dev->flags & F_NODE) {
+		int node;
+
+		if (pkt_dev->node >= 0)
+			node = pkt_dev->node;
+		else
+			node =  numa_node_id();
+
+		skb = __alloc_skb(NET_SKB_PAD + pkt_dev->cur_pkt_size + 64
+				  + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT, 0, node);
+		if (likely(skb)) {
+			skb_reserve(skb, NET_SKB_PAD);
+			skb->dev = odev;
+		}
+	}
+	else
+	  skb = __netdev_alloc_skb(odev,
+				   pkt_dev->cur_pkt_size + 64
+				   + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT);
+
+	if (!skb) {
+		sprintf(pkt_dev->result, "No memory");
+		return NULL;
+	}
+	prefetchw(skb->data);
+
+	skb_reserve(skb, datalen);
+
+	/*  Reserve for ethernet and IP header  */
+	eth = (__u8 *) skb_push(skb, 14);
+	mpls = (__be32 *)skb_put(skb, pkt_dev->nr_labels*sizeof(__u32));
+	if (pkt_dev->nr_labels)
+		mpls_push(mpls, pkt_dev);
+
+	if (pkt_dev->vlan_id != 0xffff) {
+		if (pkt_dev->svlan_id != 0xffff) {
+			svlan_tci = (__be16 *)skb_put(skb, sizeof(__be16));
+			*svlan_tci = build_tci(pkt_dev->svlan_id,
+					       pkt_dev->svlan_cfi,
+					       pkt_dev->svlan_p);
+			svlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16));
+			*svlan_encapsulated_proto = htons(ETH_P_8021Q);
+		}
+		vlan_tci = (__be16 *)skb_put(skb, sizeof(__be16));
+		*vlan_tci = build_tci(pkt_dev->vlan_id,
+				      pkt_dev->vlan_cfi,
+				      pkt_dev->vlan_p);
+		vlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16));
+		*vlan_encapsulated_proto = htons(ETH_P_IP);
+	}
+
+	skb->network_header = skb->tail;
+	skb->transport_header = skb->network_header + sizeof(struct iphdr);
+	skb_put(skb, sizeof(struct iphdr) + sizeof(struct udphdr));
+	skb_set_queue_mapping(skb, queue_map);
+	skb->priority = pkt_dev->skb_priority;
+
+	iph = ip_hdr(skb);
+	udph = udp_hdr(skb);
+
+	memcpy(eth, pkt_dev->hh, 12);
+	*(__be16 *) & eth[12] = protocol;
+
+	/* Eth + IPh + UDPh + mpls */
+	datalen = pkt_dev->cur_pkt_size - 14 - 20 - 8 -
+		  pkt_dev->pkt_overhead;
+	if (datalen < sizeof(struct pktgen_hdr))
+		datalen = sizeof(struct pktgen_hdr);
+
+	udph->source = htons(pkt_dev->cur_udp_src);
+	udph->dest = htons(pkt_dev->cur_udp_dst);
+	udph->len = htons(datalen + 8);	/* DATA + udphdr */
+	udph->check = 0;	/* No checksum */
+
+	iph->ihl = 5;
+	iph->version = 4;
+	iph->ttl = 32;
+	iph->tos = pkt_dev->tos;
+	iph->protocol = IPPROTO_UDP;	/* UDP */
+	iph->saddr = pkt_dev->cur_saddr;
+	iph->daddr = pkt_dev->cur_daddr;
+	iph->id = htons(pkt_dev->ip_id);
+	pkt_dev->ip_id++;
+	iph->frag_off = 0;
+	iplen = 20 + 8 + datalen;
+	iph->tot_len = htons(iplen);
+	iph->check = 0;
+	iph->check = ip_fast_csum((void *)iph, iph->ihl);
+	skb->protocol = protocol;
+	skb->mac_header = (skb->network_header - ETH_HLEN -
+			   pkt_dev->pkt_overhead);
+	skb->dev = odev;
+	skb->pkt_type = PACKET_HOST;
+	pktgen_finalize_skb(pkt_dev, skb, datalen);
+
+#ifdef CONFIG_XFRM
+	if (!process_ipsec(pkt_dev, skb, protocol))
+		return NULL;
+#endif
+
+	return skb;
+}
+
+/*
+ * scan_ip6, fmt_ip taken from dietlibc-0.21
+ * Author Felix von Leitner <felix-dietlibc@fefe.de>
+ *
+ * Slightly modified for kernel.
+ * Should be candidate for net/ipv4/utils.c
+ * --ro
+ */
+
+static unsigned int scan_ip6(const char *s, char ip[16])
+{
+	unsigned int i;
+	unsigned int len = 0;
+	unsigned long u;
+	char suffix[16];
+	unsigned int prefixlen = 0;
+	unsigned int suffixlen = 0;
+	__be32 tmp;
+	char *pos;
+
+	for (i = 0; i < 16; i++)
+		ip[i] = 0;
+
+	for (;;) {
+		if (*s == ':') {
+			len++;
+			if (s[1] == ':') {	/* Found "::", skip to part 2 */
+				s += 2;
+				len++;
+				break;
+			}
+			s++;
+		}
+
+		u = simple_strtoul(s, &pos, 16);
+		i = pos - s;
+		if (!i)
+			return 0;
+		if (prefixlen == 12 && s[i] == '.') {
+
+			/* the last 4 bytes may be written as IPv4 address */
+
+			tmp = in_aton(s);
+			memcpy((struct in_addr *)(ip + 12), &tmp, sizeof(tmp));
+			return i + len;
+		}
+		ip[prefixlen++] = (u >> 8);
+		ip[prefixlen++] = (u & 255);
+		s += i;
+		len += i;
+		if (prefixlen == 16)
+			return len;
+	}
+
+/* part 2, after "::" */
+	for (;;) {
+		if (*s == ':') {
+			if (suffixlen == 0)
+				break;
+			s++;
+			len++;
+		} else if (suffixlen != 0)
+			break;
+
+		u = simple_strtol(s, &pos, 16);
+		i = pos - s;
+		if (!i) {
+			if (*s)
+				len--;
+			break;
+		}
+		if (suffixlen + prefixlen <= 12 && s[i] == '.') {
+			tmp = in_aton(s);
+			memcpy((struct in_addr *)(suffix + suffixlen), &tmp,
+			       sizeof(tmp));
+			suffixlen += 4;
+			len += strlen(s);
+			break;
+		}
+		suffix[suffixlen++] = (u >> 8);
+		suffix[suffixlen++] = (u & 255);
+		s += i;
+		len += i;
+		if (prefixlen + suffixlen == 16)
+			break;
+	}
+	for (i = 0; i < suffixlen; i++)
+		ip[16 - suffixlen + i] = suffix[i];
+	return len;
+}
+
+static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
+					struct pktgen_dev *pkt_dev)
+{
+	struct sk_buff *skb = NULL;
+	__u8 *eth;
+	struct udphdr *udph;
+	int datalen;
+	struct ipv6hdr *iph;
+	__be16 protocol = htons(ETH_P_IPV6);
+	__be32 *mpls;
+	__be16 *vlan_tci = NULL;                 /* Encapsulates priority and VLAN ID */
+	__be16 *vlan_encapsulated_proto = NULL;  /* packet type ID field (or len) for VLAN tag */
+	__be16 *svlan_tci = NULL;                /* Encapsulates priority and SVLAN ID */
+	__be16 *svlan_encapsulated_proto = NULL; /* packet type ID field (or len) for SVLAN tag */
+	u16 queue_map;
+
+	if (pkt_dev->nr_labels)
+		protocol = htons(ETH_P_MPLS_UC);
+
+	if (pkt_dev->vlan_id != 0xffff)
+		protocol = htons(ETH_P_8021Q);
+
+	/* Update any of the values, used when we're incrementing various
+	 * fields.
+	 */
+	mod_cur_headers(pkt_dev);
+	queue_map = pkt_dev->cur_queue_map;
+
+	skb = __netdev_alloc_skb(odev,
+				 pkt_dev->cur_pkt_size + 64
+				 + 16 + pkt_dev->pkt_overhead, GFP_NOWAIT);
+	if (!skb) {
+		sprintf(pkt_dev->result, "No memory");
+		return NULL;
+	}
+	prefetchw(skb->data);
+
+	skb_reserve(skb, 16);
+
+	/*  Reserve for ethernet and IP header  */
+	eth = (__u8 *) skb_push(skb, 14);
+	mpls = (__be32 *)skb_put(skb, pkt_dev->nr_labels*sizeof(__u32));
+	if (pkt_dev->nr_labels)
+		mpls_push(mpls, pkt_dev);
+
+	if (pkt_dev->vlan_id != 0xffff) {
+		if (pkt_dev->svlan_id != 0xffff) {
+			svlan_tci = (__be16 *)skb_put(skb, sizeof(__be16));
+			*svlan_tci = build_tci(pkt_dev->svlan_id,
+					       pkt_dev->svlan_cfi,
+					       pkt_dev->svlan_p);
+			svlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16));
+			*svlan_encapsulated_proto = htons(ETH_P_8021Q);
+		}
+		vlan_tci = (__be16 *)skb_put(skb, sizeof(__be16));
+		*vlan_tci = build_tci(pkt_dev->vlan_id,
+				      pkt_dev->vlan_cfi,
+				      pkt_dev->vlan_p);
+		vlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16));
+		*vlan_encapsulated_proto = htons(ETH_P_IPV6);
+	}
+
+	skb->network_header = skb->tail;
+	skb->transport_header = skb->network_header + sizeof(struct ipv6hdr);
+	skb_put(skb, sizeof(struct ipv6hdr) + sizeof(struct udphdr));
+	skb_set_queue_mapping(skb, queue_map);
+	skb->priority = pkt_dev->skb_priority;
+	iph = ipv6_hdr(skb);
+	udph = udp_hdr(skb);
+
+	memcpy(eth, pkt_dev->hh, 12);
+	*(__be16 *) &eth[12] = protocol;
+
+	/* Eth + IPh + UDPh + mpls */
+	datalen = pkt_dev->cur_pkt_size - 14 -
+		  sizeof(struct ipv6hdr) - sizeof(struct udphdr) -
+		  pkt_dev->pkt_overhead;
+
+	if (datalen < sizeof(struct pktgen_hdr)) {
+		datalen = sizeof(struct pktgen_hdr);
+		if (net_ratelimit())
+			pr_info("increased datalen to %d\n", datalen);
+	}
+
+	udph->source = htons(pkt_dev->cur_udp_src);
+	udph->dest = htons(pkt_dev->cur_udp_dst);
+	udph->len = htons(datalen + sizeof(struct udphdr));
+	udph->check = 0;	/* No checksum */
+
+	*(__be32 *) iph = htonl(0x60000000);	/* Version + flow */
+
+	if (pkt_dev->traffic_class) {
+		/* Version + traffic class + flow (0) */
+		*(__be32 *)iph |= htonl(0x60000000 | (pkt_dev->traffic_class << 20));
+	}
+
+	iph->hop_limit = 32;
+
+	iph->payload_len = htons(sizeof(struct udphdr) + datalen);
+	iph->nexthdr = IPPROTO_UDP;
+
+	iph->daddr = pkt_dev->cur_in6_daddr;
+	iph->saddr = pkt_dev->cur_in6_saddr;
+
+	skb->mac_header = (skb->network_header - ETH_HLEN -
+			   pkt_dev->pkt_overhead);
+	skb->protocol = protocol;
+	skb->dev = odev;
+	skb->pkt_type = PACKET_HOST;
+
+	pktgen_finalize_skb(pkt_dev, skb, datalen);
+
+	return skb;
+}
+
+static struct sk_buff *fill_packet(struct net_device *odev,
+				   struct pktgen_dev *pkt_dev)
+{
+	if (pkt_dev->flags & F_IPV6)
+		return fill_packet_ipv6(odev, pkt_dev);
+	else
+		return fill_packet_ipv4(odev, pkt_dev);
+}
+
+static void pktgen_clear_counters(struct pktgen_dev *pkt_dev)
+{
+	pkt_dev->seq_num = 1;
+	pkt_dev->idle_acc = 0;
+	pkt_dev->sofar = 0;
+	pkt_dev->tx_bytes = 0;
+	pkt_dev->errors = 0;
+}
+
+/* Set up structure for sending pkts, clear counters */
+
+static void pktgen_run(struct pktgen_thread *t)
+{
+	struct pktgen_dev *pkt_dev;
+	int started = 0;
+
+	func_enter();
+
+	if_lock(t);
+	list_for_each_entry(pkt_dev, &t->if_list, list) {
+
+		/*
+		 * setup odev and create initial packet.
+		 */
+		pktgen_setup_inject(pkt_dev);
+
+		if (pkt_dev->odev) {
+			pktgen_clear_counters(pkt_dev);
+			pkt_dev->running = 1;	/* Cranke yeself! */
+			pkt_dev->skb = NULL;
+			pkt_dev->started_at =
+				pkt_dev->next_tx = ktime_now();
+
+			set_pkt_overhead(pkt_dev);
+
+			strcpy(pkt_dev->result, "Starting");
+			started++;
+		} else
+			strcpy(pkt_dev->result, "Error starting");
+	}
+	if_unlock(t);
+	if (started)
+		t->control &= ~(T_STOP);
+}
+
+static void pktgen_stop_all_threads_ifs(void)
+{
+	struct pktgen_thread *t;
+
+	func_enter();
+
+	mutex_lock(&pktgen_thread_lock);
+
+	list_for_each_entry(t, &pktgen_threads, th_list)
+		t->control |= T_STOP;
+
+	mutex_unlock(&pktgen_thread_lock);
+}
+
+static int thread_is_running(const struct pktgen_thread *t)
+{
+	const struct pktgen_dev *pkt_dev;
+
+	list_for_each_entry(pkt_dev, &t->if_list, list)
+		if (pkt_dev->running)
+			return 1;
+	return 0;
+}
+
+static int pktgen_wait_thread_run(struct pktgen_thread *t)
+{
+	if_lock(t);
+
+	while (thread_is_running(t)) {
+
+		if_unlock(t);
+
+		msleep_interruptible(100);
+
+		if (signal_pending(current))
+			goto signal;
+		if_lock(t);
+	}
+	if_unlock(t);
+	return 1;
+signal:
+	return 0;
+}
+
+static int pktgen_wait_all_threads_run(void)
+{
+	struct pktgen_thread *t;
+	int sig = 1;
+
+	mutex_lock(&pktgen_thread_lock);
+
+	list_for_each_entry(t, &pktgen_threads, th_list) {
+		sig = pktgen_wait_thread_run(t);
+		if (sig == 0)
+			break;
+	}
+
+	if (sig == 0)
+		list_for_each_entry(t, &pktgen_threads, th_list)
+			t->control |= (T_STOP);
+
+	mutex_unlock(&pktgen_thread_lock);
+	return sig;
+}
+
+static void pktgen_run_all_threads(void)
+{
+	struct pktgen_thread *t;
+
+	func_enter();
+
+	mutex_lock(&pktgen_thread_lock);
+
+	list_for_each_entry(t, &pktgen_threads, th_list)
+		t->control |= (T_RUN);
+
+	mutex_unlock(&pktgen_thread_lock);
+
+	/* Propagate thread->control  */
+	schedule_timeout_interruptible(msecs_to_jiffies(125));
+
+	pktgen_wait_all_threads_run();
+}
+
+static void pktgen_reset_all_threads(void)
+{
+	struct pktgen_thread *t;
+
+	func_enter();
+
+	mutex_lock(&pktgen_thread_lock);
+
+	list_for_each_entry(t, &pktgen_threads, th_list)
+		t->control |= (T_REMDEVALL);
+
+	mutex_unlock(&pktgen_thread_lock);
+
+	/* Propagate thread->control  */
+	schedule_timeout_interruptible(msecs_to_jiffies(125));
+
+	pktgen_wait_all_threads_run();
+}
+
+static void show_results(struct pktgen_dev *pkt_dev, int nr_frags)
+{
+	__u64 bps, mbps, pps;
+	char *p = pkt_dev->result;
+	ktime_t elapsed = ktime_sub(pkt_dev->stopped_at,
+				    pkt_dev->started_at);
+	ktime_t idle = ns_to_ktime(pkt_dev->idle_acc);
+
+	p += sprintf(p, "OK: %llu(c%llu+d%llu) usec, %llu (%dbyte,%dfrags)\n",
+		     (unsigned long long)ktime_to_us(elapsed),
+		     (unsigned long long)ktime_to_us(ktime_sub(elapsed, idle)),
+		     (unsigned long long)ktime_to_us(idle),
+		     (unsigned long long)pkt_dev->sofar,
+		     pkt_dev->cur_pkt_size, nr_frags);
+
+	pps = div64_u64(pkt_dev->sofar * NSEC_PER_SEC,
+			ktime_to_ns(elapsed));
+
+	bps = pps * 8 * pkt_dev->cur_pkt_size;
+
+	mbps = bps;
+	do_div(mbps, 1000000);
+	p += sprintf(p, "  %llupps %lluMb/sec (%llubps) errors: %llu",
+		     (unsigned long long)pps,
+		     (unsigned long long)mbps,
+		     (unsigned long long)bps,
+		     (unsigned long long)pkt_dev->errors);
+}
+
+/* Set stopped-at timer, remove from running list, do counters & statistics */
+static int pktgen_stop_device(struct pktgen_dev *pkt_dev)
+{
+	int nr_frags = pkt_dev->skb ? skb_shinfo(pkt_dev->skb)->nr_frags : -1;
+
+	if (!pkt_dev->running) {
+		pr_warning("interface: %s is already stopped\n",
+			   pkt_dev->odevname);
+		return -EINVAL;
+	}
+
+	kfree_skb(pkt_dev->skb);
+	pkt_dev->skb = NULL;
+	pkt_dev->stopped_at = ktime_now();
+	pkt_dev->running = 0;
+
+	show_results(pkt_dev, nr_frags);
+
+	return 0;
+}
+
+static struct pktgen_dev *next_to_run(struct pktgen_thread *t)
+{
+	struct pktgen_dev *pkt_dev, *best = NULL;
+
+	if_lock(t);
+
+	list_for_each_entry(pkt_dev, &t->if_list, list) {
+		if (!pkt_dev->running)
+			continue;
+		if (best == NULL)
+			best = pkt_dev;
+		else if (ktime_lt(pkt_dev->next_tx, best->next_tx))
+			best = pkt_dev;
+	}
+	if_unlock(t);
+	return best;
+}
+
+static void pktgen_stop(struct pktgen_thread *t)
+{
+	struct pktgen_dev *pkt_dev;
+
+	func_enter();
+
+	if_lock(t);
+
+	list_for_each_entry(pkt_dev, &t->if_list, list) {
+		pktgen_stop_device(pkt_dev);
+	}
+
+	if_unlock(t);
+}
+
+/*
+ * one of our devices needs to be removed - find it
+ * and remove it
+ */
+static void pktgen_rem_one_if(struct pktgen_thread *t)
+{
+	struct list_head *q, *n;
+	struct pktgen_dev *cur;
+
+	func_enter();
+
+	if_lock(t);
+
+	list_for_each_safe(q, n, &t->if_list) {
+		cur = list_entry(q, struct pktgen_dev, list);
+
+		if (!cur->removal_mark)
+			continue;
+
+		kfree_skb(cur->skb);
+		cur->skb = NULL;
+
+		pktgen_remove_device(t, cur);
+
+		break;
+	}
+
+	if_unlock(t);
+}
+
+static void pktgen_rem_all_ifs(struct pktgen_thread *t)
+{
+	struct list_head *q, *n;
+	struct pktgen_dev *cur;
+
+	func_enter();
+
+	/* Remove all devices, free mem */
+
+	if_lock(t);
+
+	list_for_each_safe(q, n, &t->if_list) {
+		cur = list_entry(q, struct pktgen_dev, list);
+
+		kfree_skb(cur->skb);
+		cur->skb = NULL;
+
+		pktgen_remove_device(t, cur);
+	}
+
+	if_unlock(t);
+}
+
+static void pktgen_rem_thread(struct pktgen_thread *t)
+{
+	/* Remove from the thread list */
+
+	remove_proc_entry(t->tsk->comm, pg_proc_dir);
+
+}
+
+static void pktgen_resched(struct pktgen_dev *pkt_dev)
+{
+	ktime_t idle_start = ktime_now();
+	schedule();
+	pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_now(), idle_start));
+}
+
+static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev)
+{
+	ktime_t idle_start = ktime_now();
+
+	while (atomic_read(&(pkt_dev->skb->users)) != 1) {
+		if (signal_pending(current))
+			break;
+
+		if (need_resched())
+			pktgen_resched(pkt_dev);
+		else
+			cpu_relax();
+	}
+	pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_now(), idle_start));
+}
+
+static void pktgen_xmit(struct pktgen_dev *pkt_dev)
+{
+	struct net_device *odev = pkt_dev->odev;
+	netdev_tx_t (*xmit)(struct sk_buff *, struct net_device *)
+		= odev->netdev_ops->ndo_start_xmit;
+	struct netdev_queue *txq;
+	u16 queue_map;
+	int ret;
+
+	/* If device is offline, then don't send */
+	if (unlikely(!netif_running(odev) || !netif_carrier_ok(odev))) {
+		pktgen_stop_device(pkt_dev);
+		return;
+	}
+
+	/* This is max DELAY, this has special meaning of
+	 * "never transmit"
+	 */
+	if (unlikely(pkt_dev->delay == ULLONG_MAX)) {
+		pkt_dev->next_tx = ktime_add_ns(ktime_now(), ULONG_MAX);
+		return;
+	}
+
+	/* If no skb or clone count exhausted then get new one */
+	if (!pkt_dev->skb || (pkt_dev->last_ok &&
+			      ++pkt_dev->clone_count >= pkt_dev->clone_skb)) {
+		/* build a new pkt */
+		kfree_skb(pkt_dev->skb);
+
+		pkt_dev->skb = fill_packet(odev, pkt_dev);
+		if (pkt_dev->skb == NULL) {
+			pr_err("ERROR: couldn't allocate skb in fill_packet\n");
+			schedule();
+			pkt_dev->clone_count--;	/* back out increment, OOM */
+			return;
+		}
+		pkt_dev->last_pkt_size = pkt_dev->skb->len;
+		pkt_dev->allocated_skbs++;
+		pkt_dev->clone_count = 0;	/* reset counter */
+	}
+
+	if (pkt_dev->delay && pkt_dev->last_ok)
+		spin(pkt_dev, pkt_dev->next_tx);
+
+	queue_map = skb_get_queue_mapping(pkt_dev->skb);
+	txq = netdev_get_tx_queue(odev, queue_map);
+
+	__netif_tx_lock_bh(txq);
+
+	if (unlikely(netif_xmit_frozen_or_stopped(txq))) {
+		ret = NETDEV_TX_BUSY;
+		pkt_dev->last_ok = 0;
+		goto unlock;
+	}
+	atomic_inc(&(pkt_dev->skb->users));
+	ret = (*xmit)(pkt_dev->skb, odev);
+
+	switch (ret) {
+	case NETDEV_TX_OK:
+		txq_trans_update(txq);
+		pkt_dev->last_ok = 1;
+		pkt_dev->sofar++;
+		pkt_dev->seq_num++;
+		pkt_dev->tx_bytes += pkt_dev->last_pkt_size;
+		break;
+	case NET_XMIT_DROP:
+	case NET_XMIT_CN:
+	case NET_XMIT_POLICED:
+		/* skb has been consumed */
+		pkt_dev->errors++;
+		break;
+	default: /* Drivers are not supposed to return other values! */
+		if (net_ratelimit())
+			pr_info("%s xmit error: %d\n", pkt_dev->odevname, ret);
+		pkt_dev->errors++;
+		/* fallthru */
+	case NETDEV_TX_LOCKED:
+	case NETDEV_TX_BUSY:
+		/* Retry it next time */
+		atomic_dec(&(pkt_dev->skb->users));
+		pkt_dev->last_ok = 0;
+	}
+unlock:
+	__netif_tx_unlock_bh(txq);
+
+	/* If pkt_dev->count is zero, then run forever */
+	if ((pkt_dev->count != 0) && (pkt_dev->sofar >= pkt_dev->count)) {
+		pktgen_wait_for_skb(pkt_dev);
+
+		/* Done with this */
+		pktgen_stop_device(pkt_dev);
+	}
+}
+
+/*
+ * Main loop of the thread goes here
+ */
+
+static int pktgen_thread_worker(void *arg)
+{
+	DEFINE_WAIT(wait);
+	struct pktgen_thread *t = arg;
+	struct pktgen_dev *pkt_dev = NULL;
+	int cpu = t->cpu;
+
+	BUG_ON(smp_processor_id() != cpu);
+
+	init_waitqueue_head(&t->queue);
+	complete(&t->start_done);
+
+	pr_debug("starting pktgen/%d:  pid=%d\n", cpu, task_pid_nr(current));
+
+	set_current_state(TASK_INTERRUPTIBLE);
+
+	set_freezable();
+
+	while (!kthread_should_stop()) {
+		pkt_dev = next_to_run(t);
+
+		if (unlikely(!pkt_dev && t->control == 0)) {
+			if (pktgen_exiting)
+				break;
+			wait_event_interruptible_timeout(t->queue,
+							 t->control != 0,
+							 HZ/10);
+			try_to_freeze();
+			continue;
+		}
+
+		__set_current_state(TASK_RUNNING);
+
+		if (likely(pkt_dev)) {
+			pktgen_xmit(pkt_dev);
+
+			if (need_resched())
+				pktgen_resched(pkt_dev);
+			else
+				cpu_relax();
+		}
+
+		if (t->control & T_STOP) {
+			pktgen_stop(t);
+			t->control &= ~(T_STOP);
+		}
+
+		if (t->control & T_RUN) {
+			pktgen_run(t);
+			t->control &= ~(T_RUN);
+		}
+
+		if (t->control & T_REMDEVALL) {
+			pktgen_rem_all_ifs(t);
+			t->control &= ~(T_REMDEVALL);
+		}
+
+		if (t->control & T_REMDEV) {
+			pktgen_rem_one_if(t);
+			t->control &= ~(T_REMDEV);
+		}
+
+		try_to_freeze();
+
+		set_current_state(TASK_INTERRUPTIBLE);
+	}
+
+	pr_debug("%s stopping all device\n", t->tsk->comm);
+	pktgen_stop(t);
+
+	pr_debug("%s removing all device\n", t->tsk->comm);
+	pktgen_rem_all_ifs(t);
+
+	pr_debug("%s removing thread\n", t->tsk->comm);
+	pktgen_rem_thread(t);
+
+	/* Wait for kthread_stop */
+	while (!kthread_should_stop()) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule();
+	}
+	__set_current_state(TASK_RUNNING);
+
+	return 0;
+}
+
+static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t,
+					  const char *ifname, bool exact)
+{
+	struct pktgen_dev *p, *pkt_dev = NULL;
+	size_t len = strlen(ifname);
+
+	if_lock(t);
+	list_for_each_entry(p, &t->if_list, list)
+		if (strncmp(p->odevname, ifname, len) == 0) {
+			if (p->odevname[len]) {
+				if (exact || p->odevname[len] != '@')
+					continue;
+			}
+			pkt_dev = p;
+			break;
+		}
+
+	if_unlock(t);
+	pr_debug("find_dev(%s) returning %p\n", ifname, pkt_dev);
+	return pkt_dev;
+}
+
+/*
+ * Adds a dev at front of if_list.
+ */
+
+static int add_dev_to_thread(struct pktgen_thread *t,
+			     struct pktgen_dev *pkt_dev)
+{
+	int rv = 0;
+
+	if_lock(t);
+
+	if (pkt_dev->pg_thread) {
+		pr_err("ERROR: already assigned to a thread\n");
+		rv = -EBUSY;
+		goto out;
+	}
+
+	list_add(&pkt_dev->list, &t->if_list);
+	pkt_dev->pg_thread = t;
+	pkt_dev->running = 0;
+
+out:
+	if_unlock(t);
+	return rv;
+}
+
+/* Called under thread lock */
+
+static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
+{
+	struct pktgen_dev *pkt_dev;
+	int err;
+	int node = cpu_to_node(t->cpu);
+
+	/* We don't allow a device to be on several threads */
+
+	pkt_dev = __pktgen_NN_threads(ifname, FIND);
+	if (pkt_dev) {
+		pr_err("ERROR: interface already used\n");
+		return -EBUSY;
+	}
+
+	pkt_dev = kzalloc_node(sizeof(struct pktgen_dev), GFP_KERNEL, node);
+	if (!pkt_dev)
+		return -ENOMEM;
+
+	strcpy(pkt_dev->odevname, ifname);
+	pkt_dev->flows = vzalloc_node(MAX_CFLOWS * sizeof(struct flow_state),
+				      node);
+	if (pkt_dev->flows == NULL) {
+		kfree(pkt_dev);
+		return -ENOMEM;
+	}
+
+	pkt_dev->removal_mark = 0;
+	pkt_dev->min_pkt_size = ETH_ZLEN;
+	pkt_dev->max_pkt_size = ETH_ZLEN;
+	pkt_dev->nfrags = 0;
+	pkt_dev->delay = pg_delay_d;
+	pkt_dev->count = pg_count_d;
+	pkt_dev->sofar = 0;
+	pkt_dev->udp_src_min = 9;	/* sink port */
+	pkt_dev->udp_src_max = 9;
+	pkt_dev->udp_dst_min = 9;
+	pkt_dev->udp_dst_max = 9;
+	pkt_dev->vlan_p = 0;
+	pkt_dev->vlan_cfi = 0;
+	pkt_dev->vlan_id = 0xffff;
+	pkt_dev->svlan_p = 0;
+	pkt_dev->svlan_cfi = 0;
+	pkt_dev->svlan_id = 0xffff;
+	pkt_dev->node = -1;
+
+	err = pktgen_setup_dev(pkt_dev, ifname);
+	if (err)
+		goto out1;
+	if (pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING)
+		pkt_dev->clone_skb = pg_clone_skb_d;
+
+	pkt_dev->entry = proc_create_data(ifname, 0600, pg_proc_dir,
+					  &pktgen_if_fops, pkt_dev);
+	if (!pkt_dev->entry) {
+		pr_err("cannot create %s/%s procfs entry\n",
+		       PG_PROC_DIR, ifname);
+		err = -EINVAL;
+		goto out2;
+	}
+#ifdef CONFIG_XFRM
+	pkt_dev->ipsmode = XFRM_MODE_TRANSPORT;
+	pkt_dev->ipsproto = IPPROTO_ESP;
+#endif
+
+	return add_dev_to_thread(t, pkt_dev);
+out2:
+	dev_put(pkt_dev->odev);
+out1:
+#ifdef CONFIG_XFRM
+	free_SAs(pkt_dev);
+#endif
+	vfree(pkt_dev->flows);
+	kfree(pkt_dev);
+	return err;
+}
+
+static int __init pktgen_create_thread(int cpu)
+{
+	struct pktgen_thread *t;
+	struct proc_dir_entry *pe;
+	struct task_struct *p;
+
+	t = kzalloc_node(sizeof(struct pktgen_thread), GFP_KERNEL,
+			 cpu_to_node(cpu));
+	if (!t) {
+		pr_err("ERROR: out of memory, can't create new thread\n");
+		return -ENOMEM;
+	}
+
+	spin_lock_init(&t->if_lock);
+	t->cpu = cpu;
+
+	INIT_LIST_HEAD(&t->if_list);
+
+	list_add_tail(&t->th_list, &pktgen_threads);
+	init_completion(&t->start_done);
+
+	p = kthread_create_on_node(pktgen_thread_worker,
+				   t,
+				   cpu_to_node(cpu),
+				   "kpktgend_%d", cpu);
+	if (IS_ERR(p)) {
+		pr_err("kernel_thread() failed for cpu %d\n", t->cpu);
+		list_del(&t->th_list);
+		kfree(t);
+		return PTR_ERR(p);
+	}
+	kthread_bind(p, cpu);
+	t->tsk = p;
+
+	pe = proc_create_data(t->tsk->comm, 0600, pg_proc_dir,
+			      &pktgen_thread_fops, t);
+	if (!pe) {
+		pr_err("cannot create %s/%s procfs entry\n",
+		       PG_PROC_DIR, t->tsk->comm);
+		kthread_stop(p);
+		list_del(&t->th_list);
+		kfree(t);
+		return -EINVAL;
+	}
+
+	wake_up_process(p);
+	wait_for_completion(&t->start_done);
+
+	return 0;
+}
+
+/*
+ * Removes a device from the thread if_list.
+ */
+static void _rem_dev_from_if_list(struct pktgen_thread *t,
+				  struct pktgen_dev *pkt_dev)
+{
+	struct list_head *q, *n;
+	struct pktgen_dev *p;
+
+	list_for_each_safe(q, n, &t->if_list) {
+		p = list_entry(q, struct pktgen_dev, list);
+		if (p == pkt_dev)
+			list_del(&p->list);
+	}
+}
+
+static int pktgen_remove_device(struct pktgen_thread *t,
+				struct pktgen_dev *pkt_dev)
+{
+
+	pr_debug("remove_device pkt_dev=%p\n", pkt_dev);
+
+	if (pkt_dev->running) {
+		pr_warning("WARNING: trying to remove a running interface, stopping it now\n");
+		pktgen_stop_device(pkt_dev);
+	}
+
+	/* Dis-associate from the interface */
+
+	if (pkt_dev->odev) {
+		dev_put(pkt_dev->odev);
+		pkt_dev->odev = NULL;
+	}
+
+	/* And update the thread if_list */
+
+	_rem_dev_from_if_list(t, pkt_dev);
+
+	if (pkt_dev->entry)
+		remove_proc_entry(pkt_dev->entry->name, pg_proc_dir);
+
+#ifdef CONFIG_XFRM
+	free_SAs(pkt_dev);
+#endif
+	vfree(pkt_dev->flows);
+	if (pkt_dev->page)
+		put_page(pkt_dev->page);
+	kfree(pkt_dev);
+	return 0;
+}
+
+static int __init pg_init(void)
+{
+	int cpu;
+	struct proc_dir_entry *pe;
+	int ret = 0;
+
+	pr_info("%s", version);
+
+	pg_proc_dir = proc_mkdir(PG_PROC_DIR, init_net.proc_net);
+	if (!pg_proc_dir)
+		return -ENODEV;
+
+	pe = proc_create(PGCTRL, 0600, pg_proc_dir, &pktgen_fops);
+	if (pe == NULL) {
+		pr_err("ERROR: cannot create %s procfs entry\n", PGCTRL);
+		ret = -EINVAL;
+		goto remove_dir;
+	}
+
+	register_netdevice_notifier(&pktgen_notifier_block);
+
+	for_each_online_cpu(cpu) {
+		int err;
+
+		err = pktgen_create_thread(cpu);
+		if (err)
+			pr_warning("WARNING: Cannot create thread for cpu %d (%d)\n",
+				   cpu, err);
+	}
+
+	if (list_empty(&pktgen_threads)) {
+		pr_err("ERROR: Initialization failed for all threads\n");
+		ret = -ENODEV;
+		goto unregister;
+	}
+
+	return 0;
+
+ unregister:
+	unregister_netdevice_notifier(&pktgen_notifier_block);
+	remove_proc_entry(PGCTRL, pg_proc_dir);
+ remove_dir:
+	proc_net_remove(&init_net, PG_PROC_DIR);
+	return ret;
+}
+
+static void __exit pg_cleanup(void)
+{
+	struct pktgen_thread *t;
+	struct list_head *q, *n;
+	LIST_HEAD(list);
+
+	/* Stop all interfaces & threads */
+	pktgen_exiting = true;
+
+	mutex_lock(&pktgen_thread_lock);
+	list_splice_init(&pktgen_threads, &list);
+	mutex_unlock(&pktgen_thread_lock);
+
+	list_for_each_safe(q, n, &list) {
+		t = list_entry(q, struct pktgen_thread, th_list);
+		list_del(&t->th_list);
+		kthread_stop(t->tsk);
+		kfree(t);
+	}
+
+	/* Un-register us from receiving netdevice events */
+	unregister_netdevice_notifier(&pktgen_notifier_block);
+
+	/* Clean up proc file system */
+	remove_proc_entry(PGCTRL, pg_proc_dir);
+	proc_net_remove(&init_net, PG_PROC_DIR);
+}
+
+module_init(pg_init);
+module_exit(pg_cleanup);
+
+MODULE_AUTHOR("Robert Olsson <robert.olsson@its.uu.se>");
+MODULE_DESCRIPTION("Packet Generator tool");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(VERSION);
+module_param(pg_count_d, int, 0);
+MODULE_PARM_DESC(pg_count_d, "Default number of packets to inject");
+module_param(pg_delay_d, int, 0);
+MODULE_PARM_DESC(pg_delay_d, "Default delay between packets (nanoseconds)");
+module_param(pg_clone_skb_d, int, 0);
+MODULE_PARM_DESC(pg_clone_skb_d, "Default number of copies of the same packet");
+module_param(debug, int, 0);
+MODULE_PARM_DESC(debug, "Enable debugging of pktgen module");
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
new file mode 100644
index 00000000..9b570a6a
--- /dev/null
+++ b/net/core/request_sock.c
@@ -0,0 +1,132 @@
+/*
+ * NET		Generic infrastructure for Network protocols.
+ *
+ * Authors:	Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ * 		From code originally in include/net/tcp.h
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/vmalloc.h>
+
+#include <net/request_sock.h>
+
+/*
+ * Maximum number of SYN_RECV sockets in queue per LISTEN socket.
+ * One SYN_RECV socket costs about 80bytes on a 32bit machine.
+ * It would be better to replace it with a global counter for all sockets
+ * but then some measure against one socket starving all other sockets
+ * would be needed.
+ *
+ * The minimum value of it is 128. Experiments with real servers show that
+ * it is absolutely not enough even at 100conn/sec. 256 cures most
+ * of problems.
+ * This value is adjusted to 128 for low memory machines,
+ * and it will increase in proportion to the memory of machine.
+ * Note : Dont forget somaxconn that may limit backlog too.
+ */
+int sysctl_max_syn_backlog = 256;
+EXPORT_SYMBOL(sysctl_max_syn_backlog);
+
+int reqsk_queue_alloc(struct request_sock_queue *queue,
+		      unsigned int nr_table_entries)
+{
+	size_t lopt_size = sizeof(struct listen_sock);
+	struct listen_sock *lopt;
+
+	nr_table_entries = min_t(u32, nr_table_entries, sysctl_max_syn_backlog);
+	nr_table_entries = max_t(u32, nr_table_entries, 8);
+	nr_table_entries = roundup_pow_of_two(nr_table_entries + 1);
+	lopt_size += nr_table_entries * sizeof(struct request_sock *);
+	if (lopt_size > PAGE_SIZE)
+		lopt = vzalloc(lopt_size);
+	else
+		lopt = kzalloc(lopt_size, GFP_KERNEL);
+	if (lopt == NULL)
+		return -ENOMEM;
+
+	for (lopt->max_qlen_log = 3;
+	     (1 << lopt->max_qlen_log) < nr_table_entries;
+	     lopt->max_qlen_log++);
+
+	get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd));
+	rwlock_init(&queue->syn_wait_lock);
+	queue->rskq_accept_head = NULL;
+	lopt->nr_table_entries = nr_table_entries;
+
+	write_lock_bh(&queue->syn_wait_lock);
+	queue->listen_opt = lopt;
+	write_unlock_bh(&queue->syn_wait_lock);
+
+	return 0;
+}
+
+void __reqsk_queue_destroy(struct request_sock_queue *queue)
+{
+	struct listen_sock *lopt;
+	size_t lopt_size;
+
+	/*
+	 * this is an error recovery path only
+	 * no locking needed and the lopt is not NULL
+	 */
+
+	lopt = queue->listen_opt;
+	lopt_size = sizeof(struct listen_sock) +
+		lopt->nr_table_entries * sizeof(struct request_sock *);
+
+	if (lopt_size > PAGE_SIZE)
+		vfree(lopt);
+	else
+		kfree(lopt);
+}
+
+static inline struct listen_sock *reqsk_queue_yank_listen_sk(
+		struct request_sock_queue *queue)
+{
+	struct listen_sock *lopt;
+
+	write_lock_bh(&queue->syn_wait_lock);
+	lopt = queue->listen_opt;
+	queue->listen_opt = NULL;
+	write_unlock_bh(&queue->syn_wait_lock);
+
+	return lopt;
+}
+
+void reqsk_queue_destroy(struct request_sock_queue *queue)
+{
+	/* make all the listen_opt local to us */
+	struct listen_sock *lopt = reqsk_queue_yank_listen_sk(queue);
+	size_t lopt_size = sizeof(struct listen_sock) +
+		lopt->nr_table_entries * sizeof(struct request_sock *);
+
+	if (lopt->qlen != 0) {
+		unsigned int i;
+
+		for (i = 0; i < lopt->nr_table_entries; i++) {
+			struct request_sock *req;
+
+			while ((req = lopt->syn_table[i]) != NULL) {
+				lopt->syn_table[i] = req->dl_next;
+				lopt->qlen--;
+				reqsk_free(req);
+			}
+		}
+	}
+
+	WARN_ON(lopt->qlen != 0);
+	if (lopt_size > PAGE_SIZE)
+		vfree(lopt);
+	else
+		kfree(lopt);
+}
+
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
new file mode 100644
index 00000000..90430b77
--- /dev/null
+++ b/net/core/rtnetlink.c
@@ -0,0 +1,2148 @@
+/*
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the  BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		Routing netlink socket interface: protocol independent part.
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ *	Fixes:
+ *	Vitaly E. Lavrov		RTA_OK arithmetics was wrong.
+ */
+
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/kernel.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/capability.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+#include <linux/security.h>
+#include <linux/mutex.h>
+#include <linux/if_addr.h>
+#include <linux/pci.h>
+
+#include <asm/uaccess.h>
+
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <net/arp.h>
+#include <net/route.h>
+#include <net/udp.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+#include <net/fib_rules.h>
+#include <net/rtnetlink.h>
+#include <net/net_namespace.h>
+
+struct rtnl_link {
+	rtnl_doit_func		doit;
+	rtnl_dumpit_func	dumpit;
+	rtnl_calcit_func 	calcit;
+};
+
+static DEFINE_MUTEX(rtnl_mutex);
+
+void rtnl_lock(void)
+{
+	mutex_lock(&rtnl_mutex);
+}
+EXPORT_SYMBOL(rtnl_lock);
+
+void __rtnl_unlock(void)
+{
+	mutex_unlock(&rtnl_mutex);
+}
+
+void rtnl_unlock(void)
+{
+	/* This fellow will unlock it for us. */
+	netdev_run_todo();
+}
+EXPORT_SYMBOL(rtnl_unlock);
+
+int rtnl_trylock(void)
+{
+	return mutex_trylock(&rtnl_mutex);
+}
+EXPORT_SYMBOL(rtnl_trylock);
+
+int rtnl_is_locked(void)
+{
+	return mutex_is_locked(&rtnl_mutex);
+}
+EXPORT_SYMBOL(rtnl_is_locked);
+
+#ifdef CONFIG_PROVE_LOCKING
+int lockdep_rtnl_is_held(void)
+{
+	return lockdep_is_held(&rtnl_mutex);
+}
+EXPORT_SYMBOL(lockdep_rtnl_is_held);
+#endif /* #ifdef CONFIG_PROVE_LOCKING */
+
+static struct rtnl_link *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1];
+
+static inline int rtm_msgindex(int msgtype)
+{
+	int msgindex = msgtype - RTM_BASE;
+
+	/*
+	 * msgindex < 0 implies someone tried to register a netlink
+	 * control code. msgindex >= RTM_NR_MSGTYPES may indicate that
+	 * the message type has not been added to linux/rtnetlink.h
+	 */
+	BUG_ON(msgindex < 0 || msgindex >= RTM_NR_MSGTYPES);
+
+	return msgindex;
+}
+
+static rtnl_doit_func rtnl_get_doit(int protocol, int msgindex)
+{
+	struct rtnl_link *tab;
+
+	if (protocol <= RTNL_FAMILY_MAX)
+		tab = rtnl_msg_handlers[protocol];
+	else
+		tab = NULL;
+
+	if (tab == NULL || tab[msgindex].doit == NULL)
+		tab = rtnl_msg_handlers[PF_UNSPEC];
+
+	return tab ? tab[msgindex].doit : NULL;
+}
+
+static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex)
+{
+	struct rtnl_link *tab;
+
+	if (protocol <= RTNL_FAMILY_MAX)
+		tab = rtnl_msg_handlers[protocol];
+	else
+		tab = NULL;
+
+	if (tab == NULL || tab[msgindex].dumpit == NULL)
+		tab = rtnl_msg_handlers[PF_UNSPEC];
+
+	return tab ? tab[msgindex].dumpit : NULL;
+}
+
+static rtnl_calcit_func rtnl_get_calcit(int protocol, int msgindex)
+{
+	struct rtnl_link *tab;
+
+	if (protocol <= RTNL_FAMILY_MAX)
+		tab = rtnl_msg_handlers[protocol];
+	else
+		tab = NULL;
+
+	if (tab == NULL || tab[msgindex].calcit == NULL)
+		tab = rtnl_msg_handlers[PF_UNSPEC];
+
+	return tab ? tab[msgindex].calcit : NULL;
+}
+
+/**
+ * __rtnl_register - Register a rtnetlink message type
+ * @protocol: Protocol family or PF_UNSPEC
+ * @msgtype: rtnetlink message type
+ * @doit: Function pointer called for each request message
+ * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message
+ * @calcit: Function pointer to calc size of dump message
+ *
+ * Registers the specified function pointers (at least one of them has
+ * to be non-NULL) to be called whenever a request message for the
+ * specified protocol family and message type is received.
+ *
+ * The special protocol family PF_UNSPEC may be used to define fallback
+ * function pointers for the case when no entry for the specific protocol
+ * family exists.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int __rtnl_register(int protocol, int msgtype,
+		    rtnl_doit_func doit, rtnl_dumpit_func dumpit,
+		    rtnl_calcit_func calcit)
+{
+	struct rtnl_link *tab;
+	int msgindex;
+
+	BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
+	msgindex = rtm_msgindex(msgtype);
+
+	tab = rtnl_msg_handlers[protocol];
+	if (tab == NULL) {
+		tab = kcalloc(RTM_NR_MSGTYPES, sizeof(*tab), GFP_KERNEL);
+		if (tab == NULL)
+			return -ENOBUFS;
+
+		rtnl_msg_handlers[protocol] = tab;
+	}
+
+	if (doit)
+		tab[msgindex].doit = doit;
+
+	if (dumpit)
+		tab[msgindex].dumpit = dumpit;
+
+	if (calcit)
+		tab[msgindex].calcit = calcit;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(__rtnl_register);
+
+/**
+ * rtnl_register - Register a rtnetlink message type
+ *
+ * Identical to __rtnl_register() but panics on failure. This is useful
+ * as failure of this function is very unlikely, it can only happen due
+ * to lack of memory when allocating the chain to store all message
+ * handlers for a protocol. Meant for use in init functions where lack
+ * of memory implies no sense in continuing.
+ */
+void rtnl_register(int protocol, int msgtype,
+		   rtnl_doit_func doit, rtnl_dumpit_func dumpit,
+		   rtnl_calcit_func calcit)
+{
+	if (__rtnl_register(protocol, msgtype, doit, dumpit, calcit) < 0)
+		panic("Unable to register rtnetlink message handler, "
+		      "protocol = %d, message type = %d\n",
+		      protocol, msgtype);
+}
+EXPORT_SYMBOL_GPL(rtnl_register);
+
+/**
+ * rtnl_unregister - Unregister a rtnetlink message type
+ * @protocol: Protocol family or PF_UNSPEC
+ * @msgtype: rtnetlink message type
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int rtnl_unregister(int protocol, int msgtype)
+{
+	int msgindex;
+
+	BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
+	msgindex = rtm_msgindex(msgtype);
+
+	if (rtnl_msg_handlers[protocol] == NULL)
+		return -ENOENT;
+
+	rtnl_msg_handlers[protocol][msgindex].doit = NULL;
+	rtnl_msg_handlers[protocol][msgindex].dumpit = NULL;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rtnl_unregister);
+
+/**
+ * rtnl_unregister_all - Unregister all rtnetlink message type of a protocol
+ * @protocol : Protocol family or PF_UNSPEC
+ *
+ * Identical to calling rtnl_unregster() for all registered message types
+ * of a certain protocol family.
+ */
+void rtnl_unregister_all(int protocol)
+{
+	BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
+
+	kfree(rtnl_msg_handlers[protocol]);
+	rtnl_msg_handlers[protocol] = NULL;
+}
+EXPORT_SYMBOL_GPL(rtnl_unregister_all);
+
+static LIST_HEAD(link_ops);
+
+static const struct rtnl_link_ops *rtnl_link_ops_get(const char *kind)
+{
+	const struct rtnl_link_ops *ops;
+
+	list_for_each_entry(ops, &link_ops, list) {
+		if (!strcmp(ops->kind, kind))
+			return ops;
+	}
+	return NULL;
+}
+
+/**
+ * __rtnl_link_register - Register rtnl_link_ops with rtnetlink.
+ * @ops: struct rtnl_link_ops * to register
+ *
+ * The caller must hold the rtnl_mutex. This function should be used
+ * by drivers that create devices during module initialization. It
+ * must be called before registering the devices.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int __rtnl_link_register(struct rtnl_link_ops *ops)
+{
+	if (rtnl_link_ops_get(ops->kind))
+		return -EEXIST;
+
+	if (!ops->dellink)
+		ops->dellink = unregister_netdevice_queue;
+
+	list_add_tail(&ops->list, &link_ops);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(__rtnl_link_register);
+
+/**
+ * rtnl_link_register - Register rtnl_link_ops with rtnetlink.
+ * @ops: struct rtnl_link_ops * to register
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int rtnl_link_register(struct rtnl_link_ops *ops)
+{
+	int err;
+
+	rtnl_lock();
+	err = __rtnl_link_register(ops);
+	rtnl_unlock();
+	return err;
+}
+EXPORT_SYMBOL_GPL(rtnl_link_register);
+
+static void __rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops)
+{
+	struct net_device *dev;
+	LIST_HEAD(list_kill);
+
+	for_each_netdev(net, dev) {
+		if (dev->rtnl_link_ops == ops)
+			ops->dellink(dev, &list_kill);
+	}
+	unregister_netdevice_many(&list_kill);
+}
+
+/**
+ * __rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink.
+ * @ops: struct rtnl_link_ops * to unregister
+ *
+ * The caller must hold the rtnl_mutex.
+ */
+void __rtnl_link_unregister(struct rtnl_link_ops *ops)
+{
+	struct net *net;
+
+	for_each_net(net) {
+		__rtnl_kill_links(net, ops);
+	}
+	list_del(&ops->list);
+}
+EXPORT_SYMBOL_GPL(__rtnl_link_unregister);
+
+/**
+ * rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink.
+ * @ops: struct rtnl_link_ops * to unregister
+ */
+void rtnl_link_unregister(struct rtnl_link_ops *ops)
+{
+	rtnl_lock();
+	__rtnl_link_unregister(ops);
+	rtnl_unlock();
+}
+EXPORT_SYMBOL_GPL(rtnl_link_unregister);
+
+static size_t rtnl_link_get_size(const struct net_device *dev)
+{
+	const struct rtnl_link_ops *ops = dev->rtnl_link_ops;
+	size_t size;
+
+	if (!ops)
+		return 0;
+
+	size = nla_total_size(sizeof(struct nlattr)) + /* IFLA_LINKINFO */
+	       nla_total_size(strlen(ops->kind) + 1);  /* IFLA_INFO_KIND */
+
+	if (ops->get_size)
+		/* IFLA_INFO_DATA + nested data */
+		size += nla_total_size(sizeof(struct nlattr)) +
+			ops->get_size(dev);
+
+	if (ops->get_xstats_size)
+		/* IFLA_INFO_XSTATS */
+		size += nla_total_size(ops->get_xstats_size(dev));
+
+	return size;
+}
+
+static LIST_HEAD(rtnl_af_ops);
+
+static const struct rtnl_af_ops *rtnl_af_lookup(const int family)
+{
+	const struct rtnl_af_ops *ops;
+
+	list_for_each_entry(ops, &rtnl_af_ops, list) {
+		if (ops->family == family)
+			return ops;
+	}
+
+	return NULL;
+}
+
+/**
+ * __rtnl_af_register - Register rtnl_af_ops with rtnetlink.
+ * @ops: struct rtnl_af_ops * to register
+ *
+ * The caller must hold the rtnl_mutex.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int __rtnl_af_register(struct rtnl_af_ops *ops)
+{
+	list_add_tail(&ops->list, &rtnl_af_ops);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(__rtnl_af_register);
+
+/**
+ * rtnl_af_register - Register rtnl_af_ops with rtnetlink.
+ * @ops: struct rtnl_af_ops * to register
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int rtnl_af_register(struct rtnl_af_ops *ops)
+{
+	int err;
+
+	rtnl_lock();
+	err = __rtnl_af_register(ops);
+	rtnl_unlock();
+	return err;
+}
+EXPORT_SYMBOL_GPL(rtnl_af_register);
+
+/**
+ * __rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink.
+ * @ops: struct rtnl_af_ops * to unregister
+ *
+ * The caller must hold the rtnl_mutex.
+ */
+void __rtnl_af_unregister(struct rtnl_af_ops *ops)
+{
+	list_del(&ops->list);
+}
+EXPORT_SYMBOL_GPL(__rtnl_af_unregister);
+
+/**
+ * rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink.
+ * @ops: struct rtnl_af_ops * to unregister
+ */
+void rtnl_af_unregister(struct rtnl_af_ops *ops)
+{
+	rtnl_lock();
+	__rtnl_af_unregister(ops);
+	rtnl_unlock();
+}
+EXPORT_SYMBOL_GPL(rtnl_af_unregister);
+
+static size_t rtnl_link_get_af_size(const struct net_device *dev)
+{
+	struct rtnl_af_ops *af_ops;
+	size_t size;
+
+	/* IFLA_AF_SPEC */
+	size = nla_total_size(sizeof(struct nlattr));
+
+	list_for_each_entry(af_ops, &rtnl_af_ops, list) {
+		if (af_ops->get_link_af_size) {
+			/* AF_* + nested data */
+			size += nla_total_size(sizeof(struct nlattr)) +
+				af_ops->get_link_af_size(dev);
+		}
+	}
+
+	return size;
+}
+
+static int rtnl_link_fill(struct sk_buff *skb, const struct net_device *dev)
+{
+	const struct rtnl_link_ops *ops = dev->rtnl_link_ops;
+	struct nlattr *linkinfo, *data;
+	int err = -EMSGSIZE;
+
+	linkinfo = nla_nest_start(skb, IFLA_LINKINFO);
+	if (linkinfo == NULL)
+		goto out;
+
+	if (nla_put_string(skb, IFLA_INFO_KIND, ops->kind) < 0)
+		goto err_cancel_link;
+	if (ops->fill_xstats) {
+		err = ops->fill_xstats(skb, dev);
+		if (err < 0)
+			goto err_cancel_link;
+	}
+	if (ops->fill_info) {
+		data = nla_nest_start(skb, IFLA_INFO_DATA);
+		if (data == NULL)
+			goto err_cancel_link;
+		err = ops->fill_info(skb, dev);
+		if (err < 0)
+			goto err_cancel_data;
+		nla_nest_end(skb, data);
+	}
+
+	nla_nest_end(skb, linkinfo);
+	return 0;
+
+err_cancel_data:
+	nla_nest_cancel(skb, data);
+err_cancel_link:
+	nla_nest_cancel(skb, linkinfo);
+out:
+	return err;
+}
+
+static const int rtm_min[RTM_NR_FAMILIES] =
+{
+	[RTM_FAM(RTM_NEWLINK)]      = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
+	[RTM_FAM(RTM_NEWADDR)]      = NLMSG_LENGTH(sizeof(struct ifaddrmsg)),
+	[RTM_FAM(RTM_NEWROUTE)]     = NLMSG_LENGTH(sizeof(struct rtmsg)),
+	[RTM_FAM(RTM_NEWRULE)]      = NLMSG_LENGTH(sizeof(struct fib_rule_hdr)),
+	[RTM_FAM(RTM_NEWQDISC)]     = NLMSG_LENGTH(sizeof(struct tcmsg)),
+	[RTM_FAM(RTM_NEWTCLASS)]    = NLMSG_LENGTH(sizeof(struct tcmsg)),
+	[RTM_FAM(RTM_NEWTFILTER)]   = NLMSG_LENGTH(sizeof(struct tcmsg)),
+	[RTM_FAM(RTM_NEWACTION)]    = NLMSG_LENGTH(sizeof(struct tcamsg)),
+	[RTM_FAM(RTM_GETMULTICAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
+	[RTM_FAM(RTM_GETANYCAST)]   = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
+};
+
+static const int rta_max[RTM_NR_FAMILIES] =
+{
+	[RTM_FAM(RTM_NEWLINK)]      = IFLA_MAX,
+	[RTM_FAM(RTM_NEWADDR)]      = IFA_MAX,
+	[RTM_FAM(RTM_NEWROUTE)]     = RTA_MAX,
+	[RTM_FAM(RTM_NEWRULE)]      = FRA_MAX,
+	[RTM_FAM(RTM_NEWQDISC)]     = TCA_MAX,
+	[RTM_FAM(RTM_NEWTCLASS)]    = TCA_MAX,
+	[RTM_FAM(RTM_NEWTFILTER)]   = TCA_MAX,
+	[RTM_FAM(RTM_NEWACTION)]    = TCAA_MAX,
+};
+
+void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data)
+{
+	struct rtattr *rta;
+	int size = RTA_LENGTH(attrlen);
+
+	rta = (struct rtattr *)skb_put(skb, RTA_ALIGN(size));
+	rta->rta_type = attrtype;
+	rta->rta_len = size;
+	memcpy(RTA_DATA(rta), data, attrlen);
+	memset(RTA_DATA(rta) + attrlen, 0, RTA_ALIGN(size) - size);
+}
+EXPORT_SYMBOL(__rta_fill);
+
+int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned group, int echo)
+{
+	struct sock *rtnl = net->rtnl;
+	int err = 0;
+
+	NETLINK_CB(skb).dst_group = group;
+	if (echo)
+		atomic_inc(&skb->users);
+	netlink_broadcast(rtnl, skb, pid, group, GFP_KERNEL);
+	if (echo)
+		err = netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
+	return err;
+}
+
+int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid)
+{
+	struct sock *rtnl = net->rtnl;
+
+	return nlmsg_unicast(rtnl, skb, pid);
+}
+EXPORT_SYMBOL(rtnl_unicast);
+
+void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group,
+		 struct nlmsghdr *nlh, gfp_t flags)
+{
+	struct sock *rtnl = net->rtnl;
+	int report = 0;
+
+	if (nlh)
+		report = nlmsg_report(nlh);
+
+	nlmsg_notify(rtnl, skb, pid, group, report, flags);
+}
+EXPORT_SYMBOL(rtnl_notify);
+
+void rtnl_set_sk_err(struct net *net, u32 group, int error)
+{
+	struct sock *rtnl = net->rtnl;
+
+	netlink_set_err(rtnl, 0, group, error);
+}
+EXPORT_SYMBOL(rtnl_set_sk_err);
+
+int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics)
+{
+	struct nlattr *mx;
+	int i, valid = 0;
+
+	mx = nla_nest_start(skb, RTA_METRICS);
+	if (mx == NULL)
+		return -ENOBUFS;
+
+	for (i = 0; i < RTAX_MAX; i++) {
+		if (metrics[i]) {
+			valid++;
+			NLA_PUT_U32(skb, i+1, metrics[i]);
+		}
+	}
+
+	if (!valid) {
+		nla_nest_cancel(skb, mx);
+		return 0;
+	}
+
+	return nla_nest_end(skb, mx);
+
+nla_put_failure:
+	nla_nest_cancel(skb, mx);
+	return -EMSGSIZE;
+}
+EXPORT_SYMBOL(rtnetlink_put_metrics);
+
+int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
+		       u32 ts, u32 tsage, long expires, u32 error)
+{
+	struct rta_cacheinfo ci = {
+		.rta_lastuse = jiffies_to_clock_t(jiffies - dst->lastuse),
+		.rta_used = dst->__use,
+		.rta_clntref = atomic_read(&(dst->__refcnt)),
+		.rta_error = error,
+		.rta_id =  id,
+		.rta_ts = ts,
+		.rta_tsage = tsage,
+	};
+
+	if (expires)
+		ci.rta_expires = jiffies_to_clock_t(expires);
+
+	return nla_put(skb, RTA_CACHEINFO, sizeof(ci), &ci);
+}
+EXPORT_SYMBOL_GPL(rtnl_put_cacheinfo);
+
+static void set_operstate(struct net_device *dev, unsigned char transition)
+{
+	unsigned char operstate = dev->operstate;
+
+	switch (transition) {
+	case IF_OPER_UP:
+		if ((operstate == IF_OPER_DORMANT ||
+		     operstate == IF_OPER_UNKNOWN) &&
+		    !netif_dormant(dev))
+			operstate = IF_OPER_UP;
+		break;
+
+	case IF_OPER_DORMANT:
+		if (operstate == IF_OPER_UP ||
+		    operstate == IF_OPER_UNKNOWN)
+			operstate = IF_OPER_DORMANT;
+		break;
+	}
+
+	if (dev->operstate != operstate) {
+		write_lock_bh(&dev_base_lock);
+		dev->operstate = operstate;
+		write_unlock_bh(&dev_base_lock);
+		netdev_state_change(dev);
+	}
+}
+
+static unsigned int rtnl_dev_combine_flags(const struct net_device *dev,
+					   const struct ifinfomsg *ifm)
+{
+	unsigned int flags = ifm->ifi_flags;
+
+	/* bugwards compatibility: ifi_change == 0 is treated as ~0 */
+	if (ifm->ifi_change)
+		flags = (flags & ifm->ifi_change) |
+			(dev->flags & ~ifm->ifi_change);
+
+	return flags;
+}
+
+static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
+				 const struct rtnl_link_stats64 *b)
+{
+	a->rx_packets = b->rx_packets;
+	a->tx_packets = b->tx_packets;
+	a->rx_bytes = b->rx_bytes;
+	a->tx_bytes = b->tx_bytes;
+	a->rx_errors = b->rx_errors;
+	a->tx_errors = b->tx_errors;
+	a->rx_dropped = b->rx_dropped;
+	a->tx_dropped = b->tx_dropped;
+
+	a->multicast = b->multicast;
+	a->collisions = b->collisions;
+
+	a->rx_length_errors = b->rx_length_errors;
+	a->rx_over_errors = b->rx_over_errors;
+	a->rx_crc_errors = b->rx_crc_errors;
+	a->rx_frame_errors = b->rx_frame_errors;
+	a->rx_fifo_errors = b->rx_fifo_errors;
+	a->rx_missed_errors = b->rx_missed_errors;
+
+	a->tx_aborted_errors = b->tx_aborted_errors;
+	a->tx_carrier_errors = b->tx_carrier_errors;
+	a->tx_fifo_errors = b->tx_fifo_errors;
+	a->tx_heartbeat_errors = b->tx_heartbeat_errors;
+	a->tx_window_errors = b->tx_window_errors;
+
+	a->rx_compressed = b->rx_compressed;
+	a->tx_compressed = b->tx_compressed;
+}
+
+static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b)
+{
+	memcpy(v, b, sizeof(*b));
+}
+
+/* All VF info */
+static inline int rtnl_vfinfo_size(const struct net_device *dev,
+				   u32 ext_filter_mask)
+{
+	if (dev->dev.parent && dev_is_pci(dev->dev.parent) &&
+	    (ext_filter_mask & RTEXT_FILTER_VF)) {
+		int num_vfs = dev_num_vf(dev->dev.parent);
+		size_t size = nla_total_size(sizeof(struct nlattr));
+		size += nla_total_size(num_vfs * sizeof(struct nlattr));
+		size += num_vfs *
+			(nla_total_size(sizeof(struct ifla_vf_mac)) +
+			 nla_total_size(sizeof(struct ifla_vf_vlan)) +
+			 nla_total_size(sizeof(struct ifla_vf_tx_rate)) +
+			 nla_total_size(sizeof(struct ifla_vf_spoofchk)));
+		return size;
+	} else
+		return 0;
+}
+
+static size_t rtnl_port_size(const struct net_device *dev)
+{
+	size_t port_size = nla_total_size(4)		/* PORT_VF */
+		+ nla_total_size(PORT_PROFILE_MAX)	/* PORT_PROFILE */
+		+ nla_total_size(sizeof(struct ifla_port_vsi))
+							/* PORT_VSI_TYPE */
+		+ nla_total_size(PORT_UUID_MAX)		/* PORT_INSTANCE_UUID */
+		+ nla_total_size(PORT_UUID_MAX)		/* PORT_HOST_UUID */
+		+ nla_total_size(1)			/* PROT_VDP_REQUEST */
+		+ nla_total_size(2);			/* PORT_VDP_RESPONSE */
+	size_t vf_ports_size = nla_total_size(sizeof(struct nlattr));
+	size_t vf_port_size = nla_total_size(sizeof(struct nlattr))
+		+ port_size;
+	size_t port_self_size = nla_total_size(sizeof(struct nlattr))
+		+ port_size;
+
+	if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent)
+		return 0;
+	if (dev_num_vf(dev->dev.parent))
+		return port_self_size + vf_ports_size +
+			vf_port_size * dev_num_vf(dev->dev.parent);
+	else
+		return port_self_size;
+}
+
+static noinline size_t if_nlmsg_size(const struct net_device *dev,
+				     u32 ext_filter_mask)
+{
+	return NLMSG_ALIGN(sizeof(struct ifinfomsg))
+	       + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
+	       + nla_total_size(IFALIASZ) /* IFLA_IFALIAS */
+	       + nla_total_size(IFNAMSIZ) /* IFLA_QDISC */
+	       + nla_total_size(sizeof(struct rtnl_link_ifmap))
+	       + nla_total_size(sizeof(struct rtnl_link_stats))
+	       + nla_total_size(sizeof(struct rtnl_link_stats64))
+	       + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
+	       + nla_total_size(MAX_ADDR_LEN) /* IFLA_BROADCAST */
+	       + nla_total_size(4) /* IFLA_TXQLEN */
+	       + nla_total_size(4) /* IFLA_WEIGHT */
+	       + nla_total_size(4) /* IFLA_MTU */
+	       + nla_total_size(4) /* IFLA_LINK */
+	       + nla_total_size(4) /* IFLA_MASTER */
+	       + nla_total_size(1) /* IFLA_OPERSTATE */
+	       + nla_total_size(1) /* IFLA_LINKMODE */
+	       + nla_total_size(ext_filter_mask
+			        & RTEXT_FILTER_VF ? 4 : 0) /* IFLA_NUM_VF */
+	       + rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */
+	       + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */
+	       + rtnl_link_get_size(dev) /* IFLA_LINKINFO */
+	       + rtnl_link_get_af_size(dev); /* IFLA_AF_SPEC */
+}
+
+static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev)
+{
+	struct nlattr *vf_ports;
+	struct nlattr *vf_port;
+	int vf;
+	int err;
+
+	vf_ports = nla_nest_start(skb, IFLA_VF_PORTS);
+	if (!vf_ports)
+		return -EMSGSIZE;
+
+	for (vf = 0; vf < dev_num_vf(dev->dev.parent); vf++) {
+		vf_port = nla_nest_start(skb, IFLA_VF_PORT);
+		if (!vf_port)
+			goto nla_put_failure;
+		NLA_PUT_U32(skb, IFLA_PORT_VF, vf);
+		err = dev->netdev_ops->ndo_get_vf_port(dev, vf, skb);
+		if (err == -EMSGSIZE)
+			goto nla_put_failure;
+		if (err) {
+			nla_nest_cancel(skb, vf_port);
+			continue;
+		}
+		nla_nest_end(skb, vf_port);
+	}
+
+	nla_nest_end(skb, vf_ports);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, vf_ports);
+	return -EMSGSIZE;
+}
+
+static int rtnl_port_self_fill(struct sk_buff *skb, struct net_device *dev)
+{
+	struct nlattr *port_self;
+	int err;
+
+	port_self = nla_nest_start(skb, IFLA_PORT_SELF);
+	if (!port_self)
+		return -EMSGSIZE;
+
+	err = dev->netdev_ops->ndo_get_vf_port(dev, PORT_SELF_VF, skb);
+	if (err) {
+		nla_nest_cancel(skb, port_self);
+		return (err == -EMSGSIZE) ? err : 0;
+	}
+
+	nla_nest_end(skb, port_self);
+
+	return 0;
+}
+
+static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev)
+{
+	int err;
+
+	if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent)
+		return 0;
+
+	err = rtnl_port_self_fill(skb, dev);
+	if (err)
+		return err;
+
+	if (dev_num_vf(dev->dev.parent)) {
+		err = rtnl_vf_ports_fill(skb, dev);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
+			    int type, u32 pid, u32 seq, u32 change,
+			    unsigned int flags, u32 ext_filter_mask)
+{
+	struct ifinfomsg *ifm;
+	struct nlmsghdr *nlh;
+	struct rtnl_link_stats64 temp;
+	const struct rtnl_link_stats64 *stats;
+	struct nlattr *attr, *af_spec;
+	struct rtnl_af_ops *af_ops;
+
+	ASSERT_RTNL();
+	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
+	if (nlh == NULL)
+		return -EMSGSIZE;
+
+	ifm = nlmsg_data(nlh);
+	ifm->ifi_family = AF_UNSPEC;
+	ifm->__ifi_pad = 0;
+	ifm->ifi_type = dev->type;
+	ifm->ifi_index = dev->ifindex;
+	ifm->ifi_flags = dev_get_flags(dev);
+	ifm->ifi_change = change;
+
+	NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name);
+	NLA_PUT_U32(skb, IFLA_TXQLEN, dev->tx_queue_len);
+	NLA_PUT_U8(skb, IFLA_OPERSTATE,
+		   netif_running(dev) ? dev->operstate : IF_OPER_DOWN);
+	NLA_PUT_U8(skb, IFLA_LINKMODE, dev->link_mode);
+	NLA_PUT_U32(skb, IFLA_MTU, dev->mtu);
+	NLA_PUT_U32(skb, IFLA_GROUP, dev->group);
+
+	if (dev->ifindex != dev->iflink)
+		NLA_PUT_U32(skb, IFLA_LINK, dev->iflink);
+
+	if (dev->master)
+		NLA_PUT_U32(skb, IFLA_MASTER, dev->master->ifindex);
+
+	if (dev->qdisc)
+		NLA_PUT_STRING(skb, IFLA_QDISC, dev->qdisc->ops->id);
+
+	if (dev->ifalias)
+		NLA_PUT_STRING(skb, IFLA_IFALIAS, dev->ifalias);
+
+	if (1) {
+		struct rtnl_link_ifmap map = {
+			.mem_start   = dev->mem_start,
+			.mem_end     = dev->mem_end,
+			.base_addr   = dev->base_addr,
+			.irq         = dev->irq,
+			.dma         = dev->dma,
+			.port        = dev->if_port,
+		};
+		NLA_PUT(skb, IFLA_MAP, sizeof(map), &map);
+	}
+
+	if (dev->addr_len) {
+		NLA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr);
+		NLA_PUT(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast);
+	}
+
+	attr = nla_reserve(skb, IFLA_STATS,
+			sizeof(struct rtnl_link_stats));
+	if (attr == NULL)
+		goto nla_put_failure;
+
+	stats = dev_get_stats(dev, &temp);
+	copy_rtnl_link_stats(nla_data(attr), stats);
+
+	attr = nla_reserve(skb, IFLA_STATS64,
+			sizeof(struct rtnl_link_stats64));
+	if (attr == NULL)
+		goto nla_put_failure;
+	copy_rtnl_link_stats64(nla_data(attr), stats);
+
+	if (dev->dev.parent && (ext_filter_mask & RTEXT_FILTER_VF))
+		NLA_PUT_U32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent));
+
+	if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent
+	    && (ext_filter_mask & RTEXT_FILTER_VF)) {
+		int i;
+
+		struct nlattr *vfinfo, *vf;
+		int num_vfs = dev_num_vf(dev->dev.parent);
+
+		vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST);
+		if (!vfinfo)
+			goto nla_put_failure;
+		for (i = 0; i < num_vfs; i++) {
+			struct ifla_vf_info ivi;
+			struct ifla_vf_mac vf_mac;
+			struct ifla_vf_vlan vf_vlan;
+			struct ifla_vf_tx_rate vf_tx_rate;
+			struct ifla_vf_spoofchk vf_spoofchk;
+
+			/*
+			 * Not all SR-IOV capable drivers support the
+			 * spoofcheck query.  Preset to -1 so the user
+			 * space tool can detect that the driver didn't
+			 * report anything.
+			 */
+			ivi.spoofchk = -1;
+			if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi))
+				break;
+			vf_mac.vf =
+				vf_vlan.vf =
+				vf_tx_rate.vf =
+				vf_spoofchk.vf = ivi.vf;
+
+			memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
+			vf_vlan.vlan = ivi.vlan;
+			vf_vlan.qos = ivi.qos;
+			vf_tx_rate.rate = ivi.tx_rate;
+			vf_spoofchk.setting = ivi.spoofchk;
+			vf = nla_nest_start(skb, IFLA_VF_INFO);
+			if (!vf) {
+				nla_nest_cancel(skb, vfinfo);
+				goto nla_put_failure;
+			}
+			NLA_PUT(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac);
+			NLA_PUT(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan);
+			NLA_PUT(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate),
+				&vf_tx_rate);
+			NLA_PUT(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk),
+				&vf_spoofchk);
+			nla_nest_end(skb, vf);
+		}
+		nla_nest_end(skb, vfinfo);
+	}
+
+	if (rtnl_port_fill(skb, dev))
+		goto nla_put_failure;
+
+	if (dev->rtnl_link_ops) {
+		if (rtnl_link_fill(skb, dev) < 0)
+			goto nla_put_failure;
+	}
+
+	if (!(af_spec = nla_nest_start(skb, IFLA_AF_SPEC)))
+		goto nla_put_failure;
+
+	list_for_each_entry(af_ops, &rtnl_af_ops, list) {
+		if (af_ops->fill_link_af) {
+			struct nlattr *af;
+			int err;
+
+			if (!(af = nla_nest_start(skb, af_ops->family)))
+				goto nla_put_failure;
+
+			err = af_ops->fill_link_af(skb, dev);
+
+			/*
+			 * Caller may return ENODATA to indicate that there
+			 * was no data to be dumped. This is not an error, it
+			 * means we should trim the attribute header and
+			 * continue.
+			 */
+			if (err == -ENODATA)
+				nla_nest_cancel(skb, af);
+			else if (err < 0)
+				goto nla_put_failure;
+
+			nla_nest_end(skb, af);
+		}
+	}
+
+	nla_nest_end(skb, af_spec);
+
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	nlmsg_cancel(skb, nlh);
+	return -EMSGSIZE;
+}
+
+static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct net *net = sock_net(skb->sk);
+	int h, s_h;
+	int idx = 0, s_idx;
+	struct net_device *dev;
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct nlattr *tb[IFLA_MAX+1];
+	u32 ext_filter_mask = 0;
+
+	s_h = cb->args[0];
+	s_idx = cb->args[1];
+
+	rcu_read_lock();
+	cb->seq = net->dev_base_seq;
+
+	if (nlmsg_parse(cb->nlh, sizeof(struct rtgenmsg), tb, IFLA_MAX,
+			ifla_policy) >= 0) {
+
+		if (tb[IFLA_EXT_MASK])
+			ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
+	}
+
+	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
+		idx = 0;
+		head = &net->dev_index_head[h];
+		hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
+			if (idx < s_idx)
+				goto cont;
+			if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
+					     NETLINK_CB(cb->skb).pid,
+					     cb->nlh->nlmsg_seq, 0,
+					     NLM_F_MULTI,
+					     ext_filter_mask) <= 0)
+				goto out;
+
+			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+cont:
+			idx++;
+		}
+	}
+out:
+	rcu_read_unlock();
+	cb->args[1] = idx;
+	cb->args[0] = h;
+
+	return skb->len;
+}
+
+const struct nla_policy ifla_policy[IFLA_MAX+1] = {
+	[IFLA_IFNAME]		= { .type = NLA_STRING, .len = IFNAMSIZ-1 },
+	[IFLA_ADDRESS]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
+	[IFLA_BROADCAST]	= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
+	[IFLA_MAP]		= { .len = sizeof(struct rtnl_link_ifmap) },
+	[IFLA_MTU]		= { .type = NLA_U32 },
+	[IFLA_LINK]		= { .type = NLA_U32 },
+	[IFLA_MASTER]		= { .type = NLA_U32 },
+	[IFLA_TXQLEN]		= { .type = NLA_U32 },
+	[IFLA_WEIGHT]		= { .type = NLA_U32 },
+	[IFLA_OPERSTATE]	= { .type = NLA_U8 },
+	[IFLA_LINKMODE]		= { .type = NLA_U8 },
+	[IFLA_LINKINFO]		= { .type = NLA_NESTED },
+	[IFLA_NET_NS_PID]	= { .type = NLA_U32 },
+	[IFLA_NET_NS_FD]	= { .type = NLA_U32 },
+	[IFLA_IFALIAS]	        = { .type = NLA_STRING, .len = IFALIASZ-1 },
+	[IFLA_VFINFO_LIST]	= {. type = NLA_NESTED },
+	[IFLA_VF_PORTS]		= { .type = NLA_NESTED },
+	[IFLA_PORT_SELF]	= { .type = NLA_NESTED },
+	[IFLA_AF_SPEC]		= { .type = NLA_NESTED },
+	[IFLA_EXT_MASK]		= { .type = NLA_U32 },
+};
+EXPORT_SYMBOL(ifla_policy);
+
+static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
+	[IFLA_INFO_KIND]	= { .type = NLA_STRING },
+	[IFLA_INFO_DATA]	= { .type = NLA_NESTED },
+};
+
+static const struct nla_policy ifla_vfinfo_policy[IFLA_VF_INFO_MAX+1] = {
+	[IFLA_VF_INFO]		= { .type = NLA_NESTED },
+};
+
+static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
+	[IFLA_VF_MAC]		= { .type = NLA_BINARY,
+				    .len = sizeof(struct ifla_vf_mac) },
+	[IFLA_VF_VLAN]		= { .type = NLA_BINARY,
+				    .len = sizeof(struct ifla_vf_vlan) },
+	[IFLA_VF_TX_RATE]	= { .type = NLA_BINARY,
+				    .len = sizeof(struct ifla_vf_tx_rate) },
+	[IFLA_VF_SPOOFCHK]	= { .type = NLA_BINARY,
+				    .len = sizeof(struct ifla_vf_spoofchk) },
+};
+
+static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = {
+	[IFLA_PORT_VF]		= { .type = NLA_U32 },
+	[IFLA_PORT_PROFILE]	= { .type = NLA_STRING,
+				    .len = PORT_PROFILE_MAX },
+	[IFLA_PORT_VSI_TYPE]	= { .type = NLA_BINARY,
+				    .len = sizeof(struct ifla_port_vsi)},
+	[IFLA_PORT_INSTANCE_UUID] = { .type = NLA_BINARY,
+				      .len = PORT_UUID_MAX },
+	[IFLA_PORT_HOST_UUID]	= { .type = NLA_STRING,
+				    .len = PORT_UUID_MAX },
+	[IFLA_PORT_REQUEST]	= { .type = NLA_U8, },
+	[IFLA_PORT_RESPONSE]	= { .type = NLA_U16, },
+};
+
+struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[])
+{
+	struct net *net;
+	/* Examine the link attributes and figure out which
+	 * network namespace we are talking about.
+	 */
+	if (tb[IFLA_NET_NS_PID])
+		net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID]));
+	else if (tb[IFLA_NET_NS_FD])
+		net = get_net_ns_by_fd(nla_get_u32(tb[IFLA_NET_NS_FD]));
+	else
+		net = get_net(src_net);
+	return net;
+}
+EXPORT_SYMBOL(rtnl_link_get_net);
+
+static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
+{
+	if (dev) {
+		if (tb[IFLA_ADDRESS] &&
+		    nla_len(tb[IFLA_ADDRESS]) < dev->addr_len)
+			return -EINVAL;
+
+		if (tb[IFLA_BROADCAST] &&
+		    nla_len(tb[IFLA_BROADCAST]) < dev->addr_len)
+			return -EINVAL;
+	}
+
+	if (tb[IFLA_AF_SPEC]) {
+		struct nlattr *af;
+		int rem, err;
+
+		nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) {
+			const struct rtnl_af_ops *af_ops;
+
+			if (!(af_ops = rtnl_af_lookup(nla_type(af))))
+				return -EAFNOSUPPORT;
+
+			if (!af_ops->set_link_af)
+				return -EOPNOTSUPP;
+
+			if (af_ops->validate_link_af) {
+				err = af_ops->validate_link_af(dev, af);
+				if (err < 0)
+					return err;
+			}
+		}
+	}
+
+	return 0;
+}
+
+static int do_setvfinfo(struct net_device *dev, struct nlattr *attr)
+{
+	int rem, err = -EINVAL;
+	struct nlattr *vf;
+	const struct net_device_ops *ops = dev->netdev_ops;
+
+	nla_for_each_nested(vf, attr, rem) {
+		switch (nla_type(vf)) {
+		case IFLA_VF_MAC: {
+			struct ifla_vf_mac *ivm;
+			ivm = nla_data(vf);
+			err = -EOPNOTSUPP;
+			if (ops->ndo_set_vf_mac)
+				err = ops->ndo_set_vf_mac(dev, ivm->vf,
+							  ivm->mac);
+			break;
+		}
+		case IFLA_VF_VLAN: {
+			struct ifla_vf_vlan *ivv;
+			ivv = nla_data(vf);
+			err = -EOPNOTSUPP;
+			if (ops->ndo_set_vf_vlan)
+				err = ops->ndo_set_vf_vlan(dev, ivv->vf,
+							   ivv->vlan,
+							   ivv->qos);
+			break;
+		}
+		case IFLA_VF_TX_RATE: {
+			struct ifla_vf_tx_rate *ivt;
+			ivt = nla_data(vf);
+			err = -EOPNOTSUPP;
+			if (ops->ndo_set_vf_tx_rate)
+				err = ops->ndo_set_vf_tx_rate(dev, ivt->vf,
+							      ivt->rate);
+			break;
+		}
+		case IFLA_VF_SPOOFCHK: {
+			struct ifla_vf_spoofchk *ivs;
+			ivs = nla_data(vf);
+			err = -EOPNOTSUPP;
+			if (ops->ndo_set_vf_spoofchk)
+				err = ops->ndo_set_vf_spoofchk(dev, ivs->vf,
+							       ivs->setting);
+			break;
+		}
+		default:
+			err = -EINVAL;
+			break;
+		}
+		if (err)
+			break;
+	}
+	return err;
+}
+
+static int do_set_master(struct net_device *dev, int ifindex)
+{
+	struct net_device *master_dev;
+	const struct net_device_ops *ops;
+	int err;
+
+	if (dev->master) {
+		if (dev->master->ifindex == ifindex)
+			return 0;
+		ops = dev->master->netdev_ops;
+		if (ops->ndo_del_slave) {
+			err = ops->ndo_del_slave(dev->master, dev);
+			if (err)
+				return err;
+		} else {
+			return -EOPNOTSUPP;
+		}
+	}
+
+	if (ifindex) {
+		master_dev = __dev_get_by_index(dev_net(dev), ifindex);
+		if (!master_dev)
+			return -EINVAL;
+		ops = master_dev->netdev_ops;
+		if (ops->ndo_add_slave) {
+			err = ops->ndo_add_slave(master_dev, dev);
+			if (err)
+				return err;
+		} else {
+			return -EOPNOTSUPP;
+		}
+	}
+	return 0;
+}
+
+static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
+		      struct nlattr **tb, char *ifname, int modified)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+	int send_addr_notify = 0;
+	int err;
+
+	if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD]) {
+		struct net *net = rtnl_link_get_net(dev_net(dev), tb);
+		if (IS_ERR(net)) {
+			err = PTR_ERR(net);
+			goto errout;
+		}
+		err = dev_change_net_namespace(dev, net, ifname);
+		put_net(net);
+		if (err)
+			goto errout;
+		modified = 1;
+	}
+
+	if (tb[IFLA_MAP]) {
+		struct rtnl_link_ifmap *u_map;
+		struct ifmap k_map;
+
+		if (!ops->ndo_set_config) {
+			err = -EOPNOTSUPP;
+			goto errout;
+		}
+
+		if (!netif_device_present(dev)) {
+			err = -ENODEV;
+			goto errout;
+		}
+
+		u_map = nla_data(tb[IFLA_MAP]);
+		k_map.mem_start = (unsigned long) u_map->mem_start;
+		k_map.mem_end = (unsigned long) u_map->mem_end;
+		k_map.base_addr = (unsigned short) u_map->base_addr;
+		k_map.irq = (unsigned char) u_map->irq;
+		k_map.dma = (unsigned char) u_map->dma;
+		k_map.port = (unsigned char) u_map->port;
+
+		err = ops->ndo_set_config(dev, &k_map);
+		if (err < 0)
+			goto errout;
+
+		modified = 1;
+	}
+
+	if (tb[IFLA_ADDRESS]) {
+		struct sockaddr *sa;
+		int len;
+
+		if (!ops->ndo_set_mac_address) {
+			err = -EOPNOTSUPP;
+			goto errout;
+		}
+
+		if (!netif_device_present(dev)) {
+			err = -ENODEV;
+			goto errout;
+		}
+
+		len = sizeof(sa_family_t) + dev->addr_len;
+		sa = kmalloc(len, GFP_KERNEL);
+		if (!sa) {
+			err = -ENOMEM;
+			goto errout;
+		}
+		sa->sa_family = dev->type;
+		memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]),
+		       dev->addr_len);
+		err = ops->ndo_set_mac_address(dev, sa);
+		kfree(sa);
+		if (err)
+			goto errout;
+		send_addr_notify = 1;
+		modified = 1;
+	}
+
+	if (tb[IFLA_MTU]) {
+		err = dev_set_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
+		if (err < 0)
+			goto errout;
+		modified = 1;
+	}
+
+	if (tb[IFLA_GROUP]) {
+		dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP]));
+		modified = 1;
+	}
+
+	/*
+	 * Interface selected by interface index but interface
+	 * name provided implies that a name change has been
+	 * requested.
+	 */
+	if (ifm->ifi_index > 0 && ifname[0]) {
+		err = dev_change_name(dev, ifname);
+		if (err < 0)
+			goto errout;
+		modified = 1;
+	}
+
+	if (tb[IFLA_IFALIAS]) {
+		err = dev_set_alias(dev, nla_data(tb[IFLA_IFALIAS]),
+				    nla_len(tb[IFLA_IFALIAS]));
+		if (err < 0)
+			goto errout;
+		modified = 1;
+	}
+
+	if (tb[IFLA_BROADCAST]) {
+		nla_memcpy(dev->broadcast, tb[IFLA_BROADCAST], dev->addr_len);
+		send_addr_notify = 1;
+	}
+
+	if (ifm->ifi_flags || ifm->ifi_change) {
+		err = dev_change_flags(dev, rtnl_dev_combine_flags(dev, ifm));
+		if (err < 0)
+			goto errout;
+	}
+
+	if (tb[IFLA_MASTER]) {
+		err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]));
+		if (err)
+			goto errout;
+		modified = 1;
+	}
+
+	if (tb[IFLA_TXQLEN])
+		dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]);
+
+	if (tb[IFLA_OPERSTATE])
+		set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
+
+	if (tb[IFLA_LINKMODE]) {
+		write_lock_bh(&dev_base_lock);
+		dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]);
+		write_unlock_bh(&dev_base_lock);
+	}
+
+	if (tb[IFLA_VFINFO_LIST]) {
+		struct nlattr *attr;
+		int rem;
+		nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) {
+			if (nla_type(attr) != IFLA_VF_INFO) {
+				err = -EINVAL;
+				goto errout;
+			}
+			err = do_setvfinfo(dev, attr);
+			if (err < 0)
+				goto errout;
+			modified = 1;
+		}
+	}
+	err = 0;
+
+	if (tb[IFLA_VF_PORTS]) {
+		struct nlattr *port[IFLA_PORT_MAX+1];
+		struct nlattr *attr;
+		int vf;
+		int rem;
+
+		err = -EOPNOTSUPP;
+		if (!ops->ndo_set_vf_port)
+			goto errout;
+
+		nla_for_each_nested(attr, tb[IFLA_VF_PORTS], rem) {
+			if (nla_type(attr) != IFLA_VF_PORT)
+				continue;
+			err = nla_parse_nested(port, IFLA_PORT_MAX,
+				attr, ifla_port_policy);
+			if (err < 0)
+				goto errout;
+			if (!port[IFLA_PORT_VF]) {
+				err = -EOPNOTSUPP;
+				goto errout;
+			}
+			vf = nla_get_u32(port[IFLA_PORT_VF]);
+			err = ops->ndo_set_vf_port(dev, vf, port);
+			if (err < 0)
+				goto errout;
+			modified = 1;
+		}
+	}
+	err = 0;
+
+	if (tb[IFLA_PORT_SELF]) {
+		struct nlattr *port[IFLA_PORT_MAX+1];
+
+		err = nla_parse_nested(port, IFLA_PORT_MAX,
+			tb[IFLA_PORT_SELF], ifla_port_policy);
+		if (err < 0)
+			goto errout;
+
+		err = -EOPNOTSUPP;
+		if (ops->ndo_set_vf_port)
+			err = ops->ndo_set_vf_port(dev, PORT_SELF_VF, port);
+		if (err < 0)
+			goto errout;
+		modified = 1;
+	}
+
+	if (tb[IFLA_AF_SPEC]) {
+		struct nlattr *af;
+		int rem;
+
+		nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) {
+			const struct rtnl_af_ops *af_ops;
+
+			if (!(af_ops = rtnl_af_lookup(nla_type(af))))
+				BUG();
+
+			err = af_ops->set_link_af(dev, af);
+			if (err < 0)
+				goto errout;
+
+			modified = 1;
+		}
+	}
+	err = 0;
+
+errout:
+	if (err < 0 && modified && net_ratelimit())
+		printk(KERN_WARNING "A link change request failed with "
+		       "some changes committed already. Interface %s may "
+		       "have been left with an inconsistent configuration, "
+		       "please check.\n", dev->name);
+
+	if (send_addr_notify)
+		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+
+	return err;
+}
+
+static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+	struct net *net = sock_net(skb->sk);
+	struct ifinfomsg *ifm;
+	struct net_device *dev;
+	int err;
+	struct nlattr *tb[IFLA_MAX+1];
+	char ifname[IFNAMSIZ];
+
+	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
+	if (err < 0)
+		goto errout;
+
+	if (tb[IFLA_IFNAME])
+		nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
+	else
+		ifname[0] = '\0';
+
+	err = -EINVAL;
+	ifm = nlmsg_data(nlh);
+	if (ifm->ifi_index > 0)
+		dev = __dev_get_by_index(net, ifm->ifi_index);
+	else if (tb[IFLA_IFNAME])
+		dev = __dev_get_by_name(net, ifname);
+	else
+		goto errout;
+
+	if (dev == NULL) {
+		err = -ENODEV;
+		goto errout;
+	}
+
+	err = validate_linkmsg(dev, tb);
+	if (err < 0)
+		goto errout;
+
+	err = do_setlink(dev, ifm, tb, ifname, 0);
+errout:
+	return err;
+}
+
+static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+	struct net *net = sock_net(skb->sk);
+	const struct rtnl_link_ops *ops;
+	struct net_device *dev;
+	struct ifinfomsg *ifm;
+	char ifname[IFNAMSIZ];
+	struct nlattr *tb[IFLA_MAX+1];
+	int err;
+	LIST_HEAD(list_kill);
+
+	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
+	if (err < 0)
+		return err;
+
+	if (tb[IFLA_IFNAME])
+		nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
+
+	ifm = nlmsg_data(nlh);
+	if (ifm->ifi_index > 0)
+		dev = __dev_get_by_index(net, ifm->ifi_index);
+	else if (tb[IFLA_IFNAME])
+		dev = __dev_get_by_name(net, ifname);
+	else
+		return -EINVAL;
+
+	if (!dev)
+		return -ENODEV;
+
+	ops = dev->rtnl_link_ops;
+	if (!ops)
+		return -EOPNOTSUPP;
+
+	ops->dellink(dev, &list_kill);
+	unregister_netdevice_many(&list_kill);
+	list_del(&list_kill);
+	return 0;
+}
+
+int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm)
+{
+	unsigned int old_flags;
+	int err;
+
+	old_flags = dev->flags;
+	if (ifm && (ifm->ifi_flags || ifm->ifi_change)) {
+		err = __dev_change_flags(dev, rtnl_dev_combine_flags(dev, ifm));
+		if (err < 0)
+			return err;
+	}
+
+	dev->rtnl_link_state = RTNL_LINK_INITIALIZED;
+	rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
+
+	__dev_notify_flags(dev, old_flags);
+	return 0;
+}
+EXPORT_SYMBOL(rtnl_configure_link);
+
+struct net_device *rtnl_create_link(struct net *src_net, struct net *net,
+	char *ifname, const struct rtnl_link_ops *ops, struct nlattr *tb[])
+{
+	int err;
+	struct net_device *dev;
+	unsigned int num_queues = 1;
+	unsigned int real_num_queues = 1;
+
+	if (ops->get_tx_queues) {
+		err = ops->get_tx_queues(src_net, tb, &num_queues,
+					 &real_num_queues);
+		if (err)
+			goto err;
+	}
+	err = -ENOMEM;
+	dev = alloc_netdev_mq(ops->priv_size, ifname, ops->setup, num_queues);
+	if (!dev)
+		goto err;
+
+	dev_net_set(dev, net);
+	dev->rtnl_link_ops = ops;
+	dev->rtnl_link_state = RTNL_LINK_INITIALIZING;
+
+	if (tb[IFLA_MTU])
+		dev->mtu = nla_get_u32(tb[IFLA_MTU]);
+	if (tb[IFLA_ADDRESS])
+		memcpy(dev->dev_addr, nla_data(tb[IFLA_ADDRESS]),
+				nla_len(tb[IFLA_ADDRESS]));
+	if (tb[IFLA_BROADCAST])
+		memcpy(dev->broadcast, nla_data(tb[IFLA_BROADCAST]),
+				nla_len(tb[IFLA_BROADCAST]));
+	if (tb[IFLA_TXQLEN])
+		dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]);
+	if (tb[IFLA_OPERSTATE])
+		set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
+	if (tb[IFLA_LINKMODE])
+		dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]);
+	if (tb[IFLA_GROUP])
+		dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP]));
+
+	return dev;
+
+err:
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL(rtnl_create_link);
+
+static int rtnl_group_changelink(struct net *net, int group,
+		struct ifinfomsg *ifm,
+		struct nlattr **tb)
+{
+	struct net_device *dev;
+	int err;
+
+	for_each_netdev(net, dev) {
+		if (dev->group == group) {
+			err = do_setlink(dev, ifm, tb, NULL, 0);
+			if (err < 0)
+				return err;
+		}
+	}
+
+	return 0;
+}
+
+static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+	struct net *net = sock_net(skb->sk);
+	const struct rtnl_link_ops *ops;
+	struct net_device *dev;
+	struct ifinfomsg *ifm;
+	char kind[MODULE_NAME_LEN];
+	char ifname[IFNAMSIZ];
+	struct nlattr *tb[IFLA_MAX+1];
+	struct nlattr *linkinfo[IFLA_INFO_MAX+1];
+	int err;
+
+#ifdef CONFIG_MODULES
+replay:
+#endif
+	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
+	if (err < 0)
+		return err;
+
+	if (tb[IFLA_IFNAME])
+		nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
+	else
+		ifname[0] = '\0';
+
+	ifm = nlmsg_data(nlh);
+	if (ifm->ifi_index > 0)
+		dev = __dev_get_by_index(net, ifm->ifi_index);
+	else {
+		if (ifname[0])
+			dev = __dev_get_by_name(net, ifname);
+		else
+			dev = NULL;
+	}
+
+	err = validate_linkmsg(dev, tb);
+	if (err < 0)
+		return err;
+
+	if (tb[IFLA_LINKINFO]) {
+		err = nla_parse_nested(linkinfo, IFLA_INFO_MAX,
+				       tb[IFLA_LINKINFO], ifla_info_policy);
+		if (err < 0)
+			return err;
+	} else
+		memset(linkinfo, 0, sizeof(linkinfo));
+
+	if (linkinfo[IFLA_INFO_KIND]) {
+		nla_strlcpy(kind, linkinfo[IFLA_INFO_KIND], sizeof(kind));
+		ops = rtnl_link_ops_get(kind);
+	} else {
+		kind[0] = '\0';
+		ops = NULL;
+	}
+
+	if (1) {
+		struct nlattr *attr[ops ? ops->maxtype + 1 : 0], **data = NULL;
+		struct net *dest_net;
+
+		if (ops) {
+			if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) {
+				err = nla_parse_nested(attr, ops->maxtype,
+						       linkinfo[IFLA_INFO_DATA],
+						       ops->policy);
+				if (err < 0)
+					return err;
+				data = attr;
+			}
+			if (ops->validate) {
+				err = ops->validate(tb, data);
+				if (err < 0)
+					return err;
+			}
+		}
+
+		if (dev) {
+			int modified = 0;
+
+			if (nlh->nlmsg_flags & NLM_F_EXCL)
+				return -EEXIST;
+			if (nlh->nlmsg_flags & NLM_F_REPLACE)
+				return -EOPNOTSUPP;
+
+			if (linkinfo[IFLA_INFO_DATA]) {
+				if (!ops || ops != dev->rtnl_link_ops ||
+				    !ops->changelink)
+					return -EOPNOTSUPP;
+
+				err = ops->changelink(dev, tb, data);
+				if (err < 0)
+					return err;
+				modified = 1;
+			}
+
+			return do_setlink(dev, ifm, tb, ifname, modified);
+		}
+
+		if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
+			if (ifm->ifi_index == 0 && tb[IFLA_GROUP])
+				return rtnl_group_changelink(net,
+						nla_get_u32(tb[IFLA_GROUP]),
+						ifm, tb);
+			return -ENODEV;
+		}
+
+		if (ifm->ifi_index)
+			return -EOPNOTSUPP;
+		if (tb[IFLA_MAP] || tb[IFLA_MASTER] || tb[IFLA_PROTINFO])
+			return -EOPNOTSUPP;
+
+		if (!ops) {
+#ifdef CONFIG_MODULES
+			if (kind[0]) {
+				__rtnl_unlock();
+				request_module("rtnl-link-%s", kind);
+				rtnl_lock();
+				ops = rtnl_link_ops_get(kind);
+				if (ops)
+					goto replay;
+			}
+#endif
+			return -EOPNOTSUPP;
+		}
+
+		if (!ifname[0])
+			snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind);
+
+		dest_net = rtnl_link_get_net(net, tb);
+		if (IS_ERR(dest_net))
+			return PTR_ERR(dest_net);
+
+		dev = rtnl_create_link(net, dest_net, ifname, ops, tb);
+
+		if (IS_ERR(dev))
+			err = PTR_ERR(dev);
+		else if (ops->newlink)
+			err = ops->newlink(net, dev, tb, data);
+		else
+			err = register_netdevice(dev);
+
+		if (err < 0 && !IS_ERR(dev))
+			free_netdev(dev);
+		if (err < 0)
+			goto out;
+
+		err = rtnl_configure_link(dev, ifm);
+		if (err < 0)
+			unregister_netdevice(dev);
+out:
+		put_net(dest_net);
+		return err;
+	}
+}
+
+static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+{
+	struct net *net = sock_net(skb->sk);
+	struct ifinfomsg *ifm;
+	char ifname[IFNAMSIZ];
+	struct nlattr *tb[IFLA_MAX+1];
+	struct net_device *dev = NULL;
+	struct sk_buff *nskb;
+	int err;
+	u32 ext_filter_mask = 0;
+
+	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
+	if (err < 0)
+		return err;
+
+	if (tb[IFLA_IFNAME])
+		nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
+
+	if (tb[IFLA_EXT_MASK])
+		ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
+
+	ifm = nlmsg_data(nlh);
+	if (ifm->ifi_index > 0)
+		dev = __dev_get_by_index(net, ifm->ifi_index);
+	else if (tb[IFLA_IFNAME])
+		dev = __dev_get_by_name(net, ifname);
+	else
+		return -EINVAL;
+
+	if (dev == NULL)
+		return -ENODEV;
+
+	nskb = nlmsg_new(if_nlmsg_size(dev, ext_filter_mask), GFP_KERNEL);
+	if (nskb == NULL)
+		return -ENOBUFS;
+
+	err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).pid,
+			       nlh->nlmsg_seq, 0, 0, ext_filter_mask);
+	if (err < 0) {
+		/* -EMSGSIZE implies BUG in if_nlmsg_size */
+		WARN_ON(err == -EMSGSIZE);
+		kfree_skb(nskb);
+	} else
+		err = rtnl_unicast(nskb, net, NETLINK_CB(skb).pid);
+
+	return err;
+}
+
+static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+	struct net *net = sock_net(skb->sk);
+	struct net_device *dev;
+	struct nlattr *tb[IFLA_MAX+1];
+	u32 ext_filter_mask = 0;
+	u16 min_ifinfo_dump_size = 0;
+
+	if (nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, IFLA_MAX,
+			ifla_policy) >= 0) {
+		if (tb[IFLA_EXT_MASK])
+			ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
+	}
+
+	if (!ext_filter_mask)
+		return NLMSG_GOODSIZE;
+	/*
+	 * traverse the list of net devices and compute the minimum
+	 * buffer size based upon the filter mask.
+	 */
+	list_for_each_entry(dev, &net->dev_base_head, dev_list) {
+		min_ifinfo_dump_size = max_t(u16, min_ifinfo_dump_size,
+					     if_nlmsg_size(dev,
+						           ext_filter_mask));
+	}
+
+	return min_ifinfo_dump_size;
+}
+
+static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	int idx;
+	int s_idx = cb->family;
+
+	if (s_idx == 0)
+		s_idx = 1;
+	for (idx = 1; idx <= RTNL_FAMILY_MAX; idx++) {
+		int type = cb->nlh->nlmsg_type-RTM_BASE;
+		if (idx < s_idx || idx == PF_PACKET)
+			continue;
+		if (rtnl_msg_handlers[idx] == NULL ||
+		    rtnl_msg_handlers[idx][type].dumpit == NULL)
+			continue;
+		if (idx > s_idx)
+			memset(&cb->args[0], 0, sizeof(cb->args));
+		if (rtnl_msg_handlers[idx][type].dumpit(skb, cb))
+			break;
+	}
+	cb->family = idx;
+
+	return skb->len;
+}
+
+void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
+{
+	struct net *net = dev_net(dev);
+	struct sk_buff *skb;
+	int err = -ENOBUFS;
+	size_t if_info_size;
+
+	skb = nlmsg_new((if_info_size = if_nlmsg_size(dev, 0)), GFP_KERNEL);
+	if (skb == NULL)
+		goto errout;
+
+	err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0);
+	if (err < 0) {
+		/* -EMSGSIZE implies BUG in if_nlmsg_size() */
+		WARN_ON(err == -EMSGSIZE);
+		kfree_skb(skb);
+		goto errout;
+	}
+	rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
+	return;
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(net, RTNLGRP_LINK, err);
+}
+
+/* Protected by RTNL sempahore.  */
+static struct rtattr **rta_buf;
+static int rtattr_max;
+
+/* Process one rtnetlink message. */
+
+static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+	struct net *net = sock_net(skb->sk);
+	rtnl_doit_func doit;
+	int sz_idx, kind;
+	int min_len;
+	int family;
+	int type;
+	int err;
+
+	type = nlh->nlmsg_type;
+	if (type > RTM_MAX)
+		return -EOPNOTSUPP;
+
+	type -= RTM_BASE;
+
+	/* All the messages must have at least 1 byte length */
+	if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct rtgenmsg)))
+		return 0;
+
+	family = ((struct rtgenmsg *)NLMSG_DATA(nlh))->rtgen_family;
+	sz_idx = type>>2;
+	kind = type&3;
+
+	if (kind != 2 && !capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
+		struct sock *rtnl;
+		rtnl_dumpit_func dumpit;
+		rtnl_calcit_func calcit;
+		u16 min_dump_alloc = 0;
+
+		dumpit = rtnl_get_dumpit(family, type);
+		if (dumpit == NULL)
+			return -EOPNOTSUPP;
+		calcit = rtnl_get_calcit(family, type);
+		if (calcit)
+			min_dump_alloc = calcit(skb, nlh);
+
+		__rtnl_unlock();
+		rtnl = net->rtnl;
+		{
+			struct netlink_dump_control c = {
+				.dump		= dumpit,
+				.min_dump_alloc	= min_dump_alloc,
+			};
+			err = netlink_dump_start(rtnl, skb, nlh, &c);
+		}
+		rtnl_lock();
+		return err;
+	}
+
+	memset(rta_buf, 0, (rtattr_max * sizeof(struct rtattr *)));
+
+	min_len = rtm_min[sz_idx];
+	if (nlh->nlmsg_len < min_len)
+		return -EINVAL;
+
+	if (nlh->nlmsg_len > min_len) {
+		int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
+		struct rtattr *attr = (void *)nlh + NLMSG_ALIGN(min_len);
+
+		while (RTA_OK(attr, attrlen)) {
+			unsigned flavor = attr->rta_type;
+			if (flavor) {
+				if (flavor > rta_max[sz_idx])
+					return -EINVAL;
+				rta_buf[flavor-1] = attr;
+			}
+			attr = RTA_NEXT(attr, attrlen);
+		}
+	}
+
+	doit = rtnl_get_doit(family, type);
+	if (doit == NULL)
+		return -EOPNOTSUPP;
+
+	return doit(skb, nlh, (void *)&rta_buf[0]);
+}
+
+static void rtnetlink_rcv(struct sk_buff *skb)
+{
+	rtnl_lock();
+	netlink_rcv_skb(skb, &rtnetlink_rcv_msg);
+	rtnl_unlock();
+}
+
+static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+	struct net_device *dev = ptr;
+
+	switch (event) {
+	case NETDEV_UP:
+	case NETDEV_DOWN:
+	case NETDEV_PRE_UP:
+	case NETDEV_POST_INIT:
+	case NETDEV_REGISTER:
+	case NETDEV_CHANGE:
+	case NETDEV_PRE_TYPE_CHANGE:
+	case NETDEV_GOING_DOWN:
+	case NETDEV_UNREGISTER:
+	case NETDEV_UNREGISTER_BATCH:
+	case NETDEV_RELEASE:
+	case NETDEV_JOIN:
+		break;
+	default:
+		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block rtnetlink_dev_notifier = {
+	.notifier_call	= rtnetlink_event,
+};
+
+
+static int __net_init rtnetlink_net_init(struct net *net)
+{
+	struct sock *sk;
+	sk = netlink_kernel_create(net, NETLINK_ROUTE, RTNLGRP_MAX,
+				   rtnetlink_rcv, &rtnl_mutex, THIS_MODULE);
+	if (!sk)
+		return -ENOMEM;
+	net->rtnl = sk;
+	return 0;
+}
+
+static void __net_exit rtnetlink_net_exit(struct net *net)
+{
+	netlink_kernel_release(net->rtnl);
+	net->rtnl = NULL;
+}
+
+static struct pernet_operations rtnetlink_net_ops = {
+	.init = rtnetlink_net_init,
+	.exit = rtnetlink_net_exit,
+};
+
+void __init rtnetlink_init(void)
+{
+	int i;
+
+	rtattr_max = 0;
+	for (i = 0; i < ARRAY_SIZE(rta_max); i++)
+		if (rta_max[i] > rtattr_max)
+			rtattr_max = rta_max[i];
+	rta_buf = kmalloc(rtattr_max * sizeof(struct rtattr *), GFP_KERNEL);
+	if (!rta_buf)
+		panic("rtnetlink_init: cannot allocate rta_buf\n");
+
+	if (register_pernet_subsys(&rtnetlink_net_ops))
+		panic("rtnetlink_init: cannot initialize rtnetlink\n");
+
+	netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV);
+	register_netdevice_notifier(&rtnetlink_dev_notifier);
+
+	rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink,
+		      rtnl_dump_ifinfo, rtnl_calcit);
+	rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL, NULL);
+	rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL, NULL);
+	rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL, NULL);
+
+	rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all, NULL);
+	rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all, NULL);
+}
+
diff --git a/net/core/scm.c b/net/core/scm.c
new file mode 100644
index 00000000..611c5efd
--- /dev/null
+++ b/net/core/scm.c
@@ -0,0 +1,345 @@
+/* scm.c - Socket level control messages processing.
+ *
+ * Author:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *              Alignment and value checking mods by Craig Metz
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/signal.h>
+#include <linux/capability.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/kernel.h>
+#include <linux/stat.h>
+#include <linux/socket.h>
+#include <linux/file.h>
+#include <linux/fcntl.h>
+#include <linux/net.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/security.h>
+#include <linux/pid.h>
+#include <linux/nsproxy.h>
+#include <linux/slab.h>
+
+#include <asm/uaccess.h>
+
+#include <net/protocol.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/compat.h>
+#include <net/scm.h>
+
+
+/*
+ *	Only allow a user to send credentials, that they could set with
+ *	setu(g)id.
+ */
+
+static __inline__ int scm_check_creds(struct ucred *creds)
+{
+	const struct cred *cred = current_cred();
+
+	if ((creds->pid == task_tgid_vnr(current) || capable(CAP_SYS_ADMIN)) &&
+	    ((creds->uid == cred->uid   || creds->uid == cred->euid ||
+	      creds->uid == cred->suid) || capable(CAP_SETUID)) &&
+	    ((creds->gid == cred->gid   || creds->gid == cred->egid ||
+	      creds->gid == cred->sgid) || capable(CAP_SETGID))) {
+	       return 0;
+	}
+	return -EPERM;
+}
+
+static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
+{
+	int *fdp = (int*)CMSG_DATA(cmsg);
+	struct scm_fp_list *fpl = *fplp;
+	struct file **fpp;
+	int i, num;
+
+	num = (cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)))/sizeof(int);
+
+	if (num <= 0)
+		return 0;
+
+	if (num > SCM_MAX_FD)
+		return -EINVAL;
+
+	if (!fpl)
+	{
+		fpl = kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL);
+		if (!fpl)
+			return -ENOMEM;
+		*fplp = fpl;
+		fpl->count = 0;
+		fpl->max = SCM_MAX_FD;
+	}
+	fpp = &fpl->fp[fpl->count];
+
+	if (fpl->count + num > fpl->max)
+		return -EINVAL;
+
+	/*
+	 *	Verify the descriptors and increment the usage count.
+	 */
+
+	for (i=0; i< num; i++)
+	{
+		int fd = fdp[i];
+		struct file *file;
+
+		if (fd < 0 || !(file = fget_raw(fd)))
+			return -EBADF;
+		*fpp++ = file;
+		fpl->count++;
+	}
+	return num;
+}
+
+void __scm_destroy(struct scm_cookie *scm)
+{
+	struct scm_fp_list *fpl = scm->fp;
+	int i;
+
+	if (fpl) {
+		scm->fp = NULL;
+		if (current->scm_work_list) {
+			list_add_tail(&fpl->list, current->scm_work_list);
+		} else {
+			LIST_HEAD(work_list);
+
+			current->scm_work_list = &work_list;
+
+			list_add(&fpl->list, &work_list);
+			while (!list_empty(&work_list)) {
+				fpl = list_first_entry(&work_list, struct scm_fp_list, list);
+
+				list_del(&fpl->list);
+				for (i=fpl->count-1; i>=0; i--)
+					fput(fpl->fp[i]);
+				kfree(fpl);
+			}
+
+			current->scm_work_list = NULL;
+		}
+	}
+}
+EXPORT_SYMBOL(__scm_destroy);
+
+int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
+{
+	struct cmsghdr *cmsg;
+	int err;
+
+	for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg))
+	{
+		err = -EINVAL;
+
+		/* Verify that cmsg_len is at least sizeof(struct cmsghdr) */
+		/* The first check was omitted in <= 2.2.5. The reasoning was
+		   that parser checks cmsg_len in any case, so that
+		   additional check would be work duplication.
+		   But if cmsg_level is not SOL_SOCKET, we do not check
+		   for too short ancillary data object at all! Oops.
+		   OK, let's add it...
+		 */
+		if (!CMSG_OK(msg, cmsg))
+			goto error;
+
+		if (cmsg->cmsg_level != SOL_SOCKET)
+			continue;
+
+		switch (cmsg->cmsg_type)
+		{
+		case SCM_RIGHTS:
+			if (!sock->ops || sock->ops->family != PF_UNIX)
+				goto error;
+			err=scm_fp_copy(cmsg, &p->fp);
+			if (err<0)
+				goto error;
+			break;
+		case SCM_CREDENTIALS:
+			if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct ucred)))
+				goto error;
+			memcpy(&p->creds, CMSG_DATA(cmsg), sizeof(struct ucred));
+			err = scm_check_creds(&p->creds);
+			if (err)
+				goto error;
+
+			if (!p->pid || pid_vnr(p->pid) != p->creds.pid) {
+				struct pid *pid;
+				err = -ESRCH;
+				pid = find_get_pid(p->creds.pid);
+				if (!pid)
+					goto error;
+				put_pid(p->pid);
+				p->pid = pid;
+			}
+
+			if (!p->cred ||
+			    (p->cred->euid != p->creds.uid) ||
+			    (p->cred->egid != p->creds.gid)) {
+				struct cred *cred;
+				err = -ENOMEM;
+				cred = prepare_creds();
+				if (!cred)
+					goto error;
+
+				cred->uid = cred->euid = p->creds.uid;
+				cred->gid = cred->egid = p->creds.gid;
+				if (p->cred)
+					put_cred(p->cred);
+				p->cred = cred;
+			}
+			break;
+		default:
+			goto error;
+		}
+	}
+
+	if (p->fp && !p->fp->count)
+	{
+		kfree(p->fp);
+		p->fp = NULL;
+	}
+	return 0;
+
+error:
+	scm_destroy(p);
+	return err;
+}
+EXPORT_SYMBOL(__scm_send);
+
+int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data)
+{
+	struct cmsghdr __user *cm
+		= (__force struct cmsghdr __user *)msg->msg_control;
+	struct cmsghdr cmhdr;
+	int cmlen = CMSG_LEN(len);
+	int err;
+
+	if (MSG_CMSG_COMPAT & msg->msg_flags)
+		return put_cmsg_compat(msg, level, type, len, data);
+
+	if (cm==NULL || msg->msg_controllen < sizeof(*cm)) {
+		msg->msg_flags |= MSG_CTRUNC;
+		return 0; /* XXX: return error? check spec. */
+	}
+	if (msg->msg_controllen < cmlen) {
+		msg->msg_flags |= MSG_CTRUNC;
+		cmlen = msg->msg_controllen;
+	}
+	cmhdr.cmsg_level = level;
+	cmhdr.cmsg_type = type;
+	cmhdr.cmsg_len = cmlen;
+
+	err = -EFAULT;
+	if (copy_to_user(cm, &cmhdr, sizeof cmhdr))
+		goto out;
+	if (copy_to_user(CMSG_DATA(cm), data, cmlen - sizeof(struct cmsghdr)))
+		goto out;
+	cmlen = CMSG_SPACE(len);
+	if (msg->msg_controllen < cmlen)
+		cmlen = msg->msg_controllen;
+	msg->msg_control += cmlen;
+	msg->msg_controllen -= cmlen;
+	err = 0;
+out:
+	return err;
+}
+EXPORT_SYMBOL(put_cmsg);
+
+void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
+{
+	struct cmsghdr __user *cm
+		= (__force struct cmsghdr __user*)msg->msg_control;
+
+	int fdmax = 0;
+	int fdnum = scm->fp->count;
+	struct file **fp = scm->fp->fp;
+	int __user *cmfptr;
+	int err = 0, i;
+
+	if (MSG_CMSG_COMPAT & msg->msg_flags) {
+		scm_detach_fds_compat(msg, scm);
+		return;
+	}
+
+	if (msg->msg_controllen > sizeof(struct cmsghdr))
+		fdmax = ((msg->msg_controllen - sizeof(struct cmsghdr))
+			 / sizeof(int));
+
+	if (fdnum < fdmax)
+		fdmax = fdnum;
+
+	for (i=0, cmfptr=(__force int __user *)CMSG_DATA(cm); i<fdmax;
+	     i++, cmfptr++)
+	{
+		int new_fd;
+		err = security_file_receive(fp[i]);
+		if (err)
+			break;
+		err = get_unused_fd_flags(MSG_CMSG_CLOEXEC & msg->msg_flags
+					  ? O_CLOEXEC : 0);
+		if (err < 0)
+			break;
+		new_fd = err;
+		err = put_user(new_fd, cmfptr);
+		if (err) {
+			put_unused_fd(new_fd);
+			break;
+		}
+		/* Bump the usage count and install the file. */
+		get_file(fp[i]);
+		fd_install(new_fd, fp[i]);
+	}
+
+	if (i > 0)
+	{
+		int cmlen = CMSG_LEN(i*sizeof(int));
+		err = put_user(SOL_SOCKET, &cm->cmsg_level);
+		if (!err)
+			err = put_user(SCM_RIGHTS, &cm->cmsg_type);
+		if (!err)
+			err = put_user(cmlen, &cm->cmsg_len);
+		if (!err) {
+			cmlen = CMSG_SPACE(i*sizeof(int));
+			msg->msg_control += cmlen;
+			msg->msg_controllen -= cmlen;
+		}
+	}
+	if (i < fdnum || (fdnum && fdmax <= 0))
+		msg->msg_flags |= MSG_CTRUNC;
+
+	/*
+	 * All of the files that fit in the message have had their
+	 * usage counts incremented, so we just free the list.
+	 */
+	__scm_destroy(scm);
+}
+EXPORT_SYMBOL(scm_detach_fds);
+
+struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
+{
+	struct scm_fp_list *new_fpl;
+	int i;
+
+	if (!fpl)
+		return NULL;
+
+	new_fpl = kmemdup(fpl, offsetof(struct scm_fp_list, fp[fpl->count]),
+			  GFP_KERNEL);
+	if (new_fpl) {
+		for (i = 0; i < fpl->count; i++)
+			get_file(fpl->fp[i]);
+		new_fpl->max = new_fpl->count;
+	}
+	return new_fpl;
+}
+EXPORT_SYMBOL(scm_fp_dup);
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
new file mode 100644
index 00000000..99b25965
--- /dev/null
+++ b/net/core/secure_seq.c
@@ -0,0 +1,186 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/cryptohash.h>
+#include <linux/module.h>
+#include <linux/cache.h>
+#include <linux/random.h>
+#include <linux/hrtimer.h>
+#include <linux/ktime.h>
+#include <linux/string.h>
+
+#include <net/secure_seq.h>
+
+static u32 net_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned;
+
+static int __init net_secret_init(void)
+{
+	get_random_bytes(net_secret, sizeof(net_secret));
+	return 0;
+}
+late_initcall(net_secret_init);
+
+#ifdef CONFIG_INET
+static u32 seq_scale(u32 seq)
+{
+	/*
+	 *	As close as possible to RFC 793, which
+	 *	suggests using a 250 kHz clock.
+	 *	Further reading shows this assumes 2 Mb/s networks.
+	 *	For 10 Mb/s Ethernet, a 1 MHz clock is appropriate.
+	 *	For 10 Gb/s Ethernet, a 1 GHz clock should be ok, but
+	 *	we also need to limit the resolution so that the u32 seq
+	 *	overlaps less than one time per MSL (2 minutes).
+	 *	Choosing a clock of 64 ns period is OK. (period of 274 s)
+	 */
+	return seq + (ktime_to_ns(ktime_get_real()) >> 6);
+}
+#endif
+
+#if IS_ENABLED(CONFIG_IPV6)
+__u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
+				   __be16 sport, __be16 dport)
+{
+	u32 secret[MD5_MESSAGE_BYTES / 4];
+	u32 hash[MD5_DIGEST_WORDS];
+	u32 i;
+
+	memcpy(hash, saddr, 16);
+	for (i = 0; i < 4; i++)
+		secret[i] = net_secret[i] + (__force u32)daddr[i];
+	secret[4] = net_secret[4] +
+		(((__force u16)sport << 16) + (__force u16)dport);
+	for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++)
+		secret[i] = net_secret[i];
+
+	md5_transform(hash, secret);
+
+	return seq_scale(hash[0]);
+}
+EXPORT_SYMBOL(secure_tcpv6_sequence_number);
+
+u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
+			       __be16 dport)
+{
+	u32 secret[MD5_MESSAGE_BYTES / 4];
+	u32 hash[MD5_DIGEST_WORDS];
+	u32 i;
+
+	memcpy(hash, saddr, 16);
+	for (i = 0; i < 4; i++)
+		secret[i] = net_secret[i] + (__force u32) daddr[i];
+	secret[4] = net_secret[4] + (__force u32)dport;
+	for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++)
+		secret[i] = net_secret[i];
+
+	md5_transform(hash, secret);
+
+	return hash[0];
+}
+#endif
+
+#ifdef CONFIG_INET
+__u32 secure_ip_id(__be32 daddr)
+{
+	u32 hash[MD5_DIGEST_WORDS];
+
+	hash[0] = (__force __u32) daddr;
+	hash[1] = net_secret[13];
+	hash[2] = net_secret[14];
+	hash[3] = net_secret[15];
+
+	md5_transform(hash, net_secret);
+
+	return hash[0];
+}
+
+__u32 secure_ipv6_id(const __be32 daddr[4])
+{
+	__u32 hash[4];
+
+	memcpy(hash, daddr, 16);
+	md5_transform(hash, net_secret);
+
+	return hash[0];
+}
+
+__u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
+				 __be16 sport, __be16 dport)
+{
+	u32 hash[MD5_DIGEST_WORDS];
+
+	hash[0] = (__force u32)saddr;
+	hash[1] = (__force u32)daddr;
+	hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
+	hash[3] = net_secret[15];
+
+	md5_transform(hash, net_secret);
+
+	return seq_scale(hash[0]);
+}
+
+u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
+{
+	u32 hash[MD5_DIGEST_WORDS];
+
+	hash[0] = (__force u32)saddr;
+	hash[1] = (__force u32)daddr;
+	hash[2] = (__force u32)dport ^ net_secret[14];
+	hash[3] = net_secret[15];
+
+	md5_transform(hash, net_secret);
+
+	return hash[0];
+}
+EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral);
+#endif
+
+#if IS_ENABLED(CONFIG_IP_DCCP)
+u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
+				__be16 sport, __be16 dport)
+{
+	u32 hash[MD5_DIGEST_WORDS];
+	u64 seq;
+
+	hash[0] = (__force u32)saddr;
+	hash[1] = (__force u32)daddr;
+	hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
+	hash[3] = net_secret[15];
+
+	md5_transform(hash, net_secret);
+
+	seq = hash[0] | (((u64)hash[1]) << 32);
+	seq += ktime_to_ns(ktime_get_real());
+	seq &= (1ull << 48) - 1;
+
+	return seq;
+}
+EXPORT_SYMBOL(secure_dccp_sequence_number);
+
+#if IS_ENABLED(CONFIG_IPV6)
+u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
+				  __be16 sport, __be16 dport)
+{
+	u32 secret[MD5_MESSAGE_BYTES / 4];
+	u32 hash[MD5_DIGEST_WORDS];
+	u64 seq;
+	u32 i;
+
+	memcpy(hash, saddr, 16);
+	for (i = 0; i < 4; i++)
+		secret[i] = net_secret[i] + daddr[i];
+	secret[4] = net_secret[4] +
+		(((__force u16)sport << 16) + (__force u16)dport);
+	for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++)
+		secret[i] = net_secret[i];
+
+	md5_transform(hash, secret);
+
+	seq = hash[0] | (((u64)hash[1]) << 32);
+	seq += ktime_to_ns(ktime_get_real());
+	seq &= (1ull << 48) - 1;
+
+	return seq;
+}
+EXPORT_SYMBOL(secure_dccpv6_sequence_number);
+#endif
+#endif
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
new file mode 100644
index 00000000..e99aedd9
--- /dev/null
+++ b/net/core/skbuff.c
@@ -0,0 +1,3283 @@
+/*
+ *	Routines having to do with the 'struct sk_buff' memory handlers.
+ *
+ *	Authors:	Alan Cox <alan@lxorguk.ukuu.org.uk>
+ *			Florian La Roche <rzsfl@rz.uni-sb.de>
+ *
+ *	Fixes:
+ *		Alan Cox	:	Fixed the worst of the load
+ *					balancer bugs.
+ *		Dave Platt	:	Interrupt stacking fix.
+ *	Richard Kooijman	:	Timestamp fixes.
+ *		Alan Cox	:	Changed buffer format.
+ *		Alan Cox	:	destructor hook for AF_UNIX etc.
+ *		Linus Torvalds	:	Better skb_clone.
+ *		Alan Cox	:	Added skb_copy.
+ *		Alan Cox	:	Added all the changed routines Linus
+ *					only put in the headers
+ *		Ray VanTassle	:	Fixed --skb->lock in free
+ *		Alan Cox	:	skb_copy copy arp field
+ *		Andi Kleen	:	slabified it.
+ *		Robert Olsson	:	Removed skb_head_pool
+ *
+ *	NOTE:
+ *		The __skb_ routines should be called with interrupts
+ *	disabled, or you better be *real* sure that the operation is atomic
+ *	with respect to whatever list is being frobbed (e.g. via lock_sock()
+ *	or via disabling bottom half handlers, etc).
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ */
+
+/*
+ *	The functions in this file will not compile correctly with gcc 2.4.x
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/kmemcheck.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/slab.h>
+#include <linux/netdevice.h>
+#ifdef CONFIG_NET_CLS_ACT
+#include <net/pkt_sched.h>
+#endif
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <linux/splice.h>
+#include <linux/cache.h>
+#include <linux/rtnetlink.h>
+#include <linux/init.h>
+#include <linux/scatterlist.h>
+#include <linux/errqueue.h>
+#include <linux/prefetch.h>
+
+#include <net/protocol.h>
+#include <net/dst.h>
+#include <net/sock.h>
+#include <net/checksum.h>
+#include <net/xfrm.h>
+
+#include <asm/uaccess.h>
+#include <trace/events/skb.h>
+
+#include "kmap_skb.h"
+
+static struct kmem_cache *skbuff_head_cache __read_mostly;
+static struct kmem_cache *skbuff_fclone_cache __read_mostly;
+
+static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
+				  struct pipe_buffer *buf)
+{
+	put_page(buf->page);
+}
+
+static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
+				struct pipe_buffer *buf)
+{
+	get_page(buf->page);
+}
+
+static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
+			       struct pipe_buffer *buf)
+{
+	return 1;
+}
+
+
+/* Pipe buffer operations for a socket. */
+static const struct pipe_buf_operations sock_pipe_buf_ops = {
+	.can_merge = 0,
+	.map = generic_pipe_buf_map,
+	.unmap = generic_pipe_buf_unmap,
+	.confirm = generic_pipe_buf_confirm,
+	.release = sock_pipe_buf_release,
+	.steal = sock_pipe_buf_steal,
+	.get = sock_pipe_buf_get,
+};
+
+/*
+ *	Keep out-of-line to prevent kernel bloat.
+ *	__builtin_return_address is not used because it is not always
+ *	reliable.
+ */
+
+/**
+ *	skb_over_panic	- 	private function
+ *	@skb: buffer
+ *	@sz: size
+ *	@here: address
+ *
+ *	Out of line support code for skb_put(). Not user callable.
+ */
+static void skb_over_panic(struct sk_buff *skb, int sz, void *here)
+{
+	printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p "
+			  "data:%p tail:%#lx end:%#lx dev:%s\n",
+	       here, skb->len, sz, skb->head, skb->data,
+	       (unsigned long)skb->tail, (unsigned long)skb->end,
+	       skb->dev ? skb->dev->name : "<NULL>");
+	BUG();
+}
+
+/**
+ *	skb_under_panic	- 	private function
+ *	@skb: buffer
+ *	@sz: size
+ *	@here: address
+ *
+ *	Out of line support code for skb_push(). Not user callable.
+ */
+
+static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
+{
+	printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p "
+			  "data:%p tail:%#lx end:%#lx dev:%s\n",
+	       here, skb->len, sz, skb->head, skb->data,
+	       (unsigned long)skb->tail, (unsigned long)skb->end,
+	       skb->dev ? skb->dev->name : "<NULL>");
+	BUG();
+}
+
+/* 	Allocate a new skbuff. We do this ourselves so we can fill in a few
+ *	'private' fields and also do memory statistics to find all the
+ *	[BEEP] leaks.
+ *
+ */
+
+/**
+ *	__alloc_skb	-	allocate a network buffer
+ *	@size: size to allocate
+ *	@gfp_mask: allocation mask
+ *	@fclone: allocate from fclone cache instead of head cache
+ *		and allocate a cloned (child) skb
+ *	@node: numa node to allocate memory on
+ *
+ *	Allocate a new &sk_buff. The returned buffer has no headroom and a
+ *	tail room of size bytes. The object has a reference count of one.
+ *	The return is the buffer. On a failure the return is %NULL.
+ *
+ *	Buffers may only be allocated from interrupts using a @gfp_mask of
+ *	%GFP_ATOMIC.
+ */
+struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
+			    int fclone, int node)
+{
+	struct kmem_cache *cache;
+	struct skb_shared_info *shinfo;
+	struct sk_buff *skb;
+	u8 *data;
+
+	cache = fclone ? skbuff_fclone_cache : skbuff_head_cache;
+
+	/* Get the HEAD */
+	skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
+	if (!skb)
+		goto out;
+	prefetchw(skb);
+
+	/* We do our best to align skb_shared_info on a separate cache
+	 * line. It usually works because kmalloc(X > SMP_CACHE_BYTES) gives
+	 * aligned memory blocks, unless SLUB/SLAB debug is enabled.
+	 * Both skb->head and skb_shared_info are cache line aligned.
+	 */
+	size = SKB_DATA_ALIGN(size);
+	size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+	data = kmalloc_node_track_caller(size, gfp_mask, node);
+	if (!data)
+		goto nodata;
+	/* kmalloc(size) might give us more room than requested.
+	 * Put skb_shared_info exactly at the end of allocated zone,
+	 * to allow max possible filling before reallocation.
+	 */
+	size = SKB_WITH_OVERHEAD(ksize(data));
+	prefetchw(data + size);
+
+	/*
+	 * Only clear those fields we need to clear, not those that we will
+	 * actually initialise below. Hence, don't put any more fields after
+	 * the tail pointer in struct sk_buff!
+	 */
+	memset(skb, 0, offsetof(struct sk_buff, tail));
+	/* Account for allocated memory : skb + skb->head */
+	skb->truesize = SKB_TRUESIZE(size);
+	atomic_set(&skb->users, 1);
+	skb->head = data;
+	skb->data = data;
+	skb_reset_tail_pointer(skb);
+	skb->end = skb->tail + size;
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+	skb->mac_header = ~0U;
+#endif
+
+	/* make sure we initialize shinfo sequentially */
+	shinfo = skb_shinfo(skb);
+	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
+	atomic_set(&shinfo->dataref, 1);
+	kmemcheck_annotate_variable(shinfo->destructor_arg);
+
+	if (fclone) {
+		struct sk_buff *child = skb + 1;
+		atomic_t *fclone_ref = (atomic_t *) (child + 1);
+
+		kmemcheck_annotate_bitfield(child, flags1);
+		kmemcheck_annotate_bitfield(child, flags2);
+		skb->fclone = SKB_FCLONE_ORIG;
+		atomic_set(fclone_ref, 1);
+
+		child->fclone = SKB_FCLONE_UNAVAILABLE;
+	}
+out:
+	return skb;
+nodata:
+	kmem_cache_free(cache, skb);
+	skb = NULL;
+	goto out;
+}
+EXPORT_SYMBOL(__alloc_skb);
+
+/**
+ * build_skb - build a network buffer
+ * @data: data buffer provided by caller
+ *
+ * Allocate a new &sk_buff. Caller provides space holding head and
+ * skb_shared_info. @data must have been allocated by kmalloc()
+ * The return is the new skb buffer.
+ * On a failure the return is %NULL, and @data is not freed.
+ * Notes :
+ *  Before IO, driver allocates only data buffer where NIC put incoming frame
+ *  Driver should add room at head (NET_SKB_PAD) and
+ *  MUST add room at tail (SKB_DATA_ALIGN(skb_shared_info))
+ *  After IO, driver calls build_skb(), to allocate sk_buff and populate it
+ *  before giving packet to stack.
+ *  RX rings only contains data buffers, not full skbs.
+ */
+struct sk_buff *build_skb(void *data)
+{
+	struct skb_shared_info *shinfo;
+	struct sk_buff *skb;
+	unsigned int size;
+
+	skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC);
+	if (!skb)
+		return NULL;
+
+	size = ksize(data) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+
+	memset(skb, 0, offsetof(struct sk_buff, tail));
+	skb->truesize = SKB_TRUESIZE(size);
+	atomic_set(&skb->users, 1);
+	skb->head = data;
+	skb->data = data;
+	skb_reset_tail_pointer(skb);
+	skb->end = skb->tail + size;
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+	skb->mac_header = ~0U;
+#endif
+
+	/* make sure we initialize shinfo sequentially */
+	shinfo = skb_shinfo(skb);
+	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
+	atomic_set(&shinfo->dataref, 1);
+	kmemcheck_annotate_variable(shinfo->destructor_arg);
+
+	return skb;
+}
+EXPORT_SYMBOL(build_skb);
+
+/**
+ *	__netdev_alloc_skb - allocate an skbuff for rx on a specific device
+ *	@dev: network device to receive on
+ *	@length: length to allocate
+ *	@gfp_mask: get_free_pages mask, passed to alloc_skb
+ *
+ *	Allocate a new &sk_buff and assign it a usage count of one. The
+ *	buffer has unspecified headroom built in. Users should allocate
+ *	the headroom they think they need without accounting for the
+ *	built in space. The built in space is used for optimisations.
+ *
+ *	%NULL is returned if there is no free memory.
+ */
+struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
+		unsigned int length, gfp_t gfp_mask)
+{
+	struct sk_buff *skb;
+
+	skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, NUMA_NO_NODE);
+	if (likely(skb)) {
+		skb_reserve(skb, NET_SKB_PAD);
+		skb->dev = dev;
+	}
+	return skb;
+}
+EXPORT_SYMBOL(__netdev_alloc_skb);
+
+void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
+		     int size, unsigned int truesize)
+{
+	skb_fill_page_desc(skb, i, page, off, size);
+	skb->len += size;
+	skb->data_len += size;
+	skb->truesize += truesize;
+}
+EXPORT_SYMBOL(skb_add_rx_frag);
+
+/**
+ *	dev_alloc_skb - allocate an skbuff for receiving
+ *	@length: length to allocate
+ *
+ *	Allocate a new &sk_buff and assign it a usage count of one. The
+ *	buffer has unspecified headroom built in. Users should allocate
+ *	the headroom they think they need without accounting for the
+ *	built in space. The built in space is used for optimisations.
+ *
+ *	%NULL is returned if there is no free memory. Although this function
+ *	allocates memory it can be called from an interrupt.
+ */
+struct sk_buff *dev_alloc_skb(unsigned int length)
+{
+	/*
+	 * There is more code here than it seems:
+	 * __dev_alloc_skb is an inline
+	 */
+	return __dev_alloc_skb(length, GFP_ATOMIC);
+}
+EXPORT_SYMBOL(dev_alloc_skb);
+
+static void skb_drop_list(struct sk_buff **listp)
+{
+	struct sk_buff *list = *listp;
+
+	*listp = NULL;
+
+	do {
+		struct sk_buff *this = list;
+		list = list->next;
+		kfree_skb(this);
+	} while (list);
+}
+
+static inline void skb_drop_fraglist(struct sk_buff *skb)
+{
+	skb_drop_list(&skb_shinfo(skb)->frag_list);
+}
+
+static void skb_clone_fraglist(struct sk_buff *skb)
+{
+	struct sk_buff *list;
+
+	skb_walk_frags(skb, list)
+		skb_get(list);
+}
+
+static void skb_release_data(struct sk_buff *skb)
+{
+	if (!skb->cloned ||
+	    !atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
+			       &skb_shinfo(skb)->dataref)) {
+		if (skb_shinfo(skb)->nr_frags) {
+			int i;
+			for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+				skb_frag_unref(skb, i);
+		}
+
+		/*
+		 * If skb buf is from userspace, we need to notify the caller
+		 * the lower device DMA has done;
+		 */
+		if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
+			struct ubuf_info *uarg;
+
+			uarg = skb_shinfo(skb)->destructor_arg;
+			if (uarg->callback)
+				uarg->callback(uarg);
+		}
+
+		if (skb_has_frag_list(skb))
+			skb_drop_fraglist(skb);
+
+		kfree(skb->head);
+	}
+}
+
+/*
+ *	Free an skbuff by memory without cleaning the state.
+ */
+static void kfree_skbmem(struct sk_buff *skb)
+{
+	struct sk_buff *other;
+	atomic_t *fclone_ref;
+
+	switch (skb->fclone) {
+	case SKB_FCLONE_UNAVAILABLE:
+		kmem_cache_free(skbuff_head_cache, skb);
+		break;
+
+	case SKB_FCLONE_ORIG:
+		fclone_ref = (atomic_t *) (skb + 2);
+		if (atomic_dec_and_test(fclone_ref))
+			kmem_cache_free(skbuff_fclone_cache, skb);
+		break;
+
+	case SKB_FCLONE_CLONE:
+		fclone_ref = (atomic_t *) (skb + 1);
+		other = skb - 1;
+
+		/* The clone portion is available for
+		 * fast-cloning again.
+		 */
+		skb->fclone = SKB_FCLONE_UNAVAILABLE;
+
+		if (atomic_dec_and_test(fclone_ref))
+			kmem_cache_free(skbuff_fclone_cache, other);
+		break;
+	}
+}
+
+static void skb_release_head_state(struct sk_buff *skb)
+{
+	skb_dst_drop(skb);
+#ifdef CONFIG_XFRM
+	secpath_put(skb->sp);
+#endif
+	if (skb->destructor) {
+		WARN_ON(in_irq());
+		skb->destructor(skb);
+	}
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+	nf_conntrack_put(skb->nfct);
+#endif
+#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
+	nf_conntrack_put_reasm(skb->nfct_reasm);
+#endif
+#ifdef CONFIG_BRIDGE_NETFILTER
+	nf_bridge_put(skb->nf_bridge);
+#endif
+/* XXX: IS this still necessary? - JHS */
+#ifdef CONFIG_NET_SCHED
+	skb->tc_index = 0;
+#ifdef CONFIG_NET_CLS_ACT
+	skb->tc_verd = 0;
+#endif
+#endif
+}
+
+/* Free everything but the sk_buff shell. */
+static void skb_release_all(struct sk_buff *skb)
+{
+	skb_release_head_state(skb);
+	skb_release_data(skb);
+}
+
+/**
+ *	__kfree_skb - private function
+ *	@skb: buffer
+ *
+ *	Free an sk_buff. Release anything attached to the buffer.
+ *	Clean the state. This is an internal helper function. Users should
+ *	always call kfree_skb
+ */
+
+void __kfree_skb(struct sk_buff *skb)
+{
+	skb_release_all(skb);
+	kfree_skbmem(skb);
+}
+EXPORT_SYMBOL(__kfree_skb);
+
+/**
+ *	kfree_skb - free an sk_buff
+ *	@skb: buffer to free
+ *
+ *	Drop a reference to the buffer and free it if the usage count has
+ *	hit zero.
+ */
+void kfree_skb(struct sk_buff *skb)
+{
+	if (unlikely(!skb))
+		return;
+	if (likely(atomic_read(&skb->users) == 1))
+		smp_rmb();
+	else if (likely(!atomic_dec_and_test(&skb->users)))
+		return;
+	trace_kfree_skb(skb, __builtin_return_address(0));
+	__kfree_skb(skb);
+}
+EXPORT_SYMBOL(kfree_skb);
+
+/**
+ *	consume_skb - free an skbuff
+ *	@skb: buffer to free
+ *
+ *	Drop a ref to the buffer and free it if the usage count has hit zero
+ *	Functions identically to kfree_skb, but kfree_skb assumes that the frame
+ *	is being dropped after a failure and notes that
+ */
+void consume_skb(struct sk_buff *skb)
+{
+	if (unlikely(!skb))
+		return;
+	if (likely(atomic_read(&skb->users) == 1))
+		smp_rmb();
+	else if (likely(!atomic_dec_and_test(&skb->users)))
+		return;
+	trace_consume_skb(skb);
+	__kfree_skb(skb);
+}
+EXPORT_SYMBOL(consume_skb);
+
+/**
+ * 	skb_recycle - clean up an skb for reuse
+ * 	@skb: buffer
+ *
+ * 	Recycles the skb to be reused as a receive buffer. This
+ * 	function does any necessary reference count dropping, and
+ * 	cleans up the skbuff as if it just came from __alloc_skb().
+ */
+void skb_recycle(struct sk_buff *skb)
+{
+	struct skb_shared_info *shinfo;
+
+	skb_release_head_state(skb);
+
+	shinfo = skb_shinfo(skb);
+	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
+	atomic_set(&shinfo->dataref, 1);
+
+	memset(skb, 0, offsetof(struct sk_buff, tail));
+	skb->data = skb->head + NET_SKB_PAD;
+	skb_reset_tail_pointer(skb);
+}
+EXPORT_SYMBOL(skb_recycle);
+
+/**
+ *	skb_recycle_check - check if skb can be reused for receive
+ *	@skb: buffer
+ *	@skb_size: minimum receive buffer size
+ *
+ *	Checks that the skb passed in is not shared or cloned, and
+ *	that it is linear and its head portion at least as large as
+ *	skb_size so that it can be recycled as a receive buffer.
+ *	If these conditions are met, this function does any necessary
+ *	reference count dropping and cleans up the skbuff as if it
+ *	just came from __alloc_skb().
+ */
+bool skb_recycle_check(struct sk_buff *skb, int skb_size)
+{
+	if (!skb_is_recycleable(skb, skb_size))
+		return false;
+
+	skb_recycle(skb);
+
+	return true;
+}
+EXPORT_SYMBOL(skb_recycle_check);
+
+static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
+{
+	new->tstamp		= old->tstamp;
+	new->dev		= old->dev;
+	new->transport_header	= old->transport_header;
+	new->network_header	= old->network_header;
+	new->mac_header		= old->mac_header;
+	skb_dst_copy(new, old);
+	new->rxhash		= old->rxhash;
+	new->ooo_okay		= old->ooo_okay;
+	new->l4_rxhash		= old->l4_rxhash;
+	new->no_fcs		= old->no_fcs;
+#ifdef CONFIG_XFRM
+	new->sp			= secpath_get(old->sp);
+#endif
+	memcpy(new->cb, old->cb, sizeof(old->cb));
+	new->csum		= old->csum;
+	new->local_df		= old->local_df;
+	new->pkt_type		= old->pkt_type;
+	new->ip_summed		= old->ip_summed;
+	skb_copy_queue_mapping(new, old);
+	new->priority		= old->priority;
+#if IS_ENABLED(CONFIG_IP_VS)
+	new->ipvs_property	= old->ipvs_property;
+#endif
+	new->protocol		= old->protocol;
+	new->mark		= old->mark;
+	new->skb_iif		= old->skb_iif;
+	__nf_copy(new, old);
+#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
+	new->nf_trace		= old->nf_trace;
+#endif
+#ifdef CONFIG_NET_SCHED
+	new->tc_index		= old->tc_index;
+#ifdef CONFIG_NET_CLS_ACT
+	new->tc_verd		= old->tc_verd;
+#endif
+#endif
+	new->vlan_tci		= old->vlan_tci;
+
+	skb_copy_secmark(new, old);
+}
+
+/*
+ * You should not add any new code to this function.  Add it to
+ * __copy_skb_header above instead.
+ */
+static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
+{
+#define C(x) n->x = skb->x
+
+	n->next = n->prev = NULL;
+	n->sk = NULL;
+	__copy_skb_header(n, skb);
+
+	C(len);
+	C(data_len);
+	C(mac_len);
+	n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len;
+	n->cloned = 1;
+	n->nohdr = 0;
+	n->destructor = NULL;
+	C(tail);
+	C(end);
+	C(head);
+	C(data);
+	C(truesize);
+	atomic_set(&n->users, 1);
+
+	atomic_inc(&(skb_shinfo(skb)->dataref));
+	skb->cloned = 1;
+
+	return n;
+#undef C
+}
+
+/**
+ *	skb_morph	-	morph one skb into another
+ *	@dst: the skb to receive the contents
+ *	@src: the skb to supply the contents
+ *
+ *	This is identical to skb_clone except that the target skb is
+ *	supplied by the user.
+ *
+ *	The target skb is returned upon exit.
+ */
+struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
+{
+	skb_release_all(dst);
+	return __skb_clone(dst, src);
+}
+EXPORT_SYMBOL_GPL(skb_morph);
+
+/*	skb_copy_ubufs	-	copy userspace skb frags buffers to kernel
+ *	@skb: the skb to modify
+ *	@gfp_mask: allocation priority
+ *
+ *	This must be called on SKBTX_DEV_ZEROCOPY skb.
+ *	It will copy all frags into kernel and drop the reference
+ *	to userspace pages.
+ *
+ *	If this function is called from an interrupt gfp_mask() must be
+ *	%GFP_ATOMIC.
+ *
+ *	Returns 0 on success or a negative error code on failure
+ *	to allocate kernel memory to copy to.
+ */
+int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
+{
+	int i;
+	int num_frags = skb_shinfo(skb)->nr_frags;
+	struct page *page, *head = NULL;
+	struct ubuf_info *uarg = skb_shinfo(skb)->destructor_arg;
+
+	for (i = 0; i < num_frags; i++) {
+		u8 *vaddr;
+		skb_frag_t *f = &skb_shinfo(skb)->frags[i];
+
+		page = alloc_page(GFP_ATOMIC);
+		if (!page) {
+			while (head) {
+				struct page *next = (struct page *)head->private;
+				put_page(head);
+				head = next;
+			}
+			return -ENOMEM;
+		}
+		vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
+		memcpy(page_address(page),
+		       vaddr + f->page_offset, skb_frag_size(f));
+		kunmap_skb_frag(vaddr);
+		page->private = (unsigned long)head;
+		head = page;
+	}
+
+	/* skb frags release userspace buffers */
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+		skb_frag_unref(skb, i);
+
+	uarg->callback(uarg);
+
+	/* skb frags point to kernel buffers */
+	for (i = skb_shinfo(skb)->nr_frags; i > 0; i--) {
+		__skb_fill_page_desc(skb, i-1, head, 0,
+				     skb_shinfo(skb)->frags[i - 1].size);
+		head = (struct page *)head->private;
+	}
+
+	skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
+	return 0;
+}
+
+
+/**
+ *	skb_clone	-	duplicate an sk_buff
+ *	@skb: buffer to clone
+ *	@gfp_mask: allocation priority
+ *
+ *	Duplicate an &sk_buff. The new one is not owned by a socket. Both
+ *	copies share the same packet data but not structure. The new
+ *	buffer has a reference count of 1. If the allocation fails the
+ *	function returns %NULL otherwise the new buffer is returned.
+ *
+ *	If this function is called from an interrupt gfp_mask() must be
+ *	%GFP_ATOMIC.
+ */
+
+struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
+{
+	struct sk_buff *n;
+
+	if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
+		if (skb_copy_ubufs(skb, gfp_mask))
+			return NULL;
+	}
+
+	n = skb + 1;
+	if (skb->fclone == SKB_FCLONE_ORIG &&
+	    n->fclone == SKB_FCLONE_UNAVAILABLE) {
+		atomic_t *fclone_ref = (atomic_t *) (n + 1);
+		n->fclone = SKB_FCLONE_CLONE;
+		atomic_inc(fclone_ref);
+	} else {
+		n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
+		if (!n)
+			return NULL;
+
+		kmemcheck_annotate_bitfield(n, flags1);
+		kmemcheck_annotate_bitfield(n, flags2);
+		n->fclone = SKB_FCLONE_UNAVAILABLE;
+	}
+
+	return __skb_clone(n, skb);
+}
+EXPORT_SYMBOL(skb_clone);
+
+static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
+{
+#ifndef NET_SKBUFF_DATA_USES_OFFSET
+	/*
+	 *	Shift between the two data areas in bytes
+	 */
+	unsigned long offset = new->data - old->data;
+#endif
+
+	__copy_skb_header(new, old);
+
+#ifndef NET_SKBUFF_DATA_USES_OFFSET
+	/* {transport,network,mac}_header are relative to skb->head */
+	new->transport_header += offset;
+	new->network_header   += offset;
+	if (skb_mac_header_was_set(new))
+		new->mac_header	      += offset;
+#endif
+	skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size;
+	skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs;
+	skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type;
+}
+
+/**
+ *	skb_copy	-	create private copy of an sk_buff
+ *	@skb: buffer to copy
+ *	@gfp_mask: allocation priority
+ *
+ *	Make a copy of both an &sk_buff and its data. This is used when the
+ *	caller wishes to modify the data and needs a private copy of the
+ *	data to alter. Returns %NULL on failure or the pointer to the buffer
+ *	on success. The returned buffer has a reference count of 1.
+ *
+ *	As by-product this function converts non-linear &sk_buff to linear
+ *	one, so that &sk_buff becomes completely private and caller is allowed
+ *	to modify all the data of returned buffer. This means that this
+ *	function is not recommended for use in circumstances when only
+ *	header is going to be modified. Use pskb_copy() instead.
+ */
+
+struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
+{
+	int headerlen = skb_headroom(skb);
+	unsigned int size = (skb_end_pointer(skb) - skb->head) + skb->data_len;
+	struct sk_buff *n = alloc_skb(size, gfp_mask);
+
+	if (!n)
+		return NULL;
+
+	/* Set the data pointer */
+	skb_reserve(n, headerlen);
+	/* Set the tail pointer and length */
+	skb_put(n, skb->len);
+
+	if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len))
+		BUG();
+
+	copy_skb_header(n, skb);
+	return n;
+}
+EXPORT_SYMBOL(skb_copy);
+
+/**
+ *	__pskb_copy	-	create copy of an sk_buff with private head.
+ *	@skb: buffer to copy
+ *	@headroom: headroom of new skb
+ *	@gfp_mask: allocation priority
+ *
+ *	Make a copy of both an &sk_buff and part of its data, located
+ *	in header. Fragmented data remain shared. This is used when
+ *	the caller wishes to modify only header of &sk_buff and needs
+ *	private copy of the header to alter. Returns %NULL on failure
+ *	or the pointer to the buffer on success.
+ *	The returned buffer has a reference count of 1.
+ */
+
+struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask)
+{
+	unsigned int size = skb_headlen(skb) + headroom;
+	struct sk_buff *n = alloc_skb(size, gfp_mask);
+
+	if (!n)
+		goto out;
+
+	/* Set the data pointer */
+	skb_reserve(n, headroom);
+	/* Set the tail pointer and length */
+	skb_put(n, skb_headlen(skb));
+	/* Copy the bytes */
+	skb_copy_from_linear_data(skb, n->data, n->len);
+
+	n->truesize += skb->data_len;
+	n->data_len  = skb->data_len;
+	n->len	     = skb->len;
+
+	if (skb_shinfo(skb)->nr_frags) {
+		int i;
+
+		if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
+			if (skb_copy_ubufs(skb, gfp_mask)) {
+				kfree_skb(n);
+				n = NULL;
+				goto out;
+			}
+		}
+		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+			skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
+			skb_frag_ref(skb, i);
+		}
+		skb_shinfo(n)->nr_frags = i;
+	}
+
+	if (skb_has_frag_list(skb)) {
+		skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
+		skb_clone_fraglist(n);
+	}
+
+	copy_skb_header(n, skb);
+out:
+	return n;
+}
+EXPORT_SYMBOL(__pskb_copy);
+
+/**
+ *	pskb_expand_head - reallocate header of &sk_buff
+ *	@skb: buffer to reallocate
+ *	@nhead: room to add at head
+ *	@ntail: room to add at tail
+ *	@gfp_mask: allocation priority
+ *
+ *	Expands (or creates identical copy, if &nhead and &ntail are zero)
+ *	header of skb. &sk_buff itself is not changed. &sk_buff MUST have
+ *	reference count of 1. Returns zero in the case of success or error,
+ *	if expansion failed. In the last case, &sk_buff is not changed.
+ *
+ *	All the pointers pointing into skb header may change and must be
+ *	reloaded after call to this function.
+ */
+
+int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
+		     gfp_t gfp_mask)
+{
+	int i;
+	u8 *data;
+	int size = nhead + (skb_end_pointer(skb) - skb->head) + ntail;
+	long off;
+	bool fastpath;
+
+	BUG_ON(nhead < 0);
+
+	if (skb_shared(skb))
+		BUG();
+
+	size = SKB_DATA_ALIGN(size);
+
+	/* Check if we can avoid taking references on fragments if we own
+	 * the last reference on skb->head. (see skb_release_data())
+	 */
+	if (!skb->cloned)
+		fastpath = true;
+	else {
+		int delta = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1;
+		fastpath = atomic_read(&skb_shinfo(skb)->dataref) == delta;
+	}
+
+	if (fastpath &&
+	    size + sizeof(struct skb_shared_info) <= ksize(skb->head)) {
+		memmove(skb->head + size, skb_shinfo(skb),
+			offsetof(struct skb_shared_info,
+				 frags[skb_shinfo(skb)->nr_frags]));
+		memmove(skb->head + nhead, skb->head,
+			skb_tail_pointer(skb) - skb->head);
+		off = nhead;
+		goto adjust_others;
+	}
+
+	data = kmalloc(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
+		       gfp_mask);
+	if (!data)
+		goto nodata;
+	size = SKB_WITH_OVERHEAD(ksize(data));
+
+	/* Copy only real data... and, alas, header. This should be
+	 * optimized for the cases when header is void.
+	 */
+	memcpy(data + nhead, skb->head, skb_tail_pointer(skb) - skb->head);
+
+	memcpy((struct skb_shared_info *)(data + size),
+	       skb_shinfo(skb),
+	       offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags]));
+
+	if (fastpath) {
+		kfree(skb->head);
+	} else {
+		/* copy this zero copy skb frags */
+		if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
+			if (skb_copy_ubufs(skb, gfp_mask))
+				goto nofrags;
+		}
+		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+			skb_frag_ref(skb, i);
+
+		if (skb_has_frag_list(skb))
+			skb_clone_fraglist(skb);
+
+		skb_release_data(skb);
+	}
+	off = (data + nhead) - skb->head;
+
+	skb->head     = data;
+adjust_others:
+	skb->data    += off;
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+	skb->end      = size;
+	off           = nhead;
+#else
+	skb->end      = skb->head + size;
+#endif
+	/* {transport,network,mac}_header and tail are relative to skb->head */
+	skb->tail	      += off;
+	skb->transport_header += off;
+	skb->network_header   += off;
+	if (skb_mac_header_was_set(skb))
+		skb->mac_header += off;
+	/* Only adjust this if it actually is csum_start rather than csum */
+	if (skb->ip_summed == CHECKSUM_PARTIAL)
+		skb->csum_start += nhead;
+	skb->cloned   = 0;
+	skb->hdr_len  = 0;
+	skb->nohdr    = 0;
+	atomic_set(&skb_shinfo(skb)->dataref, 1);
+	return 0;
+
+nofrags:
+	kfree(data);
+nodata:
+	return -ENOMEM;
+}
+EXPORT_SYMBOL(pskb_expand_head);
+
+/* Make private copy of skb with writable head and some headroom */
+
+struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
+{
+	struct sk_buff *skb2;
+	int delta = headroom - skb_headroom(skb);
+
+	if (delta <= 0)
+		skb2 = pskb_copy(skb, GFP_ATOMIC);
+	else {
+		skb2 = skb_clone(skb, GFP_ATOMIC);
+		if (skb2 && pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0,
+					     GFP_ATOMIC)) {
+			kfree_skb(skb2);
+			skb2 = NULL;
+		}
+	}
+	return skb2;
+}
+EXPORT_SYMBOL(skb_realloc_headroom);
+
+/**
+ *	skb_copy_expand	-	copy and expand sk_buff
+ *	@skb: buffer to copy
+ *	@newheadroom: new free bytes at head
+ *	@newtailroom: new free bytes at tail
+ *	@gfp_mask: allocation priority
+ *
+ *	Make a copy of both an &sk_buff and its data and while doing so
+ *	allocate additional space.
+ *
+ *	This is used when the caller wishes to modify the data and needs a
+ *	private copy of the data to alter as well as more space for new fields.
+ *	Returns %NULL on failure or the pointer to the buffer
+ *	on success. The returned buffer has a reference count of 1.
+ *
+ *	You must pass %GFP_ATOMIC as the allocation priority if this function
+ *	is called from an interrupt.
+ */
+struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
+				int newheadroom, int newtailroom,
+				gfp_t gfp_mask)
+{
+	/*
+	 *	Allocate the copy buffer
+	 */
+	struct sk_buff *n = alloc_skb(newheadroom + skb->len + newtailroom,
+				      gfp_mask);
+	int oldheadroom = skb_headroom(skb);
+	int head_copy_len, head_copy_off;
+	int off;
+
+	if (!n)
+		return NULL;
+
+	skb_reserve(n, newheadroom);
+
+	/* Set the tail pointer and length */
+	skb_put(n, skb->len);
+
+	head_copy_len = oldheadroom;
+	head_copy_off = 0;
+	if (newheadroom <= head_copy_len)
+		head_copy_len = newheadroom;
+	else
+		head_copy_off = newheadroom - head_copy_len;
+
+	/* Copy the linear header and data. */
+	if (skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off,
+			  skb->len + head_copy_len))
+		BUG();
+
+	copy_skb_header(n, skb);
+
+	off                  = newheadroom - oldheadroom;
+	if (n->ip_summed == CHECKSUM_PARTIAL)
+		n->csum_start += off;
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+	n->transport_header += off;
+	n->network_header   += off;
+	if (skb_mac_header_was_set(skb))
+		n->mac_header += off;
+#endif
+
+	return n;
+}
+EXPORT_SYMBOL(skb_copy_expand);
+
+/**
+ *	skb_pad			-	zero pad the tail of an skb
+ *	@skb: buffer to pad
+ *	@pad: space to pad
+ *
+ *	Ensure that a buffer is followed by a padding area that is zero
+ *	filled. Used by network drivers which may DMA or transfer data
+ *	beyond the buffer end onto the wire.
+ *
+ *	May return error in out of memory cases. The skb is freed on error.
+ */
+
+int skb_pad(struct sk_buff *skb, int pad)
+{
+	int err;
+	int ntail;
+
+	/* If the skbuff is non linear tailroom is always zero.. */
+	if (!skb_cloned(skb) && skb_tailroom(skb) >= pad) {
+		memset(skb->data+skb->len, 0, pad);
+		return 0;
+	}
+
+	ntail = skb->data_len + pad - (skb->end - skb->tail);
+	if (likely(skb_cloned(skb) || ntail > 0)) {
+		err = pskb_expand_head(skb, 0, ntail, GFP_ATOMIC);
+		if (unlikely(err))
+			goto free_skb;
+	}
+
+	/* FIXME: The use of this function with non-linear skb's really needs
+	 * to be audited.
+	 */
+	err = skb_linearize(skb);
+	if (unlikely(err))
+		goto free_skb;
+
+	memset(skb->data + skb->len, 0, pad);
+	return 0;
+
+free_skb:
+	kfree_skb(skb);
+	return err;
+}
+EXPORT_SYMBOL(skb_pad);
+
+/**
+ *	skb_put - add data to a buffer
+ *	@skb: buffer to use
+ *	@len: amount of data to add
+ *
+ *	This function extends the used data area of the buffer. If this would
+ *	exceed the total buffer size the kernel will panic. A pointer to the
+ *	first byte of the extra data is returned.
+ */
+unsigned char *skb_put(struct sk_buff *skb, unsigned int len)
+{
+	unsigned char *tmp = skb_tail_pointer(skb);
+	SKB_LINEAR_ASSERT(skb);
+	skb->tail += len;
+	skb->len  += len;
+	if (unlikely(skb->tail > skb->end))
+		skb_over_panic(skb, len, __builtin_return_address(0));
+	return tmp;
+}
+EXPORT_SYMBOL(skb_put);
+
+/**
+ *	skb_push - add data to the start of a buffer
+ *	@skb: buffer to use
+ *	@len: amount of data to add
+ *
+ *	This function extends the used data area of the buffer at the buffer
+ *	start. If this would exceed the total buffer headroom the kernel will
+ *	panic. A pointer to the first byte of the extra data is returned.
+ */
+unsigned char *skb_push(struct sk_buff *skb, unsigned int len)
+{
+	skb->data -= len;
+	skb->len  += len;
+	if (unlikely(skb->data<skb->head))
+		skb_under_panic(skb, len, __builtin_return_address(0));
+	return skb->data;
+}
+EXPORT_SYMBOL(skb_push);
+
+/**
+ *	skb_pull - remove data from the start of a buffer
+ *	@skb: buffer to use
+ *	@len: amount of data to remove
+ *
+ *	This function removes data from the start of a buffer, returning
+ *	the memory to the headroom. A pointer to the next data in the buffer
+ *	is returned. Once the data has been pulled future pushes will overwrite
+ *	the old data.
+ */
+unsigned char *skb_pull(struct sk_buff *skb, unsigned int len)
+{
+	return skb_pull_inline(skb, len);
+}
+EXPORT_SYMBOL(skb_pull);
+
+/**
+ *	skb_trim - remove end from a buffer
+ *	@skb: buffer to alter
+ *	@len: new length
+ *
+ *	Cut the length of a buffer down by removing data from the tail. If
+ *	the buffer is already under the length specified it is not modified.
+ *	The skb must be linear.
+ */
+void skb_trim(struct sk_buff *skb, unsigned int len)
+{
+	if (skb->len > len)
+		__skb_trim(skb, len);
+}
+EXPORT_SYMBOL(skb_trim);
+
+/* Trims skb to length len. It can change skb pointers.
+ */
+
+int ___pskb_trim(struct sk_buff *skb, unsigned int len)
+{
+	struct sk_buff **fragp;
+	struct sk_buff *frag;
+	int offset = skb_headlen(skb);
+	int nfrags = skb_shinfo(skb)->nr_frags;
+	int i;
+	int err;
+
+	if (skb_cloned(skb) &&
+	    unlikely((err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))))
+		return err;
+
+	i = 0;
+	if (offset >= len)
+		goto drop_pages;
+
+	for (; i < nfrags; i++) {
+		int end = offset + skb_frag_size(&skb_shinfo(skb)->frags[i]);
+
+		if (end < len) {
+			offset = end;
+			continue;
+		}
+
+		skb_frag_size_set(&skb_shinfo(skb)->frags[i++], len - offset);
+
+drop_pages:
+		skb_shinfo(skb)->nr_frags = i;
+
+		for (; i < nfrags; i++)
+			skb_frag_unref(skb, i);
+
+		if (skb_has_frag_list(skb))
+			skb_drop_fraglist(skb);
+		goto done;
+	}
+
+	for (fragp = &skb_shinfo(skb)->frag_list; (frag = *fragp);
+	     fragp = &frag->next) {
+		int end = offset + frag->len;
+
+		if (skb_shared(frag)) {
+			struct sk_buff *nfrag;
+
+			nfrag = skb_clone(frag, GFP_ATOMIC);
+			if (unlikely(!nfrag))
+				return -ENOMEM;
+
+			nfrag->next = frag->next;
+			kfree_skb(frag);
+			frag = nfrag;
+			*fragp = frag;
+		}
+
+		if (end < len) {
+			offset = end;
+			continue;
+		}
+
+		if (end > len &&
+		    unlikely((err = pskb_trim(frag, len - offset))))
+			return err;
+
+		if (frag->next)
+			skb_drop_list(&frag->next);
+		break;
+	}
+
+done:
+	if (len > skb_headlen(skb)) {
+		skb->data_len -= skb->len - len;
+		skb->len       = len;
+	} else {
+		skb->len       = len;
+		skb->data_len  = 0;
+		skb_set_tail_pointer(skb, len);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(___pskb_trim);
+
+/**
+ *	__pskb_pull_tail - advance tail of skb header
+ *	@skb: buffer to reallocate
+ *	@delta: number of bytes to advance tail
+ *
+ *	The function makes a sense only on a fragmented &sk_buff,
+ *	it expands header moving its tail forward and copying necessary
+ *	data from fragmented part.
+ *
+ *	&sk_buff MUST have reference count of 1.
+ *
+ *	Returns %NULL (and &sk_buff does not change) if pull failed
+ *	or value of new tail of skb in the case of success.
+ *
+ *	All the pointers pointing into skb header may change and must be
+ *	reloaded after call to this function.
+ */
+
+/* Moves tail of skb head forward, copying data from fragmented part,
+ * when it is necessary.
+ * 1. It may fail due to malloc failure.
+ * 2. It may change skb pointers.
+ *
+ * It is pretty complicated. Luckily, it is called only in exceptional cases.
+ */
+unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
+{
+	/* If skb has not enough free space at tail, get new one
+	 * plus 128 bytes for future expansions. If we have enough
+	 * room at tail, reallocate without expansion only if skb is cloned.
+	 */
+	int i, k, eat = (skb->tail + delta) - skb->end;
+
+	if (eat > 0 || skb_cloned(skb)) {
+		if (pskb_expand_head(skb, 0, eat > 0 ? eat + 128 : 0,
+				     GFP_ATOMIC))
+			return NULL;
+	}
+
+	if (skb_copy_bits(skb, skb_headlen(skb), skb_tail_pointer(skb), delta))
+		BUG();
+
+	/* Optimization: no fragments, no reasons to preestimate
+	 * size of pulled pages. Superb.
+	 */
+	if (!skb_has_frag_list(skb))
+		goto pull_pages;
+
+	/* Estimate size of pulled pages. */
+	eat = delta;
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
+
+		if (size >= eat)
+			goto pull_pages;
+		eat -= size;
+	}
+
+	/* If we need update frag list, we are in troubles.
+	 * Certainly, it possible to add an offset to skb data,
+	 * but taking into account that pulling is expected to
+	 * be very rare operation, it is worth to fight against
+	 * further bloating skb head and crucify ourselves here instead.
+	 * Pure masohism, indeed. 8)8)
+	 */
+	if (eat) {
+		struct sk_buff *list = skb_shinfo(skb)->frag_list;
+		struct sk_buff *clone = NULL;
+		struct sk_buff *insp = NULL;
+
+		do {
+			BUG_ON(!list);
+
+			if (list->len <= eat) {
+				/* Eaten as whole. */
+				eat -= list->len;
+				list = list->next;
+				insp = list;
+			} else {
+				/* Eaten partially. */
+
+				if (skb_shared(list)) {
+					/* Sucks! We need to fork list. :-( */
+					clone = skb_clone(list, GFP_ATOMIC);
+					if (!clone)
+						return NULL;
+					insp = list->next;
+					list = clone;
+				} else {
+					/* This may be pulled without
+					 * problems. */
+					insp = list;
+				}
+				if (!pskb_pull(list, eat)) {
+					kfree_skb(clone);
+					return NULL;
+				}
+				break;
+			}
+		} while (eat);
+
+		/* Free pulled out fragments. */
+		while ((list = skb_shinfo(skb)->frag_list) != insp) {
+			skb_shinfo(skb)->frag_list = list->next;
+			kfree_skb(list);
+		}
+		/* And insert new clone at head. */
+		if (clone) {
+			clone->next = list;
+			skb_shinfo(skb)->frag_list = clone;
+		}
+	}
+	/* Success! Now we may commit changes to skb data. */
+
+pull_pages:
+	eat = delta;
+	k = 0;
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
+
+		if (size <= eat) {
+			skb_frag_unref(skb, i);
+			eat -= size;
+		} else {
+			skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
+			if (eat) {
+				skb_shinfo(skb)->frags[k].page_offset += eat;
+				skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat);
+				eat = 0;
+			}
+			k++;
+		}
+	}
+	skb_shinfo(skb)->nr_frags = k;
+
+	skb->tail     += delta;
+	skb->data_len -= delta;
+
+	return skb_tail_pointer(skb);
+}
+EXPORT_SYMBOL(__pskb_pull_tail);
+
+/**
+ *	skb_copy_bits - copy bits from skb to kernel buffer
+ *	@skb: source skb
+ *	@offset: offset in source
+ *	@to: destination buffer
+ *	@len: number of bytes to copy
+ *
+ *	Copy the specified number of bytes from the source skb to the
+ *	destination buffer.
+ *
+ *	CAUTION ! :
+ *		If its prototype is ever changed,
+ *		check arch/{*}/net/{*}.S files,
+ *		since it is called from BPF assembly code.
+ */
+int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
+{
+	int start = skb_headlen(skb);
+	struct sk_buff *frag_iter;
+	int i, copy;
+
+	if (offset > (int)skb->len - len)
+		goto fault;
+
+	/* Copy header. */
+	if ((copy = start - offset) > 0) {
+		if (copy > len)
+			copy = len;
+		skb_copy_from_linear_data_offset(skb, offset, to, copy);
+		if ((len -= copy) == 0)
+			return 0;
+		offset += copy;
+		to     += copy;
+	}
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+
+		WARN_ON(start > offset + len);
+
+		end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
+		if ((copy = end - offset) > 0) {
+			u8 *vaddr;
+
+			if (copy > len)
+				copy = len;
+
+			vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
+			memcpy(to,
+			       vaddr + skb_shinfo(skb)->frags[i].page_offset+
+			       offset - start, copy);
+			kunmap_skb_frag(vaddr);
+
+			if ((len -= copy) == 0)
+				return 0;
+			offset += copy;
+			to     += copy;
+		}
+		start = end;
+	}
+
+	skb_walk_frags(skb, frag_iter) {
+		int end;
+
+		WARN_ON(start > offset + len);
+
+		end = start + frag_iter->len;
+		if ((copy = end - offset) > 0) {
+			if (copy > len)
+				copy = len;
+			if (skb_copy_bits(frag_iter, offset - start, to, copy))
+				goto fault;
+			if ((len -= copy) == 0)
+				return 0;
+			offset += copy;
+			to     += copy;
+		}
+		start = end;
+	}
+
+	if (!len)
+		return 0;
+
+fault:
+	return -EFAULT;
+}
+EXPORT_SYMBOL(skb_copy_bits);
+
+/*
+ * Callback from splice_to_pipe(), if we need to release some pages
+ * at the end of the spd in case we error'ed out in filling the pipe.
+ */
+static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
+{
+	put_page(spd->pages[i]);
+}
+
+static inline struct page *linear_to_page(struct page *page, unsigned int *len,
+					  unsigned int *offset,
+					  struct sk_buff *skb, struct sock *sk)
+{
+	struct page *p = sk->sk_sndmsg_page;
+	unsigned int off;
+
+	if (!p) {
+new_page:
+		p = sk->sk_sndmsg_page = alloc_pages(sk->sk_allocation, 0);
+		if (!p)
+			return NULL;
+
+		off = sk->sk_sndmsg_off = 0;
+		/* hold one ref to this page until it's full */
+	} else {
+		unsigned int mlen;
+
+		off = sk->sk_sndmsg_off;
+		mlen = PAGE_SIZE - off;
+		if (mlen < 64 && mlen < *len) {
+			put_page(p);
+			goto new_page;
+		}
+
+		*len = min_t(unsigned int, *len, mlen);
+	}
+
+	memcpy(page_address(p) + off, page_address(page) + *offset, *len);
+	sk->sk_sndmsg_off += *len;
+	*offset = off;
+	get_page(p);
+
+	return p;
+}
+
+/*
+ * Fill page/offset/length into spd, if it can hold more pages.
+ */
+static inline int spd_fill_page(struct splice_pipe_desc *spd,
+				struct pipe_inode_info *pipe, struct page *page,
+				unsigned int *len, unsigned int offset,
+				struct sk_buff *skb, int linear,
+				struct sock *sk)
+{
+	if (unlikely(spd->nr_pages == pipe->buffers))
+		return 1;
+
+	if (linear) {
+		page = linear_to_page(page, len, &offset, skb, sk);
+		if (!page)
+			return 1;
+	} else
+		get_page(page);
+
+	spd->pages[spd->nr_pages] = page;
+	spd->partial[spd->nr_pages].len = *len;
+	spd->partial[spd->nr_pages].offset = offset;
+	spd->nr_pages++;
+
+	return 0;
+}
+
+static inline void __segment_seek(struct page **page, unsigned int *poff,
+				  unsigned int *plen, unsigned int off)
+{
+	unsigned long n;
+
+	*poff += off;
+	n = *poff / PAGE_SIZE;
+	if (n)
+		*page = nth_page(*page, n);
+
+	*poff = *poff % PAGE_SIZE;
+	*plen -= off;
+}
+
+static inline int __splice_segment(struct page *page, unsigned int poff,
+				   unsigned int plen, unsigned int *off,
+				   unsigned int *len, struct sk_buff *skb,
+				   struct splice_pipe_desc *spd, int linear,
+				   struct sock *sk,
+				   struct pipe_inode_info *pipe)
+{
+	if (!*len)
+		return 1;
+
+	/* skip this segment if already processed */
+	if (*off >= plen) {
+		*off -= plen;
+		return 0;
+	}
+
+	/* ignore any bits we already processed */
+	if (*off) {
+		__segment_seek(&page, &poff, &plen, *off);
+		*off = 0;
+	}
+
+	do {
+		unsigned int flen = min(*len, plen);
+
+		/* the linear region may spread across several pages  */
+		flen = min_t(unsigned int, flen, PAGE_SIZE - poff);
+
+		if (spd_fill_page(spd, pipe, page, &flen, poff, skb, linear, sk))
+			return 1;
+
+		__segment_seek(&page, &poff, &plen, flen);
+		*len -= flen;
+
+	} while (*len && plen);
+
+	return 0;
+}
+
+/*
+ * Map linear and fragment data from the skb to spd. It reports failure if the
+ * pipe is full or if we already spliced the requested length.
+ */
+static int __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
+			     unsigned int *offset, unsigned int *len,
+			     struct splice_pipe_desc *spd, struct sock *sk)
+{
+	int seg;
+
+	/*
+	 * map the linear part
+	 */
+	if (__splice_segment(virt_to_page(skb->data),
+			     (unsigned long) skb->data & (PAGE_SIZE - 1),
+			     skb_headlen(skb),
+			     offset, len, skb, spd, 1, sk, pipe))
+		return 1;
+
+	/*
+	 * then map the fragments
+	 */
+	for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) {
+		const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
+
+		if (__splice_segment(skb_frag_page(f),
+				     f->page_offset, skb_frag_size(f),
+				     offset, len, skb, spd, 0, sk, pipe))
+			return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Map data from the skb to a pipe. Should handle both the linear part,
+ * the fragments, and the frag list. It does NOT handle frag lists within
+ * the frag list, if such a thing exists. We'd probably need to recurse to
+ * handle that cleanly.
+ */
+int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
+		    struct pipe_inode_info *pipe, unsigned int tlen,
+		    unsigned int flags)
+{
+	struct partial_page partial[PIPE_DEF_BUFFERS];
+	struct page *pages[PIPE_DEF_BUFFERS];
+	struct splice_pipe_desc spd = {
+		.pages = pages,
+		.partial = partial,
+		.nr_pages_max = MAX_SKB_FRAGS,
+		.flags = flags,
+		.ops = &sock_pipe_buf_ops,
+		.spd_release = sock_spd_release,
+	};
+	struct sk_buff *frag_iter;
+	struct sock *sk = skb->sk;
+	int ret = 0;
+
+	if (splice_grow_spd(pipe, &spd))
+		return -ENOMEM;
+
+	/*
+	 * __skb_splice_bits() only fails if the output has no room left,
+	 * so no point in going over the frag_list for the error case.
+	 */
+	if (__skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk))
+		goto done;
+	else if (!tlen)
+		goto done;
+
+	/*
+	 * now see if we have a frag_list to map
+	 */
+	skb_walk_frags(skb, frag_iter) {
+		if (!tlen)
+			break;
+		if (__skb_splice_bits(frag_iter, pipe, &offset, &tlen, &spd, sk))
+			break;
+	}
+
+done:
+	if (spd.nr_pages) {
+		/*
+		 * Drop the socket lock, otherwise we have reverse
+		 * locking dependencies between sk_lock and i_mutex
+		 * here as compared to sendfile(). We enter here
+		 * with the socket lock held, and splice_to_pipe() will
+		 * grab the pipe inode lock. For sendfile() emulation,
+		 * we call into ->sendpage() with the i_mutex lock held
+		 * and networking will grab the socket lock.
+		 */
+		release_sock(sk);
+		ret = splice_to_pipe(pipe, &spd);
+		lock_sock(sk);
+	}
+
+	splice_shrink_spd(&spd);
+	return ret;
+}
+
+/**
+ *	skb_store_bits - store bits from kernel buffer to skb
+ *	@skb: destination buffer
+ *	@offset: offset in destination
+ *	@from: source buffer
+ *	@len: number of bytes to copy
+ *
+ *	Copy the specified number of bytes from the source buffer to the
+ *	destination skb.  This function handles all the messy bits of
+ *	traversing fragment lists and such.
+ */
+
+int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len)
+{
+	int start = skb_headlen(skb);
+	struct sk_buff *frag_iter;
+	int i, copy;
+
+	if (offset > (int)skb->len - len)
+		goto fault;
+
+	if ((copy = start - offset) > 0) {
+		if (copy > len)
+			copy = len;
+		skb_copy_to_linear_data_offset(skb, offset, from, copy);
+		if ((len -= copy) == 0)
+			return 0;
+		offset += copy;
+		from += copy;
+	}
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+		int end;
+
+		WARN_ON(start > offset + len);
+
+		end = start + skb_frag_size(frag);
+		if ((copy = end - offset) > 0) {
+			u8 *vaddr;
+
+			if (copy > len)
+				copy = len;
+
+			vaddr = kmap_skb_frag(frag);
+			memcpy(vaddr + frag->page_offset + offset - start,
+			       from, copy);
+			kunmap_skb_frag(vaddr);
+
+			if ((len -= copy) == 0)
+				return 0;
+			offset += copy;
+			from += copy;
+		}
+		start = end;
+	}
+
+	skb_walk_frags(skb, frag_iter) {
+		int end;
+
+		WARN_ON(start > offset + len);
+
+		end = start + frag_iter->len;
+		if ((copy = end - offset) > 0) {
+			if (copy > len)
+				copy = len;
+			if (skb_store_bits(frag_iter, offset - start,
+					   from, copy))
+				goto fault;
+			if ((len -= copy) == 0)
+				return 0;
+			offset += copy;
+			from += copy;
+		}
+		start = end;
+	}
+	if (!len)
+		return 0;
+
+fault:
+	return -EFAULT;
+}
+EXPORT_SYMBOL(skb_store_bits);
+
+/* Checksum skb data. */
+
+__wsum skb_checksum(const struct sk_buff *skb, int offset,
+			  int len, __wsum csum)
+{
+	int start = skb_headlen(skb);
+	int i, copy = start - offset;
+	struct sk_buff *frag_iter;
+	int pos = 0;
+
+	/* Checksum header. */
+	if (copy > 0) {
+		if (copy > len)
+			copy = len;
+		csum = csum_partial(skb->data + offset, copy, csum);
+		if ((len -= copy) == 0)
+			return csum;
+		offset += copy;
+		pos	= copy;
+	}
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+
+		WARN_ON(start > offset + len);
+
+		end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
+		if ((copy = end - offset) > 0) {
+			__wsum csum2;
+			u8 *vaddr;
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+			if (copy > len)
+				copy = len;
+			vaddr = kmap_skb_frag(frag);
+			csum2 = csum_partial(vaddr + frag->page_offset +
+					     offset - start, copy, 0);
+			kunmap_skb_frag(vaddr);
+			csum = csum_block_add(csum, csum2, pos);
+			if (!(len -= copy))
+				return csum;
+			offset += copy;
+			pos    += copy;
+		}
+		start = end;
+	}
+
+	skb_walk_frags(skb, frag_iter) {
+		int end;
+
+		WARN_ON(start > offset + len);
+
+		end = start + frag_iter->len;
+		if ((copy = end - offset) > 0) {
+			__wsum csum2;
+			if (copy > len)
+				copy = len;
+			csum2 = skb_checksum(frag_iter, offset - start,
+					     copy, 0);
+			csum = csum_block_add(csum, csum2, pos);
+			if ((len -= copy) == 0)
+				return csum;
+			offset += copy;
+			pos    += copy;
+		}
+		start = end;
+	}
+	BUG_ON(len);
+
+	return csum;
+}
+EXPORT_SYMBOL(skb_checksum);
+
+/* Both of above in one bottle. */
+
+__wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
+				    u8 *to, int len, __wsum csum)
+{
+	int start = skb_headlen(skb);
+	int i, copy = start - offset;
+	struct sk_buff *frag_iter;
+	int pos = 0;
+
+	/* Copy header. */
+	if (copy > 0) {
+		if (copy > len)
+			copy = len;
+		csum = csum_partial_copy_nocheck(skb->data + offset, to,
+						 copy, csum);
+		if ((len -= copy) == 0)
+			return csum;
+		offset += copy;
+		to     += copy;
+		pos	= copy;
+	}
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+
+		WARN_ON(start > offset + len);
+
+		end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
+		if ((copy = end - offset) > 0) {
+			__wsum csum2;
+			u8 *vaddr;
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+			if (copy > len)
+				copy = len;
+			vaddr = kmap_skb_frag(frag);
+			csum2 = csum_partial_copy_nocheck(vaddr +
+							  frag->page_offset +
+							  offset - start, to,
+							  copy, 0);
+			kunmap_skb_frag(vaddr);
+			csum = csum_block_add(csum, csum2, pos);
+			if (!(len -= copy))
+				return csum;
+			offset += copy;
+			to     += copy;
+			pos    += copy;
+		}
+		start = end;
+	}
+
+	skb_walk_frags(skb, frag_iter) {
+		__wsum csum2;
+		int end;
+
+		WARN_ON(start > offset + len);
+
+		end = start + frag_iter->len;
+		if ((copy = end - offset) > 0) {
+			if (copy > len)
+				copy = len;
+			csum2 = skb_copy_and_csum_bits(frag_iter,
+						       offset - start,
+						       to, copy, 0);
+			csum = csum_block_add(csum, csum2, pos);
+			if ((len -= copy) == 0)
+				return csum;
+			offset += copy;
+			to     += copy;
+			pos    += copy;
+		}
+		start = end;
+	}
+	BUG_ON(len);
+	return csum;
+}
+EXPORT_SYMBOL(skb_copy_and_csum_bits);
+
+void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
+{
+	__wsum csum;
+	long csstart;
+
+	if (skb->ip_summed == CHECKSUM_PARTIAL)
+		csstart = skb_checksum_start_offset(skb);
+	else
+		csstart = skb_headlen(skb);
+
+	BUG_ON(csstart > skb_headlen(skb));
+
+	skb_copy_from_linear_data(skb, to, csstart);
+
+	csum = 0;
+	if (csstart != skb->len)
+		csum = skb_copy_and_csum_bits(skb, csstart, to + csstart,
+					      skb->len - csstart, 0);
+
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		long csstuff = csstart + skb->csum_offset;
+
+		*((__sum16 *)(to + csstuff)) = csum_fold(csum);
+	}
+}
+EXPORT_SYMBOL(skb_copy_and_csum_dev);
+
+/**
+ *	skb_dequeue - remove from the head of the queue
+ *	@list: list to dequeue from
+ *
+ *	Remove the head of the list. The list lock is taken so the function
+ *	may be used safely with other locking list functions. The head item is
+ *	returned or %NULL if the list is empty.
+ */
+
+struct sk_buff *skb_dequeue(struct sk_buff_head *list)
+{
+	unsigned long flags;
+	struct sk_buff *result;
+
+	spin_lock_irqsave(&list->lock, flags);
+	result = __skb_dequeue(list);
+	spin_unlock_irqrestore(&list->lock, flags);
+	return result;
+}
+EXPORT_SYMBOL(skb_dequeue);
+
+/**
+ *	skb_dequeue_tail - remove from the tail of the queue
+ *	@list: list to dequeue from
+ *
+ *	Remove the tail of the list. The list lock is taken so the function
+ *	may be used safely with other locking list functions. The tail item is
+ *	returned or %NULL if the list is empty.
+ */
+struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list)
+{
+	unsigned long flags;
+	struct sk_buff *result;
+
+	spin_lock_irqsave(&list->lock, flags);
+	result = __skb_dequeue_tail(list);
+	spin_unlock_irqrestore(&list->lock, flags);
+	return result;
+}
+EXPORT_SYMBOL(skb_dequeue_tail);
+
+/**
+ *	skb_queue_purge - empty a list
+ *	@list: list to empty
+ *
+ *	Delete all buffers on an &sk_buff list. Each buffer is removed from
+ *	the list and one reference dropped. This function takes the list
+ *	lock and is atomic with respect to other list locking functions.
+ */
+void skb_queue_purge(struct sk_buff_head *list)
+{
+	struct sk_buff *skb;
+	while ((skb = skb_dequeue(list)) != NULL)
+		kfree_skb(skb);
+}
+EXPORT_SYMBOL(skb_queue_purge);
+
+/**
+ *	skb_queue_head - queue a buffer at the list head
+ *	@list: list to use
+ *	@newsk: buffer to queue
+ *
+ *	Queue a buffer at the start of the list. This function takes the
+ *	list lock and can be used safely with other locking &sk_buff functions
+ *	safely.
+ *
+ *	A buffer cannot be placed on two lists at the same time.
+ */
+void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&list->lock, flags);
+	__skb_queue_head(list, newsk);
+	spin_unlock_irqrestore(&list->lock, flags);
+}
+EXPORT_SYMBOL(skb_queue_head);
+
+/**
+ *	skb_queue_tail - queue a buffer at the list tail
+ *	@list: list to use
+ *	@newsk: buffer to queue
+ *
+ *	Queue a buffer at the tail of the list. This function takes the
+ *	list lock and can be used safely with other locking &sk_buff functions
+ *	safely.
+ *
+ *	A buffer cannot be placed on two lists at the same time.
+ */
+void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&list->lock, flags);
+	__skb_queue_tail(list, newsk);
+	spin_unlock_irqrestore(&list->lock, flags);
+}
+EXPORT_SYMBOL(skb_queue_tail);
+
+/**
+ *	skb_unlink	-	remove a buffer from a list
+ *	@skb: buffer to remove
+ *	@list: list to use
+ *
+ *	Remove a packet from a list. The list locks are taken and this
+ *	function is atomic with respect to other list locked calls
+ *
+ *	You must know what list the SKB is on.
+ */
+void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&list->lock, flags);
+	__skb_unlink(skb, list);
+	spin_unlock_irqrestore(&list->lock, flags);
+}
+EXPORT_SYMBOL(skb_unlink);
+
+/**
+ *	skb_append	-	append a buffer
+ *	@old: buffer to insert after
+ *	@newsk: buffer to insert
+ *	@list: list to use
+ *
+ *	Place a packet after a given packet in a list. The list locks are taken
+ *	and this function is atomic with respect to other list locked calls.
+ *	A buffer cannot be placed on two lists at the same time.
+ */
+void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&list->lock, flags);
+	__skb_queue_after(list, old, newsk);
+	spin_unlock_irqrestore(&list->lock, flags);
+}
+EXPORT_SYMBOL(skb_append);
+
+/**
+ *	skb_insert	-	insert a buffer
+ *	@old: buffer to insert before
+ *	@newsk: buffer to insert
+ *	@list: list to use
+ *
+ *	Place a packet before a given packet in a list. The list locks are
+ * 	taken and this function is atomic with respect to other list locked
+ *	calls.
+ *
+ *	A buffer cannot be placed on two lists at the same time.
+ */
+void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&list->lock, flags);
+	__skb_insert(newsk, old->prev, old, list);
+	spin_unlock_irqrestore(&list->lock, flags);
+}
+EXPORT_SYMBOL(skb_insert);
+
+static inline void skb_split_inside_header(struct sk_buff *skb,
+					   struct sk_buff* skb1,
+					   const u32 len, const int pos)
+{
+	int i;
+
+	skb_copy_from_linear_data_offset(skb, len, skb_put(skb1, pos - len),
+					 pos - len);
+	/* And move data appendix as is. */
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+		skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i];
+
+	skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags;
+	skb_shinfo(skb)->nr_frags  = 0;
+	skb1->data_len		   = skb->data_len;
+	skb1->len		   += skb1->data_len;
+	skb->data_len		   = 0;
+	skb->len		   = len;
+	skb_set_tail_pointer(skb, len);
+}
+
+static inline void skb_split_no_header(struct sk_buff *skb,
+				       struct sk_buff* skb1,
+				       const u32 len, int pos)
+{
+	int i, k = 0;
+	const int nfrags = skb_shinfo(skb)->nr_frags;
+
+	skb_shinfo(skb)->nr_frags = 0;
+	skb1->len		  = skb1->data_len = skb->len - len;
+	skb->len		  = len;
+	skb->data_len		  = len - pos;
+
+	for (i = 0; i < nfrags; i++) {
+		int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
+
+		if (pos + size > len) {
+			skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i];
+
+			if (pos < len) {
+				/* Split frag.
+				 * We have two variants in this case:
+				 * 1. Move all the frag to the second
+				 *    part, if it is possible. F.e.
+				 *    this approach is mandatory for TUX,
+				 *    where splitting is expensive.
+				 * 2. Split is accurately. We make this.
+				 */
+				skb_frag_ref(skb, i);
+				skb_shinfo(skb1)->frags[0].page_offset += len - pos;
+				skb_frag_size_sub(&skb_shinfo(skb1)->frags[0], len - pos);
+				skb_frag_size_set(&skb_shinfo(skb)->frags[i], len - pos);
+				skb_shinfo(skb)->nr_frags++;
+			}
+			k++;
+		} else
+			skb_shinfo(skb)->nr_frags++;
+		pos += size;
+	}
+	skb_shinfo(skb1)->nr_frags = k;
+}
+
+/**
+ * skb_split - Split fragmented skb to two parts at length len.
+ * @skb: the buffer to split
+ * @skb1: the buffer to receive the second part
+ * @len: new length for skb
+ */
+void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
+{
+	int pos = skb_headlen(skb);
+
+	if (len < pos)	/* Split line is inside header. */
+		skb_split_inside_header(skb, skb1, len, pos);
+	else		/* Second chunk has no header, nothing to copy. */
+		skb_split_no_header(skb, skb1, len, pos);
+}
+EXPORT_SYMBOL(skb_split);
+
+/* Shifting from/to a cloned skb is a no-go.
+ *
+ * Caller cannot keep skb_shinfo related pointers past calling here!
+ */
+static int skb_prepare_for_shift(struct sk_buff *skb)
+{
+	return skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+}
+
+/**
+ * skb_shift - Shifts paged data partially from skb to another
+ * @tgt: buffer into which tail data gets added
+ * @skb: buffer from which the paged data comes from
+ * @shiftlen: shift up to this many bytes
+ *
+ * Attempts to shift up to shiftlen worth of bytes, which may be less than
+ * the length of the skb, from skb to tgt. Returns number bytes shifted.
+ * It's up to caller to free skb if everything was shifted.
+ *
+ * If @tgt runs out of frags, the whole operation is aborted.
+ *
+ * Skb cannot include anything else but paged data while tgt is allowed
+ * to have non-paged data as well.
+ *
+ * TODO: full sized shift could be optimized but that would need
+ * specialized skb free'er to handle frags without up-to-date nr_frags.
+ */
+int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
+{
+	int from, to, merge, todo;
+	struct skb_frag_struct *fragfrom, *fragto;
+
+	BUG_ON(shiftlen > skb->len);
+	BUG_ON(skb_headlen(skb));	/* Would corrupt stream */
+
+	todo = shiftlen;
+	from = 0;
+	to = skb_shinfo(tgt)->nr_frags;
+	fragfrom = &skb_shinfo(skb)->frags[from];
+
+	/* Actual merge is delayed until the point when we know we can
+	 * commit all, so that we don't have to undo partial changes
+	 */
+	if (!to ||
+	    !skb_can_coalesce(tgt, to, skb_frag_page(fragfrom),
+			      fragfrom->page_offset)) {
+		merge = -1;
+	} else {
+		merge = to - 1;
+
+		todo -= skb_frag_size(fragfrom);
+		if (todo < 0) {
+			if (skb_prepare_for_shift(skb) ||
+			    skb_prepare_for_shift(tgt))
+				return 0;
+
+			/* All previous frag pointers might be stale! */
+			fragfrom = &skb_shinfo(skb)->frags[from];
+			fragto = &skb_shinfo(tgt)->frags[merge];
+
+			skb_frag_size_add(fragto, shiftlen);
+			skb_frag_size_sub(fragfrom, shiftlen);
+			fragfrom->page_offset += shiftlen;
+
+			goto onlymerged;
+		}
+
+		from++;
+	}
+
+	/* Skip full, not-fitting skb to avoid expensive operations */
+	if ((shiftlen == skb->len) &&
+	    (skb_shinfo(skb)->nr_frags - from) > (MAX_SKB_FRAGS - to))
+		return 0;
+
+	if (skb_prepare_for_shift(skb) || skb_prepare_for_shift(tgt))
+		return 0;
+
+	while ((todo > 0) && (from < skb_shinfo(skb)->nr_frags)) {
+		if (to == MAX_SKB_FRAGS)
+			return 0;
+
+		fragfrom = &skb_shinfo(skb)->frags[from];
+		fragto = &skb_shinfo(tgt)->frags[to];
+
+		if (todo >= skb_frag_size(fragfrom)) {
+			*fragto = *fragfrom;
+			todo -= skb_frag_size(fragfrom);
+			from++;
+			to++;
+
+		} else {
+			__skb_frag_ref(fragfrom);
+			fragto->page = fragfrom->page;
+			fragto->page_offset = fragfrom->page_offset;
+			skb_frag_size_set(fragto, todo);
+
+			fragfrom->page_offset += todo;
+			skb_frag_size_sub(fragfrom, todo);
+			todo = 0;
+
+			to++;
+			break;
+		}
+	}
+
+	/* Ready to "commit" this state change to tgt */
+	skb_shinfo(tgt)->nr_frags = to;
+
+	if (merge >= 0) {
+		fragfrom = &skb_shinfo(skb)->frags[0];
+		fragto = &skb_shinfo(tgt)->frags[merge];
+
+		skb_frag_size_add(fragto, skb_frag_size(fragfrom));
+		__skb_frag_unref(fragfrom);
+	}
+
+	/* Reposition in the original skb */
+	to = 0;
+	while (from < skb_shinfo(skb)->nr_frags)
+		skb_shinfo(skb)->frags[to++] = skb_shinfo(skb)->frags[from++];
+	skb_shinfo(skb)->nr_frags = to;
+
+	BUG_ON(todo > 0 && !skb_shinfo(skb)->nr_frags);
+
+onlymerged:
+	/* Most likely the tgt won't ever need its checksum anymore, skb on
+	 * the other hand might need it if it needs to be resent
+	 */
+	tgt->ip_summed = CHECKSUM_PARTIAL;
+	skb->ip_summed = CHECKSUM_PARTIAL;
+
+	/* Yak, is it really working this way? Some helper please? */
+	skb->len -= shiftlen;
+	skb->data_len -= shiftlen;
+	skb->truesize -= shiftlen;
+	tgt->len += shiftlen;
+	tgt->data_len += shiftlen;
+	tgt->truesize += shiftlen;
+
+	return shiftlen;
+}
+
+/**
+ * skb_prepare_seq_read - Prepare a sequential read of skb data
+ * @skb: the buffer to read
+ * @from: lower offset of data to be read
+ * @to: upper offset of data to be read
+ * @st: state variable
+ *
+ * Initializes the specified state variable. Must be called before
+ * invoking skb_seq_read() for the first time.
+ */
+void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from,
+			  unsigned int to, struct skb_seq_state *st)
+{
+	st->lower_offset = from;
+	st->upper_offset = to;
+	st->root_skb = st->cur_skb = skb;
+	st->frag_idx = st->stepped_offset = 0;
+	st->frag_data = NULL;
+}
+EXPORT_SYMBOL(skb_prepare_seq_read);
+
+/**
+ * skb_seq_read - Sequentially read skb data
+ * @consumed: number of bytes consumed by the caller so far
+ * @data: destination pointer for data to be returned
+ * @st: state variable
+ *
+ * Reads a block of skb data at &consumed relative to the
+ * lower offset specified to skb_prepare_seq_read(). Assigns
+ * the head of the data block to &data and returns the length
+ * of the block or 0 if the end of the skb data or the upper
+ * offset has been reached.
+ *
+ * The caller is not required to consume all of the data
+ * returned, i.e. &consumed is typically set to the number
+ * of bytes already consumed and the next call to
+ * skb_seq_read() will return the remaining part of the block.
+ *
+ * Note 1: The size of each block of data returned can be arbitrary,
+ *       this limitation is the cost for zerocopy seqeuental
+ *       reads of potentially non linear data.
+ *
+ * Note 2: Fragment lists within fragments are not implemented
+ *       at the moment, state->root_skb could be replaced with
+ *       a stack for this purpose.
+ */
+unsigned int skb_seq_read(unsigned int consumed, const u8 **data,
+			  struct skb_seq_state *st)
+{
+	unsigned int block_limit, abs_offset = consumed + st->lower_offset;
+	skb_frag_t *frag;
+
+	if (unlikely(abs_offset >= st->upper_offset))
+		return 0;
+
+next_skb:
+	block_limit = skb_headlen(st->cur_skb) + st->stepped_offset;
+
+	if (abs_offset < block_limit && !st->frag_data) {
+		*data = st->cur_skb->data + (abs_offset - st->stepped_offset);
+		return block_limit - abs_offset;
+	}
+
+	if (st->frag_idx == 0 && !st->frag_data)
+		st->stepped_offset += skb_headlen(st->cur_skb);
+
+	while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) {
+		frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx];
+		block_limit = skb_frag_size(frag) + st->stepped_offset;
+
+		if (abs_offset < block_limit) {
+			if (!st->frag_data)
+				st->frag_data = kmap_skb_frag(frag);
+
+			*data = (u8 *) st->frag_data + frag->page_offset +
+				(abs_offset - st->stepped_offset);
+
+			return block_limit - abs_offset;
+		}
+
+		if (st->frag_data) {
+			kunmap_skb_frag(st->frag_data);
+			st->frag_data = NULL;
+		}
+
+		st->frag_idx++;
+		st->stepped_offset += skb_frag_size(frag);
+	}
+
+	if (st->frag_data) {
+		kunmap_skb_frag(st->frag_data);
+		st->frag_data = NULL;
+	}
+
+	if (st->root_skb == st->cur_skb && skb_has_frag_list(st->root_skb)) {
+		st->cur_skb = skb_shinfo(st->root_skb)->frag_list;
+		st->frag_idx = 0;
+		goto next_skb;
+	} else if (st->cur_skb->next) {
+		st->cur_skb = st->cur_skb->next;
+		st->frag_idx = 0;
+		goto next_skb;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(skb_seq_read);
+
+/**
+ * skb_abort_seq_read - Abort a sequential read of skb data
+ * @st: state variable
+ *
+ * Must be called if skb_seq_read() was not called until it
+ * returned 0.
+ */
+void skb_abort_seq_read(struct skb_seq_state *st)
+{
+	if (st->frag_data)
+		kunmap_skb_frag(st->frag_data);
+}
+EXPORT_SYMBOL(skb_abort_seq_read);
+
+#define TS_SKB_CB(state)	((struct skb_seq_state *) &((state)->cb))
+
+static unsigned int skb_ts_get_next_block(unsigned int offset, const u8 **text,
+					  struct ts_config *conf,
+					  struct ts_state *state)
+{
+	return skb_seq_read(offset, text, TS_SKB_CB(state));
+}
+
+static void skb_ts_finish(struct ts_config *conf, struct ts_state *state)
+{
+	skb_abort_seq_read(TS_SKB_CB(state));
+}
+
+/**
+ * skb_find_text - Find a text pattern in skb data
+ * @skb: the buffer to look in
+ * @from: search offset
+ * @to: search limit
+ * @config: textsearch configuration
+ * @state: uninitialized textsearch state variable
+ *
+ * Finds a pattern in the skb data according to the specified
+ * textsearch configuration. Use textsearch_next() to retrieve
+ * subsequent occurrences of the pattern. Returns the offset
+ * to the first occurrence or UINT_MAX if no match was found.
+ */
+unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
+			   unsigned int to, struct ts_config *config,
+			   struct ts_state *state)
+{
+	unsigned int ret;
+
+	config->get_next_block = skb_ts_get_next_block;
+	config->finish = skb_ts_finish;
+
+	skb_prepare_seq_read(skb, from, to, TS_SKB_CB(state));
+
+	ret = textsearch_find(config, state);
+	return (ret <= to - from ? ret : UINT_MAX);
+}
+EXPORT_SYMBOL(skb_find_text);
+
+/**
+ * skb_append_datato_frags: - append the user data to a skb
+ * @sk: sock  structure
+ * @skb: skb structure to be appened with user data.
+ * @getfrag: call back function to be used for getting the user data
+ * @from: pointer to user message iov
+ * @length: length of the iov message
+ *
+ * Description: This procedure append the user data in the fragment part
+ * of the skb if any page alloc fails user this procedure returns  -ENOMEM
+ */
+int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
+			int (*getfrag)(void *from, char *to, int offset,
+					int len, int odd, struct sk_buff *skb),
+			void *from, int length)
+{
+	int frg_cnt = 0;
+	skb_frag_t *frag = NULL;
+	struct page *page = NULL;
+	int copy, left;
+	int offset = 0;
+	int ret;
+
+	do {
+		/* Return error if we don't have space for new frag */
+		frg_cnt = skb_shinfo(skb)->nr_frags;
+		if (frg_cnt >= MAX_SKB_FRAGS)
+			return -EFAULT;
+
+		/* allocate a new page for next frag */
+		page = alloc_pages(sk->sk_allocation, 0);
+
+		/* If alloc_page fails just return failure and caller will
+		 * free previous allocated pages by doing kfree_skb()
+		 */
+		if (page == NULL)
+			return -ENOMEM;
+
+		/* initialize the next frag */
+		skb_fill_page_desc(skb, frg_cnt, page, 0, 0);
+		skb->truesize += PAGE_SIZE;
+		atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
+
+		/* get the new initialized frag */
+		frg_cnt = skb_shinfo(skb)->nr_frags;
+		frag = &skb_shinfo(skb)->frags[frg_cnt - 1];
+
+		/* copy the user data to page */
+		left = PAGE_SIZE - frag->page_offset;
+		copy = (length > left)? left : length;
+
+		ret = getfrag(from, skb_frag_address(frag) + skb_frag_size(frag),
+			    offset, copy, 0, skb);
+		if (ret < 0)
+			return -EFAULT;
+
+		/* copy was successful so update the size parameters */
+		skb_frag_size_add(frag, copy);
+		skb->len += copy;
+		skb->data_len += copy;
+		offset += copy;
+		length -= copy;
+
+	} while (length > 0);
+
+	return 0;
+}
+EXPORT_SYMBOL(skb_append_datato_frags);
+
+/**
+ *	skb_pull_rcsum - pull skb and update receive checksum
+ *	@skb: buffer to update
+ *	@len: length of data pulled
+ *
+ *	This function performs an skb_pull on the packet and updates
+ *	the CHECKSUM_COMPLETE checksum.  It should be used on
+ *	receive path processing instead of skb_pull unless you know
+ *	that the checksum difference is zero (e.g., a valid IP header)
+ *	or you are setting ip_summed to CHECKSUM_NONE.
+ */
+unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
+{
+	BUG_ON(len > skb->len);
+	skb->len -= len;
+	BUG_ON(skb->len < skb->data_len);
+	skb_postpull_rcsum(skb, skb->data, len);
+	return skb->data += len;
+}
+EXPORT_SYMBOL_GPL(skb_pull_rcsum);
+
+/**
+ *	skb_segment - Perform protocol segmentation on skb.
+ *	@skb: buffer to segment
+ *	@features: features for the output path (see dev->features)
+ *
+ *	This function performs segmentation on the given skb.  It returns
+ *	a pointer to the first in a list of new skbs for the segments.
+ *	In case of error it returns ERR_PTR(err).
+ */
+struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
+{
+	struct sk_buff *segs = NULL;
+	struct sk_buff *tail = NULL;
+	struct sk_buff *fskb = skb_shinfo(skb)->frag_list;
+	unsigned int mss = skb_shinfo(skb)->gso_size;
+	unsigned int doffset = skb->data - skb_mac_header(skb);
+	unsigned int offset = doffset;
+	unsigned int headroom;
+	unsigned int len;
+	int sg = !!(features & NETIF_F_SG);
+	int nfrags = skb_shinfo(skb)->nr_frags;
+	int err = -ENOMEM;
+	int i = 0;
+	int pos;
+
+	__skb_push(skb, doffset);
+	headroom = skb_headroom(skb);
+	pos = skb_headlen(skb);
+
+	do {
+		struct sk_buff *nskb;
+		skb_frag_t *frag;
+		int hsize;
+		int size;
+
+		len = skb->len - offset;
+		if (len > mss)
+			len = mss;
+
+		hsize = skb_headlen(skb) - offset;
+		if (hsize < 0)
+			hsize = 0;
+		if (hsize > len || !sg)
+			hsize = len;
+
+		if (!hsize && i >= nfrags) {
+			BUG_ON(fskb->len != len);
+
+			pos += len;
+			nskb = skb_clone(fskb, GFP_ATOMIC);
+			fskb = fskb->next;
+
+			if (unlikely(!nskb))
+				goto err;
+
+			hsize = skb_end_pointer(nskb) - nskb->head;
+			if (skb_cow_head(nskb, doffset + headroom)) {
+				kfree_skb(nskb);
+				goto err;
+			}
+
+			nskb->truesize += skb_end_pointer(nskb) - nskb->head -
+					  hsize;
+			skb_release_head_state(nskb);
+			__skb_push(nskb, doffset);
+		} else {
+			nskb = alloc_skb(hsize + doffset + headroom,
+					 GFP_ATOMIC);
+
+			if (unlikely(!nskb))
+				goto err;
+
+			skb_reserve(nskb, headroom);
+			__skb_put(nskb, doffset);
+		}
+
+		if (segs)
+			tail->next = nskb;
+		else
+			segs = nskb;
+		tail = nskb;
+
+		__copy_skb_header(nskb, skb);
+		nskb->mac_len = skb->mac_len;
+
+		/* nskb and skb might have different headroom */
+		if (nskb->ip_summed == CHECKSUM_PARTIAL)
+			nskb->csum_start += skb_headroom(nskb) - headroom;
+
+		skb_reset_mac_header(nskb);
+		skb_set_network_header(nskb, skb->mac_len);
+		nskb->transport_header = (nskb->network_header +
+					  skb_network_header_len(skb));
+		skb_copy_from_linear_data(skb, nskb->data, doffset);
+
+		if (fskb != skb_shinfo(skb)->frag_list)
+			continue;
+
+		if (!sg) {
+			nskb->ip_summed = CHECKSUM_NONE;
+			nskb->csum = skb_copy_and_csum_bits(skb, offset,
+							    skb_put(nskb, len),
+							    len, 0);
+			continue;
+		}
+
+		frag = skb_shinfo(nskb)->frags;
+
+		skb_copy_from_linear_data_offset(skb, offset,
+						 skb_put(nskb, hsize), hsize);
+
+		while (pos < offset + len && i < nfrags) {
+			*frag = skb_shinfo(skb)->frags[i];
+			__skb_frag_ref(frag);
+			size = skb_frag_size(frag);
+
+			if (pos < offset) {
+				frag->page_offset += offset - pos;
+				skb_frag_size_sub(frag, offset - pos);
+			}
+
+			skb_shinfo(nskb)->nr_frags++;
+
+			if (pos + size <= offset + len) {
+				i++;
+				pos += size;
+			} else {
+				skb_frag_size_sub(frag, pos + size - (offset + len));
+				goto skip_fraglist;
+			}
+
+			frag++;
+		}
+
+		if (pos < offset + len) {
+			struct sk_buff *fskb2 = fskb;
+
+			BUG_ON(pos + fskb->len != offset + len);
+
+			pos += fskb->len;
+			fskb = fskb->next;
+
+			if (fskb2->next) {
+				fskb2 = skb_clone(fskb2, GFP_ATOMIC);
+				if (!fskb2)
+					goto err;
+			} else
+				skb_get(fskb2);
+
+			SKB_FRAG_ASSERT(nskb);
+			skb_shinfo(nskb)->frag_list = fskb2;
+		}
+
+skip_fraglist:
+		nskb->data_len = len - hsize;
+		nskb->len += nskb->data_len;
+		nskb->truesize += nskb->data_len;
+	} while ((offset += len) < skb->len);
+
+	return segs;
+
+err:
+	while ((skb = segs)) {
+		segs = skb->next;
+		kfree_skb(skb);
+	}
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(skb_segment);
+
+int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
+{
+	struct sk_buff *p = *head;
+	struct sk_buff *nskb;
+	struct skb_shared_info *skbinfo = skb_shinfo(skb);
+	struct skb_shared_info *pinfo = skb_shinfo(p);
+	unsigned int headroom;
+	unsigned int len = skb_gro_len(skb);
+	unsigned int offset = skb_gro_offset(skb);
+	unsigned int headlen = skb_headlen(skb);
+
+	if (p->len + len >= 65536)
+		return -E2BIG;
+
+	if (pinfo->frag_list)
+		goto merge;
+	else if (headlen <= offset) {
+		skb_frag_t *frag;
+		skb_frag_t *frag2;
+		int i = skbinfo->nr_frags;
+		int nr_frags = pinfo->nr_frags + i;
+
+		offset -= headlen;
+
+		if (nr_frags > MAX_SKB_FRAGS)
+			return -E2BIG;
+
+		pinfo->nr_frags = nr_frags;
+		skbinfo->nr_frags = 0;
+
+		frag = pinfo->frags + nr_frags;
+		frag2 = skbinfo->frags + i;
+		do {
+			*--frag = *--frag2;
+		} while (--i);
+
+		frag->page_offset += offset;
+		skb_frag_size_sub(frag, offset);
+
+		skb->truesize -= skb->data_len;
+		skb->len -= skb->data_len;
+		skb->data_len = 0;
+
+		NAPI_GRO_CB(skb)->free = 1;
+		goto done;
+	} else if (skb_gro_len(p) != pinfo->gso_size)
+		return -E2BIG;
+
+	headroom = skb_headroom(p);
+	nskb = alloc_skb(headroom + skb_gro_offset(p), GFP_ATOMIC);
+	if (unlikely(!nskb))
+		return -ENOMEM;
+
+	__copy_skb_header(nskb, p);
+	nskb->mac_len = p->mac_len;
+
+	skb_reserve(nskb, headroom);
+	__skb_put(nskb, skb_gro_offset(p));
+
+	skb_set_mac_header(nskb, skb_mac_header(p) - p->data);
+	skb_set_network_header(nskb, skb_network_offset(p));
+	skb_set_transport_header(nskb, skb_transport_offset(p));
+
+	__skb_pull(p, skb_gro_offset(p));
+	memcpy(skb_mac_header(nskb), skb_mac_header(p),
+	       p->data - skb_mac_header(p));
+
+	*NAPI_GRO_CB(nskb) = *NAPI_GRO_CB(p);
+	skb_shinfo(nskb)->frag_list = p;
+	skb_shinfo(nskb)->gso_size = pinfo->gso_size;
+	pinfo->gso_size = 0;
+	skb_header_release(p);
+	nskb->prev = p;
+
+	nskb->data_len += p->len;
+	nskb->truesize += p->truesize;
+	nskb->len += p->len;
+
+	*head = nskb;
+	nskb->next = p->next;
+	p->next = NULL;
+
+	p = nskb;
+
+merge:
+	p->truesize += skb->truesize - len;
+	if (offset > headlen) {
+		unsigned int eat = offset - headlen;
+
+		skbinfo->frags[0].page_offset += eat;
+		skb_frag_size_sub(&skbinfo->frags[0], eat);
+		skb->data_len -= eat;
+		skb->len -= eat;
+		offset = headlen;
+	}
+
+	__skb_pull(skb, offset);
+
+	p->prev->next = skb;
+	p->prev = skb;
+	skb_header_release(skb);
+
+done:
+	NAPI_GRO_CB(p)->count++;
+	p->data_len += len;
+	p->truesize += len;
+	p->len += len;
+
+	NAPI_GRO_CB(skb)->same_flow = 1;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(skb_gro_receive);
+
+void __init skb_init(void)
+{
+	skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
+					      sizeof(struct sk_buff),
+					      0,
+					      SLAB_HWCACHE_ALIGN|SLAB_PANIC,
+					      NULL);
+	skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
+						(2*sizeof(struct sk_buff)) +
+						sizeof(atomic_t),
+						0,
+						SLAB_HWCACHE_ALIGN|SLAB_PANIC,
+						NULL);
+}
+
+/**
+ *	skb_to_sgvec - Fill a scatter-gather list from a socket buffer
+ *	@skb: Socket buffer containing the buffers to be mapped
+ *	@sg: The scatter-gather list to map into
+ *	@offset: The offset into the buffer's contents to start mapping
+ *	@len: Length of buffer space to be mapped
+ *
+ *	Fill the specified scatter-gather list with mappings/pointers into a
+ *	region of the buffer space attached to a socket buffer.
+ */
+static int
+__skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
+{
+	int start = skb_headlen(skb);
+	int i, copy = start - offset;
+	struct sk_buff *frag_iter;
+	int elt = 0;
+
+	if (copy > 0) {
+		if (copy > len)
+			copy = len;
+		sg_set_buf(sg, skb->data + offset, copy);
+		elt++;
+		if ((len -= copy) == 0)
+			return elt;
+		offset += copy;
+	}
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+
+		WARN_ON(start > offset + len);
+
+		end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
+		if ((copy = end - offset) > 0) {
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+			if (copy > len)
+				copy = len;
+			sg_set_page(&sg[elt], skb_frag_page(frag), copy,
+					frag->page_offset+offset-start);
+			elt++;
+			if (!(len -= copy))
+				return elt;
+			offset += copy;
+		}
+		start = end;
+	}
+
+	skb_walk_frags(skb, frag_iter) {
+		int end;
+
+		WARN_ON(start > offset + len);
+
+		end = start + frag_iter->len;
+		if ((copy = end - offset) > 0) {
+			if (copy > len)
+				copy = len;
+			elt += __skb_to_sgvec(frag_iter, sg+elt, offset - start,
+					      copy);
+			if ((len -= copy) == 0)
+				return elt;
+			offset += copy;
+		}
+		start = end;
+	}
+	BUG_ON(len);
+	return elt;
+}
+
+int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
+{
+	int nsg = __skb_to_sgvec(skb, sg, offset, len);
+
+	sg_mark_end(&sg[nsg - 1]);
+
+	return nsg;
+}
+EXPORT_SYMBOL_GPL(skb_to_sgvec);
+
+/**
+ *	skb_cow_data - Check that a socket buffer's data buffers are writable
+ *	@skb: The socket buffer to check.
+ *	@tailbits: Amount of trailing space to be added
+ *	@trailer: Returned pointer to the skb where the @tailbits space begins
+ *
+ *	Make sure that the data buffers attached to a socket buffer are
+ *	writable. If they are not, private copies are made of the data buffers
+ *	and the socket buffer is set to use these instead.
+ *
+ *	If @tailbits is given, make sure that there is space to write @tailbits
+ *	bytes of data beyond current end of socket buffer.  @trailer will be
+ *	set to point to the skb in which this space begins.
+ *
+ *	The number of scatterlist elements required to completely map the
+ *	COW'd and extended socket buffer will be returned.
+ */
+int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
+{
+	int copyflag;
+	int elt;
+	struct sk_buff *skb1, **skb_p;
+
+	/* If skb is cloned or its head is paged, reallocate
+	 * head pulling out all the pages (pages are considered not writable
+	 * at the moment even if they are anonymous).
+	 */
+	if ((skb_cloned(skb) || skb_shinfo(skb)->nr_frags) &&
+	    __pskb_pull_tail(skb, skb_pagelen(skb)-skb_headlen(skb)) == NULL)
+		return -ENOMEM;
+
+	/* Easy case. Most of packets will go this way. */
+	if (!skb_has_frag_list(skb)) {
+		/* A little of trouble, not enough of space for trailer.
+		 * This should not happen, when stack is tuned to generate
+		 * good frames. OK, on miss we reallocate and reserve even more
+		 * space, 128 bytes is fair. */
+
+		if (skb_tailroom(skb) < tailbits &&
+		    pskb_expand_head(skb, 0, tailbits-skb_tailroom(skb)+128, GFP_ATOMIC))
+			return -ENOMEM;
+
+		/* Voila! */
+		*trailer = skb;
+		return 1;
+	}
+
+	/* Misery. We are in troubles, going to mincer fragments... */
+
+	elt = 1;
+	skb_p = &skb_shinfo(skb)->frag_list;
+	copyflag = 0;
+
+	while ((skb1 = *skb_p) != NULL) {
+		int ntail = 0;
+
+		/* The fragment is partially pulled by someone,
+		 * this can happen on input. Copy it and everything
+		 * after it. */
+
+		if (skb_shared(skb1))
+			copyflag = 1;
+
+		/* If the skb is the last, worry about trailer. */
+
+		if (skb1->next == NULL && tailbits) {
+			if (skb_shinfo(skb1)->nr_frags ||
+			    skb_has_frag_list(skb1) ||
+			    skb_tailroom(skb1) < tailbits)
+				ntail = tailbits + 128;
+		}
+
+		if (copyflag ||
+		    skb_cloned(skb1) ||
+		    ntail ||
+		    skb_shinfo(skb1)->nr_frags ||
+		    skb_has_frag_list(skb1)) {
+			struct sk_buff *skb2;
+
+			/* Fuck, we are miserable poor guys... */
+			if (ntail == 0)
+				skb2 = skb_copy(skb1, GFP_ATOMIC);
+			else
+				skb2 = skb_copy_expand(skb1,
+						       skb_headroom(skb1),
+						       ntail,
+						       GFP_ATOMIC);
+			if (unlikely(skb2 == NULL))
+				return -ENOMEM;
+
+			if (skb1->sk)
+				skb_set_owner_w(skb2, skb1->sk);
+
+			/* Looking around. Are we still alive?
+			 * OK, link new skb, drop old one */
+
+			skb2->next = skb1->next;
+			*skb_p = skb2;
+			kfree_skb(skb1);
+			skb1 = skb2;
+		}
+		elt++;
+		*trailer = skb1;
+		skb_p = &skb1->next;
+	}
+
+	return elt;
+}
+EXPORT_SYMBOL_GPL(skb_cow_data);
+
+static void sock_rmem_free(struct sk_buff *skb)
+{
+	struct sock *sk = skb->sk;
+
+	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
+}
+
+/*
+ * Note: We dont mem charge error packets (no sk_forward_alloc changes)
+ */
+int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
+{
+	int len = skb->len;
+
+	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
+	    (unsigned)sk->sk_rcvbuf)
+		return -ENOMEM;
+
+	skb_orphan(skb);
+	skb->sk = sk;
+	skb->destructor = sock_rmem_free;
+	atomic_add(skb->truesize, &sk->sk_rmem_alloc);
+
+	/* before exiting rcu section, make sure dst is refcounted */
+	skb_dst_force(skb);
+
+	skb_queue_tail(&sk->sk_error_queue, skb);
+	if (!sock_flag(sk, SOCK_DEAD))
+		sk->sk_data_ready(sk, len);
+	return 0;
+}
+EXPORT_SYMBOL(sock_queue_err_skb);
+
+void skb_tstamp_tx(struct sk_buff *orig_skb,
+		struct skb_shared_hwtstamps *hwtstamps)
+{
+	struct sock *sk = orig_skb->sk;
+	struct sock_exterr_skb *serr;
+	struct sk_buff *skb;
+	int err;
+
+	if (!sk)
+		return;
+
+	skb = skb_clone(orig_skb, GFP_ATOMIC);
+	if (!skb)
+		return;
+
+	if (hwtstamps) {
+		*skb_hwtstamps(skb) =
+			*hwtstamps;
+	} else {
+		/*
+		 * no hardware time stamps available,
+		 * so keep the shared tx_flags and only
+		 * store software time stamp
+		 */
+		skb->tstamp = ktime_get_real();
+	}
+
+	serr = SKB_EXT_ERR(skb);
+	memset(serr, 0, sizeof(*serr));
+	serr->ee.ee_errno = ENOMSG;
+	serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
+
+	err = sock_queue_err_skb(sk, skb);
+
+	if (err)
+		kfree_skb(skb);
+}
+EXPORT_SYMBOL_GPL(skb_tstamp_tx);
+
+void skb_complete_wifi_ack(struct sk_buff *skb, bool acked)
+{
+	struct sock *sk = skb->sk;
+	struct sock_exterr_skb *serr;
+	int err;
+
+	skb->wifi_acked_valid = 1;
+	skb->wifi_acked = acked;
+
+	serr = SKB_EXT_ERR(skb);
+	memset(serr, 0, sizeof(*serr));
+	serr->ee.ee_errno = ENOMSG;
+	serr->ee.ee_origin = SO_EE_ORIGIN_TXSTATUS;
+
+	err = sock_queue_err_skb(sk, skb);
+	if (err)
+		kfree_skb(skb);
+}
+EXPORT_SYMBOL_GPL(skb_complete_wifi_ack);
+
+
+/**
+ * skb_partial_csum_set - set up and verify partial csum values for packet
+ * @skb: the skb to set
+ * @start: the number of bytes after skb->data to start checksumming.
+ * @off: the offset from start to place the checksum.
+ *
+ * For untrusted partially-checksummed packets, we need to make sure the values
+ * for skb->csum_start and skb->csum_offset are valid so we don't oops.
+ *
+ * This function checks and sets those values and skb->ip_summed: if this
+ * returns false you should drop the packet.
+ */
+bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off)
+{
+	if (unlikely(start > skb_headlen(skb)) ||
+	    unlikely((int)start + off > skb_headlen(skb) - 2)) {
+		if (net_ratelimit())
+			printk(KERN_WARNING
+			       "bad partial csum: csum=%u/%u len=%u\n",
+			       start, off, skb_headlen(skb));
+		return false;
+	}
+	skb->ip_summed = CHECKSUM_PARTIAL;
+	skb->csum_start = skb_headroom(skb) + start;
+	skb->csum_offset = off;
+	return true;
+}
+EXPORT_SYMBOL_GPL(skb_partial_csum_set);
+
+void __skb_warn_lro_forwarding(const struct sk_buff *skb)
+{
+	if (net_ratelimit())
+		pr_warning("%s: received packets cannot be forwarded"
+			   " while LRO is enabled\n", skb->dev->name);
+}
+EXPORT_SYMBOL(__skb_warn_lro_forwarding);
diff --git a/net/core/sock.c b/net/core/sock.c
new file mode 100644
index 00000000..376566ad
--- /dev/null
+++ b/net/core/sock.c
@@ -0,0 +1,2691 @@
+/*
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the  BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		Generic socket support routines. Memory allocators, socket lock/release
+ *		handler for protocols to use and generic option handler.
+ *
+ *
+ * Authors:	Ross Biro
+ *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *		Florian La Roche, <flla@stud.uni-sb.de>
+ *		Alan Cox, <A.Cox@swansea.ac.uk>
+ *
+ * Fixes:
+ *		Alan Cox	: 	Numerous verify_area() problems
+ *		Alan Cox	:	Connecting on a connecting socket
+ *					now returns an error for tcp.
+ *		Alan Cox	:	sock->protocol is set correctly.
+ *					and is not sometimes left as 0.
+ *		Alan Cox	:	connect handles icmp errors on a
+ *					connect properly. Unfortunately there
+ *					is a restart syscall nasty there. I
+ *					can't match BSD without hacking the C
+ *					library. Ideas urgently sought!
+ *		Alan Cox	:	Disallow bind() to addresses that are
+ *					not ours - especially broadcast ones!!
+ *		Alan Cox	:	Socket 1024 _IS_ ok for users. (fencepost)
+ *		Alan Cox	:	sock_wfree/sock_rfree don't destroy sockets,
+ *					instead they leave that for the DESTROY timer.
+ *		Alan Cox	:	Clean up error flag in accept
+ *		Alan Cox	:	TCP ack handling is buggy, the DESTROY timer
+ *					was buggy. Put a remove_sock() in the handler
+ *					for memory when we hit 0. Also altered the timer
+ *					code. The ACK stuff can wait and needs major
+ *					TCP layer surgery.
+ *		Alan Cox	:	Fixed TCP ack bug, removed remove sock
+ *					and fixed timer/inet_bh race.
+ *		Alan Cox	:	Added zapped flag for TCP
+ *		Alan Cox	:	Move kfree_skb into skbuff.c and tidied up surplus code
+ *		Alan Cox	:	for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
+ *		Alan Cox	:	kfree_s calls now are kfree_skbmem so we can track skb resources
+ *		Alan Cox	:	Supports socket option broadcast now as does udp. Packet and raw need fixing.
+ *		Alan Cox	:	Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
+ *		Rick Sladkey	:	Relaxed UDP rules for matching packets.
+ *		C.E.Hawkins	:	IFF_PROMISC/SIOCGHWADDR support
+ *	Pauline Middelink	:	identd support
+ *		Alan Cox	:	Fixed connect() taking signals I think.
+ *		Alan Cox	:	SO_LINGER supported
+ *		Alan Cox	:	Error reporting fixes
+ *		Anonymous	:	inet_create tidied up (sk->reuse setting)
+ *		Alan Cox	:	inet sockets don't set sk->type!
+ *		Alan Cox	:	Split socket option code
+ *		Alan Cox	:	Callbacks
+ *		Alan Cox	:	Nagle flag for Charles & Johannes stuff
+ *		Alex		:	Removed restriction on inet fioctl
+ *		Alan Cox	:	Splitting INET from NET core
+ *		Alan Cox	:	Fixed bogus SO_TYPE handling in getsockopt()
+ *		Adam Caldwell	:	Missing return in SO_DONTROUTE/SO_DEBUG code
+ *		Alan Cox	:	Split IP from generic code
+ *		Alan Cox	:	New kfree_skbmem()
+ *		Alan Cox	:	Make SO_DEBUG superuser only.
+ *		Alan Cox	:	Allow anyone to clear SO_DEBUG
+ *					(compatibility fix)
+ *		Alan Cox	:	Added optimistic memory grabbing for AF_UNIX throughput.
+ *		Alan Cox	:	Allocator for a socket is settable.
+ *		Alan Cox	:	SO_ERROR includes soft errors.
+ *		Alan Cox	:	Allow NULL arguments on some SO_ opts
+ *		Alan Cox	: 	Generic socket allocation to make hooks
+ *					easier (suggested by Craig Metz).
+ *		Michael Pall	:	SO_ERROR returns positive errno again
+ *              Steve Whitehouse:       Added default destructor to free
+ *                                      protocol private data.
+ *              Steve Whitehouse:       Added various other default routines
+ *                                      common to several socket families.
+ *              Chris Evans     :       Call suser() check last on F_SETOWN
+ *		Jay Schulist	:	Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
+ *		Andi Kleen	:	Add sock_kmalloc()/sock_kfree_s()
+ *		Andi Kleen	:	Fix write_space callback
+ *		Chris Evans	:	Security fixes - signedness again
+ *		Arnaldo C. Melo :       cleanups, use skb_queue_purge
+ *
+ * To Fix:
+ *
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/capability.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/poll.h>
+#include <linux/tcp.h>
+#include <linux/init.h>
+#include <linux/highmem.h>
+#include <linux/user_namespace.h>
+#include <linux/static_key.h>
+#include <linux/memcontrol.h>
+
+#include <asm/uaccess.h>
+
+#include <linux/netdevice.h>
+#include <net/protocol.h>
+#include <linux/skbuff.h>
+#include <net/net_namespace.h>
+#include <net/request_sock.h>
+#include <net/sock.h>
+#include <linux/net_tstamp.h>
+#include <net/xfrm.h>
+#include <linux/ipsec.h>
+#include <net/cls_cgroup.h>
+#include <net/netprio_cgroup.h>
+
+#include <linux/filter.h>
+
+#include <trace/events/sock.h>
+
+#ifdef CONFIG_INET
+#include <net/tcp.h>
+#endif
+
+static DEFINE_MUTEX(proto_list_mutex);
+static LIST_HEAD(proto_list);
+
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+int mem_cgroup_sockets_init(struct cgroup *cgrp, struct cgroup_subsys *ss)
+{
+	struct proto *proto;
+	int ret = 0;
+
+	mutex_lock(&proto_list_mutex);
+	list_for_each_entry(proto, &proto_list, node) {
+		if (proto->init_cgroup) {
+			ret = proto->init_cgroup(cgrp, ss);
+			if (ret)
+				goto out;
+		}
+	}
+
+	mutex_unlock(&proto_list_mutex);
+	return ret;
+out:
+	list_for_each_entry_continue_reverse(proto, &proto_list, node)
+		if (proto->destroy_cgroup)
+			proto->destroy_cgroup(cgrp);
+	mutex_unlock(&proto_list_mutex);
+	return ret;
+}
+
+void mem_cgroup_sockets_destroy(struct cgroup *cgrp)
+{
+	struct proto *proto;
+
+	mutex_lock(&proto_list_mutex);
+	list_for_each_entry_reverse(proto, &proto_list, node)
+		if (proto->destroy_cgroup)
+			proto->destroy_cgroup(cgrp);
+	mutex_unlock(&proto_list_mutex);
+}
+#endif
+
+/*
+ * Each address family might have different locking rules, so we have
+ * one slock key per address family:
+ */
+static struct lock_class_key af_family_keys[AF_MAX];
+static struct lock_class_key af_family_slock_keys[AF_MAX];
+
+struct static_key memcg_socket_limit_enabled;
+EXPORT_SYMBOL(memcg_socket_limit_enabled);
+
+/*
+ * Make lock validator output more readable. (we pre-construct these
+ * strings build-time, so that runtime initialization of socket
+ * locks is fast):
+ */
+static const char *const af_family_key_strings[AF_MAX+1] = {
+  "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX"     , "sk_lock-AF_INET"     ,
+  "sk_lock-AF_AX25"  , "sk_lock-AF_IPX"      , "sk_lock-AF_APPLETALK",
+  "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE"   , "sk_lock-AF_ATMPVC"   ,
+  "sk_lock-AF_X25"   , "sk_lock-AF_INET6"    , "sk_lock-AF_ROSE"     ,
+  "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI"  , "sk_lock-AF_SECURITY" ,
+  "sk_lock-AF_KEY"   , "sk_lock-AF_NETLINK"  , "sk_lock-AF_PACKET"   ,
+  "sk_lock-AF_ASH"   , "sk_lock-AF_ECONET"   , "sk_lock-AF_ATMSVC"   ,
+  "sk_lock-AF_RDS"   , "sk_lock-AF_SNA"      , "sk_lock-AF_IRDA"     ,
+  "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE"  , "sk_lock-AF_LLC"      ,
+  "sk_lock-27"       , "sk_lock-28"          , "sk_lock-AF_CAN"      ,
+  "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV"        ,
+  "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN"     , "sk_lock-AF_PHONET"   ,
+  "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG"      ,
+  "sk_lock-AF_NFC"   , "sk_lock-AF_MAX"
+};
+static const char *const af_family_slock_key_strings[AF_MAX+1] = {
+  "slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
+  "slock-AF_AX25"  , "slock-AF_IPX"      , "slock-AF_APPLETALK",
+  "slock-AF_NETROM", "slock-AF_BRIDGE"   , "slock-AF_ATMPVC"   ,
+  "slock-AF_X25"   , "slock-AF_INET6"    , "slock-AF_ROSE"     ,
+  "slock-AF_DECnet", "slock-AF_NETBEUI"  , "slock-AF_SECURITY" ,
+  "slock-AF_KEY"   , "slock-AF_NETLINK"  , "slock-AF_PACKET"   ,
+  "slock-AF_ASH"   , "slock-AF_ECONET"   , "slock-AF_ATMSVC"   ,
+  "slock-AF_RDS"   , "slock-AF_SNA"      , "slock-AF_IRDA"     ,
+  "slock-AF_PPPOX" , "slock-AF_WANPIPE"  , "slock-AF_LLC"      ,
+  "slock-27"       , "slock-28"          , "slock-AF_CAN"      ,
+  "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_IUCV"     ,
+  "slock-AF_RXRPC" , "slock-AF_ISDN"     , "slock-AF_PHONET"   ,
+  "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG"      ,
+  "slock-AF_NFC"   , "slock-AF_MAX"
+};
+static const char *const af_family_clock_key_strings[AF_MAX+1] = {
+  "clock-AF_UNSPEC", "clock-AF_UNIX"     , "clock-AF_INET"     ,
+  "clock-AF_AX25"  , "clock-AF_IPX"      , "clock-AF_APPLETALK",
+  "clock-AF_NETROM", "clock-AF_BRIDGE"   , "clock-AF_ATMPVC"   ,
+  "clock-AF_X25"   , "clock-AF_INET6"    , "clock-AF_ROSE"     ,
+  "clock-AF_DECnet", "clock-AF_NETBEUI"  , "clock-AF_SECURITY" ,
+  "clock-AF_KEY"   , "clock-AF_NETLINK"  , "clock-AF_PACKET"   ,
+  "clock-AF_ASH"   , "clock-AF_ECONET"   , "clock-AF_ATMSVC"   ,
+  "clock-AF_RDS"   , "clock-AF_SNA"      , "clock-AF_IRDA"     ,
+  "clock-AF_PPPOX" , "clock-AF_WANPIPE"  , "clock-AF_LLC"      ,
+  "clock-27"       , "clock-28"          , "clock-AF_CAN"      ,
+  "clock-AF_TIPC"  , "clock-AF_BLUETOOTH", "clock-AF_IUCV"     ,
+  "clock-AF_RXRPC" , "clock-AF_ISDN"     , "clock-AF_PHONET"   ,
+  "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG"      ,
+  "clock-AF_NFC"   , "clock-AF_MAX"
+};
+
+/*
+ * sk_callback_lock locking rules are per-address-family,
+ * so split the lock classes by using a per-AF key:
+ */
+static struct lock_class_key af_callback_keys[AF_MAX];
+
+/* Take into consideration the size of the struct sk_buff overhead in the
+ * determination of these values, since that is non-constant across
+ * platforms.  This makes socket queueing behavior and performance
+ * not depend upon such differences.
+ */
+#define _SK_MEM_PACKETS		256
+#define _SK_MEM_OVERHEAD	SKB_TRUESIZE(256)
+#define SK_WMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
+#define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
+
+/* Run time adjustable parameters. */
+__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
+__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
+__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
+__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
+
+/* Maximal space eaten by iovec or ancillary data plus some space */
+int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
+EXPORT_SYMBOL(sysctl_optmem_max);
+
+#if defined(CONFIG_CGROUPS)
+#if !defined(CONFIG_NET_CLS_CGROUP)
+int net_cls_subsys_id = -1;
+EXPORT_SYMBOL_GPL(net_cls_subsys_id);
+#endif
+#if !defined(CONFIG_NETPRIO_CGROUP)
+int net_prio_subsys_id = -1;
+EXPORT_SYMBOL_GPL(net_prio_subsys_id);
+#endif
+#endif
+
+static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
+{
+	struct timeval tv;
+
+	if (optlen < sizeof(tv))
+		return -EINVAL;
+	if (copy_from_user(&tv, optval, sizeof(tv)))
+		return -EFAULT;
+	if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
+		return -EDOM;
+
+	if (tv.tv_sec < 0) {
+		static int warned __read_mostly;
+
+		*timeo_p = 0;
+		if (warned < 10 && net_ratelimit()) {
+			warned++;
+			printk(KERN_INFO "sock_set_timeout: `%s' (pid %d) "
+			       "tries to set negative timeout\n",
+				current->comm, task_pid_nr(current));
+		}
+		return 0;
+	}
+	*timeo_p = MAX_SCHEDULE_TIMEOUT;
+	if (tv.tv_sec == 0 && tv.tv_usec == 0)
+		return 0;
+	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
+		//*timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
+		*timeo_p = tv.tv_sec*HZ + (tv.tv_usec*HZ+(1000000-1))/1000000;
+	return 0;
+}
+
+static void sock_warn_obsolete_bsdism(const char *name)
+{
+	static int warned;
+	static char warncomm[TASK_COMM_LEN];
+	if (strcmp(warncomm, current->comm) && warned < 5) {
+		strcpy(warncomm,  current->comm);
+		printk(KERN_WARNING "process `%s' is using obsolete "
+		       "%s SO_BSDCOMPAT\n", warncomm, name);
+		warned++;
+	}
+}
+
+#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
+
+static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
+{
+	if (sk->sk_flags & flags) {
+		sk->sk_flags &= ~flags;
+		if (!(sk->sk_flags & SK_FLAGS_TIMESTAMP))
+			net_disable_timestamp();
+	}
+}
+
+
+int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+{
+	int err;
+	int skb_len;
+	unsigned long flags;
+	struct sk_buff_head *list = &sk->sk_receive_queue;
+
+	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) {
+		atomic_inc(&sk->sk_drops);
+		trace_sock_rcvqueue_full(sk, skb);
+		return -ENOMEM;
+	}
+
+	err = sk_filter(sk, skb);
+	if (err)
+		return err;
+
+	if (!sk_rmem_schedule(sk, skb->truesize)) {
+		atomic_inc(&sk->sk_drops);
+		return -ENOBUFS;
+	}
+
+	skb->dev = NULL;
+	skb_set_owner_r(skb, sk);
+
+	/* Cache the SKB length before we tack it onto the receive
+	 * queue.  Once it is added it no longer belongs to us and
+	 * may be freed by other threads of control pulling packets
+	 * from the queue.
+	 */
+	skb_len = skb->len;
+
+	/* we escape from rcu protected region, make sure we dont leak
+	 * a norefcounted dst
+	 */
+	skb_dst_force(skb);
+
+	spin_lock_irqsave(&list->lock, flags);
+	skb->dropcount = atomic_read(&sk->sk_drops);
+	__skb_queue_tail(list, skb);
+	spin_unlock_irqrestore(&list->lock, flags);
+
+	if (!sock_flag(sk, SOCK_DEAD))
+		sk->sk_data_ready(sk, skb_len);
+	return 0;
+}
+EXPORT_SYMBOL(sock_queue_rcv_skb);
+
+int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
+{
+	int rc = NET_RX_SUCCESS;
+
+	if (sk_filter(sk, skb))
+		goto discard_and_relse;
+
+	skb->dev = NULL;
+
+	if (sk_rcvqueues_full(sk, skb)) {
+		atomic_inc(&sk->sk_drops);
+		goto discard_and_relse;
+	}
+	if (nested)
+		bh_lock_sock_nested(sk);
+	else
+		bh_lock_sock(sk);
+	if (!sock_owned_by_user(sk)) {
+		/*
+		 * trylock + unlock semantics:
+		 */
+		mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
+
+		rc = sk_backlog_rcv(sk, skb);
+
+		mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
+	} else if (sk_add_backlog(sk, skb)) {
+		bh_unlock_sock(sk);
+		atomic_inc(&sk->sk_drops);
+		goto discard_and_relse;
+	}
+
+	bh_unlock_sock(sk);
+out:
+	sock_put(sk);
+	return rc;
+discard_and_relse:
+	kfree_skb(skb);
+	goto out;
+}
+EXPORT_SYMBOL(sk_receive_skb);
+
+void sk_reset_txq(struct sock *sk)
+{
+	sk_tx_queue_clear(sk);
+}
+EXPORT_SYMBOL(sk_reset_txq);
+
+struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
+{
+	struct dst_entry *dst = __sk_dst_get(sk);
+
+	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
+		sk_tx_queue_clear(sk);
+		RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
+		dst_release(dst);
+		return NULL;
+	}
+
+	return dst;
+}
+EXPORT_SYMBOL(__sk_dst_check);
+
+struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
+{
+	struct dst_entry *dst = sk_dst_get(sk);
+
+	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
+		sk_dst_reset(sk);
+		dst_release(dst);
+		return NULL;
+	}
+
+	return dst;
+}
+EXPORT_SYMBOL(sk_dst_check);
+
+static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen)
+{
+	int ret = -ENOPROTOOPT;
+#ifdef CONFIG_NETDEVICES
+	struct net *net = sock_net(sk);
+	char devname[IFNAMSIZ];
+	int index;
+
+	/* Sorry... */
+	ret = -EPERM;
+	if (!capable(CAP_NET_RAW))
+		goto out;
+
+	ret = -EINVAL;
+	if (optlen < 0)
+		goto out;
+
+	/* Bind this socket to a particular device like "eth0",
+	 * as specified in the passed interface name. If the
+	 * name is "" or the option length is zero the socket
+	 * is not bound.
+	 */
+	if (optlen > IFNAMSIZ - 1)
+		optlen = IFNAMSIZ - 1;
+	memset(devname, 0, sizeof(devname));
+
+	ret = -EFAULT;
+	if (copy_from_user(devname, optval, optlen))
+		goto out;
+
+	index = 0;
+	if (devname[0] != '\0') {
+		struct net_device *dev;
+
+		rcu_read_lock();
+		dev = dev_get_by_name_rcu(net, devname);
+		if (dev)
+			index = dev->ifindex;
+		rcu_read_unlock();
+		ret = -ENODEV;
+		if (!dev)
+			goto out;
+	}
+
+	lock_sock(sk);
+	sk->sk_bound_dev_if = index;
+	sk_dst_reset(sk);
+	release_sock(sk);
+
+	ret = 0;
+
+out:
+#endif
+
+	return ret;
+}
+
+static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
+{
+	if (valbool)
+		sock_set_flag(sk, bit);
+	else
+		sock_reset_flag(sk, bit);
+}
+
+/*
+ *	This is meant for all protocols to use and covers goings on
+ *	at the socket level. Everything here is generic.
+ */
+
+int sock_setsockopt(struct socket *sock, int level, int optname,
+		    char __user *optval, unsigned int optlen)
+{
+	struct sock *sk = sock->sk;
+	int val;
+	int valbool;
+	struct linger ling;
+	int ret = 0;
+
+	/*
+	 *	Options without arguments
+	 */
+
+	if (optname == SO_BINDTODEVICE)
+		return sock_bindtodevice(sk, optval, optlen);
+
+	if (optlen < sizeof(int))
+		return -EINVAL;
+
+	if (get_user(val, (int __user *)optval))
+		return -EFAULT;
+
+	valbool = val ? 1 : 0;
+
+	lock_sock(sk);
+
+	switch (optname) {
+	case SO_DEBUG:
+		if (val && !capable(CAP_NET_ADMIN))
+			ret = -EACCES;
+		else
+			sock_valbool_flag(sk, SOCK_DBG, valbool);
+		break;
+	case SO_REUSEADDR:
+		sk->sk_reuse = valbool;
+		break;
+	case SO_TYPE:
+	case SO_PROTOCOL:
+	case SO_DOMAIN:
+	case SO_ERROR:
+		ret = -ENOPROTOOPT;
+		break;
+	case SO_DONTROUTE:
+		sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
+		break;
+	case SO_BROADCAST:
+		sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
+		break;
+	case SO_SNDBUF:
+		/* Don't error on this BSD doesn't and if you think
+		   about it this is right. Otherwise apps have to
+		   play 'guess the biggest size' games. RCVBUF/SNDBUF
+		   are treated in BSD as hints */
+
+		if (val > sysctl_wmem_max)
+			val = sysctl_wmem_max;
+set_sndbuf:
+		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
+		if ((val * 2) < SOCK_MIN_SNDBUF)
+			sk->sk_sndbuf = SOCK_MIN_SNDBUF;
+		else
+			sk->sk_sndbuf = val * 2;
+
+		/*
+		 *	Wake up sending tasks if we
+		 *	upped the value.
+		 */
+		sk->sk_write_space(sk);
+		break;
+
+	case SO_SNDBUFFORCE:
+		if (!capable(CAP_NET_ADMIN)) {
+			ret = -EPERM;
+			break;
+		}
+		goto set_sndbuf;
+
+	case SO_RCVBUF:
+		/* Don't error on this BSD doesn't and if you think
+		   about it this is right. Otherwise apps have to
+		   play 'guess the biggest size' games. RCVBUF/SNDBUF
+		   are treated in BSD as hints */
+
+		if (val > sysctl_rmem_max)
+			val = sysctl_rmem_max;
+set_rcvbuf:
+		sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
+		/*
+		 * We double it on the way in to account for
+		 * "struct sk_buff" etc. overhead.   Applications
+		 * assume that the SO_RCVBUF setting they make will
+		 * allow that much actual data to be received on that
+		 * socket.
+		 *
+		 * Applications are unaware that "struct sk_buff" and
+		 * other overheads allocate from the receive buffer
+		 * during socket buffer allocation.
+		 *
+		 * And after considering the possible alternatives,
+		 * returning the value we actually used in getsockopt
+		 * is the most desirable behavior.
+		 */
+		if ((val * 2) < SOCK_MIN_RCVBUF)
+			sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
+		else
+			sk->sk_rcvbuf = val * 2;
+		break;
+
+	case SO_RCVBUFFORCE:
+		if (!capable(CAP_NET_ADMIN)) {
+			ret = -EPERM;
+			break;
+		}
+		goto set_rcvbuf;
+
+	case SO_KEEPALIVE:
+#ifdef CONFIG_INET
+		if (sk->sk_protocol == IPPROTO_TCP)
+			tcp_set_keepalive(sk, valbool);
+#endif
+		sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
+		break;
+
+	case SO_OOBINLINE:
+		sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
+		break;
+
+	case SO_NO_CHECK:
+		sk->sk_no_check = valbool;
+		break;
+
+	case SO_PRIORITY:
+		if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
+			sk->sk_priority = val;
+		else
+			ret = -EPERM;
+		break;
+
+	case SO_LINGER:
+		if (optlen < sizeof(ling)) {
+			ret = -EINVAL;	/* 1003.1g */
+			break;
+		}
+		if (copy_from_user(&ling, optval, sizeof(ling))) {
+			ret = -EFAULT;
+			break;
+		}
+		if (!ling.l_onoff)
+			sock_reset_flag(sk, SOCK_LINGER);
+		else {
+#if (BITS_PER_LONG == 32)
+			if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
+				sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
+			else
+#endif
+				sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
+			sock_set_flag(sk, SOCK_LINGER);
+		}
+		break;
+
+	case SO_BSDCOMPAT:
+		sock_warn_obsolete_bsdism("setsockopt");
+		break;
+
+	case SO_PASSCRED:
+		if (valbool)
+			set_bit(SOCK_PASSCRED, &sock->flags);
+		else
+			clear_bit(SOCK_PASSCRED, &sock->flags);
+		break;
+
+	case SO_TIMESTAMP:
+	case SO_TIMESTAMPNS:
+		if (valbool)  {
+			if (optname == SO_TIMESTAMP)
+				sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
+			else
+				sock_set_flag(sk, SOCK_RCVTSTAMPNS);
+			sock_set_flag(sk, SOCK_RCVTSTAMP);
+			sock_enable_timestamp(sk, SOCK_TIMESTAMP);
+		} else {
+			sock_reset_flag(sk, SOCK_RCVTSTAMP);
+			sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
+		}
+		break;
+
+	case SO_TIMESTAMPING:
+		if (val & ~SOF_TIMESTAMPING_MASK) {
+			ret = -EINVAL;
+			break;
+		}
+		sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE,
+				  val & SOF_TIMESTAMPING_TX_HARDWARE);
+		sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE,
+				  val & SOF_TIMESTAMPING_TX_SOFTWARE);
+		sock_valbool_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE,
+				  val & SOF_TIMESTAMPING_RX_HARDWARE);
+		if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
+			sock_enable_timestamp(sk,
+					      SOCK_TIMESTAMPING_RX_SOFTWARE);
+		else
+			sock_disable_timestamp(sk,
+					       (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
+		sock_valbool_flag(sk, SOCK_TIMESTAMPING_SOFTWARE,
+				  val & SOF_TIMESTAMPING_SOFTWARE);
+		sock_valbool_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE,
+				  val & SOF_TIMESTAMPING_SYS_HARDWARE);
+		sock_valbool_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE,
+				  val & SOF_TIMESTAMPING_RAW_HARDWARE);
+		break;
+
+	case SO_RCVLOWAT:
+		if (val < 0)
+			val = INT_MAX;
+		sk->sk_rcvlowat = val ? : 1;
+		break;
+
+	case SO_RCVTIMEO:
+		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
+		break;
+
+	case SO_SNDTIMEO:
+		ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
+		break;
+
+	case SO_ATTACH_FILTER:
+		ret = -EINVAL;
+		if (optlen == sizeof(struct sock_fprog)) {
+			struct sock_fprog fprog;
+
+			ret = -EFAULT;
+			if (copy_from_user(&fprog, optval, sizeof(fprog)))
+				break;
+
+			ret = sk_attach_filter(&fprog, sk);
+		}
+		break;
+
+	case SO_DETACH_FILTER:
+		ret = sk_detach_filter(sk);
+		break;
+
+	case SO_PASSSEC:
+		if (valbool)
+			set_bit(SOCK_PASSSEC, &sock->flags);
+		else
+			clear_bit(SOCK_PASSSEC, &sock->flags);
+		break;
+	case SO_MARK:
+		if (!capable(CAP_NET_ADMIN))
+			ret = -EPERM;
+		else
+			sk->sk_mark = val;
+		break;
+
+		/* We implement the SO_SNDLOWAT etc to
+		   not be settable (1003.1g 5.3) */
+	case SO_RXQ_OVFL:
+		sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
+		break;
+
+	case SO_WIFI_STATUS:
+		sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
+		break;
+
+	case SO_PEEK_OFF:
+		if (sock->ops->set_peek_off)
+			sock->ops->set_peek_off(sk, val);
+		else
+			ret = -EOPNOTSUPP;
+		break;
+
+	case SO_NOFCS:
+		sock_valbool_flag(sk, SOCK_NOFCS, valbool);
+		break;
+
+	default:
+		ret = -ENOPROTOOPT;
+		break;
+	}
+	release_sock(sk);
+	return ret;
+}
+EXPORT_SYMBOL(sock_setsockopt);
+
+
+void cred_to_ucred(struct pid *pid, const struct cred *cred,
+		   struct ucred *ucred)
+{
+	ucred->pid = pid_vnr(pid);
+	ucred->uid = ucred->gid = -1;
+	if (cred) {
+		struct user_namespace *current_ns = current_user_ns();
+
+		ucred->uid = user_ns_map_uid(current_ns, cred, cred->euid);
+		ucred->gid = user_ns_map_gid(current_ns, cred, cred->egid);
+	}
+}
+EXPORT_SYMBOL_GPL(cred_to_ucred);
+
+int sock_getsockopt(struct socket *sock, int level, int optname,
+		    char __user *optval, int __user *optlen)
+{
+	struct sock *sk = sock->sk;
+
+	union {
+		int val;
+		struct linger ling;
+		struct timeval tm;
+	} v;
+
+	int lv = sizeof(int);
+	int len;
+
+	if (get_user(len, optlen))
+		return -EFAULT;
+	if (len < 0)
+		return -EINVAL;
+
+	memset(&v, 0, sizeof(v));
+
+	switch (optname) {
+	case SO_DEBUG:
+		v.val = sock_flag(sk, SOCK_DBG);
+		break;
+
+	case SO_DONTROUTE:
+		v.val = sock_flag(sk, SOCK_LOCALROUTE);
+		break;
+
+	case SO_BROADCAST:
+		v.val = !!sock_flag(sk, SOCK_BROADCAST);
+		break;
+
+	case SO_SNDBUF:
+		v.val = sk->sk_sndbuf;
+		break;
+
+	case SO_RCVBUF:
+		v.val = sk->sk_rcvbuf;
+		break;
+
+	case SO_REUSEADDR:
+		v.val = sk->sk_reuse;
+		break;
+
+	case SO_KEEPALIVE:
+		v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
+		break;
+
+	case SO_TYPE:
+		v.val = sk->sk_type;
+		break;
+
+	case SO_PROTOCOL:
+		v.val = sk->sk_protocol;
+		break;
+
+	case SO_DOMAIN:
+		v.val = sk->sk_family;
+		break;
+
+	case SO_ERROR:
+		v.val = -sock_error(sk);
+		if (v.val == 0)
+			v.val = xchg(&sk->sk_err_soft, 0);
+		break;
+
+	case SO_OOBINLINE:
+		v.val = !!sock_flag(sk, SOCK_URGINLINE);
+		break;
+
+	case SO_NO_CHECK:
+		v.val = sk->sk_no_check;
+		break;
+
+	case SO_PRIORITY:
+		v.val = sk->sk_priority;
+		break;
+
+	case SO_LINGER:
+		lv		= sizeof(v.ling);
+		v.ling.l_onoff	= !!sock_flag(sk, SOCK_LINGER);
+		v.ling.l_linger	= sk->sk_lingertime / HZ;
+		break;
+
+	case SO_BSDCOMPAT:
+		sock_warn_obsolete_bsdism("getsockopt");
+		break;
+
+	case SO_TIMESTAMP:
+		v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
+				!sock_flag(sk, SOCK_RCVTSTAMPNS);
+		break;
+
+	case SO_TIMESTAMPNS:
+		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
+		break;
+
+	case SO_TIMESTAMPING:
+		v.val = 0;
+		if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
+			v.val |= SOF_TIMESTAMPING_TX_HARDWARE;
+		if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
+			v.val |= SOF_TIMESTAMPING_TX_SOFTWARE;
+		if (sock_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE))
+			v.val |= SOF_TIMESTAMPING_RX_HARDWARE;
+		if (sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE))
+			v.val |= SOF_TIMESTAMPING_RX_SOFTWARE;
+		if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE))
+			v.val |= SOF_TIMESTAMPING_SOFTWARE;
+		if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE))
+			v.val |= SOF_TIMESTAMPING_SYS_HARDWARE;
+		if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE))
+			v.val |= SOF_TIMESTAMPING_RAW_HARDWARE;
+		break;
+
+	case SO_RCVTIMEO:
+		lv = sizeof(struct timeval);
+		if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
+			v.tm.tv_sec = 0;
+			v.tm.tv_usec = 0;
+		} else {
+			v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
+			v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
+		}
+		break;
+
+	case SO_SNDTIMEO:
+		lv = sizeof(struct timeval);
+		if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
+			v.tm.tv_sec = 0;
+			v.tm.tv_usec = 0;
+		} else {
+			v.tm.tv_sec = sk->sk_sndtimeo / HZ;
+			v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
+		}
+		break;
+
+	case SO_RCVLOWAT:
+		v.val = sk->sk_rcvlowat;
+		break;
+
+	case SO_SNDLOWAT:
+		v.val = 1;
+		break;
+
+	case SO_PASSCRED:
+		v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
+		break;
+
+	case SO_PEERCRED:
+	{
+		struct ucred peercred;
+		if (len > sizeof(peercred))
+			len = sizeof(peercred);
+		cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
+		if (copy_to_user(optval, &peercred, len))
+			return -EFAULT;
+		goto lenout;
+	}
+
+	case SO_PEERNAME:
+	{
+		char address[128];
+
+		if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
+			return -ENOTCONN;
+		if (lv < len)
+			return -EINVAL;
+		if (copy_to_user(optval, address, len))
+			return -EFAULT;
+		goto lenout;
+	}
+
+	/* Dubious BSD thing... Probably nobody even uses it, but
+	 * the UNIX standard wants it for whatever reason... -DaveM
+	 */
+	case SO_ACCEPTCONN:
+		v.val = sk->sk_state == TCP_LISTEN;
+		break;
+
+	case SO_PASSSEC:
+		v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0;
+		break;
+
+	case SO_PEERSEC:
+		return security_socket_getpeersec_stream(sock, optval, optlen, len);
+
+	case SO_MARK:
+		v.val = sk->sk_mark;
+		break;
+
+	case SO_RXQ_OVFL:
+		v.val = !!sock_flag(sk, SOCK_RXQ_OVFL);
+		break;
+
+	case SO_WIFI_STATUS:
+		v.val = !!sock_flag(sk, SOCK_WIFI_STATUS);
+		break;
+
+	case SO_PEEK_OFF:
+		if (!sock->ops->set_peek_off)
+			return -EOPNOTSUPP;
+
+		v.val = sk->sk_peek_off;
+		break;
+	case SO_NOFCS:
+		v.val = !!sock_flag(sk, SOCK_NOFCS);
+		break;
+	default:
+		return -ENOPROTOOPT;
+	}
+
+	if (len > lv)
+		len = lv;
+	if (copy_to_user(optval, &v, len))
+		return -EFAULT;
+lenout:
+	if (put_user(len, optlen))
+		return -EFAULT;
+	return 0;
+}
+
+/*
+ * Initialize an sk_lock.
+ *
+ * (We also register the sk_lock with the lock validator.)
+ */
+static inline void sock_lock_init(struct sock *sk)
+{
+	sock_lock_init_class_and_name(sk,
+			af_family_slock_key_strings[sk->sk_family],
+			af_family_slock_keys + sk->sk_family,
+			af_family_key_strings[sk->sk_family],
+			af_family_keys + sk->sk_family);
+}
+
+/*
+ * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet,
+ * even temporarly, because of RCU lookups. sk_node should also be left as is.
+ * We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end
+ */
+static void sock_copy(struct sock *nsk, const struct sock *osk)
+{
+#ifdef CONFIG_SECURITY_NETWORK
+	void *sptr = nsk->sk_security;
+#endif
+	memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));
+
+	memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
+	       osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end));
+
+#ifdef CONFIG_SECURITY_NETWORK
+	nsk->sk_security = sptr;
+	security_sk_clone(osk, nsk);
+#endif
+}
+
+/*
+ * caches using SLAB_DESTROY_BY_RCU should let .next pointer from nulls nodes
+ * un-modified. Special care is taken when initializing object to zero.
+ */
+static inline void sk_prot_clear_nulls(struct sock *sk, int size)
+{
+	if (offsetof(struct sock, sk_node.next) != 0)
+		memset(sk, 0, offsetof(struct sock, sk_node.next));
+	memset(&sk->sk_node.pprev, 0,
+	       size - offsetof(struct sock, sk_node.pprev));
+}
+
+void sk_prot_clear_portaddr_nulls(struct sock *sk, int size)
+{
+	unsigned long nulls1, nulls2;
+
+	nulls1 = offsetof(struct sock, __sk_common.skc_node.next);
+	nulls2 = offsetof(struct sock, __sk_common.skc_portaddr_node.next);
+	if (nulls1 > nulls2)
+		swap(nulls1, nulls2);
+
+	if (nulls1 != 0)
+		memset((char *)sk, 0, nulls1);
+	memset((char *)sk + nulls1 + sizeof(void *), 0,
+	       nulls2 - nulls1 - sizeof(void *));
+	memset((char *)sk + nulls2 + sizeof(void *), 0,
+	       size - nulls2 - sizeof(void *));
+}
+EXPORT_SYMBOL(sk_prot_clear_portaddr_nulls);
+
+static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
+		int family)
+{
+	struct sock *sk;
+	struct kmem_cache *slab;
+
+	slab = prot->slab;
+	if (slab != NULL) {
+		sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
+		if (!sk)
+			return sk;
+		if (priority & __GFP_ZERO) {
+			if (prot->clear_sk)
+				prot->clear_sk(sk, prot->obj_size);
+			else
+				sk_prot_clear_nulls(sk, prot->obj_size);
+		}
+	} else
+		sk = kmalloc(prot->obj_size, priority);
+
+	if (sk != NULL) {
+		kmemcheck_annotate_bitfield(sk, flags);
+
+		if (security_sk_alloc(sk, family, priority))
+			goto out_free;
+
+		if (!try_module_get(prot->owner))
+			goto out_free_sec;
+		sk_tx_queue_clear(sk);
+	}
+
+	return sk;
+
+out_free_sec:
+	security_sk_free(sk);
+out_free:
+	if (slab != NULL)
+		kmem_cache_free(slab, sk);
+	else
+		kfree(sk);
+	return NULL;
+}
+
+static void sk_prot_free(struct proto *prot, struct sock *sk)
+{
+	struct kmem_cache *slab;
+	struct module *owner;
+
+	owner = prot->owner;
+	slab = prot->slab;
+
+	security_sk_free(sk);
+	if (slab != NULL)
+		kmem_cache_free(slab, sk);
+	else
+		kfree(sk);
+	module_put(owner);
+}
+
+#ifdef CONFIG_CGROUPS
+void sock_update_classid(struct sock *sk)
+{
+	u32 classid;
+
+	rcu_read_lock();  /* doing current task, which cannot vanish. */
+	classid = task_cls_classid(current);
+	rcu_read_unlock();
+	if (classid && classid != sk->sk_classid)
+		sk->sk_classid = classid;
+}
+EXPORT_SYMBOL(sock_update_classid);
+
+void sock_update_netprioidx(struct sock *sk)
+{
+	if (in_interrupt())
+		return;
+
+	sk->sk_cgrp_prioidx = task_netprioidx(current);
+}
+EXPORT_SYMBOL_GPL(sock_update_netprioidx);
+#endif
+
+/**
+ *	sk_alloc - All socket objects are allocated here
+ *	@net: the applicable net namespace
+ *	@family: protocol family
+ *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
+ *	@prot: struct proto associated with this new sock instance
+ */
+struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
+		      struct proto *prot)
+{
+	struct sock *sk;
+
+	sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
+	if (sk) {
+		sk->sk_family = family;
+		/*
+		 * See comment in struct sock definition to understand
+		 * why we need sk_prot_creator -acme
+		 */
+		sk->sk_prot = sk->sk_prot_creator = prot;
+		sock_lock_init(sk);
+		sock_net_set(sk, get_net(net));
+		atomic_set(&sk->sk_wmem_alloc, 1);
+
+		sock_update_classid(sk);
+		sock_update_netprioidx(sk);
+	}
+
+	return sk;
+}
+EXPORT_SYMBOL(sk_alloc);
+
+static void __sk_free(struct sock *sk)
+{
+	struct sk_filter *filter;
+
+	if (sk->sk_destruct)
+		sk->sk_destruct(sk);
+
+	filter = rcu_dereference_check(sk->sk_filter,
+				       atomic_read(&sk->sk_wmem_alloc) == 0);
+	if (filter) {
+		sk_filter_uncharge(sk, filter);
+		RCU_INIT_POINTER(sk->sk_filter, NULL);
+	}
+
+	sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);
+
+	if (atomic_read(&sk->sk_omem_alloc))
+		printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
+		       __func__, atomic_read(&sk->sk_omem_alloc));
+
+	if (sk->sk_peer_cred)
+		put_cred(sk->sk_peer_cred);
+	put_pid(sk->sk_peer_pid);
+	put_net(sock_net(sk));
+	sk_prot_free(sk->sk_prot_creator, sk);
+}
+
+void sk_free(struct sock *sk)
+{
+	/*
+	 * We subtract one from sk_wmem_alloc and can know if
+	 * some packets are still in some tx queue.
+	 * If not null, sock_wfree() will call __sk_free(sk) later
+	 */
+	if (atomic_dec_and_test(&sk->sk_wmem_alloc))
+		__sk_free(sk);
+}
+EXPORT_SYMBOL(sk_free);
+
+/*
+ * Last sock_put should drop reference to sk->sk_net. It has already
+ * been dropped in sk_change_net. Taking reference to stopping namespace
+ * is not an option.
+ * Take reference to a socket to remove it from hash _alive_ and after that
+ * destroy it in the context of init_net.
+ */
+void sk_release_kernel(struct sock *sk)
+{
+	if (sk == NULL || sk->sk_socket == NULL)
+		return;
+
+	sock_hold(sk);
+	sock_release(sk->sk_socket);
+	release_net(sock_net(sk));
+	sock_net_set(sk, get_net(&init_net));
+	sock_put(sk);
+}
+EXPORT_SYMBOL(sk_release_kernel);
+
+static void sk_update_clone(const struct sock *sk, struct sock *newsk)
+{
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+		sock_update_memcg(newsk);
+}
+
+/**
+ *	sk_clone_lock - clone a socket, and lock its clone
+ *	@sk: the socket to clone
+ *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
+ *
+ *	Caller must unlock socket even in error path (bh_unlock_sock(newsk))
+ */
+struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
+{
+	struct sock *newsk;
+
+	newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
+	if (newsk != NULL) {
+		struct sk_filter *filter;
+
+		sock_copy(newsk, sk);
+
+		/* SANITY */
+		get_net(sock_net(newsk));
+		sk_node_init(&newsk->sk_node);
+		sock_lock_init(newsk);
+		bh_lock_sock(newsk);
+		newsk->sk_backlog.head	= newsk->sk_backlog.tail = NULL;
+		newsk->sk_backlog.len = 0;
+
+		atomic_set(&newsk->sk_rmem_alloc, 0);
+		/*
+		 * sk_wmem_alloc set to one (see sk_free() and sock_wfree())
+		 */
+		atomic_set(&newsk->sk_wmem_alloc, 1);
+		atomic_set(&newsk->sk_omem_alloc, 0);
+		skb_queue_head_init(&newsk->sk_receive_queue);
+		skb_queue_head_init(&newsk->sk_write_queue);
+#ifdef CONFIG_NET_DMA
+		skb_queue_head_init(&newsk->sk_async_wait_queue);
+#endif
+
+		spin_lock_init(&newsk->sk_dst_lock);
+		rwlock_init(&newsk->sk_callback_lock);
+		lockdep_set_class_and_name(&newsk->sk_callback_lock,
+				af_callback_keys + newsk->sk_family,
+				af_family_clock_key_strings[newsk->sk_family]);
+
+		newsk->sk_dst_cache	= NULL;
+		newsk->sk_wmem_queued	= 0;
+		newsk->sk_forward_alloc = 0;
+		newsk->sk_send_head	= NULL;
+		newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
+
+		sock_reset_flag(newsk, SOCK_DONE);
+		skb_queue_head_init(&newsk->sk_error_queue);
+
+		filter = rcu_dereference_protected(newsk->sk_filter, 1);
+		if (filter != NULL)
+			sk_filter_charge(newsk, filter);
+
+		if (unlikely(xfrm_sk_clone_policy(newsk))) {
+			/* It is still raw copy of parent, so invalidate
+			 * destructor and make plain sk_free() */
+			newsk->sk_destruct = NULL;
+			bh_unlock_sock(newsk);
+			sk_free(newsk);
+			newsk = NULL;
+			goto out;
+		}
+
+		newsk->sk_err	   = 0;
+		newsk->sk_priority = 0;
+		/*
+		 * Before updating sk_refcnt, we must commit prior changes to memory
+		 * (Documentation/RCU/rculist_nulls.txt for details)
+		 */
+		smp_wmb();
+		atomic_set(&newsk->sk_refcnt, 2);
+
+		/*
+		 * Increment the counter in the same struct proto as the master
+		 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
+		 * is the same as sk->sk_prot->socks, as this field was copied
+		 * with memcpy).
+		 *
+		 * This _changes_ the previous behaviour, where
+		 * tcp_create_openreq_child always was incrementing the
+		 * equivalent to tcp_prot->socks (inet_sock_nr), so this have
+		 * to be taken into account in all callers. -acme
+		 */
+		sk_refcnt_debug_inc(newsk);
+		sk_set_socket(newsk, NULL);
+		newsk->sk_wq = NULL;
+
+		sk_update_clone(sk, newsk);
+
+		if (newsk->sk_prot->sockets_allocated)
+			sk_sockets_allocated_inc(newsk);
+
+		if (newsk->sk_flags & SK_FLAGS_TIMESTAMP)
+			net_enable_timestamp();
+	}
+out:
+	return newsk;
+}
+EXPORT_SYMBOL_GPL(sk_clone_lock);
+
+void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
+{
+	__sk_dst_set(sk, dst);
+	sk->sk_route_caps = dst->dev->features;
+	if (sk->sk_route_caps & NETIF_F_GSO)
+		sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
+	sk->sk_route_caps &= ~sk->sk_route_nocaps;
+	if (sk_can_gso(sk)) {
+		if (dst->header_len) {
+			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
+		} else {
+			sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
+			sk->sk_gso_max_size = dst->dev->gso_max_size;
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(sk_setup_caps);
+
+void __init sk_init(void)
+{
+	if (totalram_pages <= 4096) {
+		sysctl_wmem_max = 32767;
+		sysctl_rmem_max = 32767;
+		sysctl_wmem_default = 32767;
+		sysctl_rmem_default = 32767;
+	} else if (totalram_pages >= 131072) {
+		sysctl_wmem_max = 131071;
+		sysctl_rmem_max = 131071;
+	}
+}
+
+/*
+ *	Simple resource managers for sockets.
+ */
+
+
+/*
+ * Write buffer destructor automatically called from kfree_skb.
+ */
+void sock_wfree(struct sk_buff *skb)
+{
+	struct sock *sk = skb->sk;
+	unsigned int len = skb->truesize;
+
+	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
+		/*
+		 * Keep a reference on sk_wmem_alloc, this will be released
+		 * after sk_write_space() call
+		 */
+		atomic_sub(len - 1, &sk->sk_wmem_alloc);
+		sk->sk_write_space(sk);
+		len = 1;
+	}
+	/*
+	 * if sk_wmem_alloc reaches 0, we must finish what sk_free()
+	 * could not do because of in-flight packets
+	 */
+	if (atomic_sub_and_test(len, &sk->sk_wmem_alloc))
+		__sk_free(sk);
+}
+EXPORT_SYMBOL(sock_wfree);
+
+/*
+ * Read buffer destructor automatically called from kfree_skb.
+ */
+void sock_rfree(struct sk_buff *skb)
+{
+	struct sock *sk = skb->sk;
+	unsigned int len = skb->truesize;
+
+	atomic_sub(len, &sk->sk_rmem_alloc);
+	sk_mem_uncharge(sk, len);
+}
+EXPORT_SYMBOL(sock_rfree);
+
+
+int sock_i_uid(struct sock *sk)
+{
+	int uid;
+
+	read_lock_bh(&sk->sk_callback_lock);
+	uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
+	read_unlock_bh(&sk->sk_callback_lock);
+	return uid;
+}
+EXPORT_SYMBOL(sock_i_uid);
+
+unsigned long sock_i_ino(struct sock *sk)
+{
+	unsigned long ino;
+
+	read_lock_bh(&sk->sk_callback_lock);
+	ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
+	read_unlock_bh(&sk->sk_callback_lock);
+	return ino;
+}
+EXPORT_SYMBOL(sock_i_ino);
+
+/*
+ * Allocate a skb from the socket's send buffer.
+ */
+struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
+			     gfp_t priority)
+{
+	if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
+		struct sk_buff *skb = alloc_skb(size, priority);
+		if (skb) {
+			skb_set_owner_w(skb, sk);
+			return skb;
+		}
+	}
+	return NULL;
+}
+EXPORT_SYMBOL(sock_wmalloc);
+
+/*
+ * Allocate a skb from the socket's receive buffer.
+ */
+struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
+			     gfp_t priority)
+{
+	if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
+		struct sk_buff *skb = alloc_skb(size, priority);
+		if (skb) {
+			skb_set_owner_r(skb, sk);
+			return skb;
+		}
+	}
+	return NULL;
+}
+
+/*
+ * Allocate a memory block from the socket's option memory buffer.
+ */
+void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
+{
+	if ((unsigned)size <= sysctl_optmem_max &&
+	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
+		void *mem;
+		/* First do the add, to avoid the race if kmalloc
+		 * might sleep.
+		 */
+		atomic_add(size, &sk->sk_omem_alloc);
+		mem = kmalloc(size, priority);
+		if (mem)
+			return mem;
+		atomic_sub(size, &sk->sk_omem_alloc);
+	}
+	return NULL;
+}
+EXPORT_SYMBOL(sock_kmalloc);
+
+/*
+ * Free an option memory block.
+ */
+void sock_kfree_s(struct sock *sk, void *mem, int size)
+{
+	kfree(mem);
+	atomic_sub(size, &sk->sk_omem_alloc);
+}
+EXPORT_SYMBOL(sock_kfree_s);
+
+/* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
+   I think, these locks should be removed for datagram sockets.
+ */
+static long sock_wait_for_wmem(struct sock *sk, long timeo)
+{
+	DEFINE_WAIT(wait);
+
+	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+	for (;;) {
+		if (!timeo)
+			break;
+		if (signal_pending(current))
+			break;
+		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
+			break;
+		if (sk->sk_shutdown & SEND_SHUTDOWN)
+			break;
+		if (sk->sk_err)
+			break;
+		timeo = schedule_timeout(timeo);
+	}
+	finish_wait(sk_sleep(sk), &wait);
+	return timeo;
+}
+
+
+/*
+ *	Generic send/receive buffer handlers
+ */
+
+struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
+				     unsigned long data_len, int noblock,
+				     int *errcode)
+{
+	struct sk_buff *skb;
+	gfp_t gfp_mask;
+	long timeo;
+	int err;
+	int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+
+	err = -EMSGSIZE;
+	if (npages > MAX_SKB_FRAGS)
+		goto failure;
+
+	gfp_mask = sk->sk_allocation;
+	if (gfp_mask & __GFP_WAIT)
+		gfp_mask |= __GFP_REPEAT;
+
+	timeo = sock_sndtimeo(sk, noblock);
+	while (1) {
+		err = sock_error(sk);
+		if (err != 0)
+			goto failure;
+
+		err = -EPIPE;
+		if (sk->sk_shutdown & SEND_SHUTDOWN)
+			goto failure;
+
+		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
+			skb = alloc_skb(header_len, gfp_mask);
+			if (skb) {
+				int i;
+
+				/* No pages, we're done... */
+				if (!data_len)
+					break;
+
+				skb->truesize += data_len;
+				skb_shinfo(skb)->nr_frags = npages;
+				for (i = 0; i < npages; i++) {
+					struct page *page;
+
+					page = alloc_pages(sk->sk_allocation, 0);
+					if (!page) {
+						err = -ENOBUFS;
+						skb_shinfo(skb)->nr_frags = i;
+						kfree_skb(skb);
+						goto failure;
+					}
+
+					__skb_fill_page_desc(skb, i,
+							page, 0,
+							(data_len >= PAGE_SIZE ?
+							 PAGE_SIZE :
+							 data_len));
+					data_len -= PAGE_SIZE;
+				}
+
+				/* Full success... */
+				break;
+			}
+			err = -ENOBUFS;
+			goto failure;
+		}
+		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+		err = -EAGAIN;
+		if (!timeo)
+			goto failure;
+		if (signal_pending(current))
+			goto interrupted;
+		timeo = sock_wait_for_wmem(sk, timeo);
+	}
+
+	skb_set_owner_w(skb, sk);
+	return skb;
+
+interrupted:
+	err = sock_intr_errno(timeo);
+failure:
+	*errcode = err;
+	return NULL;
+}
+EXPORT_SYMBOL(sock_alloc_send_pskb);
+
+struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
+				    int noblock, int *errcode)
+{
+	return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
+}
+EXPORT_SYMBOL(sock_alloc_send_skb);
+
+static void __lock_sock(struct sock *sk)
+	__releases(&sk->sk_lock.slock)
+	__acquires(&sk->sk_lock.slock)
+{
+	DEFINE_WAIT(wait);
+
+	for (;;) {
+		prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
+					TASK_UNINTERRUPTIBLE);
+		spin_unlock_bh(&sk->sk_lock.slock);
+		schedule();
+		spin_lock_bh(&sk->sk_lock.slock);
+		if (!sock_owned_by_user(sk))
+			break;
+	}
+	finish_wait(&sk->sk_lock.wq, &wait);
+}
+
+static void __release_sock(struct sock *sk)
+	__releases(&sk->sk_lock.slock)
+	__acquires(&sk->sk_lock.slock)
+{
+	struct sk_buff *skb = sk->sk_backlog.head;
+
+	do {
+		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
+		bh_unlock_sock(sk);
+
+		do {
+			struct sk_buff *next = skb->next;
+
+			WARN_ON_ONCE(skb_dst_is_noref(skb));
+			skb->next = NULL;
+			sk_backlog_rcv(sk, skb);
+
+			/*
+			 * We are in process context here with softirqs
+			 * disabled, use cond_resched_softirq() to preempt.
+			 * This is safe to do because we've taken the backlog
+			 * queue private:
+			 */
+			cond_resched_softirq();
+
+			skb = next;
+		} while (skb != NULL);
+
+		bh_lock_sock(sk);
+	} while ((skb = sk->sk_backlog.head) != NULL);
+
+	/*
+	 * Doing the zeroing here guarantee we can not loop forever
+	 * while a wild producer attempts to flood us.
+	 */
+	sk->sk_backlog.len = 0;
+}
+
+/**
+ * sk_wait_data - wait for data to arrive at sk_receive_queue
+ * @sk:    sock to wait on
+ * @timeo: for how long
+ *
+ * Now socket state including sk->sk_err is changed only under lock,
+ * hence we may omit checks after joining wait queue.
+ * We check receive queue before schedule() only as optimization;
+ * it is very likely that release_sock() added new data.
+ */
+int sk_wait_data(struct sock *sk, long *timeo)
+{
+	int rc;
+	DEFINE_WAIT(wait);
+
+	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+	set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+	rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
+	clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+	finish_wait(sk_sleep(sk), &wait);
+	return rc;
+}
+EXPORT_SYMBOL(sk_wait_data);
+
+/**
+ *	__sk_mem_schedule - increase sk_forward_alloc and memory_allocated
+ *	@sk: socket
+ *	@size: memory size to allocate
+ *	@kind: allocation type
+ *
+ *	If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
+ *	rmem allocation. This function assumes that protocols which have
+ *	memory_pressure use sk_wmem_queued as write buffer accounting.
+ */
+int __sk_mem_schedule(struct sock *sk, int size, int kind)
+{
+	struct proto *prot = sk->sk_prot;
+	int amt = sk_mem_pages(size);
+	long allocated;
+	int parent_status = UNDER_LIMIT;
+
+	sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
+
+	allocated = sk_memory_allocated_add(sk, amt, &parent_status);
+
+	/* Under limit. */
+	if (parent_status == UNDER_LIMIT &&
+			allocated <= sk_prot_mem_limits(sk, 0)) {
+		sk_leave_memory_pressure(sk);
+		return 1;
+	}
+
+	/* Under pressure. (we or our parents) */
+	if ((parent_status > SOFT_LIMIT) ||
+			allocated > sk_prot_mem_limits(sk, 1))
+		sk_enter_memory_pressure(sk);
+
+	/* Over hard limit (we or our parents) */
+	if ((parent_status == OVER_LIMIT) ||
+			(allocated > sk_prot_mem_limits(sk, 2)))
+		goto suppress_allocation;
+
+	/* guarantee minimum buffer size under pressure */
+	if (kind == SK_MEM_RECV) {
+		if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
+			return 1;
+
+	} else { /* SK_MEM_SEND */
+		if (sk->sk_type == SOCK_STREAM) {
+			if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
+				return 1;
+		} else if (atomic_read(&sk->sk_wmem_alloc) <
+			   prot->sysctl_wmem[0])
+				return 1;
+	}
+
+	if (sk_has_memory_pressure(sk)) {
+		int alloc;
+
+		if (!sk_under_memory_pressure(sk))
+			return 1;
+		alloc = sk_sockets_allocated_read_positive(sk);
+		if (sk_prot_mem_limits(sk, 2) > alloc *
+		    sk_mem_pages(sk->sk_wmem_queued +
+				 atomic_read(&sk->sk_rmem_alloc) +
+				 sk->sk_forward_alloc))
+			return 1;
+	}
+
+suppress_allocation:
+
+	if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) {
+		sk_stream_moderate_sndbuf(sk);
+
+		/* Fail only if socket is _under_ its sndbuf.
+		 * In this case we cannot block, so that we have to fail.
+		 */
+		if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
+			return 1;
+	}
+
+	trace_sock_exceed_buf_limit(sk, prot, allocated);
+
+	/* Alas. Undo changes. */
+	sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
+
+	sk_memory_allocated_sub(sk, amt);
+
+	return 0;
+}
+EXPORT_SYMBOL(__sk_mem_schedule);
+
+/**
+ *	__sk_reclaim - reclaim memory_allocated
+ *	@sk: socket
+ */
+void __sk_mem_reclaim(struct sock *sk)
+{
+	sk_memory_allocated_sub(sk,
+				sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT);
+	sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;
+
+	if (sk_under_memory_pressure(sk) &&
+	    (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
+		sk_leave_memory_pressure(sk);
+}
+EXPORT_SYMBOL(__sk_mem_reclaim);
+
+
+/*
+ * Set of default routines for initialising struct proto_ops when
+ * the protocol does not support a particular function. In certain
+ * cases where it makes no sense for a protocol to have a "do nothing"
+ * function, some default processing is provided.
+ */
+
+int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
+{
+	return -EOPNOTSUPP;
+}
+EXPORT_SYMBOL(sock_no_bind);
+
+int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
+		    int len, int flags)
+{
+	return -EOPNOTSUPP;
+}
+EXPORT_SYMBOL(sock_no_connect);
+
+int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
+{
+	return -EOPNOTSUPP;
+}
+EXPORT_SYMBOL(sock_no_socketpair);
+
+int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
+{
+	return -EOPNOTSUPP;
+}
+EXPORT_SYMBOL(sock_no_accept);
+
+int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
+		    int *len, int peer)
+{
+	return -EOPNOTSUPP;
+}
+EXPORT_SYMBOL(sock_no_getname);
+
+unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
+{
+	return 0;
+}
+EXPORT_SYMBOL(sock_no_poll);
+
+int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+	return -EOPNOTSUPP;
+}
+EXPORT_SYMBOL(sock_no_ioctl);
+
+int sock_no_listen(struct socket *sock, int backlog)
+{
+	return -EOPNOTSUPP;
+}
+EXPORT_SYMBOL(sock_no_listen);
+
+int sock_no_shutdown(struct socket *sock, int how)
+{
+	return -EOPNOTSUPP;
+}
+EXPORT_SYMBOL(sock_no_shutdown);
+
+int sock_no_setsockopt(struct socket *sock, int level, int optname,
+		    char __user *optval, unsigned int optlen)
+{
+	return -EOPNOTSUPP;
+}
+EXPORT_SYMBOL(sock_no_setsockopt);
+
+int sock_no_getsockopt(struct socket *sock, int level, int optname,
+		    char __user *optval, int __user *optlen)
+{
+	return -EOPNOTSUPP;
+}
+EXPORT_SYMBOL(sock_no_getsockopt);
+
+int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
+		    size_t len)
+{
+	return -EOPNOTSUPP;
+}
+EXPORT_SYMBOL(sock_no_sendmsg);
+
+int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
+		    size_t len, int flags)
+{
+	return -EOPNOTSUPP;
+}
+EXPORT_SYMBOL(sock_no_recvmsg);
+
+int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
+{
+	/* Mirror missing mmap method error code */
+	return -ENODEV;
+}
+EXPORT_SYMBOL(sock_no_mmap);
+
+ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
+{
+	ssize_t res;
+	struct msghdr msg = {.msg_flags = flags};
+	struct kvec iov;
+	char *kaddr = kmap(page);
+	iov.iov_base = kaddr + offset;
+	iov.iov_len = size;
+	res = kernel_sendmsg(sock, &msg, &iov, 1, size);
+	kunmap(page);
+	return res;
+}
+EXPORT_SYMBOL(sock_no_sendpage);
+
+/*
+ *	Default Socket Callbacks
+ */
+
+static void sock_def_wakeup(struct sock *sk)
+{
+	struct socket_wq *wq;
+
+	rcu_read_lock();
+	wq = rcu_dereference(sk->sk_wq);
+	if (wq_has_sleeper(wq))
+		wake_up_interruptible_all(&wq->wait);
+	rcu_read_unlock();
+}
+
+static void sock_def_error_report(struct sock *sk)
+{
+	struct socket_wq *wq;
+
+	rcu_read_lock();
+	wq = rcu_dereference(sk->sk_wq);
+	if (wq_has_sleeper(wq))
+		wake_up_interruptible_poll(&wq->wait, POLLERR);
+	sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
+	rcu_read_unlock();
+}
+
+static void sock_def_readable(struct sock *sk, int len)
+{
+	struct socket_wq *wq;
+
+	rcu_read_lock();
+	wq = rcu_dereference(sk->sk_wq);
+	if (wq_has_sleeper(wq))
+		wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI |
+						POLLRDNORM | POLLRDBAND);
+	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
+	rcu_read_unlock();
+}
+
+static void sock_def_write_space(struct sock *sk)
+{
+	struct socket_wq *wq;
+
+	rcu_read_lock();
+
+	/* Do not wake up a writer until he can make "significant"
+	 * progress.  --DaveM
+	 */
+	if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
+		wq = rcu_dereference(sk->sk_wq);
+		if (wq_has_sleeper(wq))
+			wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
+						POLLWRNORM | POLLWRBAND);
+
+		/* Should agree with poll, otherwise some programs break */
+		if (sock_writeable(sk))
+			sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
+	}
+
+	rcu_read_unlock();
+}
+
+static void sock_def_destruct(struct sock *sk)
+{
+	kfree(sk->sk_protinfo);
+}
+
+void sk_send_sigurg(struct sock *sk)
+{
+	if (sk->sk_socket && sk->sk_socket->file)
+		if (send_sigurg(&sk->sk_socket->file->f_owner))
+			sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
+}
+EXPORT_SYMBOL(sk_send_sigurg);
+
+void sk_reset_timer(struct sock *sk, struct timer_list* timer,
+		    unsigned long expires)
+{
+	if (!mod_timer(timer, expires))
+		sock_hold(sk);
+}
+EXPORT_SYMBOL(sk_reset_timer);
+
+void sk_stop_timer(struct sock *sk, struct timer_list* timer)
+{
+	if (timer_pending(timer) && del_timer(timer))
+		__sock_put(sk);
+}
+EXPORT_SYMBOL(sk_stop_timer);
+
+void sock_init_data(struct socket *sock, struct sock *sk)
+{
+	skb_queue_head_init(&sk->sk_receive_queue);
+	skb_queue_head_init(&sk->sk_write_queue);
+	skb_queue_head_init(&sk->sk_error_queue);
+#ifdef CONFIG_NET_DMA
+	skb_queue_head_init(&sk->sk_async_wait_queue);
+#endif
+
+	sk->sk_send_head	=	NULL;
+
+	init_timer(&sk->sk_timer);
+
+	sk->sk_allocation	=	GFP_KERNEL;
+	sk->sk_rcvbuf		=	sysctl_rmem_default;
+	sk->sk_sndbuf		=	sysctl_wmem_default;
+	sk->sk_state		=	TCP_CLOSE;
+	sk_set_socket(sk, sock);
+
+	sock_set_flag(sk, SOCK_ZAPPED);
+
+	if (sock) {
+		sk->sk_type	=	sock->type;
+		sk->sk_wq	=	sock->wq;
+		sock->sk	=	sk;
+	} else
+		sk->sk_wq	=	NULL;
+
+	spin_lock_init(&sk->sk_dst_lock);
+	rwlock_init(&sk->sk_callback_lock);
+	lockdep_set_class_and_name(&sk->sk_callback_lock,
+			af_callback_keys + sk->sk_family,
+			af_family_clock_key_strings[sk->sk_family]);
+
+	sk->sk_state_change	=	sock_def_wakeup;
+	sk->sk_data_ready	=	sock_def_readable;
+	sk->sk_write_space	=	sock_def_write_space;
+	sk->sk_error_report	=	sock_def_error_report;
+	sk->sk_destruct		=	sock_def_destruct;
+
+	sk->sk_sndmsg_page	=	NULL;
+	sk->sk_sndmsg_off	=	0;
+	sk->sk_peek_off		=	-1;
+
+	sk->sk_peer_pid 	=	NULL;
+	sk->sk_peer_cred	=	NULL;
+	sk->sk_write_pending	=	0;
+	sk->sk_rcvlowat		=	1;
+	sk->sk_rcvtimeo		=	MAX_SCHEDULE_TIMEOUT;
+	sk->sk_sndtimeo		=	MAX_SCHEDULE_TIMEOUT;
+
+	sk->sk_stamp = ktime_set(-1L, 0);
+
+	/*
+	 * Before updating sk_refcnt, we must commit prior changes to memory
+	 * (Documentation/RCU/rculist_nulls.txt for details)
+	 */
+	smp_wmb();
+	atomic_set(&sk->sk_refcnt, 1);
+	atomic_set(&sk->sk_drops, 0);
+}
+EXPORT_SYMBOL(sock_init_data);
+
+void lock_sock_nested(struct sock *sk, int subclass)
+{
+	might_sleep();
+	spin_lock_bh(&sk->sk_lock.slock);
+	if (sk->sk_lock.owned)
+		__lock_sock(sk);
+	sk->sk_lock.owned = 1;
+	spin_unlock(&sk->sk_lock.slock);
+	/*
+	 * The sk_lock has mutex_lock() semantics here:
+	 */
+	mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
+	local_bh_enable();
+}
+EXPORT_SYMBOL(lock_sock_nested);
+
+void release_sock(struct sock *sk)
+{
+	/*
+	 * The sk_lock has mutex_unlock() semantics:
+	 */
+	mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
+
+	spin_lock_bh(&sk->sk_lock.slock);
+	if (sk->sk_backlog.tail)
+		__release_sock(sk);
+	sk->sk_lock.owned = 0;
+	if (waitqueue_active(&sk->sk_lock.wq))
+		wake_up(&sk->sk_lock.wq);
+	spin_unlock_bh(&sk->sk_lock.slock);
+}
+EXPORT_SYMBOL(release_sock);
+
+/**
+ * lock_sock_fast - fast version of lock_sock
+ * @sk: socket
+ *
+ * This version should be used for very small section, where process wont block
+ * return false if fast path is taken
+ *   sk_lock.slock locked, owned = 0, BH disabled
+ * return true if slow path is taken
+ *   sk_lock.slock unlocked, owned = 1, BH enabled
+ */
+bool lock_sock_fast(struct sock *sk)
+{
+	might_sleep();
+	spin_lock_bh(&sk->sk_lock.slock);
+
+	if (!sk->sk_lock.owned)
+		/*
+		 * Note : We must disable BH
+		 */
+		return false;
+
+	__lock_sock(sk);
+	sk->sk_lock.owned = 1;
+	spin_unlock(&sk->sk_lock.slock);
+	/*
+	 * The sk_lock has mutex_lock() semantics here:
+	 */
+	mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
+	local_bh_enable();
+	return true;
+}
+EXPORT_SYMBOL(lock_sock_fast);
+
+int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
+{
+	struct timeval tv;
+	if (!sock_flag(sk, SOCK_TIMESTAMP))
+		sock_enable_timestamp(sk, SOCK_TIMESTAMP);
+	tv = ktime_to_timeval(sk->sk_stamp);
+	if (tv.tv_sec == -1)
+		return -ENOENT;
+	if (tv.tv_sec == 0) {
+		sk->sk_stamp = ktime_get_real();
+		tv = ktime_to_timeval(sk->sk_stamp);
+	}
+	return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
+}
+EXPORT_SYMBOL(sock_get_timestamp);
+
+int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
+{
+	struct timespec ts;
+	if (!sock_flag(sk, SOCK_TIMESTAMP))
+		sock_enable_timestamp(sk, SOCK_TIMESTAMP);
+	ts = ktime_to_timespec(sk->sk_stamp);
+	if (ts.tv_sec == -1)
+		return -ENOENT;
+	if (ts.tv_sec == 0) {
+		sk->sk_stamp = ktime_get_real();
+		ts = ktime_to_timespec(sk->sk_stamp);
+	}
+	return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
+}
+EXPORT_SYMBOL(sock_get_timestampns);
+
+void sock_enable_timestamp(struct sock *sk, int flag)
+{
+	if (!sock_flag(sk, flag)) {
+		unsigned long previous_flags = sk->sk_flags;
+
+		sock_set_flag(sk, flag);
+		/*
+		 * we just set one of the two flags which require net
+		 * time stamping, but time stamping might have been on
+		 * already because of the other one
+		 */
+		if (!(previous_flags & SK_FLAGS_TIMESTAMP))
+			net_enable_timestamp();
+	}
+}
+
+/*
+ *	Get a socket option on an socket.
+ *
+ *	FIX: POSIX 1003.1g is very ambiguous here. It states that
+ *	asynchronous errors should be reported by getsockopt. We assume
+ *	this means if you specify SO_ERROR (otherwise whats the point of it).
+ */
+int sock_common_getsockopt(struct socket *sock, int level, int optname,
+			   char __user *optval, int __user *optlen)
+{
+	struct sock *sk = sock->sk;
+
+	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
+}
+EXPORT_SYMBOL(sock_common_getsockopt);
+
+#ifdef CONFIG_COMPAT
+int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
+				  char __user *optval, int __user *optlen)
+{
+	struct sock *sk = sock->sk;
+
+	if (sk->sk_prot->compat_getsockopt != NULL)
+		return sk->sk_prot->compat_getsockopt(sk, level, optname,
+						      optval, optlen);
+	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
+}
+EXPORT_SYMBOL(compat_sock_common_getsockopt);
+#endif
+
+int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
+			struct msghdr *msg, size_t size, int flags)
+{
+	struct sock *sk = sock->sk;
+	int addr_len = 0;
+	int err;
+
+	err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
+				   flags & ~MSG_DONTWAIT, &addr_len);
+	if (err >= 0)
+		msg->msg_namelen = addr_len;
+	return err;
+}
+EXPORT_SYMBOL(sock_common_recvmsg);
+
+/*
+ *	Set socket options on an inet socket.
+ */
+int sock_common_setsockopt(struct socket *sock, int level, int optname,
+			   char __user *optval, unsigned int optlen)
+{
+	struct sock *sk = sock->sk;
+
+	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
+}
+EXPORT_SYMBOL(sock_common_setsockopt);
+
+#ifdef CONFIG_COMPAT
+int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
+				  char __user *optval, unsigned int optlen)
+{
+	struct sock *sk = sock->sk;
+
+	if (sk->sk_prot->compat_setsockopt != NULL)
+		return sk->sk_prot->compat_setsockopt(sk, level, optname,
+						      optval, optlen);
+	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
+}
+EXPORT_SYMBOL(compat_sock_common_setsockopt);
+#endif
+
+void sk_common_release(struct sock *sk)
+{
+	if (sk->sk_prot->destroy)
+		sk->sk_prot->destroy(sk);
+
+	/*
+	 * Observation: when sock_common_release is called, processes have
+	 * no access to socket. But net still has.
+	 * Step one, detach it from networking:
+	 *
+	 * A. Remove from hash tables.
+	 */
+
+	sk->sk_prot->unhash(sk);
+
+	/*
+	 * In this point socket cannot receive new packets, but it is possible
+	 * that some packets are in flight because some CPU runs receiver and
+	 * did hash table lookup before we unhashed socket. They will achieve
+	 * receive queue and will be purged by socket destructor.
+	 *
+	 * Also we still have packets pending on receive queue and probably,
+	 * our own packets waiting in device queues. sock_destroy will drain
+	 * receive queue, but transmitted packets will delay socket destruction
+	 * until the last reference will be released.
+	 */
+
+	sock_orphan(sk);
+
+	xfrm_sk_free_policy(sk);
+
+	sk_refcnt_debug_release(sk);
+	sock_put(sk);
+}
+EXPORT_SYMBOL(sk_common_release);
+
+#ifdef CONFIG_PROC_FS
+#define PROTO_INUSE_NR	64	/* should be enough for the first time */
+struct prot_inuse {
+	int val[PROTO_INUSE_NR];
+};
+
+static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
+
+#ifdef CONFIG_NET_NS
+void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
+{
+	__this_cpu_add(net->core.inuse->val[prot->inuse_idx], val);
+}
+EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
+
+int sock_prot_inuse_get(struct net *net, struct proto *prot)
+{
+	int cpu, idx = prot->inuse_idx;
+	int res = 0;
+
+	for_each_possible_cpu(cpu)
+		res += per_cpu_ptr(net->core.inuse, cpu)->val[idx];
+
+	return res >= 0 ? res : 0;
+}
+EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
+
+static int __net_init sock_inuse_init_net(struct net *net)
+{
+	net->core.inuse = alloc_percpu(struct prot_inuse);
+	return net->core.inuse ? 0 : -ENOMEM;
+}
+
+static void __net_exit sock_inuse_exit_net(struct net *net)
+{
+	free_percpu(net->core.inuse);
+}
+
+static struct pernet_operations net_inuse_ops = {
+	.init = sock_inuse_init_net,
+	.exit = sock_inuse_exit_net,
+};
+
+static __init int net_inuse_init(void)
+{
+	if (register_pernet_subsys(&net_inuse_ops))
+		panic("Cannot initialize net inuse counters");
+
+	return 0;
+}
+
+core_initcall(net_inuse_init);
+#else
+static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);
+
+void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
+{
+	__this_cpu_add(prot_inuse.val[prot->inuse_idx], val);
+}
+EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
+
+int sock_prot_inuse_get(struct net *net, struct proto *prot)
+{
+	int cpu, idx = prot->inuse_idx;
+	int res = 0;
+
+	for_each_possible_cpu(cpu)
+		res += per_cpu(prot_inuse, cpu).val[idx];
+
+	return res >= 0 ? res : 0;
+}
+EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
+#endif
+
+static void assign_proto_idx(struct proto *prot)
+{
+	prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);
+
+	if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
+		printk(KERN_ERR "PROTO_INUSE_NR exhausted\n");
+		return;
+	}
+
+	set_bit(prot->inuse_idx, proto_inuse_idx);
+}
+
+static void release_proto_idx(struct proto *prot)
+{
+	if (prot->inuse_idx != PROTO_INUSE_NR - 1)
+		clear_bit(prot->inuse_idx, proto_inuse_idx);
+}
+#else
+static inline void assign_proto_idx(struct proto *prot)
+{
+}
+
+static inline void release_proto_idx(struct proto *prot)
+{
+}
+#endif
+
+int proto_register(struct proto *prot, int alloc_slab)
+{
+	if (alloc_slab) {
+		prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
+					SLAB_HWCACHE_ALIGN | prot->slab_flags,
+					NULL);
+
+		if (prot->slab == NULL) {
+			printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
+			       prot->name);
+			goto out;
+		}
+
+		if (prot->rsk_prot != NULL) {
+			prot->rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s", prot->name);
+			if (prot->rsk_prot->slab_name == NULL)
+				goto out_free_sock_slab;
+
+			prot->rsk_prot->slab = kmem_cache_create(prot->rsk_prot->slab_name,
+								 prot->rsk_prot->obj_size, 0,
+								 SLAB_HWCACHE_ALIGN, NULL);
+
+			if (prot->rsk_prot->slab == NULL) {
+				printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n",
+				       prot->name);
+				goto out_free_request_sock_slab_name;
+			}
+		}
+
+		if (prot->twsk_prot != NULL) {
+			prot->twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s", prot->name);
+
+			if (prot->twsk_prot->twsk_slab_name == NULL)
+				goto out_free_request_sock_slab;
+
+			prot->twsk_prot->twsk_slab =
+				kmem_cache_create(prot->twsk_prot->twsk_slab_name,
+						  prot->twsk_prot->twsk_obj_size,
+						  0,
+						  SLAB_HWCACHE_ALIGN |
+							prot->slab_flags,
+						  NULL);
+			if (prot->twsk_prot->twsk_slab == NULL)
+				goto out_free_timewait_sock_slab_name;
+		}
+	}
+
+	mutex_lock(&proto_list_mutex);
+	list_add(&prot->node, &proto_list);
+	assign_proto_idx(prot);
+	mutex_unlock(&proto_list_mutex);
+	return 0;
+
+out_free_timewait_sock_slab_name:
+	kfree(prot->twsk_prot->twsk_slab_name);
+out_free_request_sock_slab:
+	if (prot->rsk_prot && prot->rsk_prot->slab) {
+		kmem_cache_destroy(prot->rsk_prot->slab);
+		prot->rsk_prot->slab = NULL;
+	}
+out_free_request_sock_slab_name:
+	if (prot->rsk_prot)
+		kfree(prot->rsk_prot->slab_name);
+out_free_sock_slab:
+	kmem_cache_destroy(prot->slab);
+	prot->slab = NULL;
+out:
+	return -ENOBUFS;
+}
+EXPORT_SYMBOL(proto_register);
+
+void proto_unregister(struct proto *prot)
+{
+	mutex_lock(&proto_list_mutex);
+	release_proto_idx(prot);
+	list_del(&prot->node);
+	mutex_unlock(&proto_list_mutex);
+
+	if (prot->slab != NULL) {
+		kmem_cache_destroy(prot->slab);
+		prot->slab = NULL;
+	}
+
+	if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
+		kmem_cache_destroy(prot->rsk_prot->slab);
+		kfree(prot->rsk_prot->slab_name);
+		prot->rsk_prot->slab = NULL;
+	}
+
+	if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
+		kmem_cache_destroy(prot->twsk_prot->twsk_slab);
+		kfree(prot->twsk_prot->twsk_slab_name);
+		prot->twsk_prot->twsk_slab = NULL;
+	}
+}
+EXPORT_SYMBOL(proto_unregister);
+
+#ifdef CONFIG_PROC_FS
+static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(proto_list_mutex)
+{
+	mutex_lock(&proto_list_mutex);
+	return seq_list_start_head(&proto_list, *pos);
+}
+
+static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	return seq_list_next(v, &proto_list, pos);
+}
+
+static void proto_seq_stop(struct seq_file *seq, void *v)
+	__releases(proto_list_mutex)
+{
+	mutex_unlock(&proto_list_mutex);
+}
+
+static char proto_method_implemented(const void *method)
+{
+	return method == NULL ? 'n' : 'y';
+}
+static long sock_prot_memory_allocated(struct proto *proto)
+{
+	return proto->memory_allocated != NULL ? proto_memory_allocated(proto): -1L;
+}
+
+static char *sock_prot_memory_pressure(struct proto *proto)
+{
+	return proto->memory_pressure != NULL ?
+	proto_memory_pressure(proto) ? "yes" : "no" : "NI";
+}
+
+static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
+{
+
+	seq_printf(seq, "%-9s %4u %6d  %6ld   %-3s %6u   %-3s  %-10s "
+			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
+		   proto->name,
+		   proto->obj_size,
+		   sock_prot_inuse_get(seq_file_net(seq), proto),
+		   sock_prot_memory_allocated(proto),
+		   sock_prot_memory_pressure(proto),
+		   proto->max_header,
+		   proto->slab == NULL ? "no" : "yes",
+		   module_name(proto->owner),
+		   proto_method_implemented(proto->close),
+		   proto_method_implemented(proto->connect),
+		   proto_method_implemented(proto->disconnect),
+		   proto_method_implemented(proto->accept),
+		   proto_method_implemented(proto->ioctl),
+		   proto_method_implemented(proto->init),
+		   proto_method_implemented(proto->destroy),
+		   proto_method_implemented(proto->shutdown),
+		   proto_method_implemented(proto->setsockopt),
+		   proto_method_implemented(proto->getsockopt),
+		   proto_method_implemented(proto->sendmsg),
+		   proto_method_implemented(proto->recvmsg),
+		   proto_method_implemented(proto->sendpage),
+		   proto_method_implemented(proto->bind),
+		   proto_method_implemented(proto->backlog_rcv),
+		   proto_method_implemented(proto->hash),
+		   proto_method_implemented(proto->unhash),
+		   proto_method_implemented(proto->get_port),
+		   proto_method_implemented(proto->enter_memory_pressure));
+}
+
+static int proto_seq_show(struct seq_file *seq, void *v)
+{
+	if (v == &proto_list)
+		seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
+			   "protocol",
+			   "size",
+			   "sockets",
+			   "memory",
+			   "press",
+			   "maxhdr",
+			   "slab",
+			   "module",
+			   "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
+	else
+		proto_seq_printf(seq, list_entry(v, struct proto, node));
+	return 0;
+}
+
+static const struct seq_operations proto_seq_ops = {
+	.start  = proto_seq_start,
+	.next   = proto_seq_next,
+	.stop   = proto_seq_stop,
+	.show   = proto_seq_show,
+};
+
+static int proto_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open_net(inode, file, &proto_seq_ops,
+			    sizeof(struct seq_net_private));
+}
+
+static const struct file_operations proto_seq_fops = {
+	.owner		= THIS_MODULE,
+	.open		= proto_seq_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release_net,
+};
+
+static __net_init int proto_init_net(struct net *net)
+{
+	if (!proc_net_fops_create(net, "protocols", S_IRUGO, &proto_seq_fops))
+		return -ENOMEM;
+
+	return 0;
+}
+
+static __net_exit void proto_exit_net(struct net *net)
+{
+	proc_net_remove(net, "protocols");
+}
+
+
+static __net_initdata struct pernet_operations proto_net_ops = {
+	.init = proto_init_net,
+	.exit = proto_exit_net,
+};
+
+static int __init proto_init(void)
+{
+	return register_pernet_subsys(&proto_net_ops);
+}
+
+subsys_initcall(proto_init);
+
+#endif /* PROC_FS */
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
new file mode 100644
index 00000000..b9868e1f
--- /dev/null
+++ b/net/core/sock_diag.c
@@ -0,0 +1,192 @@
+#include <linux/mutex.h>
+#include <linux/socket.h>
+#include <linux/skbuff.h>
+#include <net/netlink.h>
+#include <net/net_namespace.h>
+#include <linux/module.h>
+#include <linux/rtnetlink.h>
+#include <net/sock.h>
+
+#include <linux/inet_diag.h>
+#include <linux/sock_diag.h>
+
+static struct sock_diag_handler *sock_diag_handlers[AF_MAX];
+static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh);
+static DEFINE_MUTEX(sock_diag_table_mutex);
+
+int sock_diag_check_cookie(void *sk, __u32 *cookie)
+{
+	if ((cookie[0] != INET_DIAG_NOCOOKIE ||
+	     cookie[1] != INET_DIAG_NOCOOKIE) &&
+	    ((u32)(unsigned long)sk != cookie[0] ||
+	     (u32)((((unsigned long)sk) >> 31) >> 1) != cookie[1]))
+		return -ESTALE;
+	else
+		return 0;
+}
+EXPORT_SYMBOL_GPL(sock_diag_check_cookie);
+
+void sock_diag_save_cookie(void *sk, __u32 *cookie)
+{
+	cookie[0] = (u32)(unsigned long)sk;
+	cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1);
+}
+EXPORT_SYMBOL_GPL(sock_diag_save_cookie);
+
+int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attrtype)
+{
+	__u32 *mem;
+
+	mem = RTA_DATA(__RTA_PUT(skb, attrtype, SK_MEMINFO_VARS * sizeof(__u32)));
+
+	mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk);
+	mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf;
+	mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
+	mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf;
+	mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
+	mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued;
+	mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
+
+	return 0;
+
+rtattr_failure:
+	return -EMSGSIZE;
+}
+EXPORT_SYMBOL_GPL(sock_diag_put_meminfo);
+
+void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh))
+{
+	mutex_lock(&sock_diag_table_mutex);
+	inet_rcv_compat = fn;
+	mutex_unlock(&sock_diag_table_mutex);
+}
+EXPORT_SYMBOL_GPL(sock_diag_register_inet_compat);
+
+void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh))
+{
+	mutex_lock(&sock_diag_table_mutex);
+	inet_rcv_compat = NULL;
+	mutex_unlock(&sock_diag_table_mutex);
+}
+EXPORT_SYMBOL_GPL(sock_diag_unregister_inet_compat);
+
+int sock_diag_register(struct sock_diag_handler *hndl)
+{
+	int err = 0;
+
+	if (hndl->family >= AF_MAX)
+		return -EINVAL;
+
+	mutex_lock(&sock_diag_table_mutex);
+	if (sock_diag_handlers[hndl->family])
+		err = -EBUSY;
+	else
+		sock_diag_handlers[hndl->family] = hndl;
+	mutex_unlock(&sock_diag_table_mutex);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(sock_diag_register);
+
+void sock_diag_unregister(struct sock_diag_handler *hnld)
+{
+	int family = hnld->family;
+
+	if (family >= AF_MAX)
+		return;
+
+	mutex_lock(&sock_diag_table_mutex);
+	BUG_ON(sock_diag_handlers[family] != hnld);
+	sock_diag_handlers[family] = NULL;
+	mutex_unlock(&sock_diag_table_mutex);
+}
+EXPORT_SYMBOL_GPL(sock_diag_unregister);
+
+static inline struct sock_diag_handler *sock_diag_lock_handler(int family)
+{
+	if (sock_diag_handlers[family] == NULL)
+		request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
+				NETLINK_SOCK_DIAG, family);
+
+	mutex_lock(&sock_diag_table_mutex);
+	return sock_diag_handlers[family];
+}
+
+static inline void sock_diag_unlock_handler(struct sock_diag_handler *h)
+{
+	mutex_unlock(&sock_diag_table_mutex);
+}
+
+static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+	int err;
+	struct sock_diag_req *req = NLMSG_DATA(nlh);
+	struct sock_diag_handler *hndl;
+
+	if (nlmsg_len(nlh) < sizeof(*req))
+		return -EINVAL;
+
+	hndl = sock_diag_lock_handler(req->sdiag_family);
+	if (hndl == NULL)
+		err = -ENOENT;
+	else
+		err = hndl->dump(skb, nlh);
+	sock_diag_unlock_handler(hndl);
+
+	return err;
+}
+
+static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+	int ret;
+
+	switch (nlh->nlmsg_type) {
+	case TCPDIAG_GETSOCK:
+	case DCCPDIAG_GETSOCK:
+		if (inet_rcv_compat == NULL)
+			request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
+					NETLINK_SOCK_DIAG, AF_INET);
+
+		mutex_lock(&sock_diag_table_mutex);
+		if (inet_rcv_compat != NULL)
+			ret = inet_rcv_compat(skb, nlh);
+		else
+			ret = -EOPNOTSUPP;
+		mutex_unlock(&sock_diag_table_mutex);
+
+		return ret;
+	case SOCK_DIAG_BY_FAMILY:
+		return __sock_diag_rcv_msg(skb, nlh);
+	default:
+		return -EINVAL;
+	}
+}
+
+static DEFINE_MUTEX(sock_diag_mutex);
+
+static void sock_diag_rcv(struct sk_buff *skb)
+{
+	mutex_lock(&sock_diag_mutex);
+	netlink_rcv_skb(skb, &sock_diag_rcv_msg);
+	mutex_unlock(&sock_diag_mutex);
+}
+
+struct sock *sock_diag_nlsk;
+EXPORT_SYMBOL_GPL(sock_diag_nlsk);
+
+static int __init sock_diag_init(void)
+{
+	sock_diag_nlsk = netlink_kernel_create(&init_net, NETLINK_SOCK_DIAG, 0,
+					sock_diag_rcv, NULL, THIS_MODULE);
+	return sock_diag_nlsk == NULL ? -ENOMEM : 0;
+}
+
+static void __exit sock_diag_exit(void)
+{
+	netlink_kernel_release(sock_diag_nlsk);
+}
+
+module_init(sock_diag_init);
+module_exit(sock_diag_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_SOCK_DIAG);
diff --git a/net/core/stream.c b/net/core/stream.c
new file mode 100644
index 00000000..f5df85dc
--- /dev/null
+++ b/net/core/stream.c
@@ -0,0 +1,208 @@
+/*
+ *     SUCS NET3:
+ *
+ *     Generic stream handling routines. These are generic for most
+ *     protocols. Even IP. Tonight 8-).
+ *     This is used because TCP, LLC (others too) layer all have mostly
+ *     identical sendmsg() and recvmsg() code.
+ *     So we (will) share it here.
+ *
+ *     Authors:        Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *                     (from old tcp.c code)
+ *                     Alan Cox <alan@lxorguk.ukuu.org.uk> (Borrowed comments 8-))
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/signal.h>
+#include <linux/tcp.h>
+#include <linux/wait.h>
+#include <net/sock.h>
+
+/**
+ * sk_stream_write_space - stream socket write_space callback.
+ * @sk: socket
+ *
+ * FIXME: write proper description
+ */
+void sk_stream_write_space(struct sock *sk)
+{
+	struct socket *sock = sk->sk_socket;
+	struct socket_wq *wq;
+
+	if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) {
+		clear_bit(SOCK_NOSPACE, &sock->flags);
+
+		rcu_read_lock();
+		wq = rcu_dereference(sk->sk_wq);
+		if (wq_has_sleeper(wq))
+			wake_up_interruptible_poll(&wq->wait, POLLOUT |
+						POLLWRNORM | POLLWRBAND);
+		if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
+			sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT);
+		rcu_read_unlock();
+	}
+}
+EXPORT_SYMBOL(sk_stream_write_space);
+
+/**
+ * sk_stream_wait_connect - Wait for a socket to get into the connected state
+ * @sk: sock to wait on
+ * @timeo_p: for how long to wait
+ *
+ * Must be called with the socket locked.
+ */
+int sk_stream_wait_connect(struct sock *sk, long *timeo_p)
+{
+	struct task_struct *tsk = current;
+	DEFINE_WAIT(wait);
+	int done;
+
+	do {
+		int err = sock_error(sk);
+		if (err)
+			return err;
+		if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV))
+			return -EPIPE;
+		if (!*timeo_p)
+			return -EAGAIN;
+		if (signal_pending(tsk))
+			return sock_intr_errno(*timeo_p);
+
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+		sk->sk_write_pending++;
+		done = sk_wait_event(sk, timeo_p,
+				     !sk->sk_err &&
+				     !((1 << sk->sk_state) &
+				       ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)));
+		finish_wait(sk_sleep(sk), &wait);
+		sk->sk_write_pending--;
+	} while (!done);
+	return 0;
+}
+EXPORT_SYMBOL(sk_stream_wait_connect);
+
+/**
+ * sk_stream_closing - Return 1 if we still have things to send in our buffers.
+ * @sk: socket to verify
+ */
+static inline int sk_stream_closing(struct sock *sk)
+{
+	return (1 << sk->sk_state) &
+	       (TCPF_FIN_WAIT1 | TCPF_CLOSING | TCPF_LAST_ACK);
+}
+
+void sk_stream_wait_close(struct sock *sk, long timeout)
+{
+	if (timeout) {
+		DEFINE_WAIT(wait);
+
+		do {
+			prepare_to_wait(sk_sleep(sk), &wait,
+					TASK_INTERRUPTIBLE);
+			if (sk_wait_event(sk, &timeout, !sk_stream_closing(sk)))
+				break;
+		} while (!signal_pending(current) && timeout);
+
+		finish_wait(sk_sleep(sk), &wait);
+	}
+}
+EXPORT_SYMBOL(sk_stream_wait_close);
+
+/**
+ * sk_stream_wait_memory - Wait for more memory for a socket
+ * @sk: socket to wait for memory
+ * @timeo_p: for how long
+ */
+int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
+{
+	int err = 0;
+	long vm_wait = 0;
+	long current_timeo = *timeo_p;
+	DEFINE_WAIT(wait);
+
+	if (sk_stream_memory_free(sk))
+		current_timeo = vm_wait = (net_random() % (HZ / 5)) + 2;
+
+	while (1) {
+		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+
+		if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
+			goto do_error;
+		if (!*timeo_p)
+			goto do_nonblock;
+		if (signal_pending(current))
+			goto do_interrupted;
+		clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+		if (sk_stream_memory_free(sk) && !vm_wait)
+			break;
+
+		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+		sk->sk_write_pending++;
+		sk_wait_event(sk, &current_timeo, sk->sk_err ||
+						  (sk->sk_shutdown & SEND_SHUTDOWN) ||
+						  (sk_stream_memory_free(sk) &&
+						  !vm_wait));
+		sk->sk_write_pending--;
+
+		if (vm_wait) {
+			vm_wait -= current_timeo;
+			current_timeo = *timeo_p;
+			if (current_timeo != MAX_SCHEDULE_TIMEOUT &&
+			    (current_timeo -= vm_wait) < 0)
+				current_timeo = 0;
+			vm_wait = 0;
+		}
+		*timeo_p = current_timeo;
+	}
+out:
+	finish_wait(sk_sleep(sk), &wait);
+	return err;
+
+do_error:
+	err = -EPIPE;
+	goto out;
+do_nonblock:
+	err = -EAGAIN;
+	goto out;
+do_interrupted:
+	err = sock_intr_errno(*timeo_p);
+	goto out;
+}
+EXPORT_SYMBOL(sk_stream_wait_memory);
+
+int sk_stream_error(struct sock *sk, int flags, int err)
+{
+	if (err == -EPIPE)
+		err = sock_error(sk) ? : -EPIPE;
+	if (err == -EPIPE && !(flags & MSG_NOSIGNAL))
+		send_sig(SIGPIPE, current, 0);
+	return err;
+}
+EXPORT_SYMBOL(sk_stream_error);
+
+void sk_stream_kill_queues(struct sock *sk)
+{
+	/* First the read buffer. */
+	__skb_queue_purge(&sk->sk_receive_queue);
+
+	/* Next, the error queue. */
+	__skb_queue_purge(&sk->sk_error_queue);
+
+	/* Next, the write queue. */
+	WARN_ON(!skb_queue_empty(&sk->sk_write_queue));
+
+	/* Account for returned memory. */
+	sk_mem_reclaim(sk);
+
+	WARN_ON(sk->sk_wmem_queued);
+	WARN_ON(sk->sk_forward_alloc);
+
+	/* It is _impossible_ for the backlog to contain anything
+	 * when we get here.  All user references to this socket
+	 * have gone away, only the net layer knows can touch it.
+	 */
+}
+EXPORT_SYMBOL(sk_stream_kill_queues);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
new file mode 100644
index 00000000..0c285087
--- /dev/null
+++ b/net/core/sysctl_net_core.c
@@ -0,0 +1,264 @@
+/* -*- linux-c -*-
+ * sysctl_net_core.c: sysctl interface to net core subsystem.
+ *
+ * Begun April 1, 1996, Mike Shaver.
+ * Added /proc/sys/net/core directory entry (empty =) ). [MS]
+ */
+
+#include <linux/mm.h>
+#include <linux/sysctl.h>
+#include <linux/module.h>
+#include <linux/socket.h>
+#include <linux/netdevice.h>
+#include <linux/ratelimit.h>
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+
+#include <net/ip.h>
+#include <net/sock.h>
+#include <net/net_ratelimit.h>
+
+#ifdef CONFIG_RPS
+static int rps_sock_flow_sysctl(ctl_table *table, int write,
+				void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	unsigned int orig_size, size;
+	int ret, i;
+	ctl_table tmp = {
+		.data = &size,
+		.maxlen = sizeof(size),
+		.mode = table->mode
+	};
+	struct rps_sock_flow_table *orig_sock_table, *sock_table;
+	static DEFINE_MUTEX(sock_flow_mutex);
+
+	mutex_lock(&sock_flow_mutex);
+
+	orig_sock_table = rcu_dereference_protected(rps_sock_flow_table,
+					lockdep_is_held(&sock_flow_mutex));
+	size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0;
+
+	ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
+
+	if (write) {
+		if (size) {
+			if (size > 1<<30) {
+				/* Enforce limit to prevent overflow */
+				mutex_unlock(&sock_flow_mutex);
+				return -EINVAL;
+			}
+			size = roundup_pow_of_two(size);
+			if (size != orig_size) {
+				sock_table =
+				    vmalloc(RPS_SOCK_FLOW_TABLE_SIZE(size));
+				if (!sock_table) {
+					mutex_unlock(&sock_flow_mutex);
+					return -ENOMEM;
+				}
+
+				sock_table->mask = size - 1;
+			} else
+				sock_table = orig_sock_table;
+
+			for (i = 0; i < size; i++)
+				sock_table->ents[i] = RPS_NO_CPU;
+		} else
+			sock_table = NULL;
+
+		if (sock_table != orig_sock_table) {
+			rcu_assign_pointer(rps_sock_flow_table, sock_table);
+			if (sock_table)
+				static_key_slow_inc(&rps_needed);
+			if (orig_sock_table) {
+				static_key_slow_dec(&rps_needed);
+				synchronize_rcu();
+				vfree(orig_sock_table);
+			}
+		}
+	}
+
+	mutex_unlock(&sock_flow_mutex);
+
+	return ret;
+}
+#endif /* CONFIG_RPS */
+
+static struct ctl_table net_core_table[] = {
+#ifdef CONFIG_NET
+	{
+		.procname	= "wmem_max",
+		.data		= &sysctl_wmem_max,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{
+		.procname	= "rmem_max",
+		.data		= &sysctl_rmem_max,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{
+		.procname	= "wmem_default",
+		.data		= &sysctl_wmem_default,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{
+		.procname	= "rmem_default",
+		.data		= &sysctl_rmem_default,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{
+		.procname	= "dev_weight",
+		.data		= &weight_p,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{
+		.procname	= "netdev_max_backlog",
+		.data		= &netdev_max_backlog,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+#ifdef CONFIG_BPF_JIT
+	{
+		.procname	= "bpf_jit_enable",
+		.data		= &bpf_jit_enable,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+#endif
+	{
+		.procname	= "netdev_tstamp_prequeue",
+		.data		= &netdev_tstamp_prequeue,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{
+		.procname	= "message_cost",
+		.data		= &net_ratelimit_state.interval,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	{
+		.procname	= "message_burst",
+		.data		= &net_ratelimit_state.burst,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "optmem_max",
+		.data		= &sysctl_optmem_max,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+#ifdef CONFIG_RPS
+	{
+		.procname	= "rps_sock_flow_entries",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= rps_sock_flow_sysctl
+	},
+#endif
+#endif /* CONFIG_NET */
+	{
+		.procname	= "netdev_budget",
+		.data		= &netdev_budget,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{
+		.procname	= "warnings",
+		.data		= &net_msg_warn,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{ }
+};
+
+static struct ctl_table netns_core_table[] = {
+	{
+		.procname	= "somaxconn",
+		.data		= &init_net.core.sysctl_somaxconn,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{ }
+};
+
+__net_initdata struct ctl_path net_core_path[] = {
+	{ .procname = "net", },
+	{ .procname = "core", },
+	{ },
+};
+
+static __net_init int sysctl_core_net_init(struct net *net)
+{
+	struct ctl_table *tbl;
+
+	net->core.sysctl_somaxconn = SOMAXCONN;
+
+	tbl = netns_core_table;
+	if (!net_eq(net, &init_net)) {
+		tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL);
+		if (tbl == NULL)
+			goto err_dup;
+
+		tbl[0].data = &net->core.sysctl_somaxconn;
+	}
+
+	net->core.sysctl_hdr = register_net_sysctl_table(net,
+			net_core_path, tbl);
+	if (net->core.sysctl_hdr == NULL)
+		goto err_reg;
+
+	return 0;
+
+err_reg:
+	if (tbl != netns_core_table)
+		kfree(tbl);
+err_dup:
+	return -ENOMEM;
+}
+
+static __net_exit void sysctl_core_net_exit(struct net *net)
+{
+	struct ctl_table *tbl;
+
+	tbl = net->core.sysctl_hdr->ctl_table_arg;
+	unregister_net_sysctl_table(net->core.sysctl_hdr);
+	BUG_ON(tbl == netns_core_table);
+	kfree(tbl);
+}
+
+static __net_initdata struct pernet_operations sysctl_core_ops = {
+	.init = sysctl_core_net_init,
+	.exit = sysctl_core_net_exit,
+};
+
+static __init int sysctl_core_init(void)
+{
+	static struct ctl_table empty[1];
+
+	register_sysctl_paths(net_core_path, empty);
+	register_net_sysctl_rotable(net_core_path, net_core_table);
+	return register_pernet_subsys(&sysctl_core_ops);
+}
+
+fs_initcall(sysctl_core_init);
diff --git a/net/core/timestamping.c b/net/core/timestamping.c
new file mode 100644
index 00000000..661b5a40
--- /dev/null
+++ b/net/core/timestamping.c
@@ -0,0 +1,139 @@
+/*
+ * PTP 1588 clock support - support for timestamping in PHY devices
+ *
+ * Copyright (C) 2010 OMICRON electronics GmbH
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <linux/errqueue.h>
+#include <linux/phy.h>
+#include <linux/ptp_classify.h>
+#include <linux/skbuff.h>
+#include <linux/export.h>
+
+static struct sock_filter ptp_filter[] = {
+	PTP_FILTER
+};
+
+static unsigned int classify(const struct sk_buff *skb)
+{
+	if (likely(skb->dev &&
+		   skb->dev->phydev &&
+		   skb->dev->phydev->drv))
+		return sk_run_filter(skb, ptp_filter);
+	else
+		return PTP_CLASS_NONE;
+}
+
+void skb_clone_tx_timestamp(struct sk_buff *skb)
+{
+	struct phy_device *phydev;
+	struct sk_buff *clone;
+	struct sock *sk = skb->sk;
+	unsigned int type;
+
+	if (!sk)
+		return;
+
+	type = classify(skb);
+
+	switch (type) {
+	case PTP_CLASS_V1_IPV4:
+	case PTP_CLASS_V1_IPV6:
+	case PTP_CLASS_V2_IPV4:
+	case PTP_CLASS_V2_IPV6:
+	case PTP_CLASS_V2_L2:
+	case PTP_CLASS_V2_VLAN:
+		phydev = skb->dev->phydev;
+		if (likely(phydev->drv->txtstamp)) {
+			if (!atomic_inc_not_zero(&sk->sk_refcnt))
+				return;
+			clone = skb_clone(skb, GFP_ATOMIC);
+			if (!clone) {
+				sock_put(sk);
+				return;
+			}
+			clone->sk = sk;
+			phydev->drv->txtstamp(phydev, clone, type);
+		}
+		break;
+	default:
+		break;
+	}
+}
+EXPORT_SYMBOL_GPL(skb_clone_tx_timestamp);
+
+void skb_complete_tx_timestamp(struct sk_buff *skb,
+			       struct skb_shared_hwtstamps *hwtstamps)
+{
+	struct sock *sk = skb->sk;
+	struct sock_exterr_skb *serr;
+	int err;
+
+	if (!hwtstamps) {
+		sock_put(sk);
+		kfree_skb(skb);
+		return;
+	}
+
+	*skb_hwtstamps(skb) = *hwtstamps;
+	serr = SKB_EXT_ERR(skb);
+	memset(serr, 0, sizeof(*serr));
+	serr->ee.ee_errno = ENOMSG;
+	serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
+	skb->sk = NULL;
+	err = sock_queue_err_skb(sk, skb);
+	sock_put(sk);
+	if (err)
+		kfree_skb(skb);
+}
+EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp);
+
+bool skb_defer_rx_timestamp(struct sk_buff *skb)
+{
+	struct phy_device *phydev;
+	unsigned int type;
+
+	if (skb_headroom(skb) < ETH_HLEN)
+		return false;
+	__skb_push(skb, ETH_HLEN);
+
+	type = classify(skb);
+
+	__skb_pull(skb, ETH_HLEN);
+
+	switch (type) {
+	case PTP_CLASS_V1_IPV4:
+	case PTP_CLASS_V1_IPV6:
+	case PTP_CLASS_V2_IPV4:
+	case PTP_CLASS_V2_IPV6:
+	case PTP_CLASS_V2_L2:
+	case PTP_CLASS_V2_VLAN:
+		phydev = skb->dev->phydev;
+		if (likely(phydev->drv->rxtstamp))
+			return phydev->drv->rxtstamp(phydev, skb, type);
+		break;
+	default:
+		break;
+	}
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(skb_defer_rx_timestamp);
+
+void __init skb_timestamping_init(void)
+{
+	BUG_ON(sk_chk_filter(ptp_filter, ARRAY_SIZE(ptp_filter)));
+}
diff --git a/net/core/user_dma.c b/net/core/user_dma.c
new file mode 100644
index 00000000..1b5fefdb
--- /dev/null
+++ b/net/core/user_dma.c
@@ -0,0 +1,131 @@
+/*
+ * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved.
+ * Portions based on net/core/datagram.c and copyrighted by their authors.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+
+/*
+ * This code allows the net stack to make use of a DMA engine for
+ * skb to iovec copies.
+ */
+
+#include <linux/dmaengine.h>
+#include <linux/socket.h>
+#include <linux/export.h>
+#include <net/tcp.h>
+#include <net/netdma.h>
+
+#define NET_DMA_DEFAULT_COPYBREAK 4096
+
+int sysctl_tcp_dma_copybreak = NET_DMA_DEFAULT_COPYBREAK;
+EXPORT_SYMBOL(sysctl_tcp_dma_copybreak);
+
+/**
+ *	dma_skb_copy_datagram_iovec - Copy a datagram to an iovec.
+ *	@skb - buffer to copy
+ *	@offset - offset in the buffer to start copying from
+ *	@iovec - io vector to copy to
+ *	@len - amount of data to copy from buffer to iovec
+ *	@pinned_list - locked iovec buffer data
+ *
+ *	Note: the iovec is modified during the copy.
+ */
+int dma_skb_copy_datagram_iovec(struct dma_chan *chan,
+			struct sk_buff *skb, int offset, struct iovec *to,
+			size_t len, struct dma_pinned_list *pinned_list)
+{
+	int start = skb_headlen(skb);
+	int i, copy = start - offset;
+	struct sk_buff *frag_iter;
+	dma_cookie_t cookie = 0;
+
+	/* Copy header. */
+	if (copy > 0) {
+		if (copy > len)
+			copy = len;
+		cookie = dma_memcpy_to_iovec(chan, to, pinned_list,
+					    skb->data + offset, copy);
+		if (cookie < 0)
+			goto fault;
+		len -= copy;
+		if (len == 0)
+			goto end;
+		offset += copy;
+	}
+
+	/* Copy paged appendix. Hmm... why does this look so complicated? */
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+		WARN_ON(start > offset + len);
+
+		end = start + skb_frag_size(frag);
+		copy = end - offset;
+		if (copy > 0) {
+			struct page *page = skb_frag_page(frag);
+
+			if (copy > len)
+				copy = len;
+
+			cookie = dma_memcpy_pg_to_iovec(chan, to, pinned_list, page,
+					frag->page_offset + offset - start, copy);
+			if (cookie < 0)
+				goto fault;
+			len -= copy;
+			if (len == 0)
+				goto end;
+			offset += copy;
+		}
+		start = end;
+	}
+
+	skb_walk_frags(skb, frag_iter) {
+		int end;
+
+		WARN_ON(start > offset + len);
+
+		end = start + frag_iter->len;
+		copy = end - offset;
+		if (copy > 0) {
+			if (copy > len)
+				copy = len;
+			cookie = dma_skb_copy_datagram_iovec(chan, frag_iter,
+							     offset - start,
+							     to, copy,
+							     pinned_list);
+			if (cookie < 0)
+				goto fault;
+			len -= copy;
+			if (len == 0)
+				goto end;
+			offset += copy;
+		}
+		start = end;
+	}
+
+end:
+	if (!len) {
+		skb->dma_cookie = cookie;
+		return cookie;
+	}
+
+fault:
+	return -EFAULT;
+}
diff --git a/net/core/utils.c b/net/core/utils.c
new file mode 100644
index 00000000..dc3c3faf
--- /dev/null
+++ b/net/core/utils.c
@@ -0,0 +1,322 @@
+/*
+ *	Generic address resultion entity
+ *
+ *	Authors:
+ *	net_random Alan Cox
+ *	net_ratelimit Andi Kleen
+ *	in{4,6}_pton YOSHIFUJI Hideaki, Copyright (C)2006 USAGI/WIDE Project
+ *
+ *	Created by Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/inet.h>
+#include <linux/mm.h>
+#include <linux/net.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/percpu.h>
+#include <linux/init.h>
+#include <linux/ratelimit.h>
+
+#include <net/sock.h>
+#include <net/net_ratelimit.h>
+
+#include <asm/byteorder.h>
+#include <asm/uaccess.h>
+
+int net_msg_warn __read_mostly = 1;
+EXPORT_SYMBOL(net_msg_warn);
+
+DEFINE_RATELIMIT_STATE(net_ratelimit_state, 5 * HZ, 10);
+/*
+ * All net warning printk()s should be guarded by this function.
+ */
+int net_ratelimit(void)
+{
+	return __ratelimit(&net_ratelimit_state);
+}
+EXPORT_SYMBOL(net_ratelimit);
+
+/*
+ * Convert an ASCII string to binary IP.
+ * This is outside of net/ipv4/ because various code that uses IP addresses
+ * is otherwise not dependent on the TCP/IP stack.
+ */
+
+__be32 in_aton(const char *str)
+{
+	unsigned long l;
+	unsigned int val;
+	int i;
+
+	l = 0;
+	for (i = 0; i < 4; i++)
+	{
+		l <<= 8;
+		if (*str != '\0')
+		{
+			val = 0;
+			while (*str != '\0' && *str != '.' && *str != '\n')
+			{
+				val *= 10;
+				val += *str - '0';
+				str++;
+			}
+			l |= val;
+			if (*str != '\0')
+				str++;
+		}
+	}
+	return htonl(l);
+}
+EXPORT_SYMBOL(in_aton);
+
+#define IN6PTON_XDIGIT		0x00010000
+#define IN6PTON_DIGIT		0x00020000
+#define IN6PTON_COLON_MASK	0x00700000
+#define IN6PTON_COLON_1		0x00100000	/* single : requested */
+#define IN6PTON_COLON_2		0x00200000	/* second : requested */
+#define IN6PTON_COLON_1_2	0x00400000	/* :: requested */
+#define IN6PTON_DOT		0x00800000	/* . */
+#define IN6PTON_DELIM		0x10000000
+#define IN6PTON_NULL		0x20000000	/* first/tail */
+#define IN6PTON_UNKNOWN		0x40000000
+
+static inline int xdigit2bin(char c, int delim)
+{
+	int val;
+
+	if (c == delim || c == '\0')
+		return IN6PTON_DELIM;
+	if (c == ':')
+		return IN6PTON_COLON_MASK;
+	if (c == '.')
+		return IN6PTON_DOT;
+
+	val = hex_to_bin(c);
+	if (val >= 0)
+		return val | IN6PTON_XDIGIT | (val < 10 ? IN6PTON_DIGIT : 0);
+
+	if (delim == -1)
+		return IN6PTON_DELIM;
+	return IN6PTON_UNKNOWN;
+}
+
+int in4_pton(const char *src, int srclen,
+	     u8 *dst,
+	     int delim, const char **end)
+{
+	const char *s;
+	u8 *d;
+	u8 dbuf[4];
+	int ret = 0;
+	int i;
+	int w = 0;
+
+	if (srclen < 0)
+		srclen = strlen(src);
+	s = src;
+	d = dbuf;
+	i = 0;
+	while(1) {
+		int c;
+		c = xdigit2bin(srclen > 0 ? *s : '\0', delim);
+		if (!(c & (IN6PTON_DIGIT | IN6PTON_DOT | IN6PTON_DELIM | IN6PTON_COLON_MASK))) {
+			goto out;
+		}
+		if (c & (IN6PTON_DOT | IN6PTON_DELIM | IN6PTON_COLON_MASK)) {
+			if (w == 0)
+				goto out;
+			*d++ = w & 0xff;
+			w = 0;
+			i++;
+			if (c & (IN6PTON_DELIM | IN6PTON_COLON_MASK)) {
+				if (i != 4)
+					goto out;
+				break;
+			}
+			goto cont;
+		}
+		w = (w * 10) + c;
+		if ((w & 0xffff) > 255) {
+			goto out;
+		}
+cont:
+		if (i >= 4)
+			goto out;
+		s++;
+		srclen--;
+	}
+	ret = 1;
+	memcpy(dst, dbuf, sizeof(dbuf));
+out:
+	if (end)
+		*end = s;
+	return ret;
+}
+EXPORT_SYMBOL(in4_pton);
+
+int in6_pton(const char *src, int srclen,
+	     u8 *dst,
+	     int delim, const char **end)
+{
+	const char *s, *tok = NULL;
+	u8 *d, *dc = NULL;
+	u8 dbuf[16];
+	int ret = 0;
+	int i;
+	int state = IN6PTON_COLON_1_2 | IN6PTON_XDIGIT | IN6PTON_NULL;
+	int w = 0;
+
+	memset(dbuf, 0, sizeof(dbuf));
+
+	s = src;
+	d = dbuf;
+	if (srclen < 0)
+		srclen = strlen(src);
+
+	while (1) {
+		int c;
+
+		c = xdigit2bin(srclen > 0 ? *s : '\0', delim);
+		if (!(c & state))
+			goto out;
+		if (c & (IN6PTON_DELIM | IN6PTON_COLON_MASK)) {
+			/* process one 16-bit word */
+			if (!(state & IN6PTON_NULL)) {
+				*d++ = (w >> 8) & 0xff;
+				*d++ = w & 0xff;
+			}
+			w = 0;
+			if (c & IN6PTON_DELIM) {
+				/* We've processed last word */
+				break;
+			}
+			/*
+			 * COLON_1 => XDIGIT
+			 * COLON_2 => XDIGIT|DELIM
+			 * COLON_1_2 => COLON_2
+			 */
+			switch (state & IN6PTON_COLON_MASK) {
+			case IN6PTON_COLON_2:
+				dc = d;
+				state = IN6PTON_XDIGIT | IN6PTON_DELIM;
+				if (dc - dbuf >= sizeof(dbuf))
+					state |= IN6PTON_NULL;
+				break;
+			case IN6PTON_COLON_1|IN6PTON_COLON_1_2:
+				state = IN6PTON_XDIGIT | IN6PTON_COLON_2;
+				break;
+			case IN6PTON_COLON_1:
+				state = IN6PTON_XDIGIT;
+				break;
+			case IN6PTON_COLON_1_2:
+				state = IN6PTON_COLON_2;
+				break;
+			default:
+				state = 0;
+			}
+			tok = s + 1;
+			goto cont;
+		}
+
+		if (c & IN6PTON_DOT) {
+			ret = in4_pton(tok ? tok : s, srclen + (int)(s - tok), d, delim, &s);
+			if (ret > 0) {
+				d += 4;
+				break;
+			}
+			goto out;
+		}
+
+		w = (w << 4) | (0xff & c);
+		state = IN6PTON_COLON_1 | IN6PTON_DELIM;
+		if (!(w & 0xf000)) {
+			state |= IN6PTON_XDIGIT;
+		}
+		if (!dc && d + 2 < dbuf + sizeof(dbuf)) {
+			state |= IN6PTON_COLON_1_2;
+			state &= ~IN6PTON_DELIM;
+		}
+		if (d + 2 >= dbuf + sizeof(dbuf)) {
+			state &= ~(IN6PTON_COLON_1|IN6PTON_COLON_1_2);
+		}
+cont:
+		if ((dc && d + 4 < dbuf + sizeof(dbuf)) ||
+		    d + 4 == dbuf + sizeof(dbuf)) {
+			state |= IN6PTON_DOT;
+		}
+		if (d >= dbuf + sizeof(dbuf)) {
+			state &= ~(IN6PTON_XDIGIT|IN6PTON_COLON_MASK);
+		}
+		s++;
+		srclen--;
+	}
+
+	i = 15; d--;
+
+	if (dc) {
+		while(d >= dc)
+			dst[i--] = *d--;
+		while(i >= dc - dbuf)
+			dst[i--] = 0;
+		while(i >= 0)
+			dst[i--] = *d--;
+	} else
+		memcpy(dst, dbuf, sizeof(dbuf));
+
+	ret = 1;
+out:
+	if (end)
+		*end = s;
+	return ret;
+}
+EXPORT_SYMBOL(in6_pton);
+
+void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
+			      __be32 from, __be32 to, int pseudohdr)
+{
+	__be32 diff[] = { ~from, to };
+	if (skb->ip_summed != CHECKSUM_PARTIAL) {
+		*sum = csum_fold(csum_partial(diff, sizeof(diff),
+				~csum_unfold(*sum)));
+		if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr)
+			skb->csum = ~csum_partial(diff, sizeof(diff),
+						~skb->csum);
+	} else if (pseudohdr)
+		*sum = ~csum_fold(csum_partial(diff, sizeof(diff),
+				csum_unfold(*sum)));
+}
+EXPORT_SYMBOL(inet_proto_csum_replace4);
+
+int mac_pton(const char *s, u8 *mac)
+{
+	int i;
+
+	/* XX:XX:XX:XX:XX:XX */
+	if (strlen(s) < 3 * ETH_ALEN - 1)
+		return 0;
+
+	/* Don't dirty result unless string is valid MAC. */
+	for (i = 0; i < ETH_ALEN; i++) {
+		if (!strchr("0123456789abcdefABCDEF", s[i * 3]))
+			return 0;
+		if (!strchr("0123456789abcdefABCDEF", s[i * 3 + 1]))
+			return 0;
+		if (i != ETH_ALEN - 1 && s[i * 3 + 2] != ':')
+			return 0;
+	}
+	for (i = 0; i < ETH_ALEN; i++) {
+		mac[i] = (hex_to_bin(s[i * 3]) << 4) | hex_to_bin(s[i * 3 + 1]);
+	}
+	return 1;
+}
+EXPORT_SYMBOL(mac_pton);