Showing error 203

User: Jiri Slaby
Error type: BUG/WARNING
Error type description: An unsatisfied assertion in the code
File location: net/core/skbuff.c
Line in file: 147
Project: Linux Kernel
Project version: 2.6.28
Tools: Web Crawler (0.1)
URL: https://bugzilla.kernel.org/show_bug.cgi?id=12353
Entered: 2011-09-08 08:32:45 UTC
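
The BUG/WARNING reported above corresponds to line 147 of the listing below: the BUG() call inside skb_under_panic(), the out-of-line helper that skb_push() invokes when skb->data would move below skb->head. As a rough illustration only (the actual trigger is not reproduced in this report), the failure pattern is a caller pushing more bytes than the buffer has headroom; names such as payload_len are hypothetical:

        /* alloc_skb() returns a buffer with no headroom reserved */
        struct sk_buff *skb = alloc_skb(payload_len, GFP_ATOMIC);

        if (!skb)
                return -ENOMEM;
        skb_put(skb, payload_len);              /* fill the payload */
        skb_push(skb, sizeof(struct ethhdr));   /* no headroom left: data < head,
                                                 * so skb_under_panic() -> BUG() */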


Source:

   1/*
   2 *        Routines having to do with the 'struct sk_buff' memory handlers.
   3 *
   4 *        Authors:        Alan Cox <alan@lxorguk.ukuu.org.uk>
   5 *                        Florian La Roche <rzsfl@rz.uni-sb.de>
   6 *
   7 *        Fixes:
   8 *                Alan Cox        :        Fixed the worst of the load
   9 *                                        balancer bugs.
  10 *                Dave Platt        :        Interrupt stacking fix.
  11 *        Richard Kooijman        :        Timestamp fixes.
  12 *                Alan Cox        :        Changed buffer format.
  13 *                Alan Cox        :        destructor hook for AF_UNIX etc.
  14 *                Linus Torvalds        :        Better skb_clone.
  15 *                Alan Cox        :        Added skb_copy.
  16 *                Alan Cox        :        Added all the changed routines Linus
  17 *                                        only put in the headers
  18 *                Ray VanTassle        :        Fixed --skb->lock in free
  19 *                Alan Cox        :        skb_copy copy arp field
  20 *                Andi Kleen        :        slabified it.
  21 *                Robert Olsson        :        Removed skb_head_pool
  22 *
  23 *        NOTE:
  24 *                The __skb_ routines should be called with interrupts
  25 *        disabled, or you better be *real* sure that the operation is atomic
  26 *        with respect to whatever list is being frobbed (e.g. via lock_sock()
  27 *        or via disabling bottom half handlers, etc).
  28 *
  29 *        This program is free software; you can redistribute it and/or
  30 *        modify it under the terms of the GNU General Public License
  31 *        as published by the Free Software Foundation; either version
  32 *        2 of the License, or (at your option) any later version.
  33 */
  34
  35/*
  36 *        The functions in this file will not compile correctly with gcc 2.4.x
  37 */
  38
  39#include <linux/module.h>
  40#include <linux/types.h>
  41#include <linux/kernel.h>
  42#include <linux/mm.h>
  43#include <linux/interrupt.h>
  44#include <linux/in.h>
  45#include <linux/inet.h>
  46#include <linux/slab.h>
  47#include <linux/netdevice.h>
  48#ifdef CONFIG_NET_CLS_ACT
  49#include <net/pkt_sched.h>
  50#endif
  51#include <linux/string.h>
  52#include <linux/skbuff.h>
  53#include <linux/splice.h>
  54#include <linux/cache.h>
  55#include <linux/rtnetlink.h>
  56#include <linux/init.h>
  57#include <linux/scatterlist.h>
  58
  59#include <net/protocol.h>
  60#include <net/dst.h>
  61#include <net/sock.h>
  62#include <net/checksum.h>
  63#include <net/xfrm.h>
  64
  65#include <asm/uaccess.h>
  66#include <asm/system.h>
  67
  68#include "kmap_skb.h"
  69
  70static struct kmem_cache *skbuff_head_cache __read_mostly;
  71static struct kmem_cache *skbuff_fclone_cache __read_mostly;
  72
  73static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
  74                                  struct pipe_buffer *buf)
  75{
  76        struct sk_buff *skb = (struct sk_buff *) buf->private;
  77
  78        kfree_skb(skb);
  79}
  80
  81static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
  82                                struct pipe_buffer *buf)
  83{
  84        struct sk_buff *skb = (struct sk_buff *) buf->private;
  85
  86        skb_get(skb);
  87}
  88
  89static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
  90                               struct pipe_buffer *buf)
  91{
  92        return 1;
  93}
  94
  95
  96/* Pipe buffer operations for a socket. */
  97static struct pipe_buf_operations sock_pipe_buf_ops = {
  98        .can_merge = 0,
  99        .map = generic_pipe_buf_map,
 100        .unmap = generic_pipe_buf_unmap,
 101        .confirm = generic_pipe_buf_confirm,
 102        .release = sock_pipe_buf_release,
 103        .steal = sock_pipe_buf_steal,
 104        .get = sock_pipe_buf_get,
 105};
 106
 107/*
 108 *        Keep out-of-line to prevent kernel bloat.
 109 *        __builtin_return_address is not used because it is not always
 110 *        reliable.
 111 */
 112
 113/**
 114 *        skb_over_panic        -         private function
 115 *        @skb: buffer
 116 *        @sz: size
 117 *        @here: address
 118 *
 119 *        Out of line support code for skb_put(). Not user callable.
 120 */
 121void skb_over_panic(struct sk_buff *skb, int sz, void *here)
 122{
 123        printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p "
 124                          "data:%p tail:%#lx end:%#lx dev:%s\n",
 125               here, skb->len, sz, skb->head, skb->data,
 126               (unsigned long)skb->tail, (unsigned long)skb->end,
 127               skb->dev ? skb->dev->name : "<NULL>");
 128        BUG();
 129}
 130
 131/**
 132 *        skb_under_panic        -         private function
 133 *        @skb: buffer
 134 *        @sz: size
 135 *        @here: address
 136 *
 137 *        Out of line support code for skb_push(). Not user callable.
 138 */
 139
 140void skb_under_panic(struct sk_buff *skb, int sz, void *here)
 141{
 142        printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p "
 143                          "data:%p tail:%#lx end:%#lx dev:%s\n",
 144               here, skb->len, sz, skb->head, skb->data,
 145               (unsigned long)skb->tail, (unsigned long)skb->end,
 146               skb->dev ? skb->dev->name : "<NULL>");
 147        BUG();
 148}
 149
 150void skb_truesize_bug(struct sk_buff *skb)
 151{
 152        WARN(net_ratelimit(), KERN_ERR "SKB BUG: Invalid truesize (%u) "
 153               "len=%u, sizeof(sk_buff)=%Zd\n",
 154               skb->truesize, skb->len, sizeof(struct sk_buff));
 155}
 156EXPORT_SYMBOL(skb_truesize_bug);
 157
 158/*         Allocate a new skbuff. We do this ourselves so we can fill in a few
 159 *        'private' fields and also do memory statistics to find all the
 160 *        [BEEP] leaks.
 161 *
 162 */
 163
 164/**
 165 *        __alloc_skb        -        allocate a network buffer
 166 *        @size: size to allocate
 167 *        @gfp_mask: allocation mask
 168 *        @fclone: allocate from fclone cache instead of head cache
 169 *                and allocate a cloned (child) skb
 170 *        @node: numa node to allocate memory on
 171 *
 172 *        Allocate a new &sk_buff. The returned buffer has no headroom and a
 173 *        tail room of size bytes. The object has a reference count of one.
 174 *        The return is the buffer. On a failure the return is %NULL.
 175 *
 176 *        Buffers may only be allocated from interrupts using a @gfp_mask of
 177 *        %GFP_ATOMIC.
 178 */
 179struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 180                            int fclone, int node)
 181{
 182        struct kmem_cache *cache;
 183        struct skb_shared_info *shinfo;
 184        struct sk_buff *skb;
 185        u8 *data;
 186
 187        cache = fclone ? skbuff_fclone_cache : skbuff_head_cache;
 188
 189        /* Get the HEAD */
 190        skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
 191        if (!skb)
 192                goto out;
 193
 194        size = SKB_DATA_ALIGN(size);
 195        data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info),
 196                        gfp_mask, node);
 197        if (!data)
 198                goto nodata;
 199
 200        /*
 201         * Only clear those fields we need to clear, not those that we will
 202         * actually initialise below. Hence, don't put any more fields after
 203         * the tail pointer in struct sk_buff!
 204         */
 205        memset(skb, 0, offsetof(struct sk_buff, tail));
 206        skb->truesize = size + sizeof(struct sk_buff);
 207        atomic_set(&skb->users, 1);
 208        skb->head = data;
 209        skb->data = data;
 210        skb_reset_tail_pointer(skb);
 211        skb->end = skb->tail + size;
 212        /* make sure we initialize shinfo sequentially */
 213        shinfo = skb_shinfo(skb);
 214        atomic_set(&shinfo->dataref, 1);
 215        shinfo->nr_frags  = 0;
 216        shinfo->gso_size = 0;
 217        shinfo->gso_segs = 0;
 218        shinfo->gso_type = 0;
 219        shinfo->ip6_frag_id = 0;
 220        shinfo->frag_list = NULL;
 221
 222        if (fclone) {
 223                struct sk_buff *child = skb + 1;
 224                atomic_t *fclone_ref = (atomic_t *) (child + 1);
 225
 226                skb->fclone = SKB_FCLONE_ORIG;
 227                atomic_set(fclone_ref, 1);
 228
 229                child->fclone = SKB_FCLONE_UNAVAILABLE;
 230        }
 231out:
 232        return skb;
 233nodata:
 234        kmem_cache_free(cache, skb);
 235        skb = NULL;
 236        goto out;
 237}
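
/*
 * Illustrative sketch, not part of skbuff.c: a direct call of __alloc_skb()
 * with example values. @size is tailroom only, fclone=0 selects the plain
 * head cache and node=-1 means no NUMA preference; most callers use the
 * alloc_skb() wrapper from <linux/skbuff.h> instead.
 */
        struct sk_buff *skb = __alloc_skb(1500, GFP_ATOMIC, 0, -1);

        if (!skb)
                return NULL;                    /* allocation can fail */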
 238
 239/**
 240 *        __netdev_alloc_skb - allocate an skbuff for rx on a specific device
 241 *        @dev: network device to receive on
 242 *        @length: length to allocate
 243 *        @gfp_mask: get_free_pages mask, passed to alloc_skb
 244 *
 245 *        Allocate a new &sk_buff and assign it a usage count of one. The
 246 *        buffer has unspecified headroom built in. Users should allocate
 247 *        the headroom they think they need without accounting for the
 248 *        built in space. The built in space is used for optimisations.
 249 *
 250 *        %NULL is returned if there is no free memory.
 251 */
 252struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
 253                unsigned int length, gfp_t gfp_mask)
 254{
 255        int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1;
 256        struct sk_buff *skb;
 257
 258        skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, node);
 259        if (likely(skb)) {
 260                skb_reserve(skb, NET_SKB_PAD);
 261                skb->dev = dev;
 262        }
 263        return skb;
 264}
 265
 266struct page *__netdev_alloc_page(struct net_device *dev, gfp_t gfp_mask)
 267{
 268        int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1;
 269        struct page *page;
 270
 271        page = alloc_pages_node(node, gfp_mask, 0);
 272        return page;
 273}
 274EXPORT_SYMBOL(__netdev_alloc_page);
 275
 276void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
 277                int size)
 278{
 279        skb_fill_page_desc(skb, i, page, off, size);
 280        skb->len += size;
 281        skb->data_len += size;
 282        skb->truesize += size;
 283}
 284EXPORT_SYMBOL(skb_add_rx_frag);
 285
 286/**
 287 *        dev_alloc_skb - allocate an skbuff for receiving
 288 *        @length: length to allocate
 289 *
 290 *        Allocate a new &sk_buff and assign it a usage count of one. The
 291 *        buffer has unspecified headroom built in. Users should allocate
 292 *        the headroom they think they need without accounting for the
 293 *        built in space. The built in space is used for optimisations.
 294 *
 295 *        %NULL is returned if there is no free memory. Although this function
 296 *        allocates memory it can be called from an interrupt.
 297 */
 298struct sk_buff *dev_alloc_skb(unsigned int length)
 299{
 300        /*
 301         * There is more code here than it seems:
 302         * __dev_alloc_skb is an inline
 303         */
 304        return __dev_alloc_skb(length, GFP_ATOMIC);
 305}
 306EXPORT_SYMBOL(dev_alloc_skb);
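
/*
 * Illustrative sketch, not part of skbuff.c: the receive-path pattern the
 * comment above describes. dev_alloc_skb() already reserves NET_SKB_PAD of
 * headroom via __dev_alloc_skb(); the driver only reserves what it needs on
 * top (NET_IP_ALIGN here) and fills the frame with skb_put(). rx_len,
 * rx_buf and dev are hypothetical driver variables.
 */
        skb = dev_alloc_skb(rx_len + NET_IP_ALIGN);
        if (!skb)
                return;                         /* drop the frame */
        skb_reserve(skb, NET_IP_ALIGN);         /* align the IP header */
        memcpy(skb_put(skb, rx_len), rx_buf, rx_len);
        skb->protocol = eth_type_trans(skb, dev);
        netif_rx(skb);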
 307
 308static void skb_drop_list(struct sk_buff **listp)
 309{
 310        struct sk_buff *list = *listp;
 311
 312        *listp = NULL;
 313
 314        do {
 315                struct sk_buff *this = list;
 316                list = list->next;
 317                kfree_skb(this);
 318        } while (list);
 319}
 320
 321static inline void skb_drop_fraglist(struct sk_buff *skb)
 322{
 323        skb_drop_list(&skb_shinfo(skb)->frag_list);
 324}
 325
 326static void skb_clone_fraglist(struct sk_buff *skb)
 327{
 328        struct sk_buff *list;
 329
 330        for (list = skb_shinfo(skb)->frag_list; list; list = list->next)
 331                skb_get(list);
 332}
 333
 334static void skb_release_data(struct sk_buff *skb)
 335{
 336        if (!skb->cloned ||
 337            !atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
 338                               &skb_shinfo(skb)->dataref)) {
 339                if (skb_shinfo(skb)->nr_frags) {
 340                        int i;
 341                        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
 342                                put_page(skb_shinfo(skb)->frags[i].page);
 343                }
 344
 345                if (skb_shinfo(skb)->frag_list)
 346                        skb_drop_fraglist(skb);
 347
 348                kfree(skb->head);
 349        }
 350}
 351
 352/*
 353 *        Free an skbuff by memory without cleaning the state.
 354 */
 355static void kfree_skbmem(struct sk_buff *skb)
 356{
 357        struct sk_buff *other;
 358        atomic_t *fclone_ref;
 359
 360        switch (skb->fclone) {
 361        case SKB_FCLONE_UNAVAILABLE:
 362                kmem_cache_free(skbuff_head_cache, skb);
 363                break;
 364
 365        case SKB_FCLONE_ORIG:
 366                fclone_ref = (atomic_t *) (skb + 2);
 367                if (atomic_dec_and_test(fclone_ref))
 368                        kmem_cache_free(skbuff_fclone_cache, skb);
 369                break;
 370
 371        case SKB_FCLONE_CLONE:
 372                fclone_ref = (atomic_t *) (skb + 1);
 373                other = skb - 1;
 374
 375                /* The clone portion is available for
 376                 * fast-cloning again.
 377                 */
 378                skb->fclone = SKB_FCLONE_UNAVAILABLE;
 379
 380                if (atomic_dec_and_test(fclone_ref))
 381                        kmem_cache_free(skbuff_fclone_cache, other);
 382                break;
 383        }
 384}
 385
 386static void skb_release_head_state(struct sk_buff *skb)
 387{
 388        dst_release(skb->dst);
 389#ifdef CONFIG_XFRM
 390        secpath_put(skb->sp);
 391#endif
 392        if (skb->destructor) {
 393                WARN_ON(in_irq());
 394                skb->destructor(skb);
 395        }
 396#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 397        nf_conntrack_put(skb->nfct);
 398        nf_conntrack_put_reasm(skb->nfct_reasm);
 399#endif
 400#ifdef CONFIG_BRIDGE_NETFILTER
 401        nf_bridge_put(skb->nf_bridge);
 402#endif
 403/* XXX: IS this still necessary? - JHS */
 404#ifdef CONFIG_NET_SCHED
 405        skb->tc_index = 0;
 406#ifdef CONFIG_NET_CLS_ACT
 407        skb->tc_verd = 0;
 408#endif
 409#endif
 410}
 411
 412/* Free everything but the sk_buff shell. */
 413static void skb_release_all(struct sk_buff *skb)
 414{
 415        skb_release_head_state(skb);
 416        skb_release_data(skb);
 417}
 418
 419/**
 420 *        __kfree_skb - private function
 421 *        @skb: buffer
 422 *
 423 *        Free an sk_buff. Release anything attached to the buffer.
 424 *        Clean the state. This is an internal helper function. Users should
 425 *        always call kfree_skb
 426 */
 427
 428void __kfree_skb(struct sk_buff *skb)
 429{
 430        skb_release_all(skb);
 431        kfree_skbmem(skb);
 432}
 433
 434/**
 435 *        kfree_skb - free an sk_buff
 436 *        @skb: buffer to free
 437 *
 438 *        Drop a reference to the buffer and free it if the usage count has
 439 *        hit zero.
 440 */
 441void kfree_skb(struct sk_buff *skb)
 442{
 443        if (unlikely(!skb))
 444                return;
 445        if (likely(atomic_read(&skb->users) == 1))
 446                smp_rmb();
 447        else if (likely(!atomic_dec_and_test(&skb->users)))
 448                return;
 449        __kfree_skb(skb);
 450}
 451
 452/**
 453 *        skb_recycle_check - check if skb can be reused for receive
 454 *        @skb: buffer
 455 *        @skb_size: minimum receive buffer size
 456 *
 457 *        Checks that the skb passed in is not shared or cloned, and
 458 *        that it is linear and its head portion at least as large as
 459 *        skb_size so that it can be recycled as a receive buffer.
 460 *        If these conditions are met, this function does any necessary
 461 *        reference count dropping and cleans up the skbuff as if it
 462 *        just came from __alloc_skb().
 463 */
 464int skb_recycle_check(struct sk_buff *skb, int skb_size)
 465{
 466        struct skb_shared_info *shinfo;
 467
 468        if (skb_is_nonlinear(skb) || skb->fclone != SKB_FCLONE_UNAVAILABLE)
 469                return 0;
 470
 471        skb_size = SKB_DATA_ALIGN(skb_size + NET_SKB_PAD);
 472        if (skb_end_pointer(skb) - skb->head < skb_size)
 473                return 0;
 474
 475        if (skb_shared(skb) || skb_cloned(skb))
 476                return 0;
 477
 478        skb_release_head_state(skb);
 479        shinfo = skb_shinfo(skb);
 480        atomic_set(&shinfo->dataref, 1);
 481        shinfo->nr_frags = 0;
 482        shinfo->gso_size = 0;
 483        shinfo->gso_segs = 0;
 484        shinfo->gso_type = 0;
 485        shinfo->ip6_frag_id = 0;
 486        shinfo->frag_list = NULL;
 487
 488        memset(skb, 0, offsetof(struct sk_buff, tail));
 489        skb->data = skb->head + NET_SKB_PAD;
 490        skb_reset_tail_pointer(skb);
 491
 492        return 1;
 493}
 494EXPORT_SYMBOL(skb_recycle_check);
 495
 496static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 497{
 498        new->tstamp                = old->tstamp;
 499        new->dev                = old->dev;
 500        new->transport_header        = old->transport_header;
 501        new->network_header        = old->network_header;
 502        new->mac_header                = old->mac_header;
 503        new->dst                = dst_clone(old->dst);
 504#ifdef CONFIG_INET
 505        new->sp                        = secpath_get(old->sp);
 506#endif
 507        memcpy(new->cb, old->cb, sizeof(old->cb));
 508        new->csum_start                = old->csum_start;
 509        new->csum_offset        = old->csum_offset;
 510        new->local_df                = old->local_df;
 511        new->pkt_type                = old->pkt_type;
 512        new->ip_summed                = old->ip_summed;
 513        skb_copy_queue_mapping(new, old);
 514        new->priority                = old->priority;
 515#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
 516        new->ipvs_property        = old->ipvs_property;
 517#endif
 518        new->protocol                = old->protocol;
 519        new->mark                = old->mark;
 520        __nf_copy(new, old);
 521#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
 522    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
 523        new->nf_trace                = old->nf_trace;
 524#endif
 525#ifdef CONFIG_NET_SCHED
 526        new->tc_index                = old->tc_index;
 527#ifdef CONFIG_NET_CLS_ACT
 528        new->tc_verd                = old->tc_verd;
 529#endif
 530#endif
 531        new->vlan_tci                = old->vlan_tci;
 532
 533        skb_copy_secmark(new, old);
 534}
 535
 536static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
 537{
 538#define C(x) n->x = skb->x
 539
 540        n->next = n->prev = NULL;
 541        n->sk = NULL;
 542        __copy_skb_header(n, skb);
 543
 544        C(len);
 545        C(data_len);
 546        C(mac_len);
 547        n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len;
 548        n->cloned = 1;
 549        n->nohdr = 0;
 550        n->destructor = NULL;
 551        C(iif);
 552        C(tail);
 553        C(end);
 554        C(head);
 555        C(data);
 556        C(truesize);
 557#if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE)
 558        C(do_not_encrypt);
 559#endif
 560        atomic_set(&n->users, 1);
 561
 562        atomic_inc(&(skb_shinfo(skb)->dataref));
 563        skb->cloned = 1;
 564
 565        return n;
 566#undef C
 567}
 568
 569/**
 570 *        skb_morph        -        morph one skb into another
 571 *        @dst: the skb to receive the contents
 572 *        @src: the skb to supply the contents
 573 *
 574 *        This is identical to skb_clone except that the target skb is
 575 *        supplied by the user.
 576 *
 577 *        The target skb is returned upon exit.
 578 */
 579struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
 580{
 581        skb_release_all(dst);
 582        return __skb_clone(dst, src);
 583}
 584EXPORT_SYMBOL_GPL(skb_morph);
 585
 586/**
 587 *        skb_clone        -        duplicate an sk_buff
 588 *        @skb: buffer to clone
 589 *        @gfp_mask: allocation priority
 590 *
 591 *        Duplicate an &sk_buff. The new one is not owned by a socket. Both
 592 *        copies share the same packet data but not structure. The new
 593 *        buffer has a reference count of 1. If the allocation fails the
 594 *        function returns %NULL otherwise the new buffer is returned.
 595 *
 596 *        If this function is called from an interrupt gfp_mask() must be
 597 *        %GFP_ATOMIC.
 598 */
 599
 600struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 601{
 602        struct sk_buff *n;
 603
 604        n = skb + 1;
 605        if (skb->fclone == SKB_FCLONE_ORIG &&
 606            n->fclone == SKB_FCLONE_UNAVAILABLE) {
 607                atomic_t *fclone_ref = (atomic_t *) (n + 1);
 608                n->fclone = SKB_FCLONE_CLONE;
 609                atomic_inc(fclone_ref);
 610        } else {
 611                n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
 612                if (!n)
 613                        return NULL;
 614                n->fclone = SKB_FCLONE_UNAVAILABLE;
 615        }
 616
 617        return __skb_clone(n, skb);
 618}
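
/*
 * Illustrative sketch, not part of skbuff.c: a clone shares the packet data
 * (dataref is bumped in __skb_clone() above) but gets its own struct
 * sk_buff, so it suits queueing the same payload in two places, not
 * modifying it -- use skb_copy() or pskb_copy() for that.
 */
        struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);

        if (!nskb)
                return -ENOMEM;
        /* nskb->data and skb->data now point at the same bytes */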
 619
 620static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 621{
 622#ifndef NET_SKBUFF_DATA_USES_OFFSET
 623        /*
 624         *        Shift between the two data areas in bytes
 625         */
 626        unsigned long offset = new->data - old->data;
 627#endif
 628
 629        __copy_skb_header(new, old);
 630
 631#ifndef NET_SKBUFF_DATA_USES_OFFSET
 632        /* {transport,network,mac}_header are relative to skb->head */
 633        new->transport_header += offset;
 634        new->network_header   += offset;
 635        new->mac_header              += offset;
 636#endif
 637        skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size;
 638        skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs;
 639        skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type;
 640}
 641
 642/**
 643 *        skb_copy        -        create private copy of an sk_buff
 644 *        @skb: buffer to copy
 645 *        @gfp_mask: allocation priority
 646 *
 647 *        Make a copy of both an &sk_buff and its data. This is used when the
 648 *        caller wishes to modify the data and needs a private copy of the
 649 *        data to alter. Returns %NULL on failure or the pointer to the buffer
 650 *        on success. The returned buffer has a reference count of 1.
 651 *
  652 *        As a by-product this function converts a non-linear &sk_buff into
  653 *        a linear one, so the &sk_buff becomes completely private and the
  654 *        caller is allowed to modify all the data of the returned buffer.
  655 *        This means that this function is not recommended for use when
  656 *        only the header is going to be modified. Use pskb_copy() instead.
 657 */
 658
 659struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
 660{
 661        int headerlen = skb->data - skb->head;
 662        /*
 663         *        Allocate the copy buffer
 664         */
 665        struct sk_buff *n;
 666#ifdef NET_SKBUFF_DATA_USES_OFFSET
 667        n = alloc_skb(skb->end + skb->data_len, gfp_mask);
 668#else
 669        n = alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask);
 670#endif
 671        if (!n)
 672                return NULL;
 673
 674        /* Set the data pointer */
 675        skb_reserve(n, headerlen);
 676        /* Set the tail pointer and length */
 677        skb_put(n, skb->len);
 678
 679        if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len))
 680                BUG();
 681
 682        copy_skb_header(n, skb);
 683        return n;
 684}
 685
 686
 687/**
 688 *        pskb_copy        -        create copy of an sk_buff with private head.
 689 *        @skb: buffer to copy
 690 *        @gfp_mask: allocation priority
 691 *
 692 *        Make a copy of both an &sk_buff and part of its data, located
 693 *        in header. Fragmented data remain shared. This is used when
 694 *        the caller wishes to modify only header of &sk_buff and needs
 695 *        private copy of the header to alter. Returns %NULL on failure
 696 *        or the pointer to the buffer on success.
 697 *        The returned buffer has a reference count of 1.
 698 */
 699
 700struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
 701{
 702        /*
 703         *        Allocate the copy buffer
 704         */
 705        struct sk_buff *n;
 706#ifdef NET_SKBUFF_DATA_USES_OFFSET
 707        n = alloc_skb(skb->end, gfp_mask);
 708#else
 709        n = alloc_skb(skb->end - skb->head, gfp_mask);
 710#endif
 711        if (!n)
 712                goto out;
 713
 714        /* Set the data pointer */
 715        skb_reserve(n, skb->data - skb->head);
 716        /* Set the tail pointer and length */
 717        skb_put(n, skb_headlen(skb));
 718        /* Copy the bytes */
 719        skb_copy_from_linear_data(skb, n->data, n->len);
 720
 721        n->truesize += skb->data_len;
 722        n->data_len  = skb->data_len;
 723        n->len             = skb->len;
 724
 725        if (skb_shinfo(skb)->nr_frags) {
 726                int i;
 727
 728                for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 729                        skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
 730                        get_page(skb_shinfo(n)->frags[i].page);
 731                }
 732                skb_shinfo(n)->nr_frags = i;
 733        }
 734
 735        if (skb_shinfo(skb)->frag_list) {
 736                skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
 737                skb_clone_fraglist(n);
 738        }
 739
 740        copy_skb_header(n, skb);
 741out:
 742        return n;
 743}
 744
 745/**
 746 *        pskb_expand_head - reallocate header of &sk_buff
 747 *        @skb: buffer to reallocate
 748 *        @nhead: room to add at head
 749 *        @ntail: room to add at tail
 750 *        @gfp_mask: allocation priority
 751 *
 752 *        Expands (or creates identical copy, if &nhead and &ntail are zero)
 753 *        header of skb. &sk_buff itself is not changed. &sk_buff MUST have
  754 *        reference count of 1. Returns zero on success, or a negative error
  755 *        code if expansion failed. In the latter case, &sk_buff is not changed.
 756 *
 757 *        All the pointers pointing into skb header may change and must be
 758 *        reloaded after call to this function.
 759 */
 760
 761int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 762                     gfp_t gfp_mask)
 763{
 764        int i;
 765        u8 *data;
 766#ifdef NET_SKBUFF_DATA_USES_OFFSET
 767        int size = nhead + skb->end + ntail;
 768#else
 769        int size = nhead + (skb->end - skb->head) + ntail;
 770#endif
 771        long off;
 772
 773        BUG_ON(nhead < 0);
 774
 775        if (skb_shared(skb))
 776                BUG();
 777
 778        size = SKB_DATA_ALIGN(size);
 779
 780        data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
 781        if (!data)
 782                goto nodata;
 783
 784        /* Copy only real data... and, alas, header. This should be
 785         * optimized for the cases when header is void. */
 786#ifdef NET_SKBUFF_DATA_USES_OFFSET
 787        memcpy(data + nhead, skb->head, skb->tail);
 788#else
 789        memcpy(data + nhead, skb->head, skb->tail - skb->head);
 790#endif
 791        memcpy(data + size, skb_end_pointer(skb),
 792               sizeof(struct skb_shared_info));
 793
 794        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
 795                get_page(skb_shinfo(skb)->frags[i].page);
 796
 797        if (skb_shinfo(skb)->frag_list)
 798                skb_clone_fraglist(skb);
 799
 800        skb_release_data(skb);
 801
 802        off = (data + nhead) - skb->head;
 803
 804        skb->head     = data;
 805        skb->data    += off;
 806#ifdef NET_SKBUFF_DATA_USES_OFFSET
 807        skb->end      = size;
 808        off           = nhead;
 809#else
 810        skb->end      = skb->head + size;
 811#endif
 812        /* {transport,network,mac}_header and tail are relative to skb->head */
 813        skb->tail              += off;
 814        skb->transport_header += off;
 815        skb->network_header   += off;
 816        skb->mac_header              += off;
 817        skb->csum_start       += nhead;
 818        skb->cloned   = 0;
 819        skb->hdr_len  = 0;
 820        skb->nohdr    = 0;
 821        atomic_set(&skb_shinfo(skb)->dataref, 1);
 822        return 0;
 823
 824nodata:
 825        return -ENOMEM;
 826}
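
/*
 * Illustrative sketch, not part of skbuff.c: the warning above in practice.
 * After pskb_expand_head() the data area may have been reallocated, so any
 * pointer previously derived from skb->head is stale. 'needed' is a
 * hypothetical headroom requirement.
 */
        if (skb_headroom(skb) < needed &&
            pskb_expand_head(skb, needed - skb_headroom(skb), 0, GFP_ATOMIC))
                return -ENOMEM;
        /* recompute any saved header pointers (e.g. via ip_hdr(skb)) here */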
 827
 828/* Make private copy of skb with writable head and some headroom */
 829
 830struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
 831{
 832        struct sk_buff *skb2;
 833        int delta = headroom - skb_headroom(skb);
 834
 835        if (delta <= 0)
 836                skb2 = pskb_copy(skb, GFP_ATOMIC);
 837        else {
 838                skb2 = skb_clone(skb, GFP_ATOMIC);
 839                if (skb2 && pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0,
 840                                             GFP_ATOMIC)) {
 841                        kfree_skb(skb2);
 842                        skb2 = NULL;
 843                }
 844        }
 845        return skb2;
 846}
 847
 848
 849/**
 850 *        skb_copy_expand        -        copy and expand sk_buff
 851 *        @skb: buffer to copy
 852 *        @newheadroom: new free bytes at head
 853 *        @newtailroom: new free bytes at tail
 854 *        @gfp_mask: allocation priority
 855 *
 856 *        Make a copy of both an &sk_buff and its data and while doing so
 857 *        allocate additional space.
 858 *
 859 *        This is used when the caller wishes to modify the data and needs a
 860 *        private copy of the data to alter as well as more space for new fields.
 861 *        Returns %NULL on failure or the pointer to the buffer
 862 *        on success. The returned buffer has a reference count of 1.
 863 *
 864 *        You must pass %GFP_ATOMIC as the allocation priority if this function
 865 *        is called from an interrupt.
 866 */
 867struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
 868                                int newheadroom, int newtailroom,
 869                                gfp_t gfp_mask)
 870{
 871        /*
 872         *        Allocate the copy buffer
 873         */
 874        struct sk_buff *n = alloc_skb(newheadroom + skb->len + newtailroom,
 875                                      gfp_mask);
 876        int oldheadroom = skb_headroom(skb);
 877        int head_copy_len, head_copy_off;
 878        int off;
 879
 880        if (!n)
 881                return NULL;
 882
 883        skb_reserve(n, newheadroom);
 884
 885        /* Set the tail pointer and length */
 886        skb_put(n, skb->len);
 887
 888        head_copy_len = oldheadroom;
 889        head_copy_off = 0;
 890        if (newheadroom <= head_copy_len)
 891                head_copy_len = newheadroom;
 892        else
 893                head_copy_off = newheadroom - head_copy_len;
 894
 895        /* Copy the linear header and data. */
 896        if (skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off,
 897                          skb->len + head_copy_len))
 898                BUG();
 899
 900        copy_skb_header(n, skb);
 901
 902        off                  = newheadroom - oldheadroom;
 903        n->csum_start       += off;
 904#ifdef NET_SKBUFF_DATA_USES_OFFSET
 905        n->transport_header += off;
 906        n->network_header   += off;
 907        n->mac_header            += off;
 908#endif
 909
 910        return n;
 911}
 912
 913/**
 914 *        skb_pad                        -        zero pad the tail of an skb
 915 *        @skb: buffer to pad
 916 *        @pad: space to pad
 917 *
 918 *        Ensure that a buffer is followed by a padding area that is zero
 919 *        filled. Used by network drivers which may DMA or transfer data
 920 *        beyond the buffer end onto the wire.
 921 *
 922 *        May return error in out of memory cases. The skb is freed on error.
 923 */
 924
 925int skb_pad(struct sk_buff *skb, int pad)
 926{
 927        int err;
 928        int ntail;
 929
  930        /* If the skbuff is non-linear, tailroom is always zero. */
 931        if (!skb_cloned(skb) && skb_tailroom(skb) >= pad) {
 932                memset(skb->data+skb->len, 0, pad);
 933                return 0;
 934        }
 935
 936        ntail = skb->data_len + pad - (skb->end - skb->tail);
 937        if (likely(skb_cloned(skb) || ntail > 0)) {
 938                err = pskb_expand_head(skb, 0, ntail, GFP_ATOMIC);
 939                if (unlikely(err))
 940                        goto free_skb;
 941        }
 942
 943        /* FIXME: The use of this function with non-linear skb's really needs
 944         * to be audited.
 945         */
 946        err = skb_linearize(skb);
 947        if (unlikely(err))
 948                goto free_skb;
 949
 950        memset(skb->data + skb->len, 0, pad);
 951        return 0;
 952
 953free_skb:
 954        kfree_skb(skb);
 955        return err;
 956}
 957
 958/**
 959 *        skb_put - add data to a buffer
 960 *        @skb: buffer to use
 961 *        @len: amount of data to add
 962 *
 963 *        This function extends the used data area of the buffer. If this would
 964 *        exceed the total buffer size the kernel will panic. A pointer to the
 965 *        first byte of the extra data is returned.
 966 */
 967unsigned char *skb_put(struct sk_buff *skb, unsigned int len)
 968{
 969        unsigned char *tmp = skb_tail_pointer(skb);
 970        SKB_LINEAR_ASSERT(skb);
 971        skb->tail += len;
 972        skb->len  += len;
 973        if (unlikely(skb->tail > skb->end))
 974                skb_over_panic(skb, len, __builtin_return_address(0));
 975        return tmp;
 976}
 977EXPORT_SYMBOL(skb_put);
 978
 979/**
 980 *        skb_push - add data to the start of a buffer
 981 *        @skb: buffer to use
 982 *        @len: amount of data to add
 983 *
 984 *        This function extends the used data area of the buffer at the buffer
 985 *        start. If this would exceed the total buffer headroom the kernel will
 986 *        panic. A pointer to the first byte of the extra data is returned.
 987 */
 988unsigned char *skb_push(struct sk_buff *skb, unsigned int len)
 989{
 990        skb->data -= len;
 991        skb->len  += len;
 992        if (unlikely(skb->data<skb->head))
 993                skb_under_panic(skb, len, __builtin_return_address(0));
 994        return skb->data;
 995}
 996EXPORT_SYMBOL(skb_push);
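
/*
 * Illustrative sketch, not part of skbuff.c: the defensive pattern that
 * avoids the skb_under_panic()/BUG() path this report points at -- make
 * sure the headroom exists (and the head is writable) before pushing.
 * skb_cow_head() is the usual helper from <linux/skbuff.h>; 'hlen' is a
 * hypothetical header length.
 */
        if (skb_cow_head(skb, hlen))
                goto drop;
        skb_push(skb, hlen);                    /* cannot underrun skb->head now */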
 997
 998/**
 999 *        skb_pull - remove data from the start of a buffer
1000 *        @skb: buffer to use
1001 *        @len: amount of data to remove
1002 *
1003 *        This function removes data from the start of a buffer, returning
1004 *        the memory to the headroom. A pointer to the next data in the buffer
 1005 *        is returned. Once the data has been pulled, future pushes will overwrite
1006 *        the old data.
1007 */
1008unsigned char *skb_pull(struct sk_buff *skb, unsigned int len)
1009{
1010        return unlikely(len > skb->len) ? NULL : __skb_pull(skb, len);
1011}
1012EXPORT_SYMBOL(skb_pull);
1013
1014/**
1015 *        skb_trim - remove end from a buffer
1016 *        @skb: buffer to alter
1017 *        @len: new length
1018 *
1019 *        Cut the length of a buffer down by removing data from the tail. If
1020 *        the buffer is already under the length specified it is not modified.
1021 *        The skb must be linear.
1022 */
1023void skb_trim(struct sk_buff *skb, unsigned int len)
1024{
1025        if (skb->len > len)
1026                __skb_trim(skb, len);
1027}
1028EXPORT_SYMBOL(skb_trim);
1029
1030/* Trims skb to length len. It can change skb pointers.
1031 */
1032
1033int ___pskb_trim(struct sk_buff *skb, unsigned int len)
1034{
1035        struct sk_buff **fragp;
1036        struct sk_buff *frag;
1037        int offset = skb_headlen(skb);
1038        int nfrags = skb_shinfo(skb)->nr_frags;
1039        int i;
1040        int err;
1041
1042        if (skb_cloned(skb) &&
1043            unlikely((err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))))
1044                return err;
1045
1046        i = 0;
1047        if (offset >= len)
1048                goto drop_pages;
1049
1050        for (; i < nfrags; i++) {
1051                int end = offset + skb_shinfo(skb)->frags[i].size;
1052
1053                if (end < len) {
1054                        offset = end;
1055                        continue;
1056                }
1057
1058                skb_shinfo(skb)->frags[i++].size = len - offset;
1059
1060drop_pages:
1061                skb_shinfo(skb)->nr_frags = i;
1062
1063                for (; i < nfrags; i++)
1064                        put_page(skb_shinfo(skb)->frags[i].page);
1065
1066                if (skb_shinfo(skb)->frag_list)
1067                        skb_drop_fraglist(skb);
1068                goto done;
1069        }
1070
1071        for (fragp = &skb_shinfo(skb)->frag_list; (frag = *fragp);
1072             fragp = &frag->next) {
1073                int end = offset + frag->len;
1074
1075                if (skb_shared(frag)) {
1076                        struct sk_buff *nfrag;
1077
1078                        nfrag = skb_clone(frag, GFP_ATOMIC);
1079                        if (unlikely(!nfrag))
1080                                return -ENOMEM;
1081
1082                        nfrag->next = frag->next;
1083                        kfree_skb(frag);
1084                        frag = nfrag;
1085                        *fragp = frag;
1086                }
1087
1088                if (end < len) {
1089                        offset = end;
1090                        continue;
1091                }
1092
1093                if (end > len &&
1094                    unlikely((err = pskb_trim(frag, len - offset))))
1095                        return err;
1096
1097                if (frag->next)
1098                        skb_drop_list(&frag->next);
1099                break;
1100        }
1101
1102done:
1103        if (len > skb_headlen(skb)) {
1104                skb->data_len -= skb->len - len;
1105                skb->len       = len;
1106        } else {
1107                skb->len       = len;
1108                skb->data_len  = 0;
1109                skb_set_tail_pointer(skb, len);
1110        }
1111
1112        return 0;
1113}
1114
1115/**
1116 *        __pskb_pull_tail - advance tail of skb header
1117 *        @skb: buffer to reallocate
1118 *        @delta: number of bytes to advance tail
1119 *
 1120 *        The function makes sense only on a fragmented &sk_buff: it expands
 1121 *        the header, moving its tail forward and copying the necessary data
 1122 *        from the fragmented part.
1123 *
1124 *        &sk_buff MUST have reference count of 1.
1125 *
 1126 *        Returns %NULL (and &sk_buff does not change) if the pull failed,
 1127 *        or the value of the new tail of the skb in the case of success.
1128 *
1129 *        All the pointers pointing into skb header may change and must be
1130 *        reloaded after call to this function.
1131 */
1132
1133/* Moves tail of skb head forward, copying data from fragmented part,
1134 * when it is necessary.
1135 * 1. It may fail due to malloc failure.
1136 * 2. It may change skb pointers.
1137 *
1138 * It is pretty complicated. Luckily, it is called only in exceptional cases.
1139 */
1140unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
1141{
 1142        /* If skb does not have enough free space at tail, get a new one
1143         * plus 128 bytes for future expansions. If we have enough
1144         * room at tail, reallocate without expansion only if skb is cloned.
1145         */
1146        int i, k, eat = (skb->tail + delta) - skb->end;
1147
1148        if (eat > 0 || skb_cloned(skb)) {
1149                if (pskb_expand_head(skb, 0, eat > 0 ? eat + 128 : 0,
1150                                     GFP_ATOMIC))
1151                        return NULL;
1152        }
1153
1154        if (skb_copy_bits(skb, skb_headlen(skb), skb_tail_pointer(skb), delta))
1155                BUG();
1156
1157        /* Optimization: no fragments, no reasons to preestimate
1158         * size of pulled pages. Superb.
1159         */
1160        if (!skb_shinfo(skb)->frag_list)
1161                goto pull_pages;
1162
1163        /* Estimate size of pulled pages. */
1164        eat = delta;
1165        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1166                if (skb_shinfo(skb)->frags[i].size >= eat)
1167                        goto pull_pages;
1168                eat -= skb_shinfo(skb)->frags[i].size;
1169        }
1170
 1171        /* If we need to update the frag list, we are in trouble.
 1172         * Certainly, it is possible to add an offset to the skb data,
 1173         * but taking into account that pulling is expected to
 1174         * be a very rare operation, it is worth fighting against
 1175         * further bloating the skb head and crucifying ourselves here instead.
 1176         * Pure masochism, indeed. 8)8)
1177         */
1178        if (eat) {
1179                struct sk_buff *list = skb_shinfo(skb)->frag_list;
1180                struct sk_buff *clone = NULL;
1181                struct sk_buff *insp = NULL;
1182
1183                do {
1184                        BUG_ON(!list);
1185
1186                        if (list->len <= eat) {
1187                                /* Eaten as whole. */
1188                                eat -= list->len;
1189                                list = list->next;
1190                                insp = list;
1191                        } else {
1192                                /* Eaten partially. */
1193
1194                                if (skb_shared(list)) {
1195                                        /* Sucks! We need to fork list. :-( */
1196                                        clone = skb_clone(list, GFP_ATOMIC);
1197                                        if (!clone)
1198                                                return NULL;
1199                                        insp = list->next;
1200                                        list = clone;
1201                                } else {
1202                                        /* This may be pulled without
1203                                         * problems. */
1204                                        insp = list;
1205                                }
1206                                if (!pskb_pull(list, eat)) {
1207                                        if (clone)
1208                                                kfree_skb(clone);
1209                                        return NULL;
1210                                }
1211                                break;
1212                        }
1213                } while (eat);
1214
1215                /* Free pulled out fragments. */
1216                while ((list = skb_shinfo(skb)->frag_list) != insp) {
1217                        skb_shinfo(skb)->frag_list = list->next;
1218                        kfree_skb(list);
1219                }
1220                /* And insert new clone at head. */
1221                if (clone) {
1222                        clone->next = list;
1223                        skb_shinfo(skb)->frag_list = clone;
1224                }
1225        }
1226        /* Success! Now we may commit changes to skb data. */
1227
1228pull_pages:
1229        eat = delta;
1230        k = 0;
1231        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1232                if (skb_shinfo(skb)->frags[i].size <= eat) {
1233                        put_page(skb_shinfo(skb)->frags[i].page);
1234                        eat -= skb_shinfo(skb)->frags[i].size;
1235                } else {
1236                        skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
1237                        if (eat) {
1238                                skb_shinfo(skb)->frags[k].page_offset += eat;
1239                                skb_shinfo(skb)->frags[k].size -= eat;
1240                                eat = 0;
1241                        }
1242                        k++;
1243                }
1244        }
1245        skb_shinfo(skb)->nr_frags = k;
1246
1247        skb->tail     += delta;
1248        skb->data_len -= delta;
1249
1250        return skb_tail_pointer(skb);
1251}
1252
1253/* Copy some data bits from skb to kernel buffer. */
1254
1255int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
1256{
1257        int i, copy;
1258        int start = skb_headlen(skb);
1259
1260        if (offset > (int)skb->len - len)
1261                goto fault;
1262
1263        /* Copy header. */
1264        if ((copy = start - offset) > 0) {
1265                if (copy > len)
1266                        copy = len;
1267                skb_copy_from_linear_data_offset(skb, offset, to, copy);
1268                if ((len -= copy) == 0)
1269                        return 0;
1270                offset += copy;
1271                to     += copy;
1272        }
1273
1274        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1275                int end;
1276
1277                WARN_ON(start > offset + len);
1278
1279                end = start + skb_shinfo(skb)->frags[i].size;
1280                if ((copy = end - offset) > 0) {
1281                        u8 *vaddr;
1282
1283                        if (copy > len)
1284                                copy = len;
1285
1286                        vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
1287                        memcpy(to,
1288                               vaddr + skb_shinfo(skb)->frags[i].page_offset+
1289                               offset - start, copy);
1290                        kunmap_skb_frag(vaddr);
1291
1292                        if ((len -= copy) == 0)
1293                                return 0;
1294                        offset += copy;
1295                        to     += copy;
1296                }
1297                start = end;
1298        }
1299
1300        if (skb_shinfo(skb)->frag_list) {
1301                struct sk_buff *list = skb_shinfo(skb)->frag_list;
1302
1303                for (; list; list = list->next) {
1304                        int end;
1305
1306                        WARN_ON(start > offset + len);
1307
1308                        end = start + list->len;
1309                        if ((copy = end - offset) > 0) {
1310                                if (copy > len)
1311                                        copy = len;
1312                                if (skb_copy_bits(list, offset - start,
1313                                                  to, copy))
1314                                        goto fault;
1315                                if ((len -= copy) == 0)
1316                                        return 0;
1317                                offset += copy;
1318                                to     += copy;
1319                        }
1320                        start = end;
1321                }
1322        }
1323        if (!len)
1324                return 0;
1325
1326fault:
1327        return -EFAULT;
1328}
1329
1330/*
1331 * Callback from splice_to_pipe(), if we need to release some pages
1332 * at the end of the spd in case we error'ed out in filling the pipe.
1333 */
1334static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
1335{
1336        struct sk_buff *skb = (struct sk_buff *) spd->partial[i].private;
1337
1338        kfree_skb(skb);
1339}
1340
1341/*
1342 * Fill page/offset/length into spd, if it can hold more pages.
1343 */
1344static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page,
1345                                unsigned int len, unsigned int offset,
1346                                struct sk_buff *skb)
1347{
1348        if (unlikely(spd->nr_pages == PIPE_BUFFERS))
1349                return 1;
1350
1351        spd->pages[spd->nr_pages] = page;
1352        spd->partial[spd->nr_pages].len = len;
1353        spd->partial[spd->nr_pages].offset = offset;
1354        spd->partial[spd->nr_pages].private = (unsigned long) skb_get(skb);
1355        spd->nr_pages++;
1356        return 0;
1357}
1358
1359static inline void __segment_seek(struct page **page, unsigned int *poff,
1360                                  unsigned int *plen, unsigned int off)
1361{
1362        *poff += off;
1363        *page += *poff / PAGE_SIZE;
1364        *poff = *poff % PAGE_SIZE;
1365        *plen -= off;
1366}
1367
1368static inline int __splice_segment(struct page *page, unsigned int poff,
1369                                   unsigned int plen, unsigned int *off,
1370                                   unsigned int *len, struct sk_buff *skb,
1371                                   struct splice_pipe_desc *spd)
1372{
1373        if (!*len)
1374                return 1;
1375
1376        /* skip this segment if already processed */
1377        if (*off >= plen) {
1378                *off -= plen;
1379                return 0;
1380        }
1381
1382        /* ignore any bits we already processed */
1383        if (*off) {
1384                __segment_seek(&page, &poff, &plen, *off);
1385                *off = 0;
1386        }
1387
1388        do {
1389                unsigned int flen = min(*len, plen);
1390
1391                /* the linear region may spread across several pages  */
1392                flen = min_t(unsigned int, flen, PAGE_SIZE - poff);
1393
1394                if (spd_fill_page(spd, page, flen, poff, skb))
1395                        return 1;
1396
1397                __segment_seek(&page, &poff, &plen, flen);
1398                *len -= flen;
1399
1400        } while (*len && plen);
1401
1402        return 0;
1403}
1404
1405/*
1406 * Map linear and fragment data from the skb to spd. It reports failure if the
1407 * pipe is full or if we already spliced the requested length.
1408 */
1409static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
1410                      unsigned int *len,
1411                      struct splice_pipe_desc *spd)
1412{
1413        int seg;
1414
1415        /*
1416         * map the linear part
1417         */
1418        if (__splice_segment(virt_to_page(skb->data),
1419                             (unsigned long) skb->data & (PAGE_SIZE - 1),
1420                             skb_headlen(skb),
1421                             offset, len, skb, spd))
1422                return 1;
1423
1424        /*
1425         * then map the fragments
1426         */
1427        for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) {
1428                const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
1429
1430                if (__splice_segment(f->page, f->page_offset, f->size,
1431                                     offset, len, skb, spd))
1432                        return 1;
1433        }
1434
1435        return 0;
1436}
1437
1438/*
1439 * Map data from the skb to a pipe. Should handle both the linear part,
1440 * the fragments, and the frag list. It does NOT handle frag lists within
1441 * the frag list, if such a thing exists. We'd probably need to recurse to
1442 * handle that cleanly.
1443 */
1444int skb_splice_bits(struct sk_buff *__skb, unsigned int offset,
1445                    struct pipe_inode_info *pipe, unsigned int tlen,
1446                    unsigned int flags)
1447{
1448        struct partial_page partial[PIPE_BUFFERS];
1449        struct page *pages[PIPE_BUFFERS];
1450        struct splice_pipe_desc spd = {
1451                .pages = pages,
1452                .partial = partial,
1453                .flags = flags,
1454                .ops = &sock_pipe_buf_ops,
1455                .spd_release = sock_spd_release,
1456        };
1457        struct sk_buff *skb;
1458
1459        /*
1460         * I'd love to avoid the clone here, but tcp_read_sock()
 1461         * ignores reference counts and unconditionally kills the sk_buff
1462         * on return from the actor.
1463         */
1464        skb = skb_clone(__skb, GFP_KERNEL);
1465        if (unlikely(!skb))
1466                return -ENOMEM;
1467
1468        /*
1469         * __skb_splice_bits() only fails if the output has no room left,
1470         * so no point in going over the frag_list for the error case.
1471         */
1472        if (__skb_splice_bits(skb, &offset, &tlen, &spd))
1473                goto done;
1474        else if (!tlen)
1475                goto done;
1476
1477        /*
1478         * now see if we have a frag_list to map
1479         */
1480        if (skb_shinfo(skb)->frag_list) {
1481                struct sk_buff *list = skb_shinfo(skb)->frag_list;
1482
1483                for (; list && tlen; list = list->next) {
1484                        if (__skb_splice_bits(list, &offset, &tlen, &spd))
1485                                break;
1486                }
1487        }
1488
1489done:
1490        /*
1491         * drop our reference to the clone, the pipe consumption will
1492         * drop the rest.
1493         */
1494        kfree_skb(skb);
1495
1496        if (spd.nr_pages) {
1497                int ret;
1498                struct sock *sk = __skb->sk;
1499
1500                /*
1501                 * Drop the socket lock, otherwise we have reverse
1502                 * locking dependencies between sk_lock and i_mutex
1503                 * here as compared to sendfile(). We enter here
1504                 * with the socket lock held, and splice_to_pipe() will
1505                 * grab the pipe inode lock. For sendfile() emulation,
1506                 * we call into ->sendpage() with the i_mutex lock held
1507                 * and networking will grab the socket lock.
1508                 */
1509                release_sock(sk);
1510                ret = splice_to_pipe(pipe, &spd);
1511                lock_sock(sk);
1512                return ret;
1513        }
1514
1515        return 0;
1516}
1517
1518/**
1519 *        skb_store_bits - store bits from kernel buffer to skb
1520 *        @skb: destination buffer
1521 *        @offset: offset in destination
1522 *        @from: source buffer
1523 *        @len: number of bytes to copy
1524 *
1525 *        Copy the specified number of bytes from the source buffer to the
1526 *        destination skb.  This function handles all the messy bits of
1527 *        traversing fragment lists and such.
1528 */
1529
1530int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len)
1531{
1532        int i, copy;
1533        int start = skb_headlen(skb);
1534
1535        if (offset > (int)skb->len - len)
1536                goto fault;
1537
1538        if ((copy = start - offset) > 0) {
1539                if (copy > len)
1540                        copy = len;
1541                skb_copy_to_linear_data_offset(skb, offset, from, copy);
1542                if ((len -= copy) == 0)
1543                        return 0;
1544                offset += copy;
1545                from += copy;
1546        }
1547
1548        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1549                skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
1550                int end;
1551
1552                WARN_ON(start > offset + len);
1553
1554                end = start + frag->size;
1555                if ((copy = end - offset) > 0) {
1556                        u8 *vaddr;
1557
1558                        if (copy > len)
1559                                copy = len;
1560
1561                        vaddr = kmap_skb_frag(frag);
1562                        memcpy(vaddr + frag->page_offset + offset - start,
1563                               from, copy);
1564                        kunmap_skb_frag(vaddr);
1565
1566                        if ((len -= copy) == 0)
1567                                return 0;
1568                        offset += copy;
1569                        from += copy;
1570                }
1571                start = end;
1572        }
1573
1574        if (skb_shinfo(skb)->frag_list) {
1575                struct sk_buff *list = skb_shinfo(skb)->frag_list;
1576
1577                for (; list; list = list->next) {
1578                        int end;
1579
1580                        WARN_ON(start > offset + len);
1581
1582                        end = start + list->len;
1583                        if ((copy = end - offset) > 0) {
1584                                if (copy > len)
1585                                        copy = len;
1586                                if (skb_store_bits(list, offset - start,
1587                                                   from, copy))
1588                                        goto fault;
1589                                if ((len -= copy) == 0)
1590                                        return 0;
1591                                offset += copy;
1592                                from += copy;
1593                        }
1594                        start = end;
1595                }
1596        }
1597        if (!len)
1598                return 0;
1599
1600fault:
1601        return -EFAULT;
1602}
1603
1604EXPORT_SYMBOL(skb_store_bits);
1605
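/*
 * [Editor's sketch, not part of the reported 2.6.28 source]  A hedged
 * illustration of how a caller might pair skb_copy_bits() with the
 * skb_store_bits() above to patch a few bytes inside a possibly
 * non-linear skb.  The offset and payload are invented, and the caller
 * is assumed to own the skb data (i.e. it is not a shared clone).
 */
static int example_patch_skb(struct sk_buff *skb)
{
        u8 cur[4], patch[4] = { 0xde, 0xad, 0xbe, 0xef };
        int offset = 16;        /* hypothetical offset into the packet */

        /* read the current bytes first (not used further in this sketch) */
        if (skb_copy_bits(skb, offset, cur, sizeof(cur)))
                return -EFAULT; /* offset/len would run past skb->len */
        return skb_store_bits(skb, offset, patch, sizeof(patch));
}
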
1606/* Checksum skb data. */
1607
1608__wsum skb_checksum(const struct sk_buff *skb, int offset,
1609                          int len, __wsum csum)
1610{
1611        int start = skb_headlen(skb);
1612        int i, copy = start - offset;
1613        int pos = 0;
1614
1615        /* Checksum header. */
1616        if (copy > 0) {
1617                if (copy > len)
1618                        copy = len;
1619                csum = csum_partial(skb->data + offset, copy, csum);
1620                if ((len -= copy) == 0)
1621                        return csum;
1622                offset += copy;
1623                pos        = copy;
1624        }
1625
1626        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1627                int end;
1628
1629                WARN_ON(start > offset + len);
1630
1631                end = start + skb_shinfo(skb)->frags[i].size;
1632                if ((copy = end - offset) > 0) {
1633                        __wsum csum2;
1634                        u8 *vaddr;
1635                        skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
1636
1637                        if (copy > len)
1638                                copy = len;
1639                        vaddr = kmap_skb_frag(frag);
1640                        csum2 = csum_partial(vaddr + frag->page_offset +
1641                                             offset - start, copy, 0);
1642                        kunmap_skb_frag(vaddr);
1643                        csum = csum_block_add(csum, csum2, pos);
1644                        if (!(len -= copy))
1645                                return csum;
1646                        offset += copy;
1647                        pos    += copy;
1648                }
1649                start = end;
1650        }
1651
1652        if (skb_shinfo(skb)->frag_list) {
1653                struct sk_buff *list = skb_shinfo(skb)->frag_list;
1654
1655                for (; list; list = list->next) {
1656                        int end;
1657
1658                        WARN_ON(start > offset + len);
1659
1660                        end = start + list->len;
1661                        if ((copy = end - offset) > 0) {
1662                                __wsum csum2;
1663                                if (copy > len)
1664                                        copy = len;
1665                                csum2 = skb_checksum(list, offset - start,
1666                                                     copy, 0);
1667                                csum = csum_block_add(csum, csum2, pos);
1668                                if ((len -= copy) == 0)
1669                                        return csum;
1670                                offset += copy;
1671                                pos    += copy;
1672                        }
1673                        start = end;
1674                }
1675        }
1676        BUG_ON(len);
1677
1678        return csum;
1679}
1680
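/*
 * [Editor's sketch, not part of the reported source]  skb_checksum() is
 * normally consumed like this: checksum everything past a header of
 * "thlen" bytes (a hypothetical transport header length) and fold the
 * 32-bit partial sum into the final 16-bit Internet checksum.
 */
static __sum16 example_payload_csum(const struct sk_buff *skb, int thlen)
{
        __wsum csum;

        if (thlen < 0 || thlen > skb->len)
                return 0;
        csum = skb_checksum(skb, thlen, skb->len - thlen, 0);
        return csum_fold(csum);
}
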
1681/* Both of above in one bottle. */
1682
1683__wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
1684                                    u8 *to, int len, __wsum csum)
1685{
1686        int start = skb_headlen(skb);
1687        int i, copy = start - offset;
1688        int pos = 0;
1689
1690        /* Copy header. */
1691        if (copy > 0) {
1692                if (copy > len)
1693                        copy = len;
1694                csum = csum_partial_copy_nocheck(skb->data + offset, to,
1695                                                 copy, csum);
1696                if ((len -= copy) == 0)
1697                        return csum;
1698                offset += copy;
1699                to     += copy;
1700                pos        = copy;
1701        }
1702
1703        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1704                int end;
1705
1706                WARN_ON(start > offset + len);
1707
1708                end = start + skb_shinfo(skb)->frags[i].size;
1709                if ((copy = end - offset) > 0) {
1710                        __wsum csum2;
1711                        u8 *vaddr;
1712                        skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
1713
1714                        if (copy > len)
1715                                copy = len;
1716                        vaddr = kmap_skb_frag(frag);
1717                        csum2 = csum_partial_copy_nocheck(vaddr +
1718                                                          frag->page_offset +
1719                                                          offset - start, to,
1720                                                          copy, 0);
1721                        kunmap_skb_frag(vaddr);
1722                        csum = csum_block_add(csum, csum2, pos);
1723                        if (!(len -= copy))
1724                                return csum;
1725                        offset += copy;
1726                        to     += copy;
1727                        pos    += copy;
1728                }
1729                start = end;
1730        }
1731
1732        if (skb_shinfo(skb)->frag_list) {
1733                struct sk_buff *list = skb_shinfo(skb)->frag_list;
1734
1735                for (; list; list = list->next) {
1736                        __wsum csum2;
1737                        int end;
1738
1739                        WARN_ON(start > offset + len);
1740
1741                        end = start + list->len;
1742                        if ((copy = end - offset) > 0) {
1743                                if (copy > len)
1744                                        copy = len;
1745                                csum2 = skb_copy_and_csum_bits(list,
1746                                                               offset - start,
1747                                                               to, copy, 0);
1748                                csum = csum_block_add(csum, csum2, pos);
1749                                if ((len -= copy) == 0)
1750                                        return csum;
1751                                offset += copy;
1752                                to     += copy;
1753                                pos    += copy;
1754                        }
1755                        start = end;
1756                }
1757        }
1758        BUG_ON(len);
1759        return csum;
1760}
1761
1762void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
1763{
1764        __wsum csum;
1765        long csstart;
1766
1767        if (skb->ip_summed == CHECKSUM_PARTIAL)
1768                csstart = skb->csum_start - skb_headroom(skb);
1769        else
1770                csstart = skb_headlen(skb);
1771
1772        BUG_ON(csstart > skb_headlen(skb));
1773
1774        skb_copy_from_linear_data(skb, to, csstart);
1775
1776        csum = 0;
1777        if (csstart != skb->len)
1778                csum = skb_copy_and_csum_bits(skb, csstart, to + csstart,
1779                                              skb->len - csstart, 0);
1780
1781        if (skb->ip_summed == CHECKSUM_PARTIAL) {
1782                long csstuff = csstart + skb->csum_offset;
1783
1784                *((__sum16 *)(to + csstuff)) = csum_fold(csum);
1785        }
1786}
1787
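/*
 * [Editor's sketch]  skb_copy_and_csum_dev() is aimed at drivers that must
 * copy the packet into a device-owned linear buffer anyway: it copies
 * skb->len bytes into "buf" and, for CHECKSUM_PARTIAL skbs, writes the
 * folded checksum at csum_start/csum_offset.  "buf" is assumed to hold at
 * least skb->len bytes.
 */
static void example_tx_copy(const struct sk_buff *skb, u8 *buf)
{
        skb_copy_and_csum_dev(skb, buf);
        /* buf now holds the whole frame with the checksum filled in */
}
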
1788/**
1789 *        skb_dequeue - remove from the head of the queue
1790 *        @list: list to dequeue from
1791 *
1792 *        Remove the head of the list. The list lock is taken so the function
1793 *        may be used safely with other locking list functions. The head item is
1794 *        returned or %NULL if the list is empty.
1795 */
1796
1797struct sk_buff *skb_dequeue(struct sk_buff_head *list)
1798{
1799        unsigned long flags;
1800        struct sk_buff *result;
1801
1802        spin_lock_irqsave(&list->lock, flags);
1803        result = __skb_dequeue(list);
1804        spin_unlock_irqrestore(&list->lock, flags);
1805        return result;
1806}
1807
1808/**
1809 *        skb_dequeue_tail - remove from the tail of the queue
1810 *        @list: list to dequeue from
1811 *
1812 *        Remove the tail of the list. The list lock is taken so the function
1813 *        may be used safely with other locking list functions. The tail item is
1814 *        returned or %NULL if the list is empty.
1815 */
1816struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list)
1817{
1818        unsigned long flags;
1819        struct sk_buff *result;
1820
1821        spin_lock_irqsave(&list->lock, flags);
1822        result = __skb_dequeue_tail(list);
1823        spin_unlock_irqrestore(&list->lock, flags);
1824        return result;
1825}
1826
1827/**
1828 *        skb_queue_purge - empty a list
1829 *        @list: list to empty
1830 *
1831 *        Delete all buffers on an &sk_buff list. Each buffer is removed from
1832 *        the list and one reference dropped. This function takes the list
1833 *        lock and is atomic with respect to other list locking functions.
1834 */
1835void skb_queue_purge(struct sk_buff_head *list)
1836{
1837        struct sk_buff *skb;
1838        while ((skb = skb_dequeue(list)) != NULL)
1839                kfree_skb(skb);
1840}
1841
1842/**
1843 *        skb_queue_head - queue a buffer at the list head
1844 *        @list: list to use
1845 *        @newsk: buffer to queue
1846 *
1847 *        Queue a buffer at the start of the list. This function takes the
1848 *        list lock and can be used safely with other locking &sk_buff
1849 *        functions.
1850 *
1851 *        A buffer cannot be placed on two lists at the same time.
1852 */
1853void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk)
1854{
1855        unsigned long flags;
1856
1857        spin_lock_irqsave(&list->lock, flags);
1858        __skb_queue_head(list, newsk);
1859        spin_unlock_irqrestore(&list->lock, flags);
1860}
1861
1862/**
1863 *        skb_queue_tail - queue a buffer at the list tail
1864 *        @list: list to use
1865 *        @newsk: buffer to queue
1866 *
1867 *        Queue a buffer at the tail of the list. This function takes the
1868 *        list lock and can be used safely with other locking &sk_buff
1869 *        functions.
1870 *
1871 *        A buffer cannot be placed on two lists at the same time.
1872 */
1873void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
1874{
1875        unsigned long flags;
1876
1877        spin_lock_irqsave(&list->lock, flags);
1878        __skb_queue_tail(list, newsk);
1879        spin_unlock_irqrestore(&list->lock, flags);
1880}
1881
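/*
 * [Editor's sketch]  The locked queue helpers above are typically paired
 * like this: one context queues at the tail, another dequeues from the
 * head, and teardown purges whatever is left.  A local queue is used here
 * purely for illustration; real users keep the sk_buff_head in a
 * long-lived structure.
 */
static void example_queue_usage(struct sk_buff *skb)
{
        struct sk_buff_head q;

        skb_queue_head_init(&q);

        skb_queue_tail(&q, skb);        /* producer side */

        skb = skb_dequeue(&q);          /* consumer side, NULL when empty */
        if (skb)
                kfree_skb(skb);

        skb_queue_purge(&q);            /* teardown: drop anything left */
}
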
1882/**
1883 *        skb_unlink        -        remove a buffer from a list
1884 *        @skb: buffer to remove
1885 *        @list: list to use
1886 *
1887 *        Remove a packet from a list. The list locks are taken and this
1888 *        function is atomic with respect to other list locked calls
1889 *
1890 *        You must know what list the SKB is on.
1891 */
1892void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
1893{
1894        unsigned long flags;
1895
1896        spin_lock_irqsave(&list->lock, flags);
1897        __skb_unlink(skb, list);
1898        spin_unlock_irqrestore(&list->lock, flags);
1899}
1900
1901/**
1902 *        skb_append        -        append a buffer
1903 *        @old: buffer to insert after
1904 *        @newsk: buffer to insert
1905 *        @list: list to use
1906 *
1907 *        Place a packet after a given packet in a list. The list locks are taken
1908 *        and this function is atomic with respect to other list locked calls.
1909 *        A buffer cannot be placed on two lists at the same time.
1910 */
1911void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list)
1912{
1913        unsigned long flags;
1914
1915        spin_lock_irqsave(&list->lock, flags);
1916        __skb_queue_after(list, old, newsk);
1917        spin_unlock_irqrestore(&list->lock, flags);
1918}
1919
1920
1921/**
1922 *        skb_insert        -        insert a buffer
1923 *        @old: buffer to insert before
1924 *        @newsk: buffer to insert
1925 *        @list: list to use
1926 *
1927 *        Place a packet before a given packet in a list. The list locks are
1928 *        taken and this function is atomic with respect to other list locked
1929 *        calls.
1930 *
1931 *        A buffer cannot be placed on two lists at the same time.
1932 */
1933void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list)
1934{
1935        unsigned long flags;
1936
1937        spin_lock_irqsave(&list->lock, flags);
1938        __skb_insert(newsk, old->prev, old, list);
1939        spin_unlock_irqrestore(&list->lock, flags);
1940}
1941
1942static inline void skb_split_inside_header(struct sk_buff *skb,
1943                                           struct sk_buff* skb1,
1944                                           const u32 len, const int pos)
1945{
1946        int i;
1947
1948        skb_copy_from_linear_data_offset(skb, len, skb_put(skb1, pos - len),
1949                                         pos - len);
1950        /* And move data appendix as is. */
1951        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1952                skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i];
1953
1954        skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags;
1955        skb_shinfo(skb)->nr_frags  = 0;
1956        skb1->data_len                   = skb->data_len;
1957        skb1->len                   += skb1->data_len;
1958        skb->data_len                   = 0;
1959        skb->len                   = len;
1960        skb_set_tail_pointer(skb, len);
1961}
1962
1963static inline void skb_split_no_header(struct sk_buff *skb,
1964                                       struct sk_buff* skb1,
1965                                       const u32 len, int pos)
1966{
1967        int i, k = 0;
1968        const int nfrags = skb_shinfo(skb)->nr_frags;
1969
1970        skb_shinfo(skb)->nr_frags = 0;
1971        skb1->len                  = skb1->data_len = skb->len - len;
1972        skb->len                  = len;
1973        skb->data_len                  = len - pos;
1974
1975        for (i = 0; i < nfrags; i++) {
1976                int size = skb_shinfo(skb)->frags[i].size;
1977
1978                if (pos + size > len) {
1979                        skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i];
1980
1981                        if (pos < len) {
1982                                /* Split frag.
1983                                 * We have two variants in this case:
1984                                 * 1. Move all of the frag to the second
1985                                 *    part, if it is possible. E.g.
1986                                 *    this approach is mandatory for TUX,
1987                                 *    where splitting is expensive.
1988                                 * 2. Split accurately at the boundary; we do this.
1989                                 */
1990                                get_page(skb_shinfo(skb)->frags[i].page);
1991                                skb_shinfo(skb1)->frags[0].page_offset += len - pos;
1992                                skb_shinfo(skb1)->frags[0].size -= len - pos;
1993                                skb_shinfo(skb)->frags[i].size        = len - pos;
1994                                skb_shinfo(skb)->nr_frags++;
1995                        }
1996                        k++;
1997                } else
1998                        skb_shinfo(skb)->nr_frags++;
1999                pos += size;
2000        }
2001        skb_shinfo(skb1)->nr_frags = k;
2002}
2003
2004/**
2005 * skb_split - Split fragmented skb to two parts at length len.
2006 * @skb: the buffer to split
2007 * @skb1: the buffer to receive the second part
2008 * @len: new length for skb
2009 */
2010void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
2011{
2012        int pos = skb_headlen(skb);
2013
2014        if (len < pos)        /* Split line is inside header. */
2015                skb_split_inside_header(skb, skb1, len, pos);
2016        else                /* Second chunk has no header, nothing to copy. */
2017                skb_split_no_header(skb, skb1, len, pos);
2018}
2019
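/*
 * [Editor's sketch]  skb_split() as used in TCP-style fragmentation: the
 * caller allocates the second skb and asks for the first "len" bytes to
 * stay in "skb" while the remainder moves to "skb1".  Header setup for
 * skb1 is the caller's job and is omitted; the linear allocation below is
 * sized for the worst case where the split point falls inside the header.
 */
static struct sk_buff *example_split(struct sk_buff *skb, u32 len)
{
        struct sk_buff *skb1 = alloc_skb(skb_headlen(skb), GFP_ATOMIC);

        if (!skb1)
                return NULL;
        skb_split(skb, skb1, len);      /* skb keeps [0, len), skb1 gets the rest */
        return skb1;
}
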
2020/**
2021 * skb_prepare_seq_read - Prepare a sequential read of skb data
2022 * @skb: the buffer to read
2023 * @from: lower offset of data to be read
2024 * @to: upper offset of data to be read
2025 * @st: state variable
2026 *
2027 * Initializes the specified state variable. Must be called before
2028 * invoking skb_seq_read() for the first time.
2029 */
2030void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from,
2031                          unsigned int to, struct skb_seq_state *st)
2032{
2033        st->lower_offset = from;
2034        st->upper_offset = to;
2035        st->root_skb = st->cur_skb = skb;
2036        st->frag_idx = st->stepped_offset = 0;
2037        st->frag_data = NULL;
2038}
2039
2040/**
2041 * skb_seq_read - Sequentially read skb data
2042 * @consumed: number of bytes consumed by the caller so far
2043 * @data: destination pointer for data to be returned
2044 * @st: state variable
2045 *
2046 * Reads a block of skb data at &consumed relative to the
2047 * lower offset specified to skb_prepare_seq_read(). Assigns
2048 * the head of the data block to &data and returns the length
2049 * of the block or 0 if the end of the skb data or the upper
2050 * offset has been reached.
2051 *
2052 * The caller is not required to consume all of the data
2053 * returned, i.e. &consumed is typically set to the number
2054 * of bytes already consumed and the next call to
2055 * skb_seq_read() will return the remaining part of the block.
2056 *
2057 * Note 1: The size of each block of data returned can be arbitrary;
2058 *       this limitation is the cost for zerocopy sequential
2059 *       reads of potentially non-linear data.
2060 *
2061 * Note 2: Fragment lists within fragments are not implemented
2062 *       at the moment, state->root_skb could be replaced with
2063 *       a stack for this purpose.
2064 */
2065unsigned int skb_seq_read(unsigned int consumed, const u8 **data,
2066                          struct skb_seq_state *st)
2067{
2068        unsigned int block_limit, abs_offset = consumed + st->lower_offset;
2069        skb_frag_t *frag;
2070
2071        if (unlikely(abs_offset >= st->upper_offset))
2072                return 0;
2073
2074next_skb:
2075        block_limit = skb_headlen(st->cur_skb);
2076
2077        if (abs_offset < block_limit) {
2078                *data = st->cur_skb->data + abs_offset;
2079                return block_limit - abs_offset;
2080        }
2081
2082        if (st->frag_idx == 0 && !st->frag_data)
2083                st->stepped_offset += skb_headlen(st->cur_skb);
2084
2085        while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) {
2086                frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx];
2087                block_limit = frag->size + st->stepped_offset;
2088
2089                if (abs_offset < block_limit) {
2090                        if (!st->frag_data)
2091                                st->frag_data = kmap_skb_frag(frag);
2092
2093                        *data = (u8 *) st->frag_data + frag->page_offset +
2094                                (abs_offset - st->stepped_offset);
2095
2096                        return block_limit - abs_offset;
2097                }
2098
2099                if (st->frag_data) {
2100                        kunmap_skb_frag(st->frag_data);
2101                        st->frag_data = NULL;
2102                }
2103
2104                st->frag_idx++;
2105                st->stepped_offset += frag->size;
2106        }
2107
2108        if (st->frag_data) {
2109                kunmap_skb_frag(st->frag_data);
2110                st->frag_data = NULL;
2111        }
2112
2113        if (st->cur_skb->next) {
2114                st->cur_skb = st->cur_skb->next;
2115                st->frag_idx = 0;
2116                goto next_skb;
2117        } else if (st->root_skb == st->cur_skb &&
2118                   skb_shinfo(st->root_skb)->frag_list) {
2119                st->cur_skb = skb_shinfo(st->root_skb)->frag_list;
2120                goto next_skb;
2121        }
2122
2123        return 0;
2124}
2125
2126/**
2127 * skb_abort_seq_read - Abort a sequential read of skb data
2128 * @st: state variable
2129 *
2130 * Must be called if the sequential read was abandoned before
2131 * skb_seq_read() returned 0.
2132 */
2133void skb_abort_seq_read(struct skb_seq_state *st)
2134{
2135        if (st->frag_data)
2136                kunmap_skb_frag(st->frag_data);
2137}
2138
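/*
 * [Editor's sketch]  The calling sequence the three functions above
 * implement: prepare the state, call skb_seq_read() in a loop until it
 * returns 0, and call skb_abort_seq_read() only if the loop is abandoned
 * early.  The byte counting is just an illustration.
 */
static unsigned int example_walk_skb(struct sk_buff *skb, unsigned int len)
{
        struct skb_seq_state st;
        unsigned int consumed = 0, chunk;
        const u8 *data;

        skb_prepare_seq_read(skb, 0, len, &st);
        while ((chunk = skb_seq_read(consumed, &data, &st)) != 0) {
                /* "data" points at "chunk" contiguous bytes of skb data */
                consumed += chunk;
        }
        /* skb_seq_read() returned 0, so no skb_abort_seq_read() is needed */
        return consumed;
}
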
2139#define TS_SKB_CB(state)        ((struct skb_seq_state *) &((state)->cb))
2140
2141static unsigned int skb_ts_get_next_block(unsigned int offset, const u8 **text,
2142                                          struct ts_config *conf,
2143                                          struct ts_state *state)
2144{
2145        return skb_seq_read(offset, text, TS_SKB_CB(state));
2146}
2147
2148static void skb_ts_finish(struct ts_config *conf, struct ts_state *state)
2149{
2150        skb_abort_seq_read(TS_SKB_CB(state));
2151}
2152
2153/**
2154 * skb_find_text - Find a text pattern in skb data
2155 * @skb: the buffer to look in
2156 * @from: search offset
2157 * @to: search limit
2158 * @config: textsearch configuration
2159 * @state: uninitialized textsearch state variable
2160 *
2161 * Finds a pattern in the skb data according to the specified
2162 * textsearch configuration. Use textsearch_next() to retrieve
2163 * subsequent occurrences of the pattern. Returns the offset
2164 * to the first occurrence or UINT_MAX if no match was found.
2165 */
2166unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
2167                           unsigned int to, struct ts_config *config,
2168                           struct ts_state *state)
2169{
2170        unsigned int ret;
2171
2172        config->get_next_block = skb_ts_get_next_block;
2173        config->finish = skb_ts_finish;
2174
2175        skb_prepare_seq_read(skb, from, to, TS_SKB_CB(state));
2176
2177        ret = textsearch_find(config, state);
2178        return (ret <= to - from ? ret : UINT_MAX);
2179}
2180
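/*
 * [Editor's sketch]  skb_find_text() with a Boyer-Moore textsearch config,
 * assuming the "bm" algorithm is available.  The pattern is invented;
 * textsearch_prepare()/textsearch_destroy() and TS_AUTOLOAD come from
 * <linux/textsearch.h>, and IS_ERR() from <linux/err.h>, neither of which
 * this file includes itself.
 */
static unsigned int example_find(struct sk_buff *skb)
{
        struct ts_config *conf;
        struct ts_state state;
        unsigned int pos;

        conf = textsearch_prepare("bm", "GET ", 4, GFP_ATOMIC, TS_AUTOLOAD);
        if (IS_ERR(conf))
                return UINT_MAX;

        pos = skb_find_text(skb, 0, skb->len, conf, &state);
        textsearch_destroy(conf);
        return pos;     /* offset of the first match, or UINT_MAX */
}
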
2181/**
2182 * skb_append_datato_frags - append the user data to a skb
2183 * @sk: sock structure
2184 * @skb: skb structure to be appended with user data.
2185 * @getfrag: call back function to be used for getting the user data
2186 * @from: pointer to user message iov
2187 * @length: length of the iov message
2188 *
2189 * Description: This procedure appends the user data to the fragment part
2190 * of the skb. If any page allocation fails, this procedure returns -ENOMEM.
2191 */
2192int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
2193                        int (*getfrag)(void *from, char *to, int offset,
2194                                        int len, int odd, struct sk_buff *skb),
2195                        void *from, int length)
2196{
2197        int frg_cnt = 0;
2198        skb_frag_t *frag = NULL;
2199        struct page *page = NULL;
2200        int copy, left;
2201        int offset = 0;
2202        int ret;
2203
2204        do {
2205                /* Return error if we don't have space for new frag */
2206                frg_cnt = skb_shinfo(skb)->nr_frags;
2207                if (frg_cnt >= MAX_SKB_FRAGS)
2208                        return -EFAULT;
2209
2210                /* allocate a new page for next frag */
2211                page = alloc_pages(sk->sk_allocation, 0);
2212
2213                /* If alloc_pages() fails, just return failure and the caller
2214                 * will free previously allocated pages by doing kfree_skb()
2215                 */
2216                if (page == NULL)
2217                        return -ENOMEM;
2218
2219                /* initialize the next frag */
2220                sk->sk_sndmsg_page = page;
2221                sk->sk_sndmsg_off = 0;
2222                skb_fill_page_desc(skb, frg_cnt, page, 0, 0);
2223                skb->truesize += PAGE_SIZE;
2224                atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
2225
2226                /* get the new initialized frag */
2227                frg_cnt = skb_shinfo(skb)->nr_frags;
2228                frag = &skb_shinfo(skb)->frags[frg_cnt - 1];
2229
2230                /* copy the user data to page */
2231                left = PAGE_SIZE - frag->page_offset;
2232                copy = (length > left)? left : length;
2233
2234                ret = getfrag(from, (page_address(frag->page) +
2235                            frag->page_offset + frag->size),
2236                            offset, copy, 0, skb);
2237                if (ret < 0)
2238                        return -EFAULT;
2239
2240                /* copy was successful so update the size parameters */
2241                sk->sk_sndmsg_off += copy;
2242                frag->size += copy;
2243                skb->len += copy;
2244                skb->data_len += copy;
2245                offset += copy;
2246                length -= copy;
2247
2248        } while (length > 0);
2249
2250        return 0;
2251}
2252
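/*
 * [Editor's sketch]  skb_append_datato_frags() pulls its data through the
 * caller-supplied getfrag() callback.  A minimal callback that copies from
 * a plain kernel buffer could look like this; real callers (e.g. UDP-Lite)
 * pass ip_generic_getfrag() and a user iovec instead.  A caller would then
 * do something like:
 *   skb_append_datato_frags(sk, skb, example_getfrag, kbuf, kbuf_len);
 */
static int example_getfrag(void *from, char *to, int offset, int len,
                           int odd, struct sk_buff *skb)
{
        memcpy(to, (char *)from + offset, len);
        return 0;
}
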
2253/**
2254 *        skb_pull_rcsum - pull skb and update receive checksum
2255 *        @skb: buffer to update
2256 *        @len: length of data pulled
2257 *
2258 *        This function performs an skb_pull on the packet and updates
2259 *        the CHECKSUM_COMPLETE checksum.  It should be used on
2260 *        receive path processing instead of skb_pull unless you know
2261 *        that the checksum difference is zero (e.g., a valid IP header)
2262 *        or you are setting ip_summed to CHECKSUM_NONE.
2263 */
2264unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
2265{
2266        BUG_ON(len > skb->len);
2267        skb->len -= len;
2268        BUG_ON(skb->len < skb->data_len);
2269        skb_postpull_rcsum(skb, skb->data, len);
2270        return skb->data += len;
2271}
2272
2273EXPORT_SYMBOL_GPL(skb_pull_rcsum);
2274
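/*
 * [Editor's sketch]  Typical receive-path use of skb_pull_rcsum(): strip a
 * header of "hlen" bytes while keeping a CHECKSUM_COMPLETE value in
 * skb->csum consistent.  "hlen" is hypothetical and, per the BUG_ON()s
 * above, must not exceed the linear head (skb_headlen()).
 */
static void example_strip_header(struct sk_buff *skb, unsigned int hlen)
{
        skb_pull_rcsum(skb, hlen);
        /* skb->data now points past the header; skb->csum was adjusted */
}
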
2275/**
2276 *        skb_segment - Perform protocol segmentation on skb.
2277 *        @skb: buffer to segment
2278 *        @features: features for the output path (see dev->features)
2279 *
2280 *        This function performs segmentation on the given skb.  It returns
2281 *        a pointer to the first in a list of new skbs for the segments.
2282 *        In case of error it returns ERR_PTR(err).
2283 */
2284struct sk_buff *skb_segment(struct sk_buff *skb, int features)
2285{
2286        struct sk_buff *segs = NULL;
2287        struct sk_buff *tail = NULL;
2288        unsigned int mss = skb_shinfo(skb)->gso_size;
2289        unsigned int doffset = skb->data - skb_mac_header(skb);
2290        unsigned int offset = doffset;
2291        unsigned int headroom;
2292        unsigned int len;
2293        int sg = features & NETIF_F_SG;
2294        int nfrags = skb_shinfo(skb)->nr_frags;
2295        int err = -ENOMEM;
2296        int i = 0;
2297        int pos;
2298
2299        __skb_push(skb, doffset);
2300        headroom = skb_headroom(skb);
2301        pos = skb_headlen(skb);
2302
2303        do {
2304                struct sk_buff *nskb;
2305                skb_frag_t *frag;
2306                int hsize;
2307                int k;
2308                int size;
2309
2310                len = skb->len - offset;
2311                if (len > mss)
2312                        len = mss;
2313
2314                hsize = skb_headlen(skb) - offset;
2315                if (hsize < 0)
2316                        hsize = 0;
2317                if (hsize > len || !sg)
2318                        hsize = len;
2319
2320                nskb = alloc_skb(hsize + doffset + headroom, GFP_ATOMIC);
2321                if (unlikely(!nskb))
2322                        goto err;
2323
2324                if (segs)
2325                        tail->next = nskb;
2326                else
2327                        segs = nskb;
2328                tail = nskb;
2329
2330                __copy_skb_header(nskb, skb);
2331                nskb->mac_len = skb->mac_len;
2332
2333                skb_reserve(nskb, headroom);
2334                skb_reset_mac_header(nskb);
2335                skb_set_network_header(nskb, skb->mac_len);
2336                nskb->transport_header = (nskb->network_header +
2337                                          skb_network_header_len(skb));
2338                skb_copy_from_linear_data(skb, skb_put(nskb, doffset),
2339                                          doffset);
2340                if (!sg) {
2341                        nskb->ip_summed = CHECKSUM_NONE;
2342                        nskb->csum = skb_copy_and_csum_bits(skb, offset,
2343                                                            skb_put(nskb, len),
2344                                                            len, 0);
2345                        continue;
2346                }
2347
2348                frag = skb_shinfo(nskb)->frags;
2349                k = 0;
2350
2351                skb_copy_from_linear_data_offset(skb, offset,
2352                                                 skb_put(nskb, hsize), hsize);
2353
2354                while (pos < offset + len) {
2355                        BUG_ON(i >= nfrags);
2356
2357                        *frag = skb_shinfo(skb)->frags[i];
2358                        get_page(frag->page);
2359                        size = frag->size;
2360
2361                        if (pos < offset) {
2362                                frag->page_offset += offset - pos;
2363                                frag->size -= offset - pos;
2364                        }
2365
2366                        k++;
2367
2368                        if (pos + size <= offset + len) {
2369                                i++;
2370                                pos += size;
2371                        } else {
2372                                frag->size -= pos + size - (offset + len);
2373                                break;
2374                        }
2375
2376                        frag++;
2377                }
2378
2379                skb_shinfo(nskb)->nr_frags = k;
2380                nskb->data_len = len - hsize;
2381                nskb->len += nskb->data_len;
2382                nskb->truesize += nskb->data_len;
2383        } while ((offset += len) < skb->len);
2384
2385        return segs;
2386
2387err:
2388        while ((skb = segs)) {
2389                segs = skb->next;
2390                kfree_skb(skb);
2391        }
2392        return ERR_PTR(err);
2393}
2394
2395EXPORT_SYMBOL_GPL(skb_segment);
2396
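/*
 * [Editor's sketch]  How GSO code consumes skb_segment(): pass the device
 * feature flags, then walk the returned singly linked list of segments,
 * checking for ERR_PTR() as the kernel-doc above describes.  A real caller
 * would hand each segment to the driver; this sketch just frees them.
 */
static int example_segment(struct sk_buff *skb, int features)
{
        struct sk_buff *segs, *nskb;

        segs = skb_segment(skb, features);
        if (IS_ERR(segs))
                return PTR_ERR(segs);

        while ((nskb = segs) != NULL) {
                segs = nskb->next;
                nskb->next = NULL;
                kfree_skb(nskb);        /* stand-in for passing it to a driver */
        }
        return 0;
}
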
2397void __init skb_init(void)
2398{
2399        skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
2400                                              sizeof(struct sk_buff),
2401                                              0,
2402                                              SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2403                                              NULL);
2404        skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
2405                                                (2*sizeof(struct sk_buff)) +
2406                                                sizeof(atomic_t),
2407                                                0,
2408                                                SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2409                                                NULL);
2410}
2411
2412/**
2413 *        skb_to_sgvec - Fill a scatter-gather list from a socket buffer
2414 *        @skb: Socket buffer containing the buffers to be mapped
2415 *        @sg: The scatter-gather list to map into
2416 *        @offset: The offset into the buffer's contents to start mapping
2417 *        @len: Length of buffer space to be mapped
2418 *
2419 *        Fill the specified scatter-gather list with mappings/pointers into a
2420 *        region of the buffer space attached to a socket buffer.
2421 */
2422static int
2423__skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
2424{
2425        int start = skb_headlen(skb);
2426        int i, copy = start - offset;
2427        int elt = 0;
2428
2429        if (copy > 0) {
2430                if (copy > len)
2431                        copy = len;
2432                sg_set_buf(sg, skb->data + offset, copy);
2433                elt++;
2434                if ((len -= copy) == 0)
2435                        return elt;
2436                offset += copy;
2437        }
2438
2439        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
2440                int end;
2441
2442                WARN_ON(start > offset + len);
2443
2444                end = start + skb_shinfo(skb)->frags[i].size;
2445                if ((copy = end - offset) > 0) {
2446                        skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2447
2448                        if (copy > len)
2449                                copy = len;
2450                        sg_set_page(&sg[elt], frag->page, copy,
2451                                        frag->page_offset+offset-start);
2452                        elt++;
2453                        if (!(len -= copy))
2454                                return elt;
2455                        offset += copy;
2456                }
2457                start = end;
2458        }
2459
2460        if (skb_shinfo(skb)->frag_list) {
2461                struct sk_buff *list = skb_shinfo(skb)->frag_list;
2462
2463                for (; list; list = list->next) {
2464                        int end;
2465
2466                        WARN_ON(start > offset + len);
2467
2468                        end = start + list->len;
2469                        if ((copy = end - offset) > 0) {
2470                                if (copy > len)
2471                                        copy = len;
2472                                elt += __skb_to_sgvec(list, sg+elt, offset - start,
2473                                                      copy);
2474                                if ((len -= copy) == 0)
2475                                        return elt;
2476                                offset += copy;
2477                        }
2478                        start = end;
2479                }
2480        }
2481        BUG_ON(len);
2482        return elt;
2483}
2484
2485int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
2486{
2487        int nsg = __skb_to_sgvec(skb, sg, offset, len);
2488
2489        sg_mark_end(&sg[nsg - 1]);
2490
2491        return nsg;
2492}
2493
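/*
 * [Editor's sketch]  skb_to_sgvec() needs a scatterlist large enough for
 * the skb: one entry for the linear head plus one per page fragment is
 * enough when there is no frag_list (otherwise size it from skb_cow_data(),
 * see the sketch further below).  sg_init_table() comes from
 * <linux/scatterlist.h>, which this file already includes.
 */
static int example_map_skb(struct sk_buff *skb, struct scatterlist *sg)
{
        sg_init_table(sg, MAX_SKB_FRAGS + 1);
        return skb_to_sgvec(skb, sg, 0, skb->len);      /* entries actually used */
}
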
2494/**
2495 *        skb_cow_data - Check that a socket buffer's data buffers are writable
2496 *        @skb: The socket buffer to check.
2497 *        @tailbits: Amount of trailing space to be added
2498 *        @trailer: Returned pointer to the skb where the @tailbits space begins
2499 *
2500 *        Make sure that the data buffers attached to a socket buffer are
2501 *        writable. If they are not, private copies are made of the data buffers
2502 *        and the socket buffer is set to use these instead.
2503 *
2504 *        If @tailbits is given, make sure that there is space to write @tailbits
2505 *        bytes of data beyond current end of socket buffer.  @trailer will be
2506 *        set to point to the skb in which this space begins.
2507 *
2508 *        The number of scatterlist elements required to completely map the
2509 *        COW'd and extended socket buffer will be returned.
2510 */
2511int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
2512{
2513        int copyflag;
2514        int elt;
2515        struct sk_buff *skb1, **skb_p;
2516
2517        /* If skb is cloned or its head is paged, reallocate
2518         * head pulling out all the pages (pages are considered not writable
2519         * at the moment even if they are anonymous).
2520         */
2521        if ((skb_cloned(skb) || skb_shinfo(skb)->nr_frags) &&
2522            __pskb_pull_tail(skb, skb_pagelen(skb)-skb_headlen(skb)) == NULL)
2523                return -ENOMEM;
2524
2525        /* Easy case. Most of packets will go this way. */
2526        if (!skb_shinfo(skb)->frag_list) {
2527                /* A bit of trouble: not enough space for the trailer.
2528                 * This should not happen when the stack is tuned to generate
2529                 * good frames. OK, on a miss we reallocate and reserve even more
2530                 * space; 128 bytes is fair. */
2531
2532                if (skb_tailroom(skb) < tailbits &&
2533                    pskb_expand_head(skb, 0, tailbits-skb_tailroom(skb)+128, GFP_ATOMIC))
2534                        return -ENOMEM;
2535
2536                /* Voila! */
2537                *trailer = skb;
2538                return 1;
2539        }
2540
2541        /* Misery. We are in trouble; going to mince the fragments... */
2542
2543        elt = 1;
2544        skb_p = &skb_shinfo(skb)->frag_list;
2545        copyflag = 0;
2546
2547        while ((skb1 = *skb_p) != NULL) {
2548                int ntail = 0;
2549
2550                /* The fragment is partially pulled by someone,
2551                 * this can happen on input. Copy it and everything
2552                 * after it. */
2553
2554                if (skb_shared(skb1))
2555                        copyflag = 1;
2556
2557                /* If the skb is the last, worry about trailer. */
2558
2559                if (skb1->next == NULL && tailbits) {
2560                        if (skb_shinfo(skb1)->nr_frags ||
2561                            skb_shinfo(skb1)->frag_list ||
2562                            skb_tailroom(skb1) < tailbits)
2563                                ntail = tailbits + 128;
2564                }
2565
2566                if (copyflag ||
2567                    skb_cloned(skb1) ||
2568                    ntail ||
2569                    skb_shinfo(skb1)->nr_frags ||
2570                    skb_shinfo(skb1)->frag_list) {
2571                        struct sk_buff *skb2;
2572
2573                        /* Fuck, we are miserable poor guys... */
2574                        if (ntail == 0)
2575                                skb2 = skb_copy(skb1, GFP_ATOMIC);
2576                        else
2577                                skb2 = skb_copy_expand(skb1,
2578                                                       skb_headroom(skb1),
2579                                                       ntail,
2580                                                       GFP_ATOMIC);
2581                        if (unlikely(skb2 == NULL))
2582                                return -ENOMEM;
2583
2584                        if (skb1->sk)
2585                                skb_set_owner_w(skb2, skb1->sk);
2586
2587                        /* Looking around. Are we still alive?
2588                         * OK, link new skb, drop old one */
2589
2590                        skb2->next = skb1->next;
2591                        *skb_p = skb2;
2592                        kfree_skb(skb1);
2593                        skb1 = skb2;
2594                }
2595                elt++;
2596                *trailer = skb1;
2597                skb_p = &skb1->next;
2598        }
2599
2600        return elt;
2601}
2602
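/*
 * [Editor's sketch]  The IPsec-style pattern the kernel-doc above has in
 * mind: make the buffers writable, learn how many scatterlist entries are
 * needed, then map them with skb_to_sgvec().  "tailbits" would be the ICV
 * length in ESP; "max_sg" is whatever the caller allocated.
 */
static int example_cow_and_map(struct sk_buff *skb, int tailbits,
                               struct scatterlist *sg, int max_sg)
{
        struct sk_buff *trailer;
        int nsg = skb_cow_data(skb, tailbits, &trailer);

        if (nsg < 0 || nsg > max_sg)
                return -EINVAL;
        sg_init_table(sg, nsg);
        return skb_to_sgvec(skb, sg, 0, skb->len);
}
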
2603/**
2604 * skb_partial_csum_set - set up and verify partial csum values for packet
2605 * @skb: the skb to set
2606 * @start: the number of bytes after skb->data to start checksumming.
2607 * @off: the offset from start to place the checksum.
2608 *
2609 * For untrusted partially-checksummed packets, we need to make sure the values
2610 * for skb->csum_start and skb->csum_offset are valid so we don't oops.
2611 *
2612 * This function checks and sets those values and skb->ip_summed: if this
2613 * returns false you should drop the packet.
2614 */
2615bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off)
2616{
2617        if (unlikely(start > skb->len - 2) ||
2618            unlikely((int)start + off > skb->len - 2)) {
2619                if (net_ratelimit())
2620                        printk(KERN_WARNING
2621                               "bad partial csum: csum=%u/%u len=%u\n",
2622                               start, off, skb->len);
2623                return false;
2624        }
2625        skb->ip_summed = CHECKSUM_PARTIAL;
2626        skb->csum_start = skb_headroom(skb) + start;
2627        skb->csum_offset = off;
2628        return true;
2629}
2630
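/*
 * [Editor's sketch]  The virtio_net-style caller this helper is written
 * for: csum_start/csum_offset arrive from an untrusted source (a guest
 * supplied header in that case) and must be validated before the skb is
 * injected into the stack.  "struct example_csum_hdr" is hypothetical.
 */
struct example_csum_hdr {
        u16 csum_start;         /* bytes after skb->data where csumming starts */
        u16 csum_offset;        /* where, relative to csum_start, to store it */
};

static int example_set_partial_csum(struct sk_buff *skb,
                                    const struct example_csum_hdr *hdr)
{
        if (!skb_partial_csum_set(skb, hdr->csum_start, hdr->csum_offset))
                return -EINVAL; /* values would overrun the packet: drop it */
        return 0;
}
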
2631void __skb_warn_lro_forwarding(const struct sk_buff *skb)
2632{
2633        if (net_ratelimit())
2634                pr_warning("%s: received packets cannot be forwarded"
2635                           " while LRO is enabled\n", skb->dev->name);
2636}
2637
2638EXPORT_SYMBOL(___pskb_trim);
2639EXPORT_SYMBOL(__kfree_skb);
2640EXPORT_SYMBOL(kfree_skb);
2641EXPORT_SYMBOL(__pskb_pull_tail);
2642EXPORT_SYMBOL(__alloc_skb);
2643EXPORT_SYMBOL(__netdev_alloc_skb);
2644EXPORT_SYMBOL(pskb_copy);
2645EXPORT_SYMBOL(pskb_expand_head);
2646EXPORT_SYMBOL(skb_checksum);
2647EXPORT_SYMBOL(skb_clone);
2648EXPORT_SYMBOL(skb_copy);
2649EXPORT_SYMBOL(skb_copy_and_csum_bits);
2650EXPORT_SYMBOL(skb_copy_and_csum_dev);
2651EXPORT_SYMBOL(skb_copy_bits);
2652EXPORT_SYMBOL(skb_copy_expand);
2653EXPORT_SYMBOL(skb_over_panic);
2654EXPORT_SYMBOL(skb_pad);
2655EXPORT_SYMBOL(skb_realloc_headroom);
2656EXPORT_SYMBOL(skb_under_panic);
2657EXPORT_SYMBOL(skb_dequeue);
2658EXPORT_SYMBOL(skb_dequeue_tail);
2659EXPORT_SYMBOL(skb_insert);
2660EXPORT_SYMBOL(skb_queue_purge);
2661EXPORT_SYMBOL(skb_queue_head);
2662EXPORT_SYMBOL(skb_queue_tail);
2663EXPORT_SYMBOL(skb_unlink);
2664EXPORT_SYMBOL(skb_append);
2665EXPORT_SYMBOL(skb_split);
2666EXPORT_SYMBOL(skb_prepare_seq_read);
2667EXPORT_SYMBOL(skb_seq_read);
2668EXPORT_SYMBOL(skb_abort_seq_read);
2669EXPORT_SYMBOL(skb_find_text);
2670EXPORT_SYMBOL(skb_append_datato_frags);
2671EXPORT_SYMBOL(__skb_warn_lro_forwarding);
2672
2673EXPORT_SYMBOL_GPL(skb_to_sgvec);
2674EXPORT_SYMBOL_GPL(skb_cow_data);
2675EXPORT_SYMBOL_GPL(skb_partial_csum_set);