Showing error 1484

User: Jiri Slaby
Error type: Leaving function in locked state
Error type description: Some lock is not unlocked on all paths of a function, so it is leaked
File location: net/ipv4/udp.c
Line in file: 1556
Project: Linux Kernel
Project version: 2.6.28
Tools: Stanse (1.2)
Entered: 2012-05-21 20:30:05 UTC


Source:

   1/*
   2 * INET                An implementation of the TCP/IP protocol suite for the LINUX
   3 *                operating system.  INET is implemented using the  BSD Socket
   4 *                interface as the means of communication with the user level.
   5 *
   6 *                The User Datagram Protocol (UDP).
   7 *
   8 * Authors:        Ross Biro
   9 *                Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  10 *                Arnt Gulbrandsen, <agulbra@nvg.unit.no>
  11 *                Alan Cox, <alan@lxorguk.ukuu.org.uk>
  12 *                Hirokazu Takahashi, <taka@valinux.co.jp>
  13 *
  14 * Fixes:
  15 *                Alan Cox        :        verify_area() calls
  16 *                Alan Cox        :         stopped close while in use off icmp
  17 *                                        messages. Not a fix but a botch that
  18 *                                        for udp at least is 'valid'.
  19 *                Alan Cox        :        Fixed icmp handling properly
  20 *                Alan Cox        :         Correct error for oversized datagrams
  21 *                Alan Cox        :        Tidied select() semantics.
  22 *                Alan Cox        :        udp_err() fixed properly, also now
  23 *                                        select and read wake correctly on errors
  24 *                Alan Cox        :        udp_send verify_area moved to avoid mem leak
  25 *                Alan Cox        :        UDP can count its memory
  26 *                Alan Cox        :        send to an unknown connection causes
  27 *                                        an ECONNREFUSED off the icmp, but
  28 *                                        does NOT close.
  29 *                Alan Cox        :        Switched to new sk_buff handlers. No more backlog!
  30 *                Alan Cox        :        Using generic datagram code. Even smaller and the PEEK
  31 *                                        bug no longer crashes it.
  32 *                Fred Van Kempen        :         Net2e support for sk->broadcast.
  33 *                Alan Cox        :        Uses skb_free_datagram
  34 *                Alan Cox        :        Added get/set sockopt support.
  35 *                Alan Cox        :        Broadcasting without option set returns EACCES.
  36 *                Alan Cox        :        No wakeup calls. Instead we now use the callbacks.
  37 *                Alan Cox        :        Use ip_tos and ip_ttl
  38 *                Alan Cox        :        SNMP Mibs
  39 *                Alan Cox        :        MSG_DONTROUTE, and 0.0.0.0 support.
  40 *                Matt Dillon        :        UDP length checks.
  41 *                Alan Cox        :        Smarter af_inet used properly.
  42 *                Alan Cox        :        Use new kernel side addressing.
  43 *                Alan Cox        :        Incorrect return on truncated datagram receive.
  44 *        Arnt Gulbrandsen         :        New udp_send and stuff
  45 *                Alan Cox        :        Cache last socket
  46 *                Alan Cox        :        Route cache
  47 *                Jon Peatfield        :        Minor efficiency fix to sendto().
  48 *                Mike Shaver        :        RFC1122 checks.
  49 *                Alan Cox        :        Nonblocking error fix.
  50 *        Willy Konynenberg        :        Transparent proxying support.
  51 *                Mike McLagan        :        Routing by source
  52 *                David S. Miller        :        New socket lookup architecture.
  53 *                                        Last socket cache retained as it
  54 *                                        does have a high hit rate.
  55 *                Olaf Kirch        :        Don't linearise iovec on sendmsg.
  56 *                Andi Kleen        :        Some cleanups, cache destination entry
  57 *                                        for connect.
  58 *        Vitaly E. Lavrov        :        Transparent proxy revived after year coma.
  59 *                Melvin Smith        :        Check msg_name not msg_namelen in sendto(),
  60 *                                        return ENOTCONN for unconnected sockets (POSIX)
  61 *                Janos Farkas        :        don't deliver multi/broadcasts to a different
  62 *                                        bound-to-device socket
  63 *        Hirokazu Takahashi        :        HW checksumming for outgoing UDP
  64 *                                        datagrams.
  65 *        Hirokazu Takahashi        :        sendfile() on UDP works now.
  66 *                Arnaldo C. Melo :        convert /proc/net/udp to seq_file
  67 *        YOSHIFUJI Hideaki @USAGI and:        Support IPV6_V6ONLY socket option, which
  68 *        Alexey Kuznetsov:                allow both IPv4 and IPv6 sockets to bind
  69 *                                        a single port at the same time.
  70 *        Derek Atkins <derek@ihtfp.com>: Add Encapsulation Support
  71 *        James Chapman                :        Add L2TP encapsulation type.
  72 *
  73 *
  74 *                This program is free software; you can redistribute it and/or
  75 *                modify it under the terms of the GNU General Public License
  76 *                as published by the Free Software Foundation; either version
  77 *                2 of the License, or (at your option) any later version.
  78 */
  79
  80#include <asm/system.h>
  81#include <asm/uaccess.h>
  82#include <asm/ioctls.h>
  83#include <linux/bootmem.h>
  84#include <linux/types.h>
  85#include <linux/fcntl.h>
  86#include <linux/module.h>
  87#include <linux/socket.h>
  88#include <linux/sockios.h>
  89#include <linux/igmp.h>
  90#include <linux/in.h>
  91#include <linux/errno.h>
  92#include <linux/timer.h>
  93#include <linux/mm.h>
  94#include <linux/inet.h>
  95#include <linux/netdevice.h>
  96#include <net/tcp_states.h>
  97#include <linux/skbuff.h>
  98#include <linux/proc_fs.h>
  99#include <linux/seq_file.h>
 100#include <net/net_namespace.h>
 101#include <net/icmp.h>
 102#include <net/route.h>
 103#include <net/checksum.h>
 104#include <net/xfrm.h>
 105#include "udp_impl.h"
 106
 107/*
 108 *        Snmp MIB for the UDP layer
 109 */
 110
 111struct hlist_head udp_hash[UDP_HTABLE_SIZE];        /* socket hash chains; indexed with udp_hashfn() below */
 112DEFINE_RWLOCK(udp_hash_lock);                        /* guards the hash chains: write_lock_bh() to bind, read_lock() to look up */
 113
    /*
     * NOTE(review): presumably the sysctl-tunable UDP memory limits; nothing
     * in this listing assigns or reads them, so their exact meaning (and the
     * meaning of the three sysctl_udp_mem slots) must be confirmed elsewhere.
     */
 114int sysctl_udp_mem[3] __read_mostly;
 115int sysctl_udp_rmem_min __read_mostly;
 116int sysctl_udp_wmem_min __read_mostly;
 117
 118EXPORT_SYMBOL(sysctl_udp_mem);
 119EXPORT_SYMBOL(sysctl_udp_rmem_min);
 120EXPORT_SYMBOL(sysctl_udp_wmem_min);
 121
    /* NOTE(review): presumably the running total of memory charged to UDP sockets - confirm. */
 122atomic_t udp_memory_allocated;
 123EXPORT_SYMBOL(udp_memory_allocated);
 124
 125static int udp_lib_lport_inuse(struct net *net, __u16 num,
 126                               const struct hlist_head udptable[],
 127                               struct sock *sk,
 128                               int (*saddr_comp)(const struct sock *sk1,
 129                                                 const struct sock *sk2))
 130{
            /*
             * Return 1 if binding @sk to port @num would clash with a socket
             * already hashed in @udptable, 0 otherwise.
             *
             * Another socket clashes only when ALL of these hold: it lives in
             * namespace @net, it is not @sk itself, it is hashed on @num, at
             * least one of the two sockets has sk_reuse clear, their bound
             * devices overlap (either is unbound, or both are equal), and
             * saddr_comp(sk, sk2) returns non-zero.
             *
             * No lock is taken here; udp_lib_get_port() calls this while
             * holding udp_hash_lock for writing.
             */
 131        struct sock *sk2;
 132        struct hlist_node *node;
 133
 134        sk_for_each(sk2, node, &udptable[udp_hashfn(net, num)])
 135                if (net_eq(sock_net(sk2), net)                        &&
 136                    sk2 != sk                                        &&
 137                    sk2->sk_hash == num                                &&
 138                    (!sk2->sk_reuse || !sk->sk_reuse)                &&
 139                    (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
 140                        || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
 141                    (*saddr_comp)(sk, sk2))
 142                        return 1;
 143        return 0;
 144}
 145
 146/**
 147 *  udp_lib_get_port  -  UDP/-Lite port lookup for IPv4 and IPv6
 148 *
 149 *  @sk:          socket struct in question
 150 *  @snum:        port number to look up
 151 *  @saddr_comp:  AF-dependent comparison of bound local IP addresses
     *
     *  If @snum is 0, a free port is searched for inside the range returned by
     *  inet_get_local_port_range(), starting at a random port and advancing by
     *  a random odd step.  If @snum is non-zero it is used as-is, provided it
     *  does not clash with an existing socket.
     *
     *  On success the port is stored in inet_sk(sk)->num and sk->sk_hash, and
     *  the socket is inserted into its protocol's hash table if it was not
     *  hashed yet.  The whole operation runs under udp_hash_lock, taken for
     *  writing with bottom halves disabled; the lock is dropped on every path.
     *
     *  Returns 0 on success, 1 if the requested port is in use or no free
     *  port was found.
 152 */
 153int udp_lib_get_port(struct sock *sk, unsigned short snum,
 154                       int (*saddr_comp)(const struct sock *sk1,
 155                                         const struct sock *sk2 )    )
 156{
 157        struct hlist_head *udptable = sk->sk_prot->h.udp_hash;
 158        int    error = 1;
 159        struct net *net = sock_net(sk);
 160
 161        write_lock_bh(&udp_hash_lock);
 162
 163        if (!snum) {
 164                int low, high, remaining;
 165                unsigned rand;
 166                unsigned short first;
 167
 168                inet_get_local_port_range(&low, &high);
 169                remaining = (high - low) + 1;
 170
 171                rand = net_random();
 172                snum = first = rand % remaining + low;
                    /*
                     * Force the step odd: snum is a 16-bit value, so an odd
                     * increment walks through every 16-bit number before it
                     * comes back to 'first'.
                     */
 173                rand |= 1;
 174                while (udp_lib_lport_inuse(net, snum, udptable, sk,
 175                                           saddr_comp)) {
                            /* Skip candidates that fall outside [low, high]. */
 176                        do {
 177                                snum = snum + rand;
 178                        } while (snum < low || snum > high);
                            /* Wrapped around to the starting port: range exhausted. */
 179                        if (snum == first)
 180                                goto fail;
 181                }
 182        } else if (udp_lib_lport_inuse(net, snum, udptable, sk, saddr_comp))
 183                goto fail;
 184
 185        inet_sk(sk)->num = snum;
 186        sk->sk_hash = snum;
 187        if (sk_unhashed(sk)) {
 188                sk_add_node(sk, &udptable[udp_hashfn(net, snum)]);
 189                sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 190        }
 191        error = 0;
 192fail:
 193        write_unlock_bh(&udp_hash_lock);
 194        return error;
 195}
 196
 197static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
 198{
            /*
             * Address comparison handed to udp_lib_get_port() for IPv4.
             * Returns non-zero when @sk2 is not an IPv6-only socket and the
             * two receive addresses can collide: either one is zero (unset)
             * or both are identical.
             */
 199        struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
 200
 201        return         ( !ipv6_only_sock(sk2)  &&
 202                  (!inet1->rcv_saddr || !inet2->rcv_saddr ||
 203                   inet1->rcv_saddr == inet2->rcv_saddr      ));
 204}
 205
 206int udp_v4_get_port(struct sock *sk, unsigned short snum)
 207{
            /*
             * IPv4 entry point for port binding: delegates to
             * udp_lib_get_port() with the IPv4 address comparator and
             * passes its result (0 on success, 1 on failure) straight back.
             */
 208        return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal);
 209}
 210
 211/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
 212 * harder than this. -DaveM
     *
     * Find the best socket in @udptable for a packet sent from @saddr:@sport
     * to @daddr:@dport and received on interface index @dif, within @net.
     *
     * Each candidate on the hash chain for the destination port is scored:
     * 1 point for a PF_INET socket, plus 2 points for each of rcv_saddr,
     * daddr, dport and sk_bound_dev_if that is set and matches.  A field that
     * is set but does not match rules the socket out.  A score of 9 (all
     * fields matched on a PF_INET socket) ends the search immediately;
     * otherwise the highest score wins and, on a tie, the socket seen first
     * is kept.
     *
     * The chain is walked under udp_hash_lock (read).  The returned socket
     * has had sock_hold() called on it, so the caller must sock_put() it.
     * Returns NULL when nothing matches.
 213 */
 214static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
 215                __be16 sport, __be32 daddr, __be16 dport,
 216                int dif, struct hlist_head udptable[])
 217{
 218        struct sock *sk, *result = NULL;
 219        struct hlist_node *node;
 220        unsigned short hnum = ntohs(dport);
            /* Best score so far; -1 so that any matching socket beats it. */
 221        int badness = -1;
 222
 223        read_lock(&udp_hash_lock);
 224        sk_for_each(sk, node, &udptable[udp_hashfn(net, hnum)]) {
 225                struct inet_sock *inet = inet_sk(sk);
 226
 227                if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum &&
 228                                !ipv6_only_sock(sk)) {
 229                        int score = (sk->sk_family == PF_INET ? 1 : 0);
 230                        if (inet->rcv_saddr) {
 231                                if (inet->rcv_saddr != daddr)
 232                                        continue;
 233                                score+=2;
 234                        }
 235                        if (inet->daddr) {
 236                                if (inet->daddr != saddr)
 237                                        continue;
 238                                score+=2;
 239                        }
 240                        if (inet->dport) {
 241                                if (inet->dport != sport)
 242                                        continue;
 243                                score+=2;
 244                        }
 245                        if (sk->sk_bound_dev_if) {
 246                                if (sk->sk_bound_dev_if != dif)
 247                                        continue;
 248                                score+=2;
 249                        }
                            /* 9 = 1 + 4 * 2: nothing can score higher, stop here. */
 250                        if (score == 9) {
 251                                result = sk;
 252                                break;
 253                        } else if (score > badness) {
 254                                result = sk;
 255                                badness = score;
 256                        }
 257                }
 258        }
            /* Take the reference before dropping the lock that keeps sk on the chain. */
 259        if (result)
 260                sock_hold(result);
 261        read_unlock(&udp_hash_lock);
 262        return result;
 263}
 264
 265static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
 266                                                 __be16 sport, __be16 dport,
 267                                                 struct hlist_head udptable[])
 268{
            /*
             * Socket lookup for a received skb.  If skb_steal_sock() hands
             * back a socket, that one is returned directly (presumably a
             * socket already attached to the skb earlier in the receive
             * path - confirm).  Otherwise fall back to the hash lookup,
             * taking the addresses from the skb's IP header, the namespace
             * from skb->dst->dev and the interface from inet_iif().
             */
 269        struct sock *sk;
 270        const struct iphdr *iph = ip_hdr(skb);
 271
 272        if (unlikely(sk = skb_steal_sock(skb)))
 273                return sk;
 274        else
 275                return __udp4_lib_lookup(dev_net(skb->dst->dev), iph->saddr, sport,
 276                                         iph->daddr, dport, inet_iif(skb),
 277                                         udptable);
 278}
 279
 280struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
 281                             __be32 daddr, __be16 dport, int dif)
 282{
            /*
             * Exported lookup against the global udp_hash table.  Same
             * contract as __udp4_lib_lookup(): the result is NULL or a
             * socket on which sock_hold() was called, which the caller
             * must release with sock_put().
             */
 283        return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, udp_hash);
 284}
 285EXPORT_SYMBOL_GPL(udp4_lib_lookup);
 286
 287static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk,
 288                                             __be16 loc_port, __be32 loc_addr,
 289                                             __be16 rmt_port, __be32 rmt_addr,
 290                                             int dif)
 291{
            /*
             * Continue walking a hash chain from @sk (inclusive) and return
             * the first socket that should receive a datagram addressed to
             * @loc_addr:@loc_port from @rmt_addr:@rmt_port on interface
             * @dif, or NULL when the chain is exhausted.
             *
             * A socket is skipped when it is in another namespace, hashed on
             * a different port, has a set daddr/dport/rcv_saddr/bound device
             * that does not match, is IPv6-only, or when ip_mc_sf_allow()
             * rejects it.
             *
             * No lock is taken and no reference is acquired here.
             * NOTE(review): the caller presumably holds udp_hash_lock while
             * iterating - confirm at the call site.
             */
 292        struct hlist_node *node;
 293        struct sock *s = sk;
 294        unsigned short hnum = ntohs(loc_port);
 295
 296        sk_for_each_from(s, node) {
 297                struct inet_sock *inet = inet_sk(s);
 298
 299                if (!net_eq(sock_net(s), net)                                ||
 300                    s->sk_hash != hnum                                        ||
 301                    (inet->daddr && inet->daddr != rmt_addr)                ||
 302                    (inet->dport != rmt_port && inet->dport)                ||
 303                    (inet->rcv_saddr && inet->rcv_saddr != loc_addr)        ||
 304                    ipv6_only_sock(s)                                        ||
 305                    (s->sk_bound_dev_if && s->sk_bound_dev_if != dif))
 306                        continue;
 307                if (!ip_mc_sf_allow(s, loc_addr, rmt_addr, dif))
 308                        continue;
 309                goto found;
 310        }
 311        s = NULL;
 312found:
 313        return s;
 314}
 315
 316/*
 317 * This routine is called by the ICMP module when it gets some
 318 * sort of error condition.  If err < 0 then the socket should
 319 * be closed and the error returned to the user.  If err > 0
 320 * it's just the icmp type << 8 | icmp code.
 321 * Header points to the ip header of the error packet. We move
 322 * on past this. Then (as it used to claim before adjustment)
 323 * header points to the first 8 bytes of the udp header.  We need
 324 * to find the appropriate port.
 325 */
 326
 327void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[])
 328{
 329        struct inet_sock *inet;
            /* skb->data starts at the IP header quoted in the ICMP error; the UDP header follows it. */
 330        struct iphdr *iph = (struct iphdr*)skb->data;
 331        struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2));
 332        const int type = icmp_hdr(skb)->type;
 333        const int code = icmp_hdr(skb)->code;
 334        struct sock *sk;
 335        int harderr;
 336        int err;
 337        struct net *net = dev_net(skb->dev);
 338
            /*
             * The quoted header's destination is passed as the lookup's
             * "source" and vice versa, i.e. the roles are swapped relative
             * to a normal receive lookup.  On success the socket comes back
             * with a reference held; it is dropped at 'out'.
             */
 339        sk = __udp4_lib_lookup(net, iph->daddr, uh->dest,
 340                        iph->saddr, uh->source, skb->dev->ifindex, udptable);
 341        if (sk == NULL) {
 342                ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
 343                return;        /* No socket for error */
 344        }
 345
 346        err = 0;
 347        harderr = 0;
 348        inet = inet_sk(sk);
 349
            /* Translate the ICMP type/code into an errno and a hard/soft flag. */
 350        switch (type) {
 351        default:
 352        case ICMP_TIME_EXCEEDED:
 353                err = EHOSTUNREACH;
 354                break;
 355        case ICMP_SOURCE_QUENCH:
                    /* Source quench is ignored entirely. */
 356                goto out;
 357        case ICMP_PARAMETERPROB:
 358                err = EPROTO;
 359                harderr = 1;
 360                break;
 361        case ICMP_DEST_UNREACH:
 362                if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
                            /* Only reported when the socket has PMTU discovery enabled. */
 363                        if (inet->pmtudisc != IP_PMTUDISC_DONT) {
 364                                err = EMSGSIZE;
 365                                harderr = 1;
 366                                break;
 367                        }
 368                        goto out;
 369                }
 370                err = EHOSTUNREACH;
                    /* Known codes take their errno and fatality from icmp_err_convert[]. */
 371                if (code <= NR_ICMP_UNREACH) {
 372                        harderr = icmp_err_convert[code].fatal;
 373                        err = icmp_err_convert[code].errno;
 374                }
 375                break;
 376        }
 377
 378        /*
 379         *      RFC1122: OK.  Passes ICMP errors back to application, as per
 380         *        4.1.3.3.
 381         */
            /*
             * Without IP_RECVERR only hard errors on a connected
             * (TCP_ESTABLISHED) socket are reported; with it, the error is
             * additionally queued through ip_icmp_error().
             */
 382        if (!inet->recverr) {
 383                if (!harderr || sk->sk_state != TCP_ESTABLISHED)
 384                        goto out;
 385        } else {
 386                ip_icmp_error(sk, skb, err, uh->dest, info, (u8*)(uh+1));
 387        }
 388        sk->sk_err = err;
 389        sk->sk_error_report(sk);
 390out:
            /* Balances the sock_hold() done by __udp4_lib_lookup(). */
 391        sock_put(sk);
 392}
 393
 394void udp_err(struct sk_buff *skb, u32 info)
 395{
            /* ICMP error entry point for plain UDP: run the shared handler on the global udp_hash table. */
 396        __udp4_lib_err(skb, info, udp_hash);
 397}
 398
 399/*
 400 * Throw away all pending data and cancel the corking. Socket is locked.
     *
     * Does nothing when up->pending is already clear.  Otherwise the pending
     * length and the pending marker are reset before ip_flush_pending_frames()
     * is asked to drop the queued data.  The function takes no lock itself;
     * callers in this file invoke it between lock_sock() and release_sock().
 401 */
 402void udp_flush_pending_frames(struct sock *sk)
 403{
 404        struct udp_sock *up = udp_sk(sk);
 405
 406        if (up->pending) {
 407                up->len = 0;
 408                up->pending = 0;
 409                ip_flush_pending_frames(sk);
 410        }
 411}
 412EXPORT_SYMBOL(udp_flush_pending_frames);
 413
 414/**
 415 *         udp4_hwcsum_outgoing  -  handle outgoing HW checksumming
 416 *         @sk:         socket we are sending on
 417 *         @skb:         sk_buff containing the filled-in UDP header
 418 *                 (checksum field must be zeroed out)
     *         @src:         source address fed into the pseudo-header checksum
     *         @dst:         destination address fed into the pseudo-header checksum
     *         @len:         length fed into the pseudo-header checksum
     *
     *         With exactly one skb on the write queue only the complemented
     *         pseudo-header sum is stored and csum_start/csum_offset are set
     *         (presumably so the device finishes the checksum - confirm).
     *         With more than one skb the checksum is computed in software
     *         over all queued skbs and the first skb is marked CHECKSUM_NONE.
 419 */
 420static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
 421                                 __be32 src, __be32 dst, int len      )
 422{
 423        unsigned int offset;
            /* Captured up front: 'skb' is reused as the loop cursor further down. */
 424        struct udphdr *uh = udp_hdr(skb);
 425        __wsum csum = 0;
 426
 427        if (skb_queue_len(&sk->sk_write_queue) == 1) {
 428                /*
 429                 * Only one fragment on the socket.
 430                 */
 431                skb->csum_start = skb_transport_header(skb) - skb->head;
 432                skb->csum_offset = offsetof(struct udphdr, check);
 433                uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0);
 434        } else {
 435                /*
 436                 * HW-checksum won't work as there are two or more
 437                 * fragments on the socket so that all csums of sk_buffs
 438                 * should be together
 439                 */
                    /* Checksum the first skb from its transport header onwards. */
 440                offset = skb_transport_offset(skb);
 441                skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
 442
 443                skb->ip_summed = CHECKSUM_NONE;
 444
                    /* Fold in the csum field of every queued skb; this overwrites the 'skb' parameter. */
 445                skb_queue_walk(&sk->sk_write_queue, skb) {
 446                        csum = csum_add(csum, skb->csum);
 447                }
 448
 449                uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum);
                    /* A computed value of 0 is replaced by CSUM_MANGLED_0. */
 450                if (uh->check == 0)
 451                        uh->check = CSUM_MANGLED_0;
 452        }
 453}
 454
 455/*
 456 * Push out all pending data as one UDP datagram. Socket is locked.
     *
     * Fills in the UDP header of the first queued skb from the corked flow
     * (ports) and up->len, selects one of four checksum strategies, and hands
     * the queue to ip_push_pending_frames().  up->len and up->pending are
     * always reset before returning.
     *
     * Returns 0 or the error from ip_push_pending_frames().
     *
     * NOTE(review): when the write queue is empty the function jumps to
     * 'out' with err == 0, so UDP_MIB_OUTDATAGRAMS is still incremented even
     * though nothing was transmitted - confirm whether that is intended.
 457 */
 458static int udp_push_pending_frames(struct sock *sk)
 459{
 460        struct udp_sock  *up = udp_sk(sk);
 461        struct inet_sock *inet = inet_sk(sk);
 462        struct flowi *fl = &inet->cork.fl;
 463        struct sk_buff *skb;
 464        struct udphdr *uh;
 465        int err = 0;
 466        int is_udplite = IS_UDPLITE(sk);
 467        __wsum csum = 0;
 468
 469        /* Grab the skbuff where UDP header space exists. */
 470        if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
 471                goto out;
 472
 473        /*
 474         * Create a UDP header
 475         */
 476        uh = udp_hdr(skb);
 477        uh->source = fl->fl_ip_sport;
 478        uh->dest = fl->fl_ip_dport;
 479        uh->len = htons(up->len);
 480        uh->check = 0;
 481
 482        if (is_udplite)                                   /*     UDP-Lite      */
 483                csum  = udplite_csum_outgoing(sk, skb);
 484
 485        else if (sk->sk_no_check == UDP_CSUM_NOXMIT) {   /* UDP csum disabled */
 486
                    /* The check field stays 0, as set above. */
 487                skb->ip_summed = CHECKSUM_NONE;
 488                goto send;
 489
 490        } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
 491
                    /* The helper writes uh->check itself, so skip the pseudo-header step below. */
 492                udp4_hwcsum_outgoing(sk, skb, fl->fl4_src,fl->fl4_dst, up->len);
 493                goto send;
 494
 495        } else                                                 /*   `normal' UDP    */
 496                csum = udp_csum_outgoing(sk, skb);
 497
 498        /* add protocol-dependent pseudo-header */
 499        uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len,
 500                                      sk->sk_protocol, csum             );
 501        if (uh->check == 0)
 502                uh->check = CSUM_MANGLED_0;
 503
 504send:
 505        err = ip_push_pending_frames(sk);
 506out:
            /* Uncork unconditionally, whether or not anything was sent. */
 507        up->len = 0;
 508        up->pending = 0;
 509        if (!err)
 510                UDP_INC_STATS_USER(sock_net(sk),
 511                                UDP_MIB_OUTDATAGRAMS, is_udplite);
 512        return err;
 513}
 514
 515int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 516                size_t len)
 517{
            /*
             * Send (or cork) one UDP datagram.
             *
             * @iocb: not used by this function
             * @sk:   sending socket
             * @msg:  user message; msg_name optionally carries the
             *        destination, msg_control optional IP-level ancillary
             *        data, msg_flags the MSG_* flags
             * @len:  payload length, at most 0xFFFF
             *
             * If the socket is already corked (up->pending) the data is
             * appended to the pending datagram.  Otherwise the destination
             * is resolved, a route obtained (the cached one for connected
             * sockets when no option forces a fresh lookup), the socket is
             * corked and the data appended.  Unless MSG_MORE or the cork
             * flag is set, the datagram is pushed out immediately.
             *
             * Returns @len on success or a negative errno.
             *
             * Resource rules kept on every path: the socket lock taken with
             * lock_sock() is released before returning, the route reference
             * is dropped at 'out', and IP options allocated by
             * ip_cmsg_send() (tracked by 'free') are kfree()d.
             */
 518        struct inet_sock *inet = inet_sk(sk);
 519        struct udp_sock *up = udp_sk(sk);
 520        int ulen = len;
 521        struct ipcm_cookie ipc;
 522        struct rtable *rt = NULL;
 523        int free = 0;
 524        int connected = 0;
 525        __be32 daddr, faddr, saddr;
 526        __be16 dport;
 527        u8  tos;
 528        int err, is_udplite = IS_UDPLITE(sk);
 529        int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
 530        int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
 531
 532        if (len > 0xFFFF)
 533                return -EMSGSIZE;
 534
 535        /*
 536         *        Check the flags.
 537         */
 538
 539        if (msg->msg_flags&MSG_OOB)        /* Mirror BSD error message compatibility */
 540                return -EOPNOTSUPP;
 541
 542        ipc.opt = NULL;
 543
 544        if (up->pending) {
 545                /*
 546                 * There are pending frames.
 547                 * The socket lock must be held while it's corked.
 548                 */
 549                lock_sock(sk);
                    /* Re-check under the lock: the unlocked test above may be stale. */
 550                if (likely(up->pending)) {
 551                        if (unlikely(up->pending != AF_INET)) {
 552                                release_sock(sk);
 553                                return -EINVAL;
 554                        }
                            /* Jumps with the socket lock held and rt == NULL. */
 555                        goto do_append_data;
 556                }
 557                release_sock(sk);
 558        }
 559        ulen += sizeof(struct udphdr);
 560
 561        /*
 562         *        Get and verify the address.
 563         */
 564        if (msg->msg_name) {
 565                struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name;
 566                if (msg->msg_namelen < sizeof(*usin))
 567                        return -EINVAL;
 568                if (usin->sin_family != AF_INET) {
 569                        if (usin->sin_family != AF_UNSPEC)
 570                                return -EAFNOSUPPORT;
 571                }
 572
 573                daddr = usin->sin_addr.s_addr;
 574                dport = usin->sin_port;
 575                if (dport == 0)
 576                        return -EINVAL;
 577        } else {
 578                if (sk->sk_state != TCP_ESTABLISHED)
 579                        return -EDESTADDRREQ;
 580                daddr = inet->daddr;
 581                dport = inet->dport;
 582                /* Open fast path for connected socket.
 583                   Route will not be used, if at least one option is set.
 584                 */
 585                connected = 1;
 586        }
 587        ipc.addr = inet->saddr;
 588
 589        ipc.oif = sk->sk_bound_dev_if;
 590        if (msg->msg_controllen) {
 591                err = ip_cmsg_send(sock_net(sk), msg, &ipc);
 592                if (err) {
                            /*
                             * ipc.opt is NULL unless ip_cmsg_send() itself
                             * allocated options before failing on a later
                             * control message; kfree(NULL) is a no-op, so
                             * freeing here is always safe and avoids a leak.
                             */
                            kfree(ipc.opt);
 593                        return err;
                    }
 594                if (ipc.opt)
 595                        free = 1;
 596                connected = 0;
 597        }
 598        if (!ipc.opt)
 599                ipc.opt = inet->opt;
 600
 601        saddr = ipc.addr;
 602        ipc.addr = faddr = daddr;
 603
 604        if (ipc.opt && ipc.opt->srr) {
 605                if (!daddr) {
                            /*
                             * Must not return directly: ipc.opt may have
                             * been allocated by ip_cmsg_send() above
                             * (free == 1) and would be leaked.  rt is still
                             * NULL here, as it already is on the corked
                             * path that reaches 'out'.
                             */
 606                        err = -EINVAL;
                            goto out;
                    }
 607                faddr = ipc.opt->faddr;
 608                connected = 0;
 609        }
 610        tos = RT_TOS(inet->tos);
 611        if (sock_flag(sk, SOCK_LOCALROUTE) ||
 612            (msg->msg_flags & MSG_DONTROUTE) ||
 613            (ipc.opt && ipc.opt->is_strictroute)) {
 614                tos |= RTO_ONLINK;
 615                connected = 0;
 616        }
 617
 618        if (ipv4_is_multicast(daddr)) {
 619                if (!ipc.oif)
 620                        ipc.oif = inet->mc_index;
 621                if (!saddr)
 622                        saddr = inet->mc_addr;
 623                connected = 0;
 624        }
 625
            /* Only a still-"connected" send may reuse the route cached on the socket. */
 626        if (connected)
 627                rt = (struct rtable*)sk_dst_check(sk, 0);
 628
 629        if (rt == NULL) {
 630                struct flowi fl = { .oif = ipc.oif,
 631                                    .nl_u = { .ip4_u =
 632                                              { .daddr = faddr,
 633                                                .saddr = saddr,
 634                                                .tos = tos } },
 635                                    .proto = sk->sk_protocol,
 636                                    .flags = inet_sk_flowi_flags(sk),
 637                                    .uli_u = { .ports =
 638                                               { .sport = inet->sport,
 639                                                 .dport = dport } } };
 640                struct net *net = sock_net(sk);
 641
 642                security_sk_classify_flow(sk, &fl);
 643                err = ip_route_output_flow(net, &rt, &fl, sk, 1);
 644                if (err) {
                            /*
                             * This function calls lock_sock() and therefore
                             * runs in process context, so the non-_BH
                             * statistics macro is the correct one here.
                             */
 645                        if (err == -ENETUNREACH)
 646                                IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
 647                        goto out;
 648                }
 649
 650                err = -EACCES;
 651                if ((rt->rt_flags & RTCF_BROADCAST) &&
 652                    !sock_flag(sk, SOCK_BROADCAST))
 653                        goto out;
 654                if (connected)
 655                        sk_dst_set(sk, dst_clone(&rt->u.dst));
 656        }
 657
 658        if (msg->msg_flags&MSG_CONFIRM)
 659                goto do_confirm;
 660back_from_confirm:
 661
 662        saddr = rt->rt_src;
 663        if (!ipc.addr)
 664                daddr = ipc.addr = rt->rt_dst;
 665
 666        lock_sock(sk);
 667        if (unlikely(up->pending)) {
 668                /* The socket is already corked while preparing it. */
 669                /* ... which is an evident application bug. --ANK */
 670                release_sock(sk);
 671
 672                LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n");
 673                err = -EINVAL;
 674                goto out;
 675        }
 676        /*
 677         *        Now cork the socket to pend data.
 678         */
 679        inet->cork.fl.fl4_dst = daddr;
 680        inet->cork.fl.fl_ip_dport = dport;
 681        inet->cork.fl.fl4_src = saddr;
 682        inet->cork.fl.fl_ip_sport = inet->sport;
 683        up->pending = AF_INET;
 684
 685do_append_data:
            /* Reached with the socket lock held, from above or from the corked fast path. */
 686        up->len += ulen;
 687        getfrag  =  is_udplite ?  udplite_getfrag : ip_generic_getfrag;
 688        err = ip_append_data(sk, getfrag, msg->msg_iov, ulen,
 689                        sizeof(struct udphdr), &ipc, rt,
 690                        corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
 691        if (err)
 692                udp_flush_pending_frames(sk);
 693        else if (!corkreq)
 694                err = udp_push_pending_frames(sk);
 695        else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
 696                up->pending = 0;
 697        release_sock(sk);
 698
 699out:
            /* Common exit: the socket lock is NOT held here; rt may be NULL. */
 700        ip_rt_put(rt);
 701        if (free)
 702                kfree(ipc.opt);
 703        if (!err)
 704                return len;
 705        /*
 706         * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space.  Reporting
 707         * ENOBUFS might not be good (it's not tunable per se), but otherwise
 708         * we don't have a good statistic (IpOutDiscards but it can be too many
 709         * things).  We could add another new stat but at least for now that
 710         * seems like overkill.
 711         */
 712        if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
 713                UDP_INC_STATS_USER(sock_net(sk),
 714                                UDP_MIB_SNDBUFERRORS, is_udplite);
 715        }
 716        return err;
 717
 718do_confirm:
 719        dst_confirm(&rt->u.dst);
            /* A zero-length MSG_PROBE only confirms the route and sends nothing. */
 720        if (!(msg->msg_flags&MSG_PROBE) || len)
 721                goto back_from_confirm;
 722        err = 0;
 723        goto out;
 724}
 725
 726int udp_sendpage(struct sock *sk, struct page *page, int offset,
 727                 size_t size, int flags)
 728{
            /*
             * Append @size bytes of @page, starting at @offset, to the
             * socket's pending datagram.
             *
             * If nothing is pending yet, a zero-length udp_sendmsg() with
             * MSG_MORE is issued first so that the socket gets corked with
             * its destination.  The page is then appended under the socket
             * lock and, unless the cork flag or MSG_MORE is set, the
             * datagram is pushed out.
             *
             * Returns @size on success or a negative errno.  When
             * ip_append_page() reports -EOPNOTSUPP the request is handed to
             * sock_no_sendpage() after the lock has been released.  The
             * socket lock is dropped on every return path.
             */
 729        struct udp_sock *up = udp_sk(sk);
 730        int ret;
 731
 732        if (!up->pending) {
 733                struct msghdr msg = {        .msg_flags = flags|MSG_MORE };
 734
 735                /* Call udp_sendmsg to specify destination address which
 736                 * sendpage interface can't pass.
 737                 * This will succeed only when the socket is connected.
 738                 */
 739                ret = udp_sendmsg(NULL, sk, &msg, 0);
 740                if (ret < 0)
 741                        return ret;
 742        }
 743
 744        lock_sock(sk);
 745
            /* The socket was not locked in between, so the cork may be gone again. */
 746        if (unlikely(!up->pending)) {
 747                release_sock(sk);
 748
 749                LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 3\n");
 750                return -EINVAL;
 751        }
 752
 753        ret = ip_append_page(sk, page, offset, size, flags);
 754        if (ret == -EOPNOTSUPP) {
 755                release_sock(sk);
 756                return sock_no_sendpage(sk->sk_socket, page, offset,
 757                                        size, flags);
 758        }
 759        if (ret < 0) {
 760                udp_flush_pending_frames(sk);
 761                goto out;
 762        }
 763
 764        up->len += size;
 765        if (!(up->corkflag || (flags&MSG_MORE)))
 766                ret = udp_push_pending_frames(sk);
            /* ret == 0 means success (pushed or left corked): report the byte count. */
 767        if (!ret)
 768                ret = size;
 769out:
 770        release_sock(sk);
 771        return ret;
 772}
 773
 774/*
 775 *        IOCTL requests applicable to the UDP protocol
 776 */
 777
 778int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 779{
 780        switch (cmd) {
 781        case SIOCOUTQ:
 782        {
 783                int amount = atomic_read(&sk->sk_wmem_alloc);
 784                return put_user(amount, (int __user *)arg);
 785        }
 786
 787        case SIOCINQ:
 788        {
 789                struct sk_buff *skb;
 790                unsigned long amount;
 791
 792                amount = 0;
 793                spin_lock_bh(&sk->sk_receive_queue.lock);
 794                skb = skb_peek(&sk->sk_receive_queue);
 795                if (skb != NULL) {
 796                        /*
 797                         * We will only return the amount
 798                         * of this packet since that is all
 799                         * that will be read.
 800                         */
 801                        amount = skb->len - sizeof(struct udphdr);
 802                }
 803                spin_unlock_bh(&sk->sk_receive_queue.lock);
 804                return put_user(amount, (int __user *)arg);
 805        }
 806
 807        default:
 808                return -ENOIOCTLCMD;
 809        }
 810
 811        return 0;
 812}
 813
 814/*
 815 *         This should be easy, if there is something there we
 816 *         return it, otherwise we block.
 817 */
 818
 819int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 820                size_t len, int noblock, int flags, int *addr_len)
 821{
 822        struct inet_sock *inet = inet_sk(sk);
 823        struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
 824        struct sk_buff *skb;
 825        unsigned int ulen, copied;
 826        int peeked;
 827        int err;
 828        int is_udplite = IS_UDPLITE(sk);
 829
 830        /*
 831         *        Check any passed addresses
 832         */
 833        if (addr_len)
 834                *addr_len=sizeof(*sin);
 835
 836        if (flags & MSG_ERRQUEUE)
 837                return ip_recv_error(sk, msg, len);
 838
 839try_again:
 840        skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
 841                                  &peeked, &err);
 842        if (!skb)
 843                goto out;
 844
 845        ulen = skb->len - sizeof(struct udphdr);
 846        copied = len;
 847        if (copied > ulen)
 848                copied = ulen;
 849        else if (copied < ulen)
 850                msg->msg_flags |= MSG_TRUNC;
 851
 852        /*
 853         * If checksum is needed at all, try to do it while copying the
 854         * data.  If the data is truncated, or if we only want a partial
 855         * coverage checksum (UDP-Lite), do it before the copy.
 856         */
 857
 858        if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) {
 859                if (udp_lib_checksum_complete(skb))
 860                        goto csum_copy_err;
 861        }
 862
 863        if (skb_csum_unnecessary(skb))
 864                err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
 865                                              msg->msg_iov, copied       );
 866        else {
 867                err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);
 868
 869                if (err == -EINVAL)
 870                        goto csum_copy_err;
 871        }
 872
 873        if (err)
 874                goto out_free;
 875
 876        if (!peeked)
 877                UDP_INC_STATS_USER(sock_net(sk),
 878                                UDP_MIB_INDATAGRAMS, is_udplite);
 879
 880        sock_recv_timestamp(msg, sk, skb);
 881
 882        /* Copy the address. */
 883        if (sin)
 884        {
 885                sin->sin_family = AF_INET;
 886                sin->sin_port = udp_hdr(skb)->source;
 887                sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
 888                memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
 889        }
 890        if (inet->cmsg_flags)
 891                ip_cmsg_recv(msg, skb);
 892
 893        err = copied;
 894        if (flags & MSG_TRUNC)
 895                err = ulen;
 896
 897out_free:
 898        lock_sock(sk);
 899        skb_free_datagram(sk, skb);
 900        release_sock(sk);
 901out:
 902        return err;
 903
 904csum_copy_err:
 905        lock_sock(sk);
 906        if (!skb_kill_datagram(sk, skb, flags))
 907                UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
 908        release_sock(sk);
 909
 910        if (noblock)
 911                return -EAGAIN;
 912        goto try_again;
 913}
 914
 915
 916int udp_disconnect(struct sock *sk, int flags)
 917{
 918        struct inet_sock *inet = inet_sk(sk);
 919        /*
 920         *        1003.1g - break association.
 921         */
 922
 923        sk->sk_state = TCP_CLOSE;
 924        inet->daddr = 0;
 925        inet->dport = 0;
 926        sk->sk_bound_dev_if = 0;
 927        if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
 928                inet_reset_saddr(sk);
 929
 930        if (!(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) {
 931                sk->sk_prot->unhash(sk);
 932                inet->sport = 0;
 933        }
 934        sk_dst_reset(sk);
 935        return 0;
 936}
 937
 938static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 939{
 940        int is_udplite = IS_UDPLITE(sk);
 941        int rc;
 942
 943        if ((rc = sock_queue_rcv_skb(sk, skb)) < 0) {
 944                /* Note that an ENOMEM error is charged twice */
 945                if (rc == -ENOMEM)
 946                        UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS,
 947                                         is_udplite);
 948                goto drop;
 949        }
 950
 951        return 0;
 952
 953drop:
 954        UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
 955        kfree_skb(skb);
 956        return -1;
 957}
 958
 959/* returns:
 960 *  -1: error
 961 *   0: success
 962 *  >0: "udp encap" protocol resubmission
 963 *
 964 * Note that in the success and error cases, the skb is assumed to
 965 * have either been requeued or freed.
 966 */
 967int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 968{
 969        struct udp_sock *up = udp_sk(sk);
 970        int rc;
 971        int is_udplite = IS_UDPLITE(sk);
 972
 973        /*
 974         *        Charge it to the socket, dropping if the queue is full.
 975         */
 976        if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
 977                goto drop;
 978        nf_reset(skb);
 979
 980        if (up->encap_type) {
 981                /*
 982                 * This is an encapsulation socket so pass the skb to
 983                 * the socket's udp_encap_rcv() hook. Otherwise, just
 984                 * fall through and pass this up the UDP socket.
 985                 * up->encap_rcv() returns the following value:
 986                 * =0 if skb was successfully passed to the encap
 987                 *    handler or was discarded by it.
 988                 * >0 if skb should be passed on to UDP.
 989                 * <0 if skb should be resubmitted as proto -N
 990                 */
 991
 992                /* if we're overly short, let UDP handle it */
 993                if (skb->len > sizeof(struct udphdr) &&
 994                    up->encap_rcv != NULL) {
 995                        int ret;
 996
 997                        ret = (*up->encap_rcv)(sk, skb);
 998                        if (ret <= 0) {
 999                                UDP_INC_STATS_BH(sock_net(sk),
1000                                                 UDP_MIB_INDATAGRAMS,
1001                                                 is_udplite);
1002                                return -ret;
1003                        }
1004                }
1005
1006                /* FALLTHROUGH -- it's a UDP Packet */
1007        }
1008
1009        /*
1010         *         UDP-Lite specific tests, ignored on UDP sockets
1011         */
1012        if ((is_udplite & UDPLITE_RECV_CC)  &&  UDP_SKB_CB(skb)->partial_cov) {
1013
1014                /*
1015                 * MIB statistics other than incrementing the error count are
1016                 * disabled for the following two types of errors: these depend
1017                 * on the application settings, not on the functioning of the
1018                 * protocol stack as such.
1019                 *
1020                 * RFC 3828 here recommends (sec 3.3): "There should also be a
1021                 * way ... to ... at least let the receiving application block
1022                 * delivery of packets with coverage values less than a value
1023                 * provided by the application."
1024                 */
1025                if (up->pcrlen == 0) {          /* full coverage was set  */
1026                        LIMIT_NETDEBUG(KERN_WARNING "UDPLITE: partial coverage "
1027                                "%d while full coverage %d requested\n",
1028                                UDP_SKB_CB(skb)->cscov, skb->len);
1029                        goto drop;
1030                }
1031                /* The next case involves violating the min. coverage requested
1032                 * by the receiver. This is subtle: if receiver wants x and x is
1033                 * greater than the buffersize/MTU then receiver will complain
1034                 * that it wants x while sender emits packets of smaller size y.
1035                 * Therefore the above ...()->partial_cov statement is essential.
1036                 */
1037                if (UDP_SKB_CB(skb)->cscov  <  up->pcrlen) {
1038                        LIMIT_NETDEBUG(KERN_WARNING
1039                                "UDPLITE: coverage %d too small, need min %d\n",
1040                                UDP_SKB_CB(skb)->cscov, up->pcrlen);
1041                        goto drop;
1042                }
1043        }
1044
1045        if (sk->sk_filter) {
1046                if (udp_lib_checksum_complete(skb))
1047                        goto drop;
1048        }
1049
1050        rc = 0;
1051
1052        bh_lock_sock(sk);
1053        if (!sock_owned_by_user(sk))
1054                rc = __udp_queue_rcv_skb(sk, skb);
1055        else
1056                sk_add_backlog(sk, skb);
1057        bh_unlock_sock(sk);
1058
1059        return rc;
1060
1061drop:
1062        UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
1063        kfree_skb(skb);
1064        return -1;
1065}
1066
1067/*
1068 *        Multicasts and broadcasts go to each listener.
1069 *
1070 *        Note: called only from the BH handler context,
1071 *        so we don't need to lock the hashes.
1072 */
1073static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
1074                                    struct udphdr  *uh,
1075                                    __be32 saddr, __be32 daddr,
1076                                    struct hlist_head udptable[])
1077{
1078        struct sock *sk;
1079        int dif;
1080
1081        read_lock(&udp_hash_lock);
1082        sk = sk_head(&udptable[udp_hashfn(net, ntohs(uh->dest))]);
1083        dif = skb->dev->ifindex;
1084        sk = udp_v4_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);
1085        if (sk) {
1086                struct sock *sknext = NULL;
1087
1088                do {
1089                        struct sk_buff *skb1 = skb;
1090
1091                        sknext = udp_v4_mcast_next(net, sk_next(sk), uh->dest,
1092                                                   daddr, uh->source, saddr,
1093                                                   dif);
1094                        if (sknext)
1095                                skb1 = skb_clone(skb, GFP_ATOMIC);
1096
1097                        if (skb1) {
1098                                int ret = udp_queue_rcv_skb(sk, skb1);
1099                                if (ret > 0)
1100                                        /* we should probably re-process instead
1101                                         * of dropping packets here. */
1102                                        kfree_skb(skb1);
1103                        }
1104                        sk = sknext;
1105                } while (sknext);
1106        } else
1107                kfree_skb(skb);
1108        read_unlock(&udp_hash_lock);
1109        return 0;
1110}
1111
1112/* Initialize UDP checksum. If exited with zero value (success),
1113 * CHECKSUM_UNNECESSARY means, that no more checks are required.
1114 * Otherwise, csum completion requires checksumming packet body,
1115 * including udp header and folding it to skb->csum.
1116 */
1117static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
1118                                 int proto)
1119{
1120        const struct iphdr *iph;
1121        int err;
1122
1123        UDP_SKB_CB(skb)->partial_cov = 0;
1124        UDP_SKB_CB(skb)->cscov = skb->len;
1125
1126        if (proto == IPPROTO_UDPLITE) {
1127                err = udplite_checksum_init(skb, uh);
1128                if (err)
1129                        return err;
1130        }
1131
1132        iph = ip_hdr(skb);
1133        if (uh->check == 0) {
1134                skb->ip_summed = CHECKSUM_UNNECESSARY;
1135        } else if (skb->ip_summed == CHECKSUM_COMPLETE) {
1136               if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len,
1137                                      proto, skb->csum))
1138                        skb->ip_summed = CHECKSUM_UNNECESSARY;
1139        }
1140        if (!skb_csum_unnecessary(skb))
1141                skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
1142                                               skb->len, proto, 0);
1143        /* Probably, we should checksum udp header (it should be in cache
1144         * in any case) and data in tiny packets (< rx copybreak).
1145         */
1146
1147        return 0;
1148}
1149
1150/*
1151 *        All we need to do is get the socket, and then do a checksum.
1152 */
1153
1154int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
1155                   int proto)
1156{
1157        struct sock *sk;
1158        struct udphdr *uh = udp_hdr(skb);
1159        unsigned short ulen;
1160        struct rtable *rt = (struct rtable*)skb->dst;
1161        __be32 saddr = ip_hdr(skb)->saddr;
1162        __be32 daddr = ip_hdr(skb)->daddr;
1163        struct net *net = dev_net(skb->dev);
1164
1165        /*
1166         *  Validate the packet.
1167         */
1168        if (!pskb_may_pull(skb, sizeof(struct udphdr)))
1169                goto drop;                /* No space for header. */
1170
1171        ulen = ntohs(uh->len);
1172        if (ulen > skb->len)
1173                goto short_packet;
1174
1175        if (proto == IPPROTO_UDP) {
1176                /* UDP validates ulen. */
1177                if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen))
1178                        goto short_packet;
1179                uh = udp_hdr(skb);
1180        }
1181
1182        if (udp4_csum_init(skb, uh, proto))
1183                goto csum_error;
1184
1185        if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
1186                return __udp4_lib_mcast_deliver(net, skb, uh,
1187                                saddr, daddr, udptable);
1188
1189        sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
1190
1191        if (sk != NULL) {
1192                int ret = udp_queue_rcv_skb(sk, skb);
1193                sock_put(sk);
1194
1195                /* a return value > 0 means to resubmit the input, but
1196                 * it wants the return to be -protocol, or 0
1197                 */
1198                if (ret > 0)
1199                        return -ret;
1200                return 0;
1201        }
1202
1203        if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1204                goto drop;
1205        nf_reset(skb);
1206
1207        /* No socket. Drop packet silently, if checksum is wrong */
1208        if (udp_lib_checksum_complete(skb))
1209                goto csum_error;
1210
1211        UDP_INC_STATS_BH(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
1212        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
1213
1214        /*
1215         * Hmm.  We got an UDP packet to a port to which we
1216         * don't wanna listen.  Ignore it.
1217         */
1218        kfree_skb(skb);
1219        return 0;
1220
1221short_packet:
1222        LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From " NIPQUAD_FMT ":%u %d/%d to " NIPQUAD_FMT ":%u\n",
1223                       proto == IPPROTO_UDPLITE ? "-Lite" : "",
1224                       NIPQUAD(saddr),
1225                       ntohs(uh->source),
1226                       ulen,
1227                       skb->len,
1228                       NIPQUAD(daddr),
1229                       ntohs(uh->dest));
1230        goto drop;
1231
1232csum_error:
1233        /*
1234         * RFC1122: OK.  Discards the bad packet silently (as far as
1235         * the network is concerned, anyway) as per 4.1.3.4 (MUST).
1236         */
1237        LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From " NIPQUAD_FMT ":%u to " NIPQUAD_FMT ":%u ulen %d\n",
1238                       proto == IPPROTO_UDPLITE ? "-Lite" : "",
1239                       NIPQUAD(saddr),
1240                       ntohs(uh->source),
1241                       NIPQUAD(daddr),
1242                       ntohs(uh->dest),
1243                       ulen);
1244drop:
1245        UDP_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
1246        kfree_skb(skb);
1247        return 0;
1248}
1249
1250int udp_rcv(struct sk_buff *skb)
1251{
1252        return __udp4_lib_rcv(skb, udp_hash, IPPROTO_UDP);
1253}
1254
/*
 *	Socket destruction hook: discard any pending (corked) frames while
 *	holding the socket lock.
 */
void udp_destroy_sock(struct sock *sk)
{
	lock_sock(sk);
	udp_flush_pending_frames(sk);
	release_sock(sk);
}
1261
1262/*
1263 *        Socket option code for UDP
1264 */
1265int udp_lib_setsockopt(struct sock *sk, int level, int optname,
1266                       char __user *optval, int optlen,
1267                       int (*push_pending_frames)(struct sock *))
1268{
1269        struct udp_sock *up = udp_sk(sk);
1270        int val;
1271        int err = 0;
1272        int is_udplite = IS_UDPLITE(sk);
1273
1274        if (optlen<sizeof(int))
1275                return -EINVAL;
1276
1277        if (get_user(val, (int __user *)optval))
1278                return -EFAULT;
1279
1280        switch (optname) {
1281        case UDP_CORK:
1282                if (val != 0) {
1283                        up->corkflag = 1;
1284                } else {
1285                        up->corkflag = 0;
1286                        lock_sock(sk);
1287                        (*push_pending_frames)(sk);
1288                        release_sock(sk);
1289                }
1290                break;
1291
1292        case UDP_ENCAP:
1293                switch (val) {
1294                case 0:
1295                case UDP_ENCAP_ESPINUDP:
1296                case UDP_ENCAP_ESPINUDP_NON_IKE:
1297                        up->encap_rcv = xfrm4_udp_encap_rcv;
1298                        /* FALLTHROUGH */
1299                case UDP_ENCAP_L2TPINUDP:
1300                        up->encap_type = val;
1301                        break;
1302                default:
1303                        err = -ENOPROTOOPT;
1304                        break;
1305                }
1306                break;
1307
1308        /*
1309         *         UDP-Lite's partial checksum coverage (RFC 3828).
1310         */
1311        /* The sender sets actual checksum coverage length via this option.
1312         * The case coverage > packet length is handled by send module. */
1313        case UDPLITE_SEND_CSCOV:
1314                if (!is_udplite)         /* Disable the option on UDP sockets */
1315                        return -ENOPROTOOPT;
1316                if (val != 0 && val < 8) /* Illegal coverage: use default (8) */
1317                        val = 8;
1318                else if (val > USHORT_MAX)
1319                        val = USHORT_MAX;
1320                up->pcslen = val;
1321                up->pcflag |= UDPLITE_SEND_CC;
1322                break;
1323
1324        /* The receiver specifies a minimum checksum coverage value. To make
1325         * sense, this should be set to at least 8 (as done below). If zero is
1326         * used, this again means full checksum coverage.                     */
1327        case UDPLITE_RECV_CSCOV:
1328                if (!is_udplite)         /* Disable the option on UDP sockets */
1329                        return -ENOPROTOOPT;
1330                if (val != 0 && val < 8) /* Avoid silly minimal values.       */
1331                        val = 8;
1332                else if (val > USHORT_MAX)
1333                        val = USHORT_MAX;
1334                up->pcrlen = val;
1335                up->pcflag |= UDPLITE_RECV_CC;
1336                break;
1337
1338        default:
1339                err = -ENOPROTOOPT;
1340                break;
1341        }
1342
1343        return err;
1344}
1345
1346int udp_setsockopt(struct sock *sk, int level, int optname,
1347                   char __user *optval, int optlen)
1348{
1349        if (level == SOL_UDP  ||  level == SOL_UDPLITE)
1350                return udp_lib_setsockopt(sk, level, optname, optval, optlen,
1351                                          udp_push_pending_frames);
1352        return ip_setsockopt(sk, level, optname, optval, optlen);
1353}
1354
1355#ifdef CONFIG_COMPAT
1356int compat_udp_setsockopt(struct sock *sk, int level, int optname,
1357                          char __user *optval, int optlen)
1358{
1359        if (level == SOL_UDP  ||  level == SOL_UDPLITE)
1360                return udp_lib_setsockopt(sk, level, optname, optval, optlen,
1361                                          udp_push_pending_frames);
1362        return compat_ip_setsockopt(sk, level, optname, optval, optlen);
1363}
1364#endif
1365
1366int udp_lib_getsockopt(struct sock *sk, int level, int optname,
1367                       char __user *optval, int __user *optlen)
1368{
1369        struct udp_sock *up = udp_sk(sk);
1370        int val, len;
1371
1372        if (get_user(len,optlen))
1373                return -EFAULT;
1374
1375        len = min_t(unsigned int, len, sizeof(int));
1376
1377        if (len < 0)
1378                return -EINVAL;
1379
1380        switch (optname) {
1381        case UDP_CORK:
1382                val = up->corkflag;
1383                break;
1384
1385        case UDP_ENCAP:
1386                val = up->encap_type;
1387                break;
1388
1389        /* The following two cannot be changed on UDP sockets, the return is
1390         * always 0 (which corresponds to the full checksum coverage of UDP). */
1391        case UDPLITE_SEND_CSCOV:
1392                val = up->pcslen;
1393                break;
1394
1395        case UDPLITE_RECV_CSCOV:
1396                val = up->pcrlen;
1397                break;
1398
1399        default:
1400                return -ENOPROTOOPT;
1401        }
1402
1403        if (put_user(len, optlen))
1404                return -EFAULT;
1405        if (copy_to_user(optval, &val,len))
1406                return -EFAULT;
1407        return 0;
1408}
1409
1410int udp_getsockopt(struct sock *sk, int level, int optname,
1411                   char __user *optval, int __user *optlen)
1412{
1413        if (level == SOL_UDP  ||  level == SOL_UDPLITE)
1414                return udp_lib_getsockopt(sk, level, optname, optval, optlen);
1415        return ip_getsockopt(sk, level, optname, optval, optlen);
1416}
1417
1418#ifdef CONFIG_COMPAT
1419int compat_udp_getsockopt(struct sock *sk, int level, int optname,
1420                                 char __user *optval, int __user *optlen)
1421{
1422        if (level == SOL_UDP  ||  level == SOL_UDPLITE)
1423                return udp_lib_getsockopt(sk, level, optname, optval, optlen);
1424        return compat_ip_getsockopt(sk, level, optname, optval, optlen);
1425}
1426#endif
1427/**
1428 *         udp_poll - wait for a UDP event.
1429 *        @file - file struct
1430 *        @sock - socket
1431 *        @wait - poll table
1432 *
1433 *        This is same as datagram poll, except for the special case of
1434 *        blocking sockets. If application is using a blocking fd
1435 *        and a packet with checksum error is in the queue;
1436 *        then it could get return from select indicating data available
1437 *        but then block when reading it. Add special case code
1438 *        to work around these arguably broken applications.
1439 */
1440unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
1441{
1442        unsigned int mask = datagram_poll(file, sock, wait);
1443        struct sock *sk = sock->sk;
1444        int         is_lite = IS_UDPLITE(sk);
1445
1446        /* Check for false positives due to checksum errors */
1447        if ( (mask & POLLRDNORM) &&
1448             !(file->f_flags & O_NONBLOCK) &&
1449             !(sk->sk_shutdown & RCV_SHUTDOWN)){
1450                struct sk_buff_head *rcvq = &sk->sk_receive_queue;
1451                struct sk_buff *skb;
1452
1453                spin_lock_bh(&rcvq->lock);
1454                while ((skb = skb_peek(rcvq)) != NULL &&
1455                       udp_lib_checksum_complete(skb)) {
1456                        UDP_INC_STATS_BH(sock_net(sk),
1457                                        UDP_MIB_INERRORS, is_lite);
1458                        __skb_unlink(skb, rcvq);
1459                        kfree_skb(skb);
1460                }
1461                spin_unlock_bh(&rcvq->lock);
1462
1463                /* nothing to see, move along */
1464                if (skb == NULL)
1465                        mask &= ~(POLLIN | POLLRDNORM);
1466        }
1467
1468        return mask;
1469
1470}
1471
1472struct proto udp_prot = {
1473        .name                   = "UDP",
1474        .owner                   = THIS_MODULE,
1475        .close                   = udp_lib_close,
1476        .connect           = ip4_datagram_connect,
1477        .disconnect           = udp_disconnect,
1478        .ioctl                   = udp_ioctl,
1479        .destroy           = udp_destroy_sock,
1480        .setsockopt           = udp_setsockopt,
1481        .getsockopt           = udp_getsockopt,
1482        .sendmsg           = udp_sendmsg,
1483        .recvmsg           = udp_recvmsg,
1484        .sendpage           = udp_sendpage,
1485        .backlog_rcv           = __udp_queue_rcv_skb,
1486        .hash                   = udp_lib_hash,
1487        .unhash                   = udp_lib_unhash,
1488        .get_port           = udp_v4_get_port,
1489        .memory_allocated  = &udp_memory_allocated,
1490        .sysctl_mem           = sysctl_udp_mem,
1491        .sysctl_wmem           = &sysctl_udp_wmem_min,
1492        .sysctl_rmem           = &sysctl_udp_rmem_min,
1493        .obj_size           = sizeof(struct udp_sock),
1494        .h.udp_hash           = udp_hash,
1495#ifdef CONFIG_COMPAT
1496        .compat_setsockopt = compat_udp_setsockopt,
1497        .compat_getsockopt = compat_udp_getsockopt,
1498#endif
1499};
1500
1501/* ------------------------------------------------------------------------ */
1502#ifdef CONFIG_PROC_FS
1503
1504static struct sock *udp_get_first(struct seq_file *seq)
1505{
1506        struct sock *sk;
1507        struct udp_iter_state *state = seq->private;
1508        struct net *net = seq_file_net(seq);
1509
1510        for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) {
1511                struct hlist_node *node;
1512                sk_for_each(sk, node, state->hashtable + state->bucket) {
1513                        if (!net_eq(sock_net(sk), net))
1514                                continue;
1515                        if (sk->sk_family == state->family)
1516                                goto found;
1517                }
1518        }
1519        sk = NULL;
1520found:
1521        return sk;
1522}
1523
1524static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
1525{
1526        struct udp_iter_state *state = seq->private;
1527        struct net *net = seq_file_net(seq);
1528
1529        do {
1530                sk = sk_next(sk);
1531try_again:
1532                ;
1533        } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family));
1534
1535        if (!sk && ++state->bucket < UDP_HTABLE_SIZE) {
1536                sk = sk_head(state->hashtable + state->bucket);
1537                goto try_again;
1538        }
1539        return sk;
1540}
1541
1542static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos)
1543{
1544        struct sock *sk = udp_get_first(seq);
1545
1546        if (sk)
1547                while (pos && (sk = udp_get_next(seq, sk)) != NULL)
1548                        --pos;
1549        return pos ? NULL : sk;
1550}
1551
1552static void *udp_seq_start(struct seq_file *seq, loff_t *pos)
1553        __acquires(udp_hash_lock)
1554{
1555        read_lock(&udp_hash_lock);
1556        return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN;
1557}
1558
1559static void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1560{
1561        struct sock *sk;
1562
1563        if (v == SEQ_START_TOKEN)
1564                sk = udp_get_idx(seq, 0);
1565        else
1566                sk = udp_get_next(seq, v);
1567
1568        ++*pos;
1569        return sk;
1570}
1571
1572static void udp_seq_stop(struct seq_file *seq, void *v)
1573        __releases(udp_hash_lock)
1574{
1575        read_unlock(&udp_hash_lock);
1576}
1577
1578static int udp_seq_open(struct inode *inode, struct file *file)
1579{
1580        struct udp_seq_afinfo *afinfo = PDE(inode)->data;
1581        struct udp_iter_state *s;
1582        int err;
1583
1584        err = seq_open_net(inode, file, &afinfo->seq_ops,
1585                           sizeof(struct udp_iter_state));
1586        if (err < 0)
1587                return err;
1588
1589        s = ((struct seq_file *)file->private_data)->private;
1590        s->family                = afinfo->family;
1591        s->hashtable                = afinfo->hashtable;
1592        return err;
1593}
1594
1595/* ------------------------------------------------------------------------ */
1596int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo)
1597{
1598        struct proc_dir_entry *p;
1599        int rc = 0;
1600
1601        afinfo->seq_fops.open                = udp_seq_open;
1602        afinfo->seq_fops.read                = seq_read;
1603        afinfo->seq_fops.llseek                = seq_lseek;
1604        afinfo->seq_fops.release        = seq_release_net;
1605
1606        afinfo->seq_ops.start                = udp_seq_start;
1607        afinfo->seq_ops.next                = udp_seq_next;
1608        afinfo->seq_ops.stop                = udp_seq_stop;
1609
1610        p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
1611                             &afinfo->seq_fops, afinfo);
1612        if (!p)
1613                rc = -ENOMEM;
1614        return rc;
1615}
1616
1617void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo)
1618{
1619        proc_net_remove(net, afinfo->name);
1620}
1621
1622/* ------------------------------------------------------------------------ */
1623static void udp4_format_sock(struct sock *sp, struct seq_file *f,
1624                int bucket, int *len)
1625{
1626        struct inet_sock *inet = inet_sk(sp);
1627        __be32 dest = inet->daddr;
1628        __be32 src  = inet->rcv_saddr;
1629        __u16 destp          = ntohs(inet->dport);
1630        __u16 srcp          = ntohs(inet->sport);
1631
1632        seq_printf(f, "%4d: %08X:%04X %08X:%04X"
1633                " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d%n",
1634                bucket, src, srcp, dest, destp, sp->sk_state,
1635                atomic_read(&sp->sk_wmem_alloc),
1636                atomic_read(&sp->sk_rmem_alloc),
1637                0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp),
1638                atomic_read(&sp->sk_refcnt), sp,
1639                atomic_read(&sp->sk_drops), len);
1640}
1641
1642int udp4_seq_show(struct seq_file *seq, void *v)
1643{
1644        if (v == SEQ_START_TOKEN)
1645                seq_printf(seq, "%-127s\n",
1646                           "  sl  local_address rem_address   st tx_queue "
1647                           "rx_queue tr tm->when retrnsmt   uid  timeout "
1648                           "inode ref pointer drops");
1649        else {
1650                struct udp_iter_state *state = seq->private;
1651                int len;
1652
1653                udp4_format_sock(v, seq, state->bucket, &len);
1654                seq_printf(seq, "%*s\n", 127 - len ,"");
1655        }
1656        return 0;
1657}
1658
1659/* ------------------------------------------------------------------------ */
1660static struct udp_seq_afinfo udp4_seq_afinfo = {
1661        .name                = "udp",
1662        .family                = AF_INET,
1663        .hashtable        = udp_hash,
1664        .seq_fops        = {
1665                .owner        =        THIS_MODULE,
1666        },
1667        .seq_ops        = {
1668                .show                = udp4_seq_show,
1669        },
1670};
1671
1672static int udp4_proc_init_net(struct net *net)
1673{
1674        return udp_proc_register(net, &udp4_seq_afinfo);
1675}
1676
1677static void udp4_proc_exit_net(struct net *net)
1678{
1679        udp_proc_unregister(net, &udp4_seq_afinfo);
1680}
1681
1682static struct pernet_operations udp4_net_ops = {
1683        .init = udp4_proc_init_net,
1684        .exit = udp4_proc_exit_net,
1685};
1686
1687int __init udp4_proc_init(void)
1688{
1689        return register_pernet_subsys(&udp4_net_ops);
1690}
1691
1692void udp4_proc_exit(void)
1693{
1694        unregister_pernet_subsys(&udp4_net_ops);
1695}
1696#endif /* CONFIG_PROC_FS */
1697
1698void __init udp_init(void)
1699{
1700        unsigned long limit;
1701
1702        /* Set the pressure threshold up by the same strategy of TCP. It is a
1703         * fraction of global memory that is up to 1/2 at 256 MB, decreasing
1704         * toward zero with the amount of memory, with a floor of 128 pages.
1705         */
1706        limit = min(nr_all_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT);
1707        limit = (limit * (nr_all_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11);
1708        limit = max(limit, 128UL);
1709        sysctl_udp_mem[0] = limit / 4 * 3;
1710        sysctl_udp_mem[1] = limit;
1711        sysctl_udp_mem[2] = sysctl_udp_mem[0] * 2;
1712
1713        sysctl_udp_rmem_min = SK_MEM_QUANTUM;
1714        sysctl_udp_wmem_min = SK_MEM_QUANTUM;
1715}
1716
/* Symbols exported unconditionally, in alphabetical order. */
EXPORT_SYMBOL(udp_disconnect);
EXPORT_SYMBOL(udp_hash);
EXPORT_SYMBOL(udp_hash_lock);
EXPORT_SYMBOL(udp_ioctl);
EXPORT_SYMBOL(udp_lib_get_port);
EXPORT_SYMBOL(udp_lib_getsockopt);
EXPORT_SYMBOL(udp_lib_setsockopt);
EXPORT_SYMBOL(udp_poll);
EXPORT_SYMBOL(udp_prot);
EXPORT_SYMBOL(udp_sendmsg);

/* The proc registration helpers are only built with CONFIG_PROC_FS. */
#ifdef CONFIG_PROC_FS
EXPORT_SYMBOL(udp_proc_register);
EXPORT_SYMBOL(udp_proc_unregister);
#endif
1731#endif