Showing error 1777

User: Jiri Slaby
Error type: Invalid Pointer Dereference
Error type description: A pointer which is invalid is being dereferenced
File location: net/sunrpc/svcsock.c
Line in file: 1265
Project: Linux Kernel
Project version: 2.6.28
Tools: Smatch (1.59)
Entered: 2013-09-10 20:24:52 UTC


Source:

   1/*
   2 * linux/net/sunrpc/svcsock.c
   3 *
   4 * These are the RPC server socket internals.
   5 *
   6 * The server scheduling algorithm does not always distribute the load
   7 * evenly when servicing a single client. May need to modify the
   8 * svc_xprt_enqueue procedure...
   9 *
  10 * TCP support is largely untested and may be a little slow. The problem
  11 * is that we currently do two separate recvfrom's, one for the 4-byte
  12 * record length, and the second for the actual record. This could possibly
  13 * be improved by always reading a minimum size of around 100 bytes and
  14 * tucking any superfluous bytes away in a temporary store. Still, that
  15 * leaves write requests out in the rain. An alternative may be to peek at
  16 * the first skb in the queue, and if it matches the next TCP sequence
  17 * number, to extract the record marker. Yuck.
  18 *
  19 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
  20 */
  21
  22#include <linux/kernel.h>
  23#include <linux/sched.h>
  24#include <linux/errno.h>
  25#include <linux/fcntl.h>
  26#include <linux/net.h>
  27#include <linux/in.h>
  28#include <linux/inet.h>
  29#include <linux/udp.h>
  30#include <linux/tcp.h>
  31#include <linux/unistd.h>
  32#include <linux/slab.h>
  33#include <linux/netdevice.h>
  34#include <linux/skbuff.h>
  35#include <linux/file.h>
  36#include <linux/freezer.h>
  37#include <net/sock.h>
  38#include <net/checksum.h>
  39#include <net/ip.h>
  40#include <net/ipv6.h>
  41#include <net/tcp.h>
  42#include <net/tcp_states.h>
  43#include <asm/uaccess.h>
  44#include <asm/ioctls.h>
  45
  46#include <linux/sunrpc/types.h>
  47#include <linux/sunrpc/clnt.h>
  48#include <linux/sunrpc/xdr.h>
  49#include <linux/sunrpc/msg_prot.h>
  50#include <linux/sunrpc/svcsock.h>
  51#include <linux/sunrpc/stats.h>
  52
  53#define RPCDBG_FACILITY        RPCDBG_SVCXPRT
  54
  55
  56static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *,
  57                                         int *errp, int flags);
  58static void                svc_udp_data_ready(struct sock *, int);
  59static int                svc_udp_recvfrom(struct svc_rqst *);
  60static int                svc_udp_sendto(struct svc_rqst *);
  61static void                svc_sock_detach(struct svc_xprt *);
  62static void                svc_sock_free(struct svc_xprt *);
  63
  64static struct svc_xprt *svc_create_socket(struct svc_serv *, int,
  65                                          struct sockaddr *, int, int);
  66#ifdef CONFIG_DEBUG_LOCK_ALLOC
    /* One lock class key per address family: [0] = AF_INET, [1] = AF_INET6. */
  67static struct lock_class_key svc_key[2];
  68static struct lock_class_key svc_slock_key[2];
  69
  70static void svc_reclassify_socket(struct socket *sock)
  71{
            /*
             * Lock-debugging build: call sock_lock_init_class_and_name() on
             * sock->sk with NFSD-specific names and the per-family keys above.
             * The socket must not be owned by a user context (BUG_ON), and any
             * family other than AF_INET/AF_INET6 hits BUG().
             */
  72        struct sock *sk = sock->sk;
  73        BUG_ON(sock_owned_by_user(sk));
  74        switch (sk->sk_family) {
  75        case AF_INET:
  76                sock_lock_init_class_and_name(sk, "slock-AF_INET-NFSD",
  77                                              &svc_slock_key[0],
  78                                              "sk_xprt.xpt_lock-AF_INET-NFSD",
  79                                              &svc_key[0]);
  80                break;
  81
  82        case AF_INET6:
  83                sock_lock_init_class_and_name(sk, "slock-AF_INET6-NFSD",
  84                                              &svc_slock_key[1],
  85                                              "sk_xprt.xpt_lock-AF_INET6-NFSD",
  86                                              &svc_key[1]);
  87                break;
  88
  89        default:
  90                BUG();
  91        }
  92}
  93#else
  94static void svc_reclassify_socket(struct socket *sock)
  95{
            /* Without CONFIG_DEBUG_LOCK_ALLOC this is a no-op. */
  96}
  97#endif
  98
  99/*
 100 * Release an skbuff after use
     *
     * Frees the skb stored in rqstp->rq_xprt_ctxt (if any) through
     * skb_free_datagram() on the owning socket, and kfree()s the deferred
     * request in rqstp->rq_deferred (if any). Each field is reset to NULL
     * before its object is freed.
 101 */
 102static void svc_release_skb(struct svc_rqst *rqstp)
 103{
 104        struct sk_buff *skb = rqstp->rq_xprt_ctxt;
 105        struct svc_deferred_req *dr = rqstp->rq_deferred;
 106
 107        if (skb) {
 108                struct svc_sock *svsk =
 109                        container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
 110                rqstp->rq_xprt_ctxt = NULL;
 111
 112                dprintk("svc: service %p, releasing skb %p\n", rqstp, skb);
 113                skb_free_datagram(svsk->sk_sk, skb);
 114        }
 115        if (dr) {
 116                rqstp->rq_deferred = NULL;
 117                kfree(dr);
 118        }
 119}
 120
 121union svc_pktinfo_u {
            /* Large enough for either the IPv4 or the IPv6 packet-info payload. */
 122        struct in_pktinfo pkti;
 123        struct in6_pktinfo pkti6;
 124};
    /* Control-message buffer space needed to carry one svc_pktinfo_u. */
 125#define SVC_PKTINFO_SPACE \
 126        CMSG_SPACE(sizeof(union svc_pktinfo_u))
 127
 128static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh)
 129{
            /*
             * Fill the control message at cmh with packet-info for a reply:
             * interface index 0 and the address saved in rqstp->rq_daddr,
             * using IP_PKTINFO for AF_INET sockets and IPV6_PKTINFO for
             * AF_INET6 sockets. There is no default case, so for any other
             * family cmh is left untouched.
             */
 130        struct svc_sock *svsk =
 131                container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
 132        switch (svsk->sk_sk->sk_family) {
 133        case AF_INET: {
 134                        struct in_pktinfo *pki = CMSG_DATA(cmh);
 135
 136                        cmh->cmsg_level = SOL_IP;
 137                        cmh->cmsg_type = IP_PKTINFO;
 138                        pki->ipi_ifindex = 0;
 139                        pki->ipi_spec_dst.s_addr = rqstp->rq_daddr.addr.s_addr;
 140                        cmh->cmsg_len = CMSG_LEN(sizeof(*pki));
 141                }
 142                break;
 143
 144        case AF_INET6: {
 145                        struct in6_pktinfo *pki = CMSG_DATA(cmh);
 146
 147                        cmh->cmsg_level = SOL_IPV6;
 148                        cmh->cmsg_type = IPV6_PKTINFO;
 149                        pki->ipi6_ifindex = 0;
 150                        ipv6_addr_copy(&pki->ipi6_addr,
 151                                        &rqstp->rq_daddr.addr6);
 152                        cmh->cmsg_len = CMSG_LEN(sizeof(*pki));
 153                }
 154                break;
 155        }
 156        return;
 157}
 158
 159/*
 160 * Generic sendto routine
     *
     * Sends the reply described by xdr on the socket behind rqstp->rq_xprt in
     * up to three stages: head, page data, tail. MSG_MORE is kept set until
     * the stage that carries the last byte of the message.
     *
     * For UDP a data-less sock_sendmsg() is issued first, carrying only the
     * destination address and the packet-info control message.
     *
     * Returns the value accumulated in len: the result of the head send plus
     * every positive result of the later sends. If the head send fails or is
     * short, its result is returned as is.
 161 */
 162static int svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
 163{
 164        struct svc_sock        *svsk =
 165                container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
 166        struct socket        *sock = svsk->sk_sock;
 167        int                slen;
            /* Union gives the cmsg buffer the alignment of a long. */
 168        union {
 169                struct cmsghdr        hdr;
 170                long                all[SVC_PKTINFO_SPACE / sizeof(long)];
 171        } buffer;
 172        struct cmsghdr *cmh = &buffer.hdr;
 173        int                len = 0;
 174        int                result;
 175        int                size;
 176        struct page        **ppage = xdr->pages;
 177        size_t                base = xdr->page_base;
 178        unsigned int        pglen = xdr->page_len;
 179        unsigned int        flags = MSG_MORE;
 180        RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
 181
            /* slen counts the bytes of the whole message still to be sent. */
 182        slen = xdr->len;
 183
 184        if (rqstp->rq_prot == IPPROTO_UDP) {
 185                struct msghdr msg = {
 186                        .msg_name        = &rqstp->rq_addr,
 187                        .msg_namelen        = rqstp->rq_addrlen,
 188                        .msg_control        = cmh,
 189                        .msg_controllen        = sizeof(buffer),
 190                        .msg_flags        = MSG_MORE,
 191                };
 192
 193                svc_set_cmsg_data(rqstp, cmh);
 194
                    /*
                     * NOTE(review): on failure the error code is discarded and
                     * the function returns len == 0 -- confirm callers expect
                     * 0 rather than a negative errno here.
                     */
 195                if (sock_sendmsg(sock, &msg, 0) < 0)
 196                        goto out;
 197        }
 198
 199        /* send head */
 200        if (slen == xdr->head[0].iov_len)
 201                flags = 0;
            /* The head is sent from offset 0 of the first response page. */
 202        len = kernel_sendpage(sock, rqstp->rq_respages[0], 0,
 203                                  xdr->head[0].iov_len, flags);
 204        if (len != xdr->head[0].iov_len)
 205                goto out;
 206        slen -= xdr->head[0].iov_len;
 207        if (slen == 0)
 208                goto out;
 209
 210        /* send page data */
            /* First chunk is limited to what remains of the page after base. */
 211        size = PAGE_SIZE - base < pglen ? PAGE_SIZE - base : pglen;
 212        while (pglen > 0) {
 213                if (slen == size)
 214                        flags = 0;
 215                result = kernel_sendpage(sock, *ppage, base, size, flags);
 216                if (result > 0)
 217                        len += result;
                    /* A short or failed send ends the transfer. */
 218                if (result != size)
 219                        goto out;
 220                slen -= size;
 221                pglen -= size;
 222                size = PAGE_SIZE < pglen ? PAGE_SIZE : pglen;
 223                base = 0;
 224                ppage++;
 225        }
 226        /* send tail */
 227        if (xdr->tail[0].iov_len) {
                    /*
                     * The tail is sent from rq_respages[0] at the in-page
                     * offset of tail[0].iov_base -- presumably the tail
                     * lives in the same page as the head; TODO confirm.
                     */
 228                result = kernel_sendpage(sock, rqstp->rq_respages[0],
 229                                             ((unsigned long)xdr->tail[0].iov_base)
 230                                                & (PAGE_SIZE-1),
 231                                             xdr->tail[0].iov_len, 0);
 232
 233                if (result > 0)
 234                        len += result;
 235        }
 236out:
 237        dprintk("svc: socket %p sendto([%p %Zu... ], %d) = %d (addr %s)\n",
 238                svsk, xdr->head[0].iov_base, xdr->head[0].iov_len,
 239                xdr->len, len, svc_print_addr(rqstp, buf, sizeof(buf)));
 240
 241        return len;
 242}
 243
 244/*
 245 * Report socket names for nfsdfs
     *
     * Writes one newline-terminated line describing svsk into buf:
     * "ipv4 <udp|tcp> a.b.c.d <port>" for AF_INET sockets, or
     * "*unknown-<family>*" for every other family (AF_INET6 included).
     * Returns the number of characters sprintf() produced.
     *
     * NOTE(review): sprintf() is unbounded and no buffer size is passed in;
     * the caller has to guarantee that buf is large enough.
 246 */
 247static int one_sock_name(char *buf, struct svc_sock *svsk)
 248{
 249        int len;
 250
 251        switch(svsk->sk_sk->sk_family) {
 252        case AF_INET:
 253                len = sprintf(buf, "ipv4 %s %u.%u.%u.%u %d\n",
 254                              svsk->sk_sk->sk_protocol==IPPROTO_UDP?
 255                              "udp" : "tcp",
 256                              NIPQUAD(inet_sk(svsk->sk_sk)->rcv_saddr),
 257                              inet_sk(svsk->sk_sk)->num);
 258                break;
 259        default:
 260                len = sprintf(buf, "*unknown-%d*\n",
 261                               svsk->sk_sk->sk_family);
 262        }
 263        return len;
 264}
 265
 266int
 267svc_sock_names(char *buf, struct svc_serv *serv, char *toclose)
 268{
            /*
             * List the permanent sockets of serv into buf, one line each (see
             * one_sock_name()), walking serv->sv_permsocks under sv_lock.
             *
             * If toclose is non-NULL, the socket whose line equals toclose is
             * left out of the listing and closed with svc_close_xprt() after
             * the lock is dropped; -ENOENT is returned when nothing matches.
             *
             * Returns the number of characters written to buf, 0 when serv is
             * NULL, or -ENOENT as described above.
             */
 269        struct svc_sock *svsk, *closesk = NULL;
 270        int len = 0;
 271
 272        if (!serv)
 273                return 0;
 274        spin_lock_bh(&serv->sv_lock);
 275        list_for_each_entry(svsk, &serv->sv_permsocks, sk_xprt.xpt_list) {
 276                int onelen = one_sock_name(buf+len, svsk);
                    /*
                     * On a match len is not advanced, so the next entry
                     * overwrites the matched line.
                     */
 277                if (toclose && strcmp(toclose, buf+len) == 0)
 278                        closesk = svsk;
 279                else
 280                        len += onelen;
 281        }
 282        spin_unlock_bh(&serv->sv_lock);
 283        if (closesk)
 284                /* Should unregister with portmap, but you cannot
 285                 * unregister just one protocol...
 286                 */
 287                svc_close_xprt(&closesk->sk_xprt);
 288        else if (toclose)
 289                return -ENOENT;
 290        return len;
 291}
 292EXPORT_SYMBOL(svc_sock_names);
 293
 294/*
 295 * Check input queue length
     *
     * Issues the TIOCINQ ioctl on the socket of svsk. Returns the value the
     * ioctl stored in avail when it succeeds (err >= 0), otherwise the
     * negative error from kernel_sock_ioctl().
 296 */
 297static int svc_recv_available(struct svc_sock *svsk)
 298{
 299        struct socket        *sock = svsk->sk_sock;
 300        int                avail, err;
 301
 302        err = kernel_sock_ioctl(sock, TIOCINQ, (unsigned long) &avail);
 303
 304        return (err >= 0)? avail : err;
 305}
 306
 307/*
 308 * Generic recvfrom routine.
     *
     * Resets rqstp->rq_xprt_hlen to 0 and performs a non-blocking
     * (MSG_DONTWAIT) kernel_recvmsg() of up to buflen bytes into the nr
     * kvecs at iov. Returns whatever kernel_recvmsg() returned.
 309 */
 310static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr,
 311                        int buflen)
 312{
 313        struct svc_sock *svsk =
 314                container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
 315        struct msghdr msg = {
 316                .msg_flags        = MSG_DONTWAIT,
 317        };
 318        int len;
 319
 320        rqstp->rq_xprt_hlen = 0;
 321
 322        len = kernel_recvmsg(svsk->sk_sock, &msg, iov, nr, buflen,
 323                                msg.msg_flags);
 324
 325        dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n",
 326                svsk, iov[0].iov_base, iov[0].iov_len, len);
 327        return len;
 328}
 329
 330/*
 331 * Set socket snd and rcv buffer lengths
     *
     * The live (#else) branch writes twice the requested sizes directly into
     * sk_sndbuf/sk_rcvbuf while holding the socket lock, and sets
     * SOCK_SNDBUF_LOCK and SOCK_RCVBUF_LOCK in sk_userlocks. The "#if 0"
     * branch using sock_setsockopt() is compiled out.
 332 */
 333static void svc_sock_setbufsize(struct socket *sock, unsigned int snd,
 334                                unsigned int rcv)
 335{
 336#if 0
 337        mm_segment_t        oldfs;
 338        oldfs = get_fs(); set_fs(KERNEL_DS);
 339        sock_setsockopt(sock, SOL_SOCKET, SO_SNDBUF,
 340                        (char*)&snd, sizeof(snd));
 341        sock_setsockopt(sock, SOL_SOCKET, SO_RCVBUF,
 342                        (char*)&rcv, sizeof(rcv));
 343#else
 344        /* sock_setsockopt limits use to sysctl_?mem_max,
 345         * which isn't acceptable.  Until that is made conditional
 346         * on not having CAP_SYS_RESOURCE or similar, we go direct...
 347         * DaveM said I could!
 348         */
 349        lock_sock(sock->sk);
 350        sock->sk->sk_sndbuf = snd * 2;
 351        sock->sk->sk_rcvbuf = rcv * 2;
 352        sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK;
 353        release_sock(sock->sk);
 354#endif
 355}
 356/*
 357 * INET callback when data has been received on the socket.
     *
     * If the sock still has an svc_sock attached (sk_user_data), flag the
     * transport with XPT_DATA and enqueue it. Independently of that, wake any
     * task sleeping on sk->sk_sleep.
 358 */
 359static void svc_udp_data_ready(struct sock *sk, int count)
 360{
 361        struct svc_sock        *svsk = (struct svc_sock *)sk->sk_user_data;
 362
 363        if (svsk) {
 364                dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n",
 365                        svsk, sk, count,
 366                        test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));
 367                set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
 368                svc_xprt_enqueue(&svsk->sk_xprt);
 369        }
 370        if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
 371                wake_up_interruptible(sk->sk_sleep);
 372}
 373
 374/*
 375 * INET callback when space is newly available on the socket.
     *
     * Enqueues the transport attached to the sock (if any) without setting
     * any transport flag, then wakes any task sleeping on sk->sk_sleep.
 376 */
 377static void svc_write_space(struct sock *sk)
 378{
 379        struct svc_sock        *svsk = (struct svc_sock *)(sk->sk_user_data);
 380
 381        if (svsk) {
 382                dprintk("svc: socket %p(inet %p), write_space busy=%d\n",
 383                        svsk, sk, test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));
 384                svc_xprt_enqueue(&svsk->sk_xprt);
 385        }
 386
 387        if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) {
 388                dprintk("RPC svc_write_space: someone sleeping on %p\n",
 389                       svsk);
 390                wake_up_interruptible(sk->sk_sleep);
 391        }
 392}
 393
 394/*
 395 * Copy the UDP datagram's destination address to the rqstp structure.
 396 * The 'destination' address in this case is the address to which the
 397 * peer sent the datagram, i.e. our local address. For multihomed
 398 * hosts, this can change from msg to msg. Note that only the IP
 399 * address changes, the port number should remain the same.
     *
     * The payload of cmh is interpreted according to the socket's address
     * family (in_pktinfo for AF_INET, in6_pktinfo for AF_INET6); the cmsg
     * level/type are not checked here. For any other family rq_daddr is
     * left unchanged.
 400 */
 401static void svc_udp_get_dest_address(struct svc_rqst *rqstp,
 402                                     struct cmsghdr *cmh)
 403{
 404        struct svc_sock *svsk =
 405                container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
 406        switch (svsk->sk_sk->sk_family) {
 407        case AF_INET: {
 408                struct in_pktinfo *pki = CMSG_DATA(cmh);
 409                rqstp->rq_daddr.addr.s_addr = pki->ipi_spec_dst.s_addr;
 410                break;
 411                }
 412        case AF_INET6: {
 413                struct in6_pktinfo *pki = CMSG_DATA(cmh);
 414                ipv6_addr_copy(&rqstp->rq_daddr.addr6, &pki->ipi6_addr);
 415                break;
 416                }
 417        }
 418}
 419
 420/*
 421 * Receive a datagram from a UDP socket.
     *
     * Peeks at the socket with kernel_recvmsg() to obtain the sender address
     * and control message, then dequeues the datagram itself with
     * skb_recv_datagram(). A linear skb is used in place and handed to the
     * request via rq_xprt_ctxt; a non-linear skb is copied into rq_arg and
     * freed here.
     *
     * Returns the payload length (skb length minus the UDP header) on
     * success, 0 when the datagram is dropped (unexpected control message or
     * checksum failure), -EAGAIN when no datagram could be dequeued, or the
     * negative value from svc_addr_len() for an unsupported sender address.
     *
     * Every exit path frees the skb unless it was stored in rq_xprt_ctxt,
     * and calls svc_xprt_received() exactly once.
 422 */
 423static int svc_udp_recvfrom(struct svc_rqst *rqstp)
 424{
 425        struct svc_sock        *svsk =
 426                container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
 427        struct svc_serv        *serv = svsk->sk_xprt.xpt_server;
 428        struct sk_buff        *skb;
            /* Union gives the cmsg buffer the alignment of a long. */
 429        union {
 430                struct cmsghdr        hdr;
 431                long                all[SVC_PKTINFO_SPACE / sizeof(long)];
 432        } buffer;
 433        struct cmsghdr *cmh = &buffer.hdr;
 434        int                err, len;
 435        struct msghdr msg = {
 436                .msg_name = svc_addr(rqstp),
 437                .msg_control = cmh,
 438                .msg_controllen = sizeof(buffer),
 439                .msg_flags = MSG_DONTWAIT,
 440        };
 441
 442        if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags))
 443            /* udp sockets need large rcvbuf as all pending
 444             * requests are still in that buffer.  sndbuf must
 445             * also be large enough that there is enough space
 446             * for one reply per thread.  We count all threads
 447             * rather than threads in a particular pool, which
 448             * provides an upper bound on the number of threads
 449             * which will access the socket.
 450             */
 451            svc_sock_setbufsize(svsk->sk_sock,
 452                                (serv->sv_nrthreads+3) * serv->sv_max_mesg,
 453                                (serv->sv_nrthreads+3) * serv->sv_max_mesg);
 454
 455        clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
 456        skb = NULL;
            /* Zero-length peek: fills in the sender address and cmsg only. */
 457        err = kernel_recvmsg(svsk->sk_sock, &msg, NULL,
 458                             0, 0, MSG_PEEK | MSG_DONTWAIT);
 459        if (err >= 0)
 460                skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err);
 461
 462        if (skb == NULL) {
 463                if (err != -EAGAIN) {
 464                        /* possibly an icmp error */
 465                        dprintk("svc: recvfrom returned error %d\n", -err);
 466                        set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
 467                }
 468                svc_xprt_received(&svsk->sk_xprt);
 469                return -EAGAIN;
 470        }
 471        len = svc_addr_len(svc_addr(rqstp));
 472        if (len < 0) {
                    /*
                     * The skb has already been dequeued and is not yet
                     * attached to rqstp, so it must be freed here or it is
                     * leaked. Also hand the transport back, as every other
                     * exit path of this function does.
                     */
                    skb_free_datagram(svsk->sk_sk, skb);
                    svc_xprt_received(&svsk->sk_xprt);
 473                return len;
            }
 474        rqstp->rq_addrlen = len;
 475        if (skb->tstamp.tv64 == 0) {
 476                skb->tstamp = ktime_get_real();
 477                /* Don't enable netstamp, sunrpc doesn't
 478                   need that much accuracy */
 479        }
 480        svsk->sk_sk->sk_stamp = skb->tstamp;
 481        set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* there may be more data... */
 482
 483        /*
 484         * Maybe more packets - kick another thread ASAP.
 485         */
 486        svc_xprt_received(&svsk->sk_xprt);
 487
            /* Payload length excludes the UDP header. */
 488        len  = skb->len - sizeof(struct udphdr);
 489        rqstp->rq_arg.len = len;
 490
 491        rqstp->rq_prot = IPPROTO_UDP;
 492
            /*
             * NOTE(review): only IPPROTO_IP/IP_PKTINFO is accepted here,
             * although svc_udp_get_dest_address() also has an AF_INET6
             * branch; a datagram carrying an IPV6_PKTINFO control message
             * would be dropped by this check -- confirm that is intended.
             */
 493        if (cmh->cmsg_level != IPPROTO_IP ||
 494            cmh->cmsg_type != IP_PKTINFO) {
 495                if (net_ratelimit())
 496                        printk("rpcsvc: received unknown control message:"
 497                               "%d/%d\n",
 498                               cmh->cmsg_level, cmh->cmsg_type);
 499                skb_free_datagram(svsk->sk_sk, skb);
 500                return 0;
 501        }
 502        svc_udp_get_dest_address(rqstp, cmh);
 503
 504        if (skb_is_nonlinear(skb)) {
 505                /* we have to copy */
 506                local_bh_disable();
 507                if (csum_partial_copy_to_xdr(&rqstp->rq_arg, skb)) {
 508                        local_bh_enable();
 509                        /* checksum error */
 510                        skb_free_datagram(svsk->sk_sk, skb);
 511                        return 0;
 512                }
 513                local_bh_enable();
 514                skb_free_datagram(svsk->sk_sk, skb);
 515        } else {
 516                /* we can use it in-place */
 517                rqstp->rq_arg.head[0].iov_base = skb->data +
 518                        sizeof(struct udphdr);
 519                rqstp->rq_arg.head[0].iov_len = len;
 520                if (skb_checksum_complete(skb)) {
 521                        skb_free_datagram(svsk->sk_sk, skb);
 522                        return 0;
 523                }
                    /* Ownership of the skb passes to the request. */
 524                rqstp->rq_xprt_ctxt = skb;
 525        }
 526
            /*
             * Split the payload between head[0] and the page list, and place
             * rq_respages after the pages the argument occupies.
             */
 527        rqstp->rq_arg.page_base = 0;
 528        if (len <= rqstp->rq_arg.head[0].iov_len) {
 529                rqstp->rq_arg.head[0].iov_len = len;
 530                rqstp->rq_arg.page_len = 0;
 531                rqstp->rq_respages = rqstp->rq_pages+1;
 532        } else {
 533                rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len;
 534                rqstp->rq_respages = rqstp->rq_pages + 1 +
 535                        DIV_ROUND_UP(rqstp->rq_arg.page_len, PAGE_SIZE);
 536        }
 537
 538        if (serv->sv_stats)
 539                serv->sv_stats->netudpcnt++;
 540
 541        return len;
 542}
 543
 544static int
 545svc_udp_sendto(struct svc_rqst *rqstp)
 546{
            /*
             * Send rqstp->rq_res with svc_sendto(). If that returns
             * -ECONNREFUSED the send is retried exactly once; the result of
             * the last attempt is returned.
             */
 547        int                error;
 548
 549        error = svc_sendto(rqstp, &rqstp->rq_res);
 550        if (error == -ECONNREFUSED)
 551                /* ICMP error on earlier request. */
 552                error = svc_sendto(rqstp, &rqstp->rq_res);
 553
 554        return error;
 555}
 556
 557static void svc_udp_prep_reply_hdr(struct svc_rqst *rqstp)
 558{
            /* Intentionally empty: nothing is done to the reply for UDP. */
 559}
 560
 561static int svc_udp_has_wspace(struct svc_xprt *xprt)
 562{
            /*
             * Report whether the socket has send space for another reply.
             * Returns 1 if twice (reserved bytes + one maximum-size message)
             * fits in sock_wspace(), otherwise 0 with SOCK_NOSPACE left set.
             */
 563        struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
 564        struct svc_serv        *serv = xprt->xpt_server;
 565        unsigned long required;
 566
 567        /*
 568         * Set the SOCK_NOSPACE flag before checking the available
 569         * sock space.
 570         */
 571        set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
 572        required = atomic_read(&svsk->sk_xprt.xpt_reserved) + serv->sv_max_mesg;
 573        if (required*2 > sock_wspace(svsk->sk_sk))
 574                return 0;
 575        clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
 576        return 1;
 577}
 578
 579static struct svc_xprt *svc_udp_accept(struct svc_xprt *xprt)
 580{
            /* Accept is never expected on a UDP transport: reaching this is a BUG(). */
 581        BUG();
 582        return NULL;
 583}
 584
 585static struct svc_xprt *svc_udp_create(struct svc_serv *serv,
 586                                       struct sockaddr *sa, int salen,
 587                                       int flags)
 588{
            /* Thin wrapper: create the transport via svc_create_socket() with IPPROTO_UDP. */
 589        return svc_create_socket(serv, IPPROTO_UDP, sa, salen, flags);
 590}
 591
 592static struct svc_xprt_ops svc_udp_ops = {
            /* Transport operations for UDP sockets. */
 593        .xpo_create = svc_udp_create,
 594        .xpo_recvfrom = svc_udp_recvfrom,
 595        .xpo_sendto = svc_udp_sendto,
 596        .xpo_release_rqst = svc_release_skb,
 597        .xpo_detach = svc_sock_detach,
 598        .xpo_free = svc_sock_free,
 599        .xpo_prep_reply_hdr = svc_udp_prep_reply_hdr,
 600        .xpo_has_wspace = svc_udp_has_wspace,
            /* svc_udp_accept() is a BUG() stub. */
 601        .xpo_accept = svc_udp_accept,
 602};
 603
 604static struct svc_xprt_class svc_udp_class = {
            /* Transport class "udp": binds the name to svc_udp_ops and the UDP payload limit. */
 605        .xcl_name = "udp",
 606        .xcl_owner = THIS_MODULE,
 607        .xcl_ops = &svc_udp_ops,
 608        .xcl_max_payload = RPCSVC_MAXPAYLOAD_UDP,
 609};
 610
 611static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
 612{
            /*
             * Set up svsk as a UDP transport of serv: initialise the xprt
             * with svc_udp_class, install the data_ready/write_space socket
             * callbacks, size the socket buffers for three maximum-size
             * messages, and enable IP_PKTINFO on the socket.
             */
 613        int one = 1;
 614        mm_segment_t oldfs;
 615
 616        svc_xprt_init(&svc_udp_class, &svsk->sk_xprt, serv);
 617        clear_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
 618        svsk->sk_sk->sk_data_ready = svc_udp_data_ready;
 619        svsk->sk_sk->sk_write_space = svc_write_space;
 620
 621        /* initialise setting must have enough space to
 622         * receive and respond to one request.
 623         * svc_udp_recvfrom will re-adjust if necessary
 624         */
 625        svc_sock_setbufsize(svsk->sk_sock,
 626                            3 * svsk->sk_xprt.xpt_server->sv_max_mesg,
 627                            3 * svsk->sk_xprt.xpt_server->sv_max_mesg);
 628
 629        /* data might have come in before data_ready set up */
 630        set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
            /* Ask svc_udp_recvfrom() to resize the buffers on its first call. */
 631        set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
 632
            /* The option value lives in kernel memory, hence KERNEL_DS. */
 633        oldfs = get_fs();
 634        set_fs(KERNEL_DS);
 635        /* make sure we get destination address info */
            /*
             * NOTE(review): the setsockopt() result is ignored, and the
             * IPPROTO_IP level is used whatever the socket's family is --
             * confirm this is sufficient for AF_INET6 sockets.
             */
 636        svsk->sk_sock->ops->setsockopt(svsk->sk_sock, IPPROTO_IP, IP_PKTINFO,
 637                                       (char __user *)&one, sizeof(one));
 638        set_fs(oldfs);
 639}
 640
 641/*
 642 * A data_ready event on a listening socket means there's a connection
 643 * pending. Do not use state_change as a substitute for it.
     *
     * For a socket in TCP_LISTEN state, flags the attached transport with
     * XPT_CONN and enqueues it (or logs a message if no svc_sock is
     * attached). Always wakes all tasks sleeping on sk->sk_sleep.
 644 */
 645static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused)
 646{
 647        struct svc_sock        *svsk = (struct svc_sock *)sk->sk_user_data;
 648
 649        dprintk("svc: socket %p TCP (listen) state change %d\n",
 650                sk, sk->sk_state);
 651
 652        /*
 653         * This callback may be called twice when a new connection
 654         * is established, as a child socket inherits everything
 655         * from its parent LISTEN socket.
 656         * 1) data_ready method of the parent socket will be called
 657         *    when one of the child sockets becomes ESTABLISHED.
 658         * 2) data_ready method of the child socket may be called
 659         *    when it receives data before the socket is accepted.
 660         * In case of 2, we should ignore it silently.
 661         */
 662        if (sk->sk_state == TCP_LISTEN) {
 663                if (svsk) {
 664                        set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
 665                        svc_xprt_enqueue(&svsk->sk_xprt);
 666                } else
 667                        printk("svc: socket %p: no user data\n", sk);
 668        }
 669
 670        if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
 671                wake_up_interruptible_all(sk->sk_sleep);
 672}
 673
 674/*
 675 * A state change on a connected socket means it's dying or dead.
     *
     * Flags the attached transport with XPT_CLOSE and enqueues it (or logs a
     * message if no svc_sock is attached), then wakes all tasks sleeping on
     * sk->sk_sleep.
 676 */
 677static void svc_tcp_state_change(struct sock *sk)
 678{
 679        struct svc_sock        *svsk = (struct svc_sock *)sk->sk_user_data;
 680
 681        dprintk("svc: socket %p TCP (connected) state change %d (svsk %p)\n",
 682                sk, sk->sk_state, sk->sk_user_data);
 683
 684        if (!svsk)
 685                printk("svc: socket %p: no user data\n", sk);
 686        else {
 687                set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
 688                svc_xprt_enqueue(&svsk->sk_xprt);
 689        }
 690        if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
 691                wake_up_interruptible_all(sk->sk_sleep);
 692}
 693
 694static void svc_tcp_data_ready(struct sock *sk, int count)
 695{
            /*
             * Socket data_ready callback for a TCP socket: flag the attached
             * transport (if any) with XPT_DATA and enqueue it, then wake any
             * task sleeping on sk->sk_sleep. The count argument is unused.
             */
 696        struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
 697
 698        dprintk("svc: socket %p TCP data ready (svsk %p)\n",
 699                sk, sk->sk_user_data);
 700        if (svsk) {
 701                set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
 702                svc_xprt_enqueue(&svsk->sk_xprt);
 703        }
 704        if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
 705                wake_up_interruptible(sk->sk_sleep);
 706}
 707
 708/*
 709 * Accept a TCP connection
     *
     * Accepts one pending connection on the listening socket behind xprt
     * without blocking, wraps the new socket in an anonymous, temporary
     * svc_sock, and records its remote and local addresses on the new
     * transport.
     *
     * Returns the new transport, or NULL if the listener has no socket, if
     * kernel_accept() or kernel_getpeername() fails, or if
     * svc_setup_socket() fails. After a successful accept, the new socket
     * is released on every failure path.
 710 */
 711static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
 712{
 713        struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
 714        struct sockaddr_storage addr;
 715        struct sockaddr        *sin = (struct sockaddr *) &addr;
 716        struct svc_serv        *serv = svsk->sk_xprt.xpt_server;
 717        struct socket        *sock = svsk->sk_sock;
 718        struct socket        *newsock;
 719        struct svc_sock        *newsvsk;
 720        int                err, slen;
 721        RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
 722
 723        dprintk("svc: tcp_accept %p sock %p\n", svsk, sock);
 724        if (!sock)
 725                return NULL;
 726
 727        clear_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
 728        err = kernel_accept(sock, &newsock, O_NONBLOCK);
 729        if (err < 0) {
 730                if (err == -ENOMEM)
 731                        printk(KERN_WARNING "%s: no more sockets!\n",
 732                               serv->sv_name);
 733                else if (err != -EAGAIN && net_ratelimit())
 734                        printk(KERN_WARNING "%s: accept failed (err %d)!\n",
 735                                   serv->sv_name, -err);
 736                return NULL;
 737        }
            /*
             * XPT_CONN is set again after a successful accept -- presumably
             * so the listener is checked for further pending connections;
             * confirm against the transport scheduling code.
             */
 738        set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
 739
 740        err = kernel_getpeername(newsock, sin, &slen);
 741        if (err < 0) {
 742                if (net_ratelimit())
 743                        printk(KERN_WARNING "%s: peername failed (err %d)!\n",
 744                                   serv->sv_name, -err);
 745                goto failed;                /* aborted connection or whatever */
 746        }
 747
 748        /* Ideally, we would want to reject connections from unauthorized
 749         * hosts here, but when we get encryption, the IP of the host won't
 750         * tell us anything.  For now just warn about unpriv connections.
 751         */
 752        if (!svc_port_is_privileged(sin)) {
 753                dprintk(KERN_WARNING
 754                        "%s: connect from unprivileged port: %s\n",
 755                        serv->sv_name,
 756                        __svc_print_addr(sin, buf, sizeof(buf)));
 757        }
 758        dprintk("%s: connect from %s\n", serv->sv_name,
 759                __svc_print_addr(sin, buf, sizeof(buf)));
 760
 761        /* make sure that a write doesn't block forever when
 762         * low on memory
 763         */
 764        newsock->sk->sk_sndtimeo = HZ*30;
 765
 766        if (!(newsvsk = svc_setup_socket(serv, newsock, &err,
 767                                 (SVC_SOCK_ANONYMOUS | SVC_SOCK_TEMPORARY))))
 768                goto failed;
            /* sin/slen still hold the peer address from kernel_getpeername(). */
 769        svc_xprt_set_remote(&newsvsk->sk_xprt, sin, slen);
            /* sin is reused for the local address from here on. */
 770        err = kernel_getsockname(newsock, sin, &slen);
 771        if (unlikely(err < 0)) {
 772                dprintk("svc_tcp_accept: kernel_getsockname error %d\n", -err);
                    /* Fall back to a length covering only the fields before sa_data. */
 773                slen = offsetof(struct sockaddr, sa_data);
 774        }
 775        svc_xprt_set_local(&newsvsk->sk_xprt, sin, slen);
 776
 777        if (serv->sv_stats)
 778                serv->sv_stats->nettcpconn++;
 779
 780        return &newsvsk->sk_xprt;
 781
 782failed:
 783        sock_release(newsock);
 784        return NULL;
 785}
 786
 787/*
 788 * Receive data from a TCP socket.
     *
     * The 4-byte record marker is read first; a partial marker read is
     * resumed on the next call using sk_tcplen.  Once the marker is
     * complete and svc_recv_available() reports at least sk_reclen bytes,
     * the record is received into rqstp->rq_vec.
     *
     * Returns the byte count from svc_recvfrom() for the record on
     * success.  Returns -EAGAIN when the marker or the record is not yet
     * complete, and also after flagging the transport with XPT_CLOSE
     * (fragment without the last-fragment bit, record larger than
     * sv_max_mesg, or a receive error other than -EAGAIN).
 789 */
 790static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
 791{
 792        struct svc_sock        *svsk =
 793                container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
 794        struct svc_serv        *serv = svsk->sk_xprt.xpt_server;
 795        int                len;
 796        struct kvec *vec;
 797        int pnum, vlen;
 798
 799        dprintk("svc: tcp_recv %p data %d conn %d close %d\n",
 800                svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags),
 801                test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags),
 802                test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags));
 803
 804        if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags))
 805                /* sndbuf needs to have room for one request
 806                 * per thread, otherwise we can stall even when the
 807                 * network isn't a bottleneck.
 808                 *
 809                 * We count all threads rather than threads in a
 810                 * particular pool, which provides an upper bound
 811                 * on the number of threads which will access the socket.
 812                 *
 813                 * rcvbuf just needs to be able to hold a few requests.
 814                 * Normally they will be removed from the queue
 815                 * as soon as a complete request arrives.
 816                 */
 817                svc_sock_setbufsize(svsk->sk_sock,
 818                                    (serv->sv_nrthreads+3) * serv->sv_max_mesg,
 819                                    3 * serv->sv_max_mesg);
 820
            /* Cleared up front; set again below once a full record is queued. */
 821        clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
 822
 823        /* Receive data. If we haven't got the record length yet, get
 824         * the next four bytes. Otherwise try to gobble up as much as
 825         * possible up to the complete record length.
 826         */
 827        if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) {
 828                int                want = sizeof(rpc_fraghdr) - svsk->sk_tcplen;
 829                struct kvec        iov;
 830
                    /* Continue filling sk_reclen where the previous read stopped. */
 831                iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen;
 832                iov.iov_len  = want;
 833                if ((len = svc_recvfrom(rqstp, &iov, 1, want)) < 0)
 834                        goto error;
 835                svsk->sk_tcplen += len;
 836
 837                if (len < want) {
 838                        dprintk("svc: short recvfrom while reading record "
 839                                "length (%d of %d)\n", len, want);
 840                        svc_xprt_received(&svsk->sk_xprt);
 841                        return -EAGAIN; /* record header not complete */
 842                }
 843
                    /* Marker was read in network byte order; convert in place. */
 844                svsk->sk_reclen = ntohl(svsk->sk_reclen);
 845                if (!(svsk->sk_reclen & RPC_LAST_STREAM_FRAGMENT)) {
 846                        /* FIXME: technically, a record can be fragmented,
 847                         *  and non-terminal fragments will not have the top
 848                         *  bit set in the fragment length header.
 849                         *  But apparently no known nfs clients send fragmented
 850                         *  records. */
 851                        if (net_ratelimit())
 852                                printk(KERN_NOTICE "RPC: multiple fragments "
 853                                        "per record not supported\n");
 854                        goto err_delete;
 855                }
                    /* Strip the flag bit, leaving only the fragment length. */
 856                svsk->sk_reclen &= RPC_FRAGMENT_SIZE_MASK;
 857                dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen);
 858                if (svsk->sk_reclen > serv->sv_max_mesg) {
 859                        if (net_ratelimit())
 860                                printk(KERN_NOTICE "RPC: "
 861                                        "fragment too large: 0x%08lx\n",
 862                                        (unsigned long)svsk->sk_reclen);
 863                        goto err_delete;
 864                }
 865        }
 866
 867        /* Check whether enough data is available */
 868        len = svc_recv_available(svsk);
 869        if (len < 0)
 870                goto error;
 871
 872        if (len < svsk->sk_reclen) {
 873                dprintk("svc: incomplete TCP record (%d of %d)\n",
 874                        len, svsk->sk_reclen);
 875                svc_xprt_received(&svsk->sk_xprt);
 876                return -EAGAIN;        /* record not complete */
 877        }
 878        len = svsk->sk_reclen;
 879        set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
 880
            /* Build the receive kvec: the head kvec first, then one page per
             * entry until the vector covers len bytes.  vlen starts at
             * PAGE_SIZE, which assumes head[0] spans one page -- TODO confirm.
             */
 881        vec = rqstp->rq_vec;
 882        vec[0] = rqstp->rq_arg.head[0];
 883        vlen = PAGE_SIZE;
 884        pnum = 1;
 885        while (vlen < len) {
 886                vec[pnum].iov_base = page_address(rqstp->rq_pages[pnum]);
 887                vec[pnum].iov_len = PAGE_SIZE;
 888                pnum++;
 889                vlen += PAGE_SIZE;
 890        }
            /* First page not used for the request; presumably where reply
             * pages begin -- verify against the rq_respages users.
             */
 891        rqstp->rq_respages = &rqstp->rq_pages[pnum];
 892
 893        /* Now receive data */
 894        len = svc_recvfrom(rqstp, vec, pnum, len);
 895        if (len < 0)
 896                goto error;
 897
 898        dprintk("svc: TCP complete record (%d bytes)\n", len);
 899        rqstp->rq_arg.len = len;
 900        rqstp->rq_arg.page_base = 0;
            /* Split the received length between the head kvec and the pages. */
 901        if (len <= rqstp->rq_arg.head[0].iov_len) {
 902                rqstp->rq_arg.head[0].iov_len = len;
 903                rqstp->rq_arg.page_len = 0;
 904        } else {
 905                rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len;
 906        }
 907
 908        rqstp->rq_xprt_ctxt   = NULL;
 909        rqstp->rq_prot              = IPPROTO_TCP;
 910
 911        /* Reset TCP read info */
 912        svsk->sk_reclen = 0;
 913        svsk->sk_tcplen = 0;
 914
 915        svc_xprt_copy_addrs(rqstp, &svsk->sk_xprt);
 916        svc_xprt_received(&svsk->sk_xprt);
 917        if (serv->sv_stats)
 918                serv->sv_stats->nettcpcnt++;
 919
 920        return len;
 921
            /* NOTE(review): unlike the other -EAGAIN returns in this function,
             * this path does not call svc_xprt_received() -- confirm that the
             * caller deals with the transport once XPT_CLOSE is set.
             */
 922 err_delete:
 923        set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
 924        return -EAGAIN;
 925
 926 error:
 927        if (len == -EAGAIN) {
 928                dprintk("RPC: TCP recvfrom got EAGAIN\n");
 929                svc_xprt_received(&svsk->sk_xprt);
 930        } else {
 931                printk(KERN_NOTICE "%s: recvfrom returned errno %d\n",
 932                       svsk->sk_xprt.xpt_server->sv_name, -len);
                    /* Any other receive error closes the transport. */
 933                goto err_delete;
 934        }
 935
 936        return len;
 937}
 938
 939/*
 940 * Send out data on TCP socket.
 941 */
 942static int svc_tcp_sendto(struct svc_rqst *rqstp)
 943{
 944        struct xdr_buf        *xbufp = &rqstp->rq_res;
 945        int sent;
 946        __be32 reclen;
 947
 948        /* Set up the first element of the reply kvec.
 949         * Any other kvecs that may be in use have been taken
 950         * care of by the server implementation itself.
 951         */
 952        reclen = htonl(0x80000000|((xbufp->len ) - 4));
 953        memcpy(xbufp->head[0].iov_base, &reclen, 4);
 954
 955        if (test_bit(XPT_DEAD, &rqstp->rq_xprt->xpt_flags))
 956                return -ENOTCONN;
 957
 958        sent = svc_sendto(rqstp, &rqstp->rq_res);
 959        if (sent != xbufp->len) {
 960                printk(KERN_NOTICE
 961                       "rpc-srv/tcp: %s: %s %d when sending %d bytes "
 962                       "- shutting down socket\n",
 963                       rqstp->rq_xprt->xpt_server->sv_name,
 964                       (sent<0)?"got error":"sent only",
 965                       sent, xbufp->len);
 966                set_bit(XPT_CLOSE, &rqstp->rq_xprt->xpt_flags);
 967                svc_xprt_enqueue(rqstp->rq_xprt);
 968                sent = -EAGAIN;
 969        }
 970        return sent;
 971}
 972
 973/*
 974 * Setup response header. TCP has a 4B record length field.
 975 */
 976static void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp)
 977{
 978        struct kvec *resv = &rqstp->rq_res.head[0];
 979
 980        /* tcp needs a space for the record length... */
 981        svc_putnl(resv, 0);
 982}
 983
 984static int svc_tcp_has_wspace(struct svc_xprt *xprt)
 985{
 986        struct svc_sock *svsk =        container_of(xprt, struct svc_sock, sk_xprt);
 987        struct svc_serv        *serv = svsk->sk_xprt.xpt_server;
 988        int required;
 989        int wspace;
 990
 991        /*
 992         * Set the SOCK_NOSPACE flag before checking the available
 993         * sock space.
 994         */
 995        set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
 996        required = atomic_read(&svsk->sk_xprt.xpt_reserved) + serv->sv_max_mesg;
 997        wspace = sk_stream_wspace(svsk->sk_sk);
 998
 999        if (wspace < sk_stream_min_wspace(svsk->sk_sk))
1000                return 0;
1001        if (required * 2 > wspace)
1002                return 0;
1003
1004        clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
1005        return 1;
1006}
1007
1008static struct svc_xprt *svc_tcp_create(struct svc_serv *serv,
1009                                       struct sockaddr *sa, int salen,
1010                                       int flags)
1011{
1012        return svc_create_socket(serv, IPPROTO_TCP, sa, salen, flags);
1013}
1014
1015static struct svc_xprt_ops svc_tcp_ops = {
1016        .xpo_create = svc_tcp_create,
1017        .xpo_recvfrom = svc_tcp_recvfrom,
1018        .xpo_sendto = svc_tcp_sendto,
1019        .xpo_release_rqst = svc_release_skb,
1020        .xpo_detach = svc_sock_detach,
1021        .xpo_free = svc_sock_free,
1022        .xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
1023        .xpo_has_wspace = svc_tcp_has_wspace,
1024        .xpo_accept = svc_tcp_accept,
1025};
1026
1027static struct svc_xprt_class svc_tcp_class = {
1028        .xcl_name = "tcp",
1029        .xcl_owner = THIS_MODULE,
1030        .xcl_ops = &svc_tcp_ops,
1031        .xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP,
1032};
1033
1034void svc_init_xprt_sock(void)
1035{
1036        svc_reg_xprt_class(&svc_tcp_class);
1037        svc_reg_xprt_class(&svc_udp_class);
1038}
1039
1040void svc_cleanup_xprt_sock(void)
1041{
1042        svc_unreg_xprt_class(&svc_tcp_class);
1043        svc_unreg_xprt_class(&svc_udp_class);
1044}
1045
1046static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
1047{
1048        struct sock        *sk = svsk->sk_sk;
1049
1050        svc_xprt_init(&svc_tcp_class, &svsk->sk_xprt, serv);
1051        set_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
1052        if (sk->sk_state == TCP_LISTEN) {
1053                dprintk("setting up TCP socket for listening\n");
1054                set_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags);
1055                sk->sk_data_ready = svc_tcp_listen_data_ready;
1056                set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
1057        } else {
1058                dprintk("setting up TCP socket for reading\n");
1059                sk->sk_state_change = svc_tcp_state_change;
1060                sk->sk_data_ready = svc_tcp_data_ready;
1061                sk->sk_write_space = svc_write_space;
1062
1063                svsk->sk_reclen = 0;
1064                svsk->sk_tcplen = 0;
1065
1066                tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF;
1067
1068                /* initialise setting must have enough space to
1069                 * receive and respond to one request.
1070                 * svc_tcp_recvfrom will re-adjust if necessary
1071                 */
1072                svc_sock_setbufsize(svsk->sk_sock,
1073                                    3 * svsk->sk_xprt.xpt_server->sv_max_mesg,
1074                                    3 * svsk->sk_xprt.xpt_server->sv_max_mesg);
1075
1076                set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
1077                set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
1078                if (sk->sk_state != TCP_ESTABLISHED)
1079                        set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
1080        }
1081}
1082
1083void svc_sock_update_bufs(struct svc_serv *serv)
1084{
1085        /*
1086         * The number of server threads has changed. Update
1087         * rcvbuf and sndbuf accordingly on all sockets
1088         */
1089        struct list_head *le;
1090
1091        spin_lock_bh(&serv->sv_lock);
1092        list_for_each(le, &serv->sv_permsocks) {
1093                struct svc_sock *svsk =
1094                        list_entry(le, struct svc_sock, sk_xprt.xpt_list);
1095                set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
1096        }
1097        list_for_each(le, &serv->sv_tempsocks) {
1098                struct svc_sock *svsk =
1099                        list_entry(le, struct svc_sock, sk_xprt.xpt_list);
1100                set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
1101        }
1102        spin_unlock_bh(&serv->sv_lock);
1103}
1104EXPORT_SYMBOL(svc_sock_update_bufs);
1105
1106/*
1107 * Initialize socket for RPC use and create svc_sock struct
1108 * XXX: May want to setsockopt SO_SNDBUF and SO_RCVBUF.
1109 */
1110static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
1111                                                struct socket *sock,
1112                                                int *errp, int flags)
1113{
1114        struct svc_sock        *svsk;
1115        struct sock        *inet;
1116        int                pmap_register = !(flags & SVC_SOCK_ANONYMOUS);
1117        int                val;
1118
1119        dprintk("svc: svc_setup_socket %p\n", sock);
1120        if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) {
1121                *errp = -ENOMEM;
1122                return NULL;
1123        }
1124
1125        inet = sock->sk;
1126
1127        /* Register socket with portmapper */
1128        if (*errp >= 0 && pmap_register)
1129                *errp = svc_register(serv, inet->sk_protocol,
1130                                     ntohs(inet_sk(inet)->sport));
1131
1132        if (*errp < 0) {
1133                kfree(svsk);
1134                return NULL;
1135        }
1136
1137        inet->sk_user_data = svsk;
1138        svsk->sk_sock = sock;
1139        svsk->sk_sk = inet;
1140        svsk->sk_ostate = inet->sk_state_change;
1141        svsk->sk_odata = inet->sk_data_ready;
1142        svsk->sk_owspace = inet->sk_write_space;
1143
1144        /* Initialize the socket */
1145        if (sock->type == SOCK_DGRAM)
1146                svc_udp_init(svsk, serv);
1147        else
1148                svc_tcp_init(svsk, serv);
1149
1150        /*
1151         * We start one listener per sv_serv.  We want AF_INET
1152         * requests to be automatically shunted to our AF_INET6
1153         * listener using a mapped IPv4 address.  Make sure
1154         * no-one starts an equivalent IPv4 listener, which
1155         * would steal our incoming connections.
1156         */
1157        val = 0;
1158        if (serv->sv_family == AF_INET6)
1159                kernel_setsockopt(sock, SOL_IPV6, IPV6_V6ONLY,
1160                                        (char *)&val, sizeof(val));
1161
1162        dprintk("svc: svc_setup_socket created %p (inet %p)\n",
1163                                svsk, svsk->sk_sk);
1164
1165        return svsk;
1166}
1167
1168int svc_addsock(struct svc_serv *serv,
1169                int fd,
1170                char *name_return)
1171{
1172        int err = 0;
1173        struct socket *so = sockfd_lookup(fd, &err);
1174        struct svc_sock *svsk = NULL;
1175
1176        if (!so)
1177                return err;
1178        if (so->sk->sk_family != AF_INET)
1179                err =  -EAFNOSUPPORT;
1180        else if (so->sk->sk_protocol != IPPROTO_TCP &&
1181            so->sk->sk_protocol != IPPROTO_UDP)
1182                err =  -EPROTONOSUPPORT;
1183        else if (so->state > SS_UNCONNECTED)
1184                err = -EISCONN;
1185        else {
1186                if (!try_module_get(THIS_MODULE))
1187                        err = -ENOENT;
1188                else
1189                        svsk = svc_setup_socket(serv, so, &err,
1190                                                SVC_SOCK_DEFAULTS);
1191                if (svsk) {
1192                        struct sockaddr_storage addr;
1193                        struct sockaddr *sin = (struct sockaddr *)&addr;
1194                        int salen;
1195                        if (kernel_getsockname(svsk->sk_sock, sin, &salen) == 0)
1196                                svc_xprt_set_local(&svsk->sk_xprt, sin, salen);
1197                        clear_bit(XPT_TEMP, &svsk->sk_xprt.xpt_flags);
1198                        spin_lock_bh(&serv->sv_lock);
1199                        list_add(&svsk->sk_xprt.xpt_list, &serv->sv_permsocks);
1200                        spin_unlock_bh(&serv->sv_lock);
1201                        svc_xprt_received(&svsk->sk_xprt);
1202                        err = 0;
1203                } else
1204                        module_put(THIS_MODULE);
1205        }
1206        if (err) {
1207                sockfd_put(so);
1208                return err;
1209        }
1210        return one_sock_name(name_return, svsk);
1211}
1212EXPORT_SYMBOL_GPL(svc_addsock);
1213
1214/*
1215 * Create socket for RPC service.
1216 */
1217static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
1218                                          int protocol,
1219                                          struct sockaddr *sin, int len,
1220                                          int flags)
1221{
1222        struct svc_sock        *svsk;
1223        struct socket        *sock;
1224        int                error;
1225        int                type;
1226        struct sockaddr_storage addr;
1227        struct sockaddr *newsin = (struct sockaddr *)&addr;
1228        int                newlen;
1229        RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
1230
1231        dprintk("svc: svc_create_socket(%s, %d, %s)\n",
1232                        serv->sv_program->pg_name, protocol,
1233                        __svc_print_addr(sin, buf, sizeof(buf)));
1234
1235        if (protocol != IPPROTO_UDP && protocol != IPPROTO_TCP) {
1236                printk(KERN_WARNING "svc: only UDP and TCP "
1237                                "sockets supported\n");
1238                return ERR_PTR(-EINVAL);
1239        }
1240        type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM;
1241
1242        error = sock_create_kern(sin->sa_family, type, protocol, &sock);
1243        if (error < 0)
1244                return ERR_PTR(error);
1245
1246        svc_reclassify_socket(sock);
1247
1248        if (type == SOCK_STREAM)
1249                sock->sk->sk_reuse = 1;                /* allow address reuse */
1250        error = kernel_bind(sock, sin, len);
1251        if (error < 0)
1252                goto bummer;
1253
1254        newlen = len;
1255        error = kernel_getsockname(sock, newsin, &newlen);
1256        if (error < 0)
1257                goto bummer;
1258
1259        if (protocol == IPPROTO_TCP) {
1260                if ((error = kernel_listen(sock, 64)) < 0)
1261                        goto bummer;
1262        }
1263
1264        if ((svsk = svc_setup_socket(serv, sock, &error, flags)) != NULL) {
1265                svc_xprt_set_local(&svsk->sk_xprt, newsin, newlen);
1266                return (struct svc_xprt *)svsk;
1267        }
1268
1269bummer:
1270        dprintk("svc: svc_create_socket error = %d\n", -error);
1271        sock_release(sock);
1272        return ERR_PTR(error);
1273}
1274
1275/*
1276 * Detach the svc_sock from the socket so that no
1277 * more callbacks occur.
1278 */
1279static void svc_sock_detach(struct svc_xprt *xprt)
1280{
1281        struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
1282        struct sock *sk = svsk->sk_sk;
1283
1284        dprintk("svc: svc_sock_detach(%p)\n", svsk);
1285
1286        /* put back the old socket callbacks */
1287        sk->sk_state_change = svsk->sk_ostate;
1288        sk->sk_data_ready = svsk->sk_odata;
1289        sk->sk_write_space = svsk->sk_owspace;
1290}
1291
1292/*
1293 * Free the svc_sock's socket resources and the svc_sock itself.
1294 */
1295static void svc_sock_free(struct svc_xprt *xprt)
1296{
1297        struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
1298        dprintk("svc: svc_sock_free(%p)\n", svsk);
1299
1300        if (svsk->sk_sock->file)
1301                sockfd_put(svsk->sk_sock);
1302        else
1303                sock_release(svsk->sk_sock);
1304        kfree(svsk);
1305}