Showing error 1118

User: Jiri Slaby
Error type: Double Lock
Error type description: Some lock is locked twice unintentionally in a sequence
File location: net/unix/af_unix.c
Line in file: 1407
Project: Linux Kernel
Project version: 2.6.28
Tools: Clang Static Analyzer (3.0)
Entered: 2012-04-17 12:29:30 UTC


Source:

   1/*
   2 * NET4:        Implementation of BSD Unix domain sockets.
   3 *
   4 * Authors:        Alan Cox, <alan@lxorguk.ukuu.org.uk>
   5 *
   6 *                This program is free software; you can redistribute it and/or
   7 *                modify it under the terms of the GNU General Public License
   8 *                as published by the Free Software Foundation; either version
   9 *                2 of the License, or (at your option) any later version.
  10 *
  11 * Fixes:
  12 *                Linus Torvalds        :        Assorted bug cures.
  13 *                Niibe Yutaka        :        async I/O support.
  14 *                Carsten Paeth        :        PF_UNIX check, address fixes.
  15 *                Alan Cox        :        Limit size of allocated blocks.
  16 *                Alan Cox        :        Fixed the stupid socketpair bug.
  17 *                Alan Cox        :        BSD compatibility fine tuning.
  18 *                Alan Cox        :        Fixed a bug in connect when interrupted.
  19 *                Alan Cox        :        Sorted out a proper draft version of
  20 *                                        file descriptor passing hacked up from
  21 *                                        Mike Shaver's work.
  22 *                Marty Leisner        :        Fixes to fd passing
  23 *                Nick Nevin        :        recvmsg bugfix.
  24 *                Alan Cox        :        Started proper garbage collector
  25 *                Heiko EiBfeldt        :        Missing verify_area check
  26 *                Alan Cox        :        Started POSIXisms
  27 *                Andreas Schwab        :        Replace inode by dentry for proper
  28 *                                        reference counting
  29 *                Kirk Petersen        :        Made this a module
  30 *            Christoph Rohland        :        Elegant non-blocking accept/connect algorithm.
  31 *                                        Lots of bug fixes.
  32 *             Alexey Kuznetsov        :        Repaired (I hope) bugs introduced
  33 *                                        by above two patches.
  34 *             Andrea Arcangeli        :        If possible we block in connect(2)
  35 *                                        if the max backlog of the listen socket
  36 *                                        has been reached. This won't break
  37 *                                        old apps and it will avoid a huge number
  38 *                                        of socks hashed (this is for unix_gc()
  39 *                                        performance reasons).
  40 *                                        Security fix that limits the max
  41 *                                        number of socks to 2*max_files and
  42 *                                        the number of skb queueable in the
  43 *                                        dgram receiver.
  44 *                Artur Skawina   :        Hash function optimizations
  45 *             Alexey Kuznetsov   :        Full scale SMP. Lot of bugs are introduced 8)
  46 *              Malcolm Beattie   :        Set peercred for socketpair
  47 *             Michal Ostrowski   :       Module initialization cleanup.
  48 *             Arnaldo C. Melo        :        Remove MOD_{INC,DEC}_USE_COUNT,
  49 *                                             the core infrastructure is doing that
  50 *                                             for all net proto families now (2.5.69+)
  51 *
  52 *
  53 * Known differences from reference BSD that was tested:
  54 *
  55 *        [TO FIX]
  56 *        ECONNREFUSED is not returned from one end of a connected() socket to the
  57 *                other the moment one end closes.
  58 *        fstat() doesn't return st_dev=0, and give the blksize as high water mark
  59 *                and a fake inode identifier (nor the BSD first socket fstat twice bug).
  60 *        [NOT TO FIX]
  61 *        accept() returns a path name even if the connecting socket has closed
  62 *                in the meantime (BSD loses the path and gives up).
  63 *        accept() returns 0 length path for an unbound connector. BSD returns 16
  64 *                and a null first byte in the path (but not for gethost/peername - BSD bug ??)
  65 *        socketpair(...SOCK_RAW..) doesn't panic the kernel.
  66 *        BSD af_unix apparently has connect forgetting to block properly.
  67 *                (need to check this with the POSIX spec in detail)
  68 *
  69 * Differences from 2.0.0-11-... (ANK)
  70 *        Bug fixes and improvements.
  71 *                - client shutdown killed server socket.
  72 *                - removed all useless cli/sti pairs.
  73 *
  74 *        Semantic changes/extensions.
  75 *                - generic control message passing.
  76 *                - SCM_CREDENTIALS control message.
  77 *                - "Abstract" (not FS based) socket bindings.
  78 *                  Abstract names are sequences of bytes (not zero terminated)
  79 *                  started by 0, so that this name space does not intersect
  80 *                  with BSD names.
  81 */
  82
  83#include <linux/module.h>
  84#include <linux/kernel.h>
  85#include <linux/signal.h>
  86#include <linux/sched.h>
  87#include <linux/errno.h>
  88#include <linux/string.h>
  89#include <linux/stat.h>
  90#include <linux/dcache.h>
  91#include <linux/namei.h>
  92#include <linux/socket.h>
  93#include <linux/un.h>
  94#include <linux/fcntl.h>
  95#include <linux/termios.h>
  96#include <linux/sockios.h>
  97#include <linux/net.h>
  98#include <linux/in.h>
  99#include <linux/fs.h>
 100#include <linux/slab.h>
 101#include <asm/uaccess.h>
 102#include <linux/skbuff.h>
 103#include <linux/netdevice.h>
 104#include <net/net_namespace.h>
 105#include <net/sock.h>
 106#include <net/tcp_states.h>
 107#include <net/af_unix.h>
 108#include <linux/proc_fs.h>
 109#include <linux/seq_file.h>
 110#include <net/scm.h>
 111#include <linux/init.h>
 112#include <linux/poll.h>
 113#include <linux/rtnetlink.h>
 114#include <linux/mount.h>
 115#include <net/checksum.h>
 116#include <linux/security.h>
 117
 118static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
 119static DEFINE_SPINLOCK(unix_table_lock);
 120static atomic_t unix_nr_socks = ATOMIC_INIT(0);
 121
 122#define unix_sockets_unbound        (&unix_socket_table[UNIX_HASH_SIZE])
 123
 124#define UNIX_ABSTRACT(sk)        (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
 125
 126#ifdef CONFIG_SECURITY_NETWORK
 127static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 128{
 129        memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
 130}
 131
 132static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 133{
 134        scm->secid = *UNIXSID(skb);
 135}
 136#else
 137static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 138{ }
 139
 140static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 141{ }
 142#endif /* CONFIG_SECURITY_NETWORK */
 143
 144/*
 145 *  SMP locking strategy:
 146 *    hash table is protected with spinlock unix_table_lock
 147 *    each socket state is protected by a separate spin lock.
 148 */
 149
 150static inline unsigned unix_hash_fold(__wsum n)
 151{
 152        unsigned hash = (__force unsigned)n;
 153        hash ^= hash>>16;
 154        hash ^= hash>>8;
 155        return hash&(UNIX_HASH_SIZE-1);
 156}
 157
 158#define unix_peer(sk) (unix_sk(sk)->peer)
 159
 160static inline int unix_our_peer(struct sock *sk, struct sock *osk)
 161{
 162        return unix_peer(osk) == sk;
 163}
 164
 165static inline int unix_may_send(struct sock *sk, struct sock *osk)
 166{
 167        return (unix_peer(osk) == NULL || unix_our_peer(sk, osk));
 168}
 169
 170static inline int unix_recvq_full(struct sock const *sk)
 171{
 172        return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
 173}
 174
 175static struct sock *unix_peer_get(struct sock *s)
 176{
 177        struct sock *peer;
 178
 179        unix_state_lock(s);
 180        peer = unix_peer(s);
 181        if (peer)
 182                sock_hold(peer);
 183        unix_state_unlock(s);
 184        return peer;
 185}
 186
 187static inline void unix_release_addr(struct unix_address *addr)
 188{
 189        if (atomic_dec_and_test(&addr->refcnt))
 190                kfree(addr);
 191}
 192
 193/*
 194 *        Check unix socket name:
 195 *                - should be not zero length.
 196 *                - if started by not zero, should be NULL terminated (FS object)
 197 *                - if started by zero, it is abstract name.
 198 */
 199
 200static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp)
 201{
 202        if (len <= sizeof(short) || len > sizeof(*sunaddr))
 203                return -EINVAL;
 204        if (!sunaddr || sunaddr->sun_family != AF_UNIX)
 205                return -EINVAL;
 206        if (sunaddr->sun_path[0]) {
 207                /*
 208                 * This may look like an off by one error but it is a bit more
 209                 * subtle. 108 is the longest valid AF_UNIX path for a binding.
 210                 * sun_path[108] doesnt as such exist.  However in kernel space
 211                 * we are guaranteed that it is a valid memory location in our
 212                 * kernel address buffer.
 213                 */
 214                ((char *)sunaddr)[len]=0;
 215                len = strlen(sunaddr->sun_path)+1+sizeof(short);
 216                return len;
 217        }
 218
 219        *hashp = unix_hash_fold(csum_partial((char*)sunaddr, len, 0));
 220        return len;
 221}
 222
 223static void __unix_remove_socket(struct sock *sk)
 224{
 225        sk_del_node_init(sk);
 226}
 227
 228static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
 229{
 230        WARN_ON(!sk_unhashed(sk));
 231        sk_add_node(sk, list);
 232}
 233
 234static inline void unix_remove_socket(struct sock *sk)
 235{
 236        spin_lock(&unix_table_lock);
 237        __unix_remove_socket(sk);
 238        spin_unlock(&unix_table_lock);
 239}
 240
 241static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
 242{
 243        spin_lock(&unix_table_lock);
 244        __unix_insert_socket(list, sk);
 245        spin_unlock(&unix_table_lock);
 246}
 247
 248static struct sock *__unix_find_socket_byname(struct net *net,
 249                                              struct sockaddr_un *sunname,
 250                                              int len, int type, unsigned hash)
 251{
 252        struct sock *s;
 253        struct hlist_node *node;
 254
 255        sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
 256                struct unix_sock *u = unix_sk(s);
 257
 258                if (!net_eq(sock_net(s), net))
 259                        continue;
 260
 261                if (u->addr->len == len &&
 262                    !memcmp(u->addr->name, sunname, len))
 263                        goto found;
 264        }
 265        s = NULL;
 266found:
 267        return s;
 268}
 269
 270static inline struct sock *unix_find_socket_byname(struct net *net,
 271                                                   struct sockaddr_un *sunname,
 272                                                   int len, int type,
 273                                                   unsigned hash)
 274{
 275        struct sock *s;
 276
 277        spin_lock(&unix_table_lock);
 278        s = __unix_find_socket_byname(net, sunname, len, type, hash);
 279        if (s)
 280                sock_hold(s);
 281        spin_unlock(&unix_table_lock);
 282        return s;
 283}
 284
 285static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i)
 286{
 287        struct sock *s;
 288        struct hlist_node *node;
 289
 290        spin_lock(&unix_table_lock);
 291        sk_for_each(s, node,
 292                    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
 293                struct dentry *dentry = unix_sk(s)->dentry;
 294
 295                if (!net_eq(sock_net(s), net))
 296                        continue;
 297
 298                if(dentry && dentry->d_inode == i)
 299                {
 300                        sock_hold(s);
 301                        goto found;
 302                }
 303        }
 304        s = NULL;
 305found:
 306        spin_unlock(&unix_table_lock);
 307        return s;
 308}
 309
 310static inline int unix_writable(struct sock *sk)
 311{
 312        return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
 313}
 314
 315static void unix_write_space(struct sock *sk)
 316{
 317        read_lock(&sk->sk_callback_lock);
 318        if (unix_writable(sk)) {
 319                if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
 320                        wake_up_interruptible_sync(sk->sk_sleep);
 321                sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 322        }
 323        read_unlock(&sk->sk_callback_lock);
 324}
 325
 326/* When dgram socket disconnects (or changes its peer), we clear its receive
 327 * queue of packets arrived from previous peer. First, it allows to do
 328 * flow control based only on wmem_alloc; second, sk connected to peer
 329 * may receive messages only from that peer. */
 330static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
 331{
 332        if (!skb_queue_empty(&sk->sk_receive_queue)) {
 333                skb_queue_purge(&sk->sk_receive_queue);
 334                wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
 335
 336                /* If one link of bidirectional dgram pipe is disconnected,
 337                 * we signal error. Messages are lost. Do not make this,
 338                 * when peer was not connected to us.
 339                 */
 340                if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
 341                        other->sk_err = ECONNRESET;
 342                        other->sk_error_report(other);
 343                }
 344        }
 345}
 346
 347static void unix_sock_destructor(struct sock *sk)
 348{
 349        struct unix_sock *u = unix_sk(sk);
 350
 351        skb_queue_purge(&sk->sk_receive_queue);
 352
 353        WARN_ON(atomic_read(&sk->sk_wmem_alloc));
 354        WARN_ON(!sk_unhashed(sk));
 355        WARN_ON(sk->sk_socket);
 356        if (!sock_flag(sk, SOCK_DEAD)) {
 357                printk("Attempt to release alive unix socket: %p\n", sk);
 358                return;
 359        }
 360
 361        if (u->addr)
 362                unix_release_addr(u->addr);
 363
 364        atomic_dec(&unix_nr_socks);
 365#ifdef UNIX_REFCNT_DEBUG
 366        printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, atomic_read(&unix_nr_socks));
 367#endif
 368}
 369
 370static int unix_release_sock (struct sock *sk, int embrion)
 371{
 372        struct unix_sock *u = unix_sk(sk);
 373        struct dentry *dentry;
 374        struct vfsmount *mnt;
 375        struct sock *skpair;
 376        struct sk_buff *skb;
 377        int state;
 378
 379        unix_remove_socket(sk);
 380
 381        /* Clear state */
 382        unix_state_lock(sk);
 383        sock_orphan(sk);
 384        sk->sk_shutdown = SHUTDOWN_MASK;
 385        dentry             = u->dentry;
 386        u->dentry    = NULL;
 387        mnt             = u->mnt;
 388        u->mnt             = NULL;
 389        state = sk->sk_state;
 390        sk->sk_state = TCP_CLOSE;
 391        unix_state_unlock(sk);
 392
 393        wake_up_interruptible_all(&u->peer_wait);
 394
 395        skpair=unix_peer(sk);
 396
 397        if (skpair!=NULL) {
 398                if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
 399                        unix_state_lock(skpair);
 400                        /* No more writes */
 401                        skpair->sk_shutdown = SHUTDOWN_MASK;
 402                        if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
 403                                skpair->sk_err = ECONNRESET;
 404                        unix_state_unlock(skpair);
 405                        skpair->sk_state_change(skpair);
 406                        read_lock(&skpair->sk_callback_lock);
 407                        sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
 408                        read_unlock(&skpair->sk_callback_lock);
 409                }
 410                sock_put(skpair); /* It may now die */
 411                unix_peer(sk) = NULL;
 412        }
 413
 414        /* Try to flush out this socket. Throw out buffers at least */
 415
 416        while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
 417                if (state==TCP_LISTEN)
 418                        unix_release_sock(skb->sk, 1);
 419                /* passed fds are erased in the kfree_skb hook              */
 420                kfree_skb(skb);
 421        }
 422
 423        if (dentry) {
 424                dput(dentry);
 425                mntput(mnt);
 426        }
 427
 428        sock_put(sk);
 429
 430        /* ---- Socket is dead now and most probably destroyed ---- */
 431
 432        /*
 433         * Fixme: BSD difference: In BSD all sockets connected to use get
 434         *          ECONNRESET and we die on the spot. In Linux we behave
 435         *          like files and pipes do and wait for the last
 436         *          dereference.
 437         *
 438         * Can't we simply set sock->err?
 439         *
 440         *          What the above comment does talk about? --ANK(980817)
 441         */
 442
 443        if (unix_tot_inflight)
 444                unix_gc();                /* Garbage collect fds */
 445
 446        return 0;
 447}
 448
 449static int unix_listen(struct socket *sock, int backlog)
 450{
 451        int err;
 452        struct sock *sk = sock->sk;
 453        struct unix_sock *u = unix_sk(sk);
 454
 455        err = -EOPNOTSUPP;
 456        if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
 457                goto out;                        /* Only stream/seqpacket sockets accept */
 458        err = -EINVAL;
 459        if (!u->addr)
 460                goto out;                        /* No listens on an unbound socket */
 461        unix_state_lock(sk);
 462        if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
 463                goto out_unlock;
 464        if (backlog > sk->sk_max_ack_backlog)
 465                wake_up_interruptible_all(&u->peer_wait);
 466        sk->sk_max_ack_backlog        = backlog;
 467        sk->sk_state                = TCP_LISTEN;
 468        /* set credentials so connect can copy them */
 469        sk->sk_peercred.pid        = task_tgid_vnr(current);
 470        sk->sk_peercred.uid        = current->euid;
 471        sk->sk_peercred.gid        = current->egid;
 472        err = 0;
 473
 474out_unlock:
 475        unix_state_unlock(sk);
 476out:
 477        return err;
 478}
 479
 480static int unix_release(struct socket *);
 481static int unix_bind(struct socket *, struct sockaddr *, int);
 482static int unix_stream_connect(struct socket *, struct sockaddr *,
 483                               int addr_len, int flags);
 484static int unix_socketpair(struct socket *, struct socket *);
 485static int unix_accept(struct socket *, struct socket *, int);
 486static int unix_getname(struct socket *, struct sockaddr *, int *, int);
 487static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
 488static unsigned int unix_dgram_poll(struct file *, struct socket *,
 489                                    poll_table *);
 490static int unix_ioctl(struct socket *, unsigned int, unsigned long);
 491static int unix_shutdown(struct socket *, int);
 492static int unix_stream_sendmsg(struct kiocb *, struct socket *,
 493                               struct msghdr *, size_t);
 494static int unix_stream_recvmsg(struct kiocb *, struct socket *,
 495                               struct msghdr *, size_t, int);
 496static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
 497                              struct msghdr *, size_t);
 498static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
 499                              struct msghdr *, size_t, int);
 500static int unix_dgram_connect(struct socket *, struct sockaddr *,
 501                              int, int);
 502static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
 503                                  struct msghdr *, size_t);
 504
 505static const struct proto_ops unix_stream_ops = {
 506        .family =        PF_UNIX,
 507        .owner =        THIS_MODULE,
 508        .release =        unix_release,
 509        .bind =                unix_bind,
 510        .connect =        unix_stream_connect,
 511        .socketpair =        unix_socketpair,
 512        .accept =        unix_accept,
 513        .getname =        unix_getname,
 514        .poll =                unix_poll,
 515        .ioctl =        unix_ioctl,
 516        .listen =        unix_listen,
 517        .shutdown =        unix_shutdown,
 518        .setsockopt =        sock_no_setsockopt,
 519        .getsockopt =        sock_no_getsockopt,
 520        .sendmsg =        unix_stream_sendmsg,
 521        .recvmsg =        unix_stream_recvmsg,
 522        .mmap =                sock_no_mmap,
 523        .sendpage =        sock_no_sendpage,
 524};
 525
 526static const struct proto_ops unix_dgram_ops = {
 527        .family =        PF_UNIX,
 528        .owner =        THIS_MODULE,
 529        .release =        unix_release,
 530        .bind =                unix_bind,
 531        .connect =        unix_dgram_connect,
 532        .socketpair =        unix_socketpair,
 533        .accept =        sock_no_accept,
 534        .getname =        unix_getname,
 535        .poll =                unix_dgram_poll,
 536        .ioctl =        unix_ioctl,
 537        .listen =        sock_no_listen,
 538        .shutdown =        unix_shutdown,
 539        .setsockopt =        sock_no_setsockopt,
 540        .getsockopt =        sock_no_getsockopt,
 541        .sendmsg =        unix_dgram_sendmsg,
 542        .recvmsg =        unix_dgram_recvmsg,
 543        .mmap =                sock_no_mmap,
 544        .sendpage =        sock_no_sendpage,
 545};
 546
 547static const struct proto_ops unix_seqpacket_ops = {
 548        .family =        PF_UNIX,
 549        .owner =        THIS_MODULE,
 550        .release =        unix_release,
 551        .bind =                unix_bind,
 552        .connect =        unix_stream_connect,
 553        .socketpair =        unix_socketpair,
 554        .accept =        unix_accept,
 555        .getname =        unix_getname,
 556        .poll =                unix_dgram_poll,
 557        .ioctl =        unix_ioctl,
 558        .listen =        unix_listen,
 559        .shutdown =        unix_shutdown,
 560        .setsockopt =        sock_no_setsockopt,
 561        .getsockopt =        sock_no_getsockopt,
 562        .sendmsg =        unix_seqpacket_sendmsg,
 563        .recvmsg =        unix_dgram_recvmsg,
 564        .mmap =                sock_no_mmap,
 565        .sendpage =        sock_no_sendpage,
 566};
 567
 568static struct proto unix_proto = {
 569        .name          = "UNIX",
 570        .owner          = THIS_MODULE,
 571        .obj_size = sizeof(struct unix_sock),
 572};
 573
 574/*
 575 * AF_UNIX sockets do not interact with hardware, hence they
 576 * dont trigger interrupts - so it's safe for them to have
 577 * bh-unsafe locking for their sk_receive_queue.lock. Split off
 578 * this special lock-class by reinitializing the spinlock key:
 579 */
 580static struct lock_class_key af_unix_sk_receive_queue_lock_key;
 581
 582static struct sock * unix_create1(struct net *net, struct socket *sock)
 583{
 584        struct sock *sk = NULL;
 585        struct unix_sock *u;
 586
 587        atomic_inc(&unix_nr_socks);
 588        if (atomic_read(&unix_nr_socks) > 2 * get_max_files())
 589                goto out;
 590
 591        sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
 592        if (!sk)
 593                goto out;
 594
 595        sock_init_data(sock,sk);
 596        lockdep_set_class(&sk->sk_receive_queue.lock,
 597                                &af_unix_sk_receive_queue_lock_key);
 598
 599        sk->sk_write_space        = unix_write_space;
 600        sk->sk_max_ack_backlog        = net->unx.sysctl_max_dgram_qlen;
 601        sk->sk_destruct                = unix_sock_destructor;
 602        u          = unix_sk(sk);
 603        u->dentry = NULL;
 604        u->mnt          = NULL;
 605        spin_lock_init(&u->lock);
 606        atomic_long_set(&u->inflight, 0);
 607        INIT_LIST_HEAD(&u->link);
 608        mutex_init(&u->readlock); /* single task reading lock */
 609        init_waitqueue_head(&u->peer_wait);
 610        unix_insert_socket(unix_sockets_unbound, sk);
 611out:
 612        if (sk == NULL)
 613                atomic_dec(&unix_nr_socks);
 614        return sk;
 615}
 616
 617static int unix_create(struct net *net, struct socket *sock, int protocol)
 618{
 619        if (protocol && protocol != PF_UNIX)
 620                return -EPROTONOSUPPORT;
 621
 622        sock->state = SS_UNCONNECTED;
 623
 624        switch (sock->type) {
 625        case SOCK_STREAM:
 626                sock->ops = &unix_stream_ops;
 627                break;
 628                /*
 629                 *        Believe it or not BSD has AF_UNIX, SOCK_RAW though
 630                 *        nothing uses it.
 631                 */
 632        case SOCK_RAW:
 633                sock->type=SOCK_DGRAM;
 634        case SOCK_DGRAM:
 635                sock->ops = &unix_dgram_ops;
 636                break;
 637        case SOCK_SEQPACKET:
 638                sock->ops = &unix_seqpacket_ops;
 639                break;
 640        default:
 641                return -ESOCKTNOSUPPORT;
 642        }
 643
 644        return unix_create1(net, sock) ? 0 : -ENOMEM;
 645}
 646
 647static int unix_release(struct socket *sock)
 648{
 649        struct sock *sk = sock->sk;
 650
 651        if (!sk)
 652                return 0;
 653
 654        sock->sk = NULL;
 655
 656        return unix_release_sock (sk, 0);
 657}
 658
 659static int unix_autobind(struct socket *sock)
 660{
 661        struct sock *sk = sock->sk;
 662        struct net *net = sock_net(sk);
 663        struct unix_sock *u = unix_sk(sk);
 664        static u32 ordernum = 1;
 665        struct unix_address * addr;
 666        int err;
 667
 668        mutex_lock(&u->readlock);
 669
 670        err = 0;
 671        if (u->addr)
 672                goto out;
 673
 674        err = -ENOMEM;
 675        addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
 676        if (!addr)
 677                goto out;
 678
 679        addr->name->sun_family = AF_UNIX;
 680        atomic_set(&addr->refcnt, 1);
 681
 682retry:
 683        addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
 684        addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0));
 685
 686        spin_lock(&unix_table_lock);
 687        ordernum = (ordernum+1)&0xFFFFF;
 688
 689        if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
 690                                      addr->hash)) {
 691                spin_unlock(&unix_table_lock);
 692                /* Sanity yield. It is unusual case, but yet... */
 693                if (!(ordernum&0xFF))
 694                        yield();
 695                goto retry;
 696        }
 697        addr->hash ^= sk->sk_type;
 698
 699        __unix_remove_socket(sk);
 700        u->addr = addr;
 701        __unix_insert_socket(&unix_socket_table[addr->hash], sk);
 702        spin_unlock(&unix_table_lock);
 703        err = 0;
 704
 705out:        mutex_unlock(&u->readlock);
 706        return err;
 707}
 708
 709static struct sock *unix_find_other(struct net *net,
 710                                    struct sockaddr_un *sunname, int len,
 711                                    int type, unsigned hash, int *error)
 712{
 713        struct sock *u;
 714        struct path path;
 715        int err = 0;
 716
 717        if (sunname->sun_path[0]) {
 718                struct inode *inode;
 719                err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
 720                if (err)
 721                        goto fail;
 722                inode = path.dentry->d_inode;
 723                err = inode_permission(inode, MAY_WRITE);
 724                if (err)
 725                        goto put_fail;
 726
 727                err = -ECONNREFUSED;
 728                if (!S_ISSOCK(inode->i_mode))
 729                        goto put_fail;
 730                u = unix_find_socket_byinode(net, inode);
 731                if (!u)
 732                        goto put_fail;
 733
 734                if (u->sk_type == type)
 735                        touch_atime(path.mnt, path.dentry);
 736
 737                path_put(&path);
 738
 739                err=-EPROTOTYPE;
 740                if (u->sk_type != type) {
 741                        sock_put(u);
 742                        goto fail;
 743                }
 744        } else {
 745                err = -ECONNREFUSED;
 746                u=unix_find_socket_byname(net, sunname, len, type, hash);
 747                if (u) {
 748                        struct dentry *dentry;
 749                        dentry = unix_sk(u)->dentry;
 750                        if (dentry)
 751                                touch_atime(unix_sk(u)->mnt, dentry);
 752                } else
 753                        goto fail;
 754        }
 755        return u;
 756
 757put_fail:
 758        path_put(&path);
 759fail:
 760        *error=err;
 761        return NULL;
 762}
 763
 764
    /*
     * Bind a Unix-domain socket to the address in @uaddr.
     *
     * An address that consists of the family field only
     * (addr_len == sizeof(short)) is handed to unix_autobind().  A name
     * whose sun_path starts with a non-zero byte gets a socket node created
     * through the VFS (vfs_mknod); a name starting with a zero byte is only
     * checked for uniqueness in the socket table.
     *
     * Returns 0 on success or a negative errno: -EINVAL for a wrong family
     * or a socket that already has an address, -ENOMEM when the address copy
     * cannot be allocated, -EADDRINUSE when the name is taken (a filesystem
     * -EEXIST is translated to it), or whatever unix_mkname(), path_lookup(),
     * lookup_create(), mnt_want_write() or vfs_mknod() report.
     */
 765static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 766{
 767        struct sock *sk = sock->sk;
 768        struct net *net = sock_net(sk);
 769        struct unix_sock *u = unix_sk(sk);
 770        struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
 771        struct dentry * dentry = NULL;
 772        struct nameidata nd;
 773        int err;
 774        unsigned hash;
 775        struct unix_address *addr;
 776        struct hlist_head *list;
 777
 778        err = -EINVAL;
 779        if (sunaddr->sun_family != AF_UNIX)
 780                goto out;
 781
            /* Family-only address: let unix_autobind() pick a name. */
 782        if (addr_len==sizeof(short)) {
 783                err = unix_autobind(sock);
 784                goto out;
 785        }
 786
            /* On success unix_mkname() returns the length to use from here on. */
 787        err = unix_mkname(sunaddr, addr_len, &hash);
 788        if (err < 0)
 789                goto out;
 790        addr_len = err;
 791
            /* u->readlock is held until out_up; every later exit passes it. */
 792        mutex_lock(&u->readlock);
 793
            /* A socket that already has an address cannot be bound again. */
 794        err = -EINVAL;
 795        if (u->addr)
 796                goto out_up;
 797
 798        err = -ENOMEM;
 799        addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
 800        if (!addr)
 801                goto out_up;
 802
            /* Private copy of the name, starting with a single reference. */
 803        memcpy(addr->name, sunaddr, addr_len);
 804        addr->len = addr_len;
 805        addr->hash = hash ^ sk->sk_type;
 806        atomic_set(&addr->refcnt, 1);
 807
            /* Non-zero first byte: the name is a filesystem path. */
 808        if (sunaddr->sun_path[0]) {
 809                unsigned int mode;
 810                err = 0;
 811                /*
 812                 * Get the parent directory, calculate the hash for last
 813                 * component.
 814                 */
 815                err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
 816                if (err)
 817                        goto out_mknod_parent;
 818
 819                dentry = lookup_create(&nd, 0);
 820                err = PTR_ERR(dentry);
 821                if (IS_ERR(dentry))
 822                        goto out_mknod_unlock;
 823
 824                /*
 825                 * All right, let's create it.
 826                 */
                    /* Socket node; permission bits come from the socket inode minus the umask. */
 827                mode = S_IFSOCK |
 828                       (SOCK_INODE(sock)->i_mode & ~current->fs->umask);
 829                err = mnt_want_write(nd.path.mnt);
 830                if (err)
 831                        goto out_mknod_dput;
 832                err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0);
 833                mnt_drop_write(nd.path.mnt);
 834                if (err)
 835                        goto out_mknod_dput;
                    /*
                     * Release the parent directory's i_mutex (presumably taken
                     * by lookup_create() - not visible here) and the reference
                     * on the parent dentry; nd.path now names the new node.
                     */
 836                mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
 837                dput(nd.path.dentry);
 838                nd.path.dentry = dentry;
 839
                    /*
                     * Path-bound addresses carry UNIX_HASH_SIZE in place of a
                     * name hash; the table slot is derived from the inode
                     * number further down.
                     */
 840                addr->hash = UNIX_HASH_SIZE;
 841        }
 842
 843        spin_lock(&unix_table_lock);
 844
            /* Name with a leading zero byte: must not be in the table yet. */
 845        if (!sunaddr->sun_path[0]) {
 846                err = -EADDRINUSE;
 847                if (__unix_find_socket_byname(net, sunaddr, addr_len,
 848                                              sk->sk_type, hash)) {
 849                        unix_release_addr(addr);
 850                        goto out_unlock;
 851                }
 852
 853                list = &unix_socket_table[addr->hash];
 854        } else {
                    /* The socket keeps the dentry/mnt obtained by the lookup above. */
 855                list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
 856                u->dentry = nd.path.dentry;
 857                u->mnt    = nd.path.mnt;
 858        }
 859
            /* Move the socket to its new hash chain with the address attached. */
 860        err = 0;
 861        __unix_remove_socket(sk);
 862        u->addr = addr;
 863        __unix_insert_socket(list, sk);
 864
 865out_unlock:
 866        spin_unlock(&unix_table_lock);
 867out_up:
 868        mutex_unlock(&u->readlock);
 869out:
 870        return err;
 871
            /*
             * Unwind for the filesystem branch.  These labels sit after the
             * return above and are reached by goto only; they undo the VFS
             * state, free the address and jump back to out_up.
             */
 872out_mknod_dput:
 873        dput(dentry);
 874out_mknod_unlock:
 875        mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
 876        path_put(&nd.path);
 877out_mknod_parent:
            /* An existing filesystem entry is reported as "address in use". */
 878        if (err==-EEXIST)
 879                err=-EADDRINUSE;
 880        unix_release_addr(addr);
 881        goto out_up;
 882}
 883
 884static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
 885{
 886        if (unlikely(sk1 == sk2) || !sk2) {
 887                unix_state_lock(sk1);
 888                return;
 889        }
 890        if (sk1 < sk2) {
 891                unix_state_lock(sk1);
 892                unix_state_lock_nested(sk2);
 893        } else {
 894                unix_state_lock(sk2);
 895                unix_state_lock_nested(sk1);
 896        }
 897}
 898
 899static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
 900{
 901        if (unlikely(sk1 == sk2) || !sk2) {
 902                unix_state_unlock(sk1);
 903                return;
 904        }
 905        unix_state_unlock(sk1);
 906        unix_state_unlock(sk2);
 907}
 908
    /*
     * Set, replace or clear the default peer of a datagram socket.
     *
     * With a family other than AF_UNSPEC the named socket is looked up and
     * becomes the peer, provided unix_may_send() and the security hook
     * allow it.  With AF_UNSPEC the peer is cleared.
     *
     * Returns 0 on success, -EPERM when unix_may_send() refuses, or the
     * error from unix_mkname(), unix_autobind(), unix_find_other() or
     * security_unix_may_send().
     */
 909static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
 910                              int alen, int flags)
 911{
 912        struct sock *sk = sock->sk;
 913        struct net *net = sock_net(sk);
 914        struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr;
 915        struct sock *other;
 916        unsigned hash;
 917        int err;
 918
 919        if (addr->sa_family != AF_UNSPEC) {
 920                err = unix_mkname(sunaddr, alen, &hash);
 921                if (err < 0)
 922                        goto out;
 923                alen = err;
 924
                    /* Credential passing enabled but no address yet: autobind first. */
 925                if (test_bit(SOCK_PASSCRED, &sock->flags) &&
 926                    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
 927                        goto out;
 928
 929restart:
 930                other=unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
 931                if (!other)
 932                        goto out;
 933
 934                unix_state_double_lock(sk, other);
 935
 936                /* Apparently VFS overslept socket death. Retry. */
 937                if (sock_flag(other, SOCK_DEAD)) {
 938                        unix_state_double_unlock(sk, other);
 939                        sock_put(other);
 940                        goto restart;
 941                }
 942
 943                err = -EPERM;
 944                if (!unix_may_send(sk, other))
 945                        goto out_unlock;
 946
 947                err = security_unix_may_send(sk->sk_socket, other->sk_socket);
 948                if (err)
 949                        goto out_unlock;
 950
 951        } else {
 952                /*
 953                 *        1003.1g breaking connected state with AF_UNSPEC
 954                 */
                    /* With other == NULL the double-lock helper locks sk only. */
 955                other = NULL;
 956                unix_state_double_lock(sk, other);
 957        }
 958
 959        /*
 960         * If it was connected, reconnect.
 961         */
            /*
             * On success the reference on 'other' that the error path below
             * would drop is kept: it becomes the peer reference.
             */
 962        if (unix_peer(sk)) {
 963                struct sock *old_peer = unix_peer(sk);
 964                unix_peer(sk)=other;
 965                unix_state_double_unlock(sk, other);
 966
                    /* Locks are released before the old peer is notified and put. */
 967                if (other != old_peer)
 968                        unix_dgram_disconnected(sk, old_peer);
 969                sock_put(old_peer);
 970        } else {
 971                unix_peer(sk)=other;
 972                unix_state_double_unlock(sk, other);
 973        }
 974        return 0;
 975
 976out_unlock:
 977        unix_state_double_unlock(sk, other);
 978        sock_put(other);
 979out:
 980        return err;
 981}
 982
    /*
     * Wait for room in @other's receive queue.
     *
     * The callers in this file invoke it with @other's state lock held;
     * the lock is ALWAYS released here before returning, so callers must
     * not unlock @other again afterwards.
     *
     * Returns the remaining timeout: the value from schedule_timeout() if
     * the task slept, otherwise @timeo unchanged.
     */
 983static long unix_wait_for_peer(struct sock *other, long timeo)
 984{
 985        struct unix_sock *u = unix_sk(other);
 986        int sched;
 987        DEFINE_WAIT(wait);
 988
            /* Queue on peer_wait before evaluating the condition below. */
 989        prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
 990
            /* Sleep only if the peer is alive, still receiving, and full. */
 991        sched = !sock_flag(other, SOCK_DEAD) &&
 992                !(other->sk_shutdown & RCV_SHUTDOWN) &&
 993                unix_recvq_full(other);
 994
 995        unix_state_unlock(other);
 996
 997        if (sched)
 998                timeo = schedule_timeout(timeo);
 999
1000        finish_wait(&u->peer_wait, &wait);
1001        return timeo;
1002}
1003
    /*
     * Connect a stream/seqpacket socket to a listening socket.
     *
     * A new sock (newsk) is created up front to act as the server-side
     * endpoint, together with a one-byte skb allocated against newsk.  Once
     * the listener is found and both state locks are held, sk and newsk are
     * made peers of each other and the skb is queued on the listener's
     * receive queue (unix_accept() later picks the endpoint up via skb->sk).
     *
     * Returns 0 on success or a negative errno: -ENOMEM on allocation
     * failure, -ECONNREFUSED when the target is not listening, -EAGAIN when
     * the listener's queue is full and no waiting is allowed, -EISCONN /
     * -EINVAL for an unsuitable state of sk, the sock_intr_errno() value
     * when a signal interrupts the wait, or errors from unix_mkname(),
     * unix_autobind(), unix_find_other() and the security hook.
     */
1004static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1005                               int addr_len, int flags)
1006{
1007        struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
1008        struct sock *sk = sock->sk;
1009        struct net *net = sock_net(sk);
1010        struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1011        struct sock *newsk = NULL;
1012        struct sock *other = NULL;
1013        struct sk_buff *skb = NULL;
1014        unsigned hash;
1015        int st;
1016        int err;
1017        long timeo;
1018
1019        err = unix_mkname(sunaddr, addr_len, &hash);
1020        if (err < 0)
1021                goto out;
1022        addr_len = err;
1023
            /* Credential passing enabled but no address yet: autobind first. */
1024        if (test_bit(SOCK_PASSCRED, &sock->flags)
1025                && !u->addr && (err = unix_autobind(sock)) != 0)
1026                goto out;
1027
            /* Zero timeout when O_NONBLOCK is set, per sock_sndtimeo(). */
1028        timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1029
1030        /* First of all allocate resources.
1031           If we will make it after state is locked,
1032           we will have to recheck all again in any case.
1033         */
1034
1035        err = -ENOMEM;
1036
1037        /* create new sock for complete connection */
1038        newsk = unix_create1(sock_net(sk), NULL);
1039        if (newsk == NULL)
1040                goto out;
1041
1042        /* Allocate skb for sending to listening sock */
1043        skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1044        if (skb == NULL)
1045                goto out;
1046
1047restart:
1048        /*  Find listening sock. */
1049        other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1050        if (!other)
1051                goto out;
1052
1053        /* Latch state of peer */
1054        unix_state_lock(other);
1055
1056        /* Apparently VFS overslept socket death. Retry. */
1057        if (sock_flag(other, SOCK_DEAD)) {
1058                unix_state_unlock(other);
1059                sock_put(other);
1060                goto restart;
1061        }
1062
1063        err = -ECONNREFUSED;
1064        if (other->sk_state != TCP_LISTEN)
1065                goto out_unlock;
1066
1067        if (unix_recvq_full(other)) {
1068                err = -EAGAIN;
1069                if (!timeo)
1070                        goto out_unlock;
1071
                    /*
                     * unix_wait_for_peer() drops other's state lock, which is
                     * why the exits below use 'out' / sock_put() and never
                     * 'out_unlock'.
                     */
1072                timeo = unix_wait_for_peer(other, timeo);
1073
1074                err = sock_intr_errno(timeo);
1075                if (signal_pending(current))
1076                        goto out;
1077                sock_put(other);
1078                goto restart;
1079        }
1080
1081        /* Latch our state.
1082
1083           It is tricky place. We need to grab write lock and cannot
1084           drop lock on peer. It is dangerous because deadlock is
1085           possible. Connect to self case and simultaneous
1086           attempt to connect are eliminated by checking socket
1087           state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1088           check this before attempt to grab lock.
1089
1090           Well, and we have to recheck the state after socket locked.
1091         */
1092        st = sk->sk_state;
1093
1094        switch (st) {
1095        case TCP_CLOSE:
1096                /* This is ok... continue with connect */
1097                break;
1098        case TCP_ESTABLISHED:
1099                /* Socket is already connected */
1100                err = -EISCONN;
1101                goto out_unlock;
1102        default:
1103                err = -EINVAL;
1104                goto out_unlock;
1105        }
1106
            /* other's lock is already held, hence the nested variant for sk. */
1107        unix_state_lock_nested(sk);
1108
            /* State changed between the unlocked read and the lock: start over. */
1109        if (sk->sk_state != st) {
1110                unix_state_unlock(sk);
1111                unix_state_unlock(other);
1112                sock_put(other);
1113                goto restart;
1114        }
1115
1116        err = security_unix_stream_connect(sock, other->sk_socket, newsk);
1117        if (err) {
1118                unix_state_unlock(sk);
1119                goto out_unlock;
1120        }
1121
1122        /* The way is open! Fastly set all the necessary fields... */
1123
            /* newsk holds a reference on sk as its peer. */
1124        sock_hold(sk);
1125        unix_peer(newsk)        = sk;
1126        newsk->sk_state                = TCP_ESTABLISHED;
1127        newsk->sk_type                = sk->sk_type;
1128        newsk->sk_peercred.pid        = task_tgid_vnr(current);
1129        newsk->sk_peercred.uid        = current->euid;
1130        newsk->sk_peercred.gid        = current->egid;
1131        newu = unix_sk(newsk);
1132        newsk->sk_sleep                = &newu->peer_wait;
1133        otheru = unix_sk(other);
1134
1135        /* copy address information from listening to new sock*/
1136        if (otheru->addr) {
1137                atomic_inc(&otheru->addr->refcnt);
1138                newu->addr = otheru->addr;
1139        }
1140        if (otheru->dentry) {
1141                newu->dentry        = dget(otheru->dentry);
1142                newu->mnt        = mntget(otheru->mnt);
1143        }
1144
1145        /* Set credentials */
1146        sk->sk_peercred = other->sk_peercred;
1147
1148        sock->state        = SS_CONNECTED;
1149        sk->sk_state        = TCP_ESTABLISHED;
            /* ...and sk holds a reference on newsk as its peer. */
1150        sock_hold(newsk);
1151
1152        smp_mb__after_atomic_inc();        /* sock_hold() does an atomic_inc() */
1153        unix_peer(sk)        = newsk;
1154
1155        unix_state_unlock(sk);
1156
1157        /* take ten and send info to listening sock */
1158        spin_lock(&other->sk_receive_queue.lock);
1159        __skb_queue_tail(&other->sk_receive_queue, skb);
1160        spin_unlock(&other->sk_receive_queue.lock);
1161        unix_state_unlock(other);
1162        other->sk_data_ready(other, 0);
1163        sock_put(other);
1164        return 0;
1165
1166out_unlock:
1167        if (other)
1168                unix_state_unlock(other);
1169
            /* Failure: release whatever was set up before the error. */
1170out:
1171        if (skb)
1172                kfree_skb(skb);
1173        if (newsk)
1174                unix_release_sock(newsk, 0);
1175        if (other)
1176                sock_put(other);
1177        return err;
1178}
1179
1180static int unix_socketpair(struct socket *socka, struct socket *sockb)
1181{
1182        struct sock *ska=socka->sk, *skb = sockb->sk;
1183
1184        /* Join our sockets back to back */
1185        sock_hold(ska);
1186        sock_hold(skb);
1187        unix_peer(ska)=skb;
1188        unix_peer(skb)=ska;
1189        ska->sk_peercred.pid = skb->sk_peercred.pid = task_tgid_vnr(current);
1190        ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
1191        ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;
1192
1193        if (ska->sk_type != SOCK_DGRAM) {
1194                ska->sk_state = TCP_ESTABLISHED;
1195                skb->sk_state = TCP_ESTABLISHED;
1196                socka->state  = SS_CONNECTED;
1197                sockb->state  = SS_CONNECTED;
1198        }
1199        return 0;
1200}
1201
1202static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1203{
1204        struct sock *sk = sock->sk;
1205        struct sock *tsk;
1206        struct sk_buff *skb;
1207        int err;
1208
1209        err = -EOPNOTSUPP;
1210        if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
1211                goto out;
1212
1213        err = -EINVAL;
1214        if (sk->sk_state != TCP_LISTEN)
1215                goto out;
1216
1217        /* If socket state is TCP_LISTEN it cannot change (for now...),
1218         * so that no locks are necessary.
1219         */
1220
1221        skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1222        if (!skb) {
1223                /* This means receive shutdown. */
1224                if (err == 0)
1225                        err = -EINVAL;
1226                goto out;
1227        }
1228
1229        tsk = skb->sk;
1230        skb_free_datagram(sk, skb);
1231        wake_up_interruptible(&unix_sk(sk)->peer_wait);
1232
1233        /* attach accepted sock to socket */
1234        unix_state_lock(tsk);
1235        newsock->state = SS_CONNECTED;
1236        sock_graft(tsk, newsock);
1237        unix_state_unlock(tsk);
1238        return 0;
1239
1240out:
1241        return err;
1242}
1243
1244
1245static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1246{
1247        struct sock *sk = sock->sk;
1248        struct unix_sock *u;
1249        struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
1250        int err = 0;
1251
1252        if (peer) {
1253                sk = unix_peer_get(sk);
1254
1255                err = -ENOTCONN;
1256                if (!sk)
1257                        goto out;
1258                err = 0;
1259        } else {
1260                sock_hold(sk);
1261        }
1262
1263        u = unix_sk(sk);
1264        unix_state_lock(sk);
1265        if (!u->addr) {
1266                sunaddr->sun_family = AF_UNIX;
1267                sunaddr->sun_path[0] = 0;
1268                *uaddr_len = sizeof(short);
1269        } else {
1270                struct unix_address *addr = u->addr;
1271
1272                *uaddr_len = addr->len;
1273                memcpy(sunaddr, addr->name, *uaddr_len);
1274        }
1275        unix_state_unlock(sk);
1276        sock_put(sk);
1277out:
1278        return err;
1279}
1280
1281static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1282{
1283        int i;
1284
1285        scm->fp = UNIXCB(skb).fp;
1286        skb->destructor = sock_wfree;
1287        UNIXCB(skb).fp = NULL;
1288
1289        for (i=scm->fp->count-1; i>=0; i--)
1290                unix_notinflight(scm->fp->fp[i]);
1291}
1292
1293static void unix_destruct_fds(struct sk_buff *skb)
1294{
1295        struct scm_cookie scm;
1296        memset(&scm, 0, sizeof(scm));
1297        unix_detach_fds(&scm, skb);
1298
1299        /* Alas, it calls VFS */
1300        /* So fscking what? fput() had been SMP-safe since the last Summer */
1301        scm_destroy(&scm);
1302        sock_wfree(skb);
1303}
1304
1305static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1306{
1307        int i;
1308
1309        /*
1310         * Need to duplicate file references for the sake of garbage
1311         * collection.  Otherwise a socket in the fps might become a
1312         * candidate for GC while the skb is not yet queued.
1313         */
1314        UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1315        if (!UNIXCB(skb).fp)
1316                return -ENOMEM;
1317
1318        for (i=scm->fp->count-1; i>=0; i--)
1319                unix_inflight(scm->fp->fp[i]);
1320        skb->destructor = unix_destruct_fds;
1321        return 0;
1322}
1323
1324/*
1325 *        Send AF_UNIX data.
1326 */
1327
    /*
     * Send one datagram, either to the address in msg->msg_name or, when no
     * name is given, to the connected peer.
     *
     * The whole message is copied into a single skb, which is queued on the
     * receiver's receive queue under the receiver's state lock.
     *
     * Returns @len on success or a negative errno: -EOPNOTSUPP for MSG_OOB,
     * -ENOTCONN without name and peer, -EMSGSIZE when len exceeds
     * sk_sndbuf - 32, -EPERM when unix_may_send() refuses, -ECONNREFUSED
     * when the connected peer turned out to be dead, -ECONNRESET when a
     * re-lookup is needed but no name was given, -EPIPE when the receiver
     * has RCV_SHUTDOWN set, -EAGAIN when the receiver is full and no waiting
     * is allowed, or errors reported by the helpers called here.
     */
1328static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1329                              struct msghdr *msg, size_t len)
1330{
1331        struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1332        struct sock *sk = sock->sk;
1333        struct net *net = sock_net(sk);
1334        struct unix_sock *u = unix_sk(sk);
1335        struct sockaddr_un *sunaddr=msg->msg_name;
1336        struct sock *other = NULL;
1337        int namelen = 0; /* fake GCC */
1338        int err;
1339        unsigned hash;
1340        struct sk_buff *skb;
1341        long timeo;
1342        struct scm_cookie tmp_scm;
1343
            /* Use the on-stack cookie when the caller supplied none. */
1344        if (NULL == siocb->scm)
1345                siocb->scm = &tmp_scm;
1346        wait_for_unix_gc();
1347        err = scm_send(sock, msg, siocb->scm);
1348        if (err < 0)
1349                return err;
1350
1351        err = -EOPNOTSUPP;
1352        if (msg->msg_flags&MSG_OOB)
1353                goto out;
1354
1355        if (msg->msg_namelen) {
1356                err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1357                if (err < 0)
1358                        goto out;
1359                namelen = err;
1360        } else {
                    /* No destination given: use the connected peer. */
1361                sunaddr = NULL;
1362                err = -ENOTCONN;
1363                other = unix_peer_get(sk);
1364                if (!other)
1365                        goto out;
1366        }
1367
            /* Credential passing enabled but no address yet: autobind first. */
1368        if (test_bit(SOCK_PASSCRED, &sock->flags)
1369                && !u->addr && (err = unix_autobind(sock)) != 0)
1370                goto out;
1371
1372        err = -EMSGSIZE;
1373        if (len > sk->sk_sndbuf - 32)
1374                goto out;
1375
1376        skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
1377        if (skb==NULL)
1378                goto out;
1379
            /* Credentials, optional file list and security data ride on the skb. */
1380        memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1381        if (siocb->scm->fp) {
1382                err = unix_attach_fds(siocb->scm, skb);
1383                if (err)
1384                        goto out_free;
1385        }
1386        unix_get_secdata(siocb->scm, skb);
1387
1388        skb_reset_transport_header(skb);
1389        err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
1390        if (err)
1391                goto out_free;
1392
1393        timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1394
1395restart:
            /* No receiver at hand (first pass with a name, or after it died). */
1396        if (!other) {
1397                err = -ECONNRESET;
1398                if (sunaddr == NULL)
1399                        goto out_free;
1400
1401                other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1402                                        hash, &err);
1403                if (other==NULL)
1404                        goto out_free;
1405        }
1406
            /*
             * NOTE(review): both 'goto restart' paths arrive here with
             * other's state lock already released - the SOCK_DEAD branch
             * unlocks explicitly (and looks the receiver up again), and
             * unix_wait_for_peer() unlocks before it returns - so this is
             * not a second acquisition of a lock that is still held.
             */
1407        unix_state_lock(other);
1408        err = -EPERM;
1409        if (!unix_may_send(sk, other))
1410                goto out_unlock;
1411
1412        if (sock_flag(other, SOCK_DEAD)) {
1413                /*
1414                 *        Check with 1003.1g - what should
1415                 *        datagram error
1416                 */
1417                unix_state_unlock(other);
1418                sock_put(other);
1419
                    /*
                     * If the dead socket was our connected peer, disconnect
                     * and fail with -ECONNREFUSED; otherwise err stays 0 and
                     * the destination is looked up again.
                     */
1420                err = 0;
1421                unix_state_lock(sk);
1422                if (unix_peer(sk) == other) {
1423                        unix_peer(sk)=NULL;
1424                        unix_state_unlock(sk);
1425
1426                        unix_dgram_disconnected(sk, other);
1427                        sock_put(other);
1428                        err = -ECONNREFUSED;
1429                } else {
1430                        unix_state_unlock(sk);
1431                }
1432
1433                other = NULL;
1434                if (err)
1435                        goto out_free;
1436                goto restart;
1437        }
1438
1439        err = -EPIPE;
1440        if (other->sk_shutdown & RCV_SHUTDOWN)
1441                goto out_unlock;
1442
1443        if (sk->sk_type != SOCK_SEQPACKET) {
1444                err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1445                if (err)
1446                        goto out_unlock;
1447        }
1448
            /* A full receive queue is not enforced when the receiver's peer is sk. */
1449        if (unix_peer(other) != sk && unix_recvq_full(other)) {
1450                if (!timeo) {
1451                        err = -EAGAIN;
1452                        goto out_unlock;
1453                }
1454
                    /* Releases other's state lock; see unix_wait_for_peer(). */
1455                timeo = unix_wait_for_peer(other, timeo);
1456
1457                err = sock_intr_errno(timeo);
1458                if (signal_pending(current))
1459                        goto out_free;
1460
1461                goto restart;
1462        }
1463
1464        skb_queue_tail(&other->sk_receive_queue, skb);
1465        unix_state_unlock(other);
1466        other->sk_data_ready(other, len);
1467        sock_put(other);
1468        scm_destroy(siocb->scm);
1469        return len;
1470
1471out_unlock:
1472        unix_state_unlock(other);
1473out_free:
1474        kfree_skb(skb);
1475out:
1476        if (other)
1477                sock_put(other);
1478        scm_destroy(siocb->scm);
1479        return err;
1480}
1481
1482
1483static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1484                               struct msghdr *msg, size_t len)
1485{
1486        struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1487        struct sock *sk = sock->sk;
1488        struct sock *other = NULL;
1489        struct sockaddr_un *sunaddr=msg->msg_name;
1490        int err,size;
1491        struct sk_buff *skb;
1492        int sent=0;
1493        struct scm_cookie tmp_scm;
1494
1495        if (NULL == siocb->scm)
1496                siocb->scm = &tmp_scm;
1497        wait_for_unix_gc();
1498        err = scm_send(sock, msg, siocb->scm);
1499        if (err < 0)
1500                return err;
1501
1502        err = -EOPNOTSUPP;
1503        if (msg->msg_flags&MSG_OOB)
1504                goto out_err;
1505
1506        if (msg->msg_namelen) {
1507                err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1508                goto out_err;
1509        } else {
1510                sunaddr = NULL;
1511                err = -ENOTCONN;
1512                other = unix_peer(sk);
1513                if (!other)
1514                        goto out_err;
1515        }
1516
1517        if (sk->sk_shutdown & SEND_SHUTDOWN)
1518                goto pipe_err;
1519
1520        while(sent < len)
1521        {
1522                /*
1523                 *        Optimisation for the fact that under 0.01% of X
1524                 *        messages typically need breaking up.
1525                 */
1526
1527                size = len-sent;
1528
1529                /* Keep two messages in the pipe so it schedules better */
1530                if (size > ((sk->sk_sndbuf >> 1) - 64))
1531                        size = (sk->sk_sndbuf >> 1) - 64;
1532
1533                if (size > SKB_MAX_ALLOC)
1534                        size = SKB_MAX_ALLOC;
1535
1536                /*
1537                 *        Grab a buffer
1538                 */
1539
1540                skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err);
1541
1542                if (skb==NULL)
1543                        goto out_err;
1544
1545                /*
1546                 *        If you pass two values to the sock_alloc_send_skb
1547                 *        it tries to grab the large buffer with GFP_NOFS
1548                 *        (which can fail easily), and if it fails grab the
1549                 *        fallback size buffer which is under a page and will
1550                 *        succeed. [Alan]
1551                 */
1552                size = min_t(int, size, skb_tailroom(skb));
1553
1554                memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1555                if (siocb->scm->fp) {
1556                        err = unix_attach_fds(siocb->scm, skb);
1557                        if (err) {
1558                                kfree_skb(skb);
1559                                goto out_err;
1560                        }
1561                }
1562
1563                if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
1564                        kfree_skb(skb);
1565                        goto out_err;
1566                }
1567
1568                unix_state_lock(other);
1569
1570                if (sock_flag(other, SOCK_DEAD) ||
1571                    (other->sk_shutdown & RCV_SHUTDOWN))
1572                        goto pipe_err_free;
1573
1574                skb_queue_tail(&other->sk_receive_queue, skb);
1575                unix_state_unlock(other);
1576                other->sk_data_ready(other, size);
1577                sent+=size;
1578        }
1579
1580        scm_destroy(siocb->scm);
1581        siocb->scm = NULL;
1582
1583        return sent;
1584
1585pipe_err_free:
1586        unix_state_unlock(other);
1587        kfree_skb(skb);
1588pipe_err:
1589        if (sent==0 && !(msg->msg_flags&MSG_NOSIGNAL))
1590                send_sig(SIGPIPE,current,0);
1591        err = -EPIPE;
1592out_err:
1593        scm_destroy(siocb->scm);
1594        siocb->scm = NULL;
1595        return sent ? : err;
1596}
1597
1598static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1599                                  struct msghdr *msg, size_t len)
1600{
1601        int err;
1602        struct sock *sk = sock->sk;
1603
1604        err = sock_error(sk);
1605        if (err)
1606                return err;
1607
1608        if (sk->sk_state != TCP_ESTABLISHED)
1609                return -ENOTCONN;
1610
1611        if (msg->msg_namelen)
1612                msg->msg_namelen = 0;
1613
1614        return unix_dgram_sendmsg(kiocb, sock, msg, len);
1615}
1616
1617static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1618{
1619        struct unix_sock *u = unix_sk(sk);
1620
1621        msg->msg_namelen = 0;
1622        if (u->addr) {
1623                msg->msg_namelen = u->addr->len;
1624                memcpy(msg->msg_name, u->addr->name, u->addr->len);
1625        }
1626}
1627
    /*
     * Receive one datagram.
     *
     * Runs under u->readlock.  The sender's address is reported when the
     * caller provided msg_name, a too-small buffer sets MSG_TRUNC, and the
     * credentials / security data / file list carried by the skb are handed
     * to the caller through the scm cookie.
     *
     * Returns the number of bytes copied, 0 for end-of-file on a
     * non-blocking SEQPACKET socket with RCV_SHUTDOWN set, or a negative
     * errno (-EOPNOTSUPP for MSG_OOB, otherwise from skb_recv_datagram() or
     * skb_copy_datagram_iovec()).
     */
1628static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1629                              struct msghdr *msg, size_t size,
1630                              int flags)
1631{
1632        struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1633        struct scm_cookie tmp_scm;
1634        struct sock *sk = sock->sk;
1635        struct unix_sock *u = unix_sk(sk);
1636        int noblock = flags & MSG_DONTWAIT;
1637        struct sk_buff *skb;
1638        int err;
1639
1640        err = -EOPNOTSUPP;
1641        if (flags&MSG_OOB)
1642                goto out;
1643
1644        msg->msg_namelen = 0;
1645
1646        mutex_lock(&u->readlock);
1647
1648        skb = skb_recv_datagram(sk, flags, noblock, &err);
1649        if (!skb) {
1650                unix_state_lock(sk);
1651                /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
1652                if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
1653                    (sk->sk_shutdown & RCV_SHUTDOWN))
1654                        err = 0;
1655                unix_state_unlock(sk);
1656                goto out_unlock;
1657        }
1658
            /* Wake a task sleeping on peer_wait (see unix_wait_for_peer()). */
1659        wake_up_interruptible_sync(&u->peer_wait);
1660
            /* skb->sk is the sending socket; report its address if asked. */
1661        if (msg->msg_name)
1662                unix_copy_addr(msg, skb->sk);
1663
            /* Never copy more than the datagram holds; flag a short buffer. */
1664        if (size > skb->len)
1665                size = skb->len;
1666        else if (size < skb->len)
1667                msg->msg_flags |= MSG_TRUNC;
1668
1669        err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
1670        if (err)
1671                goto out_free;
1672
            /* Use a zeroed on-stack cookie when the caller supplied none. */
1673        if (!siocb->scm) {
1674                siocb->scm = &tmp_scm;
1675                memset(&tmp_scm, 0, sizeof(tmp_scm));
1676        }
1677        siocb->scm->creds = *UNIXCREDS(skb);
1678        unix_set_secdata(siocb->scm, skb);
1679
1680        if (!(flags & MSG_PEEK))
1681        {
                    /* Real read: the file list moves from the skb to the cookie. */
1682                if (UNIXCB(skb).fp)
1683                        unix_detach_fds(siocb->scm, skb);
1684        }
1685        else
1686        {
1687                /* It is questionable: on PEEK we could:
1688                   - do not return fds - good, but too simple 8)
1689                   - return fds, and do not return them on read (old strategy,
1690                     apparently wrong)
1691                   - clone fds (I chose it for now, it is the most universal
1692                     solution)
1693
1694                   POSIX 1003.1g does not actually define this clearly
1695                   at all. POSIX 1003.1g doesn't define a lot of things
1696                   clearly however!
1697
1698                */
1699                if (UNIXCB(skb).fp)
1700                        siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1701        }
1702        err = size;
1703
1704        scm_recv(sock, msg, siocb->scm, flags);
1705
1706out_free:
1707        skb_free_datagram(sk,skb);
1708out_unlock:
1709        mutex_unlock(&u->readlock);
1710out:
1711        return err;
1712}
1713
1714/*
1715 *        Sleep until data has arrived. But check for races..
1716 */
1717
    /*
     * The wake-up conditions are evaluated under @sk's state lock after the
     * task has been put on sk->sk_sleep: queued data, a pending socket
     * error, RCV_SHUTDOWN, a pending signal, or an exhausted timeout.  The
     * lock is dropped only around the actual sleep.
     *
     * Returns the remaining timeout.
     */
1718static long unix_stream_data_wait(struct sock * sk, long timeo)
1719{
1720        DEFINE_WAIT(wait);
1721
1722        unix_state_lock(sk);
1723
1724        for (;;) {
1725                prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1726
1727                if (!skb_queue_empty(&sk->sk_receive_queue) ||
1728                    sk->sk_err ||
1729                    (sk->sk_shutdown & RCV_SHUTDOWN) ||
1730                    signal_pending(current) ||
1731                    !timeo)
1732                        break;
1733
                    /* SOCK_ASYNC_WAITDATA is set only for the duration of the sleep. */
1734                set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1735                unix_state_unlock(sk);
1736                timeo = schedule_timeout(timeo);
1737                unix_state_lock(sk);
1738                clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1739        }
1740
1741        finish_wait(sk->sk_sleep, &wait);
1742        unix_state_unlock(sk);
1743        return timeo;
1744}
1745
1746
1747
/*
 * unix_stream_recvmsg - receive handler for connected stream sockets.
 *
 * Dequeues skbs from sk->sk_receive_queue and copies their data into
 * msg->msg_iov until `size` bytes have been copied, one of the break
 * conditions in the loop is hit, or the queue is empty and at least
 * `target` bytes have already been copied.
 *
 * Returns the number of bytes copied when that is non-zero, otherwise
 * `err` (the final `copied ? : err`):
 *   -EINVAL      socket is not in TCP_ESTABLISHED
 *   -EOPNOTSUPP  MSG_OOB was requested
 *   -EAGAIN      queue empty and timeo is 0
 *   sock_intr_errno(timeo) when a signal is pending after the wait
 *   the value of sock_error(sk) when it is non-zero
 * A failed copy with nothing copied so far stores -EFAULT in `copied`.
 *
 * Locking: u->readlock is taken before the loop and released after it,
 * and is dropped around unix_stream_data_wait().  unix_state_lock(sk)
 * is held only around the dequeue and the empty-queue checks.
 */
1748static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1749                               struct msghdr *msg, size_t size,
1750                               int flags)
1751{
1752        struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1753        struct scm_cookie tmp_scm;
1754        struct sock *sk = sock->sk;
1755        struct unix_sock *u = unix_sk(sk);
1756        struct sockaddr_un *sunaddr=msg->msg_name;
1757        int copied = 0;
1758        int check_creds = 0;
1759        int target;
1760        int err = 0;
1761        long timeo;
1762
1763        err = -EINVAL;
1764        if (sk->sk_state != TCP_ESTABLISHED)
1765                goto out;
1766
1767        err = -EOPNOTSUPP;
1768        if (flags&MSG_OOB)
1769                goto out;
1770
1771        target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
1772        timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
1773
1774        msg->msg_namelen = 0;
1775
1776        /* Lock the socket to prevent queue disordering
1777         * while sleeps in memcpy_tomsg
1778         */
1779
            /* No scm cookie came with the iocb: use a zeroed one on our stack. */
1780        if (!siocb->scm) {
1781                siocb->scm = &tmp_scm;
1782                memset(&tmp_scm, 0, sizeof(tmp_scm));
1783        }
1784
1785        mutex_lock(&u->readlock);
1786
1787        do
1788        {
1789                int chunk;
1790                struct sk_buff *skb;
1791
1792                unix_state_lock(sk);
1793                skb = skb_dequeue(&sk->sk_receive_queue);
1794                if (skb==NULL)
1795                {
                            /* Queue is empty: stop if enough has been copied already. */
1796                        if (copied >= target)
1797                                goto unlock;
1798
1799                        /*
1800                         *        POSIX 1003.1g mandates this order.
1801                         */
1802
1803                        if ((err = sock_error(sk)) != 0)
1804                                goto unlock;
1805                        if (sk->sk_shutdown & RCV_SHUTDOWN)
1806                                goto unlock;
1807
1808                        unix_state_unlock(sk);
1809                        err = -EAGAIN;
                            /*
                             * break, not goto out: readlock is still held here
                             * and is released after the loop.
                             */
1810                        if (!timeo)
1811                                break;
1812                        mutex_unlock(&u->readlock);
1813
1814                        timeo = unix_stream_data_wait(sk, timeo);
1815
                            /*
                             * readlock was dropped above, so this exit jumps
                             * straight to `out`, past the mutex_unlock() and
                             * scm_recv() that follow the loop.
                             */
1816                        if (signal_pending(current)) {
1817                                err = sock_intr_errno(timeo);
1818                                goto out;
1819                        }
1820                        mutex_lock(&u->readlock);
                            /*
                             * In a do/while, `continue` goes to the
                             * `while (size)` test, not directly to the top.
                             */
1821                        continue;
                            /* Exit for the empty-queue cases that still hold the state lock. */
1822 unlock:
1823                        unix_state_unlock(sk);
1824                        break;
1825                }
1826                unix_state_unlock(sk);
1827
1828                if (check_creds) {
1829                        /* Never glue messages from different writers */
1830                        if (memcmp(UNIXCREDS(skb), &siocb->scm->creds, sizeof(siocb->scm->creds)) != 0) {
1831                                skb_queue_head(&sk->sk_receive_queue, skb);
1832                                break;
1833                        }
1834                } else {
1835                        /* Copy credentials */
1836                        siocb->scm->creds = *UNIXCREDS(skb);
1837                        check_creds = 1;
1838                }
1839
1840                /* Copy address just once */
1841                if (sunaddr)
1842                {
1843                        unix_copy_addr(msg, skb->sk);
1844                        sunaddr = NULL;
1845                }
1846
1847                chunk = min_t(unsigned int, skb->len, size);
1848                if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
                            /*
                             * Copy failed: requeue the skb untouched and report
                             * -EFAULT only if nothing had been copied before.
                             */
1849                        skb_queue_head(&sk->sk_receive_queue, skb);
1850                        if (copied == 0)
1851                                copied = -EFAULT;
1852                        break;
1853                }
1854                copied += chunk;
1855                size -= chunk;
1856
1857                /* Mark read part of skb as used */
1858                if (!(flags & MSG_PEEK))
1859                {
1860                        skb_pull(skb, chunk);
1861
1862                        if (UNIXCB(skb).fp)
1863                                unix_detach_fds(siocb->scm, skb);
1864
1865                        /* put the skb back if we didn't use it up.. */
1866                        if (skb->len)
1867                        {
1868                                skb_queue_head(&sk->sk_receive_queue, skb);
1869                                break;
1870                        }
1871
1872                        kfree_skb(skb);
1873
                            /* Stop reading once siocb->scm->fp is set. */
1874                        if (siocb->scm->fp)
1875                                break;
1876                }
1877                else
1878                {
1879                        /* It is questionable, see note in unix_dgram_recvmsg.
1880                         */
1881                        if (UNIXCB(skb).fp)
1882                                siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1883
1884                        /* put message back and return */
1885                        skb_queue_head(&sk->sk_receive_queue, skb);
1886                        break;
1887                }
1888        } while (size);
1889
1890        mutex_unlock(&u->readlock);
1891        scm_recv(sock, msg, siocb->scm, flags);
1892out:
            /* Byte count (or the -EFAULT stored in it) wins over err when non-zero. */
1893        return copied ? : err;
1894}
1895
1896static int unix_shutdown(struct socket *sock, int mode)
1897{
1898        struct sock *sk = sock->sk;
1899        struct sock *other;
1900
1901        mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1902
1903        if (mode) {
1904                unix_state_lock(sk);
1905                sk->sk_shutdown |= mode;
1906                other=unix_peer(sk);
1907                if (other)
1908                        sock_hold(other);
1909                unix_state_unlock(sk);
1910                sk->sk_state_change(sk);
1911
1912                if (other &&
1913                        (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
1914
1915                        int peer_mode = 0;
1916
1917                        if (mode&RCV_SHUTDOWN)
1918                                peer_mode |= SEND_SHUTDOWN;
1919                        if (mode&SEND_SHUTDOWN)
1920                                peer_mode |= RCV_SHUTDOWN;
1921                        unix_state_lock(other);
1922                        other->sk_shutdown |= peer_mode;
1923                        unix_state_unlock(other);
1924                        other->sk_state_change(other);
1925                        read_lock(&other->sk_callback_lock);
1926                        if (peer_mode == SHUTDOWN_MASK)
1927                                sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
1928                        else if (peer_mode & RCV_SHUTDOWN)
1929                                sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
1930                        read_unlock(&other->sk_callback_lock);
1931                }
1932                if (other)
1933                        sock_put(other);
1934        }
1935        return 0;
1936}
1937
1938static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1939{
1940        struct sock *sk = sock->sk;
1941        long amount=0;
1942        int err;
1943
1944        switch(cmd)
1945        {
1946                case SIOCOUTQ:
1947                        amount = atomic_read(&sk->sk_wmem_alloc);
1948                        err = put_user(amount, (int __user *)arg);
1949                        break;
1950                case SIOCINQ:
1951                {
1952                        struct sk_buff *skb;
1953
1954                        if (sk->sk_state == TCP_LISTEN) {
1955                                err = -EINVAL;
1956                                break;
1957                        }
1958
1959                        spin_lock(&sk->sk_receive_queue.lock);
1960                        if (sk->sk_type == SOCK_STREAM ||
1961                            sk->sk_type == SOCK_SEQPACKET) {
1962                                skb_queue_walk(&sk->sk_receive_queue, skb)
1963                                        amount += skb->len;
1964                        } else {
1965                                skb = skb_peek(&sk->sk_receive_queue);
1966                                if (skb)
1967                                        amount=skb->len;
1968                        }
1969                        spin_unlock(&sk->sk_receive_queue.lock);
1970                        err = put_user(amount, (int __user *)arg);
1971                        break;
1972                }
1973
1974                default:
1975                        err = -ENOIOCTLCMD;
1976                        break;
1977        }
1978        return err;
1979}
1980
1981static unsigned int unix_poll(struct file * file, struct socket *sock, poll_table *wait)
1982{
1983        struct sock *sk = sock->sk;
1984        unsigned int mask;
1985
1986        poll_wait(file, sk->sk_sleep, wait);
1987        mask = 0;
1988
1989        /* exceptional events? */
1990        if (sk->sk_err)
1991                mask |= POLLERR;
1992        if (sk->sk_shutdown == SHUTDOWN_MASK)
1993                mask |= POLLHUP;
1994        if (sk->sk_shutdown & RCV_SHUTDOWN)
1995                mask |= POLLRDHUP;
1996
1997        /* readable? */
1998        if (!skb_queue_empty(&sk->sk_receive_queue) ||
1999            (sk->sk_shutdown & RCV_SHUTDOWN))
2000                mask |= POLLIN | POLLRDNORM;
2001
2002        /* Connection-based need to check for termination and startup */
2003        if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && sk->sk_state == TCP_CLOSE)
2004                mask |= POLLHUP;
2005
2006        /*
2007         * we set writable also when the other side has shut down the
2008         * connection. This prevents stuck sockets.
2009         */
2010        if (unix_writable(sk))
2011                mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2012
2013        return mask;
2014}
2015
2016static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2017                                    poll_table *wait)
2018{
2019        struct sock *sk = sock->sk, *other;
2020        unsigned int mask, writable;
2021
2022        poll_wait(file, sk->sk_sleep, wait);
2023        mask = 0;
2024
2025        /* exceptional events? */
2026        if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2027                mask |= POLLERR;
2028        if (sk->sk_shutdown & RCV_SHUTDOWN)
2029                mask |= POLLRDHUP;
2030        if (sk->sk_shutdown == SHUTDOWN_MASK)
2031                mask |= POLLHUP;
2032
2033        /* readable? */
2034        if (!skb_queue_empty(&sk->sk_receive_queue) ||
2035            (sk->sk_shutdown & RCV_SHUTDOWN))
2036                mask |= POLLIN | POLLRDNORM;
2037
2038        /* Connection-based need to check for termination and startup */
2039        if (sk->sk_type == SOCK_SEQPACKET) {
2040                if (sk->sk_state == TCP_CLOSE)
2041                        mask |= POLLHUP;
2042                /* connection hasn't started yet? */
2043                if (sk->sk_state == TCP_SYN_SENT)
2044                        return mask;
2045        }
2046
2047        /* writable? */
2048        writable = unix_writable(sk);
2049        if (writable) {
2050                other = unix_peer_get(sk);
2051                if (other) {
2052                        if (unix_peer(other) != sk) {
2053                                poll_wait(file, &unix_sk(other)->peer_wait,
2054                                          wait);
2055                                if (unix_recvq_full(other))
2056                                        writable = 0;
2057                        }
2058
2059                        sock_put(other);
2060                }
2061        }
2062
2063        if (writable)
2064                mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2065        else
2066                set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
2067
2068        return mask;
2069}
2070
2071#ifdef CONFIG_PROC_FS
2072static struct sock *first_unix_socket(int *i)
2073{
2074        for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
2075                if (!hlist_empty(&unix_socket_table[*i]))
2076                        return __sk_head(&unix_socket_table[*i]);
2077        }
2078        return NULL;
2079}
2080
2081static struct sock *next_unix_socket(int *i, struct sock *s)
2082{
2083        struct sock *next = sk_next(s);
2084        /* More in this chain? */
2085        if (next)
2086                return next;
2087        /* Look for next non-empty chain. */
2088        for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
2089                if (!hlist_empty(&unix_socket_table[*i]))
2090                        return __sk_head(&unix_socket_table[*i]);
2091        }
2092        return NULL;
2093}
2094
2095struct unix_iter_state {
2096        struct seq_net_private p;
2097        int i;
2098};
2099static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos)
2100{
2101        struct unix_iter_state *iter = seq->private;
2102        loff_t off = 0;
2103        struct sock *s;
2104
2105        for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) {
2106                if (sock_net(s) != seq_file_net(seq))
2107                        continue;
2108                if (off == pos)
2109                        return s;
2110                ++off;
2111        }
2112        return NULL;
2113}
2114
2115
2116static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2117        __acquires(unix_table_lock)
2118{
2119        spin_lock(&unix_table_lock);
2120        return *pos ? unix_seq_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2121}
2122
2123static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2124{
2125        struct unix_iter_state *iter = seq->private;
2126        struct sock *sk = v;
2127        ++*pos;
2128
2129        if (v == SEQ_START_TOKEN)
2130                sk = first_unix_socket(&iter->i);
2131        else
2132                sk = next_unix_socket(&iter->i, sk);
2133        while (sk && (sock_net(sk) != seq_file_net(seq)))
2134                sk = next_unix_socket(&iter->i, sk);
2135        return sk;
2136}
2137
2138static void unix_seq_stop(struct seq_file *seq, void *v)
2139        __releases(unix_table_lock)
2140{
2141        spin_unlock(&unix_table_lock);
2142}
2143
2144static int unix_seq_show(struct seq_file *seq, void *v)
2145{
2146
2147        if (v == SEQ_START_TOKEN)
2148                seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2149                         "Inode Path\n");
2150        else {
2151                struct sock *s = v;
2152                struct unix_sock *u = unix_sk(s);
2153                unix_state_lock(s);
2154
2155                seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
2156                        s,
2157                        atomic_read(&s->sk_refcnt),
2158                        0,
2159                        s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2160                        s->sk_type,
2161                        s->sk_socket ?
2162                        (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2163                        (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2164                        sock_i_ino(s));
2165
2166                if (u->addr) {
2167                        int i, len;
2168                        seq_putc(seq, ' ');
2169
2170                        i = 0;
2171                        len = u->addr->len - sizeof(short);
2172                        if (!UNIX_ABSTRACT(s))
2173                                len--;
2174                        else {
2175                                seq_putc(seq, '@');
2176                                i++;
2177                        }
2178                        for ( ; i < len; i++)
2179                                seq_putc(seq, u->addr->name->sun_path[i]);
2180                }
2181                unix_state_unlock(s);
2182                seq_putc(seq, '\n');
2183        }
2184
2185        return 0;
2186}
2187
2188static const struct seq_operations unix_seq_ops = {
2189        .start  = unix_seq_start,
2190        .next   = unix_seq_next,
2191        .stop   = unix_seq_stop,
2192        .show   = unix_seq_show,
2193};
2194
2195
2196static int unix_seq_open(struct inode *inode, struct file *file)
2197{
2198        return seq_open_net(inode, file, &unix_seq_ops,
2199                            sizeof(struct unix_iter_state));
2200}
2201
2202static const struct file_operations unix_seq_fops = {
2203        .owner                = THIS_MODULE,
2204        .open                = unix_seq_open,
2205        .read                = seq_read,
2206        .llseek                = seq_lseek,
2207        .release        = seq_release_net,
2208};
2209
2210#endif
2211
2212static struct net_proto_family unix_family_ops = {
2213        .family = PF_UNIX,
2214        .create = unix_create,
2215        .owner        = THIS_MODULE,
2216};
2217
2218
2219static int unix_net_init(struct net *net)
2220{
2221        int error = -ENOMEM;
2222
2223        net->unx.sysctl_max_dgram_qlen = 10;
2224        if (unix_sysctl_register(net))
2225                goto out;
2226
2227#ifdef CONFIG_PROC_FS
2228        if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) {
2229                unix_sysctl_unregister(net);
2230                goto out;
2231        }
2232#endif
2233        error = 0;
2234out:
2235        return error;
2236}
2237
/*
 * Per-namespace teardown: unregister the sysctl entries, then remove
 * the "unix" proc entry.
 */
static void unix_net_exit(struct net *net)
{
	unix_sysctl_unregister(net);
	proc_net_remove(net, "unix");
}
2243
2244static struct pernet_operations unix_net_ops = {
2245        .init = unix_net_init,
2246        .exit = unix_net_exit,
2247};
2248
2249static int __init af_unix_init(void)
2250{
2251        int rc = -1;
2252        struct sk_buff *dummy_skb;
2253
2254        BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));
2255
2256        rc = proto_register(&unix_proto, 1);
2257        if (rc != 0) {
2258                printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2259                       __func__);
2260                goto out;
2261        }
2262
2263        sock_register(&unix_family_ops);
2264        register_pernet_subsys(&unix_net_ops);
2265out:
2266        return rc;
2267}
2268
2269static void __exit af_unix_exit(void)
2270{
2271        sock_unregister(PF_UNIX);
2272        proto_unregister(&unix_proto);
2273        unregister_pernet_subsys(&unix_net_ops);
2274}
2275
2276/* Earlier than device_initcall() so that other drivers invoking
2277   request_module() don't end up in a loop when modprobe tries
2278   to use a UNIX socket. But later than subsys_initcall() because
2279   we depend on stuff initialised there */
2280fs_initcall(af_unix_init);
2281module_exit(af_unix_exit);
2282
2283MODULE_LICENSE("GPL");
2284MODULE_ALIAS_NETPROTO(PF_UNIX);