Showing error 1762

User: Jiri Slaby
Error type: Invalid Pointer Dereference
Error type description: A pointer which is invalid is being dereferenced
File location: fs/pipe.c
Line in file: 979
Project: Linux Kernel
Project version: 2.6.28
Tools: Smatch (1.59)
Entered: 2013-09-10 20:24:52 UTC


Source:

   1/*
   2 *  linux/fs/pipe.c
   3 *
   4 *  Copyright (C) 1991, 1992, 1999  Linus Torvalds
   5 */
   6
   7#include <linux/mm.h>
   8#include <linux/file.h>
   9#include <linux/poll.h>
  10#include <linux/slab.h>
  11#include <linux/module.h>
  12#include <linux/init.h>
  13#include <linux/fs.h>
  14#include <linux/mount.h>
  15#include <linux/pipe_fs_i.h>
  16#include <linux/uio.h>
  17#include <linux/highmem.h>
  18#include <linux/pagemap.h>
  19#include <linux/audit.h>
  20#include <linux/syscalls.h>
  21
  22#include <asm/uaccess.h>
  23#include <asm/ioctls.h>
  24
  25/*
  26 * We use a start+len construction, which provides full use of the 
  27 * allocated memory.
  28 * -- Florian Coosmann (FGC)
  29 * 
  30 * Reads with count = 0 should always return 0.
  31 * -- Julian Bradfield 1999-06-07.
  32 *
  33 * FIFOs and Pipes now generate SIGIO for both readers and writers.
  34 * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16
  35 *
  36 * pipe_read & write cleanup
  37 * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
  38 */
  39
  40/* Drop the inode semaphore and wait for a pipe event, atomically */
  41void pipe_wait(struct pipe_inode_info *pipe)
  42{
  43        DEFINE_WAIT(wait);
  44
  45        /*
  46         * Pipes are system-local resources, so sleeping on them
  47         * is considered a noninteractive wait:
  48         */
  49        prepare_to_wait(&pipe->wait, &wait, TASK_INTERRUPTIBLE);
  50        if (pipe->inode)
  51                mutex_unlock(&pipe->inode->i_mutex);
  52        schedule();
  53        finish_wait(&pipe->wait, &wait);
  54        if (pipe->inode)
  55                mutex_lock(&pipe->inode->i_mutex);
  56}
  57
  58static int
  59pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len,
  60                        int atomic)
  61{
  62        unsigned long copy;
  63
  64        while (len > 0) {
  65                while (!iov->iov_len)
  66                        iov++;
  67                copy = min_t(unsigned long, len, iov->iov_len);
  68
  69                if (atomic) {
  70                        if (__copy_from_user_inatomic(to, iov->iov_base, copy))
  71                                return -EFAULT;
  72                } else {
  73                        if (copy_from_user(to, iov->iov_base, copy))
  74                                return -EFAULT;
  75                }
  76                to += copy;
  77                len -= copy;
  78                iov->iov_base += copy;
  79                iov->iov_len -= copy;
  80        }
  81        return 0;
  82}
  83
  84static int
  85pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len,
  86                      int atomic)
  87{
  88        unsigned long copy;
  89
  90        while (len > 0) {
  91                while (!iov->iov_len)
  92                        iov++;
  93                copy = min_t(unsigned long, len, iov->iov_len);
  94
  95                if (atomic) {
  96                        if (__copy_to_user_inatomic(iov->iov_base, from, copy))
  97                                return -EFAULT;
  98                } else {
  99                        if (copy_to_user(iov->iov_base, from, copy))
 100                                return -EFAULT;
 101                }
 102                from += copy;
 103                len -= copy;
 104                iov->iov_base += copy;
 105                iov->iov_len -= copy;
 106        }
 107        return 0;
 108}
 109
 110/*
 111 * Attempt to pre-fault in the user memory, so we can use atomic copies.
 112 * Returns the number of bytes not faulted in.
 113 */
 114static int iov_fault_in_pages_write(struct iovec *iov, unsigned long len)
 115{
 116        while (!iov->iov_len)
 117                iov++;
 118
 119        while (len > 0) {
 120                unsigned long this_len;
 121
 122                this_len = min_t(unsigned long, len, iov->iov_len);
 123                if (fault_in_pages_writeable(iov->iov_base, this_len))
 124                        break;
 125
 126                len -= this_len;
 127                iov++;
 128        }
 129
 130        return len;
 131}
 132
 133/*
 134 * Pre-fault in the user memory, so we can use atomic copies.
 135 */
 136static void iov_fault_in_pages_read(struct iovec *iov, unsigned long len)
 137{
 138        while (!iov->iov_len)
 139                iov++;
 140
 141        while (len > 0) {
 142                unsigned long this_len;
 143
 144                this_len = min_t(unsigned long, len, iov->iov_len);
 145                fault_in_pages_readable(iov->iov_base, this_len);
 146                len -= this_len;
 147                iov++;
 148        }
 149}
 150
 151static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
 152                                  struct pipe_buffer *buf)
 153{
 154        struct page *page = buf->page;
 155
 156        /*
 157         * If nobody else uses this page, and we don't already have a
 158         * temporary page, let's keep track of it as a one-deep
 159         * allocation cache. (Otherwise just release our reference to it)
 160         */
 161        if (page_count(page) == 1 && !pipe->tmp_page)
 162                pipe->tmp_page = page;
 163        else
 164                page_cache_release(page);
 165}
 166
 167/**
 168 * generic_pipe_buf_map - virtually map a pipe buffer
 169 * @pipe:        the pipe that the buffer belongs to
 170 * @buf:        the buffer that should be mapped
 171 * @atomic:        whether to use an atomic map
 172 *
 173 * Description:
 174 *        This function returns a kernel virtual address mapping for the
 175 *        pipe_buffer passed in @buf. If @atomic is set, an atomic map is provided
 176 *        and the caller has to be careful not to fault before calling
 177 *        the unmap function.
 178 *
 179 *        Note that this function occupies KM_USER0 if @atomic != 0.
 180 */
 181void *generic_pipe_buf_map(struct pipe_inode_info *pipe,
 182                           struct pipe_buffer *buf, int atomic)
 183{
 184        if (atomic) {
 185                buf->flags |= PIPE_BUF_FLAG_ATOMIC;
 186                return kmap_atomic(buf->page, KM_USER0);
 187        }
 188
 189        return kmap(buf->page);
 190}
 191
 192/**
 193 * generic_pipe_buf_unmap - unmap a previously mapped pipe buffer
 194 * @pipe:        the pipe that the buffer belongs to
 195 * @buf:        the buffer that should be unmapped
 196 * @map_data:        the data that the mapping function returned
 197 *
 198 * Description:
 199 *        This function undoes the mapping that ->map() provided.
 200 */
 201void generic_pipe_buf_unmap(struct pipe_inode_info *pipe,
 202                            struct pipe_buffer *buf, void *map_data)
 203{
 204        if (buf->flags & PIPE_BUF_FLAG_ATOMIC) {
 205                buf->flags &= ~PIPE_BUF_FLAG_ATOMIC;
 206                kunmap_atomic(map_data, KM_USER0);
 207        } else
 208                kunmap(buf->page);
 209}
 210
 211/**
 212 * generic_pipe_buf_steal - attempt to take ownership of a &pipe_buffer
 213 * @pipe:        the pipe that the buffer belongs to
 214 * @buf:        the buffer to attempt to steal
 215 *
 216 * Description:
 217 *        This function attempts to steal the &struct page attached to
 218 *        @buf. If successful, this function returns 0 and returns with
 219 *        the page locked. The caller may then reuse the page for whatever
 220 *        he wishes; the typical use is insertion into a different file
 221 *        page cache.
 222 */
 223int generic_pipe_buf_steal(struct pipe_inode_info *pipe,
 224                           struct pipe_buffer *buf)
 225{
 226        struct page *page = buf->page;
 227
 228        /*
 229         * A reference of one is golden, that means that the owner of this
 230         * page is the only one holding a reference to it. lock the page
 231         * and return OK.
 232         */
 233        if (page_count(page) == 1) {
 234                lock_page(page);
 235                return 0;
 236        }
 237
 238        return 1;
 239}
 240
 241/**
 242 * generic_pipe_buf_get - get a reference to a &struct pipe_buffer
 243 * @pipe:        the pipe that the buffer belongs to
 244 * @buf:        the buffer to get a reference to
 245 *
 246 * Description:
 247 *        This function grabs an extra reference to @buf. It's used in
 248 *        in the tee() system call, when we duplicate the buffers in one
 249 *        pipe into another.
 250 */
 251void generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
 252{
 253        page_cache_get(buf->page);
 254}
 255
 256/**
 257 * generic_pipe_buf_confirm - verify contents of the pipe buffer
 258 * @info:        the pipe that the buffer belongs to
 259 * @buf:        the buffer to confirm
 260 *
 261 * Description:
 262 *        This function does nothing, because the generic pipe code uses
 263 *        pages that are always good when inserted into the pipe.
 264 */
 265int generic_pipe_buf_confirm(struct pipe_inode_info *info,
 266                             struct pipe_buffer *buf)
 267{
 268        return 0;
 269}
 270
 271static const struct pipe_buf_operations anon_pipe_buf_ops = {
 272        .can_merge = 1,
 273        .map = generic_pipe_buf_map,
 274        .unmap = generic_pipe_buf_unmap,
 275        .confirm = generic_pipe_buf_confirm,
 276        .release = anon_pipe_buf_release,
 277        .steal = generic_pipe_buf_steal,
 278        .get = generic_pipe_buf_get,
 279};
 280
 281static ssize_t
 282pipe_read(struct kiocb *iocb, const struct iovec *_iov,
 283           unsigned long nr_segs, loff_t pos)
 284{
 285        struct file *filp = iocb->ki_filp;
 286        struct inode *inode = filp->f_path.dentry->d_inode;
 287        struct pipe_inode_info *pipe;
 288        int do_wakeup;
 289        ssize_t ret;
 290        struct iovec *iov = (struct iovec *)_iov;
 291        size_t total_len;
 292
 293        total_len = iov_length(iov, nr_segs);
 294        /* Null read succeeds. */
 295        if (unlikely(total_len == 0))
 296                return 0;
 297
 298        do_wakeup = 0;
 299        ret = 0;
 300        mutex_lock(&inode->i_mutex);
 301        pipe = inode->i_pipe;
 302        for (;;) {
 303                int bufs = pipe->nrbufs;
 304                if (bufs) {
 305                        int curbuf = pipe->curbuf;
 306                        struct pipe_buffer *buf = pipe->bufs + curbuf;
 307                        const struct pipe_buf_operations *ops = buf->ops;
 308                        void *addr;
 309                        size_t chars = buf->len;
 310                        int error, atomic;
 311
 312                        if (chars > total_len)
 313                                chars = total_len;
 314
 315                        error = ops->confirm(pipe, buf);
 316                        if (error) {
 317                                if (!ret)
 318                                        error = ret;
 319                                break;
 320                        }
 321
 322                        atomic = !iov_fault_in_pages_write(iov, chars);
 323redo:
 324                        addr = ops->map(pipe, buf, atomic);
 325                        error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars, atomic);
 326                        ops->unmap(pipe, buf, addr);
 327                        if (unlikely(error)) {
 328                                /*
 329                                 * Just retry with the slow path if we failed.
 330                                 */
 331                                if (atomic) {
 332                                        atomic = 0;
 333                                        goto redo;
 334                                }
 335                                if (!ret)
 336                                        ret = error;
 337                                break;
 338                        }
 339                        ret += chars;
 340                        buf->offset += chars;
 341                        buf->len -= chars;
 342                        if (!buf->len) {
 343                                buf->ops = NULL;
 344                                ops->release(pipe, buf);
 345                                curbuf = (curbuf + 1) & (PIPE_BUFFERS-1);
 346                                pipe->curbuf = curbuf;
 347                                pipe->nrbufs = --bufs;
 348                                do_wakeup = 1;
 349                        }
 350                        total_len -= chars;
 351                        if (!total_len)
 352                                break;        /* common path: read succeeded */
 353                }
 354                if (bufs)        /* More to do? */
 355                        continue;
 356                if (!pipe->writers)
 357                        break;
 358                if (!pipe->waiting_writers) {
 359                        /* syscall merging: Usually we must not sleep
 360                         * if O_NONBLOCK is set, or if we got some data.
 361                         * But if a writer sleeps in kernel space, then
 362                         * we can wait for that data without violating POSIX.
 363                         */
 364                        if (ret)
 365                                break;
 366                        if (filp->f_flags & O_NONBLOCK) {
 367                                ret = -EAGAIN;
 368                                break;
 369                        }
 370                }
 371                if (signal_pending(current)) {
 372                        if (!ret)
 373                                ret = -ERESTARTSYS;
 374                        break;
 375                }
 376                if (do_wakeup) {
 377                        wake_up_interruptible_sync(&pipe->wait);
 378                         kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
 379                }
 380                pipe_wait(pipe);
 381        }
 382        mutex_unlock(&inode->i_mutex);
 383
 384        /* Signal writers asynchronously that there is more room. */
 385        if (do_wakeup) {
 386                wake_up_interruptible_sync(&pipe->wait);
 387                kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
 388        }
 389        if (ret > 0)
 390                file_accessed(filp);
 391        return ret;
 392}
 393
 394static ssize_t
 395pipe_write(struct kiocb *iocb, const struct iovec *_iov,
 396            unsigned long nr_segs, loff_t ppos)
 397{
 398        struct file *filp = iocb->ki_filp;
 399        struct inode *inode = filp->f_path.dentry->d_inode;
 400        struct pipe_inode_info *pipe;
 401        ssize_t ret;
 402        int do_wakeup;
 403        struct iovec *iov = (struct iovec *)_iov;
 404        size_t total_len;
 405        ssize_t chars;
 406
 407        total_len = iov_length(iov, nr_segs);
 408        /* Null write succeeds. */
 409        if (unlikely(total_len == 0))
 410                return 0;
 411
 412        do_wakeup = 0;
 413        ret = 0;
 414        mutex_lock(&inode->i_mutex);
 415        pipe = inode->i_pipe;
 416
 417        if (!pipe->readers) {
 418                send_sig(SIGPIPE, current, 0);
 419                ret = -EPIPE;
 420                goto out;
 421        }
 422
 423        /* We try to merge small writes */
 424        chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */
 425        if (pipe->nrbufs && chars != 0) {
 426                int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) &
 427                                                        (PIPE_BUFFERS-1);
 428                struct pipe_buffer *buf = pipe->bufs + lastbuf;
 429                const struct pipe_buf_operations *ops = buf->ops;
 430                int offset = buf->offset + buf->len;
 431
 432                if (ops->can_merge && offset + chars <= PAGE_SIZE) {
 433                        int error, atomic = 1;
 434                        void *addr;
 435
 436                        error = ops->confirm(pipe, buf);
 437                        if (error)
 438                                goto out;
 439
 440                        iov_fault_in_pages_read(iov, chars);
 441redo1:
 442                        addr = ops->map(pipe, buf, atomic);
 443                        error = pipe_iov_copy_from_user(offset + addr, iov,
 444                                                        chars, atomic);
 445                        ops->unmap(pipe, buf, addr);
 446                        ret = error;
 447                        do_wakeup = 1;
 448                        if (error) {
 449                                if (atomic) {
 450                                        atomic = 0;
 451                                        goto redo1;
 452                                }
 453                                goto out;
 454                        }
 455                        buf->len += chars;
 456                        total_len -= chars;
 457                        ret = chars;
 458                        if (!total_len)
 459                                goto out;
 460                }
 461        }
 462
 463        for (;;) {
 464                int bufs;
 465
 466                if (!pipe->readers) {
 467                        send_sig(SIGPIPE, current, 0);
 468                        if (!ret)
 469                                ret = -EPIPE;
 470                        break;
 471                }
 472                bufs = pipe->nrbufs;
 473                if (bufs < PIPE_BUFFERS) {
 474                        int newbuf = (pipe->curbuf + bufs) & (PIPE_BUFFERS-1);
 475                        struct pipe_buffer *buf = pipe->bufs + newbuf;
 476                        struct page *page = pipe->tmp_page;
 477                        char *src;
 478                        int error, atomic = 1;
 479
 480                        if (!page) {
 481                                page = alloc_page(GFP_HIGHUSER);
 482                                if (unlikely(!page)) {
 483                                        ret = ret ? : -ENOMEM;
 484                                        break;
 485                                }
 486                                pipe->tmp_page = page;
 487                        }
 488                        /* Always wake up, even if the copy fails. Otherwise
 489                         * we lock up (O_NONBLOCK-)readers that sleep due to
 490                         * syscall merging.
 491                         * FIXME! Is this really true?
 492                         */
 493                        do_wakeup = 1;
 494                        chars = PAGE_SIZE;
 495                        if (chars > total_len)
 496                                chars = total_len;
 497
 498                        iov_fault_in_pages_read(iov, chars);
 499redo2:
 500                        if (atomic)
 501                                src = kmap_atomic(page, KM_USER0);
 502                        else
 503                                src = kmap(page);
 504
 505                        error = pipe_iov_copy_from_user(src, iov, chars,
 506                                                        atomic);
 507                        if (atomic)
 508                                kunmap_atomic(src, KM_USER0);
 509                        else
 510                                kunmap(page);
 511
 512                        if (unlikely(error)) {
 513                                if (atomic) {
 514                                        atomic = 0;
 515                                        goto redo2;
 516                                }
 517                                if (!ret)
 518                                        ret = error;
 519                                break;
 520                        }
 521                        ret += chars;
 522
 523                        /* Insert it into the buffer array */
 524                        buf->page = page;
 525                        buf->ops = &anon_pipe_buf_ops;
 526                        buf->offset = 0;
 527                        buf->len = chars;
 528                        pipe->nrbufs = ++bufs;
 529                        pipe->tmp_page = NULL;
 530
 531                        total_len -= chars;
 532                        if (!total_len)
 533                                break;
 534                }
 535                if (bufs < PIPE_BUFFERS)
 536                        continue;
 537                if (filp->f_flags & O_NONBLOCK) {
 538                        if (!ret)
 539                                ret = -EAGAIN;
 540                        break;
 541                }
 542                if (signal_pending(current)) {
 543                        if (!ret)
 544                                ret = -ERESTARTSYS;
 545                        break;
 546                }
 547                if (do_wakeup) {
 548                        wake_up_interruptible_sync(&pipe->wait);
 549                        kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
 550                        do_wakeup = 0;
 551                }
 552                pipe->waiting_writers++;
 553                pipe_wait(pipe);
 554                pipe->waiting_writers--;
 555        }
 556out:
 557        mutex_unlock(&inode->i_mutex);
 558        if (do_wakeup) {
 559                wake_up_interruptible_sync(&pipe->wait);
 560                kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
 561        }
 562        if (ret > 0)
 563                file_update_time(filp);
 564        return ret;
 565}
 566
 567static ssize_t
 568bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
 569{
 570        return -EBADF;
 571}
 572
 573static ssize_t
 574bad_pipe_w(struct file *filp, const char __user *buf, size_t count,
 575           loff_t *ppos)
 576{
 577        return -EBADF;
 578}
 579
 580static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 581{
 582        struct inode *inode = filp->f_path.dentry->d_inode;
 583        struct pipe_inode_info *pipe;
 584        int count, buf, nrbufs;
 585
 586        switch (cmd) {
 587                case FIONREAD:
 588                        mutex_lock(&inode->i_mutex);
 589                        pipe = inode->i_pipe;
 590                        count = 0;
 591                        buf = pipe->curbuf;
 592                        nrbufs = pipe->nrbufs;
 593                        while (--nrbufs >= 0) {
 594                                count += pipe->bufs[buf].len;
 595                                buf = (buf+1) & (PIPE_BUFFERS-1);
 596                        }
 597                        mutex_unlock(&inode->i_mutex);
 598
 599                        return put_user(count, (int __user *)arg);
 600                default:
 601                        return -EINVAL;
 602        }
 603}
 604
 605/* No kernel lock held - fine */
 606static unsigned int
 607pipe_poll(struct file *filp, poll_table *wait)
 608{
 609        unsigned int mask;
 610        struct inode *inode = filp->f_path.dentry->d_inode;
 611        struct pipe_inode_info *pipe = inode->i_pipe;
 612        int nrbufs;
 613
 614        poll_wait(filp, &pipe->wait, wait);
 615
 616        /* Reading only -- no need for acquiring the semaphore.  */
 617        nrbufs = pipe->nrbufs;
 618        mask = 0;
 619        if (filp->f_mode & FMODE_READ) {
 620                mask = (nrbufs > 0) ? POLLIN | POLLRDNORM : 0;
 621                if (!pipe->writers && filp->f_version != pipe->w_counter)
 622                        mask |= POLLHUP;
 623        }
 624
 625        if (filp->f_mode & FMODE_WRITE) {
 626                mask |= (nrbufs < PIPE_BUFFERS) ? POLLOUT | POLLWRNORM : 0;
 627                /*
 628                 * Most Unices do not set POLLERR for FIFOs but on Linux they
 629                 * behave exactly like pipes for poll().
 630                 */
 631                if (!pipe->readers)
 632                        mask |= POLLERR;
 633        }
 634
 635        return mask;
 636}
 637
 638static int
 639pipe_release(struct inode *inode, int decr, int decw)
 640{
 641        struct pipe_inode_info *pipe;
 642
 643        mutex_lock(&inode->i_mutex);
 644        pipe = inode->i_pipe;
 645        pipe->readers -= decr;
 646        pipe->writers -= decw;
 647
 648        if (!pipe->readers && !pipe->writers) {
 649                free_pipe_info(inode);
 650        } else {
 651                wake_up_interruptible_sync(&pipe->wait);
 652                kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
 653                kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
 654        }
 655        mutex_unlock(&inode->i_mutex);
 656
 657        return 0;
 658}
 659
 660static int
 661pipe_read_fasync(int fd, struct file *filp, int on)
 662{
 663        struct inode *inode = filp->f_path.dentry->d_inode;
 664        int retval;
 665
 666        mutex_lock(&inode->i_mutex);
 667        retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_readers);
 668        mutex_unlock(&inode->i_mutex);
 669
 670        if (retval < 0)
 671                return retval;
 672
 673        return 0;
 674}
 675
 676
 677static int
 678pipe_write_fasync(int fd, struct file *filp, int on)
 679{
 680        struct inode *inode = filp->f_path.dentry->d_inode;
 681        int retval;
 682
 683        mutex_lock(&inode->i_mutex);
 684        retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_writers);
 685        mutex_unlock(&inode->i_mutex);
 686
 687        if (retval < 0)
 688                return retval;
 689
 690        return 0;
 691}
 692
 693
 694static int
 695pipe_rdwr_fasync(int fd, struct file *filp, int on)
 696{
 697        struct inode *inode = filp->f_path.dentry->d_inode;
 698        struct pipe_inode_info *pipe = inode->i_pipe;
 699        int retval;
 700
 701        mutex_lock(&inode->i_mutex);
 702
 703        retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
 704
 705        if (retval >= 0)
 706                retval = fasync_helper(fd, filp, on, &pipe->fasync_writers);
 707
 708        mutex_unlock(&inode->i_mutex);
 709
 710        if (retval < 0)
 711                return retval;
 712
 713        return 0;
 714}
 715
 716
 717static int
 718pipe_read_release(struct inode *inode, struct file *filp)
 719{
 720        return pipe_release(inode, 1, 0);
 721}
 722
 723static int
 724pipe_write_release(struct inode *inode, struct file *filp)
 725{
 726        return pipe_release(inode, 0, 1);
 727}
 728
 729static int
 730pipe_rdwr_release(struct inode *inode, struct file *filp)
 731{
 732        int decr, decw;
 733
 734        decr = (filp->f_mode & FMODE_READ) != 0;
 735        decw = (filp->f_mode & FMODE_WRITE) != 0;
 736        return pipe_release(inode, decr, decw);
 737}
 738
 739static int
 740pipe_read_open(struct inode *inode, struct file *filp)
 741{
 742        /* We could have perhaps used atomic_t, but this and friends
 743           below are the only places.  So it doesn't seem worthwhile.  */
 744        mutex_lock(&inode->i_mutex);
 745        inode->i_pipe->readers++;
 746        mutex_unlock(&inode->i_mutex);
 747
 748        return 0;
 749}
 750
 751static int
 752pipe_write_open(struct inode *inode, struct file *filp)
 753{
 754        mutex_lock(&inode->i_mutex);
 755        inode->i_pipe->writers++;
 756        mutex_unlock(&inode->i_mutex);
 757
 758        return 0;
 759}
 760
 761static int
 762pipe_rdwr_open(struct inode *inode, struct file *filp)
 763{
 764        mutex_lock(&inode->i_mutex);
 765        if (filp->f_mode & FMODE_READ)
 766                inode->i_pipe->readers++;
 767        if (filp->f_mode & FMODE_WRITE)
 768                inode->i_pipe->writers++;
 769        mutex_unlock(&inode->i_mutex);
 770
 771        return 0;
 772}
 773
 774/*
 775 * The file_operations structs are not static because they
 776 * are also used in linux/fs/fifo.c to do operations on FIFOs.
 777 *
 778 * Pipes reuse fifos' file_operations structs.
 779 */
 780const struct file_operations read_pipefifo_fops = {
 781        .llseek                = no_llseek,
 782        .read                = do_sync_read,
 783        .aio_read        = pipe_read,
 784        .write                = bad_pipe_w,
 785        .poll                = pipe_poll,
 786        .unlocked_ioctl        = pipe_ioctl,
 787        .open                = pipe_read_open,
 788        .release        = pipe_read_release,
 789        .fasync                = pipe_read_fasync,
 790};
 791
 792const struct file_operations write_pipefifo_fops = {
 793        .llseek                = no_llseek,
 794        .read                = bad_pipe_r,
 795        .write                = do_sync_write,
 796        .aio_write        = pipe_write,
 797        .poll                = pipe_poll,
 798        .unlocked_ioctl        = pipe_ioctl,
 799        .open                = pipe_write_open,
 800        .release        = pipe_write_release,
 801        .fasync                = pipe_write_fasync,
 802};
 803
 804const struct file_operations rdwr_pipefifo_fops = {
 805        .llseek                = no_llseek,
 806        .read                = do_sync_read,
 807        .aio_read        = pipe_read,
 808        .write                = do_sync_write,
 809        .aio_write        = pipe_write,
 810        .poll                = pipe_poll,
 811        .unlocked_ioctl        = pipe_ioctl,
 812        .open                = pipe_rdwr_open,
 813        .release        = pipe_rdwr_release,
 814        .fasync                = pipe_rdwr_fasync,
 815};
 816
 817struct pipe_inode_info * alloc_pipe_info(struct inode *inode)
 818{
 819        struct pipe_inode_info *pipe;
 820
 821        pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
 822        if (pipe) {
 823                init_waitqueue_head(&pipe->wait);
 824                pipe->r_counter = pipe->w_counter = 1;
 825                pipe->inode = inode;
 826        }
 827
 828        return pipe;
 829}
 830
 831void __free_pipe_info(struct pipe_inode_info *pipe)
 832{
 833        int i;
 834
 835        for (i = 0; i < PIPE_BUFFERS; i++) {
 836                struct pipe_buffer *buf = pipe->bufs + i;
 837                if (buf->ops)
 838                        buf->ops->release(pipe, buf);
 839        }
 840        if (pipe->tmp_page)
 841                __free_page(pipe->tmp_page);
 842        kfree(pipe);
 843}
 844
 845void free_pipe_info(struct inode *inode)
 846{
 847        __free_pipe_info(inode->i_pipe);
 848        inode->i_pipe = NULL;
 849}
 850
 851static struct vfsmount *pipe_mnt __read_mostly;
 852static int pipefs_delete_dentry(struct dentry *dentry)
 853{
 854        /*
 855         * At creation time, we pretended this dentry was hashed
 856         * (by clearing DCACHE_UNHASHED bit in d_flags)
 857         * At delete time, we restore the truth : not hashed.
 858         * (so that dput() can proceed correctly)
 859         */
 860        dentry->d_flags |= DCACHE_UNHASHED;
 861        return 0;
 862}
 863
 864/*
 865 * pipefs_dname() is called from d_path().
 866 */
 867static char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen)
 868{
 869        return dynamic_dname(dentry, buffer, buflen, "pipe:[%lu]",
 870                                dentry->d_inode->i_ino);
 871}
 872
 873static struct dentry_operations pipefs_dentry_operations = {
 874        .d_delete        = pipefs_delete_dentry,
 875        .d_dname        = pipefs_dname,
 876};
 877
 878static struct inode * get_pipe_inode(void)
 879{
 880        struct inode *inode = new_inode(pipe_mnt->mnt_sb);
 881        struct pipe_inode_info *pipe;
 882
 883        if (!inode)
 884                goto fail_inode;
 885
 886        pipe = alloc_pipe_info(inode);
 887        if (!pipe)
 888                goto fail_iput;
 889        inode->i_pipe = pipe;
 890
 891        pipe->readers = pipe->writers = 1;
 892        inode->i_fop = &rdwr_pipefifo_fops;
 893
 894        /*
 895         * Mark the inode dirty from the very beginning,
 896         * that way it will never be moved to the dirty
 897         * list because "mark_inode_dirty()" will think
 898         * that it already _is_ on the dirty list.
 899         */
 900        inode->i_state = I_DIRTY;
 901        inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
 902        inode->i_uid = current->fsuid;
 903        inode->i_gid = current->fsgid;
 904        inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 905
 906        return inode;
 907
 908fail_iput:
 909        iput(inode);
 910
 911fail_inode:
 912        return NULL;
 913}
 914
 915struct file *create_write_pipe(int flags)
 916{
 917        int err;
 918        struct inode *inode;
 919        struct file *f;
 920        struct dentry *dentry;
 921        struct qstr name = { .name = "" };
 922
 923        err = -ENFILE;
 924        inode = get_pipe_inode();
 925        if (!inode)
 926                goto err;
 927
 928        err = -ENOMEM;
 929        dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &name);
 930        if (!dentry)
 931                goto err_inode;
 932
 933        dentry->d_op = &pipefs_dentry_operations;
 934        /*
 935         * We dont want to publish this dentry into global dentry hash table.
 936         * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
 937         * This permits a working /proc/$pid/fd/XXX on pipes
 938         */
 939        dentry->d_flags &= ~DCACHE_UNHASHED;
 940        d_instantiate(dentry, inode);
 941
 942        err = -ENFILE;
 943        f = alloc_file(pipe_mnt, dentry, FMODE_WRITE, &write_pipefifo_fops);
 944        if (!f)
 945                goto err_dentry;
 946        f->f_mapping = inode->i_mapping;
 947
 948        f->f_flags = O_WRONLY | (flags & O_NONBLOCK);
 949        f->f_version = 0;
 950
 951        return f;
 952
 953 err_dentry:
 954        free_pipe_info(inode);
 955        dput(dentry);
 956        return ERR_PTR(err);
 957
 958 err_inode:
 959        free_pipe_info(inode);
 960        iput(inode);
 961 err:
 962        return ERR_PTR(err);
 963}
 964
 965void free_write_pipe(struct file *f)
 966{
 967        free_pipe_info(f->f_dentry->d_inode);
 968        path_put(&f->f_path);
 969        put_filp(f);
 970}
 971
 972struct file *create_read_pipe(struct file *wrf, int flags)
 973{
 974        struct file *f = get_empty_filp();
 975        if (!f)
 976                return ERR_PTR(-ENFILE);
 977
 978        /* Grab pipe from the writer */
 979        f->f_path = wrf->f_path;
 980        path_get(&wrf->f_path);
 981        f->f_mapping = wrf->f_path.dentry->d_inode->i_mapping;
 982
 983        f->f_pos = 0;
 984        f->f_flags = O_RDONLY | (flags & O_NONBLOCK);
 985        f->f_op = &read_pipefifo_fops;
 986        f->f_mode = FMODE_READ;
 987        f->f_version = 0;
 988
 989        return f;
 990}
 991
 992int do_pipe_flags(int *fd, int flags)
 993{
 994        struct file *fw, *fr;
 995        int error;
 996        int fdw, fdr;
 997
 998        if (flags & ~(O_CLOEXEC | O_NONBLOCK))
 999                return -EINVAL;
1000
1001        fw = create_write_pipe(flags);
1002        if (IS_ERR(fw))
1003                return PTR_ERR(fw);
1004        fr = create_read_pipe(fw, flags);
1005        error = PTR_ERR(fr);
1006        if (IS_ERR(fr))
1007                goto err_write_pipe;
1008
1009        error = get_unused_fd_flags(flags);
1010        if (error < 0)
1011                goto err_read_pipe;
1012        fdr = error;
1013
1014        error = get_unused_fd_flags(flags);
1015        if (error < 0)
1016                goto err_fdr;
1017        fdw = error;
1018
1019        error = audit_fd_pair(fdr, fdw);
1020        if (error < 0)
1021                goto err_fdw;
1022
1023        fd_install(fdr, fr);
1024        fd_install(fdw, fw);
1025        fd[0] = fdr;
1026        fd[1] = fdw;
1027
1028        return 0;
1029
1030 err_fdw:
1031        put_unused_fd(fdw);
1032 err_fdr:
1033        put_unused_fd(fdr);
1034 err_read_pipe:
1035        path_put(&fr->f_path);
1036        put_filp(fr);
1037 err_write_pipe:
1038        free_write_pipe(fw);
1039        return error;
1040}
1041
/*
 * Create a pipe with no extra flags; see do_pipe_flags() for the
 * meaning of @fd and the return value.
 */
int do_pipe(int *fd)
{
        const int no_flags = 0;

        return do_pipe_flags(fd, no_flags);
}
1046
1047/*
1048 * sys_pipe() is the normal C calling standard for creating
1049 * a pipe. It's not the way Unix traditionally does this, though.
1050 */
1051asmlinkage long __weak sys_pipe2(int __user *fildes, int flags)
1052{
1053        int fd[2];
1054        int error;
1055
1056        error = do_pipe_flags(fd, flags);
1057        if (!error) {
1058                if (copy_to_user(fildes, fd, sizeof(fd))) {
1059                        sys_close(fd[0]);
1060                        sys_close(fd[1]);
1061                        error = -EFAULT;
1062                }
1063        }
1064        return error;
1065}
1066
1067asmlinkage long __weak sys_pipe(int __user *fildes)
1068{
1069        return sys_pipe2(fildes, 0);
1070}
1071
1072/*
1073 * pipefs should _never_ be mounted by userland - too much of security hassle,
1074 * no real gain from having the whole whorehouse mounted. So we don't need
1075 * any operations on the root directory. However, we need a non-trivial
1076 * d_name - pipe: will go nicely and kill the special-casing in procfs.
1077 */
1078static int pipefs_get_sb(struct file_system_type *fs_type,
1079                         int flags, const char *dev_name, void *data,
1080                         struct vfsmount *mnt)
1081{
1082        return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC, mnt);
1083}
1084
1085static struct file_system_type pipe_fs_type = {
1086        .name                = "pipefs",
1087        .get_sb                = pipefs_get_sb,
1088        .kill_sb        = kill_anon_super,
1089};
1090
1091static int __init init_pipe_fs(void)
1092{
1093        int err = register_filesystem(&pipe_fs_type);
1094
1095        if (!err) {
1096                pipe_mnt = kern_mount(&pipe_fs_type);
1097                if (IS_ERR(pipe_mnt)) {
1098                        err = PTR_ERR(pipe_mnt);
1099                        unregister_filesystem(&pipe_fs_type);
1100                }
1101        }
1102        return err;
1103}
1104
1105static void __exit exit_pipe_fs(void)
1106{
1107        unregister_filesystem(&pipe_fs_type);
1108        mntput(pipe_mnt);
1109}
1110
1111fs_initcall(init_pipe_fs);
1112module_exit(exit_pipe_fs);