Showing error 1199

User: Jiri Slaby
Error type: Double Unlock
Error type description: A lock is released twice in sequence without an intervening lock acquisition
File location: fs/nfs/write.c
Line in file: 255
Project: Linux Kernel
Project version: 2.6.28
Tools: Stanse (1.2)
Entered: 2012-04-30 10:52:00 UTC
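
Below is a condensed sketch of the flagged path in nfs_page_async_flush()
(file lines 229-258 of the listing that follows; the annotations are added
here for illustration and are not part of the kernel source). The double
unlock is presumably reported because the analyzer does not model BUG() as
unreachable; note that when the kernel is built with CONFIG_BUG=n, BUG()
can expand to a no-op, in which case execution really does fall through to
a second spin_unlock() on an already-released lock:

        spin_lock(&inode->i_lock);                /* lock acquired (line 229) */
        ...
        if (test_bit(PG_CLEAN, &req->wb_flags)) {
                spin_unlock(&inode->i_lock);      /* first unlock (line 251) */
                BUG();                            /* normally never returns;
                                                   * a no-op with CONFIG_BUG=n */
        }
        if (nfs_set_page_writeback(page) != 0) {
                spin_unlock(&inode->i_lock);      /* reported second unlock
                                                   * (line 255) */
                BUG();
        }
        spin_unlock(&inode->i_lock);              /* unconditional unlock (line 258) */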
Source:

   1/*
   2 * linux/fs/nfs/write.c
   3 *
   4 * Write file data over NFS.
   5 *
   6 * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de>
   7 */
   8
   9#include <linux/types.h>
  10#include <linux/slab.h>
  11#include <linux/mm.h>
  12#include <linux/pagemap.h>
  13#include <linux/file.h>
  14#include <linux/writeback.h>
  15#include <linux/swap.h>
  16
  17#include <linux/sunrpc/clnt.h>
  18#include <linux/nfs_fs.h>
  19#include <linux/nfs_mount.h>
  20#include <linux/nfs_page.h>
  21#include <linux/backing-dev.h>
  22
  23#include <asm/uaccess.h>
  24
  25#include "delegation.h"
  26#include "internal.h"
  27#include "iostat.h"
  28
  29#define NFSDBG_FACILITY                NFSDBG_PAGECACHE
  30
  31#define MIN_POOL_WRITE                (32)
  32#define MIN_POOL_COMMIT                (4)
  33
  34/*
  35 * Local function declarations
  36 */
  37static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc,
  38                                  struct inode *inode, int ioflags);
  39static void nfs_redirty_request(struct nfs_page *req);
  40static const struct rpc_call_ops nfs_write_partial_ops;
  41static const struct rpc_call_ops nfs_write_full_ops;
  42static const struct rpc_call_ops nfs_commit_ops;
  43
  44static struct kmem_cache *nfs_wdata_cachep;
  45static mempool_t *nfs_wdata_mempool;
  46static mempool_t *nfs_commit_mempool;
  47
  48struct nfs_write_data *nfs_commitdata_alloc(void)
  49{
  50        struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS);
  51
  52        if (p) {
  53                memset(p, 0, sizeof(*p));
  54                INIT_LIST_HEAD(&p->pages);
  55        }
  56        return p;
  57}
  58
  59void nfs_commit_free(struct nfs_write_data *p)
  60{
  61        if (p && (p->pagevec != &p->page_array[0]))
  62                kfree(p->pagevec);
  63        mempool_free(p, nfs_commit_mempool);
  64}
  65
  66struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
  67{
  68        struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS);
  69
  70        if (p) {
  71                memset(p, 0, sizeof(*p));
  72                INIT_LIST_HEAD(&p->pages);
  73                p->npages = pagecount;
  74                if (pagecount <= ARRAY_SIZE(p->page_array))
  75                        p->pagevec = p->page_array;
  76                else {
  77                        p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
  78                        if (!p->pagevec) {
  79                                mempool_free(p, nfs_wdata_mempool);
  80                                p = NULL;
  81                        }
  82                }
  83        }
  84        return p;
  85}
  86
  87static void nfs_writedata_free(struct nfs_write_data *p)
  88{
  89        if (p && (p->pagevec != &p->page_array[0]))
  90                kfree(p->pagevec);
  91        mempool_free(p, nfs_wdata_mempool);
  92}
  93
  94void nfs_writedata_release(void *data)
  95{
  96        struct nfs_write_data *wdata = data;
  97
  98        put_nfs_open_context(wdata->args.context);
  99        nfs_writedata_free(wdata);
 100}
 101
 102static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
 103{
 104        ctx->error = error;
 105        smp_wmb();
 106        set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
 107}
 108
 109static struct nfs_page *nfs_page_find_request_locked(struct page *page)
 110{
 111        struct nfs_page *req = NULL;
 112
 113        if (PagePrivate(page)) {
 114                req = (struct nfs_page *)page_private(page);
 115                if (req != NULL)
 116                        kref_get(&req->wb_kref);
 117        }
 118        return req;
 119}
 120
 121static struct nfs_page *nfs_page_find_request(struct page *page)
 122{
 123        struct inode *inode = page->mapping->host;
 124        struct nfs_page *req = NULL;
 125
 126        spin_lock(&inode->i_lock);
 127        req = nfs_page_find_request_locked(page);
 128        spin_unlock(&inode->i_lock);
 129        return req;
 130}
 131
 132/* Adjust the file length if we're writing beyond the end */
 133static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count)
 134{
 135        struct inode *inode = page->mapping->host;
 136        loff_t end, i_size;
 137        pgoff_t end_index;
 138
 139        spin_lock(&inode->i_lock);
 140        i_size = i_size_read(inode);
 141        end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
 142        if (i_size > 0 && page->index < end_index)
 143                goto out;
 144        end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count);
 145        if (i_size >= end)
 146                goto out;
 147        i_size_write(inode, end);
 148        nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
 149out:
 150        spin_unlock(&inode->i_lock);
 151}
 152
 153/* A writeback failed: mark the page as bad, and invalidate the page cache */
 154static void nfs_set_pageerror(struct page *page)
 155{
 156        SetPageError(page);
 157        nfs_zap_mapping(page->mapping->host, page->mapping);
 158}
 159
 160/* We can set the PG_uptodate flag if we see that a write request
 161 * covers the full page.
 162 */
 163static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int count)
 164{
 165        if (PageUptodate(page))
 166                return;
 167        if (base != 0)
 168                return;
 169        if (count != nfs_page_length(page))
 170                return;
 171        SetPageUptodate(page);
 172}
 173
 174static int wb_priority(struct writeback_control *wbc)
 175{
 176        if (wbc->for_reclaim)
 177                return FLUSH_HIGHPRI | FLUSH_STABLE;
 178        if (wbc->for_kupdate)
 179                return FLUSH_LOWPRI;
 180        return 0;
 181}
 182
 183/*
 184 * NFS congestion control
 185 */
 186
 187int nfs_congestion_kb;
 188
 189#define NFS_CONGESTION_ON_THRESH         (nfs_congestion_kb >> (PAGE_SHIFT-10))
 190#define NFS_CONGESTION_OFF_THRESH        \
 191        (NFS_CONGESTION_ON_THRESH - (NFS_CONGESTION_ON_THRESH >> 2))
 192
 193static int nfs_set_page_writeback(struct page *page)
 194{
 195        int ret = test_set_page_writeback(page);
 196
 197        if (!ret) {
 198                struct inode *inode = page->mapping->host;
 199                struct nfs_server *nfss = NFS_SERVER(inode);
 200
 201                if (atomic_long_inc_return(&nfss->writeback) >
 202                                NFS_CONGESTION_ON_THRESH)
 203                        set_bdi_congested(&nfss->backing_dev_info, WRITE);
 204        }
 205        return ret;
 206}
 207
 208static void nfs_end_page_writeback(struct page *page)
 209{
 210        struct inode *inode = page->mapping->host;
 211        struct nfs_server *nfss = NFS_SERVER(inode);
 212
 213        end_page_writeback(page);
 214        if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
 215                clear_bdi_congested(&nfss->backing_dev_info, WRITE);
 216}
 217
 218/*
 219 * Find an associated nfs write request, and prepare to flush it out
 220 * May return an error if the user signalled nfs_wait_on_request().
 221 */
 222static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
 223                                struct page *page)
 224{
 225        struct inode *inode = page->mapping->host;
 226        struct nfs_page *req;
 227        int ret;
 228
 229        spin_lock(&inode->i_lock);
 230        for(;;) {
 231                req = nfs_page_find_request_locked(page);
 232                if (req == NULL) {
 233                        spin_unlock(&inode->i_lock);
 234                        return 0;
 235                }
 236                if (nfs_set_page_tag_locked(req))
 237                        break;
 238                /* Note: If we hold the page lock, as is the case in nfs_writepage,
 239                 *         then the call to nfs_set_page_tag_locked() will always
 240                 *         succeed provided that someone hasn't already marked the
 241                 *         request as dirty (in which case we don't care).
 242                 */
 243                spin_unlock(&inode->i_lock);
 244                ret = nfs_wait_on_request(req);
 245                nfs_release_request(req);
 246                if (ret != 0)
 247                        return ret;
 248                spin_lock(&inode->i_lock);
 249        }
 250        if (test_bit(PG_CLEAN, &req->wb_flags)) {
 251                spin_unlock(&inode->i_lock);
 252                BUG();
 253        }
 254        if (nfs_set_page_writeback(page) != 0) {
 255                spin_unlock(&inode->i_lock);
 256                BUG();
 257        }
 258        spin_unlock(&inode->i_lock);
 259        if (!nfs_pageio_add_request(pgio, req)) {
 260                nfs_redirty_request(req);
 261                return pgio->pg_error;
 262        }
 263        return 0;
 264}
 265
 266static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio)
 267{
 268        struct inode *inode = page->mapping->host;
 269
 270        nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
 271        nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);
 272
 273        nfs_pageio_cond_complete(pgio, page->index);
 274        return nfs_page_async_flush(pgio, page);
 275}
 276
 277/*
 278 * Write an mmapped page to the server.
 279 */
 280static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc)
 281{
 282        struct nfs_pageio_descriptor pgio;
 283        int err;
 284
 285        nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc));
 286        err = nfs_do_writepage(page, wbc, &pgio);
 287        nfs_pageio_complete(&pgio);
 288        if (err < 0)
 289                return err;
 290        if (pgio.pg_error < 0)
 291                return pgio.pg_error;
 292        return 0;
 293}
 294
 295int nfs_writepage(struct page *page, struct writeback_control *wbc)
 296{
 297        int ret;
 298
 299        ret = nfs_writepage_locked(page, wbc);
 300        unlock_page(page);
 301        return ret;
 302}
 303
 304static int nfs_writepages_callback(struct page *page, struct writeback_control *wbc, void *data)
 305{
 306        int ret;
 307
 308        ret = nfs_do_writepage(page, wbc, data);
 309        unlock_page(page);
 310        return ret;
 311}
 312
 313int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
 314{
 315        struct inode *inode = mapping->host;
 316        struct nfs_pageio_descriptor pgio;
 317        int err;
 318
 319        nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
 320
 321        nfs_pageio_init_write(&pgio, inode, wb_priority(wbc));
 322        err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
 323        nfs_pageio_complete(&pgio);
 324        if (err < 0)
 325                return err;
 326        if (pgio.pg_error < 0)
 327                return pgio.pg_error;
 328        return 0;
 329}
 330
 331/*
 332 * Insert a write request into an inode
 333 */
 334static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
 335{
 336        struct nfs_inode *nfsi = NFS_I(inode);
 337        int error;
 338
 339        error = radix_tree_preload(GFP_NOFS);
 340        if (error != 0)
 341                goto out;
 342
 343        /* Lock the request! */
 344        nfs_lock_request_dontget(req);
 345
 346        spin_lock(&inode->i_lock);
 347        error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req);
 348        BUG_ON(error);
 349        if (!nfsi->npages) {
 350                igrab(inode);
 351                if (nfs_have_delegation(inode, FMODE_WRITE))
 352                        nfsi->change_attr++;
 353        }
 354        SetPagePrivate(req->wb_page);
 355        set_page_private(req->wb_page, (unsigned long)req);
 356        nfsi->npages++;
 357        kref_get(&req->wb_kref);
 358        radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index,
 359                                NFS_PAGE_TAG_LOCKED);
 360        spin_unlock(&inode->i_lock);
 361        radix_tree_preload_end();
 362out:
 363        return error;
 364}
 365
 366/*
 367 * Remove a write request from an inode
 368 */
 369static void nfs_inode_remove_request(struct nfs_page *req)
 370{
 371        struct inode *inode = req->wb_context->path.dentry->d_inode;
 372        struct nfs_inode *nfsi = NFS_I(inode);
 373
 374        BUG_ON (!NFS_WBACK_BUSY(req));
 375
 376        spin_lock(&inode->i_lock);
 377        set_page_private(req->wb_page, 0);
 378        ClearPagePrivate(req->wb_page);
 379        radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
 380        nfsi->npages--;
 381        if (!nfsi->npages) {
 382                spin_unlock(&inode->i_lock);
 383                iput(inode);
 384        } else
 385                spin_unlock(&inode->i_lock);
 386        nfs_clear_request(req);
 387        nfs_release_request(req);
 388}
 389
 390static void
 391nfs_mark_request_dirty(struct nfs_page *req)
 392{
 393        __set_page_dirty_nobuffers(req->wb_page);
 394}
 395
 396#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 397/*
 398 * Add a request to the inode's commit list.
 399 */
 400static void
 401nfs_mark_request_commit(struct nfs_page *req)
 402{
 403        struct inode *inode = req->wb_context->path.dentry->d_inode;
 404        struct nfs_inode *nfsi = NFS_I(inode);
 405
 406        spin_lock(&inode->i_lock);
 407        nfsi->ncommit++;
 408        set_bit(PG_CLEAN, &(req)->wb_flags);
 409        radix_tree_tag_set(&nfsi->nfs_page_tree,
 410                        req->wb_index,
 411                        NFS_PAGE_TAG_COMMIT);
 412        spin_unlock(&inode->i_lock);
 413        inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
 414        inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE);
 415        __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
 416}
 417
 418static int
 419nfs_clear_request_commit(struct nfs_page *req)
 420{
 421        struct page *page = req->wb_page;
 422
 423        if (test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) {
 424                dec_zone_page_state(page, NR_UNSTABLE_NFS);
 425                dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE);
 426                return 1;
 427        }
 428        return 0;
 429}
 430
 431static inline
 432int nfs_write_need_commit(struct nfs_write_data *data)
 433{
 434        return data->verf.committed != NFS_FILE_SYNC;
 435}
 436
 437static inline
 438int nfs_reschedule_unstable_write(struct nfs_page *req)
 439{
 440        if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) {
 441                nfs_mark_request_commit(req);
 442                return 1;
 443        }
 444        if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) {
 445                nfs_mark_request_dirty(req);
 446                return 1;
 447        }
 448        return 0;
 449}
 450#else
 451static inline void
 452nfs_mark_request_commit(struct nfs_page *req)
 453{
 454}
 455
 456static inline int
 457nfs_clear_request_commit(struct nfs_page *req)
 458{
 459        return 0;
 460}
 461
 462static inline
 463int nfs_write_need_commit(struct nfs_write_data *data)
 464{
 465        return 0;
 466}
 467
 468static inline
 469int nfs_reschedule_unstable_write(struct nfs_page *req)
 470{
 471        return 0;
 472}
 473#endif
 474
 475/*
 476 * Wait for a request to complete.
 477 *
 478 * Interruptible by fatal signals only.
 479 */
 480static int nfs_wait_on_requests_locked(struct inode *inode, pgoff_t idx_start, unsigned int npages)
 481{
 482        struct nfs_inode *nfsi = NFS_I(inode);
 483        struct nfs_page *req;
 484        pgoff_t idx_end, next;
 485        unsigned int                res = 0;
 486        int                        error;
 487
 488        if (npages == 0)
 489                idx_end = ~0;
 490        else
 491                idx_end = idx_start + npages - 1;
 492
 493        next = idx_start;
 494        while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_LOCKED)) {
 495                if (req->wb_index > idx_end)
 496                        break;
 497
 498                next = req->wb_index + 1;
 499                BUG_ON(!NFS_WBACK_BUSY(req));
 500
 501                kref_get(&req->wb_kref);
 502                spin_unlock(&inode->i_lock);
 503                error = nfs_wait_on_request(req);
 504                nfs_release_request(req);
 505                spin_lock(&inode->i_lock);
 506                if (error < 0)
 507                        return error;
 508                res++;
 509        }
 510        return res;
 511}
 512
 513static void nfs_cancel_commit_list(struct list_head *head)
 514{
 515        struct nfs_page *req;
 516
 517        while(!list_empty(head)) {
 518                req = nfs_list_entry(head->next);
 519                nfs_list_remove_request(req);
 520                nfs_clear_request_commit(req);
 521                nfs_inode_remove_request(req);
 522                nfs_unlock_request(req);
 523        }
 524}
 525
 526#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 527/*
 528 * nfs_scan_commit - Scan an inode for commit requests
 529 * @inode: NFS inode to scan
 530 * @dst: destination list
 531 * @idx_start: lower bound of page->index to scan.
 532 * @npages: idx_start + npages sets the upper bound to scan.
 533 *
 534 * Moves requests from the inode's 'commit' request list.
 535 * The requests are *not* checked to ensure that they form a contiguous set.
 536 */
 537static int
 538nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages)
 539{
 540        struct nfs_inode *nfsi = NFS_I(inode);
 541        int res = 0;
 542
 543        if (nfsi->ncommit != 0) {
 544                res = nfs_scan_list(nfsi, dst, idx_start, npages,
 545                                NFS_PAGE_TAG_COMMIT);
 546                nfsi->ncommit -= res;
 547        }
 548        return res;
 549}
 550#else
 551static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages)
 552{
 553        return 0;
 554}
 555#endif
 556
 557/*
 558 * Search for an existing write request, and attempt to update
 559 * it to reflect a new dirty region on a given page.
 560 *
 561 * If the attempt fails, then the existing request is flushed out
 562 * to disk.
 563 */
 564static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
 565                struct page *page,
 566                unsigned int offset,
 567                unsigned int bytes)
 568{
 569        struct nfs_page *req;
 570        unsigned int rqend;
 571        unsigned int end;
 572        int error;
 573
 574        if (!PagePrivate(page))
 575                return NULL;
 576
 577        end = offset + bytes;
 578        spin_lock(&inode->i_lock);
 579
 580        for (;;) {
 581                req = nfs_page_find_request_locked(page);
 582                if (req == NULL)
 583                        goto out_unlock;
 584
 585                rqend = req->wb_offset + req->wb_bytes;
 586                /*
 587                 * Tell the caller to flush out the request if
 588                 * the offsets are non-contiguous.
 589                 * Note: nfs_flush_incompatible() will already
 590                 * have flushed out requests having wrong owners.
 591                 */
 592                if (offset > rqend
 593                    || end < req->wb_offset)
 594                        goto out_flushme;
 595
 596                if (nfs_set_page_tag_locked(req))
 597                        break;
 598
 599                /* The request is locked, so wait and then retry */
 600                spin_unlock(&inode->i_lock);
 601                error = nfs_wait_on_request(req);
 602                nfs_release_request(req);
 603                if (error != 0)
 604                        goto out_err;
 605                spin_lock(&inode->i_lock);
 606        }
 607
 608        if (nfs_clear_request_commit(req))
 609                radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree,
 610                                req->wb_index, NFS_PAGE_TAG_COMMIT);
 611
 612        /* Okay, the request matches. Update the region */
 613        if (offset < req->wb_offset) {
 614                req->wb_offset = offset;
 615                req->wb_pgbase = offset;
 616        }
 617        if (end > rqend)
 618                req->wb_bytes = end - req->wb_offset;
 619        else
 620                req->wb_bytes = rqend - req->wb_offset;
 621out_unlock:
 622        spin_unlock(&inode->i_lock);
 623        return req;
 624out_flushme:
 625        spin_unlock(&inode->i_lock);
 626        nfs_release_request(req);
 627        error = nfs_wb_page(inode, page);
 628out_err:
 629        return ERR_PTR(error);
 630}
 631
 632/*
 633 * Try to update an existing write request, or create one if there is none.
 634 *
 635 * Note: Should always be called with the Page Lock held to prevent races
 636 * if we have to add a new request. Also assumes that the caller has
 637 * already called nfs_flush_incompatible() if necessary.
 638 */
 639static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
 640                struct page *page, unsigned int offset, unsigned int bytes)
 641{
 642        struct inode *inode = page->mapping->host;
 643        struct nfs_page        *req;
 644        int error;
 645
 646        req = nfs_try_to_update_request(inode, page, offset, bytes);
 647        if (req != NULL)
 648                goto out;
 649        req = nfs_create_request(ctx, inode, page, offset, bytes);
 650        if (IS_ERR(req))
 651                goto out;
 652        error = nfs_inode_add_request(inode, req);
 653        if (error != 0) {
 654                nfs_release_request(req);
 655                req = ERR_PTR(error);
 656        }
 657out:
 658        return req;
 659}
 660
 661static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
 662                unsigned int offset, unsigned int count)
 663{
 664        struct nfs_page        *req;
 665
 666        req = nfs_setup_write_request(ctx, page, offset, count);
 667        if (IS_ERR(req))
 668                return PTR_ERR(req);
 669        /* Update file length */
 670        nfs_grow_file(page, offset, count);
 671        nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
 672        nfs_clear_page_tag_locked(req);
 673        return 0;
 674}
 675
 676int nfs_flush_incompatible(struct file *file, struct page *page)
 677{
 678        struct nfs_open_context *ctx = nfs_file_open_context(file);
 679        struct nfs_page        *req;
 680        int do_flush, status;
 681        /*
 682         * Look for a request corresponding to this page. If there
 683         * is one, and it belongs to another file, we flush it out
 684         * before we try to copy anything into the page. Do this
 685         * due to the lack of an ACCESS-type call in NFSv2.
 686         * Also do the same if we find a request from an existing
 687         * dropped page.
 688         */
 689        do {
 690                req = nfs_page_find_request(page);
 691                if (req == NULL)
 692                        return 0;
 693                do_flush = req->wb_page != page || req->wb_context != ctx;
 694                nfs_release_request(req);
 695                if (!do_flush)
 696                        return 0;
 697                status = nfs_wb_page(page->mapping->host, page);
 698        } while (status == 0);
 699        return status;
 700}
 701
 702/*
 703 * If the page cache is marked as unsafe or invalid, then we can't rely on
 704 * the PageUptodate() flag. In this case, we will need to turn off
 705 * write optimisations that depend on the page contents being correct.
 706 */
 707static int nfs_write_pageuptodate(struct page *page, struct inode *inode)
 708{
 709        return PageUptodate(page) &&
 710                !(NFS_I(inode)->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA));
 711}
 712
 713/*
 714 * Update and possibly write a cached page of an NFS file.
 715 *
 716 * XXX: Keep an eye on generic_file_read to make sure it doesn't do bad
 717 * things with a page scheduled for an RPC call (e.g. invalidate it).
 718 */
 719int nfs_updatepage(struct file *file, struct page *page,
 720                unsigned int offset, unsigned int count)
 721{
 722        struct nfs_open_context *ctx = nfs_file_open_context(file);
 723        struct inode        *inode = page->mapping->host;
 724        int                status = 0;
 725
 726        nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);
 727
 728        dprintk("NFS:       nfs_updatepage(%s/%s %d@%lld)\n",
 729                file->f_path.dentry->d_parent->d_name.name,
 730                file->f_path.dentry->d_name.name, count,
 731                (long long)(page_offset(page) + offset));
 732
 733        /* If we're not using byte range locks, and we know the page
 734         * is up to date, it may be more efficient to extend the write
 735         * to cover the entire page in order to avoid fragmentation
 736         * inefficiencies.
 737         */
 738        if (nfs_write_pageuptodate(page, inode) &&
 739                        inode->i_flock == NULL &&
 740                        !(file->f_flags & O_SYNC)) {
 741                count = max(count + offset, nfs_page_length(page));
 742                offset = 0;
 743        }
 744
 745        status = nfs_writepage_setup(ctx, page, offset, count);
 746        if (status < 0)
 747                nfs_set_pageerror(page);
 748        else
 749                __set_page_dirty_nobuffers(page);
 750
 751        dprintk("NFS:       nfs_updatepage returns %d (isize %lld)\n",
 752                        status, (long long)i_size_read(inode));
 753        return status;
 754}
 755
 756static void nfs_writepage_release(struct nfs_page *req)
 757{
 758
 759        if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) {
 760                nfs_end_page_writeback(req->wb_page);
 761                nfs_inode_remove_request(req);
 762        } else
 763                nfs_end_page_writeback(req->wb_page);
 764        nfs_clear_page_tag_locked(req);
 765}
 766
 767static int flush_task_priority(int how)
 768{
 769        switch (how & (FLUSH_HIGHPRI|FLUSH_LOWPRI)) {
 770                case FLUSH_HIGHPRI:
 771                        return RPC_PRIORITY_HIGH;
 772                case FLUSH_LOWPRI:
 773                        return RPC_PRIORITY_LOW;
 774        }
 775        return RPC_PRIORITY_NORMAL;
 776}
 777
 778/*
 779 * Set up the argument/result storage required for the RPC call.
 780 */
 781static int nfs_write_rpcsetup(struct nfs_page *req,
 782                struct nfs_write_data *data,
 783                const struct rpc_call_ops *call_ops,
 784                unsigned int count, unsigned int offset,
 785                int how)
 786{
 787        struct inode *inode = req->wb_context->path.dentry->d_inode;
 788        int flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
 789        int priority = flush_task_priority(how);
 790        struct rpc_task *task;
 791        struct rpc_message msg = {
 792                .rpc_argp = &data->args,
 793                .rpc_resp = &data->res,
 794                .rpc_cred = req->wb_context->cred,
 795        };
 796        struct rpc_task_setup task_setup_data = {
 797                .rpc_client = NFS_CLIENT(inode),
 798                .task = &data->task,
 799                .rpc_message = &msg,
 800                .callback_ops = call_ops,
 801                .callback_data = data,
 802                .workqueue = nfsiod_workqueue,
 803                .flags = flags,
 804                .priority = priority,
 805        };
 806
 807        /* Set up the RPC argument and reply structs
 808         * NB: take care not to mess about with data->commit et al. */
 809
 810        data->req = req;
 811        data->inode = inode = req->wb_context->path.dentry->d_inode;
 812        data->cred = msg.rpc_cred;
 813
 814        data->args.fh     = NFS_FH(inode);
 815        data->args.offset = req_offset(req) + offset;
 816        data->args.pgbase = req->wb_pgbase + offset;
 817        data->args.pages  = data->pagevec;
 818        data->args.count  = count;
 819        data->args.context = get_nfs_open_context(req->wb_context);
 820        data->args.stable  = NFS_UNSTABLE;
 821        if (how & FLUSH_STABLE) {
 822                data->args.stable = NFS_DATA_SYNC;
 823                if (!NFS_I(inode)->ncommit)
 824                        data->args.stable = NFS_FILE_SYNC;
 825        }
 826
 827        data->res.fattr   = &data->fattr;
 828        data->res.count   = count;
 829        data->res.verf    = &data->verf;
 830        nfs_fattr_init(&data->fattr);
 831
 832        /* Set up the initial task struct.  */
 833        NFS_PROTO(inode)->write_setup(data, &msg);
 834
 835        dprintk("NFS: %5u initiated write call "
 836                "(req %s/%lld, %u bytes @ offset %llu)\n",
 837                data->task.tk_pid,
 838                inode->i_sb->s_id,
 839                (long long)NFS_FILEID(inode),
 840                count,
 841                (unsigned long long)data->args.offset);
 842
 843        task = rpc_run_task(&task_setup_data);
 844        if (IS_ERR(task))
 845                return PTR_ERR(task);
 846        rpc_put_task(task);
 847        return 0;
 848}
 849
 850/* If a nfs_flush_* function fails, it should remove reqs from @head and
 851 * call this on each, which will prepare them to be retried on next
 852 * writeback using standard nfs.
 853 */
 854static void nfs_redirty_request(struct nfs_page *req)
 855{
 856        nfs_mark_request_dirty(req);
 857        nfs_end_page_writeback(req->wb_page);
 858        nfs_clear_page_tag_locked(req);
 859}
 860
 861/*
 862 * Generate multiple small requests to write out a single
 863 * contiguous dirty area on one page.
 864 */
 865static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int how)
 866{
 867        struct nfs_page *req = nfs_list_entry(head->next);
 868        struct page *page = req->wb_page;
 869        struct nfs_write_data *data;
 870        size_t wsize = NFS_SERVER(inode)->wsize, nbytes;
 871        unsigned int offset;
 872        int requests = 0;
 873        int ret = 0;
 874        LIST_HEAD(list);
 875
 876        nfs_list_remove_request(req);
 877
 878        nbytes = count;
 879        do {
 880                size_t len = min(nbytes, wsize);
 881
 882                data = nfs_writedata_alloc(1);
 883                if (!data)
 884                        goto out_bad;
 885                list_add(&data->pages, &list);
 886                requests++;
 887                nbytes -= len;
 888        } while (nbytes != 0);
 889        atomic_set(&req->wb_complete, requests);
 890
 891        ClearPageError(page);
 892        offset = 0;
 893        nbytes = count;
 894        do {
 895                int ret2;
 896
 897                data = list_entry(list.next, struct nfs_write_data, pages);
 898                list_del_init(&data->pages);
 899
 900                data->pagevec[0] = page;
 901
 902                if (nbytes < wsize)
 903                        wsize = nbytes;
 904                ret2 = nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
 905                                   wsize, offset, how);
 906                if (ret == 0)
 907                        ret = ret2;
 908                offset += wsize;
 909                nbytes -= wsize;
 910        } while (nbytes != 0);
 911
 912        return ret;
 913
 914out_bad:
 915        while (!list_empty(&list)) {
 916                data = list_entry(list.next, struct nfs_write_data, pages);
 917                list_del(&data->pages);
 918                nfs_writedata_release(data);
 919        }
 920        nfs_redirty_request(req);
 921        return -ENOMEM;
 922}
 923
 924/*
 925 * Create an RPC task for the given write request and kick it.
 926 * The page must have been locked by the caller.
 927 *
 928 * It may happen that the page we're passed is not marked dirty.
 929 * This is the case if nfs_updatepage detects a conflicting request
 930 * that has been written but not committed.
 931 */
 932static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int how)
 933{
 934        struct nfs_page                *req;
 935        struct page                **pages;
 936        struct nfs_write_data        *data;
 937
 938        data = nfs_writedata_alloc(npages);
 939        if (!data)
 940                goto out_bad;
 941
 942        pages = data->pagevec;
 943        while (!list_empty(head)) {
 944                req = nfs_list_entry(head->next);
 945                nfs_list_remove_request(req);
 946                nfs_list_add_request(req, &data->pages);
 947                ClearPageError(req->wb_page);
 948                *pages++ = req->wb_page;
 949        }
 950        req = nfs_list_entry(data->pages.next);
 951
 952        /* Set up the argument struct */
 953        return nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how);
 954 out_bad:
 955        while (!list_empty(head)) {
 956                req = nfs_list_entry(head->next);
 957                nfs_list_remove_request(req);
 958                nfs_redirty_request(req);
 959        }
 960        return -ENOMEM;
 961}
 962
 963static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
 964                                  struct inode *inode, int ioflags)
 965{
 966        size_t wsize = NFS_SERVER(inode)->wsize;
 967
 968        if (wsize < PAGE_CACHE_SIZE)
 969                nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags);
 970        else
 971                nfs_pageio_init(pgio, inode, nfs_flush_one, wsize, ioflags);
 972}
 973
 974/*
 975 * Handle a write reply that flushed part of a page.
 976 */
 977static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
 978{
 979        struct nfs_write_data        *data = calldata;
 980
 981        dprintk("NFS: %5u write(%s/%lld %d@%lld)",
 982                task->tk_pid,
 983                data->req->wb_context->path.dentry->d_inode->i_sb->s_id,
 984                (long long)
 985                  NFS_FILEID(data->req->wb_context->path.dentry->d_inode),
 986                data->req->wb_bytes, (long long)req_offset(data->req));
 987
 988        nfs_writeback_done(task, data);
 989}
 990
 991static void nfs_writeback_release_partial(void *calldata)
 992{
 993        struct nfs_write_data        *data = calldata;
 994        struct nfs_page                *req = data->req;
 995        struct page                *page = req->wb_page;
 996        int status = data->task.tk_status;
 997
 998        if (status < 0) {
 999                nfs_set_pageerror(page);
1000                nfs_context_set_write_error(req->wb_context, status);
1001                dprintk(", error = %d\n", status);
1002                goto out;
1003        }
1004
1005        if (nfs_write_need_commit(data)) {
1006                struct inode *inode = page->mapping->host;
1007
1008                spin_lock(&inode->i_lock);
1009                if (test_bit(PG_NEED_RESCHED, &req->wb_flags)) {
1010                        /* Do nothing we need to resend the writes */
1011                } else if (!test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags)) {
1012                        memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
1013                        dprintk(" defer commit\n");
1014                } else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf))) {
1015                        set_bit(PG_NEED_RESCHED, &req->wb_flags);
1016                        clear_bit(PG_NEED_COMMIT, &req->wb_flags);
1017                        dprintk(" server reboot detected\n");
1018                }
1019                spin_unlock(&inode->i_lock);
1020        } else
1021                dprintk(" OK\n");
1022
1023out:
1024        if (atomic_dec_and_test(&req->wb_complete))
1025                nfs_writepage_release(req);
1026        nfs_writedata_release(calldata);
1027}
1028
1029static const struct rpc_call_ops nfs_write_partial_ops = {
1030        .rpc_call_done = nfs_writeback_done_partial,
1031        .rpc_release = nfs_writeback_release_partial,
1032};
1033
1034/*
1035 * Handle a write reply that flushes a whole page.
1036 *
1037 * FIXME: There is an inherent race with invalidate_inode_pages and
1038 *          writebacks since the page->count is kept > 1 for as long
1039 *          as the page has a write request pending.
1040 */
1041static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
1042{
1043        struct nfs_write_data        *data = calldata;
1044
1045        nfs_writeback_done(task, data);
1046}
1047
1048static void nfs_writeback_release_full(void *calldata)
1049{
1050        struct nfs_write_data        *data = calldata;
1051        int status = data->task.tk_status;
1052
1053        /* Update attributes as result of writeback. */
1054        while (!list_empty(&data->pages)) {
1055                struct nfs_page *req = nfs_list_entry(data->pages.next);
1056                struct page *page = req->wb_page;
1057
1058                nfs_list_remove_request(req);
1059
1060                dprintk("NFS: %5u write (%s/%lld %d@%lld)",
1061                        data->task.tk_pid,
1062                        req->wb_context->path.dentry->d_inode->i_sb->s_id,
1063                        (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
1064                        req->wb_bytes,
1065                        (long long)req_offset(req));
1066
1067                if (status < 0) {
1068                        nfs_set_pageerror(page);
1069                        nfs_context_set_write_error(req->wb_context, status);
1070                        dprintk(", error = %d\n", status);
1071                        goto remove_request;
1072                }
1073
1074                if (nfs_write_need_commit(data)) {
1075                        memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
1076                        nfs_mark_request_commit(req);
1077                        nfs_end_page_writeback(page);
1078                        dprintk(" marked for commit\n");
1079                        goto next;
1080                }
1081                dprintk(" OK\n");
1082remove_request:
1083                nfs_end_page_writeback(page);
1084                nfs_inode_remove_request(req);
1085        next:
1086                nfs_clear_page_tag_locked(req);
1087        }
1088        nfs_writedata_release(calldata);
1089}
1090
1091static const struct rpc_call_ops nfs_write_full_ops = {
1092        .rpc_call_done = nfs_writeback_done_full,
1093        .rpc_release = nfs_writeback_release_full,
1094};
1095
1096
1097/*
1098 * This function is called when the WRITE call is complete.
1099 */
1100int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1101{
1102        struct nfs_writeargs        *argp = &data->args;
1103        struct nfs_writeres        *resp = &data->res;
1104        int status;
1105
1106        dprintk("NFS: %5u nfs_writeback_done (status %d)\n",
1107                task->tk_pid, task->tk_status);
1108
1109        /*
1110         * ->write_done will attempt to use post-op attributes to detect
1111         * conflicting writes by other clients.  A strict interpretation
1112         * of close-to-open would allow us to continue caching even if
1113         * another writer had changed the file, but some applications
1114         * depend on tighter cache coherency when writing.
1115         */
1116        status = NFS_PROTO(data->inode)->write_done(task, data);
1117        if (status != 0)
1118                return status;
1119        nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count);
1120
1121#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
1122        if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
1123                /* We tried a write call, but the server did not
1124                 * commit data to stable storage even though we
1125                 * requested it.
1126                 * Note: There is a known bug in Tru64 < 5.0 in which
1127                 *         the server reports NFS_DATA_SYNC, but performs
1128                 *         NFS_FILE_SYNC. We therefore implement this checking
1129                 *         as a dprintk() in order to avoid filling syslog.
1130                 */
1131                static unsigned long    complain;
1132
1133                if (time_before(complain, jiffies)) {
1134                        dprintk("NFS:       faulty NFS server %s:"
1135                                " (committed = %d) != (stable = %d)\n",
1136                                NFS_SERVER(data->inode)->nfs_client->cl_hostname,
1137                                resp->verf->committed, argp->stable);
1138                        complain = jiffies + 300 * HZ;
1139                }
1140        }
1141#endif
1142        /* Is this a short write? */
1143        if (task->tk_status >= 0 && resp->count < argp->count) {
1144                static unsigned long    complain;
1145
1146                nfs_inc_stats(data->inode, NFSIOS_SHORTWRITE);
1147
1148                /* Has the server at least made some progress? */
1149                if (resp->count != 0) {
1150                        /* Was this an NFSv2 write or an NFSv3 stable write? */
1151                        if (resp->verf->committed != NFS_UNSTABLE) {
1152                                /* Resend from where the server left off */
1153                                argp->offset += resp->count;
1154                                argp->pgbase += resp->count;
1155                                argp->count -= resp->count;
1156                        } else {
1157                                /* Resend as a stable write in order to avoid
1158                                 * headaches in the case of a server crash.
1159                                 */
1160                                argp->stable = NFS_FILE_SYNC;
1161                        }
1162                        rpc_restart_call(task);
1163                        return -EAGAIN;
1164                }
1165                if (time_before(complain, jiffies)) {
1166                        printk(KERN_WARNING
1167                               "NFS: Server wrote zero bytes, expected %u.\n",
1168                                        argp->count);
1169                        complain = jiffies + 300 * HZ;
1170                }
1171                /* Can't do anything about it except throw an error. */
1172                task->tk_status = -EIO;
1173        }
1174        return 0;
1175}
1176
1177
1178#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
1179void nfs_commitdata_release(void *data)
1180{
1181        struct nfs_write_data *wdata = data;
1182
1183        put_nfs_open_context(wdata->args.context);
1184        nfs_commit_free(wdata);
1185}
1186
1187/*
1188 * Set up the argument/result storage required for the RPC call.
1189 */
1190static int nfs_commit_rpcsetup(struct list_head *head,
1191                struct nfs_write_data *data,
1192                int how)
1193{
1194        struct nfs_page *first = nfs_list_entry(head->next);
1195        struct inode *inode = first->wb_context->path.dentry->d_inode;
1196        int flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
1197        int priority = flush_task_priority(how);
1198        struct rpc_task *task;
1199        struct rpc_message msg = {
1200                .rpc_argp = &data->args,
1201                .rpc_resp = &data->res,
1202                .rpc_cred = first->wb_context->cred,
1203        };
1204        struct rpc_task_setup task_setup_data = {
1205                .task = &data->task,
1206                .rpc_client = NFS_CLIENT(inode),
1207                .rpc_message = &msg,
1208                .callback_ops = &nfs_commit_ops,
1209                .callback_data = data,
1210                .workqueue = nfsiod_workqueue,
1211                .flags = flags,
1212                .priority = priority,
1213        };
1214
1215        /* Set up the RPC argument and reply structs
1216         * NB: take care not to mess about with data->commit et al. */
1217
1218        list_splice_init(head, &data->pages);
1219
1220        data->inode          = inode;
1221        data->cred          = msg.rpc_cred;
1222
1223        data->args.fh     = NFS_FH(data->inode);
1224        /* Note: we always request a commit of the entire inode */
1225        data->args.offset = 0;
1226        data->args.count  = 0;
1227        data->args.context = get_nfs_open_context(first->wb_context);
1228        data->res.count   = 0;
1229        data->res.fattr   = &data->fattr;
1230        data->res.verf    = &data->verf;
1231        nfs_fattr_init(&data->fattr);
1232
1233        /* Set up the initial task struct.  */
1234        NFS_PROTO(inode)->commit_setup(data, &msg);
1235
1236        dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
1237
1238        task = rpc_run_task(&task_setup_data);
1239        if (IS_ERR(task))
1240                return PTR_ERR(task);
1241        rpc_put_task(task);
1242        return 0;
1243}
1244
1245/*
1246 * Commit dirty pages
1247 */
1248static int
1249nfs_commit_list(struct inode *inode, struct list_head *head, int how)
1250{
1251        struct nfs_write_data        *data;
1252        struct nfs_page         *req;
1253
1254        data = nfs_commitdata_alloc();
1255
1256        if (!data)
1257                goto out_bad;
1258
1259        /* Set up the argument struct */
1260        return nfs_commit_rpcsetup(head, data, how);
1261 out_bad:
1262        while (!list_empty(head)) {
1263                req = nfs_list_entry(head->next);
1264                nfs_list_remove_request(req);
1265                nfs_mark_request_commit(req);
1266                dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
1267                dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
1268                                BDI_RECLAIMABLE);
1269                nfs_clear_page_tag_locked(req);
1270        }
1271        return -ENOMEM;
1272}
1273
1274/*
1275 * COMMIT call returned
1276 */
1277static void nfs_commit_done(struct rpc_task *task, void *calldata)
1278{
1279        struct nfs_write_data        *data = calldata;
1280
1281        dprintk("NFS: %5u nfs_commit_done (status %d)\n",
1282                                task->tk_pid, task->tk_status);
1283
1284        /* Call the NFS version-specific code */
1285        if (NFS_PROTO(data->inode)->commit_done(task, data) != 0)
1286                return;
1287}
1288
1289static void nfs_commit_release(void *calldata)
1290{
1291        struct nfs_write_data        *data = calldata;
1292        struct nfs_page                *req;
1293        int status = data->task.tk_status;
1294
1295        while (!list_empty(&data->pages)) {
1296                req = nfs_list_entry(data->pages.next);
1297                nfs_list_remove_request(req);
1298                nfs_clear_request_commit(req);
1299
1300                dprintk("NFS:       commit (%s/%lld %d@%lld)",
1301                        req->wb_context->path.dentry->d_inode->i_sb->s_id,
1302                        (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
1303                        req->wb_bytes,
1304                        (long long)req_offset(req));
1305                if (status < 0) {
1306                        nfs_context_set_write_error(req->wb_context, status);
1307                        nfs_inode_remove_request(req);
1308                        dprintk(", error = %d\n", status);
1309                        goto next;
1310                }
1311
1312                /* Okay, COMMIT succeeded, apparently. Check the verifier
1313                 * returned by the server against all stored verfs. */
1314                if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) {
1315                        /* We have a match */
1316                        nfs_inode_remove_request(req);
1317                        dprintk(" OK\n");
1318                        goto next;
1319                }
1320                /* We have a mismatch. Write the page again */
1321                dprintk(" mismatch\n");
1322                nfs_mark_request_dirty(req);
1323        next:
1324                nfs_clear_page_tag_locked(req);
1325        }
1326        nfs_commitdata_release(calldata);
1327}
1328
1329static const struct rpc_call_ops nfs_commit_ops = {
1330        .rpc_call_done = nfs_commit_done,
1331        .rpc_release = nfs_commit_release,
1332};
1333
1334int nfs_commit_inode(struct inode *inode, int how)
1335{
1336        LIST_HEAD(head);
1337        int res;
1338
1339        spin_lock(&inode->i_lock);
1340        res = nfs_scan_commit(inode, &head, 0, 0);
1341        spin_unlock(&inode->i_lock);
1342        if (res) {
1343                int error = nfs_commit_list(inode, &head, how);
1344                if (error < 0)
1345                        return error;
1346        }
1347        return res;
1348}
1349#else
1350static inline int nfs_commit_list(struct inode *inode, struct list_head *head, int how)
1351{
1352        return 0;
1353}
1354#endif
1355
1356long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how)
1357{
1358        struct inode *inode = mapping->host;
1359        pgoff_t idx_start, idx_end;
1360        unsigned int npages = 0;
1361        LIST_HEAD(head);
1362        int nocommit = how & FLUSH_NOCOMMIT;
1363        long pages, ret;
1364
1365        /* FIXME */
1366        if (wbc->range_cyclic)
1367                idx_start = 0;
1368        else {
1369                idx_start = wbc->range_start >> PAGE_CACHE_SHIFT;
1370                idx_end = wbc->range_end >> PAGE_CACHE_SHIFT;
1371                if (idx_end > idx_start) {
1372                        pgoff_t l_npages = 1 + idx_end - idx_start;
1373                        npages = l_npages;
1374                        if (sizeof(npages) != sizeof(l_npages) &&
1375                                        (pgoff_t)npages != l_npages)
1376                                npages = 0;
1377                }
1378        }
1379        how &= ~FLUSH_NOCOMMIT;
1380        spin_lock(&inode->i_lock);
1381        do {
1382                ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
1383                if (ret != 0)
1384                        continue;
1385                if (nocommit)
1386                        break;
1387                pages = nfs_scan_commit(inode, &head, idx_start, npages);
1388                if (pages == 0)
1389                        break;
1390                if (how & FLUSH_INVALIDATE) {
1391                        spin_unlock(&inode->i_lock);
1392                        nfs_cancel_commit_list(&head);
1393                        ret = pages;
1394                        spin_lock(&inode->i_lock);
1395                        continue;
1396                }
1397                pages += nfs_scan_commit(inode, &head, 0, 0);
1398                spin_unlock(&inode->i_lock);
1399                ret = nfs_commit_list(inode, &head, how);
1400                spin_lock(&inode->i_lock);
1401
1402        } while (ret >= 0);
1403        spin_unlock(&inode->i_lock);
1404        return ret;
1405}
1406
1407static int __nfs_write_mapping(struct address_space *mapping, struct writeback_control *wbc, int how)
1408{
1409        int ret;
1410
1411        ret = nfs_writepages(mapping, wbc);
1412        if (ret < 0)
1413                goto out;
1414        ret = nfs_sync_mapping_wait(mapping, wbc, how);
1415        if (ret < 0)
1416                goto out;
1417        return 0;
1418out:
1419        __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
1420        return ret;
1421}
1422
1423/* Two pass sync: first using WB_SYNC_NONE, then WB_SYNC_ALL */
1424static int nfs_write_mapping(struct address_space *mapping, int how)
1425{
1426        struct writeback_control wbc = {
1427                .bdi = mapping->backing_dev_info,
1428                .sync_mode = WB_SYNC_NONE,
1429                .nr_to_write = LONG_MAX,
1430                .range_start = 0,
1431                .range_end = LLONG_MAX,
1432                .for_writepages = 1,
1433        };
1434        int ret;
1435
1436        ret = __nfs_write_mapping(mapping, &wbc, how);
1437        if (ret < 0)
1438                return ret;
1439        wbc.sync_mode = WB_SYNC_ALL;
1440        return __nfs_write_mapping(mapping, &wbc, how);
1441}
1442
1443/*
1444 * flush the inode to disk.
1445 */
1446int nfs_wb_all(struct inode *inode)
1447{
1448        return nfs_write_mapping(inode->i_mapping, 0);
1449}
1450
1451int nfs_wb_nocommit(struct inode *inode)
1452{
1453        return nfs_write_mapping(inode->i_mapping, FLUSH_NOCOMMIT);
1454}
1455
1456int nfs_wb_page_cancel(struct inode *inode, struct page *page)
1457{
1458        struct nfs_page *req;
1459        loff_t range_start = page_offset(page);
1460        loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
1461        struct writeback_control wbc = {
1462                .bdi = page->mapping->backing_dev_info,
1463                .sync_mode = WB_SYNC_ALL,
1464                .nr_to_write = LONG_MAX,
1465                .range_start = range_start,
1466                .range_end = range_end,
1467        };
1468        int ret = 0;
1469
1470        BUG_ON(!PageLocked(page));
1471        for (;;) {
1472                req = nfs_page_find_request(page);
1473                if (req == NULL)
1474                        goto out;
1475                if (test_bit(PG_CLEAN, &req->wb_flags)) {
1476                        nfs_release_request(req);
1477                        break;
1478                }
1479                if (nfs_lock_request_dontget(req)) {
1480                        nfs_inode_remove_request(req);
1481                        /*
1482                         * In case nfs_inode_remove_request has marked the
1483                         * page as being dirty
1484                         */
1485                        cancel_dirty_page(page, PAGE_CACHE_SIZE);
1486                        nfs_unlock_request(req);
1487                        break;
1488                }
1489                ret = nfs_wait_on_request(req);
1490                if (ret < 0)
1491                        goto out;
1492        }
1493        if (!PagePrivate(page))
1494                return 0;
1495        ret = nfs_sync_mapping_wait(page->mapping, &wbc, FLUSH_INVALIDATE);
1496out:
1497        return ret;
1498}
1499
1500static int nfs_wb_page_priority(struct inode *inode, struct page *page,
1501                                int how)
1502{
1503        loff_t range_start = page_offset(page);
1504        loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
1505        struct writeback_control wbc = {
1506                .bdi = page->mapping->backing_dev_info,
1507                .sync_mode = WB_SYNC_ALL,
1508                .nr_to_write = LONG_MAX,
1509                .range_start = range_start,
1510                .range_end = range_end,
1511        };
1512        int ret;
1513
1514        do {
1515                if (clear_page_dirty_for_io(page)) {
1516                        ret = nfs_writepage_locked(page, &wbc);
1517                        if (ret < 0)
1518                                goto out_error;
1519                } else if (!PagePrivate(page))
1520                        break;
1521                ret = nfs_sync_mapping_wait(page->mapping, &wbc, how);
1522                if (ret < 0)
1523                        goto out_error;
1524        } while (PagePrivate(page));
1525        return 0;
1526out_error:
1527        __mark_inode_dirty(inode, I_DIRTY_PAGES);
1528        return ret;
1529}
1530
1531/*
1532 * Write back all requests on one page - we do this before reading it.
1533 */
1534int nfs_wb_page(struct inode *inode, struct page* page)
1535{
1536        return nfs_wb_page_priority(inode, page, FLUSH_STABLE);
1537}
1538
1539int __init nfs_init_writepagecache(void)
1540{
1541        nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
1542                                             sizeof(struct nfs_write_data),
1543                                             0, SLAB_HWCACHE_ALIGN,
1544                                             NULL);
1545        if (nfs_wdata_cachep == NULL)
1546                return -ENOMEM;
1547
1548        nfs_wdata_mempool = mempool_create_slab_pool(MIN_POOL_WRITE,
1549                                                     nfs_wdata_cachep);
1550        if (nfs_wdata_mempool == NULL)
1551                return -ENOMEM;
1552
1553        nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT,
1554                                                      nfs_wdata_cachep);
1555        if (nfs_commit_mempool == NULL)
1556                return -ENOMEM;
1557
1558        /*
1559         * NFS congestion size, scale with available memory.
1560         *
1561         *  64MB:    8192k
1562         * 128MB:   11585k
1563         * 256MB:   16384k
1564         * 512MB:   23170k
1565         *   1GB:   32768k
1566         *   2GB:   46340k
1567         *   4GB:   65536k
1568         *   8GB:   92681k
1569         *  16GB:  131072k
1570         *
1571         * This allows larger machines to have larger/more transfers.
1572         * Limit the default to 256M
1573         */
1574        nfs_congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10);
1575        if (nfs_congestion_kb > 256*1024)
1576                nfs_congestion_kb = 256*1024;
1577
1578        return 0;
1579}
1580
1581void nfs_destroy_writepagecache(void)
1582{
1583        mempool_destroy(nfs_commit_mempool);
1584        mempool_destroy(nfs_wdata_mempool);
1585        kmem_cache_destroy(nfs_wdata_cachep);
1586}
1587