Showing error 1348

User: Jiri Slaby
Error type: Leaving function in locked state
Error type description: A lock is acquired but not released on every exit path of a function, so the lock is leaked
File location: fs/jbd2/transaction.c
Line in file: 479
Project: Linux Kernel
Project version: 2.6.28
Tools: Stanse (1.2)
Entered: 2012-05-21 20:30:05 UTC
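
Note: line 479 is the closing brace of jbd2_journal_lock_updates() (source lines 443-479 below). The function takes journal->j_barrier at line 478 and deliberately returns with it held; the mutex is released by its counterpart, jbd2_journal_unlock_updates(), at line 493. Holding the lock across the return is the intended transaction-barrier semantics here, so this report looks like a false positive of the intra-procedural lock checker.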


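For reference, here is a minimal userspace sketch of the pattern the checker objects to (illustrative only: it uses pthreads rather than the kernel's mutex API, and the function names are invented):

#include <pthread.h>

static pthread_mutex_t barrier = PTHREAD_MUTEX_INITIALIZER;

/*
 * Deliberately returns with the mutex held -- exactly what the
 * "leaving function in locked state" checker flags, since on every
 * exit path the lock taken here is still owned.
 */
void lock_updates(void)
{
        pthread_mutex_lock(&barrier);
}

/* The counterpart release: the lock/unlock pairing spans two functions. */
void unlock_updates(void)
{
        pthread_mutex_unlock(&barrier);
}

A purely intra-procedural analysis cannot see that such functions are only ever called as a pair, which is why barrier-style locking like the jbd2 code below is a common source of false positives for this error class.
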
Source:

   1/*
   2 * linux/fs/jbd2/transaction.c
   3 *
   4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
   5 *
   6 * Copyright 1998 Red Hat corp --- All Rights Reserved
   7 *
   8 * This file is part of the Linux kernel and is made available under
   9 * the terms of the GNU General Public License, version 2, or at your
  10 * option, any later version, incorporated herein by reference.
  11 *
  12 * Generic filesystem transaction handling code; part of the ext2fs
  13 * journaling system.
  14 *
  15 * This file manages transactions (compound commits managed by the
  16 * journaling code) and handles (individual atomic operations by the
  17 * filesystem).
  18 */
  19
  20#include <linux/time.h>
  21#include <linux/fs.h>
  22#include <linux/jbd2.h>
  23#include <linux/errno.h>
  24#include <linux/slab.h>
  25#include <linux/timer.h>
  26#include <linux/mm.h>
  27#include <linux/highmem.h>
  28
  29static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
  30
  31/*
  32 * jbd2_get_transaction: obtain a new transaction_t object.
  33 *
  34 * Simply allocate and initialise a new transaction.  Create it in
  35 * RUNNING state and add it to the current journal (which should not
  36 * have an existing running transaction: we only make a new transaction
  37 * once we have started to commit the old one).
  38 *
  39 * Preconditions:
  40 *        The journal MUST be locked.  We don't perform atomic mallocs on the
  41 *        new transaction and we can't block without protecting against other
  42 *        processes trying to touch the journal while it is in transition.
  43 *
  44 */
  45
  46static transaction_t *
  47jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
  48{
  49        transaction->t_journal = journal;
  50        transaction->t_state = T_RUNNING;
  51        transaction->t_tid = journal->j_transaction_sequence++;
  52        transaction->t_expires = jiffies + journal->j_commit_interval;
  53        spin_lock_init(&transaction->t_handle_lock);
  54        INIT_LIST_HEAD(&transaction->t_inode_list);
  55        INIT_LIST_HEAD(&transaction->t_private_list);
  56
  57        /* Set up the commit timer for the new transaction. */
  58        journal->j_commit_timer.expires = round_jiffies(transaction->t_expires);
  59        add_timer(&journal->j_commit_timer);
  60
  61        J_ASSERT(journal->j_running_transaction == NULL);
  62        journal->j_running_transaction = transaction;
  63        transaction->t_max_wait = 0;
  64        transaction->t_start = jiffies;
  65
  66        return transaction;
  67}
  68
  69/*
  70 * Handle management.
  71 *
  72 * A handle_t is an object which represents a single atomic update to a
  73 * filesystem, and which tracks all of the modifications which form part
  74 * of that one update.
  75 */
  76
  77/*
  78 * start_this_handle: Given a handle, deal with any locking or stalling
  79 * needed to make sure that there is enough journal space for the handle
  80 * to begin.  Attach the handle to a transaction and set up the
  81 * transaction's buffer credits.
  82 */
  83
  84static int start_this_handle(journal_t *journal, handle_t *handle)
  85{
  86        transaction_t *transaction;
  87        int needed;
  88        int nblocks = handle->h_buffer_credits;
  89        transaction_t *new_transaction = NULL;
  90        int ret = 0;
  91        unsigned long ts = jiffies;
  92
  93        if (nblocks > journal->j_max_transaction_buffers) {
  94                printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n",
  95                       current->comm, nblocks,
  96                       journal->j_max_transaction_buffers);
  97                ret = -ENOSPC;
  98                goto out;
  99        }
 100
 101alloc_transaction:
 102        if (!journal->j_running_transaction) {
 103                new_transaction = kzalloc(sizeof(*new_transaction),
 104                                                GFP_NOFS|__GFP_NOFAIL);
 105                if (!new_transaction) {
 106                        ret = -ENOMEM;
 107                        goto out;
 108                }
 109        }
 110
 111        jbd_debug(3, "New handle %p going live.\n", handle);
 112
 113repeat:
 114
 115        /*
 116         * We need to hold j_state_lock until t_updates has been incremented,
 117         * for proper journal barrier handling
 118         */
 119        spin_lock(&journal->j_state_lock);
 120repeat_locked:
 121        if (is_journal_aborted(journal) ||
 122            (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) {
 123                spin_unlock(&journal->j_state_lock);
 124                ret = -EROFS;
 125                goto out;
 126        }
 127
 128        /* Wait on the journal's transaction barrier if necessary */
 129        if (journal->j_barrier_count) {
 130                spin_unlock(&journal->j_state_lock);
 131                wait_event(journal->j_wait_transaction_locked,
 132                                journal->j_barrier_count == 0);
 133                goto repeat;
 134        }
 135
 136        if (!journal->j_running_transaction) {
 137                if (!new_transaction) {
 138                        spin_unlock(&journal->j_state_lock);
 139                        goto alloc_transaction;
 140                }
 141                jbd2_get_transaction(journal, new_transaction);
 142                new_transaction = NULL;
 143        }
 144
 145        transaction = journal->j_running_transaction;
 146
 147        /*
 148         * If the current transaction is locked down for commit, wait for the
 149         * lock to be released.
 150         */
 151        if (transaction->t_state == T_LOCKED) {
 152                DEFINE_WAIT(wait);
 153
 154                prepare_to_wait(&journal->j_wait_transaction_locked,
 155                                        &wait, TASK_UNINTERRUPTIBLE);
 156                spin_unlock(&journal->j_state_lock);
 157                schedule();
 158                finish_wait(&journal->j_wait_transaction_locked, &wait);
 159                goto repeat;
 160        }
 161
 162        /*
 163         * If there is not enough space left in the log to write all potential
 164         * buffers requested by this operation, we need to stall pending a log
 165         * checkpoint to free some more log space.
 166         */
 167        spin_lock(&transaction->t_handle_lock);
 168        needed = transaction->t_outstanding_credits + nblocks;
 169
 170        if (needed > journal->j_max_transaction_buffers) {
 171                /*
 172                 * If the current transaction is already too large, then start
 173                 * to commit it: we can then go back and attach this handle to
 174                 * a new transaction.
 175                 */
 176                DEFINE_WAIT(wait);
 177
 178                jbd_debug(2, "Handle %p starting new commit...\n", handle);
 179                spin_unlock(&transaction->t_handle_lock);
 180                prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
 181                                TASK_UNINTERRUPTIBLE);
 182                __jbd2_log_start_commit(journal, transaction->t_tid);
 183                spin_unlock(&journal->j_state_lock);
 184                schedule();
 185                finish_wait(&journal->j_wait_transaction_locked, &wait);
 186                goto repeat;
 187        }
 188
 189        /*
 190         * The commit code assumes that it can get enough log space
 191         * without forcing a checkpoint.  This is *critical* for
 192         * correctness: a checkpoint of a buffer which is also
 193         * associated with a committing transaction creates a deadlock,
 194         * so commit simply cannot force through checkpoints.
 195         *
 196         * We must therefore ensure the necessary space in the journal
 197         * *before* starting to dirty potentially checkpointed buffers
 198         * in the new transaction.
 199         *
 200         * The worst part is, any transaction currently committing can
 201         * reduce the free space arbitrarily.  Be careful to account for
 202         * those buffers when checkpointing.
 203         */
 204
 205        /*
 206         * @@@ AKPM: This seems rather over-defensive.  We're giving commit
 207         * a _lot_ of headroom: 1/4 of the journal plus the size of
 208         * the committing transaction.  Really, we only need to give it
 209         * committing_transaction->t_outstanding_credits plus "enough" for
 210         * the log control blocks.
  211 * Also, this test is inconsistent with the matching one in
 212         * jbd2_journal_extend().
 213         */
 214        if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) {
 215                jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle);
 216                spin_unlock(&transaction->t_handle_lock);
 217                __jbd2_log_wait_for_space(journal);
 218                goto repeat_locked;
 219        }
 220
 221        /* OK, account for the buffers that this operation expects to
 222         * use and add the handle to the running transaction. */
 223
 224        if (time_after(transaction->t_start, ts)) {
 225                ts = jbd2_time_diff(ts, transaction->t_start);
 226                if (ts > transaction->t_max_wait)
 227                        transaction->t_max_wait = ts;
 228        }
 229
 230        handle->h_transaction = transaction;
 231        transaction->t_outstanding_credits += nblocks;
 232        transaction->t_updates++;
 233        transaction->t_handle_count++;
 234        jbd_debug(4, "Handle %p given %d credits (total %d, free %d)\n",
 235                  handle, nblocks, transaction->t_outstanding_credits,
 236                  __jbd2_log_space_left(journal));
 237        spin_unlock(&transaction->t_handle_lock);
 238        spin_unlock(&journal->j_state_lock);
 239out:
 240        if (unlikely(new_transaction))                /* It's usually NULL */
 241                kfree(new_transaction);
 242        return ret;
 243}
 244
 245static struct lock_class_key jbd2_handle_key;
 246
 247/* Allocate a new handle.  This should probably be in a slab... */
 248static handle_t *new_handle(int nblocks)
 249{
 250        handle_t *handle = jbd2_alloc_handle(GFP_NOFS);
 251        if (!handle)
 252                return NULL;
 253        memset(handle, 0, sizeof(*handle));
 254        handle->h_buffer_credits = nblocks;
 255        handle->h_ref = 1;
 256
 257        lockdep_init_map(&handle->h_lockdep_map, "jbd2_handle",
 258                                                &jbd2_handle_key, 0);
 259
 260        return handle;
 261}
 262
 263/**
 264 * handle_t *jbd2_journal_start() - Obtain a new handle.
 265 * @journal: Journal to start transaction on.
 266 * @nblocks: number of block buffers we might modify
 267 *
 268 * We make sure that the transaction can guarantee at least nblocks of
 269 * modified buffers in the log.  We block until the log can guarantee
 270 * that much space.
 271 *
 272 * This function is visible to journal users (like ext3fs), so is not
 273 * called with the journal already locked.
 274 *
 275 * Return a pointer to a newly allocated handle, or NULL on failure
 276 */
 277handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
 278{
 279        handle_t *handle = journal_current_handle();
 280        int err;
 281
 282        if (!journal)
 283                return ERR_PTR(-EROFS);
 284
 285        if (handle) {
 286                J_ASSERT(handle->h_transaction->t_journal == journal);
 287                handle->h_ref++;
 288                return handle;
 289        }
 290
 291        handle = new_handle(nblocks);
 292        if (!handle)
 293                return ERR_PTR(-ENOMEM);
 294
 295        current->journal_info = handle;
 296
 297        err = start_this_handle(journal, handle);
 298        if (err < 0) {
 299                jbd2_free_handle(handle);
 300                current->journal_info = NULL;
 301                handle = ERR_PTR(err);
 302                goto out;
 303        }
 304
 305        lock_map_acquire(&handle->h_lockdep_map);
 306out:
 307        return handle;
 308}
 309
 310/**
 311 * int jbd2_journal_extend() - extend buffer credits.
 312 * @handle:  handle to 'extend'
 313 * @nblocks: nr blocks to try to extend by.
 314 *
 315 * Some transactions, such as large extends and truncates, can be done
 316 * atomically all at once or in several stages.  The operation requests
 317 * a credit for a number of buffer modifications in advance, but can
 318 * extend its credit if it needs more.
 319 *
 320 * jbd2_journal_extend tries to give the running handle more buffer credits.
 321 * It does not guarantee the allocation - this is best-effort only.
 322 * The calling process MUST be able to deal cleanly with a failure to
 323 * extend here.
 324 *
 325 * Return 0 on success, non-zero on failure.
 326 *
 327 * return code < 0 implies an error
 328 * return code > 0 implies normal transaction-full status.
 329 */
 330int jbd2_journal_extend(handle_t *handle, int nblocks)
 331{
 332        transaction_t *transaction = handle->h_transaction;
 333        journal_t *journal = transaction->t_journal;
 334        int result;
 335        int wanted;
 336
 337        result = -EIO;
 338        if (is_handle_aborted(handle))
 339                goto out;
 340
 341        result = 1;
 342
 343        spin_lock(&journal->j_state_lock);
 344
 345        /* Don't extend a locked-down transaction! */
 346        if (handle->h_transaction->t_state != T_RUNNING) {
 347                jbd_debug(3, "denied handle %p %d blocks: "
 348                          "transaction not running\n", handle, nblocks);
 349                goto error_out;
 350        }
 351
 352        spin_lock(&transaction->t_handle_lock);
 353        wanted = transaction->t_outstanding_credits + nblocks;
 354
 355        if (wanted > journal->j_max_transaction_buffers) {
 356                jbd_debug(3, "denied handle %p %d blocks: "
 357                          "transaction too large\n", handle, nblocks);
 358                goto unlock;
 359        }
 360
 361        if (wanted > __jbd2_log_space_left(journal)) {
 362                jbd_debug(3, "denied handle %p %d blocks: "
 363                          "insufficient log space\n", handle, nblocks);
 364                goto unlock;
 365        }
 366
 367        handle->h_buffer_credits += nblocks;
 368        transaction->t_outstanding_credits += nblocks;
 369        result = 0;
 370
 371        jbd_debug(3, "extended handle %p by %d\n", handle, nblocks);
 372unlock:
 373        spin_unlock(&transaction->t_handle_lock);
 374error_out:
 375        spin_unlock(&journal->j_state_lock);
 376out:
 377        return result;
 378}
 379
 380
 381/**
 382 * int jbd2_journal_restart() - restart a handle.
 383 * @handle:  handle to restart
 384 * @nblocks: nr credits requested
 385 *
 386 * Restart a handle for a multi-transaction filesystem
 387 * operation.
 388 *
 389 * If the jbd2_journal_extend() call above fails to grant new buffer credits
 390 * to a running handle, a call to jbd2_journal_restart will commit the
 391 * handle's transaction so far and reattach the handle to a new
 392 * transaction capable of guaranteeing the requested number of
 393 * credits.
 394 */
 395
 396int jbd2_journal_restart(handle_t *handle, int nblocks)
 397{
 398        transaction_t *transaction = handle->h_transaction;
 399        journal_t *journal = transaction->t_journal;
 400        int ret;
 401
 402        /* If we've had an abort of any type, don't even think about
 403         * actually doing the restart! */
 404        if (is_handle_aborted(handle))
 405                return 0;
 406
 407        /*
 408         * First unlink the handle from its current transaction, and start the
 409         * commit on that.
 410         */
 411        J_ASSERT(transaction->t_updates > 0);
 412        J_ASSERT(journal_current_handle() == handle);
 413
 414        spin_lock(&journal->j_state_lock);
 415        spin_lock(&transaction->t_handle_lock);
 416        transaction->t_outstanding_credits -= handle->h_buffer_credits;
 417        transaction->t_updates--;
 418
 419        if (!transaction->t_updates)
 420                wake_up(&journal->j_wait_updates);
 421        spin_unlock(&transaction->t_handle_lock);
 422
 423        jbd_debug(2, "restarting handle %p\n", handle);
 424        __jbd2_log_start_commit(journal, transaction->t_tid);
 425        spin_unlock(&journal->j_state_lock);
 426
 427        handle->h_buffer_credits = nblocks;
 428        ret = start_this_handle(journal, handle);
 429        return ret;
 430}
 431
 432
 433/**
 434 * void jbd2_journal_lock_updates () - establish a transaction barrier.
 435 * @journal:  Journal to establish a barrier on.
 436 *
 437 * This locks out any further updates from being started, and blocks
 438 * until all existing updates have completed, returning only once the
 439 * journal is in a quiescent state with no updates running.
 440 *
 441 * The journal lock should not be held on entry.
 442 */
 443void jbd2_journal_lock_updates(journal_t *journal)
 444{
 445        DEFINE_WAIT(wait);
 446
 447        spin_lock(&journal->j_state_lock);
 448        ++journal->j_barrier_count;
 449
 450        /* Wait until there are no running updates */
 451        while (1) {
 452                transaction_t *transaction = journal->j_running_transaction;
 453
 454                if (!transaction)
 455                        break;
 456
 457                spin_lock(&transaction->t_handle_lock);
 458                if (!transaction->t_updates) {
 459                        spin_unlock(&transaction->t_handle_lock);
 460                        break;
 461                }
 462                prepare_to_wait(&journal->j_wait_updates, &wait,
 463                                TASK_UNINTERRUPTIBLE);
 464                spin_unlock(&transaction->t_handle_lock);
 465                spin_unlock(&journal->j_state_lock);
 466                schedule();
 467                finish_wait(&journal->j_wait_updates, &wait);
 468                spin_lock(&journal->j_state_lock);
 469        }
 470        spin_unlock(&journal->j_state_lock);
 471
 472        /*
 473         * We have now established a barrier against other normal updates, but
 474         * we also need to barrier against other jbd2_journal_lock_updates() calls
 475         * to make sure that we serialise special journal-locked operations
 476         * too.
 477         */
 478        mutex_lock(&journal->j_barrier);
 479}
 480
 481/**
 482 * void jbd2_journal_unlock_updates (journal_t* journal) - release barrier
 483 * @journal:  Journal to release the barrier on.
 484 *
 485 * Release a transaction barrier obtained with jbd2_journal_lock_updates().
 486 *
 487 * Should be called without the journal lock held.
 488 */
 489void jbd2_journal_unlock_updates (journal_t *journal)
 490{
 491        J_ASSERT(journal->j_barrier_count != 0);
 492
 493        mutex_unlock(&journal->j_barrier);
 494        spin_lock(&journal->j_state_lock);
 495        --journal->j_barrier_count;
 496        spin_unlock(&journal->j_state_lock);
 497        wake_up(&journal->j_wait_transaction_locked);
 498}
 499
 500/*
 501 * Report any unexpected dirty buffers which turn up.  Normally those
 502 * indicate an error, but they can occur if the user is running (say)
 503 * tune2fs to modify the live filesystem, so we need the option of
 504 * continuing as gracefully as possible.
 505 *
 506 * The caller should already hold the journal lock and
 507 * j_list_lock spinlock: most callers will need those anyway
 508 * in order to probe the buffer's journaling state safely.
 509 */
 510static void jbd_unexpected_dirty_buffer(struct journal_head *jh)
 511{
 512        int jlist;
 513
 514        /* If this buffer is one which might reasonably be dirty
 515         * --- ie. data, or not part of this journal --- then
 516         * we're OK to leave it alone, but otherwise we need to
 517         * move the dirty bit to the journal's own internal
 518         * JBDDirty bit. */
 519        jlist = jh->b_jlist;
 520
 521        if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
 522            jlist == BJ_Shadow || jlist == BJ_Forget) {
 523                struct buffer_head *bh = jh2bh(jh);
 524
 525                if (test_clear_buffer_dirty(bh))
 526                        set_buffer_jbddirty(bh);
 527        }
 528}
 529
 530/*
 531 * If the buffer is already part of the current transaction, then there
 532 * is nothing we need to do.  If it is already part of a prior
 533 * transaction which we are still committing to disk, then we need to
 534 * make sure that we do not overwrite the old copy: we do copy-out to
 535 * preserve the copy going to disk.  We also account the buffer against
 536 * the handle's metadata buffer credits (unless the buffer is already
 537 * part of the transaction, that is).
 538 *
 539 */
 540static int
 541do_get_write_access(handle_t *handle, struct journal_head *jh,
 542                        int force_copy)
 543{
 544        struct buffer_head *bh;
 545        transaction_t *transaction;
 546        journal_t *journal;
 547        int error;
 548        char *frozen_buffer = NULL;
 549        int need_copy = 0;
 550
 551        if (is_handle_aborted(handle))
 552                return -EROFS;
 553
 554        transaction = handle->h_transaction;
 555        journal = transaction->t_journal;
 556
 557        jbd_debug(5, "buffer_head %p, force_copy %d\n", jh, force_copy);
 558
 559        JBUFFER_TRACE(jh, "entry");
 560repeat:
 561        bh = jh2bh(jh);
 562
 563        /* @@@ Need to check for errors here at some point. */
 564
 565        lock_buffer(bh);
 566        jbd_lock_bh_state(bh);
 567
 568        /* We now hold the buffer lock so it is safe to query the buffer
 569         * state.  Is the buffer dirty?
 570         *
 571         * If so, there are two possibilities.  The buffer may be
 572         * non-journaled, and undergoing a quite legitimate writeback.
 573         * Otherwise, it is journaled, and we don't expect dirty buffers
 574         * in that state (the buffers should be marked JBD_Dirty
 575         * instead.)  So either the IO is being done under our own
 576         * control and this is a bug, or it's a third party IO such as
 577         * dump(8) (which may leave the buffer scheduled for read ---
 578         * ie. locked but not dirty) or tune2fs (which may actually have
 579         * the buffer dirtied, ugh.)  */
 580
 581        if (buffer_dirty(bh)) {
 582                /*
 583                 * First question: is this buffer already part of the current
 584                 * transaction or the existing committing transaction?
 585                 */
 586                if (jh->b_transaction) {
 587                        J_ASSERT_JH(jh,
 588                                jh->b_transaction == transaction ||
 589                                jh->b_transaction ==
 590                                        journal->j_committing_transaction);
 591                        if (jh->b_next_transaction)
 592                                J_ASSERT_JH(jh, jh->b_next_transaction ==
 593                                                        transaction);
 594                }
 595                /*
 596                 * In any case we need to clean the dirty flag and we must
 597                 * do it under the buffer lock to be sure we don't race
 598                 * with running write-out.
 599                 */
 600                JBUFFER_TRACE(jh, "Unexpected dirty buffer");
 601                jbd_unexpected_dirty_buffer(jh);
 602        }
 603
 604        unlock_buffer(bh);
 605
 606        error = -EROFS;
 607        if (is_handle_aborted(handle)) {
 608                jbd_unlock_bh_state(bh);
 609                goto out;
 610        }
 611        error = 0;
 612
 613        /*
 614         * The buffer is already part of this transaction if b_transaction or
 615         * b_next_transaction points to it
 616         */
 617        if (jh->b_transaction == transaction ||
 618            jh->b_next_transaction == transaction)
 619                goto done;
 620
 621        /*
 622         * this is the first time this transaction is touching this buffer,
 623         * reset the modified flag
 624         */
 625        jh->b_modified = 0;
 626
 627        /*
 628         * If there is already a copy-out version of this buffer, then we don't
 629         * need to make another one
 630         */
 631        if (jh->b_frozen_data) {
 632                JBUFFER_TRACE(jh, "has frozen data");
 633                J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
 634                jh->b_next_transaction = transaction;
 635                goto done;
 636        }
 637
 638        /* Is there data here we need to preserve? */
 639
 640        if (jh->b_transaction && jh->b_transaction != transaction) {
 641                JBUFFER_TRACE(jh, "owned by older transaction");
 642                J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
 643                J_ASSERT_JH(jh, jh->b_transaction ==
 644                                        journal->j_committing_transaction);
 645
 646                /* There is one case we have to be very careful about.
 647                 * If the committing transaction is currently writing
 648                 * this buffer out to disk and has NOT made a copy-out,
 649                 * then we cannot modify the buffer contents at all
 650                 * right now.  The essence of copy-out is that it is the
 651                 * extra copy, not the primary copy, which gets
 652                 * journaled.  If the primary copy is already going to
 653                 * disk then we cannot do copy-out here. */
 654
 655                if (jh->b_jlist == BJ_Shadow) {
 656                        DEFINE_WAIT_BIT(wait, &bh->b_state, BH_Unshadow);
 657                        wait_queue_head_t *wqh;
 658
 659                        wqh = bit_waitqueue(&bh->b_state, BH_Unshadow);
 660
 661                        JBUFFER_TRACE(jh, "on shadow: sleep");
 662                        jbd_unlock_bh_state(bh);
 663                        /* commit wakes up all shadow buffers after IO */
 664                        for ( ; ; ) {
 665                                prepare_to_wait(wqh, &wait.wait,
 666                                                TASK_UNINTERRUPTIBLE);
 667                                if (jh->b_jlist != BJ_Shadow)
 668                                        break;
 669                                schedule();
 670                        }
 671                        finish_wait(wqh, &wait.wait);
 672                        goto repeat;
 673                }
 674
 675                /* Only do the copy if the currently-owning transaction
 676                 * still needs it.  If it is on the Forget list, the
 677                 * committing transaction is past that stage.  The
 678                 * buffer had better remain locked during the kmalloc,
 679                 * but that should be true --- we hold the journal lock
 680                 * still and the buffer is already on the BUF_JOURNAL
 681                 * list so won't be flushed.
 682                 *
 683                 * Subtle point, though: if this is a get_undo_access,
 684                 * then we will be relying on the frozen_data to contain
 685                 * the new value of the committed_data record after the
 686                 * transaction, so we HAVE to force the frozen_data copy
 687                 * in that case. */
 688
 689                if (jh->b_jlist != BJ_Forget || force_copy) {
 690                        JBUFFER_TRACE(jh, "generate frozen data");
 691                        if (!frozen_buffer) {
 692                                JBUFFER_TRACE(jh, "allocate memory for buffer");
 693                                jbd_unlock_bh_state(bh);
 694                                frozen_buffer =
 695                                        jbd2_alloc(jh2bh(jh)->b_size,
 696                                                         GFP_NOFS);
 697                                if (!frozen_buffer) {
 698                                        printk(KERN_EMERG
 699                                               "%s: OOM for frozen_buffer\n",
 700                                               __func__);
 701                                        JBUFFER_TRACE(jh, "oom!");
 702                                        error = -ENOMEM;
 703                                        jbd_lock_bh_state(bh);
 704                                        goto done;
 705                                }
 706                                goto repeat;
 707                        }
 708                        jh->b_frozen_data = frozen_buffer;
 709                        frozen_buffer = NULL;
 710                        need_copy = 1;
 711                }
 712                jh->b_next_transaction = transaction;
 713        }
 714
 715
 716        /*
 717         * Finally, if the buffer is not journaled right now, we need to make
 718         * sure it doesn't get written to disk before the caller actually
 719         * commits the new data
 720         */
 721        if (!jh->b_transaction) {
 722                JBUFFER_TRACE(jh, "no transaction");
 723                J_ASSERT_JH(jh, !jh->b_next_transaction);
 724                jh->b_transaction = transaction;
 725                JBUFFER_TRACE(jh, "file as BJ_Reserved");
 726                spin_lock(&journal->j_list_lock);
 727                __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
 728                spin_unlock(&journal->j_list_lock);
 729        }
 730
 731done:
 732        if (need_copy) {
 733                struct page *page;
 734                int offset;
 735                char *source;
 736
 737                J_EXPECT_JH(jh, buffer_uptodate(jh2bh(jh)),
 738                            "Possible IO failure.\n");
 739                page = jh2bh(jh)->b_page;
 740                offset = ((unsigned long) jh2bh(jh)->b_data) & ~PAGE_MASK;
 741                source = kmap_atomic(page, KM_USER0);
 742                memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
 743                kunmap_atomic(source, KM_USER0);
 744        }
 745        jbd_unlock_bh_state(bh);
 746
 747        /*
 748         * If we are about to journal a buffer, then any revoke pending on it is
 749         * no longer valid
 750         */
 751        jbd2_journal_cancel_revoke(handle, jh);
 752
 753out:
 754        if (unlikely(frozen_buffer))        /* It's usually NULL */
 755                jbd2_free(frozen_buffer, bh->b_size);
 756
 757        JBUFFER_TRACE(jh, "exit");
 758        return error;
 759}
 760
 761/**
 762 * int jbd2_journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update.
 763 * @handle: transaction to add buffer modifications to
 764 * @bh:     bh to be used for metadata writes
 765 * @credits: variable that will receive credits for the buffer
 766 *
 767 * Returns an error code or 0 on success.
 768 *
 769 * In full data journalling mode the buffer may be of type BJ_AsyncData,
 770 * because we're write()ing a buffer which is also part of a shared mapping.
 771 */
 772
 773int jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh)
 774{
 775        struct journal_head *jh = jbd2_journal_add_journal_head(bh);
 776        int rc;
 777
 778        /* We do not want to get caught playing with fields which the
 779         * log thread also manipulates.  Make sure that the buffer
 780         * completes any outstanding IO before proceeding. */
 781        rc = do_get_write_access(handle, jh, 0);
 782        jbd2_journal_put_journal_head(jh);
 783        return rc;
 784}
 785
 786
 787/*
 788 * When the user wants to journal a newly created buffer_head
 789 * (ie. getblk() returned a new buffer and we are going to populate it
 790 * manually rather than reading off disk), then we need to keep the
 791 * buffer_head locked until it has been completely filled with new
 792 * data.  In this case, we should be able to make the assertion that
 793 * the bh is not already part of an existing transaction.
 794 *
 795 * The buffer should already be locked by the caller by this point.
 796 * There is no lock ranking violation: it was a newly created,
 797 * unlocked buffer beforehand. */
 798
 799/**
 800 * int jbd2_journal_get_create_access () - notify intent to use newly created bh
 801 * @handle: transaction to add the new buffer to
 802 * @bh: new buffer.
 803 *
 804 * Call this if you create a new bh.
 805 */
 806int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
 807{
 808        transaction_t *transaction = handle->h_transaction;
 809        journal_t *journal = transaction->t_journal;
 810        struct journal_head *jh = jbd2_journal_add_journal_head(bh);
 811        int err;
 812
 813        jbd_debug(5, "journal_head %p\n", jh);
 814        err = -EROFS;
 815        if (is_handle_aborted(handle))
 816                goto out;
 817        err = 0;
 818
 819        JBUFFER_TRACE(jh, "entry");
 820        /*
 821         * The buffer may already belong to this transaction due to pre-zeroing
 822         * in the filesystem's new_block code.  It may also be on the previous,
 823         * committing transaction's lists, but it HAS to be in Forget state in
 824         * that case: the transaction must have deleted the buffer for it to be
 825         * reused here.
 826         */
 827        jbd_lock_bh_state(bh);
 828        spin_lock(&journal->j_list_lock);
 829        J_ASSERT_JH(jh, (jh->b_transaction == transaction ||
 830                jh->b_transaction == NULL ||
 831                (jh->b_transaction == journal->j_committing_transaction &&
 832                          jh->b_jlist == BJ_Forget)));
 833
 834        J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
 835        J_ASSERT_JH(jh, buffer_locked(jh2bh(jh)));
 836
 837        if (jh->b_transaction == NULL) {
 838                jh->b_transaction = transaction;
 839
 840                /* first access by this transaction */
 841                jh->b_modified = 0;
 842
 843                JBUFFER_TRACE(jh, "file as BJ_Reserved");
 844                __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
 845        } else if (jh->b_transaction == journal->j_committing_transaction) {
 846                /* first access by this transaction */
 847                jh->b_modified = 0;
 848
 849                JBUFFER_TRACE(jh, "set next transaction");
 850                jh->b_next_transaction = transaction;
 851        }
 852        spin_unlock(&journal->j_list_lock);
 853        jbd_unlock_bh_state(bh);
 854
 855        /*
 856         * akpm: I added this.  ext3_alloc_branch can pick up new indirect
 857         * blocks which contain freed but then revoked metadata.  We need
 858         * to cancel the revoke in case we end up freeing it yet again
 859 * and then reallocating it as data - this would cause a second revoke,
 860         * which hits an assertion error.
 861         */
 862        JBUFFER_TRACE(jh, "cancelling revoke");
 863        jbd2_journal_cancel_revoke(handle, jh);
 864        jbd2_journal_put_journal_head(jh);
 865out:
 866        return err;
 867}
 868
 869/**
 870 * int jbd2_journal_get_undo_access() -  Notify intent to modify metadata with
 871 *     non-rewindable consequences
 872 * @handle: transaction
 873 * @bh: buffer to undo
 874 * @credits: store the number of taken credits here (if not NULL)
 875 *
 876 * Sometimes there is a need to distinguish between metadata which has
 877 * been committed to disk and that which has not.  The ext3fs code uses
 878 * this for freeing and allocating space, we have to make sure that we
 879 * do not reuse freed space until the deallocation has been committed,
 880 * since if we overwrote that space we would make the delete
 881 * un-rewindable in case of a crash.
 882 *
 883 * To deal with that, jbd2_journal_get_undo_access requests write access to a
 884 * buffer for parts of non-rewindable operations such as delete
 885 * operations on the bitmaps.  The journaling code must keep a copy of
 886 * the buffer's contents prior to the undo_access call until such time
 887 * as we know that the buffer has definitely been committed to disk.
 888 *
 889 * We never need to know which transaction the committed data is part
 890 * of, buffers touched here are guaranteed to be dirtied later and so
 891 * will be committed to a new transaction in due course, at which point
 892 * we can discard the old committed data pointer.
 893 *
 894 * Returns error number or 0 on success.
 895 */
 896int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
 897{
 898        int err;
 899        struct journal_head *jh = jbd2_journal_add_journal_head(bh);
 900        char *committed_data = NULL;
 901
 902        JBUFFER_TRACE(jh, "entry");
 903
 904        /*
 905         * Do this first --- it can drop the journal lock, so we want to
 906         * make sure that obtaining the committed_data is done
 907         * atomically wrt. completion of any outstanding commits.
 908         */
 909        err = do_get_write_access(handle, jh, 1);
 910        if (err)
 911                goto out;
 912
 913repeat:
 914        if (!jh->b_committed_data) {
 915                committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS);
 916                if (!committed_data) {
 917                        printk(KERN_EMERG "%s: No memory for committed data\n",
 918                                __func__);
 919                        err = -ENOMEM;
 920                        goto out;
 921                }
 922        }
 923
 924        jbd_lock_bh_state(bh);
 925        if (!jh->b_committed_data) {
 926                /* Copy out the current buffer contents into the
 927                 * preserved, committed copy. */
 928                JBUFFER_TRACE(jh, "generate b_committed data");
 929                if (!committed_data) {
 930                        jbd_unlock_bh_state(bh);
 931                        goto repeat;
 932                }
 933
 934                jh->b_committed_data = committed_data;
 935                committed_data = NULL;
 936                memcpy(jh->b_committed_data, bh->b_data, bh->b_size);
 937        }
 938        jbd_unlock_bh_state(bh);
 939out:
 940        jbd2_journal_put_journal_head(jh);
 941        if (unlikely(committed_data))
 942                jbd2_free(committed_data, bh->b_size);
 943        return err;
 944}
 945
 946/**
 947 * int jbd2_journal_dirty_metadata() -  mark a buffer as containing dirty metadata
 948 * @handle: transaction to add buffer to.
 949 * @bh: buffer to mark
 950 *
 951 * mark dirty metadata which needs to be journaled as part of the current
 952 * transaction.
 953 *
 954 * The buffer is placed on the transaction's metadata list and is marked
 955 * as belonging to the transaction.
 956 *
 957 * Returns error number or 0 on success.
 958 *
 959 * Special care needs to be taken if the buffer already belongs to the
 960 * current committing transaction (in which case we should have frozen
 961 * data present for that commit).  In that case, we don't relink the
 962 * buffer: that only gets done when the old transaction finally
 963 * completes its commit.
 964 */
 965int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
 966{
 967        transaction_t *transaction = handle->h_transaction;
 968        journal_t *journal = transaction->t_journal;
 969        struct journal_head *jh = bh2jh(bh);
 970
 971        jbd_debug(5, "journal_head %p\n", jh);
 972        JBUFFER_TRACE(jh, "entry");
 973        if (is_handle_aborted(handle))
 974                goto out;
 975
 976        jbd_lock_bh_state(bh);
 977
 978        if (jh->b_modified == 0) {
 979                /*
 980                 * This buffer has been modified and is becoming
 981                 * part of the transaction. This needs to be done
 982                 * once per transaction -bzzz
 983                 */
 984                jh->b_modified = 1;
 985                J_ASSERT_JH(jh, handle->h_buffer_credits > 0);
 986                handle->h_buffer_credits--;
 987        }
 988
 989        /*
 990         * fastpath, to avoid expensive locking.  If this buffer is already
 991         * on the running transaction's metadata list there is nothing to do.
 992         * Nobody can take it off again because there is a handle open.
 993         * I _think_ we're OK here with SMP barriers - a mistaken decision will
 994         * result in this test being false, so we go in and take the locks.
 995         */
 996        if (jh->b_transaction == transaction && jh->b_jlist == BJ_Metadata) {
 997                JBUFFER_TRACE(jh, "fastpath");
 998                J_ASSERT_JH(jh, jh->b_transaction ==
 999                                        journal->j_running_transaction);
1000                goto out_unlock_bh;
1001        }
1002
1003        set_buffer_jbddirty(bh);
1004
1005        /*
1006         * Metadata already on the current transaction list doesn't
1007         * need to be filed.  Metadata on another transaction's list must
1008         * be committing, and will be refiled once the commit completes:
1009         * leave it alone for now.
1010         */
1011        if (jh->b_transaction != transaction) {
1012                JBUFFER_TRACE(jh, "already on other transaction");
1013                J_ASSERT_JH(jh, jh->b_transaction ==
1014                                        journal->j_committing_transaction);
1015                J_ASSERT_JH(jh, jh->b_next_transaction == transaction);
1016                /* And this case is illegal: we can't reuse another
1017                 * transaction's data buffer, ever. */
1018                goto out_unlock_bh;
1019        }
1020
1021        /* That test should have eliminated the following case: */
1022        J_ASSERT_JH(jh, jh->b_frozen_data == NULL);
1023
1024        JBUFFER_TRACE(jh, "file as BJ_Metadata");
1025        spin_lock(&journal->j_list_lock);
1026        __jbd2_journal_file_buffer(jh, handle->h_transaction, BJ_Metadata);
1027        spin_unlock(&journal->j_list_lock);
1028out_unlock_bh:
1029        jbd_unlock_bh_state(bh);
1030out:
1031        JBUFFER_TRACE(jh, "exit");
1032        return 0;
1033}
1034
1035/*
1036 * jbd2_journal_release_buffer: undo a get_write_access without any buffer
1037 * updates, if the update decided in the end that it didn't need access.
1038 *
1039 */
1040void
1041jbd2_journal_release_buffer(handle_t *handle, struct buffer_head *bh)
1042{
1043        BUFFER_TRACE(bh, "entry");
1044}
1045
1046/**
1047 * void jbd2_journal_forget() - bforget() for potentially-journaled buffers.
1048 * @handle: transaction handle
1049 * @bh:     bh to 'forget'
1050 *
1051 * We can only do the bforget if there are no commits pending against the
1052 * buffer.  If the buffer is dirty in the current running transaction we
1053 * can safely unlink it.
1054 *
1055 * bh may not be a journalled buffer at all - it may be a non-JBD
1056 * buffer which came off the hashtable.  Check for this.
1057 *
1058 * Decrements bh->b_count by one.
1059 *
1060 * Allow this call even if the handle has aborted --- it may be part of
1061 * the caller's cleanup after an abort.
1062 */
1063int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
1064{
1065        transaction_t *transaction = handle->h_transaction;
1066        journal_t *journal = transaction->t_journal;
1067        struct journal_head *jh;
1068        int drop_reserve = 0;
1069        int err = 0;
1070        int was_modified = 0;
1071
1072        BUFFER_TRACE(bh, "entry");
1073
1074        jbd_lock_bh_state(bh);
1075        spin_lock(&journal->j_list_lock);
1076
1077        if (!buffer_jbd(bh))
1078                goto not_jbd;
1079        jh = bh2jh(bh);
1080
1081        /* Critical error: attempting to delete a bitmap buffer, maybe?
1082         * Don't do any jbd operations, and return an error. */
1083        if (!J_EXPECT_JH(jh, !jh->b_committed_data,
1084                         "inconsistent data on disk")) {
1085                err = -EIO;
1086                goto not_jbd;
1087        }
1088
1089        /* keep track of whether or not this transaction modified us */
1090        was_modified = jh->b_modified;
1091
1092        /*
1093         * The buffer's going from the transaction, we must drop
1094         * all references -bzzz
1095         */
1096        jh->b_modified = 0;
1097
1098        if (jh->b_transaction == handle->h_transaction) {
1099                J_ASSERT_JH(jh, !jh->b_frozen_data);
1100
1101                /* If we are forgetting a buffer which is already part
1102                 * of this transaction, then we can just drop it from
1103                 * the transaction immediately. */
1104                clear_buffer_dirty(bh);
1105                clear_buffer_jbddirty(bh);
1106
1107                JBUFFER_TRACE(jh, "belongs to current transaction: unfile");
1108
1109                /*
1110                 * we only want to drop a reference if this transaction
1111                 * modified the buffer
1112                 */
1113                if (was_modified)
1114                        drop_reserve = 1;
1115
1116                /*
1117                 * We are no longer going to journal this buffer.
1118                 * However, the commit of this transaction is still
1119                 * important to the buffer: the delete that we are now
1120                 * processing might obsolete an old log entry, so by
1121                 * committing, we can satisfy the buffer's checkpoint.
1122                 *
1123                 * So, if we have a checkpoint on the buffer, we should
1124                 * now refile the buffer on our BJ_Forget list so that
1125                 * we know to remove the checkpoint after we commit.
1126                 */
1127
1128                if (jh->b_cp_transaction) {
1129                        __jbd2_journal_temp_unlink_buffer(jh);
1130                        __jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
1131                } else {
1132                        __jbd2_journal_unfile_buffer(jh);
1133                        jbd2_journal_remove_journal_head(bh);
1134                        __brelse(bh);
1135                        if (!buffer_jbd(bh)) {
1136                                spin_unlock(&journal->j_list_lock);
1137                                jbd_unlock_bh_state(bh);
1138                                __bforget(bh);
1139                                goto drop;
1140                        }
1141                }
1142        } else if (jh->b_transaction) {
1143                J_ASSERT_JH(jh, (jh->b_transaction ==
1144                                 journal->j_committing_transaction));
1145                /* However, if the buffer is still owned by a prior
1146                 * (committing) transaction, we can't drop it yet... */
1147                JBUFFER_TRACE(jh, "belongs to older transaction");
1148                /* ... but we CAN drop it from the new transaction if we
1149                 * have also modified it since the original commit. */
1150
1151                if (jh->b_next_transaction) {
1152                        J_ASSERT(jh->b_next_transaction == transaction);
1153                        jh->b_next_transaction = NULL;
1154
1155                        /*
1156                         * only drop a reference if this transaction modified
1157                         * the buffer
1158                         */
1159                        if (was_modified)
1160                                drop_reserve = 1;
1161                }
1162        }
1163
1164not_jbd:
1165        spin_unlock(&journal->j_list_lock);
1166        jbd_unlock_bh_state(bh);
1167        __brelse(bh);
1168drop:
1169        if (drop_reserve) {
1170                /* no need to reserve log space for this block -bzzz */
1171                handle->h_buffer_credits++;
1172        }
1173        return err;
1174}
1175
1176/**
1177 * int jbd2_journal_stop() - complete a transaction
1178 * @handle: transaction to complete.
1179 *
1180 * All done for a particular handle.
1181 *
1182 * There is not much action needed here.  We just return any remaining
1183 * buffer credits to the transaction and remove the handle.  The only
1184 * complication is that we need to start a commit operation if the
1185 * filesystem is marked for synchronous update.
1186 *
1187 * jbd2_journal_stop itself will not usually return an error, but it may
1188 * do so in unusual circumstances.  In particular, expect it to
1189 * return -EIO if a jbd2_journal_abort has been executed since the
1190 * transaction began.
1191 */
1192int jbd2_journal_stop(handle_t *handle)
1193{
1194        transaction_t *transaction = handle->h_transaction;
1195        journal_t *journal = transaction->t_journal;
1196        int old_handle_count, err;
1197        pid_t pid;
1198
1199        J_ASSERT(journal_current_handle() == handle);
1200
1201        if (is_handle_aborted(handle))
1202                err = -EIO;
1203        else {
1204                J_ASSERT(transaction->t_updates > 0);
1205                err = 0;
1206        }
1207
1208        if (--handle->h_ref > 0) {
1209                jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1,
1210                          handle->h_ref);
1211                return err;
1212        }
1213
1214        jbd_debug(4, "Handle %p going down\n", handle);
1215
1216        /*
1217         * Implement synchronous transaction batching.  If the handle
1218         * was synchronous, don't force a commit immediately.  Let's
1219         * yield and let another thread piggyback onto this transaction.
1220         * Keep doing that while new threads continue to arrive.
1221         * It doesn't cost much - we're about to run a commit and sleep
1222         * on IO anyway.  Speeds up many-threaded, many-dir operations
1223         * by 30x or more...
1224         *
1225         * But don't do this if this process was the most recent one to
1226         * perform a synchronous write.  We do this to detect the case where a
1227         * single process is doing a stream of sync writes.  No point in waiting
1228         * for joiners in that case.
1229         */
1230        pid = current->pid;
1231        if (handle->h_sync && journal->j_last_sync_writer != pid) {
1232                journal->j_last_sync_writer = pid;
1233                do {
1234                        old_handle_count = transaction->t_handle_count;
1235                        schedule_timeout_uninterruptible(1);
1236                } while (old_handle_count != transaction->t_handle_count);
1237        }
1238
1239        current->journal_info = NULL;
1240        spin_lock(&journal->j_state_lock);
1241        spin_lock(&transaction->t_handle_lock);
1242        transaction->t_outstanding_credits -= handle->h_buffer_credits;
1243        transaction->t_updates--;
1244        if (!transaction->t_updates) {
1245                wake_up(&journal->j_wait_updates);
1246                if (journal->j_barrier_count)
1247                        wake_up(&journal->j_wait_transaction_locked);
1248        }
1249
1250        /*
1251         * If the handle is marked SYNC, we need to set another commit
1252         * going!  We also want to force a commit if the current
1253         * transaction is occupying too much of the log, or if the
1254         * transaction is too old now.
1255         */
1256        if (handle->h_sync ||
1257                        transaction->t_outstanding_credits >
1258                                journal->j_max_transaction_buffers ||
1259                        time_after_eq(jiffies, transaction->t_expires)) {
1260                /* Do this even for aborted journals: an abort still
1261                 * completes the commit thread, it just doesn't write
1262                 * anything to disk. */
1263                tid_t tid = transaction->t_tid;
1264
1265                spin_unlock(&transaction->t_handle_lock);
1266                jbd_debug(2, "transaction too old, requesting commit for "
1267                                        "handle %p\n", handle);
1268                /* This is non-blocking */
1269                __jbd2_log_start_commit(journal, transaction->t_tid);
1270                spin_unlock(&journal->j_state_lock);
1271
1272                /*
1273                 * Special case: JBD2_SYNC synchronous updates require us
1274                 * to wait for the commit to complete.
1275                 */
1276                if (handle->h_sync && !(current->flags & PF_MEMALLOC))
1277                        err = jbd2_log_wait_commit(journal, tid);
1278        } else {
1279                spin_unlock(&transaction->t_handle_lock);
1280                spin_unlock(&journal->j_state_lock);
1281        }
1282
1283        lock_map_release(&handle->h_lockdep_map);
1284
1285        jbd2_free_handle(handle);
1286        return err;
1287}
1288
1289/**
1290 * int jbd2_journal_force_commit() - force any uncommitted transactions
1291 * @journal: journal to force
1292 *
1293 * For synchronous operations: force any uncommitted transactions
1294 * to disk.  May seem kludgy, but it reuses all the handle batching
1295 * code in a very simple manner.
1296 */
1297int jbd2_journal_force_commit(journal_t *journal)
1298{
1299        handle_t *handle;
1300        int ret;
1301
1302        handle = jbd2_journal_start(journal, 1);
1303        if (IS_ERR(handle)) {
1304                ret = PTR_ERR(handle);
1305        } else {
1306                handle->h_sync = 1;
1307                ret = jbd2_journal_stop(handle);
1308        }
1309        return ret;
1310}
1311
1312/*
1313 *
1314 * List management code snippets: various functions for manipulating the
1315 * transaction buffer lists.
1316 *
1317 */
1318
1319/*
1320 * Append a buffer to a transaction list, given the transaction's list head
1321 * pointer.
1322 *
1323 * j_list_lock is held.
1324 *
1325 * jbd_lock_bh_state(jh2bh(jh)) is held.
1326 */
1327
1328static inline void
1329__blist_add_buffer(struct journal_head **list, struct journal_head *jh)
1330{
1331        if (!*list) {
1332                jh->b_tnext = jh->b_tprev = jh;
1333                *list = jh;
1334        } else {
1335                /* Insert at the tail of the list to preserve order */
1336                struct journal_head *first = *list, *last = first->b_tprev;
1337                jh->b_tprev = last;
1338                jh->b_tnext = first;
1339                last->b_tnext = first->b_tprev = jh;
1340        }
1341}
1342
1343/*
1344 * Remove a buffer from a transaction list, given the transaction's list
1345 * head pointer.
1346 *
1347 * Called with j_list_lock held, and the journal may not be locked.
1348 *
1349 * jbd_lock_bh_state(jh2bh(jh)) is held.
1350 */
1351
1352static inline void
1353__blist_del_buffer(struct journal_head **list, struct journal_head *jh)
1354{
1355        if (*list == jh) {
1356                *list = jh->b_tnext;
1357                if (*list == jh)
1358                        *list = NULL;
1359        }
1360        jh->b_tprev->b_tnext = jh->b_tnext;
1361        jh->b_tnext->b_tprev = jh->b_tprev;
1362}
1363
1364/*
1365 * Remove a buffer from the appropriate transaction list.
1366 *
1367 * Note that this function can *change* the value of
1368 * bh->b_transaction->t_buffers, t_forget, t_iobuf_list, t_shadow_list,
1369 * t_log_list or t_reserved_list.  If the caller is holding onto a copy of one
1370 * of these pointers, it could go bad.  Generally the caller needs to re-read
1371 * the pointer from the transaction_t.
1372 *
1373 * Called under j_list_lock.  The journal may not be locked.
1374 */
1375void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
1376{
1377        struct journal_head **list = NULL;
1378        transaction_t *transaction;
1379        struct buffer_head *bh = jh2bh(jh);
1380
1381        J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
1382        transaction = jh->b_transaction;
1383        if (transaction)
1384                assert_spin_locked(&transaction->t_journal->j_list_lock);
1385
1386        J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
1387        if (jh->b_jlist != BJ_None)
1388                J_ASSERT_JH(jh, transaction != NULL);
1389
1390        switch (jh->b_jlist) {
1391        case BJ_None:
1392                return;
1393        case BJ_Metadata:
1394                transaction->t_nr_buffers--;
1395                J_ASSERT_JH(jh, transaction->t_nr_buffers >= 0);
1396                list = &transaction->t_buffers;
1397                break;
1398        case BJ_Forget:
1399                list = &transaction->t_forget;
1400                break;
1401        case BJ_IO:
1402                list = &transaction->t_iobuf_list;
1403                break;
1404        case BJ_Shadow:
1405                list = &transaction->t_shadow_list;
1406                break;
1407        case BJ_LogCtl:
1408                list = &transaction->t_log_list;
1409                break;
1410        case BJ_Reserved:
1411                list = &transaction->t_reserved_list;
1412                break;
1413        }
1414
1415        __blist_del_buffer(list, jh);
1416        jh->b_jlist = BJ_None;
1417        if (test_clear_buffer_jbddirty(bh))
1418                mark_buffer_dirty(bh);        /* Expose it to the VM */
1419}
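
As the comment above warns, unlinking can change the transaction's list head, so a cached copy of that pointer may go stale. A hedged fragment of the pattern a caller must follow (illustrative, not from this file):

    /* Drain t_buffers by always re-reading the head, never caching it. */
    while (transaction->t_buffers) {
            struct journal_head *jh = transaction->t_buffers;
            __jbd2_journal_temp_unlink_buffer(jh);
            /* t_buffers may now point at a different jh (or be NULL);
             * the loop condition re-reads it on the next pass. */
    }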
1420
1421void __jbd2_journal_unfile_buffer(struct journal_head *jh)
1422{
1423        __jbd2_journal_temp_unlink_buffer(jh);
1424        jh->b_transaction = NULL;
1425}
1426
1427void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
1428{
1429        jbd_lock_bh_state(jh2bh(jh));
1430        spin_lock(&journal->j_list_lock);
1431        __jbd2_journal_unfile_buffer(jh);
1432        spin_unlock(&journal->j_list_lock);
1433        jbd_unlock_bh_state(jh2bh(jh));
1434}
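
This wrapper also shows the lock nesting used throughout the list code: the per-buffer state lock is taken before j_list_lock, and journal_unmap_buffer() below adds j_state_lock outermost. A hedged summary of the ordering, inferred from those call sites:

    /* Nesting order visible in this file (releases in reverse order):
     *   j_state_lock  ->  jbd_lock_bh_state(bh)  ->  j_list_lock     */
    spin_lock(&journal->j_state_lock);
    jbd_lock_bh_state(bh);
    spin_lock(&journal->j_list_lock);
    /* ... inspect or move the buffer between transaction lists ... */
    spin_unlock(&journal->j_list_lock);
    jbd_unlock_bh_state(bh);
    spin_unlock(&journal->j_state_lock);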
1435
1436/*
1437 * Called from jbd2_journal_try_to_free_buffers().
1438 *
1439 * Called under jbd_lock_bh_state(bh)
1440 */
1441static void
1442__journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
1443{
1444        struct journal_head *jh;
1445
1446        jh = bh2jh(bh);
1447
1448        if (buffer_locked(bh) || buffer_dirty(bh))
1449                goto out;
1450
1451        if (jh->b_next_transaction != NULL)
1452                goto out;
1453
1454        spin_lock(&journal->j_list_lock);
1455        if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) {
1456                /* written-back checkpointed metadata buffer */
1457                if (jh->b_jlist == BJ_None) {
1458                        JBUFFER_TRACE(jh, "remove from checkpoint list");
1459                        __jbd2_journal_remove_checkpoint(jh);
1460                        jbd2_journal_remove_journal_head(bh);
1461                        __brelse(bh);
1462                }
1463        }
1464        spin_unlock(&journal->j_list_lock);
1465out:
1466        return;
1467}
1468
1469/*
1470 * jbd2_journal_try_to_free_buffers() could race with
1471 * jbd2_journal_commit_transaction(). The latter might still hold a
1472 * reference to the buffers when inspecting them on
1473 * t_syncdata_list or t_locked_list.
1474 *
1475 * jbd2_journal_try_to_free_buffers() will call this function to
1476 * wait for the current transaction to finish syncing data buffers
1477 * before trying to free those buffers.
1478 *
1479 * Takes and releases journal->j_state_lock internally.
1480 */
1481static void jbd2_journal_wait_for_transaction_sync_data(journal_t *journal)
1482{
1483        transaction_t *transaction;
1484        tid_t tid;
1485
1486        spin_lock(&journal->j_state_lock);
1487        transaction = journal->j_committing_transaction;
1488
1489        if (!transaction) {
1490                spin_unlock(&journal->j_state_lock);
1491                return;
1492        }
1493
1494        tid = transaction->t_tid;
1495        spin_unlock(&journal->j_state_lock);
1496        jbd2_log_wait_commit(journal, tid);
1497}
1498
1499/**
1500 * int jbd2_journal_try_to_free_buffers() - try to free page buffers.
1501 * @journal: journal for operation
1502 * @page: to try and free
1503 * @gfp_mask: we use the mask to detect how hard we should try to release
1504 * buffers. If __GFP_WAIT and __GFP_FS are set, we wait for the commit code
1505 * to release the buffers.
1506 *
1507 *
1508 * For all the buffers on this page,
1509 * if they are fully written out ordered data, move them onto BUF_CLEAN
1510 * so try_to_free_buffers() can reap them.
1511 *
1512 * This function returns non-zero if we wish try_to_free_buffers()
1513 * to be called. We do this if the page is releasable by try_to_free_buffers().
1514 * We also do it if the page has locked or dirty buffers and the caller wants
1515 * us to perform sync or async writeout.
1516 *
1517 * This complicates JBD locking somewhat.  We aren't protected by the
1518 * BKL here.  We wish to remove the buffer from its committing or
1519 * running transaction's ->t_datalist via __jbd2_journal_unfile_buffer.
1520 *
1521 * This may *change* the value of transaction_t->t_datalist, so anyone
1522 * who looks at t_datalist needs to lock against this function.
1523 *
1524 * Even worse, someone may be doing a jbd2_journal_dirty_data on this
1525 * buffer.  So we need to lock against that.  jbd2_journal_dirty_data()
1526 * will come out of the lock with the buffer dirty, which makes it
1527 * ineligible for release here.
1528 *
1529 * Who else is affected by this?  hmm...  Really the only contender
1530 * is do_get_write_access() - it could be looking at the buffer while
1531 * journal_try_to_free_buffer() is changing its state.  But that
1532 * cannot happen because we never reallocate freed data as metadata
1533 * while the data is part of a transaction.  Yes?
1534 *
1535 * Return 0 on failure, 1 on success
1536 */
1537int jbd2_journal_try_to_free_buffers(journal_t *journal,
1538                                struct page *page, gfp_t gfp_mask)
1539{
1540        struct buffer_head *head;
1541        struct buffer_head *bh;
1542        int ret = 0;
1543
1544        J_ASSERT(PageLocked(page));
1545
1546        head = page_buffers(page);
1547        bh = head;
1548        do {
1549                struct journal_head *jh;
1550
1551                /*
1552                 * We take our own ref against the journal_head here to avoid
1553                 * having to add tons of locking around each instance of
1554                 * jbd2_journal_remove_journal_head() and
1555                 * jbd2_journal_put_journal_head().
1556                 */
1557                jh = jbd2_journal_grab_journal_head(bh);
1558                if (!jh)
1559                        continue;
1560
1561                jbd_lock_bh_state(bh);
1562                __journal_try_to_free_buffer(journal, bh);
1563                jbd2_journal_put_journal_head(jh);
1564                jbd_unlock_bh_state(bh);
1565                if (buffer_jbd(bh))
1566                        goto busy;
1567        } while ((bh = bh->b_this_page) != head);
1568
1569        ret = try_to_free_buffers(page);
1570
1571        /*
1572         * There are a number of places where jbd2_journal_try_to_free_buffers()
1573         * could race with jbd2_journal_commit_transaction(); the latter still
1574         * holds a reference to the buffers while processing them, so
1575         * try_to_free_buffers() fails to free them. Some callers of
1576         * releasepage() require the page buffers to be dropped, and otherwise
1577         * treat a failure to free as an error (such as generic_file_direct_IO()).
1578         *
1579         * So, if the caller of try_to_release_page() wants synchronous
1580         * behaviour (i.e. to make sure buffers are dropped upon return),
1581         * wait for the current transaction to finish flushing its
1582         * dirty data buffers, then try to free those buffers again,
1583         * with the journal locked.
1584         */
1585        if (ret == 0 && (gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)) {
1586                jbd2_journal_wait_for_transaction_sync_data(journal);
1587                ret = try_to_free_buffers(page);
1588        }
1589
1590busy:
1591        return ret;
1592}
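
The retry branch only triggers for callers that are allowed to block. A hedged sketch of how a releasepage-style caller selects that behaviour (the wrapper name is hypothetical; the gfp semantics follow the kernel-doc above):

    /* Illustrative only: GFP_KERNEL includes __GFP_WAIT | __GFP_FS, so
     * the helper may wait for the committing transaction and retry. */
    static int example_releasepage(journal_t *journal, struct page *page)
    {
            return jbd2_journal_try_to_free_buffers(journal, page,
                                                    GFP_KERNEL);
    }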
1593
1594/*
1595 * This buffer is no longer needed.  If it is on an older transaction's
1596 * checkpoint list we need to record it on this transaction's forget list
1597 * to pin this buffer (and hence its checkpointing transaction) down until
1598 * this transaction commits.  If the buffer isn't on a checkpoint list, we
1599 * release it.
1600 * Returns non-zero if JBD no longer has an interest in the buffer.
1601 *
1602 * Called under j_list_lock.
1603 *
1604 * Called under jbd_lock_bh_state(bh).
1605 */
1606static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
1607{
1608        int may_free = 1;
1609        struct buffer_head *bh = jh2bh(jh);
1610
1611        __jbd2_journal_unfile_buffer(jh);
1612
1613        if (jh->b_cp_transaction) {
1614                JBUFFER_TRACE(jh, "on running+cp transaction");
1615                __jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
1616                clear_buffer_jbddirty(bh);
1617                may_free = 0;
1618        } else {
1619                JBUFFER_TRACE(jh, "on running transaction");
1620                jbd2_journal_remove_journal_head(bh);
1621                __brelse(bh);
1622        }
1623        return may_free;
1624}
1625
1626/*
1627 * jbd2_journal_invalidatepage
1628 *
1629 * This code is tricky.  It has a number of cases to deal with.
1630 *
1631 * There are two invariants which this code relies on:
1632 *
1633 * i_size must be updated on disk before we start calling invalidatepage on the
1634 * data.
1635 *
1636 *  This is done in ext3 by defining an ext3_setattr method which
1637 *  updates i_size before truncate gets going.  By maintaining this
1638 *  invariant, we can be sure that it is safe to throw away any buffers
1639 *  attached to the current transaction: once the transaction commits,
1640 *  we know that the data will not be needed.
1641 *
1642 *  Note however that we can *not* throw away data belonging to the
1643 *  previous, committing transaction!
1644 *
1645 * Any disk blocks which *are* part of the previous, committing
1646 * transaction (and which therefore cannot be discarded immediately) are
1647 * not going to be reused in the new running transaction
1648 *
1649 *  The bitmap committed_data images guarantee this: any block which is
1650 *  allocated in one transaction and removed in the next will be marked
1651 *  as in-use in the committed_data bitmap, so cannot be reused until
1652 *  the next transaction to delete the block commits.  This means that
1653 *  leaving committing buffers dirty is quite safe: the disk blocks
1654 *  cannot be reallocated to a different file and so buffer aliasing is
1655 *  not possible.
1656 *
1657 *
1658 * The above applies mainly to ordered data mode.  In writeback mode we
1659 * don't make guarantees about the order in which data hits disk --- in
1660 * particular we don't guarantee that new dirty data is flushed before
1661 * transaction commit --- so it is always safe just to discard data
1662 * immediately in that mode.  --sct
1663 */
1664
1665/*
1666 * The journal_unmap_buffer helper function returns zero if the buffer
1667 * concerned remains pinned as an anonymous buffer belonging to an older
1668 * transaction.
1669 *
1670 * We're outside-transaction here.  Either or both of j_running_transaction
1671 * and j_committing_transaction may be NULL.
1672 */
1673static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1674{
1675        transaction_t *transaction;
1676        struct journal_head *jh;
1677        int may_free = 1;
1678        int ret;
1679
1680        BUFFER_TRACE(bh, "entry");
1681
1682        /*
1683         * It is safe to proceed here without the j_list_lock because the
1684         * buffers cannot be stolen by try_to_free_buffers as long as we are
1685         * holding the page lock. --sct
1686         */
1687
1688        if (!buffer_jbd(bh))
1689                goto zap_buffer_unlocked;
1690
1691        /* OK, we have data buffer in journaled mode */
1692        spin_lock(&journal->j_state_lock);
1693        jbd_lock_bh_state(bh);
1694        spin_lock(&journal->j_list_lock);
1695
1696        jh = jbd2_journal_grab_journal_head(bh);
1697        if (!jh)
1698                goto zap_buffer_no_jh;
1699
1700        transaction = jh->b_transaction;
1701        if (transaction == NULL) {
1702                /* First case: not on any transaction.  If it
1703                 * has no checkpoint link, then we can zap it:
1704                 * it's a writeback-mode buffer so we don't care
1705                 * if it hits disk safely. */
1706                if (!jh->b_cp_transaction) {
1707                        JBUFFER_TRACE(jh, "not on any transaction: zap");
1708                        goto zap_buffer;
1709                }
1710
1711                if (!buffer_dirty(bh)) {
1712                        /* bdflush has written it.  We can drop it now */
1713                        goto zap_buffer;
1714                }
1715
1716                /* OK, it must be in the journal but still not
1717                 * written fully to disk: it's metadata or
1718                 * journaled data... */
1719
1720                if (journal->j_running_transaction) {
1721                        /* ... and once the current transaction has
1722                         * committed, the buffer won't be needed any
1723                         * longer. */
1724                        JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget");
1725                        ret = __dispose_buffer(jh,
1726                                        journal->j_running_transaction);
1727                        jbd2_journal_put_journal_head(jh);
1728                        spin_unlock(&journal->j_list_lock);
1729                        jbd_unlock_bh_state(bh);
1730                        spin_unlock(&journal->j_state_lock);
1731                        return ret;
1732                } else {
1733                        /* There is no currently-running transaction. So the
1734                         * orphan record which we wrote for this file must have
1735                         * passed into commit.  We must attach this buffer to
1736                         * the committing transaction, if it exists. */
1737                        if (journal->j_committing_transaction) {
1738                                JBUFFER_TRACE(jh, "give to committing trans");
1739                                ret = __dispose_buffer(jh,
1740                                        journal->j_committing_transaction);
1741                                jbd2_journal_put_journal_head(jh);
1742                                spin_unlock(&journal->j_list_lock);
1743                                jbd_unlock_bh_state(bh);
1744                                spin_unlock(&journal->j_state_lock);
1745                                return ret;
1746                        } else {
1747                                /* The orphan record's transaction has
1748                                 * committed.  We can cleanse this buffer */
1749                                clear_buffer_jbddirty(bh);
1750                                goto zap_buffer;
1751                        }
1752                }
1753        } else if (transaction == journal->j_committing_transaction) {
1754                JBUFFER_TRACE(jh, "on committing transaction");
1755                /*
1756                 * If it is committing, we simply cannot touch it.  We
1757                 * can remove its next_transaction pointer from the
1758                 * running transaction if that is set, but nothing
1759                 * else. */
1760                set_buffer_freed(bh);
1761                if (jh->b_next_transaction) {
1762                        J_ASSERT(jh->b_next_transaction ==
1763                                        journal->j_running_transaction);
1764                        jh->b_next_transaction = NULL;
1765                }
1766                jbd2_journal_put_journal_head(jh);
1767                spin_unlock(&journal->j_list_lock);
1768                jbd_unlock_bh_state(bh);
1769                spin_unlock(&journal->j_state_lock);
1770                return 0;
1771        } else {
1772                /* Good, the buffer belongs to the running transaction.
1773                 * We are writing our own transaction's data, not any
1774                 * previous one's, so it is safe to throw it away
1775                 * (remember that we expect the filesystem to have set
1776                 * i_size already for this truncate so recovery will not
1777                 * expose the disk blocks we are discarding here.) */
1778                J_ASSERT_JH(jh, transaction == journal->j_running_transaction);
1779                JBUFFER_TRACE(jh, "on running transaction");
1780                may_free = __dispose_buffer(jh, transaction);
1781        }
1782
1783zap_buffer:
1784        jbd2_journal_put_journal_head(jh);
1785zap_buffer_no_jh:
1786        spin_unlock(&journal->j_list_lock);
1787        jbd_unlock_bh_state(bh);
1788        spin_unlock(&journal->j_state_lock);
1789zap_buffer_unlocked:
1790        clear_buffer_dirty(bh);
1791        J_ASSERT_BH(bh, !buffer_jbddirty(bh));
1792        clear_buffer_mapped(bh);
1793        clear_buffer_req(bh);
1794        clear_buffer_new(bh);
1795        bh->b_bdev = NULL;
1796        return may_free;
1797}
1798
1799/**
1800 * void jbd2_journal_invalidatepage()
1801 * @journal: journal to use for flush...
1802 * @page:    page to flush
1803 * @offset:  start of the range to invalidate (bytes into the page).
1804 *
1805 * Reap page buffers containing data after offset in page.
1806 *
1807 */
1808void jbd2_journal_invalidatepage(journal_t *journal,
1809                      struct page *page,
1810                      unsigned long offset)
1811{
1812        struct buffer_head *head, *bh, *next;
1813        unsigned int curr_off = 0;
1814        int may_free = 1;
1815
1816        if (!PageLocked(page))
1817                BUG();
1818        if (!page_has_buffers(page))
1819                return;
1820
1821        /* We will potentially be playing with lists other than just the
1822         * data lists (especially for journaled data mode), so be
1823         * cautious in our locking. */
1824
1825        head = bh = page_buffers(page);
1826        do {
1827                unsigned int next_off = curr_off + bh->b_size;
1828                next = bh->b_this_page;
1829
1830                if (offset <= curr_off) {
1831                        /* This block is wholly outside the truncation point */
1832                        lock_buffer(bh);
1833                        may_free &= journal_unmap_buffer(journal, bh);
1834                        unlock_buffer(bh);
1835                }
1836                curr_off = next_off;
1837                bh = next;
1838
1839        } while (bh != head);
1840
1841        if (!offset) {
1842                if (may_free && try_to_free_buffers(page))
1843                        J_ASSERT(!page_has_buffers(page));
1844        }
1845}
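
The "offset <= curr_off" test admits only buffers that lie wholly past the truncation point. A self-contained toy model of the loop (userspace, illustrative buffer sizes), runnable as-is:

    #include <stdio.h>

    /* Which buffers would be reaped for a given offset?  Four 1024-byte
     * buffers on a 4096-byte page, truncation offset 2048. */
    int main(void)
    {
            unsigned int offset = 2048, b_size = 1024, curr_off = 0;
            int i;

            for (i = 0; i < 4; i++) {
                    if (offset <= curr_off)
                            printf("buffer at %4u: unmapped\n", curr_off);
                    else
                            printf("buffer at %4u: kept\n", curr_off);
                    curr_off += b_size;
            }
            /* Prints: kept, kept, unmapped, unmapped -- only buffers
             * wholly past the truncation point are reaped. */
            return 0;
    }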
1846
1847/*
1848 * File a buffer on the given transaction list.
1849 */
1850void __jbd2_journal_file_buffer(struct journal_head *jh,
1851                        transaction_t *transaction, int jlist)
1852{
1853        struct journal_head **list = NULL;
1854        int was_dirty = 0;
1855        struct buffer_head *bh = jh2bh(jh);
1856
1857        J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
1858        assert_spin_locked(&transaction->t_journal->j_list_lock);
1859
1860        J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
1861        J_ASSERT_JH(jh, jh->b_transaction == transaction ||
1862                                jh->b_transaction == NULL);
1863
1864        if (jh->b_transaction && jh->b_jlist == jlist)
1865                return;
1866
1867        /* The following list of buffer states needs to be consistent
1868         * with __jbd_unexpected_dirty_buffer()'s handling of dirty
1869         * state. */
1870
1871        if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
1872            jlist == BJ_Shadow || jlist == BJ_Forget) {
1873                if (test_clear_buffer_dirty(bh) ||
1874                    test_clear_buffer_jbddirty(bh))
1875                        was_dirty = 1;
1876        }
1877
1878        if (jh->b_transaction)
1879                __jbd2_journal_temp_unlink_buffer(jh);
1880        jh->b_transaction = transaction;
1881
1882        switch (jlist) {
1883        case BJ_None:
1884                J_ASSERT_JH(jh, !jh->b_committed_data);
1885                J_ASSERT_JH(jh, !jh->b_frozen_data);
1886                return;
1887        case BJ_Metadata:
1888                transaction->t_nr_buffers++;
1889                list = &transaction->t_buffers;
1890                break;
1891        case BJ_Forget:
1892                list = &transaction->t_forget;
1893                break;
1894        case BJ_IO:
1895                list = &transaction->t_iobuf_list;
1896                break;
1897        case BJ_Shadow:
1898                list = &transaction->t_shadow_list;
1899                break;
1900        case BJ_LogCtl:
1901                list = &transaction->t_log_list;
1902                break;
1903        case BJ_Reserved:
1904                list = &transaction->t_reserved_list;
1905                break;
1906        }
1907
1908        __blist_add_buffer(list, jh);
1909        jh->b_jlist = jlist;
1910
1911        if (was_dirty)
1912                set_buffer_jbddirty(bh);
1913}
1914
1915void jbd2_journal_file_buffer(struct journal_head *jh,
1916                                transaction_t *transaction, int jlist)
1917{
1918        jbd_lock_bh_state(jh2bh(jh));
1919        spin_lock(&transaction->t_journal->j_list_lock);
1920        __jbd2_journal_file_buffer(jh, transaction, jlist);
1921        spin_unlock(&transaction->t_journal->j_list_lock);
1922        jbd_unlock_bh_state(jh2bh(jh));
1923}
1924
1925/*
1926 * Remove a buffer from its current buffer list in preparation for
1927 * dropping it from its current transaction entirely.  If the buffer has
1928 * already started to be used by a subsequent transaction, refile the
1929 * buffer on that transaction's metadata list.
1930 *
1931 * Called under journal->j_list_lock
1932 *
1933 * Called under jbd_lock_bh_state(jh2bh(jh))
1934 */
1935void __jbd2_journal_refile_buffer(struct journal_head *jh)
1936{
1937        int was_dirty;
1938        struct buffer_head *bh = jh2bh(jh);
1939
1940        J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
1941        if (jh->b_transaction)
1942                assert_spin_locked(&jh->b_transaction->t_journal->j_list_lock);
1943
1944        /* If the buffer is now unused, just drop it. */
1945        if (jh->b_next_transaction == NULL) {
1946                __jbd2_journal_unfile_buffer(jh);
1947                return;
1948        }
1949
1950        /*
1951         * It has been modified by a later transaction: add it to the new
1952         * transaction's metadata list.
1953         */
1954
1955        was_dirty = test_clear_buffer_jbddirty(bh);
1956        __jbd2_journal_temp_unlink_buffer(jh);
1957        jh->b_transaction = jh->b_next_transaction;
1958        jh->b_next_transaction = NULL;
1959        __jbd2_journal_file_buffer(jh, jh->b_transaction,
1960                                jh->b_modified ? BJ_Metadata : BJ_Reserved);
1961        J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);
1962
1963        if (was_dirty)
1964                set_buffer_jbddirty(bh);
1965}
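
The handover above is easiest to see on a concrete timeline. A hedged walk-through, with T1 committing and T2 running:

    /* Buffer B: b_transaction == T1 (committing),
     *           b_next_transaction == T2 (running, which re-modified B).
     * T1's commit path calls __jbd2_journal_refile_buffer(B):
     *   - B is unlinked from T1's list;
     *   - b_transaction = T2, b_next_transaction = NULL;
     *   - B is filed on T2 as BJ_Metadata if b_modified is set,
     *     otherwise as BJ_Reserved;
     *   - the jbddirty bit is preserved across the move.
     */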
1966
1967/*
1968 * For the unlocked version of this call, also make sure that any
1969 * hanging journal_head is cleaned up if necessary.
1970 *
1971 * __jbd2_journal_refile_buffer is usually called as part of a single locked
1972 * operation on a buffer_head, in which the caller is probably going to
1973 * be hooking the journal_head onto other lists.  In that case it is up
1974 * to the caller to remove the journal_head if necessary.  For the
1975 * unlocked jbd2_journal_refile_buffer call, the caller isn't going to be
1976 * doing anything else to the buffer so we need to do the cleanup
1977 * ourselves to avoid a jh leak.
1978 *
1979 * *** The journal_head may be freed by this call! ***
1980 */
1981void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh)
1982{
1983        struct buffer_head *bh = jh2bh(jh);
1984
1985        jbd_lock_bh_state(bh);
1986        spin_lock(&journal->j_list_lock);
1987
1988        __jbd2_journal_refile_buffer(jh);
1989        jbd_unlock_bh_state(bh);
1990        jbd2_journal_remove_journal_head(bh);
1991
1992        spin_unlock(&journal->j_list_lock);
1993        __brelse(bh);
1994}
1995
1996/*
1997 * File inode in the inode list of the handle's transaction
1998 */
1999int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode)
2000{
2001        transaction_t *transaction = handle->h_transaction;
2002        journal_t *journal = transaction->t_journal;
2003
2004        if (is_handle_aborted(handle))
2005                return -EIO;
2006
2007        jbd_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino,
2008                        transaction->t_tid);
2009
2010        /*
2011         * First check whether the inode is already on the transaction's
2012         * lists without taking the lock. Note that this check is safe
2013         * without the lock as we cannot race with somebody removing the inode
2014         * from the transaction. The reason is that we remove the inode from the
2015         * transaction only in journal_release_jbd_inode() and when we commit
2016         * the transaction. We are guarded from the first case by holding
2017         * a reference to the inode. We are safe against the second case
2018         * because if jinode->i_transaction == transaction, commit code
2019         * cannot touch the transaction because we hold a reference to it,
2020         * and if jinode->i_next_transaction == transaction, commit code
2021         * will only file the inode where we want it.
2022         */
2023        if (jinode->i_transaction == transaction ||
2024            jinode->i_next_transaction == transaction)
2025                return 0;
2026
2027        spin_lock(&journal->j_list_lock);
2028
2029        if (jinode->i_transaction == transaction ||
2030            jinode->i_next_transaction == transaction)
2031                goto done;
2032
2033        /* On some different transaction's list - should be
2034         * the committing one */
2035        if (jinode->i_transaction) {
2036                J_ASSERT(jinode->i_next_transaction == NULL);
2037                J_ASSERT(jinode->i_transaction ==
2038                                        journal->j_committing_transaction);
2039                jinode->i_next_transaction = transaction;
2040                goto done;
2041        }
2042        /* Not on any transaction list... */
2043        J_ASSERT(!jinode->i_next_transaction);
2044        jinode->i_transaction = transaction;
2045        list_add(&jinode->i_list, &transaction->t_inode_list);
2046done:
2047        spin_unlock(&journal->j_list_lock);
2048
2049        return 0;
2050}
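
The unlocked test followed by a re-check under j_list_lock is a double-checked pattern: the racy read filters the common case, and only the locked check is authoritative. A hedged generic sketch of that shape ('obj', 'on_list' and 'lock' are illustrative names, not kernel identifiers):

    if (on_list(obj))              /* unlocked: may be stale, but never
                                    * wrongly true (see comment above) */
            return 0;
    spin_lock(&lock);
    if (!on_list(obj))             /* authoritative re-check */
            list_add(&obj->node, &head);
    spin_unlock(&lock);
    return 0;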
2051
2052/*
2053 * This function must be called when inode is journaled in ordered mode
2054 * before truncation happens. It starts writeout of truncated part in
2055 * case it is in the committing transaction, so that we uphold the
2056 * ordered-mode consistency guarantees.
2057 */
2058int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode,
2059                                        loff_t new_size)
2060{
2061        journal_t *journal;
2062        transaction_t *commit_trans;
2063        int ret = 0;
2064
2065        if (!inode->i_transaction)      /* nothing committing; also avoids a NULL deref below */
2066                goto out;
2067        journal = inode->i_transaction->t_journal;
2068        spin_lock(&journal->j_state_lock);
2069        commit_trans = journal->j_committing_transaction;
2070        spin_unlock(&journal->j_state_lock);
2071        if (inode->i_transaction == commit_trans) {
2072                ret = filemap_fdatawrite_range(inode->i_vfs_inode->i_mapping,
2073                        new_size, LLONG_MAX);
2074                if (ret)
2075                        jbd2_journal_abort(journal, ret);
2076        }
2077out:
2078        return ret;
2079}
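
A hedged usage sketch: an ordered-mode filesystem would call this from its setattr path before shrinking i_size, roughly as follows (the 'ei->jinode' field name is illustrative; 'attr->ia_size' is the new size from struct iattr):

    /* Before truncating, start writeout of the soon-to-be-discarded
     * range if it belongs to the committing transaction. */
    err = jbd2_journal_begin_ordered_truncate(&ei->jinode,
                                              attr->ia_size);
    if (err)
            goto err_out;
    /* ... proceed with the actual truncate ... */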