Showing error 1373

User: Jiri Slaby
Error type: Leaving function in locked state
Error type description: Some lock is not unlocked on all paths of a function, so it is leaked
File location: fs/xfs/xfs_buf_item.c
Line in file: 1147
Project: Linux Kernel
Project version: 2.6.28
Tools: Stanse (1.2)
Entered: 2012-05-21 20:30:05 UTC


Source:

   1/*
   2 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
   3 * All Rights Reserved.
   4 *
   5 * This program is free software; you can redistribute it and/or
   6 * modify it under the terms of the GNU General Public License as
   7 * published by the Free Software Foundation.
   8 *
   9 * This program is distributed in the hope that it would be useful,
  10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 * GNU General Public License for more details.
  13 *
  14 * You should have received a copy of the GNU General Public License
  15 * along with this program; if not, write the Free Software Foundation,
  16 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  17 */
  18#include "xfs.h"
  19#include "xfs_fs.h"
  20#include "xfs_types.h"
  21#include "xfs_bit.h"
  22#include "xfs_log.h"
  23#include "xfs_inum.h"
  24#include "xfs_trans.h"
  25#include "xfs_sb.h"
  26#include "xfs_ag.h"
  27#include "xfs_dmapi.h"
  28#include "xfs_mount.h"
  29#include "xfs_buf_item.h"
  30#include "xfs_trans_priv.h"
  31#include "xfs_error.h"
  32
  33
  34kmem_zone_t        *xfs_buf_item_zone;
  35
  36#ifdef XFS_TRANS_DEBUG
  37/*
  38 * This function uses an alternate strategy for tracking the bytes
  39 * that the user requests to be logged.  This can then be used
  40 * in conjunction with the bli_orig array in the buf log item to
  41 * catch bugs in our callers' code.
  42 *
  43 * We also double check the bits set in xfs_buf_item_log using a
  44 * simple algorithm to check that every byte is accounted for.
  45 */
  46STATIC void
  47xfs_buf_item_log_debug(
  48        xfs_buf_log_item_t        *bip,
  49        uint                        first,  /* first byte offset logged (inclusive) */
  50        uint                        last)  /* last byte offset logged (inclusive) */
  51{
  52        uint        x;
  53        uint        byte;
  54        uint        nbytes;
  55        uint        chunk_num;
  56        uint        word_num;
  57        uint        bit_num;
  58        uint        bit_set;
  59        uint        *wordp;
  60
  61        ASSERT(bip->bli_logged != NULL);
  62        byte = first;
  63        nbytes = last - first + 1;  /* the range is inclusive at both ends */
  64        bfset(bip->bli_logged, first, nbytes);  /* presumably sets nbytes bits starting at bit 'first' -- confirm against bfset() */
  65        for (x = 0; x < nbytes; x++) {
  66                chunk_num = byte >> XFS_BLI_SHIFT;  /* chunk that contains this byte */
  67                word_num = chunk_num >> BIT_TO_WORD_SHIFT;  /* bitmap word holding that chunk's bit */
  68                bit_num = chunk_num & (NBWORD - 1);  /* bit position inside that word */
  69                wordp = &(bip->bli_format.blf_data_map[word_num]);
  70                bit_set = *wordp & (1 << bit_num);
  71                ASSERT(bit_set);  /* every byte in the range must fall in a chunk marked dirty */
  72                byte++;
  73        }
  74}
  75
  76/*
  77 * This function is called when we flush something into a buffer without
  78 * logging it.  This happens for things like inodes which are logged
  79 * separately from the buffer.
  80 */
  81void
  82xfs_buf_item_flush_log_debug(
  83        xfs_buf_t        *bp,
  84        uint                first,  /* first byte offset flushed (inclusive) */
  85        uint                last)  /* last byte offset flushed (inclusive) */
  86{
  87        xfs_buf_log_item_t        *bip;
  88        uint                        nbytes;
  89
  90        bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
  91        if ((bip == NULL) || (bip->bli_item.li_type != XFS_LI_BUF)) {  /* no buf log item at the head of the buffer's item list */
  92                return;
  93        }
  94
  95        ASSERT(bip->bli_logged != NULL);
  96        nbytes = last - first + 1;  /* the range is inclusive at both ends */
  97        bfset(bip->bli_logged, first, nbytes);  /* mark the range in bli_logged only; blf_data_map is not touched here */
  98}
  99
 100/*
 101 * This function is called to verify that our callers have logged
 102 * all the bytes that they changed.
 103 *
 104 * It does this by comparing the original copy of the buffer stored in
 105 * the buf log item's bli_orig array to the current copy of the buffer
 106 * and ensuring that all bytes which mismatch are set in the bli_logged
 107 * array of the buf log item.
 108 */
 109STATIC void
 110xfs_buf_item_log_check(
 111        xfs_buf_log_item_t        *bip)  /* buf log item whose buffer is to be verified */
 112{
 113        char                *orig;
 114        char                *buffer;
 115        int                x;
 116        xfs_buf_t        *bp;
 117
 118        ASSERT(bip->bli_orig != NULL);
 119        ASSERT(bip->bli_logged != NULL);
 120
 121        bp = bip->bli_buf;
 122        ASSERT(XFS_BUF_COUNT(bp) > 0);
 123        ASSERT(XFS_BUF_PTR(bp) != NULL);
 124        orig = bip->bli_orig;  /* saved copy of the buffer contents */
 125        buffer = XFS_BUF_PTR(bp);  /* current buffer contents */
 126        for (x = 0; x < XFS_BUF_COUNT(bp); x++) {
 127                if (orig[x] != buffer[x] && !btst(bip->bli_logged, x))  /* byte changed but was never marked in bli_logged */
 128                        cmn_err(CE_PANIC,  /* pointers are printed with %p, as xfs_buf_item_push() does; %x is for unsigned int */
 129        "xfs_buf_item_log_check bip %p buffer %p orig %p index %d",
 130                                bip, bp, orig, x);
 131        }
 132}
 133#else /* !XFS_TRANS_DEBUG: the two debug hooks below expand to nothing */
 134#define                xfs_buf_item_log_debug(x,y,z)
 135#define                xfs_buf_item_log_check(x)
 136#endif /* XFS_TRANS_DEBUG */
 137
 138STATIC void        xfs_buf_error_relse(xfs_buf_t *bp);
 139STATIC void        xfs_buf_do_callbacks(xfs_buf_t *bp, xfs_log_item_t *lip);
 140
 141/*
 142 * This returns the number of log iovecs needed to log the
 143 * given buf log item.
 144 *
 145 * It calculates this as 1 iovec for the buf log format structure
 146 * and 1 for each stretch of non-contiguous chunks to be logged.
 147 * Contiguous chunks are logged in a single iovec.
 148 *
 149 * If the XFS_BLI_STALE flag has been set, then log nothing.
 150 */
 151STATIC uint
 152xfs_buf_item_size(
 153        xfs_buf_log_item_t        *bip)
 154{
 155        uint                nvecs;
 156        int                next_bit;
 157        int                last_bit;
 158        xfs_buf_t        *bp;
 159
 160        ASSERT(atomic_read(&bip->bli_refcount) > 0);
 161        if (bip->bli_flags & XFS_BLI_STALE) {
 162                /*
 163                 * The buffer is stale, so all we need to log
 164                 * is the buf log format structure with the
 165                 * cancel flag in it.
 166                 */
 167                xfs_buf_item_trace("SIZE STALE", bip);
 168                ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL);
 169                return 1;
 170        }
 171
 172        bp = bip->bli_buf;
 173        ASSERT(bip->bli_flags & XFS_BLI_LOGGED);
 174        nvecs = 1;  /* one iovec for the buf log format structure */
 175        last_bit = xfs_next_bit(bip->bli_format.blf_data_map,
 176                                         bip->bli_format.blf_map_size, 0);
 177        ASSERT(last_bit != -1);  /* a logged, non-stale item must have at least one dirty chunk */
 178        nvecs++;  /* one iovec for the first run of dirty chunks */
 179        while (last_bit != -1) {
 180                /*
 181                 * This takes the bit number to start looking from and
 182                 * returns the next set bit from there.  It returns -1
 183                 * if there are no more bits set or the start bit is
 184                 * beyond the end of the bitmap.
 185                 */
 186                next_bit = xfs_next_bit(bip->bli_format.blf_data_map,
 187                                                 bip->bli_format.blf_map_size,
 188                                                 last_bit + 1);
 189                /*
 190                 * If we run out of bits, leave the loop,
 191                 * else if we find a new set of bits bump the number of vecs,
 192                 * else keep scanning the current set of bits.
 193                 */
 194                if (next_bit == -1) {
 195                        last_bit = -1;
 196                } else if (next_bit != last_bit + 1) {  /* gap in the bitmap: a new run starts */
 197                        last_bit = next_bit;
 198                        nvecs++;
 199                } else if (xfs_buf_offset(bp, next_bit * XFS_BLI_CHUNK) !=
 200                           (xfs_buf_offset(bp, last_bit * XFS_BLI_CHUNK) +
 201                            XFS_BLI_CHUNK)) {  /* adjacent bits, but the two chunks are not adjacent in memory */
 202                        last_bit = next_bit;
 203                        nvecs++;
 204                } else {
 205                        last_bit++;
 206                }
 207        }
 208
 209        xfs_buf_item_trace("SIZE NORM", bip);
 210        return nvecs;
 211}
 212
 213/*
 214 * This is called to fill in the vector of log iovecs for the
 215 * given log buf item.  It fills the first entry with a buf log
 216 * format structure, and the rest point to contiguous chunks
 217 * within the buffer.
 218 */
 219STATIC void
 220xfs_buf_item_format(
 221        xfs_buf_log_item_t        *bip,
 222        xfs_log_iovec_t                *log_vector)  /* iovec array to fill in, starting at entry 0 */
 223{
 224        uint                base_size;
 225        uint                nvecs;
 226        xfs_log_iovec_t        *vecp;
 227        xfs_buf_t        *bp;
 228        int                first_bit;
 229        int                last_bit;
 230        int                next_bit;
 231        uint                nbits;
 232        uint                buffer_offset;
 233
 234        ASSERT(atomic_read(&bip->bli_refcount) > 0);
 235        ASSERT((bip->bli_flags & XFS_BLI_LOGGED) ||
 236               (bip->bli_flags & XFS_BLI_STALE));
 237        bp = bip->bli_buf;
 238        vecp = log_vector;
 239
 240        /*
 241         * The size of the base structure is the size of the
 242         * declared structure plus the space for the extra words
 243         * of the bitmap.  We subtract one from the map size, because
 244         * the first element of the bitmap is accounted for in the
 245         * size of the base structure.
 246         */
 247        base_size =
 248                (uint)(sizeof(xfs_buf_log_format_t) +
 249                       ((bip->bli_format.blf_map_size - 1) * sizeof(uint)));
 250        vecp->i_addr = (xfs_caddr_t)&bip->bli_format;  /* iovec 0 is the format structure itself */
 251        vecp->i_len = base_size;
 252        XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BFORMAT);
 253        vecp++;
 254        nvecs = 1;
 255
 256        if (bip->bli_flags & XFS_BLI_STALE) {
 257                /*
 258                 * The buffer is stale, so all we need to log
 259                 * is the buf log format structure with the
 260                 * cancel flag in it.
 261                 */
 262                xfs_buf_item_trace("FORMAT STALE", bip);
 263                ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL);
 264                bip->bli_format.blf_size = nvecs;
 265                return;
 266        }
 267
 268        /*
 269         * Fill in an iovec for each set of contiguous chunks.
 270         */
 271        first_bit = xfs_next_bit(bip->bli_format.blf_data_map,
 272                                         bip->bli_format.blf_map_size, 0);
 273        ASSERT(first_bit != -1);
 274        last_bit = first_bit;  /* the current run covers bits first_bit..last_bit */
 275        nbits = 1;  /* length of the current run, in chunks */
 276        for (;;) {
 277                /*
 278                 * This takes the bit number to start looking from and
 279                 * returns the next set bit from there.  It returns -1
 280                 * if there are no more bits set or the start bit is
 281                 * beyond the end of the bitmap.
 282                 */
 283                next_bit = xfs_next_bit(bip->bli_format.blf_data_map,
 284                                                 bip->bli_format.blf_map_size,
 285                                                 (uint)last_bit + 1);
 286                /*
 287                 * If we run out of bits fill in the last iovec and get
 288                 * out of the loop.
 289                 * Else if we start a new set of bits then fill in the
 290                 * iovec for the series we were looking at and start
 291                 * counting the bits in the new one.
 292                 * Else we're still in the same set of bits so just
 293                 * keep counting and scanning.
 294                 */
 295                if (next_bit == -1) {  /* no more dirty chunks: emit the final run */
 296                        buffer_offset = first_bit * XFS_BLI_CHUNK;
 297                        vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
 298                        vecp->i_len = nbits * XFS_BLI_CHUNK;
 299                        XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BCHUNK);
 300                        nvecs++;
 301                        break;
 302                } else if (next_bit != last_bit + 1) {  /* gap in the bitmap: emit the run, start a new one */
 303                        buffer_offset = first_bit * XFS_BLI_CHUNK;
 304                        vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
 305                        vecp->i_len = nbits * XFS_BLI_CHUNK;
 306                        XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BCHUNK);
 307                        nvecs++;
 308                        vecp++;
 309                        first_bit = next_bit;
 310                        last_bit = next_bit;
 311                        nbits = 1;
 312                } else if (xfs_buf_offset(bp, next_bit << XFS_BLI_SHIFT) !=
 313                           (xfs_buf_offset(bp, last_bit << XFS_BLI_SHIFT) +
 314                            XFS_BLI_CHUNK)) {  /* adjacent bits, but the two chunks are not adjacent in memory */
 315                        buffer_offset = first_bit * XFS_BLI_CHUNK;
 316                        vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
 317                        vecp->i_len = nbits * XFS_BLI_CHUNK;
 318                        XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BCHUNK);
 319/* You would think we need to bump the nvecs here too, but we do not:
 320 * this number is used by recovery, and it gets confused by the boundary
 321 * split here.  (xfs_buf_item_size() does count this case as a new vec.)
 322 *                        nvecs++;
 323 */
 324                        vecp++;
 325                        first_bit = next_bit;
 326                        last_bit = next_bit;
 327                        nbits = 1;
 328                } else {
 329                        last_bit++;
 330                        nbits++;
 331                }
 332        }
 333        bip->bli_format.blf_size = nvecs;  /* vec count recorded in the logged format structure */
 334
 335        /*
 336         * Check to make sure everything is consistent.
 337         */
 338        xfs_buf_item_trace("FORMAT NORM", bip);
 339        xfs_buf_item_log_check(bip);  /* expands to nothing unless XFS_TRANS_DEBUG is defined */
 340}
 341
 342/*
 343 * This is called to pin the buffer associated with the buf log
 344 * item in memory so it cannot be written out.  Simply call xfs_bpin()
 345 * on the buffer to do this.
 346 */
 347STATIC void
 348xfs_buf_item_pin(
 349        xfs_buf_log_item_t        *bip)
 350{
 351        xfs_buf_t        *bp;
 352
 353        bp = bip->bli_buf;
 354        ASSERT(XFS_BUF_ISBUSY(bp));
 355        ASSERT(atomic_read(&bip->bli_refcount) > 0);
 356        ASSERT((bip->bli_flags & XFS_BLI_LOGGED) ||
 357               (bip->bli_flags & XFS_BLI_STALE));  /* only logged or stale items are pinned */
 358        xfs_buf_item_trace("PIN", bip);
 359        xfs_buftrace("XFS_PIN", bp);
 360        xfs_bpin(bp);
 361}
 362
 363
 364/*
 365 * This is called to unpin the buffer associated with the buf log
 366 * item which was previously pinned with a call to xfs_buf_item_pin().
 367 * Just call xfs_bunpin() on the buffer to do this.
 368 *
 369 * Also drop the reference to the buf item for the current transaction.
 370 * If the stale argument is nonzero and we are the last reference,
 371 * then free up the buf log item and unlock the buffer.
 372 */
 373STATIC void
 374xfs_buf_item_unpin(
 375        xfs_buf_log_item_t        *bip,
 376        int                        stale)
 377{
 378        xfs_mount_t        *mp;
 379        xfs_buf_t        *bp;
 380        int                freed;
 381
 382        bp = bip->bli_buf;
 383        ASSERT(bp != NULL);
 384        ASSERT(XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *) == bip);
 385        ASSERT(atomic_read(&bip->bli_refcount) > 0);
 386        xfs_buf_item_trace("UNPIN", bip);
 387        xfs_buftrace("XFS_UNPIN", bp);
 388
 389        freed = atomic_dec_and_test(&bip->bli_refcount);  /* nonzero when this drop took the refcount to zero */
 390        mp = bip->bli_item.li_mountp;
 391        xfs_bunpin(bp);
 392        if (freed && stale) {
 393                ASSERT(bip->bli_flags & XFS_BLI_STALE);
 394                ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
 395                ASSERT(!(XFS_BUF_ISDELAYWRITE(bp)));
 396                ASSERT(XFS_BUF_ISSTALE(bp));
 397                ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL);
 398                xfs_buf_item_trace("UNPIN STALE", bip);
 399                xfs_buftrace("XFS_UNPIN STALE", bp);
 400                /*
 401                 * If we get called here because of an IO error, we may
 402                 * or may not have the item on the AIL. xfs_trans_delete_ail()
 403                 * will take care of that situation.
 404                 * xfs_trans_delete_ail() drops the AIL lock.
 405                 */
 406                if (bip->bli_flags & XFS_BLI_STALE_INODE) {
 407                        xfs_buf_do_callbacks(bp, (xfs_log_item_t *)bip);
 408                        XFS_BUF_SET_FSPRIVATE(bp, NULL);
 409                        XFS_BUF_CLR_IODONE_FUNC(bp);
 410                } else {
 411                        spin_lock(&mp->m_ail_lock);  /* no unlock in this function: per the comment above, xfs_trans_delete_ail() drops it */
 412                        xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip);
 413                        xfs_buf_item_relse(bp);
 414                        ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL);
 415                }
 416                xfs_buf_relse(bp);
 417        }
 418}
 419
 420/*
 421 * This is called from uncommit in the forced-shutdown path.
 422 * We need to check to see if the reference count on the log item
 423 * is going to drop to zero.  If so, unpin will free the log item
 424 * so we need to free the item's descriptor (that points to the item)
 425 * in the transaction.
 426 */
 427STATIC void
 428xfs_buf_item_unpin_remove(
 429        xfs_buf_log_item_t        *bip,
 430        xfs_trans_t                *tp)
 431{
 432        xfs_buf_t                *bp;
 433        xfs_log_item_desc_t        *lidp;
 434        int                        stale = 0;  /* stays 0 unless the branch below runs and the descriptor has XFS_LID_BUF_STALE */
 435
 436        bp = bip->bli_buf;
 437        /*
 438         * will xfs_buf_item_unpin() call xfs_buf_item_relse()?
 439         */
 440        if ((atomic_read(&bip->bli_refcount) == 1) &&
 441            (bip->bli_flags & XFS_BLI_STALE)) {
 442                ASSERT(XFS_BUF_VALUSEMA(bip->bli_buf) <= 0);
 443                xfs_buf_item_trace("UNPIN REMOVE", bip);
 444                xfs_buftrace("XFS_UNPIN_REMOVE", bp);
 445                /*
 446                 * yes -- clear the xaction descriptor in-use flag
 447                 * and free the chunk if required.  We can safely
 448                 * do some work here and then call buf_item_unpin
 449                 * to do the rest because if the if is true, then
 450                 * we are holding the buffer locked so no one else
 451                 * will be able to bump up the refcount.
 452                 */
 453                lidp = xfs_trans_find_item(tp, (xfs_log_item_t *) bip);  /* result is dereferenced below without a NULL check */
 454                stale = lidp->lid_flags & XFS_LID_BUF_STALE;  /* read the flag before the descriptor is freed */
 455                xfs_trans_free_item(tp, lidp);
 456                /*
 457                 * Since the transaction no longer refers to the buffer,
 458                 * the buffer should no longer refer to the transaction.
 459                 */
 460                XFS_BUF_SET_FSPRIVATE2(bp, NULL);
 461        }
 462
 463        xfs_buf_item_unpin(bip, stale);
 464
 465        return;
 466}
 467
 468/*
 469 * This is called to attempt to lock the buffer associated with this
 470 * buf log item.  Don't sleep on the buffer lock.  Returns XFS_ITEM_PINNED
 471 * if the buffer is pinned, XFS_ITEM_LOCKED if XFS_BUF_CPSEMA() fails, and
 472 * otherwise takes a hold on the buffer and returns XFS_ITEM_SUCCESS.
 473 */
 474STATIC uint
 475xfs_buf_item_trylock(
 476        xfs_buf_log_item_t        *bip)
 477{
 478        xfs_buf_t        *bp;
 479
 480        bp = bip->bli_buf;
 481
 482        if (XFS_BUF_ISPINNED(bp)) {
 483                return XFS_ITEM_PINNED;
 484        }
 485
 486        if (!XFS_BUF_CPSEMA(bp)) {
 487                return XFS_ITEM_LOCKED;
 488        }
 489
 490        /*
 491         * Take a hold on the buffer now that it is locked.
 492         * NOTE(review): this comment used to describe removing the buffer
 493         * from the free list; the code here only calls XFS_BUF_HOLD().
 494         */
 495        XFS_BUF_HOLD(bp);
 496
 497        ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
 498        xfs_buf_item_trace("TRYLOCK SUCCESS", bip);
 499        return XFS_ITEM_SUCCESS;
 500}
 501
 502/*
 503 * Release the buffer associated with the buf log item.
 504 * If there is no dirty logged data associated with the
 505 * buffer recorded in the buf log item, then free the
 506 * buf log item and remove the reference to it in the
 507 * buffer.
 508 *
 509 * This call ignores the recursion count.  It is only called
 510 * when the buffer should REALLY be unlocked, regardless
 511 * of the recursion count.
 512 *
 513 * If the XFS_BLI_HOLD flag is set in the buf log item, then
 514 * free the log item if necessary but do not unlock the buffer.
 515 * This is for support of xfs_trans_bhold(). Make sure the
 516 * XFS_BLI_HOLD field is cleared if we don't free the item.
 517 */
 518STATIC void
 519xfs_buf_item_unlock(
 520        xfs_buf_log_item_t        *bip)
 521{
 522        int                aborted;  /* nonzero when XFS_LI_ABORTED is set on the item */
 523        xfs_buf_t        *bp;
 524        uint                hold;  /* XFS_BLI_HOLD bit, sampled before the item may be freed */
 525
 526        bp = bip->bli_buf;
 527        xfs_buftrace("XFS_UNLOCK", bp);
 528
 529        /*
 530         * Clear the buffer's association with this transaction.
 531         */
 532        XFS_BUF_SET_FSPRIVATE2(bp, NULL);
 533
 534        /*
 535         * If this is a transaction abort, don't return early.
 536         * Instead, allow the brelse to happen.
 537         * Normally it would be done for stale (cancelled) buffers
 538         * at unpin time, but we'll never go through the pin/unpin
 539         * cycle if we abort inside commit.
 540         */
 541        aborted = (bip->bli_item.li_flags & XFS_LI_ABORTED) != 0;
 542
 543        /*
 544         * If the buf item is marked stale, then don't do anything.
 545         * We'll unlock the buffer and free the buf item when the
 546         * buffer is unpinned for the last time.
 547         */
 548        if (bip->bli_flags & XFS_BLI_STALE) {
 549                bip->bli_flags &= ~XFS_BLI_LOGGED;
 550                xfs_buf_item_trace("UNLOCK STALE", bip);
 551                ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL);
 552                if (!aborted)
 553                        return;  /* stale and not aborted: cleanup is left to the final unpin */
 554        }
 555
 556        /*
 557         * Drop the transaction's reference to the log item if
 558         * it was not logged as part of the transaction.  Otherwise
 559         * we'll drop the reference in xfs_buf_item_unpin() when
 560         * the transaction is really through with the buffer.
 561         */
 562        if (!(bip->bli_flags & XFS_BLI_LOGGED)) {  /* also true for an aborted stale item, whose LOGGED flag was cleared above */
 563                atomic_dec(&bip->bli_refcount);
 564        } else {
 565                /*
 566                 * Clear the logged flag since this is per
 567                 * transaction state.
 568                 */
 569                bip->bli_flags &= ~XFS_BLI_LOGGED;
 570        }
 571
 572        /*
 573         * Before possibly freeing the buf item, determine if we should
 574         * release the buffer at the end of this routine.
 575         */
 576        hold = bip->bli_flags & XFS_BLI_HOLD;
 577        xfs_buf_item_trace("UNLOCK", bip);
 578
 579        /*
 580         * If the buf item isn't tracking any data, free it.
 581         * Otherwise, if XFS_BLI_HOLD is set clear it.
 582         */
 583        if (xfs_bitmap_empty(bip->bli_format.blf_data_map,
 584                             bip->bli_format.blf_map_size)) {
 585                xfs_buf_item_relse(bp);  /* bip is not referenced again after this call */
 586        } else if (hold) {
 587                bip->bli_flags &= ~XFS_BLI_HOLD;
 588        }
 589
 590        /*
 591         * Release the buffer if XFS_BLI_HOLD was not set.
 592         */
 593        if (!hold) {
 594                xfs_buf_relse(bp);
 595        }
 596}
 597
 598/*
 599 * This is called to find out where the oldest active copy of the
 600 * buf log item in the on disk log resides now that the last log
 601 * write of it completed at the given lsn.
 602 * We always re-log all the dirty data in a buffer, so usually the
 603 * latest copy in the on disk log is the only one that matters.  For
 604 * those cases we simply return the given lsn.
 605 *
 606 * The one exception to this is for buffers full of newly allocated
 607 * inodes.  These buffers are only relogged with the XFS_BLI_INODE_BUF
 608 * flag set, indicating that only the di_next_unlinked fields from the
 609 * inodes in the buffers will be replayed during recovery.  If the
 610 * original newly allocated inode images have not yet been flushed
 611 * when the buffer is so relogged, then we need to make sure that we
 612 * keep the old images in the 'active' portion of the log.  We do this
 613 * by returning the original lsn of that transaction here rather than
 614 * the current one.
 615 */
 616STATIC xfs_lsn_t
 617xfs_buf_item_committed(
 618        xfs_buf_log_item_t        *bip,
 619        xfs_lsn_t                lsn)  /* lsn of the log write that just completed */
 620{
 621        xfs_buf_item_trace("COMMITTED", bip);
 622        if ((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) &&
 623            (bip->bli_item.li_lsn != 0)) {  /* inode allocation buffer that already has an lsn recorded */
 624                return bip->bli_item.li_lsn;  /* keep the earlier lsn instead of the new one */
 625        }
 626        return (lsn);
 627}
 628
 629/*
 630 * This is called to asynchronously write the buffer associated with this
 631 * buf log item out to disk. The buffer will already have been locked by
 632 * a successful call to xfs_buf_item_trylock().  If the buffer is still
 633 * delayed-write, get it going out to disk with a call to xfs_bawrite();
 634 * an error from it is only logged.  Otherwise just release the buffer.
 635 */
 636STATIC void
 637xfs_buf_item_push(
 638        xfs_buf_log_item_t        *bip)
 639{
 640        xfs_buf_t        *bp;
 641
 642        ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
 643        xfs_buf_item_trace("PUSH", bip);
 644
 645        bp = bip->bli_buf;
 646
 647        if (XFS_BUF_ISDELAYWRITE(bp)) {
 648                int        error;
 649                error = xfs_bawrite(bip->bli_item.li_mountp, bp);
 650                if (error)  /* the failure is reported but not propagated; this function returns void */
 651                        xfs_fs_cmn_err(CE_WARN, bip->bli_item.li_mountp,
 652                        "xfs_buf_item_push: pushbuf error %d on bip %p, bp %p",
 653                                        error, bip, bp);
 654        } else {
 655                xfs_buf_relse(bp);
 656        }
 657}
 658
 659/* ARGSUSED */
 660STATIC void
 661xfs_buf_item_committing(xfs_buf_log_item_t *bip, xfs_lsn_t commit_lsn)
 662{  /* empty body: this handler does nothing and ignores both arguments */
 663}
 664
 665/*
 666 * This is the ops vector shared by all buf log items.  Each handler is cast to the generic xfs_log_item_t signature.
 667 */
 668static struct xfs_item_ops xfs_buf_item_ops = {
 669        .iop_size        = (uint(*)(xfs_log_item_t*))xfs_buf_item_size,
 670        .iop_format        = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
 671                                        xfs_buf_item_format,
 672        .iop_pin        = (void(*)(xfs_log_item_t*))xfs_buf_item_pin,
 673        .iop_unpin        = (void(*)(xfs_log_item_t*, int))xfs_buf_item_unpin,
 674        .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t *))
 675                                        xfs_buf_item_unpin_remove,
 676        .iop_trylock        = (uint(*)(xfs_log_item_t*))xfs_buf_item_trylock,
 677        .iop_unlock        = (void(*)(xfs_log_item_t*))xfs_buf_item_unlock,
 678        .iop_committed        = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
 679                                        xfs_buf_item_committed,
 680        .iop_push        = (void(*)(xfs_log_item_t*))xfs_buf_item_push,
 681        .iop_pushbuf        = NULL,  /* buf log items supply no pushbuf handler */
 682        .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
 683                                        xfs_buf_item_committing
 684};
 685
 686
 687/*
 688 * Allocate a new buf log item to go with the given buffer.
 689 * Set the buffer's b_fsprivate field to point to the new
 690 * buf log item.  If there are other items attached to the
 691 * buffer (see xfs_buf_attach_iodone() below), then put the
 692 * buf log item at the front.
 693 */
 694void
 695xfs_buf_item_init(
 696        xfs_buf_t        *bp,
 697        xfs_mount_t        *mp)
 698{
 699        xfs_log_item_t                *lip;
 700        xfs_buf_log_item_t        *bip;
 701        int                        chunks;
 702        int                        map_size;
 703
 704        /*
 705         * Check to see if there is already a buf log item for
 706         * this buffer.  If there is, it is guaranteed to be
 707         * the first.  If we do already have one, there is
 708         * nothing to do here so return.
 709         */
 710        if (XFS_BUF_FSPRIVATE3(bp, xfs_mount_t *) != mp)
 711                XFS_BUF_SET_FSPRIVATE3(bp, mp);
 712        XFS_BUF_SET_BDSTRAT_FUNC(bp, xfs_bdstrat_cb);  /* set even when the early return below is taken */
 713        if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) {
 714                lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
 715                if (lip->li_type == XFS_LI_BUF) {
 716                        return;
 717                }
 718        }
 719
 720        /*
 721         * chunks is the number of XFS_BLI_CHUNK size pieces
 722         * the buffer can be divided into. Make sure not to
 723         * truncate any pieces.  map_size is the size of the
 724         * bitmap needed to describe the chunks of the buffer.
 725         */
 726        chunks = (int)((XFS_BUF_COUNT(bp) + (XFS_BLI_CHUNK - 1)) >> XFS_BLI_SHIFT);  /* rounds up to whole chunks */
 727        map_size = (int)((chunks + NBWORD) >> BIT_TO_WORD_SHIFT);  /* bitmap size in words; NOTE(review): adds NBWORD, not NBWORD - 1, so an exact multiple presumably gets one spare word -- confirm */
 728
 729        bip = (xfs_buf_log_item_t*)kmem_zone_zalloc(xfs_buf_item_zone,
 730                                                    KM_SLEEP);  /* result is used below without a NULL check */
 731        bip->bli_item.li_type = XFS_LI_BUF;
 732        bip->bli_item.li_ops = &xfs_buf_item_ops;
 733        bip->bli_item.li_mountp = mp;
 734        bip->bli_buf = bp;
 735        xfs_buf_hold(bp);  /* presumably takes a buffer reference on behalf of the new log item -- confirm */
 736        bip->bli_format.blf_type = XFS_LI_BUF;
 737        bip->bli_format.blf_blkno = (__int64_t)XFS_BUF_ADDR(bp);
 738        bip->bli_format.blf_len = (ushort)BTOBB(XFS_BUF_COUNT(bp));
 739        bip->bli_format.blf_map_size = map_size;
 740#ifdef XFS_BLI_TRACE
 741        bip->bli_trace = ktrace_alloc(XFS_BLI_TRACE_SIZE, KM_NOFS);
 742#endif
 743
 744#ifdef XFS_TRANS_DEBUG
 745        /*
 746         * Allocate the arrays for tracking what needs to be logged
 747         * and what our callers request to be logged.  bli_orig
 748         * holds a copy of the original, clean buffer for comparison
 749         * against, and bli_logged keeps a 1 bit flag per byte in
 750         * the buffer to indicate which bytes the callers have asked
 751         * to have logged.
 752         */
 753        bip->bli_orig = (char *)kmem_alloc(XFS_BUF_COUNT(bp), KM_SLEEP);
 754        memcpy(bip->bli_orig, XFS_BUF_PTR(bp), XFS_BUF_COUNT(bp));
 755        bip->bli_logged = (char *)kmem_zalloc(XFS_BUF_COUNT(bp) / NBBY, KM_SLEEP);  /* one bit per buffer byte, hence the division by NBBY */
 756#endif
 757
 758        /*
 759         * Put the buf item into the list of items attached to the
 760         * buffer at the front.
 761         */
 762        if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) {
 763                bip->bli_item.li_bio_list =
 764                                XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);  /* chain the previously attached items behind the new one */
 765        }
 766        XFS_BUF_SET_FSPRIVATE(bp, bip);
 767}
 768
 769
 770/*
 771 * Mark bytes first through last inclusive as dirty in the buf
 772 * item's bitmap.
 773 */
 774void
 775xfs_buf_item_log(
 776        xfs_buf_log_item_t        *bip,
 777        uint                        first,
 778        uint                        last)
 779{
 780        uint                first_bit;
 781        uint                last_bit;
 782        uint                bits_to_set;
 783        uint                bits_set;
 784        uint                word_num;
 785        uint                *wordp;
 786        uint                bit;
 787        uint                end_bit;
 788        uint                mask;
 789
 790        /*
 791         * Mark the item as having some dirty data for
 792         * quick reference in xfs_buf_item_dirty.
 793         */
 794        bip->bli_flags |= XFS_BLI_DIRTY;
 795
 796        /*
 797         * Convert byte offsets to bit numbers.
 798         */
 799        first_bit = first >> XFS_BLI_SHIFT;
 800        last_bit = last >> XFS_BLI_SHIFT;
 801
 802        /*
 803         * Calculate the total number of bits to be set.
 804         */
 805        bits_to_set = last_bit - first_bit + 1;
 806
 807        /*
 808         * Get a pointer to the first word in the bitmap
 809         * to set a bit in.
 810         */
 811        word_num = first_bit >> BIT_TO_WORD_SHIFT;
 812        wordp = &(bip->bli_format.blf_data_map[word_num]);
 813
 814        /*
 815         * Calculate the starting bit in the first word.
 816         */
 817        bit = first_bit & (uint)(NBWORD - 1);
 818
 819        /*
 820         * First set any bits in the first word of our range.
 821         * If it starts at bit 0 of the word, it will be
 822         * set below rather than here.  That is what the variable
 823         * bit tells us. The variable bits_set tracks the number
 824         * of bits that have been set so far.  End_bit is the number
 825         * of the last bit to be set in this word plus one.
 826         */
 827        if (bit) {
 828                end_bit = MIN(bit + bits_to_set, (uint)NBWORD);
 829                mask = ((1 << (end_bit - bit)) - 1) << bit;
 830                *wordp |= mask;
 831                wordp++;
 832                bits_set = end_bit - bit;
 833        } else {
 834                bits_set = 0;
 835        }
 836
 837        /*
 838         * Now set bits a whole word at a time that are between
 839         * first_bit and last_bit.
 840         */
 841        while ((bits_to_set - bits_set) >= NBWORD) {
 842                *wordp |= 0xffffffff;
 843                bits_set += NBWORD;
 844                wordp++;
 845        }
 846
 847        /*
 848         * Finally, set any bits left to be set in one last partial word.
 849         */
 850        end_bit = bits_to_set - bits_set;
 851        if (end_bit) {
 852                mask = (1 << end_bit) - 1;
 853                *wordp |= mask;
 854        }
 855
 856        xfs_buf_item_log_debug(bip, first, last);
 857}
 858
 859
 860/*
 861 * Return 1 if the buffer has some data that has been logged (at any
 862 * point, not just the current transaction) and 0 if not.
 863 */
 864uint
 865xfs_buf_item_dirty(
 866        xfs_buf_log_item_t        *bip)
 867{
 868        return (bip->bli_flags & XFS_BLI_DIRTY);
 869}
 870
 871STATIC void
 872xfs_buf_item_free(
 873        xfs_buf_log_item_t        *bip)
 874{
 875#ifdef XFS_TRANS_DEBUG
 876        kmem_free(bip->bli_orig);
 877        kmem_free(bip->bli_logged);
 878#endif /* XFS_TRANS_DEBUG */
 879
 880#ifdef XFS_BLI_TRACE
 881        ktrace_free(bip->bli_trace);
 882#endif
 883        kmem_zone_free(xfs_buf_item_zone, bip);
 884}
 885
 886/*
 887 * This is called when the buf log item is no longer needed.  It should
 888 * free the buf log item associated with the given buffer and clear
 889 * the buffer's pointer to the buf log item.  If there are no more
 890 * items in the list, clear the b_iodone field of the buffer (see
 891 * xfs_buf_attach_iodone() below).
 892 */
 893void
 894xfs_buf_item_relse(
 895        xfs_buf_t        *bp)
 896{
 897        xfs_buf_log_item_t        *bip;
 898
 899        xfs_buftrace("XFS_RELSE", bp);
 900        bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
 901        XFS_BUF_SET_FSPRIVATE(bp, bip->bli_item.li_bio_list);
 902        if ((XFS_BUF_FSPRIVATE(bp, void *) == NULL) &&
 903            (XFS_BUF_IODONE_FUNC(bp) != NULL)) {
 904                XFS_BUF_CLR_IODONE_FUNC(bp);
 905        }
 906        xfs_buf_rele(bp);
 907        xfs_buf_item_free(bip);
 908}
 909
 910
 911/*
 912 * Add the given log item with its callback to the list of callbacks
 913 * to be called when the buffer's I/O completes.  If it is not set
 914 * already, set the buffer's b_iodone() routine to be
 915 * xfs_buf_iodone_callbacks() and link the log item into the list of
 916 * items rooted at b_fsprivate.  Items are always added as the second
 917 * entry in the list if there is a first, because the buf item code
 918 * assumes that the buf log item is first.
 919 */
 920void
 921xfs_buf_attach_iodone(
 922        xfs_buf_t        *bp,
 923        void                (*cb)(xfs_buf_t *, xfs_log_item_t *),
 924        xfs_log_item_t        *lip)
 925{
 926        xfs_log_item_t        *head_lip;
 927
 928        ASSERT(XFS_BUF_ISBUSY(bp));
 929        ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
 930
 931        lip->li_cb = cb;
 932        if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) {
 933                head_lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
 934                lip->li_bio_list = head_lip->li_bio_list;
 935                head_lip->li_bio_list = lip;
 936        } else {
 937                XFS_BUF_SET_FSPRIVATE(bp, lip);
 938        }
 939
 940        ASSERT((XFS_BUF_IODONE_FUNC(bp) == xfs_buf_iodone_callbacks) ||
 941               (XFS_BUF_IODONE_FUNC(bp) == NULL));
 942        XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks);
 943}
 944
 945STATIC void
 946xfs_buf_do_callbacks(
 947        xfs_buf_t        *bp,
 948        xfs_log_item_t        *lip)
 949{
 950        xfs_log_item_t        *nlip;
 951
 952        while (lip != NULL) {
 953                nlip = lip->li_bio_list;
 954                ASSERT(lip->li_cb != NULL);
 955                /*
 956                 * Clear the next pointer so we don't have any
 957                 * confusion if the item is added to another buf.
 958                 * Don't touch the log item after calling its
 959                 * callback, because it could have freed itself.
 960                 */
 961                lip->li_bio_list = NULL;
 962                lip->li_cb(bp, lip);
 963                lip = nlip;
 964        }
 965}
 966
 967/*
 968 * This is the iodone() function for buffers which have had callbacks
 969 * attached to them by xfs_buf_attach_iodone().  It should remove each
 970 * log item from the buffer's list and call the callback of each in turn.
 971 * When done, the buffer's fsprivate field is set to NULL and the buffer
 972 * is unlocked with a call to iodone().
 973 */
 974void
 975xfs_buf_iodone_callbacks(
 976        xfs_buf_t        *bp)
 977{
 978        xfs_log_item_t        *lip;
 979        static ulong        lasttime;
 980        static xfs_buftarg_t *lasttarg;
 981        xfs_mount_t        *mp;
 982
 983        ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
 984        lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
 985
 986        if (XFS_BUF_GETERROR(bp) != 0) {
 987                /*
 988                 * If we've already decided to shutdown the filesystem
 989                 * because of IO errors, there's no point in giving this
 990                 * a retry.
 991                 */
 992                mp = lip->li_mountp;
 993                if (XFS_FORCED_SHUTDOWN(mp)) {
 994                        ASSERT(XFS_BUF_TARGET(bp) == mp->m_ddev_targp);
 995                        XFS_BUF_SUPER_STALE(bp);
 996                        xfs_buftrace("BUF_IODONE_CB", bp);
 997                        xfs_buf_do_callbacks(bp, lip);
 998                        XFS_BUF_SET_FSPRIVATE(bp, NULL);
 999                        XFS_BUF_CLR_IODONE_FUNC(bp);
1000
1001                        /*
1002                         * XFS_SHUT flag gets set when we go thru the
1003                         * entire buffer cache and deliberately start
1004                         * throwing away delayed write buffers.
1005                         * Since there's no biowait done on those,
1006                         * we should just brelse them.
1007                         */
1008                        if (XFS_BUF_ISSHUT(bp)) {
1009                            XFS_BUF_UNSHUT(bp);
1010                                xfs_buf_relse(bp);
1011                        } else {
1012                                xfs_biodone(bp);
1013                        }
1014
1015                        return;
1016                }
1017
1018                if ((XFS_BUF_TARGET(bp) != lasttarg) ||
1019                    (time_after(jiffies, (lasttime + 5*HZ)))) {
1020                        lasttime = jiffies;
1021                        cmn_err(CE_ALERT, "Device %s, XFS metadata write error"
1022                                        " block 0x%llx in %s",
1023                                XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)),
1024                              (__uint64_t)XFS_BUF_ADDR(bp), mp->m_fsname);
1025                }
1026                lasttarg = XFS_BUF_TARGET(bp);
1027
1028                if (XFS_BUF_ISASYNC(bp)) {
1029                        /*
1030                         * If the write was asynchronous then noone will be
1031                         * looking for the error.  Clear the error state
1032                         * and write the buffer out again delayed write.
1033                         *
1034                         * XXXsup This is OK, so long as we catch these
1035                         * before we start the umount; we don't want these
1036                         * DELWRI metadata bufs to be hanging around.
1037                         */
1038                        XFS_BUF_ERROR(bp,0); /* errno of 0 unsets the flag */
1039
1040                        if (!(XFS_BUF_ISSTALE(bp))) {
1041                                XFS_BUF_DELAYWRITE(bp);
1042                                XFS_BUF_DONE(bp);
1043                                XFS_BUF_SET_START(bp);
1044                        }
1045                        ASSERT(XFS_BUF_IODONE_FUNC(bp));
1046                        xfs_buftrace("BUF_IODONE ASYNC", bp);
1047                        xfs_buf_relse(bp);
1048                } else {
1049                        /*
1050                         * If the write of the buffer was not asynchronous,
1051                         * then we want to make sure to return the error
1052                         * to the caller of bwrite().  Because of this we
1053                         * cannot clear the B_ERROR state at this point.
1054                         * Instead we install a callback function that
1055                         * will be called when the buffer is released, and
1056                         * that routine will clear the error state and
1057                         * set the buffer to be written out again after
1058                         * some delay.
1059                         */
1060                        /* We actually overwrite the existing b-relse
1061                           function at times, but we're gonna be shutting down
1062                           anyway. */
1063                        XFS_BUF_SET_BRELSE_FUNC(bp,xfs_buf_error_relse);
1064                        XFS_BUF_DONE(bp);
1065                        XFS_BUF_FINISH_IOWAIT(bp);
1066                }
1067                return;
1068        }
1069#ifdef XFSERRORDEBUG
1070        xfs_buftrace("XFS BUFCB NOERR", bp);
1071#endif
1072        xfs_buf_do_callbacks(bp, lip);
1073        XFS_BUF_SET_FSPRIVATE(bp, NULL);
1074        XFS_BUF_CLR_IODONE_FUNC(bp);
1075        xfs_biodone(bp);
1076}
1077
1078/*
1079 * This is a callback routine attached to a buffer which gets an error
1080 * when being written out synchronously.
1081 */
1082STATIC void
1083xfs_buf_error_relse(
1084        xfs_buf_t        *bp)
1085{
1086        xfs_log_item_t        *lip;
1087        xfs_mount_t        *mp;
1088
1089        lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
1090        mp = (xfs_mount_t *)lip->li_mountp;
1091        ASSERT(XFS_BUF_TARGET(bp) == mp->m_ddev_targp);
1092
1093        XFS_BUF_STALE(bp);
1094        XFS_BUF_DONE(bp);
1095        XFS_BUF_UNDELAYWRITE(bp);
1096        XFS_BUF_ERROR(bp,0);
1097        xfs_buftrace("BUF_ERROR_RELSE", bp);
1098        if (! XFS_FORCED_SHUTDOWN(mp))
1099                xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
1100        /*
1101         * We have to unpin the pinned buffers so do the
1102         * callbacks.
1103         */
1104        xfs_buf_do_callbacks(bp, lip);
1105        XFS_BUF_SET_FSPRIVATE(bp, NULL);
1106        XFS_BUF_CLR_IODONE_FUNC(bp);
1107        XFS_BUF_SET_BRELSE_FUNC(bp,NULL);
1108        xfs_buf_relse(bp);
1109}
1110
1111
1112/*
1113 * This is the iodone() function for buffers which have been
1114 * logged.  It is called when they are eventually flushed out.
1115 * It should remove the buf item from the AIL, and free the buf item.
1116 * It is called by xfs_buf_iodone_callbacks() above which will take
1117 * care of cleaning up the buffer itself.
1118 */
1119/* ARGSUSED */
1120void
1121xfs_buf_iodone(
1122        xfs_buf_t                *bp,
1123        xfs_buf_log_item_t        *bip)
1124{
1125        struct xfs_mount        *mp;
1126
1127        ASSERT(bip->bli_buf == bp);
1128
1129        xfs_buf_rele(bp);
1130        mp = bip->bli_item.li_mountp;
1131
1132        /*
1133         * If we are forcibly shutting down, this may well be
1134         * off the AIL already. That's because we simulate the
1135         * log-committed callbacks to unpin these buffers. Or we may never
1136         * have put this item on AIL because of the transaction was
1137         * aborted forcibly. xfs_trans_delete_ail() takes care of these.
1138         *
1139         * Either way, AIL is useless if we're forcing a shutdown.
1140         */
1141        spin_lock(&mp->m_ail_lock);
1142        /*
1143         * xfs_trans_delete_ail() drops the AIL lock.
1144         */
1145        xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip);
1146        xfs_buf_item_free(bip);
1147}
1148
1149#if defined(XFS_BLI_TRACE)
1150void
1151xfs_buf_item_trace(
1152        char                        *id,
1153        xfs_buf_log_item_t        *bip)
1154{
1155        xfs_buf_t                *bp;
1156        ASSERT(bip->bli_trace != NULL);
1157
1158        bp = bip->bli_buf;
1159        ktrace_enter(bip->bli_trace,
1160                     (void *)id,
1161                     (void *)bip->bli_buf,
1162                     (void *)((unsigned long)bip->bli_flags),
1163                     (void *)((unsigned long)bip->bli_recur),
1164                     (void *)((unsigned long)atomic_read(&bip->bli_refcount)),
1165                     (void *)((unsigned long)
1166                                (0xFFFFFFFF & XFS_BUF_ADDR(bp) >> 32)),
1167                     (void *)((unsigned long)(0xFFFFFFFF & XFS_BUF_ADDR(bp))),
1168                     (void *)((unsigned long)XFS_BUF_COUNT(bp)),
1169                     (void *)((unsigned long)XFS_BUF_BFLAGS(bp)),
1170                     XFS_BUF_FSPRIVATE(bp, void *),
1171                     XFS_BUF_FSPRIVATE2(bp, void *),
1172                     (void *)(unsigned long)XFS_BUF_ISPINNED(bp),
1173                     (void *)XFS_BUF_IODONE_FUNC(bp),
1174                     (void *)((unsigned long)(XFS_BUF_VALUSEMA(bp))),
1175                     (void *)bip->bli_item.li_desc,
1176                     (void *)((unsigned long)bip->bli_item.li_flags));
1177}
1178#endif /* XFS_BLI_TRACE */