Showing error 1091

User: Jiri Slaby
Error type: Leaving function in locked state
Error type description: A lock acquired in a function is not released on every exit path, so the function can return with the lock still held (the lock is leaked)
File location: drivers/scsi/scsi_lib.c
Line in file: 1727
Project: Linux Kernel
Project version: 2.6.28
Tools: Undetermined 1
Entered: 2012-03-04 17:07:06 UTC
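
The sketch below illustrates the error class being reported: a function acquires a spinlock and then returns on one path without releasing it, leaving the caller with the lock held. It is a minimal, hypothetical example (demo_leaves_lock_held and demo_balanced are made-up names) and is not the code flagged at line 1727; it only shows the shape of the defect and of a balanced variant.

#include <linux/errno.h>
#include <linux/spinlock.h>

/* Hypothetical illustration only; not taken from scsi_lib.c. */
static int demo_leaves_lock_held(spinlock_t *lock, int fail)
{
        unsigned long flags;

        spin_lock_irqsave(lock, flags);

        if (fail)
                return -EIO;    /* bug: returns with *lock still held */

        spin_unlock_irqrestore(lock, flags);
        return 0;
}

/* Balanced variant: every exit path releases the lock. */
static int demo_balanced(spinlock_t *lock, int fail)
{
        unsigned long flags;
        int ret = 0;

        spin_lock_irqsave(lock, flags);
        if (fail)
                ret = -EIO;
        spin_unlock_irqrestore(lock, flags);

        return ret;
}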


Source:

   1/*
   2 *  scsi_lib.c Copyright (C) 1999 Eric Youngdale
   3 *
   4 *  SCSI queueing library.
   5 *      Initial versions: Eric Youngdale (eric@andante.org).
   6 *                        Based upon conversations with large numbers
   7 *                        of people at Linux Expo.
   8 */
   9
  10#include <linux/bio.h>
  11#include <linux/bitops.h>
  12#include <linux/blkdev.h>
  13#include <linux/completion.h>
  14#include <linux/kernel.h>
  15#include <linux/mempool.h>
  16#include <linux/slab.h>
  17#include <linux/init.h>
  18#include <linux/pci.h>
  19#include <linux/delay.h>
  20#include <linux/hardirq.h>
  21#include <linux/scatterlist.h>
  22
  23#include <scsi/scsi.h>
  24#include <scsi/scsi_cmnd.h>
  25#include <scsi/scsi_dbg.h>
  26#include <scsi/scsi_device.h>
  27#include <scsi/scsi_driver.h>
  28#include <scsi/scsi_eh.h>
  29#include <scsi/scsi_host.h>
  30
  31#include "scsi_priv.h"
  32#include "scsi_logging.h"
  33
  34
  35#define SG_MEMPOOL_NR                ARRAY_SIZE(scsi_sg_pools)
  36#define SG_MEMPOOL_SIZE                2
  37
  38struct scsi_host_sg_pool {
  39        size_t                size;
  40        char                *name;
  41        struct kmem_cache        *slab;
  42        mempool_t        *pool;
  43};
  44
  45#define SP(x) { x, "sgpool-" __stringify(x) }
  46#if (SCSI_MAX_SG_SEGMENTS < 32)
  47#error SCSI_MAX_SG_SEGMENTS is too small (must be 32 or greater)
  48#endif
  49static struct scsi_host_sg_pool scsi_sg_pools[] = {
  50        SP(8),
  51        SP(16),
  52#if (SCSI_MAX_SG_SEGMENTS > 32)
  53        SP(32),
  54#if (SCSI_MAX_SG_SEGMENTS > 64)
  55        SP(64),
  56#if (SCSI_MAX_SG_SEGMENTS > 128)
  57        SP(128),
  58#if (SCSI_MAX_SG_SEGMENTS > 256)
  59#error SCSI_MAX_SG_SEGMENTS is too large (256 MAX)
  60#endif
  61#endif
  62#endif
  63#endif
  64        SP(SCSI_MAX_SG_SEGMENTS)
  65};
  66#undef SP
  67
  68struct kmem_cache *scsi_sdb_cache;
  69
  70static void scsi_run_queue(struct request_queue *q);
  71
  72/*
  73 * Function:        scsi_unprep_request()
  74 *
  75 * Purpose:        Remove all preparation done for a request, including its
  76 *                associated scsi_cmnd, so that it can be requeued.
  77 *
  78 * Arguments:        req        - request to unprepare
  79 *
  80 * Lock status:        Assumed that no locks are held upon entry.
  81 *
  82 * Returns:        Nothing.
  83 */
  84static void scsi_unprep_request(struct request *req)
  85{
  86        struct scsi_cmnd *cmd = req->special;
  87
  88        req->cmd_flags &= ~REQ_DONTPREP;
  89        req->special = NULL;
  90
  91        scsi_put_command(cmd);
  92}
  93
  94/*
  95 * Function:    scsi_queue_insert()
  96 *
  97 * Purpose:     Insert a command in the midlevel queue.
  98 *
  99 * Arguments:   cmd    - command that we are adding to queue.
 100 *              reason - why we are inserting command to queue.
 101 *
 102 * Lock status: Assumed that lock is not held upon entry.
 103 *
 104 * Returns:     Nothing.
 105 *
 106 * Notes:       We do this for one of two cases.  Either the host is busy
 107 *              and it cannot accept any more commands for the time being,
 108 *              or the device returned QUEUE_FULL and can accept no more
 109 *              commands.
 110 * Notes:       This could be called either from an interrupt context or a
 111 *              normal process context.
 112 */
 113int scsi_queue_insert(struct scsi_cmnd *cmd, int reason)
 114{
 115        struct Scsi_Host *host = cmd->device->host;
 116        struct scsi_device *device = cmd->device;
 117        struct scsi_target *starget = scsi_target(device);
 118        struct request_queue *q = device->request_queue;
 119        unsigned long flags;
 120
 121        SCSI_LOG_MLQUEUE(1,
 122                 printk("Inserting command %p into mlqueue\n", cmd));
 123
 124        /*
 125         * Set the appropriate busy bit for the device/host.
 126         *
 127         * If the host/device isn't busy, assume that something actually
 128         * completed, and that we should be able to queue a command now.
 129         *
 130         * Note that the prior mid-layer assumption that any host could
 131         * always queue at least one command is now broken.  The mid-layer
 132         * will implement a user specifiable stall (see
 133         * scsi_host.max_host_blocked and scsi_device.max_device_blocked)
 134         * if a command is requeued with no other commands outstanding
 135         * either for the device or for the host.
 136         */
 137        switch (reason) {
 138        case SCSI_MLQUEUE_HOST_BUSY:
 139                host->host_blocked = host->max_host_blocked;
 140                break;
 141        case SCSI_MLQUEUE_DEVICE_BUSY:
 142                device->device_blocked = device->max_device_blocked;
 143                break;
 144        case SCSI_MLQUEUE_TARGET_BUSY:
 145                starget->target_blocked = starget->max_target_blocked;
 146                break;
 147        }
 148
 149        /*
 150         * Decrement the counters, since these commands are no longer
 151         * active on the host/device.
 152         */
 153        scsi_device_unbusy(device);
 154
 155        /*
 156         * Requeue this command.  It will go before all other commands
 157         * that are already in the queue.
 158         *
 159         * NOTE: there is magic here about the way the queue is plugged if
 160         * we have no outstanding commands.
 161         * 
 162         * Although we *don't* plug the queue, we call the request
 163         * function.  The SCSI request function detects the blocked condition
 164         * and plugs the queue appropriately.
 165         */
 166        spin_lock_irqsave(q->queue_lock, flags);
 167        blk_requeue_request(q, cmd->request);
 168        spin_unlock_irqrestore(q->queue_lock, flags);
 169
 170        scsi_run_queue(q);
 171
 172        return 0;
 173}
 174
 175/**
 176 * scsi_execute - insert request and wait for the result
 177 * @sdev:        scsi device
 178 * @cmd:        scsi command
 179 * @data_direction: data direction
 180 * @buffer:        data buffer
 181 * @bufflen:        len of buffer
 182 * @sense:        optional sense buffer
 183 * @timeout:        request timeout in seconds
 184 * @retries:        number of times to retry request
 185 * @flags:        flags to OR into the request flags
 186 *
 187 * returns the req->errors value which is the scsi_cmnd result
 188 * field.
 189 */
 190int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
 191                 int data_direction, void *buffer, unsigned bufflen,
 192                 unsigned char *sense, int timeout, int retries, int flags)
 193{
 194        struct request *req;
 195        int write = (data_direction == DMA_TO_DEVICE);
 196        int ret = DRIVER_ERROR << 24;
 197
 198        req = blk_get_request(sdev->request_queue, write, __GFP_WAIT);
 199
 200        if (bufflen &&        blk_rq_map_kern(sdev->request_queue, req,
 201                                        buffer, bufflen, __GFP_WAIT))
 202                goto out;
 203
 204        req->cmd_len = COMMAND_SIZE(cmd[0]);
 205        memcpy(req->cmd, cmd, req->cmd_len);
 206        req->sense = sense;
 207        req->sense_len = 0;
 208        req->retries = retries;
 209        req->timeout = timeout;
 210        req->cmd_type = REQ_TYPE_BLOCK_PC;
 211        req->cmd_flags |= flags | REQ_QUIET | REQ_PREEMPT;
 212
 213        /*
 214         * head injection *required* here otherwise quiesce won't work
 215         */
 216        blk_execute_rq(req->q, NULL, req, 1);
 217
 218        /*
 219         * Some devices (USB mass-storage in particular) may transfer
 220         * garbage data together with a residue indicating that the data
 221         * is invalid.  Prevent the garbage from being misinterpreted
 222         * and prevent security leaks by zeroing out the excess data.
 223         */
 224        if (unlikely(req->data_len > 0 && req->data_len <= bufflen))
 225                memset(buffer + (bufflen - req->data_len), 0, req->data_len);
 226
 227        ret = req->errors;
 228 out:
 229        blk_put_request(req);
 230
 231        return ret;
 232}
 233EXPORT_SYMBOL(scsi_execute);
 234
 235
 236int scsi_execute_req(struct scsi_device *sdev, const unsigned char *cmd,
 237                     int data_direction, void *buffer, unsigned bufflen,
 238                     struct scsi_sense_hdr *sshdr, int timeout, int retries)
 239{
 240        char *sense = NULL;
 241        int result;
 242        
 243        if (sshdr) {
 244                sense = kzalloc(SCSI_SENSE_BUFFERSIZE, GFP_NOIO);
 245                if (!sense)
 246                        return DRIVER_ERROR << 24;
 247        }
 248        result = scsi_execute(sdev, cmd, data_direction, buffer, bufflen,
 249                              sense, timeout, retries, 0);
 250        if (sshdr)
 251                scsi_normalize_sense(sense, SCSI_SENSE_BUFFERSIZE, sshdr);
 252
 253        kfree(sense);
 254        return result;
 255}
 256EXPORT_SYMBOL(scsi_execute_req);
 257
 258struct scsi_io_context {
 259        void *data;
 260        void (*done)(void *data, char *sense, int result, int resid);
 261        char sense[SCSI_SENSE_BUFFERSIZE];
 262};
 263
 264static struct kmem_cache *scsi_io_context_cache;
 265
 266static void scsi_end_async(struct request *req, int uptodate)
 267{
 268        struct scsi_io_context *sioc = req->end_io_data;
 269
 270        if (sioc->done)
 271                sioc->done(sioc->data, sioc->sense, req->errors, req->data_len);
 272
 273        kmem_cache_free(scsi_io_context_cache, sioc);
 274        __blk_put_request(req->q, req);
 275}
 276
 277static int scsi_merge_bio(struct request *rq, struct bio *bio)
 278{
 279        struct request_queue *q = rq->q;
 280
 281        bio->bi_flags &= ~(1 << BIO_SEG_VALID);
 282        if (rq_data_dir(rq) == WRITE)
 283                bio->bi_rw |= (1 << BIO_RW);
 284        blk_queue_bounce(q, &bio);
 285
 286        return blk_rq_append_bio(q, rq, bio);
 287}
 288
 289static void scsi_bi_endio(struct bio *bio, int error)
 290{
 291        bio_put(bio);
 292}
 293
 294/**
 295 * scsi_req_map_sg - map a scatterlist into a request
 296 * @rq:                request to fill
 297 * @sgl:        scatterlist
 298 * @nsegs:        number of elements
 299 * @bufflen:        len of buffer
 300 * @gfp:        memory allocation flags
 301 *
 302 * scsi_req_map_sg maps a scatterlist into a request so that the
 303 * request can be sent to the block layer. We do not trust the scatterlist
 304 * sent to us, as some ULDs use that struct only to organize the pages.
 305 */
 306static int scsi_req_map_sg(struct request *rq, struct scatterlist *sgl,
 307                           int nsegs, unsigned bufflen, gfp_t gfp)
 308{
 309        struct request_queue *q = rq->q;
 310        int nr_pages = (bufflen + sgl[0].offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
 311        unsigned int data_len = bufflen, len, bytes, off;
 312        struct scatterlist *sg;
 313        struct page *page;
 314        struct bio *bio = NULL;
 315        int i, err, nr_vecs = 0;
 316
 317        for_each_sg(sgl, sg, nsegs, i) {
 318                page = sg_page(sg);
 319                off = sg->offset;
 320                len = sg->length;
 321
 322                while (len > 0 && data_len > 0) {
 323                        /*
 324                         * sg sends a scatterlist that is larger than
 325                         * the data_len it wants transferred for certain
 326                         * IO sizes
 327                         */
 328                        bytes = min_t(unsigned int, len, PAGE_SIZE - off);
 329                        bytes = min(bytes, data_len);
 330
 331                        if (!bio) {
 332                                nr_vecs = min_t(int, BIO_MAX_PAGES, nr_pages);
 333                                nr_pages -= nr_vecs;
 334
 335                                bio = bio_alloc(gfp, nr_vecs);
 336                                if (!bio) {
 337                                        err = -ENOMEM;
 338                                        goto free_bios;
 339                                }
 340                                bio->bi_end_io = scsi_bi_endio;
 341                        }
 342
 343                        if (bio_add_pc_page(q, bio, page, bytes, off) !=
 344                            bytes) {
 345                                bio_put(bio);
 346                                err = -EINVAL;
 347                                goto free_bios;
 348                        }
 349
 350                        if (bio->bi_vcnt >= nr_vecs) {
 351                                err = scsi_merge_bio(rq, bio);
 352                                if (err) {
 353                                        bio_endio(bio, 0);
 354                                        goto free_bios;
 355                                }
 356                                bio = NULL;
 357                        }
 358
 359                        page++;
 360                        len -= bytes;
 361                        data_len -= bytes;
 362                        off = 0;
 363                }
 364        }
 365
 366        rq->buffer = rq->data = NULL;
 367        rq->data_len = bufflen;
 368        return 0;
 369
 370free_bios:
 371        while ((bio = rq->bio) != NULL) {
 372                rq->bio = bio->bi_next;
 373                /*
 374                 * call endio instead of bio_put in case it was bounced
 375                 */
 376                bio_endio(bio, 0);
 377        }
 378
 379        return err;
 380}
 381
 382/**
 383 * scsi_execute_async - insert request
 384 * @sdev:        scsi device
 385 * @cmd:        scsi command
 386 * @cmd_len:        length of scsi cdb
 387 * @data_direction: DMA_TO_DEVICE, DMA_FROM_DEVICE, or DMA_NONE
 388 * @buffer:        data buffer (this can be a kernel buffer or scatterlist)
 389 * @bufflen:        len of buffer
 390 * @use_sg:        if buffer is a scatterlist this is the number of elements
 391 * @timeout:        request timeout in seconds
 392 * @retries:        number of times to retry request
 393 * @privdata:        data passed to done()
 394 * @done:        callback function when done
 395 * @gfp:        memory allocation flags
 396 */
 397int scsi_execute_async(struct scsi_device *sdev, const unsigned char *cmd,
 398                       int cmd_len, int data_direction, void *buffer, unsigned bufflen,
 399                       int use_sg, int timeout, int retries, void *privdata,
 400                       void (*done)(void *, char *, int, int), gfp_t gfp)
 401{
 402        struct request *req;
 403        struct scsi_io_context *sioc;
 404        int err = 0;
 405        int write = (data_direction == DMA_TO_DEVICE);
 406
 407        sioc = kmem_cache_zalloc(scsi_io_context_cache, gfp);
 408        if (!sioc)
 409                return DRIVER_ERROR << 24;
 410
 411        req = blk_get_request(sdev->request_queue, write, gfp);
 412        if (!req)
 413                goto free_sense;
 414        req->cmd_type = REQ_TYPE_BLOCK_PC;
 415        req->cmd_flags |= REQ_QUIET;
 416
 417        if (use_sg)
 418                err = scsi_req_map_sg(req, buffer, use_sg, bufflen, gfp);
 419        else if (bufflen)
 420                err = blk_rq_map_kern(req->q, req, buffer, bufflen, gfp);
 421
 422        if (err)
 423                goto free_req;
 424
 425        req->cmd_len = cmd_len;
 426        memset(req->cmd, 0, BLK_MAX_CDB); /* ATAPI hates garbage after CDB */
 427        memcpy(req->cmd, cmd, req->cmd_len);
 428        req->sense = sioc->sense;
 429        req->sense_len = 0;
 430        req->timeout = timeout;
 431        req->retries = retries;
 432        req->end_io_data = sioc;
 433
 434        sioc->data = privdata;
 435        sioc->done = done;
 436
 437        blk_execute_rq_nowait(req->q, NULL, req, 1, scsi_end_async);
 438        return 0;
 439
 440free_req:
 441        blk_put_request(req);
 442free_sense:
 443        kmem_cache_free(scsi_io_context_cache, sioc);
 444        return DRIVER_ERROR << 24;
 445}
 446EXPORT_SYMBOL_GPL(scsi_execute_async);
 447
 448/*
 449 * Function:    scsi_init_cmd_errh()
 450 *
 451 * Purpose:     Initialize cmd fields related to error handling.
 452 *
 453 * Arguments:   cmd        - command that is ready to be queued.
 454 *
 455 * Notes:       This function has the job of initializing a number of
 456 *              fields related to error handling.   Typically this will
 457 *              be called once for each command, as required.
 458 */
 459static void scsi_init_cmd_errh(struct scsi_cmnd *cmd)
 460{
 461        cmd->serial_number = 0;
 462        scsi_set_resid(cmd, 0);
 463        memset(cmd->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE);
 464        if (cmd->cmd_len == 0)
 465                cmd->cmd_len = scsi_command_size(cmd->cmnd);
 466}
 467
 468void scsi_device_unbusy(struct scsi_device *sdev)
 469{
 470        struct Scsi_Host *shost = sdev->host;
 471        struct scsi_target *starget = scsi_target(sdev);
 472        unsigned long flags;
 473
 474        spin_lock_irqsave(shost->host_lock, flags);
 475        shost->host_busy--;
 476        starget->target_busy--;
 477        if (unlikely(scsi_host_in_recovery(shost) &&
 478                     (shost->host_failed || shost->host_eh_scheduled)))
 479                scsi_eh_wakeup(shost);
 480        spin_unlock(shost->host_lock);
 481        spin_lock(sdev->request_queue->queue_lock);
 482        sdev->device_busy--;
 483        spin_unlock_irqrestore(sdev->request_queue->queue_lock, flags);
 484}
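/*
 * Annotation (not part of the kernel source): scsi_device_unbusy() above
 * is an example of the interleaved locking a lock-state checker has to
 * model: host_lock is taken with spin_lock_irqsave() but released with a
 * plain spin_unlock(), then queue_lock is taken with a plain spin_lock()
 * and released with spin_unlock_irqrestore() using the flags saved at the
 * top.  Both locks are dropped on the single exit path, so this function
 * itself is balanced and is not the site flagged by this report.
 */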
 485
 486/*
 487 * Called for single_lun devices on IO completion. Clear starget_sdev_user,
 488 * and call blk_run_queue for all the scsi_devices on the target -
 489 * including current_sdev first.
 490 *
 491 * Called with *no* scsi locks held.
 492 */
 493static void scsi_single_lun_run(struct scsi_device *current_sdev)
 494{
 495        struct Scsi_Host *shost = current_sdev->host;
 496        struct scsi_device *sdev, *tmp;
 497        struct scsi_target *starget = scsi_target(current_sdev);
 498        unsigned long flags;
 499
 500        spin_lock_irqsave(shost->host_lock, flags);
 501        starget->starget_sdev_user = NULL;
 502        spin_unlock_irqrestore(shost->host_lock, flags);
 503
 504        /*
 505         * Call blk_run_queue for all LUNs on the target, starting with
 506         * current_sdev. We race with others (to set starget_sdev_user),
 507         * but in most cases, we will be first. Ideally, each LU on the
 508         * target would get some limited time or requests on the target.
 509         */
 510        blk_run_queue(current_sdev->request_queue);
 511
 512        spin_lock_irqsave(shost->host_lock, flags);
 513        if (starget->starget_sdev_user)
 514                goto out;
 515        list_for_each_entry_safe(sdev, tmp, &starget->devices,
 516                        same_target_siblings) {
 517                if (sdev == current_sdev)
 518                        continue;
 519                if (scsi_device_get(sdev))
 520                        continue;
 521
 522                spin_unlock_irqrestore(shost->host_lock, flags);
 523                blk_run_queue(sdev->request_queue);
 524                spin_lock_irqsave(shost->host_lock, flags);
 525        
 526                scsi_device_put(sdev);
 527        }
 528 out:
 529        spin_unlock_irqrestore(shost->host_lock, flags);
 530}
 531
 532static inline int scsi_device_is_busy(struct scsi_device *sdev)
 533{
 534        if (sdev->device_busy >= sdev->queue_depth || sdev->device_blocked)
 535                return 1;
 536
 537        return 0;
 538}
 539
 540static inline int scsi_target_is_busy(struct scsi_target *starget)
 541{
 542        return ((starget->can_queue > 0 &&
 543                 starget->target_busy >= starget->can_queue) ||
 544                 starget->target_blocked);
 545}
 546
 547static inline int scsi_host_is_busy(struct Scsi_Host *shost)
 548{
 549        if ((shost->can_queue > 0 && shost->host_busy >= shost->can_queue) ||
 550            shost->host_blocked || shost->host_self_blocked)
 551                return 1;
 552
 553        return 0;
 554}
 555
 556/*
 557 * Function:        scsi_run_queue()
 558 *
 559 * Purpose:        Select a proper request queue to serve next
 560 *
 561 * Arguments:        q        - last request's queue
 562 *
 563 * Returns:     Nothing
 564 *
 565 * Notes:        The previous command was completely finished, start
 566 *                a new one if possible.
 567 */
 568static void scsi_run_queue(struct request_queue *q)
 569{
 570        struct scsi_device *sdev = q->queuedata;
 571        struct Scsi_Host *shost = sdev->host;
 572        LIST_HEAD(starved_list);
 573        unsigned long flags;
 574
 575        if (scsi_target(sdev)->single_lun)
 576                scsi_single_lun_run(sdev);
 577
 578        spin_lock_irqsave(shost->host_lock, flags);
 579        list_splice_init(&shost->starved_list, &starved_list);
 580
 581        while (!list_empty(&starved_list)) {
 582                int flagset;
 583
 584                /*
 585                 * As long as shost is accepting commands and we have
 586                 * starved queues, call blk_run_queue. scsi_request_fn
 587                 * drops the queue_lock and can add us back to the
 588                 * starved_list.
 589                 *
 590                 * host_lock protects the starved_list and starved_entry.
 591                 * scsi_request_fn must get the host_lock before checking
 592                 * or modifying starved_list or starved_entry.
 593                 */
 594                if (scsi_host_is_busy(shost))
 595                        break;
 596
 597                sdev = list_entry(starved_list.next,
 598                                  struct scsi_device, starved_entry);
 599                list_del_init(&sdev->starved_entry);
 600                if (scsi_target_is_busy(scsi_target(sdev))) {
 601                        list_move_tail(&sdev->starved_entry,
 602                                       &shost->starved_list);
 603                        continue;
 604                }
 605
 606                spin_unlock(shost->host_lock);
 607
 608                spin_lock(sdev->request_queue->queue_lock);
 609                flagset = test_bit(QUEUE_FLAG_REENTER, &q->queue_flags) &&
 610                                !test_bit(QUEUE_FLAG_REENTER,
 611                                        &sdev->request_queue->queue_flags);
 612                if (flagset)
 613                        queue_flag_set(QUEUE_FLAG_REENTER, sdev->request_queue);
 614                __blk_run_queue(sdev->request_queue);
 615                if (flagset)
 616                        queue_flag_clear(QUEUE_FLAG_REENTER, sdev->request_queue);
 617                spin_unlock(sdev->request_queue->queue_lock);
 618
 619                spin_lock(shost->host_lock);
 620        }
 621        /* put any unprocessed entries back */
 622        list_splice(&starved_list, &shost->starved_list);
 623        spin_unlock_irqrestore(shost->host_lock, flags);
 624
 625        blk_run_queue(q);
 626}
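/*
 * Annotation (not part of the kernel source): scsi_run_queue() above takes
 * host_lock with spin_lock_irqsave(), temporarily drops it with a plain
 * spin_unlock() inside the starved-list loop while the per-device
 * queue_lock is taken and released, and reacquires it with a plain
 * spin_lock() before the next iteration.  All paths, including the break
 * taken when the host is busy, reach the final spin_unlock_irqrestore(),
 * so the function exits with no locks held.
 */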
 627
 628/*
 629 * Function:        scsi_requeue_command()
 630 *
 631 * Purpose:        Handle post-processing of completed commands.
 632 *
 633 * Arguments:        q        - queue to operate on
 634 *                cmd        - command that may need to be requeued.
 635 *
 636 * Returns:        Nothing
 637 *
 638 * Notes:        After command completion, there may be blocks left
 639 *                over which weren't finished by the previous command
 640 *                this can be for a number of reasons - the main one is
 641 *                I/O errors in the middle of the request, in which case
 642 *                we need to request the blocks that come after the bad
 643 *                sector.
 644 * Notes:        Upon return, cmd is a stale pointer.
 645 */
 646static void scsi_requeue_command(struct request_queue *q, struct scsi_cmnd *cmd)
 647{
 648        struct request *req = cmd->request;
 649        unsigned long flags;
 650
 651        spin_lock_irqsave(q->queue_lock, flags);
 652        scsi_unprep_request(req);
 653        blk_requeue_request(q, req);
 654        spin_unlock_irqrestore(q->queue_lock, flags);
 655
 656        scsi_run_queue(q);
 657}
 658
 659void scsi_next_command(struct scsi_cmnd *cmd)
 660{
 661        struct scsi_device *sdev = cmd->device;
 662        struct request_queue *q = sdev->request_queue;
 663
 664        /* need to hold a reference on the device before we let go of the cmd */
 665        get_device(&sdev->sdev_gendev);
 666
 667        scsi_put_command(cmd);
 668        scsi_run_queue(q);
 669
 670        /* ok to remove device now */
 671        put_device(&sdev->sdev_gendev);
 672}
 673
 674void scsi_run_host_queues(struct Scsi_Host *shost)
 675{
 676        struct scsi_device *sdev;
 677
 678        shost_for_each_device(sdev, shost)
 679                scsi_run_queue(sdev->request_queue);
 680}
 681
 682/*
 683 * Function:    scsi_end_request()
 684 *
 685 * Purpose:     Post-processing of completed commands (usually invoked at end
 686 *                of upper level post-processing and scsi_io_completion).
 687 *
 688 * Arguments:   cmd         - command that is complete.
 689 *              error    - 0 if I/O indicates success, < 0 for I/O error.
 690 *              bytes    - number of bytes of completed I/O
 691 *                requeue  - indicates whether we should requeue leftovers.
 692 *
 693 * Lock status: Assumed that lock is not held upon entry.
 694 *
 695 * Returns:     cmd if requeue required, NULL otherwise.
 696 *
 697 * Notes:       This is called for block device requests in order to
 698 *              mark some number of sectors as complete.
 699 * 
 700 *                We are guaranteeing that the request queue will be goosed
 701 *                at some point during this call.
 702 * Notes:        If cmd was requeued, upon return it will be a stale pointer.
 703 */
 704static struct scsi_cmnd *scsi_end_request(struct scsi_cmnd *cmd, int error,
 705                                          int bytes, int requeue)
 706{
 707        struct request_queue *q = cmd->device->request_queue;
 708        struct request *req = cmd->request;
 709
 710        /*
 711         * If there are blocks left over at the end, set up the command
 712         * to queue the remainder of them.
 713         */
 714        if (blk_end_request(req, error, bytes)) {
 715                int leftover = (req->hard_nr_sectors << 9);
 716
 717                if (blk_pc_request(req))
 718                        leftover = req->data_len;
 719
 720                /* kill remainder if no retries */
 721                if (error && scsi_noretry_cmd(cmd))
 722                        blk_end_request(req, error, leftover);
 723                else {
 724                        if (requeue) {
 725                                /*
 726                                 * Bleah.  Leftovers again.  Stick the
 727                                 * leftovers in the front of the
 728                                 * queue, and goose the queue again.
 729                                 */
 730                                scsi_requeue_command(q, cmd);
 731                                cmd = NULL;
 732                        }
 733                        return cmd;
 734                }
 735        }
 736
 737        /*
 738         * This will goose the queue request function at the end, so we don't
 739         * need to worry about launching another command.
 740         */
 741        scsi_next_command(cmd);
 742        return NULL;
 743}
 744
 745static inline unsigned int scsi_sgtable_index(unsigned short nents)
 746{
 747        unsigned int index;
 748
 749        BUG_ON(nents > SCSI_MAX_SG_SEGMENTS);
 750
 751        if (nents <= 8)
 752                index = 0;
 753        else
 754                index = get_count_order(nents) - 3;
 755
 756        return index;
 757}
 758
 759static void scsi_sg_free(struct scatterlist *sgl, unsigned int nents)
 760{
 761        struct scsi_host_sg_pool *sgp;
 762
 763        sgp = scsi_sg_pools + scsi_sgtable_index(nents);
 764        mempool_free(sgl, sgp->pool);
 765}
 766
 767static struct scatterlist *scsi_sg_alloc(unsigned int nents, gfp_t gfp_mask)
 768{
 769        struct scsi_host_sg_pool *sgp;
 770
 771        sgp = scsi_sg_pools + scsi_sgtable_index(nents);
 772        return mempool_alloc(sgp->pool, gfp_mask);
 773}
 774
 775static int scsi_alloc_sgtable(struct scsi_data_buffer *sdb, int nents,
 776                              gfp_t gfp_mask)
 777{
 778        int ret;
 779
 780        BUG_ON(!nents);
 781
 782        ret = __sg_alloc_table(&sdb->table, nents, SCSI_MAX_SG_SEGMENTS,
 783                               gfp_mask, scsi_sg_alloc);
 784        if (unlikely(ret))
 785                __sg_free_table(&sdb->table, SCSI_MAX_SG_SEGMENTS,
 786                                scsi_sg_free);
 787
 788        return ret;
 789}
 790
 791static void scsi_free_sgtable(struct scsi_data_buffer *sdb)
 792{
 793        __sg_free_table(&sdb->table, SCSI_MAX_SG_SEGMENTS, scsi_sg_free);
 794}
 795
 796/*
 797 * Function:    scsi_release_buffers()
 798 *
 799 * Purpose:     Completion processing for block device I/O requests.
 800 *
 801 * Arguments:   cmd        - command that we are bailing.
 802 *
 803 * Lock status: Assumed that no lock is held upon entry.
 804 *
 805 * Returns:     Nothing
 806 *
 807 * Notes:       In the event that an upper level driver rejects a
 808 *                command, we must release resources allocated during
 809 *                the __init_io() function.  Primarily this would involve
 810 *                the scatter-gather table, and potentially any bounce
 811 *                buffers.
 812 */
 813void scsi_release_buffers(struct scsi_cmnd *cmd)
 814{
 815        if (cmd->sdb.table.nents)
 816                scsi_free_sgtable(&cmd->sdb);
 817
 818        memset(&cmd->sdb, 0, sizeof(cmd->sdb));
 819
 820        if (scsi_bidi_cmnd(cmd)) {
 821                struct scsi_data_buffer *bidi_sdb =
 822                        cmd->request->next_rq->special;
 823                scsi_free_sgtable(bidi_sdb);
 824                kmem_cache_free(scsi_sdb_cache, bidi_sdb);
 825                cmd->request->next_rq->special = NULL;
 826        }
 827
 828        if (scsi_prot_sg_count(cmd))
 829                scsi_free_sgtable(cmd->prot_sdb);
 830}
 831EXPORT_SYMBOL(scsi_release_buffers);
 832
 833/*
 834 * Bidi commands must be complete as a whole, both sides at once.
 835 * If part of the bytes were written and lld returned
 836 * scsi_in()->resid and/or scsi_out()->resid this information will be left
 837 * in req->data_len and req->next_rq->data_len. The upper-layer driver can
 838 * decide what to do with this information.
 839 */
 840static void scsi_end_bidi_request(struct scsi_cmnd *cmd)
 841{
 842        struct request *req = cmd->request;
 843        unsigned int dlen = req->data_len;
 844        unsigned int next_dlen = req->next_rq->data_len;
 845
 846        req->data_len = scsi_out(cmd)->resid;
 847        req->next_rq->data_len = scsi_in(cmd)->resid;
 848
 849        /* The req and req->next_rq have not been completed */
 850        BUG_ON(blk_end_bidi_request(req, 0, dlen, next_dlen));
 851
 852        scsi_release_buffers(cmd);
 853
 854        /*
 855         * This will goose the queue request function at the end, so we don't
 856         * need to worry about launching another command.
 857         */
 858        scsi_next_command(cmd);
 859}
 860
 861/*
 862 * Function:    scsi_io_completion()
 863 *
 864 * Purpose:     Completion processing for block device I/O requests.
 865 *
 866 * Arguments:   cmd   - command that is finished.
 867 *
 868 * Lock status: Assumed that no lock is held upon entry.
 869 *
 870 * Returns:     Nothing
 871 *
 872 * Notes:       This function is matched in terms of capabilities to
 873 *              the function that created the scatter-gather list.
 874 *              In other words, if there are no bounce buffers
 875 *              (the normal case for most drivers), we don't need
 876 *              the logic to deal with cleaning up afterwards.
 877 *
 878 *                We must do one of several things here:
 879 *
 880 *                a) Call scsi_end_request.  This will finish off the
 881 *                   specified number of sectors.  If we are done, the
 882 *                   command block will be released, and the queue
 883 *                   function will be goosed.  If we are not done, then
 884 *                   scsi_end_request will directly goose the queue.
 885 *
 886 *                b) We can just use scsi_requeue_command() here.  This would
 887 *                   be used if we just wanted to retry, for example.
 888 */
 889void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 890{
 891        int result = cmd->result;
 892        int this_count;
 893        struct request_queue *q = cmd->device->request_queue;
 894        struct request *req = cmd->request;
 895        int error = 0;
 896        struct scsi_sense_hdr sshdr;
 897        int sense_valid = 0;
 898        int sense_deferred = 0;
 899
 900        if (result) {
 901                sense_valid = scsi_command_normalize_sense(cmd, &sshdr);
 902                if (sense_valid)
 903                        sense_deferred = scsi_sense_is_deferred(&sshdr);
 904        }
 905
 906        if (blk_pc_request(req)) { /* SG_IO ioctl from block level */
 907                req->errors = result;
 908                if (result) {
 909                        if (sense_valid && req->sense) {
 910                                /*
 911                                 * SG_IO wants current and deferred errors
 912                                 */
 913                                int len = 8 + cmd->sense_buffer[7];
 914
 915                                if (len > SCSI_SENSE_BUFFERSIZE)
 916                                        len = SCSI_SENSE_BUFFERSIZE;
 917                                memcpy(req->sense, cmd->sense_buffer,  len);
 918                                req->sense_len = len;
 919                        }
 920                        if (!sense_deferred)
 921                                error = -EIO;
 922                }
 923                if (scsi_bidi_cmnd(cmd)) {
 924                        /* will also release_buffers */
 925                        scsi_end_bidi_request(cmd);
 926                        return;
 927                }
 928                req->data_len = scsi_get_resid(cmd);
 929        }
 930
 931        BUG_ON(blk_bidi_rq(req)); /* bidi not supported for !blk_pc_request yet */
 932        scsi_release_buffers(cmd);
 933
 934        /*
 935         * Next deal with any sectors which we were able to correctly
 936         * handle.
 937         */
 938        SCSI_LOG_HLCOMPLETE(1, printk("%ld sectors total, "
 939                                      "%d bytes done.\n",
 940                                      req->nr_sectors, good_bytes));
 941
 942        /* A number of bytes were successfully read.  If there
 943         * are leftovers and there is some kind of error
 944         * (result != 0), retry the rest.
 945         */
 946        if (scsi_end_request(cmd, error, good_bytes, result == 0) == NULL)
 947                return;
 948        this_count = blk_rq_bytes(req);
 949
 950        /* good_bytes = 0, or (inclusive) there were leftovers and
 951         * result = 0, so scsi_end_request couldn't retry.
 952         */
 953        if (sense_valid && !sense_deferred) {
 954                switch (sshdr.sense_key) {
 955                case UNIT_ATTENTION:
 956                        if (cmd->device->removable) {
 957                                /* Detected disc change.  Set a bit
 958                                 * and quietly refuse further access.
 959                                 */
 960                                cmd->device->changed = 1;
 961                                scsi_end_request(cmd, -EIO, this_count, 1);
 962                                return;
 963                        } else {
 964                                /* Must have been a power glitch, or a
 965                                 * bus reset.  Could not have been a
 966                                 * media change, so we just retry the
 967                                 * request and see what happens.
 968                                 */
 969                                scsi_requeue_command(q, cmd);
 970                                return;
 971                        }
 972                        break;
 973                case ILLEGAL_REQUEST:
 974                        /* If we had an ILLEGAL REQUEST returned, then
 975                         * we may have performed an unsupported
 976                         * command.  The only thing this should be
 977                         * would be a ten byte read where only a six
 978                         * byte read was supported.  Also, on a system
 979                         * where READ CAPACITY failed, we may have
 980                         * read past the end of the disk.
 981                         */
 982                        if ((cmd->device->use_10_for_rw &&
 983                            sshdr.asc == 0x20 && sshdr.ascq == 0x00) &&
 984                            (cmd->cmnd[0] == READ_10 ||
 985                             cmd->cmnd[0] == WRITE_10)) {
 986                                cmd->device->use_10_for_rw = 0;
 987                                /* This will cause a retry with a
 988                                 * 6-byte command.
 989                                 */
 990                                scsi_requeue_command(q, cmd);
 991                        } else if (sshdr.asc == 0x10) /* DIX */
 992                                scsi_end_request(cmd, -EIO, this_count, 0);
 993                        else
 994                                scsi_end_request(cmd, -EIO, this_count, 1);
 995                        return;
 996                case ABORTED_COMMAND:
 997                        if (sshdr.asc == 0x10) { /* DIF */
 998                                scsi_end_request(cmd, -EIO, this_count, 0);
 999                                return;
1000                        }
1001                        break;
1002                case NOT_READY:
1003                        /* If the device is in the process of becoming
1004                         * ready, or has a temporary blockage, retry.
1005                         */
1006                        if (sshdr.asc == 0x04) {
1007                                switch (sshdr.ascq) {
1008                                case 0x01: /* becoming ready */
1009                                case 0x04: /* format in progress */
1010                                case 0x05: /* rebuild in progress */
1011                                case 0x06: /* recalculation in progress */
1012                                case 0x07: /* operation in progress */
1013                                case 0x08: /* Long write in progress */
1014                                case 0x09: /* self test in progress */
1015                                        scsi_requeue_command(q, cmd);
1016                                        return;
1017                                default:
1018                                        break;
1019                                }
1020                        }
1021                        if (!(req->cmd_flags & REQ_QUIET))
1022                                scsi_cmd_print_sense_hdr(cmd,
1023                                                         "Device not ready",
1024                                                         &sshdr);
1025
1026                        scsi_end_request(cmd, -EIO, this_count, 1);
1027                        return;
1028                case VOLUME_OVERFLOW:
1029                        if (!(req->cmd_flags & REQ_QUIET)) {
1030                                scmd_printk(KERN_INFO, cmd,
1031                                            "Volume overflow, CDB: ");
1032                                __scsi_print_command(cmd->cmnd);
1033                                scsi_print_sense("", cmd);
1034                        }
1035                        /* See SSC3rXX or current. */
1036                        scsi_end_request(cmd, -EIO, this_count, 1);
1037                        return;
1038                default:
1039                        break;
1040                }
1041        }
1042        if (host_byte(result) == DID_RESET) {
1043                /* Third party bus reset or reset for error recovery
1044                 * reasons.  Just retry the request and see what
1045                 * happens.
1046                 */
1047                scsi_requeue_command(q, cmd);
1048                return;
1049        }
1050        if (result) {
1051                if (!(req->cmd_flags & REQ_QUIET)) {
1052                        scsi_print_result(cmd);
1053                        if (driver_byte(result) & DRIVER_SENSE)
1054                                scsi_print_sense("", cmd);
1055                }
1056        }
1057        scsi_end_request(cmd, -EIO, this_count, !result);
1058}
1059
1060static int scsi_init_sgtable(struct request *req, struct scsi_data_buffer *sdb,
1061                             gfp_t gfp_mask)
1062{
1063        int count;
1064
1065        /*
1066         * If sg table allocation fails, requeue request later.
1067         */
1068        if (unlikely(scsi_alloc_sgtable(sdb, req->nr_phys_segments,
1069                                        gfp_mask))) {
1070                return BLKPREP_DEFER;
1071        }
1072
1073        req->buffer = NULL;
1074
1075        /* 
1076         * Next, walk the list, and fill in the addresses and sizes of
1077         * each segment.
1078         */
1079        count = blk_rq_map_sg(req->q, req, sdb->table.sgl);
1080        BUG_ON(count > sdb->table.nents);
1081        sdb->table.nents = count;
1082        if (blk_pc_request(req))
1083                sdb->length = req->data_len;
1084        else
1085                sdb->length = req->nr_sectors << 9;
1086        return BLKPREP_OK;
1087}
1088
1089/*
1090 * Function:    scsi_init_io()
1091 *
1092 * Purpose:     SCSI I/O initialize function.
1093 *
1094 * Arguments:   cmd   - Command descriptor we wish to initialize
1095 *
1096 * Returns:     0 on success
1097 *                BLKPREP_DEFER if the failure is retryable
1098 *                BLKPREP_KILL if the failure is fatal
1099 */
1100int scsi_init_io(struct scsi_cmnd *cmd, gfp_t gfp_mask)
1101{
1102        int error = scsi_init_sgtable(cmd->request, &cmd->sdb, gfp_mask);
1103        if (error)
1104                goto err_exit;
1105
1106        if (blk_bidi_rq(cmd->request)) {
1107                struct scsi_data_buffer *bidi_sdb = kmem_cache_zalloc(
1108                        scsi_sdb_cache, GFP_ATOMIC);
1109                if (!bidi_sdb) {
1110                        error = BLKPREP_DEFER;
1111                        goto err_exit;
1112                }
1113
1114                cmd->request->next_rq->special = bidi_sdb;
1115                error = scsi_init_sgtable(cmd->request->next_rq, bidi_sdb,
1116                                                                    GFP_ATOMIC);
1117                if (error)
1118                        goto err_exit;
1119        }
1120
1121        if (blk_integrity_rq(cmd->request)) {
1122                struct scsi_data_buffer *prot_sdb = cmd->prot_sdb;
1123                int ivecs, count;
1124
1125                BUG_ON(prot_sdb == NULL);
1126                ivecs = blk_rq_count_integrity_sg(cmd->request);
1127
1128                if (scsi_alloc_sgtable(prot_sdb, ivecs, gfp_mask)) {
1129                        error = BLKPREP_DEFER;
1130                        goto err_exit;
1131                }
1132
1133                count = blk_rq_map_integrity_sg(cmd->request,
1134                                                prot_sdb->table.sgl);
1135                BUG_ON(unlikely(count > ivecs));
1136
1137                cmd->prot_sdb = prot_sdb;
1138                cmd->prot_sdb->table.nents = count;
1139        }
1140
1141        return BLKPREP_OK;
1142
1143err_exit:
1144        scsi_release_buffers(cmd);
1145        if (error == BLKPREP_KILL)
1146                scsi_put_command(cmd);
1147        else /* BLKPREP_DEFER */
1148                scsi_unprep_request(cmd->request);
1149
1150        return error;
1151}
1152EXPORT_SYMBOL(scsi_init_io);
1153
1154static struct scsi_cmnd *scsi_get_cmd_from_req(struct scsi_device *sdev,
1155                struct request *req)
1156{
1157        struct scsi_cmnd *cmd;
1158
1159        if (!req->special) {
1160                cmd = scsi_get_command(sdev, GFP_ATOMIC);
1161                if (unlikely(!cmd))
1162                        return NULL;
1163                req->special = cmd;
1164        } else {
1165                cmd = req->special;
1166        }
1167
1168        /* pull a tag out of the request if we have one */
1169        cmd->tag = req->tag;
1170        cmd->request = req;
1171
1172        cmd->cmnd = req->cmd;
1173
1174        return cmd;
1175}
1176
1177int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req)
1178{
1179        struct scsi_cmnd *cmd;
1180        int ret = scsi_prep_state_check(sdev, req);
1181
1182        if (ret != BLKPREP_OK)
1183                return ret;
1184
1185        cmd = scsi_get_cmd_from_req(sdev, req);
1186        if (unlikely(!cmd))
1187                return BLKPREP_DEFER;
1188
1189        /*
1190         * BLOCK_PC requests may transfer data, in which case they must
1191         * have a bio attached to them.  Or they might contain a SCSI command
1192         * that does not transfer data, in which case they may optionally
1193         * submit a request without an attached bio.
1194         */
1195        if (req->bio) {
1196                int ret;
1197
1198                BUG_ON(!req->nr_phys_segments);
1199
1200                ret = scsi_init_io(cmd, GFP_ATOMIC);
1201                if (unlikely(ret))
1202                        return ret;
1203        } else {
1204                BUG_ON(req->data_len);
1205                BUG_ON(req->data);
1206
1207                memset(&cmd->sdb, 0, sizeof(cmd->sdb));
1208                req->buffer = NULL;
1209        }
1210
1211        cmd->cmd_len = req->cmd_len;
1212        if (!req->data_len)
1213                cmd->sc_data_direction = DMA_NONE;
1214        else if (rq_data_dir(req) == WRITE)
1215                cmd->sc_data_direction = DMA_TO_DEVICE;
1216        else
1217                cmd->sc_data_direction = DMA_FROM_DEVICE;
1218        
1219        cmd->transfersize = req->data_len;
1220        cmd->allowed = req->retries;
1221        return BLKPREP_OK;
1222}
1223EXPORT_SYMBOL(scsi_setup_blk_pc_cmnd);
1224
1225/*
1226 * Setup a REQ_TYPE_FS command.  These are simple read/write request
1227 * from filesystems that still need to be translated to SCSI CDBs from
1228 * the ULD.
1229 */
1230int scsi_setup_fs_cmnd(struct scsi_device *sdev, struct request *req)
1231{
1232        struct scsi_cmnd *cmd;
1233        int ret = scsi_prep_state_check(sdev, req);
1234
1235        if (ret != BLKPREP_OK)
1236                return ret;
1237
1238        if (unlikely(sdev->scsi_dh_data && sdev->scsi_dh_data->scsi_dh
1239                         && sdev->scsi_dh_data->scsi_dh->prep_fn)) {
1240                ret = sdev->scsi_dh_data->scsi_dh->prep_fn(sdev, req);
1241                if (ret != BLKPREP_OK)
1242                        return ret;
1243        }
1244
1245        /*
1246         * Filesystem requests must transfer data.
1247         */
1248        BUG_ON(!req->nr_phys_segments);
1249
1250        cmd = scsi_get_cmd_from_req(sdev, req);
1251        if (unlikely(!cmd))
1252                return BLKPREP_DEFER;
1253
1254        memset(cmd->cmnd, 0, BLK_MAX_CDB);
1255        return scsi_init_io(cmd, GFP_ATOMIC);
1256}
1257EXPORT_SYMBOL(scsi_setup_fs_cmnd);
1258
1259int scsi_prep_state_check(struct scsi_device *sdev, struct request *req)
1260{
1261        int ret = BLKPREP_OK;
1262
1263        /*
1264         * If the device is not in running state we will reject some
1265         * or all commands.
1266         */
1267        if (unlikely(sdev->sdev_state != SDEV_RUNNING)) {
1268                switch (sdev->sdev_state) {
1269                case SDEV_OFFLINE:
1270                        /*
1271                         * If the device is offline we refuse to process any
1272                         * commands.  The device must be brought online
1273                         * before trying any recovery commands.
1274                         */
1275                        sdev_printk(KERN_ERR, sdev,
1276                                    "rejecting I/O to offline device\n");
1277                        ret = BLKPREP_KILL;
1278                        break;
1279                case SDEV_DEL:
1280                        /*
1281                         * If the device is fully deleted, we refuse to
1282                         * process any commands as well.
1283                         */
1284                        sdev_printk(KERN_ERR, sdev,
1285                                    "rejecting I/O to dead device\n");
1286                        ret = BLKPREP_KILL;
1287                        break;
1288                case SDEV_QUIESCE:
1289                case SDEV_BLOCK:
1290                case SDEV_CREATED_BLOCK:
1291                        /*
1292                         * If the device is blocked we defer normal commands.
1293                         */
1294                        if (!(req->cmd_flags & REQ_PREEMPT))
1295                                ret = BLKPREP_DEFER;
1296                        break;
1297                default:
1298                        /*
1299                         * For any other not fully online state we only allow
1300                         * special commands.  In particular any user initiated
1301                         * command is not allowed.
1302                         */
1303                        if (!(req->cmd_flags & REQ_PREEMPT))
1304                                ret = BLKPREP_KILL;
1305                        break;
1306                }
1307        }
1308        return ret;
1309}
1310EXPORT_SYMBOL(scsi_prep_state_check);
1311
1312int scsi_prep_return(struct request_queue *q, struct request *req, int ret)
1313{
1314        struct scsi_device *sdev = q->queuedata;
1315
1316        switch (ret) {
1317        case BLKPREP_KILL:
1318                req->errors = DID_NO_CONNECT << 16;
1319                /* release the command and kill it */
1320                if (req->special) {
1321                        struct scsi_cmnd *cmd = req->special;
1322                        scsi_release_buffers(cmd);
1323                        scsi_put_command(cmd);
1324                        req->special = NULL;
1325                }
1326                break;
1327        case BLKPREP_DEFER:
1328                /*
1329                 * If we defer, the elv_next_request() returns NULL, but the
1330                 * queue must be restarted, so we plug here if no returning
1331                 * command will automatically do that.
1332                 */
1333                if (sdev->device_busy == 0)
1334                        blk_plug_device(q);
1335                break;
1336        default:
1337                req->cmd_flags |= REQ_DONTPREP;
1338        }
1339
1340        return ret;
1341}
1342EXPORT_SYMBOL(scsi_prep_return);
1343
1344int scsi_prep_fn(struct request_queue *q, struct request *req)
1345{
1346        struct scsi_device *sdev = q->queuedata;
1347        int ret = BLKPREP_KILL;
1348
1349        if (req->cmd_type == REQ_TYPE_BLOCK_PC)
1350                ret = scsi_setup_blk_pc_cmnd(sdev, req);
1351        return scsi_prep_return(q, req, ret);
1352}
1353
1354/*
1355 * scsi_dev_queue_ready: if we can send requests to sdev, return 1 else
1356 * return 0.
1357 *
1358 * Called with the queue_lock held.
1359 */
1360static inline int scsi_dev_queue_ready(struct request_queue *q,
1361                                  struct scsi_device *sdev)
1362{
1363        if (sdev->device_busy == 0 && sdev->device_blocked) {
1364                /*
1365                 * unblock after device_blocked iterates to zero
1366                 */
1367                if (--sdev->device_blocked == 0) {
1368                        SCSI_LOG_MLQUEUE(3,
1369                                   sdev_printk(KERN_INFO, sdev,
1370                                   "unblocking device at zero depth\n"));
1371                } else {
1372                        blk_plug_device(q);
1373                        return 0;
1374                }
1375        }
1376        if (scsi_device_is_busy(sdev))
1377                return 0;
1378
1379        return 1;
1380}
1381
1382
1383/*
1384 * scsi_target_queue_ready: checks if we can send commands to the target
1385 * @sdev: scsi device on starget to check.
1386 *
1387 * Called with the host lock held.
1388 */
1389static inline int scsi_target_queue_ready(struct Scsi_Host *shost,
1390                                           struct scsi_device *sdev)
1391{
1392        struct scsi_target *starget = scsi_target(sdev);
1393
1394        if (starget->single_lun) {
1395                if (starget->starget_sdev_user &&
1396                    starget->starget_sdev_user != sdev)
1397                        return 0;
1398                starget->starget_sdev_user = sdev;
1399        }
1400
1401        if (starget->target_busy == 0 && starget->target_blocked) {
1402                /*
1403                 * unblock after target_blocked iterates to zero
1404                 */
1405                if (--starget->target_blocked == 0) {
1406                        SCSI_LOG_MLQUEUE(3, starget_printk(KERN_INFO, starget,
1407                                         "unblocking target at zero depth\n"));
1408                } else {
1409                        blk_plug_device(sdev->request_queue);
1410                        return 0;
1411                }
1412        }
1413
1414        if (scsi_target_is_busy(starget)) {
1415                if (list_empty(&sdev->starved_entry)) {
1416                        list_add_tail(&sdev->starved_entry,
1417                                      &shost->starved_list);
1418                        return 0;
1419                }
1420        }
1421
1422        /* We're OK to process the command, so we can't be starved */
1423        if (!list_empty(&sdev->starved_entry))
1424                list_del_init(&sdev->starved_entry);
1425        return 1;
1426}
1427
1428/*
1429 * scsi_host_queue_ready: if we can send requests to shost, return 1 else
1430 * return 0. We must end up running the queue again whenever 0 is
1431 * returned, else IO can hang.
1432 *
1433 * Called with host_lock held.
1434 */
1435static inline int scsi_host_queue_ready(struct request_queue *q,
1436                                   struct Scsi_Host *shost,
1437                                   struct scsi_device *sdev)
1438{
1439        if (scsi_host_in_recovery(shost))
1440                return 0;
1441        if (shost->host_busy == 0 && shost->host_blocked) {
1442                /*
1443                 * unblock after host_blocked iterates to zero
1444                 */
1445                if (--shost->host_blocked == 0) {
1446                        SCSI_LOG_MLQUEUE(3,
1447                                printk("scsi%d unblocking host at zero depth\n",
1448                                        shost->host_no));
1449                } else {
1450                        return 0;
1451                }
1452        }
1453        if (scsi_host_is_busy(shost)) {
1454                if (list_empty(&sdev->starved_entry))
1455                        list_add_tail(&sdev->starved_entry, &shost->starved_list);
1456                return 0;
1457        }
1458
1459        /* We're OK to process the command, so we can't be starved */
1460        if (!list_empty(&sdev->starved_entry))
1461                list_del_init(&sdev->starved_entry);
1462
1463        return 1;
1464}
1465
1466/*
1467 * Busy state exporting function for request stacking drivers.
1468 *
1469 * For efficiency, no lock is taken to check the busy state of
1470 * shost/starget/sdev, since the returned value is not guaranteed and
1471 * may be changed after request stacking drivers call the function,
1472 * regardless of taking lock or not.
1473 *
1474 * When scsi can't dispatch I/Os anymore and needs to kill I/Os
1475 * (e.g. !sdev), scsi needs to return 'not busy'.
1476 * Otherwise, request stacking drivers may hold requests forever.
1477 */
1478static int scsi_lld_busy(struct request_queue *q)
1479{
1480        struct scsi_device *sdev = q->queuedata;
1481        struct Scsi_Host *shost;
1482        struct scsi_target *starget;
1483
1484        if (!sdev)
1485                return 0;
1486
1487        shost = sdev->host;
1488        starget = scsi_target(sdev);
1489
1490        if (scsi_host_in_recovery(shost) || scsi_host_is_busy(shost) ||
1491            scsi_target_is_busy(starget) || scsi_device_is_busy(sdev))
1492                return 1;
1493
1494        return 0;
1495}
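/*
 * Illustrative sketch (editorial addition, not part of scsi_lib.c): a
 * request stacking driver does not call scsi_lld_busy() directly; it
 * reaches it through blk_lld_busy(), which invokes the lld_busy_fn
 * registered below in scsi_alloc_queue().  The helper name is made up
 * for illustration.
 */
static int example_lower_device_busy(struct request_queue *bottom_q)
{
        /* non-zero while the SCSI host/target/device cannot take I/O */
        return blk_lld_busy(bottom_q);
}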
1496
1497/*
1498 * Kill a request for a dead device
1499 */
1500static void scsi_kill_request(struct request *req, struct request_queue *q)
1501{
1502        struct scsi_cmnd *cmd = req->special;
1503        struct scsi_device *sdev = cmd->device;
1504        struct scsi_target *starget = scsi_target(sdev);
1505        struct Scsi_Host *shost = sdev->host;
1506
1507        blkdev_dequeue_request(req);
1508
1509        if (unlikely(cmd == NULL)) {
1510                printk(KERN_CRIT "impossible request in %s.\n",
1511                                 __func__);
1512                BUG();
1513        }
1514
1515        scsi_init_cmd_errh(cmd);
1516        cmd->result = DID_NO_CONNECT << 16;
1517        atomic_inc(&cmd->device->iorequest_cnt);
1518
1519        /*
1520         * SCSI request completion path will do scsi_device_unbusy(),
1521         * bump busy counts.  To bump the counters, we need to dance
1522         * with the locks as normal issue path does.
1523         */
1524        sdev->device_busy++;
1525        spin_unlock(sdev->request_queue->queue_lock);
1526        spin_lock(shost->host_lock);
1527        shost->host_busy++;
1528        starget->target_busy++;
1529        spin_unlock(shost->host_lock);
1530        spin_lock(sdev->request_queue->queue_lock);
1531
1532        blk_complete_request(req);
1533}
1534
1535static void scsi_softirq_done(struct request *rq)
1536{
1537        struct scsi_cmnd *cmd = rq->special;
1538        unsigned long wait_for = (cmd->allowed + 1) * rq->timeout;
1539        int disposition;
1540
1541        INIT_LIST_HEAD(&cmd->eh_entry);
1542
1543        /*
1544         * Set the serial numbers back to zero
1545         */
1546        cmd->serial_number = 0;
1547
1548        atomic_inc(&cmd->device->iodone_cnt);
1549        if (cmd->result)
1550                atomic_inc(&cmd->device->ioerr_cnt);
1551
1552        disposition = scsi_decide_disposition(cmd);
1553        if (disposition != SUCCESS &&
1554            time_before(cmd->jiffies_at_alloc + wait_for, jiffies)) {
1555                sdev_printk(KERN_ERR, cmd->device,
1556                            "timing out command, waited %lus\n",
1557                            wait_for/HZ);
1558                disposition = SUCCESS;
1559        }
1560                        
1561        scsi_log_completion(cmd, disposition);
1562
1563        switch (disposition) {
1564                case SUCCESS:
1565                        scsi_finish_command(cmd);
1566                        break;
1567                case NEEDS_RETRY:
1568                        scsi_queue_insert(cmd, SCSI_MLQUEUE_EH_RETRY);
1569                        break;
1570                case ADD_TO_MLQUEUE:
1571                        scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY);
1572                        break;
1573                default:
1574                        if (!scsi_eh_scmd_add(cmd, 0))
1575                                scsi_finish_command(cmd);
1576        }
1577}
1578
1579/*
1580 * Function:    scsi_request_fn()
1581 *
1582 * Purpose:     Main strategy routine for SCSI.
1583 *
1584 * Arguments:   q       - Pointer to actual queue.
1585 *
1586 * Returns:     Nothing
1587 *
1588 * Lock status: IO request lock assumed to be held when called.
1589 */
1590static void scsi_request_fn(struct request_queue *q)
1591{
1592        struct scsi_device *sdev = q->queuedata;
1593        struct Scsi_Host *shost;
1594        struct scsi_cmnd *cmd;
1595        struct request *req;
1596
1597        if (!sdev) {
1598                printk("scsi: killing requests for dead queue\n");
1599                while ((req = elv_next_request(q)) != NULL)
1600                        scsi_kill_request(req, q);
1601                return;
1602        }
1603
1604        if(!get_device(&sdev->sdev_gendev))
1605                /* We must be tearing the block queue down already */
1606                return;
1607
1608        /*
1609         * To start with, we keep looping until the queue is empty, or until
1610         * the host is no longer able to accept any more requests.
1611         */
1612        shost = sdev->host;
1613        while (!blk_queue_plugged(q)) {
1614                int rtn;
1615                /*
1616                 * get next queueable request.  We do this early to make sure
1617                 * that the request is fully prepared even if we cannot 
1618                 * accept it.
1619                 */
1620                req = elv_next_request(q);
1621                if (!req || !scsi_dev_queue_ready(q, sdev))
1622                        break;
1623
1624                if (unlikely(!scsi_device_online(sdev))) {
1625                        sdev_printk(KERN_ERR, sdev,
1626                                    "rejecting I/O to offline device\n");
1627                        scsi_kill_request(req, q);
1628                        continue;
1629                }
1630
1631
1632                /*
1633                 * Remove the request from the request list.
1634                 */
1635                if (!(blk_queue_tagged(q) && !blk_queue_start_tag(q, req)))
1636                        blkdev_dequeue_request(req);
1637                sdev->device_busy++;
1638
1639                spin_unlock(q->queue_lock);
1640                cmd = req->special;
1641                if (unlikely(cmd == NULL)) {
1642                        printk(KERN_CRIT "impossible request in %s.\n"
1643                                         "please mail a stack trace to "
1644                                         "linux-scsi@vger.kernel.org\n",
1645                                         __func__);
1646                        blk_dump_rq_flags(req, "foo");
1647                        BUG();
1648                }
1649                spin_lock(shost->host_lock);
1650
1651                /*
1652                 * We hit this when the driver is using a host wide
1653                 * tag map. For device level tag maps the queue_depth check
1654                 * in the device ready fn would prevent us from trying
1655                 * to allocate a tag. Since the map is a shared host resource
1656                 * we add the dev to the starved list so it eventually gets
1657                 * a run when a tag is freed.
1658                 */
1659                if (blk_queue_tagged(q) && !blk_rq_tagged(req)) {
1660                        if (list_empty(&sdev->starved_entry))
1661                                list_add_tail(&sdev->starved_entry,
1662                                              &shost->starved_list);
1663                        goto not_ready;
1664                }
1665
1666                if (!scsi_target_queue_ready(shost, sdev))
1667                        goto not_ready;
1668
1669                if (!scsi_host_queue_ready(q, shost, sdev))
1670                        goto not_ready;
1671
1672                scsi_target(sdev)->target_busy++;
1673                shost->host_busy++;
1674
1675                /*
1676                 * XXX(hch): This is rather suboptimal, scsi_dispatch_cmd will
1677                 *                take the lock again.
1678                 */
1679                spin_unlock_irq(shost->host_lock);
1680
1681                /*
1682                 * Finally, initialize any error handling parameters, and set up
1683                 * the timers for timeouts.
1684                 */
1685                scsi_init_cmd_errh(cmd);
1686
1687                /*
1688                 * Dispatch the command to the low-level driver.
1689                 */
1690                rtn = scsi_dispatch_cmd(cmd);
1691                spin_lock_irq(q->queue_lock);
1692                if(rtn) {
1693                        /* we're refusing the command; because of
1694                         * the way locks get dropped, we need to 
1695                         * check here if plugging is required */
1696                        if(sdev->device_busy == 0)
1697                                blk_plug_device(q);
1698
1699                        break;
1700                }
1701        }
1702
1703        goto out;
1704
1705 not_ready:
1706        spin_unlock_irq(shost->host_lock);
1707
1708        /*
1709         * lock q, handle tag, requeue req, and decrement device_busy. We
1710         * must return with queue_lock held.
1711         *
1712         * Decrementing device_busy without checking it is OK, as all such
1713         * cases (host limits or settings) should run the queue at some
1714         * later time.
1715         */
1716        spin_lock_irq(q->queue_lock);
1717        blk_requeue_request(q, req);
1718        sdev->device_busy--;
1719        if(sdev->device_busy == 0)
1720                blk_plug_device(q);
1721 out:
1722        /* must be careful here...if we trigger the ->remove() function
1723         * we cannot be holding the q lock */
1724        spin_unlock_irq(q->queue_lock);
1725        put_device(&sdev->sdev_gendev);
1726        spin_lock_irq(q->queue_lock);
1727}
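/*
 * Illustrative sketch (editorial addition, not part of scsi_lib.c):
 * scsi_request_fn() is a block layer request_fn, so it is entered with
 * q->queue_lock held and must also return with it held; the exit path
 * above drops the lock only around put_device() and re-takes it before
 * returning.  A minimal, hypothetical caller-side view of that
 * convention (the real callers live in block/blk-core.c):
 */
static void example_run_request_fn(struct request_queue *q)
{
        unsigned long flags;

        spin_lock_irqsave(q->queue_lock, flags);
        q->request_fn(q);               /* e.g. scsi_request_fn() */
        spin_unlock_irqrestore(q->queue_lock, flags);
}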
1728
1729u64 scsi_calculate_bounce_limit(struct Scsi_Host *shost)
1730{
1731        struct device *host_dev;
1732        u64 bounce_limit = 0xffffffff;
1733
1734        if (shost->unchecked_isa_dma)
1735                return BLK_BOUNCE_ISA;
1736        /*
1737         * Platforms with virtual-DMA translation
1738         * hardware have no practical limit.
1739         */
1740        if (!PCI_DMA_BUS_IS_PHYS)
1741                return BLK_BOUNCE_ANY;
1742
1743        host_dev = scsi_get_device(shost);
1744        if (host_dev && host_dev->dma_mask)
1745                bounce_limit = *host_dev->dma_mask;
1746
1747        return bounce_limit;
1748}
1749EXPORT_SYMBOL(scsi_calculate_bounce_limit);
1750
1751struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost,
1752                                         request_fn_proc *request_fn)
1753{
1754        struct request_queue *q;
1755        struct device *dev = shost->shost_gendev.parent;
1756
1757        q = blk_init_queue(request_fn, NULL);
1758        if (!q)
1759                return NULL;
1760
1761        /*
1762         * this limit is imposed by hardware restrictions
1763         */
1764        blk_queue_max_hw_segments(q, shost->sg_tablesize);
1765        blk_queue_max_phys_segments(q, SCSI_MAX_SG_CHAIN_SEGMENTS);
1766
1767        blk_queue_max_sectors(q, shost->max_sectors);
1768        blk_queue_bounce_limit(q, scsi_calculate_bounce_limit(shost));
1769        blk_queue_segment_boundary(q, shost->dma_boundary);
1770        dma_set_seg_boundary(dev, shost->dma_boundary);
1771
1772        blk_queue_max_segment_size(q, dma_get_max_seg_size(dev));
1773
1774        /* New queue, no concurrency on queue_flags */
1775        if (!shost->use_clustering)
1776                queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q);
1777
1778        /*
1779         * set a reasonable default alignment on word boundaries: the
1780         * host and device may alter it using
1781         * blk_queue_update_dma_alignment() later.
1782         */
1783        blk_queue_dma_alignment(q, 0x03);
1784
1785        return q;
1786}
1787EXPORT_SYMBOL(__scsi_alloc_queue);
1788
1789struct request_queue *scsi_alloc_queue(struct scsi_device *sdev)
1790{
1791        struct request_queue *q;
1792
1793        q = __scsi_alloc_queue(sdev->host, scsi_request_fn);
1794        if (!q)
1795                return NULL;
1796
1797        blk_queue_prep_rq(q, scsi_prep_fn);
1798        blk_queue_softirq_done(q, scsi_softirq_done);
1799        blk_queue_rq_timed_out(q, scsi_times_out);
1800        blk_queue_lld_busy(q, scsi_lld_busy);
1801        return q;
1802}
1803
1804void scsi_free_queue(struct request_queue *q)
1805{
1806        blk_cleanup_queue(q);
1807}
1808
1809/*
1810 * Function:    scsi_block_requests()
1811 *
1812 * Purpose:     Utility function used by low-level drivers to prevent further
1813 *                commands from being queued to the device.
1814 *
1815 * Arguments:   shost       - Host in question
1816 *
1817 * Returns:     Nothing
1818 *
1819 * Lock status: No locks are assumed held.
1820 *
1821 * Notes:       There is no timer nor any other means by which the requests
1822 *                get unblocked other than the low-level driver calling
1823 *                scsi_unblock_requests().
1824 */
1825void scsi_block_requests(struct Scsi_Host *shost)
1826{
1827        shost->host_self_blocked = 1;
1828}
1829EXPORT_SYMBOL(scsi_block_requests);
1830
1831/*
1832 * Function:    scsi_unblock_requests()
1833 *
1834 * Purpose:     Utility function used by low-level drivers to allow further
1835 *                commands to be queued to the device.
1836 *
1837 * Arguments:   shost       - Host in question
1838 *
1839 * Returns:     Nothing
1840 *
1841 * Lock status: No locks are assumed held.
1842 *
1843 * Notes:       There is no timer nor any other means by which the requests
1844 *                get unblocked other than the low-level driver calling
1845 *                scsi_unblock_requests().
1846 *
1847 *                This is done as an API function so that changes to the
1848 *                internals of the scsi mid-layer won't require wholesale
1849 *                changes to drivers that use this feature.
1850 */
1851void scsi_unblock_requests(struct Scsi_Host *shost)
1852{
1853        shost->host_self_blocked = 0;
1854        scsi_run_host_queues(shost);
1855}
1856EXPORT_SYMBOL(scsi_unblock_requests);
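/*
 * Illustrative sketch (editorial addition, not part of scsi_lib.c): a
 * low-level driver typically wraps an operation during which it cannot
 * accept new commands, such as a firmware reset, with the pair above.
 * The function name is made up for illustration.
 */
static void example_lld_reset(struct Scsi_Host *shost)
{
        scsi_block_requests(shost);     /* stop further command queueing */
        /* ... driver-specific reset/recovery work goes here ... */
        scsi_unblock_requests(shost);   /* resume and rerun the queues */
}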
1857
1858int __init scsi_init_queue(void)
1859{
1860        int i;
1861
1862        scsi_io_context_cache = kmem_cache_create("scsi_io_context",
1863                                        sizeof(struct scsi_io_context),
1864                                        0, 0, NULL);
1865        if (!scsi_io_context_cache) {
1866                printk(KERN_ERR "SCSI: can't init scsi io context cache\n");
1867                return -ENOMEM;
1868        }
1869
1870        scsi_sdb_cache = kmem_cache_create("scsi_data_buffer",
1871                                           sizeof(struct scsi_data_buffer),
1872                                           0, 0, NULL);
1873        if (!scsi_sdb_cache) {
1874                printk(KERN_ERR "SCSI: can't init scsi sdb cache\n");
1875                goto cleanup_io_context;
1876        }
1877
1878        for (i = 0; i < SG_MEMPOOL_NR; i++) {
1879                struct scsi_host_sg_pool *sgp = scsi_sg_pools + i;
1880                int size = sgp->size * sizeof(struct scatterlist);
1881
1882                sgp->slab = kmem_cache_create(sgp->name, size, 0,
1883                                SLAB_HWCACHE_ALIGN, NULL);
1884                if (!sgp->slab) {
1885                        printk(KERN_ERR "SCSI: can't init sg slab %s\n",
1886                                        sgp->name);
1887                        goto cleanup_sdb;
1888                }
1889
1890                sgp->pool = mempool_create_slab_pool(SG_MEMPOOL_SIZE,
1891                                                     sgp->slab);
1892                if (!sgp->pool) {
1893                        printk(KERN_ERR "SCSI: can't init sg mempool %s\n",
1894                                        sgp->name);
1895                        goto cleanup_sdb;
1896                }
1897        }
1898
1899        return 0;
1900
1901cleanup_sdb:
1902        for (i = 0; i < SG_MEMPOOL_NR; i++) {
1903                struct scsi_host_sg_pool *sgp = scsi_sg_pools + i;
1904                if (sgp->pool)
1905                        mempool_destroy(sgp->pool);
1906                if (sgp->slab)
1907                        kmem_cache_destroy(sgp->slab);
1908        }
1909        kmem_cache_destroy(scsi_sdb_cache);
1910cleanup_io_context:
1911        kmem_cache_destroy(scsi_io_context_cache);
1912
1913        return -ENOMEM;
1914}
1915
1916void scsi_exit_queue(void)
1917{
1918        int i;
1919
1920        kmem_cache_destroy(scsi_io_context_cache);
1921        kmem_cache_destroy(scsi_sdb_cache);
1922
1923        for (i = 0; i < SG_MEMPOOL_NR; i++) {
1924                struct scsi_host_sg_pool *sgp = scsi_sg_pools + i;
1925                mempool_destroy(sgp->pool);
1926                kmem_cache_destroy(sgp->slab);
1927        }
1928}
1929
1930/**
1931 *        scsi_mode_select - issue a mode select
1932 *        @sdev:        SCSI device to be queried
1933 *        @pf:        Page format bit (1 == standard, 0 == vendor specific)
1934 *        @sp:        Save page bit (0 == don't save, 1 == save)
1935 *        @modepage: mode page being requested
1936 *        @buffer: request buffer (may not be smaller than eight bytes)
1937 *        @len:        length of request buffer.
1938 *        @timeout: command timeout
1939 *        @retries: number of retries before failing
1940 *        @data: returns a structure abstracting the mode header data
1941 *        @sshdr: place to put sense data (or NULL if no sense to be collected).
1942 *                must be SCSI_SENSE_BUFFERSIZE big.
1943 *
1944 *        Returns zero if successful; negative error number or scsi
1945 *        status on error
1946 *
1947 */
1948int
1949scsi_mode_select(struct scsi_device *sdev, int pf, int sp, int modepage,
1950                 unsigned char *buffer, int len, int timeout, int retries,
1951                 struct scsi_mode_data *data, struct scsi_sense_hdr *sshdr)
1952{
1953        unsigned char cmd[10];
1954        unsigned char *real_buffer;
1955        int ret;
1956
1957        memset(cmd, 0, sizeof(cmd));
1958        cmd[1] = (pf ? 0x10 : 0) | (sp ? 0x01 : 0);
1959
1960        if (sdev->use_10_for_ms) {
1961                if (len > 65535)
1962                        return -EINVAL;
1963                real_buffer = kmalloc(8 + len, GFP_KERNEL);
1964                if (!real_buffer)
1965                        return -ENOMEM;
1966                memcpy(real_buffer + 8, buffer, len);
1967                len += 8;
1968                real_buffer[0] = 0;
1969                real_buffer[1] = 0;
1970                real_buffer[2] = data->medium_type;
1971                real_buffer[3] = data->device_specific;
1972                real_buffer[4] = data->longlba ? 0x01 : 0;
1973                real_buffer[5] = 0;
1974                real_buffer[6] = data->block_descriptor_length >> 8;
1975                real_buffer[7] = data->block_descriptor_length;
1976
1977                cmd[0] = MODE_SELECT_10;
1978                cmd[7] = len >> 8;
1979                cmd[8] = len;
1980        } else {
1981                if (len > 255 || data->block_descriptor_length > 255 ||
1982                    data->longlba)
1983                        return -EINVAL;
1984
1985                real_buffer = kmalloc(4 + len, GFP_KERNEL);
1986                if (!real_buffer)
1987                        return -ENOMEM;
1988                memcpy(real_buffer + 4, buffer, len);
1989                len += 4;
1990                real_buffer[0] = 0;
1991                real_buffer[1] = data->medium_type;
1992                real_buffer[2] = data->device_specific;
1993                real_buffer[3] = data->block_descriptor_length;
1994                
1995
1996                cmd[0] = MODE_SELECT;
1997                cmd[4] = len;
1998        }
1999
2000        ret = scsi_execute_req(sdev, cmd, DMA_TO_DEVICE, real_buffer, len,
2001                               sshdr, timeout, retries);
2002        kfree(real_buffer);
2003        return ret;
2004}
2005EXPORT_SYMBOL_GPL(scsi_mode_select);
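/*
 * Illustrative sketch (editorial addition, not part of scsi_lib.c): a
 * typical caller first reads a mode page with scsi_mode_sense(), patches
 * a bit in the returned page, and writes it back with scsi_mode_select().
 * The page number, timeout and retry count below are illustrative
 * assumptions, as is the helper name.
 */
static int example_write_mode_page(struct scsi_device *sdev,
                                   unsigned char *page, int page_len,
                                   struct scsi_mode_data *data,
                                   struct scsi_sense_hdr *sshdr)
{
        /* PF=1 (standard page format), SP=0 (do not save persistently) */
        return scsi_mode_select(sdev, 1, 0, 0x08, page, page_len,
                                10 * HZ, 3, data, sshdr);
}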
2006
2007/**
2008 *        scsi_mode_sense - issue a mode sense, falling back from 10 to six bytes if necessary.
2009 *        @sdev:        SCSI device to be queried
2010 *        @dbd:        set if mode sense will allow block descriptors to be returned
2011 *        @modepage: mode page being requested
2012 *        @buffer: request buffer (may not be smaller than eight bytes)
2013 *        @len:        length of request buffer.
2014 *        @timeout: command timeout
2015 *        @retries: number of retries before failing
2016 *        @data: returns a structure abstracting the mode header data
2017 *        @sshdr: place to put sense data (or NULL if no sense to be collected).
2018 *                must be SCSI_SENSE_BUFFERSIZE big.
2019 *
2020 *        Returns zero if successful, or the SCSI command result on
2021 *        failure; the header offset (either 4 or 8, depending on whether
2022 *        a six or ten byte command was issued) is stored in @data->header_length.
2023 */
2024int
2025scsi_mode_sense(struct scsi_device *sdev, int dbd, int modepage,
2026                  unsigned char *buffer, int len, int timeout, int retries,
2027                  struct scsi_mode_data *data, struct scsi_sense_hdr *sshdr)
2028{
2029        unsigned char cmd[12];
2030        int use_10_for_ms;
2031        int header_length;
2032        int result;
2033        struct scsi_sense_hdr my_sshdr;
2034
2035        memset(data, 0, sizeof(*data));
2036        memset(&cmd[0], 0, 12);
2037        cmd[1] = dbd & 0x18;        /* allows DBD and LLBA bits */
2038        cmd[2] = modepage;
2039
2040        /* caller might not be interested in sense, but we need it */
2041        if (!sshdr)
2042                sshdr = &my_sshdr;
2043
2044 retry:
2045        use_10_for_ms = sdev->use_10_for_ms;
2046
2047        if (use_10_for_ms) {
2048                if (len < 8)
2049                        len = 8;
2050
2051                cmd[0] = MODE_SENSE_10;
2052                cmd[8] = len;
2053                header_length = 8;
2054        } else {
2055                if (len < 4)
2056                        len = 4;
2057
2058                cmd[0] = MODE_SENSE;
2059                cmd[4] = len;
2060                header_length = 4;
2061        }
2062
2063        memset(buffer, 0, len);
2064
2065        result = scsi_execute_req(sdev, cmd, DMA_FROM_DEVICE, buffer, len,
2066                                  sshdr, timeout, retries);
2067
2068        /* This code looks awful: what it's doing is making sure an
2069         * ILLEGAL REQUEST sense return identifies the actual command
2070         * byte as the problem.  MODE_SENSE commands can return
2071         * ILLEGAL REQUEST if the code page isn't supported */
2072
2073        if (use_10_for_ms && !scsi_status_is_good(result) &&
2074            (driver_byte(result) & DRIVER_SENSE)) {
2075                if (scsi_sense_valid(sshdr)) {
2076                        if ((sshdr->sense_key == ILLEGAL_REQUEST) &&
2077                            (sshdr->asc == 0x20) && (sshdr->ascq == 0)) {
2078                                /* 
2079                                 * Invalid command operation code
2080                                 */
2081                                sdev->use_10_for_ms = 0;
2082                                goto retry;
2083                        }
2084                }
2085        }
2086
2087        if(scsi_status_is_good(result)) {
2088                if (unlikely(buffer[0] == 0x86 && buffer[1] == 0x0b &&
2089                             (modepage == 6 || modepage == 8))) {
2090                        /* Initio breakage? */
2091                        header_length = 0;
2092                        data->length = 13;
2093                        data->medium_type = 0;
2094                        data->device_specific = 0;
2095                        data->longlba = 0;
2096                        data->block_descriptor_length = 0;
2097                } else if(use_10_for_ms) {
2098                        data->length = buffer[0]*256 + buffer[1] + 2;
2099                        data->medium_type = buffer[2];
2100                        data->device_specific = buffer[3];
2101                        data->longlba = buffer[4] & 0x01;
2102                        data->block_descriptor_length = buffer[6]*256
2103                                + buffer[7];
2104                } else {
2105                        data->length = buffer[0] + 1;
2106                        data->medium_type = buffer[1];
2107                        data->device_specific = buffer[2];
2108                        data->block_descriptor_length = buffer[3];
2109                }
2110                data->header_length = header_length;
2111        }
2112
2113        return result;
2114}
2115EXPORT_SYMBOL(scsi_mode_sense);
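/*
 * Illustrative sketch (editorial addition, not part of scsi_lib.c):
 * after a successful sense the requested mode page starts right after
 * the 4 or 8 byte header and any block descriptors, both reported in
 * @data.  The page number, timeout and retry count are illustrative
 * assumptions, as is the helper name.
 */
static unsigned char *example_find_mode_page(struct scsi_device *sdev,
                                             unsigned char *buf, int buf_len,
                                             struct scsi_mode_data *data,
                                             struct scsi_sense_hdr *sshdr)
{
        int res = scsi_mode_sense(sdev, 0 /* dbd */, 0x08, buf, buf_len,
                                  10 * HZ, 3, data, sshdr);

        if (!scsi_status_is_good(res))
                return NULL;

        /* skip the mode header plus any block descriptors */
        return buf + data->header_length + data->block_descriptor_length;
}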
2116
2117/**
2118 *        scsi_test_unit_ready - test if unit is ready
2119 *        @sdev:        scsi device to change the state of.
2120 *        @timeout: command timeout
2121 *        @retries: number of retries before failing
2122 *        @sshdr_external: Optional pointer to struct scsi_sense_hdr for
2123 *                returning sense. Make sure that this is cleared before passing
2124 *                in.
2125 *
2126 *        Returns zero if successful or an error if the TUR failed.  For
2127 *        removable media, a return of NOT_READY or UNIT_ATTENTION is
2128 *        translated to success, with the ->changed flag updated.
2129 **/
2130int
2131scsi_test_unit_ready(struct scsi_device *sdev, int timeout, int retries,
2132                     struct scsi_sense_hdr *sshdr_external)
2133{
2134        char cmd[] = {
2135                TEST_UNIT_READY, 0, 0, 0, 0, 0,
2136        };
2137        struct scsi_sense_hdr *sshdr;
2138        int result;
2139
2140        if (!sshdr_external)
2141                sshdr = kzalloc(sizeof(*sshdr), GFP_KERNEL);
2142        else
2143                sshdr = sshdr_external;
2144
2145        /* try to eat the UNIT_ATTENTION if there are enough retries */
2146        do {
2147                result = scsi_execute_req(sdev, cmd, DMA_NONE, NULL, 0, sshdr,
2148                                          timeout, retries);
2149                if (sdev->removable && scsi_sense_valid(sshdr) &&
2150                    sshdr->sense_key == UNIT_ATTENTION)
2151                        sdev->changed = 1;
2152        } while (scsi_sense_valid(sshdr) &&
2153                 sshdr->sense_key == UNIT_ATTENTION && --retries);
2154
2155        if (!sshdr)
2156                /* could not allocate sense buffer, so can't process it */
2157                return result;
2158
2159        if (sdev->removable && scsi_sense_valid(sshdr) &&
2160            (sshdr->sense_key == UNIT_ATTENTION ||
2161             sshdr->sense_key == NOT_READY)) {
2162                sdev->changed = 1;
2163                result = 0;
2164        }
2165        if (!sshdr_external)
2166                kfree(sshdr);
2167        return result;
2168}
2169EXPORT_SYMBOL(scsi_test_unit_ready);
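/*
 * Illustrative sketch (editorial addition, not part of scsi_lib.c): an
 * upper level driver polling for media presence might use the routine
 * above roughly like this; the timeout and retry count are illustrative
 * assumptions, as is the helper name.
 */
static int example_media_present(struct scsi_device *sdev)
{
        struct scsi_sense_hdr sshdr;

        memset(&sshdr, 0, sizeof(sshdr));       /* clear before passing in */

        /* zero means the device answered TEST UNIT READY */
        return scsi_test_unit_ready(sdev, 10 * HZ, 3, &sshdr) == 0;
}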
2170
2171/**
2172 *        scsi_device_set_state - Take the given device through the device state model.
2173 *        @sdev:        scsi device to change the state of.
2174 *        @state:        state to change to.
2175 *
2176 *        Returns zero if successful or an error if the requested
2177 *        transition is illegal.
2178 */
2179int
2180scsi_device_set_state(struct scsi_device *sdev, enum scsi_device_state state)
2181{
2182        enum scsi_device_state oldstate = sdev->sdev_state;
2183
2184        if (state == oldstate)
2185                return 0;
2186
2187        switch (state) {
2188        case SDEV_CREATED:
2189                switch (oldstate) {
2190                case SDEV_CREATED_BLOCK:
2191                        break;
2192                default:
2193                        goto illegal;
2194                }
2195                break;
2196                        
2197        case SDEV_RUNNING:
2198                switch (oldstate) {
2199                case SDEV_CREATED:
2200                case SDEV_OFFLINE:
2201                case SDEV_QUIESCE:
2202                case SDEV_BLOCK:
2203                        break;
2204                default:
2205                        goto illegal;
2206                }
2207                break;
2208
2209        case SDEV_QUIESCE:
2210                switch (oldstate) {
2211                case SDEV_RUNNING:
2212                case SDEV_OFFLINE:
2213                        break;
2214                default:
2215                        goto illegal;
2216                }
2217                break;
2218
2219        case SDEV_OFFLINE:
2220                switch (oldstate) {
2221                case SDEV_CREATED:
2222                case SDEV_RUNNING:
2223                case SDEV_QUIESCE:
2224                case SDEV_BLOCK:
2225                        break;
2226                default:
2227                        goto illegal;
2228                }
2229                break;
2230
2231        case SDEV_BLOCK:
2232                switch (oldstate) {
2233                case SDEV_RUNNING:
2234                case SDEV_CREATED_BLOCK:
2235                        break;
2236                default:
2237                        goto illegal;
2238                }
2239                break;
2240
2241        case SDEV_CREATED_BLOCK:
2242                switch (oldstate) {
2243                case SDEV_CREATED:
2244                        break;
2245                default:
2246                        goto illegal;
2247                }
2248                break;
2249
2250        case SDEV_CANCEL:
2251                switch (oldstate) {
2252                case SDEV_CREATED:
2253                case SDEV_RUNNING:
2254                case SDEV_QUIESCE:
2255                case SDEV_OFFLINE:
2256                case SDEV_BLOCK:
2257                        break;
2258                default:
2259                        goto illegal;
2260                }
2261                break;
2262
2263        case SDEV_DEL:
2264                switch (oldstate) {
2265                case SDEV_CREATED:
2266                case SDEV_RUNNING:
2267                case SDEV_OFFLINE:
2268                case SDEV_CANCEL:
2269                        break;
2270                default:
2271                        goto illegal;
2272                }
2273                break;
2274
2275        }
2276        sdev->sdev_state = state;
2277        return 0;
2278
2279 illegal:
2280        SCSI_LOG_ERROR_RECOVERY(1, 
2281                                sdev_printk(KERN_ERR, sdev,
2282                                            "Illegal state transition %s->%s\n",
2283                                            scsi_device_state_name(oldstate),
2284                                            scsi_device_state_name(state))
2285                                );
2286        return -EINVAL;
2287}
2288EXPORT_SYMBOL(scsi_device_set_state);
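/*
 * Illustrative sketch (editorial addition, not part of scsi_lib.c): a
 * caller simply checks the return value, since transitions not listed
 * in the tables above fail with -EINVAL.  The helper name is made up
 * for illustration.
 */
static int example_take_device_offline(struct scsi_device *sdev)
{
        int err = scsi_device_set_state(sdev, SDEV_OFFLINE);

        if (err)        /* e.g. the device is already in SDEV_DEL */
                sdev_printk(KERN_INFO, sdev, "cannot transition to offline\n");
        return err;
}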
2289
2290/**
2291 *         scsi_evt_emit - emit a single SCSI device uevent
2292 *        @sdev: associated SCSI device
2293 *        @evt: event to emit
2294 *
2295 *        Send a single uevent (scsi_event) to the associated scsi_device.
2296 */
2297static void scsi_evt_emit(struct scsi_device *sdev, struct scsi_event *evt)
2298{
2299        int idx = 0;
2300        char *envp[3];
2301
2302        switch (evt->evt_type) {
2303        case SDEV_EVT_MEDIA_CHANGE:
2304                envp[idx++] = "SDEV_MEDIA_CHANGE=1";
2305                break;
2306
2307        default:
2308                /* do nothing */
2309                break;
2310        }
2311
2312        envp[idx++] = NULL;
2313
2314        kobject_uevent_env(&sdev->sdev_gendev.kobj, KOBJ_CHANGE, envp);
2315}
2316
2317/**
2318 *         scsi_evt_thread - send a uevent for each scsi event
2319 *        @work: work struct for scsi_device
2320 *
2321 *        Dispatch queued events to their associated scsi_device kobjects
2322 *        as uevents.
2323 */
2324void scsi_evt_thread(struct work_struct *work)
2325{
2326        struct scsi_device *sdev;
2327        LIST_HEAD(event_list);
2328
2329        sdev = container_of(work, struct scsi_device, event_work);
2330
2331        while (1) {
2332                struct scsi_event *evt;
2333                struct list_head *this, *tmp;
2334                unsigned long flags;
2335
2336                spin_lock_irqsave(&sdev->list_lock, flags);
2337                list_splice_init(&sdev->event_list, &event_list);
2338                spin_unlock_irqrestore(&sdev->list_lock, flags);
2339
2340                if (list_empty(&event_list))
2341                        break;
2342
2343                list_for_each_safe(this, tmp, &event_list) {
2344                        evt = list_entry(this, struct scsi_event, node);
2345                        list_del(&evt->node);
2346                        scsi_evt_emit(sdev, evt);
2347                        kfree(evt);
2348                }
2349        }
2350}
2351
2352/**
2353 *         sdev_evt_send - send asserted event to uevent thread
2354 *        @sdev: scsi_device event occurred on
2355 *        @evt: event to send
2356 *
2357 *        Assert scsi device event asynchronously.
2358 */
2359void sdev_evt_send(struct scsi_device *sdev, struct scsi_event *evt)
2360{
2361        unsigned long flags;
2362
2363#if 0
2364        /* FIXME: currently this check eliminates all media change events
2365         * for polled devices.  Need to update to discriminate between AN
2366         * and polled events */
2367        if (!test_bit(evt->evt_type, sdev->supported_events)) {
2368                kfree(evt);
2369                return;
2370        }
2371#endif
2372
2373        spin_lock_irqsave(&sdev->list_lock, flags);
2374        list_add_tail(&evt->node, &sdev->event_list);
2375        schedule_work(&sdev->event_work);
2376        spin_unlock_irqrestore(&sdev->list_lock, flags);
2377}
2378EXPORT_SYMBOL_GPL(sdev_evt_send);
2379
2380/**
2381 *         sdev_evt_alloc - allocate a new scsi event
2382 *        @evt_type: type of event to allocate
2383 *        @gfpflags: GFP flags for allocation
2384 *
2385 *        Allocates and returns a new scsi_event.
2386 */
2387struct scsi_event *sdev_evt_alloc(enum scsi_device_event evt_type,
2388                                  gfp_t gfpflags)
2389{
2390        struct scsi_event *evt = kzalloc(sizeof(struct scsi_event), gfpflags);
2391        if (!evt)
2392                return NULL;
2393
2394        evt->evt_type = evt_type;
2395        INIT_LIST_HEAD(&evt->node);
2396
2397        /* evt_type-specific initialization, if any */
2398        switch (evt_type) {
2399        case SDEV_EVT_MEDIA_CHANGE:
2400        default:
2401                /* do nothing */
2402                break;
2403        }
2404
2405        return evt;
2406}
2407EXPORT_SYMBOL_GPL(sdev_evt_alloc);
2408
2409/**
2410 *         sdev_evt_send_simple - send asserted event to uevent thread
2411 *        @sdev: scsi_device event occurred on
2412 *        @evt_type: type of event to send
2413 *        @gfpflags: GFP flags for allocation
2414 *
2415 *        Assert scsi device event asynchronously, given an event type.
2416 */
2417void sdev_evt_send_simple(struct scsi_device *sdev,
2418                          enum scsi_device_event evt_type, gfp_t gfpflags)
2419{
2420        struct scsi_event *evt = sdev_evt_alloc(evt_type, gfpflags);
2421        if (!evt) {
2422                sdev_printk(KERN_ERR, sdev, "event %d eaten due to OOM\n",
2423                            evt_type);
2424                return;
2425        }
2426
2427        sdev_evt_send(sdev, evt);
2428}
2429EXPORT_SYMBOL_GPL(sdev_evt_send_simple);
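/*
 * Illustrative sketch (editorial addition, not part of scsi_lib.c): a
 * driver that detects a media change, possibly from interrupt context,
 * can assert the event in one call; GFP_ATOMIC is used because the
 * helper allocates the event.  The function name is made up.
 */
static void example_report_media_change(struct scsi_device *sdev)
{
        sdev_evt_send_simple(sdev, SDEV_EVT_MEDIA_CHANGE, GFP_ATOMIC);
}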
2430
2431/**
2432 *        scsi_device_quiesce - Block user issued commands.
2433 *        @sdev:        scsi device to quiesce.
2434 *
2435 *        This works by trying to transition to the SDEV_QUIESCE state
2436 *        (which must be a legal transition).  When the device is in this
2437 *        state, only special requests will be accepted, all others will
2438 *        be deferred.  Since special requests may also be requeued requests,
2439 *        a successful return doesn't guarantee the device will be 
2440 *        totally quiescent.
2441 *
2442 *        Must be called with user context, may sleep.
2443 *
2444 *        Returns zero if successful or an error if not.
2445 */
2446int
2447scsi_device_quiesce(struct scsi_device *sdev)
2448{
2449        int err = scsi_device_set_state(sdev, SDEV_QUIESCE);
2450        if (err)
2451                return err;
2452
2453        scsi_run_queue(sdev->request_queue);
2454        while (sdev->device_busy) {
2455                msleep_interruptible(200);
2456                scsi_run_queue(sdev->request_queue);
2457        }
2458        return 0;
2459}
2460EXPORT_SYMBOL(scsi_device_quiesce);
2461
2462/**
2463 *        scsi_device_resume - Restart user issued commands to a quiesced device.
2464 *        @sdev:        scsi device to resume.
2465 *
2466 *        Moves the device from quiesced back to running and restarts the
2467 *        queues.
2468 *
2469 *        Must be called with user context, may sleep.
2470 */
2471void
2472scsi_device_resume(struct scsi_device *sdev)
2473{
2474        if(scsi_device_set_state(sdev, SDEV_RUNNING))
2475                return;
2476        scsi_run_queue(sdev->request_queue);
2477}
2478EXPORT_SYMBOL(scsi_device_resume);
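/*
 * Illustrative sketch (editorial addition, not part of scsi_lib.c): the
 * usual pattern is to quiesce, perform work that must not compete with
 * user I/O, then resume; as noted above this must run in user context.
 * The helper name is made up for illustration.
 */
static int example_do_maintenance(struct scsi_device *sdev)
{
        int err = scsi_device_quiesce(sdev);

        if (err)
                return err;
        /* ... issue internal/special requests here ... */
        scsi_device_resume(sdev);
        return 0;
}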
2479
2480static void
2481device_quiesce_fn(struct scsi_device *sdev, void *data)
2482{
2483        scsi_device_quiesce(sdev);
2484}
2485
2486void
2487scsi_target_quiesce(struct scsi_target *starget)
2488{
2489        starget_for_each_device(starget, NULL, device_quiesce_fn);
2490}
2491EXPORT_SYMBOL(scsi_target_quiesce);
2492
2493static void
2494device_resume_fn(struct scsi_device *sdev, void *data)
2495{
2496        scsi_device_resume(sdev);
2497}
2498
2499void
2500scsi_target_resume(struct scsi_target *starget)
2501{
2502        starget_for_each_device(starget, NULL, device_resume_fn);
2503}
2504EXPORT_SYMBOL(scsi_target_resume);
2505
2506/**
2507 * scsi_internal_device_block - internal function to put a device temporarily into the SDEV_BLOCK state
2508 * @sdev:        device to block
2509 *
2510 * Block request made by scsi lld's to temporarily stop all
2511 * scsi commands on the specified device.  Called from interrupt
2512 * or normal process context.
2513 *
2514 * Returns zero if successful or error if not
2515 *
2516 * Notes:       
2517 *        This routine transitions the device to the SDEV_BLOCK state
2518 *        (which must be a legal transition).  When the device is in this
2519 *        state, all commands are deferred until the scsi lld reenables
2520 *        the device with scsi_device_unblock or device_block_tmo fires.
2521 *        This routine assumes the host_lock is held on entry.
2522 */
2523int
2524scsi_internal_device_block(struct scsi_device *sdev)
2525{
2526        struct request_queue *q = sdev->request_queue;
2527        unsigned long flags;
2528        int err = 0;
2529
2530        err = scsi_device_set_state(sdev, SDEV_BLOCK);
2531        if (err) {
2532                err = scsi_device_set_state(sdev, SDEV_CREATED_BLOCK);
2533
2534                if (err)
2535                        return err;
2536        }
2537
2538        /* 
2539         * The device has transitioned to SDEV_BLOCK.  Stop the
2540         * block layer from calling the midlayer with this device's
2541         * request queue. 
2542         */
2543        spin_lock_irqsave(q->queue_lock, flags);
2544        blk_stop_queue(q);
2545        spin_unlock_irqrestore(q->queue_lock, flags);
2546
2547        return 0;
2548}
2549EXPORT_SYMBOL_GPL(scsi_internal_device_block);
2550 
2551/**
2552 * scsi_internal_device_unblock - resume a device after a block request
2553 * @sdev:        device to resume
2554 *
2555 * Called by scsi lld's or the midlayer to restart the device queue
2556 * for the previously suspended scsi device.  Called from interrupt or
2557 * normal process context.
2558 *
2559 * Returns zero if successful or error if not.
2560 *
2561 * Notes:       
2562 *        This routine transitions the device to the SDEV_RUNNING state
2563 *        (which must be a legal transition) allowing the midlayer to
2564 *        goose the queue for this device.  This routine assumes the 
2565 *        host_lock is held upon entry.
2566 */
2567int
2568scsi_internal_device_unblock(struct scsi_device *sdev)
2569{
2570        struct request_queue *q = sdev->request_queue; 
2571        int err;
2572        unsigned long flags;
2573        
2574        /* 
2575         * Try to transition the scsi device to SDEV_RUNNING
2576         * and goose the device queue if successful.  
2577         */
2578        err = scsi_device_set_state(sdev, SDEV_RUNNING);
2579        if (err) {
2580                err = scsi_device_set_state(sdev, SDEV_CREATED);
2581
2582                if (err)
2583                        return err;
2584        }
2585
2586        spin_lock_irqsave(q->queue_lock, flags);
2587        blk_start_queue(q);
2588        spin_unlock_irqrestore(q->queue_lock, flags);
2589
2590        return 0;
2591}
2592EXPORT_SYMBOL_GPL(scsi_internal_device_unblock);
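/*
 * Illustrative sketch (editorial addition, not part of scsi_lib.c): a
 * low-level driver can fence a single device while it recovers a path
 * and later restart it; see the Notes above for the locking
 * expectations of the two helpers.  The function name is made up.
 */
static int example_fence_and_recover(struct scsi_device *sdev)
{
        int err = scsi_internal_device_block(sdev);

        if (err)
                return err;
        /* ... driver-specific path recovery work goes here ... */
        return scsi_internal_device_unblock(sdev);
}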
2593
2594static void
2595device_block(struct scsi_device *sdev, void *data)
2596{
2597        scsi_internal_device_block(sdev);
2598}
2599
2600static int
2601target_block(struct device *dev, void *data)
2602{
2603        if (scsi_is_target_device(dev))
2604                starget_for_each_device(to_scsi_target(dev), NULL,
2605                                        device_block);
2606        return 0;
2607}
2608
2609void
2610scsi_target_block(struct device *dev)
2611{
2612        if (scsi_is_target_device(dev))
2613                starget_for_each_device(to_scsi_target(dev), NULL,
2614                                        device_block);
2615        else
2616                device_for_each_child(dev, NULL, target_block);
2617}
2618EXPORT_SYMBOL_GPL(scsi_target_block);
2619
2620static void
2621device_unblock(struct scsi_device *sdev, void *data)
2622{
2623        scsi_internal_device_unblock(sdev);
2624}
2625
2626static int
2627target_unblock(struct device *dev, void *data)
2628{
2629        if (scsi_is_target_device(dev))
2630                starget_for_each_device(to_scsi_target(dev), NULL,
2631                                        device_unblock);
2632        return 0;
2633}
2634
2635void
2636scsi_target_unblock(struct device *dev)
2637{
2638        if (scsi_is_target_device(dev))
2639                starget_for_each_device(to_scsi_target(dev), NULL,
2640                                        device_unblock);
2641        else
2642                device_for_each_child(dev, NULL, target_unblock);
2643}
2644EXPORT_SYMBOL_GPL(scsi_target_unblock);
2645
2646/**
2647 * scsi_kmap_atomic_sg - find and atomically map an sg-element
2648 * @sgl:        scatter-gather list
2649 * @sg_count:        number of segments in sg
2650 * @offset:        offset in bytes into sg, on return offset into the mapped area
2651 * @len:        bytes to map, on return number of bytes mapped
2652 *
2653 * Returns virtual address of the start of the mapped page
2654 */
2655void *scsi_kmap_atomic_sg(struct scatterlist *sgl, int sg_count,
2656                          size_t *offset, size_t *len)
2657{
2658        int i;
2659        size_t sg_len = 0, len_complete = 0;
2660        struct scatterlist *sg;
2661        struct page *page;
2662
2663        WARN_ON(!irqs_disabled());
2664
2665        for_each_sg(sgl, sg, sg_count, i) {
2666                len_complete = sg_len; /* Complete sg-entries */
2667                sg_len += sg->length;
2668                if (sg_len > *offset)
2669                        break;
2670        }
2671
2672        if (unlikely(i == sg_count)) {
2673                printk(KERN_ERR "%s: Bytes in sg: %zu, requested offset %zu, "
2674                        "elements %d\n",
2675                       __func__, sg_len, *offset, sg_count);
2676                WARN_ON(1);
2677                return NULL;
2678        }
2679
2680        /* Offset starting from the beginning of first page in this sg-entry */
2681        *offset = *offset - len_complete + sg->offset;
2682
2683        /* Assumption: contiguous pages can be accessed as "page + i" */
2684        page = nth_page(sg_page(sg), (*offset >> PAGE_SHIFT));
2685        *offset &= ~PAGE_MASK;
2686
2687        /* Bytes in this sg-entry from *offset to the end of the page */
2688        sg_len = PAGE_SIZE - *offset;
2689        if (*len > sg_len)
2690                *len = sg_len;
2691
2692        return kmap_atomic(page, KM_BIO_SRC_IRQ);
2693}
2694EXPORT_SYMBOL(scsi_kmap_atomic_sg);
2695
2696/**
2697 * scsi_kunmap_atomic_sg - atomically unmap a virtual address, previously mapped with scsi_kmap_atomic_sg
2698 * @virt:        virtual address to be unmapped
2699 */
2700void scsi_kunmap_atomic_sg(void *virt)
2701{
2702        kunmap_atomic(virt, KM_BIO_SRC_IRQ);
2703}
2704EXPORT_SYMBOL(scsi_kunmap_atomic_sg);
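/*
 * Illustrative sketch (editorial addition, not part of scsi_lib.c): the
 * usual pattern is map, copy at most *len bytes (the mapping is clamped
 * to the end of one page), then unmap.  Interrupts must already be
 * disabled, as the WARN_ON() above checks.  The helper name is made up.
 */
static void example_copy_from_sg(struct scatterlist *sgl, int sg_count,
                                 size_t offset, void *dst, size_t len)
{
        char *src = scsi_kmap_atomic_sg(sgl, sg_count, &offset, &len);

        if (!src)
                return;
        /* offset and len were adjusted to lie within the mapped page */
        memcpy(dst, src + offset, len);
        scsi_kunmap_atomic_sg(src);
}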