Showing error 1230

User: Jiri Slaby
Error type: Leaving function in locked state
Error type description: A lock is acquired but not released on every path out of a function, so it is leaked (left held) when the function returns
File location: arch/x86/kernel/io_apic.c
Line in file: 1030
Project: Linux Kernel
Project version: 2.6.28
Tools: Stanse (1.2)
Entered: 2012-05-21 20:30:05 UTC
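
For illustration, here is a minimal hypothetical sketch of the pattern this
checker flags -- a function that returns on an error path while still holding
a lock. The function, struct, and field names below are made up and do not
appear in the source under analysis:

        int update_counter(struct foo *f)
        {
                spin_lock(&f->lock);
                if (f->broken)
                        return -EINVAL;        /* bug: returns with f->lock held */
                f->counter++;
                spin_unlock(&f->lock);
                return 0;
        }

The correct pattern releases the lock on every exit, as the file below does in
io_apic_level_ack_pending() (source lines 257-259), which unlocks before its
early return. The flagged line here (1030) is the spin_lock() in
lock_vector_lock(), which deliberately leaves vector_lock held for its caller
and relies on unlock_vector_lock() to release it, so this report appears to be
a false positive of the pattern.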


Source:

   1/*
   2 *        Intel IO-APIC support for multi-Pentium hosts.
   3 *
   4 *        Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo
   5 *
   6 *        Many thanks to Stig Venaas for trying out countless experimental
   7 *        patches and reporting/debugging problems patiently!
   8 *
   9 *        (c) 1999, Multiple IO-APIC support, developed by
  10 *        Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and
  11 *        Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>,
  12 *        further tested and cleaned up by Zach Brown <zab@redhat.com>
  13 *        and Ingo Molnar <mingo@redhat.com>
  14 *
  15 *        Fixes
  16 *        Maciej W. Rozycki        :        Bits for genuine 82489DX APICs;
  17 *                                        thanks to Eric Gilmore
  18 *                                        and Rolf G. Tews
  19 *                                        for testing these extensively
  20 *        Paul Diefenbaugh        :        Added full ACPI support
  21 */
  22
  23#include <linux/mm.h>
  24#include <linux/interrupt.h>
  25#include <linux/init.h>
  26#include <linux/delay.h>
  27#include <linux/sched.h>
  28#include <linux/pci.h>
  29#include <linux/mc146818rtc.h>
  30#include <linux/compiler.h>
  31#include <linux/acpi.h>
  32#include <linux/module.h>
  33#include <linux/sysdev.h>
  34#include <linux/msi.h>
  35#include <linux/htirq.h>
  36#include <linux/freezer.h>
  37#include <linux/kthread.h>
  38#include <linux/jiffies.h>        /* time_after() */
  39#ifdef CONFIG_ACPI
  40#include <acpi/acpi_bus.h>
  41#endif
  42#include <linux/bootmem.h>
  43#include <linux/dmar.h>
  44#include <linux/hpet.h>
  45
  46#include <asm/idle.h>
  47#include <asm/io.h>
  48#include <asm/smp.h>
  49#include <asm/desc.h>
  50#include <asm/proto.h>
  51#include <asm/acpi.h>
  52#include <asm/dma.h>
  53#include <asm/timer.h>
  54#include <asm/i8259.h>
  55#include <asm/nmi.h>
  56#include <asm/msidef.h>
  57#include <asm/hypertransport.h>
  58#include <asm/setup.h>
  59#include <asm/irq_remapping.h>
  60#include <asm/hpet.h>
  61#include <asm/uv/uv_hub.h>
  62#include <asm/uv/uv_irq.h>
  63
  64#include <mach_ipi.h>
  65#include <mach_apic.h>
  66#include <mach_apicdef.h>
  67
  68#define __apicdebuginit(type) static type __init
  69
  70/*
  71 *      Is the SiS APIC rmw bug present ?
  72 *      -1 = don't know, 0 = no, 1 = yes
  73 */
  74int sis_apic_bug = -1;
  75
  76static DEFINE_SPINLOCK(ioapic_lock);
  77static DEFINE_SPINLOCK(vector_lock);
  78
  79/*
  80 * # of IRQ routing registers
  81 */
  82int nr_ioapic_registers[MAX_IO_APICS];
  83
  84/* I/O APIC entries */
  85struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
  86int nr_ioapics;
  87
  88/* MP IRQ source entries */
  89struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
  90
  91/* # of MP IRQ source entries */
  92int mp_irq_entries;
  93
  94#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
  95int mp_bus_id_to_type[MAX_MP_BUSSES];
  96#endif
  97
  98DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
  99
 100int skip_ioapic_setup;
 101
 102static int __init parse_noapic(char *str)
 103{
 104        /* disable IO-APIC */
 105        disable_ioapic_setup();
 106        return 0;
 107}
 108early_param("noapic", parse_noapic);
 109
 110struct irq_pin_list;
 111struct irq_cfg {
 112        unsigned int irq;
 113        struct irq_pin_list *irq_2_pin;
 114        cpumask_t domain;
 115        cpumask_t old_domain;
 116        unsigned move_cleanup_count;
 117        u8 vector;
 118        u8 move_in_progress : 1;
 119};
 120
 121/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
 122static struct irq_cfg irq_cfgx[NR_IRQS] = {
 123        [0]  = { .irq =  0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
 124        [1]  = { .irq =  1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
 125        [2]  = { .irq =  2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
 126        [3]  = { .irq =  3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR,  },
 127        [4]  = { .irq =  4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR,  },
 128        [5]  = { .irq =  5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR,  },
 129        [6]  = { .irq =  6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR,  },
 130        [7]  = { .irq =  7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR,  },
 131        [8]  = { .irq =  8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR,  },
 132        [9]  = { .irq =  9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR,  },
 133        [10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
 134        [11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
 135        [12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
 136        [13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
 137        [14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
 138        [15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
 139};
 140
 141#define for_each_irq_cfg(irq, cfg)                \
 142        for (irq = 0, cfg = irq_cfgx; irq < nr_irqs; irq++, cfg++)
 143
 144static struct irq_cfg *irq_cfg(unsigned int irq)
 145{
 146        return irq < nr_irqs ? irq_cfgx + irq : NULL;
 147}
 148
 149static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
 150{
 151        return irq_cfg(irq);
 152}
 153
 154/*
 155 * Rough estimation of how many shared IRQs there are, can be changed
 156 * anytime.
 157 */
 158#define MAX_PLUS_SHARED_IRQS NR_IRQS
 159#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
 160
 161/*
 162 * This is performance-critical, we want to do it O(1)
 163 *
 164 * the indexing order of this array favors 1:1 mappings
 165 * between pins and IRQs.
 166 */
 167
 168struct irq_pin_list {
 169        int apic, pin;
 170        struct irq_pin_list *next;
 171};
 172
 173static struct irq_pin_list irq_2_pin_head[PIN_MAP_SIZE];
 174static struct irq_pin_list *irq_2_pin_ptr;
 175
 176static void __init irq_2_pin_init(void)
 177{
 178        struct irq_pin_list *pin = irq_2_pin_head;
 179        int i;
 180
 181        for (i = 1; i < PIN_MAP_SIZE; i++)
 182                pin[i-1].next = &pin[i];
 183
 184        irq_2_pin_ptr = &pin[0];
 185}
 186
 187static struct irq_pin_list *get_one_free_irq_2_pin(void)
 188{
 189        struct irq_pin_list *pin = irq_2_pin_ptr;
 190
 191        if (!pin)
 192                panic("can not get more irq_2_pin\n");
 193
 194        irq_2_pin_ptr = pin->next;
 195        pin->next = NULL;
 196        return pin;
 197}
 198
 199struct io_apic {
 200        unsigned int index;
 201        unsigned int unused[3];
 202        unsigned int data;
 203};
 204
 205static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
 206{
 207        return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
 208                + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK);
 209}
 210
 211static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
 212{
 213        struct io_apic __iomem *io_apic = io_apic_base(apic);
 214        writel(reg, &io_apic->index);
 215        return readl(&io_apic->data);
 216}
 217
 218static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
 219{
 220        struct io_apic __iomem *io_apic = io_apic_base(apic);
 221        writel(reg, &io_apic->index);
 222        writel(value, &io_apic->data);
 223}
 224
 225/*
 226 * Re-write a value: to be used for read-modify-write
 227 * cycles where the read already set up the index register.
 228 *
 229 * Older SiS APIC requires we rewrite the index register
 230 */
 231static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
 232{
 233        struct io_apic __iomem *io_apic = io_apic_base(apic);
 234
 235        if (sis_apic_bug)
 236                writel(reg, &io_apic->index);
 237        writel(value, &io_apic->data);
 238}
 239
 240static bool io_apic_level_ack_pending(unsigned int irq)
 241{
 242        struct irq_pin_list *entry;
 243        unsigned long flags;
 244        struct irq_cfg *cfg = irq_cfg(irq);
 245
 246        spin_lock_irqsave(&ioapic_lock, flags);
 247        entry = cfg->irq_2_pin;
 248        for (;;) {
 249                unsigned int reg;
 250                int pin;
 251
 252                if (!entry)
 253                        break;
 254                pin = entry->pin;
 255                reg = io_apic_read(entry->apic, 0x10 + pin*2);
 256                /* Is the remote IRR bit set? */
 257                if (reg & IO_APIC_REDIR_REMOTE_IRR) {
 258                        spin_unlock_irqrestore(&ioapic_lock, flags);
 259                        return true;
 260                }
 261                if (!entry->next)
 262                        break;
 263                entry = entry->next;
 264        }
 265        spin_unlock_irqrestore(&ioapic_lock, flags);
 266
 267        return false;
 268}
 269
 270union entry_union {
 271        struct { u32 w1, w2; };
 272        struct IO_APIC_route_entry entry;
 273};
 274
 275static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
 276{
 277        union entry_union eu;
 278        unsigned long flags;
 279        spin_lock_irqsave(&ioapic_lock, flags);
 280        eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
 281        eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
 282        spin_unlock_irqrestore(&ioapic_lock, flags);
 283        return eu.entry;
 284}
 285
 286/*
 287 * When we write a new IO APIC routing entry, we need to write the high
 288 * word first! If the mask bit in the low word is clear, we will enable
 289 * the interrupt, and we need to make sure the entry is fully populated
 290 * before that happens.
 291 */
 292static void
 293__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
 294{
 295        union entry_union eu;
 296        eu.entry = e;
 297        io_apic_write(apic, 0x11 + 2*pin, eu.w2);
 298        io_apic_write(apic, 0x10 + 2*pin, eu.w1);
 299}
 300
 301static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
 302{
 303        unsigned long flags;
 304        spin_lock_irqsave(&ioapic_lock, flags);
 305        __ioapic_write_entry(apic, pin, e);
 306        spin_unlock_irqrestore(&ioapic_lock, flags);
 307}
 308
 309/*
 310 * When we mask an IO APIC routing entry, we need to write the low
 311 * word first, in order to set the mask bit before we change the
 312 * high bits!
 313 */
 314static void ioapic_mask_entry(int apic, int pin)
 315{
 316        unsigned long flags;
 317        union entry_union eu = { .entry.mask = 1 };
 318
 319        spin_lock_irqsave(&ioapic_lock, flags);
 320        io_apic_write(apic, 0x10 + 2*pin, eu.w1);
 321        io_apic_write(apic, 0x11 + 2*pin, eu.w2);
 322        spin_unlock_irqrestore(&ioapic_lock, flags);
 323}
 324
 325#ifdef CONFIG_SMP
 326static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
 327{
 328        int apic, pin;
 329        struct irq_cfg *cfg;
 330        struct irq_pin_list *entry;
 331
 332        cfg = irq_cfg(irq);
 333        entry = cfg->irq_2_pin;
 334        for (;;) {
 335                unsigned int reg;
 336
 337                if (!entry)
 338                        break;
 339
 340                apic = entry->apic;
 341                pin = entry->pin;
 342#ifdef CONFIG_INTR_REMAP
 343                /*
 344                 * With interrupt-remapping, destination information comes
 345                 * from interrupt-remapping table entry.
 346                 */
 347                if (!irq_remapped(irq))
 348                        io_apic_write(apic, 0x11 + pin*2, dest);
 349#else
 350                io_apic_write(apic, 0x11 + pin*2, dest);
 351#endif
 352                reg = io_apic_read(apic, 0x10 + pin*2);
 353                reg &= ~IO_APIC_REDIR_VECTOR_MASK;
 354                reg |= vector;
 355                io_apic_modify(apic, 0x10 + pin*2, reg);
 356                if (!entry->next)
 357                        break;
 358                entry = entry->next;
 359        }
 360}
 361
 362static int assign_irq_vector(int irq, cpumask_t mask);
 363
 364static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 365{
 366        struct irq_cfg *cfg;
 367        unsigned long flags;
 368        unsigned int dest;
 369        cpumask_t tmp;
 370        struct irq_desc *desc;
 371
 372        cpus_and(tmp, mask, cpu_online_map);
 373        if (cpus_empty(tmp))
 374                return;
 375
 376        cfg = irq_cfg(irq);
 377        if (assign_irq_vector(irq, mask))
 378                return;
 379
 380        cpus_and(tmp, cfg->domain, mask);
 381        dest = cpu_mask_to_apicid(tmp);
 382        /*
 383         * Only the high 8 bits are valid.
 384         */
 385        dest = SET_APIC_LOGICAL_ID(dest);
 386
 387        desc = irq_to_desc(irq);
 388        spin_lock_irqsave(&ioapic_lock, flags);
 389        __target_IO_APIC_irq(irq, dest, cfg->vector);
 390        desc->affinity = mask;
 391        spin_unlock_irqrestore(&ioapic_lock, flags);
 392}
 393#endif /* CONFIG_SMP */
 394
 395/*
 396 * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
 397 * shared ISA-space IRQs, so we have to support them. We are super
 398 * fast in the common case, and fast for shared ISA-space IRQs.
 399 */
 400static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 401{
 402        struct irq_cfg *cfg;
 403        struct irq_pin_list *entry;
 404
  405        /* first reference to this irq, so allocate a new irq_cfg */
 406        cfg = irq_cfg_alloc(irq);
 407        entry = cfg->irq_2_pin;
 408        if (!entry) {
 409                entry = get_one_free_irq_2_pin();
 410                cfg->irq_2_pin = entry;
 411                entry->apic = apic;
 412                entry->pin = pin;
 413                return;
 414        }
 415
 416        while (entry->next) {
 417                /* not again, please */
 418                if (entry->apic == apic && entry->pin == pin)
 419                        return;
 420
 421                entry = entry->next;
 422        }
 423
 424        entry->next = get_one_free_irq_2_pin();
 425        entry = entry->next;
 426        entry->apic = apic;
 427        entry->pin = pin;
 428}
 429
 430/*
 431 * Reroute an IRQ to a different pin.
 432 */
 433static void __init replace_pin_at_irq(unsigned int irq,
 434                                      int oldapic, int oldpin,
 435                                      int newapic, int newpin)
 436{
 437        struct irq_cfg *cfg = irq_cfg(irq);
 438        struct irq_pin_list *entry = cfg->irq_2_pin;
 439        int replaced = 0;
 440
 441        while (entry) {
 442                if (entry->apic == oldapic && entry->pin == oldpin) {
 443                        entry->apic = newapic;
 444                        entry->pin = newpin;
 445                        replaced = 1;
 446                        /* every one is different, right? */
 447                        break;
 448                }
 449                entry = entry->next;
 450        }
 451
 452        /* why? call replace before add? */
 453        if (!replaced)
 454                add_pin_to_irq(irq, newapic, newpin);
 455}
 456
 457static inline void io_apic_modify_irq(unsigned int irq,
 458                                int mask_and, int mask_or,
 459                                void (*final)(struct irq_pin_list *entry))
 460{
 461        int pin;
 462        struct irq_cfg *cfg;
 463        struct irq_pin_list *entry;
 464
 465        cfg = irq_cfg(irq);
 466        for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) {
 467                unsigned int reg;
 468                pin = entry->pin;
 469                reg = io_apic_read(entry->apic, 0x10 + pin * 2);
 470                reg &= mask_and;
 471                reg |= mask_or;
 472                io_apic_modify(entry->apic, 0x10 + pin * 2, reg);
 473                if (final)
 474                        final(entry);
 475        }
 476}
 477
 478static void __unmask_IO_APIC_irq(unsigned int irq)
 479{
 480        io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, 0, NULL);
 481}
 482
 483#ifdef CONFIG_X86_64
 484void io_apic_sync(struct irq_pin_list *entry)
 485{
 486        /*
 487         * Synchronize the IO-APIC and the CPU by doing
 488         * a dummy read from the IO-APIC
 489         */
 490        struct io_apic __iomem *io_apic;
 491        io_apic = io_apic_base(entry->apic);
 492        readl(&io_apic->data);
 493}
 494
 495static void __mask_IO_APIC_irq(unsigned int irq)
 496{
 497        io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
 498}
 499#else /* CONFIG_X86_32 */
 500static void __mask_IO_APIC_irq(unsigned int irq)
 501{
 502        io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, NULL);
 503}
 504
 505static void __mask_and_edge_IO_APIC_irq(unsigned int irq)
 506{
 507        io_apic_modify_irq(irq, ~IO_APIC_REDIR_LEVEL_TRIGGER,
 508                        IO_APIC_REDIR_MASKED, NULL);
 509}
 510
 511static void __unmask_and_level_IO_APIC_irq(unsigned int irq)
 512{
 513        io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED,
 514                        IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
 515}
 516#endif /* CONFIG_X86_32 */
 517
 518static void mask_IO_APIC_irq (unsigned int irq)
 519{
 520        unsigned long flags;
 521
 522        spin_lock_irqsave(&ioapic_lock, flags);
 523        __mask_IO_APIC_irq(irq);
 524        spin_unlock_irqrestore(&ioapic_lock, flags);
 525}
 526
 527static void unmask_IO_APIC_irq (unsigned int irq)
 528{
 529        unsigned long flags;
 530
 531        spin_lock_irqsave(&ioapic_lock, flags);
 532        __unmask_IO_APIC_irq(irq);
 533        spin_unlock_irqrestore(&ioapic_lock, flags);
 534}
 535
 536static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
 537{
 538        struct IO_APIC_route_entry entry;
 539
 540        /* Check delivery_mode to be sure we're not clearing an SMI pin */
 541        entry = ioapic_read_entry(apic, pin);
 542        if (entry.delivery_mode == dest_SMI)
 543                return;
 544        /*
 545         * Disable it in the IO-APIC irq-routing table:
 546         */
 547        ioapic_mask_entry(apic, pin);
 548}
 549
 550static void clear_IO_APIC (void)
 551{
 552        int apic, pin;
 553
 554        for (apic = 0; apic < nr_ioapics; apic++)
 555                for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
 556                        clear_IO_APIC_pin(apic, pin);
 557}
 558
 559#if !defined(CONFIG_SMP) && defined(CONFIG_X86_32)
 560void send_IPI_self(int vector)
 561{
 562        unsigned int cfg;
 563
 564        /*
 565         * Wait for idle.
 566         */
 567        apic_wait_icr_idle();
 568        cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL;
 569        /*
 570         * Send the IPI. The write to APIC_ICR fires this off.
 571         */
 572        apic_write(APIC_ICR, cfg);
 573}
 574#endif /* !CONFIG_SMP && CONFIG_X86_32*/
 575
 576#ifdef CONFIG_X86_32
 577/*
 578 * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
 579 * specific CPU-side IRQs.
 580 */
 581
 582#define MAX_PIRQS 8
 583static int pirq_entries [MAX_PIRQS];
 584static int pirqs_enabled;
 585
 586static int __init ioapic_pirq_setup(char *str)
 587{
 588        int i, max;
 589        int ints[MAX_PIRQS+1];
 590
 591        get_options(str, ARRAY_SIZE(ints), ints);
 592
 593        for (i = 0; i < MAX_PIRQS; i++)
 594                pirq_entries[i] = -1;
 595
 596        pirqs_enabled = 1;
 597        apic_printk(APIC_VERBOSE, KERN_INFO
 598                        "PIRQ redirection, working around broken MP-BIOS.\n");
 599        max = MAX_PIRQS;
 600        if (ints[0] < MAX_PIRQS)
 601                max = ints[0];
 602
 603        for (i = 0; i < max; i++) {
 604                apic_printk(APIC_VERBOSE, KERN_DEBUG
 605                                "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
 606                /*
 607                 * PIRQs are mapped upside down, usually.
 608                 */
 609                pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
 610        }
 611        return 1;
 612}
 613
 614__setup("pirq=", ioapic_pirq_setup);
 615#endif /* CONFIG_X86_32 */
 616
 617#ifdef CONFIG_INTR_REMAP
 618/* I/O APIC RTE contents at the OS boot up */
 619static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
 620
 621/*
 622 * Saves and masks all the unmasked IO-APIC RTE's
 623 */
 624int save_mask_IO_APIC_setup(void)
 625{
 626        union IO_APIC_reg_01 reg_01;
 627        unsigned long flags;
 628        int apic, pin;
 629
 630        /*
 631         * The number of IO-APIC IRQ registers (== #pins):
 632         */
 633        for (apic = 0; apic < nr_ioapics; apic++) {
 634                spin_lock_irqsave(&ioapic_lock, flags);
 635                reg_01.raw = io_apic_read(apic, 1);
 636                spin_unlock_irqrestore(&ioapic_lock, flags);
 637                nr_ioapic_registers[apic] = reg_01.bits.entries+1;
 638        }
 639
 640        for (apic = 0; apic < nr_ioapics; apic++) {
 641                early_ioapic_entries[apic] =
 642                        kzalloc(sizeof(struct IO_APIC_route_entry) *
 643                                nr_ioapic_registers[apic], GFP_KERNEL);
 644                if (!early_ioapic_entries[apic])
 645                        goto nomem;
 646        }
 647
 648        for (apic = 0; apic < nr_ioapics; apic++)
 649                for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
 650                        struct IO_APIC_route_entry entry;
 651
 652                        entry = early_ioapic_entries[apic][pin] =
 653                                ioapic_read_entry(apic, pin);
 654                        if (!entry.mask) {
 655                                entry.mask = 1;
 656                                ioapic_write_entry(apic, pin, entry);
 657                        }
 658                }
 659
 660        return 0;
 661
 662nomem:
 663        while (apic >= 0)
 664                kfree(early_ioapic_entries[apic--]);
  665        memset(early_ioapic_entries, 0,
  666                sizeof(early_ioapic_entries));
 667
 668        return -ENOMEM;
 669}
 670
 671void restore_IO_APIC_setup(void)
 672{
 673        int apic, pin;
 674
 675        for (apic = 0; apic < nr_ioapics; apic++) {
 676                if (!early_ioapic_entries[apic])
 677                        break;
 678                for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
 679                        ioapic_write_entry(apic, pin,
 680                                           early_ioapic_entries[apic][pin]);
 681                kfree(early_ioapic_entries[apic]);
 682                early_ioapic_entries[apic] = NULL;
 683        }
 684}
 685
 686void reinit_intr_remapped_IO_APIC(int intr_remapping)
 687{
 688        /*
 689         * for now plain restore of previous settings.
 690         * TBD: In the case of OS enabling interrupt-remapping,
 691         * IO-APIC RTE's need to be setup to point to interrupt-remapping
 692         * table entries. for now, do a plain restore, and wait for
 693         * the setup_IO_APIC_irqs() to do proper initialization.
 694         */
 695        restore_IO_APIC_setup();
 696}
 697#endif
 698
 699/*
 700 * Find the IRQ entry number of a certain pin.
 701 */
 702static int find_irq_entry(int apic, int pin, int type)
 703{
 704        int i;
 705
 706        for (i = 0; i < mp_irq_entries; i++)
 707                if (mp_irqs[i].mp_irqtype == type &&
 708                    (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid ||
 709                     mp_irqs[i].mp_dstapic == MP_APIC_ALL) &&
 710                    mp_irqs[i].mp_dstirq == pin)
 711                        return i;
 712
 713        return -1;
 714}
 715
 716/*
 717 * Find the pin to which IRQ[irq] (ISA) is connected
 718 */
 719static int __init find_isa_irq_pin(int irq, int type)
 720{
 721        int i;
 722
 723        for (i = 0; i < mp_irq_entries; i++) {
 724                int lbus = mp_irqs[i].mp_srcbus;
 725
 726                if (test_bit(lbus, mp_bus_not_pci) &&
 727                    (mp_irqs[i].mp_irqtype == type) &&
 728                    (mp_irqs[i].mp_srcbusirq == irq))
 729
 730                        return mp_irqs[i].mp_dstirq;
 731        }
 732        return -1;
 733}
 734
 735static int __init find_isa_irq_apic(int irq, int type)
 736{
 737        int i;
 738
 739        for (i = 0; i < mp_irq_entries; i++) {
 740                int lbus = mp_irqs[i].mp_srcbus;
 741
 742                if (test_bit(lbus, mp_bus_not_pci) &&
 743                    (mp_irqs[i].mp_irqtype == type) &&
 744                    (mp_irqs[i].mp_srcbusirq == irq))
 745                        break;
 746        }
 747        if (i < mp_irq_entries) {
 748                int apic;
 749                for(apic = 0; apic < nr_ioapics; apic++) {
 750                        if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic)
 751                                return apic;
 752                }
 753        }
 754
 755        return -1;
 756}
 757
 758/*
 759 * Find a specific PCI IRQ entry.
 760 * Not an __init, possibly needed by modules
 761 */
 762static int pin_2_irq(int idx, int apic, int pin);
 763
 764int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
 765{
 766        int apic, i, best_guess = -1;
 767
 768        apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
 769                bus, slot, pin);
 770        if (test_bit(bus, mp_bus_not_pci)) {
 771                apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
 772                return -1;
 773        }
 774        for (i = 0; i < mp_irq_entries; i++) {
 775                int lbus = mp_irqs[i].mp_srcbus;
 776
 777                for (apic = 0; apic < nr_ioapics; apic++)
 778                        if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic ||
 779                            mp_irqs[i].mp_dstapic == MP_APIC_ALL)
 780                                break;
 781
 782                if (!test_bit(lbus, mp_bus_not_pci) &&
 783                    !mp_irqs[i].mp_irqtype &&
 784                    (bus == lbus) &&
 785                    (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) {
 786                        int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq);
 787
 788                        if (!(apic || IO_APIC_IRQ(irq)))
 789                                continue;
 790
 791                        if (pin == (mp_irqs[i].mp_srcbusirq & 3))
 792                                return irq;
 793                        /*
 794                         * Use the first all-but-pin matching entry as a
 795                         * best-guess fuzzy result for broken mptables.
 796                         */
 797                        if (best_guess < 0)
 798                                best_guess = irq;
 799                }
 800        }
 801        return best_guess;
 802}
 803
 804EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
 805
 806#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
 807/*
 808 * EISA Edge/Level control register, ELCR
 809 */
 810static int EISA_ELCR(unsigned int irq)
 811{
 812        if (irq < 16) {
 813                unsigned int port = 0x4d0 + (irq >> 3);
 814                return (inb(port) >> (irq & 7)) & 1;
 815        }
 816        apic_printk(APIC_VERBOSE, KERN_INFO
 817                        "Broken MPtable reports ISA irq %d\n", irq);
 818        return 0;
 819}
 820
 821#endif
 822
 823/* ISA interrupts are always polarity zero edge triggered,
 824 * when listed as conforming in the MP table. */
 825
 826#define default_ISA_trigger(idx)        (0)
 827#define default_ISA_polarity(idx)        (0)
 828
 829/* EISA interrupts are always polarity zero and can be edge or level
 830 * trigger depending on the ELCR value.  If an interrupt is listed as
 831 * EISA conforming in the MP table, that means its trigger type must
 832 * be read in from the ELCR */
 833
 834#define default_EISA_trigger(idx)        (EISA_ELCR(mp_irqs[idx].mp_srcbusirq))
 835#define default_EISA_polarity(idx)        default_ISA_polarity(idx)
 836
 837/* PCI interrupts are always polarity one level triggered,
 838 * when listed as conforming in the MP table. */
 839
 840#define default_PCI_trigger(idx)        (1)
 841#define default_PCI_polarity(idx)        (1)
 842
 843/* MCA interrupts are always polarity zero level triggered,
 844 * when listed as conforming in the MP table. */
 845
 846#define default_MCA_trigger(idx)        (1)
 847#define default_MCA_polarity(idx)        default_ISA_polarity(idx)
 848
 849static int MPBIOS_polarity(int idx)
 850{
 851        int bus = mp_irqs[idx].mp_srcbus;
 852        int polarity;
 853
 854        /*
 855         * Determine IRQ line polarity (high active or low active):
 856         */
 857        switch (mp_irqs[idx].mp_irqflag & 3)
 858        {
 859                case 0: /* conforms, ie. bus-type dependent polarity */
 860                        if (test_bit(bus, mp_bus_not_pci))
 861                                polarity = default_ISA_polarity(idx);
 862                        else
 863                                polarity = default_PCI_polarity(idx);
 864                        break;
 865                case 1: /* high active */
 866                {
 867                        polarity = 0;
 868                        break;
 869                }
 870                case 2: /* reserved */
 871                {
 872                        printk(KERN_WARNING "broken BIOS!!\n");
 873                        polarity = 1;
 874                        break;
 875                }
 876                case 3: /* low active */
 877                {
 878                        polarity = 1;
 879                        break;
 880                }
 881                default: /* invalid */
 882                {
 883                        printk(KERN_WARNING "broken BIOS!!\n");
 884                        polarity = 1;
 885                        break;
 886                }
 887        }
 888        return polarity;
 889}
 890
 891static int MPBIOS_trigger(int idx)
 892{
 893        int bus = mp_irqs[idx].mp_srcbus;
 894        int trigger;
 895
 896        /*
 897         * Determine IRQ trigger mode (edge or level sensitive):
 898         */
 899        switch ((mp_irqs[idx].mp_irqflag>>2) & 3)
 900        {
 901                case 0: /* conforms, ie. bus-type dependent */
 902                        if (test_bit(bus, mp_bus_not_pci))
 903                                trigger = default_ISA_trigger(idx);
 904                        else
 905                                trigger = default_PCI_trigger(idx);
 906#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
 907                        switch (mp_bus_id_to_type[bus]) {
 908                                case MP_BUS_ISA: /* ISA pin */
 909                                {
 910                                        /* set before the switch */
 911                                        break;
 912                                }
 913                                case MP_BUS_EISA: /* EISA pin */
 914                                {
 915                                        trigger = default_EISA_trigger(idx);
 916                                        break;
 917                                }
 918                                case MP_BUS_PCI: /* PCI pin */
 919                                {
 920                                        /* set before the switch */
 921                                        break;
 922                                }
 923                                case MP_BUS_MCA: /* MCA pin */
 924                                {
 925                                        trigger = default_MCA_trigger(idx);
 926                                        break;
 927                                }
 928                                default:
 929                                {
 930                                        printk(KERN_WARNING "broken BIOS!!\n");
 931                                        trigger = 1;
 932                                        break;
 933                                }
 934                        }
 935#endif
 936                        break;
 937                case 1: /* edge */
 938                {
 939                        trigger = 0;
 940                        break;
 941                }
 942                case 2: /* reserved */
 943                {
 944                        printk(KERN_WARNING "broken BIOS!!\n");
 945                        trigger = 1;
 946                        break;
 947                }
 948                case 3: /* level */
 949                {
 950                        trigger = 1;
 951                        break;
 952                }
 953                default: /* invalid */
 954                {
 955                        printk(KERN_WARNING "broken BIOS!!\n");
 956                        trigger = 0;
 957                        break;
 958                }
 959        }
 960        return trigger;
 961}
 962
 963static inline int irq_polarity(int idx)
 964{
 965        return MPBIOS_polarity(idx);
 966}
 967
 968static inline int irq_trigger(int idx)
 969{
 970        return MPBIOS_trigger(idx);
 971}
 972
 973int (*ioapic_renumber_irq)(int ioapic, int irq);
 974static int pin_2_irq(int idx, int apic, int pin)
 975{
 976        int irq, i;
 977        int bus = mp_irqs[idx].mp_srcbus;
 978
 979        /*
 980         * Debugging check, we are in big trouble if this message pops up!
 981         */
 982        if (mp_irqs[idx].mp_dstirq != pin)
 983                printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
 984
 985        if (test_bit(bus, mp_bus_not_pci)) {
 986                irq = mp_irqs[idx].mp_srcbusirq;
 987        } else {
 988                /*
 989                 * PCI IRQs are mapped in order
 990                 */
 991                i = irq = 0;
 992                while (i < apic)
 993                        irq += nr_ioapic_registers[i++];
 994                irq += pin;
 995                /*
 996                 * For MPS mode, so far only needed by ES7000 platform
 997                 */
 998                if (ioapic_renumber_irq)
 999                        irq = ioapic_renumber_irq(apic, irq);
1000        }
1001
1002#ifdef CONFIG_X86_32
1003        /*
1004         * PCI IRQ command line redirection. Yes, limits are hardcoded.
1005         */
1006        if ((pin >= 16) && (pin <= 23)) {
1007                if (pirq_entries[pin-16] != -1) {
1008                        if (!pirq_entries[pin-16]) {
1009                                apic_printk(APIC_VERBOSE, KERN_DEBUG
1010                                                "disabling PIRQ%d\n", pin-16);
1011                        } else {
1012                                irq = pirq_entries[pin-16];
1013                                apic_printk(APIC_VERBOSE, KERN_DEBUG
1014                                                "using PIRQ%d -> IRQ %d\n",
1015                                                pin-16, irq);
1016                        }
1017                }
1018        }
1019#endif
1020
1021        return irq;
1022}
1023
1024void lock_vector_lock(void)
1025{
 1026        /* Used to ensure that the online set of cpus does not
 1027         * change during assign_irq_vector.
 1028         */
1029        spin_lock(&vector_lock);
1030}
1031
1032void unlock_vector_lock(void)
1033{
1034        spin_unlock(&vector_lock);
1035}
1036
1037static int __assign_irq_vector(int irq, cpumask_t mask)
1038{
1039        /*
1040         * NOTE! The local APIC isn't very good at handling
1041         * multiple interrupts at the same interrupt level.
1042         * As the interrupt level is determined by taking the
1043         * vector number and shifting that right by 4, we
1044         * want to spread these out a bit so that they don't
1045         * all fall in the same interrupt level.
1046         *
1047         * Also, we've got to be careful not to trash gate
1048         * 0x80, because int 0x80 is hm, kind of importantish. ;)
1049         */
1050        static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
1051        unsigned int old_vector;
1052        int cpu;
1053        struct irq_cfg *cfg;
1054
1055        cfg = irq_cfg(irq);
1056
1057        /* Only try and allocate irqs on cpus that are present */
1058        cpus_and(mask, mask, cpu_online_map);
1059
1060        if ((cfg->move_in_progress) || cfg->move_cleanup_count)
1061                return -EBUSY;
1062
1063        old_vector = cfg->vector;
1064        if (old_vector) {
1065                cpumask_t tmp;
1066                cpus_and(tmp, cfg->domain, mask);
1067                if (!cpus_empty(tmp))
1068                        return 0;
1069        }
1070
1071        for_each_cpu_mask_nr(cpu, mask) {
1072                cpumask_t domain, new_mask;
1073                int new_cpu;
1074                int vector, offset;
1075
1076                domain = vector_allocation_domain(cpu);
1077                cpus_and(new_mask, domain, cpu_online_map);
1078
1079                vector = current_vector;
1080                offset = current_offset;
1081next:
1082                vector += 8;
1083                if (vector >= first_system_vector) {
1084                        /* If we run out of vectors on large boxen, must share them. */
1085                        offset = (offset + 1) % 8;
1086                        vector = FIRST_DEVICE_VECTOR + offset;
1087                }
1088                if (unlikely(current_vector == vector))
1089                        continue;
1090#ifdef CONFIG_X86_64
1091                if (vector == IA32_SYSCALL_VECTOR)
1092                        goto next;
1093#else
1094                if (vector == SYSCALL_VECTOR)
1095                        goto next;
1096#endif
1097                for_each_cpu_mask_nr(new_cpu, new_mask)
1098                        if (per_cpu(vector_irq, new_cpu)[vector] != -1)
1099                                goto next;
1100                /* Found one! */
1101                current_vector = vector;
1102                current_offset = offset;
1103                if (old_vector) {
1104                        cfg->move_in_progress = 1;
1105                        cfg->old_domain = cfg->domain;
1106                }
1107                for_each_cpu_mask_nr(new_cpu, new_mask)
1108                        per_cpu(vector_irq, new_cpu)[vector] = irq;
1109                cfg->vector = vector;
1110                cfg->domain = domain;
1111                return 0;
1112        }
1113        return -ENOSPC;
1114}
1115
1116static int assign_irq_vector(int irq, cpumask_t mask)
1117{
1118        int err;
1119        unsigned long flags;
1120
1121        spin_lock_irqsave(&vector_lock, flags);
1122        err = __assign_irq_vector(irq, mask);
1123        spin_unlock_irqrestore(&vector_lock, flags);
1124        return err;
1125}
1126
1127static void __clear_irq_vector(int irq)
1128{
1129        struct irq_cfg *cfg;
1130        cpumask_t mask;
1131        int cpu, vector;
1132
1133        cfg = irq_cfg(irq);
1134        BUG_ON(!cfg->vector);
1135
1136        vector = cfg->vector;
1137        cpus_and(mask, cfg->domain, cpu_online_map);
1138        for_each_cpu_mask_nr(cpu, mask)
1139                per_cpu(vector_irq, cpu)[vector] = -1;
1140
1141        cfg->vector = 0;
1142        cpus_clear(cfg->domain);
1143
1144        if (likely(!cfg->move_in_progress))
1145                return;
1146        cpus_and(mask, cfg->old_domain, cpu_online_map);
1147        for_each_cpu_mask_nr(cpu, mask) {
1148                for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
1149                                                                vector++) {
1150                        if (per_cpu(vector_irq, cpu)[vector] != irq)
1151                                continue;
1152                        per_cpu(vector_irq, cpu)[vector] = -1;
1153                        break;
1154                }
1155        }
1156        cfg->move_in_progress = 0;
1157}
1158
1159void __setup_vector_irq(int cpu)
1160{
1161        /* Initialize vector_irq on a new cpu */
1162        /* This function must be called with vector_lock held */
1163        int irq, vector;
1164        struct irq_cfg *cfg;
1165
1166        /* Mark the inuse vectors */
1167        for_each_irq_cfg(irq, cfg) {
1168                if (!cpu_isset(cpu, cfg->domain))
1169                        continue;
1170                vector = cfg->vector;
1171                per_cpu(vector_irq, cpu)[vector] = irq;
1172        }
1173        /* Mark the free vectors */
1174        for (vector = 0; vector < NR_VECTORS; ++vector) {
1175                irq = per_cpu(vector_irq, cpu)[vector];
1176                if (irq < 0)
1177                        continue;
1178
1179                cfg = irq_cfg(irq);
1180                if (!cpu_isset(cpu, cfg->domain))
1181                        per_cpu(vector_irq, cpu)[vector] = -1;
1182        }
1183}
1184
1185static struct irq_chip ioapic_chip;
1186#ifdef CONFIG_INTR_REMAP
1187static struct irq_chip ir_ioapic_chip;
1188#endif
1189
1190#define IOAPIC_AUTO     -1
1191#define IOAPIC_EDGE     0
1192#define IOAPIC_LEVEL    1
1193
1194#ifdef CONFIG_X86_32
1195static inline int IO_APIC_irq_trigger(int irq)
1196{
1197        int apic, idx, pin;
1198
1199        for (apic = 0; apic < nr_ioapics; apic++) {
1200                for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
1201                        idx = find_irq_entry(apic, pin, mp_INT);
1202                        if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
1203                                return irq_trigger(idx);
1204                }
1205        }
1206        /*
1207         * nonexistent IRQs are edge default
1208         */
1209        return 0;
1210}
1211#else
1212static inline int IO_APIC_irq_trigger(int irq)
1213{
1214        return 1;
1215}
1216#endif
1217
1218static void ioapic_register_intr(int irq, unsigned long trigger)
1219{
1220        struct irq_desc *desc;
1221
1222        desc = irq_to_desc(irq);
1223
1224        if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
1225            trigger == IOAPIC_LEVEL)
1226                desc->status |= IRQ_LEVEL;
1227        else
1228                desc->status &= ~IRQ_LEVEL;
1229
1230#ifdef CONFIG_INTR_REMAP
1231        if (irq_remapped(irq)) {
1232                desc->status |= IRQ_MOVE_PCNTXT;
1233                if (trigger)
1234                        set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
1235                                                      handle_fasteoi_irq,
1236                                                     "fasteoi");
1237                else
1238                        set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
1239                                                      handle_edge_irq, "edge");
1240                return;
1241        }
1242#endif
1243        if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
1244            trigger == IOAPIC_LEVEL)
1245                set_irq_chip_and_handler_name(irq, &ioapic_chip,
1246                                              handle_fasteoi_irq,
1247                                              "fasteoi");
1248        else
1249                set_irq_chip_and_handler_name(irq, &ioapic_chip,
1250                                              handle_edge_irq, "edge");
1251}
1252
1253static int setup_ioapic_entry(int apic, int irq,
1254                              struct IO_APIC_route_entry *entry,
1255                              unsigned int destination, int trigger,
1256                              int polarity, int vector)
1257{
1258        /*
1259         * add it to the IO-APIC irq-routing table:
1260         */
1261        memset(entry,0,sizeof(*entry));
1262
1263#ifdef CONFIG_INTR_REMAP
1264        if (intr_remapping_enabled) {
1265                struct intel_iommu *iommu = map_ioapic_to_ir(apic);
1266                struct irte irte;
1267                struct IR_IO_APIC_route_entry *ir_entry =
1268                        (struct IR_IO_APIC_route_entry *) entry;
1269                int index;
1270
1271                if (!iommu)
1272                        panic("No mapping iommu for ioapic %d\n", apic);
1273
1274                index = alloc_irte(iommu, irq, 1);
1275                if (index < 0)
1276                        panic("Failed to allocate IRTE for ioapic %d\n", apic);
1277
1278                memset(&irte, 0, sizeof(irte));
1279
1280                irte.present = 1;
1281                irte.dst_mode = INT_DEST_MODE;
1282                irte.trigger_mode = trigger;
1283                irte.dlvry_mode = INT_DELIVERY_MODE;
1284                irte.vector = vector;
1285                irte.dest_id = IRTE_DEST(destination);
1286
1287                modify_irte(irq, &irte);
1288
1289                ir_entry->index2 = (index >> 15) & 0x1;
1290                ir_entry->zero = 0;
1291                ir_entry->format = 1;
1292                ir_entry->index = (index & 0x7fff);
1293        } else
1294#endif
1295        {
1296                entry->delivery_mode = INT_DELIVERY_MODE;
1297                entry->dest_mode = INT_DEST_MODE;
1298                entry->dest = destination;
1299        }
1300
1301        entry->mask = 0;                                /* enable IRQ */
1302        entry->trigger = trigger;
1303        entry->polarity = polarity;
1304        entry->vector = vector;
1305
1306        /* Mask level triggered irqs.
1307         * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
1308         */
1309        if (trigger)
1310                entry->mask = 1;
1311        return 0;
1312}
1313
1314static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
1315                              int trigger, int polarity)
1316{
1317        struct irq_cfg *cfg;
1318        struct IO_APIC_route_entry entry;
1319        cpumask_t mask;
1320
1321        if (!IO_APIC_IRQ(irq))
1322                return;
1323
1324        cfg = irq_cfg(irq);
1325
1326        mask = TARGET_CPUS;
1327        if (assign_irq_vector(irq, mask))
1328                return;
1329
1330        cpus_and(mask, cfg->domain, mask);
1331
1332        apic_printk(APIC_VERBOSE,KERN_DEBUG
1333                    "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
1334                    "IRQ %d Mode:%i Active:%i)\n",
1335                    apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
1336                    irq, trigger, polarity);
1337
1338
1339        if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
1340                               cpu_mask_to_apicid(mask), trigger, polarity,
1341                               cfg->vector)) {
1342                printk("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
1343                       mp_ioapics[apic].mp_apicid, pin);
1344                __clear_irq_vector(irq);
1345                return;
1346        }
1347
1348        ioapic_register_intr(irq, trigger);
1349        if (irq < 16)
1350                disable_8259A_irq(irq);
1351
1352        ioapic_write_entry(apic, pin, entry);
1353}
1354
1355static void __init setup_IO_APIC_irqs(void)
1356{
1357        int apic, pin, idx, irq;
1358        int notcon = 0;
1359
1360        apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
1361
1362        for (apic = 0; apic < nr_ioapics; apic++) {
1363                for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
1364
1365                        idx = find_irq_entry(apic, pin, mp_INT);
1366                        if (idx == -1) {
1367                                if (!notcon) {
1368                                        notcon = 1;
1369                                        apic_printk(APIC_VERBOSE,
1370                                                KERN_DEBUG " %d-%d",
1371                                                mp_ioapics[apic].mp_apicid,
1372                                                pin);
1373                                } else
1374                                        apic_printk(APIC_VERBOSE, " %d-%d",
1375                                                mp_ioapics[apic].mp_apicid,
1376                                                pin);
1377                                continue;
1378                        }
1379                        if (notcon) {
1380                                apic_printk(APIC_VERBOSE,
1381                                        " (apicid-pin) not connected\n");
1382                                notcon = 0;
1383                        }
1384
1385                        irq = pin_2_irq(idx, apic, pin);
1386#ifdef CONFIG_X86_32
1387                        if (multi_timer_check(apic, irq))
1388                                continue;
1389#endif
1390                        add_pin_to_irq(irq, apic, pin);
1391
1392                        setup_IO_APIC_irq(apic, pin, irq,
1393                                        irq_trigger(idx), irq_polarity(idx));
1394                }
1395        }
1396
1397        if (notcon)
1398                apic_printk(APIC_VERBOSE,
1399                        " (apicid-pin) not connected\n");
1400}
1401
1402/*
1403 * Set up the timer pin, possibly with the 8259A-master behind.
1404 */
1405static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
1406                                        int vector)
1407{
1408        struct IO_APIC_route_entry entry;
1409
1410#ifdef CONFIG_INTR_REMAP
1411        if (intr_remapping_enabled)
1412                return;
1413#endif
1414
1415        memset(&entry, 0, sizeof(entry));
1416
1417        /*
1418         * We use logical delivery to get the timer IRQ
1419         * to the first CPU.
1420         */
1421        entry.dest_mode = INT_DEST_MODE;
1422        entry.mask = 1;                                        /* mask IRQ now */
1423        entry.dest = cpu_mask_to_apicid(TARGET_CPUS);
1424        entry.delivery_mode = INT_DELIVERY_MODE;
1425        entry.polarity = 0;
1426        entry.trigger = 0;
1427        entry.vector = vector;
1428
1429        /*
1430         * The timer IRQ doesn't have to know that behind the
1431         * scene we may have a 8259A-master in AEOI mode ...
1432         */
1433        set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
1434
1435        /*
1436         * Add it to the IO-APIC irq-routing table:
1437         */
1438        ioapic_write_entry(apic, pin, entry);
1439}
1440
1441
1442__apicdebuginit(void) print_IO_APIC(void)
1443{
1444        int apic, i;
1445        union IO_APIC_reg_00 reg_00;
1446        union IO_APIC_reg_01 reg_01;
1447        union IO_APIC_reg_02 reg_02;
1448        union IO_APIC_reg_03 reg_03;
1449        unsigned long flags;
1450        struct irq_cfg *cfg;
1451        unsigned int irq;
1452
1453        if (apic_verbosity == APIC_QUIET)
1454                return;
1455
1456        printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
1457        for (i = 0; i < nr_ioapics; i++)
1458                printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
1459                       mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]);
1460
1461        /*
1462         * We are a bit conservative about what we expect.  We have to
1463         * know about every hardware change ASAP.
1464         */
1465        printk(KERN_INFO "testing the IO APIC.......................\n");
1466
1467        for (apic = 0; apic < nr_ioapics; apic++) {
1468
1469        spin_lock_irqsave(&ioapic_lock, flags);
1470        reg_00.raw = io_apic_read(apic, 0);
1471        reg_01.raw = io_apic_read(apic, 1);
1472        if (reg_01.bits.version >= 0x10)
1473                reg_02.raw = io_apic_read(apic, 2);
1474        if (reg_01.bits.version >= 0x20)
1475                reg_03.raw = io_apic_read(apic, 3);
1476        spin_unlock_irqrestore(&ioapic_lock, flags);
1477
1478        printk("\n");
1479        printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
1480        printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
1481        printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
1482        printk(KERN_DEBUG ".......    : Delivery Type: %X\n", reg_00.bits.delivery_type);
1483        printk(KERN_DEBUG ".......    : LTS          : %X\n", reg_00.bits.LTS);
1484
1485        printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
1486        printk(KERN_DEBUG ".......     : max redirection entries: %04X\n", reg_01.bits.entries);
1487
1488        printk(KERN_DEBUG ".......     : PRQ implemented: %X\n", reg_01.bits.PRQ);
1489        printk(KERN_DEBUG ".......     : IO APIC version: %04X\n", reg_01.bits.version);
1490
1491        /*
1492         * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
1493         * but the value of reg_02 is read as the previous read register
1494         * value, so ignore it if reg_02 == reg_01.
1495         */
1496        if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
1497                printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
1498                printk(KERN_DEBUG ".......     : arbitration: %02X\n", reg_02.bits.arbitration);
1499        }
1500
1501        /*
1502         * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02
1503         * or reg_03, but the value of reg_0[23] is read as the previous read
1504         * register value, so ignore it if reg_03 == reg_0[12].
1505         */
1506        if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
1507            reg_03.raw != reg_01.raw) {
1508                printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
1509                printk(KERN_DEBUG ".......     : Boot DT    : %X\n", reg_03.bits.boot_DT);
1510        }
1511
1512        printk(KERN_DEBUG ".... IRQ redirection table:\n");
1513
1514        printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol"
1515                          " Stat Dmod Deli Vect:   \n");
1516
1517        for (i = 0; i <= reg_01.bits.entries; i++) {
1518                struct IO_APIC_route_entry entry;
1519
1520                entry = ioapic_read_entry(apic, i);
1521
1522                printk(KERN_DEBUG " %02x %03X ",
1523                        i,
1524                        entry.dest
1525                );
1526
1527                printk("%1d    %1d    %1d   %1d   %1d    %1d    %1d    %02X\n",
1528                        entry.mask,
1529                        entry.trigger,
1530                        entry.irr,
1531                        entry.polarity,
1532                        entry.delivery_status,
1533                        entry.dest_mode,
1534                        entry.delivery_mode,
1535                        entry.vector
1536                );
1537        }
1538        }
1539        printk(KERN_DEBUG "IRQ to pin mappings:\n");
1540        for_each_irq_cfg(irq, cfg) {
1541                struct irq_pin_list *entry = cfg->irq_2_pin;
1542                if (!entry)
1543                        continue;
1544                printk(KERN_DEBUG "IRQ%d ", irq);
1545                for (;;) {
1546                        printk("-> %d:%d", entry->apic, entry->pin);
1547                        if (!entry->next)
1548                                break;
1549                        entry = entry->next;
1550                }
1551                printk("\n");
1552        }
1553
1554        printk(KERN_INFO ".................................... done.\n");
1555
1556        return;
1557}
1558
1559__apicdebuginit(void) print_APIC_bitfield(int base)
1560{
1561        unsigned int v;
1562        int i, j;
1563
1564        if (apic_verbosity == APIC_QUIET)
1565                return;
1566
1567        printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
1568        for (i = 0; i < 8; i++) {
1569                v = apic_read(base + i*0x10);
1570                for (j = 0; j < 32; j++) {
1571                        if (v & (1<<j))
1572                                printk("1");
1573                        else
1574                                printk("0");
1575                }
1576                printk("\n");
1577        }
1578}
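/*
 * Note on the layout read above: ISR, TMR and IRR are each 256-bit
 * registers (one bit per vector), exposed as eight 32-bit words spaced
 * 0x10 apart, which is why the loop reads base + i*0x10 eight times and
 * prints 32 bits per word.
 */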
1579
1580__apicdebuginit(void) print_local_APIC(void *dummy)
1581{
1582        unsigned int v, ver, maxlvt;
1583        u64 icr;
1584
1585        if (apic_verbosity == APIC_QUIET)
1586                return;
1587
1588        printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
1589                smp_processor_id(), hard_smp_processor_id());
1590        v = apic_read(APIC_ID);
1591        printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, read_apic_id());
1592        v = apic_read(APIC_LVR);
1593        printk(KERN_INFO "... APIC VERSION: %08x\n", v);
1594        ver = GET_APIC_VERSION(v);
1595        maxlvt = lapic_get_maxlvt();
1596
1597        v = apic_read(APIC_TASKPRI);
1598        printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
1599
1600        if (APIC_INTEGRATED(ver)) {                     /* !82489DX */
1601                if (!APIC_XAPIC(ver)) {
1602                        v = apic_read(APIC_ARBPRI);
1603                        printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
1604                               v & APIC_ARBPRI_MASK);
1605                }
1606                v = apic_read(APIC_PROCPRI);
1607                printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
1608        }
1609
1610        /*
1611         * Remote read is supported only in the 82489DX and the local APIC
1612         * of Pentium processors.
1613         */
1614        if (!APIC_INTEGRATED(ver) || maxlvt == 3) {
1615                v = apic_read(APIC_RRR);
1616                printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
1617        }
1618
1619        v = apic_read(APIC_LDR);
1620        printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
1621        if (!x2apic_enabled()) {
1622                v = apic_read(APIC_DFR);
1623                printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
1624        }
1625        v = apic_read(APIC_SPIV);
1626        printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
1627
1628        printk(KERN_DEBUG "... APIC ISR field:\n");
1629        print_APIC_bitfield(APIC_ISR);
1630        printk(KERN_DEBUG "... APIC TMR field:\n");
1631        print_APIC_bitfield(APIC_TMR);
1632        printk(KERN_DEBUG "... APIC IRR field:\n");
1633        print_APIC_bitfield(APIC_IRR);
1634
1635        if (APIC_INTEGRATED(ver)) {             /* !82489DX */
1636                if (maxlvt > 3)         /* Due to the Pentium erratum 3AP. */
1637                        apic_write(APIC_ESR, 0);
1638
1639                v = apic_read(APIC_ESR);
1640                printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
1641        }
1642
1643        icr = apic_icr_read();
1644        printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr);
1645        printk(KERN_DEBUG "... APIC ICR2: %08x\n", (u32)(icr >> 32));
1646
1647        v = apic_read(APIC_LVTT);
1648        printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
1649
1650        if (maxlvt > 3) {                       /* PC is LVT#4. */
1651                v = apic_read(APIC_LVTPC);
1652                printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
1653        }
1654        v = apic_read(APIC_LVT0);
1655        printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
1656        v = apic_read(APIC_LVT1);
1657        printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
1658
1659        if (maxlvt > 2) {                        /* ERR is LVT#3. */
1660                v = apic_read(APIC_LVTERR);
1661                printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
1662        }
1663
1664        v = apic_read(APIC_TMICT);
1665        printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
1666        v = apic_read(APIC_TMCCT);
1667        printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
1668        v = apic_read(APIC_TDCR);
1669        printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
1670        printk("\n");
1671}
1672
1673__apicdebuginit(void) print_all_local_APICs(void)
1674{
1675        int cpu;
1676
1677        preempt_disable();
1678        for_each_online_cpu(cpu)
1679                smp_call_function_single(cpu, print_local_APIC, NULL, 1);
1680        preempt_enable();
1681}
1682
1683__apicdebuginit(void) print_PIC(void)
1684{
1685        unsigned int v;
1686        unsigned long flags;
1687
1688        if (apic_verbosity == APIC_QUIET)
1689                return;
1690
1691        printk(KERN_DEBUG "\nprinting PIC contents\n");
1692
1693        spin_lock_irqsave(&i8259A_lock, flags);
1694
1695        v = inb(0xa1) << 8 | inb(0x21);
1696        printk(KERN_DEBUG "... PIC  IMR: %04x\n", v);
1697
1698        v = inb(0xa0) << 8 | inb(0x20);
1699        printk(KERN_DEBUG "... PIC  IRR: %04x\n", v);
1700
1701        outb(0x0b,0xa0);
1702        outb(0x0b,0x20);
1703        v = inb(0xa0) << 8 | inb(0x20);
1704        outb(0x0a,0xa0);
1705        outb(0x0a,0x20);
1706
1707        spin_unlock_irqrestore(&i8259A_lock, flags);
1708
1709        printk(KERN_DEBUG "... PIC  ISR: %04x\n", v);
1710
1711        v = inb(0x4d1) << 8 | inb(0x4d0);
1712        printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
1713}
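/*
 * The 0x0b/0x0a writes above are 8259A OCW3 commands: 0x0b selects the
 * in-service register (ISR) for the next read from the data port, and
 * 0x0a switches the read-back mode back to the interrupt request
 * register (IRR).  All port accesses happen with i8259A_lock held, and
 * the lock is dropped on the single exit path before the final printks.
 */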
1714
1715__apicdebuginit(int) print_all_ICs(void)
1716{
1717        print_PIC();
1718        print_all_local_APICs();
1719        print_IO_APIC();
1720
1721        return 0;
1722}
1723
1724fs_initcall(print_all_ICs);
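/*
 * print_all_ICs() is registered as an fs_initcall, so the full dump of
 * PIC, local APIC and IO-APIC state runs once during boot; print_PIC()
 * and print_local_APIC() each return immediately when apic_verbosity
 * is APIC_QUIET, keeping quiet boots quiet.
 */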
1725
1726
1727/* Where, if anywhere, the i8259 is connected in external int mode */
1728static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
1729
1730void __init enable_IO_APIC(void)
1731{
1732        union IO_APIC_reg_01 reg_01;
1733        int i8259_apic, i8259_pin;
1734        int apic;
1735        unsigned long flags;
1736
1737#ifdef CONFIG_X86_32
1738        int i;
1739        if (!pirqs_enabled)
1740                for (i = 0; i < MAX_PIRQS; i++)
1741                        pirq_entries[i] = -1;
1742#endif
1743
1744        /*
1745         * The number of IO-APIC IRQ registers (== #pins):
1746         */
1747        for (apic = 0; apic < nr_ioapics; apic++) {
1748                spin_lock_irqsave(&ioapic_lock, flags);
1749                reg_01.raw = io_apic_read(apic, 1);
1750                spin_unlock_irqrestore(&ioapic_lock, flags);
1751                nr_ioapic_registers[apic] = reg_01.bits.entries+1;
1752        }
1753        for(apic = 0; apic < nr_ioapics; apic++) {
1754                int pin;
1755                /* See if any of the pins is in ExtINT mode */
1756                for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
1757                        struct IO_APIC_route_entry entry;
1758                        entry = ioapic_read_entry(apic, pin);
1759
1760                        /* If the interrupt line is enabled and in ExtInt mode,
1761                         * I have found the pin where the i8259 is connected.
1762                         */
1763                        if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) {
1764                                ioapic_i8259.apic = apic;
1765                                ioapic_i8259.pin  = pin;
1766                                goto found_i8259;
1767                        }
1768                }
1769        }
1770 found_i8259:
1771        /* Look to see if the MP table has reported the ExtINT */
1772        /* If we could not find the appropriate pin by looking at the ioapic,
1773         * the i8259 is probably not connected to the ioapic, but give the
1774         * mptable a chance anyway.
1775         */
1776        i8259_pin  = find_isa_irq_pin(0, mp_ExtINT);
1777        i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
1778        /* Trust the MP table if nothing is setup in the hardware */
1779        if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) {
1780                printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n");
1781                ioapic_i8259.pin  = i8259_pin;
1782                ioapic_i8259.apic = i8259_apic;
1783        }
1784        /* Complain if the MP table and the hardware disagree */
1785        if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) &&
1786                (i8259_pin >= 0) && (ioapic_i8259.pin >= 0))
1787        {
1788                printk(KERN_WARNING "ExtINT in hardware and MP table differ\n");
1789        }
1790
1791        /*
1792         * Do not trust the IO-APIC being empty at bootup
1793         */
1794        clear_IO_APIC();
1795}
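/*
 * The "goto found_i8259" above is the only way out of the nested pin
 * scan; when no unmasked ExtINT entry exists, ioapic_i8259 keeps its
 * { -1, -1 } initializer, which is exactly what the MP-table fallback
 * here and the later disable_IO_APIC() test check against.
 */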
1796
1797/*
1798 * Not an __init, needed by the reboot code
1799 */
1800void disable_IO_APIC(void)
1801{
1802        /*
1803         * Clear the IO-APIC before rebooting:
1804         */
1805        clear_IO_APIC();
1806
1807        /*
1808         * If the i8259 is routed through an IOAPIC,
1809         * put that IOAPIC in virtual wire mode
1810         * so legacy interrupts can be delivered.
1811         */
1812        if (ioapic_i8259.pin != -1) {
1813                struct IO_APIC_route_entry entry;
1814
1815                memset(&entry, 0, sizeof(entry));
1816                entry.mask            = 0; /* Enabled */
1817                entry.trigger         = 0; /* Edge */
1818                entry.irr             = 0;
1819                entry.polarity        = 0; /* High */
1820                entry.delivery_status = 0;
1821                entry.dest_mode       = 0; /* Physical */
1822                entry.delivery_mode   = dest_ExtINT; /* ExtInt */
1823                entry.vector          = 0;
1824                entry.dest            = read_apic_id();
1825
1826                /*
1827                 * Add it to the IO-APIC irq-routing table:
1828                 */
1829                ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
1830        }
1831
1832        disconnect_bsp_APIC(ioapic_i8259.pin != -1);
1833}
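/*
 * The entry written above is the classic "virtual wire" setup: unmasked,
 * edge-triggered, active-high, physical destination mode, ExtINT
 * delivery, addressed to the APIC ID returned by read_apic_id().  With
 * this in place the 8259A can still deliver legacy interrupts after the
 * IO-APIC has been cleared for reboot.
 */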
1834
1835#ifdef CONFIG_X86_32
1836/*
1837 * function to set the IO-APIC physical IDs based on the
1838 * values stored in the MPC table.
1839 *
1840 * by Matt Domsch <Matt_Domsch@dell.com>  Tue Dec 21 12:25:05 CST 1999
1841 */
1842
1843static void __init setup_ioapic_ids_from_mpc(void)
1844{
1845        union IO_APIC_reg_00 reg_00;
1846        physid_mask_t phys_id_present_map;
1847        int apic;
1848        int i;
1849        unsigned char old_id;
1850        unsigned long flags;
1851
1852        if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids())
1853                return;
1854
1855        /*
1856         * Don't check I/O APIC IDs for xAPIC systems.  They have
1857         * no meaning without the serial APIC bus.
1858         */
1859        if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
1860                || APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
1861                return;
1862        /*
1863         * This is broken; anything with a real cpu count has to
1864         * circumvent this idiocy regardless.
1865         */
1866        phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map);
1867
1868        /*
1869         * Set the IOAPIC ID to the value stored in the MPC table.
1870         */
1871        for (apic = 0; apic < nr_ioapics; apic++) {
1872
1873                /* Read the register 0 value */
1874                spin_lock_irqsave(&ioapic_lock, flags);
1875                reg_00.raw = io_apic_read(apic, 0);
1876                spin_unlock_irqrestore(&ioapic_lock, flags);
1877
1878                old_id = mp_ioapics[apic].mp_apicid;
1879
1880                if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) {
1881                        printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
1882                                apic, mp_ioapics[apic].mp_apicid);
1883                        printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
1884                                reg_00.bits.ID);
1885                        mp_ioapics[apic].mp_apicid = reg_00.bits.ID;
1886                }
1887
1888                /*
1889                 * Sanity check, is the ID really free? Every APIC in a
1890                 * system must have a unique ID or we get lots of nice
1891                 * 'stuck on smp_invalidate_needed IPI wait' messages.
1892                 */
1893                if (check_apicid_used(phys_id_present_map,
1894                                        mp_ioapics[apic].mp_apicid)) {
1895                        printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
1896                                apic, mp_ioapics[apic].mp_apicid);
1897                        for (i = 0; i < get_physical_broadcast(); i++)
1898                                if (!physid_isset(i, phys_id_present_map))
1899                                        break;
1900                        if (i >= get_physical_broadcast())
1901                                panic("Max APIC ID exceeded!\n");
1902                        printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
1903                                i);
1904                        physid_set(i, phys_id_present_map);
1905                        mp_ioapics[apic].mp_apicid = i;
1906                } else {
1907                        physid_mask_t tmp;
1908                        tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid);
1909                        apic_printk(APIC_VERBOSE, "Setting %d in the "
1910                                        "phys_id_present_map\n",
1911                                        mp_ioapics[apic].mp_apicid);
1912                        physids_or(phys_id_present_map, phys_id_present_map, tmp);
1913                }
1914
1915
1916                /*
1917                 * We need to adjust the IRQ routing table
1918                 * if the ID changed.
1919                 */
1920                if (old_id != mp_ioapics[apic].mp_apicid)
1921                        for (i = 0; i < mp_irq_entries; i++)
1922                                if (mp_irqs[i].mp_dstapic == old_id)
1923                                        mp_irqs[i].mp_dstapic
1924                                                = mp_ioapics[apic].mp_apicid;
1925
1926                /*
1927                 * Read the right value from the MPC table and
1928                 * write it into the ID register.
1929                 */
1930                apic_printk(APIC_VERBOSE, KERN_INFO
1931                        "...changing IO-APIC physical APIC ID to %d ...",
1932                        mp_ioapics[apic].mp_apicid);
1933
1934                reg_00.bits.ID = mp_ioapics[apic].mp_apicid;
1935                spin_lock_irqsave(&ioapic_lock, flags);
1936                io_apic_write(apic, 0, reg_00.raw);
1937                spin_unlock_irqrestore(&ioapic_lock, flags);
1938
1939                /*
1940                 * Sanity check
1941                 */
1942                spin_lock_irqsave(&ioapic_lock, flags);
1943                reg_00.raw = io_apic_read(apic, 0);
1944                spin_unlock_irqrestore(&ioapic_lock, flags);
1945                if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid)
1946                        printk("could not set ID!\n");
1947                else
1948                        apic_printk(APIC_VERBOSE, " ok.\n");
1949        }
1950}
1951#endif
1952
1953int no_timer_check __initdata;
1954
1955static int __init notimercheck(char *s)
1956{
1957        no_timer_check = 1;
1958        return 1;
1959}
1960__setup("no_timer_check", notimercheck);
1961
1962/*
1963 * There is a nasty bug in some older SMP boards: their mptable lies
1964 * about the timer IRQ. We do the following to work around the situation:
1965 *
1966 *        - timer IRQ defaults to IO-APIC IRQ
1967 *        - if this function detects that timer IRQs are defunct, then we fall
1968 *          back to ISA timer IRQs
1969 */
1970static int __init timer_irq_works(void)
1971{
1972        unsigned long t1 = jiffies;
1973        unsigned long flags;
1974
1975        if (no_timer_check)
1976                return 1;
1977
1978        local_save_flags(flags);
1979        local_irq_enable();
1980        /* Let ten ticks pass... */
1981        mdelay((10 * 1000) / HZ);
1982        local_irq_restore(flags);
1983
1984        /*
1985         * Expect a few ticks at least, to be sure some possible
1986         * glue logic does not lock up after one or two first
1987         * ticks in a non-ExtINT mode.  Also the local APIC
1988         * might have cached one ExtINT interrupt.  Finally, at
1989         * least one tick may be lost due to delays.
1990         */
1991
1992        /* jiffies wrap? */
1993        if (time_after(jiffies, t1 + 4))
1994                return 1;
1995        return 0;
1996}
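/*
 * timer_irq_works() busy-waits mdelay((10 * 1000) / HZ) milliseconds,
 * i.e. roughly ten timer ticks, with interrupts enabled, and then
 * declares success if jiffies advanced by more than four ticks.  The
 * slack absorbs cached ExtINT interrupts and lost ticks, and
 * time_after() keeps the comparison correct across a jiffies wrap.
 */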
1997
1998/*
1999 * In the SMP+IOAPIC case it might happen that there are an unspecified
2000 * number of pending IRQ events left unhandled. These cases are very rare,
2001 * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
2002 * better to do it this way as we then do not have to be aware of
2003 * 'pending' interrupts in the IRQ path, except at this point.
2004 */
2005/*
2006 * Edge triggered interrupts need to resend any interrupt
2007 * that was delayed, but this is now handled in the device
2008 * independent code.
2009 */
2010
2011/*
2012 * Starting up an edge-triggered IO-APIC interrupt is
2013 * nasty - we need to make sure that we get the edge.
2014 * If it is already asserted for some reason, we need to
2015 * return 1 to indicate that it was pending.
2016 *
2017 * This is not complete - we should be able to fake
2018 * an edge even if it isn't on the 8259A...
2019 */
2020
2021static unsigned int startup_ioapic_irq(unsigned int irq)
2022{
2023        int was_pending = 0;
2024        unsigned long flags;
2025
2026        spin_lock_irqsave(&ioapic_lock, flags);
2027        if (irq < 16) {
2028                disable_8259A_irq(irq);
2029                if (i8259A_irq_pending(irq))
2030                        was_pending = 1;
2031        }
2032        __unmask_IO_APIC_irq(irq);
2033        spin_unlock_irqrestore(&ioapic_lock, flags);
2034
2035        return was_pending;
2036}
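/*
 * startup_ioapic_irq() shows the intended lock discipline: ioapic_lock
 * is taken once and released on the single exit, so no path can return
 * with the lock still held.  A hypothetical counter-example (sketch
 * only, not from this file) of the defect class where a function
 * returns in a locked state:
 *
 *	static int broken_example(unsigned int irq)
 *	{
 *		unsigned long flags;
 *
 *		spin_lock_irqsave(&ioapic_lock, flags);
 *		if (irq >= NR_IRQS)
 *			return -EINVAL;	** leaks ioapic_lock **
 *		__unmask_IO_APIC_irq(irq);
 *		spin_unlock_irqrestore(&ioapic_lock, flags);
 *		return 0;
 *	}
 *
 * Every early return between lock and unlock must restore the lock
 * (and the saved flags) first.
 */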
2037
2038#ifdef CONFIG_X86_64
2039static int ioapic_retrigger_irq(unsigned int irq)
2040{
2041
2042        struct irq_cfg *cfg = irq_cfg(irq);
2043        unsigned long flags;
2044
2045        spin_lock_irqsave(&vector_lock, flags);
2046        send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector);
2047        spin_unlock_irqrestore(&vector_lock, flags);
2048
2049        return 1;
2050}
2051#else
2052static int ioapic_retrigger_irq(unsigned int irq)
2053{
2054        send_IPI_self(irq_cfg(irq)->vector);
2055
2056        return 1;
2057}
2058#endif
2059
2060/*
2061 * Level and edge triggered IO-APIC interrupts need different handling,
2062 * so we use two separate IRQ descriptors. Edge triggered IRQs can be
2063 * handled with the level-triggered descriptor, but that one has slightly
2064 * more overhead. Level-triggered interrupts cannot be handled with the
2065 * edge-triggered handler, without risking IRQ storms and other ugly
2066 * races.
2067 */
2068
2069#ifdef CONFIG_SMP
2070
2071#ifdef CONFIG_INTR_REMAP
2072static void ir_irq_migration(struct work_struct *work);
2073
2074static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
2075
2076/*
2077 * Migrate the IO-APIC irq in the presence of intr-remapping.
2078 *
2079 * For edge triggered, irq migration is a simple atomic update (of vector
2080 * and cpu destination) of the IRTE and a flush of the hardware cache.
2081 *
2082 * For level triggered, we need to modify the io-apic RTE as well with the
2083 * updated vector information, along with modifying the IRTE with vector and
2084 * destination. So irq migration for level triggered is a little bit more
2085 * complex compared to edge triggered migration. But the good news is, we
2086 * use the same algorithm for level triggered migration as we have today,
2087 * the only difference being that we now initiate the irq migration from
2088 * process context instead of interrupt context.
2089 *
2090 * In the future, when we do a directed EOI (combined with cpu EOI broadcast
2091 * suppression) to the IO-APIC, level triggered irq migration will also be
2092 * as simple as edge triggered migration and we can do the irq migration
2093 * with a simple atomic update to IO-APIC RTE.
2094 */
2095static void migrate_ioapic_irq(int irq, cpumask_t mask)
2096{
2097        struct irq_cfg *cfg;
2098        struct irq_desc *desc;
2099        cpumask_t tmp, cleanup_mask;
2100        struct irte irte;
2101        int modify_ioapic_rte;
2102        unsigned int dest;
2103        unsigned long flags;
2104
2105        cpus_and(tmp, mask, cpu_online_map);
2106        if (cpus_empty(tmp))
2107                return;
2108
2109        if (get_irte(irq, &irte))
2110                return;
2111
2112        if (assign_irq_vector(irq, mask))
2113                return;
2114
2115        cfg = irq_cfg(irq);
2116        cpus_and(tmp, cfg->domain, mask);
2117        dest = cpu_mask_to_apicid(tmp);
2118
2119        desc = irq_to_desc(irq);
2120        modify_ioapic_rte = desc->status & IRQ_LEVEL;
2121        if (modify_ioapic_rte) {
2122                spin_lock_irqsave(&ioapic_lock, flags);
2123                __target_IO_APIC_irq(irq, dest, cfg->vector);
2124                spin_unlock_irqrestore(&ioapic_lock, flags);
2125        }
2126
2127        irte.vector = cfg->vector;
2128        irte.dest_id = IRTE_DEST(dest);
2129
2130        /*
2131         * Modify the IRTE and flush the interrupt entry cache.
2132         */
2133        modify_irte(irq, &irte);
2134
2135        if (cfg->move_in_progress) {
2136                cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
2137                cfg->move_cleanup_count = cpus_weight(cleanup_mask);
2138                send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
2139                cfg->move_in_progress = 0;
2140        }
2141
2142        desc->affinity = mask;
2143}
2144
2145static int migrate_irq_remapped_level(int irq)
2146{
2147        int ret = -1;
2148        struct irq_desc *desc = irq_to_desc(irq);
2149
2150        mask_IO_APIC_irq(irq);
2151
2152        if (io_apic_level_ack_pending(irq)) {
2153                /*
2154                 * Interrupt in progress. Migrating irq now will change the
2155                 * vector information in the IO-APIC RTE and that will confuse
2156                 * the EOI broadcast performed by the cpu.
2157                 * So, delay the irq migration to the next instance.
2158                 */
2159                schedule_delayed_work(&ir_migration_work, 1);
2160                goto unmask;
2161        }
2162
2163        /* everything is clear. we have right of way */
2164        migrate_ioapic_irq(irq, desc->pending_mask);
2165
2166        ret = 0;
2167        desc->status &= ~IRQ_MOVE_PENDING;
2168        cpus_clear(desc->pending_mask);
2169
2170unmask:
2171        unmask_IO_APIC_irq(irq);
2172        return ret;
2173}
2174
2175static void ir_irq_migration(struct work_struct *work)
2176{
2177        unsigned int irq;
2178        struct irq_desc *desc;
2179
2180        for_each_irq_desc(irq, desc) {
2181                if (desc->status & IRQ_MOVE_PENDING) {
2182                        unsigned long flags;
2183
2184                        spin_lock_irqsave(&desc->lock, flags);
2185                        if (!desc->chip->set_affinity ||
2186                            !(desc->status & IRQ_MOVE_PENDING)) {
2187                                desc->status &= ~IRQ_MOVE_PENDING;
2188                                spin_unlock_irqrestore(&desc->lock, flags);
2189                                continue;
2190                        }
2191
2192                        desc->chip->set_affinity(irq, desc->pending_mask);
2193                        spin_unlock_irqrestore(&desc->lock, flags);
2194                }
2195        }
2196}
2197
2198/*
2199 * Migrates the IRQ destination in the process context.
2200 */
2201static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
2202{
2203        struct irq_desc *desc = irq_to_desc(irq);
2204
2205        if (desc->status & IRQ_LEVEL) {
2206                desc->status |= IRQ_MOVE_PENDING;
2207                desc->pending_mask = mask;
2208                migrate_irq_remapped_level(irq);
2209                return;
2210        }
2211
2212        migrate_ioapic_irq(irq, mask);
2213}
2214#endif
2215
2216asmlinkage void smp_irq_move_cleanup_interrupt(void)
2217{
2218        unsigned vector, me;
2219        ack_APIC_irq();
2220#ifdef CONFIG_X86_64
2221        exit_idle();
2222#endif
2223        irq_enter();
2224
2225        me = smp_processor_id();
2226        for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
2227                unsigned int irq;
2228                struct irq_desc *desc;
2229                struct irq_cfg *cfg;
2230                irq = __get_cpu_var(vector_irq)[vector];
2231
2232                desc = irq_to_desc(irq);
2233                if (!desc)
2234                        continue;
2235
2236                cfg = irq_cfg(irq);
2237                spin_lock(&desc->lock);
2238                if (!cfg->move_cleanup_count)
2239                        goto unlock;
2240
2241                if ((vector == cfg->vector) && cpu_isset(me, cfg->domain))
2242                        goto unlock;
2243
2244                __get_cpu_var(vector_irq)[vector] = -1;
2245                cfg->move_cleanup_count--;
2246unlock:
2247                spin_unlock(&desc->lock);
2248        }
2249
2250        irq_exit();
2251}
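/*
 * Note the "goto unlock" pattern above: both early-out conditions jump
 * past the cleanup work but still fall through to spin_unlock(), so
 * desc->lock is dropped on every iteration of the vector scan.  This is
 * the safe counterpart of returning with a lock held.
 */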
2252
2253static void irq_complete_move(unsigned int irq)
2254{
2255        struct irq_cfg *cfg = irq_cfg(irq);
2256        unsigned vector, me;
2257
2258        if (likely(!cfg->move_in_progress))
2259                return;
2260
2261        vector = ~get_irq_regs()->orig_ax;
2262        me = smp_processor_id();
2263        if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
2264                cpumask_t cleanup_mask;
2265
2266                cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
2267                cfg->move_cleanup_count = cpus_weight(cleanup_mask);
2268                send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
2269                cfg->move_in_progress = 0;
2270        }
2271}
2272#else
2273static inline void irq_complete_move(unsigned int irq) {}
2274#endif
2275#ifdef CONFIG_INTR_REMAP
2276static void ack_x2apic_level(unsigned int irq)
2277{
2278        ack_x2APIC_irq();
2279}
2280
2281static void ack_x2apic_edge(unsigned int irq)
2282{
2283        ack_x2APIC_irq();
2284}
2285#endif
2286
2287static void ack_apic_edge(unsigned int irq)
2288{
2289        irq_complete_move(irq);
2290        move_native_irq(irq);
2291        ack_APIC_irq();
2292}
2293
2294atomic_t irq_mis_count;
2295
2296static void ack_apic_level(unsigned int irq)
2297{
2298#ifdef CONFIG_X86_32
2299        unsigned long v;
2300        int i;
2301#endif
2302        int do_unmask_irq = 0;
2303
2304        irq_complete_move(irq);
2305#ifdef CONFIG_GENERIC_PENDING_IRQ
2306        /* If we are moving the irq we need to mask it */
2307        if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
2308                do_unmask_irq = 1;
2309                mask_IO_APIC_irq(irq);
2310        }
2311#endif
2312
2313#ifdef CONFIG_X86_32
2314        /*
2315        * It appears there is an erratum which affects at least version 0x11
2316        * of I/O APIC (that's the 82093AA and cores integrated into various
2317        * chipsets).  Under certain conditions a level-triggered interrupt is
2318        * erroneously delivered as an edge-triggered one but the respective IRR
2319        * bit gets set nevertheless.  As a result the I/O unit expects an EOI
2320        * message but it will never arrive and further interrupts are blocked
2321        * from the source.  The exact reason is so far unknown, but the
2322        * phenomenon was observed when two consecutive interrupt requests
2323        * from a given source get delivered to the same CPU and the source is
2324        * temporarily disabled in between.
2325        *
2326        * A workaround is to simulate an EOI message manually.  We achieve it
2327        * by setting the trigger mode to edge and then to level when the edge
2328        * trigger mode gets detected in the TMR of a local APIC for a
2329        * level-triggered interrupt.  We mask the source for the time of the
2330        * operation to prevent an edge-triggered interrupt escaping meanwhile.
2331        * The idea is from Manfred Spraul.  --macro
2332        */
2333        i = irq_cfg(irq)->vector;
2334
2335        v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
2336#endif
2337
2338        /*
2339         * We must acknowledge the irq before we move it or the acknowledge will
2340         * not propagate properly.
2341         */
2342        ack_APIC_irq();
2343
2344        /* Now we can move and re-enable the irq */
2345        if (unlikely(do_unmask_irq)) {
2346                /* Only migrate the irq if the ack has been received.
2347                 *
2348                 * On rare occasions the broadcast level triggered ack gets
2349                 * delayed going to ioapics, and if we reprogram the
2350                 * vector while Remote IRR is still set the irq will never
2351                 * fire again.
2352                 *
2353                 * To prevent this scenario we read the Remote IRR bit
2354                 * of the ioapic.  This has two effects.
2355                 * - On any sane system the read of the ioapic will
2356                 *   flush writes (and acks) going to the ioapic from
2357                 *   this cpu.
2358                 * - We get to see if the ACK has actually been delivered.
2359                 *
2360                 * Based on failed experiments of reprogramming the
2361                 * Based on failed experiments of reprogramming the
2362                 * ioapic entry from outside of irq context (starting
2363                 * with masking the ioapic entry and then polling until
2364                 * Remote IRR was clear before reprogramming the
2365                 * ioapic), I don't trust the Remote IRR bit to be
2366                 * completely accurate.
2367                 * However there appears to be no other way to plug
2368                 * this race, so if the Remote IRR bit is not
2369                 * accurate and is causing problems then it is a hardware bug
2370                 * and you can go talk to the chipset vendor about it.
2371                 */
2372                if (!io_apic_level_ack_pending(irq))
2373                        move_masked_irq(irq);
2374                unmask_IO_APIC_irq(irq);
2375        }
2376
2377#ifdef CONFIG_X86_32
2378        if (!(v & (1 << (i & 0x1f)))) {
2379                atomic_inc(&irq_mis_count);
2380                spin_lock(&ioapic_lock);
2381                __mask_and_edge_IO_APIC_irq(irq);
2382                __unmask_and_level_IO_APIC_irq(irq);
2383                spin_unlock(&ioapic_lock);
2384        }
2385#endif
2386}
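/*
 * On 32-bit, the TMR bit sampled before the ack is rechecked at the end
 * of ack_apic_level(): if the level-triggered interrupt was latched as
 * edge (bit clear), the manual EOI is simulated by briefly switching
 * the pin to edge and back to level, with ioapic_lock held across both
 * RTE rewrites.
 */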
2387
2388static struct irq_chip ioapic_chip __read_mostly = {
2389        .name                = "IO-APIC",
2390        .startup        = startup_ioapic_irq,
2391        .mask                = mask_IO_APIC_irq,
2392        .unmask                = unmask_IO_APIC_irq,
2393        .ack                = ack_apic_edge,
2394        .eoi                = ack_apic_level,
2395#ifdef CONFIG_SMP
2396        .set_affinity        = set_ioapic_affinity_irq,
2397#endif
2398        .retrigger        = ioapic_retrigger_irq,
2399};
2400
2401#ifdef CONFIG_INTR_REMAP
2402static struct irq_chip ir_ioapic_chip __read_mostly = {
2403        .name                = "IR-IO-APIC",
2404        .startup        = startup_ioapic_irq,
2405        .mask                = mask_IO_APIC_irq,
2406        .unmask                = unmask_IO_APIC_irq,
2407        .ack                = ack_x2apic_edge,
2408        .eoi                = ack_x2apic_level,
2409#ifdef CONFIG_SMP
2410        .set_affinity        = set_ir_ioapic_affinity_irq,
2411#endif
2412        .retrigger        = ioapic_retrigger_irq,
2413};
2414#endif
2415
2416static inline void init_IO_APIC_traps(void)
2417{
2418        int irq;
2419        struct irq_desc *desc;
2420        struct irq_cfg *cfg;
2421
2422        /*
2423         * NOTE! The local APIC isn't very good at handling
2424         * multiple interrupts at the same interrupt level.
2425         * As the interrupt level is determined by taking the
2426         * vector number and shifting that right by 4, we
2427         * want to spread these out a bit so that they don't
2428         * all fall in the same interrupt level.
2429         *
2430         * Also, we've got to be careful not to trash gate
2431         * 0x80, because int 0x80 is hm, kind of importantish. ;)
2432         */
2433        for_each_irq_cfg(irq, cfg) {
2434                if (IO_APIC_IRQ(irq) && !cfg->vector) {
2435                        /*
2436                         * Hmm.. We don't have an entry for this,
2437                         * so default to an old-fashioned 8259
2438                         * interrupt if we can..
2439                         */
2440                        if (irq < 16)
2441                                make_8259A_irq(irq);
2442                        else {
2443                                desc = irq_to_desc(irq);
2444                                /* Strange. Oh, well.. */
2445                                desc->chip = &no_irq_chip;
2446                        }
2447                }
2448        }
2449}
2450
2451/*
2452 * The local APIC irq-chip implementation:
2453 */
2454
2455static void mask_lapic_irq(unsigned int irq)
2456{
2457        unsigned long v;
2458
2459        v = apic_read(APIC_LVT0);
2460        apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
2461}
2462
2463static void unmask_lapic_irq(unsigned int irq)
2464{
2465        unsigned long v;
2466
2467        v = apic_read(APIC_LVT0);
2468        apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
2469}
2470
2471static void ack_lapic_irq (unsigned int irq)
2472{
2473        ack_APIC_irq();
2474}
2475
2476static struct irq_chip lapic_chip __read_mostly = {
2477        .name                = "local-APIC",
2478        .mask                = mask_lapic_irq,
2479        .unmask                = unmask_lapic_irq,
2480        .ack                = ack_lapic_irq,
2481};
2482
2483static void lapic_register_intr(int irq)
2484{
2485        struct irq_desc *desc;
2486
2487        desc = irq_to_desc(irq);
2488        desc->status &= ~IRQ_LEVEL;
2489        set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
2490                                      "edge");
2491}
2492
2493static void __init setup_nmi(void)
2494{
2495        /*
2496         * Dirty trick to enable the NMI watchdog ...
2497         * We put the 8259A master into AEOI mode and
2498         * unmask LVT0 of all local APICs as NMI.
2499         *
2500         * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
2501         * is from Maciej W. Rozycki - so we do not have to EOI from
2502         * the NMI handler or the timer interrupt.
2503         */
2504        apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
2505
2506        enable_NMI_through_LVT0();
2507
2508        apic_printk(APIC_VERBOSE, " done.\n");
2509}
2510
2511/*
2512 * This looks a bit hackish but it's about the only way of sending
2513 * a few INTA cycles to 8259As and any associated glue logic.  ICR does
2514 * not support the ExtINT mode, unfortunately.  We need to send these
2515 * cycles as some i82489DX-based boards have glue logic that keeps the
2516 * 8259A interrupt line asserted until INTA.  --macro
2517 */
2518static inline void __init unlock_ExtINT_logic(void)
2519{
2520        int apic, pin, i;
2521        struct IO_APIC_route_entry entry0, entry1;
2522        unsigned char save_control, save_freq_select;
2523
2524        pin  = find_isa_irq_pin(8, mp_INT);
2525        if (pin == -1) {
2526                WARN_ON_ONCE(1);
2527                return;
2528        }
2529        apic = find_isa_irq_apic(8, mp_INT);
2530        if (apic == -1) {
2531                WARN_ON_ONCE(1);
2532                return;
2533        }
2534
2535        entry0 = ioapic_read_entry(apic, pin);
2536        clear_IO_APIC_pin(apic, pin);
2537
2538        memset(&entry1, 0, sizeof(entry1));
2539
2540        entry1.dest_mode = 0;                        /* physical delivery */
2541        entry1.mask = 0;                        /* unmask IRQ now */
2542        entry1.dest = hard_smp_processor_id();
2543        entry1.delivery_mode = dest_ExtINT;
2544        entry1.polarity = entry0.polarity;
2545        entry1.trigger = 0;
2546        entry1.vector = 0;
2547
2548        ioapic_write_entry(apic, pin, entry1);
2549
2550        save_control = CMOS_READ(RTC_CONTROL);
2551        save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
2552        CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
2553                   RTC_FREQ_SELECT);
2554        CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);
2555
2556        i = 100;
2557        while (i-- > 0) {
2558                mdelay(10);
2559                if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
2560                        i -= 10;
2561        }
2562
2563        CMOS_WRITE(save_control, RTC_CONTROL);
2564        CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
2565        clear_IO_APIC_pin(apic, pin);
2566
2567        ioapic_write_entry(apic, pin, entry0);
2568}
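/*
 * The CMOS writes above arm the RTC periodic interrupt (rate select 0x6,
 * i.e. 1024 Hz, plus RTC_PIE) purely to generate INTA cycles on the
 * 8259A path; the loop then polls RTC_PF for up to roughly a second
 * before the original RTC control, frequency select and IO-APIC entry
 * are restored.
 */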
2569
2570static int disable_timer_pin_1 __initdata;
2571/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
2572static int __init disable_timer_pin_setup(char *arg)
2573{
2574        disable_timer_pin_1 = 1;
2575        return 0;
2576}
2577early_param("disable_timer_pin_1", disable_timer_pin_setup);
2578
2579int timer_through_8259 __initdata;
2580
2581/*
2582 * This code may look a bit paranoid, but it's supposed to cooperate with
2583 * a wide range of boards and BIOS bugs.  Fortunately only the timer IRQ
2584 * is so screwy.  Thanks to Brian Perkins for testing/hacking this beast
2585 * fanatically on his truly buggy board.
2586 *
2587 * FIXME: really need to revamp this for all platforms.
2588 */
2589static inline void __init check_timer(void)
2590{
2591        struct irq_cfg *cfg = irq_cfg(0);
2592        int apic1, pin1, apic2, pin2;
2593        unsigned long flags;
2594        unsigned int ver;
2595        int no_pin1 = 0;
2596
2597        local_irq_save(flags);
2598
2599        ver = apic_read(APIC_LVR);
2600        ver = GET_APIC_VERSION(ver);
2601
2602        /*
2603         * get/set the timer IRQ vector:
2604         */
2605        disable_8259A_irq(0);
2606        assign_irq_vector(0, TARGET_CPUS);
2607
2608        /*
2609         * As IRQ0 is to be enabled in the 8259A, the virtual
2610         * wire has to be disabled in the local APIC.  Also
2611         * timer interrupts need to be acknowledged manually in
2612         * the 8259A for the i82489DX when using the NMI
2613         * watchdog as that APIC treats NMIs as level-triggered.
2614         * The AEOI mode will finish them in the 8259A
2615         * automatically.
2616         */
2617        apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
2618        init_8259A(1);
2619#ifdef CONFIG_X86_32
2620        timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
2621#endif
2622
2623        pin1  = find_isa_irq_pin(0, mp_INT);
2624        apic1 = find_isa_irq_apic(0, mp_INT);
2625        pin2  = ioapic_i8259.pin;
2626        apic2 = ioapic_i8259.apic;
2627
2628        apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X "
2629                    "apic1=%d pin1=%d apic2=%d pin2=%d\n",
2630                    cfg->vector, apic1, pin1, apic2, pin2);
2631
2632        /*
2633         * Some BIOS writers are clueless and report the ExtINTA
2634         * I/O APIC input from the cascaded 8259A as the timer
2635         * interrupt input.  So just in case, if only one pin
2636         * was found above, try it both directly and through the
2637         * 8259A.
2638         */
2639        if (pin1 == -1) {
2640#ifdef CONFIG_INTR_REMAP
2641                if (intr_remapping_enabled)
2642                        panic("BIOS bug: timer not connected to IO-APIC");
2643#endif
2644                pin1 = pin2;
2645                apic1 = apic2;
2646                no_pin1 = 1;
2647        } else if (pin2 == -1) {
2648                pin2 = pin1;
2649                apic2 = apic1;
2650        }
2651
2652        if (pin1 != -1) {
2653                /*
2654                 * Ok, does IRQ0 through the IOAPIC work?
2655                 */
2656                if (no_pin1) {
2657                        add_pin_to_irq(0, apic1, pin1);
2658                        setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
2659                }
2660                unmask_IO_APIC_irq(0);
2661                if (timer_irq_works()) {
2662                        if (nmi_watchdog == NMI_IO_APIC) {
2663                                setup_nmi();
2664                                enable_8259A_irq(0);
2665                        }
2666                        if (disable_timer_pin_1 > 0)
2667                                clear_IO_APIC_pin(0, pin1);
2668                        goto out;
2669                }
2670#ifdef CONFIG_INTR_REMAP
2671                if (intr_remapping_enabled)
2672                        panic("timer doesn't work through Interrupt-remapped IO-APIC");
2673#endif
2674                clear_IO_APIC_pin(apic1, pin1);
2675                if (!no_pin1)
2676                        apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
2677                                    "8254 timer not connected to IO-APIC\n");
2678
2679                apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer "
2680                            "(IRQ0) through the 8259A ...\n");
2681                apic_printk(APIC_QUIET, KERN_INFO
2682                            "..... (found apic %d pin %d) ...\n", apic2, pin2);
2683                /*
2684                 * legacy devices should be connected to IO APIC #0
2685                 */
2686                replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
2687                setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
2688                unmask_IO_APIC_irq(0);
2689                enable_8259A_irq(0);
2690                if (timer_irq_works()) {
2691                        apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
2692                        timer_through_8259 = 1;
2693                        if (nmi_watchdog == NMI_IO_APIC) {
2694                                disable_8259A_irq(0);
2695                                setup_nmi();
2696                                enable_8259A_irq(0);
2697                        }
2698                        goto out;
2699                }
2700                /*
2701                 * Cleanup, just in case ...
2702                 */
2703                disable_8259A_irq(0);
2704                clear_IO_APIC_pin(apic2, pin2);
2705                apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
2706        }
2707
2708        if (nmi_watchdog == NMI_IO_APIC) {
2709                apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work "
2710                            "through the IO-APIC - disabling NMI Watchdog!\n");
2711                nmi_watchdog = NMI_NONE;
2712        }
2713#ifdef CONFIG_X86_32
2714        timer_ack = 0;
2715#endif
2716
2717        apic_printk(APIC_QUIET, KERN_INFO
2718                    "...trying to set up timer as Virtual Wire IRQ...\n");
2719
2720        lapic_register_intr(0);
2721        apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector);        /* Fixed mode */
2722        enable_8259A_irq(0);
2723
2724        if (timer_irq_works()) {
2725                apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
2726                goto out;
2727        }
2728        disable_8259A_irq(0);
2729        apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
2730        apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
2731
2732        apic_printk(APIC_QUIET, KERN_INFO
2733                    "...trying to set up timer as ExtINT IRQ...\n");
2734
2735        init_8259A(0);
2736        make_8259A_irq(0);
2737        apic_write(APIC_LVT0, APIC_DM_EXTINT);
2738
2739        unlock_ExtINT_logic();
2740
2741        if (timer_irq_works()) {
2742                apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
2743                goto out;
2744        }
2745        apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
2746        panic("IO-APIC + timer doesn't work!  Boot with apic=debug and send a "
2747                "report.  Then try booting with the 'noapic' option.\n");
2748out:
2749        local_irq_restore(flags);
2750}
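/*
 * check_timer() thus walks a fallback ladder: IRQ0 straight through the
 * IO-APIC pin (pin1), then routed via the 8259A ExtINT pin (pin2), then
 * as a local APIC "virtual wire" in fixed mode, then as a raw ExtINT
 * after unlock_ExtINT_logic() - and panics only when all four fail.
 */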
2751
2752/*
2753 * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
2754 * to devices.  However there may be an I/O APIC pin available for
2755 * this interrupt regardless.  The pin may be left unconnected, but
2756 * typically it will be reused as an ExtINT cascade interrupt for
2757 * the master 8259A.  In the MPS case such a pin will normally be
2758 * reported as an ExtINT interrupt in the MP table.  With ACPI
2759 * there is no provision for ExtINT interrupts, and in the absence
2760 * of an override it would be treated as an ordinary ISA I/O APIC
2761 * interrupt, that is edge-triggered and unmasked by default.  We
2762 * used to do this, but it caused problems on some systems because
2763 * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using
2764 * the same ExtINT cascade interrupt to drive the local APIC of the
2765 * bootstrap processor.  Therefore we refrain from routing IRQ2 to
2766 * the I/O APIC in all cases now.  No actual device should request
2767 * it anyway.  --macro
2768 */
2769#define PIC_IRQS        (1 << PIC_CASCADE_IR)
2770
2771void __init setup_IO_APIC(void)
2772{
2773
2774#ifdef CONFIG_X86_32
2775        enable_IO_APIC();
2776#else
2777        /*
2778         * calling enable_IO_APIC() is moved to setup_local_APIC for the BP
2779         */
2780#endif
2781
2782        io_apic_irqs = ~PIC_IRQS;
2783
2784        apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
2785        /*
2786         * Set up IO-APIC IRQ routing.
2787         */
2788#ifdef CONFIG_X86_32
2789        if (!acpi_ioapic)
2790                setup_ioapic_ids_from_mpc();
2791#endif
2792        sync_Arb_IDs();
2793        setup_IO_APIC_irqs();
2794        init_IO_APIC_traps();
2795        check_timer();
2796}
2797
2798/*
2799 *      Called after all the initialization is done. If we didn't find any
2800 *      APIC bugs then we can allow the modify fast path.
2801 */
2802
2803static int __init io_apic_bug_finalize(void)
2804{
2805        if (sis_apic_bug == -1)
2806                sis_apic_bug = 0;
2807        return 0;
2808}
2809
2810late_initcall(io_apic_bug_finalize);
2811
2812struct sysfs_ioapic_data {
2813        struct sys_device dev;
2814        struct IO_APIC_route_entry entry[0];
2815};
2816static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS];
2817
2818static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
2819{
2820        struct IO_APIC_route_entry *entry;
2821        struct sysfs_ioapic_data *data;
2822        int i;
2823
2824        data = container_of(dev, struct sysfs_ioapic_data, dev);
2825        entry = data->entry;
2826        for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ )
2827                *entry = ioapic_read_entry(dev->id, i);
2828
2829        return 0;
2830}
2831
2832static int ioapic_resume(struct sys_device *dev)
2833{
2834        struct IO_APIC_route_entry *entry;
2835        struct sysfs_ioapic_data *data;
2836        unsigned long flags;
2837        union IO_APIC_reg_00 reg_00;
2838        int i;
2839
2840        data = container_of(dev, struct sysfs_ioapic_data, dev);
2841        entry = data->entry;
2842
2843        spin_lock_irqsave(&ioapic_lock, flags);
2844        reg_00.raw = io_apic_read(dev->id, 0);
2845        if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) {
2846                reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid;
2847                io_apic_write(dev->id, 0, reg_00.raw);
2848        }
2849        spin_unlock_irqrestore(&ioapic_lock, flags);
2850        for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
2851                ioapic_write_entry(dev->id, i, entry[i]);
2852
2853        return 0;
2854}
2855
2856static struct sysdev_class ioapic_sysdev_class = {
2857        .name = "ioapic",
2858        .suspend = ioapic_suspend,
2859        .resume = ioapic_resume,
2860};
2861
2862static int __init ioapic_init_sysfs(void)
2863{
2864        struct sys_device * dev;
2865        int i, size, error;
2866
2867        error = sysdev_class_register(&ioapic_sysdev_class);
2868        if (error)
2869                return error;
2870
2871        for (i = 0; i < nr_ioapics; i++ ) {
2872                size = sizeof(struct sys_device) + nr_ioapic_registers[i]
2873                        * sizeof(struct IO_APIC_route_entry);
2874                mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL);
2875                if (!mp_ioapic_data[i]) {
2876                        printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
2877                        continue;
2878                }
2879                dev = &mp_ioapic_data[i]->dev;
2880                dev->id = i;
2881                dev->cls = &ioapic_sysdev_class;
2882                error = sysdev_register(dev);
2883                if (error) {
2884                        kfree(mp_ioapic_data[i]);
2885                        mp_ioapic_data[i] = NULL;
2886                        printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
2887                        continue;
2888                }
2889        }
2890
2891        return 0;
2892}
2893
2894device_initcall(ioapic_init_sysfs);
2895
2896/*
2897 * Dynamic irq allocation and deallocation
2898 */
2899unsigned int create_irq_nr(unsigned int irq_want)
2900{
2901        /* Allocate an unused irq */
2902        unsigned int irq;
2903        unsigned int new;
2904        unsigned long flags;
2905        struct irq_cfg *cfg_new;
2906
2907        irq_want = nr_irqs - 1;
2908
2909        irq = 0;
2910        spin_lock_irqsave(&vector_lock, flags);
2911        for (new = irq_want; new > 0; new--) {
2912                if (platform_legacy_irq(new))
2913                        continue;
2914                cfg_new = irq_cfg(new);
2915                if (cfg_new && cfg_new->vector != 0)
2916                        continue;
2917                /* check if we need to create one */
2918                if (!cfg_new)
2919                        cfg_new = irq_cfg_alloc(new);
2920                if (__assign_irq_vector(new, TARGET_CPUS) == 0)
2921                        irq = new;
2922                break;
2923        }
2924        spin_unlock_irqrestore(&vector_lock, flags);
2925
2926        if (irq > 0) {
2927                dynamic_irq_init(irq);
2928        }
2929        return irq;
2930}
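/*
 * Note that create_irq_nr() immediately overwrites its irq_want
 * argument with nr_irqs - 1, so the search always scans downward from
 * the top, skipping legacy IRQs and vectors already assigned.  The
 * whole scan runs under vector_lock, which is released on the one exit
 * path before dynamic_irq_init() is called.
 */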
2931
2932int create_irq(void)
2933{
2934        int irq;
2935
2936        irq = create_irq_nr(nr_irqs - 1);
2937
2938        if (irq == 0)
2939                irq = -1;
2940
2941        return irq;
2942}
2943
2944void destroy_irq(unsigned int irq)
2945{
2946        unsigned long flags;
2947
2948        dynamic_irq_cleanup(irq);
2949
2950#ifdef CONFIG_INTR_REMAP
2951        free_irte(irq);
2952#endif
2953        spin_lock_irqsave(&vector_lock, flags);
2954        __clear_irq_vector(irq);
2955        spin_unlock_irqrestore(&vector_lock, flags);
2956}
2957
2958/*
2959 * MSI message composition
2960 */
2961#ifdef CONFIG_PCI_MSI
2962static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
2963{
2964        struct irq_cfg *cfg;
2965        int err;
2966        unsigned dest;
2967        cpumask_t tmp;
2968
2969        tmp = TARGET_CPUS;
2970        err = assign_irq_vector(irq, tmp);
2971        if (err)
2972                return err;
2973
2974        cfg = irq_cfg(irq);
2975        cpus_and(tmp, cfg->domain, tmp);
2976        dest = cpu_mask_to_apicid(tmp);
2977
2978#ifdef CONFIG_INTR_REMAP
2979        if (irq_remapped(irq)) {
2980                struct irte irte;
2981                int ir_index;
2982                u16 sub_handle;
2983
2984                ir_index = map_irq_to_irte_handle(irq, &sub_handle);
2985                BUG_ON(ir_index == -1);
2986
2987                memset (&irte, 0, sizeof(irte));
2988
2989                irte.present = 1;
2990                irte.dst_mode = INT_DEST_MODE;
2991                irte.trigger_mode = 0; /* edge */
2992                irte.dlvry_mode = INT_DELIVERY_MODE;
2993                irte.vector = cfg->vector;
2994                irte.dest_id = IRTE_DEST(dest);
2995
2996                modify_irte(irq, &irte);
2997
2998                msg->address_hi = MSI_ADDR_BASE_HI;
2999                msg->data = sub_handle;
3000                msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
3001                                  MSI_ADDR_IR_SHV |
3002                                  MSI_ADDR_IR_INDEX1(ir_index) |
3003                                  MSI_ADDR_IR_INDEX2(ir_index);
3004        } else
3005#endif
3006        {
3007                msg->address_hi = MSI_ADDR_BASE_HI;
3008                msg->address_lo =
3009                        MSI_ADDR_BASE_LO |
3010                        ((INT_DEST_MODE == 0) ?
3011                                MSI_ADDR_DEST_MODE_PHYSICAL:
3012                                MSI_ADDR_DEST_MODE_LOGICAL) |
3013                        ((INT_DELIVERY_MODE != dest_LowestPrio) ?
3014                                MSI_ADDR_REDIRECTION_CPU:
3015                                MSI_ADDR_REDIRECTION_LOWPRI) |
3016                        MSI_ADDR_DEST_ID(dest);
3017
3018                msg->data =
3019                        MSI_DATA_TRIGGER_EDGE |
3020                        MSI_DATA_LEVEL_ASSERT |
3021                        ((INT_DELIVERY_MODE != dest_LowestPrio) ?
3022                                MSI_DATA_DELIVERY_FIXED:
3023                                MSI_DATA_DELIVERY_LOWPRI) |
3024                        MSI_DATA_VECTOR(cfg->vector);
3025        }
3026        return err;
3027}
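/*
 * In the non-remapped branch above, the MSI address encodes the
 * destination APIC ID plus the physical/logical and
 * fixed/lowest-priority choices derived from INT_DEST_MODE and
 * INT_DELIVERY_MODE, while the data word carries the vector with
 * edge-trigger, level-assert semantics.
 */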
3028
3029#ifdef CONFIG_SMP
3030static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
3031{
3032        struct irq_cfg *cfg;
3033        struct msi_msg msg;
3034        unsigned int dest;
3035        cpumask_t tmp;
3036        struct irq_desc *desc;
3037
3038        cpus_and(tmp, mask, cpu_online_map);
3039        if (cpus_empty(tmp))
3040                return;
3041
3042        if (assign_irq_vector(irq, mask))
3043                return;
3044
3045        cfg = irq_cfg(irq);
3046        cpus_and(tmp, cfg->domain, mask);
3047        dest = cpu_mask_to_apicid(tmp);
3048
3049        read_msi_msg(irq, &msg);
3050
3051        msg.data &= ~MSI_DATA_VECTOR_MASK;
3052        msg.data |= MSI_DATA_VECTOR(cfg->vector);
3053        msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
3054        msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3055
3056        write_msi_msg(irq, &msg);
3057        desc = irq_to_desc(irq);
3058        desc->affinity = mask;
3059}
3060
3061#ifdef CONFIG_INTR_REMAP
3062/*
3063 * Migrate the MSI irq to another cpumask. This migration is
3064 * done in the process context using interrupt-remapping hardware.
3065 */
3066static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
3067{
3068        struct irq_cfg *cfg;
3069        unsigned int dest;
3070        cpumask_t tmp, cleanup_mask;
3071        struct irte irte;
3072        struct irq_desc *desc;
3073
3074        cpus_and(tmp, mask, cpu_online_map);
3075        if (cpus_empty(tmp))
3076                return;
3077
3078        if (get_irte(irq, &irte))
3079                return;
3080
3081        if (assign_irq_vector(irq, mask))
3082                return;
3083
3084        cfg = irq_cfg(irq);
3085        cpus_and(tmp, cfg->domain, mask);
3086        dest = cpu_mask_to_apicid(tmp);
3087
3088        irte.vector = cfg->vector;
3089        irte.dest_id = IRTE_DEST(dest);
3090
3091        /*
3092         * atomically update the IRTE with the new destination and vector.
3093         */
3094        modify_irte(irq, &irte);
3095
3096        /*
3097         * After this point, all the interrupts will start arriving
3098         * at the new destination. So, time to clean up the previous
3099         * vector allocation.
3100         */
3101        if (cfg->move_in_progress) {
3102                cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
3103                cfg->move_cleanup_count = cpus_weight(cleanup_mask);
3104                send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
3105                cfg->move_in_progress = 0;
3106        }
3107
3108        desc = irq_to_desc(irq);
3109        desc->affinity = mask;
3110}
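/*
 * Contrast with set_msi_irq_affinity() above: under interrupt remapping
 * the device's MSI message keeps pointing at the same remap-table entry,
 * so migration only rewrites that IRTE (vector and dest_id) via
 * modify_irte(); no MSI register write is needed, which is what allows
 * this to run safely from process context.
 */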
3111#endif
3112#endif /* CONFIG_SMP */
3113
3114/*
3115 * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
3116 * which implement the MSI or MSI-X Capability Structure.
3117 */
3118static struct irq_chip msi_chip = {
3119        .name                = "PCI-MSI",
3120        .unmask                = unmask_msi_irq,
3121        .mask                = mask_msi_irq,
3122        .ack                = ack_apic_edge,
3123#ifdef CONFIG_SMP
3124        .set_affinity        = set_msi_irq_affinity,
3125#endif
3126        .retrigger        = ioapic_retrigger_irq,
3127};
3128
3129#ifdef CONFIG_INTR_REMAP
3130static struct irq_chip msi_ir_chip = {
3131        .name                = "IR-PCI-MSI",
3132        .unmask                = unmask_msi_irq,
3133        .mask                = mask_msi_irq,
3134        .ack                = ack_x2apic_edge,
3135#ifdef CONFIG_SMP
3136        .set_affinity        = ir_set_msi_irq_affinity,
3137#endif
3138        .retrigger        = ioapic_retrigger_irq,
3139};
3140
3141/*
3142 * Map the PCI dev to the corresponding remapping hardware unit
3143 * and allocate 'nvec' consecutive interrupt-remapping table entries
3144 * in it.
3145 */
3146static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
3147{
3148        struct intel_iommu *iommu;
3149        int index;
3150
3151        iommu = map_dev_to_ir(dev);
3152        if (!iommu) {
3153                printk(KERN_ERR
3154                       "Unable to map PCI %s to iommu\n", pci_name(dev));
3155                return -ENOENT;
3156        }
3157
3158        index = alloc_irte(iommu, irq, nvec);
3159        if (index < 0) {
3160                printk(KERN_ERR
3161                       "Unable to allocate %d IRTE for PCI %s\n", nvec,
3162                       pci_name(dev));
3163                return -ENOSPC;
3164        }
3165        return index;
3166}
3167#endif
3168
3169static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
3170{
3171        int ret;
3172        struct msi_msg msg;
3173
3174        ret = msi_compose_msg(dev, irq, &msg);
3175        if (ret < 0)
3176                return ret;
3177
3178        set_irq_msi(irq, desc);
3179        write_msi_msg(irq, &msg);
3180
3181#ifdef CONFIG_INTR_REMAP
3182        if (irq_remapped(irq)) {
3183                struct irq_desc *desc = irq_to_desc(irq);
3184                /*
3185                 * irq migration in process context
3186                 */
3187                desc->status |= IRQ_MOVE_PCNTXT;
3188                set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
3189        } else
3190#endif
3191                set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
3192
3193        dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq);
3194
3195        return 0;
3196}
3197
3198static unsigned int build_irq_for_pci_dev(struct pci_dev *dev)
3199{
3200        unsigned int irq;
3201
3202        irq = dev->bus->number;
3203        irq <<= 8;
3204        irq |= dev->devfn;
3205        irq <<= 12;
3206
3207        return irq;
3208}
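/*
 * Worked example (assuming a hypothetical device at bus 0x02, devfn 0x08):
 *
 *   irq  = 0x02;        bus number
 *   irq <<= 8;          0x00000200
 *   irq |= 0x08;        0x00000208
 *   irq <<= 12;         0x00208000
 *
 * arch_setup_msi_irq() below then adds 0x100 to this value before asking
 * create_irq_nr() for an irq at or above it.
 */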
3209
3210int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
3211{
3212        unsigned int irq;
3213        int ret;
3214        unsigned int irq_want;
3215
3216        irq_want = build_irq_for_pci_dev(dev) + 0x100;
3217
3218        irq = create_irq_nr(irq_want);
3219        if (irq == 0)
3220                return -1;
3221
3222#ifdef CONFIG_INTR_REMAP
3223        if (!intr_remapping_enabled)
3224                goto no_ir;
3225
3226        ret = msi_alloc_irte(dev, irq, 1);
3227        if (ret < 0)
3228                goto error;
3229no_ir:
3230#endif
3231        ret = setup_msi_irq(dev, desc, irq);
3232        if (ret < 0) {
3233                destroy_irq(irq);
3234                return ret;
3235        }
3236        return 0;
3237
3238#ifdef CONFIG_INTR_REMAP
3239error:
3240        destroy_irq(irq);
3241        return ret;
3242#endif
3243}
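/*
 * Error-path note: once create_irq_nr() has handed out an irq, both
 * failure paths (IRTE allocation and setup_msi_irq()) call destroy_irq()
 * before returning, so the irq number is released on every exit.
 */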
3244
3245int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3246{
3247        unsigned int irq;
3248        int ret, sub_handle;
3249        struct msi_desc *desc;
3250        unsigned int irq_want;
3251
3252#ifdef CONFIG_INTR_REMAP
3253        struct intel_iommu *iommu = NULL;
3254        int index = 0;
3255#endif
3256
3257        irq_want = build_irq_for_pci_dev(dev) + 0x100;
3258        sub_handle = 0;
3259        list_for_each_entry(desc, &dev->msi_list, list) {
3260                irq = create_irq_nr(irq_want--);
3261                if (irq == 0)
3262                        return -1;
3263#ifdef CONFIG_INTR_REMAP
3264                if (!intr_remapping_enabled)
3265                        goto no_ir;
3266
3267                if (!sub_handle) {
3268                        /*
3269                         * allocate the consecutive block of IRTEs
3270                         * for 'nvec'
3271                         */
3272                        index = msi_alloc_irte(dev, irq, nvec);
3273                        if (index < 0) {
3274                                ret = index;
3275                                goto error;
3276                        }
3277                } else {
3278                        iommu = map_dev_to_ir(dev);
3279                        if (!iommu) {
3280                                ret = -ENOENT;
3281                                goto error;
3282                        }
3283                        /*
3284                         * set up the mapping between the irq and the IRTE
3285                         * base index; the sub_handle points to the
3286                         * appropriate interrupt remap table entry.
3287                         */
3288                        set_irte_irq(irq, iommu, index, sub_handle);
3289                }
3290no_ir:
3291#endif
3292                ret = setup_msi_irq(dev, desc, irq);
3293                if (ret < 0)
3294                        goto error;
3295                sub_handle++;
3296        }
3297        return 0;
3298
3299error:
3300        destroy_irq(irq);
3301        return ret;
3302}
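/*
 * Multi-vector flow with remapping enabled: the first entry
 * (sub_handle == 0) reserves a consecutive block of nvec IRTEs via
 * msi_alloc_irte(); every later entry only binds its irq to
 * (index, sub_handle) with set_irte_irq(). On failure, only the irq
 * created in the current iteration is destroyed before returning.
 */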
3303
3304void arch_teardown_msi_irq(unsigned int irq)
3305{
3306        destroy_irq(irq);
3307}
3308
3309#ifdef CONFIG_DMAR
3310#ifdef CONFIG_SMP
3311static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
3312{
3313        struct irq_cfg *cfg;
3314        struct msi_msg msg;
3315        unsigned int dest;
3316        cpumask_t tmp;
3317        struct irq_desc *desc;
3318
3319        cpus_and(tmp, mask, cpu_online_map);
3320        if (cpus_empty(tmp))
3321                return;
3322
3323        if (assign_irq_vector(irq, mask))
3324                return;
3325
3326        cfg = irq_cfg(irq);
3327        cpus_and(tmp, cfg->domain, mask);
3328        dest = cpu_mask_to_apicid(tmp);
3329
3330        dmar_msi_read(irq, &msg);
3331
3332        msg.data &= ~MSI_DATA_VECTOR_MASK;
3333        msg.data |= MSI_DATA_VECTOR(cfg->vector);
3334        msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
3335        msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3336
3337        dmar_msi_write(irq, &msg);
3338        desc = irq_to_desc(irq);
3339        desc->affinity = mask;
3340}
3341#endif /* CONFIG_SMP */
3342
3343struct irq_chip dmar_msi_type = {
3344        .name = "DMAR_MSI",
3345        .unmask = dmar_msi_unmask,
3346        .mask = dmar_msi_mask,
3347        .ack = ack_apic_edge,
3348#ifdef CONFIG_SMP
3349        .set_affinity = dmar_msi_set_affinity,
3350#endif
3351        .retrigger = ioapic_retrigger_irq,
3352};
3353
3354int arch_setup_dmar_msi(unsigned int irq)
3355{
3356        int ret;
3357        struct msi_msg msg;
3358
3359        ret = msi_compose_msg(NULL, irq, &msg);
3360        if (ret < 0)
3361                return ret;
3362        dmar_msi_write(irq, &msg);
3363        set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
3364                "edge");
3365        return 0;
3366}
3367#endif
3368
3369#ifdef CONFIG_HPET_TIMER
3370
3371#ifdef CONFIG_SMP
3372static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
3373{
3374        struct irq_cfg *cfg;
3375        struct irq_desc *desc;
3376        struct msi_msg msg;
3377        unsigned int dest;
3378        cpumask_t tmp;
3379
3380        cpus_and(tmp, mask, cpu_online_map);
3381        if (cpus_empty(tmp))
3382                return;
3383
3384        if (assign_irq_vector(irq, mask))
3385                return;
3386
3387        cfg = irq_cfg(irq);
3388        cpus_and(tmp, cfg->domain, mask);
3389        dest = cpu_mask_to_apicid(tmp);
3390
3391        hpet_msi_read(irq, &msg);
3392
3393        msg.data &= ~MSI_DATA_VECTOR_MASK;
3394        msg.data |= MSI_DATA_VECTOR(cfg->vector);
3395        msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
3396        msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3397
3398        hpet_msi_write(irq, &msg);
3399        desc = irq_to_desc(irq);
3400        desc->affinity = mask;
3401}
3402#endif /* CONFIG_SMP */
3403
3404struct irq_chip hpet_msi_type = {
3405        .name = "HPET_MSI",
3406        .unmask = hpet_msi_unmask,
3407        .mask = hpet_msi_mask,
3408        .ack = ack_apic_edge,
3409#ifdef CONFIG_SMP
3410        .set_affinity = hpet_msi_set_affinity,
3411#endif
3412        .retrigger = ioapic_retrigger_irq,
3413};
3414
3415int arch_setup_hpet_msi(unsigned int irq)
3416{
3417        int ret;
3418        struct msi_msg msg;
3419
3420        ret = msi_compose_msg(NULL, irq, &msg);
3421        if (ret < 0)
3422                return ret;
3423
3424        hpet_msi_write(irq, &msg);
3425        set_irq_chip_and_handler_name(irq, &hpet_msi_type, handle_edge_irq,
3426                "edge");
3427
3428        return 0;
3429}
3430#endif
3431
3432#endif /* CONFIG_PCI_MSI */
3433/*
3434 * HyperTransport interrupt support
3435 */
3436#ifdef CONFIG_HT_IRQ
3437
3438#ifdef CONFIG_SMP
3439
3440static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
3441{
3442        struct ht_irq_msg msg;
3443        fetch_ht_irq_msg(irq, &msg);
3444
3445        msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK);
3446        msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
3447
3448        msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
3449        msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
3450
3451        write_ht_irq_msg(irq, &msg);
3452}
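/*
 * Read-modify-write of the HT irq message: the vector field and both
 * destination-ID fields (low and high word) are cleared first and then
 * filled with the new values, so the remaining bits of the fetched
 * message survive the write-back.
 */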
3453
3454static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
3455{
3456        struct irq_cfg *cfg;
3457        unsigned int dest;
3458        cpumask_t tmp;
3459        struct irq_desc *desc;
3460
3461        cpus_and(tmp, mask, cpu_online_map);
3462        if (cpus_empty(tmp))
3463                return;
3464
3465        if (assign_irq_vector(irq, mask))
3466                return;
3467
3468        cfg = irq_cfg(irq);
3469        cpus_and(tmp, cfg->domain, mask);
3470        dest = cpu_mask_to_apicid(tmp);
3471
3472        target_ht_irq(irq, dest, cfg->vector);
3473        desc = irq_to_desc(irq);
3474        desc->affinity = mask;
3475}
3476#endif
3477
3478static struct irq_chip ht_irq_chip = {
3479        .name                = "PCI-HT",
3480        .mask                = mask_ht_irq,
3481        .unmask                = unmask_ht_irq,
3482        .ack                = ack_apic_edge,
3483#ifdef CONFIG_SMP
3484        .set_affinity        = set_ht_irq_affinity,
3485#endif
3486        .retrigger        = ioapic_retrigger_irq,
3487};
3488
3489int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
3490{
3491        struct irq_cfg *cfg;
3492        int err;
3493        cpumask_t tmp;
3494
3495        tmp = TARGET_CPUS;
3496        err = assign_irq_vector(irq, tmp);
3497        if (!err) {
3498                struct ht_irq_msg msg;
3499                unsigned dest;
3500
3501                cfg = irq_cfg(irq);
3502                cpus_and(tmp, cfg->domain, tmp);
3503                dest = cpu_mask_to_apicid(tmp);
3504
3505                msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
3506
3507                msg.address_lo =
3508                        HT_IRQ_LOW_BASE |
3509                        HT_IRQ_LOW_DEST_ID(dest) |
3510                        HT_IRQ_LOW_VECTOR(cfg->vector) |
3511                        ((INT_DEST_MODE == 0) ?
3512                                HT_IRQ_LOW_DM_PHYSICAL :
3513                                HT_IRQ_LOW_DM_LOGICAL) |
3514                        HT_IRQ_LOW_RQEOI_EDGE |
3515                        ((INT_DELIVERY_MODE != dest_LowestPrio) ?
3516                                HT_IRQ_LOW_MT_FIXED :
3517                                HT_IRQ_LOW_MT_ARBITRATED) |
3518                        HT_IRQ_LOW_IRQ_MASKED;
3519
3520                write_ht_irq_msg(irq, &msg);
3521
3522                set_irq_chip_and_handler_name(irq, &ht_irq_chip,
3523                                              handle_edge_irq, "edge");
3524
3525                dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq);
3526        }
3527        return err;
3528}
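/*
 * Note that HT_IRQ_LOW_IRQ_MASKED is included in the initial message, so
 * the interrupt presumably stays masked until the irq_chip's unmask hook
 * (unmask_ht_irq) runs when the irq is started up.
 */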
3529#endif /* CONFIG_HT_IRQ */
3530
3531#ifdef CONFIG_X86_64
3532/*
3533 * Re-target the irq to the specified CPU and enable the specified MMR located
3534 * on the specified blade to allow the sending of MSIs to the specified CPU.
3535 */
3536int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
3537                       unsigned long mmr_offset)
3538{
3539        const cpumask_t *eligible_cpu = get_cpu_mask(cpu);
3540        struct irq_cfg *cfg;
3541        int mmr_pnode;
3542        unsigned long mmr_value;
3543        struct uv_IO_APIC_route_entry *entry;
3544        unsigned long flags;
3545        int err;
3546
3547        err = assign_irq_vector(irq, *eligible_cpu);
3548        if (err != 0)
3549                return err;
3550
3551        spin_lock_irqsave(&vector_lock, flags);
3552        set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
3553                                      irq_name);
3554        spin_unlock_irqrestore(&vector_lock, flags);
3555
3556        cfg = irq_cfg(irq);
3557
3558        mmr_value = 0;
3559        entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
3560        BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
3561
3562        entry->vector = cfg->vector;
3563        entry->delivery_mode = INT_DELIVERY_MODE;
3564        entry->dest_mode = INT_DEST_MODE;
3565        entry->polarity = 0;
3566        entry->trigger = 0;
3567        entry->mask = 0;
3568        entry->dest = cpu_mask_to_apicid(*eligible_cpu);
3569
3570        mmr_pnode = uv_blade_to_pnode(mmr_blade);
3571        uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
3572
3573        return irq;
3574}
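/*
 * The MMR is assembled by type-punning: entry overlays the 64-bit
 * mmr_value, the BUG_ON() guards that size assumption, and the finished
 * word is written with a single uv_write_global_mmr64() call. Note also
 * that the function returns irq, not 0, on success.
 */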
3575
3576/*
3577 * Disable the specified MMR located on the specified blade so that MSIs are
3578 * no longer allowed to be sent.
3579 */
3580void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset)
3581{
3582        unsigned long mmr_value;
3583        struct uv_IO_APIC_route_entry *entry;
3584        int mmr_pnode;
3585
3586        mmr_value = 0;
3587        entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
3588        BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
3589
3590        entry->mask = 1;
3591
3592        mmr_pnode = uv_blade_to_pnode(mmr_blade);
3593        uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
3594}
3595#endif /* CONFIG_X86_64 */
3596
3597int __init io_apic_get_redir_entries(int ioapic)
3598{
3599        union IO_APIC_reg_01        reg_01;
3600        unsigned long flags;
3601
3602        spin_lock_irqsave(&ioapic_lock, flags);
3603        reg_01.raw = io_apic_read(ioapic, 1);
3604        spin_unlock_irqrestore(&ioapic_lock, flags);
3605
3606        return reg_01.bits.entries;
3607}
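/*
 * Locking pattern: ioapic_lock is taken around the single register read,
 * the value is captured into reg_01 while the lock is held, and the lock
 * is released before the return, so no path leaves the function with the
 * lock still held.
 */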
3608
3609int __init probe_nr_irqs(void)
3610{
3611        return NR_IRQS;
3612}
3613
3614/* --------------------------------------------------------------------------
3615                          ACPI-based IOAPIC Configuration
3616   -------------------------------------------------------------------------- */
3617
3618#ifdef CONFIG_ACPI
3619
3620#ifdef CONFIG_X86_32
3621int __init io_apic_get_unique_id(int ioapic, int apic_id)
3622{
3623        union IO_APIC_reg_00 reg_00;
3624        static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
3625        physid_mask_t tmp;
3626        unsigned long flags;
3627        int i = 0;
3628
3629        /*
3630         * The P4 platform supports up to 256 APIC IDs on two separate APIC
3631         * buses (one for LAPICs, one for IOAPICs), where predecessors only
3632 * support up to 16 on one shared APIC bus.
3633         *
3634         * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
3635         *      advantage of new APIC bus architecture.
3636         */
3637
3638        if (physids_empty(apic_id_map))
3639                apic_id_map = ioapic_phys_id_map(phys_cpu_present_map);
3640
3641        spin_lock_irqsave(&ioapic_lock, flags);
3642        reg_00.raw = io_apic_read(ioapic, 0);
3643        spin_unlock_irqrestore(&ioapic_lock, flags);
3644
3645        if (apic_id >= get_physical_broadcast()) {
3646                printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
3647                        "%d\n", ioapic, apic_id, reg_00.bits.ID);
3648                apic_id = reg_00.bits.ID;
3649        }
3650
3651        /*
3652         * Every APIC in a system must have a unique ID or we get lots of nice
3653         * 'stuck on smp_invalidate_needed IPI wait' messages.
3654         */
3655        if (check_apicid_used(apic_id_map, apic_id)) {
3656
3657                for (i = 0; i < get_physical_broadcast(); i++) {
3658                        if (!check_apicid_used(apic_id_map, i))
3659                                break;
3660                }
3661
3662                if (i == get_physical_broadcast())
3663                        panic("Max apic_id exceeded!\n");
3664
3665                printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
3666                        "trying %d\n", ioapic, apic_id, i);
3667
3668                apic_id = i;
3669        }
3670
3671        tmp = apicid_to_cpu_present(apic_id);
3672        physids_or(apic_id_map, apic_id_map, tmp);
3673
3674        if (reg_00.bits.ID != apic_id) {
3675                reg_00.bits.ID = apic_id;
3676
3677                spin_lock_irqsave(&ioapic_lock, flags);
3678                io_apic_write(ioapic, 0, reg_00.raw);
3679                reg_00.raw = io_apic_read(ioapic, 0);
3680                spin_unlock_irqrestore(&ioapic_lock, flags);
3681
3682                /* Sanity check */
3683                if (reg_00.bits.ID != apic_id) {
3684                        printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic);
3685                        return -1;
3686                }
3687        }
3688
3689        apic_printk(APIC_VERBOSE, KERN_INFO
3690                        "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
3691
3692        return apic_id;
3693}
3694
3695int __init io_apic_get_version(int ioapic)
3696{
3697        union IO_APIC_reg_01        reg_01;
3698        unsigned long flags;
3699
3700        spin_lock_irqsave(&ioapic_lock, flags);
3701        reg_01.raw = io_apic_read(ioapic, 1);
3702        spin_unlock_irqrestore(&ioapic_lock, flags);
3703
3704        return reg_01.bits.version;
3705}
3706#endif
3707
3708int io_apic_set_pci_routing(int ioapic, int pin, int irq, int triggering, int polarity)
3709{
3710        if (!IO_APIC_IRQ(irq)) {
3711                apic_printk(APIC_QUIET, KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
3712                        ioapic);
3713                return -EINVAL;
3714        }
3715
3716        /*
3717         * IRQs < 16 are already in the irq_2_pin[] map
3718         */
3719        if (irq >= 16)
3720                add_pin_to_irq(irq, ioapic, pin);
3721
3722        setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity);
3723
3724        return 0;
3725}
3726
3727
3728int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
3729{
3730        int i;
3731
3732        if (skip_ioapic_setup)
3733                return -1;
3734
3735        for (i = 0; i < mp_irq_entries; i++)
3736                if (mp_irqs[i].mp_irqtype == mp_INT &&
3737                    mp_irqs[i].mp_srcbusirq == bus_irq)
3738                        break;
3739        if (i >= mp_irq_entries)
3740                return -1;
3741
3742        *trigger = irq_trigger(i);
3743        *polarity = irq_polarity(i);
3744        return 0;
3745}
3746
3747#endif /* CONFIG_ACPI */
3748
3749/*
3750 * This function is currently only a helper for the i386 SMP boot process,
3751 * where we need to reprogram the ioredtbls to cater for the CPUs which have
3752 * come online, so the mask in all cases should simply be TARGET_CPUS.
3753 */
3754#ifdef CONFIG_SMP
3755void __init setup_ioapic_dest(void)
3756{
3757        int pin, ioapic, irq, irq_entry;
3758        struct irq_desc *desc;
3759        struct irq_cfg *cfg;
3760        cpumask_t mask;
3761
3762        if (skip_ioapic_setup == 1)
3763                return;
3764
3765        for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
3766                for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
3767                        irq_entry = find_irq_entry(ioapic, pin, mp_INT);
3768                        if (irq_entry == -1)
3769                                continue;
3770                        irq = pin_2_irq(irq_entry, ioapic, pin);
3771
3772                        /* setup_IO_APIC_irqs() could fail to get a vector for some
3773                         * devices when there are too many devices, because at that
3774                         * time only the boot CPU is online.
3775                         */
3776                        cfg = irq_cfg(irq);
3777                        if (!cfg->vector) {
3778                                setup_IO_APIC_irq(ioapic, pin, irq,
3779                                                  irq_trigger(irq_entry),
3780                                                  irq_polarity(irq_entry));
3781                                continue;
3782
3783                        }
3784
3785                        /*
3786                         * Honour affinities which have been set in early boot
3787                         */
3788                        desc = irq_to_desc(irq);
3789                        if (desc->status &
3790                            (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
3791                                mask = desc->affinity;
3792                        else
3793                                mask = TARGET_CPUS;
3794
3795#ifdef CONFIG_INTR_REMAP
3796                        if (intr_remapping_enabled)
3797                                set_ir_ioapic_affinity_irq(irq, mask);
3798                        else
3799#endif
3800                                set_ioapic_affinity_irq(irq, mask);
3801                }
3802
3803        }
3804}
3805#endif
3806
3807#define IOAPIC_RESOURCE_NAME_SIZE 11
3808
3809static struct resource *ioapic_resources;
3810
3811static struct resource * __init ioapic_setup_resources(void)
3812{
3813        unsigned long n;
3814        struct resource *res;
3815        char *mem;
3816        int i;
3817
3818        if (nr_ioapics <= 0)
3819                return NULL;
3820
3821        n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource);
3822        n *= nr_ioapics;
3823
3824        mem = alloc_bootmem(n);
3825        res = (void *)mem;
3826
3827        if (mem != NULL) {
3828                mem += sizeof(struct resource) * nr_ioapics;
3829
3830                for (i = 0; i < nr_ioapics; i++) {
3831                        res[i].name = mem;
3832                        res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
3833                        sprintf(mem, "IOAPIC %u", i);
3834                        mem += IOAPIC_RESOURCE_NAME_SIZE;
3835                }
3836        }
3837
3838        ioapic_resources = res;
3839
3840        return res;
3841}
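/*
 * Layout of the single bootmem allocation (illustrative, for a
 * hypothetical nr_ioapics == 2):
 *
 *   [struct resource 0][struct resource 1]["IOAPIC 0\0..."]["IOAPIC 1\0..."]
 *
 * res points at the start, mem is advanced past the resource array, and
 * each res[i].name points into its 11-byte (IOAPIC_RESOURCE_NAME_SIZE)
 * slot in the trailing name area.
 */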
3842
3843void __init ioapic_init_mappings(void)
3844{
3845        unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
3846        struct resource *ioapic_res;
3847        int i;
3848
3849        irq_2_pin_init();
3850        ioapic_res = ioapic_setup_resources();
3851        for (i = 0; i < nr_ioapics; i++) {
3852                if (smp_found_config) {
3853                        ioapic_phys = mp_ioapics[i].mp_apicaddr;
3854#ifdef CONFIG_X86_32
3855                        if (!ioapic_phys) {
3856                                printk(KERN_ERR
3857                                       "WARNING: bogus zero IO-APIC "
3858                                       "address found in MPTABLE, "
3859                                       "disabling IO/APIC support!\n");
3860                                smp_found_config = 0;
3861                                skip_ioapic_setup = 1;
3862                                goto fake_ioapic_page;
3863                        }
3864#endif
3865                } else {
3866#ifdef CONFIG_X86_32
3867fake_ioapic_page:
3868#endif
3869                        ioapic_phys = (unsigned long)
3870                                alloc_bootmem_pages(PAGE_SIZE);
3871                        ioapic_phys = __pa(ioapic_phys);
3872                }
3873                set_fixmap_nocache(idx, ioapic_phys);
3874                apic_printk(APIC_VERBOSE,
3875                            "mapped IOAPIC to %08lx (%08lx)\n",
3876                            __fix_to_virt(idx), ioapic_phys);
3877                idx++;
3878
3879                if (ioapic_res != NULL) {
3880                        ioapic_res->start = ioapic_phys;
3881                        ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
3882                        ioapic_res++;
3883                }
3884        }
3885}
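/*
 * Mapping sketch: each IO-APIC register window gets a fixmap slot,
 * counting up from FIX_IO_APIC_BASE_0 via idx. When no MP-table address
 * is available (or a bogus zero address is found on 32-bit), a freshly
 * allocated bootmem page is mapped instead, presumably so that later
 * register accesses hit harmless memory rather than faulting.
 */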
3886
3887static int __init ioapic_insert_resources(void)
3888{
3889        int i;
3890        struct resource *r = ioapic_resources;
3891
3892        if (!r) {
3893                printk(KERN_ERR
3894                       "IO APIC resources could be not be allocated.\n");
3895                return -1;
3896        }
3897
3898        for (i = 0; i < nr_ioapics; i++) {
3899                insert_resource(&iomem_resource, r);
3900                r++;
3901        }
3902
3903        return 0;
3904}
3905
3906/* Insert the IO APIC resources after PCI initialization has occurred to handle
3907 * IO APICs that are mapped in on a BAR in PCI space. */
3908late_initcall(ioapic_insert_resources);