Showing error 1234

User: Jiri Slaby
Error type: Leaving function in locked state
Error type description: A lock acquired in a function is not released on every return path, so the lock is leaked (left held) when the function returns
File location: arch/x86/kernel/traps.c
Line in file: 112
Project: Linux Kernel
Project version: 2.6.28
Tools: Stanse (1.2)
Entered: 2012-05-21 20:30:05 UTC

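For context, a minimal sketch of this error class (a lock leaked on an early-return
path) and the usual fix is shown below. It is illustrative userspace C using pthread
mutexes with invented names; it is not code from the kernel and not output of Stanse.

    #include <pthread.h>
    #include <stddef.h>

    static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
    static int table[16];

    /* BUGGY: the early-return path leaves table_lock held ("leaked"). */
    int lookup_leaky(size_t idx, int *out)
    {
            pthread_mutex_lock(&table_lock);
            if (idx >= 16)
                    return -1;      /* returns with table_lock still held */
            *out = table[idx];
            pthread_mutex_unlock(&table_lock);
            return 0;
    }

    /* FIXED: every path out of the function releases the lock. */
    int lookup_fixed(size_t idx, int *out)
    {
            int ret = 0;

            pthread_mutex_lock(&table_lock);
            if (idx >= 16)
                    ret = -1;
            else
                    *out = table[idx];
            pthread_mutex_unlock(&table_lock);
            return ret;
    }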

Source:

   1/*
   2 *  Copyright (C) 1991, 1992  Linus Torvalds
   3 *  Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
   4 *
   5 *  Pentium III FXSR, SSE support
   6 *        Gareth Hughes <gareth@valinux.com>, May 2000
   7 */
   8
   9/*
  10 * Handle hardware traps and faults.
  11 */
  12#include <linux/interrupt.h>
  13#include <linux/kallsyms.h>
  14#include <linux/spinlock.h>
  15#include <linux/kprobes.h>
  16#include <linux/uaccess.h>
  17#include <linux/utsname.h>
  18#include <linux/kdebug.h>
  19#include <linux/kernel.h>
  20#include <linux/module.h>
  21#include <linux/ptrace.h>
  22#include <linux/string.h>
  23#include <linux/unwind.h>
  24#include <linux/delay.h>
  25#include <linux/errno.h>
  26#include <linux/kexec.h>
  27#include <linux/sched.h>
  28#include <linux/timer.h>
  29#include <linux/init.h>
  30#include <linux/bug.h>
  31#include <linux/nmi.h>
  32#include <linux/mm.h>
  33#include <linux/smp.h>
  34#include <linux/io.h>
  35
  36#ifdef CONFIG_EISA
  37#include <linux/ioport.h>
  38#include <linux/eisa.h>
  39#endif
  40
  41#ifdef CONFIG_MCA
  42#include <linux/mca.h>
  43#endif
  44
  45#if defined(CONFIG_EDAC)
  46#include <linux/edac.h>
  47#endif
  48
  49#include <asm/stacktrace.h>
  50#include <asm/processor.h>
  51#include <asm/debugreg.h>
  52#include <asm/atomic.h>
  53#include <asm/system.h>
  54#include <asm/unwind.h>
  55#include <asm/traps.h>
  56#include <asm/desc.h>
  57#include <asm/i387.h>
  58
  59#include <mach_traps.h>
  60
  61#ifdef CONFIG_X86_64
  62#include <asm/pgalloc.h>
  63#include <asm/proto.h>
  64#include <asm/pda.h>
  65#else
  66#include <asm/processor-flags.h>
  67#include <asm/arch_hooks.h>
  68#include <asm/nmi.h>
  69#include <asm/smp.h>
  70#include <asm/io.h>
  71#include <asm/traps.h>
  72
  73#include "cpu/mcheck/mce.h"
  74
  75DECLARE_BITMAP(used_vectors, NR_VECTORS);
  76EXPORT_SYMBOL_GPL(used_vectors);
  77
  78asmlinkage int system_call(void);
  79
  80/* Do we ignore FPU interrupts ? */
  81char ignore_fpu_irq;
  82
  83/*
  84 * The IDT has to be page-aligned to simplify the Pentium
  85 * F0 0F bug workaround.. We have a special link segment
  86 * for this.
  87 */
  88gate_desc idt_table[256]
  89        __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, };
  90#endif
  91
  92static int ignore_nmis;
  93
  94static inline void conditional_sti(struct pt_regs *regs)
  95{
  96        if (regs->flags & X86_EFLAGS_IF)
  97                local_irq_enable();
  98}
  99
 100static inline void preempt_conditional_sti(struct pt_regs *regs)
 101{
 102        inc_preempt_count();
 103        if (regs->flags & X86_EFLAGS_IF)
 104                local_irq_enable();
 105}
 106
 107static inline void preempt_conditional_cli(struct pt_regs *regs)
 108{
 109        if (regs->flags & X86_EFLAGS_IF)
 110                local_irq_disable();
 111        dec_preempt_count();
 112}
 113
 114#ifdef CONFIG_X86_32
 115static inline void
 116die_if_kernel(const char *str, struct pt_regs *regs, long err)
 117{
 118        if (!user_mode_vm(regs))
 119                die(str, regs, err);
 120}
 121
 122/*
 123 * Perform the lazy TSS's I/O bitmap copy. If the TSS has an
 124 * invalid offset set (the LAZY one) and the faulting thread has
 125 * a valid I/O bitmap pointer, we copy the I/O bitmap in the TSS,
 126 * we set the offset field correctly and return 1.
 127 */
 128static int lazy_iobitmap_copy(void)
 129{
 130        struct thread_struct *thread;
 131        struct tss_struct *tss;
 132        int cpu;
 133
 134        cpu = get_cpu();
 135        tss = &per_cpu(init_tss, cpu);
 136        thread = &current->thread;
 137
 138        if (tss->x86_tss.io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY &&
 139            thread->io_bitmap_ptr) {
 140                memcpy(tss->io_bitmap, thread->io_bitmap_ptr,
 141                       thread->io_bitmap_max);
 142                /*
 143                 * If the previously set map was extending to higher ports
 144                 * than the current one, pad extra space with 0xff (no access).
 145                 */
 146                if (thread->io_bitmap_max < tss->io_bitmap_max) {
 147                        memset((char *) tss->io_bitmap +
 148                                thread->io_bitmap_max, 0xff,
 149                                tss->io_bitmap_max - thread->io_bitmap_max);
 150                }
 151                tss->io_bitmap_max = thread->io_bitmap_max;
 152                tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
 153                tss->io_bitmap_owner = thread;
 154                put_cpu();
 155
 156                return 1;
 157        }
 158        put_cpu();
 159
 160        return 0;
 161}
 162#endif
 163
 164static void __kprobes
 165do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
 166        long error_code, siginfo_t *info)
 167{
 168        struct task_struct *tsk = current;
 169
 170#ifdef CONFIG_X86_32
 171        if (regs->flags & X86_VM_MASK) {
 172                /*
 173                 * traps 0, 1, 3, 4, and 5 should be forwarded to vm86.
 174                 * On nmi (interrupt 2), do_trap should not be called.
 175                 */
 176                if (trapnr < 6)
 177                        goto vm86_trap;
 178                goto trap_signal;
 179        }
 180#endif
 181
 182        if (!user_mode(regs))
 183                goto kernel_trap;
 184
 185#ifdef CONFIG_X86_32
 186trap_signal:
 187#endif
 188        /*
 189         * We want error_code and trap_no set for userspace faults and
 190         * kernelspace faults which result in die(), but not
 191         * kernelspace faults which are fixed up.  die() gives the
 192         * process no chance to handle the signal and notice the
 193         * kernel fault information, so that won't result in polluting
 194         * the information about previously queued, but not yet
 195         * delivered, faults.  See also do_general_protection below.
 196         */
 197        tsk->thread.error_code = error_code;
 198        tsk->thread.trap_no = trapnr;
 199
 200#ifdef CONFIG_X86_64
 201        if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
 202            printk_ratelimit()) {
 203                printk(KERN_INFO
 204                       "%s[%d] trap %s ip:%lx sp:%lx error:%lx",
 205                       tsk->comm, tsk->pid, str,
 206                       regs->ip, regs->sp, error_code);
 207                print_vma_addr(" in ", regs->ip);
 208                printk("\n");
 209        }
 210#endif
 211
 212        if (info)
 213                force_sig_info(signr, info, tsk);
 214        else
 215                force_sig(signr, tsk);
 216        return;
 217
 218kernel_trap:
 219        if (!fixup_exception(regs)) {
 220                tsk->thread.error_code = error_code;
 221                tsk->thread.trap_no = trapnr;
 222                die(str, regs, error_code);
 223        }
 224        return;
 225
 226#ifdef CONFIG_X86_32
 227vm86_trap:
 228        if (handle_vm86_trap((struct kernel_vm86_regs *) regs,
 229                                                error_code, trapnr))
 230                goto trap_signal;
 231        return;
 232#endif
 233}
 234
 235#define DO_ERROR(trapnr, signr, str, name)                                \
 236dotraplinkage void do_##name(struct pt_regs *regs, long error_code)        \
 237{                                                                        \
 238        if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr)        \
 239                                                        == NOTIFY_STOP)        \
 240                return;                                                        \
 241        conditional_sti(regs);                                                \
 242        do_trap(trapnr, signr, str, regs, error_code, NULL);                \
 243}
 244
 245#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr)                \
 246dotraplinkage void do_##name(struct pt_regs *regs, long error_code)        \
 247{                                                                        \
 248        siginfo_t info;                                                        \
 249        info.si_signo = signr;                                                \
 250        info.si_errno = 0;                                                \
 251        info.si_code = sicode;                                                \
 252        info.si_addr = (void __user *)siaddr;                                \
 253        if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr)        \
 254                                                        == NOTIFY_STOP)        \
 255                return;                                                        \
 256        conditional_sti(regs);                                                \
 257        do_trap(trapnr, signr, str, regs, error_code, &info);                \
 258}
 259
 260DO_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip)
 261DO_ERROR(4, SIGSEGV, "overflow", overflow)
 262DO_ERROR(5, SIGSEGV, "bounds", bounds)
 263DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip)
 264DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
 265DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
 266DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
 267#ifdef CONFIG_X86_32
 268DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
 269#endif
 270DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
 271
 272#ifdef CONFIG_X86_64
 273/* Runs on IST stack */
 274dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code)
 275{
 276        if (notify_die(DIE_TRAP, "stack segment", regs, error_code,
 277                        12, SIGBUS) == NOTIFY_STOP)
 278                return;
 279        preempt_conditional_sti(regs);
 280        do_trap(12, SIGBUS, "stack segment", regs, error_code, NULL);
 281        preempt_conditional_cli(regs);
 282}
 283
 284dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
 285{
 286        static const char str[] = "double fault";
 287        struct task_struct *tsk = current;
 288
 289        /* Return not checked because double check cannot be ignored */
 290        notify_die(DIE_TRAP, str, regs, error_code, 8, SIGSEGV);
 291
 292        tsk->thread.error_code = error_code;
 293        tsk->thread.trap_no = 8;
 294
 295        /* This is always a kernel trap and never fixable (and thus must
 296           never return). */
 297        for (;;)
 298                die(str, regs, error_code);
 299}
 300#endif
 301
 302dotraplinkage void __kprobes
 303do_general_protection(struct pt_regs *regs, long error_code)
 304{
 305        struct task_struct *tsk;
 306
 307        conditional_sti(regs);
 308
 309#ifdef CONFIG_X86_32
 310        if (lazy_iobitmap_copy()) {
 311                /* restart the faulting instruction */
 312                return;
 313        }
 314
 315        if (regs->flags & X86_VM_MASK)
 316                goto gp_in_vm86;
 317#endif
 318
 319        tsk = current;
 320        if (!user_mode(regs))
 321                goto gp_in_kernel;
 322
 323        tsk->thread.error_code = error_code;
 324        tsk->thread.trap_no = 13;
 325
 326        if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
 327                        printk_ratelimit()) {
 328                printk(KERN_INFO
 329                        "%s[%d] general protection ip:%lx sp:%lx error:%lx",
 330                        tsk->comm, task_pid_nr(tsk),
 331                        regs->ip, regs->sp, error_code);
 332                print_vma_addr(" in ", regs->ip);
 333                printk("\n");
 334        }
 335
 336        force_sig(SIGSEGV, tsk);
 337        return;
 338
 339#ifdef CONFIG_X86_32
 340gp_in_vm86:
 341        local_irq_enable();
 342        handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
 343        return;
 344#endif
 345
 346gp_in_kernel:
 347        if (fixup_exception(regs))
 348                return;
 349
 350        tsk->thread.error_code = error_code;
 351        tsk->thread.trap_no = 13;
 352        if (notify_die(DIE_GPF, "general protection fault", regs,
 353                                error_code, 13, SIGSEGV) == NOTIFY_STOP)
 354                return;
 355        die("general protection fault", regs, error_code);
 356}
 357
 358static notrace __kprobes void
 359mem_parity_error(unsigned char reason, struct pt_regs *regs)
 360{
 361        printk(KERN_EMERG
 362                "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
 363                        reason, smp_processor_id());
 364
 365        printk(KERN_EMERG
 366                "You have some hardware problem, likely on the PCI bus.\n");
 367
 368#if defined(CONFIG_EDAC)
 369        if (edac_handler_set()) {
 370                edac_atomic_assert_error();
 371                return;
 372        }
 373#endif
 374
 375        if (panic_on_unrecovered_nmi)
 376                panic("NMI: Not continuing");
 377
 378        printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
 379
 380        /* Clear and disable the memory parity error line. */
 381        reason = (reason & 0xf) | 4;
 382        outb(reason, 0x61);
 383}
 384
 385static notrace __kprobes void
 386io_check_error(unsigned char reason, struct pt_regs *regs)
 387{
 388        unsigned long i;
 389
 390        printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n");
 391        show_registers(regs);
 392
 393        /* Re-enable the IOCK line, wait for a few seconds */
 394        reason = (reason & 0xf) | 8;
 395        outb(reason, 0x61);
 396
 397        i = 2000;
 398        while (--i)
 399                udelay(1000);
 400
 401        reason &= ~8;
 402        outb(reason, 0x61);
 403}
 404
 405static notrace __kprobes void
 406unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
 407{
 408        if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) ==
 409                        NOTIFY_STOP)
 410                return;
 411#ifdef CONFIG_MCA
 412        /*
 413         * Might actually be able to figure out what the guilty party
 414         * is:
 415         */
 416        if (MCA_bus) {
 417                mca_handle_nmi();
 418                return;
 419        }
 420#endif
 421        printk(KERN_EMERG
 422                "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
 423                        reason, smp_processor_id());
 424
 425        printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
 426        if (panic_on_unrecovered_nmi)
 427                panic("NMI: Not continuing");
 428
 429        printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
 430}
 431
 432static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
 433{
 434        unsigned char reason = 0;
 435        int cpu;
 436
 437        cpu = smp_processor_id();
 438
 439        /* Only the BSP gets external NMIs from the system. */
 440        if (!cpu)
 441                reason = get_nmi_reason();
 442
 443        if (!(reason & 0xc0)) {
 444                if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
 445                                                                == NOTIFY_STOP)
 446                        return;
 447#ifdef CONFIG_X86_LOCAL_APIC
 448                /*
 449                 * Ok, so this is none of the documented NMI sources,
 450                 * so it must be the NMI watchdog.
 451                 */
 452                if (nmi_watchdog_tick(regs, reason))
 453                        return;
 454                if (!do_nmi_callback(regs, cpu))
 455                        unknown_nmi_error(reason, regs);
 456#else
 457                unknown_nmi_error(reason, regs);
 458#endif
 459
 460                return;
 461        }
 462        if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
 463                return;
 464
 465        /* AK: following checks seem to be broken on modern chipsets. FIXME */
 466        if (reason & 0x80)
 467                mem_parity_error(reason, regs);
 468        if (reason & 0x40)
 469                io_check_error(reason, regs);
 470#ifdef CONFIG_X86_32
 471        /*
 472         * Reassert NMI in case it became active meanwhile
 473         * as it's edge-triggered:
 474         */
 475        reassert_nmi();
 476#endif
 477}
 478
 479dotraplinkage notrace __kprobes void
 480do_nmi(struct pt_regs *regs, long error_code)
 481{
 482        nmi_enter();
 483
 484#ifdef CONFIG_X86_32
 485        { int cpu; cpu = smp_processor_id(); ++nmi_count(cpu); }
 486#else
 487        add_pda(__nmi_count, 1);
 488#endif
 489
 490        if (!ignore_nmis)
 491                default_do_nmi(regs);
 492
 493        nmi_exit();
 494}
 495
 496void stop_nmi(void)
 497{
 498        acpi_nmi_disable();
 499        ignore_nmis++;
 500}
 501
 502void restart_nmi(void)
 503{
 504        ignore_nmis--;
 505        acpi_nmi_enable();
 506}
 507
 508/* May run on IST stack. */
 509dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
 510{
 511#ifdef CONFIG_KPROBES
 512        if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
 513                        == NOTIFY_STOP)
 514                return;
 515#else
 516        if (notify_die(DIE_TRAP, "int3", regs, error_code, 3, SIGTRAP)
 517                        == NOTIFY_STOP)
 518                return;
 519#endif
 520
 521        preempt_conditional_sti(regs);
 522        do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
 523        preempt_conditional_cli(regs);
 524}
 525
 526#ifdef CONFIG_X86_64
 527/* Help handler running on IST stack to switch back to user stack
 528   for scheduling or signal handling. The actual stack switch is done in
 529   entry.S */
 530asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
 531{
 532        struct pt_regs *regs = eregs;
 533        /* Did already sync */
 534        if (eregs == (struct pt_regs *)eregs->sp)
 535                ;
 536        /* Exception from user space */
 537        else if (user_mode(eregs))
 538                regs = task_pt_regs(current);
 539        /* Exception from kernel and interrupts are enabled. Move to
 540           kernel process stack. */
 541        else if (eregs->flags & X86_EFLAGS_IF)
 542                regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs));
 543        if (eregs != regs)
 544                *regs = *eregs;
 545        return regs;
 546}
 547#endif
 548
 549/*
 550 * Our handling of the processor debug registers is non-trivial.
 551 * We do not clear them on entry and exit from the kernel. Therefore
 552 * it is possible to get a watchpoint trap here from inside the kernel.
 553 * However, the code in ./ptrace.c has ensured that the user can
 554 * only set watchpoints on userspace addresses. Therefore the in-kernel
 555 * watchpoint trap can only occur in code which is reading/writing
 556 * from user space. Such code must not hold kernel locks (since it
 557 * can equally take a page fault), therefore it is safe to call
 558 * force_sig_info even though that claims and releases locks.
 559 *
 560 * Code in ./signal.c ensures that the debug control register
 561 * is restored before we deliver any signal, and therefore that
 562 * user code runs with the correct debug control register even though
 563 * we clear it here.
 564 *
 565 * Being careful here means that we don't have to be as careful in a
 566 * lot of more complicated places (task switching can be a bit lazy
 567 * about restoring all the debug state, and ptrace doesn't have to
 568 * find every occurrence of the TF bit that could be saved away even
 569 * by user code)
 570 *
 571 * May run on IST stack.
 572 */
 573dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 574{
 575        struct task_struct *tsk = current;
 576        unsigned long condition;
 577        int si_code;
 578
 579        get_debugreg(condition, 6);
 580
 581        /*
 582         * The processor cleared BTF, so don't mark that we need it set.
 583         */
 584        clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR);
 585        tsk->thread.debugctlmsr = 0;
 586
 587        if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
 588                                                SIGTRAP) == NOTIFY_STOP)
 589                return;
 590
 591        /* It's safe to allow irq's after DR6 has been saved */
 592        preempt_conditional_sti(regs);
 593
 594        /* Mask out spurious debug traps due to lazy DR7 setting */
 595        if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
 596                if (!tsk->thread.debugreg7)
 597                        goto clear_dr7;
 598        }
 599
 600#ifdef CONFIG_X86_32
 601        if (regs->flags & X86_VM_MASK)
 602                goto debug_vm86;
 603#endif
 604
 605        /* Save debug status register where ptrace can see it */
 606        tsk->thread.debugreg6 = condition;
 607
 608        /*
 609         * Single-stepping through TF: make sure we ignore any events in
 610         * kernel space (but re-enable TF when returning to user mode).
 611         */
 612        if (condition & DR_STEP) {
 613                if (!user_mode(regs))
 614                        goto clear_TF_reenable;
 615        }
 616
 617        si_code = get_si_code(condition);
 618        /* Ok, finally something we can handle */
 619        send_sigtrap(tsk, regs, error_code, si_code);
 620
 621        /*
 622         * Disable additional traps. They'll be re-enabled when
 623         * the signal is delivered.
 624         */
 625clear_dr7:
 626        set_debugreg(0, 7);
 627        preempt_conditional_cli(regs);
 628        return;
 629
 630#ifdef CONFIG_X86_32
 631debug_vm86:
 632        handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
 633        preempt_conditional_cli(regs);
 634        return;
 635#endif
 636
 637clear_TF_reenable:
 638        set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
 639        regs->flags &= ~X86_EFLAGS_TF;
 640        preempt_conditional_cli(regs);
 641        return;
 642}
 643
 644#ifdef CONFIG_X86_64
 645static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr)
 646{
 647        if (fixup_exception(regs))
 648                return 1;
 649
 650        notify_die(DIE_GPF, str, regs, 0, trapnr, SIGFPE);
 651        /* Illegal floating point operation in the kernel */
 652        current->thread.trap_no = trapnr;
 653        die(str, regs, 0);
 654        return 0;
 655}
 656#endif
 657
 658/*
 659 * Note that we play around with the 'TS' bit in an attempt to get
 660 * the correct behaviour even in the presence of the asynchronous
 661 * IRQ13 behaviour
 662 */
 663void math_error(void __user *ip)
 664{
 665        struct task_struct *task;
 666        siginfo_t info;
 667        unsigned short cwd, swd;
 668
 669        /*
 670         * Save the info for the exception handler and clear the error.
 671         */
 672        task = current;
 673        save_init_fpu(task);
 674        task->thread.trap_no = 16;
 675        task->thread.error_code = 0;
 676        info.si_signo = SIGFPE;
 677        info.si_errno = 0;
 678        info.si_code = __SI_FAULT;
 679        info.si_addr = ip;
 680        /*
 681         * (~cwd & swd) will mask out exceptions that are not set to unmasked
 682         * status.  0x3f is the exception bits in these regs, 0x200 is the
 683         * C1 reg you need in case of a stack fault, 0x040 is the stack
 684         * fault bit.  We should only be taking one exception at a time,
 685         * so if this combination doesn't produce any single exception,
 686         * then we have a bad program that isn't synchronizing its FPU usage
 687         * and it will suffer the consequences since we won't be able to
 688         * fully reproduce the context of the exception
 689         */
 690        cwd = get_fpu_cwd(task);
 691        swd = get_fpu_swd(task);
 692        switch (swd & ~cwd & 0x3f) {
 693        case 0x000: /* No unmasked exception */
 694#ifdef CONFIG_X86_32
 695                return;
 696#endif
 697        default: /* Multiple exceptions */
 698                break;
 699        case 0x001: /* Invalid Op */
 700                /*
 701                 * swd & 0x240 == 0x040: Stack Underflow
 702                 * swd & 0x240 == 0x240: Stack Overflow
 703                 * User must clear the SF bit (0x40) if set
 704                 */
 705                info.si_code = FPE_FLTINV;
 706                break;
 707        case 0x002: /* Denormalize */
 708        case 0x010: /* Underflow */
 709                info.si_code = FPE_FLTUND;
 710                break;
 711        case 0x004: /* Zero Divide */
 712                info.si_code = FPE_FLTDIV;
 713                break;
 714        case 0x008: /* Overflow */
 715                info.si_code = FPE_FLTOVF;
 716                break;
 717        case 0x020: /* Precision */
 718                info.si_code = FPE_FLTRES;
 719                break;
 720        }
 721        force_sig_info(SIGFPE, &info, task);
 722}
 723
 724dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code)
 725{
 726        conditional_sti(regs);
 727
 728#ifdef CONFIG_X86_32
 729        ignore_fpu_irq = 1;
 730#else
 731        if (!user_mode(regs) &&
 732            kernel_math_error(regs, "kernel x87 math error", 16))
 733                return;
 734#endif
 735
 736        math_error((void __user *)regs->ip);
 737}
 738
 739static void simd_math_error(void __user *ip)
 740{
 741        struct task_struct *task;
 742        siginfo_t info;
 743        unsigned short mxcsr;
 744
 745        /*
 746         * Save the info for the exception handler and clear the error.
 747         */
 748        task = current;
 749        save_init_fpu(task);
 750        task->thread.trap_no = 19;
 751        task->thread.error_code = 0;
 752        info.si_signo = SIGFPE;
 753        info.si_errno = 0;
 754        info.si_code = __SI_FAULT;
 755        info.si_addr = ip;
 756        /*
 757         * The SIMD FPU exceptions are handled a little differently, as there
 758         * is only a single status/control register.  Thus, to determine which
 759         * unmasked exception was caught we must mask the exception mask bits
 760         * at 0x1f80, and then use these to mask the exception bits at 0x3f.
 761         */
 762        mxcsr = get_fpu_mxcsr(task);
 763        switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) {
 764        case 0x000:
 765        default:
 766                break;
 767        case 0x001: /* Invalid Op */
 768                info.si_code = FPE_FLTINV;
 769                break;
 770        case 0x002: /* Denormalize */
 771        case 0x010: /* Underflow */
 772                info.si_code = FPE_FLTUND;
 773                break;
 774        case 0x004: /* Zero Divide */
 775                info.si_code = FPE_FLTDIV;
 776                break;
 777        case 0x008: /* Overflow */
 778                info.si_code = FPE_FLTOVF;
 779                break;
 780        case 0x020: /* Precision */
 781                info.si_code = FPE_FLTRES;
 782                break;
 783        }
 784        force_sig_info(SIGFPE, &info, task);
 785}
 786
 787dotraplinkage void
 788do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
 789{
 790        conditional_sti(regs);
 791
 792#ifdef CONFIG_X86_32
 793        if (cpu_has_xmm) {
 794                /* Handle SIMD FPU exceptions on PIII+ processors. */
 795                ignore_fpu_irq = 1;
 796                simd_math_error((void __user *)regs->ip);
 797                return;
 798        }
 799        /*
 800         * Handle strange cache flush from user space exception
 801         * in all other cases.  This is undocumented behaviour.
 802         */
 803        if (regs->flags & X86_VM_MASK) {
 804                handle_vm86_fault((struct kernel_vm86_regs *)regs, error_code);
 805                return;
 806        }
 807        current->thread.trap_no = 19;
 808        current->thread.error_code = error_code;
 809        die_if_kernel("cache flush denied", regs, error_code);
 810        force_sig(SIGSEGV, current);
 811#else
 812        if (!user_mode(regs) &&
 813                        kernel_math_error(regs, "kernel simd math error", 19))
 814                return;
 815        simd_math_error((void __user *)regs->ip);
 816#endif
 817}
 818
 819dotraplinkage void
 820do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
 821{
 822        conditional_sti(regs);
 823#if 0
 824        /* No need to warn about this any longer. */
 825        printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n");
 826#endif
 827}
 828
 829#ifdef CONFIG_X86_32
 830unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp)
 831{
 832        struct desc_struct *gdt = get_cpu_gdt_table(smp_processor_id());
 833        unsigned long base = (kesp - uesp) & -THREAD_SIZE;
 834        unsigned long new_kesp = kesp - base;
 835        unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT;
 836        __u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS];
 837
 838        /* Set up base for espfix segment */
 839        desc &= 0x00f0ff0000000000ULL;
 840        desc |=        ((((__u64)base) << 16) & 0x000000ffffff0000ULL) |
 841                ((((__u64)base) << 32) & 0xff00000000000000ULL) |
 842                ((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) |
 843                (lim_pages & 0xffff);
 844        *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc;
 845
 846        return new_kesp;
 847}
 848#else
 849asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
 850{
 851}
 852
 853asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void)
 854{
 855}
 856#endif
 857
 858/*
 859 * 'math_state_restore()' saves the current math information in the
 860 * old math state array, and gets the new ones from the current task
 861 *
 862 * Careful.. There are problems with IBM-designed IRQ13 behaviour.
 863 * Don't touch unless you *really* know how it works.
 864 *
 865 * Must be called with kernel preemption disabled (in this case,
 866 * local interrupts are disabled at the call-site in entry.S).
 867 */
 868asmlinkage void math_state_restore(void)
 869{
 870        struct thread_info *thread = current_thread_info();
 871        struct task_struct *tsk = thread->task;
 872
 873        if (!tsk_used_math(tsk)) {
 874                local_irq_enable();
 875                /*
 876                 * does a slab alloc which can sleep
 877                 */
 878                if (init_fpu(tsk)) {
 879                        /*
 880                         * ran out of memory!
 881                         */
 882                        do_group_exit(SIGKILL);
 883                        return;
 884                }
 885                local_irq_disable();
 886        }
 887
 888        clts();                                /* Allow maths ops (or we recurse) */
 889#ifdef CONFIG_X86_32
 890        restore_fpu(tsk);
 891#else
 892        /*
 893         * Paranoid restore. send a SIGSEGV if we fail to restore the state.
 894         */
 895        if (unlikely(restore_fpu_checking(tsk))) {
 896                stts();
 897                force_sig(SIGSEGV, tsk);
 898                return;
 899        }
 900#endif
 901        thread->status |= TS_USEDFPU;        /* So we fnsave on switch_to() */
 902        tsk->fpu_counter++;
 903}
 904EXPORT_SYMBOL_GPL(math_state_restore);
 905
 906#ifndef CONFIG_MATH_EMULATION
 907asmlinkage void math_emulate(long arg)
 908{
 909        printk(KERN_EMERG
 910                "math-emulation not enabled and no coprocessor found.\n");
 911        printk(KERN_EMERG "killing %s.\n", current->comm);
 912        force_sig(SIGFPE, current);
 913        schedule();
 914}
 915#endif /* CONFIG_MATH_EMULATION */
 916
 917dotraplinkage void __kprobes
 918do_device_not_available(struct pt_regs *regs, long error)
 919{
 920#ifdef CONFIG_X86_32
 921        if (read_cr0() & X86_CR0_EM) {
 922                conditional_sti(regs);
 923                math_emulate(0);
 924        } else {
 925                math_state_restore(); /* interrupts still off */
 926                conditional_sti(regs);
 927        }
 928#else
 929        math_state_restore();
 930#endif
 931}
 932
 933#ifdef CONFIG_X86_32
 934dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
 935{
 936        siginfo_t info;
 937        local_irq_enable();
 938
 939        info.si_signo = SIGILL;
 940        info.si_errno = 0;
 941        info.si_code = ILL_BADSTK;
 942        info.si_addr = 0;
 943        if (notify_die(DIE_TRAP, "iret exception",
 944                        regs, error_code, 32, SIGILL) == NOTIFY_STOP)
 945                return;
 946        do_trap(32, SIGILL, "iret exception", regs, error_code, &info);
 947}
 948#endif
 949
 950void __init trap_init(void)
 951{
 952#ifdef CONFIG_X86_32
 953        int i;
 954#endif
 955
 956#ifdef CONFIG_EISA
 957        void __iomem *p = early_ioremap(0x0FFFD9, 4);
 958
 959        if (readl(p) == 'E' + ('I'<<8) + ('S'<<16) + ('A'<<24))
 960                EISA_bus = 1;
 961        early_iounmap(p, 4);
 962#endif
 963
 964        set_intr_gate(0, &divide_error);
 965        set_intr_gate_ist(1, &debug, DEBUG_STACK);
 966        set_intr_gate_ist(2, &nmi, NMI_STACK);
 967        /* int3 can be called from all */
 968        set_system_intr_gate_ist(3, &int3, DEBUG_STACK);
 969        /* int4 can be called from all */
 970        set_system_intr_gate(4, &overflow);
 971        set_intr_gate(5, &bounds);
 972        set_intr_gate(6, &invalid_op);
 973        set_intr_gate(7, &device_not_available);
 974#ifdef CONFIG_X86_32
 975        set_task_gate(8, GDT_ENTRY_DOUBLEFAULT_TSS);
 976#else
 977        set_intr_gate_ist(8, &double_fault, DOUBLEFAULT_STACK);
 978#endif
 979        set_intr_gate(9, &coprocessor_segment_overrun);
 980        set_intr_gate(10, &invalid_TSS);
 981        set_intr_gate(11, &segment_not_present);
 982        set_intr_gate_ist(12, &stack_segment, STACKFAULT_STACK);
 983        set_intr_gate(13, &general_protection);
 984        set_intr_gate(14, &page_fault);
 985        set_intr_gate(15, &spurious_interrupt_bug);
 986        set_intr_gate(16, &coprocessor_error);
 987        set_intr_gate(17, &alignment_check);
 988#ifdef CONFIG_X86_MCE
 989        set_intr_gate_ist(18, &machine_check, MCE_STACK);
 990#endif
 991        set_intr_gate(19, &simd_coprocessor_error);
 992
 993#ifdef CONFIG_IA32_EMULATION
 994        set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall);
 995#endif
 996
 997#ifdef CONFIG_X86_32
 998        if (cpu_has_fxsr) {
 999                printk(KERN_INFO "Enabling fast FPU save and restore... ");
1000                set_in_cr4(X86_CR4_OSFXSR);
1001                printk("done.\n");
1002        }
1003        if (cpu_has_xmm) {
1004                printk(KERN_INFO
1005                        "Enabling unmasked SIMD FPU exception support... ");
1006                set_in_cr4(X86_CR4_OSXMMEXCPT);
1007                printk("done.\n");
1008        }
1009
1010        set_system_trap_gate(SYSCALL_VECTOR, &system_call);
1011
1012        /* Reserve all the builtin and the syscall vector: */
1013        for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
1014                set_bit(i, used_vectors);
1015
1016        set_bit(SYSCALL_VECTOR, used_vectors);
1017#endif
1018        /*
1019         * Should be a barrier for any external CPU state:
1020         */
1021        cpu_init();
1022
1023#ifdef CONFIG_X86_32
1024        trap_init_hook();
1025#endif
1026}
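
Note on the flagged location: file line 112 is the closing brace of
preempt_conditional_cli() in the listing above. One plausible reading of the report
(an assumption, not something the tool output above states) is that the checker models
"interrupts disabled" or "preempt count raised" as a held lock, and
preempt_conditional_cli() returns in that state. In the kernel the split is deliberate:
preempt_conditional_sti() and preempt_conditional_cli() are the two halves of a matched
bracket placed around trap bodies such as do_int3() and do_debug(). A hypothetical,
self-contained abstraction of that pairing (all names invented for illustration):

    #include <stdbool.h>

    static int fake_preempt_count;
    static bool fake_irqs_on = true;

    /* Analogue of preempt_conditional_sti(): unbalanced on its own. */
    static void enter_trap_region(bool irqs_were_on)
    {
            fake_preempt_count++;           /* looks like an "acquire" to a checker */
            if (irqs_were_on)
                    fake_irqs_on = true;
    }

    /* Analogue of preempt_conditional_cli(): unbalanced on its own. */
    static void leave_trap_region(bool irqs_were_on)
    {
            if (irqs_were_on)
                    fake_irqs_on = false;   /* looks like an "acquire" (IRQs off) */
            fake_preempt_count--;
    }

    /*
     * Callers always use the two as a matched pair, so the state is balanced
     * across the pair even though neither helper is balanced in isolation;
     * an intra-procedural lock checker sees only one half at a time.
     */
    void handle_trap_sketch(bool irqs_were_on)
    {
            enter_trap_region(irqs_were_on);
            /* ... trap body ... */
            leave_trap_region(irqs_were_on);
    }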