Showing error 856

User: Jiri Slaby
Error type: Resource Leak
Error type description: The code fails to release an acquired resource back to the system for reuse
File location: arch/x86/kernel/kprobes.c
Line in file: 542
Project: Linux Kernel
Project version: 2.6.28
Tools: Stanse (1.2)
Entered: 2011-11-07 22:40:13 UTC
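
Note:

The flagged line 542 is the preempt_disable() call in kprobe_handler(). As the
function's own comment (lines 536-541) explains, preemption is conditionally
re-enabled at the end of the function and also in reenter_kprobe() and
setup_singlestep(), so some return paths (for instance the reentry path that
returns 1 at line 550) leave kprobe_handler() with preemption still disabled.
The checker presumably analyses the function on its own and reports the
preempt count as a resource that is not released on every path; whether this
is a real leak or an intentional hand-off to the later kprobe handlers is left
to the reviewer.

A minimal sketch of the acquire/early-return shape being reported; the
function name handler_like() and the reenter flag are made up for
illustration, this is not the kernel code:

    #include <linux/preempt.h>

    static int handler_like(int reenter)
    {
            preempt_disable();              /* resource acquired (cf. line 542) */

            if (reenter)
                    return 1;               /* returns with preemption still disabled (cf. line 550) */

            preempt_enable_no_resched();    /* released only on this path (cf. line 575) */
            return 0;
    }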


Source:

   1/*
   2 *  Kernel Probes (KProbes)
   3 *
   4 * This program is free software; you can redistribute it and/or modify
   5 * it under the terms of the GNU General Public License as published by
   6 * the Free Software Foundation; either version 2 of the License, or
   7 * (at your option) any later version.
   8 *
   9 * This program is distributed in the hope that it will be useful,
  10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 * GNU General Public License for more details.
  13 *
  14 * You should have received a copy of the GNU General Public License
  15 * along with this program; if not, write to the Free Software
  16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  17 *
  18 * Copyright (C) IBM Corporation, 2002, 2004
  19 *
  20 * 2002-Oct        Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
  21 *                Probes initial implementation ( includes contributions from
  22 *                Rusty Russell).
  23 * 2004-July        Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
  24 *                interface to access function arguments.
  25 * 2004-Oct        Jim Keniston <jkenisto@us.ibm.com> and Prasanna S Panchamukhi
  26 *                <prasanna@in.ibm.com> adapted for x86_64 from i386.
  27 * 2005-Mar        Roland McGrath <roland@redhat.com>
  28 *                Fixed to handle %rip-relative addressing mode correctly.
  29 * 2005-May        Hien Nguyen <hien@us.ibm.com>, Jim Keniston
  30 *                <jkenisto@us.ibm.com> and Prasanna S Panchamukhi
  31 *                <prasanna@in.ibm.com> added function-return probes.
  32 * 2005-May        Rusty Lynch <rusty.lynch@intel.com>
  33 *                 Added function return probes functionality
  34 * 2006-Feb        Masami Hiramatsu <hiramatu@sdl.hitachi.co.jp> added
  35 *                 kprobe-booster and kretprobe-booster for i386.
  36 * 2007-Dec        Masami Hiramatsu <mhiramat@redhat.com> added kprobe-booster
  37 *                 and kretprobe-booster for x86-64
  38 * 2007-Dec        Masami Hiramatsu <mhiramat@redhat.com>, Arjan van de Ven
  39 *                 <arjan@infradead.org> and Jim Keniston <jkenisto@us.ibm.com>
  40 *                 unified x86 kprobes code.
  41 */
  42
  43#include <linux/kprobes.h>
  44#include <linux/ptrace.h>
  45#include <linux/string.h>
  46#include <linux/slab.h>
  47#include <linux/hardirq.h>
  48#include <linux/preempt.h>
  49#include <linux/module.h>
  50#include <linux/kdebug.h>
  51
  52#include <asm/cacheflush.h>
  53#include <asm/desc.h>
  54#include <asm/pgtable.h>
  55#include <asm/uaccess.h>
  56#include <asm/alternative.h>
  57
  58void jprobe_return_end(void);
  59
  60DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
  61DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
  62
  63#ifdef CONFIG_X86_64
  64#define stack_addr(regs) ((unsigned long *)regs->sp)
  65#else
  66/*
  67 * "&regs->sp" looks wrong, but it's correct for x86_32.  x86_32 CPUs
  68 * don't save the ss and esp registers if the CPU is already in kernel
  69 * mode when it traps.  So for kprobes, regs->sp and regs->ss are not
  70 * the [nonexistent] saved stack pointer and ss register, but rather
  71 * the top 8 bytes of the pre-int3 stack.  So &regs->sp happens to
  72 * point to the top of the pre-int3 stack.
  73 */
  74#define stack_addr(regs) ((unsigned long *)&regs->sp)
  75#endif
  76
  77#define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
  78        (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) |   \
  79          (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) |   \
  80          (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) |   \
  81          (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf))    \
  82         << (row % 32))
  83        /*
  84         * Undefined/reserved opcodes, conditional jump, Opcode Extension
  85         * Groups, and some special opcodes cannot be boosted.
  86         */
  87static const u32 twobyte_is_boostable[256 / 32] = {
  88        /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
  89        /*      ----------------------------------------------          */
  90        W(0x00, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0) | /* 00 */
  91        W(0x10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 10 */
  92        W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 20 */
  93        W(0x30, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */
  94        W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
  95        W(0x50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 50 */
  96        W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1) | /* 60 */
  97        W(0x70, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */
  98        W(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 80 */
  99        W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
 100        W(0xa0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1) | /* a0 */
 101        W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1) , /* b0 */
 102        W(0xc0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */
 103        W(0xd0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1) , /* d0 */
 104        W(0xe0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1) | /* e0 */
 105        W(0xf0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0)   /* f0 */
 106        /*      -----------------------------------------------         */
 107        /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
 108};
 109static const u32 onebyte_has_modrm[256 / 32] = {
 110        /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
 111        /*      -----------------------------------------------         */
 112        W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 00 */
 113        W(0x10, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 10 */
 114        W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 20 */
 115        W(0x30, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 30 */
 116        W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */
 117        W(0x50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 50 */
 118        W(0x60, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0) | /* 60 */
 119        W(0x70, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 70 */
 120        W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
 121        W(0x90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 90 */
 122        W(0xa0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* a0 */
 123        W(0xb0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* b0 */
 124        W(0xc0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* c0 */
 125        W(0xd0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
 126        W(0xe0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* e0 */
 127        W(0xf0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1)   /* f0 */
 128        /*      -----------------------------------------------         */
 129        /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
 130};
 131static const u32 twobyte_has_modrm[256 / 32] = {
 132        /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
 133        /*      -----------------------------------------------         */
 134        W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1) | /* 0f */
 135        W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0) , /* 1f */
 136        W(0x20, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 2f */
 137        W(0x30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 3f */
 138        W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 4f */
 139        W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 5f */
 140        W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 6f */
 141        W(0x70, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1) , /* 7f */
 142        W(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 8f */
 143        W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 9f */
 144        W(0xa0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1) | /* af */
 145        W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1) , /* bf */
 146        W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* cf */
 147        W(0xd0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* df */
 148        W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* ef */
 149        W(0xf0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0)   /* ff */
 150        /*      -----------------------------------------------         */
 151        /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
 152};
 153#undef W
 154
 155struct kretprobe_blackpoint kretprobe_blacklist[] = {
 156        {"__switch_to", }, /* This function switches only current task, but
 157                              doesn't switch kernel stack.*/
 158        {NULL, NULL}        /* Terminator */
 159};
 160const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist);
 161
 162/* Insert a jump instruction at address 'from', which jumps to address 'to'.*/
 163static void __kprobes set_jmp_op(void *from, void *to)
 164{
 165        struct __arch_jmp_op {
 166                char op;
 167                s32 raddr;
 168        } __attribute__((packed)) * jop;
 169        jop = (struct __arch_jmp_op *)from;
 170        jop->raddr = (s32)((long)(to) - ((long)(from) + 5));
 171        jop->op = RELATIVEJUMP_INSTRUCTION;
 172}
 173
 174/*
 175 * Check for the REX prefix which can only exist on X86_64
 176 * X86_32 always returns 0
 177 */
 178static int __kprobes is_REX_prefix(kprobe_opcode_t *insn)
 179{
 180#ifdef CONFIG_X86_64
 181        if ((*insn & 0xf0) == 0x40)
 182                return 1;
 183#endif
 184        return 0;
 185}
 186
 187/*
 188 * Returns non-zero if opcode is boostable.
 189 * RIP relative instructions are adjusted at copying time in 64 bits mode
 190 */
 191static int __kprobes can_boost(kprobe_opcode_t *opcodes)
 192{
 193        kprobe_opcode_t opcode;
 194        kprobe_opcode_t *orig_opcodes = opcodes;
 195
 196retry:
 197        if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
 198                return 0;
 199        opcode = *(opcodes++);
 200
 201        /* 2nd-byte opcode */
 202        if (opcode == 0x0f) {
 203                if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
 204                        return 0;
 205                return test_bit(*opcodes,
 206                                (unsigned long *)twobyte_is_boostable);
 207        }
 208
 209        switch (opcode & 0xf0) {
 210#ifdef CONFIG_X86_64
 211        case 0x40:
 212                goto retry; /* REX prefix is boostable */
 213#endif
 214        case 0x60:
 215                if (0x63 < opcode && opcode < 0x67)
 216                        goto retry; /* prefixes */
 217                /* can't boost Address-size override and bound */
 218                return (opcode != 0x62 && opcode != 0x67);
 219        case 0x70:
 220                return 0; /* can't boost conditional jump */
 221        case 0xc0:
 222                /* can't boost software-interruptions */
 223                return (0xc1 < opcode && opcode < 0xcc) || opcode == 0xcf;
 224        case 0xd0:
 225                /* can boost AA* and XLAT */
 226                return (opcode == 0xd4 || opcode == 0xd5 || opcode == 0xd7);
 227        case 0xe0:
 228                /* can boost in/out and absolute jmps */
 229                return ((opcode & 0x04) || opcode == 0xea);
 230        case 0xf0:
 231                if ((opcode & 0x0c) == 0 && opcode != 0xf1)
 232                        goto retry; /* lock/rep(ne) prefix */
 233                /* clear and set flags are boostable */
 234                return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe));
 235        default:
 236                /* segment override prefixes are boostable */
 237                if (opcode == 0x26 || opcode == 0x36 || opcode == 0x3e)
 238                        goto retry; /* prefixes */
 239                /* CS override prefix and call are not boostable */
 240                return (opcode != 0x2e && opcode != 0x9a);
 241        }
 242}
 243
 244/*
 245 * Returns non-zero if opcode modifies the interrupt flag.
 246 */
 247static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
 248{
 249        switch (*insn) {
 250        case 0xfa:                /* cli */
 251        case 0xfb:                /* sti */
 252        case 0xcf:                /* iret/iretd */
 253        case 0x9d:                /* popf/popfd */
 254                return 1;
 255        }
 256
 257        /*
 258         * on X86_64, 0x40-0x4f are REX prefixes so we need to look
 259         * at the next byte instead.. but of course not recurse infinitely
 260         */
 261        if (is_REX_prefix(insn))
 262                return is_IF_modifier(++insn);
 263
 264        return 0;
 265}
 266
 267/*
 268 * Adjust the displacement if the instruction uses the %rip-relative
 269 * addressing mode.
 270 * If it does, return the address of the 32-bit displacement word.
 271 * If not, return null.
 272 * Only applicable to 64-bit x86.
 273 */
 274static void __kprobes fix_riprel(struct kprobe *p)
 275{
 276#ifdef CONFIG_X86_64
 277        u8 *insn = p->ainsn.insn;
 278        s64 disp;
 279        int need_modrm;
 280
 281        /* Skip legacy instruction prefixes.  */
 282        while (1) {
 283                switch (*insn) {
 284                case 0x66:
 285                case 0x67:
 286                case 0x2e:
 287                case 0x3e:
 288                case 0x26:
 289                case 0x64:
 290                case 0x65:
 291                case 0x36:
 292                case 0xf0:
 293                case 0xf3:
 294                case 0xf2:
 295                        ++insn;
 296                        continue;
 297                }
 298                break;
 299        }
 300
 301        /* Skip REX instruction prefix.  */
 302        if (is_REX_prefix(insn))
 303                ++insn;
 304
 305        if (*insn == 0x0f) {
 306                /* Two-byte opcode.  */
 307                ++insn;
 308                need_modrm = test_bit(*insn,
 309                                      (unsigned long *)twobyte_has_modrm);
 310        } else
 311                /* One-byte opcode.  */
 312                need_modrm = test_bit(*insn,
 313                                      (unsigned long *)onebyte_has_modrm);
 314
 315        if (need_modrm) {
 316                u8 modrm = *++insn;
 317                if ((modrm & 0xc7) == 0x05) {
 318                        /* %rip+disp32 addressing mode */
 319                        /* Displacement follows ModRM byte.  */
 320                        ++insn;
 321                        /*
 322                         * The copied instruction uses the %rip-relative
 323                         * addressing mode.  Adjust the displacement for the
 324                         * difference between the original location of this
 325                         * instruction and the location of the copy that will
 326                         * actually be run.  The tricky bit here is making sure
 327                         * that the sign extension happens correctly in this
 328                         * calculation, since we need a signed 32-bit result to
 329                         * be sign-extended to 64 bits when it's added to the
 330                         * %rip value and yield the same 64-bit result that the
 331                         * sign-extension of the original signed 32-bit
 332                         * displacement would have given.
 333                         */
 334                        disp = (u8 *) p->addr + *((s32 *) insn) -
 335                               (u8 *) p->ainsn.insn;
 336                        BUG_ON((s64) (s32) disp != disp); /* Sanity check.  */
 337                        *(s32 *)insn = (s32) disp;
 338                }
 339        }
 340#endif
 341}
 342
 343static void __kprobes arch_copy_kprobe(struct kprobe *p)
 344{
 345        memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
 346
 347        fix_riprel(p);
 348
 349        if (can_boost(p->addr))
 350                p->ainsn.boostable = 0;
 351        else
 352                p->ainsn.boostable = -1;
 353
 354        p->opcode = *p->addr;
 355}
 356
 357int __kprobes arch_prepare_kprobe(struct kprobe *p)
 358{
 359        /* insn: must be on special executable page on x86. */
 360        p->ainsn.insn = get_insn_slot();
 361        if (!p->ainsn.insn)
 362                return -ENOMEM;
 363        arch_copy_kprobe(p);
 364        return 0;
 365}
 366
 367void __kprobes arch_arm_kprobe(struct kprobe *p)
 368{
 369        text_poke(p->addr, ((unsigned char []){BREAKPOINT_INSTRUCTION}), 1);
 370}
 371
 372void __kprobes arch_disarm_kprobe(struct kprobe *p)
 373{
 374        text_poke(p->addr, &p->opcode, 1);
 375}
 376
 377void __kprobes arch_remove_kprobe(struct kprobe *p)
 378{
 379        mutex_lock(&kprobe_mutex);
 380        free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1));
 381        mutex_unlock(&kprobe_mutex);
 382}
 383
 384static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
 385{
 386        kcb->prev_kprobe.kp = kprobe_running();
 387        kcb->prev_kprobe.status = kcb->kprobe_status;
 388        kcb->prev_kprobe.old_flags = kcb->kprobe_old_flags;
 389        kcb->prev_kprobe.saved_flags = kcb->kprobe_saved_flags;
 390}
 391
 392static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
 393{
 394        __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
 395        kcb->kprobe_status = kcb->prev_kprobe.status;
 396        kcb->kprobe_old_flags = kcb->prev_kprobe.old_flags;
 397        kcb->kprobe_saved_flags = kcb->prev_kprobe.saved_flags;
 398}
 399
 400static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
 401                                struct kprobe_ctlblk *kcb)
 402{
 403        __get_cpu_var(current_kprobe) = p;
 404        kcb->kprobe_saved_flags = kcb->kprobe_old_flags
 405                = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF));
 406        if (is_IF_modifier(p->ainsn.insn))
 407                kcb->kprobe_saved_flags &= ~X86_EFLAGS_IF;
 408}
 409
 410static void __kprobes clear_btf(void)
 411{
 412        if (test_thread_flag(TIF_DEBUGCTLMSR))
 413                update_debugctlmsr(0);
 414}
 415
 416static void __kprobes restore_btf(void)
 417{
 418        if (test_thread_flag(TIF_DEBUGCTLMSR))
 419                update_debugctlmsr(current->thread.debugctlmsr);
 420}
 421
 422static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
 423{
 424        clear_btf();
 425        regs->flags |= X86_EFLAGS_TF;
 426        regs->flags &= ~X86_EFLAGS_IF;
 427        /* single step inline if the instruction is an int3 */
 428        if (p->opcode == BREAKPOINT_INSTRUCTION)
 429                regs->ip = (unsigned long)p->addr;
 430        else
 431                regs->ip = (unsigned long)p->ainsn.insn;
 432}
 433
 434void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
 435                                      struct pt_regs *regs)
 436{
 437        unsigned long *sara = stack_addr(regs);
 438
 439        ri->ret_addr = (kprobe_opcode_t *) *sara;
 440
 441        /* Replace the return addr with trampoline addr */
 442        *sara = (unsigned long) &kretprobe_trampoline;
 443}
 444
 445static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs,
 446                                       struct kprobe_ctlblk *kcb)
 447{
 448#if !defined(CONFIG_PREEMPT) || defined(CONFIG_PM)
 449        if (p->ainsn.boostable == 1 && !p->post_handler) {
 450                /* Boost up -- we can execute copied instructions directly */
 451                reset_current_kprobe();
 452                regs->ip = (unsigned long)p->ainsn.insn;
 453                preempt_enable_no_resched();
 454                return;
 455        }
 456#endif
 457        prepare_singlestep(p, regs);
 458        kcb->kprobe_status = KPROBE_HIT_SS;
 459}
 460
 461/*
 462 * We have reentered the kprobe_handler(), since another probe was hit while
 463 * within the handler. We save the original kprobes variables and just single
 464 * step on the instruction of the new probe without calling any user handlers.
 465 */
 466static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
 467                                    struct kprobe_ctlblk *kcb)
 468{
 469        switch (kcb->kprobe_status) {
 470        case KPROBE_HIT_SSDONE:
 471#ifdef CONFIG_X86_64
 472                /* TODO: Provide re-entrancy from post_kprobes_handler() and
 473                 * avoid exception stack corruption while single-stepping on
 474                 * the instruction of the new probe.
 475                 */
 476                arch_disarm_kprobe(p);
 477                regs->ip = (unsigned long)p->addr;
 478                reset_current_kprobe();
 479                preempt_enable_no_resched();
 480                break;
 481#endif
 482        case KPROBE_HIT_ACTIVE:
 483                save_previous_kprobe(kcb);
 484                set_current_kprobe(p, regs, kcb);
 485                kprobes_inc_nmissed_count(p);
 486                prepare_singlestep(p, regs);
 487                kcb->kprobe_status = KPROBE_REENTER;
 488                break;
 489        case KPROBE_HIT_SS:
 490                if (p == kprobe_running()) {
 491                        regs->flags &= ~X86_EFLAGS_TF;
 492                        regs->flags |= kcb->kprobe_saved_flags;
 493                        return 0;
 494                } else {
 495                        /* A probe has been hit in the codepath leading up
 496                         * to, or just after, single-stepping of a probed
 497                         * instruction. This entire codepath should strictly
 498                         * reside in .kprobes.text section. Raise a warning
 499                         * to highlight this peculiar case.
 500                         */
 501                }
 502        default:
 503                /* impossible cases */
 504                WARN_ON(1);
 505                return 0;
 506        }
 507
 508        return 1;
 509}
 510
 511/*
 512 * Interrupts are disabled on entry as trap3 is an interrupt gate and they
 513 * remain disabled throughout this function.
 514 */
 515static int __kprobes kprobe_handler(struct pt_regs *regs)
 516{
 517        kprobe_opcode_t *addr;
 518        struct kprobe *p;
 519        struct kprobe_ctlblk *kcb;
 520
 521        addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t));
 522        if (*addr != BREAKPOINT_INSTRUCTION) {
 523                /*
 524                 * The breakpoint instruction was removed right
 525                 * after we hit it.  Another cpu has removed
 526                 * either a probepoint or a debugger breakpoint
 527                 * at this address.  In either case, no further
 528                 * handling of this interrupt is appropriate.
 529                 * Back up over the (now missing) int3 and run
 530                 * the original instruction.
 531                 */
 532                regs->ip = (unsigned long)addr;
 533                return 1;
 534        }
 535
 536        /*
 537         * We don't want to be preempted for the entire
 538         * duration of kprobe processing. We conditionally
 539         * re-enable preemption at the end of this function,
 540         * and also in reenter_kprobe() and setup_singlestep().
 541         */
 542        preempt_disable();
 543
 544        kcb = get_kprobe_ctlblk();
 545        p = get_kprobe(addr);
 546
 547        if (p) {
 548                if (kprobe_running()) {
 549                        if (reenter_kprobe(p, regs, kcb))
 550                                return 1;
 551                } else {
 552                        set_current_kprobe(p, regs, kcb);
 553                        kcb->kprobe_status = KPROBE_HIT_ACTIVE;
 554
 555                        /*
 556                         * If we have no pre-handler or it returned 0, we
 557                         * continue with normal processing.  If we have a
 558                         * pre-handler and it returned non-zero, it prepped
 559                         * for calling the break_handler below on re-entry
 560                         * for jprobe processing, so get out doing nothing
 561                         * more here.
 562                         */
 563                        if (!p->pre_handler || !p->pre_handler(p, regs))
 564                                setup_singlestep(p, regs, kcb);
 565                        return 1;
 566                }
 567        } else if (kprobe_running()) {
 568                p = __get_cpu_var(current_kprobe);
 569                if (p->break_handler && p->break_handler(p, regs)) {
 570                        setup_singlestep(p, regs, kcb);
 571                        return 1;
 572                }
 573        } /* else: not a kprobe fault; let the kernel handle it */
 574
 575        preempt_enable_no_resched();
 576        return 0;
 577}
 578
 579/*
 580 * When a retprobed function returns, this code saves registers and
 581 * calls trampoline_handler(), which calls the kretprobe's handler.
 582 */
 583static void __used __kprobes kretprobe_trampoline_holder(void)
 584{
 585        asm volatile (
 586                        ".global kretprobe_trampoline\n"
 587                        "kretprobe_trampoline: \n"
 588#ifdef CONFIG_X86_64
 589                        /* We don't bother saving the ss register */
 590                        "        pushq %rsp\n"
 591                        "        pushfq\n"
 592                        /*
 593                         * Skip cs, ip, orig_ax.
 594                         * trampoline_handler() will plug in these values
 595                         */
 596                        "        subq $24, %rsp\n"
 597                        "        pushq %rdi\n"
 598                        "        pushq %rsi\n"
 599                        "        pushq %rdx\n"
 600                        "        pushq %rcx\n"
 601                        "        pushq %rax\n"
 602                        "        pushq %r8\n"
 603                        "        pushq %r9\n"
 604                        "        pushq %r10\n"
 605                        "        pushq %r11\n"
 606                        "        pushq %rbx\n"
 607                        "        pushq %rbp\n"
 608                        "        pushq %r12\n"
 609                        "        pushq %r13\n"
 610                        "        pushq %r14\n"
 611                        "        pushq %r15\n"
 612                        "        movq %rsp, %rdi\n"
 613                        "        call trampoline_handler\n"
 614                        /* Replace saved sp with true return address. */
 615                        "        movq %rax, 152(%rsp)\n"
 616                        "        popq %r15\n"
 617                        "        popq %r14\n"
 618                        "        popq %r13\n"
 619                        "        popq %r12\n"
 620                        "        popq %rbp\n"
 621                        "        popq %rbx\n"
 622                        "        popq %r11\n"
 623                        "        popq %r10\n"
 624                        "        popq %r9\n"
 625                        "        popq %r8\n"
 626                        "        popq %rax\n"
 627                        "        popq %rcx\n"
 628                        "        popq %rdx\n"
 629                        "        popq %rsi\n"
 630                        "        popq %rdi\n"
 631                        /* Skip orig_ax, ip, cs */
 632                        "        addq $24, %rsp\n"
 633                        "        popfq\n"
 634#else
 635                        "        pushf\n"
 636                        /*
 637                         * Skip cs, ip, orig_ax.
 638                         * trampoline_handler() will plug in these values
 639                         */
 640                        "        subl $12, %esp\n"
 641                        "        pushl %fs\n"
 642                        "        pushl %ds\n"
 643                        "        pushl %es\n"
 644                        "        pushl %eax\n"
 645                        "        pushl %ebp\n"
 646                        "        pushl %edi\n"
 647                        "        pushl %esi\n"
 648                        "        pushl %edx\n"
 649                        "        pushl %ecx\n"
 650                        "        pushl %ebx\n"
 651                        "        movl %esp, %eax\n"
 652                        "        call trampoline_handler\n"
 653                        /* Move flags to cs */
 654                        "        movl 52(%esp), %edx\n"
 655                        "        movl %edx, 48(%esp)\n"
 656                        /* Replace saved flags with true return address. */
 657                        "        movl %eax, 52(%esp)\n"
 658                        "        popl %ebx\n"
 659                        "        popl %ecx\n"
 660                        "        popl %edx\n"
 661                        "        popl %esi\n"
 662                        "        popl %edi\n"
 663                        "        popl %ebp\n"
 664                        "        popl %eax\n"
 665                        /* Skip ip, orig_ax, es, ds, fs */
 666                        "        addl $20, %esp\n"
 667                        "        popf\n"
 668#endif
 669                        "        ret\n");
 670}
 671
 672/*
 673 * Called from kretprobe_trampoline
 674 */
 675static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
 676{
 677        struct kretprobe_instance *ri = NULL;
 678        struct hlist_head *head, empty_rp;
 679        struct hlist_node *node, *tmp;
 680        unsigned long flags, orig_ret_address = 0;
 681        unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
 682
 683        INIT_HLIST_HEAD(&empty_rp);
 684        kretprobe_hash_lock(current, &head, &flags);
 685        /* fixup registers */
 686#ifdef CONFIG_X86_64
 687        regs->cs = __KERNEL_CS;
 688#else
 689        regs->cs = __KERNEL_CS | get_kernel_rpl();
 690#endif
 691        regs->ip = trampoline_address;
 692        regs->orig_ax = ~0UL;
 693
 694        /*
 695         * It is possible to have multiple instances associated with a given
 696         * task either because multiple functions in the call path have
 697         * return probes installed on them, and/or more than one
 698         * return probe was registered for a target function.
 699         *
 700         * We can handle this because:
 701         *     - instances are always pushed into the head of the list
 702         *     - when multiple return probes are registered for the same
 703         *         function, the (chronologically) first instance's ret_addr
 704         *         will be the real return address, and all the rest will
 705         *         point to kretprobe_trampoline.
 706         */
 707        hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
 708                if (ri->task != current)
 709                        /* another task is sharing our hash bucket */
 710                        continue;
 711
 712                if (ri->rp && ri->rp->handler) {
 713                        __get_cpu_var(current_kprobe) = &ri->rp->kp;
 714                        get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
 715                        ri->rp->handler(ri, regs);
 716                        __get_cpu_var(current_kprobe) = NULL;
 717                }
 718
 719                orig_ret_address = (unsigned long)ri->ret_addr;
 720                recycle_rp_inst(ri, &empty_rp);
 721
 722                if (orig_ret_address != trampoline_address)
 723                        /*
 724                         * This is the real return address. Any other
 725                         * instances associated with this task are for
 726                         * other calls deeper on the call stack
 727                         */
 728                        break;
 729        }
 730
 731        kretprobe_assert(ri, orig_ret_address, trampoline_address);
 732
 733        kretprobe_hash_unlock(current, &flags);
 734
 735        hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
 736                hlist_del(&ri->hlist);
 737                kfree(ri);
 738        }
 739        return (void *)orig_ret_address;
 740}
 741
 742/*
 743 * Called after single-stepping.  p->addr is the address of the
 744 * instruction whose first byte has been replaced by the "int 3"
 745 * instruction.  To avoid the SMP problems that can occur when we
 746 * temporarily put back the original opcode to single-step, we
 747 * single-stepped a copy of the instruction.  The address of this
 748 * copy is p->ainsn.insn.
 749 *
 750 * This function prepares to return from the post-single-step
 751 * interrupt.  We have to fix up the stack as follows:
 752 *
 753 * 0) Except in the case of absolute or indirect jump or call instructions,
 754 * the new ip is relative to the copied instruction.  We need to make
 755 * it relative to the original instruction.
 756 *
 757 * 1) If the single-stepped instruction was pushfl, then the TF and IF
 758 * flags are set in the just-pushed flags, and may need to be cleared.
 759 *
 760 * 2) If the single-stepped instruction was a call, the return address
 761 * that is atop the stack is the address following the copied instruction.
 762 * We need to make it the address following the original instruction.
 763 *
 764 * If this is the first time we've single-stepped the instruction at
 765 * this probepoint, and the instruction is boostable, boost it: add a
 766 * jump instruction after the copied instruction, that jumps to the next
 767 * instruction after the probepoint.
 768 */
 769static void __kprobes resume_execution(struct kprobe *p,
 770                struct pt_regs *regs, struct kprobe_ctlblk *kcb)
 771{
 772        unsigned long *tos = stack_addr(regs);
 773        unsigned long copy_ip = (unsigned long)p->ainsn.insn;
 774        unsigned long orig_ip = (unsigned long)p->addr;
 775        kprobe_opcode_t *insn = p->ainsn.insn;
 776
 777        /*skip the REX prefix*/
 778        if (is_REX_prefix(insn))
 779                insn++;
 780
 781        regs->flags &= ~X86_EFLAGS_TF;
 782        switch (*insn) {
 783        case 0x9c:        /* pushfl */
 784                *tos &= ~(X86_EFLAGS_TF | X86_EFLAGS_IF);
 785                *tos |= kcb->kprobe_old_flags;
 786                break;
 787        case 0xc2:        /* iret/ret/lret */
 788        case 0xc3:
 789        case 0xca:
 790        case 0xcb:
 791        case 0xcf:
 792        case 0xea:        /* jmp absolute -- ip is correct */
 793                /* ip is already adjusted, no more changes required */
 794                p->ainsn.boostable = 1;
 795                goto no_change;
 796        case 0xe8:        /* call relative - Fix return addr */
 797                *tos = orig_ip + (*tos - copy_ip);
 798                break;
 799#ifdef CONFIG_X86_32
 800        case 0x9a:        /* call absolute -- same as call absolute, indirect */
 801                *tos = orig_ip + (*tos - copy_ip);
 802                goto no_change;
 803#endif
 804        case 0xff:
 805                if ((insn[1] & 0x30) == 0x10) {
 806                        /*
 807                         * call absolute, indirect
 808                         * Fix return addr; ip is correct.
 809                         * But this is not boostable
 810                         */
 811                        *tos = orig_ip + (*tos - copy_ip);
 812                        goto no_change;
 813                } else if (((insn[1] & 0x31) == 0x20) ||
 814                           ((insn[1] & 0x31) == 0x21)) {
 815                        /*
 816                         * jmp near and far, absolute indirect
 817                         * ip is correct. And this is boostable
 818                         */
 819                        p->ainsn.boostable = 1;
 820                        goto no_change;
 821                }
 822        default:
 823                break;
 824        }
 825
 826        if (p->ainsn.boostable == 0) {
 827                if ((regs->ip > copy_ip) &&
 828                    (regs->ip - copy_ip) + 5 < MAX_INSN_SIZE) {
 829                        /*
 830                         * These instructions can be executed directly if it
 831                         * jumps back to correct address.
 832                         */
 833                        set_jmp_op((void *)regs->ip,
 834                                   (void *)orig_ip + (regs->ip - copy_ip));
 835                        p->ainsn.boostable = 1;
 836                } else {
 837                        p->ainsn.boostable = -1;
 838                }
 839        }
 840
 841        regs->ip += orig_ip - copy_ip;
 842
 843no_change:
 844        restore_btf();
 845}
 846
 847/*
 848 * Interrupts are disabled on entry as trap1 is an interrupt gate and they
 849 * remain disabled throughout this function.
 850 */
 851static int __kprobes post_kprobe_handler(struct pt_regs *regs)
 852{
 853        struct kprobe *cur = kprobe_running();
 854        struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 855
 856        if (!cur)
 857                return 0;
 858
 859        resume_execution(cur, regs, kcb);
 860        regs->flags |= kcb->kprobe_saved_flags;
 861
 862        if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
 863                kcb->kprobe_status = KPROBE_HIT_SSDONE;
 864                cur->post_handler(cur, regs, 0);
 865        }
 866
 867        /* Restore back the original saved kprobes variables and continue. */
 868        if (kcb->kprobe_status == KPROBE_REENTER) {
 869                restore_previous_kprobe(kcb);
 870                goto out;
 871        }
 872        reset_current_kprobe();
 873out:
 874        preempt_enable_no_resched();
 875
 876        /*
 877         * if somebody else is singlestepping across a probe point, flags
 878         * will have TF set, in which case, continue the remaining processing
 879         * of do_debug, as if this is not a probe hit.
 880         */
 881        if (regs->flags & X86_EFLAGS_TF)
 882                return 0;
 883
 884        return 1;
 885}
 886
 887int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 888{
 889        struct kprobe *cur = kprobe_running();
 890        struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 891
 892        switch (kcb->kprobe_status) {
 893        case KPROBE_HIT_SS:
 894        case KPROBE_REENTER:
 895                /*
 896                 * We are here because the instruction being single
 897                 * stepped caused a page fault. We reset the current
 898                 * kprobe and the ip points back to the probe address
 899                 * and allow the page fault handler to continue as a
 900                 * normal page fault.
 901                 */
 902                regs->ip = (unsigned long)cur->addr;
 903                regs->flags |= kcb->kprobe_old_flags;
 904                if (kcb->kprobe_status == KPROBE_REENTER)
 905                        restore_previous_kprobe(kcb);
 906                else
 907                        reset_current_kprobe();
 908                preempt_enable_no_resched();
 909                break;
 910        case KPROBE_HIT_ACTIVE:
 911        case KPROBE_HIT_SSDONE:
 912                /*
 913                 * We increment the nmissed count for accounting,
 914                 * we can also use npre/npostfault count for accounting
 915                 * these specific fault cases.
 916                 */
 917                kprobes_inc_nmissed_count(cur);
 918
 919                /*
 920                 * We come here because instructions in the pre/post
 921                 * handler caused the page_fault, this could happen
 922                 * if handler tries to access user space by
 923                 * copy_from_user(), get_user() etc. Let the
 924                 * user-specified handler try to fix it first.
 925                 */
 926                if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
 927                        return 1;
 928
 929                /*
 930                 * In case the user-specified fault handler returned
 931                 * zero, try to fix up.
 932                 */
 933                if (fixup_exception(regs))
 934                        return 1;
 935
 936                /*
 937                 * fixup routine could not handle it,
 938                 * Let do_page_fault() fix it.
 939                 */
 940                break;
 941        default:
 942                break;
 943        }
 944        return 0;
 945}
 946
 947/*
 948 * Wrapper routine for handling exceptions.
 949 */
 950int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
 951                                       unsigned long val, void *data)
 952{
 953        struct die_args *args = data;
 954        int ret = NOTIFY_DONE;
 955
 956        if (args->regs && user_mode_vm(args->regs))
 957                return ret;
 958
 959        switch (val) {
 960        case DIE_INT3:
 961                if (kprobe_handler(args->regs))
 962                        ret = NOTIFY_STOP;
 963                break;
 964        case DIE_DEBUG:
 965                if (post_kprobe_handler(args->regs))
 966                        ret = NOTIFY_STOP;
 967                break;
 968        case DIE_GPF:
 969                /*
 970                 * To be potentially processing a kprobe fault and to
 971                 * trust the result from kprobe_running(), we have
 972                 * to be non-preemptible.
 973                 */
 974                if (!preemptible() && kprobe_running() &&
 975                    kprobe_fault_handler(args->regs, args->trapnr))
 976                        ret = NOTIFY_STOP;
 977                break;
 978        default:
 979                break;
 980        }
 981        return ret;
 982}
 983
 984int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 985{
 986        struct jprobe *jp = container_of(p, struct jprobe, kp);
 987        unsigned long addr;
 988        struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 989
 990        kcb->jprobe_saved_regs = *regs;
 991        kcb->jprobe_saved_sp = stack_addr(regs);
 992        addr = (unsigned long)(kcb->jprobe_saved_sp);
 993
 994        /*
 995         * As Linus pointed out, gcc assumes that the callee
 996         * owns the argument space and could overwrite it, e.g.
 997         * tailcall optimization. So, to be absolutely safe
 998         * we also save and restore enough stack bytes to cover
 999         * the argument area.
1000         */
1001        memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr,
1002               MIN_STACK_SIZE(addr));
1003        regs->flags &= ~X86_EFLAGS_IF;
1004        trace_hardirqs_off();
1005        regs->ip = (unsigned long)(jp->entry);
1006        return 1;
1007}
1008
1009void __kprobes jprobe_return(void)
1010{
1011        struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
1012
1013        asm volatile (
1014#ifdef CONFIG_X86_64
1015                        "       xchg   %%rbx,%%rsp        \n"
1016#else
1017                        "       xchgl   %%ebx,%%esp        \n"
1018#endif
1019                        "       int3                        \n"
1020                        "       .globl jprobe_return_end\n"
1021                        "       jprobe_return_end:        \n"
1022                        "       nop                        \n"::"b"
1023                        (kcb->jprobe_saved_sp):"memory");
1024}
1025
1026int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
1027{
1028        struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
1029        u8 *addr = (u8 *) (regs->ip - 1);
1030        struct jprobe *jp = container_of(p, struct jprobe, kp);
1031
1032        if ((addr > (u8 *) jprobe_return) &&
1033            (addr < (u8 *) jprobe_return_end)) {
1034                if (stack_addr(regs) != kcb->jprobe_saved_sp) {
1035                        struct pt_regs *saved_regs = &kcb->jprobe_saved_regs;
1036                        printk(KERN_ERR
1037                               "current sp %p does not match saved sp %p\n",
1038                               stack_addr(regs), kcb->jprobe_saved_sp);
1039                        printk(KERN_ERR "Saved registers for jprobe %p\n", jp);
1040                        show_registers(saved_regs);
1041                        printk(KERN_ERR "Current registers\n");
1042                        show_registers(regs);
1043                        BUG();
1044                }
1045                *regs = kcb->jprobe_saved_regs;
1046                memcpy((kprobe_opcode_t *)(kcb->jprobe_saved_sp),
1047                       kcb->jprobes_stack,
1048                       MIN_STACK_SIZE(kcb->jprobe_saved_sp));
1049                preempt_enable_no_resched();
1050                return 1;
1051        }
1052        return 0;
1053}
1054
1055int __init arch_init_kprobes(void)
1056{
1057        return 0;
1058}
1059
1060int __kprobes arch_trampoline_kprobe(struct kprobe *p)
1061{
1062        return 0;
1063}
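
Note on the %rip-relative fix-up in fix_riprel() (the disp computation at
lines 334-337): the rewrite keeps the copied instruction pointing at the same
absolute target as the original. A worked example with made-up addresses: if
the probed instruction at p->addr = 0xffffffff81000000 carries a 32-bit
displacement of 0x100 and the copy in p->ainsn.insn sits at
0xffffffff81200000, then

    disp = (u8 *)p->addr + old_disp - (u8 *)p->ainsn.insn
         = 0xffffffff81000000 + 0x100 - 0xffffffff81200000
         = -0x1fff00

Because a %rip-relative operand is resolved against the instruction's own
location (strictly, the address of the following instruction) and the copy has
the same byte layout as the original, a displacement of -0x1fff00 from the
copy reaches exactly the byte that +0x100 reached from the original. The
BUG_ON() at line 336 checks that the adjusted value still fits in a signed
32 bits.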