Showing error 839

User: Jiri Slaby
Error type: Resource Leak
Error type description: The code omits to put the resource to the system for reuse
File location: lib/radix-tree.c
Line in file: 217
Project: Linux Kernel
Project version: 2.6.28
Tools: Stanse (1.2)
Entered: 2011-11-07 22:40:13 UTC


Source:

   1/*
   2 * Copyright (C) 2001 Momchil Velikov
   3 * Portions Copyright (C) 2001 Christoph Hellwig
   4 * Copyright (C) 2005 SGI, Christoph Lameter
   5 * Copyright (C) 2006 Nick Piggin
   6 *
   7 * This program is free software; you can redistribute it and/or
   8 * modify it under the terms of the GNU General Public License as
   9 * published by the Free Software Foundation; either version 2, or (at
  10 * your option) any later version.
  11 *
  12 * This program is distributed in the hope that it will be useful, but
  13 * WITHOUT ANY WARRANTY; without even the implied warranty of
  14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15 * General Public License for more details.
  16 *
  17 * You should have received a copy of the GNU General Public License
  18 * along with this program; if not, write to the Free Software
  19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  20 */
  21
  22#include <linux/errno.h>
  23#include <linux/init.h>
  24#include <linux/kernel.h>
  25#include <linux/module.h>
  26#include <linux/radix-tree.h>
  27#include <linux/percpu.h>
  28#include <linux/slab.h>
  29#include <linux/notifier.h>
  30#include <linux/cpu.h>
  31#include <linux/gfp.h>
  32#include <linux/string.h>
  33#include <linux/bitops.h>
  34#include <linux/rcupdate.h>
  35
  36
  37#ifdef __KERNEL__
  38#define RADIX_TREE_MAP_SHIFT        (CONFIG_BASE_SMALL ? 4 : 6)
  39#else
  40#define RADIX_TREE_MAP_SHIFT        3        /* For more stressful testing */
  41#endif
  42
  43#define RADIX_TREE_MAP_SIZE        (1UL << RADIX_TREE_MAP_SHIFT)
  44#define RADIX_TREE_MAP_MASK        (RADIX_TREE_MAP_SIZE-1)
  45
  46#define RADIX_TREE_TAG_LONGS        \
  47        ((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG)
  48
  49struct radix_tree_node {
  50        unsigned int        height;                /* Height from the bottom */
  51        unsigned int        count;
  52        struct rcu_head        rcu_head;
  53        void                *slots[RADIX_TREE_MAP_SIZE];
  54        unsigned long        tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
  55};
  56
  57struct radix_tree_path {
  58        struct radix_tree_node *node;
  59        int offset;
  60};
  61
  62#define RADIX_TREE_INDEX_BITS  (8 /* CHAR_BIT */ * sizeof(unsigned long))
  63#define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \
  64                                          RADIX_TREE_MAP_SHIFT))
  65
  66/*
  67 * The height_to_maxindex array needs to be one deeper than the maximum
  68 * path as height 0 holds only 1 entry.
  69 */
  70static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1] __read_mostly;
  71
  72/*
  73 * Radix tree node cache.
  74 */
  75static struct kmem_cache *radix_tree_node_cachep;
  76
  77/*
  78 * Per-cpu pool of preloaded nodes
  79 */
  80struct radix_tree_preload {
  81        int nr;
  82        struct radix_tree_node *nodes[RADIX_TREE_MAX_PATH];
  83};
  84DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };
  85
  86static inline gfp_t root_gfp_mask(struct radix_tree_root *root)
  87{
  88        return root->gfp_mask & __GFP_BITS_MASK;
  89}
  90
  91static inline void tag_set(struct radix_tree_node *node, unsigned int tag,
  92                int offset)
  93{
  94        __set_bit(offset, node->tags[tag]);
  95}
  96
  97static inline void tag_clear(struct radix_tree_node *node, unsigned int tag,
  98                int offset)
  99{
 100        __clear_bit(offset, node->tags[tag]);
 101}
 102
 103static inline int tag_get(struct radix_tree_node *node, unsigned int tag,
 104                int offset)
 105{
 106        return test_bit(offset, node->tags[tag]);
 107}
 108
 109static inline void root_tag_set(struct radix_tree_root *root, unsigned int tag)
 110{
 111        root->gfp_mask |= (__force gfp_t)(1 << (tag + __GFP_BITS_SHIFT));
 112}
 113
 114static inline void root_tag_clear(struct radix_tree_root *root, unsigned int tag)
 115{
 116        root->gfp_mask &= (__force gfp_t)~(1 << (tag + __GFP_BITS_SHIFT));
 117}
 118
 119static inline void root_tag_clear_all(struct radix_tree_root *root)
 120{
 121        root->gfp_mask &= __GFP_BITS_MASK;
 122}
 123
 124static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag)
 125{
 126        return (__force unsigned)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT));
 127}
 128
 129/*
 130 * Returns 1 if any slot in the node has this tag set.
 131 * Otherwise returns 0.
 132 */
 133static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag)
 134{
 135        int idx;
 136        for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
 137                if (node->tags[tag][idx])
 138                        return 1;
 139        }
 140        return 0;
 141}
 142/*
 143 * This assumes that the caller has performed appropriate preallocation, and
 144 * that the caller has pinned this thread of control to the current CPU.
 145 */
 146static struct radix_tree_node *
 147radix_tree_node_alloc(struct radix_tree_root *root)
 148{
 149        struct radix_tree_node *ret = NULL;
 150        gfp_t gfp_mask = root_gfp_mask(root);
 151
 152        if (!(gfp_mask & __GFP_WAIT)) {
 153                struct radix_tree_preload *rtp;
 154
 155                /*
 156                 * Provided the caller has preloaded here, we will always
 157                 * succeed in getting a node here (and never reach
 158                 * kmem_cache_alloc)
 159                 */
 160                rtp = &__get_cpu_var(radix_tree_preloads);
 161                if (rtp->nr) {
 162                        ret = rtp->nodes[rtp->nr - 1];
 163                        rtp->nodes[rtp->nr - 1] = NULL;
 164                        rtp->nr--;
 165                }
 166        }
 167        if (ret == NULL)
 168                ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
 169
 170        BUG_ON(radix_tree_is_indirect_ptr(ret));
 171        return ret;
 172}
 173
 174static void radix_tree_node_rcu_free(struct rcu_head *head)
 175{
 176        struct radix_tree_node *node =
 177                        container_of(head, struct radix_tree_node, rcu_head);
 178
 179        /*
 180         * must only free zeroed nodes into the slab. radix_tree_shrink
 181         * can leave us with a non-NULL entry in the first slot, so clear
 182         * that here to make sure.
 183         */
 184        tag_clear(node, 0, 0);
 185        tag_clear(node, 1, 0);
 186        node->slots[0] = NULL;
 187        node->count = 0;
 188
 189        kmem_cache_free(radix_tree_node_cachep, node);
 190}
 191
 192static inline void
 193radix_tree_node_free(struct radix_tree_node *node)
 194{
 195        call_rcu(&node->rcu_head, radix_tree_node_rcu_free);
 196}
 197
 198/*
 199 * Load up this CPU's radix_tree_node buffer with sufficient objects to
 200 * ensure that the addition of a single element in the tree cannot fail.  On
 201 * success, return zero, with preemption disabled.  On error, return -ENOMEM
 202 * with preemption not disabled.
 203 */
 204int radix_tree_preload(gfp_t gfp_mask)
 205{
 206        struct radix_tree_preload *rtp;
 207        struct radix_tree_node *node;
 208        int ret = -ENOMEM;
 209
 210        preempt_disable();
 211        rtp = &__get_cpu_var(radix_tree_preloads);
 212        while (rtp->nr < ARRAY_SIZE(rtp->nodes)) {
 213                preempt_enable();
 214                node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
 215                if (node == NULL)
 216                        goto out;
 217                preempt_disable();
 218                rtp = &__get_cpu_var(radix_tree_preloads);
 219                if (rtp->nr < ARRAY_SIZE(rtp->nodes))
 220                        rtp->nodes[rtp->nr++] = node;
 221                else
 222                        kmem_cache_free(radix_tree_node_cachep, node);
 223        }
 224        ret = 0;
 225out:
 226        return ret;
 227}
 228EXPORT_SYMBOL(radix_tree_preload);
 229
 230/*
 231 *        Return the maximum key which can be store into a
 232 *        radix tree with height HEIGHT.
 233 */
 234static inline unsigned long radix_tree_maxindex(unsigned int height)
 235{
 236        return height_to_maxindex[height];
 237}
 238
 239/*
 240 *        Extend a radix tree so it can store key @index.
 241 */
 242static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
 243{
 244        struct radix_tree_node *node;
 245        unsigned int height;
 246        int tag;
 247
 248        /* Figure out what the height should be.  */
 249        height = root->height + 1;
 250        while (index > radix_tree_maxindex(height))
 251                height++;
 252
 253        if (root->rnode == NULL) {
 254                root->height = height;
 255                goto out;
 256        }
 257
 258        do {
 259                unsigned int newheight;
 260                if (!(node = radix_tree_node_alloc(root)))
 261                        return -ENOMEM;
 262
 263                /* Increase the height.  */
 264                node->slots[0] = radix_tree_indirect_to_ptr(root->rnode);
 265
 266                /* Propagate the aggregated tag info into the new root */
 267                for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
 268                        if (root_tag_get(root, tag))
 269                                tag_set(node, tag, 0);
 270                }
 271
 272                newheight = root->height+1;
 273                node->height = newheight;
 274                node->count = 1;
 275                node = radix_tree_ptr_to_indirect(node);
 276                rcu_assign_pointer(root->rnode, node);
 277                root->height = newheight;
 278        } while (height > root->height);
 279out:
 280        return 0;
 281}
 282
 283/**
 284 *        radix_tree_insert    -    insert into a radix tree
 285 *        @root:                radix tree root
 286 *        @index:                index key
 287 *        @item:                item to insert
 288 *
 289 *        Insert an item into the radix tree at position @index.
 290 */
 291int radix_tree_insert(struct radix_tree_root *root,
 292                        unsigned long index, void *item)
 293{
 294        struct radix_tree_node *node = NULL, *slot;
 295        unsigned int height, shift;
 296        int offset;
 297        int error;
 298
 299        BUG_ON(radix_tree_is_indirect_ptr(item));
 300
 301        /* Make sure the tree is high enough.  */
 302        if (index > radix_tree_maxindex(root->height)) {
 303                error = radix_tree_extend(root, index);
 304                if (error)
 305                        return error;
 306        }
 307
 308        slot = radix_tree_indirect_to_ptr(root->rnode);
 309
 310        height = root->height;
 311        shift = (height-1) * RADIX_TREE_MAP_SHIFT;
 312
 313        offset = 0;                        /* uninitialised var warning */
 314        while (height > 0) {
 315                if (slot == NULL) {
 316                        /* Have to add a child node.  */
 317                        if (!(slot = radix_tree_node_alloc(root)))
 318                                return -ENOMEM;
 319                        slot->height = height;
 320                        if (node) {
 321                                rcu_assign_pointer(node->slots[offset], slot);
 322                                node->count++;
 323                        } else
 324                                rcu_assign_pointer(root->rnode,
 325                                        radix_tree_ptr_to_indirect(slot));
 326                }
 327
 328                /* Go a level down */
 329                offset = (index >> shift) & RADIX_TREE_MAP_MASK;
 330                node = slot;
 331                slot = node->slots[offset];
 332                shift -= RADIX_TREE_MAP_SHIFT;
 333                height--;
 334        }
 335
 336        if (slot != NULL)
 337                return -EEXIST;
 338
 339        if (node) {
 340                node->count++;
 341                rcu_assign_pointer(node->slots[offset], item);
 342                BUG_ON(tag_get(node, 0, offset));
 343                BUG_ON(tag_get(node, 1, offset));
 344        } else {
 345                rcu_assign_pointer(root->rnode, item);
 346                BUG_ON(root_tag_get(root, 0));
 347                BUG_ON(root_tag_get(root, 1));
 348        }
 349
 350        return 0;
 351}
 352EXPORT_SYMBOL(radix_tree_insert);
 353
 354/**
 355 *        radix_tree_lookup_slot    -    lookup a slot in a radix tree
 356 *        @root:                radix tree root
 357 *        @index:                index key
 358 *
 359 *        Returns:  the slot corresponding to the position @index in the
 360 *        radix tree @root. This is useful for update-if-exists operations.
 361 *
 362 *        This function can be called under rcu_read_lock iff the slot is not
 363 *        modified by radix_tree_replace_slot, otherwise it must be called
 364 *        exclusive from other writers. Any dereference of the slot must be done
 365 *        using radix_tree_deref_slot.
 366 */
 367void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index)
 368{
 369        unsigned int height, shift;
 370        struct radix_tree_node *node, **slot;
 371
 372        node = rcu_dereference(root->rnode);
 373        if (node == NULL)
 374                return NULL;
 375
 376        if (!radix_tree_is_indirect_ptr(node)) {
 377                if (index > 0)
 378                        return NULL;
 379                return (void **)&root->rnode;
 380        }
 381        node = radix_tree_indirect_to_ptr(node);
 382
 383        height = node->height;
 384        if (index > radix_tree_maxindex(height))
 385                return NULL;
 386
 387        shift = (height-1) * RADIX_TREE_MAP_SHIFT;
 388
 389        do {
 390                slot = (struct radix_tree_node **)
 391                        (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK));
 392                node = rcu_dereference(*slot);
 393                if (node == NULL)
 394                        return NULL;
 395
 396                shift -= RADIX_TREE_MAP_SHIFT;
 397                height--;
 398        } while (height > 0);
 399
 400        return (void **)slot;
 401}
 402EXPORT_SYMBOL(radix_tree_lookup_slot);
 403
 404/**
 405 *        radix_tree_lookup    -    perform lookup operation on a radix tree
 406 *        @root:                radix tree root
 407 *        @index:                index key
 408 *
 409 *        Lookup the item at the position @index in the radix tree @root.
 410 *
 411 *        This function can be called under rcu_read_lock, however the caller
 412 *        must manage lifetimes of leaf nodes (eg. RCU may also be used to free
 413 *        them safely). No RCU barriers are required to access or modify the
 414 *        returned item, however.
 415 */
 416void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
 417{
 418        unsigned int height, shift;
 419        struct radix_tree_node *node, **slot;
 420
 421        node = rcu_dereference(root->rnode);
 422        if (node == NULL)
 423                return NULL;
 424
 425        if (!radix_tree_is_indirect_ptr(node)) {
 426                if (index > 0)
 427                        return NULL;
 428                return node;
 429        }
 430        node = radix_tree_indirect_to_ptr(node);
 431
 432        height = node->height;
 433        if (index > radix_tree_maxindex(height))
 434                return NULL;
 435
 436        shift = (height-1) * RADIX_TREE_MAP_SHIFT;
 437
 438        do {
 439                slot = (struct radix_tree_node **)
 440                        (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK));
 441                node = rcu_dereference(*slot);
 442                if (node == NULL)
 443                        return NULL;
 444
 445                shift -= RADIX_TREE_MAP_SHIFT;
 446                height--;
 447        } while (height > 0);
 448
 449        return node;
 450}
 451EXPORT_SYMBOL(radix_tree_lookup);
 452
 453/**
 454 *        radix_tree_tag_set - set a tag on a radix tree node
 455 *        @root:                radix tree root
 456 *        @index:                index key
 457 *        @tag:                 tag index
 458 *
 459 *        Set the search tag (which must be < RADIX_TREE_MAX_TAGS)
 460 *        corresponding to @index in the radix tree.  From
 461 *        the root all the way down to the leaf node.
 462 *
 463 *        Returns the address of the tagged item.   Setting a tag on a not-present
 464 *        item is a bug.
 465 */
 466void *radix_tree_tag_set(struct radix_tree_root *root,
 467                        unsigned long index, unsigned int tag)
 468{
 469        unsigned int height, shift;
 470        struct radix_tree_node *slot;
 471
 472        height = root->height;
 473        BUG_ON(index > radix_tree_maxindex(height));
 474
 475        slot = radix_tree_indirect_to_ptr(root->rnode);
 476        shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
 477
 478        while (height > 0) {
 479                int offset;
 480
 481                offset = (index >> shift) & RADIX_TREE_MAP_MASK;
 482                if (!tag_get(slot, tag, offset))
 483                        tag_set(slot, tag, offset);
 484                slot = slot->slots[offset];
 485                BUG_ON(slot == NULL);
 486                shift -= RADIX_TREE_MAP_SHIFT;
 487                height--;
 488        }
 489
 490        /* set the root's tag bit */
 491        if (slot && !root_tag_get(root, tag))
 492                root_tag_set(root, tag);
 493
 494        return slot;
 495}
 496EXPORT_SYMBOL(radix_tree_tag_set);
 497
 498/**
 499 *        radix_tree_tag_clear - clear a tag on a radix tree node
 500 *        @root:                radix tree root
 501 *        @index:                index key
 502 *        @tag:                 tag index
 503 *
 504 *        Clear the search tag (which must be < RADIX_TREE_MAX_TAGS)
 505 *        corresponding to @index in the radix tree.  If
 506 *        this causes the leaf node to have no tags set then clear the tag in the
 507 *        next-to-leaf node, etc.
 508 *
 509 *        Returns the address of the tagged item on success, else NULL.  ie:
 510 *        has the same return value and semantics as radix_tree_lookup().
 511 */
 512void *radix_tree_tag_clear(struct radix_tree_root *root,
 513                        unsigned long index, unsigned int tag)
 514{
 515        /*
 516         * The radix tree path needs to be one longer than the maximum path
 517         * since the "list" is null terminated.
 518         */
 519        struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path;
 520        struct radix_tree_node *slot = NULL;
 521        unsigned int height, shift;
 522
 523        height = root->height;
 524        if (index > radix_tree_maxindex(height))
 525                goto out;
 526
 527        shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
 528        pathp->node = NULL;
 529        slot = radix_tree_indirect_to_ptr(root->rnode);
 530
 531        while (height > 0) {
 532                int offset;
 533
 534                if (slot == NULL)
 535                        goto out;
 536
 537                offset = (index >> shift) & RADIX_TREE_MAP_MASK;
 538                pathp[1].offset = offset;
 539                pathp[1].node = slot;
 540                slot = slot->slots[offset];
 541                pathp++;
 542                shift -= RADIX_TREE_MAP_SHIFT;
 543                height--;
 544        }
 545
 546        if (slot == NULL)
 547                goto out;
 548
 549        while (pathp->node) {
 550                if (!tag_get(pathp->node, tag, pathp->offset))
 551                        goto out;
 552                tag_clear(pathp->node, tag, pathp->offset);
 553                if (any_tag_set(pathp->node, tag))
 554                        goto out;
 555                pathp--;
 556        }
 557
 558        /* clear the root's tag bit */
 559        if (root_tag_get(root, tag))
 560                root_tag_clear(root, tag);
 561
 562out:
 563        return slot;
 564}
 565EXPORT_SYMBOL(radix_tree_tag_clear);
 566
 567#ifndef __KERNEL__        /* Only the test harness uses this at present */
 568/**
 569 * radix_tree_tag_get - get a tag on a radix tree node
 570 * @root:                radix tree root
 571 * @index:                index key
 572 * @tag:                 tag index (< RADIX_TREE_MAX_TAGS)
 573 *
 574 * Return values:
 575 *
 576 *  0: tag not present or not set
 577 *  1: tag set
 578 */
 579int radix_tree_tag_get(struct radix_tree_root *root,
 580                        unsigned long index, unsigned int tag)
 581{
 582        unsigned int height, shift;
 583        struct radix_tree_node *node;
 584        int saw_unset_tag = 0;
 585
 586        /* check the root's tag bit */
 587        if (!root_tag_get(root, tag))
 588                return 0;
 589
 590        node = rcu_dereference(root->rnode);
 591        if (node == NULL)
 592                return 0;
 593
 594        if (!radix_tree_is_indirect_ptr(node))
 595                return (index == 0);
 596        node = radix_tree_indirect_to_ptr(node);
 597
 598        height = node->height;
 599        if (index > radix_tree_maxindex(height))
 600                return 0;
 601
 602        shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
 603
 604        for ( ; ; ) {
 605                int offset;
 606
 607                if (node == NULL)
 608                        return 0;
 609
 610                offset = (index >> shift) & RADIX_TREE_MAP_MASK;
 611
 612                /*
 613                 * This is just a debug check.  Later, we can bale as soon as
 614                 * we see an unset tag.
 615                 */
 616                if (!tag_get(node, tag, offset))
 617                        saw_unset_tag = 1;
 618                if (height == 1) {
 619                        int ret = tag_get(node, tag, offset);
 620
 621                        BUG_ON(ret && saw_unset_tag);
 622                        return !!ret;
 623                }
 624                node = rcu_dereference(node->slots[offset]);
 625                shift -= RADIX_TREE_MAP_SHIFT;
 626                height--;
 627        }
 628}
 629EXPORT_SYMBOL(radix_tree_tag_get);
 630#endif
 631
 632/**
 633 *        radix_tree_next_hole    -    find the next hole (not-present entry)
 634 *        @root:                tree root
 635 *        @index:                index key
 636 *        @max_scan:        maximum range to search
 637 *
 638 *        Search the set [index, min(index+max_scan-1, MAX_INDEX)] for the lowest
 639 *        indexed hole.
 640 *
 641 *        Returns: the index of the hole if found, otherwise returns an index
 642 *        outside of the set specified (in which case 'return - index >= max_scan'
 643 *        will be true).
 644 *
 645 *        radix_tree_next_hole may be called under rcu_read_lock. However, like
 646 *        radix_tree_gang_lookup, this will not atomically search a snapshot of the
 647 *        tree at a single point in time. For example, if a hole is created at index
 648 *        5, then subsequently a hole is created at index 10, radix_tree_next_hole
 649 *        covering both indexes may return 10 if called under rcu_read_lock.
 650 */
 651unsigned long radix_tree_next_hole(struct radix_tree_root *root,
 652                                unsigned long index, unsigned long max_scan)
 653{
 654        unsigned long i;
 655
 656        for (i = 0; i < max_scan; i++) {
 657                if (!radix_tree_lookup(root, index))
 658                        break;
 659                index++;
 660                if (index == 0)
 661                        break;
 662        }
 663
 664        return index;
 665}
 666EXPORT_SYMBOL(radix_tree_next_hole);
 667
 668static unsigned int
 669__lookup(struct radix_tree_node *slot, void ***results, unsigned long index,
 670        unsigned int max_items, unsigned long *next_index)
 671{
 672        unsigned int nr_found = 0;
 673        unsigned int shift, height;
 674        unsigned long i;
 675
 676        height = slot->height;
 677        if (height == 0)
 678                goto out;
 679        shift = (height-1) * RADIX_TREE_MAP_SHIFT;
 680
 681        for ( ; height > 1; height--) {
 682                i = (index >> shift) & RADIX_TREE_MAP_MASK;
 683                for (;;) {
 684                        if (slot->slots[i] != NULL)
 685                                break;
 686                        index &= ~((1UL << shift) - 1);
 687                        index += 1UL << shift;
 688                        if (index == 0)
 689                                goto out;        /* 32-bit wraparound */
 690                        i++;
 691                        if (i == RADIX_TREE_MAP_SIZE)
 692                                goto out;
 693                }
 694
 695                shift -= RADIX_TREE_MAP_SHIFT;
 696                slot = rcu_dereference(slot->slots[i]);
 697                if (slot == NULL)
 698                        goto out;
 699        }
 700
 701        /* Bottom level: grab some items */
 702        for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) {
 703                index++;
 704                if (slot->slots[i]) {
 705                        results[nr_found++] = &(slot->slots[i]);
 706                        if (nr_found == max_items)
 707                                goto out;
 708                }
 709        }
 710out:
 711        *next_index = index;
 712        return nr_found;
 713}
 714
 715/**
 716 *        radix_tree_gang_lookup - perform multiple lookup on a radix tree
 717 *        @root:                radix tree root
 718 *        @results:        where the results of the lookup are placed
 719 *        @first_index:        start the lookup from this key
 720 *        @max_items:        place up to this many items at *results
 721 *
 722 *        Performs an index-ascending scan of the tree for present items.  Places
 723 *        them at *@results and returns the number of items which were placed at
 724 *        *@results.
 725 *
 726 *        The implementation is naive.
 727 *
 728 *        Like radix_tree_lookup, radix_tree_gang_lookup may be called under
 729 *        rcu_read_lock. In this case, rather than the returned results being
 730 *        an atomic snapshot of the tree at a single point in time, the semantics
 731 *        of an RCU protected gang lookup are as though multiple radix_tree_lookups
 732 *        have been issued in individual locks, and results stored in 'results'.
 733 */
 734unsigned int
 735radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
 736                        unsigned long first_index, unsigned int max_items)
 737{
 738        unsigned long max_index;
 739        struct radix_tree_node *node;
 740        unsigned long cur_index = first_index;
 741        unsigned int ret;
 742
 743        node = rcu_dereference(root->rnode);
 744        if (!node)
 745                return 0;
 746
 747        if (!radix_tree_is_indirect_ptr(node)) {
 748                if (first_index > 0)
 749                        return 0;
 750                results[0] = node;
 751                return 1;
 752        }
 753        node = radix_tree_indirect_to_ptr(node);
 754
 755        max_index = radix_tree_maxindex(node->height);
 756
 757        ret = 0;
 758        while (ret < max_items) {
 759                unsigned int nr_found, slots_found, i;
 760                unsigned long next_index;        /* Index of next search */
 761
 762                if (cur_index > max_index)
 763                        break;
 764                slots_found = __lookup(node, (void ***)results + ret, cur_index,
 765                                        max_items - ret, &next_index);
 766                nr_found = 0;
 767                for (i = 0; i < slots_found; i++) {
 768                        struct radix_tree_node *slot;
 769                        slot = *(((void ***)results)[ret + i]);
 770                        if (!slot)
 771                                continue;
 772                        results[ret + nr_found] = rcu_dereference(slot);
 773                        nr_found++;
 774                }
 775                ret += nr_found;
 776                if (next_index == 0)
 777                        break;
 778                cur_index = next_index;
 779        }
 780
 781        return ret;
 782}
 783EXPORT_SYMBOL(radix_tree_gang_lookup);
 784
 785/**
 786 *        radix_tree_gang_lookup_slot - perform multiple slot lookup on radix tree
 787 *        @root:                radix tree root
 788 *        @results:        where the results of the lookup are placed
 789 *        @first_index:        start the lookup from this key
 790 *        @max_items:        place up to this many items at *results
 791 *
 792 *        Performs an index-ascending scan of the tree for present items.  Places
 793 *        their slots at *@results and returns the number of items which were
 794 *        placed at *@results.
 795 *
 796 *        The implementation is naive.
 797 *
 798 *        Like radix_tree_gang_lookup as far as RCU and locking goes. Slots must
 799 *        be dereferenced with radix_tree_deref_slot, and if using only RCU
 800 *        protection, radix_tree_deref_slot may fail requiring a retry.
 801 */
 802unsigned int
 803radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results,
 804                        unsigned long first_index, unsigned int max_items)
 805{
 806        unsigned long max_index;
 807        struct radix_tree_node *node;
 808        unsigned long cur_index = first_index;
 809        unsigned int ret;
 810
 811        node = rcu_dereference(root->rnode);
 812        if (!node)
 813                return 0;
 814
 815        if (!radix_tree_is_indirect_ptr(node)) {
 816                if (first_index > 0)
 817                        return 0;
 818                results[0] = (void **)&root->rnode;
 819                return 1;
 820        }
 821        node = radix_tree_indirect_to_ptr(node);
 822
 823        max_index = radix_tree_maxindex(node->height);
 824
 825        ret = 0;
 826        while (ret < max_items) {
 827                unsigned int slots_found;
 828                unsigned long next_index;        /* Index of next search */
 829
 830                if (cur_index > max_index)
 831                        break;
 832                slots_found = __lookup(node, results + ret, cur_index,
 833                                        max_items - ret, &next_index);
 834                ret += slots_found;
 835                if (next_index == 0)
 836                        break;
 837                cur_index = next_index;
 838        }
 839
 840        return ret;
 841}
 842EXPORT_SYMBOL(radix_tree_gang_lookup_slot);
 843
 844/*
 845 * FIXME: the two tag_get()s here should use find_next_bit() instead of
 846 * open-coding the search.
 847 */
 848static unsigned int
 849__lookup_tag(struct radix_tree_node *slot, void ***results, unsigned long index,
 850        unsigned int max_items, unsigned long *next_index, unsigned int tag)
 851{
 852        unsigned int nr_found = 0;
 853        unsigned int shift, height;
 854
 855        height = slot->height;
 856        if (height == 0)
 857                goto out;
 858        shift = (height-1) * RADIX_TREE_MAP_SHIFT;
 859
 860        while (height > 0) {
 861                unsigned long i = (index >> shift) & RADIX_TREE_MAP_MASK ;
 862
 863                for (;;) {
 864                        if (tag_get(slot, tag, i))
 865                                break;
 866                        index &= ~((1UL << shift) - 1);
 867                        index += 1UL << shift;
 868                        if (index == 0)
 869                                goto out;        /* 32-bit wraparound */
 870                        i++;
 871                        if (i == RADIX_TREE_MAP_SIZE)
 872                                goto out;
 873                }
 874                height--;
 875                if (height == 0) {        /* Bottom level: grab some items */
 876                        unsigned long j = index & RADIX_TREE_MAP_MASK;
 877
 878                        for ( ; j < RADIX_TREE_MAP_SIZE; j++) {
 879                                index++;
 880                                if (!tag_get(slot, tag, j))
 881                                        continue;
 882                                /*
 883                                 * Even though the tag was found set, we need to
 884                                 * recheck that we have a non-NULL node, because
 885                                 * if this lookup is lockless, it may have been
 886                                 * subsequently deleted.
 887                                 *
 888                                 * Similar care must be taken in any place that
 889                                 * lookup ->slots[x] without a lock (ie. can't
 890                                 * rely on its value remaining the same).
 891                                 */
 892                                if (slot->slots[j]) {
 893                                        results[nr_found++] = &(slot->slots[j]);
 894                                        if (nr_found == max_items)
 895                                                goto out;
 896                                }
 897                        }
 898                }
 899                shift -= RADIX_TREE_MAP_SHIFT;
 900                slot = rcu_dereference(slot->slots[i]);
 901                if (slot == NULL)
 902                        break;
 903        }
 904out:
 905        *next_index = index;
 906        return nr_found;
 907}
 908
 909/**
 910 *        radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree
 911 *                                     based on a tag
 912 *        @root:                radix tree root
 913 *        @results:        where the results of the lookup are placed
 914 *        @first_index:        start the lookup from this key
 915 *        @max_items:        place up to this many items at *results
 916 *        @tag:                the tag index (< RADIX_TREE_MAX_TAGS)
 917 *
 918 *        Performs an index-ascending scan of the tree for present items which
 919 *        have the tag indexed by @tag set.  Places the items at *@results and
 920 *        returns the number of items which were placed at *@results.
 921 */
 922unsigned int
 923radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
 924                unsigned long first_index, unsigned int max_items,
 925                unsigned int tag)
 926{
 927        struct radix_tree_node *node;
 928        unsigned long max_index;
 929        unsigned long cur_index = first_index;
 930        unsigned int ret;
 931
 932        /* check the root's tag bit */
 933        if (!root_tag_get(root, tag))
 934                return 0;
 935
 936        node = rcu_dereference(root->rnode);
 937        if (!node)
 938                return 0;
 939
 940        if (!radix_tree_is_indirect_ptr(node)) {
 941                if (first_index > 0)
 942                        return 0;
 943                results[0] = node;
 944                return 1;
 945        }
 946        node = radix_tree_indirect_to_ptr(node);
 947
 948        max_index = radix_tree_maxindex(node->height);
 949
 950        ret = 0;
 951        while (ret < max_items) {
 952                unsigned int nr_found, slots_found, i;
 953                unsigned long next_index;        /* Index of next search */
 954
 955                if (cur_index > max_index)
 956                        break;
 957                slots_found = __lookup_tag(node, (void ***)results + ret,
 958                                cur_index, max_items - ret, &next_index, tag);
 959                nr_found = 0;
 960                for (i = 0; i < slots_found; i++) {
 961                        struct radix_tree_node *slot;
 962                        slot = *(((void ***)results)[ret + i]);
 963                        if (!slot)
 964                                continue;
 965                        results[ret + nr_found] = rcu_dereference(slot);
 966                        nr_found++;
 967                }
 968                ret += nr_found;
 969                if (next_index == 0)
 970                        break;
 971                cur_index = next_index;
 972        }
 973
 974        return ret;
 975}
 976EXPORT_SYMBOL(radix_tree_gang_lookup_tag);
 977
 978/**
 979 *        radix_tree_gang_lookup_tag_slot - perform multiple slot lookup on a
 980 *                                          radix tree based on a tag
 981 *        @root:                radix tree root
 982 *        @results:        where the results of the lookup are placed
 983 *        @first_index:        start the lookup from this key
 984 *        @max_items:        place up to this many items at *results
 985 *        @tag:                the tag index (< RADIX_TREE_MAX_TAGS)
 986 *
 987 *        Performs an index-ascending scan of the tree for present items which
 988 *        have the tag indexed by @tag set.  Places the slots at *@results and
 989 *        returns the number of slots which were placed at *@results.
 990 */
 991unsigned int
 992radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results,
 993                unsigned long first_index, unsigned int max_items,
 994                unsigned int tag)
 995{
 996        struct radix_tree_node *node;
 997        unsigned long max_index;
 998        unsigned long cur_index = first_index;
 999        unsigned int ret;
1000
1001        /* check the root's tag bit */
1002        if (!root_tag_get(root, tag))
1003                return 0;
1004
1005        node = rcu_dereference(root->rnode);
1006        if (!node)
1007                return 0;
1008
1009        if (!radix_tree_is_indirect_ptr(node)) {
1010                if (first_index > 0)
1011                        return 0;
1012                results[0] = (void **)&root->rnode;
1013                return 1;
1014        }
1015        node = radix_tree_indirect_to_ptr(node);
1016
1017        max_index = radix_tree_maxindex(node->height);
1018
1019        ret = 0;
1020        while (ret < max_items) {
1021                unsigned int slots_found;
1022                unsigned long next_index;        /* Index of next search */
1023
1024                if (cur_index > max_index)
1025                        break;
1026                slots_found = __lookup_tag(node, results + ret,
1027                                cur_index, max_items - ret, &next_index, tag);
1028                ret += slots_found;
1029                if (next_index == 0)
1030                        break;
1031                cur_index = next_index;
1032        }
1033
1034        return ret;
1035}
1036EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot);
1037
1038
1039/**
1040 *        radix_tree_shrink    -    shrink height of a radix tree to minimal
1041 *        @root                radix tree root
1042 */
1043static inline void radix_tree_shrink(struct radix_tree_root *root)
1044{
1045        /* try to shrink tree height */
1046        while (root->height > 0) {
1047                struct radix_tree_node *to_free = root->rnode;
1048                void *newptr;
1049
1050                BUG_ON(!radix_tree_is_indirect_ptr(to_free));
1051                to_free = radix_tree_indirect_to_ptr(to_free);
1052
1053                /*
1054                 * The candidate node has more than one child, or its child
1055                 * is not at the leftmost slot, we cannot shrink.
1056                 */
1057                if (to_free->count != 1)
1058                        break;
1059                if (!to_free->slots[0])
1060                        break;
1061
1062                /*
1063                 * We don't need rcu_assign_pointer(), since we are simply
1064                 * moving the node from one part of the tree to another. If
1065                 * it was safe to dereference the old pointer to it
1066                 * (to_free->slots[0]), it will be safe to dereference the new
1067                 * one (root->rnode).
1068                 */
1069                newptr = to_free->slots[0];
1070                if (root->height > 1)
1071                        newptr = radix_tree_ptr_to_indirect(newptr);
1072                root->rnode = newptr;
1073                root->height--;
1074                radix_tree_node_free(to_free);
1075        }
1076}
1077
1078/**
1079 *        radix_tree_delete    -    delete an item from a radix tree
1080 *        @root:                radix tree root
1081 *        @index:                index key
1082 *
1083 *        Remove the item at @index from the radix tree rooted at @root.
1084 *
1085 *        Returns the address of the deleted item, or NULL if it was not present.
1086 */
1087void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
1088{
1089        /*
1090         * The radix tree path needs to be one longer than the maximum path
1091         * since the "list" is null terminated.
1092         */
1093        struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path;
1094        struct radix_tree_node *slot = NULL;
1095        struct radix_tree_node *to_free;
1096        unsigned int height, shift;
1097        int tag;
1098        int offset;
1099
1100        height = root->height;
1101        if (index > radix_tree_maxindex(height))
1102                goto out;
1103
1104        slot = root->rnode;
1105        if (height == 0) {
1106                root_tag_clear_all(root);
1107                root->rnode = NULL;
1108                goto out;
1109        }
1110        slot = radix_tree_indirect_to_ptr(slot);
1111
1112        shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
1113        pathp->node = NULL;
1114
1115        do {
1116                if (slot == NULL)
1117                        goto out;
1118
1119                pathp++;
1120                offset = (index >> shift) & RADIX_TREE_MAP_MASK;
1121                pathp->offset = offset;
1122                pathp->node = slot;
1123                slot = slot->slots[offset];
1124                shift -= RADIX_TREE_MAP_SHIFT;
1125                height--;
1126        } while (height > 0);
1127
1128        if (slot == NULL)
1129                goto out;
1130
1131        /*
1132         * Clear all tags associated with the just-deleted item
1133         */
1134        for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
1135                if (tag_get(pathp->node, tag, pathp->offset))
1136                        radix_tree_tag_clear(root, index, tag);
1137        }
1138
1139        to_free = NULL;
1140        /* Now free the nodes we do not need anymore */
1141        while (pathp->node) {
1142                pathp->node->slots[pathp->offset] = NULL;
1143                pathp->node->count--;
1144                /*
1145                 * Queue the node for deferred freeing after the
1146                 * last reference to it disappears (set NULL, above).
1147                 */
1148                if (to_free)
1149                        radix_tree_node_free(to_free);
1150
1151                if (pathp->node->count) {
1152                        if (pathp->node ==
1153                                        radix_tree_indirect_to_ptr(root->rnode))
1154                                radix_tree_shrink(root);
1155                        goto out;
1156                }
1157
1158                /* Node with zero slots in use so free it */
1159                to_free = pathp->node;
1160                pathp--;
1161
1162        }
1163        root_tag_clear_all(root);
1164        root->height = 0;
1165        root->rnode = NULL;
1166        if (to_free)
1167                radix_tree_node_free(to_free);
1168
1169out:
1170        return slot;
1171}
1172EXPORT_SYMBOL(radix_tree_delete);
1173
1174/**
1175 *        radix_tree_tagged - test whether any items in the tree are tagged
1176 *        @root:                radix tree root
1177 *        @tag:                tag to test
1178 */
1179int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag)
1180{
1181        return root_tag_get(root, tag);
1182}
1183EXPORT_SYMBOL(radix_tree_tagged);
1184
1185static void
1186radix_tree_node_ctor(void *node)
1187{
1188        memset(node, 0, sizeof(struct radix_tree_node));
1189}
1190
1191static __init unsigned long __maxindex(unsigned int height)
1192{
1193        unsigned int width = height * RADIX_TREE_MAP_SHIFT;
1194        int shift = RADIX_TREE_INDEX_BITS - width;
1195
1196        if (shift < 0)
1197                return ~0UL;
1198        if (shift >= BITS_PER_LONG)
1199                return 0UL;
1200        return ~0UL >> shift;
1201}
1202
1203static __init void radix_tree_init_maxindex(void)
1204{
1205        unsigned int i;
1206
1207        for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++)
1208                height_to_maxindex[i] = __maxindex(i);
1209}
1210
1211static int radix_tree_callback(struct notifier_block *nfb,
1212                            unsigned long action,
1213                            void *hcpu)
1214{
1215       int cpu = (long)hcpu;
1216       struct radix_tree_preload *rtp;
1217
1218       /* Free per-cpu pool of perloaded nodes */
1219       if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
1220               rtp = &per_cpu(radix_tree_preloads, cpu);
1221               while (rtp->nr) {
1222                       kmem_cache_free(radix_tree_node_cachep,
1223                                       rtp->nodes[rtp->nr-1]);
1224                       rtp->nodes[rtp->nr-1] = NULL;
1225                       rtp->nr--;
1226               }
1227       }
1228       return NOTIFY_OK;
1229}
1230
1231void __init radix_tree_init(void)
1232{
1233        radix_tree_node_cachep = kmem_cache_create("radix_tree_node",
1234                        sizeof(struct radix_tree_node), 0,
1235                        SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,
1236                        radix_tree_node_ctor);
1237        radix_tree_init_maxindex();
1238        hotcpu_notifier(radix_tree_callback, 0);
1239}