Showing error 793

User: Jiri Slaby
Error type: Memory Leak
Error type description: The code fails to free some allocated memory
File location: fs/ntfs/dir.c
Line in file: 287
Project: Linux Kernel
Project version: 2.6.28
Tools: Stanse (1.2)
Entered: 2011-11-07 22:26:27 UTC


Source:

   1/**
   2 * dir.c - NTFS kernel directory operations. Part of the Linux-NTFS project.
   3 *
   4 * Copyright (c) 2001-2007 Anton Altaparmakov
   5 * Copyright (c) 2002 Richard Russon
   6 *
   7 * This program/include file is free software; you can redistribute it and/or
   8 * modify it under the terms of the GNU General Public License as published
   9 * by the Free Software Foundation; either version 2 of the License, or
  10 * (at your option) any later version.
  11 *
  12 * This program/include file is distributed in the hope that it will be
  13 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
  14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 * GNU General Public License for more details.
  16 *
  17 * You should have received a copy of the GNU General Public License
  18 * along with this program (in the main directory of the Linux-NTFS
  19 * distribution in the file COPYING); if not, write to the Free Software
  20 * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  21 */
  22
  23#include <linux/buffer_head.h>
  24
  25#include "dir.h"
  26#include "aops.h"
  27#include "attrib.h"
  28#include "mft.h"
  29#include "debug.h"
  30#include "ntfs.h"
  31
  32/**
  33 * The little endian Unicode string $I30 as a global constant.
     *
     * The array holds the four characters '$', 'I', '3', '0' followed by a
     * terminating 0, i.e. five elements in total.  ntfs_lookup_inode_by_name()
     * below passes it, together with a name length of 4, as the attribute name
     * when it looks up the AT_INDEX_ROOT attribute of a directory.
  34 */
  35ntfschar I30[5] = { const_cpu_to_le16('$'), const_cpu_to_le16('I'),
  36                const_cpu_to_le16('3'),        const_cpu_to_le16('0'), 0 };
  37
  38/**
  39 * ntfs_lookup_inode_by_name - find an inode in a directory given its name
  40 * @dir_ni:        ntfs inode of the directory in which to search for the name
  41 * @uname:        Unicode name for which to search in the directory
  42 * @uname_len:        length of the name @uname in Unicode characters
  43 * @res:        return the found file name if necessary (see below)
  44 *
  45 * Look for an inode with name @uname in the directory with inode @dir_ni.
  46 * ntfs_lookup_inode_by_name() walks the contents of the directory looking for
  47 * the Unicode name. If the name is found in the directory, the corresponding
  48 * inode number (>= 0) is returned as a mft reference in cpu format, i.e. it
  49 * is a 64-bit number containing the sequence number.
  50 *
  51 * On error, a negative value is returned corresponding to the error code. In
  52 * particular if the inode is not found -ENOENT is returned. Note that you
  53 * can't just check the return value for being negative, you have to check the
  54 * inode number for being negative which you can extract using MREC(return
  55 * value).
  56 *
  57 * Note, @uname_len does not include the (optional) terminating NULL character.
  58 *
  59 * Note, we look for a case sensitive match first but we also look for a case
  60 * insensitive match at the same time. If we find a case insensitive match, we
  61 * save that for the case that we don't find an exact match, where we return
  62 * the case insensitive match and setup @res (which we allocate!) with the mft
  63 * reference, the file name type, length and with a copy of the little endian
  64 * Unicode file name itself. If we match a file name which is in the DOS name
  65 * space, we only return the mft reference and file name type in @res.
  66 * ntfs_lookup() then uses this to find the long file name in the inode itself.
  67 * This is to avoid polluting the dcache with short file names. We want them to
  68 * work but we don't care for how quickly one can access them. This also fixes
  69 * the dcache aliasing issues.
     *
     * Ownership of @res: every ntfs_name allocated in this function is stored
     * in *@res right after it has been filled in.  When the function returns a
     * mft reference and *@res is non-NULL, that allocation is NOT freed here; it
     * stays reachable through @res.  Presumably the caller (ntfs_lookup(), per
     * the note above) is expected to kfree() it - TODO confirm against the
     * caller.  On the error paths that go through the err_out label a cached
     * name is freed and *@res is reset to NULL.  *@res is not written at all
     * when map_mft_record() fails, nor on paths where no name was ever cached
     * and no perfect match was found.
  70 *
  71 * Locking:  - Caller must hold i_mutex on the directory.
  72 *             - Each page cache page in the index allocation mapping must be
  73 *               locked whilst being accessed otherwise we may find a corrupt
  74 *               page due to it being under ->writepage at the moment which
  75 *               applies the mst protection fixups before writing out and then
  76 *               removes them again after the write is complete after which it
  77 *               unlocks the page.
  78 */
  79MFT_REF ntfs_lookup_inode_by_name(ntfs_inode *dir_ni, const ntfschar *uname,
  80                const int uname_len, ntfs_name **res)
  81{
  82        ntfs_volume *vol = dir_ni->vol;
  83        struct super_block *sb = vol->sb;
  84        MFT_RECORD *m;
  85        INDEX_ROOT *ir;
  86        INDEX_ENTRY *ie;
  87        INDEX_ALLOCATION *ia;
  88        u8 *index_end;
  89        u64 mref;
  90        ntfs_attr_search_ctx *ctx;
  91        int err, rc;
  92        VCN vcn, old_vcn;
  93        struct address_space *ia_mapping;
  94        struct page *page;
  95        u8 *kaddr;
            /*
             * Cached imperfect match (or DOS name marker).  Whenever it is set
             * it is also published through *res; err_out frees it again.
             */
  96        ntfs_name *name = NULL;
  97
  98        BUG_ON(!S_ISDIR(VFS_I(dir_ni)->i_mode));
  99        BUG_ON(NInoAttr(dir_ni));
 100        /* Get hold of the mft record for the directory. */
 101        m = map_mft_record(dir_ni);
 102        if (IS_ERR(m)) {
 103                ntfs_error(sb, "map_mft_record() failed with error code %ld.",
 104                                -PTR_ERR(m));
 105                return ERR_MREF(PTR_ERR(m));
 106        }
 107        ctx = ntfs_attr_get_search_ctx(dir_ni, m);
 108        if (unlikely(!ctx)) {
 109                err = -ENOMEM;
 110                goto err_out;
 111        }
 112        /* Find the index root attribute in the mft record. */
 113        err = ntfs_attr_lookup(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, NULL,
 114                        0, ctx);
 115        if (unlikely(err)) {
 116                if (err == -ENOENT) {
 117                        ntfs_error(sb, "Index root attribute missing in "
 118                                        "directory inode 0x%lx.",
 119                                        dir_ni->mft_no);
 120                        err = -EIO;
 121                }
 122                goto err_out;
 123        }
            /*
             * From here on err is 0 unless an error path sets it explicitly;
             * err_out relies on that to default to -EIO.
             */
 124        /* Get to the index root value (it's been verified in read_inode). */
 125        ir = (INDEX_ROOT*)((u8*)ctx->attr +
 126                        le16_to_cpu(ctx->attr->data.resident.value_offset));
 127        index_end = (u8*)&ir->index + le32_to_cpu(ir->index.index_length);
 128        /* The first index entry. */
 129        ie = (INDEX_ENTRY*)((u8*)&ir->index +
 130                        le32_to_cpu(ir->index.entries_offset));
 131        /*
 132         * Loop until we exceed valid memory (corruption case) or until we
 133         * reach the last entry.
             *
             * NOTE(review): the loop step advances by ie->length as read from
             * the index.  Nothing visible in this loop rejects a zero length, in
             * which case a "continue" below would revisit the same entry -
             * confirm that this is validated elsewhere.
 134         */
 135        for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) {
 136                /* Bounds checks. */
 137                if ((u8*)ie < (u8*)ctx->mrec || (u8*)ie +
 138                                sizeof(INDEX_ENTRY_HEADER) > index_end ||
 139                                (u8*)ie + le16_to_cpu(ie->key_length) >
 140                                index_end)
 141                        goto dir_err_out;
 142                /*
 143                 * The last entry cannot contain a name. It can however contain
 144                 * a pointer to a child node in the B+tree so we just break out.
 145                 */
 146                if (ie->flags & INDEX_ENTRY_END)
 147                        break;
 148                /*
 149                 * We perform a case sensitive comparison and if that matches
 150                 * we are done and return the mft reference of the inode (i.e.
 151                 * the inode number together with the sequence number for
 152                 * consistency checking). We convert it to cpu format before
 153                 * returning.
 154                 */
 155                if (ntfs_are_names_equal(uname, uname_len,
 156                                (ntfschar*)&ie->key.file_name.file_name,
 157                                ie->key.file_name.file_name_length,
 158                                CASE_SENSITIVE, vol->upcase, vol->upcase_len)) {
 159found_it:
 160                        /*
 161                         * We have a perfect match, so we don't need to care
 162                         * about having matched imperfectly before, so we can
 163                         * free name and set *res to NULL.
 164                         * However, if the perfect match is a short file name,
 165                         * we need to signal this through *res, so that
 166                         * ntfs_lookup() can fix dcache aliasing issues.
 167                         * As an optimization we just reuse an existing
 168                         * allocation of *res.
                             *
                             * This label is also the target of the "goto
                             * found_it" at the bottom of the loop body.
 169                         */
 170                        if (ie->key.file_name.file_name_type == FILE_NAME_DOS) {
 171                                if (!name) {
 172                                        name = kmalloc(sizeof(ntfs_name),
 173                                                        GFP_NOFS);
 174                                        if (!name) {
 175                                                err = -ENOMEM;
 176                                                goto err_out;
 177                                        }
 178                                }
 179                                name->mref = le64_to_cpu(
 180                                                ie->data.dir.indexed_file);
 181                                name->type = FILE_NAME_DOS;
 182                                name->len = 0;
 183                                *res = name;
 184                        } else {
 185                                kfree(name);
 186                                *res = NULL;
 187                        }
 188                        mref = le64_to_cpu(ie->data.dir.indexed_file);
 189                        ntfs_attr_put_search_ctx(ctx);
 190                        unmap_mft_record(dir_ni);
 191                        return mref;
 192                }
 193                /*
 194                 * For a case insensitive mount, we also perform a case
 195                 * insensitive comparison (provided the file name is not in the
 196                 * POSIX namespace). If the comparison matches, and the name is
 197                 * in the WIN32 namespace, we cache the filename in *res so
 198                 * that the caller, ntfs_lookup(), can work on it. If the
 199                 * comparison matches, and the name is in the DOS namespace, we
 200                 * only cache the mft reference and the file name type (we set
 201                 * the name length to zero for simplicity).
 202                 */
 203                if (!NVolCaseSensitive(vol) &&
 204                                ie->key.file_name.file_name_type &&
 205                                ntfs_are_names_equal(uname, uname_len,
 206                                (ntfschar*)&ie->key.file_name.file_name,
 207                                ie->key.file_name.file_name_length,
 208                                IGNORE_CASE, vol->upcase, vol->upcase_len)) {
 209                        int name_size = sizeof(ntfs_name);
 210                        u8 type = ie->key.file_name.file_name_type;
 211                        u8 len = ie->key.file_name.file_name_length;
 212
 213                        /* Only one case insensitive matching name allowed. */
 214                        if (name) {
 215                                ntfs_error(sb, "Found already allocated name "
 216                                                "in phase 1. Please run chkdsk "
 217                                                "and if that doesn't find any "
 218                                                "errors please report you saw "
 219                                                "this message to "
 220                                                "linux-ntfs-dev@lists."
 221                                                "sourceforge.net.");
 222                                goto dir_err_out;
 223                        }
 224
                            /* The name characters are only copied for non-DOS names. */
 225                        if (type != FILE_NAME_DOS)
 226                                name_size += len * sizeof(ntfschar);
 227                        name = kmalloc(name_size, GFP_NOFS);
 228                        if (!name) {
 229                                err = -ENOMEM;
 230                                goto err_out;
 231                        }
 232                        name->mref = le64_to_cpu(ie->data.dir.indexed_file);
 233                        name->type = type;
 234                        if (type != FILE_NAME_DOS) {
 235                                name->len = len;
 236                                memcpy(name->name, ie->key.file_name.file_name,
 237                                                len * sizeof(ntfschar));
 238                        } else
 239                                name->len = 0;
 240                        *res = name;
 241                }
 242                /*
 243                 * Not a perfect match, need to do full blown collation so we
 244                 * know which way in the B+tree we have to go.
 245                 */
 246                rc = ntfs_collate_names(uname, uname_len,
 247                                (ntfschar*)&ie->key.file_name.file_name,
 248                                ie->key.file_name.file_name_length, 1,
 249                                IGNORE_CASE, vol->upcase, vol->upcase_len);
 250                /*
 251                 * If uname collates before the name of the current entry, there
 252                 * is definitely no such name in this index but we might need to
 253                 * descend into the B+tree so we just break out of the loop.
 254                 */
 255                if (rc == -1)
 256                        break;
 257                /* The names are not equal, continue the search. */
 258                if (rc)
 259                        continue;
 260                /*
 261                 * Names match with case insensitive comparison, now try the
 262                 * case sensitive comparison, which is required for proper
 263                 * collation.
 264                 */
 265                rc = ntfs_collate_names(uname, uname_len,
 266                                (ntfschar*)&ie->key.file_name.file_name,
 267                                ie->key.file_name.file_name_length, 1,
 268                                CASE_SENSITIVE, vol->upcase, vol->upcase_len);
 269                if (rc == -1)
 270                        break;
 271                if (rc)
 272                        continue;
 273                /*
 274                 * Perfect match, this will never happen as the
 275                 * ntfs_are_names_equal() call will have gotten a match but we
 276                 * still treat it correctly.
 277                 */
 278                goto found_it;
 279        }
 280        /*
 281         * We have finished with this index without success. Check for the
 282         * presence of a child node and if not present return -ENOENT, unless
 283         * we have got a matching name cached in name in which case return the
 284         * mft reference associated with it.
 285         */
 286        if (!(ie->flags & INDEX_ENTRY_NODE)) {
 287                if (name) {
                            /*
                             * name is not freed on this path: it was stored in
                             * *res when it was allocated in the loop above, so
                             * it remains reachable by the caller through @res.
                             */
 288                        ntfs_attr_put_search_ctx(ctx);
 289                        unmap_mft_record(dir_ni);
 290                        return name->mref;
 291                }
 292                ntfs_debug("Entry not found.");
 293                err = -ENOENT;
 294                goto err_out;
 295        } /* Child node present, descend into it. */
 296        /* Consistency check: Verify that an index allocation exists. */
 297        if (!NInoIndexAllocPresent(dir_ni)) {
 298                ntfs_error(sb, "No index allocation attribute but index entry "
 299                                "requires one. Directory inode 0x%lx is "
 300                                "corrupt or driver bug.", dir_ni->mft_no);
                    /* err is still 0 here; err_out converts that to -EIO. */
 301                goto err_out;
 302        }
 303        /* Get the starting vcn of the index_block holding the child node. */
            /* It is read from the last 8 bytes of the index entry. */
 304        vcn = sle64_to_cpup((sle64*)((u8*)ie + le16_to_cpu(ie->length) - 8));
 305        ia_mapping = VFS_I(dir_ni)->i_mapping;
 306        /*
 307         * We are done with the index root and the mft record. Release them,
 308         * otherwise we deadlock with ntfs_map_page().
 309         */
 310        ntfs_attr_put_search_ctx(ctx);
 311        unmap_mft_record(dir_ni);
            /* Clear both so that err_out does not release them a second time. */
 312        m = NULL;
 313        ctx = NULL;
 314descend_into_child_node:
 315        /*
 316         * Convert vcn to index into the index allocation attribute in units
 317         * of PAGE_CACHE_SIZE and map the page cache page, reading it from
 318         * disk if necessary.
 319         */
 320        page = ntfs_map_page(ia_mapping, vcn <<
 321                        dir_ni->itype.index.vcn_size_bits >> PAGE_CACHE_SHIFT);
 322        if (IS_ERR(page)) {
 323                ntfs_error(sb, "Failed to map directory index page, error %ld.",
 324                                -PTR_ERR(page));
 325                err = PTR_ERR(page);
                    /* No page to unlock/unmap, so skip unm_err_out. */
 326                goto err_out;
 327        }
 328        lock_page(page);
 329        kaddr = (u8*)page_address(page);
 330fast_descend_into_child_node:
 331        /* Get to the index allocation block. */
 332        ia = (INDEX_ALLOCATION*)(kaddr + ((vcn <<
 333                        dir_ni->itype.index.vcn_size_bits) & ~PAGE_CACHE_MASK));
 334        /* Bounds checks. */
 335        if ((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_CACHE_SIZE) {
 336                ntfs_error(sb, "Out of bounds check failed. Corrupt directory "
 337                                "inode 0x%lx or driver bug.", dir_ni->mft_no);
 338                goto unm_err_out;
 339        }
 340        /* Catch multi sector transfer fixup errors. */
 341        if (unlikely(!ntfs_is_indx_record(ia->magic))) {
 342                ntfs_error(sb, "Directory index record with vcn 0x%llx is "
 343                                "corrupt.  Corrupt inode 0x%lx.  Run chkdsk.",
 344                                (unsigned long long)vcn, dir_ni->mft_no);
 345                goto unm_err_out;
 346        }
 347        if (sle64_to_cpu(ia->index_block_vcn) != vcn) {
 348                ntfs_error(sb, "Actual VCN (0x%llx) of index buffer is "
 349                                "different from expected VCN (0x%llx). "
 350                                "Directory inode 0x%lx is corrupt or driver "
 351                                "bug.", (unsigned long long)
 352                                sle64_to_cpu(ia->index_block_vcn),
 353                                (unsigned long long)vcn, dir_ni->mft_no);
 354                goto unm_err_out;
 355        }
            /*
             * NOTE(review): 0x18 is presumably the number of bytes in the
             * INDEX_ALLOCATION record that precede its index header - TODO
             * confirm against the structure definition.
             */
 356        if (le32_to_cpu(ia->index.allocated_size) + 0x18 !=
 357                        dir_ni->itype.index.block_size) {
 358                ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode "
 359                                "0x%lx has a size (%u) differing from the "
 360                                "directory specified size (%u). Directory "
 361                                "inode is corrupt or driver bug.",
 362                                (unsigned long long)vcn, dir_ni->mft_no,
 363                                le32_to_cpu(ia->index.allocated_size) + 0x18,
 364                                dir_ni->itype.index.block_size);
 365                goto unm_err_out;
 366        }
            /* First use of index_end: end of the whole index block. */
 367        index_end = (u8*)ia + dir_ni->itype.index.block_size;
 368        if (index_end > kaddr + PAGE_CACHE_SIZE) {
 369                ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode "
 370                                "0x%lx crosses page boundary. Impossible! "
 371                                "Cannot access! This is probably a bug in the "
 372                                "driver.", (unsigned long long)vcn,
 373                                dir_ni->mft_no);
 374                goto unm_err_out;
 375        }
            /* Second use of index_end: end of the used part of the index. */
 376        index_end = (u8*)&ia->index + le32_to_cpu(ia->index.index_length);
 377        if (index_end > (u8*)ia + dir_ni->itype.index.block_size) {
 378                ntfs_error(sb, "Size of index buffer (VCN 0x%llx) of directory "
 379                                "inode 0x%lx exceeds maximum size.",
 380                                (unsigned long long)vcn, dir_ni->mft_no);
 381                goto unm_err_out;
 382        }
 383        /* The first index entry. */
 384        ie = (INDEX_ENTRY*)((u8*)&ia->index +
 385                        le32_to_cpu(ia->index.entries_offset));
 386        /*
 387         * Iterate similar to above big loop but applied to index buffer, thus
 388         * loop until we exceed valid memory (corruption case) or until we
 389         * reach the last entry.
             *
             * The differences to the first loop visible here are that error
             * paths must release the locked page (unm_err_out) and that the
             * mft record and search context have already been released.
 390         */
 391        for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) {
 392                /* Bounds check. */
 393                if ((u8*)ie < (u8*)ia || (u8*)ie +
 394                                sizeof(INDEX_ENTRY_HEADER) > index_end ||
 395                                (u8*)ie + le16_to_cpu(ie->key_length) >
 396                                index_end) {
 397                        ntfs_error(sb, "Index entry out of bounds in "
 398                                        "directory inode 0x%lx.",
 399                                        dir_ni->mft_no);
 400                        goto unm_err_out;
 401                }
 402                /*
 403                 * The last entry cannot contain a name. It can however contain
 404                 * a pointer to a child node in the B+tree so we just break out.
 405                 */
 406                if (ie->flags & INDEX_ENTRY_END)
 407                        break;
 408                /*
 409                 * We perform a case sensitive comparison and if that matches
 410                 * we are done and return the mft reference of the inode (i.e.
 411                 * the inode number together with the sequence number for
 412                 * consistency checking). We convert it to cpu format before
 413                 * returning.
 414                 */
 415                if (ntfs_are_names_equal(uname, uname_len,
 416                                (ntfschar*)&ie->key.file_name.file_name,
 417                                ie->key.file_name.file_name_length,
 418                                CASE_SENSITIVE, vol->upcase, vol->upcase_len)) {
 419found_it2:
 420                        /*
 421                         * We have a perfect match, so we don't need to care
 422                         * about having matched imperfectly before, so we can
 423                         * free name and set *res to NULL.
 424                         * However, if the perfect match is a short file name,
 425                         * we need to signal this through *res, so that
 426                         * ntfs_lookup() can fix dcache aliasing issues.
 427                         * As an optimization we just reuse an existing
 428                         * allocation of *res.
 429                         */
 430                        if (ie->key.file_name.file_name_type == FILE_NAME_DOS) {
 431                                if (!name) {
 432                                        name = kmalloc(sizeof(ntfs_name),
 433                                                        GFP_NOFS);
 434                                        if (!name) {
 435                                                err = -ENOMEM;
 436                                                goto unm_err_out;
 437                                        }
 438                                }
 439                                name->mref = le64_to_cpu(
 440                                                ie->data.dir.indexed_file);
 441                                name->type = FILE_NAME_DOS;
 442                                name->len = 0;
 443                                *res = name;
 444                        } else {
 445                                kfree(name);
 446                                *res = NULL;
 447                        }
 448                        mref = le64_to_cpu(ie->data.dir.indexed_file);
 449                        unlock_page(page);
 450                        ntfs_unmap_page(page);
 451                        return mref;
 452                }
 453                /*
 454                 * For a case insensitive mount, we also perform a case
 455                 * insensitive comparison (provided the file name is not in the
 456                 * POSIX namespace). If the comparison matches, and the name is
 457                 * in the WIN32 namespace, we cache the filename in *res so
 458                 * that the caller, ntfs_lookup(), can work on it. If the
 459                 * comparison matches, and the name is in the DOS namespace, we
 460                 * only cache the mft reference and the file name type (we set
 461                 * the name length to zero for simplicity).
 462                 */
 463                if (!NVolCaseSensitive(vol) &&
 464                                ie->key.file_name.file_name_type &&
 465                                ntfs_are_names_equal(uname, uname_len,
 466                                (ntfschar*)&ie->key.file_name.file_name,
 467                                ie->key.file_name.file_name_length,
 468                                IGNORE_CASE, vol->upcase, vol->upcase_len)) {
 469                        int name_size = sizeof(ntfs_name);
 470                        u8 type = ie->key.file_name.file_name_type;
 471                        u8 len = ie->key.file_name.file_name_length;
 472
 473                        /* Only one case insensitive matching name allowed. */
 474                        if (name) {
 475                                ntfs_error(sb, "Found already allocated name "
 476                                                "in phase 2. Please run chkdsk "
 477                                                "and if that doesn't find any "
 478                                                "errors please report you saw "
 479                                                "this message to "
 480                                                "linux-ntfs-dev@lists."
 481                                                "sourceforge.net.");
                                    /*
                                     * Release the page here because dir_err_out
                                     * jumps to err_out, bypassing unm_err_out.
                                     */
 482                                unlock_page(page);
 483                                ntfs_unmap_page(page);
 484                                goto dir_err_out;
 485                        }
 486
 487                        if (type != FILE_NAME_DOS)
 488                                name_size += len * sizeof(ntfschar);
 489                        name = kmalloc(name_size, GFP_NOFS);
 490                        if (!name) {
 491                                err = -ENOMEM;
 492                                goto unm_err_out;
 493                        }
 494                        name->mref = le64_to_cpu(ie->data.dir.indexed_file);
 495                        name->type = type;
 496                        if (type != FILE_NAME_DOS) {
 497                                name->len = len;
 498                                memcpy(name->name, ie->key.file_name.file_name,
 499                                                len * sizeof(ntfschar));
 500                        } else
 501                                name->len = 0;
 502                        *res = name;
 503                }
 504                /*
 505                 * Not a perfect match, need to do full blown collation so we
 506                 * know which way in the B+tree we have to go.
 507                 */
 508                rc = ntfs_collate_names(uname, uname_len,
 509                                (ntfschar*)&ie->key.file_name.file_name,
 510                                ie->key.file_name.file_name_length, 1,
 511                                IGNORE_CASE, vol->upcase, vol->upcase_len);
 512                /*
 513                 * If uname collates before the name of the current entry, there
 514                 * is definitely no such name in this index but we might need to
 515                 * descend into the B+tree so we just break out of the loop.
 516                 */
 517                if (rc == -1)
 518                        break;
 519                /* The names are not equal, continue the search. */
 520                if (rc)
 521                        continue;
 522                /*
 523                 * Names match with case insensitive comparison, now try the
 524                 * case sensitive comparison, which is required for proper
 525                 * collation.
 526                 */
 527                rc = ntfs_collate_names(uname, uname_len,
 528                                (ntfschar*)&ie->key.file_name.file_name,
 529                                ie->key.file_name.file_name_length, 1,
 530                                CASE_SENSITIVE, vol->upcase, vol->upcase_len);
 531                if (rc == -1)
 532                        break;
 533                if (rc)
 534                        continue;
 535                /*
 536                 * Perfect match, this will never happen as the
 537                 * ntfs_are_names_equal() call will have gotten a match but we
 538                 * still treat it correctly.
 539                 */
 540                goto found_it2;
 541        }
 542        /*
 543         * We have finished with this index buffer without success. Check for
 544         * the presence of a child node.
 545         */
 546        if (ie->flags & INDEX_ENTRY_NODE) {
 547                if ((ia->index.flags & NODE_MASK) == LEAF_NODE) {
 548                        ntfs_error(sb, "Index entry with child node found in "
 549                                        "a leaf node in directory inode 0x%lx.",
 550                                        dir_ni->mft_no);
 551                        goto unm_err_out;
 552                }
 553                /* Child node present, descend into it. */
 554                old_vcn = vcn;
 555                vcn = sle64_to_cpup((sle64*)((u8*)ie +
 556                                le16_to_cpu(ie->length) - 8));
 557                if (vcn >= 0) {
 558                        /* If vcn is in the same page cache page as old_vcn we
 559                         * recycle the mapped page. */
                            /*
                             * NOTE(review): this test shifts by
                             * vol->cluster_size_bits whereas the page index
                             * passed to ntfs_map_page() above is computed with
                             * dir_ni->itype.index.vcn_size_bits - confirm the
                             * two are meant to differ.
                             */
 560                        if (old_vcn << vol->cluster_size_bits >>
 561                                        PAGE_CACHE_SHIFT == vcn <<
 562                                        vol->cluster_size_bits >>
 563                                        PAGE_CACHE_SHIFT)
 564                                goto fast_descend_into_child_node;
                            /* Different page: drop this one and map the new one. */
 565                        unlock_page(page);
 566                        ntfs_unmap_page(page);
 567                        goto descend_into_child_node;
 568                }
 569                ntfs_error(sb, "Negative child node vcn in directory inode "
 570                                "0x%lx.", dir_ni->mft_no);
 571                goto unm_err_out;
 572        }
 573        /*
 574         * No child node present, return -ENOENT, unless we have got a matching
 575         * name cached in name in which case return the mft reference
 576         * associated with it.
 577         */
 578        if (name) {
                    /*
                     * As in the index root case, name stays allocated here: it
                     * was stored in *res when it was set up, so the caller can
                     * still reach it through @res.
                     */
 579                unlock_page(page);
 580                ntfs_unmap_page(page);
 581                return name->mref;
 582        }
 583        ntfs_debug("Entry not found.");
 584        err = -ENOENT;
            /* Error exit for paths that still hold the locked, mapped page. */
 585unm_err_out:
 586        unlock_page(page);
 587        ntfs_unmap_page(page);
            /* Common error exit; also reached by falling through from above. */
 588err_out:
            /* Paths that jumped here without setting an error code get -EIO. */
 589        if (!err)
 590                err = -EIO;
            /* ctx and m are NULL once the index root phase has released them. */
 591        if (ctx)
 592                ntfs_attr_put_search_ctx(ctx);
 593        if (m)
 594                unmap_mft_record(dir_ni);
            /* Drop any cached match and make sure *res does not dangle. */
 595        if (name) {
 596                kfree(name);
 597                *res = NULL;
 598        }
 599        return ERR_MREF(err);
        /* Corruption exit: log the generic message, then take the common path. */
 600dir_err_out:
 601        ntfs_error(sb, "Corrupt directory.  Aborting lookup.");
 602        goto err_out;
 603}
 604
 605#if 0
 606
 607// TODO: (AIA)
 608// The algorithm embedded in this code will be required for the time when we
 609// want to support adding of entries to directories, where we require correct
 610// collation of file names in order not to cause corruption of the filesystem.
 611
 612/**
 613 * ntfs_lookup_inode_by_name - find an inode in a directory given its name
 614 * @dir_ni:        ntfs inode of the directory in which to search for the name
 615 * @uname:        Unicode name for which to search in the directory
 616 * @uname_len:        length of the name @uname in Unicode characters
 617 *
 618 * Look for an inode with name @uname in the directory with inode @dir_ni.
 619 * ntfs_lookup_inode_by_name() walks the contents of the directory looking for
 620 * the Unicode name. If the name is found in the directory, the corresponding
 621 * inode number (>= 0) is returned as a mft reference in cpu format, i.e. it
 622 * is a 64-bit number containing the sequence number.
 623 *
 624 * On error, a negative value is returned corresponding to the error code. In
 625 * particular if the inode is not found -ENOENT is returned. Note that you
 626 * can't just check the return value for being negative, you have to check the
 627 * inode number for being negative which you can extract using MREC(return
 628 * value).
 629 *
 630 * Note, @uname_len does not include the (optional) terminating NULL character.
 631 */
 632u64 ntfs_lookup_inode_by_name(ntfs_inode *dir_ni, const ntfschar *uname,
 633                const int uname_len)
 634{
 635        ntfs_volume *vol = dir_ni->vol;
 636        struct super_block *sb = vol->sb;
 637        MFT_RECORD *m;
 638        INDEX_ROOT *ir;
 639        INDEX_ENTRY *ie;
 640        INDEX_ALLOCATION *ia;
 641        u8 *index_end;
 642        u64 mref;
 643        ntfs_attr_search_ctx *ctx;
 644        int err, rc;
 645        IGNORE_CASE_BOOL ic;
 646        VCN vcn, old_vcn;
 647        struct address_space *ia_mapping;
 648        struct page *page;
 649        u8 *kaddr;
 650
 651        /* Get hold of the mft record for the directory. */
 652        m = map_mft_record(dir_ni);
 653        if (IS_ERR(m)) {
 654                ntfs_error(sb, "map_mft_record() failed with error code %ld.",
 655                                -PTR_ERR(m));
 656                return ERR_MREF(PTR_ERR(m));
 657        }
 658        ctx = ntfs_attr_get_search_ctx(dir_ni, m);
 659        if (!ctx) {
 660                err = -ENOMEM;
 661                goto err_out;
 662        }
 663        /* Find the index root attribute in the mft record. */
 664        err = ntfs_attr_lookup(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, NULL,
 665                        0, ctx);
 666        if (unlikely(err)) {
 667                if (err == -ENOENT) {
 668                        ntfs_error(sb, "Index root attribute missing in "
 669                                        "directory inode 0x%lx.",
 670                                        dir_ni->mft_no);
 671                        err = -EIO;
 672                }
 673                goto err_out;
 674        }
 675        /* Get to the index root value (it's been verified in read_inode). */
 676        ir = (INDEX_ROOT*)((u8*)ctx->attr +
 677                        le16_to_cpu(ctx->attr->data.resident.value_offset));
 678        index_end = (u8*)&ir->index + le32_to_cpu(ir->index.index_length);
 679        /* The first index entry. */
 680        ie = (INDEX_ENTRY*)((u8*)&ir->index +
 681                        le32_to_cpu(ir->index.entries_offset));
 682        /*
 683         * Loop until we exceed valid memory (corruption case) or until we
 684         * reach the last entry.
 685         */
 686        for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) {
 687                /* Bounds checks. */
 688                if ((u8*)ie < (u8*)ctx->mrec || (u8*)ie +
 689                                sizeof(INDEX_ENTRY_HEADER) > index_end ||
 690                                (u8*)ie + le16_to_cpu(ie->key_length) >
 691                                index_end)
 692                        goto dir_err_out;
 693                /*
 694                 * The last entry cannot contain a name. It can however contain
 695                 * a pointer to a child node in the B+tree so we just break out.
 696                 */
 697                if (ie->flags & INDEX_ENTRY_END)
 698                        break;
 699                /*
 700                 * If the current entry has a name type of POSIX, the name is
 701                 * case sensitive and not otherwise. This has the effect of us
 702                 * not being able to access any POSIX file names which collate
 703                 * after the non-POSIX one when they only differ in case, but
 704                 * anyone doing screwy stuff like that deserves to burn in
 705                 * hell... Doing that kind of stuff on NT4 actually causes
 706                 * corruption on the partition even when using SP6a and Linux
 707                 * is not involved at all.
 708                 */
 709                ic = ie->key.file_name.file_name_type ? IGNORE_CASE :
 710                                CASE_SENSITIVE;
 711                /*
 712                 * If the names match perfectly, we are done and return the
 713                 * mft reference of the inode (i.e. the inode number together
 714                 * with the sequence number for consistency checking. We
 715                 * convert it to cpu format before returning.
 716                 */
 717                if (ntfs_are_names_equal(uname, uname_len,
 718                                (ntfschar*)&ie->key.file_name.file_name,
 719                                ie->key.file_name.file_name_length, ic,
 720                                vol->upcase, vol->upcase_len)) {
 721found_it:
 722                        mref = le64_to_cpu(ie->data.dir.indexed_file);
 723                        ntfs_attr_put_search_ctx(ctx);
 724                        unmap_mft_record(dir_ni);
 725                        return mref;
 726                }
 727                /*
 728                 * Not a perfect match, need to do full blown collation so we
 729                 * know which way in the B+tree we have to go.
 730                 */
 731                rc = ntfs_collate_names(uname, uname_len,
 732                                (ntfschar*)&ie->key.file_name.file_name,
 733                                ie->key.file_name.file_name_length, 1,
 734                                IGNORE_CASE, vol->upcase, vol->upcase_len);
 735                /*
 736                 * If uname collates before the name of the current entry, there
 737                 * is definitely no such name in this index but we might need to
 738                 * descend into the B+tree so we just break out of the loop.
 739                 */
 740                if (rc == -1)
 741                        break;
 742                /* The names are not equal, continue the search. */
 743                if (rc)
 744                        continue;
 745                /*
 746                 * Names match with case insensitive comparison, now try the
 747                 * case sensitive comparison, which is required for proper
 748                 * collation.
 749                 */
 750                rc = ntfs_collate_names(uname, uname_len,
 751                                (ntfschar*)&ie->key.file_name.file_name,
 752                                ie->key.file_name.file_name_length, 1,
 753                                CASE_SENSITIVE, vol->upcase, vol->upcase_len);
 754                if (rc == -1)
 755                        break;
 756                if (rc)
 757                        continue;
 758                /*
 759                 * Perfect match, this will never happen as the
 760                 * ntfs_are_names_equal() call will have gotten a match but we
 761                 * still treat it correctly.
 762                 */
 763                goto found_it;
 764        }
 765        /*
 766         * We have finished with this index without success. Check for the
 767         * presence of a child node.
 768         */
 769        if (!(ie->flags & INDEX_ENTRY_NODE)) {
 770                /* No child node, return -ENOENT. */
 771                err = -ENOENT;
 772                goto err_out;
 773        } /* Child node present, descend into it. */
 774        /* Consistency check: Verify that an index allocation exists. */
 775        if (!NInoIndexAllocPresent(dir_ni)) {
 776                ntfs_error(sb, "No index allocation attribute but index entry "
 777                                "requires one. Directory inode 0x%lx is "
 778                                "corrupt or driver bug.", dir_ni->mft_no);
 779                goto err_out;
 780        }
 781        /* Get the starting vcn of the index_block holding the child node. */
 782        vcn = sle64_to_cpup((u8*)ie + le16_to_cpu(ie->length) - 8);
 783        ia_mapping = VFS_I(dir_ni)->i_mapping;
 784        /*
 785         * We are done with the index root and the mft record. Release them,
 786         * otherwise we deadlock with ntfs_map_page().
 787         */
 788        ntfs_attr_put_search_ctx(ctx);
 789        unmap_mft_record(dir_ni);
 790        m = NULL;
 791        ctx = NULL;
 792descend_into_child_node:
 793        /*
 794         * Convert vcn to index into the index allocation attribute in units
 795         * of PAGE_CACHE_SIZE and map the page cache page, reading it from
 796         * disk if necessary.
 797         */
 798        page = ntfs_map_page(ia_mapping, vcn <<
 799                        dir_ni->itype.index.vcn_size_bits >> PAGE_CACHE_SHIFT);
 800        if (IS_ERR(page)) {
 801                ntfs_error(sb, "Failed to map directory index page, error %ld.",
 802                                -PTR_ERR(page));
 803                err = PTR_ERR(page);
 804                goto err_out;
 805        }
 806        lock_page(page);
 807        kaddr = (u8*)page_address(page);
 808fast_descend_into_child_node:
 809        /* Get to the index allocation block. */
 810        ia = (INDEX_ALLOCATION*)(kaddr + ((vcn <<
 811                        dir_ni->itype.index.vcn_size_bits) & ~PAGE_CACHE_MASK));
 812        /* Bounds checks. */
 813        if ((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_CACHE_SIZE) {
 814                ntfs_error(sb, "Out of bounds check failed. Corrupt directory "
 815                                "inode 0x%lx or driver bug.", dir_ni->mft_no);
 816                goto unm_err_out;
 817        }
 818        /* Catch multi sector transfer fixup errors. */
 819        if (unlikely(!ntfs_is_indx_record(ia->magic))) {
 820                ntfs_error(sb, "Directory index record with vcn 0x%llx is "
 821                                "corrupt.  Corrupt inode 0x%lx.  Run chkdsk.",
 822                                (unsigned long long)vcn, dir_ni->mft_no);
 823                goto unm_err_out;
 824        }
 825        if (sle64_to_cpu(ia->index_block_vcn) != vcn) {
 826                ntfs_error(sb, "Actual VCN (0x%llx) of index buffer is "
 827                                "different from expected VCN (0x%llx). "
 828                                "Directory inode 0x%lx is corrupt or driver "
 829                                "bug.", (unsigned long long)
 830                                sle64_to_cpu(ia->index_block_vcn),
 831                                (unsigned long long)vcn, dir_ni->mft_no);
 832                goto unm_err_out;
 833        }
 834        if (le32_to_cpu(ia->index.allocated_size) + 0x18 !=
 835                        dir_ni->itype.index.block_size) {
 836                ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode "
 837                                "0x%lx has a size (%u) differing from the "
 838                                "directory specified size (%u). Directory "
 839                                "inode is corrupt or driver bug.",
 840                                (unsigned long long)vcn, dir_ni->mft_no,
 841                                le32_to_cpu(ia->index.allocated_size) + 0x18,
 842                                dir_ni->itype.index.block_size);
 843                goto unm_err_out;
 844        }
 845        index_end = (u8*)ia + dir_ni->itype.index.block_size;
 846        if (index_end > kaddr + PAGE_CACHE_SIZE) {
 847                ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode "
 848                                "0x%lx crosses page boundary. Impossible! "
 849                                "Cannot access! This is probably a bug in the "
 850                                "driver.", (unsigned long long)vcn,
 851                                dir_ni->mft_no);
 852                goto unm_err_out;
 853        }
 854        index_end = (u8*)&ia->index + le32_to_cpu(ia->index.index_length);
 855        if (index_end > (u8*)ia + dir_ni->itype.index.block_size) {
 856                ntfs_error(sb, "Size of index buffer (VCN 0x%llx) of directory "
 857                                "inode 0x%lx exceeds maximum size.",
 858                                (unsigned long long)vcn, dir_ni->mft_no);
 859                goto unm_err_out;
 860        }
 861        /* The first index entry. */
 862        ie = (INDEX_ENTRY*)((u8*)&ia->index +
 863                        le32_to_cpu(ia->index.entries_offset));
 864        /*
 865         * Iterate similar to above big loop but applied to index buffer, thus
 866         * loop until we exceed valid memory (corruption case) or until we
 867         * reach the last entry.
 868         */
 869        for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) {
 870                /* Bounds check. */
 871                if ((u8*)ie < (u8*)ia || (u8*)ie +
 872                                sizeof(INDEX_ENTRY_HEADER) > index_end ||
 873                                (u8*)ie + le16_to_cpu(ie->key_length) >
 874                                index_end) {
 875                        ntfs_error(sb, "Index entry out of bounds in "
 876                                        "directory inode 0x%lx.",
 877                                        dir_ni->mft_no);
 878                        goto unm_err_out;
 879                }
 880                /*
 881                 * The last entry cannot contain a name. It can however contain
 882                 * a pointer to a child node in the B+tree so we just break out.
 883                 */
 884                if (ie->flags & INDEX_ENTRY_END)
 885                        break;
 886                /*
 887                 * If the current entry has a name type of POSIX, the name is
 888                 * case sensitive and not otherwise. This has the effect of us
 889                 * not being able to access any POSIX file names which collate
 890                 * after the non-POSIX one when they only differ in case, but
 891                 * anyone doing screwy stuff like that deserves to burn in
 892                 * hell... Doing that kind of stuff on NT4 actually causes
 893                 * corruption on the partition even when using SP6a and Linux
 894                 * is not involved at all.
 895                 */
 896                ic = ie->key.file_name.file_name_type ? IGNORE_CASE :
 897                                CASE_SENSITIVE;
 898                /*
 899                 * If the names match perfectly, we are done and return the
 900                 * mft reference of the inode (i.e. the inode number together
 901                 * with the sequence number for consistency checking. We
 902                 * convert it to cpu format before returning.
 903                 */
 904                if (ntfs_are_names_equal(uname, uname_len,
 905                                (ntfschar*)&ie->key.file_name.file_name,
 906                                ie->key.file_name.file_name_length, ic,
 907                                vol->upcase, vol->upcase_len)) {
 908found_it2:
 909                        mref = le64_to_cpu(ie->data.dir.indexed_file);
 910                        unlock_page(page);
 911                        ntfs_unmap_page(page);
 912                        return mref;
 913                }
 914                /*
 915                 * Not a perfect match, need to do full blown collation so we
 916                 * know which way in the B+tree we have to go.
 917                 */
 918                rc = ntfs_collate_names(uname, uname_len,
 919                                (ntfschar*)&ie->key.file_name.file_name,
 920                                ie->key.file_name.file_name_length, 1,
 921                                IGNORE_CASE, vol->upcase, vol->upcase_len);
 922                /*
 923                 * If uname collates before the name of the current entry, there
 924                 * is definitely no such name in this index but we might need to
 925                 * descend into the B+tree so we just break out of the loop.
 926                 */
 927                if (rc == -1)
 928                        break;
 929                /* The names are not equal, continue the search. */
 930                if (rc)
 931                        continue;
 932                /*
 933                 * Names match with case insensitive comparison, now try the
 934                 * case sensitive comparison, which is required for proper
 935                 * collation.
 936                 */
 937                rc = ntfs_collate_names(uname, uname_len,
 938                                (ntfschar*)&ie->key.file_name.file_name,
 939                                ie->key.file_name.file_name_length, 1,
 940                                CASE_SENSITIVE, vol->upcase, vol->upcase_len);
 941                if (rc == -1)
 942                        break;
 943                if (rc)
 944                        continue;
 945                /*
 946                 * Perfect match, this will never happen as the
 947                 * ntfs_are_names_equal() call will have gotten a match but we
 948                 * still treat it correctly.
 949                 */
 950                goto found_it2;
 951        }
 952        /*
 953         * We have finished with this index buffer without success. Check for
 954         * the presence of a child node.
 955         */
 956        if (ie->flags & INDEX_ENTRY_NODE) {
 957                if ((ia->index.flags & NODE_MASK) == LEAF_NODE) {
 958                        ntfs_error(sb, "Index entry with child node found in "
 959                                        "a leaf node in directory inode 0x%lx.",
 960                                        dir_ni->mft_no);
 961                        goto unm_err_out;
 962                }
 963                /* Child node present, descend into it. */
 964                old_vcn = vcn;
 965                vcn = sle64_to_cpup((u8*)ie + le16_to_cpu(ie->length) - 8);
 966                if (vcn >= 0) {
 967                        /* If vcn is in the same page cache page as old_vcn we
 968                         * recycle the mapped page. */
 969                        if (old_vcn << vol->cluster_size_bits >>
 970                                        PAGE_CACHE_SHIFT == vcn <<
 971                                        vol->cluster_size_bits >>
 972                                        PAGE_CACHE_SHIFT)
 973                                goto fast_descend_into_child_node;
 974                        unlock_page(page);
 975                        ntfs_unmap_page(page);
 976                        goto descend_into_child_node;
 977                }
 978                ntfs_error(sb, "Negative child node vcn in directory inode "
 979                                "0x%lx.", dir_ni->mft_no);
 980                goto unm_err_out;
 981        }
 982        /* No child node, return -ENOENT. */
 983        ntfs_debug("Entry not found.");
 984        err = -ENOENT;
 985unm_err_out:
 986        unlock_page(page);
 987        ntfs_unmap_page(page);
 988err_out:
 989        if (!err)
 990                err = -EIO;
 991        if (ctx)
 992                ntfs_attr_put_search_ctx(ctx);
 993        if (m)
 994                unmap_mft_record(dir_ni);
 995        return ERR_MREF(err);
 996dir_err_out:
 997        ntfs_error(sb, "Corrupt directory. Aborting lookup.");
 998        goto err_out;
 999}
1000
1001#endif
1002
1003/**
1004 * ntfs_filldir - ntfs specific filldir method
1005 * @vol:        current ntfs volume
1006 * @fpos:        position in the directory
1007 * @ndir:        ntfs inode of current directory
1008 * @ia_page:        page in which the index allocation buffer @ie is in resides
1009 * @ie:                current index entry
1010 * @name:        buffer to use for the converted name
1011 * @dirent:        vfs filldir callback context
1012 * @filldir:        vfs filldir callback
1013 *
1014 * Convert the Unicode @name to the loaded NLS and pass it to the @filldir
1015 * callback.
1016 *
1017 * If @ia_page is not NULL it is the locked page containing the index
1018 * allocation block containing the index entry @ie.
1019 *
1020 * Note, we drop (and then reacquire) the page lock on @ia_page across the
1021 * @filldir() call otherwise we would deadlock with NFSd when it calls ->lookup
1022 * since ntfs_lookup() will lock the same page.  As an optimization, we do not
1023 * retake the lock if we are returning a non-zero value as ntfs_readdir()
1024 * would need to drop the lock immediately anyway.
1025 */
1026static inline int ntfs_filldir(ntfs_volume *vol, loff_t fpos,
1027                ntfs_inode *ndir, struct page *ia_page, INDEX_ENTRY *ie,
1028                u8 *name, void *dirent, filldir_t filldir)
1029{
1030        unsigned long mref;
1031        int name_len, rc;
1032        unsigned dt_type;
1033        FILE_NAME_TYPE_FLAGS name_type;
1034
1035        name_type = ie->key.file_name.file_name_type;
1036        if (name_type == FILE_NAME_DOS) {
1037                ntfs_debug("Skipping DOS name space entry.");
1038                return 0;
1039        }
1040        if (MREF_LE(ie->data.dir.indexed_file) == FILE_root) {
1041                ntfs_debug("Skipping root directory self reference entry.");
1042                return 0;
1043        }
1044        if (MREF_LE(ie->data.dir.indexed_file) < FILE_first_user &&
1045                        !NVolShowSystemFiles(vol)) {
1046                ntfs_debug("Skipping system file.");
1047                return 0;
1048        }
1049        name_len = ntfs_ucstonls(vol, (ntfschar*)&ie->key.file_name.file_name,
1050                        ie->key.file_name.file_name_length, &name,
1051                        NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1);
1052        if (name_len <= 0) {
1053                ntfs_warning(vol->sb, "Skipping unrepresentable inode 0x%llx.",
1054                                (long long)MREF_LE(ie->data.dir.indexed_file));
1055                return 0;
1056        }
1057        if (ie->key.file_name.file_attributes &
1058                        FILE_ATTR_DUP_FILE_NAME_INDEX_PRESENT)
1059                dt_type = DT_DIR;
1060        else
1061                dt_type = DT_REG;
1062        mref = MREF_LE(ie->data.dir.indexed_file);
1063        /*
1064         * Drop the page lock otherwise we deadlock with NFS when it calls
1065         * ->lookup since ntfs_lookup() will lock the same page.
1066         */
1067        if (ia_page)
1068                unlock_page(ia_page);
1069        ntfs_debug("Calling filldir for %s with len %i, fpos 0x%llx, inode "
1070                        "0x%lx, DT_%s.", name, name_len, fpos, mref,
1071                        dt_type == DT_DIR ? "DIR" : "REG");
1072        rc = filldir(dirent, name, name_len, fpos, mref, dt_type);
1073        /* Relock the page but not if we are aborting ->readdir. */
1074        if (!rc && ia_page)
1075                lock_page(ia_page);
1076        return rc;
1077}
1078
1079/*
1080 * We use the same basic approach as the old NTFS driver, i.e. we parse the
1081 * index root entries and then the index allocation entries that are marked
1082 * as in use in the index bitmap.
1083 *
1084 * While this will return the names in random order this doesn't matter for
1085 * ->readdir but OTOH results in a faster ->readdir.
1086 *
1087 * VFS calls ->readdir without BKL but with i_mutex held. This protects the VFS
1088 * parts (e.g. ->f_pos and ->i_size, and it also protects against directory
1089 * modifications).
1090 *
1091 * Locking:  - Caller must hold i_mutex on the directory.
1092 *             - Each page cache page in the index allocation mapping must be
1093 *               locked whilst being accessed otherwise we may find a corrupt
1094 *               page due to it being under ->writepage at the moment which
1095 *               applies the mst protection fixups before writing out and then
1096 *               removes them again after the write is complete after which it 
1097 *               unlocks the page.
1098 */
1099static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
1100{
1101        s64 ia_pos, ia_start, prev_ia_pos, bmp_pos;
1102        loff_t fpos, i_size;
1103        struct inode *bmp_vi, *vdir = filp->f_path.dentry->d_inode;
1104        struct super_block *sb = vdir->i_sb;
1105        ntfs_inode *ndir = NTFS_I(vdir);
1106        ntfs_volume *vol = NTFS_SB(sb);
1107        MFT_RECORD *m;
1108        INDEX_ROOT *ir = NULL;
1109        INDEX_ENTRY *ie;
1110        INDEX_ALLOCATION *ia;
1111        u8 *name = NULL;
1112        int rc, err, ir_pos, cur_bmp_pos;
1113        struct address_space *ia_mapping, *bmp_mapping;
1114        struct page *bmp_page = NULL, *ia_page = NULL;
1115        u8 *kaddr, *bmp, *index_end;
1116        ntfs_attr_search_ctx *ctx;
1117
1118        fpos = filp->f_pos;
1119        ntfs_debug("Entering for inode 0x%lx, fpos 0x%llx.",
1120                        vdir->i_ino, fpos);
1121        rc = err = 0;
1122        /* Are we at end of dir yet? */
1123        i_size = i_size_read(vdir);
1124        if (fpos >= i_size + vol->mft_record_size)
1125                goto done;
1126        /* Emulate . and .. for all directories. */
1127        if (!fpos) {
1128                ntfs_debug("Calling filldir for . with len 1, fpos 0x0, "
1129                                "inode 0x%lx, DT_DIR.", vdir->i_ino);
1130                rc = filldir(dirent, ".", 1, fpos, vdir->i_ino, DT_DIR);
1131                if (rc)
1132                        goto done;
1133                fpos++;
1134        }
1135        if (fpos == 1) {
1136                ntfs_debug("Calling filldir for .. with len 2, fpos 0x1, "
1137                                "inode 0x%lx, DT_DIR.",
1138                                (unsigned long)parent_ino(filp->f_path.dentry));
1139                rc = filldir(dirent, "..", 2, fpos,
1140                                parent_ino(filp->f_path.dentry), DT_DIR);
1141                if (rc)
1142                        goto done;
1143                fpos++;
1144        }
1145        m = NULL;
1146        ctx = NULL;
1147        /*
1148         * Allocate a buffer to store the current name being processed
1149         * converted to format determined by current NLS.
1150         */
1151        name = kmalloc(NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1, GFP_NOFS);
1152        if (unlikely(!name)) {
1153                err = -ENOMEM;
1154                goto err_out;
1155        }
1156        /* Are we jumping straight into the index allocation attribute? */
1157        if (fpos >= vol->mft_record_size)
1158                goto skip_index_root;
1159        /* Get hold of the mft record for the directory. */
1160        m = map_mft_record(ndir);
1161        if (IS_ERR(m)) {
1162                err = PTR_ERR(m);
1163                m = NULL;
1164                goto err_out;
1165        }
1166        ctx = ntfs_attr_get_search_ctx(ndir, m);
1167        if (unlikely(!ctx)) {
1168                err = -ENOMEM;
1169                goto err_out;
1170        }
1171        /* Get the offset into the index root attribute. */
1172        ir_pos = (s64)fpos;
1173        /* Find the index root attribute in the mft record. */
1174        err = ntfs_attr_lookup(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, NULL,
1175                        0, ctx);
1176        if (unlikely(err)) {
1177                ntfs_error(sb, "Index root attribute missing in directory "
1178                                "inode 0x%lx.", vdir->i_ino);
1179                goto err_out;
1180        }
1181        /*
1182         * Copy the index root attribute value to a buffer so that we can put
1183         * the search context and unmap the mft record before calling the
1184         * filldir() callback.  We need to do this because of NFSd which calls
1185         * ->lookup() from its filldir callback() and this causes NTFS to
1186         * deadlock as ntfs_lookup() maps the mft record of the directory and
1187         * we have got it mapped here already.  The only solution is for us to
1188         * unmap the mft record here so that a call to ntfs_lookup() is able to
1189         * map the mft record without deadlocking.
1190         */
1191        rc = le32_to_cpu(ctx->attr->data.resident.value_length);
1192        ir = kmalloc(rc, GFP_NOFS);
1193        if (unlikely(!ir)) {
1194                err = -ENOMEM;
1195                goto err_out;
1196        }
1197        /* Copy the index root value (it has been verified in read_inode). */
1198        memcpy(ir, (u8*)ctx->attr +
1199                        le16_to_cpu(ctx->attr->data.resident.value_offset), rc);
1200        ntfs_attr_put_search_ctx(ctx);
1201        unmap_mft_record(ndir);
1202        ctx = NULL;
1203        m = NULL;
1204        index_end = (u8*)&ir->index + le32_to_cpu(ir->index.index_length);
1205        /* The first index entry. */
1206        ie = (INDEX_ENTRY*)((u8*)&ir->index +
1207                        le32_to_cpu(ir->index.entries_offset));
1208        /*
1209         * Loop until we exceed valid memory (corruption case) or until we
1210         * reach the last entry or until filldir tells us it has had enough
1211         * or signals an error (both covered by the rc test).
1212         */
1213        for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) {
1214                ntfs_debug("In index root, offset 0x%zx.", (u8*)ie - (u8*)ir);
1215                /* Bounds checks. */
1216                if (unlikely((u8*)ie < (u8*)ir || (u8*)ie +
1217                                sizeof(INDEX_ENTRY_HEADER) > index_end ||
1218                                (u8*)ie + le16_to_cpu(ie->key_length) >
1219                                index_end))
1220                        goto err_out;
1221                /* The last entry cannot contain a name. */
1222                if (ie->flags & INDEX_ENTRY_END)
1223                        break;
1224                /* Skip index root entry if continuing previous readdir. */
1225                if (ir_pos > (u8*)ie - (u8*)ir)
1226                        continue;
1227                /* Advance the position even if going to skip the entry. */
1228                fpos = (u8*)ie - (u8*)ir;
1229                /* Submit the name to the filldir callback. */
1230                rc = ntfs_filldir(vol, fpos, ndir, NULL, ie, name, dirent,
1231                                filldir);
1232                if (rc) {
1233                        kfree(ir);
1234                        goto abort;
1235                }
1236        }
1237        /* We are done with the index root and can free the buffer. */
1238        kfree(ir);
1239        ir = NULL;
1240        /* If there is no index allocation attribute we are finished. */
1241        if (!NInoIndexAllocPresent(ndir))
1242                goto EOD;
1243        /* Advance fpos to the beginning of the index allocation. */
1244        fpos = vol->mft_record_size;
1245skip_index_root:
1246        kaddr = NULL;
1247        prev_ia_pos = -1LL;
1248        /* Get the offset into the index allocation attribute. */
1249        ia_pos = (s64)fpos - vol->mft_record_size;
1250        ia_mapping = vdir->i_mapping;
1251        ntfs_debug("Inode 0x%lx, getting index bitmap.", vdir->i_ino);
1252        bmp_vi = ntfs_attr_iget(vdir, AT_BITMAP, I30, 4);
1253        if (IS_ERR(bmp_vi)) {
1254                ntfs_error(sb, "Failed to get bitmap attribute.");
1255                err = PTR_ERR(bmp_vi);
1256                goto err_out;
1257        }
1258        bmp_mapping = bmp_vi->i_mapping;
1259        /* Get the starting bitmap bit position and sanity check it. */
1260        bmp_pos = ia_pos >> ndir->itype.index.block_size_bits;
1261        if (unlikely(bmp_pos >> 3 >= i_size_read(bmp_vi))) {
1262                ntfs_error(sb, "Current index allocation position exceeds "
1263                                "index bitmap size.");
1264                goto iput_err_out;
1265        }
1266        /* Get the starting bit position in the current bitmap page. */
1267        cur_bmp_pos = bmp_pos & ((PAGE_CACHE_SIZE * 8) - 1);
1268        bmp_pos &= ~(u64)((PAGE_CACHE_SIZE * 8) - 1);
1269get_next_bmp_page:
1270        ntfs_debug("Reading bitmap with page index 0x%llx, bit ofs 0x%llx",
1271                        (unsigned long long)bmp_pos >> (3 + PAGE_CACHE_SHIFT),
1272                        (unsigned long long)bmp_pos &
1273                        (unsigned long long)((PAGE_CACHE_SIZE * 8) - 1));
1274        bmp_page = ntfs_map_page(bmp_mapping,
1275                        bmp_pos >> (3 + PAGE_CACHE_SHIFT));
1276        if (IS_ERR(bmp_page)) {
1277                ntfs_error(sb, "Reading index bitmap failed.");
1278                err = PTR_ERR(bmp_page);
1279                bmp_page = NULL;
1280                goto iput_err_out;
1281        }
1282        bmp = (u8*)page_address(bmp_page);
1283        /* Find next index block in use. */
1284        while (!(bmp[cur_bmp_pos >> 3] & (1 << (cur_bmp_pos & 7)))) {
1285find_next_index_buffer:
1286                cur_bmp_pos++;
1287                /*
1288                 * If we have reached the end of the bitmap page, get the next
1289                 * page, and put away the old one.
1290                 */
1291                if (unlikely((cur_bmp_pos >> 3) >= PAGE_CACHE_SIZE)) {
1292                        ntfs_unmap_page(bmp_page);
1293                        bmp_pos += PAGE_CACHE_SIZE * 8;
1294                        cur_bmp_pos = 0;
1295                        goto get_next_bmp_page;
1296                }
1297                /* If we have reached the end of the bitmap, we are done. */
1298                if (unlikely(((bmp_pos + cur_bmp_pos) >> 3) >= i_size))
1299                        goto unm_EOD;
1300                ia_pos = (bmp_pos + cur_bmp_pos) <<
1301                                ndir->itype.index.block_size_bits;
1302        }
1303        ntfs_debug("Handling index buffer 0x%llx.",
1304                        (unsigned long long)bmp_pos + cur_bmp_pos);
1305        /* If the current index buffer is in the same page we reuse the page. */
1306        if ((prev_ia_pos & (s64)PAGE_CACHE_MASK) !=
1307                        (ia_pos & (s64)PAGE_CACHE_MASK)) {
1308                prev_ia_pos = ia_pos;
1309                if (likely(ia_page != NULL)) {
1310                        unlock_page(ia_page);
1311                        ntfs_unmap_page(ia_page);
1312                }
1313                /*
1314                 * Map the page cache page containing the current ia_pos,
1315                 * reading it from disk if necessary.
1316                 */
1317                ia_page = ntfs_map_page(ia_mapping, ia_pos >> PAGE_CACHE_SHIFT);
1318                if (IS_ERR(ia_page)) {
1319                        ntfs_error(sb, "Reading index allocation data failed.");
1320                        err = PTR_ERR(ia_page);
1321                        ia_page = NULL;
1322                        goto err_out;
1323                }
1324                lock_page(ia_page);
1325                kaddr = (u8*)page_address(ia_page);
1326        }
1327        /* Get the current index buffer. */
1328        ia = (INDEX_ALLOCATION*)(kaddr + (ia_pos & ~PAGE_CACHE_MASK &
1329                        ~(s64)(ndir->itype.index.block_size - 1)));
1330        /* Bounds checks. */
1331        if (unlikely((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_CACHE_SIZE)) {
1332                ntfs_error(sb, "Out of bounds check failed. Corrupt directory "
1333                                "inode 0x%lx or driver bug.", vdir->i_ino);
1334                goto err_out;
1335        }
1336        /* Catch multi sector transfer fixup errors. */
1337        if (unlikely(!ntfs_is_indx_record(ia->magic))) {
1338                ntfs_error(sb, "Directory index record with vcn 0x%llx is "
1339                                "corrupt.  Corrupt inode 0x%lx.  Run chkdsk.",
1340                                (unsigned long long)ia_pos >>
1341                                ndir->itype.index.vcn_size_bits, vdir->i_ino);
1342                goto err_out;
1343        }
1344        if (unlikely(sle64_to_cpu(ia->index_block_vcn) != (ia_pos &
1345                        ~(s64)(ndir->itype.index.block_size - 1)) >>
1346                        ndir->itype.index.vcn_size_bits)) {
1347                ntfs_error(sb, "Actual VCN (0x%llx) of index buffer is "
1348                                "different from expected VCN (0x%llx). "
1349                                "Directory inode 0x%lx is corrupt or driver "
1350                                "bug. ", (unsigned long long)
1351                                sle64_to_cpu(ia->index_block_vcn),
1352                                (unsigned long long)ia_pos >>
1353                                ndir->itype.index.vcn_size_bits, vdir->i_ino);
1354                goto err_out;
1355        }
1356        if (unlikely(le32_to_cpu(ia->index.allocated_size) + 0x18 !=
1357                        ndir->itype.index.block_size)) {
1358                ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode "
1359                                "0x%lx has a size (%u) differing from the "
1360                                "directory specified size (%u). Directory "
1361                                "inode is corrupt or driver bug.",
1362                                (unsigned long long)ia_pos >>
1363                                ndir->itype.index.vcn_size_bits, vdir->i_ino,
1364                                le32_to_cpu(ia->index.allocated_size) + 0x18,
1365                                ndir->itype.index.block_size);
1366                goto err_out;
1367        }
1368        index_end = (u8*)ia + ndir->itype.index.block_size;
1369        if (unlikely(index_end > kaddr + PAGE_CACHE_SIZE)) {
1370                ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode "
1371                                "0x%lx crosses page boundary. Impossible! "
1372                                "Cannot access! This is probably a bug in the "
1373                                "driver.", (unsigned long long)ia_pos >>
1374                                ndir->itype.index.vcn_size_bits, vdir->i_ino);
1375                goto err_out;
1376        }
1377        ia_start = ia_pos & ~(s64)(ndir->itype.index.block_size - 1);
1378        index_end = (u8*)&ia->index + le32_to_cpu(ia->index.index_length);
1379        if (unlikely(index_end > (u8*)ia + ndir->itype.index.block_size)) {
1380                ntfs_error(sb, "Size of index buffer (VCN 0x%llx) of directory "
1381                                "inode 0x%lx exceeds maximum size.",
1382                                (unsigned long long)ia_pos >>
1383                                ndir->itype.index.vcn_size_bits, vdir->i_ino);
1384                goto err_out;
1385        }
1386        /* The first index entry in this index buffer. */
1387        ie = (INDEX_ENTRY*)((u8*)&ia->index +
1388                        le32_to_cpu(ia->index.entries_offset));
1389        /*
1390         * Loop until we exceed valid memory (corruption case) or until we
1391         * reach the last entry or until filldir tells us it has had enough
1392         * or signals an error (both covered by the rc test).
1393         */
1394        for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) {
1395                ntfs_debug("In index allocation, offset 0x%llx.",
1396                                (unsigned long long)ia_start +
1397                                (unsigned long long)((u8*)ie - (u8*)ia));
1398                /* Bounds checks. */
1399                if (unlikely((u8*)ie < (u8*)ia || (u8*)ie +
1400                                sizeof(INDEX_ENTRY_HEADER) > index_end ||
1401                                (u8*)ie + le16_to_cpu(ie->key_length) >
1402                                index_end))
1403                        goto err_out;
1404                /* The last entry cannot contain a name. */
1405                if (ie->flags & INDEX_ENTRY_END)
1406                        break;
1407                /* Skip index block entry if continuing previous readdir. */
1408                if (ia_pos - ia_start > (u8*)ie - (u8*)ia)
1409                        continue;
1410                /* Advance the position even if going to skip the entry. */
1411                fpos = (u8*)ie - (u8*)ia +
1412                                (sle64_to_cpu(ia->index_block_vcn) <<
1413                                ndir->itype.index.vcn_size_bits) +
1414                                vol->mft_record_size;
1415                /*
1416                 * Submit the name to the @filldir callback.  Note,
1417                 * ntfs_filldir() drops the lock on @ia_page but it retakes it
1418                 * before returning, unless a non-zero value is returned in
1419                 * which case the page is left unlocked.
1420                 */
1421                rc = ntfs_filldir(vol, fpos, ndir, ia_page, ie, name, dirent,
1422                                filldir);
1423                if (rc) {
1424                        /* @ia_page is already unlocked in this case. */
1425                        ntfs_unmap_page(ia_page);
1426                        ntfs_unmap_page(bmp_page);
1427                        iput(bmp_vi);
1428                        goto abort;
1429                }
1430        }
1431        goto find_next_index_buffer;
1432unm_EOD:
1433        if (ia_page) {
1434                unlock_page(ia_page);
1435                ntfs_unmap_page(ia_page);
1436        }
1437        ntfs_unmap_page(bmp_page);
1438        iput(bmp_vi);
1439EOD:
1440        /* We are finished, set fpos to EOD. */
1441        fpos = i_size + vol->mft_record_size;
1442abort:
1443        kfree(name);
1444done:
1445#ifdef DEBUG
1446        if (!rc)
1447                ntfs_debug("EOD, fpos 0x%llx, returning 0.", fpos);
1448        else
1449                ntfs_debug("filldir returned %i, fpos 0x%llx, returning 0.",
1450                                rc, fpos);
1451#endif
1452        filp->f_pos = fpos;
1453        return 0;
1454err_out:
1455        if (bmp_page) {
1456                ntfs_unmap_page(bmp_page);
1457iput_err_out:
1458                iput(bmp_vi);
1459        }
1460        if (ia_page) {
1461                unlock_page(ia_page);
1462                ntfs_unmap_page(ia_page);
1463        }
1464        kfree(ir);
1465        kfree(name);
1466        if (ctx)
1467                ntfs_attr_put_search_ctx(ctx);
1468        if (m)
1469                unmap_mft_record(ndir);
1470        if (!err)
1471                err = -EIO;
1472        ntfs_debug("Failed. Returning error code %i.", -err);
1473        filp->f_pos = fpos;
1474        return err;
1475}
1476
1477/**
1478 * ntfs_dir_open - called when an inode is about to be opened
1479 * @vi:                inode to be opened
1480 * @filp:        file structure describing the inode
1481 *
1482 * Limit directory size to the page cache limit on architectures where unsigned
1483 * long is 32-bits. This is the most we can do for now without overflowing the
1484 * page cache page index. Doing it this way means we don't run into problems
1485 * because of existing too large directories. It would be better to allow the
1486 * user to read the accessible part of the directory but I doubt very much
1487 * anyone is going to hit this check on a 32-bit architecture, so there is no
1488 * point in adding the extra complexity required to support this.
1489 *
1490 * On 64-bit architectures, the check is hopefully optimized away by the
1491 * compiler.
1492 */
1493static int ntfs_dir_open(struct inode *vi, struct file *filp)
1494{
1495        if (sizeof(unsigned long) < 8) {
1496                if (i_size_read(vi) > MAX_LFS_FILESIZE)
1497                        return -EFBIG;
1498        }
1499        return 0;
1500}
1501
1502#ifdef NTFS_RW
1503
1504/**
1505 * ntfs_dir_fsync - sync a directory to disk
1506 * @filp:        directory to be synced
1507 * @dentry:        dentry describing the directory to sync
1508 * @datasync:        if non-zero only flush user data and not metadata
1509 *
1510 * Data integrity sync of a directory to disk.  Used for fsync, fdatasync, and
1511 * msync system calls.  This function is based on file.c::ntfs_file_fsync().
1512 *
1513 * Write the mft record and all associated extent mft records as well as the
1514 * $INDEX_ALLOCATION and $BITMAP attributes and then sync the block device.
1515 *
1516 * If @datasync is true, we do not wait on the inode(s) to be written out
1517 * but we always wait on the page cache pages to be written out.
1518 *
1519 * Note: In the past @filp could be NULL so we ignore it as we don't need it
1520 * anyway.
1521 *
1522 * Locking: Caller must hold i_mutex on the inode.
1523 *
1524 * TODO: We should probably also write all attribute/index inodes associated
1525 * with this inode but since we have no simple way of getting to them we ignore
1526 * this problem for now.  We do write the $BITMAP attribute if it is present
1527 * which is the important one for a directory so things are not too bad.
1528 */
1529static int ntfs_dir_fsync(struct file *filp, struct dentry *dentry,
1530                int datasync)
1531{
1532        struct inode *bmp_vi, *vi = dentry->d_inode;
1533        int err, ret;
1534        ntfs_attr na;
1535
1536        ntfs_debug("Entering for inode 0x%lx.", vi->i_ino);
1537        BUG_ON(!S_ISDIR(vi->i_mode));
1538        /* If the bitmap attribute inode is in memory sync it, too. */
1539        na.mft_no = vi->i_ino;
1540        na.type = AT_BITMAP;
1541        na.name = I30;
1542        na.name_len = 4;
1543        bmp_vi = ilookup5(vi->i_sb, vi->i_ino, (test_t)ntfs_test_inode, &na);
1544        if (bmp_vi) {
1545                 write_inode_now(bmp_vi, !datasync);
1546                iput(bmp_vi);
1547        }
1548        ret = ntfs_write_inode(vi, 1);
1549        write_inode_now(vi, !datasync);
1550        err = sync_blockdev(vi->i_sb->s_bdev);
1551        if (unlikely(err && !ret))
1552                ret = err;
1553        if (likely(!ret))
1554                ntfs_debug("Done.");
1555        else
1556                ntfs_warning(vi->i_sb, "Failed to f%ssync inode 0x%lx.  Error "
1557                                "%u.", datasync ? "data" : "", vi->i_ino, -ret);
1558        return ret;
1559}
1560
1561#endif /* NTFS_RW */
1562
1563const struct file_operations ntfs_dir_ops = {
1564        .llseek                = generic_file_llseek,        /* Seek inside directory. */
1565        .read                = generic_read_dir,        /* Return -EISDIR. */
1566        .readdir        = ntfs_readdir,                /* Read directory contents. */
1567#ifdef NTFS_RW
1568        .fsync                = ntfs_dir_fsync,        /* Sync a directory to disk. */
1569        /*.aio_fsync        = ,*/                        /* Sync all outstanding async
1570                                                   i/o operations on a kiocb. */
1571#endif /* NTFS_RW */
1572        /*.ioctl        = ,*/                        /* Perform function on the
1573                                                   mounted filesystem. */
1574        .open                = ntfs_dir_open,        /* Open directory. */
1575};