1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24#include "mballoc.h"
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
/*
 * Normalise an (address, bit) pair so the address is aligned to the size
 * of an unsigned long.
 *
 * @bit:  in/out bit index; the bytes trimmed off @addr are added to it,
 *        eight bits per byte.
 * @addr: bitmap address, possibly unaligned.
 *
 * Returns @addr rounded down to an 8-byte (64-bit long) or 4-byte (32-bit
 * long) boundary.  Any other BITS_PER_LONG is rejected at compile time.
 */
static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
{
#if BITS_PER_LONG == 64
	const unsigned long low_mask = 7UL;
#elif BITS_PER_LONG == 32
	const unsigned long low_mask = 3UL;
#else
#error "how many bits you are?!"
#endif
	unsigned long raw = (unsigned long) addr;

	/* each byte dropped from the address is worth eight bit positions */
	*bit += (raw & low_mask) << 3;
	return (void *) (raw & ~low_mask);
}
347
/*
 * Test bit @bit of the bitmap at @addr.  @addr may be unaligned: the pair
 * is first normalised by mb_correct_addr_and_bit() and then passed to
 * ext4_test_bit(), whose result is returned unchanged.
 */
static inline int mb_test_bit(int bit, void *addr)
{
	void *aligned = mb_correct_addr_and_bit(&bit, addr);

	return ext4_test_bit(bit, aligned);
}
357
/*
 * Set bit @bit of the bitmap at @addr via ext4_set_bit(), after aligning
 * the address/bit pair with mb_correct_addr_and_bit().
 */
static inline void mb_set_bit(int bit, void *addr)
{
	void *aligned = mb_correct_addr_and_bit(&bit, addr);

	ext4_set_bit(bit, aligned);
}
363
364static inline void mb_set_bit_atomic(spinlock_t *lock, int bit, void *addr)
365{
366 addr = mb_correct_addr_and_bit(&bit, addr);
367 ext4_set_bit_atomic(lock, bit, addr);
368}
369
/*
 * Clear bit @bit of the bitmap at @addr via ext4_clear_bit(), after
 * aligning the address/bit pair with mb_correct_addr_and_bit().
 */
static inline void mb_clear_bit(int bit, void *addr)
{
	void *aligned = mb_correct_addr_and_bit(&bit, addr);

	ext4_clear_bit(bit, aligned);
}
375
376static inline void mb_clear_bit_atomic(spinlock_t *lock, int bit, void *addr)
377{
378 addr = mb_correct_addr_and_bit(&bit, addr);
379 ext4_clear_bit_atomic(lock, bit, addr);
380}
381
/*
 * Find the first zero bit at or after @start in the @max-bit bitmap at
 * @addr, which may be unaligned.
 *
 * The search runs on the aligned address with both limits shifted by the
 * alignment correction; the correction is subtracted from the result
 * again.  The return value is clamped so it never exceeds @max.
 */
static inline int mb_find_next_zero_bit(void *addr, int max, int start)
{
	int shift = 0;
	int found;

	addr = mb_correct_addr_and_bit(&shift, addr);
	found = ext4_find_next_zero_bit(addr, max + shift, start + shift) - shift;

	return found > max ? max : found;
}
394
/*
 * Find the first set bit at or after @start in the @max-bit bitmap at
 * @addr, which may be unaligned.
 *
 * The search runs on the aligned address with both limits shifted by the
 * alignment correction; the correction is subtracted from the result
 * again.  The return value is clamped so it never exceeds @max.
 */
static inline int mb_find_next_bit(void *addr, int max, int start)
{
	int shift = 0;
	int found;

	addr = mb_correct_addr_and_bit(&shift, addr);
	found = ext4_find_next_bit(addr, max + shift, start + shift) - shift;

	return found > max ? max : found;
}
407
408static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
409{
410 char *bb;
411
412 BUG_ON(EXT4_MB_BITMAP(e4b) == EXT4_MB_BUDDY(e4b));
413 BUG_ON(max == NULL);
414
415 if (order > e4b->bd_blkbits + 1) {
416 *max = 0;
417 return NULL;
418 }
419
420
421 *max = 1 << (e4b->bd_blkbits + 3);
422 if (order == 0)
423 return EXT4_MB_BITMAP(e4b);
424
425 bb = EXT4_MB_BUDDY(e4b) + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order];
426 *max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order];
427
428 return bb;
429}
430
431#ifdef DOUBLE_CHECK
432static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
433 int first, int count)
434{
435 int i;
436 struct super_block *sb = e4b->bd_sb;
437
438 if (unlikely(e4b->bd_info->bb_bitmap == NULL))
439 return;
440 BUG_ON(!ext4_is_group_locked(sb, e4b->bd_group));
441 for (i = 0; i < count; i++) {
442 if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) {
443 ext4_fsblk_t blocknr;
444 blocknr = e4b->bd_group * EXT4_BLOCKS_PER_GROUP(sb);
445 blocknr += first + i;
446 blocknr +=
447 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
448
449 ext4_error(sb, __func__, "double-free of inode"
450 " %lu's block %llu(bit %u in group %lu)\n",
451 inode ? inode->i_ino : 0, blocknr,
452 first + i, e4b->bd_group);
453 }
454 mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
455 }
456}
457
458static void mb_mark_used_double(struct ext4_buddy *e4b, int first, int count)
459{
460 int i;
461
462 if (unlikely(e4b->bd_info->bb_bitmap == NULL))
463 return;
464 BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group));
465 for (i = 0; i < count; i++) {
466 BUG_ON(mb_test_bit(first + i, e4b->bd_info->bb_bitmap));
467 mb_set_bit(first + i, e4b->bd_info->bb_bitmap);
468 }
469}
470
471static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
472{
473 if (memcmp(e4b->bd_info->bb_bitmap, bitmap, e4b->bd_sb->s_blocksize)) {
474 unsigned char *b1, *b2;
475 int i;
476 b1 = (unsigned char *) e4b->bd_info->bb_bitmap;
477 b2 = (unsigned char *) bitmap;
478 for (i = 0; i < e4b->bd_sb->s_blocksize; i++) {
479 if (b1[i] != b2[i]) {
480 printk(KERN_ERR "corruption in group %lu "
481 "at byte %u(%u): %x in copy != %x "
482 "on disk/prealloc\n",
483 e4b->bd_group, i, i * 8, b1[i], b2[i]);
484 BUG();
485 }
486 }
487 }
488}
489
490#else
/* DOUBLE_CHECK disabled: freeing blocks needs no extra bookkeeping. */
static inline void mb_free_blocks_double(struct inode *inode,
					 struct ext4_buddy *e4b,
					 int first, int count)
{
}
/* DOUBLE_CHECK disabled: marking blocks used needs no extra bookkeeping. */
static inline void mb_mark_used_double(struct ext4_buddy *e4b,
				       int first, int count)
{
}
/* DOUBLE_CHECK disabled: there is no bitmap copy to compare against. */
static inline void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
{
}
505#endif
506
507#ifdef AGGRESSIVE_CHECK
508
/*
 * Assertion used inside __mb_check_buddy().  On failure it logs the
 * stringified condition together with the call site and BUG()s.
 *
 * It expands to references to 'function', 'file' and 'line', so it can
 * only be used where identifiers with those names are in scope.
 */
#define MB_CHECK_ASSERT(cond)						\
do {									\
	if (cond)							\
		break;							\
	printk(KERN_EMERG						\
		"Assertion failure in %s() at %s:%d: \"%s\"\n",		\
		function, file, line, # cond);				\
	BUG();								\
} while (0)
518
519static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
520 const char *function, int line)
521{
522 struct super_block *sb = e4b->bd_sb;
523 int order = e4b->bd_blkbits + 1;
524 int max;
525 int max2;
526 int i;
527 int j;
528 int k;
529 int count;
530 struct ext4_group_info *grp;
531 int fragments = 0;
532 int fstart;
533 struct list_head *cur;
534 void *buddy;
535 void *buddy2;
536
537 {
538 static int mb_check_counter;
539 if (mb_check_counter++ % 100 != 0)
540 return 0;
541 }
542
543 while (order > 1) {
544 buddy = mb_find_buddy(e4b, order, &max);
545 MB_CHECK_ASSERT(buddy);
546 buddy2 = mb_find_buddy(e4b, order - 1, &max2);
547 MB_CHECK_ASSERT(buddy2);
548 MB_CHECK_ASSERT(buddy != buddy2);
549 MB_CHECK_ASSERT(max * 2 == max2);
550
551 count = 0;
552 for (i = 0; i < max; i++) {
553
554 if (mb_test_bit(i, buddy)) {
555
556 if (!mb_test_bit(i << 1, buddy2)) {
557 MB_CHECK_ASSERT(
558 mb_test_bit((i<<1)+1, buddy2));
559 } else if (!mb_test_bit((i << 1) + 1, buddy2)) {
560 MB_CHECK_ASSERT(
561 mb_test_bit(i << 1, buddy2));
562 }
563 continue;
564 }
565
566
567 MB_CHECK_ASSERT(mb_test_bit(i << 1, buddy2));
568 MB_CHECK_ASSERT(mb_test_bit((i << 1) + 1, buddy2));
569
570 for (j = 0; j < (1 << order); j++) {
571 k = (i * (1 << order)) + j;
572 MB_CHECK_ASSERT(
573 !mb_test_bit(k, EXT4_MB_BITMAP(e4b)));
574 }
575 count++;
576 }
577 MB_CHECK_ASSERT(e4b->bd_info->bb_counters[order] == count);
578 order--;
579 }
580
581 fstart = -1;
582 buddy = mb_find_buddy(e4b, 0, &max);
583 for (i = 0; i < max; i++) {
584 if (!mb_test_bit(i, buddy)) {
585 MB_CHECK_ASSERT(i >= e4b->bd_info->bb_first_free);
586 if (fstart == -1) {
587 fragments++;
588 fstart = i;
589 }
590 continue;
591 }
592 fstart = -1;
593
594 for (j = 0; j < e4b->bd_blkbits + 1; j++) {
595 buddy2 = mb_find_buddy(e4b, j, &max2);
596 k = i >> j;
597 MB_CHECK_ASSERT(k < max2);
598 MB_CHECK_ASSERT(mb_test_bit(k, buddy2));
599 }
600 }
601 MB_CHECK_ASSERT(!EXT4_MB_GRP_NEED_INIT(e4b->bd_info));
602 MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments);
603
604 grp = ext4_get_group_info(sb, e4b->bd_group);
605 buddy = mb_find_buddy(e4b, 0, &max);
606 list_for_each(cur, &grp->bb_prealloc_list) {
607 ext4_group_t groupnr;
608 struct ext4_prealloc_space *pa;
609 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
610 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &groupnr, &k);
611 MB_CHECK_ASSERT(groupnr == e4b->bd_group);
612 for (i = 0; i < pa->pa_len; i++)
613 MB_CHECK_ASSERT(mb_test_bit(k + i, buddy));
614 }
615 return 0;
616}
617#undef MB_CHECK_ASSERT
/* Run the buddy consistency check, tagging any failure with the call site. */
#define mb_check_buddy(e4b)						\
	__mb_check_buddy((e4b), __FILE__, __func__, __LINE__)
620#else
/* AGGRESSIVE_CHECK is not defined: the consistency check expands to nothing. */
#define mb_check_buddy(e4b)
622#endif
623
624
625static void ext4_mb_mark_free_simple(struct super_block *sb,
626 void *buddy, unsigned first, int len,
627 struct ext4_group_info *grp)
628{
629 struct ext4_sb_info *sbi = EXT4_SB(sb);
630 unsigned short min;
631 unsigned short max;
632 unsigned short chunk;
633 unsigned short border;
634
635 BUG_ON(len > EXT4_BLOCKS_PER_GROUP(sb));
636
637 border = 2 << sb->s_blocksize_bits;
638
639 while (len > 0) {
640
641 max = ffs(first | border) - 1;
642
643
644 min = fls(len) - 1;
645
646 if (max < min)
647 min = max;
648 chunk = 1 << min;
649
650
651 grp->bb_counters[min]++;
652 if (min > 0)
653 mb_clear_bit(first >> min,
654 buddy + sbi->s_mb_offsets[min]);
655
656 len -= chunk;
657 first += chunk;
658 }
659}
660
661static void ext4_mb_generate_buddy(struct super_block *sb,
662 void *buddy, void *bitmap, ext4_group_t group)
663{
664 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
665 unsigned short max = EXT4_BLOCKS_PER_GROUP(sb);
666 unsigned short i = 0;
667 unsigned short first;
668 unsigned short len;
669 unsigned free = 0;
670 unsigned fragments = 0;
671 unsigned long long period = get_cycles();
672
673
674
675 i = mb_find_next_zero_bit(bitmap, max, 0);
676 grp->bb_first_free = i;
677 while (i < max) {
678 fragments++;
679 first = i;
680 i = mb_find_next_bit(bitmap, max, i);
681 len = i - first;
682 free += len;
683 if (len > 1)
684 ext4_mb_mark_free_simple(sb, buddy, first, len, grp);
685 else
686 grp->bb_counters[0]++;
687 if (i < max)
688 i = mb_find_next_zero_bit(bitmap, max, i);
689 }
690 grp->bb_fragments = fragments;
691
692 if (free != grp->bb_free) {
693 ext4_error(sb, __func__,
694 "EXT4-fs: group %lu: %u blocks in bitmap, %u in gd\n",
695 group, free, grp->bb_free);
696
697
698
699
700 grp->bb_free = free;
701 }
702
703 clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
704
705 period = get_cycles() - period;
706 spin_lock(&EXT4_SB(sb)->s_bal_lock);
707 EXT4_SB(sb)->s_mb_buddies_generated++;
708 EXT4_SB(sb)->s_mb_generation_time += period;
709 spin_unlock(&EXT4_SB(sb)->s_bal_lock);
710}
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729static int ext4_mb_init_cache(struct page *page, char *incore)
730{
731 int blocksize;
732 int blocks_per_page;
733 int groups_per_page;
734 int err = 0;
735 int i;
736 ext4_group_t first_group;
737 int first_block;
738 struct super_block *sb;
739 struct buffer_head *bhs;
740 struct buffer_head **bh;
741 struct inode *inode;
742 char *data;
743 char *bitmap;
744
745 mb_debug("init page %lu\n", page->index);
746
747 inode = page->mapping->host;
748 sb = inode->i_sb;
749 blocksize = 1 << inode->i_blkbits;
750 blocks_per_page = PAGE_CACHE_SIZE / blocksize;
751
752 groups_per_page = blocks_per_page >> 1;
753 if (groups_per_page == 0)
754 groups_per_page = 1;
755
756
757 if (groups_per_page > 1) {
758 err = -ENOMEM;
759 i = sizeof(struct buffer_head *) * groups_per_page;
760 bh = kzalloc(i, GFP_NOFS);
761 if (bh == NULL)
762 goto out;
763 } else
764 bh = &bhs;
765
766 first_group = page->index * blocks_per_page / 2;
767
768
769 for (i = 0; i < groups_per_page; i++) {
770 struct ext4_group_desc *desc;
771
772 if (first_group + i >= EXT4_SB(sb)->s_groups_count)
773 break;
774
775 err = -EIO;
776 desc = ext4_get_group_desc(sb, first_group + i, NULL);
777 if (desc == NULL)
778 goto out;
779
780 err = -ENOMEM;
781 bh[i] = sb_getblk(sb, ext4_block_bitmap(sb, desc));
782 if (bh[i] == NULL)
783 goto out;
784
785 if (buffer_uptodate(bh[i]) &&
786 !(desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))
787 continue;
788
789 lock_buffer(bh[i]);
790 spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
791 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
792 ext4_init_block_bitmap(sb, bh[i],
793 first_group + i, desc);
794 set_buffer_uptodate(bh[i]);
795 unlock_buffer(bh[i]);
796 spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
797 continue;
798 }
799 spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
800 get_bh(bh[i]);
801 bh[i]->b_end_io = end_buffer_read_sync;
802 submit_bh(READ, bh[i]);
803 mb_debug("read bitmap for group %lu\n", first_group + i);
804 }
805
806
807 for (i = 0; i < groups_per_page && bh[i]; i++)
808 wait_on_buffer(bh[i]);
809
810 err = -EIO;
811 for (i = 0; i < groups_per_page && bh[i]; i++)
812 if (!buffer_uptodate(bh[i]))
813 goto out;
814
815 err = 0;
816 first_block = page->index * blocks_per_page;
817 for (i = 0; i < blocks_per_page; i++) {
818 int group;
819 struct ext4_group_info *grinfo;
820
821 group = (first_block + i) >> 1;
822 if (group >= EXT4_SB(sb)->s_groups_count)
823 break;
824
825
826
827
828
829
830
831 data = page_address(page) + (i * blocksize);
832 bitmap = bh[group - first_group]->b_data;
833
834
835
836
837
838 if ((first_block + i) & 1) {
839
840 BUG_ON(incore == NULL);
841 mb_debug("put buddy for group %u in page %lu/%x\n",
842 group, page->index, i * blocksize);
843 memset(data, 0xff, blocksize);
844 grinfo = ext4_get_group_info(sb, group);
845 grinfo->bb_fragments = 0;
846 memset(grinfo->bb_counters, 0,
847 sizeof(unsigned short)*(sb->s_blocksize_bits+2));
848
849
850
851 ext4_mb_generate_buddy(sb, data, incore, group);
852 incore = NULL;
853 } else {
854
855 BUG_ON(incore != NULL);
856 mb_debug("put bitmap for group %u in page %lu/%x\n",
857 group, page->index, i * blocksize);
858
859
860 ext4_lock_group(sb, group);
861 memcpy(data, bitmap, blocksize);
862
863
864 ext4_mb_generate_from_pa(sb, data, group);
865 ext4_unlock_group(sb, group);
866
867
868
869
870 incore = data;
871 }
872 }
873 SetPageUptodate(page);
874
875out:
876 if (bh) {
877 for (i = 0; i < groups_per_page && bh[i]; i++)
878 brelse(bh[i]);
879 if (bh != &bhs)
880 kfree(bh);
881 }
882 return err;
883}
884
885static noinline_for_stack int
886ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
887 struct ext4_buddy *e4b)
888{
889 struct ext4_sb_info *sbi = EXT4_SB(sb);
890 struct inode *inode = sbi->s_buddy_cache;
891 int blocks_per_page;
892 int block;
893 int pnum;
894 int poff;
895 struct page *page;
896 int ret;
897
898 mb_debug("load group %lu\n", group);
899
900 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
901
902 e4b->bd_blkbits = sb->s_blocksize_bits;
903 e4b->bd_info = ext4_get_group_info(sb, group);
904 e4b->bd_sb = sb;
905 e4b->bd_group = group;
906 e4b->bd_buddy_page = NULL;
907 e4b->bd_bitmap_page = NULL;
908
909
910
911
912
913
914 block = group * 2;
915 pnum = block / blocks_per_page;
916 poff = block % blocks_per_page;
917
918
919
920 page = find_get_page(inode->i_mapping, pnum);
921 if (page == NULL || !PageUptodate(page)) {
922 if (page)
923 page_cache_release(page);
924 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
925 if (page) {
926 BUG_ON(page->mapping != inode->i_mapping);
927 if (!PageUptodate(page)) {
928 ret = ext4_mb_init_cache(page, NULL);
929 if (ret) {
930 unlock_page(page);
931 goto err;
932 }
933 mb_cmp_bitmaps(e4b, page_address(page) +
934 (poff * sb->s_blocksize));
935 }
936 unlock_page(page);
937 }
938 }
939 if (page == NULL || !PageUptodate(page)) {
940 ret = -EIO;
941 goto err;
942 }
943 e4b->bd_bitmap_page = page;
944 e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
945 mark_page_accessed(page);
946
947 block++;
948 pnum = block / blocks_per_page;
949 poff = block % blocks_per_page;
950
951 page = find_get_page(inode->i_mapping, pnum);
952 if (page == NULL || !PageUptodate(page)) {
953 if (page)
954 page_cache_release(page);
955 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
956 if (page) {
957 BUG_ON(page->mapping != inode->i_mapping);
958 if (!PageUptodate(page)) {
959 ret = ext4_mb_init_cache(page, e4b->bd_bitmap);
960 if (ret) {
961 unlock_page(page);
962 goto err;
963 }
964 }
965 unlock_page(page);
966 }
967 }
968 if (page == NULL || !PageUptodate(page)) {
969 ret = -EIO;
970 goto err;
971 }
972 e4b->bd_buddy_page = page;
973 e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize);
974 mark_page_accessed(page);
975
976 BUG_ON(e4b->bd_bitmap_page == NULL);
977 BUG_ON(e4b->bd_buddy_page == NULL);
978
979 return 0;
980
981err:
982 if (e4b->bd_bitmap_page)
983 page_cache_release(e4b->bd_bitmap_page);
984 if (e4b->bd_buddy_page)
985 page_cache_release(e4b->bd_buddy_page);
986 e4b->bd_buddy = NULL;
987 e4b->bd_bitmap = NULL;
988 return ret;
989}
990
991static void ext4_mb_release_desc(struct ext4_buddy *e4b)
992{
993 if (e4b->bd_bitmap_page)
994 page_cache_release(e4b->bd_bitmap_page);
995 if (e4b->bd_buddy_page)
996 page_cache_release(e4b->bd_buddy_page);
997}
998
999
1000static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
1001{
1002 int order = 1;
1003 void *bb;
1004
1005 BUG_ON(EXT4_MB_BITMAP(e4b) == EXT4_MB_BUDDY(e4b));
1006 BUG_ON(block >= (1 << (e4b->bd_blkbits + 3)));
1007
1008 bb = EXT4_MB_BUDDY(e4b);
1009 while (order <= e4b->bd_blkbits + 1) {
1010 block = block >> 1;
1011 if (!mb_test_bit(block, bb)) {
1012
1013 return order;
1014 }
1015 bb += 1 << (e4b->bd_blkbits - order);
1016 order++;
1017 }
1018 return 0;
1019}
1020
1021static void mb_clear_bits(spinlock_t *lock, void *bm, int cur, int len)
1022{
1023 __u32 *addr;
1024
1025 len = cur + len;
1026 while (cur < len) {
1027 if ((cur & 31) == 0 && (len - cur) >= 32) {
1028
1029 addr = bm + (cur >> 3);
1030 *addr = 0;
1031 cur += 32;
1032 continue;
1033 }
1034 mb_clear_bit_atomic(lock, cur, bm);
1035 cur++;
1036 }
1037}
1038
1039static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len)
1040{
1041 __u32 *addr;
1042
1043 len = cur + len;
1044 while (cur < len) {
1045 if ((cur & 31) == 0 && (len - cur) >= 32) {
1046
1047 addr = bm + (cur >> 3);
1048 *addr = 0xffffffff;
1049 cur += 32;
1050 continue;
1051 }
1052 mb_set_bit_atomic(lock, cur, bm);
1053 cur++;
1054 }
1055}
1056
1057static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1058 int first, int count)
1059{
1060 int block = 0;
1061 int max = 0;
1062 int order;
1063 void *buddy;
1064 void *buddy2;
1065 struct super_block *sb = e4b->bd_sb;
1066
1067 BUG_ON(first + count > (sb->s_blocksize << 3));
1068 BUG_ON(!ext4_is_group_locked(sb, e4b->bd_group));
1069 mb_check_buddy(e4b);
1070 mb_free_blocks_double(inode, e4b, first, count);
1071
1072 e4b->bd_info->bb_free += count;
1073 if (first < e4b->bd_info->bb_first_free)
1074 e4b->bd_info->bb_first_free = first;
1075
1076
1077 if (first != 0)
1078 block = !mb_test_bit(first - 1, EXT4_MB_BITMAP(e4b));
1079 if (first + count < EXT4_SB(sb)->s_mb_maxs[0])
1080 max = !mb_test_bit(first + count, EXT4_MB_BITMAP(e4b));
1081 if (block && max)
1082 e4b->bd_info->bb_fragments--;
1083 else if (!block && !max)
1084 e4b->bd_info->bb_fragments++;
1085
1086
1087 while (count-- > 0) {
1088 block = first++;
1089 order = 0;
1090
1091 if (!mb_test_bit(block, EXT4_MB_BITMAP(e4b))) {
1092 ext4_fsblk_t blocknr;
1093 blocknr = e4b->bd_group * EXT4_BLOCKS_PER_GROUP(sb);
1094 blocknr += block;
1095 blocknr +=
1096 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
1097 ext4_unlock_group(sb, e4b->bd_group);
1098 ext4_error(sb, __func__, "double-free of inode"
1099 " %lu's block %llu(bit %u in group %lu)\n",
1100 inode ? inode->i_ino : 0, blocknr, block,
1101 e4b->bd_group);
1102 ext4_lock_group(sb, e4b->bd_group);
1103 }
1104 mb_clear_bit(block, EXT4_MB_BITMAP(e4b));
1105 e4b->bd_info->bb_counters[order]++;
1106
1107
1108 buddy = mb_find_buddy(e4b, order, &max);
1109
1110 do {
1111 block &= ~1UL;
1112 if (mb_test_bit(block, buddy) ||
1113 mb_test_bit(block + 1, buddy))
1114 break;
1115
1116
1117 buddy2 = mb_find_buddy(e4b, order + 1, &max);
1118
1119 if (!buddy2)
1120 break;
1121
1122 if (order > 0) {
1123
1124
1125 mb_set_bit(block, buddy);
1126 mb_set_bit(block + 1, buddy);
1127 }
1128 e4b->bd_info->bb_counters[order]--;
1129 e4b->bd_info->bb_counters[order]--;
1130
1131 block = block >> 1;
1132 order++;
1133 e4b->bd_info->bb_counters[order]++;
1134
1135 mb_clear_bit(block, buddy2);
1136 buddy = buddy2;
1137 } while (1);
1138 }
1139 mb_check_buddy(e4b);
1140}
1141
1142static int mb_find_extent(struct ext4_buddy *e4b, int order, int block,
1143 int needed, struct ext4_free_extent *ex)
1144{
1145 int next = block;
1146 int max;
1147 int ord;
1148 void *buddy;
1149
1150 BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group));
1151 BUG_ON(ex == NULL);
1152
1153 buddy = mb_find_buddy(e4b, order, &max);
1154 BUG_ON(buddy == NULL);
1155 BUG_ON(block >= max);
1156 if (mb_test_bit(block, buddy)) {
1157 ex->fe_len = 0;
1158 ex->fe_start = 0;
1159 ex->fe_group = 0;
1160 return 0;
1161 }
1162
1163
1164 if (likely(order == 0)) {
1165
1166 order = mb_find_order_for_block(e4b, block);
1167 block = block >> order;
1168 }
1169
1170 ex->fe_len = 1 << order;
1171 ex->fe_start = block << order;
1172 ex->fe_group = e4b->bd_group;
1173
1174
1175 next = next - ex->fe_start;
1176 ex->fe_len -= next;
1177 ex->fe_start += next;
1178
1179 while (needed > ex->fe_len &&
1180 (buddy = mb_find_buddy(e4b, order, &max))) {
1181
1182 if (block + 1 >= max)
1183 break;
1184
1185 next = (block + 1) * (1 << order);
1186 if (mb_test_bit(next, EXT4_MB_BITMAP(e4b)))
1187 break;
1188
1189 ord = mb_find_order_for_block(e4b, next);
1190
1191 order = ord;
1192 block = next >> order;
1193 ex->fe_len += 1 << order;
1194 }
1195
1196 BUG_ON(ex->fe_start + ex->fe_len > (1 << (e4b->bd_blkbits + 3)));
1197 return ex->fe_len;
1198}
1199
1200static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
1201{
1202 int ord;
1203 int mlen = 0;
1204 int max = 0;
1205 int cur;
1206 int start = ex->fe_start;
1207 int len = ex->fe_len;
1208 unsigned ret = 0;
1209 int len0 = len;
1210 void *buddy;
1211
1212 BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3));
1213 BUG_ON(e4b->bd_group != ex->fe_group);
1214 BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group));
1215 mb_check_buddy(e4b);
1216 mb_mark_used_double(e4b, start, len);
1217
1218 e4b->bd_info->bb_free -= len;
1219 if (e4b->bd_info->bb_first_free == start)
1220 e4b->bd_info->bb_first_free += len;
1221
1222
1223 if (start != 0)
1224 mlen = !mb_test_bit(start - 1, EXT4_MB_BITMAP(e4b));
1225 if (start + len < EXT4_SB(e4b->bd_sb)->s_mb_maxs[0])
1226 max = !mb_test_bit(start + len, EXT4_MB_BITMAP(e4b));
1227 if (mlen && max)
1228 e4b->bd_info->bb_fragments++;
1229 else if (!mlen && !max)
1230 e4b->bd_info->bb_fragments--;
1231
1232
1233 while (len) {
1234 ord = mb_find_order_for_block(e4b, start);
1235
1236 if (((start >> ord) << ord) == start && len >= (1 << ord)) {
1237
1238 mlen = 1 << ord;
1239 buddy = mb_find_buddy(e4b, ord, &max);
1240 BUG_ON((start >> ord) >= max);
1241 mb_set_bit(start >> ord, buddy);
1242 e4b->bd_info->bb_counters[ord]--;
1243 start += mlen;
1244 len -= mlen;
1245 BUG_ON(len < 0);
1246 continue;
1247 }
1248
1249
1250 if (ret == 0)
1251 ret = len | (ord << 16);
1252
1253
1254 BUG_ON(ord <= 0);
1255 buddy = mb_find_buddy(e4b, ord, &max);
1256 mb_set_bit(start >> ord, buddy);
1257 e4b->bd_info->bb_counters[ord]--;
1258
1259 ord--;
1260 cur = (start >> ord) & ~1U;
1261 buddy = mb_find_buddy(e4b, ord, &max);
1262 mb_clear_bit(cur, buddy);
1263 mb_clear_bit(cur + 1, buddy);
1264 e4b->bd_info->bb_counters[ord]++;
1265 e4b->bd_info->bb_counters[ord]++;
1266 }
1267
1268 mb_set_bits(sb_bgl_lock(EXT4_SB(e4b->bd_sb), ex->fe_group),
1269 EXT4_MB_BITMAP(e4b), ex->fe_start, len0);
1270 mb_check_buddy(e4b);
1271
1272 return ret;
1273}
1274
1275
1276
1277
1278static void ext4_mb_use_best_found(struct ext4_allocation_context *ac,
1279 struct ext4_buddy *e4b)
1280{
1281 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
1282 int ret;
1283
1284 BUG_ON(ac->ac_b_ex.fe_group != e4b->bd_group);
1285 BUG_ON(ac->ac_status == AC_STATUS_FOUND);
1286
1287 ac->ac_b_ex.fe_len = min(ac->ac_b_ex.fe_len, ac->ac_g_ex.fe_len);
1288 ac->ac_b_ex.fe_logical = ac->ac_g_ex.fe_logical;
1289 ret = mb_mark_used(e4b, &ac->ac_b_ex);
1290
1291
1292
1293 ac->ac_f_ex = ac->ac_b_ex;
1294
1295 ac->ac_status = AC_STATUS_FOUND;
1296 ac->ac_tail = ret & 0xffff;
1297 ac->ac_buddy = ret >> 16;
1298
1299
1300
1301 ac->ac_bitmap_page = e4b->bd_bitmap_page;
1302 get_page(ac->ac_bitmap_page);
1303 ac->ac_buddy_page = e4b->bd_buddy_page;
1304 get_page(ac->ac_buddy_page);
1305
1306
1307 if ((ac->ac_flags & EXT4_MB_HINT_DATA)) {
1308 spin_lock(&sbi->s_md_lock);
1309 sbi->s_mb_last_group = ac->ac_f_ex.fe_group;
1310 sbi->s_mb_last_start = ac->ac_f_ex.fe_start;
1311 spin_unlock(&sbi->s_md_lock);
1312 }
1313}
1314
1315
1316
1317
1318
1319static void ext4_mb_check_limits(struct ext4_allocation_context *ac,
1320 struct ext4_buddy *e4b,
1321 int finish_group)
1322{
1323 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
1324 struct ext4_free_extent *bex = &ac->ac_b_ex;
1325 struct ext4_free_extent *gex = &ac->ac_g_ex;
1326 struct ext4_free_extent ex;
1327 int max;
1328
1329
1330
1331
1332 if (ac->ac_found > sbi->s_mb_max_to_scan &&
1333 !(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
1334 ac->ac_status = AC_STATUS_BREAK;
1335 return;
1336 }
1337
1338
1339
1340
1341 if (bex->fe_len < gex->fe_len)
1342 return;
1343
1344 if ((finish_group || ac->ac_found > sbi->s_mb_min_to_scan)
1345 && bex->fe_group == e4b->bd_group) {
1346
1347
1348
1349 max = mb_find_extent(e4b, 0, bex->fe_start, gex->fe_len, &ex);
1350 if (max >= gex->fe_len) {
1351 ext4_mb_use_best_found(ac, e4b);
1352 return;
1353 }
1354 }
1355}
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
1368 struct ext4_free_extent *ex,
1369 struct ext4_buddy *e4b)
1370{
1371 struct ext4_free_extent *bex = &ac->ac_b_ex;
1372 struct ext4_free_extent *gex = &ac->ac_g_ex;
1373
1374 BUG_ON(ex->fe_len <= 0);
1375 BUG_ON(ex->fe_len >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
1376 BUG_ON(ex->fe_start >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
1377 BUG_ON(ac->ac_status != AC_STATUS_CONTINUE);
1378
1379 ac->ac_found++;
1380
1381
1382
1383
1384 if (unlikely(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
1385 *bex = *ex;
1386 ext4_mb_use_best_found(ac, e4b);
1387 return;
1388 }
1389
1390
1391
1392
1393 if (ex->fe_len == gex->fe_len) {
1394 *bex = *ex;
1395 ext4_mb_use_best_found(ac, e4b);
1396 return;
1397 }
1398
1399
1400
1401
1402 if (bex->fe_len == 0) {
1403 *bex = *ex;
1404 return;
1405 }
1406
1407
1408
1409
1410 if (bex->fe_len < gex->fe_len) {
1411
1412
1413 if (ex->fe_len > bex->fe_len)
1414 *bex = *ex;
1415 } else if (ex->fe_len > gex->fe_len) {
1416
1417
1418
1419 if (ex->fe_len < bex->fe_len)
1420 *bex = *ex;
1421 }
1422
1423 ext4_mb_check_limits(ac, e4b, 0);
1424}
1425
1426static int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
1427 struct ext4_buddy *e4b)
1428{
1429 struct ext4_free_extent ex = ac->ac_b_ex;
1430 ext4_group_t group = ex.fe_group;
1431 int max;
1432 int err;
1433
1434 BUG_ON(ex.fe_len <= 0);
1435 err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
1436 if (err)
1437 return err;
1438
1439 ext4_lock_group(ac->ac_sb, group);
1440 max = mb_find_extent(e4b, 0, ex.fe_start, ex.fe_len, &ex);
1441
1442 if (max > 0) {
1443 ac->ac_b_ex = ex;
1444 ext4_mb_use_best_found(ac, e4b);
1445 }
1446
1447 ext4_unlock_group(ac->ac_sb, group);
1448 ext4_mb_release_desc(e4b);
1449
1450 return 0;
1451}
1452
1453static int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
1454 struct ext4_buddy *e4b)
1455{
1456 ext4_group_t group = ac->ac_g_ex.fe_group;
1457 int max;
1458 int err;
1459 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
1460 struct ext4_super_block *es = sbi->s_es;
1461 struct ext4_free_extent ex;
1462
1463 if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL))
1464 return 0;
1465
1466 err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
1467 if (err)
1468 return err;
1469
1470 ext4_lock_group(ac->ac_sb, group);
1471 max = mb_find_extent(e4b, 0, ac->ac_g_ex.fe_start,
1472 ac->ac_g_ex.fe_len, &ex);
1473
1474 if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
1475 ext4_fsblk_t start;
1476
1477 start = (e4b->bd_group * EXT4_BLOCKS_PER_GROUP(ac->ac_sb)) +
1478 ex.fe_start + le32_to_cpu(es->s_first_data_block);
1479
1480 if (do_div(start, sbi->s_stripe) == 0) {
1481 ac->ac_found++;
1482 ac->ac_b_ex = ex;
1483 ext4_mb_use_best_found(ac, e4b);
1484 }
1485 } else if (max >= ac->ac_g_ex.fe_len) {
1486 BUG_ON(ex.fe_len <= 0);
1487 BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
1488 BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
1489 ac->ac_found++;
1490 ac->ac_b_ex = ex;
1491 ext4_mb_use_best_found(ac, e4b);
1492 } else if (max > 0 && (ac->ac_flags & EXT4_MB_HINT_MERGE)) {
1493
1494
1495 BUG_ON(ex.fe_len <= 0);
1496 BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
1497 BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
1498 ac->ac_found++;
1499 ac->ac_b_ex = ex;
1500 ext4_mb_use_best_found(ac, e4b);
1501 }
1502 ext4_unlock_group(ac->ac_sb, group);
1503 ext4_mb_release_desc(e4b);
1504
1505 return 0;
1506}
1507
1508
1509
1510
1511
1512static void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
1513 struct ext4_buddy *e4b)
1514{
1515 struct super_block *sb = ac->ac_sb;
1516 struct ext4_group_info *grp = e4b->bd_info;
1517 void *buddy;
1518 int i;
1519 int k;
1520 int max;
1521
1522 BUG_ON(ac->ac_2order <= 0);
1523 for (i = ac->ac_2order; i <= sb->s_blocksize_bits + 1; i++) {
1524 if (grp->bb_counters[i] == 0)
1525 continue;
1526
1527 buddy = mb_find_buddy(e4b, i, &max);
1528 BUG_ON(buddy == NULL);
1529
1530 k = mb_find_next_zero_bit(buddy, max, 0);
1531 BUG_ON(k >= max);
1532
1533 ac->ac_found++;
1534
1535 ac->ac_b_ex.fe_len = 1 << i;
1536 ac->ac_b_ex.fe_start = k << i;
1537 ac->ac_b_ex.fe_group = e4b->bd_group;
1538
1539 ext4_mb_use_best_found(ac, e4b);
1540
1541 BUG_ON(ac->ac_b_ex.fe_len != ac->ac_g_ex.fe_len);
1542
1543 if (EXT4_SB(sb)->s_mb_stats)
1544 atomic_inc(&EXT4_SB(sb)->s_bal_2orders);
1545
1546 break;
1547 }
1548}
1549
1550
1551
1552
1553
1554
1555static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
1556 struct ext4_buddy *e4b)
1557{
1558 struct super_block *sb = ac->ac_sb;
1559 void *bitmap = EXT4_MB_BITMAP(e4b);
1560 struct ext4_free_extent ex;
1561 int i;
1562 int free;
1563
1564 free = e4b->bd_info->bb_free;
1565 BUG_ON(free <= 0);
1566
1567 i = e4b->bd_info->bb_first_free;
1568
1569 while (free && ac->ac_status == AC_STATUS_CONTINUE) {
1570 i = mb_find_next_zero_bit(bitmap,
1571 EXT4_BLOCKS_PER_GROUP(sb), i);
1572 if (i >= EXT4_BLOCKS_PER_GROUP(sb)) {
1573
1574
1575
1576
1577
1578 ext4_error(sb, __func__, "%d free blocks as per "
1579 "group info. But bitmap says 0\n",
1580 free);
1581 break;
1582 }
1583
1584 mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex);
1585 BUG_ON(ex.fe_len <= 0);
1586 if (free < ex.fe_len) {
1587 ext4_error(sb, __func__, "%d free blocks as per "
1588 "group info. But got %d blocks\n",
1589 free, ex.fe_len);
1590
1591
1592
1593
1594
1595 break;
1596 }
1597
1598 ext4_mb_measure_extent(ac, &ex, e4b);
1599
1600 i += ex.fe_len;
1601 free -= ex.fe_len;
1602 }
1603
1604 ext4_mb_check_limits(ac, e4b, 1);
1605}
1606
1607
1608
1609
1610
1611
1612static void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
1613 struct ext4_buddy *e4b)
1614{
1615 struct super_block *sb = ac->ac_sb;
1616 struct ext4_sb_info *sbi = EXT4_SB(sb);
1617 void *bitmap = EXT4_MB_BITMAP(e4b);
1618 struct ext4_free_extent ex;
1619 ext4_fsblk_t first_group_block;
1620 ext4_fsblk_t a;
1621 ext4_grpblk_t i;
1622 int max;
1623
1624 BUG_ON(sbi->s_stripe == 0);
1625
1626
1627 first_group_block = e4b->bd_group * EXT4_BLOCKS_PER_GROUP(sb)
1628 + le32_to_cpu(sbi->s_es->s_first_data_block);
1629 a = first_group_block + sbi->s_stripe - 1;
1630 do_div(a, sbi->s_stripe);
1631 i = (a * sbi->s_stripe) - first_group_block;
1632
1633 while (i < EXT4_BLOCKS_PER_GROUP(sb)) {
1634 if (!mb_test_bit(i, bitmap)) {
1635 max = mb_find_extent(e4b, 0, i, sbi->s_stripe, &ex);
1636 if (max >= sbi->s_stripe) {
1637 ac->ac_found++;
1638 ac->ac_b_ex = ex;
1639 ext4_mb_use_best_found(ac, e4b);
1640 break;
1641 }
1642 }
1643 i += sbi->s_stripe;
1644 }
1645}
1646
1647static int ext4_mb_good_group(struct ext4_allocation_context *ac,
1648 ext4_group_t group, int cr)
1649{
1650 unsigned free, fragments;
1651 unsigned i, bits;
1652 struct ext4_group_desc *desc;
1653 struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
1654
1655 BUG_ON(cr < 0 || cr >= 4);
1656 BUG_ON(EXT4_MB_GRP_NEED_INIT(grp));
1657
1658 free = grp->bb_free;
1659 fragments = grp->bb_fragments;
1660 if (free == 0)
1661 return 0;
1662 if (fragments == 0)
1663 return 0;
1664
1665 switch (cr) {
1666 case 0:
1667 BUG_ON(ac->ac_2order == 0);
1668
1669 desc = ext4_get_group_desc(ac->ac_sb, group, NULL);
1670 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
1671 return 0;
1672
1673 bits = ac->ac_sb->s_blocksize_bits + 1;
1674 for (i = ac->ac_2order; i <= bits; i++)
1675 if (grp->bb_counters[i] > 0)
1676 return 1;
1677 break;
1678 case 1:
1679 if ((free / fragments) >= ac->ac_g_ex.fe_len)
1680 return 1;
1681 break;
1682 case 2:
1683 if (free >= ac->ac_g_ex.fe_len)
1684 return 1;
1685 break;
1686 case 3:
1687 return 1;
1688 default:
1689 BUG();
1690 }
1691
1692 return 0;
1693}
1694
1695static noinline_for_stack int
1696ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
1697{
1698 ext4_group_t group;
1699 ext4_group_t i;
1700 int cr;
1701 int err = 0;
1702 int bsbits;
1703 struct ext4_sb_info *sbi;
1704 struct super_block *sb;
1705 struct ext4_buddy e4b;
1706 loff_t size, isize;
1707
1708 sb = ac->ac_sb;
1709 sbi = EXT4_SB(sb);
1710 BUG_ON(ac->ac_status == AC_STATUS_FOUND);
1711
1712
1713 err = ext4_mb_find_by_goal(ac, &e4b);
1714 if (err || ac->ac_status == AC_STATUS_FOUND)
1715 goto out;
1716
1717 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
1718 goto out;
1719
1720
1721
1722
1723
1724
1725 i = fls(ac->ac_g_ex.fe_len);
1726 ac->ac_2order = 0;
1727
1728
1729
1730
1731
1732 if (i >= sbi->s_mb_order2_reqs) {
1733
1734
1735
1736 if ((ac->ac_g_ex.fe_len & (~(1 << (i - 1)))) == 0)
1737 ac->ac_2order = i - 1;
1738 }
1739
1740 bsbits = ac->ac_sb->s_blocksize_bits;
1741
1742 size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len;
1743 isize = i_size_read(ac->ac_inode) >> bsbits;
1744 if (size < isize)
1745 size = isize;
1746
1747 if (size < sbi->s_mb_stream_request &&
1748 (ac->ac_flags & EXT4_MB_HINT_DATA)) {
1749
1750 spin_lock(&sbi->s_md_lock);
1751 ac->ac_g_ex.fe_group = sbi->s_mb_last_group;
1752 ac->ac_g_ex.fe_start = sbi->s_mb_last_start;
1753 spin_unlock(&sbi->s_md_lock);
1754 }
1755
1756 cr = ac->ac_2order ? 0 : 1;
1757
1758
1759
1760
1761repeat:
1762 for (; cr < 4 && ac->ac_status == AC_STATUS_CONTINUE; cr++) {
1763 ac->ac_criteria = cr;
1764
1765
1766
1767
1768 group = ac->ac_g_ex.fe_group;
1769
1770 for (i = 0; i < EXT4_SB(sb)->s_groups_count; group++, i++) {
1771 struct ext4_group_info *grp;
1772 struct ext4_group_desc *desc;
1773
1774 if (group == EXT4_SB(sb)->s_groups_count)
1775 group = 0;
1776
1777
1778 grp = ext4_get_group_info(ac->ac_sb, group);
1779 if (grp->bb_free == 0)
1780 continue;
1781
1782
1783
1784
1785
1786 if (EXT4_MB_GRP_NEED_INIT(grp)) {
1787
1788
1789
1790
1791 err = ext4_mb_load_buddy(sb, group, &e4b);
1792 if (err)
1793 goto out;
1794 ext4_mb_release_desc(&e4b);
1795 }
1796
1797
1798
1799
1800
1801 if (!ext4_mb_good_group(ac, group, cr))
1802 continue;
1803
1804 err = ext4_mb_load_buddy(sb, group, &e4b);
1805 if (err)
1806 goto out;
1807
1808 ext4_lock_group(sb, group);
1809 if (!ext4_mb_good_group(ac, group, cr)) {
1810
1811 ext4_unlock_group(sb, group);
1812 ext4_mb_release_desc(&e4b);
1813 continue;
1814 }
1815
1816 ac->ac_groups_scanned++;
1817 desc = ext4_get_group_desc(sb, group, NULL);
1818 if (cr == 0 || (desc->bg_flags &
1819 cpu_to_le16(EXT4_BG_BLOCK_UNINIT) &&
1820 ac->ac_2order != 0))
1821 ext4_mb_simple_scan_group(ac, &e4b);
1822 else if (cr == 1 &&
1823 ac->ac_g_ex.fe_len == sbi->s_stripe)
1824 ext4_mb_scan_aligned(ac, &e4b);
1825 else
1826 ext4_mb_complex_scan_group(ac, &e4b);
1827
1828 ext4_unlock_group(sb, group);
1829 ext4_mb_release_desc(&e4b);
1830
1831 if (ac->ac_status != AC_STATUS_CONTINUE)
1832 break;
1833 }
1834 }
1835
1836 if (ac->ac_b_ex.fe_len > 0 && ac->ac_status != AC_STATUS_FOUND &&
1837 !(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
1838
1839
1840
1841
1842
1843 ext4_mb_try_best_found(ac, &e4b);
1844 if (ac->ac_status != AC_STATUS_FOUND) {
1845
1846
1847
1848
1849
1850
1851 ac->ac_b_ex.fe_group = 0;
1852 ac->ac_b_ex.fe_start = 0;
1853 ac->ac_b_ex.fe_len = 0;
1854 ac->ac_status = AC_STATUS_CONTINUE;
1855 ac->ac_flags |= EXT4_MB_HINT_FIRST;
1856 cr = 3;
1857 atomic_inc(&sbi->s_mb_lost_chunks);
1858 goto repeat;
1859 }
1860 }
1861out:
1862 return err;
1863}
1864
1865#ifdef EXT4_MB_HISTORY
/*
 * Per-open state of the "mb_history" proc file: a private snapshot of the
 * history ring taken in ext4_mb_seq_history_open().
 */
struct ext4_mb_proc_session {
	/* kmalloc'ed copy of sbi->s_mb_history */
	struct ext4_mb_history *history;
	/* filesystem the snapshot belongs to */
	struct super_block *sb;
	/* ring position at snapshot time: s_mb_history_cur % max */
	int start;
	/* number of slots in @history (s_mb_history_max) */
	int max;
};
1872
1873static void *ext4_mb_history_skip_empty(struct ext4_mb_proc_session *s,
1874 struct ext4_mb_history *hs,
1875 int first)
1876{
1877 if (hs == s->history + s->max)
1878 hs = s->history;
1879 if (!first && hs == s->history + s->start)
1880 return NULL;
1881 while (hs->orig.fe_len == 0) {
1882 hs++;
1883 if (hs == s->history + s->max)
1884 hs = s->history;
1885 if (hs == s->history + s->start)
1886 return NULL;
1887 }
1888 return hs;
1889}
1890
1891static void *ext4_mb_seq_history_start(struct seq_file *seq, loff_t *pos)
1892{
1893 struct ext4_mb_proc_session *s = seq->private;
1894 struct ext4_mb_history *hs;
1895 int l = *pos;
1896
1897 if (l == 0)
1898 return SEQ_START_TOKEN;
1899 hs = ext4_mb_history_skip_empty(s, s->history + s->start, 1);
1900 if (!hs)
1901 return NULL;
1902 while (--l && (hs = ext4_mb_history_skip_empty(s, ++hs, 0)) != NULL);
1903 return hs;
1904}
1905
1906static void *ext4_mb_seq_history_next(struct seq_file *seq, void *v,
1907 loff_t *pos)
1908{
1909 struct ext4_mb_proc_session *s = seq->private;
1910 struct ext4_mb_history *hs = v;
1911
1912 ++*pos;
1913 if (v == SEQ_START_TOKEN)
1914 return ext4_mb_history_skip_empty(s, s->history + s->start, 1);
1915 else
1916 return ext4_mb_history_skip_empty(s, ++hs, 0);
1917}
1918
1919static int ext4_mb_seq_history_show(struct seq_file *seq, void *v)
1920{
1921 char buf[25], buf2[25], buf3[25], *fmt;
1922 struct ext4_mb_history *hs = v;
1923
1924 if (v == SEQ_START_TOKEN) {
1925 seq_printf(seq, "%-5s %-8s %-23s %-23s %-23s %-5s "
1926 "%-5s %-2s %-5s %-5s %-5s %-6s\n",
1927 "pid", "inode", "original", "goal", "result", "found",
1928 "grps", "cr", "flags", "merge", "tail", "broken");
1929 return 0;
1930 }
1931
1932 if (hs->op == EXT4_MB_HISTORY_ALLOC) {
1933 fmt = "%-5u %-8u %-23s %-23s %-23s %-5u %-5u %-2u "
1934 "%-5u %-5s %-5u %-6u\n";
1935 sprintf(buf2, "%lu/%d/%u@%u", hs->result.fe_group,
1936 hs->result.fe_start, hs->result.fe_len,
1937 hs->result.fe_logical);
1938 sprintf(buf, "%lu/%d/%u@%u", hs->orig.fe_group,
1939 hs->orig.fe_start, hs->orig.fe_len,
1940 hs->orig.fe_logical);
1941 sprintf(buf3, "%lu/%d/%u@%u", hs->goal.fe_group,
1942 hs->goal.fe_start, hs->goal.fe_len,
1943 hs->goal.fe_logical);
1944 seq_printf(seq, fmt, hs->pid, hs->ino, buf, buf3, buf2,
1945 hs->found, hs->groups, hs->cr, hs->flags,
1946 hs->merged ? "M" : "", hs->tail,
1947 hs->buddy ? 1 << hs->buddy : 0);
1948 } else if (hs->op == EXT4_MB_HISTORY_PREALLOC) {
1949 fmt = "%-5u %-8u %-23s %-23s %-23s\n";
1950 sprintf(buf2, "%lu/%d/%u@%u", hs->result.fe_group,
1951 hs->result.fe_start, hs->result.fe_len,
1952 hs->result.fe_logical);
1953 sprintf(buf, "%lu/%d/%u@%u", hs->orig.fe_group,
1954 hs->orig.fe_start, hs->orig.fe_len,
1955 hs->orig.fe_logical);
1956 seq_printf(seq, fmt, hs->pid, hs->ino, buf, "", buf2);
1957 } else if (hs->op == EXT4_MB_HISTORY_DISCARD) {
1958 sprintf(buf2, "%lu/%d/%u", hs->result.fe_group,
1959 hs->result.fe_start, hs->result.fe_len);
1960 seq_printf(seq, "%-5u %-8u %-23s discard\n",
1961 hs->pid, hs->ino, buf2);
1962 } else if (hs->op == EXT4_MB_HISTORY_FREE) {
1963 sprintf(buf2, "%lu/%d/%u", hs->result.fe_group,
1964 hs->result.fe_start, hs->result.fe_len);
1965 seq_printf(seq, "%-5u %-8u %-23s free\n",
1966 hs->pid, hs->ino, buf2);
1967 }
1968 return 0;
1969}
1970
/* seq_file ->stop: the history iterator holds nothing that needs releasing. */
static void ext4_mb_seq_history_stop(struct seq_file *seq, void *v)
{
	/* intentionally empty */
}
1974
1975static struct seq_operations ext4_mb_seq_history_ops = {
1976 .start = ext4_mb_seq_history_start,
1977 .next = ext4_mb_seq_history_next,
1978 .stop = ext4_mb_seq_history_stop,
1979 .show = ext4_mb_seq_history_show,
1980};
1981
1982static int ext4_mb_seq_history_open(struct inode *inode, struct file *file)
1983{
1984 struct super_block *sb = PDE(inode)->data;
1985 struct ext4_sb_info *sbi = EXT4_SB(sb);
1986 struct ext4_mb_proc_session *s;
1987 int rc;
1988 int size;
1989
1990 if (unlikely(sbi->s_mb_history == NULL))
1991 return -ENOMEM;
1992 s = kmalloc(sizeof(*s), GFP_KERNEL);
1993 if (s == NULL)
1994 return -ENOMEM;
1995 s->sb = sb;
1996 size = sizeof(struct ext4_mb_history) * sbi->s_mb_history_max;
1997 s->history = kmalloc(size, GFP_KERNEL);
1998 if (s->history == NULL) {
1999 kfree(s);
2000 return -ENOMEM;
2001 }
2002
2003 spin_lock(&sbi->s_mb_history_lock);
2004 memcpy(s->history, sbi->s_mb_history, size);
2005 s->max = sbi->s_mb_history_max;
2006 s->start = sbi->s_mb_history_cur % s->max;
2007 spin_unlock(&sbi->s_mb_history_lock);
2008
2009 rc = seq_open(file, &ext4_mb_seq_history_ops);
2010 if (rc == 0) {
2011 struct seq_file *m = (struct seq_file *)file->private_data;
2012 m->private = s;
2013 } else {
2014 kfree(s->history);
2015 kfree(s);
2016 }
2017 return rc;
2018
2019}
2020
2021static int ext4_mb_seq_history_release(struct inode *inode, struct file *file)
2022{
2023 struct seq_file *seq = (struct seq_file *)file->private_data;
2024 struct ext4_mb_proc_session *s = seq->private;
2025 kfree(s->history);
2026 kfree(s);
2027 return seq_release(inode, file);
2028}
2029
2030static ssize_t ext4_mb_seq_history_write(struct file *file,
2031 const char __user *buffer,
2032 size_t count, loff_t *ppos)
2033{
2034 struct seq_file *seq = (struct seq_file *)file->private_data;
2035 struct ext4_mb_proc_session *s = seq->private;
2036 struct super_block *sb = s->sb;
2037 char str[32];
2038 int value;
2039
2040 if (count >= sizeof(str)) {
2041 printk(KERN_ERR "EXT4-fs: %s string too long, max %u bytes\n",
2042 "mb_history", (int)sizeof(str));
2043 return -EOVERFLOW;
2044 }
2045
2046 if (copy_from_user(str, buffer, count))
2047 return -EFAULT;
2048
2049 value = simple_strtol(str, NULL, 0);
2050 if (value < 0)
2051 return -ERANGE;
2052 EXT4_SB(sb)->s_mb_history_filter = value;
2053
2054 return count;
2055}
2056
2057static struct file_operations ext4_mb_seq_history_fops = {
2058 .owner = THIS_MODULE,
2059 .open = ext4_mb_seq_history_open,
2060 .read = seq_read,
2061 .write = ext4_mb_seq_history_write,
2062 .llseek = seq_lseek,
2063 .release = ext4_mb_seq_history_release,
2064};
2065
2066static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
2067{
2068 struct super_block *sb = seq->private;
2069 struct ext4_sb_info *sbi = EXT4_SB(sb);
2070 ext4_group_t group;
2071
2072 if (*pos < 0 || *pos >= sbi->s_groups_count)
2073 return NULL;
2074
2075 group = *pos + 1;
2076 return (void *) group;
2077}
2078
2079static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
2080{
2081 struct super_block *sb = seq->private;
2082 struct ext4_sb_info *sbi = EXT4_SB(sb);
2083 ext4_group_t group;
2084
2085 ++*pos;
2086 if (*pos < 0 || *pos >= sbi->s_groups_count)
2087 return NULL;
2088 group = *pos + 1;
2089 return (void *) group;;
2090}
2091
2092static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
2093{
2094 struct super_block *sb = seq->private;
2095 long group = (long) v;
2096 int i;
2097 int err;
2098 struct ext4_buddy e4b;
2099 struct sg {
2100 struct ext4_group_info info;
2101 unsigned short counters[16];
2102 } sg;
2103
2104 group--;
2105 if (group == 0)
2106 seq_printf(seq, "#%-5s: %-5s %-5s %-5s "
2107 "[ %-5s %-5s %-5s %-5s %-5s %-5s %-5s "
2108 "%-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n",
2109 "group", "free", "frags", "first",
2110 "2^0", "2^1", "2^2", "2^3", "2^4", "2^5", "2^6",
2111 "2^7", "2^8", "2^9", "2^10", "2^11", "2^12", "2^13");
2112
2113 i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
2114 sizeof(struct ext4_group_info);
2115 err = ext4_mb_load_buddy(sb, group, &e4b);
2116 if (err) {
2117 seq_printf(seq, "#%-5lu: I/O error\n", group);
2118 return 0;
2119 }
2120 ext4_lock_group(sb, group);
2121 memcpy(&sg, ext4_get_group_info(sb, group), i);
2122 ext4_unlock_group(sb, group);
2123 ext4_mb_release_desc(&e4b);
2124
2125 seq_printf(seq, "#%-5lu: %-5u %-5u %-5u [", group, sg.info.bb_free,
2126 sg.info.bb_fragments, sg.info.bb_first_free);
2127 for (i = 0; i <= 13; i++)
2128 seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ?
2129 sg.info.bb_counters[i] : 0);
2130 seq_printf(seq, " ]\n");
2131
2132 return 0;
2133}
2134
/* seq_file ->stop: the group iterator holds nothing that needs releasing. */
static void ext4_mb_seq_groups_stop(struct seq_file *seq, void *v)
{
	/* intentionally empty */
}
2138
2139static struct seq_operations ext4_mb_seq_groups_ops = {
2140 .start = ext4_mb_seq_groups_start,
2141 .next = ext4_mb_seq_groups_next,
2142 .stop = ext4_mb_seq_groups_stop,
2143 .show = ext4_mb_seq_groups_show,
2144};
2145
2146static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file)
2147{
2148 struct super_block *sb = PDE(inode)->data;
2149 int rc;
2150
2151 rc = seq_open(file, &ext4_mb_seq_groups_ops);
2152 if (rc == 0) {
2153 struct seq_file *m = (struct seq_file *)file->private_data;
2154 m->private = sb;
2155 }
2156 return rc;
2157
2158}
2159
2160static struct file_operations ext4_mb_seq_groups_fops = {
2161 .owner = THIS_MODULE,
2162 .open = ext4_mb_seq_groups_open,
2163 .read = seq_read,
2164 .llseek = seq_lseek,
2165 .release = seq_release,
2166};
2167
2168static void ext4_mb_history_release(struct super_block *sb)
2169{
2170 struct ext4_sb_info *sbi = EXT4_SB(sb);
2171
2172 if (sbi->s_proc != NULL) {
2173 remove_proc_entry("mb_groups", sbi->s_proc);
2174 remove_proc_entry("mb_history", sbi->s_proc);
2175 }
2176 kfree(sbi->s_mb_history);
2177}
2178
2179static void ext4_mb_history_init(struct super_block *sb)
2180{
2181 struct ext4_sb_info *sbi = EXT4_SB(sb);
2182 int i;
2183
2184 if (sbi->s_proc != NULL) {
2185 proc_create_data("mb_history", S_IRUGO, sbi->s_proc,
2186 &ext4_mb_seq_history_fops, sb);
2187 proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
2188 &ext4_mb_seq_groups_fops, sb);
2189 }
2190
2191 sbi->s_mb_history_max = 1000;
2192 sbi->s_mb_history_cur = 0;
2193 spin_lock_init(&sbi->s_mb_history_lock);
2194 i = sbi->s_mb_history_max * sizeof(struct ext4_mb_history);
2195 sbi->s_mb_history = kzalloc(i, GFP_KERNEL);
2196
2197}
2198
2199static noinline_for_stack void
2200ext4_mb_store_history(struct ext4_allocation_context *ac)
2201{
2202 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
2203 struct ext4_mb_history h;
2204
2205 if (unlikely(sbi->s_mb_history == NULL))
2206 return;
2207
2208 if (!(ac->ac_op & sbi->s_mb_history_filter))
2209 return;
2210
2211 h.op = ac->ac_op;
2212 h.pid = current->pid;
2213 h.ino = ac->ac_inode ? ac->ac_inode->i_ino : 0;
2214 h.orig = ac->ac_o_ex;
2215 h.result = ac->ac_b_ex;
2216 h.flags = ac->ac_flags;
2217 h.found = ac->ac_found;
2218 h.groups = ac->ac_groups_scanned;
2219 h.cr = ac->ac_criteria;
2220 h.tail = ac->ac_tail;
2221 h.buddy = ac->ac_buddy;
2222 h.merged = 0;
2223 if (ac->ac_op == EXT4_MB_HISTORY_ALLOC) {
2224 if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start &&
2225 ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group)
2226 h.merged = 1;
2227 h.goal = ac->ac_g_ex;
2228 h.result = ac->ac_f_ex;
2229 }
2230
2231 spin_lock(&sbi->s_mb_history_lock);
2232 memcpy(sbi->s_mb_history + sbi->s_mb_history_cur, &h, sizeof(h));
2233 if (++sbi->s_mb_history_cur >= sbi->s_mb_history_max)
2234 sbi->s_mb_history_cur = 0;
2235 spin_unlock(&sbi->s_mb_history_lock);
2236}
2237
2238#else
/*
 * History support compiled out: keep the calls valid as no-op statements.
 * do { } while (0) makes each macro behave like a single statement that
 * requires its trailing semicolon, also inside un-braced if/else bodies.
 */
#define ext4_mb_history_release(sb)	do { } while (0)
#define ext4_mb_history_init(sb)	do { } while (0)
2241#endif
2242
2243
2244
2245int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2246 struct ext4_group_desc *desc)
2247{
2248 int i, len;
2249 int metalen = 0;
2250 struct ext4_sb_info *sbi = EXT4_SB(sb);
2251 struct ext4_group_info **meta_group_info;
2252
2253
2254
2255
2256
2257
2258 if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
2259 metalen = sizeof(*meta_group_info) <<
2260 EXT4_DESC_PER_BLOCK_BITS(sb);
2261 meta_group_info = kmalloc(metalen, GFP_KERNEL);
2262 if (meta_group_info == NULL) {
2263 printk(KERN_ERR "EXT4-fs: can't allocate mem for a "
2264 "buddy group\n");
2265 goto exit_meta_group_info;
2266 }
2267 sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] =
2268 meta_group_info;
2269 }
2270
2271
2272
2273
2274
2275 len = offsetof(typeof(**meta_group_info),
2276 bb_counters[sb->s_blocksize_bits + 2]);
2277
2278 meta_group_info =
2279 sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)];
2280 i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
2281
2282 meta_group_info[i] = kzalloc(len, GFP_KERNEL);
2283 if (meta_group_info[i] == NULL) {
2284 printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n");
2285 goto exit_group_info;
2286 }
2287 set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
2288 &(meta_group_info[i]->bb_state));
2289
2290
2291
2292
2293
2294 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
2295 meta_group_info[i]->bb_free =
2296 ext4_free_blocks_after_init(sb, group, desc);
2297 } else {
2298 meta_group_info[i]->bb_free =
2299 le16_to_cpu(desc->bg_free_blocks_count);
2300 }
2301
2302 INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
2303 meta_group_info[i]->bb_free_root.rb_node = NULL;;
2304
2305#ifdef DOUBLE_CHECK
2306 {
2307 struct buffer_head *bh;
2308 meta_group_info[i]->bb_bitmap =
2309 kmalloc(sb->s_blocksize, GFP_KERNEL);
2310 BUG_ON(meta_group_info[i]->bb_bitmap == NULL);
2311 bh = ext4_read_block_bitmap(sb, group);
2312 BUG_ON(bh == NULL);
2313 memcpy(meta_group_info[i]->bb_bitmap, bh->b_data,
2314 sb->s_blocksize);
2315 put_bh(bh);
2316 }
2317#endif
2318
2319 return 0;
2320
2321exit_group_info:
2322
2323 if (group % EXT4_DESC_PER_BLOCK(sb) == 0)
2324 kfree(sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]);
2325exit_meta_group_info:
2326 return -ENOMEM;
2327}
2328
2329
2330
2331
2332
2333int ext4_mb_add_more_groupinfo(struct super_block *sb, ext4_group_t group,
2334 struct ext4_group_desc *desc)
2335{
2336 struct ext4_sb_info *sbi = EXT4_SB(sb);
2337 struct inode *inode = sbi->s_buddy_cache;
2338 int blocks_per_page;
2339 int block;
2340 int pnum;
2341 struct page *page;
2342 int err;
2343
2344
2345 err = ext4_mb_add_groupinfo(sb, group, desc);
2346 if (err)
2347 return err;
2348
2349
2350
2351
2352
2353
2354
2355
2356 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
2357 block = group * 2;
2358 pnum = block / blocks_per_page;
2359 page = find_get_page(inode->i_mapping, pnum);
2360 if (page != NULL) {
2361 ClearPageUptodate(page);
2362 page_cache_release(page);
2363 }
2364
2365
2366 block++;
2367 pnum = block / blocks_per_page;
2368 page = find_get_page(inode->i_mapping, pnum);
2369 if (page != NULL) {
2370 ClearPageUptodate(page);
2371 page_cache_release(page);
2372 }
2373
2374 return 0;
2375}
2376
2377
2378
2379
2380
2381void ext4_mb_update_group_info(struct ext4_group_info *grp, ext4_grpblk_t add)
2382{
2383 grp->bb_free += add;
2384}
2385
2386static int ext4_mb_init_backend(struct super_block *sb)
2387{
2388 ext4_group_t i;
2389 int metalen;
2390 struct ext4_sb_info *sbi = EXT4_SB(sb);
2391 struct ext4_super_block *es = sbi->s_es;
2392 int num_meta_group_infos;
2393 int num_meta_group_infos_max;
2394 int array_size;
2395 struct ext4_group_info **meta_group_info;
2396 struct ext4_group_desc *desc;
2397
2398
2399 num_meta_group_infos = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) -
2400 1) >> EXT4_DESC_PER_BLOCK_BITS(sb);
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413 num_meta_group_infos_max = num_meta_group_infos +
2414 le16_to_cpu(es->s_reserved_gdt_blocks);
2415
2416
2417
2418
2419
2420
2421
2422 array_size = 1;
2423 while (array_size < sizeof(*sbi->s_group_info) *
2424 num_meta_group_infos_max)
2425 array_size = array_size << 1;
2426
2427
2428
2429 sbi->s_group_info = kmalloc(array_size, GFP_KERNEL);
2430 if (sbi->s_group_info == NULL) {
2431 printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n");
2432 return -ENOMEM;
2433 }
2434 sbi->s_buddy_cache = new_inode(sb);
2435 if (sbi->s_buddy_cache == NULL) {
2436 printk(KERN_ERR "EXT4-fs: can't get new inode\n");
2437 goto err_freesgi;
2438 }
2439 EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
2440
2441 metalen = sizeof(*meta_group_info) << EXT4_DESC_PER_BLOCK_BITS(sb);
2442 for (i = 0; i < num_meta_group_infos; i++) {
2443 if ((i + 1) == num_meta_group_infos)
2444 metalen = sizeof(*meta_group_info) *
2445 (sbi->s_groups_count -
2446 (i << EXT4_DESC_PER_BLOCK_BITS(sb)));
2447 meta_group_info = kmalloc(metalen, GFP_KERNEL);
2448 if (meta_group_info == NULL) {
2449 printk(KERN_ERR "EXT4-fs: can't allocate mem for a "
2450 "buddy group\n");
2451 goto err_freemeta;
2452 }
2453 sbi->s_group_info[i] = meta_group_info;
2454 }
2455
2456 for (i = 0; i < sbi->s_groups_count; i++) {
2457 desc = ext4_get_group_desc(sb, i, NULL);
2458 if (desc == NULL) {
2459 printk(KERN_ERR
2460 "EXT4-fs: can't read descriptor %lu\n", i);
2461 goto err_freebuddy;
2462 }
2463 if (ext4_mb_add_groupinfo(sb, i, desc) != 0)
2464 goto err_freebuddy;
2465 }
2466
2467 return 0;
2468
2469err_freebuddy:
2470 while (i-- > 0)
2471 kfree(ext4_get_group_info(sb, i));
2472 i = num_meta_group_infos;
2473err_freemeta:
2474 while (i-- > 0)
2475 kfree(sbi->s_group_info[i]);
2476 iput(sbi->s_buddy_cache);
2477err_freesgi:
2478 kfree(sbi->s_group_info);
2479 return -ENOMEM;
2480}
2481
2482int ext4_mb_init(struct super_block *sb, int needs_recovery)
2483{
2484 struct ext4_sb_info *sbi = EXT4_SB(sb);
2485 unsigned i, j;
2486 unsigned offset;
2487 unsigned max;
2488 int ret;
2489
2490 i = (sb->s_blocksize_bits + 2) * sizeof(unsigned short);
2491
2492 sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL);
2493 if (sbi->s_mb_offsets == NULL) {
2494 return -ENOMEM;
2495 }
2496 sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
2497 if (sbi->s_mb_maxs == NULL) {
2498 kfree(sbi->s_mb_maxs);
2499 return -ENOMEM;
2500 }
2501
2502
2503 sbi->s_mb_maxs[0] = sb->s_blocksize << 3;
2504 sbi->s_mb_offsets[0] = 0;
2505
2506 i = 1;
2507 offset = 0;
2508 max = sb->s_blocksize << 2;
2509 do {
2510 sbi->s_mb_offsets[i] = offset;
2511 sbi->s_mb_maxs[i] = max;
2512 offset += 1 << (sb->s_blocksize_bits - i);
2513 max = max >> 1;
2514 i++;
2515 } while (i <= sb->s_blocksize_bits + 1);
2516
2517
2518 ret = ext4_mb_init_backend(sb);
2519 if (ret != 0) {
2520 kfree(sbi->s_mb_offsets);
2521 kfree(sbi->s_mb_maxs);
2522 return ret;
2523 }
2524
2525 spin_lock_init(&sbi->s_md_lock);
2526 spin_lock_init(&sbi->s_bal_lock);
2527
2528 sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
2529 sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN;
2530 sbi->s_mb_stats = MB_DEFAULT_STATS;
2531 sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD;
2532 sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
2533 sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT;
2534 sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC;
2535
2536 sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
2537 if (sbi->s_locality_groups == NULL) {
2538 kfree(sbi->s_mb_offsets);
2539 kfree(sbi->s_mb_maxs);
2540 return -ENOMEM;
2541 }
2542 for_each_possible_cpu(i) {
2543 struct ext4_locality_group *lg;
2544 lg = per_cpu_ptr(sbi->s_locality_groups, i);
2545 mutex_init(&lg->lg_mutex);
2546 for (j = 0; j < PREALLOC_TB_SIZE; j++)
2547 INIT_LIST_HEAD(&lg->lg_prealloc_list[j]);
2548 spin_lock_init(&lg->lg_prealloc_lock);
2549 }
2550
2551 ext4_mb_init_per_dev_proc(sb);
2552 ext4_mb_history_init(sb);
2553
2554 sbi->s_journal->j_commit_callback = release_blocks_on_commit;
2555
2556 printk(KERN_INFO "EXT4-fs: mballoc enabled\n");
2557 return 0;
2558}
2559
2560
2561static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
2562{
2563 struct ext4_prealloc_space *pa;
2564 struct list_head *cur, *tmp;
2565 int count = 0;
2566
2567 list_for_each_safe(cur, tmp, &grp->bb_prealloc_list) {
2568 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
2569 list_del(&pa->pa_group_list);
2570 count++;
2571 kmem_cache_free(ext4_pspace_cachep, pa);
2572 }
2573 if (count)
2574 mb_debug("mballoc: %u PAs left\n", count);
2575
2576}
2577
2578int ext4_mb_release(struct super_block *sb)
2579{
2580 ext4_group_t i;
2581 int num_meta_group_infos;
2582 struct ext4_group_info *grinfo;
2583 struct ext4_sb_info *sbi = EXT4_SB(sb);
2584
2585 if (sbi->s_group_info) {
2586 for (i = 0; i < sbi->s_groups_count; i++) {
2587 grinfo = ext4_get_group_info(sb, i);
2588#ifdef DOUBLE_CHECK
2589 kfree(grinfo->bb_bitmap);
2590#endif
2591 ext4_lock_group(sb, i);
2592 ext4_mb_cleanup_pa(grinfo);
2593 ext4_unlock_group(sb, i);
2594 kfree(grinfo);
2595 }
2596 num_meta_group_infos = (sbi->s_groups_count +
2597 EXT4_DESC_PER_BLOCK(sb) - 1) >>
2598 EXT4_DESC_PER_BLOCK_BITS(sb);
2599 for (i = 0; i < num_meta_group_infos; i++)
2600 kfree(sbi->s_group_info[i]);
2601 kfree(sbi->s_group_info);
2602 }
2603 kfree(sbi->s_mb_offsets);
2604 kfree(sbi->s_mb_maxs);
2605 if (sbi->s_buddy_cache)
2606 iput(sbi->s_buddy_cache);
2607 if (sbi->s_mb_stats) {
2608 printk(KERN_INFO
2609 "EXT4-fs: mballoc: %u blocks %u reqs (%u success)\n",
2610 atomic_read(&sbi->s_bal_allocated),
2611 atomic_read(&sbi->s_bal_reqs),
2612 atomic_read(&sbi->s_bal_success));
2613 printk(KERN_INFO
2614 "EXT4-fs: mballoc: %u extents scanned, %u goal hits, "
2615 "%u 2^N hits, %u breaks, %u lost\n",
2616 atomic_read(&sbi->s_bal_ex_scanned),
2617 atomic_read(&sbi->s_bal_goals),
2618 atomic_read(&sbi->s_bal_2orders),
2619 atomic_read(&sbi->s_bal_breaks),
2620 atomic_read(&sbi->s_mb_lost_chunks));
2621 printk(KERN_INFO
2622 "EXT4-fs: mballoc: %lu generated and it took %Lu\n",
2623 sbi->s_mb_buddies_generated++,
2624 sbi->s_mb_generation_time);
2625 printk(KERN_INFO
2626 "EXT4-fs: mballoc: %u preallocated, %u discarded\n",
2627 atomic_read(&sbi->s_mb_preallocated),
2628 atomic_read(&sbi->s_mb_discarded));
2629 }
2630
2631 free_percpu(sbi->s_locality_groups);
2632 ext4_mb_history_release(sb);
2633 ext4_mb_destroy_per_dev_proc(sb);
2634
2635 return 0;
2636}
2637
2638
2639
2640
2641
2642static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2643{
2644 struct super_block *sb = journal->j_private;
2645 struct ext4_buddy e4b;
2646 struct ext4_group_info *db;
2647 int err, count = 0, count2 = 0;
2648 struct ext4_free_data *entry;
2649 ext4_fsblk_t discard_block;
2650 struct list_head *l, *ltmp;
2651
2652 list_for_each_safe(l, ltmp, &txn->t_private_list) {
2653 entry = list_entry(l, struct ext4_free_data, list);
2654
2655 mb_debug("gonna free %u blocks in group %lu (0x%p):",
2656 entry->count, entry->group, entry);
2657
2658 err = ext4_mb_load_buddy(sb, entry->group, &e4b);
2659
2660 BUG_ON(err != 0);
2661
2662 db = e4b.bd_info;
2663
2664 count += entry->count;
2665 count2++;
2666 ext4_lock_group(sb, entry->group);
2667
2668 rb_erase(&entry->node, &(db->bb_free_root));
2669 mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count);
2670
2671 if (!db->bb_free_root.rb_node) {
2672
2673
2674
2675 page_cache_release(e4b.bd_buddy_page);
2676 page_cache_release(e4b.bd_bitmap_page);
2677 }
2678 ext4_unlock_group(sb, entry->group);
2679 discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb)
2680 + entry->start_blk
2681 + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
2682 trace_mark(ext4_discard_blocks, "dev %s blk %llu count %u", sb->s_id,
2683 (unsigned long long) discard_block, entry->count);
2684 sb_issue_discard(sb, discard_block, entry->count);
2685
2686 kmem_cache_free(ext4_free_ext_cachep, entry);
2687 ext4_mb_release_desc(&e4b);
2688 }
2689
2690 mb_debug("freed %u blocks in %u structures\n", count, count2);
2691}
2692
/* names of the per-filesystem proc entries for the mballoc tunables */
#define EXT4_MB_STATS_NAME		"stats"
#define EXT4_MB_MAX_TO_SCAN_NAME	"max_to_scan"
#define EXT4_MB_MIN_TO_SCAN_NAME	"min_to_scan"
#define EXT4_MB_ORDER2_REQ		"order2_req"
#define EXT4_MB_STREAM_REQ		"stream_req"
#define EXT4_MB_GROUP_PREALLOC		"group_prealloc"
2699
/*
 * Register one proc entry per mballoc tunable through EXT4_PROC_HANDLER.
 * Without CONFIG_PROC_FS this is a no-op returning 0.  Otherwise returns
 * 0 on success, -EINVAL when the filesystem has no proc directory and
 * -ENOMEM through the err_out path, which removes all six entries again.
 *
 * The locals `mode`, `sbi` and `proc` and the label `err_out` keep their
 * names because EXT4_PROC_HANDLER is defined elsewhere and may rely on
 * them.
 */
static int ext4_mb_init_per_dev_proc(struct super_block *sb)
{
#ifndef CONFIG_PROC_FS
	return 0;
#else
	mode_t mode = S_IFREG | S_IRUGO | S_IWUSR;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct proc_dir_entry *proc;

	if (sbi->s_proc == NULL)
		return -EINVAL;

	EXT4_PROC_HANDLER(EXT4_MB_STATS_NAME, mb_stats);
	EXT4_PROC_HANDLER(EXT4_MB_MAX_TO_SCAN_NAME, mb_max_to_scan);
	EXT4_PROC_HANDLER(EXT4_MB_MIN_TO_SCAN_NAME, mb_min_to_scan);
	EXT4_PROC_HANDLER(EXT4_MB_ORDER2_REQ, mb_order2_reqs);
	EXT4_PROC_HANDLER(EXT4_MB_STREAM_REQ, mb_stream_request);
	EXT4_PROC_HANDLER(EXT4_MB_GROUP_PREALLOC, mb_group_prealloc);
	return 0;

err_out:
	/* undo in reverse order of registration */
	remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_proc);
	remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_proc);
	remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_proc);
	remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc);
	remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc);
	remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc);
	return -ENOMEM;
#endif
}
2730
/*
 * Remove the proc entries of the mballoc tunables.  Returns -EINVAL when
 * proc support is compiled in but the filesystem has no proc directory,
 * 0 otherwise.
 */
static int ext4_mb_destroy_per_dev_proc(struct super_block *sb)
{
#ifdef CONFIG_PROC_FS
	/* removal order: reverse of the registration order */
	static const char * const entry_names[] = {
		EXT4_MB_GROUP_PREALLOC,
		EXT4_MB_STREAM_REQ,
		EXT4_MB_ORDER2_REQ,
		EXT4_MB_MIN_TO_SCAN_NAME,
		EXT4_MB_MAX_TO_SCAN_NAME,
		EXT4_MB_STATS_NAME,
	};
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	unsigned int n;

	if (sbi->s_proc == NULL)
		return -EINVAL;

	for (n = 0; n < sizeof(entry_names) / sizeof(entry_names[0]); n++)
		remove_proc_entry(entry_names[n], sbi->s_proc);
#endif
	return 0;
}
2748
2749int __init init_ext4_mballoc(void)
2750{
2751 ext4_pspace_cachep =
2752 kmem_cache_create("ext4_prealloc_space",
2753 sizeof(struct ext4_prealloc_space),
2754 0, SLAB_RECLAIM_ACCOUNT, NULL);
2755 if (ext4_pspace_cachep == NULL)
2756 return -ENOMEM;
2757
2758 ext4_ac_cachep =
2759 kmem_cache_create("ext4_alloc_context",
2760 sizeof(struct ext4_allocation_context),
2761 0, SLAB_RECLAIM_ACCOUNT, NULL);
2762 if (ext4_ac_cachep == NULL) {
2763 kmem_cache_destroy(ext4_pspace_cachep);
2764 return -ENOMEM;
2765 }
2766
2767 ext4_free_ext_cachep =
2768 kmem_cache_create("ext4_free_block_extents",
2769 sizeof(struct ext4_free_data),
2770 0, SLAB_RECLAIM_ACCOUNT, NULL);
2771 if (ext4_free_ext_cachep == NULL) {
2772 kmem_cache_destroy(ext4_pspace_cachep);
2773 kmem_cache_destroy(ext4_ac_cachep);
2774 return -ENOMEM;
2775 }
2776 return 0;
2777}
2778
2779void exit_ext4_mballoc(void)
2780{
2781
2782 kmem_cache_destroy(ext4_pspace_cachep);
2783 kmem_cache_destroy(ext4_ac_cachep);
2784 kmem_cache_destroy(ext4_free_ext_cachep);
2785}
2786
2787
2788
2789
2790
2791
2792static noinline_for_stack int
2793ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2794 handle_t *handle, unsigned long reserv_blks)
2795{
2796 struct buffer_head *bitmap_bh = NULL;
2797 struct ext4_super_block *es;
2798 struct ext4_group_desc *gdp;
2799 struct buffer_head *gdp_bh;
2800 struct ext4_sb_info *sbi;
2801 struct super_block *sb;
2802 ext4_fsblk_t block;
2803 int err, len;
2804
2805 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
2806 BUG_ON(ac->ac_b_ex.fe_len <= 0);
2807
2808 sb = ac->ac_sb;
2809 sbi = EXT4_SB(sb);
2810 es = sbi->s_es;
2811
2812
2813 err = -EIO;
2814 bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group);
2815 if (!bitmap_bh)
2816 goto out_err;
2817
2818 err = ext4_journal_get_write_access(handle, bitmap_bh);
2819 if (err)
2820 goto out_err;
2821
2822 err = -EIO;
2823 gdp = ext4_get_group_desc(sb, ac->ac_b_ex.fe_group, &gdp_bh);
2824 if (!gdp)
2825 goto out_err;
2826
2827 ext4_debug("using block group %lu(%d)\n", ac->ac_b_ex.fe_group,
2828 gdp->bg_free_blocks_count);
2829
2830 err = ext4_journal_get_write_access(handle, gdp_bh);
2831 if (err)
2832 goto out_err;
2833
2834 block = ac->ac_b_ex.fe_group * EXT4_BLOCKS_PER_GROUP(sb)
2835 + ac->ac_b_ex.fe_start
2836 + le32_to_cpu(es->s_first_data_block);
2837
2838 len = ac->ac_b_ex.fe_len;
2839 if (in_range(ext4_block_bitmap(sb, gdp), block, len) ||
2840 in_range(ext4_inode_bitmap(sb, gdp), block, len) ||
2841 in_range(block, ext4_inode_table(sb, gdp),
2842 EXT4_SB(sb)->s_itb_per_group) ||
2843 in_range(block + len - 1, ext4_inode_table(sb, gdp),
2844 EXT4_SB(sb)->s_itb_per_group)) {
2845 ext4_error(sb, __func__,
2846 "Allocating block in system zone - block = %llu",
2847 block);
2848
2849
2850
2851
2852 mb_set_bits(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group),
2853 bitmap_bh->b_data, ac->ac_b_ex.fe_start,
2854 ac->ac_b_ex.fe_len);
2855 err = ext4_journal_dirty_metadata(handle, bitmap_bh);
2856 if (!err)
2857 err = -EAGAIN;
2858 goto out_err;
2859 }
2860#ifdef AGGRESSIVE_CHECK
2861 {
2862 int i;
2863 for (i = 0; i < ac->ac_b_ex.fe_len; i++) {
2864 BUG_ON(mb_test_bit(ac->ac_b_ex.fe_start + i,
2865 bitmap_bh->b_data));
2866 }
2867 }
2868#endif
2869 mb_set_bits(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group), bitmap_bh->b_data,
2870 ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len);
2871
2872 spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
2873 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
2874 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
2875 gdp->bg_free_blocks_count =
2876 cpu_to_le16(ext4_free_blocks_after_init(sb,
2877 ac->ac_b_ex.fe_group,
2878 gdp));
2879 }
2880 le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len);
2881 gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
2882 spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
2883 percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
2884
2885
2886
2887 if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
2888
2889 percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks);
2890 else
2891 percpu_counter_sub(&sbi->s_dirtyblocks_counter,
2892 ac->ac_b_ex.fe_len);
2893
2894 if (sbi->s_log_groups_per_flex) {
2895 ext4_group_t flex_group = ext4_flex_group(sbi,
2896 ac->ac_b_ex.fe_group);
2897 spin_lock(sb_bgl_lock(sbi, flex_group));
2898 sbi->s_flex_groups[flex_group].free_blocks -= ac->ac_b_ex.fe_len;
2899 spin_unlock(sb_bgl_lock(sbi, flex_group));
2900 }
2901
2902 err = ext4_journal_dirty_metadata(handle, bitmap_bh);
2903 if (err)
2904 goto out_err;
2905 err = ext4_journal_dirty_metadata(handle, gdp_bh);
2906
2907out_err:
2908 sb->s_dirt = 1;
2909 brelse(bitmap_bh);
2910 return err;
2911}
2912
2913
2914
2915
2916
2917
2918
2919
2920
2921static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
2922{
2923 struct super_block *sb = ac->ac_sb;
2924 struct ext4_locality_group *lg = ac->ac_lg;
2925
2926 BUG_ON(lg == NULL);
2927 if (EXT4_SB(sb)->s_stripe)
2928 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe;
2929 else
2930 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
2931 mb_debug("#%u: goal %u blocks for locality group\n",
2932 current->pid, ac->ac_g_ex.fe_len);
2933}
2934
2935
2936
2937
2938
2939static noinline_for_stack void
2940ext4_mb_normalize_request(struct ext4_allocation_context *ac,
2941 struct ext4_allocation_request *ar)
2942{
2943 int bsbits, max;
2944 ext4_lblk_t end;
2945 loff_t size, orig_size, start_off;
2946 ext4_lblk_t start, orig_start;
2947 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
2948 struct ext4_prealloc_space *pa;
2949
2950
2951
2952 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
2953 return;
2954
2955
2956 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
2957 return;
2958
2959
2960
2961 if (ac->ac_flags & EXT4_MB_HINT_NOPREALLOC)
2962 return;
2963
2964 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC) {
2965 ext4_mb_normalize_group_request(ac);
2966 return ;
2967 }
2968
2969 bsbits = ac->ac_sb->s_blocksize_bits;
2970
2971
2972
2973 size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len;
2974 size = size << bsbits;
2975 if (size < i_size_read(ac->ac_inode))
2976 size = i_size_read(ac->ac_inode);
2977
2978
2979 max = 2 << bsbits;
2980
2981#define NRL_CHECK_SIZE(req, size, max, chunk_size) \
2982 (req <= (size) || max <= (chunk_size))
2983
2984
2985
2986 start_off = 0;
2987 if (size <= 16 * 1024) {
2988 size = 16 * 1024;
2989 } else if (size <= 32 * 1024) {
2990 size = 32 * 1024;
2991 } else if (size <= 64 * 1024) {
2992 size = 64 * 1024;
2993 } else if (size <= 128 * 1024) {
2994 size = 128 * 1024;
2995 } else if (size <= 256 * 1024) {
2996 size = 256 * 1024;
2997 } else if (size <= 512 * 1024) {
2998 size = 512 * 1024;
2999 } else if (size <= 1024 * 1024) {
3000 size = 1024 * 1024;
3001 } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) {
3002 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
3003 (21 - bsbits)) << 21;
3004 size = 2 * 1024 * 1024;
3005 } else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, 4 * 1024)) {
3006 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
3007 (22 - bsbits)) << 22;
3008 size = 4 * 1024 * 1024;
3009 } else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len,
3010 (8<<20)>>bsbits, max, 8 * 1024)) {
3011 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
3012 (23 - bsbits)) << 23;
3013 size = 8 * 1024 * 1024;
3014 } else {
3015 start_off = (loff_t)ac->ac_o_ex.fe_logical << bsbits;
3016 size = ac->ac_o_ex.fe_len << bsbits;
3017 }
3018 orig_size = size = size >> bsbits;
3019 orig_start = start = start_off >> bsbits;
3020
3021
3022 if (ar->pleft && start <= ar->lleft) {
3023 size -= ar->lleft + 1 - start;
3024 start = ar->lleft + 1;
3025 }
3026 if (ar->pright && start + size - 1 >= ar->lright)
3027 size -= start + size - ar->lright;
3028
3029 end = start + size;
3030
3031
3032 rcu_read_lock();
3033 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3034 unsigned long pa_end;
3035
3036 if (pa->pa_deleted)
3037 continue;
3038 spin_lock(&pa->pa_lock);
3039 if (pa->pa_deleted) {
3040 spin_unlock(&pa->pa_lock);
3041 continue;
3042 }
3043
3044 pa_end = pa->pa_lstart + pa->pa_len;
3045
3046
3047 BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end ||
3048 ac->ac_o_ex.fe_logical < pa->pa_lstart));
3049
3050
3051 if (pa->pa_lstart >= end) {
3052 spin_unlock(&pa->pa_lock);
3053 continue;
3054 }
3055 if (pa_end <= start) {
3056 spin_unlock(&pa->pa_lock);
3057 continue;
3058 }
3059 BUG_ON(pa->pa_lstart <= start && pa_end >= end);
3060
3061 if (pa_end <= ac->ac_o_ex.fe_logical) {
3062 BUG_ON(pa_end < start);
3063 start = pa_end;
3064 }
3065
3066 if (pa->pa_lstart > ac->ac_o_ex.fe_logical) {
3067 BUG_ON(pa->pa_lstart > end);
3068 end = pa->pa_lstart;
3069 }
3070 spin_unlock(&pa->pa_lock);
3071 }
3072 rcu_read_unlock();
3073 size = end - start;
3074
3075
3076 rcu_read_lock();
3077 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3078 unsigned long pa_end;
3079 spin_lock(&pa->pa_lock);
3080 if (pa->pa_deleted == 0) {
3081 pa_end = pa->pa_lstart + pa->pa_len;
3082 BUG_ON(!(start >= pa_end || end <= pa->pa_lstart));
3083 }
3084 spin_unlock(&pa->pa_lock);
3085 }
3086 rcu_read_unlock();
3087
3088 if (start + size <= ac->ac_o_ex.fe_logical &&
3089 start > ac->ac_o_ex.fe_logical) {
3090 printk(KERN_ERR "start %lu, size %lu, fe_logical %lu\n",
3091 (unsigned long) start, (unsigned long) size,
3092 (unsigned long) ac->ac_o_ex.fe_logical);
3093 }
3094 BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
3095 start > ac->ac_o_ex.fe_logical);
3096 BUG_ON(size <= 0 || size >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
3097
3098
3099
3100
3101
3102 ac->ac_g_ex.fe_logical = start;
3103 ac->ac_g_ex.fe_len = size;
3104
3105
3106 if (ar->pright && (ar->lright == (start + size))) {
3107
3108 ext4_get_group_no_and_offset(ac->ac_sb, ar->pright - size,
3109 &ac->ac_f_ex.fe_group,
3110 &ac->ac_f_ex.fe_start);
3111 ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
3112 }
3113 if (ar->pleft && (ar->lleft + 1 == start)) {
3114
3115 ext4_get_group_no_and_offset(ac->ac_sb, ar->pleft + 1,
3116 &ac->ac_f_ex.fe_group,
3117 &ac->ac_f_ex.fe_start);
3118 ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
3119 }
3120
3121 mb_debug("goal: %u(was %u) blocks at %u\n", (unsigned) size,
3122 (unsigned) orig_size, (unsigned) start);
3123}
3124
3125static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
3126{
3127 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3128
3129 if (sbi->s_mb_stats && ac->ac_g_ex.fe_len > 1) {
3130 atomic_inc(&sbi->s_bal_reqs);
3131 atomic_add(ac->ac_b_ex.fe_len, &sbi->s_bal_allocated);
3132 if (ac->ac_o_ex.fe_len >= ac->ac_g_ex.fe_len)
3133 atomic_inc(&sbi->s_bal_success);
3134 atomic_add(ac->ac_found, &sbi->s_bal_ex_scanned);
3135 if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start &&
3136 ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group)
3137 atomic_inc(&sbi->s_bal_goals);
3138 if (ac->ac_found > sbi->s_mb_max_to_scan)
3139 atomic_inc(&sbi->s_bal_breaks);
3140 }
3141
3142 ext4_mb_store_history(ac);
3143}
3144
3145
3146
3147
3148static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
3149 struct ext4_prealloc_space *pa)
3150{
3151 ext4_fsblk_t start;
3152 ext4_fsblk_t end;
3153 int len;
3154
3155
3156 start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart);
3157 end = min(pa->pa_pstart + pa->pa_len, start + ac->ac_o_ex.fe_len);
3158 len = end - start;
3159 ext4_get_group_no_and_offset(ac->ac_sb, start, &ac->ac_b_ex.fe_group,
3160 &ac->ac_b_ex.fe_start);
3161 ac->ac_b_ex.fe_len = len;
3162 ac->ac_status = AC_STATUS_FOUND;
3163 ac->ac_pa = pa;
3164
3165 BUG_ON(start < pa->pa_pstart);
3166 BUG_ON(start + len > pa->pa_pstart + pa->pa_len);
3167 BUG_ON(pa->pa_free < len);
3168 pa->pa_free -= len;
3169
3170 mb_debug("use %llu/%u from inode pa %p\n", start, len, pa);
3171}
3172
3173
3174
3175
3176static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
3177 struct ext4_prealloc_space *pa)
3178{
3179 unsigned int len = ac->ac_o_ex.fe_len;
3180
3181 ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart,
3182 &ac->ac_b_ex.fe_group,
3183 &ac->ac_b_ex.fe_start);
3184 ac->ac_b_ex.fe_len = len;
3185 ac->ac_status = AC_STATUS_FOUND;
3186 ac->ac_pa = pa;
3187
3188
3189
3190
3191
3192
3193
3194 mb_debug("use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa);
3195}
3196
3197
3198
3199
3200
3201
3202
3203static struct ext4_prealloc_space *
3204ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
3205 struct ext4_prealloc_space *pa,
3206 struct ext4_prealloc_space *cpa)
3207{
3208 ext4_fsblk_t cur_distance, new_distance;
3209
3210 if (cpa == NULL) {
3211 atomic_inc(&pa->pa_count);
3212 return pa;
3213 }
3214 cur_distance = abs(goal_block - cpa->pa_pstart);
3215 new_distance = abs(goal_block - pa->pa_pstart);
3216
3217 if (cur_distance < new_distance)
3218 return cpa;
3219
3220
3221 atomic_dec(&cpa->pa_count);
3222 atomic_inc(&pa->pa_count);
3223 return pa;
3224}
3225
3226
3227
3228
3229static noinline_for_stack int
3230ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3231{
3232 int order, i;
3233 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
3234 struct ext4_locality_group *lg;
3235 struct ext4_prealloc_space *pa, *cpa = NULL;
3236 ext4_fsblk_t goal_block;
3237
3238
3239 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
3240 return 0;
3241
3242
3243 rcu_read_lock();
3244 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3245
3246
3247
3248 if (ac->ac_o_ex.fe_logical < pa->pa_lstart ||
3249 ac->ac_o_ex.fe_logical >= pa->pa_lstart + pa->pa_len)
3250 continue;
3251
3252
3253 spin_lock(&pa->pa_lock);
3254 if (pa->pa_deleted == 0 && pa->pa_free) {
3255 atomic_inc(&pa->pa_count);
3256 ext4_mb_use_inode_pa(ac, pa);
3257 spin_unlock(&pa->pa_lock);
3258 ac->ac_criteria = 10;
3259 rcu_read_unlock();
3260 return 1;
3261 }
3262 spin_unlock(&pa->pa_lock);
3263 }
3264 rcu_read_unlock();
3265
3266
3267 if (!(ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC))
3268 return 0;
3269
3270
3271 lg = ac->ac_lg;
3272 if (lg == NULL)
3273 return 0;
3274 order = fls(ac->ac_o_ex.fe_len) - 1;
3275 if (order > PREALLOC_TB_SIZE - 1)
3276
3277 order = PREALLOC_TB_SIZE - 1;
3278
3279 goal_block = ac->ac_g_ex.fe_group * EXT4_BLOCKS_PER_GROUP(ac->ac_sb) +
3280 ac->ac_g_ex.fe_start +
3281 le32_to_cpu(EXT4_SB(ac->ac_sb)->s_es->s_first_data_block);
3282
3283
3284
3285
3286 for (i = order; i < PREALLOC_TB_SIZE; i++) {
3287 rcu_read_lock();
3288 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i],
3289 pa_inode_list) {
3290 spin_lock(&pa->pa_lock);
3291 if (pa->pa_deleted == 0 &&
3292 pa->pa_free >= ac->ac_o_ex.fe_len) {
3293
3294 cpa = ext4_mb_check_group_pa(goal_block,
3295 pa, cpa);
3296 }
3297 spin_unlock(&pa->pa_lock);
3298 }
3299 rcu_read_unlock();
3300 }
3301 if (cpa) {
3302 ext4_mb_use_group_pa(ac, cpa);
3303 ac->ac_criteria = 20;
3304 return 1;
3305 }
3306 return 0;
3307}
3308
3309
3310
3311
3312
3313
3314static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
3315 ext4_group_t group)
3316{
3317 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
3318 struct ext4_prealloc_space *pa;
3319 struct list_head *cur;
3320 ext4_group_t groupnr;
3321 ext4_grpblk_t start;
3322 int preallocated = 0;
3323 int count = 0;
3324 int len;
3325
3326
3327
3328
3329
3330
3331
3332
3333
3334 list_for_each(cur, &grp->bb_prealloc_list) {
3335 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
3336 spin_lock(&pa->pa_lock);
3337 ext4_get_group_no_and_offset(sb, pa->pa_pstart,
3338 &groupnr, &start);
3339 len = pa->pa_len;
3340 spin_unlock(&pa->pa_lock);
3341 if (unlikely(len == 0))
3342 continue;
3343 BUG_ON(groupnr != group);
3344 mb_set_bits(sb_bgl_lock(EXT4_SB(sb), group),
3345 bitmap, start, len);
3346 preallocated += len;
3347 count++;
3348 }
3349 mb_debug("prellocated %u for group %lu\n", preallocated, group);
3350}
3351
3352static void ext4_mb_pa_callback(struct rcu_head *head)
3353{
3354 struct ext4_prealloc_space *pa;
3355 pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu);
3356 kmem_cache_free(ext4_pspace_cachep, pa);
3357}
3358
3359
3360
3361
3362
3363static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
3364 struct super_block *sb, struct ext4_prealloc_space *pa)
3365{
3366 unsigned long grp;
3367
3368 if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0)
3369 return;
3370
3371
3372 spin_lock(&pa->pa_lock);
3373 if (pa->pa_deleted == 1) {
3374 spin_unlock(&pa->pa_lock);
3375 return;
3376 }
3377
3378 pa->pa_deleted = 1;
3379 spin_unlock(&pa->pa_lock);
3380
3381
3382 ext4_get_group_no_and_offset(sb, pa->pa_pstart - 1, &grp, NULL);
3383
3384
3385
3386
3387
3388
3389
3390
3391
3392
3393
3394
3395
3396
3397
3398 ext4_lock_group(sb, grp);
3399 list_del(&pa->pa_group_list);
3400 ext4_unlock_group(sb, grp);
3401
3402 spin_lock(pa->pa_obj_lock);
3403 list_del_rcu(&pa->pa_inode_list);
3404 spin_unlock(pa->pa_obj_lock);
3405
3406 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
3407}
3408
3409
3410
3411
3412static noinline_for_stack int
3413ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
3414{
3415 struct super_block *sb = ac->ac_sb;
3416 struct ext4_prealloc_space *pa;
3417 struct ext4_group_info *grp;
3418 struct ext4_inode_info *ei;
3419
3420
3421 BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
3422 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
3423 BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
3424
3425 pa = kmem_cache_alloc(ext4_pspace_cachep, GFP_NOFS);
3426 if (pa == NULL)
3427 return -ENOMEM;
3428
3429 if (ac->ac_b_ex.fe_len < ac->ac_g_ex.fe_len) {
3430 int winl;
3431 int wins;
3432 int win;
3433 int offs;
3434
3435
3436
3437
3438 BUG_ON(ac->ac_g_ex.fe_logical > ac->ac_o_ex.fe_logical);
3439 BUG_ON(ac->ac_g_ex.fe_len < ac->ac_o_ex.fe_len);
3440
3441
3442
3443
3444 winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical;
3445
3446
3447 wins = ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len;
3448
3449
3450 win = min(winl, wins);
3451
3452 offs = ac->ac_o_ex.fe_logical % ac->ac_b_ex.fe_len;
3453 if (offs && offs < win)
3454 win = offs;
3455
3456 ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical - win;
3457 BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical);
3458 BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len);
3459 }
3460
3461
3462
3463 ac->ac_f_ex = ac->ac_b_ex;
3464
3465 pa->pa_lstart = ac->ac_b_ex.fe_logical;
3466 pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
3467 pa->pa_len = ac->ac_b_ex.fe_len;
3468 pa->pa_free = pa->pa_len;
3469 atomic_set(&pa->pa_count, 1);
3470 spin_lock_init(&pa->pa_lock);
3471 pa->pa_deleted = 0;
3472 pa->pa_linear = 0;
3473
3474 mb_debug("new inode pa %p: %llu/%u for %u\n", pa,
3475 pa->pa_pstart, pa->pa_len, pa->pa_lstart);
3476
3477 ext4_mb_use_inode_pa(ac, pa);
3478 atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
3479
3480 ei = EXT4_I(ac->ac_inode);
3481 grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
3482
3483 pa->pa_obj_lock = &ei->i_prealloc_lock;
3484 pa->pa_inode = ac->ac_inode;
3485
3486 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
3487 list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
3488 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
3489
3490 spin_lock(pa->pa_obj_lock);
3491 list_add_rcu(&pa->pa_inode_list, &ei->i_prealloc_list);
3492 spin_unlock(pa->pa_obj_lock);
3493
3494 return 0;
3495}
3496
3497
3498
3499
3500static noinline_for_stack int
3501ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
3502{
3503 struct super_block *sb = ac->ac_sb;
3504 struct ext4_locality_group *lg;
3505 struct ext4_prealloc_space *pa;
3506 struct ext4_group_info *grp;
3507
3508
3509 BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
3510 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
3511 BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
3512
3513 BUG_ON(ext4_pspace_cachep == NULL);
3514 pa = kmem_cache_alloc(ext4_pspace_cachep, GFP_NOFS);
3515 if (pa == NULL)
3516 return -ENOMEM;
3517
3518
3519
3520 ac->ac_f_ex = ac->ac_b_ex;
3521
3522 pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
3523 pa->pa_lstart = pa->pa_pstart;
3524 pa->pa_len = ac->ac_b_ex.fe_len;
3525 pa->pa_free = pa->pa_len;
3526 atomic_set(&pa->pa_count, 1);
3527 spin_lock_init(&pa->pa_lock);
3528 INIT_LIST_HEAD(&pa->pa_inode_list);
3529 pa->pa_deleted = 0;
3530 pa->pa_linear = 1;
3531
3532 mb_debug("new group pa %p: %llu/%u for %u\n", pa,
3533 pa->pa_pstart, pa->pa_len, pa->pa_lstart);
3534
3535 ext4_mb_use_group_pa(ac, pa);
3536 atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
3537
3538 grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
3539 lg = ac->ac_lg;
3540 BUG_ON(lg == NULL);
3541
3542 pa->pa_obj_lock = &lg->lg_prealloc_lock;
3543 pa->pa_inode = NULL;
3544
3545 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
3546 list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
3547 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
3548
3549
3550
3551
3552
3553 return 0;
3554}
3555
3556static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
3557{
3558 int err;
3559
3560 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
3561 err = ext4_mb_new_group_pa(ac);
3562 else
3563 err = ext4_mb_new_inode_pa(ac);
3564 return err;
3565}
3566
3567
3568
3569
3570
3571
3572
3573
3574
3575static noinline_for_stack int
3576ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3577 struct ext4_prealloc_space *pa,
3578 struct ext4_allocation_context *ac)
3579{
3580 struct super_block *sb = e4b->bd_sb;
3581 struct ext4_sb_info *sbi = EXT4_SB(sb);
3582 unsigned long end;
3583 unsigned long next;
3584 ext4_group_t group;
3585 ext4_grpblk_t bit;
3586 sector_t start;
3587 int err = 0;
3588 int free = 0;
3589
3590 BUG_ON(pa->pa_deleted == 0);
3591 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
3592 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
3593 end = bit + pa->pa_len;
3594
3595 if (ac) {
3596 ac->ac_sb = sb;
3597 ac->ac_inode = pa->pa_inode;
3598 ac->ac_op = EXT4_MB_HISTORY_DISCARD;
3599 }
3600
3601 while (bit < end) {
3602 bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit);
3603 if (bit >= end)
3604 break;
3605 next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
3606 start = group * EXT4_BLOCKS_PER_GROUP(sb) + bit +
3607 le32_to_cpu(sbi->s_es->s_first_data_block);
3608 mb_debug(" free preallocated %u/%u in group %u\n",
3609 (unsigned) start, (unsigned) next - bit,
3610 (unsigned) group);
3611 free += next - bit;
3612
3613 if (ac) {
3614 ac->ac_b_ex.fe_group = group;
3615 ac->ac_b_ex.fe_start = bit;
3616 ac->ac_b_ex.fe_len = next - bit;
3617 ac->ac_b_ex.fe_logical = 0;
3618 ext4_mb_store_history(ac);
3619 }
3620
3621 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
3622 bit = next + 1;
3623 }
3624 if (free != pa->pa_free) {
3625 printk(KERN_CRIT "pa %p: logic %lu, phys. %lu, len %lu\n",
3626 pa, (unsigned long) pa->pa_lstart,
3627 (unsigned long) pa->pa_pstart,
3628 (unsigned long) pa->pa_len);
3629 ext4_error(sb, __func__, "free %u, pa_free %u\n",
3630 free, pa->pa_free);
3631
3632
3633
3634
3635 }
3636 atomic_add(free, &sbi->s_mb_discarded);
3637
3638 return err;
3639}
3640
3641static noinline_for_stack int
3642ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3643 struct ext4_prealloc_space *pa,
3644 struct ext4_allocation_context *ac)
3645{
3646 struct super_block *sb = e4b->bd_sb;
3647 ext4_group_t group;
3648 ext4_grpblk_t bit;
3649
3650 if (ac)
3651 ac->ac_op = EXT4_MB_HISTORY_DISCARD;
3652
3653 BUG_ON(pa->pa_deleted == 0);
3654 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
3655 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
3656 mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
3657 atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
3658
3659 if (ac) {
3660 ac->ac_sb = sb;
3661 ac->ac_inode = NULL;
3662 ac->ac_b_ex.fe_group = group;
3663 ac->ac_b_ex.fe_start = bit;
3664 ac->ac_b_ex.fe_len = pa->pa_len;
3665 ac->ac_b_ex.fe_logical = 0;
3666 ext4_mb_store_history(ac);
3667 }
3668
3669 return 0;
3670}
3671
3672
3673
3674
3675
3676
3677
3678
3679
3680
3681static noinline_for_stack int
3682ext4_mb_discard_group_preallocations(struct super_block *sb,
3683 ext4_group_t group, int needed)
3684{
3685 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
3686 struct buffer_head *bitmap_bh = NULL;
3687 struct ext4_prealloc_space *pa, *tmp;
3688 struct ext4_allocation_context *ac;
3689 struct list_head list;
3690 struct ext4_buddy e4b;
3691 int err;
3692 int busy = 0;
3693 int free = 0;
3694
3695 mb_debug("discard preallocation for group %lu\n", group);
3696
3697 if (list_empty(&grp->bb_prealloc_list))
3698 return 0;
3699
3700 bitmap_bh = ext4_read_block_bitmap(sb, group);
3701 if (bitmap_bh == NULL) {
3702 ext4_error(sb, __func__, "Error in reading block "
3703 "bitmap for %lu\n", group);
3704 return 0;
3705 }
3706
3707 err = ext4_mb_load_buddy(sb, group, &e4b);
3708 if (err) {
3709 ext4_error(sb, __func__, "Error in loading buddy "
3710 "information for %lu\n", group);
3711 put_bh(bitmap_bh);
3712 return 0;
3713 }
3714
3715 if (needed == 0)
3716 needed = EXT4_BLOCKS_PER_GROUP(sb) + 1;
3717
3718 INIT_LIST_HEAD(&list);
3719 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3720repeat:
3721 ext4_lock_group(sb, group);
3722 list_for_each_entry_safe(pa, tmp,
3723 &grp->bb_prealloc_list, pa_group_list) {
3724 spin_lock(&pa->pa_lock);
3725 if (atomic_read(&pa->pa_count)) {
3726 spin_unlock(&pa->pa_lock);
3727 busy = 1;
3728 continue;
3729 }
3730 if (pa->pa_deleted) {
3731 spin_unlock(&pa->pa_lock);
3732 continue;
3733 }
3734
3735
3736 pa->pa_deleted = 1;
3737
3738
3739 free += pa->pa_free;
3740
3741 spin_unlock(&pa->pa_lock);
3742
3743 list_del(&pa->pa_group_list);
3744 list_add(&pa->u.pa_tmp_list, &list);
3745 }
3746
3747
3748 if (free < needed && busy) {
3749 busy = 0;
3750 ext4_unlock_group(sb, group);
3751
3752
3753
3754
3755 yield();
3756 goto repeat;
3757 }
3758
3759
3760 if (list_empty(&list)) {
3761 BUG_ON(free != 0);
3762 goto out;
3763 }
3764
3765
3766 list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
3767
3768
3769 spin_lock(pa->pa_obj_lock);
3770 list_del_rcu(&pa->pa_inode_list);
3771 spin_unlock(pa->pa_obj_lock);
3772
3773 if (pa->pa_linear)
3774 ext4_mb_release_group_pa(&e4b, pa, ac);
3775 else
3776 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
3777
3778 list_del(&pa->u.pa_tmp_list);
3779 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
3780 }
3781
3782out:
3783 ext4_unlock_group(sb, group);
3784 if (ac)
3785 kmem_cache_free(ext4_ac_cachep, ac);
3786 ext4_mb_release_desc(&e4b);
3787 put_bh(bitmap_bh);
3788 return free;
3789}
3790
3791
3792
3793
3794
3795
3796
3797
3798
3799
3800void ext4_discard_preallocations(struct inode *inode)
3801{
3802 struct ext4_inode_info *ei = EXT4_I(inode);
3803 struct super_block *sb = inode->i_sb;
3804 struct buffer_head *bitmap_bh = NULL;
3805 struct ext4_prealloc_space *pa, *tmp;
3806 struct ext4_allocation_context *ac;
3807 ext4_group_t group = 0;
3808 struct list_head list;
3809 struct ext4_buddy e4b;
3810 int err;
3811
3812 if (!S_ISREG(inode->i_mode)) {
3813
3814 return;
3815 }
3816
3817 mb_debug("discard preallocation for inode %lu\n", inode->i_ino);
3818
3819 INIT_LIST_HEAD(&list);
3820
3821 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3822repeat:
3823
3824 spin_lock(&ei->i_prealloc_lock);
3825 while (!list_empty(&ei->i_prealloc_list)) {
3826 pa = list_entry(ei->i_prealloc_list.next,
3827 struct ext4_prealloc_space, pa_inode_list);
3828 BUG_ON(pa->pa_obj_lock != &ei->i_prealloc_lock);
3829 spin_lock(&pa->pa_lock);
3830 if (atomic_read(&pa->pa_count)) {
3831
3832
3833 spin_unlock(&pa->pa_lock);
3834 spin_unlock(&ei->i_prealloc_lock);
3835 printk(KERN_ERR "uh-oh! used pa while discarding\n");
3836 WARN_ON(1);
3837 schedule_timeout_uninterruptible(HZ);
3838 goto repeat;
3839
3840 }
3841 if (pa->pa_deleted == 0) {
3842 pa->pa_deleted = 1;
3843 spin_unlock(&pa->pa_lock);
3844 list_del_rcu(&pa->pa_inode_list);
3845 list_add(&pa->u.pa_tmp_list, &list);
3846 continue;
3847 }
3848
3849
3850 spin_unlock(&pa->pa_lock);
3851 spin_unlock(&ei->i_prealloc_lock);
3852
3853
3854
3855
3856
3857
3858
3859
3860
3861
3862
3863
3864
3865 schedule_timeout_uninterruptible(HZ);
3866 goto repeat;
3867 }
3868 spin_unlock(&ei->i_prealloc_lock);
3869
3870 list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
3871 BUG_ON(pa->pa_linear != 0);
3872 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
3873
3874 err = ext4_mb_load_buddy(sb, group, &e4b);
3875 if (err) {
3876 ext4_error(sb, __func__, "Error in loading buddy "
3877 "information for %lu\n", group);
3878 continue;
3879 }
3880
3881 bitmap_bh = ext4_read_block_bitmap(sb, group);
3882 if (bitmap_bh == NULL) {
3883 ext4_error(sb, __func__, "Error in reading block "
3884 "bitmap for %lu\n", group);
3885 ext4_mb_release_desc(&e4b);
3886 continue;
3887 }
3888
3889 ext4_lock_group(sb, group);
3890 list_del(&pa->pa_group_list);
3891 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
3892 ext4_unlock_group(sb, group);
3893
3894 ext4_mb_release_desc(&e4b);
3895 put_bh(bitmap_bh);
3896
3897 list_del(&pa->u.pa_tmp_list);
3898 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
3899 }
3900 if (ac)
3901 kmem_cache_free(ext4_ac_cachep, ac);
3902}
3903
3904
3905
3906
3907
3908
3909
3910
3911static void ext4_mb_return_to_preallocation(struct inode *inode,
3912 struct ext4_buddy *e4b,
3913 sector_t block, int count)
3914{
3915 BUG_ON(!list_empty(&EXT4_I(inode)->i_prealloc_list));
3916}
#ifdef MB_DEBUG
/*
 * Debug aid: dump the allocation context @ac (status, flags, original/goal/
 * best extents, scan counters), then for every block group its
 * preallocations and, if the group has free blocks, its free-block and
 * fragment counts.
 */
static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
{
	struct super_block *sb = ac->ac_sb;
	ext4_group_t i;

	printk(KERN_ERR "EXT4-fs: Can't allocate:"
			" Allocation context details:\n");
	printk(KERN_ERR "EXT4-fs: status %d flags %d\n",
			ac->ac_status, ac->ac_flags);
	printk(KERN_ERR "EXT4-fs: orig %lu/%lu/%lu@%lu, goal %lu/%lu/%lu@%lu, "
			"best %lu/%lu/%lu@%lu cr %d\n",
			(unsigned long)ac->ac_o_ex.fe_group,
			(unsigned long)ac->ac_o_ex.fe_start,
			(unsigned long)ac->ac_o_ex.fe_len,
			(unsigned long)ac->ac_o_ex.fe_logical,
			(unsigned long)ac->ac_g_ex.fe_group,
			(unsigned long)ac->ac_g_ex.fe_start,
			(unsigned long)ac->ac_g_ex.fe_len,
			(unsigned long)ac->ac_g_ex.fe_logical,
			(unsigned long)ac->ac_b_ex.fe_group,
			(unsigned long)ac->ac_b_ex.fe_start,
			(unsigned long)ac->ac_b_ex.fe_len,
			(unsigned long)ac->ac_b_ex.fe_logical,
			(int)ac->ac_criteria);
	printk(KERN_ERR "EXT4-fs: %lu scanned, %d found\n", ac->ac_ex_scanned,
		ac->ac_found);
	printk(KERN_ERR "EXT4-fs: groups: \n");

	for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
		struct ext4_group_info *grp = ext4_get_group_info(sb, i);
		struct ext4_prealloc_space *pa;
		ext4_grpblk_t start;

		/* list the group's preallocations under the group lock */
		ext4_lock_group(sb, i);
		list_for_each_entry(pa, &grp->bb_prealloc_list,
				    pa_group_list) {
			spin_lock(&pa->pa_lock);
			ext4_get_group_no_and_offset(sb, pa->pa_pstart,
						     NULL, &start);
			spin_unlock(&pa->pa_lock);
			printk(KERN_ERR "PA:%lu:%d:%u \n", i,
							start, pa->pa_len);
		}
		ext4_unlock_group(sb, i);

		/* groups without free blocks get no summary line */
		if (grp->bb_free != 0)
			printk(KERN_ERR "%lu: %d/%d \n",
			       i, grp->bb_free, grp->bb_fragments);
	}
	printk(KERN_ERR "\n");
}
#else
/* Without MB_DEBUG the dump is a no-op. */
static inline void ext4_mb_show_ac(struct ext4_allocation_context *ac)
{
}
#endif
3976
3977
3978
3979
3980
3981
3982
3983
3984static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
3985{
3986 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3987 int bsbits = ac->ac_sb->s_blocksize_bits;
3988 loff_t size, isize;
3989
3990 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
3991 return;
3992
3993 size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len;
3994 isize = i_size_read(ac->ac_inode) >> bsbits;
3995 size = max(size, isize);
3996
3997
3998 if (size >= sbi->s_mb_stream_request)
3999 return;
4000
4001 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
4002 return;
4003
4004 BUG_ON(ac->ac_lg != NULL);
4005
4006
4007
4008
4009
4010 ac->ac_lg = per_cpu_ptr(sbi->s_locality_groups, raw_smp_processor_id());
4011
4012
4013 ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC;
4014
4015
4016 mutex_lock(&ac->ac_lg->lg_mutex);
4017}
4018
4019static noinline_for_stack int
4020ext4_mb_initialize_context(struct ext4_allocation_context *ac,
4021 struct ext4_allocation_request *ar)
4022{
4023 struct super_block *sb = ar->inode->i_sb;
4024 struct ext4_sb_info *sbi = EXT4_SB(sb);
4025 struct ext4_super_block *es = sbi->s_es;
4026 ext4_group_t group;
4027 unsigned long len;
4028 unsigned long goal;
4029 ext4_grpblk_t block;
4030
4031
4032 len = ar->len;
4033
4034
4035 if (len >= EXT4_BLOCKS_PER_GROUP(sb) - 10)
4036 len = EXT4_BLOCKS_PER_GROUP(sb) - 10;
4037
4038
4039 goal = ar->goal;
4040 if (goal < le32_to_cpu(es->s_first_data_block) ||
4041 goal >= ext4_blocks_count(es))
4042 goal = le32_to_cpu(es->s_first_data_block);
4043 ext4_get_group_no_and_offset(sb, goal, &group, &block);
4044
4045
4046 ac->ac_b_ex.fe_logical = ar->logical;
4047 ac->ac_b_ex.fe_group = 0;
4048 ac->ac_b_ex.fe_start = 0;
4049 ac->ac_b_ex.fe_len = 0;
4050 ac->ac_status = AC_STATUS_CONTINUE;
4051 ac->ac_groups_scanned = 0;
4052 ac->ac_ex_scanned = 0;
4053 ac->ac_found = 0;
4054 ac->ac_sb = sb;
4055 ac->ac_inode = ar->inode;
4056 ac->ac_o_ex.fe_logical = ar->logical;
4057 ac->ac_o_ex.fe_group = group;
4058 ac->ac_o_ex.fe_start = block;
4059 ac->ac_o_ex.fe_len = len;
4060 ac->ac_g_ex.fe_logical = ar->logical;
4061 ac->ac_g_ex.fe_group = group;
4062 ac->ac_g_ex.fe_start = block;
4063 ac->ac_g_ex.fe_len = len;
4064 ac->ac_f_ex.fe_len = 0;
4065 ac->ac_flags = ar->flags;
4066 ac->ac_2order = 0;
4067 ac->ac_criteria = 0;
4068 ac->ac_pa = NULL;
4069 ac->ac_bitmap_page = NULL;
4070 ac->ac_buddy_page = NULL;
4071 ac->ac_lg = NULL;
4072
4073
4074
4075 ext4_mb_group_or_file(ac);
4076
4077 mb_debug("init ac: %u blocks @ %u, goal %u, flags %x, 2^%d, "
4078 "left: %u/%u, right %u/%u to %swritable\n",
4079 (unsigned) ar->len, (unsigned) ar->logical,
4080 (unsigned) ar->goal, ac->ac_flags, ac->ac_2order,
4081 (unsigned) ar->lleft, (unsigned) ar->pleft,
4082 (unsigned) ar->lright, (unsigned) ar->pright,
4083 atomic_read(&ar->inode->i_writecount) ? "" : "non-");
4084 return 0;
4085
4086}
4087
4088static noinline_for_stack void
4089ext4_mb_discard_lg_preallocations(struct super_block *sb,
4090 struct ext4_locality_group *lg,
4091 int order, int total_entries)
4092{
4093 ext4_group_t group = 0;
4094 struct ext4_buddy e4b;
4095 struct list_head discard_list;
4096 struct ext4_prealloc_space *pa, *tmp;
4097 struct ext4_allocation_context *ac;
4098
4099 mb_debug("discard locality group preallocation\n");
4100
4101 INIT_LIST_HEAD(&discard_list);
4102 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
4103
4104 spin_lock(&lg->lg_prealloc_lock);
4105 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
4106 pa_inode_list) {
4107 spin_lock(&pa->pa_lock);
4108 if (atomic_read(&pa->pa_count)) {
4109
4110
4111
4112
4113
4114 spin_unlock(&pa->pa_lock);
4115 continue;
4116 }
4117 if (pa->pa_deleted) {
4118 spin_unlock(&pa->pa_lock);
4119 continue;
4120 }
4121
4122 BUG_ON(!pa->pa_linear);
4123
4124
4125 pa->pa_deleted = 1;
4126 spin_unlock(&pa->pa_lock);
4127
4128 list_del_rcu(&pa->pa_inode_list);
4129 list_add(&pa->u.pa_tmp_list, &discard_list);
4130
4131 total_entries--;
4132 if (total_entries <= 5) {
4133
4134
4135
4136
4137
4138
4139 break;
4140 }
4141 }
4142 spin_unlock(&lg->lg_prealloc_lock);
4143
4144 list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) {
4145
4146 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
4147 if (ext4_mb_load_buddy(sb, group, &e4b)) {
4148 ext4_error(sb, __func__, "Error in loading buddy "
4149 "information for %lu\n", group);
4150 continue;
4151 }
4152 ext4_lock_group(sb, group);
4153 list_del(&pa->pa_group_list);
4154 ext4_mb_release_group_pa(&e4b, pa, ac);
4155 ext4_unlock_group(sb, group);
4156
4157 ext4_mb_release_desc(&e4b);
4158 list_del(&pa->u.pa_tmp_list);
4159 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
4160 }
4161 if (ac)
4162 kmem_cache_free(ext4_ac_cachep, ac);
4163}
4164
4165
4166
4167
4168
4169
4170
4171
4172
4173
4174static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
4175{
4176 int order, added = 0, lg_prealloc_count = 1;
4177 struct super_block *sb = ac->ac_sb;
4178 struct ext4_locality_group *lg = ac->ac_lg;
4179 struct ext4_prealloc_space *tmp_pa, *pa = ac->ac_pa;
4180
4181 order = fls(pa->pa_free) - 1;
4182 if (order > PREALLOC_TB_SIZE - 1)
4183
4184 order = PREALLOC_TB_SIZE - 1;
4185
4186 rcu_read_lock();
4187 list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order],
4188 pa_inode_list) {
4189 spin_lock(&tmp_pa->pa_lock);
4190 if (tmp_pa->pa_deleted) {
4191 spin_unlock(&pa->pa_lock);
4192 continue;
4193 }
4194 if (!added && pa->pa_free < tmp_pa->pa_free) {
4195
4196 list_add_tail_rcu(&pa->pa_inode_list,
4197 &tmp_pa->pa_inode_list);
4198 added = 1;
4199
4200
4201
4202
4203 }
4204 spin_unlock(&tmp_pa->pa_lock);
4205 lg_prealloc_count++;
4206 }
4207 if (!added)
4208 list_add_tail_rcu(&pa->pa_inode_list,
4209 &lg->lg_prealloc_list[order]);
4210 rcu_read_unlock();
4211
4212
4213 if (lg_prealloc_count > 8) {
4214 ext4_mb_discard_lg_preallocations(sb, lg,
4215 order, lg_prealloc_count);
4216 return;
4217 }
4218 return ;
4219}
4220
4221
4222
4223
4224static int ext4_mb_release_context(struct ext4_allocation_context *ac)
4225{
4226 struct ext4_prealloc_space *pa = ac->ac_pa;
4227 if (pa) {
4228 if (pa->pa_linear) {
4229
4230 spin_lock(&pa->pa_lock);
4231 pa->pa_pstart += ac->ac_b_ex.fe_len;
4232 pa->pa_lstart += ac->ac_b_ex.fe_len;
4233 pa->pa_free -= ac->ac_b_ex.fe_len;
4234 pa->pa_len -= ac->ac_b_ex.fe_len;
4235 spin_unlock(&pa->pa_lock);
4236
4237
4238
4239
4240
4241
4242 if (likely(pa->pa_free)) {
4243 spin_lock(pa->pa_obj_lock);
4244 list_del_rcu(&pa->pa_inode_list);
4245 spin_unlock(pa->pa_obj_lock);
4246 ext4_mb_add_n_trim(ac);
4247 }
4248 }
4249 ext4_mb_put_pa(ac, ac->ac_sb, pa);
4250 }
4251 if (ac->ac_bitmap_page)
4252 page_cache_release(ac->ac_bitmap_page);
4253 if (ac->ac_buddy_page)
4254 page_cache_release(ac->ac_buddy_page);
4255 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
4256 mutex_unlock(&ac->ac_lg->lg_mutex);
4257 ext4_mb_collect_stats(ac);
4258 return 0;
4259}
4260
4261static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
4262{
4263 ext4_group_t i;
4264 int ret;
4265 int freed = 0;
4266
4267 for (i = 0; i < EXT4_SB(sb)->s_groups_count && needed > 0; i++) {
4268 ret = ext4_mb_discard_group_preallocations(sb, i, needed);
4269 freed += ret;
4270 needed -= ret;
4271 }
4272
4273 return freed;
4274}
4275
4276
4277
4278
4279
4280
/*
 * Main entry point of the multiblock allocator: allocate blocks for the
 * request described by @ar.
 *
 * @handle: journal handle, passed on to ext4_mb_mark_diskspace_used()
 * @ar:     allocation request; ar->len and ar->flags are modified.  On
 *          return ar->len holds the number of blocks actually allocated
 *          (0 on failure).
 * @errp:   receives 0 on success or a negative errno
 *          (-ENOSPC, -EDQUOT, -ENOMEM or whatever
 *          ext4_mb_initialize_context()/ext4_mb_mark_diskspace_used()
 *          returned).
 *
 * Returns the first allocated block, or 0 if nothing was allocated.
 */
ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
				 struct ext4_allocation_request *ar, int *errp)
{
	int freed;
	struct ext4_allocation_context *ac = NULL;
	struct ext4_sb_info *sbi;
	struct super_block *sb;
	ext4_fsblk_t block = 0;
	unsigned long inquota;
	unsigned long reserv_blks = 0;

	sb = ar->inode->i_sb;
	sbi = EXT4_SB(sb);

	if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) {
		/*
		 * No delalloc reservation flagged on this inode, so claim
		 * the blocks now.  While the claim is refused (non-zero
		 * return), halve the request and try again.
		 */
		while (ar->len && ext4_claim_free_blocks(sbi, ar->len)) {
			/* give other tasks a chance to run before retrying */
			yield();
			ar->len = ar->len >> 1;
		}
		if (!ar->len) {
			*errp = -ENOSPC;
			return 0;
		}
		/* remembered for ext4_mb_mark_diskspace_used() below */
		reserv_blks = ar->len;
	}
	/*
	 * Charge quota.  Each refusal shrinks the request by one block and
	 * turns preallocation off for it.
	 */
	while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) {
		ar->flags |= EXT4_MB_HINT_NOPREALLOC;
		ar->len--;
	}
	if (ar->len == 0) {
		/*
		 * NOTE(review): this return does not undo a successful
		 * ext4_claim_free_blocks() from above - confirm whether
		 * the claim has to be released here.
		 */
		*errp = -EDQUOT;
		return 0;
	}
	/* number of blocks charged to quota; the excess is returned at out1 */
	inquota = ar->len;

	if (EXT4_I(ar->inode)->i_delalloc_reserved_flag)
		ar->flags |= EXT4_MB_DELALLOC_RESERVED;

	ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
	if (!ac) {
		/* len = 0 makes out1 give back the whole quota charge */
		ar->len = 0;
		*errp = -ENOMEM;
		goto out1;
	}

	*errp = ext4_mb_initialize_context(ac, ar);
	if (*errp) {
		ar->len = 0;
		goto out2;
	}

	/* first try to satisfy the request from existing preallocations */
	ac->ac_op = EXT4_MB_HISTORY_PREALLOC;
	if (!ext4_mb_use_preallocated(ac)) {
		ac->ac_op = EXT4_MB_HISTORY_ALLOC;
		ext4_mb_normalize_request(ac, ar);
		/*
		 * "repeat" is also entered by goto from the branches below,
		 * i.e. even when this if-body was skipped the first time.
		 */
repeat:
		/* run the regular (non-preallocated) allocator */
		ext4_mb_regular_allocator(ac);

		/*
		 * The best extent found is longer than what was originally
		 * asked for: record a new preallocation.
		 */
		if (ac->ac_status == AC_STATUS_FOUND &&
				ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len)
			ext4_mb_new_preallocation(ac);
	}

	if (likely(ac->ac_status == AC_STATUS_FOUND)) {
		*errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks);
		if (*errp ==  -EAGAIN) {
			/* forget the extent that was found and search again */
			ac->ac_b_ex.fe_group = 0;
			ac->ac_b_ex.fe_start = 0;
			ac->ac_b_ex.fe_len = 0;
			ac->ac_status = AC_STATUS_CONTINUE;
			goto repeat;
		} else if (*errp) {
			ac->ac_b_ex.fe_len = 0;
			ar->len = 0;
			ext4_mb_show_ac(ac);
		} else {
			/* success: report where and how much was allocated */
			block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
			ar->len = ac->ac_b_ex.fe_len;
		}
	} else {
		/* nothing found: discard preallocations and retry if any went */
		freed  = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len);
		if (freed)
			goto repeat;
		*errp = -ENOSPC;
		ac->ac_b_ex.fe_len = 0;
		ar->len = 0;
		ext4_mb_show_ac(ac);
	}

	ext4_mb_release_context(ac);

out2:
	kmem_cache_free(ext4_ac_cachep, ac);
out1:
	/* return the quota charged for blocks that were not allocated */
	if (ar->len < inquota)
		DQUOT_FREE_BLOCK(ar->inode, inquota - ar->len);

	return block;
}
4388
4389
4390
4391
4392
4393
4394static int can_merge(struct ext4_free_data *entry1,
4395 struct ext4_free_data *entry2)
4396{
4397 if ((entry1->t_tid == entry2->t_tid) &&
4398 (entry1->group == entry2->group) &&
4399 ((entry1->start_blk + entry1->count) == entry2->start_blk))
4400 return 1;
4401 return 0;
4402}
4403
4404static noinline_for_stack int
4405ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4406 ext4_group_t group, ext4_grpblk_t block, int count)
4407{
4408 struct ext4_group_info *db = e4b->bd_info;
4409 struct super_block *sb = e4b->bd_sb;
4410 struct ext4_sb_info *sbi = EXT4_SB(sb);
4411 struct ext4_free_data *entry, *new_entry;
4412 struct rb_node **n = &db->bb_free_root.rb_node, *node;
4413 struct rb_node *parent = NULL, *new_node;
4414
4415
4416 BUG_ON(e4b->bd_bitmap_page == NULL);
4417 BUG_ON(e4b->bd_buddy_page == NULL);
4418
4419 new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
4420 new_entry->start_blk = block;
4421 new_entry->group = group;
4422 new_entry->count = count;
4423 new_entry->t_tid = handle->h_transaction->t_tid;
4424 new_node = &new_entry->node;
4425
4426 ext4_lock_group(sb, group);
4427 if (!*n) {
4428
4429
4430
4431
4432
4433 page_cache_get(e4b->bd_buddy_page);
4434 page_cache_get(e4b->bd_bitmap_page);
4435 }
4436 while (*n) {
4437 parent = *n;
4438 entry = rb_entry(parent, struct ext4_free_data, node);
4439 if (block < entry->start_blk)
4440 n = &(*n)->rb_left;
4441 else if (block >= (entry->start_blk + entry->count))
4442 n = &(*n)->rb_right;
4443 else {
4444 ext4_unlock_group(sb, group);
4445 ext4_error(sb, __func__,
4446 "Double free of blocks %d (%d %d)\n",
4447 block, entry->start_blk, entry->count);
4448 return 0;
4449 }
4450 }
4451
4452 rb_link_node(new_node, parent, n);
4453 rb_insert_color(new_node, &db->bb_free_root);
4454
4455
4456 node = rb_prev(new_node);
4457 if (node) {
4458 entry = rb_entry(node, struct ext4_free_data, node);
4459 if (can_merge(entry, new_entry)) {
4460 new_entry->start_blk = entry->start_blk;
4461 new_entry->count += entry->count;
4462 rb_erase(node, &(db->bb_free_root));
4463 spin_lock(&sbi->s_md_lock);
4464 list_del(&entry->list);
4465 spin_unlock(&sbi->s_md_lock);
4466 kmem_cache_free(ext4_free_ext_cachep, entry);
4467 }
4468 }
4469
4470 node = rb_next(new_node);
4471 if (node) {
4472 entry = rb_entry(node, struct ext4_free_data, node);
4473 if (can_merge(new_entry, entry)) {
4474 new_entry->count += entry->count;
4475 rb_erase(node, &(db->bb_free_root));
4476 spin_lock(&sbi->s_md_lock);
4477 list_del(&entry->list);
4478 spin_unlock(&sbi->s_md_lock);
4479 kmem_cache_free(ext4_free_ext_cachep, entry);
4480 }
4481 }
4482
4483 spin_lock(&sbi->s_md_lock);
4484 list_add(&new_entry->list, &handle->h_transaction->t_private_list);
4485 spin_unlock(&sbi->s_md_lock);
4486 ext4_unlock_group(sb, group);
4487 return 0;
4488}
4489
4490
4491
4492
/*
 * Free @count blocks starting at @block.
 *
 * @handle:   journal handle used for the bitmap and group descriptor
 * @inode:    inode the blocks belonged to; supplies the super block
 * @block:    first block to free
 * @count:    number of blocks to free
 * @metadata: non-zero routes the range through ext4_mb_free_metadata()
 *            instead of returning it to the buddy immediately
 * @freed:    set to 0 on entry and increased by the number of blocks
 *            released in each group processed
 *
 * A range that crosses a group boundary is handled one group at a time
 * via the do_more loop.  Nothing is returned; errors are reported
 * through ext4_error()/ext4_std_error().
 */
void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
			unsigned long block, unsigned long count,
			int metadata, unsigned long *freed)
{
	struct buffer_head *bitmap_bh = NULL;
	struct super_block *sb = inode->i_sb;
	struct ext4_allocation_context *ac = NULL;
	struct ext4_group_desc *gdp;
	struct ext4_super_block *es;
	unsigned long overflow;
	ext4_grpblk_t bit;
	struct buffer_head *gd_bh;
	ext4_group_t block_group;
	struct ext4_sb_info *sbi;
	struct ext4_buddy e4b;
	int err = 0;
	int ret;

	*freed = 0;

	sbi = EXT4_SB(sb);
	es = EXT4_SB(sb)->s_es;
	/* reject ranges that wrap around or fall outside the data zone */
	if (block < le32_to_cpu(es->s_first_data_block) ||
	    block + count < block ||
	    block + count > ext4_blocks_count(es)) {
		ext4_error(sb, __func__,
			    "Freeing blocks not in datazone - "
			    "block = %lu, count = %lu", block, count);
		goto error_return;
	}

	ext4_debug("freeing block %lu\n", block);

	/* optional: only used for ext4_mb_store_history(); may be NULL */
	ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
	if (ac) {
		ac->ac_op = EXT4_MB_HISTORY_FREE;
		ac->ac_inode = inode;
		ac->ac_sb = sb;
	}

do_more:
	overflow = 0;
	ext4_get_group_no_and_offset(sb, block, &block_group, &bit);

	/*
	 * If the range runs past the end of this group, free only the part
	 * inside the group now and keep the rest in "overflow" for the
	 * next pass.
	 */
	if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) {
		overflow = bit + count - EXT4_BLOCKS_PER_GROUP(sb);
		count -= overflow;
	}
	bitmap_bh = ext4_read_block_bitmap(sb, block_group);
	if (!bitmap_bh) {
		err = -EIO;
		goto error_return;
	}
	gdp = ext4_get_group_desc(sb, block_group, &gd_bh);
	if (!gdp) {
		err = -EIO;
		goto error_return;
	}

	/* refuse to free the group's bitmaps or inode table blocks */
	if (in_range(ext4_block_bitmap(sb, gdp), block, count) ||
	    in_range(ext4_inode_bitmap(sb, gdp), block, count) ||
	    in_range(block, ext4_inode_table(sb, gdp),
		      EXT4_SB(sb)->s_itb_per_group) ||
	    in_range(block + count - 1, ext4_inode_table(sb, gdp),
		      EXT4_SB(sb)->s_itb_per_group)) {

		ext4_error(sb, __func__,
			   "Freeing blocks in system zone - "
			   "Block = %lu, count = %lu", block, count);
		/* err is still 0 here, so ext4_std_error() gets 0 */
		goto error_return;
	}

	BUFFER_TRACE(bitmap_bh, "getting write access");
	err = ext4_journal_get_write_access(handle, bitmap_bh);
	if (err)
		goto error_return;

	/*
	 * The group descriptor is modified further down as well, so get
	 * journal write access to its buffer too.
	 */
	BUFFER_TRACE(gd_bh, "get_write_access");
	err = ext4_journal_get_write_access(handle, gd_bh);
	if (err)
		goto error_return;

	err = ext4_mb_load_buddy(sb, block_group, &e4b);
	if (err)
		goto error_return;

#ifdef AGGRESSIVE_CHECK
	{
		int i;
		/* every block being freed must currently be marked in use */
		for (i = 0; i < count; i++)
			BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
	}
#endif
	mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data,
			bit, count);

	/* the bitmap block was just modified */
	BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
	err = ext4_journal_dirty_metadata(handle, bitmap_bh);

	if (ac) {
		ac->ac_b_ex.fe_group = block_group;
		ac->ac_b_ex.fe_start = bit;
		ac->ac_b_ex.fe_len = count;
		ext4_mb_store_history(ac);
	}

	if (metadata) {
		/*
		 * Metadata blocks are handed to ext4_mb_free_metadata()
		 * rather than being returned to the buddy right away.
		 * Its return value is not checked here.
		 */
		ext4_mb_free_metadata(handle, &e4b, block_group, bit, count);
	} else {
		ext4_lock_group(sb, block_group);
		mb_free_blocks(inode, &e4b, bit, count);
		ext4_mb_return_to_preallocation(inode, &e4b, block, count);
		ext4_unlock_group(sb, block_group);
	}

	/* update the free block count in the descriptor and its checksum */
	spin_lock(sb_bgl_lock(sbi, block_group));
	le16_add_cpu(&gdp->bg_free_blocks_count, count);
	gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
	spin_unlock(sb_bgl_lock(sbi, block_group));
	percpu_counter_add(&sbi->s_freeblocks_counter, count);

	if (sbi->s_log_groups_per_flex) {
		/* keep the per-flex-group free block count in step */
		ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
		spin_lock(sb_bgl_lock(sbi, flex_group));
		sbi->s_flex_groups[flex_group].free_blocks += count;
		spin_unlock(sb_bgl_lock(sbi, flex_group));
	}

	ext4_mb_release_desc(&e4b);

	*freed += count;

	/* the group descriptor block was modified as well */
	BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
	ret = ext4_journal_dirty_metadata(handle, gd_bh);
	/* keep the first error: the bitmap result wins over this one */
	if (!err)
		err = ret;

	if (overflow && !err) {
		/* continue with the part that lies in the next group */
		block += count;
		count = overflow;
		put_bh(bitmap_bh);
		goto do_more;
	}
	sb->s_dirt = 1;
error_return:
	brelse(bitmap_bh);
	ext4_std_error(sb, err);
	if (ac)
		kmem_cache_free(ext4_ac_cachep, ac);
	return;
}