1
2
3
4
5
6
7
8
9#include <linux/slab.h>
10#include <linux/backing-dev.h>
11#include <linux/mm.h>
12#include <linux/shm.h>
13#include <linux/mman.h>
14#include <linux/pagemap.h>
15#include <linux/swap.h>
16#include <linux/syscalls.h>
17#include <linux/capability.h>
18#include <linux/init.h>
19#include <linux/file.h>
20#include <linux/fs.h>
21#include <linux/personality.h>
22#include <linux/security.h>
23#include <linux/hugetlb.h>
24#include <linux/profile.h>
25#include <linux/module.h>
26#include <linux/mount.h>
27#include <linux/mempolicy.h>
28#include <linux/rmap.h>
29#include <linux/mmu_notifier.h>
30
31#include <asm/uaccess.h>
32#include <asm/cacheflush.h>
33#include <asm/tlb.h>
34#include <asm/mmu_context.h>
35
36#include "internal.h"
37
/*
 * Fallback used when the architecture does not supply arch_mmap_check():
 * the check evaluates to 0, which do_mmap_pgoff() treats as "no error".
 */
#ifndef arch_mmap_check
#define arch_mmap_check(addr, len, flags)	(0)
#endif

/*
 * Fallback used when the architecture does not supply
 * arch_rebalance_pgtables(): the address is passed through unchanged.
 * get_unmapped_area() returns the value of this macro.
 */
#ifndef arch_rebalance_pgtables
#define arch_rebalance_pgtables(addr, len)		(addr)
#endif

/*
 * Forward declaration: mmap_region() calls this on its error path before
 * the definition appears.
 */
static void unmap_region(struct mm_struct *mm,
		struct vm_area_struct *vma, struct vm_area_struct *prev,
		unsigned long start, unsigned long end);
49
50
51
52
53
54#undef DEBUG_MM_RB
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
/*
 * Page-protection lookup table.  vm_get_page_prot() indexes it with
 * vm_flags & (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED), hence 16 entries.
 * The first eight entries are the __Pxxx constants and the last eight the
 * __Sxxx constants; both sets are defined outside this file (presumably the
 * per-architecture private and shared protections -- confirm in the arch
 * headers).
 */
pgprot_t protection_map[16] = {
	__P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
	__S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
};
75
76pgprot_t vm_get_page_prot(unsigned long vm_flags)
77{
78 return __pgprot(pgprot_val(protection_map[vm_flags &
79 (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
80 pgprot_val(arch_vm_get_page_prot(vm_flags)));
81}
82EXPORT_SYMBOL(vm_get_page_prot);
83
/* Overcommit policy consulted by __vm_enough_memory(); starts as OVERCOMMIT_GUESS. */
int sysctl_overcommit_memory = OVERCOMMIT_GUESS;
/* Percentage of non-hugetlb RAM used for the limit in the last branch of __vm_enough_memory(). */
int sysctl_overcommit_ratio = 50;
/* Limit that do_mmap_pgoff() compares mm->map_count against. */
int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
/*
 * Global commit counter compared with the computed limit in
 * __vm_enough_memory().  Presumably maintained by vm_acct_memory() /
 * vm_unacct_memory(), which are defined elsewhere -- confirm.
 */
atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0);
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
/*
 * Decide whether a request for @pages pages of new virtual memory may be
 * committed under the current overcommit policy.
 *
 * @mm:            address space the request is for; may be NULL, in which
 *                 case the per-process reserve in the final branch is skipped.
 * @pages:         number of pages requested.
 * @cap_sys_admin: non-zero for a privileged caller, who is exempt from the
 *                 1/32 hold-back applied below.
 *
 * The pages are charged with vm_acct_memory() before any check and are
 * un-charged with vm_unacct_memory() when the request is refused.
 *
 * Returns 0 if the request is allowed, -ENOMEM otherwise.
 */
int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
{
	unsigned long free, allowed;

	vm_acct_memory(pages);

	/* OVERCOMMIT_ALWAYS: never refuse (the charge above stays in place). */
	if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
		return 0;

	if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
		unsigned long n;

		/* Heuristic estimate: file pages plus swap pages ... */
		free = global_page_state(NR_FILE_PAGES);
		free += nr_swap_pages;

		/* ... plus reclaimable slab. */
		free += global_page_state(NR_SLAB_RECLAIMABLE);

		/* Hold back 1/32 of the estimate from unprivileged callers. */
		if (!cap_sys_admin)
			free -= free / 32;

		if (free > pages)
			return 0;

		/*
		 * The estimate was not enough; only now look at the number
		 * of currently free pages as well.
		 */
		n = nr_free_pages();

		/* Free pages up to totalreserve_pages do not count. */
		if (n <= totalreserve_pages)
			goto error;
		else
			n -= totalreserve_pages;

		/* Same 1/32 hold-back for unprivileged callers. */
		if (!cap_sys_admin)
			n -= n / 32;
		free += n;

		if (free > pages)
			return 0;

		goto error;
	}

	/*
	 * Any other policy value: strict limit of sysctl_overcommit_ratio
	 * percent of the RAM not used by hugetlb pages ...
	 */
	allowed = (totalram_pages - hugetlb_total_pages())
		* sysctl_overcommit_ratio / 100;

	/* ... minus 1/32 of that for unprivileged callers ... */
	if (!cap_sys_admin)
		allowed -= allowed / 32;
	/* ... plus all of swap. */
	allowed += total_swap_pages;

	/* Keep 1/32 of this process's total_vm out of the allowance. */
	if (mm)
		allowed -= mm->total_vm / 32;

	/*
	 * The counter already includes @pages (charged at the top), so the
	 * comparison is against the post-request total.
	 * NOTE(review): that relies on vm_acct_memory() updating
	 * vm_committed_space, which is not visible here -- confirm.
	 */
	if (atomic_long_read(&vm_committed_space) < (long)allowed)
		return 0;
error:
	vm_unacct_memory(pages);

	return -ENOMEM;
}
192
193
194
195
196static void __remove_shared_vm_struct(struct vm_area_struct *vma,
197 struct file *file, struct address_space *mapping)
198{
199 if (vma->vm_flags & VM_DENYWRITE)
200 atomic_inc(&file->f_path.dentry->d_inode->i_writecount);
201 if (vma->vm_flags & VM_SHARED)
202 mapping->i_mmap_writable--;
203
204 flush_dcache_mmap_lock(mapping);
205 if (unlikely(vma->vm_flags & VM_NONLINEAR))
206 list_del_init(&vma->shared.vm_set.list);
207 else
208 vma_prio_tree_remove(vma, &mapping->i_mmap);
209 flush_dcache_mmap_unlock(mapping);
210}
211
212
213
214
215
216void unlink_file_vma(struct vm_area_struct *vma)
217{
218 struct file *file = vma->vm_file;
219
220 if (file) {
221 struct address_space *mapping = file->f_mapping;
222 spin_lock(&mapping->i_mmap_lock);
223 __remove_shared_vm_struct(vma, file, mapping);
224 spin_unlock(&mapping->i_mmap_lock);
225 }
226}
227
228
229
230
231static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
232{
233 struct vm_area_struct *next = vma->vm_next;
234
235 might_sleep();
236 if (vma->vm_ops && vma->vm_ops->close)
237 vma->vm_ops->close(vma);
238 if (vma->vm_file) {
239 fput(vma->vm_file);
240 if (vma->vm_flags & VM_EXECUTABLE)
241 removed_exe_file_vma(vma->vm_mm);
242 }
243 mpol_put(vma_policy(vma));
244 kmem_cache_free(vm_area_cachep, vma);
245 return next;
246}
247
/*
 * brk system call: try to move the end of the heap to @brk.
 *
 * Returns the resulting mm->brk.  On any failure mm->brk is left untouched,
 * so the caller sees the old value.  The whole operation runs with
 * mm->mmap_sem held for writing.
 */
asmlinkage unsigned long sys_brk(unsigned long brk)
{
	unsigned long rlim, retval;
	unsigned long newbrk, oldbrk;
	struct mm_struct *mm = current->mm;
	unsigned long min_brk;

	down_write(&mm->mmap_sem);

	/* Lowest acceptable break: end of code with COMPAT_BRK, else start_brk. */
#ifdef CONFIG_COMPAT_BRK
	min_brk = mm->end_code;
#else
	min_brk = mm->start_brk;
#endif
	if (brk < min_brk)
		goto out;

	/*
	 * Enforce RLIMIT_DATA: heap size (brk - start_brk) plus the size of
	 * the data segment must not exceed the limit.
	 */
	rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
	if (rlim < RLIM_INFINITY && (brk - mm->start_brk) +
			(mm->end_data - mm->start_data) > rlim)
		goto out;

	/* Same page before and after: only the recorded value changes. */
	newbrk = PAGE_ALIGN(brk);
	oldbrk = PAGE_ALIGN(mm->brk);
	if (oldbrk == newbrk)
		goto set_brk;

	/* Shrinking: unmap the pages between the new and the old break. */
	if (brk <= mm->brk) {
		if (!do_munmap(mm, newbrk, oldbrk-newbrk))
			goto set_brk;
		goto out;
	}

	/*
	 * Growing: refuse if an existing mapping overlaps the new range;
	 * the range tested extends one page beyond newbrk.
	 */
	if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
		goto out;

	/* do_brk() returns the start address (oldbrk) on success. */
	if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)
		goto out;
set_brk:
	mm->brk = brk;
out:
	retval = mm->brk;
	up_write(&mm->mmap_sem);
	return retval;
}
302
#ifdef DEBUG_MM_RB
/*
 * Walk the rb-tree in order, reporting vmas that are out of order, overlap
 * the previous one, or end before they start, then walk it backwards and
 * compare the two counts.
 *
 * Returns the number of nodes seen going forward, or 0 when the forward and
 * backward counts disagree.  An ordering violation resets the running count
 * to -1 before it is incremented for the offending node.
 */
static int browse_rb(struct rb_root *root)
{
	int forwards = 0, backwards = 0;
	struct rb_node *nd, *last = NULL;
	unsigned long prev = 0, pend = 0;

	for (nd = rb_first(root); nd; nd = rb_next(nd)) {
		struct vm_area_struct *vma =
			rb_entry(nd, struct vm_area_struct, vm_rb);

		if (vma->vm_start < prev) {
			printk("vm_start %lx prev %lx\n", vma->vm_start, prev);
			forwards = -1;
		}
		if (vma->vm_start < pend)
			printk("vm_start %lx pend %lx\n", vma->vm_start, pend);
		if (vma->vm_start > vma->vm_end)
			printk("vm_end %lx < vm_start %lx\n", vma->vm_end, vma->vm_start);
		forwards++;
		last = nd;
		prev = vma->vm_start;
		pend = vma->vm_end;
	}

	for (nd = last; nd; nd = rb_prev(nd))
		backwards++;

	if (forwards != backwards) {
		printk("backwards %d, forwards %d\n", backwards, forwards);
		forwards = 0;
	}
	return forwards;
}

/*
 * Consistency check: mm->map_count must match both the length of the
 * vm_next list and the node count reported by browse_rb().  Any mismatch is
 * printed and then triggers BUG_ON().
 */
void validate_mm(struct mm_struct *mm)
{
	int bug = 0;
	int count = 0;
	struct vm_area_struct *vma;

	for (vma = mm->mmap; vma; vma = vma->vm_next)
		count++;
	if (count != mm->map_count) {
		printk("map_count %d vm_next %d\n", mm->map_count, count);
		bug = 1;
	}

	count = browse_rb(&mm->mm_rb);
	if (count != mm->map_count) {
		printk("map_count %d rb %d\n", mm->map_count, count);
		bug = 1;
	}
	BUG_ON(bug);
}
#else
/* Without DEBUG_MM_RB the check compiles away. */
#define validate_mm(mm) do { } while (0)
#endif
352
353static struct vm_area_struct *
354find_vma_prepare(struct mm_struct *mm, unsigned long addr,
355 struct vm_area_struct **pprev, struct rb_node ***rb_link,
356 struct rb_node ** rb_parent)
357{
358 struct vm_area_struct * vma;
359 struct rb_node ** __rb_link, * __rb_parent, * rb_prev;
360
361 __rb_link = &mm->mm_rb.rb_node;
362 rb_prev = __rb_parent = NULL;
363 vma = NULL;
364
365 while (*__rb_link) {
366 struct vm_area_struct *vma_tmp;
367
368 __rb_parent = *__rb_link;
369 vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);
370
371 if (vma_tmp->vm_end > addr) {
372 vma = vma_tmp;
373 if (vma_tmp->vm_start <= addr)
374 break;
375 __rb_link = &__rb_parent->rb_left;
376 } else {
377 rb_prev = __rb_parent;
378 __rb_link = &__rb_parent->rb_right;
379 }
380 }
381
382 *pprev = NULL;
383 if (rb_prev)
384 *pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
385 *rb_link = __rb_link;
386 *rb_parent = __rb_parent;
387 return vma;
388}
389
390static inline void
391__vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
392 struct vm_area_struct *prev, struct rb_node *rb_parent)
393{
394 if (prev) {
395 vma->vm_next = prev->vm_next;
396 prev->vm_next = vma;
397 } else {
398 mm->mmap = vma;
399 if (rb_parent)
400 vma->vm_next = rb_entry(rb_parent,
401 struct vm_area_struct, vm_rb);
402 else
403 vma->vm_next = NULL;
404 }
405}
406
407void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
408 struct rb_node **rb_link, struct rb_node *rb_parent)
409{
410 rb_link_node(&vma->vm_rb, rb_parent, rb_link);
411 rb_insert_color(&vma->vm_rb, &mm->mm_rb);
412}
413
414static void __vma_link_file(struct vm_area_struct *vma)
415{
416 struct file * file;
417
418 file = vma->vm_file;
419 if (file) {
420 struct address_space *mapping = file->f_mapping;
421
422 if (vma->vm_flags & VM_DENYWRITE)
423 atomic_dec(&file->f_path.dentry->d_inode->i_writecount);
424 if (vma->vm_flags & VM_SHARED)
425 mapping->i_mmap_writable++;
426
427 flush_dcache_mmap_lock(mapping);
428 if (unlikely(vma->vm_flags & VM_NONLINEAR))
429 vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
430 else
431 vma_prio_tree_insert(vma, &mapping->i_mmap);
432 flush_dcache_mmap_unlock(mapping);
433 }
434}
435
/*
 * Link @vma into the mm's vma list, its rb-tree and its anon_vma.
 * @prev, @rb_link and @rb_parent are the values produced by
 * find_vma_prepare().  File linkage is not done here; vma_link() calls
 * __vma_link_file() separately.
 */
static void
__vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
	struct vm_area_struct *prev, struct rb_node **rb_link,
	struct rb_node *rb_parent)
{
	__vma_link_list(mm, vma, prev, rb_parent);
	__vma_link_rb(mm, vma, rb_link, rb_parent);
	__anon_vma_link(vma);
}
445
446static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
447 struct vm_area_struct *prev, struct rb_node **rb_link,
448 struct rb_node *rb_parent)
449{
450 struct address_space *mapping = NULL;
451
452 if (vma->vm_file)
453 mapping = vma->vm_file->f_mapping;
454
455 if (mapping) {
456 spin_lock(&mapping->i_mmap_lock);
457 vma->vm_truncate_count = mapping->truncate_count;
458 }
459 anon_vma_lock(vma);
460
461 __vma_link(mm, vma, prev, rb_link, rb_parent);
462 __vma_link_file(vma);
463
464 anon_vma_unlock(vma);
465 if (mapping)
466 spin_unlock(&mapping->i_mmap_lock);
467
468 mm->map_count++;
469 validate_mm(mm);
470}
471
472
473
474
475
476
477static void
478__insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
479{
480 struct vm_area_struct * __vma, * prev;
481 struct rb_node ** rb_link, * rb_parent;
482
483 __vma = find_vma_prepare(mm, vma->vm_start,&prev, &rb_link, &rb_parent);
484 BUG_ON(__vma && __vma->vm_start < vma->vm_end);
485 __vma_link(mm, vma, prev, rb_link, rb_parent);
486 mm->map_count++;
487}
488
/*
 * Unlink @vma from the mm's vma list and rb-tree.  @prev is dereferenced
 * unconditionally, so it must be the non-NULL predecessor of @vma.  If the
 * lookup cache pointed at @vma it is redirected to @prev.
 */
static inline void
__vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
		struct vm_area_struct *prev)
{
	prev->vm_next = vma->vm_next;
	rb_erase(&vma->vm_rb, &mm->mm_rb);
	/* never leave the cache pointing at an unlinked vma */
	if (mm->mmap_cache == vma)
		mm->mmap_cache = prev;
}
498
499
500
501
502
503
504
505
/*
 * Change the start, end and page offset of @vma, keeping the file prio
 * tree, the anon_vma linkage and the neighbouring vma consistent.
 *
 * @vma:    vma to resize.
 * @start:  new vm_start.
 * @end:    new vm_end.
 * @pgoff:  new vm_pgoff.
 * @insert: optional new vma to link into the mm as part of the same
 *          operation; when given, the following vma is never touched.
 *
 * Without @insert, the new @end is compared with the following vma:
 *  - it reaches or passes next->vm_end: next is swallowed and freed
 *    (remove_next == 1); if it goes beyond, the vma after next is removed
 *    too (remove_next == 2, handled by looping through "again");
 *  - it falls inside next: next->vm_start moves forward by adjust_next pages;
 *  - it is below the current vma->vm_end: next->vm_start moves backwards
 *    (adjust_next is negative) and next imports the range.
 *
 * NOTE(review): no mmap_sem handling is visible here; presumably the
 * caller holds it for writing -- confirm against callers.
 */
void vma_adjust(struct vm_area_struct *vma, unsigned long start,
	unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert)
{
	struct mm_struct *mm = vma->vm_mm;
	struct vm_area_struct *next = vma->vm_next;
	struct vm_area_struct *importer = NULL;
	struct address_space *mapping = NULL;
	struct prio_tree_root *root = NULL;
	struct file *file = vma->vm_file;
	struct anon_vma *anon_vma = NULL;
	long adjust_next = 0;
	int remove_next = 0;

	if (next && !insert) {
		if (end >= next->vm_end) {
			/*
			 * vma grows over the whole of next.  remove_next is 2
			 * when end lies beyond next, in which case the code at
			 * the bottom jumps back here for the vma after it.
			 */
again:			remove_next = 1 + (end > next->vm_end);
			end = next->vm_end;
			anon_vma = next->anon_vma;
			importer = vma;
		} else if (end > next->vm_start) {
			/* vma grows into next: next's start moves forward. */
			adjust_next = (end - next->vm_start) >> PAGE_SHIFT;
			anon_vma = next->anon_vma;
			importer = vma;
		} else if (end < vma->vm_end) {
			/*
			 * vma shrinks and next takes over the range: next's
			 * start moves backwards, so adjust_next is negative.
			 */
			adjust_next = - ((vma->vm_end - end) >> PAGE_SHIFT);
			anon_vma = next->anon_vma;
			importer = next;
		}
	}

	if (file) {
		mapping = file->f_mapping;
		/* nonlinear vmas are not in the prio tree; root stays NULL */
		if (!(vma->vm_flags & VM_NONLINEAR))
			root = &mapping->i_mmap;
		spin_lock(&mapping->i_mmap_lock);
		if (importer &&
		    vma->vm_truncate_count != next->vm_truncate_count) {
			/*
			 * The two vmas disagree on the truncate count, so
			 * the importer's count is reset to 0.
			 */
			importer->vm_truncate_count = 0;
		}
		if (insert) {
			insert->vm_truncate_count = vma->vm_truncate_count;
			/*
			 * Link the new vma into the file's structures now,
			 * under i_mmap_lock; it is linked into the mm further
			 * down via __insert_vm_struct().
			 */
			__vma_link_file(insert);
		}
	}

	/*
	 * Pick the anon_vma to lock: vma's own if it has one, otherwise the
	 * one taken from next above.
	 */
	if (vma->anon_vma)
		anon_vma = vma->anon_vma;
	if (anon_vma) {
		spin_lock(&anon_vma->lock);
		/*
		 * An importer without an anon_vma adopts this one, so the
		 * range it takes over stays attached to it.
		 */
		if (importer && !importer->anon_vma) {
			importer->anon_vma = anon_vma;
			__anon_vma_link(importer);
		}
	}

	/*
	 * Take the vma(s) out of the prio tree while their range and offset
	 * change, and re-insert them afterwards.
	 */
	if (root) {
		flush_dcache_mmap_lock(mapping);
		vma_prio_tree_remove(vma, root);
		if (adjust_next)
			vma_prio_tree_remove(next, root);
	}

	vma->vm_start = start;
	vma->vm_end = end;
	vma->vm_pgoff = pgoff;
	if (adjust_next) {
		next->vm_start += adjust_next << PAGE_SHIFT;
		next->vm_pgoff += adjust_next;
	}

	if (root) {
		if (adjust_next)
			vma_prio_tree_insert(next, root);
		vma_prio_tree_insert(vma, root);
		flush_dcache_mmap_unlock(mapping);
	}

	if (remove_next) {
		/*
		 * next has been swallowed: unlink it from the mm, the file
		 * mapping and the anon_vma while the locks are still held.
		 */
		__vma_unlink(mm, next, vma);
		if (file)
			__remove_shared_vm_struct(next, file, mapping);
		if (next->anon_vma)
			__anon_vma_merge(vma, next);
	} else if (insert) {
		/*
		 * Link the new vma into the mm; its file linkage was already
		 * done above.
		 */
		__insert_vm_struct(mm, insert);
	}

	if (anon_vma)
		spin_unlock(&anon_vma->lock);
	if (mapping)
		spin_unlock(&mapping->i_mmap_lock);

	if (remove_next) {
		/* Release what the removed vma held, outside the spinlocks. */
		if (file) {
			fput(file);
			if (next->vm_flags & VM_EXECUTABLE)
				removed_exe_file_vma(mm);
		}
		mm->map_count--;
		mpol_put(vma_policy(next));
		kmem_cache_free(vm_area_cachep, next);
		/*
		 * remove_next == 2 means the original end also covered the
		 * vma after the one just freed: go round again for it.
		 */
		if (remove_next == 2) {
			next = vma->vm_next;
			goto again;
		}
	}

	validate_mm(mm);
}
661
662
663
664
665
666static inline int is_mergeable_vma(struct vm_area_struct *vma,
667 struct file *file, unsigned long vm_flags)
668{
669 if (vma->vm_flags != vm_flags)
670 return 0;
671 if (vma->vm_file != file)
672 return 0;
673 if (vma->vm_ops && vma->vm_ops->close)
674 return 0;
675 return 1;
676}
677
/*
 * Two anon_vma pointers are compatible for merging when at least one of
 * them is NULL or when they are the same anon_vma.  Returns 1 or 0.
 */
static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
					struct anon_vma *anon_vma2)
{
	if (!anon_vma1 || !anon_vma2)
		return 1;
	return anon_vma1 == anon_vma2;
}
683
684
685
686
687
688
689
690
691
692
693
694
695static int
696can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
697 struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
698{
699 if (is_mergeable_vma(vma, file, vm_flags) &&
700 is_mergeable_anon_vma(anon_vma, vma->anon_vma)) {
701 if (vma->vm_pgoff == vm_pgoff)
702 return 1;
703 }
704 return 0;
705}
706
707
708
709
710
711
712
713
714static int
715can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
716 struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
717{
718 if (is_mergeable_vma(vma, file, vm_flags) &&
719 is_mergeable_anon_vma(anon_vma, vma->anon_vma)) {
720 pgoff_t vm_pglen;
721 vm_pglen = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
722 if (vma->vm_pgoff + vm_pglen == vm_pgoff)
723 return 1;
724 }
725 return 0;
726}
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
/*
 * Try to satisfy a mapping of [@addr, @end) with the given attributes by
 * extending a neighbouring vma instead of creating a new one.
 *
 * @prev is the vma preceding @addr (or NULL).  The candidates are @prev
 * itself, if it ends exactly at @addr, and the vma that starts exactly at
 * @end.  A neighbour qualifies only if its memory policy equals @policy and
 * can_vma_merge_after() / can_vma_merge_before() accept it.
 *
 * Returns the vma that now covers the range, or NULL if nothing could be
 * merged.  Ranges whose flags include any VM_SPECIAL bit are never merged.
 */
struct vm_area_struct *vma_merge(struct mm_struct *mm,
			struct vm_area_struct *prev, unsigned long addr,
			unsigned long end, unsigned long vm_flags,
			struct anon_vma *anon_vma, struct file *file,
			pgoff_t pgoff, struct mempolicy *policy)
{
	pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
	struct vm_area_struct *area, *next;

	/* Never merge mappings carrying VM_SPECIAL flags. */
	if (vm_flags & VM_SPECIAL)
		return NULL;

	/*
	 * area is the vma right after prev (or the first vma).  If it ends
	 * exactly at @end, the merge candidate on the right is the vma
	 * after it.
	 */
	if (prev)
		next = prev->vm_next;
	else
		next = mm->mmap;
	area = next;
	if (next && next->vm_end == end)
		next = next->vm_next;

	/* Can the range be merged with the predecessor? */
	if (prev && prev->vm_end == addr &&
			mpol_equal(vma_policy(prev), policy) &&
			can_vma_merge_after(prev, vm_flags,
						anon_vma, file, pgoff)) {
		/*
		 * Yes.  If the successor fits as well, and the anon_vmas of
		 * prev and next are compatible, merge all three into prev.
		 */
		if (next && end == next->vm_start &&
				mpol_equal(policy, vma_policy(next)) &&
				can_vma_merge_before(next, vm_flags,
					anon_vma, file, pgoff+pglen) &&
				is_mergeable_anon_vma(prev->anon_vma,
						      next->anon_vma)) {
			vma_adjust(prev, prev->vm_start,
				next->vm_end, prev->vm_pgoff, NULL);
		} else
			/* Only prev fits: extend it up to @end. */
			vma_adjust(prev, prev->vm_start,
				end, prev->vm_pgoff, NULL);
		return prev;
	}

	/* Otherwise, can the range be merged with the successor? */
	if (next && end == next->vm_start &&
			mpol_equal(policy, vma_policy(next)) &&
			can_vma_merge_before(next, vm_flags,
					anon_vma, file, pgoff+pglen)) {
		if (prev && addr < prev->vm_end)
			/*
			 * The range starts inside prev: shrink prev to end
			 * at @addr (vma_adjust then moves next's start back).
			 */
			vma_adjust(prev, prev->vm_start,
				addr, prev->vm_pgoff, NULL);
		else
			/* Grow area downwards/upwards to cover the range. */
			vma_adjust(area, addr, next->vm_end,
				next->vm_pgoff - pglen, NULL);
		return area;
	}

	return NULL;
}
824
825
826
827
828
829
830
831
832
/*
 * Look for an anon_vma on an adjacent vma that @vma could share.
 *
 * The following vma is tried first, then the preceding one.  A neighbour
 * qualifies if it has an anon_vma, touches @vma exactly, has an equal
 * memory policy, and passes can_vma_merge_before()/after() with flags that
 * take their READ/WRITE/EXEC bits from the neighbour, so a difference in
 * those three bits alone does not prevent sharing.
 *
 * Returns the neighbour's anon_vma, or NULL if neither neighbour qualifies.
 */
struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
{
	struct vm_area_struct *near;
	unsigned long vm_flags;

	near = vma->vm_next;
	if (!near)
		goto try_prev;

	/*
	 * Build the flags to test with: @vma's flags, but with the
	 * protection bits replaced by the neighbour's.
	 */
	vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC);
	vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC);

	if (near->anon_vma && vma->vm_end == near->vm_start &&
			mpol_equal(vma_policy(vma), vma_policy(near)) &&
			can_vma_merge_before(near, vm_flags,
				NULL, vma->vm_file, vma->vm_pgoff +
				((vma->vm_end - vma->vm_start) >> PAGE_SHIFT)))
		return near->anon_vma;
try_prev:
	/*
	 * The list is singly linked, so the predecessor has to be looked up.
	 * The lookup inside BUG_ON() is what sets 'near'; it must find @vma
	 * itself at vma->vm_start.
	 */
	BUG_ON(find_vma_prev(vma->vm_mm, vma->vm_start, &near) != vma);
	if (!near)
		goto none;

	vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC);
	vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC);

	if (near->anon_vma && near->vm_end == vma->vm_start &&
			mpol_equal(vma_policy(near), vma_policy(vma)) &&
			can_vma_merge_after(near, vm_flags,
				NULL, vma->vm_file, vma->vm_pgoff))
		return near->anon_vma;
none:
	/* Neither neighbour offers an anon_vma that can be shared. */
	return NULL;
}
888
#ifdef CONFIG_PROC_FS
/*
 * Add @pages (which may be negative) to the per-mm statistics selected by
 * the mapping's attributes:
 *   - file-backed:   shared_vm, plus exec_vm when executable and not
 *                    writable;
 *   - not file-backed with a stack growth flag: stack_vm;
 *   - VM_RESERVED or VM_IO set: reserved_vm (in addition to the above).
 */
void vm_stat_account(struct mm_struct *mm, unsigned long flags,
						struct file *file, long pages)
{
	const unsigned long growth_bits = VM_GROWSUP|VM_GROWSDOWN;
	const int is_stack = (flags & (VM_STACK_FLAGS & growth_bits)) != 0;
	const int exec_only = (flags & (VM_EXEC|VM_WRITE)) == VM_EXEC;

	if (!file) {
		if (is_stack)
			mm->stack_vm += pages;
	} else {
		mm->shared_vm += pages;
		if (exec_only)
			mm->exec_vm += pages;
	}

	if (flags & (VM_RESERVED|VM_IO))
		mm->reserved_vm += pages;
}
#endif
906
907
908
909
910
/*
 * Validate an mmap request, turn prot/flags into vm_flags, and hand the
 * request to mmap_region().
 *
 * @file:  file to map, or NULL for an anonymous mapping.
 * @addr:  requested address; treated as a hint unless MAP_FIXED is set.
 * @len:   length in bytes, rounded up to a whole number of pages here.
 * @prot:  PROT_* bits.
 * @flags: MAP_* bits.
 * @pgoff: offset into @file, in pages.
 *
 * Returns the mapped address on success, or a negative errno value carried
 * in the unsigned long return.
 * NOTE(review): mm->mmap_sem is not taken here; presumably the caller
 * holds it for writing -- confirm against callers.
 */
unsigned long do_mmap_pgoff(struct file * file, unsigned long addr,
			unsigned long len, unsigned long prot,
			unsigned long flags, unsigned long pgoff)
{
	struct mm_struct * mm = current->mm;
	struct inode *inode;
	unsigned int vm_flags;
	int error;
	int accountable = 1;
	unsigned long reqprot = prot;

	/*
	 * With the READ_IMPLIES_EXEC personality, PROT_READ also grants
	 * PROT_EXEC, except for files on a mount flagged MNT_NOEXEC.
	 * reqprot keeps the protection as originally requested.
	 */
	if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
		if (!(file && (file->f_path.mnt->mnt_flags & MNT_NOEXEC)))
			prot |= PROT_EXEC;

	if (!len)
		return -EINVAL;

	if (!(flags & MAP_FIXED))
		addr = round_hint_to_min(addr);

	error = arch_mmap_check(addr, len, flags);
	if (error)
		return error;

	/* PAGE_ALIGN() can wrap to 0 for lengths near the top of the range. */
	len = PAGE_ALIGN(len);
	if (!len || len > TASK_SIZE)
		return -ENOMEM;

	/* The page offset of the end of the mapping must not wrap. */
	if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
		return -EOVERFLOW;

	/* Refuse once the mm is over the configured number of mappings. */
	if (mm->map_count > sysctl_max_map_count)
		return -ENOMEM;

	/*
	 * Pick the address.  get_unmapped_area() only returns page-aligned
	 * addresses on success, so any low bit set means an error code.
	 */
	addr = get_unmapped_area(file, addr, len, pgoff, flags);
	if (addr & ~PAGE_MASK)
		return addr;

	/*
	 * Translate prot and flags into vm_flags, add the mm's default
	 * flags, and start with every VM_MAY* permission granted; some are
	 * taken away again below.
	 */
	vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
			mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;

	if (flags & MAP_LOCKED) {
		if (!can_do_mlock())
			return -EPERM;
		vm_flags |= VM_LOCKED;
	}

	/* A locked mapping must fit within RLIMIT_MEMLOCK, unless CAP_IPC_LOCK. */
	if (vm_flags & VM_LOCKED) {
		unsigned long locked, lock_limit;
		locked = len >> PAGE_SHIFT;
		locked += mm->locked_vm;
		lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
		lock_limit >>= PAGE_SHIFT;
		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
			return -EAGAIN;
	}

	inode = file ? file->f_path.dentry->d_inode : NULL;

	if (file) {
		switch (flags & MAP_TYPE) {
		case MAP_SHARED:
			/* A writable shared mapping needs a file opened for writing. */
			if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE))
				return -EACCES;

			/*
			 * An append-only inode opened for writing may not be
			 * mapped shared.
			 */
			if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
				return -EACCES;

			/* Refused while locks_verify_locked() reports a conflict. */
			if (locks_verify_locked(inode))
				return -EAGAIN;

			vm_flags |= VM_SHARED | VM_MAYSHARE;
			/*
			 * File not opened for writing: clear VM_MAYWRITE and
			 * VM_SHARED again; VM_MAYSHARE stays set.
			 */
			if (!(file->f_mode & FMODE_WRITE))
				vm_flags &= ~(VM_MAYWRITE | VM_SHARED);

			/* fall through */
		case MAP_PRIVATE:
			if (!(file->f_mode & FMODE_READ))
				return -EACCES;
			if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) {
				if (vm_flags & VM_EXEC)
					return -EPERM;
				vm_flags &= ~VM_MAYEXEC;
			}
			/* hugetlbfs mappings are excluded from commit accounting */
			if (is_file_hugepages(file))
				accountable = 0;

			if (!file->f_op || !file->f_op->mmap)
				return -ENODEV;
			break;

		default:
			return -EINVAL;
		}
	} else {
		switch (flags & MAP_TYPE) {
		case MAP_SHARED:
			/* Anonymous shared mapping: the caller's pgoff is discarded. */
			pgoff = 0;
			vm_flags |= VM_SHARED | VM_MAYSHARE;
			break;
		case MAP_PRIVATE:
			/* Anonymous private mapping: pgoff is derived from the address. */
			pgoff = addr >> PAGE_SHIFT;
			break;
		default:
			return -EINVAL;
		}
	}

	/* The security hook gets both the requested and the effective prot. */
	error = security_file_mmap(file, reqprot, prot, flags, addr, 0);
	if (error)
		return error;

	return mmap_region(file, addr, len, flags, vm_flags, pgoff,
			   accountable);
}
EXPORT_SYMBOL(do_mmap_pgoff);
1058
1059
1060
1061
1062
1063
1064
1065int vma_wants_writenotify(struct vm_area_struct *vma)
1066{
1067 unsigned int vm_flags = vma->vm_flags;
1068
1069
1070 if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
1071 return 0;
1072
1073
1074 if (vma->vm_ops && vma->vm_ops->page_mkwrite)
1075 return 1;
1076
1077
1078 if (pgprot_val(vma->vm_page_prot) !=
1079 pgprot_val(vm_get_page_prot(vm_flags)))
1080 return 0;
1081
1082
1083 if (vm_flags & (VM_PFNMAP|VM_INSERTPAGE))
1084 return 0;
1085
1086
1087 return vma->vm_file && vma->vm_file->f_mapping &&
1088 mapping_cap_account_dirty(vma->vm_file->f_mapping);
1089}
1090
1091unsigned long mmap_region(struct file *file, unsigned long addr,
1092 unsigned long len, unsigned long flags,
1093 unsigned int vm_flags, unsigned long pgoff,
1094 int accountable)
1095{
1096 struct mm_struct *mm = current->mm;
1097 struct vm_area_struct *vma, *prev;
1098 int correct_wcount = 0;
1099 int error;
1100 struct rb_node **rb_link, *rb_parent;
1101 unsigned long charged = 0;
1102 struct inode *inode = file ? file->f_path.dentry->d_inode : NULL;
1103
1104
1105 error = -ENOMEM;
1106munmap_back:
1107 vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
1108 if (vma && vma->vm_start < addr + len) {
1109 if (do_munmap(mm, addr, len))
1110 return -ENOMEM;
1111 goto munmap_back;
1112 }
1113
1114
1115 if (!may_expand_vm(mm, len >> PAGE_SHIFT))
1116 return -ENOMEM;
1117
1118 if (flags & MAP_NORESERVE)
1119 vm_flags |= VM_NORESERVE;
1120
1121 if (accountable && (!(flags & MAP_NORESERVE) ||
1122 sysctl_overcommit_memory == OVERCOMMIT_NEVER)) {
1123 if (vm_flags & VM_SHARED) {
1124
1125 vm_flags |= VM_ACCOUNT;
1126 } else if (vm_flags & VM_WRITE) {
1127
1128
1129
1130 charged = len >> PAGE_SHIFT;
1131 if (security_vm_enough_memory(charged))
1132 return -ENOMEM;
1133 vm_flags |= VM_ACCOUNT;
1134 }
1135 }
1136
1137
1138
1139
1140
1141
1142 if (!file && !(vm_flags & VM_SHARED)) {
1143 vma = vma_merge(mm, prev, addr, addr + len, vm_flags,
1144 NULL, NULL, pgoff, NULL);
1145 if (vma)
1146 goto out;
1147 }
1148
1149
1150
1151
1152
1153
1154 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
1155 if (!vma) {
1156 error = -ENOMEM;
1157 goto unacct_error;
1158 }
1159
1160 vma->vm_mm = mm;
1161 vma->vm_start = addr;
1162 vma->vm_end = addr + len;
1163 vma->vm_flags = vm_flags;
1164 vma->vm_page_prot = vm_get_page_prot(vm_flags);
1165 vma->vm_pgoff = pgoff;
1166
1167 if (file) {
1168 error = -EINVAL;
1169 if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
1170 goto free_vma;
1171 if (vm_flags & VM_DENYWRITE) {
1172 error = deny_write_access(file);
1173 if (error)
1174 goto free_vma;
1175 correct_wcount = 1;
1176 }
1177 vma->vm_file = file;
1178 get_file(file);
1179 error = file->f_op->mmap(file, vma);
1180 if (error)
1181 goto unmap_and_free_vma;
1182 if (vm_flags & VM_EXECUTABLE)
1183 added_exe_file_vma(mm);
1184 } else if (vm_flags & VM_SHARED) {
1185 error = shmem_zero_setup(vma);
1186 if (error)
1187 goto free_vma;
1188 }
1189
1190
1191
1192
1193
1194
1195 if ((vm_flags & (VM_SHARED|VM_ACCOUNT)) == (VM_SHARED|VM_ACCOUNT))
1196 vma->vm_flags &= ~VM_ACCOUNT;
1197
1198
1199
1200
1201
1202
1203 addr = vma->vm_start;
1204 pgoff = vma->vm_pgoff;
1205 vm_flags = vma->vm_flags;
1206
1207 if (vma_wants_writenotify(vma))
1208 vma->vm_page_prot = vm_get_page_prot(vm_flags & ~VM_SHARED);
1209
1210 if (file && vma_merge(mm, prev, addr, vma->vm_end,
1211 vma->vm_flags, NULL, file, pgoff, vma_policy(vma))) {
1212 mpol_put(vma_policy(vma));
1213 kmem_cache_free(vm_area_cachep, vma);
1214 fput(file);
1215 if (vm_flags & VM_EXECUTABLE)
1216 removed_exe_file_vma(mm);
1217 } else {
1218 vma_link(mm, vma, prev, rb_link, rb_parent);
1219 file = vma->vm_file;
1220 }
1221
1222
1223 if (correct_wcount)
1224 atomic_inc(&inode->i_writecount);
1225out:
1226 mm->total_vm += len >> PAGE_SHIFT;
1227 vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
1228 if (vm_flags & VM_LOCKED) {
1229
1230
1231
1232 long nr_pages = mlock_vma_pages_range(vma, addr, addr + len);
1233 if (nr_pages < 0)
1234 return nr_pages;
1235 mm->locked_vm += (len >> PAGE_SHIFT) - nr_pages;
1236 } else if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
1237 make_pages_present(addr, addr + len);
1238 return addr;
1239
1240unmap_and_free_vma:
1241 if (correct_wcount)
1242 atomic_inc(&inode->i_writecount);
1243 vma->vm_file = NULL;
1244 fput(file);
1245
1246
1247 unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
1248 charged = 0;
1249free_vma:
1250 kmem_cache_free(vm_area_cachep, vma);
1251unacct_error:
1252 if (charged)
1253 vm_unacct_memory(charged);
1254 return error;
1255}
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
/*
 * Generic bottom-up search for a free range of @len bytes in current->mm,
 * used when the architecture does not define HAVE_ARCH_UNMAPPED_AREA.
 *
 * @filp and @pgoff are not used by this implementation.  With MAP_FIXED,
 * @addr is returned as is.  Otherwise a non-zero @addr is tried first as a
 * hint, and then the address space is scanned upwards.
 *
 * Returns the chosen address, or -ENOMEM (as unsigned long) if no range
 * fits below TASK_SIZE.
 */
#ifndef HAVE_ARCH_UNMAPPED_AREA
unsigned long
arch_get_unmapped_area(struct file *filp, unsigned long addr,
		unsigned long len, unsigned long pgoff, unsigned long flags)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long start_addr;

	if (len > TASK_SIZE)
		return -ENOMEM;

	if (flags & MAP_FIXED)
		return addr;

	/* Honour the hint if the range fits and overlaps no vma. */
	if (addr) {
		addr = PAGE_ALIGN(addr);
		vma = find_vma(mm, addr);
		if (TASK_SIZE - len >= addr &&
		    (!vma || addr + len <= vma->vm_start))
			return addr;
	}
	/*
	 * Requests larger than the largest hole recorded below the cached
	 * address start at that cached address; smaller ones restart from
	 * TASK_UNMAPPED_BASE and reset the recorded hole size.
	 */
	if (len > mm->cached_hole_size) {
	        start_addr = addr = mm->free_area_cache;
	} else {
	        start_addr = addr = TASK_UNMAPPED_BASE;
	        mm->cached_hole_size = 0;
	}

full_search:
	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
		/* At this point: (!vma || addr < vma->vm_end). */
		if (TASK_SIZE - len < addr) {
			/*
			 * Ran off the top.  If the scan did not begin at
			 * TASK_UNMAPPED_BASE, retry once from there.
			 */
			if (start_addr != TASK_UNMAPPED_BASE) {
				addr = TASK_UNMAPPED_BASE;
			        start_addr = addr;
				mm->cached_hole_size = 0;
				goto full_search;
			}
			return -ENOMEM;
		}
		if (!vma || addr + len <= vma->vm_start) {
			/* Found a hole; remember where the next search starts. */
			mm->free_area_cache = addr + len;
			return addr;
		}
		/* Track the largest hole that was skipped as too small. */
		if (addr + mm->cached_hole_size < vma->vm_start)
		        mm->cached_hole_size = vma->vm_start - addr;
		addr = vma->vm_end;
	}
}
#endif
1326
1327void arch_unmap_area(struct mm_struct *mm, unsigned long addr)
1328{
1329
1330
1331
1332 if (addr >= TASK_UNMAPPED_BASE && addr < mm->free_area_cache) {
1333 mm->free_area_cache = addr;
1334 mm->cached_hole_size = ~0UL;
1335 }
1336}
1337
1338
1339
1340
1341
/*
 * Generic top-down search for a free range of @len bytes in current->mm,
 * used when the architecture does not define
 * HAVE_ARCH_UNMAPPED_AREA_TOPDOWN.
 *
 * The search works downwards from mm->mmap_base.  If that fails it falls
 * back to the bottom-up arch_get_unmapped_area().  With MAP_FIXED, @addr0
 * is returned as is; a non-zero @addr0 is otherwise tried first as a hint.
 *
 * Returns the chosen address, or a negative errno value as unsigned long.
 */
#ifndef HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
unsigned long
arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
			  const unsigned long len, const unsigned long pgoff,
			  const unsigned long flags)
{
	struct vm_area_struct *vma;
	struct mm_struct *mm = current->mm;
	unsigned long addr = addr0;

	/* The requested length must fit the address space at all. */
	if (len > TASK_SIZE)
		return -ENOMEM;

	if (flags & MAP_FIXED)
		return addr;

	/* Honour the hint if the range fits and overlaps no vma. */
	if (addr) {
		addr = PAGE_ALIGN(addr);
		vma = find_vma(mm, addr);
		if (TASK_SIZE - len >= addr &&
				(!vma || addr + len <= vma->vm_start))
			return addr;
	}

	/*
	 * A request no larger than the largest recorded hole restarts the
	 * search from mmap_base and resets the recorded hole size.
	 */
        if (len <= mm->cached_hole_size) {
 	        mm->cached_hole_size = 0;
 		mm->free_area_cache = mm->mmap_base;
 	}

	/* Start from the cached address ... */
	addr = mm->free_area_cache;

	/* ... and first try the range that ends exactly there. */
	if (addr > len) {
		vma = find_vma(mm, addr-len);
		if (!vma || addr <= vma->vm_start)
			/* remember the address as the next starting point */
			return (mm->free_area_cache = addr-len);
	}

	if (mm->mmap_base < len)
		goto bottomup;

	addr = mm->mmap_base-len;

	do {
		/*
		 * find_vma() returns the first vma ending above addr; the
		 * range is free if there is none or it starts at or after
		 * addr + len.
		 */
		vma = find_vma(mm, addr);
		if (!vma || addr+len <= vma->vm_start)
			/* remember the address as the next starting point */
			return (mm->free_area_cache = addr);

 		/* Track the largest hole that was skipped as too small. */
 	        if (addr + mm->cached_hole_size < vma->vm_start)
 		        mm->cached_hole_size = vma->vm_start - addr;

		/* Try just below the vma that is in the way. */
		addr = vma->vm_start-len;
	} while (len < vma->vm_start);

bottomup:
	/*
	 * The top-down search failed.  Fall back to the bottom-up allocator,
	 * pointing the cache at TASK_UNMAPPED_BASE for the duration of the
	 * call.
	 */
	mm->cached_hole_size = ~0UL;
  	mm->free_area_cache = TASK_UNMAPPED_BASE;
	addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
	/*
	 * Restore the top-down starting point so the next call searches
	 * from mmap_base again.
	 */
	mm->free_area_cache = mm->mmap_base;
	mm->cached_hole_size = ~0UL;

	return addr;
}
#endif
1428
1429void arch_unmap_area_topdown(struct mm_struct *mm, unsigned long addr)
1430{
1431
1432
1433
1434 if (addr > mm->free_area_cache)
1435 mm->free_area_cache = addr;
1436
1437
1438 if (mm->free_area_cache > mm->mmap_base)
1439 mm->free_area_cache = mm->mmap_base;
1440}
1441
1442unsigned long
1443get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
1444 unsigned long pgoff, unsigned long flags)
1445{
1446 unsigned long (*get_area)(struct file *, unsigned long,
1447 unsigned long, unsigned long, unsigned long);
1448
1449 get_area = current->mm->get_unmapped_area;
1450 if (file && file->f_op && file->f_op->get_unmapped_area)
1451 get_area = file->f_op->get_unmapped_area;
1452 addr = get_area(file, addr, len, pgoff, flags);
1453 if (IS_ERR_VALUE(addr))
1454 return addr;
1455
1456 if (addr > TASK_SIZE - len)
1457 return -ENOMEM;
1458 if (addr & ~PAGE_MASK)
1459 return -EINVAL;
1460
1461 return arch_rebalance_pgtables(addr, len);
1462}
1463
1464EXPORT_SYMBOL(get_unmapped_area);
1465
1466
1467struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr)
1468{
1469 struct vm_area_struct *vma = NULL;
1470
1471 if (mm) {
1472
1473
1474 vma = mm->mmap_cache;
1475 if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) {
1476 struct rb_node * rb_node;
1477
1478 rb_node = mm->mm_rb.rb_node;
1479 vma = NULL;
1480
1481 while (rb_node) {
1482 struct vm_area_struct * vma_tmp;
1483
1484 vma_tmp = rb_entry(rb_node,
1485 struct vm_area_struct, vm_rb);
1486
1487 if (vma_tmp->vm_end > addr) {
1488 vma = vma_tmp;
1489 if (vma_tmp->vm_start <= addr)
1490 break;
1491 rb_node = rb_node->rb_left;
1492 } else
1493 rb_node = rb_node->rb_right;
1494 }
1495 if (vma)
1496 mm->mmap_cache = vma;
1497 }
1498 }
1499 return vma;
1500}
1501
1502EXPORT_SYMBOL(find_vma);
1503
1504
1505struct vm_area_struct *
1506find_vma_prev(struct mm_struct *mm, unsigned long addr,
1507 struct vm_area_struct **pprev)
1508{
1509 struct vm_area_struct *vma = NULL, *prev = NULL;
1510 struct rb_node * rb_node;
1511 if (!mm)
1512 goto out;
1513
1514
1515 vma = mm->mmap;
1516
1517
1518 rb_node = mm->mm_rb.rb_node;
1519
1520 while (rb_node) {
1521 struct vm_area_struct *vma_tmp;
1522 vma_tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);
1523
1524 if (addr < vma_tmp->vm_end) {
1525 rb_node = rb_node->rb_left;
1526 } else {
1527 prev = vma_tmp;
1528 if (!prev->vm_next || (addr < prev->vm_next->vm_end))
1529 break;
1530 rb_node = rb_node->rb_right;
1531 }
1532 }
1533
1534out:
1535 *pprev = prev;
1536 return prev ? prev->vm_next : vma;
1537}
1538
1539
1540
1541
1542
1543
1544static int acct_stack_growth(struct vm_area_struct * vma, unsigned long size, unsigned long grow)
1545{
1546 struct mm_struct *mm = vma->vm_mm;
1547 struct rlimit *rlim = current->signal->rlim;
1548 unsigned long new_start;
1549
1550
1551 if (!may_expand_vm(mm, grow))
1552 return -ENOMEM;
1553
1554
1555 if (size > rlim[RLIMIT_STACK].rlim_cur)
1556 return -ENOMEM;
1557
1558
1559 if (vma->vm_flags & VM_LOCKED) {
1560 unsigned long locked;
1561 unsigned long limit;
1562 locked = mm->locked_vm + grow;
1563 limit = rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
1564 if (locked > limit && !capable(CAP_IPC_LOCK))
1565 return -ENOMEM;
1566 }
1567
1568
1569 new_start = (vma->vm_flags & VM_GROWSUP) ? vma->vm_start :
1570 vma->vm_end - size;
1571 if (is_hugepage_only_range(vma->vm_mm, new_start, size))
1572 return -EFAULT;
1573
1574
1575
1576
1577
1578 if (security_vm_enough_memory(grow))
1579 return -ENOMEM;
1580
1581
1582 mm->total_vm += grow;
1583 if (vma->vm_flags & VM_LOCKED)
1584 mm->locked_vm += grow;
1585 vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow);
1586 return 0;
1587}
1588
1589#if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64)
1590
1591
1592
1593
1594#ifndef CONFIG_IA64
1595static
1596#endif
1597int expand_upwards(struct vm_area_struct *vma, unsigned long address)
1598{
1599 int error;
1600
1601 if (!(vma->vm_flags & VM_GROWSUP))
1602 return -EFAULT;
1603
1604
1605
1606
1607
1608 if (unlikely(anon_vma_prepare(vma)))
1609 return -ENOMEM;
1610 anon_vma_lock(vma);
1611
1612
1613
1614
1615
1616
1617
1618 if (address < PAGE_ALIGN(address+4))
1619 address = PAGE_ALIGN(address+4);
1620 else {
1621 anon_vma_unlock(vma);
1622 return -ENOMEM;
1623 }
1624 error = 0;
1625
1626
1627 if (address > vma->vm_end) {
1628 unsigned long size, grow;
1629
1630 size = address - vma->vm_start;
1631 grow = (address - vma->vm_end) >> PAGE_SHIFT;
1632
1633 error = acct_stack_growth(vma, size, grow);
1634 if (!error)
1635 vma->vm_end = address;
1636 }
1637 anon_vma_unlock(vma);
1638 return error;
1639}
1640#endif
1641
1642
1643
1644
1645static int expand_downwards(struct vm_area_struct *vma,
1646 unsigned long address)
1647{
1648 int error;
1649
1650
1651
1652
1653
1654 if (unlikely(anon_vma_prepare(vma)))
1655 return -ENOMEM;
1656
1657 address &= PAGE_MASK;
1658 error = security_file_mmap(NULL, 0, 0, 0, address, 1);
1659 if (error)
1660 return error;
1661
1662 anon_vma_lock(vma);
1663
1664
1665
1666
1667
1668
1669
1670
1671 if (address < vma->vm_start) {
1672 unsigned long size, grow;
1673
1674 size = vma->vm_end - address;
1675 grow = (vma->vm_start - address) >> PAGE_SHIFT;
1676
1677 error = acct_stack_growth(vma, size, grow);
1678 if (!error) {
1679 vma->vm_start = address;
1680 vma->vm_pgoff -= grow;
1681 }
1682 }
1683 anon_vma_unlock(vma);
1684 return error;
1685}
1686
/*
 * Externally visible entry point for downward stack growth; forwards to
 * the file-local expand_downwards() and returns its result unchanged.
 */
int expand_stack_downwards(struct vm_area_struct *vma, unsigned long address)
{
 return expand_downwards(vma, address);
}
1691
1692#ifdef CONFIG_STACK_GROWSUP
/*
 * CONFIG_STACK_GROWSUP variant: the stack grows towards higher
 * addresses, so expanding it means expand_upwards().
 */
int expand_stack(struct vm_area_struct *vma, unsigned long address)
{
 return expand_upwards(vma, address);
}
1697
1698struct vm_area_struct *
1699find_extend_vma(struct mm_struct *mm, unsigned long addr)
1700{
1701 struct vm_area_struct *vma, *prev;
1702
1703 addr &= PAGE_MASK;
1704 vma = find_vma_prev(mm, addr, &prev);
1705 if (vma && (vma->vm_start <= addr))
1706 return vma;
1707 if (!prev || expand_stack(prev, addr))
1708 return NULL;
1709 if (prev->vm_flags & VM_LOCKED) {
1710 if (mlock_vma_pages_range(prev, addr, prev->vm_end) < 0)
1711 return NULL;
1712 }
1713 return prev;
1714}
1715#else
/*
 * Default variant (no CONFIG_STACK_GROWSUP): the stack grows towards
 * lower addresses, so expanding it means expand_downwards().
 */
int expand_stack(struct vm_area_struct *vma, unsigned long address)
{
 return expand_downwards(vma, address);
}
1720
1721struct vm_area_struct *
1722find_extend_vma(struct mm_struct * mm, unsigned long addr)
1723{
1724 struct vm_area_struct * vma;
1725 unsigned long start;
1726
1727 addr &= PAGE_MASK;
1728 vma = find_vma(mm,addr);
1729 if (!vma)
1730 return NULL;
1731 if (vma->vm_start <= addr)
1732 return vma;
1733 if (!(vma->vm_flags & VM_GROWSDOWN))
1734 return NULL;
1735 start = vma->vm_start;
1736 if (expand_stack(vma, addr))
1737 return NULL;
1738 if (vma->vm_flags & VM_LOCKED) {
1739 if (mlock_vma_pages_range(vma, addr, start) < 0)
1740 return NULL;
1741 }
1742 return vma;
1743}
1744#endif
1745
1746
1747
1748
1749
1750
1751
1752static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
1753{
1754
1755 update_hiwater_vm(mm);
1756 do {
1757 long nrpages = vma_pages(vma);
1758
1759 mm->total_vm -= nrpages;
1760 vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages);
1761 vma = remove_vma(vma);
1762 } while (vma);
1763 validate_mm(mm);
1764}
1765
1766
1767
1768
1769
1770
1771static void unmap_region(struct mm_struct *mm,
1772 struct vm_area_struct *vma, struct vm_area_struct *prev,
1773 unsigned long start, unsigned long end)
1774{
1775 struct vm_area_struct *next = prev? prev->vm_next: mm->mmap;
1776 struct mmu_gather *tlb;
1777 unsigned long nr_accounted = 0;
1778
1779 lru_add_drain();
1780 tlb = tlb_gather_mmu(mm, 0);
1781 update_hiwater_rss(mm);
1782 unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL);
1783 vm_unacct_memory(nr_accounted);
1784 free_pgtables(tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
1785 next? next->vm_start: 0);
1786 tlb_finish_mmu(tlb, start, end);
1787}
1788
1789
1790
1791
1792
1793static void
1794detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
1795 struct vm_area_struct *prev, unsigned long end)
1796{
1797 struct vm_area_struct **insertion_point;
1798 struct vm_area_struct *tail_vma = NULL;
1799 unsigned long addr;
1800
1801 insertion_point = (prev ? &prev->vm_next : &mm->mmap);
1802 do {
1803 rb_erase(&vma->vm_rb, &mm->mm_rb);
1804 mm->map_count--;
1805 tail_vma = vma;
1806 vma = vma->vm_next;
1807 } while (vma && vma->vm_start < end);
1808 *insertion_point = vma;
1809 tail_vma->vm_next = NULL;
1810 if (mm->unmap_area == arch_unmap_area)
1811 addr = prev ? prev->vm_end : mm->mmap_base;
1812 else
1813 addr = vma ? vma->vm_start : mm->mmap_base;
1814 mm->unmap_area(mm, addr);
1815 mm->mmap_cache = NULL;
1816}
1817
1818
1819
1820
1821
1822int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
1823 unsigned long addr, int new_below)
1824{
1825 struct mempolicy *pol;
1826 struct vm_area_struct *new;
1827
1828 if (is_vm_hugetlb_page(vma) && (addr &
1829 ~(huge_page_mask(hstate_vma(vma)))))
1830 return -EINVAL;
1831
1832 if (mm->map_count >= sysctl_max_map_count)
1833 return -ENOMEM;
1834
1835 new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
1836 if (!new)
1837 return -ENOMEM;
1838
1839
1840 *new = *vma;
1841
1842 if (new_below)
1843 new->vm_end = addr;
1844 else {
1845 new->vm_start = addr;
1846 new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
1847 }
1848
1849 pol = mpol_dup(vma_policy(vma));
1850 if (IS_ERR(pol)) {
1851 kmem_cache_free(vm_area_cachep, new);
1852 return PTR_ERR(pol);
1853 }
1854 vma_set_policy(new, pol);
1855
1856 if (new->vm_file) {
1857 get_file(new->vm_file);
1858 if (vma->vm_flags & VM_EXECUTABLE)
1859 added_exe_file_vma(mm);
1860 }
1861
1862 if (new->vm_ops && new->vm_ops->open)
1863 new->vm_ops->open(new);
1864
1865 if (new_below)
1866 vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
1867 ((addr - new->vm_start) >> PAGE_SHIFT), new);
1868 else
1869 vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
1870
1871 return 0;
1872}
1873
1874
1875
1876
1877
1878
1879int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
1880{
1881 unsigned long end;
1882 struct vm_area_struct *vma, *prev, *last;
1883
1884 if ((start & ~PAGE_MASK) || start > TASK_SIZE || len > TASK_SIZE-start)
1885 return -EINVAL;
1886
1887 if ((len = PAGE_ALIGN(len)) == 0)
1888 return -EINVAL;
1889
1890
1891 vma = find_vma_prev(mm, start, &prev);
1892 if (!vma)
1893 return 0;
1894
1895
1896
1897 end = start + len;
1898 if (vma->vm_start >= end)
1899 return 0;
1900
1901
1902
1903
1904
1905
1906
1907
1908 if (start > vma->vm_start) {
1909 int error = split_vma(mm, vma, start, 0);
1910 if (error)
1911 return error;
1912 prev = vma;
1913 }
1914
1915
1916 last = find_vma(mm, end);
1917 if (last && end > last->vm_start) {
1918 int error = split_vma(mm, last, end, 1);
1919 if (error)
1920 return error;
1921 }
1922 vma = prev? prev->vm_next: mm->mmap;
1923
1924
1925
1926
1927 if (mm->locked_vm) {
1928 struct vm_area_struct *tmp = vma;
1929 while (tmp && tmp->vm_start < end) {
1930 if (tmp->vm_flags & VM_LOCKED) {
1931 mm->locked_vm -= vma_pages(tmp);
1932 munlock_vma_pages_all(tmp);
1933 }
1934 tmp = tmp->vm_next;
1935 }
1936 }
1937
1938
1939
1940
1941 detach_vmas_to_be_unmapped(mm, vma, prev, end);
1942 unmap_region(mm, vma, prev, start, end);
1943
1944
1945 remove_vma_list(mm, vma);
1946
1947 return 0;
1948}
1949
1950EXPORT_SYMBOL(do_munmap);
1951
1952asmlinkage long sys_munmap(unsigned long addr, size_t len)
1953{
1954 int ret;
1955 struct mm_struct *mm = current->mm;
1956
1957 profile_munmap(addr);
1958
1959 down_write(&mm->mmap_sem);
1960 ret = do_munmap(mm, addr, len);
1961 up_write(&mm->mmap_sem);
1962 return ret;
1963}
1964
/*
 * Debug-only check (CONFIG_DEBUG_VM) that mmap_sem is held for writing:
 * if a read lock can be acquired, nobody holds the write lock, so warn
 * and drop the read lock again.  Compiles to nothing otherwise.
 */
static inline void verify_mm_writelocked(struct mm_struct *mm)
{
#ifdef CONFIG_DEBUG_VM
	if (likely(!down_read_trylock(&mm->mmap_sem)))
		return;

	/* The trylock succeeded, so the semaphore was not write-held. */
	WARN_ON(1);
	up_read(&mm->mmap_sem);
#endif
}
1974
1975
1976
1977
1978
1979
1980unsigned long do_brk(unsigned long addr, unsigned long len)
1981{
1982 struct mm_struct * mm = current->mm;
1983 struct vm_area_struct * vma, * prev;
1984 unsigned long flags;
1985 struct rb_node ** rb_link, * rb_parent;
1986 pgoff_t pgoff = addr >> PAGE_SHIFT;
1987 int error;
1988
1989 len = PAGE_ALIGN(len);
1990 if (!len)
1991 return addr;
1992
1993 if ((addr + len) > TASK_SIZE || (addr + len) < addr)
1994 return -EINVAL;
1995
1996 if (is_hugepage_only_range(mm, addr, len))
1997 return -EINVAL;
1998
1999 error = security_file_mmap(NULL, 0, 0, 0, addr, 1);
2000 if (error)
2001 return error;
2002
2003 flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
2004
2005 error = arch_mmap_check(addr, len, flags);
2006 if (error)
2007 return error;
2008
2009
2010
2011
2012 if (mm->def_flags & VM_LOCKED) {
2013 unsigned long locked, lock_limit;
2014 locked = len >> PAGE_SHIFT;
2015 locked += mm->locked_vm;
2016 lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
2017 lock_limit >>= PAGE_SHIFT;
2018 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
2019 return -EAGAIN;
2020 }
2021
2022
2023
2024
2025
2026 verify_mm_writelocked(mm);
2027
2028
2029
2030
2031 munmap_back:
2032 vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
2033 if (vma && vma->vm_start < addr + len) {
2034 if (do_munmap(mm, addr, len))
2035 return -ENOMEM;
2036 goto munmap_back;
2037 }
2038
2039
2040 if (!may_expand_vm(mm, len >> PAGE_SHIFT))
2041 return -ENOMEM;
2042
2043 if (mm->map_count > sysctl_max_map_count)
2044 return -ENOMEM;
2045
2046 if (security_vm_enough_memory(len >> PAGE_SHIFT))
2047 return -ENOMEM;
2048
2049
2050 vma = vma_merge(mm, prev, addr, addr + len, flags,
2051 NULL, NULL, pgoff, NULL);
2052 if (vma)
2053 goto out;
2054
2055
2056
2057
2058 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
2059 if (!vma) {
2060 vm_unacct_memory(len >> PAGE_SHIFT);
2061 return -ENOMEM;
2062 }
2063
2064 vma->vm_mm = mm;
2065 vma->vm_start = addr;
2066 vma->vm_end = addr + len;
2067 vma->vm_pgoff = pgoff;
2068 vma->vm_flags = flags;
2069 vma->vm_page_prot = vm_get_page_prot(flags);
2070 vma_link(mm, vma, prev, rb_link, rb_parent);
2071out:
2072 mm->total_vm += len >> PAGE_SHIFT;
2073 if (flags & VM_LOCKED) {
2074 if (!mlock_vma_pages_range(vma, addr, addr + len))
2075 mm->locked_vm += (len >> PAGE_SHIFT);
2076 }
2077 return addr;
2078}
2079
2080EXPORT_SYMBOL(do_brk);
2081
2082
2083void exit_mmap(struct mm_struct *mm)
2084{
2085 struct mmu_gather *tlb;
2086 struct vm_area_struct *vma;
2087 unsigned long nr_accounted = 0;
2088 unsigned long end;
2089
2090
2091 arch_exit_mmap(mm);
2092 mmu_notifier_release(mm);
2093
2094 if (mm->locked_vm) {
2095 vma = mm->mmap;
2096 while (vma) {
2097 if (vma->vm_flags & VM_LOCKED)
2098 munlock_vma_pages_all(vma);
2099 vma = vma->vm_next;
2100 }
2101 }
2102 vma = mm->mmap;
2103 lru_add_drain();
2104 flush_cache_mm(mm);
2105 tlb = tlb_gather_mmu(mm, 1);
2106
2107
2108 end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL);
2109 vm_unacct_memory(nr_accounted);
2110 free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0);
2111 tlb_finish_mmu(tlb, 0, end);
2112
2113
2114
2115
2116
2117 while (vma)
2118 vma = remove_vma(vma);
2119
2120 BUG_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT);
2121}
2122
2123
2124
2125
2126
2127int insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
2128{
2129 struct vm_area_struct * __vma, * prev;
2130 struct rb_node ** rb_link, * rb_parent;
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144 if (!vma->vm_file) {
2145 BUG_ON(vma->anon_vma);
2146 vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
2147 }
2148 __vma = find_vma_prepare(mm,vma->vm_start,&prev,&rb_link,&rb_parent);
2149 if (__vma && __vma->vm_start < vma->vm_end)
2150 return -ENOMEM;
2151 if ((vma->vm_flags & VM_ACCOUNT) &&
2152 security_vm_enough_memory_mm(mm, vma_pages(vma)))
2153 return -ENOMEM;
2154 vma_link(mm, vma, prev, rb_link, rb_parent);
2155 return 0;
2156}
2157
2158
2159
2160
2161
2162struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
2163 unsigned long addr, unsigned long len, pgoff_t pgoff)
2164{
2165 struct vm_area_struct *vma = *vmap;
2166 unsigned long vma_start = vma->vm_start;
2167 struct mm_struct *mm = vma->vm_mm;
2168 struct vm_area_struct *new_vma, *prev;
2169 struct rb_node **rb_link, *rb_parent;
2170 struct mempolicy *pol;
2171
2172
2173
2174
2175
2176 if (!vma->vm_file && !vma->anon_vma)
2177 pgoff = addr >> PAGE_SHIFT;
2178
2179 find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
2180 new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
2181 vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma));
2182 if (new_vma) {
2183
2184
2185
2186 if (vma_start >= new_vma->vm_start &&
2187 vma_start < new_vma->vm_end)
2188 *vmap = new_vma;
2189 } else {
2190 new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
2191 if (new_vma) {
2192 *new_vma = *vma;
2193 pol = mpol_dup(vma_policy(vma));
2194 if (IS_ERR(pol)) {
2195 kmem_cache_free(vm_area_cachep, new_vma);
2196 return NULL;
2197 }
2198 vma_set_policy(new_vma, pol);
2199 new_vma->vm_start = addr;
2200 new_vma->vm_end = addr + len;
2201 new_vma->vm_pgoff = pgoff;
2202 if (new_vma->vm_file) {
2203 get_file(new_vma->vm_file);
2204 if (vma->vm_flags & VM_EXECUTABLE)
2205 added_exe_file_vma(mm);
2206 }
2207 if (new_vma->vm_ops && new_vma->vm_ops->open)
2208 new_vma->vm_ops->open(new_vma);
2209 vma_link(mm, new_vma, prev, rb_link, rb_parent);
2210 }
2211 }
2212 return new_vma;
2213}
2214
2215
2216
2217
2218
2219int may_expand_vm(struct mm_struct *mm, unsigned long npages)
2220{
2221 unsigned long cur = mm->total_vm;
2222 unsigned long lim;
2223
2224 lim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
2225
2226 if (cur + npages > lim)
2227 return 0;
2228 return 1;
2229}
2230
2231
2232static int special_mapping_fault(struct vm_area_struct *vma,
2233 struct vm_fault *vmf)
2234{
2235 pgoff_t pgoff;
2236 struct page **pages;
2237
2238
2239
2240
2241
2242
2243
2244 pgoff = vmf->pgoff - vma->vm_pgoff;
2245
2246 for (pages = vma->vm_private_data; pgoff && *pages; ++pages)
2247 pgoff--;
2248
2249 if (*pages) {
2250 struct page *page = *pages;
2251 get_page(page);
2252 vmf->page = page;
2253 return 0;
2254 }
2255
2256 return VM_FAULT_SIGBUS;
2257}
2258
2259
2260
2261
/*
 * Intentionally empty close hook for special mappings.
 * NOTE(review): presumably installed so that vm_ops->close is non-NULL
 * (which would keep these vmas from being merged) - confirm against
 * vma_merge().
 */
static void special_mapping_close(struct vm_area_struct *vma)
{
}
2265
2266static struct vm_operations_struct special_mapping_vmops = {
2267 .close = special_mapping_close,
2268 .fault = special_mapping_fault,
2269};
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280int install_special_mapping(struct mm_struct *mm,
2281 unsigned long addr, unsigned long len,
2282 unsigned long vm_flags, struct page **pages)
2283{
2284 struct vm_area_struct *vma;
2285
2286 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
2287 if (unlikely(vma == NULL))
2288 return -ENOMEM;
2289
2290 vma->vm_mm = mm;
2291 vma->vm_start = addr;
2292 vma->vm_end = addr + len;
2293
2294 vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND;
2295 vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
2296
2297 vma->vm_ops = &special_mapping_vmops;
2298 vma->vm_private_data = pages;
2299
2300 if (unlikely(insert_vm_struct(mm, vma))) {
2301 kmem_cache_free(vm_area_cachep, vma);
2302 return -ENOMEM;
2303 }
2304
2305 mm->total_vm += len >> PAGE_SHIFT;
2306
2307 return 0;
2308}
2309
/*
 * Taken by mm_take_all_locks() and released by mm_drop_all_locks(), so
 * only one task at a time can be holding "all locks" of an mm.
 */
static DEFINE_MUTEX(mm_all_locks_mutex);
2311
2312static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
2313{
2314 if (!test_bit(0, (unsigned long *) &anon_vma->head.next)) {
2315
2316
2317
2318
2319 spin_lock_nest_lock(&anon_vma->lock, &mm->mmap_sem);
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329 if (__test_and_set_bit(0, (unsigned long *)
2330 &anon_vma->head.next))
2331 BUG();
2332 }
2333}
2334
2335static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
2336{
2337 if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347 if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
2348 BUG();
2349 spin_lock_nest_lock(&mapping->i_mmap_lock, &mm->mmap_sem);
2350 }
2351}
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385int mm_take_all_locks(struct mm_struct *mm)
2386{
2387 struct vm_area_struct *vma;
2388 int ret = -EINTR;
2389
2390 BUG_ON(down_read_trylock(&mm->mmap_sem));
2391
2392 mutex_lock(&mm_all_locks_mutex);
2393
2394 for (vma = mm->mmap; vma; vma = vma->vm_next) {
2395 if (signal_pending(current))
2396 goto out_unlock;
2397 if (vma->vm_file && vma->vm_file->f_mapping)
2398 vm_lock_mapping(mm, vma->vm_file->f_mapping);
2399 }
2400
2401 for (vma = mm->mmap; vma; vma = vma->vm_next) {
2402 if (signal_pending(current))
2403 goto out_unlock;
2404 if (vma->anon_vma)
2405 vm_lock_anon_vma(mm, vma->anon_vma);
2406 }
2407
2408 ret = 0;
2409
2410out_unlock:
2411 if (ret)
2412 mm_drop_all_locks(mm);
2413
2414 return ret;
2415}
2416
2417static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
2418{
2419 if (test_bit(0, (unsigned long *) &anon_vma->head.next)) {
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432 if (!__test_and_clear_bit(0, (unsigned long *)
2433 &anon_vma->head.next))
2434 BUG();
2435 spin_unlock(&anon_vma->lock);
2436 }
2437}
2438
2439static void vm_unlock_mapping(struct address_space *mapping)
2440{
2441 if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
2442
2443
2444
2445
2446 spin_unlock(&mapping->i_mmap_lock);
2447 if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
2448 &mapping->flags))
2449 BUG();
2450 }
2451}
2452
2453
2454
2455
2456
2457void mm_drop_all_locks(struct mm_struct *mm)
2458{
2459 struct vm_area_struct *vma;
2460
2461 BUG_ON(down_read_trylock(&mm->mmap_sem));
2462 BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
2463
2464 for (vma = mm->mmap; vma; vma = vma->vm_next) {
2465 if (vma->anon_vma)
2466 vm_unlock_anon_vma(vma->anon_vma);
2467 if (vma->vm_file && vma->vm_file->f_mapping)
2468 vm_unlock_mapping(vma->vm_file->f_mapping);
2469 }
2470
2471 mutex_unlock(&mm_all_locks_mutex);
2472}