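/*
 * SN Platform GRU Driver
 *
 * MMU notifier callbacks + TLB flushing
 *
 * This file handles the mmu notifier callouts invoked by the core kernel
 * when the state of a process address space changes, and uses them to keep
 * the GRU TLBs coherent. It also provides the TLB flush primitives used
 * elsewhere in the GRU driver.
 */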
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/device.h>
#include <linux/hugetlb.h>
#include <linux/delay.h>
#include <linux/timex.h>
#include <linux/srcu.h>
#include <asm/processor.h>
#include "gru.h"
#include "grutables.h"
#include <asm/uv/uv_hub.h>

#define gru_random()	get_cycles()
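
/*
 * Select a TLB Global Handle (TGH) for issuing a TLB invalidate.
 *
 * For a GRU on the local blade, use a fixed TGH derived from the blade-local
 * cpu number; each cpu normally has a private TGH, so no contention occurs.
 * For off-blade GRUs, pick a random TGH in the range above the blade-private
 * handles; those are shared, so the handle lock arbitrates access and must
 * be held until the invalidate completes.
 */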
static inline int get_off_blade_tgh(struct gru_state *gru)
{
	int n;

	n = GRU_NUM_TGH - gru->gs_tgh_first_remote;
	n = gru_random() % n;
	n += gru->gs_tgh_first_remote;
	return n;
}

static inline int get_on_blade_tgh(struct gru_state *gru)
{
	return uv_blade_processor_id() >> gru->gs_tgh_local_shift;
}

static struct gru_tlb_global_handle *get_lock_tgh_handle(struct gru_state
							 *gru)
{
	struct gru_tlb_global_handle *tgh;
	int n;

	preempt_disable();
	if (uv_numa_blade_id() == gru->gs_blade_id)
		n = get_on_blade_tgh(gru);
	else
		n = get_off_blade_tgh(gru);
	tgh = get_tgh_by_index(gru, n);
	lock_tgh_handle(tgh);

	return tgh;
}

static void get_unlock_tgh_handle(struct gru_tlb_global_handle *tgh)
{
	unlock_tgh_handle(tgh);
	preempt_enable();
}
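
/*
 * gru_flush_tlb_range
 *
 * General purpose TLB invalidation function. For each GRU in the gms asid
 * map, either invalidate the TLB entries covering the range via a TGH or,
 * if no context using the asid is currently loaded, zap the asid itself.
 * Zapping forces a new asid to be assigned on the next GRU fault, which
 * effectively flushes the entire TLB for the mm on that GRU.
 */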
void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start,
			 unsigned long len)
{
	struct gru_state *gru;
	struct gru_mm_tracker *asids;
	struct gru_tlb_global_handle *tgh;
	unsigned long num;
	int grupagesize, pagesize, pageshift, gid, asid;

	/* Base pages only - huge pages are not yet handled */
	pageshift = PAGE_SHIFT;
	pagesize = (1UL << pageshift);
	grupagesize = GRU_PAGESIZE(pageshift);
	num = min(((len + pagesize - 1) >> pageshift), GRUMAXINVAL);

	STAT(flush_tlb);
	gru_dbg(grudev, "gms %p, start 0x%lx, len 0x%lx, asidmap 0x%lx\n", gms,
		start, len, gms->ms_asidmap[0]);

	spin_lock(&gms->ms_asid_lock);
	for_each_gru_in_bitmap(gid, gms->ms_asidmap) {
		STAT(flush_tlb_gru);
		gru = GID_TO_GRU(gid);
		asids = gms->ms_asids + gid;
		asid = asids->mt_asid;
		if (asids->mt_ctxbitmap && asid) {
			STAT(flush_tlb_gru_tgh);
			asid = GRUASID(asid, start);
			gru_dbg(grudev,
				" FLUSH gruid %d, asid 0x%x, num %ld, cbmap 0x%x\n",
				gid, asid, num, asids->mt_ctxbitmap);
			tgh = get_lock_tgh_handle(gru);
			tgh_invalidate(tgh, start, ~0, asid, grupagesize, 0,
				       num - 1, asids->mt_ctxbitmap);
			get_unlock_tgh_handle(tgh);
		} else {
			STAT(flush_tlb_gru_zero_asid);
			asids->mt_asid = 0;
			__clear_bit(gru->gs_gid, gms->ms_asidmap);
			gru_dbg(grudev,
				" CLEARASID gruid %d, asid 0x%x, cbmap 0x%x, asidmap 0x%lx\n",
				gid, asid, asids->mt_ctxbitmap,
				gms->ms_asidmap[0]);
		}
	}
	spin_unlock(&gms->ms_asid_lock);
}
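
/*
 * Flush the entire TLB on a chiplet.
 */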
void gru_flush_all_tlb(struct gru_state *gru)
{
	struct gru_tlb_global_handle *tgh;

	gru_dbg(grudev, "gru %p, gid %d\n", gru, gru->gs_gid);
	tgh = get_lock_tgh_handle(gru);
	tgh_invalidate(tgh, 0, ~0, 0, 1, 1, GRUMAXINVAL - 1, 0xffff);
	get_unlock_tgh_handle(tgh);
}
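
/*
 * MMUOPS notifier callout functions
 */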
static void gru_invalidate_range_start(struct mmu_notifier *mn,
				       struct mm_struct *mm,
				       unsigned long start, unsigned long end)
{
	struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
						 ms_notifier);

	STAT(mmu_invalidate_range);
	atomic_inc(&gms->ms_range_active);
	gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx, act %d\n", gms,
		start, end, atomic_read(&gms->ms_range_active));
	gru_flush_tlb_range(gms, start, end - start);
}

static void gru_invalidate_range_end(struct mmu_notifier *mn,
				     struct mm_struct *mm, unsigned long start,
				     unsigned long end)
{
	struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
						 ms_notifier);

	/* Drop the active-range count, then wake waiters blocked on it */
	(void)atomic_dec_and_test(&gms->ms_range_active);

	wake_up_all(&gms->ms_wait_queue);
	gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx\n", gms, start, end);
}

static void gru_invalidate_page(struct mmu_notifier *mn, struct mm_struct *mm,
				unsigned long address)
{
	struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
						 ms_notifier);

	STAT(mmu_invalidate_page);
	gru_flush_tlb_range(gms, address, PAGE_SIZE);
	gru_dbg(grudev, "gms %p, address 0x%lx\n", gms, address);
}

static void gru_release(struct mmu_notifier *mn, struct mm_struct *mm)
{
	struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
						 ms_notifier);

	gms->ms_released = 1;
	gru_dbg(grudev, "gms %p\n", gms);
}


static const struct mmu_notifier_ops gru_mmuops = {
	.invalidate_page	= gru_invalidate_page,
	.invalidate_range_start	= gru_invalidate_range_start,
	.invalidate_range_end	= gru_invalidate_range_end,
	.release		= gru_release,
};

/* Find an existing GRU mmu_notifier registered on this mm, if any */
static struct mmu_notifier *mmu_find_ops(struct mm_struct *mm,
					 const struct mmu_notifier_ops *ops)
{
	struct mmu_notifier *mn, *gru_mn = NULL;
	struct hlist_node *n;

	if (mm->mmu_notifier_mm) {
		rcu_read_lock();
		hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list,
					 hlist)
			if (mn->ops == ops) {
				gru_mn = mn;
				break;
			}
		rcu_read_unlock();
	}
	return gru_mn;
}

struct gru_mm_struct *gru_register_mmu_notifier(void)
{
	struct gru_mm_struct *gms;
	struct mmu_notifier *mn;
	int err;

	mn = mmu_find_ops(current->mm, &gru_mmuops);
	if (mn) {
		gms = container_of(mn, struct gru_mm_struct, ms_notifier);
		atomic_inc(&gms->ms_refcnt);
	} else {
		gms = kzalloc(sizeof(*gms), GFP_KERNEL);
		if (!gms)
			return ERR_PTR(-ENOMEM);
		spin_lock_init(&gms->ms_asid_lock);
		gms->ms_notifier.ops = &gru_mmuops;
		atomic_set(&gms->ms_refcnt, 1);
		init_waitqueue_head(&gms->ms_wait_queue);
		err = __mmu_notifier_register(&gms->ms_notifier, current->mm);
		if (err) {
			kfree(gms);
			return ERR_PTR(err);
		}
	}
	gru_dbg(grudev, "gms %p, refcnt %d\n", gms,
		atomic_read(&gms->ms_refcnt));
	return gms;
}

void gru_drop_mmu_notifier(struct gru_mm_struct *gms)
{
	gru_dbg(grudev, "gms %p, refcnt %d, released %d\n", gms,
		atomic_read(&gms->ms_refcnt), gms->ms_released);
	if (atomic_dec_return(&gms->ms_refcnt) == 0) {
		if (!gms->ms_released)
			mmu_notifier_unregister(&gms->ms_notifier, current->mm);
		kfree(gms);
	}
}
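
/*
 * Initialize the TGH parameters for a GRU chiplet. Up to MAX_LOCAL_TGH
 * handles are dedicated to cpus on the local blade, one handle per group
 * of (1 << gs_tgh_local_shift) cpus. The handles from gs_tgh_first_remote
 * upward are shared by off-blade cpus and are selected at random with
 * lock arbitration.
 */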
#define MAX_LOCAL_TGH	16

void gru_tgh_flush_init(struct gru_state *gru)
{
	int cpus, shift = 0, n;

	cpus = uv_blade_nr_possible_cpus(gru->gs_blade_id);

	/* n = cpus rounded up to the next power of 2 */
	if (cpus) {
		n = 1 << fls(cpus - 1);

		/*
		 * shift count for converting local cpu# to TGH index
		 *      0 if cpus <= MAX_LOCAL_TGH,
		 *      1 if cpus <= 2*MAX_LOCAL_TGH,
		 *      etc
		 */
		shift = max(0, fls(n - 1) - fls(MAX_LOCAL_TGH - 1));
	}
	gru->gs_tgh_local_shift = shift;

	/* first TGH index to use for remote purges */
	gru->gs_tgh_first_remote = (cpus + (1 << shift) - 1) >> shift;
}