New rdma_subr.c
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /* Copyright (c) 2006, The Ohio State University. All rights reserved.
28 *
29 * Portions of this source code is developed by the team members of
30 * The Ohio State University's Network-Based Computing Laboratory (NBCL),
31 * headed by Professor Dhabaleswar K. (DK) Panda.
32 *
33 * Acknowledgements to contributions from developors:
34 * Ranjit Noronha: noronha@cse.ohio-state.edu
35 * Lei Chai : chail@cse.ohio-state.edu
36 * Weikuan Yu : yuw@cse.ohio-state.edu
37 *
38 */
39 #pragma ident "@(#)rdma_subr.c 1.11 05/06/08 SMI"
40
41 #include <sys/systm.h>
42 #include <sys/kstat.h>
43 #include <sys/modctl.h>
44 #include <rpc/rpc_rdma.h>
45
46 #include <sys/ib/ibtl/ibti.h>
47
48 uint_t rdma_minchunk = RDMA_MINCHUNK;
49
50 /*
51 * Globals
52 */
53 int rdma_modloaded = 0; /* flag to load RDMA plugin modules */
54 int rdma_dev_available = 0; /* if any RDMA device is loaded */
55 kmutex_t rdma_modload_lock; /* protects rdma_modloaded flag */
56 rdma_registry_t *rdma_mod_head = NULL; /* head for RDMA modules */
57 krwlock_t rdma_lock; /* protects rdma_mod_head list */
58 ldi_ident_t rpcmod_li = NULL; /* identifies us with ldi_ framework */
59
60 /*
61 * Statics
62 */
63 static ldi_handle_t rpcib_handle = NULL;
64
65 /*
66 * Externs
67 */
68 extern kstat_named_t *rdmarcstat_ptr;
69 extern uint_t rdmarcstat_ndata;
70 extern kstat_named_t *rdmarsstat_ptr;
71 extern uint_t rdmarsstat_ndata;
72
73 void rdma_kstat_init();
74
75 /*
76 * RDMATF module registration routine.
77 * This routine is expected to be called by the init routine in
78 * the plugin modules.
79 */
80 rdma_stat
81 rdma_register_mod(rdma_mod_t *mod)
82 {
83 rdma_registry_t **mp, *m;
84
85 if (mod->rdma_version != RDMATF_VERS) {
86 return (RDMA_BADVERS);
87 }
88
89 rw_enter(&rdma_lock, RW_WRITER);
90 /*
91 * Ensure not already registered
92 */
93 mp = &rdma_mod_head;
94 while (*mp != NULL) {
95 if (strncmp((*mp)->r_mod->rdma_api, mod->rdma_api,
96 KNC_STRSIZE) == 0) {
97 rw_exit(&rdma_lock);
98 return (RDMA_REG_EXIST);
99 }
100 mp = &((*mp)->r_next);
101 }
102
103 /*
104 * New one, create and add to registry
105 */
106 m = kmem_alloc(sizeof (rdma_registry_t), KM_SLEEP);
107 m->r_mod = kmem_alloc(sizeof (rdma_mod_t), KM_SLEEP);
108 *m->r_mod = *mod;
109 m->r_next = NULL;
110 m->r_mod->rdma_api = kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
111 (void) strncpy(m->r_mod->rdma_api, mod->rdma_api, KNC_STRSIZE);
112 m->r_mod->rdma_api[KNC_STRSIZE - 1] = '\0';
113 *mp = m;
114 rw_exit(&rdma_lock);
115
116 return (RDMA_SUCCESS);
117 }
118
119 /*
120 * RDMATF module unregistration routine.
121 * This routine is expected to be called by the fini routine in
122 * the plugin modules.
123 */
124 rdma_stat
125 rdma_unregister_mod(rdma_mod_t *mod)
126 {
127 rdma_registry_t **m, *mmod = NULL;
128
129 rw_enter(&rdma_lock, RW_WRITER);
130
131 m = &rdma_mod_head;
132 while (*m != NULL) {
133 if (strncmp((*m)->r_mod->rdma_api, mod->rdma_api,
134 KNC_STRSIZE) != 0) {
135 m = &((*m)->r_next);
136 continue;
137 }
138 /*
139 * Check if any device attached, if so return error
140 */
141 if ((*m)->r_mod->rdma_count != 0) {
142 rw_exit(&rdma_lock);
143 return (RDMA_FAILED);
144 }
145 /*
146 * Found entry. Now remove it.
147 */
148 mmod = *m;
149 *m = (*m)->r_next;
150 kmem_free(mmod->r_mod->rdma_api, KNC_STRSIZE);
151 kmem_free(mmod->r_mod, sizeof (rdma_mod_t));
152 kmem_free(mmod, sizeof (rdma_registry_t));
153 rw_exit(&rdma_lock);
154 return (RDMA_SUCCESS);
155 }
156
157 /*
158 * Not found.
159 */
160 rw_exit(&rdma_lock);
161 return (RDMA_FAILED);
162 }
163
164 /*
165 * Creates a new chunk list entry, and
166 * adds it to the end of a chunk list.
167 */
168 void
169 clist_add(struct clist **clp, uint32_t xdroff, int len,
170 struct mrc *shandle, caddr_t saddr,
171 struct mrc *dhandle, caddr_t daddr)
172 {
173 struct clist *cl;
174
175 /* Find the end of the list */
176
177 while (*clp != NULL)
178 clp = &((*clp)->c_next);
179
180 cl = kmem_zalloc(sizeof (*cl), KM_SLEEP);
181 cl->c_xdroff = xdroff;
182 cl->c_len = len;
183 cl->c_saddr = (uint64_t)(uintptr_t)saddr;
184 if (shandle)
185 cl->c_smemhandle = *shandle;
186 cl->c_daddr = (uint64_t)(uintptr_t)daddr;
187 if (dhandle)
188 cl->c_dmemhandle = *dhandle;
189 cl->c_next = NULL;
190
191 *clp = cl;
192 }
193
194 int
195 clist_register(CONN *conn, struct clist *cl, bool_t src)
196 {
197 struct clist *c;
198 int status;
199
200 for (c = cl; c; c = c->c_next) {
201 if(c->c_len > 0)
202 if (src) {
203 status = RDMA_REGMEMSYNC(conn, (caddr_t)(struct as *)cl->c_adspc,
204 (caddr_t)(uintptr_t)c->c_saddr, c->c_len,
205 #ifdef SERVER_REG_CACHE
206 &c->c_smemhandle, (void **)&c->c_ssynchandle, (void *)c->long_reply_buf);
207 #else
208 &c->c_smemhandle, (void **)&c->c_ssynchandle);
209 #endif
210 } else {
211 status = RDMA_REGMEMSYNC(conn, (caddr_t)(struct as *)cl->c_adspc,
212 (caddr_t)(uintptr_t)c->c_daddr, c->c_len,
213 #ifdef SERVER_REG_CACHE
214 &c->c_dmemhandle, (void **)&c->c_dsynchandle,(void *)c->long_reply_buf);
215 #else
216 &c->c_dmemhandle, (void **)&c->c_dsynchandle);
217 #endif
218 }
219 if (status != RDMA_SUCCESS) {
220 (void) clist_deregister(conn, cl, src);
221 return (status);
222 }
223 }
224
225 return (RDMA_SUCCESS);
226 }
227
228 int
229 clist_deregister(CONN *conn, struct clist *cl, bool_t src)
230 {
231 struct clist *c;
232
233 for (c = cl; c; c = c->c_next) {
234 if (src) {
235 if (c->c_smemhandle.mrc_rmr != 0) {
236 (void) RDMA_DEREGMEMSYNC(conn,
237 (caddr_t)(uintptr_t)c->c_saddr,
238 c->c_smemhandle,
239 #ifdef SERVER_REG_CACHE
240 (void *)(uintptr_t)c->c_ssynchandle, (void *)c->long_reply_buf);
241 #else
242 (void *)(uintptr_t)c->c_ssynchandle);
243 #endif
244 c->c_smemhandle.mrc_rmr = 0;
245 c->c_ssynchandle = NULL;
246 }
247 } else {
248 if (c->c_dmemhandle.mrc_rmr != 0) {
249 (void) RDMA_DEREGMEMSYNC(conn,
250 (caddr_t)(uintptr_t)c->c_daddr,
251 c->c_dmemhandle,
252 #ifdef SERVER_REG_CACHE
253 (void *)(uintptr_t)c->c_dsynchandle, (void *)c->long_reply_buf);
254 #else
255 (void *)(uintptr_t)c->c_dsynchandle);
256 #endif
257 c->c_dmemhandle.mrc_rmr = 0;
258 c->c_dsynchandle = NULL;
259 }
260 }
261 }
262
263 return (RDMA_SUCCESS);
264 }
265
266 /*
267 * Frees up entries in chunk list
268 */
269 void
270 clist_free(struct clist *cl)
271 {
272 struct clist *c = cl;
273
274 while (c != NULL) {
275 cl = cl->c_next;
276 kmem_free(c, sizeof (struct clist));
277 c = cl;
278 }
279 }
280
281 rdma_stat
282 rdma_clnt_postrecv(CONN *conn, uint32_t xid)
283 {
284 struct clist *cl = NULL;
285 rdma_stat retval;
286 rdma_buf_t rbuf = {0};
287
288 rbuf.type = RECV_BUFFER;
289 if (RDMA_BUF_ALLOC(conn, &rbuf)) {
290 retval = RDMA_NORESOURCE;
291 } else {
292 clist_add(&cl, 0, rbuf.len, &rbuf.handle, rbuf.addr,
293 NULL, NULL);
294 retval = RDMA_CLNT_RECVBUF(conn, cl, xid);
295 clist_free(cl);
296 }
297 return (retval);
298 }
299
300 rdma_stat
301 rdma_svc_postrecv(CONN *conn)
302 {
303 struct clist *cl = NULL;
304 rdma_stat retval;
305 rdma_buf_t rbuf = {0};
306
307 rbuf.type = RECV_BUFFER;
308 if (RDMA_BUF_ALLOC(conn, &rbuf)) {
309 retval = RDMA_NORESOURCE;
310 } else {
311 clist_add(&cl, 0, rbuf.len, &rbuf.handle, rbuf.addr,
312 NULL, NULL);
313 retval = RDMA_SVC_RECVBUF(conn, cl);
314 clist_free(cl);
315 }
316 return (retval);
317 }
318
319 rdma_stat
320 clist_syncmem(CONN *conn, struct clist *cl, bool_t src)
321 {
322 struct clist *c;
323 rdma_stat status;
324
325 c = cl;
326 if (src) {
327 while (c != NULL) {
328 if(c->c_ssynchandle){
329 status = RDMA_SYNCMEM(conn,
330 (void *)(uintptr_t)c->c_ssynchandle,
331 (caddr_t)(uintptr_t)c->c_saddr, c->c_len, 0);
332 if (status != RDMA_SUCCESS)
333 return (status);
334 }
335 c = c->c_next;
336 }
337 } else {
338 while (c != NULL) {
339 if(c->c_ssynchandle){
340 status = RDMA_SYNCMEM(conn,
341 (void *)(uintptr_t)c->c_dsynchandle,
342 (caddr_t)(uintptr_t)c->c_daddr, c->c_len, 1);
343 if (status != RDMA_SUCCESS)
344 return (status);
345 }
346 c = c->c_next;
347 }
348 }
349 return (RDMA_SUCCESS);
350 }
351
352 void
353 rdma_buf_free(CONN *conn, rdma_buf_t *rbuf)
354 {
355 if (!rbuf || rbuf->addr == NULL) {
356 return;
357 }
358 if (rbuf->type != CHUNK_BUFFER) {
359 /* pool buffer */
360 RDMA_BUF_FREE(conn, rbuf);
361 } else {
362 #ifdef SERVER_REG_CACHE
363 if(rbuf->long_reply_buf)
364 RDMA_FREE_SERVER_CACHE_BUF(conn, (rib_lrc_entry_t *)rbuf->long_reply_buf);
365 else
366 #endif
367 kmem_free(rbuf->addr, rbuf->len);
368 }
369 rbuf->addr = NULL;
370 rbuf->len = 0;
371 }
372
373 /*
374 * Caller is holding rdma_modload_lock mutex
375 */
376 int
377 rdma_modload()
378 {
379 int status;
380 ASSERT(MUTEX_HELD(&rdma_modload_lock));
381 /*
382 * Load all available RDMA plugins which right now is only IB plugin.
383 * If no IB hardware is present, then quit right away.
384 * ENODEV -- For no device on the system
385 * EPROTONOSUPPORT -- For module not avilable either due to failure to
386 * load or some other reason.
387 */
388 rdma_modloaded = 1;
389 if (ibt_hw_is_present() == 0) {
390 rdma_dev_available = 0;
391 return (ENODEV);
392 }
393
394 rdma_dev_available = 1;
395 if (rpcmod_li == NULL)
396 return (EPROTONOSUPPORT);
397
398 status = ldi_open_by_name("/devices/ib/rpcib@0:rpcib",
399 FREAD | FWRITE, kcred,
400 &rpcib_handle, rpcmod_li);
401 if (status != 0)
402 return (EPROTONOSUPPORT);
403
404 /* success */
405 rdma_kstat_init();
406 return (0);
407 }
408
409 void
410 rdma_kstat_init(void)
411 {
412 kstat_t *ksp;
413
414 /*
415 * The RDMA framework doesn't know how to deal with Zones, and is
416 * only available in the global zone.
417 */
418 ASSERT(INGLOBALZONE(curproc));
419 ksp = kstat_create_zone("unix", 0, "rpc_rdma_client", "rpc",
420 KSTAT_TYPE_NAMED, rdmarcstat_ndata,
421 KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE, GLOBAL_ZONEID);
422 if (ksp) {
423 ksp->ks_data = (void *) rdmarcstat_ptr;
424 kstat_install(ksp);
425 }
426
427 ksp = kstat_create_zone("unix", 0, "rpc_rdma_server", "rpc",
428 KSTAT_TYPE_NAMED, rdmarsstat_ndata,
429 KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE, GLOBAL_ZONEID);
430 if (ksp) {
431 ksp->ks_data = (void *) rdmarsstat_ptr;
432 kstat_install(ksp);
433 }
434 }