New rdma_subr.c
  1 /*
  2  * CDDL HEADER START
  3  *
  4  * The contents of this file are subject to the terms of the
  5  * Common Development and Distribution License, Version 1.0 only
  6  * (the "License").  You may not use this file except in compliance
  7  * with the License.
  8  *
  9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 10  * or http://www.opensolaris.org/os/licensing.
 11  * See the License for the specific language governing permissions
 12  * and limitations under the License.
 13  *
 14  * When distributing Covered Code, include this CDDL HEADER in each
 15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 16  * If applicable, add the following below this CDDL HEADER, with the
 17  * fields enclosed by brackets "[]" replaced with your own identifying
 18  * information: Portions Copyright [yyyy] [name of copyright owner]
 19  *
 20  * CDDL HEADER END
 21  */
 22 /*
 23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
 24  * Use is subject to license terms.
 25  */
 26 
 27  /* Copyright (c) 2006, The Ohio State University. All rights reserved.
 28   *
 29   * Portions of this source code is developed by the team members of
 30   * The Ohio State University's Network-Based Computing Laboratory (NBCL),
 31   * headed by Professor Dhabaleswar K. (DK) Panda.
 32   *
  * Acknowledgements to contributions from developers:
 34   *   Ranjit Noronha: noronha@cse.ohio-state.edu
 35   *   Lei Chai      : chail@cse.ohio-state.edu
 36   *   Weikuan Yu    : yuw@cse.ohio-state.edu
 37   *
 38   */
 39 #pragma ident   "@(#)rdma_subr.c        1.11    05/06/08 SMI"
 40 
 41 #include <sys/systm.h>
 42 #include <sys/kstat.h>
 43 #include <sys/modctl.h>
 44 #include <rpc/rpc_rdma.h>
 45 
 46 #include <sys/ib/ibtl/ibti.h>
 47 
/*
 * Initialized from RDMA_MINCHUNK.
 * NOTE(review): not referenced anywhere else in this file; presumably a
 * chunking threshold consumed by other RPC/RDMA code -- confirm with users.
 */
uint_t rdma_minchunk = RDMA_MINCHUNK;

/*
 * Globals
 */
int rdma_modloaded = 0;         /* set to 1 by rdma_modload() on entry */
int rdma_dev_available = 0;     /* set by rdma_modload() from ibt_hw_is_present() */
kmutex_t rdma_modload_lock;     /* must be held when calling rdma_modload() */
rdma_registry_t *rdma_mod_head = NULL;  /* head of registered RDMA module list */
krwlock_t       rdma_lock;              /* protects rdma_mod_head list */
ldi_ident_t rpcmod_li = NULL;   /* LDI identity passed to ldi_open_by_name() */

/*
 * Statics
 */
/* LDI handle filled in by ldi_open_by_name() in rdma_modload(). */
static ldi_handle_t rpcib_handle = NULL;

/*
 * Externs
 *
 * Named-kstat arrays and their element counts; rdma_kstat_init() installs
 * them as the data of the "rpc_rdma_client" and "rpc_rdma_server" kstats.
 */
extern  kstat_named_t   *rdmarcstat_ptr;
extern  uint_t          rdmarcstat_ndata;
extern  kstat_named_t   *rdmarsstat_ptr;
extern  uint_t          rdmarsstat_ndata;

/* Forward declaration; called by rdma_modload() before its definition. */
void rdma_kstat_init();
 74 
 75 /*
 76  * RDMATF module registration routine.
 77  * This routine is expected to be called by the init routine in
 78  * the plugin modules.
 79  */
 80 rdma_stat
 81 rdma_register_mod(rdma_mod_t *mod)
 82 {
 83         rdma_registry_t **mp, *m;
 84 
 85         if (mod->rdma_version != RDMATF_VERS) {
 86                 return (RDMA_BADVERS);
 87         }
 88 
 89         rw_enter(&rdma_lock, RW_WRITER);
 90         /*
 91          * Ensure not already registered
 92          */
 93         mp = &rdma_mod_head;
 94         while (*mp != NULL) {
 95                 if (strncmp((*mp)->r_mod->rdma_api, mod->rdma_api,
 96                     KNC_STRSIZE) == 0) {
 97                         rw_exit(&rdma_lock);
 98                         return (RDMA_REG_EXIST);
 99                 }
100                 mp = &((*mp)->r_next);
101         }
102 
103         /*
104          * New one, create and add to registry
105          */
106         m = kmem_alloc(sizeof (rdma_registry_t), KM_SLEEP);
107         m->r_mod = kmem_alloc(sizeof (rdma_mod_t), KM_SLEEP);
108         *m->r_mod = *mod;
109         m->r_next = NULL;
110         m->r_mod->rdma_api = kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
111         (void) strncpy(m->r_mod->rdma_api, mod->rdma_api, KNC_STRSIZE);
112         m->r_mod->rdma_api[KNC_STRSIZE - 1] = '\0';
113         *mp = m;
114         rw_exit(&rdma_lock);
115 
116         return (RDMA_SUCCESS);
117 }
118 
119 /*
120  * RDMATF module unregistration routine.
121  * This routine is expected to be called by the fini routine in
122  * the plugin modules.
123  */
124 rdma_stat
125 rdma_unregister_mod(rdma_mod_t *mod)
126 {
127         rdma_registry_t **m, *mmod = NULL;
128 
129         rw_enter(&rdma_lock, RW_WRITER);
130 
131         m = &rdma_mod_head;
132         while (*m != NULL) {
133                 if (strncmp((*m)->r_mod->rdma_api, mod->rdma_api,
134                     KNC_STRSIZE) != 0) {
135                         m = &((*m)->r_next);
136                         continue;
137                 }
138                 /*
139                  * Check if any device attached, if so return error
140                  */
141                 if ((*m)->r_mod->rdma_count != 0) {
142                         rw_exit(&rdma_lock);
143                         return (RDMA_FAILED);
144                 }
145                 /*
146                  * Found entry. Now remove it.
147                  */
148                 mmod = *m;
149                 *m = (*m)->r_next;
150                 kmem_free(mmod->r_mod->rdma_api, KNC_STRSIZE);
151                 kmem_free(mmod->r_mod, sizeof (rdma_mod_t));
152                 kmem_free(mmod, sizeof (rdma_registry_t));
153                 rw_exit(&rdma_lock);
154                 return (RDMA_SUCCESS);
155         }
156 
157         /*
158          * Not found.
159          */
160         rw_exit(&rdma_lock);
161         return (RDMA_FAILED);
162 }
163 
164 /*
165  * Creates a new chunk list entry, and
166  * adds it to the end of a chunk list.
167  */
168 void
169 clist_add(struct clist **clp, uint32_t xdroff, int len,
170         struct mrc *shandle, caddr_t saddr,
171         struct mrc *dhandle, caddr_t daddr)
172 {
173         struct clist *cl;
174 
175         /* Find the end of the list */
176 
177         while (*clp != NULL)
178                 clp = &((*clp)->c_next);
179 
180         cl = kmem_zalloc(sizeof (*cl), KM_SLEEP);
181         cl->c_xdroff = xdroff;
182         cl->c_len = len;
183         cl->c_saddr = (uint64_t)(uintptr_t)saddr;
184         if (shandle)
185                 cl->c_smemhandle = *shandle;
186         cl->c_daddr = (uint64_t)(uintptr_t)daddr;
187         if (dhandle)
188                 cl->c_dmemhandle = *dhandle;
189         cl->c_next = NULL;
190 
191         *clp = cl;
192 }
193 
194 int
195 clist_register(CONN *conn, struct clist *cl, bool_t src)
196 {
197         struct clist *c;
198         int status;
199 
200         for (c = cl; c; c = c->c_next) {
201         if(c->c_len > 0)
202                 if (src) {
203                         status = RDMA_REGMEMSYNC(conn, (caddr_t)(struct as *)cl->c_adspc,
204                             (caddr_t)(uintptr_t)c->c_saddr,  c->c_len,
205 #ifdef SERVER_REG_CACHE
206                             &c->c_smemhandle, (void **)&c->c_ssynchandle, (void *)c->long_reply_buf);
207 #else
208                             &c->c_smemhandle, (void **)&c->c_ssynchandle);
209 #endif
210                 } else {
211                         status = RDMA_REGMEMSYNC(conn, (caddr_t)(struct as *)cl->c_adspc,
212                             (caddr_t)(uintptr_t)c->c_daddr, c->c_len,
213 #ifdef SERVER_REG_CACHE
214                             &c->c_dmemhandle, (void **)&c->c_dsynchandle,(void *)c->long_reply_buf);
215 #else
216                             &c->c_dmemhandle, (void **)&c->c_dsynchandle);
217 #endif
218                 }
219                 if (status != RDMA_SUCCESS) {
220                         (void) clist_deregister(conn, cl, src);
221                         return (status);
222                 }
223         }
224 
225         return (RDMA_SUCCESS);
226 }
227 
228 int
229 clist_deregister(CONN *conn, struct clist *cl, bool_t src)
230 {
231         struct clist *c;
232 
233         for (c = cl; c; c = c->c_next) {
234                 if (src) {
235                         if (c->c_smemhandle.mrc_rmr != 0) {
236                                 (void) RDMA_DEREGMEMSYNC(conn,
237                                     (caddr_t)(uintptr_t)c->c_saddr,
238                                     c->c_smemhandle,
239 #ifdef SERVER_REG_CACHE
240                                     (void *)(uintptr_t)c->c_ssynchandle, (void *)c->long_reply_buf);
241 #else
242                                     (void *)(uintptr_t)c->c_ssynchandle);
243 #endif
244                                 c->c_smemhandle.mrc_rmr = 0;
245                                 c->c_ssynchandle = NULL;
246                         }
247                 } else {
248                         if (c->c_dmemhandle.mrc_rmr != 0) {
249                                 (void) RDMA_DEREGMEMSYNC(conn,
250                                     (caddr_t)(uintptr_t)c->c_daddr,
251                                     c->c_dmemhandle,
252 #ifdef SERVER_REG_CACHE
253                                     (void *)(uintptr_t)c->c_dsynchandle, (void *)c->long_reply_buf);
254 #else
255                                     (void *)(uintptr_t)c->c_dsynchandle);
256 #endif
257                                 c->c_dmemhandle.mrc_rmr = 0;
258                                 c->c_dsynchandle = NULL;
259                         }
260                 }
261         }
262 
263         return (RDMA_SUCCESS);
264 }
265 
266 /*
267  * Frees up entries in chunk list
268  */
269 void
270 clist_free(struct clist *cl)
271 {
272         struct clist *c = cl;
273 
274         while (c != NULL) {
275                 cl = cl->c_next;
276                 kmem_free(c, sizeof (struct clist));
277                 c = cl;
278         }
279 }
280 
281 rdma_stat
282 rdma_clnt_postrecv(CONN *conn, uint32_t xid)
283 {
284         struct clist *cl = NULL;
285         rdma_stat retval;
286         rdma_buf_t rbuf = {0};
287 
288         rbuf.type = RECV_BUFFER;
289         if (RDMA_BUF_ALLOC(conn, &rbuf)) {
290                 retval = RDMA_NORESOURCE;
291         } else {
292                 clist_add(&cl, 0, rbuf.len, &rbuf.handle, rbuf.addr,
293                         NULL, NULL);
294                 retval = RDMA_CLNT_RECVBUF(conn, cl, xid);
295                 clist_free(cl);
296         }
297         return (retval);
298 }
299 
300 rdma_stat
301 rdma_svc_postrecv(CONN *conn)
302 {
303         struct clist *cl = NULL;
304         rdma_stat retval;
305         rdma_buf_t rbuf = {0};
306 
307         rbuf.type = RECV_BUFFER;
308         if (RDMA_BUF_ALLOC(conn, &rbuf)) {
309                 retval = RDMA_NORESOURCE;
310         } else {
311                 clist_add(&cl, 0, rbuf.len, &rbuf.handle, rbuf.addr,
312                         NULL, NULL);
313                 retval = RDMA_SVC_RECVBUF(conn, cl);
314                 clist_free(cl);
315         }
316         return (retval);
317 }
318 
319 rdma_stat
320 clist_syncmem(CONN *conn, struct clist *cl, bool_t src)
321 {
322         struct clist *c;
323         rdma_stat status;
324 
325         c = cl;
326         if (src) {
327                 while (c != NULL) {
328                         if(c->c_ssynchandle){
329                         status = RDMA_SYNCMEM(conn,
330                             (void *)(uintptr_t)c->c_ssynchandle,
331                             (caddr_t)(uintptr_t)c->c_saddr, c->c_len, 0);
332                         if (status != RDMA_SUCCESS)
333                                 return (status);
334                         }
335                         c = c->c_next;
336                 }
337         } else {
338                 while (c != NULL) {
339                         if(c->c_ssynchandle){
340                         status = RDMA_SYNCMEM(conn,
341                             (void *)(uintptr_t)c->c_dsynchandle,
342                             (caddr_t)(uintptr_t)c->c_daddr, c->c_len, 1);
343                         if (status != RDMA_SUCCESS)
344                                 return (status);
345                         }
346                         c = c->c_next;
347                 }
348         }
349         return (RDMA_SUCCESS);
350 }
351 
352 void
353 rdma_buf_free(CONN *conn, rdma_buf_t *rbuf)
354 {
355         if (!rbuf || rbuf->addr == NULL) {
356                 return;
357         }
358         if (rbuf->type != CHUNK_BUFFER) {
359                 /* pool buffer */
360                 RDMA_BUF_FREE(conn, rbuf);
361         } else {
362 #ifdef  SERVER_REG_CACHE
363                 if(rbuf->long_reply_buf)
364                 RDMA_FREE_SERVER_CACHE_BUF(conn, (rib_lrc_entry_t *)rbuf->long_reply_buf);
365                 else
366 #endif
367                 kmem_free(rbuf->addr, rbuf->len);
368         }
369         rbuf->addr = NULL;
370         rbuf->len = 0;
371 }
372 
373 /*
374  * Caller is holding rdma_modload_lock mutex
375  */
376 int
377 rdma_modload()
378 {
379         int status;
380         ASSERT(MUTEX_HELD(&rdma_modload_lock));
381         /*
382          * Load all available RDMA plugins which right now is only IB plugin.
383          * If no IB hardware is present, then quit right away.
384          * ENODEV -- For no device on the system
385          * EPROTONOSUPPORT -- For module not avilable either due to failure to
386          * load or some other reason.
387          */
388         rdma_modloaded = 1;
389         if (ibt_hw_is_present() == 0) {
390                 rdma_dev_available = 0;
391                 return (ENODEV);
392         }
393 
394         rdma_dev_available = 1;
395         if (rpcmod_li == NULL)
396                 return (EPROTONOSUPPORT);
397 
398         status = ldi_open_by_name("/devices/ib/rpcib@0:rpcib",
399             FREAD | FWRITE, kcred,
400             &rpcib_handle, rpcmod_li);
401         if (status != 0)
402                 return (EPROTONOSUPPORT);
403 
404         /* success */
405         rdma_kstat_init();
406         return (0);
407 }
408 
409 void
410 rdma_kstat_init(void)
411 {
412         kstat_t *ksp;
413 
414         /*
415          * The RDMA framework doesn't know how to deal with Zones, and is
416          * only available in the global zone.
417          */
418         ASSERT(INGLOBALZONE(curproc));
419         ksp = kstat_create_zone("unix", 0, "rpc_rdma_client", "rpc",
420             KSTAT_TYPE_NAMED, rdmarcstat_ndata,
421             KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE, GLOBAL_ZONEID);
422         if (ksp) {
423                 ksp->ks_data = (void *) rdmarcstat_ptr;
424                 kstat_install(ksp);
425         }
426 
427         ksp = kstat_create_zone("unix", 0, "rpc_rdma_server", "rpc",
428             KSTAT_TYPE_NAMED, rdmarsstat_ndata,
429             KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE, GLOBAL_ZONEID);
430         if (ksp) {
431                 ksp->ks_data = (void *) rdmarsstat_ptr;
432                 kstat_install(ksp);
433         }
434 }