1 /*
  2  * CDDL HEADER START
  3  *
  4  * The contents of this file are subject to the terms of the
  5  * Common Development and Distribution License, Version 1.0 only
  6  * (the "License").  You may not use this file except in compliance
  7  * with the License.
  8  *
  9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 10  * or http://www.opensolaris.org/os/licensing.
 11  * See the License for the specific language governing permissions
 12  * and limitations under the License.
 13  *
 14  * When distributing Covered Code, include this CDDL HEADER in each
 15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 16  * If applicable, add the following below this CDDL HEADER, with the
 17  * fields enclosed by brackets "[]" replaced with your own identifying
 18  * information: Portions Copyright [yyyy] [name of copyright owner]
 19  *
 20  * CDDL HEADER END
 21  */
 22 /*
 23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 24  * Use is subject to license terms.
 25  */
 26 













 27 #ifndef _RPC_RPC_RDMA_H
 28 #define _RPC_RPC_RDMA_H
 29 
 30 #pragma ident   "@(#)rpc_rdma.h 1.9     05/06/08 SMI"
 31 
 32 #include <rpc/rpc.h>
 33 #include <rpc/rpc_sztypes.h>
 34 #include <sys/sunddi.h>
 35 #include <sys/sunldi.h>
 36 
 37 #ifdef __cplusplus
 38 extern "C" {
 39 #endif
 40 
 41 #define RPCRDMA_VERS    0       /* Version of the RPC over RDMA protocol */ 
 42 #define RDMATF_VERS     1       /* Version of the API used by RPC for RDMA */
 43 #define RDMATF_VERS_1   1       /* Current version of RDMATF */
 44 












 45 /*




 46  * The size of an RPC call or reply message
 47  */
 48 #define RPC_MSG_SZ  1024
 49 
 50 /*
 51  * Storage for a chunk list
 52  */
 53 #define RPC_CL_SZ  1024
 54 
 55 /*
 56  * Chunk size
 57  */
 58 #define MINCHUNK  1024
 59 
 60 /*
 61  * Size of receive buffer
 62  */
 63 #define RPC_BUF_SIZE    2048

 64 
 65 #define NOWAIT  0       /* don't wait for operation of complete */
 66 #define WAIT    1       /* wait and ensure that operation is complete */
 67 
 68 /*
 69  * RDMA xdr buffer control and other control flags. Add new flags here,
 70  * set them in private structure for xdr over RDMA in xdr_rdma.c
 71  */
 72 #define RDMA_NOCHUNK            0x1
 73 
















 74 /*







































 75  * Return codes from RDMA operations
 76  */
 77 typedef enum {
 78 
 79         RDMA_SUCCESS = 0,       /* successful operation */
 80 
 81         RDMA_INVAL = 1,         /* invalid parameter */
 82         RDMA_TIMEDOUT = 2,      /* operation timed out */
 83         RDMA_INTR = 3,          /* operation interrupted */
 84         RDMA_NORESOURCE = 4,    /* insufficient resource */
 85         /*
 86          * connection errors
 87          */
 88         RDMA_REJECT = 5,        /* connection req rejected */
 89         RDMA_NOLISTENER = 6,    /* no listener on server */
 90         RDMA_UNREACHABLE = 7,   /* host unreachable */
 91         RDMA_CONNLOST = 8,      /* connection lost */
 92 
 93         RDMA_XPRTFAILED = 9,    /* RDMA transport failed */
 94         RDMA_PROTECTERR = 10,   /* memory protection error */
 95         RDMA_OVERRUN = 11,      /* transport overrun */
 96         RDMA_RECVQEMPTY = 12,   /* incoming pkt dropped, recv q empty */
 97         RDMA_PROTFAILED = 13,   /* RDMA protocol failed */
 98         RDMA_NOTSUPP = 14,      /* requested feature not supported */
 99         RDMA_REMOTERR = 15,     /* error at remote end */
100         /*
101          * RDMATF errors
102          */
103         RDMA_BADVERS = 16,      /* mismatch RDMATF versions */
104         RDMA_REG_EXIST = 17,    /* RDMATF registration already exists */
105 
106         /*
107          * fallback error
108          */
109         RDMA_FAILED = 18        /* generic error */
110 } rdma_stat;
111 
/*
 * Memory region context. This is an RDMA provider generated
 * handle for a registered arbitrary size contiguous virtual
 * memory. The RDMA Interface Adapter needs this for local or
 * remote memory access.
 *
 * The mrc_rmr field holds the remote memory region context
 * which is sent over-the-wire to provide the remote host
 * with RDMA access to the memory region.
 *
 * The local side of the handle lives in the lhdl union, which
 * currently has a single member (mr). Use the mrc_lmr and mrc_linfo
 * accessor macros below instead of spelling out the nested path.
 */
struct mrc {
	uint32_t	mrc_rmr;	/* Remote MR context, sent OTW */
	union {
		struct mr {
			uint32_t	lmr;	/* Local MR context */
			uint64_t	linfo;	/* Local memory info */
		} mr;
	} lhdl;
};

/* Shorthand for the local-handle fields nested inside struct mrc. */
#define	mrc_lmr		lhdl.mr.lmr
#define	mrc_linfo	lhdl.mr.linfo
134  
135 /*
136  * The XDR offset value is used by the XDR
137  * routine to identify the position in the
138  * RPC message where the opaque object would
139  * normally occur. Neither the data content
140  * of the chunk, nor its size field are included
141  * in the RPC message.  The XDR offset is calculated
142  * as if the chunks were present.
143  *
144  * The remaining fields identify the chunk of data
145  * on the sender.  The c_memhandle identifies a
146  * registered RDMA memory region and the c_addr
147  * and c_len fields identify the chunk within it.
148  */
















149 struct clist {
150         uint32          c_xdroff;       /* XDR offset */
151         uint32          c_len;          /* Length */
152         struct mrc      c_smemhandle;   /* src memory handle */
153         uint64          c_ssynchandle;  /* src sync handle */
154         uint64          c_saddr;        /* src address */
155         struct mrc      c_dmemhandle;   /* dst memory handle */
156         uint64          c_dsynchandle;  /* dst sync handle */
157         uint64          c_daddr;        /* dst address */



158         struct clist    *c_next;        /* Next chunk */
159 };
160 
161 typedef struct clist clist;
162 









/*
 * Kinds of RDMA message. Each value's comment states what follows
 * it in the message; the values are assigned explicitly.
 */
enum rdma_proc {
	RDMA_MSG	= 0,	/* chunk list and RPC msg follow */
	RDMA_NOMSG	= 1,	/* only chunk list follows */
	RDMA_MSGP	= 2,	/* chunk list and RPC msg with padding follow */
	RDMA_DONE	= 3	/* signal completion of chunk transfer */
};
169 
170 /*
171  * Listener information for a service
172  */
173 struct rdma_svc_data {
174         queue_t         q;      /* queue_t to place incoming pkts */
175         int             active; /* If active, after registeration startup */
176         rdma_stat       err_code;       /* Error code from plugin layer */
177         int32_t         svcid;          /* RDMA based service identifier */
178 };
179 
180 /*
181  * Per RDMA plugin module information.
182  * Will be populated by each plugin
183  * module during its initialization.
184  */
185 typedef struct rdma_mod {
186         char            *rdma_api;              /* "kvipl", "ibtf", etc */
187         uint_t          rdma_version;           /* RDMATF API version */
188         int             rdma_count;             /* # of devices */
189         struct rdmaops  *rdma_ops;              /* rdma op vector for api */
190 } rdma_mod_t;
191 
192 /*
193  * Registry of RDMA plugins
194  */
195 typedef struct rdma_registry {
196         rdma_mod_t      *r_mod;         /* plugin mod info */
197         struct rdma_registry *r_next;   /* next registered RDMA plugin */
198 } rdma_registry_t;
199 
200 /*
201  * RDMA transport information
202  */
203 typedef struct rdma_info {
204         uint_t  addrlen;        /* address length */
205         uint_t  mts;            /* max transfer size */
206         uint_t  mtu;            /* native mtu size of unlerlying network */
207 } rdma_info_t;
208 
209 /*
210  * RDMA Connection information
211  */
212 typedef struct conn {
213         rdma_mod_t      *c_rdmamod;     /* RDMA transport info for conn */
214         struct netbuf   c_raddr;        /* remote address */
215         struct netbuf   c_laddr;        /* local address */
216         int             c_ref;          /* no. of clients of connection */
217         struct conn     *c_next;        /* next in list of connections */
218         struct conn     *c_prev;        /* prev in list of connections */
219         caddr_t         c_private;      /* transport specific stuff */
220 
221 #define C_IDLE          0x80000000
222 #define C_CONN_PEND     0x40000000
223 #define C_CONNECTED     0x20000000
224 #define C_ERROR         0x10000000
225 #define C_DISCONN_PEND  0x08000000
226 #define C_REMOTE_DOWN   0x04000000
227 
228         uint_t          c_state;        /* state of connection */





229         kmutex_t        c_lock;         /* protect c_state and c_ref fields */
230         kcondvar_t      c_cv;           /* to signal when pending is done */



231 } CONN;
232 
233 
/*
 * Memory management for the RDMA buffers
 */
/*
 * RDMA buffer types. No explicit values are given, so the
 * enumerators number upward from zero in declaration order.
 */
typedef enum {
	SEND_BUFFER,	/* buf for send msg */
	SEND_DESCRIPTOR, /* buf used for send msg descriptor in plugins only */
	RECV_BUFFER,	/* buf for recv msg */
	RECV_DESCRIPTOR, /* buf used for recv msg descriptor in plugins only */
	CHUNK_BUFFER	/* chunk buf used in RDMATF only and not in plugins */
} rdma_btype;
247 
248 /*
249  * RDMA buffer information
250  */
251 typedef struct rdma_buf {
252         rdma_btype      type;   /* buffer type */
253         int             len;    /* length of buffer */
254         caddr_t         addr;   /* buffer address */
255         struct mrc      handle; /* buffer registration handle */



256 } rdma_buf_t;
257 
258 /*
259  * Data transferred from plugin interrupt to svc_queuereq()
260  */
261 struct recv_data {
262         CONN            *conn;
263         int             status;
264         rdma_buf_t      rpcmsg;
265 };
266 
267 /*
268  * Operations vector for RDMA transports.
269  */
270 typedef struct rdmaops {
271         /* Network */
272         rdma_stat       (*rdma_reachable)(int addr_type, struct netbuf *,
273                                                 void **handle);
274         /* Connection */
275         rdma_stat       (*rdma_get_conn)(struct netbuf *, int addr_type,
276                                                 void *, CONN **);
277         rdma_stat       (*rdma_rel_conn)(CONN *);
278         /* Server side listner start and stop routines */
279         void            (*rdma_svc_listen)(struct rdma_svc_data *);
280         void            (*rdma_svc_stop)(struct rdma_svc_data *);
281         /* Memory */
282         rdma_stat       (*rdma_regmem)(CONN *, caddr_t, uint_t, struct mrc *); 
283         rdma_stat       (*rdma_deregmem)(CONN *, caddr_t, struct mrc);
284         rdma_stat       (*rdma_regmemsync)(CONN *, caddr_t, uint_t, 






285                                 struct mrc *, void **);
286         rdma_stat       (*rdma_deregmemsync)(CONN *, caddr_t, struct mrc,
287                                 void *);


288         rdma_stat       (*rdma_syncmem)(CONN *, void *, caddr_t, int, int);
289         /* Buffer */
290         rdma_stat       (*rdma_buf_alloc)(CONN *, rdma_buf_t *);
291         void            (*rdma_buf_free)(CONN *, rdma_buf_t *);
292         /* Transfer */
293         rdma_stat       (*rdma_send)(CONN *, clist *, uint32_t);






294         rdma_stat       (*rdma_send_resp)(CONN *, clist *, uint32_t);
295         rdma_stat       (*rdma_clnt_recvbuf)(CONN *, clist *, uint32_t);
296         rdma_stat       (*rdma_svc_recvbuf)(CONN *, clist *);
297         rdma_stat       (*rdma_recv)(CONN *, clist **, uint32_t);
298         /* RDMA */
299         rdma_stat       (*rdma_read)(CONN *, clist *, int);
300         rdma_stat       (*rdma_write)(CONN *, clist *, int);
301         /* INFO */
302         rdma_stat       (*rdma_getinfo)(rdma_info_t *info);










303 
304 } rdmaops_t;
305 
/*
 * RDMA operations.
 *
 * Each macro calls one member of an operations vector.
 * RDMA_REACHABLE and RDMA_GET_CONN take the vector itself,
 * RDMA_GETINFO reaches it through a plugin module (rdma_mod->rdma_ops),
 * and every other macro reaches it through a connection
 * (conn->c_rdmamod->rdma_ops).
 *
 * The connection-based macros expand `conn' twice: once to find the
 * vector and once as the first call argument. Do not pass an
 * expression with side effects.
 *
 * Each line-continuation backslash must be the last character on its
 * line. Whitespace after it, or a blank line following it, cuts the
 * macro definition short.
 */
#define	RDMA_REACHABLE(rdma_ops, addr_type, addr, handle)	\
	(*(rdma_ops)->rdma_reachable)(addr_type, addr, handle)

#define	RDMA_GET_CONN(rdma_ops, addr, addr_type, handle, conn)	\
	(*(rdma_ops)->rdma_get_conn)(addr, addr_type, handle, conn)

#define	RDMA_REL_CONN(conn)	\
	(*(conn)->c_rdmamod->rdma_ops->rdma_rel_conn)(conn)

#define	RDMA_REGMEM(conn, buff, len, handle)	\
	(*(conn)->c_rdmamod->rdma_ops->rdma_regmem)(conn, buff, len, handle)

#define	RDMA_DEREGMEM(conn, buff, handle)	\
	(*(conn)->c_rdmamod->rdma_ops->rdma_deregmem)(conn, buff, handle)

#define	RDMA_REGMEMSYNC(conn, buff, len, handle, synchandle)	\
	(*(conn)->c_rdmamod->rdma_ops->rdma_regmemsync)(conn, buff, \
	    len, handle, synchandle)

#define	RDMA_DEREGMEMSYNC(conn, buff, handle, synchandle)	\
	(*(conn)->c_rdmamod->rdma_ops->rdma_deregmemsync)(conn, buff, \
	    handle, synchandle)

#define	RDMA_SYNCMEM(conn, handle, buff, len, direction)	\
	(*(conn)->c_rdmamod->rdma_ops->rdma_syncmem)(conn, handle, \
	    buff, len, direction)

#define	RDMA_BUF_ALLOC(conn, rbuf)	\
	(*(conn)->c_rdmamod->rdma_ops->rdma_buf_alloc)(conn, rbuf)

#define	RDMA_BUF_FREE(conn, rbuf)	\
	(*(conn)->c_rdmamod->rdma_ops->rdma_buf_free)(conn, rbuf)

#define	RDMA_SEND(conn, sendlist, xid)	\
	(*(conn)->c_rdmamod->rdma_ops->rdma_send)(conn, sendlist, xid)

#define	RDMA_SEND_RESP(conn, sendlist, xid)	\
	(*(conn)->c_rdmamod->rdma_ops->rdma_send_resp)(conn, sendlist, xid)

#define	RDMA_CLNT_RECVBUF(conn, cl, xid)	\
	(*(conn)->c_rdmamod->rdma_ops->rdma_clnt_recvbuf)(conn, cl, xid)

#define	RDMA_SVC_RECVBUF(conn, cl)	\
	(*(conn)->c_rdmamod->rdma_ops->rdma_svc_recvbuf)(conn, cl)

#define	RDMA_RECV(conn, recvlist, xid)	\
	(*(conn)->c_rdmamod->rdma_ops->rdma_recv)(conn, recvlist, xid)

#define	RDMA_READ(conn, cl, wait)	\
	(*(conn)->c_rdmamod->rdma_ops->rdma_read)(conn, cl, wait)

#define	RDMA_WRITE(conn, cl, wait)	\
	(*(conn)->c_rdmamod->rdma_ops->rdma_write)(conn, cl, wait)

#define	RDMA_GETINFO(rdma_mod, info)	\
	(*(rdma_mod)->rdma_ops->rdma_getinfo)(info)
365 














#ifdef _KERNEL
/*
 * Kernel-only globals and routines; none of this is visible unless
 * _KERNEL is defined.
 */
extern rdma_registry_t	*rdma_mod_head;
extern krwlock_t rdma_lock;		/* protects rdma_mod_head list */
extern int rdma_modloaded;		/* flag for loading RDMA plugins */
extern int rdma_dev_available;		/* rdma device is loaded or not */
extern kmutex_t rdma_modload_lock;	/* protects rdma_modloaded flag */
extern uint_t rdma_minchunk;
extern ldi_ident_t rpcmod_li;		/* needed by layered driver framework */

/*
 * General RDMA routines
 */
extern void clist_add(struct clist **clp, uint32_t xdroff, int len,
	struct mrc *shandle, caddr_t saddr,
	struct mrc *dhandle, caddr_t daddr);
extern void clist_free(struct clist *cl);
extern int clist_register(CONN *conn, struct clist *cl, bool_t src);
extern int clist_deregister(CONN *conn, struct clist *cl, bool_t src);
extern rdma_stat rdma_clnt_postrecv(CONN *conn, uint32_t xid);
extern rdma_stat rdma_svc_postrecv(CONN *conn);
extern rdma_stat clist_syncmem(CONN *conn, struct clist *cl, bool_t src);
extern rdma_stat rdma_register_mod(rdma_mod_t *mod);
extern rdma_stat rdma_unregister_mod(rdma_mod_t *mod);
extern void rdma_buf_free(CONN *conn, rdma_buf_t *rbuf);
/* (void) gives a real prototype; an empty list leaves arguments unchecked. */
extern int rdma_modload(void);

/*
 * RDMA XDR
 */
extern void xdrrdma_create(XDR *, caddr_t, uint_t, int, struct clist *,
	enum xdr_op, CONN *);
extern void xdrrdma_destroy(XDR *);
extern struct clist *xdrrdma_clist(XDR *);
extern uint_t xdrrdma_getpos(XDR *);
extern bool_t xdrrdma_setpos(XDR *, uint_t);
extern bool_t xdr_clist(XDR *, clist *);
extern bool_t xdr_do_clist(XDR *, clist **);
extern uint_t xdr_getbufsize(XDR *);
extern unsigned int xdrrdma_sizeof(xdrproc_t func, void *data, int min_chunk);
extern unsigned int xdrrdma_authsize(AUTH *auth, struct cred *cred,
	int min_chunk);
#endif /* _KERNEL */
407 
408 #ifdef __cplusplus
409 }
410 #endif
411 
412 #endif  /* _RPC_RPC_RDMA_H */
--- EOF ---