Old rpc_rdma.h
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #ifndef _RPC_RPC_RDMA_H
28 #define _RPC_RPC_RDMA_H
29
30 #pragma ident "@(#)rpc_rdma.h 1.9 05/06/08 SMI"
31
32 #include <rpc/rpc.h>
33 #include <rpc/rpc_sztypes.h>
34 #include <sys/sunddi.h>
35 #include <sys/sunldi.h>
36
37 #ifdef __cplusplus
38 extern "C" {
39 #endif
40
/*
 * Version numbers: one for the RPC over RDMA protocol itself and one for
 * the RDMATF API that RPC uses to reach the RDMA plugins.
 */
#define	RPCRDMA_VERS	0	/* RPC over RDMA protocol version */
#define	RDMATF_VERS	1	/* RDMATF API version used by RPC */
#define	RDMATF_VERS_1	1	/* current RDMATF version */
44
/*
 * Fixed sizes used by the RPC/RDMA layer.
 */
#define	RPC_MSG_SZ	1024	/* size of an RPC call or reply message */
#define	RPC_CL_SZ	1024	/* storage for a chunk list */
#define	MINCHUNK	1024	/* chunk size */
#define	RPC_BUF_SIZE	2048	/* size of receive buffer */
64
/*
 * Completion behaviour selectors.
 */
#define	NOWAIT	0	/* don't wait for the operation to complete */
#define	WAIT	1	/* wait and ensure that the operation is complete */
67
/*
 * Control flags for the RDMA xdr buffer (and other control flags).
 * New flags belong here; they are set in the private structure for
 * xdr over RDMA in xdr_rdma.c.
 */
#define	RDMA_NOCHUNK	0x1
73
/*
 * Status codes returned by RDMA operations.  Every enumerator carries an
 * explicit value; RDMA_SUCCESS is zero and all other codes are errors.
 */
typedef enum {
	RDMA_SUCCESS	= 0,	/* successful operation */

	/* general errors */
	RDMA_INVAL	= 1,	/* invalid parameter */
	RDMA_TIMEDOUT	= 2,	/* operation timed out */
	RDMA_INTR	= 3,	/* operation interrupted */
	RDMA_NORESOURCE	= 4,	/* insufficient resource */

	/* connection errors */
	RDMA_REJECT	= 5,	/* connection req rejected */
	RDMA_NOLISTENER	= 6,	/* no listener on server */
	RDMA_UNREACHABLE = 7,	/* host unreachable */
	RDMA_CONNLOST	= 8,	/* connection lost */

	/* transport and protocol errors */
	RDMA_XPRTFAILED	= 9,	/* RDMA transport failed */
	RDMA_PROTECTERR	= 10,	/* memory protection error */
	RDMA_OVERRUN	= 11,	/* transport overrun */
	RDMA_RECVQEMPTY	= 12,	/* incoming pkt dropped, recv q empty */
	RDMA_PROTFAILED	= 13,	/* RDMA protocol failed */
	RDMA_NOTSUPP	= 14,	/* requested feature not supported */
	RDMA_REMOTERR	= 15,	/* error at remote end */

	/* RDMATF errors */
	RDMA_BADVERS	= 16,	/* mismatch RDMATF versions */
	RDMA_REG_EXIST	= 17,	/* RDMATF registration already exists */

	/* fallback error */
	RDMA_FAILED	= 18	/* generic error */
} rdma_stat;
111
/*
 * Memory region context: the handle an RDMA provider generates for a
 * registered, arbitrarily sized, contiguous range of virtual memory.
 * The RDMA Interface Adapter needs it for local or remote memory access.
 *
 * mrc_rmr is the remote memory region context.  It is the part that is
 * sent over-the-wire so the remote host gets RDMA access to the region.
 * The local context and local memory info live in the lhdl union and are
 * reached through the mrc_lmr and mrc_linfo accessor macros below.
 */
struct mrc {
	/* Remote MR context, sent OTW */
	uint32_t	mrc_rmr;
	union {
		struct mr {
			/* Local MR context */
			uint32_t	lmr;
			/* Local memory info */
			uint64_t	linfo;
		} mr;
	} lhdl;
};

/* Shorthand for the local-side members nested inside struct mrc. */
#define	mrc_lmr		lhdl.mr.lmr
#define	mrc_linfo	lhdl.mr.linfo
134
135 /*
136 * The XDR offset value is used by the XDR
137 * routine to identify the position in the
138 * RPC message where the opaque object would
139 * normally occur. Neither the data content
140 * of the chunk, nor its size field are included
141 * in the RPC message. The XDR offset is calculated
142 * as if the chunks were present.
143 *
144 * The remaining fields identify the chunk of data
145 * on the sender. The c_memhandle identifies a
146 * registered RDMA memory region and the c_addr
147 * and c_len fields identify the chunk within it.
148 */
149 struct clist {
150 uint32 c_xdroff; /* XDR offset */
151 uint32 c_len; /* Length */
152 struct mrc c_smemhandle; /* src memory handle */
153 uint64 c_ssynchandle; /* src sync handle */
154 uint64 c_saddr; /* src address */
155 struct mrc c_dmemhandle; /* dst memory handle */
156 uint64 c_dsynchandle; /* dst sync handle */
157 uint64 c_daddr; /* dst address */
158 struct clist *c_next; /* Next chunk */
159 };
160
161 typedef struct clist clist;
162
/*
 * RDMA message types: each value says what follows in the message.
 */
enum rdma_proc {
	/* chunk list and RPC msg follow */
	RDMA_MSG = 0,
	/* only chunk list follows */
	RDMA_NOMSG = 1,
	/* chunk list and RPC msg with padding follow */
	RDMA_MSGP = 2,
	/* signal completion of chunk transfer */
	RDMA_DONE = 3
};
169
170 /*
171 * Listener information for a service
172 */
173 struct rdma_svc_data {
174 queue_t q; /* queue_t to place incoming pkts */
175 int active; /* If active, after registeration startup */
176 rdma_stat err_code; /* Error code from plugin layer */
177 int32_t svcid; /* RDMA based service identifier */
178 };
179
180 /*
181 * Per RDMA plugin module information.
182 * Will be populated by each plugin
183 * module during its initialization.
184 */
185 typedef struct rdma_mod {
186 char *rdma_api; /* "kvipl", "ibtf", etc */
187 uint_t rdma_version; /* RDMATF API version */
188 int rdma_count; /* # of devices */
189 struct rdmaops *rdma_ops; /* rdma op vector for api */
190 } rdma_mod_t;
191
192 /*
193 * Registry of RDMA plugins
194 */
195 typedef struct rdma_registry {
196 rdma_mod_t *r_mod; /* plugin mod info */
197 struct rdma_registry *r_next; /* next registered RDMA plugin */
198 } rdma_registry_t;
199
200 /*
201 * RDMA transport information
202 */
203 typedef struct rdma_info {
204 uint_t addrlen; /* address length */
205 uint_t mts; /* max transfer size */
206 uint_t mtu; /* native mtu size of unlerlying network */
207 } rdma_info_t;
208
209 /*
210 * RDMA Connection information
211 */
212 typedef struct conn {
213 rdma_mod_t *c_rdmamod; /* RDMA transport info for conn */
214 struct netbuf c_raddr; /* remote address */
215 struct netbuf c_laddr; /* local address */
216 int c_ref; /* no. of clients of connection */
217 struct conn *c_next; /* next in list of connections */
218 struct conn *c_prev; /* prev in list of connections */
219 caddr_t c_private; /* transport specific stuff */
220
221 #define C_IDLE 0x80000000
222 #define C_CONN_PEND 0x40000000
223 #define C_CONNECTED 0x20000000
224 #define C_ERROR 0x10000000
225 #define C_DISCONN_PEND 0x08000000
226 #define C_REMOTE_DOWN 0x04000000
227
228 uint_t c_state; /* state of connection */
229 kmutex_t c_lock; /* protect c_state and c_ref fields */
230 kcondvar_t c_cv; /* to signal when pending is done */
231 } CONN;
232
233
/*
 * Memory management for the RDMA buffers
 */

/*
 * RDMA buffer types.  The descriptor types are used in plugins only;
 * the chunk buffer type is used in RDMATF only and not in plugins.
 */
typedef enum {
	SEND_BUFFER	= 0,	/* buf for send msg */
	SEND_DESCRIPTOR	= 1,	/* buf for send msg descriptor (plugins only) */
	RECV_BUFFER	= 2,	/* buf for recv msg */
	RECV_DESCRIPTOR	= 3,	/* buf for recv msg descriptor (plugins only) */
	CHUNK_BUFFER	= 4	/* chunk buf (RDMATF only, not plugins) */
} rdma_btype;
247
248 /*
249 * RDMA buffer information
250 */
251 typedef struct rdma_buf {
252 rdma_btype type; /* buffer type */
253 int len; /* length of buffer */
254 caddr_t addr; /* buffer address */
255 struct mrc handle; /* buffer registration handle */
256 } rdma_buf_t;
257
258 /*
259 * Data transferred from plugin interrupt to svc_queuereq()
260 */
261 struct recv_data {
262 CONN *conn;
263 int status;
264 rdma_buf_t rpcmsg;
265 };
266
267 /*
268 * Operations vector for RDMA transports.
269 */
270 typedef struct rdmaops {
271 /* Network */
272 rdma_stat (*rdma_reachable)(int addr_type, struct netbuf *,
273 void **handle);
274 /* Connection */
275 rdma_stat (*rdma_get_conn)(struct netbuf *, int addr_type,
276 void *, CONN **);
277 rdma_stat (*rdma_rel_conn)(CONN *);
278 /* Server side listner start and stop routines */
279 void (*rdma_svc_listen)(struct rdma_svc_data *);
280 void (*rdma_svc_stop)(struct rdma_svc_data *);
281 /* Memory */
282 rdma_stat (*rdma_regmem)(CONN *, caddr_t, uint_t, struct mrc *);
283 rdma_stat (*rdma_deregmem)(CONN *, caddr_t, struct mrc);
284 rdma_stat (*rdma_regmemsync)(CONN *, caddr_t, uint_t,
285 struct mrc *, void **);
286 rdma_stat (*rdma_deregmemsync)(CONN *, caddr_t, struct mrc,
287 void *);
288 rdma_stat (*rdma_syncmem)(CONN *, void *, caddr_t, int, int);
289 /* Buffer */
290 rdma_stat (*rdma_buf_alloc)(CONN *, rdma_buf_t *);
291 void (*rdma_buf_free)(CONN *, rdma_buf_t *);
292 /* Transfer */
293 rdma_stat (*rdma_send)(CONN *, clist *, uint32_t);
294 rdma_stat (*rdma_send_resp)(CONN *, clist *, uint32_t);
295 rdma_stat (*rdma_clnt_recvbuf)(CONN *, clist *, uint32_t);
296 rdma_stat (*rdma_svc_recvbuf)(CONN *, clist *);
297 rdma_stat (*rdma_recv)(CONN *, clist **, uint32_t);
298 /* RDMA */
299 rdma_stat (*rdma_read)(CONN *, clist *, int);
300 rdma_stat (*rdma_write)(CONN *, clist *, int);
301 /* INFO */
302 rdma_stat (*rdma_getinfo)(rdma_info_t *info);
303
304 } rdmaops_t;
305
/*
 * RDMA operations.
 *
 * Each macro dispatches to the matching entry of an operations vector.
 * RDMA_REACHABLE and RDMA_GET_CONN take the operations vector directly,
 * RDMA_GETINFO takes a plugin module, and all the others find the vector
 * through the connection (conn->c_rdmamod->rdma_ops).
 *
 * Note that the connection-based macros expand their conn argument
 * twice: once to locate the vector and once as the first call argument.
 */

/* Network and connection setup: dispatched on an explicit ops vector. */
#define	RDMA_REACHABLE(rdma_ops, addr_type, addr, handle)	\
	((rdma_ops)->rdma_reachable(addr_type, addr, handle))

#define	RDMA_GET_CONN(rdma_ops, addr, addr_type, handle, conn)	\
	((rdma_ops)->rdma_get_conn(addr, addr_type, handle, conn))

#define	RDMA_REL_CONN(conn)	\
	((conn)->c_rdmamod->rdma_ops->rdma_rel_conn(conn))

/* Memory registration and synchronization. */
#define	RDMA_REGMEM(conn, buff, len, handle)	\
	((conn)->c_rdmamod->rdma_ops->rdma_regmem(conn, buff, len, handle))

#define	RDMA_DEREGMEM(conn, buff, handle)	\
	((conn)->c_rdmamod->rdma_ops->rdma_deregmem(conn, buff, handle))

#define	RDMA_REGMEMSYNC(conn, buff, len, handle, synchandle)	\
	((conn)->c_rdmamod->rdma_ops->rdma_regmemsync(conn, buff,	\
	    len, handle, synchandle))

#define	RDMA_DEREGMEMSYNC(conn, buff, handle, synchandle)	\
	((conn)->c_rdmamod->rdma_ops->rdma_deregmemsync(conn, buff,	\
	    handle, synchandle))

#define	RDMA_SYNCMEM(conn, handle, buff, len, direction)	\
	((conn)->c_rdmamod->rdma_ops->rdma_syncmem(conn, handle,	\
	    buff, len, direction))

/* Buffer allocation. */
#define	RDMA_BUF_ALLOC(conn, rbuf)	\
	((conn)->c_rdmamod->rdma_ops->rdma_buf_alloc(conn, rbuf))

#define	RDMA_BUF_FREE(conn, rbuf)	\
	((conn)->c_rdmamod->rdma_ops->rdma_buf_free(conn, rbuf))

/* Message transfer. */
#define	RDMA_SEND(conn, sendlist, xid)	\
	((conn)->c_rdmamod->rdma_ops->rdma_send(conn, sendlist, xid))

#define	RDMA_SEND_RESP(conn, sendlist, xid)	\
	((conn)->c_rdmamod->rdma_ops->rdma_send_resp(conn, sendlist, xid))

#define	RDMA_CLNT_RECVBUF(conn, cl, xid)	\
	((conn)->c_rdmamod->rdma_ops->rdma_clnt_recvbuf(conn, cl, xid))

#define	RDMA_SVC_RECVBUF(conn, cl)	\
	((conn)->c_rdmamod->rdma_ops->rdma_svc_recvbuf(conn, cl))

#define	RDMA_RECV(conn, recvlist, xid)	\
	((conn)->c_rdmamod->rdma_ops->rdma_recv(conn, recvlist, xid))

/* RDMA read and write of chunk lists. */
#define	RDMA_READ(conn, cl, wait)	\
	((conn)->c_rdmamod->rdma_ops->rdma_read(conn, cl, wait))

#define	RDMA_WRITE(conn, cl, wait)	\
	((conn)->c_rdmamod->rdma_ops->rdma_write(conn, cl, wait))

/* Transport information: dispatched on a plugin module. */
#define	RDMA_GETINFO(rdma_mod, info)	\
	((rdma_mod)->rdma_ops->rdma_getinfo(info))
365
#ifdef _KERNEL
/*
 * Kernel-only global state shared by the RDMA transport framework.
 */
extern rdma_registry_t	*rdma_mod_head;
extern krwlock_t	rdma_lock;	/* protects rdma_mod_head list */
extern int		rdma_modloaded;	/* flag for loading RDMA plugins */
extern int		rdma_dev_available; /* rdma device is loaded or not */
extern kmutex_t		rdma_modload_lock; /* protects rdma_modloaded flag */
extern uint_t		rdma_minchunk;
extern ldi_ident_t	rpcmod_li; /* needed by layered driver framework */

/*
 * General RDMA routines
 */
extern void clist_add(struct clist **clp, uint32_t xdroff, int len,
	struct mrc *shandle, caddr_t saddr,
	struct mrc *dhandle, caddr_t daddr);
extern void clist_free(struct clist *cl);
extern int clist_register(CONN *conn, struct clist *cl, bool_t src);
extern int clist_deregister(CONN *conn, struct clist *cl, bool_t src);
extern rdma_stat rdma_clnt_postrecv(CONN *conn, uint32_t xid);
extern rdma_stat rdma_svc_postrecv(CONN *conn);
extern rdma_stat clist_syncmem(CONN *conn, struct clist *cl, bool_t src);
extern rdma_stat rdma_register_mod(rdma_mod_t *mod);
extern rdma_stat rdma_unregister_mod(rdma_mod_t *mod);
extern void rdma_buf_free(CONN *conn, rdma_buf_t *rbuf);
/*
 * Declared with an explicit (void) so that it is a real prototype; an
 * empty parameter list would leave the arguments unchecked.
 */
extern int rdma_modload(void);

/*
 * RDMA XDR
 */
extern void xdrrdma_create(XDR *, caddr_t, uint_t, int, struct clist *,
	enum xdr_op, CONN *);
extern void xdrrdma_destroy(XDR *);
extern struct clist *xdrrdma_clist(XDR *);
extern uint_t xdrrdma_getpos(XDR *);
extern bool_t xdrrdma_setpos(XDR *, uint_t);
extern bool_t xdr_clist(XDR *, clist *);
extern bool_t xdr_do_clist(XDR *, clist **);
extern uint_t xdr_getbufsize(XDR *);
extern unsigned int xdrrdma_sizeof(xdrproc_t func, void *data,
	int min_chunk);
extern unsigned int xdrrdma_authsize(AUTH *auth, struct cred *cred,
	int min_chunk);
#endif /* _KERNEL */
407
408 #ifdef __cplusplus
409 }
410 #endif
411
412 #endif /* _RPC_RPC_RDMA_H */