New ib.h
  1 /*
  2  * CDDL HEADER START
  3  *
  4  * The contents of this file are subject to the terms of the
  5  * Common Development and Distribution License, Version 1.0 only
  6  * (the "License").  You may not use this file except in compliance
  7  * with the License.
  8  *
  9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 10  * or http://www.opensolaris.org/os/licensing.
 11  * See the License for the specific language governing permissions
 12  * and limitations under the License.
 13  *
 14  * When distributing Covered Code, include this CDDL HEADER in each
 15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 16  * If applicable, add the following below this CDDL HEADER, with the
 17  * fields enclosed by brackets "[]" replaced with your own identifying
 18  * information: Portions Copyright [yyyy] [name of copyright owner]
 19  *
 20  * CDDL HEADER END
 21  */
 22 /*
 23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 24  * Use is subject to license terms.
 25  */
 26 
 27  /* Copyright (c) 2006, The Ohio State University. All rights reserved.
 28   *
 29   * Portions of this source code is developed by the team members of
 30   * The Ohio State University's Network-Based Computing Laboratory (NBCL),
 31   * headed by Professor Dhabaleswar K. (DK) Panda.
 32   *
 33   * Acknowledgements to contributions from developors:
 34   *   Ranjit Noronha: noronha@cse.ohio-state.edu
 35   *   Lei Chai      : chail@cse.ohio-state.edu
 36   *   Weikuan Yu    : yuw@cse.ohio-state.edu
 37   *
 38   */
 39 #ifndef _IB_H
 40 #define _IB_H
 41 
 42 #pragma ident   "@(#)ib.h       1.8     05/06/08 SMI"
 43 
 44 /*
 45  * ib.h, rpcib plugin interface.
 46  */
 47 
 48 #include <sys/types.h>
 49 #include <sys/ddi.h>
 50 #include <sys/sunddi.h>
 51 #include <sys/conf.h>
 52 #include <sys/stat.h>
 53 #include <rpc/rpc.h>
 54 #include <rpc/rpc_rdma.h>
 55 #include <sys/ib/ibtl/ibti.h>
 56 #ifdef SERVER_REG_CACHE
 57 #include <sys/avl.h>
 58 #endif
 59 
 60 #ifdef __cplusplus
 61 extern "C" {
 62 #endif
 63 
 64 #define MAX_BUFS        256     /* max no. of buffers per pool */
 65 #define DEF_CQ_SIZE     (4096 - 1)      /* default CQ size; parenthesized for safe expansion */
 66                                 /*
 67                                  * Tavor returns the next higher power of 2
 68                                  * CQ entries than the requested size.
 69                                  * For instance, if you request (2^12 - 1)
 70                                  * CQ entries, Tavor returns 2^12 entries.
 71                                  * 4K CQ entries suffice.  Hence, (4096 - 1).
 72                                  */
 73 #define DEF_SQ_SIZE     128     /* default SendQ size */
 74 #define DEF_RQ_SIZE     256     /* default RecvQ size */
 75 #define DSEG_MAX        2       /* sizes send_wid.sbufaddr[] */
 76 #define RQ_DSEG_MAX     1       /* default RQ data seg */
 77 #define IBSRM_HB        0x8000  /* high order bit of pkey */
 78 #define NFS_SEC_KEY0    0x6878  /* randomly selected NFS security key */
 79 #define NFS_SEC_KEY1    0x8679
 80 
 81 /* max no. of refresh attempts on IBT_CM_CONN_STALE error */
 82 #define REFRESH_ATTEMPTS        3
 83 
 84 typedef struct rib_hca_s rib_hca_t;     /* RPCIB per HCA structure */
 85 typedef struct rib_qp_s rib_qp_t;       /* per QP data structure */
 86 typedef struct rib_cq_s rib_cq_t;       /* CQ structure */
 87 
 88 /*
 89  * RDMA_DONE notification entry, keyed by xid; rib_qp_s points at these via rdlist.
 90  */
 91 struct rdma_done_list {
 92         uint32_t        xid;            /* XID waiting for RDMA_DONE */
 93         kcondvar_t      rdma_done_cv;   /* cv for RDMA_DONE */
 94         struct rdma_done_list   *next;  /* next node pointer */
 95         struct rdma_done_list   *prev;  /* previous node pointer */
 96 };
 97 
 98 /*
 99  * State of the plugin.
100  * ACCEPT = accepting new connections and requests
101  * NO_ACCEPT = not accepting new connections and requests
102  */
103 #define ACCEPT          1
104 #define NO_ACCEPT       2
105 
106 /*
107  * Send wait state; the value is parenthesized so it expands as a single operand.
108  */
109 #define SEND_WAIT       (-1)
110 
111 /*
112  * Reply wait state; the value is parenthesized so it expands as a single operand.
113  */
114 #define REPLY_WAIT      (-1)
115 
116 typedef void * rib_pvoid;               /* generic (untyped) pointer */
117 typedef rib_pvoid RIB_SYNCMEM_HANDLE;   /* untyped handle; NOTE(review): presumably for memory sync - confirm */
118 
119 /*
120  * IB buffer pool management structures
121  */
122 
123 /*
124  * Buffer pool info: bookkeeping for one pool of fixed-size elements.
125  */
126 typedef struct {
127         kmutex_t        buflock;        /* lock for this structure */
128         caddr_t         buf;            /* pool address */
129         uint32_t        bufhandle;      /* rkey for this pool */
130         ulong_t         bufsize;        /* size of pool */
131         int             rsize;          /* size of each element */
132         int             numelems;       /* no. of elements allocated */
133         int             buffree;        /* no. of free elements */
134         void            *buflist[1];    /* free elements in pool; NOTE(review): [1] looks like a variable-length tail - confirm allocation size */
135 } bufpool_t;
136 
137 typedef struct {                        /* a bufpool_t plus its IBT handle/descriptor pointers */
138         bufpool_t       *bpool;         /* buffer pool info */
139         ibt_mr_hdl_t    *mr_hdl;        /* NOTE(review): presumably MR handle(s) for the pool - confirm */
140 #ifdef IB_FMR_SUP
141         ibt_ma_hdl_t    *ma_hdl;        /* present only with IB_FMR_SUP */
142         ibt_pmr_desc_t  *pmr_desc;      /* present only with IB_FMR_SUP */
143 #endif  /* IB_FMR_SUP */
144         ibt_mr_desc_t   *mr_desc;       /* vaddr, lkey, rkey */
145         
146 } rib_bufpool_t;
147 
148 /*
149  * ATS related defines and structures.
150  */
151 #define ATS_AR_DATA_LEN 16
152 #define IBD_NAME        "ibd"
153 #define N_IBD_INSTANCES 4
154 
155 typedef struct rpcib_ats_s {            /* one ATS record: instance, pkey, port GID and an IP socket address */
156         int                     ras_inst;
157         ib_pkey_t               ras_pkey;
158         ib_gid_t                ras_port_gid;
159         sa_family_t             ras_inet_type;  /* NOTE(review): presumably selects the valid ra_sin member - confirm */
160         union {
161                 struct sockaddr_in      ras_sockaddr;   /* IPv4 socket address */
162                 struct sockaddr_in6     ras_sockaddr6;  /* IPv6 socket address */
163         } ra_sin;
164 #define ras_sin                 ra_sin.ras_sockaddr     /* shorthand for the sockaddr_in member */
165 #define ras_sin6                ra_sin.ras_sockaddr6    /* shorthand for the sockaddr_in6 member */
166 } rpcib_ats_t;
167 
168 typedef struct rpcib_ibd_insts_s {      /* two counters plus a pointer to rpcib_ats_t record(s) */
169         int                     rib_ibd_alloc;  /* NOTE(review): presumably no. of rib_ats slots allocated - confirm */
170         int                     rib_ibd_cnt;    /* NOTE(review): presumably no. of rib_ats slots in use - confirm */
171         rpcib_ats_t             *rib_ats;       /* pointer to rpcib_ats_t record(s) */
172 } rpcib_ibd_insts_t;
173 
174 /*
175  * Service types supported by RPCIB.
176  * For now only NFS is supported.
177  */
178 #define NFS             1
179 #define NLM             2       /* defined here, but per the note above only NFS is supported for now */
180 
181 /*
182  * Role of the RPCIB consumer: server side or client side.
183  */
184 typedef enum {
185         RIB_SERVER = 0,
186         RIB_CLIENT = 1
187 } rib_mode_t;
188 
189 /*
190  * CQ structure: a CQ handle together with a pointer to the per-HCA structure.
191  */
192 struct rib_cq_s {
193         rib_hca_t               *rib_hca;       /* pointer to per-HCA structure */
194         ibt_cq_hdl_t            rib_cq_hdl;     /* CQ handle (ibt_cq_hdl_t) */
195 };
196 
197 /*
198  * RPCIB plugin state
199  */
200 typedef struct rpcib_state {
201         ibt_clnt_hdl_t          ibt_clnt_hdl;
202         uint32_t                hca_count;      /* NOTE(review): presumably no. of entries in hca_guids - confirm */
203         uint32_t                nhca_inited;    /* NOTE(review): presumably no. of HCAs initialized - confirm */
204         ib_guid_t               *hca_guids;
205         rib_hca_t               *hcas;
206         int                     refcount;
207         kmutex_t                open_hca_lock;  /* NOTE(review): protected fields not stated here - confirm */
208         rib_hca_t               *hca;           /* the hca being used */
209         queue_t                 *q;             /* up queue for a serv_type */
210         uint32_t                service_type;   /* NFS, NLM, etc */
211         void                    *private;
212 } rpcib_state_t;
213 
214 /*
215  * Each registered service's data structure.
216  * Each HCA has a list of these structures, which are the registered
217  * services on this HCA.
218  */
219 typedef struct rib_service rib_service_t;
220 struct rib_service {
221         uint32_t                srv_type;       /* e.g., NFS, NLM, v4CBD */
222 
223         /*
224          * service name, e.g., <IP>::NFS or <IP>::NLM.
225          * Each type of service can be registered with many
226          * IP addrs (srv_name) and is running on all ports
227          * for all HCAs.
228          */
229         char                    *srv_name;
230 
231         uint32_t                srv_port;       /* port on which registered */
232         ib_svc_id_t             srv_id;         /* from ibt_register call */
233         ibt_srv_hdl_t           srv_hdl;        /* from ibt_register call */
234         ibt_sbind_hdl_t         *srv_sbind_hdl; /* from ibt_bind call */
235         ibt_ar_t                srv_ar;
236 
237         /*
238          * pointer to the next service registered on this
239          * particular HCA
240          */
241         rib_service_t           *srv_next;
242 };
243 
244 /*
245  * Connection list: a list head pointer plus the rwlock for the list.
246  */
247 typedef struct {
248         krwlock_t       conn_lock;      /* list lock */
249         CONN            *conn_hd;       /* list head */
250 } rib_conn_list_t;
251 
252 enum hca_state {
253         HCA_INITED = 0,         /* HCA is up and running */
254         HCA_DETACHED = 1        /* HCA is in the detached state */
255 };
256 
257 /*
258  * RPCIB per HCA structure
259  */
260 struct rib_hca_s {
261         ibt_clnt_hdl_t          ibt_clnt_hdl;
262 
263         /*
264          * Per-HCA handles, attributes, port info and state.
265          */
266         ibt_hca_hdl_t           hca_hdl;        /* HCA handle */
267         ibt_hca_attr_t          hca_attrs;      /* HCA attributes */
268         ibt_pd_hdl_t            pd_hdl;
269         ib_guid_t               hca_guid;
270         uint32_t                hca_nports;
271         ibt_hca_portinfo_t      *hca_ports;
272         size_t                  hca_pinfosz;    /* NOTE(review): presumably byte size of hca_ports - confirm */
273         enum hca_state          state;          /* state of HCA */
274         krwlock_t               state_lock;     /* protects state field */
275         bool_t                  inuse;          /* indicates HCA usage */
276         kmutex_t                inuse_lock;     /* protects inuse field */
277         /*
278          * List of services registered on all ports available
279          * on this HCA. Only one consumer of KRPC can register
280          * its services at one time or tear them down at one
281          * time.
282          */
283         rib_service_t   *service_list;
284         krwlock_t               service_list_lock;
285 
286         rib_service_t   *ats_list;              /* Service list for ATS */
287 
288         rib_conn_list_t         cl_conn_list;   /* client conn list */
289         rib_conn_list_t         srv_conn_list;  /* server conn list */
290 
291         rib_cq_t                *clnt_scq;      /* NOTE(review): presumably client send CQ - confirm */
292         rib_cq_t                *clnt_rcq;      /* NOTE(review): presumably client recv CQ - confirm */
293         rib_cq_t                *svc_scq;       /* NOTE(review): presumably server send CQ - confirm */
294         rib_cq_t                *svc_rcq;       /* NOTE(review): presumably server recv CQ - confirm */
295         kmutex_t                cb_lock;
296         kcondvar_t              cb_cv;
297 
298         rib_bufpool_t           *recv_pool;     /* recv buf pool */
299         rib_bufpool_t           *send_pool;     /* send buf pool */
300 
301         void                    *iblock;        /* interrupt cookie */
302 
303 #ifdef IB_FMR_SUP
304         ibt_fmr_pool_hdl_t      fmr_pool;
305 #endif          /* IB_FMR_SUP */
306 
307 #ifdef SERVER_REG_CACHE
308         kmem_cache_t            *server_side_cache;      /* long reply pool */
309         avl_tree_t              avl_tree;
310         kmutex_t                 avl_lock;
311         krwlock_t                avl_rw_lock;
312         volatile bool_t          avl_init;
313 #endif          /* SERVER_REG_CACHE */
314 
315 };
316 
317 
318 /*
319  * Structure on wait state of a post send
320  */
321 struct send_wid {
322 #if defined (CLNT_INTERRUPT_COAL)
323         struct send_wid *forw;          /* link fields, present only with CLNT_INTERRUPT_COAL */
324         struct send_wid *back;
325 #endif  /* CLNT_INTERRUPT_COAL */
326         uint32_t        xid;
327         int             cv_sig;
328         kmutex_t        sendwait_lock;
329         kcondvar_t      wait_cv;
330         uint_t          status;
331         rib_qp_t        *qp;
332         int             nsbufs;                 /* # of send buffers posted */
333         uint64_t        sbufaddr[DSEG_MAX];     /* posted send buffers */
334         caddr_t         c;      /* NOTE(review): c, c1/l1, c2/l2, wl, rl are undocumented - confirm meaning */
335         caddr_t         c1;
336         int             l1;     
337         caddr_t         c2;     
338         int             l2;
339         int             wl,rl;
340 };
341 
342 /*
343  * Structure on reply descriptor for recv queue.
344  * Different from the above posting of a descriptor.
345  */
346 struct reply {
347         uint32_t        xid;
348         uint_t          status;
349         uint64_t        vaddr_cq;       /* buf addr from CQ */
350         uint_t          bytes_xfer;
351         kcondvar_t      wait_cv;
352         struct reply    *next;          /* next node pointer; rib_qp_s points at these via replylist */
353         struct reply    *prev;          /* previous node pointer */
354 };
355 
356 struct svc_recv {                       /* a QP pointer, a 64-bit address and a byte count */
357         rib_qp_t        *qp;
358         uint64_t        vaddr;          /* NOTE(review): presumably recv buffer address - confirm */
359         uint_t          bytes_xfer;
360 };
361 
362 struct recv_wid {                       /* an xid, a QP pointer and a posted buffer address */
363         uint32_t        xid;
364         rib_qp_t        *qp;
365         uint64_t        addr;   /* posted buf addr */
366 };
367 
368 /*
369  * Per QP data structure
370  */
371 struct rib_qp_s {
372         rib_hca_t               *hca;   /* pointer to per-HCA structure */
373         rib_mode_t              mode;   /* RIB_SERVER or RIB_CLIENT */
374         CONN                    rdmaconn;       /* embedded CONN; qptoc() yields its address */
375         ibt_channel_hdl_t       qp_hdl;
376         uint_t                  port_num;
377         ib_qpn_t                qpn;
378         int                     chan_flags;
379         clock_t                 timeout;
380         ibt_rc_chan_query_attr_t        qp_q_attrs;
381         rib_cq_t                *send_cq;       /* send CQ */
382         rib_cq_t                *recv_cq;       /* recv CQ */
383 
384         /*
385          * Number of pre-posted rbufs
386          */
387         uint_t                  n_posted_rbufs;
388         kcondvar_t              posted_rbufs_cv;
389         kmutex_t                posted_rbufs_lock;
390 
391         /*
392          * RPC reply
393          */
394         uint_t                  rep_list_size;
395         struct reply            *replylist;
396         kmutex_t                replylist_lock;
397 
398         /*
399          * server only, RDMA_DONE
400          */
401         struct rdma_done_list   *rdlist;
402         kmutex_t                rdlist_lock;
403 
404         kmutex_t                cb_lock;
405         kcondvar_t              cb_conn_cv;
406 
407         caddr_t                 q;      /* upstream queue */
408         struct send_wid        wd;      /* embedded send wait descriptor */
409 };
410 
411 #define ctoqp(c)        ((rib_qp_t *)((c)->c_private))  /* c_private of a CONN, cast to rib_qp_t * */
412 #define qptoc(qp)       ((CONN *)&((qp)->rdmaconn))     /* address of the CONN embedded in a rib_qp_t */
413 
414 /*
415  * Timeout for various calls (NOTE(review): units are not stated here - confirm)
416  */
417 #define CONN_WAIT_TIME  40
418 #define SEND_WAIT_TIME  40      /* time for send completion */
419 
420 #define REPLY_WAIT_TIME 40      /* time to get reply from remote QP */
421 
422 #ifdef __cplusplus
423 }
424 #endif
425 
426 #endif  /* !_IB_H */