Cdiff rpc_rdma.h
*** /webrev/webrev/usr/src/uts/common/rpc/rpc_rdma.h- Mon Aug 14 13:12:11 2006
--- rpc_rdma.h Thu Aug 10 14:05:27 2006
*** 22,31 ****
--- 22,44 ----
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+ /* Copyright (c) 2006, The Ohio State University. All rights reserved.
+ *
+ * Portions of this source code are developed by the team members of
+ * The Ohio State University's Network-Based Computing Laboratory (NBCL),
+ * headed by Professor Dhabaleswar K. (DK) Panda.
+ *
+ * Acknowledgements to contributions from developers:
+ * Ranjit Noronha: noronha@cse.ohio-state.edu
+ * Lei Chai : chail@cse.ohio-state.edu
+ * Weikuan Yu : yuw@cse.ohio-state.edu
+ *
+ */
+
#ifndef _RPC_RPC_RDMA_H
#define _RPC_RPC_RDMA_H
#pragma ident "@(#)rpc_rdma.h 1.9 05/06/08 SMI"
*** 36,50 ****
#ifdef __cplusplus
extern "C" {
#endif
! #define RPCRDMA_VERS 0 /* Version of the RPC over RDMA protocol */
#define RDMATF_VERS 1 /* Version of the API used by RPC for RDMA */
#define RDMATF_VERS_1 1 /* Current version of RDMATF */
/*
* The size of an RPC call or reply message
*/
#define RPC_MSG_SZ 1024
/*
--- 49,79 ----
#ifdef __cplusplus
extern "C" {
#endif
! #define RPCRDMA_VERS 1 /* Version of the RPC over RDMA protocol */
#define RDMATF_VERS 1 /* Version of the API used by RPC for RDMA */
#define RDMATF_VERS_1 1 /* Current version of RDMATF */
+ /* #define DYNAMIC_CREDIT_CONTROL 1 */
+ #define SERVER_REG_CACHE
+ /*#define ASYNC_SERVER_DEREG */
+ #define ASYNC_CLIENT_DEREG
+ /*#define RPC_RDMA_INLINE */
+ #ifdef RPC_RDMA_INLINE /* Increase to some super-large values */
+ #define RDMA_MINCHUNK 262144
+ #define RPC_MSG_SZ 262144
+ #define RPC_CL_SZ 262144
+ #define MINCHUNK 262144
+ #define RPC_BUF_SIZE 262144*2
+ #else
/*
+ * RDMA chunk size
+ */
+ #define RDMA_MINCHUNK 1024
+ /*
* The size of an RPC call or reply message
*/
#define RPC_MSG_SZ 1024
/*
*** 59,68 ****
--- 88,98 ----
/*
* Size of receive buffer
*/
#define RPC_BUF_SIZE 2048
+ #endif
#define NOWAIT 0 /* don't wait for operation of complete */
#define WAIT 1 /* wait and ensure that operation is complete */
/*
*** 69,79 ****
--- 99,164 ----
* RDMA xdr buffer control and other control flags. Add new flags here,
* set them in private structure for xdr over RDMA in xdr_rdma.c
*/
#define RDMA_NOCHUNK 0x1
+ #define LONG_REPLY_LEN 65536
+
+ extern int credit_control_debug;
+ extern int rib_long_reply_debug;
+ extern int rdma_long_reply_debug;
+ extern int rdma_xdr_long_reply_debug;
+ extern int rdma_wlist_xdr_debug;
+ extern int rdma_wlist_clnt_debug;
+ extern int rdma_wlist_svc_debug;
+ extern int rdma_wlist_memreg_debug;
+ extern int rdma_wlist_verbose_debug;
+
+
+ #define RDMA_BUFS_RQST 128 /* Num bufs requested by client */
+ #define RDMA_BUFS_GRANT 126 /* Num bufs granted by server */
+
/*
+ * Credit Control Structures.
+ */
+ typedef enum rdma_cc_type {
+ RDMA_CC_CLNT, /* CONN is for a client */
+ RDMA_CC_SRV /* CONN is for a server */
+ } rdma_cc_type_t;
+
+ /*
+ * Client side credit control data structure.
+ */
+ typedef struct rdma_clnt_cred_ctrl {
+ uint32_t clnt_cc_granted_ops;
+ uint32_t clnt_cc_in_flight_ops;
+ kcondvar_t clnt_cc_cv;
+ } rdma_clnt_cred_ctrl_t;
+
+ /*
+ * Server side credit control data structure.
+ */
+ typedef struct rdma_srv_cred_ctrl {
+ uint32_t srv_cc_buffers_granted;
+ uint32_t srv_cc_cur_buffers_used;
+ uint32_t srv_cc_posted;
+ uint32_t srv_cc_max_buf_size; /* to be determined by CCP */
+ uint32_t srv_cc_cur_buf_size; /* to be determined by CCP */
+ } rdma_srv_cred_ctrl_t;
+
+ typedef enum {
+ RPCCALL_RCHUNK,
+ RPCCALL_NORCHUNK
+ }rpccall_read_t;
+
+ typedef enum {
+ RPCCALL_WLIST,
+ RPCCALL_WCHUNK,
+ RPCCALL_NOWRITE
+ }rpccall_write_t;
+
+ /*
* Return codes from RDMA operations
*/
typedef enum {
RDMA_SUCCESS = 0, /* successful operation */
*** 123,139 ****
uint32_t mrc_rmr; /* Remote MR context, sent OTW */
union {
struct mr {
uint32_t lmr; /* Local MR context */
uint64_t linfo; /* Local memory info */
} mr;
} lhdl;
};
#define mrc_lmr lhdl.mr.lmr
#define mrc_linfo lhdl.mr.linfo
!
/*
* The XDR offset value is used by the XDR
* routine to identify the position in the
* RPC message where the opaque object would
* normally occur. Neither the data content
--- 208,225 ----
uint32_t mrc_rmr; /* Remote MR context, sent OTW */
union {
struct mr {
uint32_t lmr; /* Local MR context */
uint64_t linfo; /* Local memory info */
+ uint64_t lma; /* Local Mem Area Hdl */
} mr;
} lhdl;
};
#define mrc_lmr lhdl.mr.lmr
#define mrc_linfo lhdl.mr.linfo
! #define mrc_lma lhdl.mr.lma /* FMR : Mem Area Hdl */
/*
* The XDR offset value is used by the XDR
* routine to identify the position in the
* RPC message where the opaque object would
* normally occur. Neither the data content
*** 144,167 ****
--- 230,281 ----
* The remaining fields identify the chunk of data
* on the sender. The c_memhandle identifies a
* registered RDMA memory region and the c_addr
* and c_len fields identify the chunk within it.
*/
+
+ #ifdef SERVER_REG_CACHE
+ typedef struct rib_lrc_entry {
+ struct rib_lrc_entry *forw;
+ struct rib_lrc_entry *back;
+ char *lrc_buf;
+
+ uint32_t lrc_len;
+ void *avl_node;
+ bool_t registered;
+
+ struct mrc lrc_mhandle;
+ bool_t lrc_on_freed_list;
+ } rib_lrc_entry_t;
+ #endif
+
struct clist {
uint32 c_xdroff; /* XDR offset */
uint32 c_len; /* Length */
struct mrc c_smemhandle; /* src memory handle */
uint64 c_ssynchandle; /* src sync handle */
uint64 c_saddr; /* src address */
struct mrc c_dmemhandle; /* dst memory handle */
uint64 c_dsynchandle; /* dst sync handle */
uint64 c_daddr; /* dst address */
+ struct as *c_adspc; /* address space for saddr/daddr */
+ page_t **c_dpplist; /* page list for dest vaddr */
+ uint64 long_reply_buf;
struct clist *c_next; /* Next chunk */
};
typedef struct clist clist;
+ extern struct clist empty_cl;
+
+ /*
+ * TODO: max 4 meg wlist xfer size
+ * This is defined because the rfs3_tsize service requires
+ * svc_req struct (which we don't have in krecv).
+ */
+ #define MAX_SVC_XFER_SIZE (4*1024*1024)
+
enum rdma_proc {
RDMA_MSG = 0, /* chunk list and RPC msg follow */
RDMA_NOMSG = 1, /* only chunk list follows */
RDMA_MSGP = 2, /* chunk list and RPC msg with padding follow */
RDMA_DONE = 3 /* signal completion of chunk transfer */
*** 224,235 ****
--- 338,357 ----
#define C_ERROR 0x10000000
#define C_DISCONN_PEND 0x08000000
#define C_REMOTE_DOWN 0x04000000
uint_t c_state; /* state of connection */
+ rdma_cc_type_t c_cc_type; /* client or server, for credit cntrl */
+ union {
+ rdma_clnt_cred_ctrl_t c_clnt_cc;
+ rdma_srv_cred_ctrl_t c_srv_cc;
+ } rdma_conn_cred_ctrl_u;
kmutex_t c_lock; /* protect c_state and c_ref fields */
kcondvar_t c_cv; /* to signal when pending is done */
+ #if defined (CLNT_INTERRUPT_COAL)
+ uint_t c_count;
+ #endif
} CONN;
/*
* Memory management for the RDMA buffers
*** 251,260 ****
--- 373,385 ----
typedef struct rdma_buf {
rdma_btype type; /* buffer type */
int len; /* length of buffer */
caddr_t addr; /* buffer address */
struct mrc handle; /* buffer registration handle */
+ #ifdef SERVER_REG_CACHE
+ rib_lrc_entry_t *long_reply_buf;
+ #endif
} rdma_buf_t;
/*
* Data transferred from plugin interrupt to svc_queuereq()
*/
*** 277,307 ****
rdma_stat (*rdma_rel_conn)(CONN *);
/* Server side listner start and stop routines */
void (*rdma_svc_listen)(struct rdma_svc_data *);
void (*rdma_svc_stop)(struct rdma_svc_data *);
/* Memory */
! rdma_stat (*rdma_regmem)(CONN *, caddr_t, uint_t, struct mrc *);
rdma_stat (*rdma_deregmem)(CONN *, caddr_t, struct mrc);
! rdma_stat (*rdma_regmemsync)(CONN *, caddr_t, uint_t,
struct mrc *, void **);
rdma_stat (*rdma_deregmemsync)(CONN *, caddr_t, struct mrc,
void *);
rdma_stat (*rdma_syncmem)(CONN *, void *, caddr_t, int, int);
/* Buffer */
rdma_stat (*rdma_buf_alloc)(CONN *, rdma_buf_t *);
void (*rdma_buf_free)(CONN *, rdma_buf_t *);
/* Transfer */
rdma_stat (*rdma_send)(CONN *, clist *, uint32_t);
rdma_stat (*rdma_send_resp)(CONN *, clist *, uint32_t);
rdma_stat (*rdma_clnt_recvbuf)(CONN *, clist *, uint32_t);
rdma_stat (*rdma_svc_recvbuf)(CONN *, clist *);
rdma_stat (*rdma_recv)(CONN *, clist **, uint32_t);
/* RDMA */
rdma_stat (*rdma_read)(CONN *, clist *, int);
rdma_stat (*rdma_write)(CONN *, clist *, int);
/* INFO */
rdma_stat (*rdma_getinfo)(rdma_info_t *info);
} rdmaops_t;
/*
* RDMA operations.
--- 402,456 ----
rdma_stat (*rdma_rel_conn)(CONN *);
/* Server side listner start and stop routines */
void (*rdma_svc_listen)(struct rdma_svc_data *);
void (*rdma_svc_stop)(struct rdma_svc_data *);
/* Memory */
! rdma_stat (*rdma_regmem)(CONN *, caddr_t , caddr_t, uint_t, struct mrc *);
rdma_stat (*rdma_deregmem)(CONN *, caddr_t, struct mrc);
! #ifdef SERVER_REG_CACHE
! rdma_stat (*rdma_regmemsync)(CONN *, caddr_t ,caddr_t, uint_t,
! struct mrc *, void **, void *);
! rdma_stat (*rdma_deregmemsync)(CONN *, caddr_t, struct mrc,
! void *, void *);
! #else
! rdma_stat (*rdma_regmemsync)(CONN *, caddr_t ,caddr_t, uint_t,
struct mrc *, void **);
rdma_stat (*rdma_deregmemsync)(CONN *, caddr_t, struct mrc,
void *);
+
+ #endif
rdma_stat (*rdma_syncmem)(CONN *, void *, caddr_t, int, int);
/* Buffer */
rdma_stat (*rdma_buf_alloc)(CONN *, rdma_buf_t *);
void (*rdma_buf_free)(CONN *, rdma_buf_t *);
/* Transfer */
rdma_stat (*rdma_send)(CONN *, clist *, uint32_t);
+ #if defined (CLNT_INTERRUPT_COAL)
+ rdma_stat (*rdma_send_bl)(CONN *, clist *, uint32_t);
+ #endif
+ #if defined(ASYNC_SERVER_DEREG)
+ rdma_stat (*rdma_send_nw)(CONN *, clist *, uint32_t, caddr_t, caddr_t , int, caddr_t ,int, int, int);
+ #endif
rdma_stat (*rdma_send_resp)(CONN *, clist *, uint32_t);
rdma_stat (*rdma_clnt_recvbuf)(CONN *, clist *, uint32_t);
rdma_stat (*rdma_svc_recvbuf)(CONN *, clist *);
rdma_stat (*rdma_recv)(CONN *, clist **, uint32_t);
/* RDMA */
rdma_stat (*rdma_read)(CONN *, clist *, int);
rdma_stat (*rdma_write)(CONN *, clist *, int);
/* INFO */
rdma_stat (*rdma_getinfo)(rdma_info_t *info);
+ #ifdef SERVER_REG_CACHE
+ rib_lrc_entry_t *(*rdma_get_server_cache_buf)(CONN *,uint32_t);
+ void (*rdma_free_server_cache_buf)(CONN *, rib_lrc_entry_t *);
+ #endif
+ #ifdef DYNAMIC_CREDIT_CONTROL
+ void (*rdma_get_resource_info)(CONN *, int *, int *);
+ #endif
+ #if defined(ASYNC_CLIENT_DEREG)
+ void (*insert_queue)(CONN *, clist *);
+ #endif
} rdmaops_t;
/*
* RDMA operations.
*** 313,336 ****
(*(rdma_ops)->rdma_get_conn)(addr, addr_type, handle, conn)
#define RDMA_REL_CONN(conn) \
(*(conn)->c_rdmamod->rdma_ops->rdma_rel_conn)(conn)
! #define RDMA_REGMEM(conn, buff, len, handle) \
! (*(conn)->c_rdmamod->rdma_ops->rdma_regmem)(conn, buff, len, handle)
#define RDMA_DEREGMEM(conn, buff, handle) \
(*(conn)->c_rdmamod->rdma_ops->rdma_deregmem)(conn, buff, handle)
! #define RDMA_REGMEMSYNC(conn, buff, len, handle, synchandle) \
! (*(conn)->c_rdmamod->rdma_ops->rdma_regmemsync)(conn, buff, \
len, handle, synchandle)
#define RDMA_DEREGMEMSYNC(conn, buff, handle, synchandle) \
(*(conn)->c_rdmamod->rdma_ops->rdma_deregmemsync)(conn, buff, \
handle, synchandle)
#define RDMA_SYNCMEM(conn, handle, buff, len, direction) \
(*(conn)->c_rdmamod->rdma_ops->rdma_syncmem)(conn, handle, \
buff, len, direction)
#define RDMA_BUF_ALLOC(conn, rbuf) \
--- 462,495 ----
(*(rdma_ops)->rdma_get_conn)(addr, addr_type, handle, conn)
#define RDMA_REL_CONN(conn) \
(*(conn)->c_rdmamod->rdma_ops->rdma_rel_conn)(conn)
! #define RDMA_REGMEM(conn, adsp, buff, len, handle) \
! (*(conn)->c_rdmamod->rdma_ops->rdma_regmem)(conn, adsp, buff, len, handle)
#define RDMA_DEREGMEM(conn, buff, handle) \
(*(conn)->c_rdmamod->rdma_ops->rdma_deregmem)(conn, buff, handle)
! #ifdef SERVER_REG_CACHE
! #define RDMA_REGMEMSYNC(conn, adsp, buff, len, handle, synchandle, lrc) \
! (*(conn)->c_rdmamod->rdma_ops->rdma_regmemsync)(conn, adsp, buff, \
! len, handle, synchandle, lrc)
!
! #define RDMA_DEREGMEMSYNC(conn, buff, handle, synchandle, lrc) \
! (*(conn)->c_rdmamod->rdma_ops->rdma_deregmemsync)(conn, buff, \
! handle, synchandle, lrc)
! #else
! #define RDMA_REGMEMSYNC(conn, adsp, buff, len, handle, synchandle) \
! (*(conn)->c_rdmamod->rdma_ops->rdma_regmemsync)(conn, adsp, buff, \
len, handle, synchandle)
#define RDMA_DEREGMEMSYNC(conn, buff, handle, synchandle) \
(*(conn)->c_rdmamod->rdma_ops->rdma_deregmemsync)(conn, buff, \
handle, synchandle)
+ #endif
#define RDMA_SYNCMEM(conn, handle, buff, len, direction) \
(*(conn)->c_rdmamod->rdma_ops->rdma_syncmem)(conn, handle, \
buff, len, direction)
#define RDMA_BUF_ALLOC(conn, rbuf) \
*** 339,349 ****
--- 498,521 ----
#define RDMA_BUF_FREE(conn, rbuf) \
(*(conn)->c_rdmamod->rdma_ops->rdma_buf_free)(conn, rbuf)
#define RDMA_SEND(conn, sendlist, xid) \
(*(conn)->c_rdmamod->rdma_ops->rdma_send)(conn, sendlist, xid)
+ #if defined (CLNT_INTERRUPT_COAL)
+ #define RDMA_SEND_BL(conn, sendlist, xid) \
+ (*(conn)->c_rdmamod->rdma_ops->rdma_send_bl)(conn, sendlist, xid)
+ #endif
+ #if defined(ASYNC_SERVER_DEREG)
+ #define RDMA_SEND_NW(conn, sendlist, xid, c, c1, c2, c3, c4, c5, c6) \
+ (*(conn)->c_rdmamod->rdma_ops->rdma_send_nw)(conn, sendlist, xid, c, c1, c2, c3, c4, c5, c6)
+ #endif
+ #if defined(ASYNC_CLIENT_DEREG)
+ #define INSERT_QUEUE(conn,rwc) \
+ (*(conn)->c_rdmamod->rdma_ops->insert_queue)(conn,rwc)
+ #endif
+
#define RDMA_SEND_RESP(conn, sendlist, xid) \
(*(conn)->c_rdmamod->rdma_ops->rdma_send_resp)(conn, sendlist, xid)
#define RDMA_CLNT_RECVBUF(conn, cl, xid) \
(*(conn)->c_rdmamod->rdma_ops->rdma_clnt_recvbuf)(conn, cl, xid)
*** 361,379 ****
(*(conn)->c_rdmamod->rdma_ops->rdma_write)(conn, cl, wait)
#define RDMA_GETINFO(rdma_mod, info) \
(*(rdma_mod)->rdma_ops->rdma_getinfo)(info)
#ifdef _KERNEL
extern rdma_registry_t *rdma_mod_head;
extern krwlock_t rdma_lock; /* protects rdma_mod_head list */
extern int rdma_modloaded; /* flag for loading RDMA plugins */
extern int rdma_dev_available; /* rdma device is loaded or not */
extern kmutex_t rdma_modload_lock; /* protects rdma_modloaded flag */
extern uint_t rdma_minchunk;
extern ldi_ident_t rpcmod_li; /* needed by layed driver framework */
-
/*
* General RDMA routines
*/
extern void clist_add(struct clist **clp, uint32_t xdroff, int len,
struct mrc *shandle, caddr_t saddr,
--- 533,564 ----
(*(conn)->c_rdmamod->rdma_ops->rdma_write)(conn, cl, wait)
#define RDMA_GETINFO(rdma_mod, info) \
(*(rdma_mod)->rdma_ops->rdma_getinfo)(info)
+
+ #ifdef SERVER_REG_CACHE
+ #define RDMA_GET_SERVER_CACHE_BUF(conn, len) \
+ (*(conn)->c_rdmamod->rdma_ops->rdma_get_server_cache_buf)(conn, len)
+
+ #define RDMA_FREE_SERVER_CACHE_BUF(conn, buf) \
+ (*(conn)->c_rdmamod->rdma_ops->rdma_free_server_cache_buf)(conn, buf)
+ #endif
+
+ #ifdef DYNAMIC_CREDIT_CONTROL
+ #define RDMA_GET_RESOURCE_INFO(conn, num, avail) \
+ (*(conn)->c_rdmamod->rdma_ops->rdma_get_resource_info)(conn, num, avail)
+ #endif
+
#ifdef _KERNEL
extern rdma_registry_t *rdma_mod_head;
extern krwlock_t rdma_lock; /* protects rdma_mod_head list */
extern int rdma_modloaded; /* flag for loading RDMA plugins */
extern int rdma_dev_available; /* rdma device is loaded or not */
extern kmutex_t rdma_modload_lock; /* protects rdma_modloaded flag */
extern uint_t rdma_minchunk;
extern ldi_ident_t rpcmod_li; /* needed by layed driver framework */
/*
* General RDMA routines
*/
extern void clist_add(struct clist **clp, uint32_t xdroff, int len,
struct mrc *shandle, caddr_t saddr,
*** 401,410 ****
--- 586,604 ----
extern bool_t xdr_clist(XDR *, clist *);
extern bool_t xdr_do_clist(XDR *, clist **);
extern uint_t xdr_getbufsize(XDR *);
unsigned int xdrrdma_sizeof(xdrproc_t func, void *data, int min_chunk);
unsigned int xdrrdma_authsize(AUTH *auth, struct cred *cred, int min_chunk);
+
+ extern bool_t xdr_decode_reply_wchunk(XDR *, struct clist **,CONN *conn);
+ extern bool_t xdr_decode_wlist(XDR *xdrs, struct clist **, bool_t *);
+ extern bool_t xdr_decode_wlist_new(XDR *xdrs, struct clist **, bool_t *,
+ uint32_t *,CONN *);
+
+ extern bool_t xdr_encode_wlist(XDR *, clist *, uint_t);
+ extern bool_t xdr_encode_reply_wchunk(XDR *, struct clist *, uint32_t seg_array_len);
+
#endif /* _KERNEL */
#ifdef __cplusplus
}
#endif