Cdiff xdr_rdma.c
*** /webrev/webrev/usr/src/uts/common/rpc/xdr_rdma.c- Mon Aug 14 13:12:12 2006
--- xdr_rdma.c Thu Aug 10 14:22:04 2006
*** 22,31 ****
--- 22,44 ----
/*
* Copyright 2004 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+ /*
+ * Copyright (c) 2006, The Ohio State University. All rights reserved.
+ *
+ * Portions of this source code were developed by the team members of
+ * The Ohio State University's Network-Based Computing Laboratory (NBCL),
+ * headed by Professor Dhabaleswar K. (DK) Panda.
+ *
+ * Acknowledgements to contributions from developers:
+ * Ranjit Noronha: noronha@cse.ohio-state.edu
+ * Lei Chai : chail@cse.ohio-state.edu
+ * Weikuan Yu : yuw@cse.ohio-state.edu
+ */
+
#pragma ident "@(#)xdr_rdma.c 1.4 05/06/08 SMI"
/*
* xdr_rdma.c, XDR implementation using RDMA to move large chunks
*/
*** 41,50 ****
--- 54,65 ----
#include <rpc/rpc_sztypes.h>
#include <rpc/rpc_rdma.h>
static struct xdr_ops *xdrrdma_ops(void);
+ /*int rdma_xdr_long_reply_debug = 0x0;*/
+
/*
* A chunk list entry identifies a chunk
* of opaque data to be moved separately
* from the rest of the RPC message.
* xp_min_chunk = 0, is a special case for ENCODING, which means
*** 145,169 ****
static bool_t
xdrrdma_getbytes(XDR *xdrs, caddr_t addr, int len)
{
struct private *xdrp = (struct private *)(xdrs->x_private);
struct clist *cle = *(xdrp->xp_cl_next);
struct clist cl;
bool_t retval = TRUE;
/*
* If there was a chunk at the current offset
* first record the destination address and length
* in the chunk list that came with the message, then
* RDMA READ the chunk data.
*/
if (cle != NULL &&
cle->c_xdroff == (xdrp->xp_offp - xdrs->x_base)) {
! cle->c_daddr = (uint64)(uintptr_t)addr;
! cle->c_len = len;
xdrp->xp_cl_next = &cle->c_next;
/*
* RDMA READ the chunk data from the remote end.
* First prep the destination buffer by registering
* it, then RDMA READ the chunk data. Since we are
* doing streaming memory, sync the destination buffer
--- 160,220 ----
static bool_t
xdrrdma_getbytes(XDR *xdrs, caddr_t addr, int len)
{
struct private *xdrp = (struct private *)(xdrs->x_private);
struct clist *cle = *(xdrp->xp_cl_next);
+ struct clist *cls = *(xdrp->xp_cl_next);
struct clist cl;
bool_t retval = TRUE;
+ uint32_t total_len = len;
+ uint32_t sum_len = 0;
+ uint32_t total_segments = 0;
+ uint32_t actual_segments = 0;
+ uint32_t status;
+ uint32_t i;
+ uint32_t alen;
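+ /* Count the segments in the read chunk list sent with the message. */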
+ while (cle) {
+ total_segments++;
+ cle = cle->c_next;
+ }
+ cle = *(xdrp->xp_cl_next);
/*
* If there was a chunk at the current offset
* first record the destination address and length
* in the chunk list that came with the message, then
* RDMA READ the chunk data.
*/
if (cle != NULL &&
cle->c_xdroff == (xdrp->xp_offp - xdrs->x_base)) {
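+ /*
+ * The data may arrive as several segments; carve the destination
+ * buffer 'addr' into consecutive pieces, one per segment.
+ */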
! for (actual_segments = 0; actual_segments < total_segments; actual_segments++) {
! if (total_len <= 0)
! goto mem_sync;
! cle->c_daddr = (uint64)(uintptr_t)addr + sum_len;
! /* cle->c_len = len; */
! alen = 0;
! if (cle->c_len > total_len) {
! alen = cle->c_len;
! cle->c_len = total_len;
! }
! if (!alen)
xdrp->xp_cl_next = &cle->c_next;
+
+ sum_len += cle->c_len;
+ total_len -= cle->c_len;
+
+ if ((total_segments - actual_segments - 1) == 0 && total_len > 0) {
+ cmn_err(CE_WARN, "Provided read chunks are too short\n");
+ retval = FALSE;
+ }
+
+ if ((total_segments - actual_segments - 1) > 0 && total_len == 0) {
+ #ifdef DEBUG
+ cmn_err(CE_NOTE, "Provided read chunks are too long "
+ "[total=%d, actual=%d]\n", total_segments, actual_segments);
+ #endif
+ }
/*
* RDMA READ the chunk data from the remote end.
* First prep the destination buffer by registering
* it, then RDMA READ the chunk data. Since we are
* doing streaming memory, sync the destination buffer
*** 176,210 ****
cl = *cle;
cl.c_next = NULL;
if (clist_register(xdrp->xp_conn, &cl, 0) != RDMA_SUCCESS) {
return (FALSE);
}
/*
* Now read the chunk in
*/
! if (RDMA_READ(xdrp->xp_conn, &cl, WAIT) != RDMA_SUCCESS) {
#ifdef DEBUG
cmn_err(CE_WARN,
"xdrrdma_getbytes: RDMA_READ failed\n");
#endif
retval = FALSE;
goto out;
}
/*
* sync the memory for cpu
*/
if (clist_syncmem(xdrp->xp_conn, &cl, 0) != RDMA_SUCCESS) {
retval = FALSE;
goto out;
}
-
out:
/*
* Deregister the chunks
*/
(void) clist_deregister(xdrp->xp_conn, &cl, 0);
return (retval);
}
if ((xdrs->x_handy -= len) < 0)
return (FALSE);
--- 227,282 ----
cl = *cle;
cl.c_next = NULL;
if (clist_register(xdrp->xp_conn, &cl, 0) != RDMA_SUCCESS) {
return (FALSE);
}
+ cle->c_dmemhandle = cl.c_dmemhandle;
+ cle->c_dsynchandle = cl.c_dsynchandle;
/*
* Now read the chunk in
*/
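+ /*
+ * Only the final segment (or the read that satisfies the full
+ * request) is posted with WAIT; earlier segments are posted
+ * NOWAIT so the reads can overlap on the connection.
+ */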
! if ((total_segments - actual_segments - 1) == 0 || total_len == 0) {
! status = RDMA_READ(xdrp->xp_conn, &cl, WAIT);
! } else {
! status = RDMA_READ(xdrp->xp_conn, &cl, NOWAIT);
! }
! if (status != RDMA_SUCCESS) {
#ifdef DEBUG
cmn_err(CE_WARN,
"xdrrdma_getbytes: RDMA_READ failed\n");
#endif
retval = FALSE;
goto out;
}
+ cle = cle->c_next;
+ }
+ mem_sync:
/*
* sync the memory for cpu
*/
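+ /*
+ * The per-segment destinations are consecutive pieces of one
+ * contiguous buffer, so sync the whole sum_len-byte region in a
+ * single call using the first chunk in the list.
+ */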
+ cle = cls;
+ cl = *cle;
+ cl.c_next = NULL;
+ cl.c_len = sum_len;
if (clist_syncmem(xdrp->xp_conn, &cl, 0) != RDMA_SUCCESS) {
retval = FALSE;
goto out;
}
out:
/*
* Deregister the chunks
*/
+ cle = cls;
+ cl = *cle;
+ cl.c_next = NULL;
+ cl.c_len = sum_len;
(void) clist_deregister(xdrp->xp_conn, &cl, 0);
+ /*
+ * A non-zero alen means the chunk held more data than was
+ * consumed; expose the leftover bytes for a later read.
+ */
+ if (alen) {
+ cle->c_saddr = (uint64)(uintptr_t)cle->c_saddr + cle->c_len;
+ cle->c_len = alen - cle->c_len;
+ }
return (retval);
}
if ((xdrs->x_handy -= len) < 0)
return (FALSE);
*** 422,435 ****
xdr_clist(XDR *xdrs, clist *objp)
{
if (!xdr_uint32(xdrs, &objp->c_xdroff))
return (FALSE);
- if (!xdr_uint32(xdrs, &objp->c_len))
- return (FALSE);
if (!xdr_uint32(xdrs, &objp->c_smemhandle.mrc_rmr))
return (FALSE);
if (!xdr_uint64(xdrs, &objp->c_saddr))
return (FALSE);
if (!xdr_pointer(xdrs, (char **)&objp->c_next, sizeof (clist),
(xdrproc_t)xdr_clist))
return (FALSE);
--- 494,507 ----
xdr_clist(XDR *xdrs, clist *objp)
{
if (!xdr_uint32(xdrs, &objp->c_xdroff))
return (FALSE);
if (!xdr_uint32(xdrs, &objp->c_smemhandle.mrc_rmr))
return (FALSE);
+ if (!xdr_uint32(xdrs, &objp->c_len))
+ return (FALSE);
if (!xdr_uint64(xdrs, &objp->c_saddr))
return (FALSE);
if (!xdr_pointer(xdrs, (char **)&objp->c_next, sizeof (clist),
(xdrproc_t)xdr_clist))
return (FALSE);
*** 447,452 ****
--- 519,818 ----
xdr_getbufsize(XDR *xdrs)
{
struct private *xdrp = (struct private *)(xdrs->x_private);
return ((uint_t)xdrp->xp_buf_size);
+ }
+
+ bool_t
+ xdr_encode_wlist(XDR *xdrs, clist *w, uint_t num_segment)
+ {
+ bool_t vfalse = FALSE, vtrue = TRUE;
+ int i;
+
+ /* does a wlist exist? */
+ if (w == NULL) {
+ return (xdr_bool(xdrs, &vfalse));
+ }
+
+ /* Encode N consecutive segments, 1, N, HLOO, ..., HLOO, 0 */
+ if (! xdr_bool(xdrs, &vtrue))
+ return (FALSE);
+
+ if (! xdr_uint32(xdrs, &num_segment))
+ return (FALSE);
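+ /* Each segment goes out as HLOO: handle, length, 64-bit offset. */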
+ for (i = 0; i < num_segment; i++) {
+ if (! xdr_uint32(xdrs, &w->c_dmemhandle.mrc_rmr))
+ return (FALSE);
+
+ if (! xdr_uint32(xdrs, &w->c_len))
+ return (FALSE);
+
+ if (! xdr_uint64(xdrs, &w->c_daddr))
+ return (FALSE);
+
+ w = w->c_next;
+ }
+ if (!xdr_bool(xdrs, &vfalse))
+ return (FALSE);
+
+ return (TRUE);
+ }
+
+ bool_t
+ xdr_decode_wlist(XDR *xdrs, struct clist **w, bool_t *wlist_exists)
+ {
+ struct clist *tmp;
+ bool_t more = FALSE;
+ uint32_t seg_array_len;
+ uint32_t i;
+
+ if (! xdr_bool(xdrs, &more))
+ return (FALSE);
+
+ /* is there a wlist? */
+ if (more == FALSE) {
+ *wlist_exists = FALSE;
+ return (TRUE);
+ }
+
+ *wlist_exists = TRUE;
+
+ if (! xdr_uint32(xdrs, &seg_array_len))
+ return (FALSE);
+
+ tmp = *w = (struct clist *)kmem_zalloc(sizeof (struct clist),
+ KM_SLEEP);
+ /* *w = empty_cl; */
+ for (i = 0; i < seg_array_len; i++) {
+ if (! xdr_uint32(xdrs, &tmp->c_dmemhandle.mrc_rmr))
+ return (FALSE);
+ if (! xdr_uint32(xdrs, &tmp->c_len))
+ return (FALSE);
+ if (! xdr_uint64(xdrs, &tmp->c_daddr))
+ return (FALSE);
+ if (i < seg_array_len - 1) {
+ tmp->c_next = (struct clist *)
+ mem_alloc(sizeof (struct clist));
+ tmp = tmp->c_next;
+ } else {
+ tmp->c_next = NULL;
+ }
+ }
+
+ more = FALSE;
+ if (!xdr_bool(xdrs, &more))
+ return (FALSE);
+
+ return (TRUE);
+ }
+
+ bool_t
+ xdr_decode_wlist_new(XDR *xdrs, struct clist **wclp, bool_t *wwl,
+ uint32_t *total_length, CONN *conn)
+ {
+ struct clist *first, *prev, *ncl;
+ char *memp;
+ #ifdef SERVER_REG_CACHE
+ /*struct private *xdrp ; = (struct private *)(xdrs->x_private)*/
+ rib_lrc_entry_t *long_reply_buf = NULL;
+ #endif
+ uint32_t num_wclist;
+ uint32_t wcl_length = 0;
+ uint32_t i;
+ bool_t more = FALSE;
+
+ *wclp = NULL;
+ *wwl = FALSE;
+ *total_length = 0;
+
+ if (! xdr_bool(xdrs, &more)) {
+ return (FALSE);
+ }
+
+ if (more == FALSE) {
+ return (TRUE);
+ }
+
+ *wwl = TRUE;
+ if (! xdr_uint32(xdrs, &num_wclist)) {
+ cmn_err(CE_NOTE, "Error interpretting list length");
+ return (FALSE);
+ }
+
+ first = prev = ncl = (struct clist *)
+ kmem_zalloc(num_wclist * sizeof (struct clist), KM_SLEEP);
+
+ if (!first) {
+ cmn_err(CE_NOTE, "Not able to allocate memory");
+ return (FALSE);
+ }
+
+ more = TRUE;
+ for (i = 0; i < num_wclist; i++) {
+ if (! xdr_uint32(xdrs, &ncl->c_dmemhandle.mrc_rmr))
+ return (FALSE);
+ if (! xdr_uint32(xdrs, &ncl->c_len))
+ return (FALSE);
+ if (! xdr_uint64(xdrs, &ncl->c_daddr))
+ return (FALSE);
+
+ if (ncl->c_len > MAX_SVC_XFER_SIZE) {
+ cmn_err(CE_NOTE, "write chunk length too big");
+ ncl->c_len = MAX_SVC_XFER_SIZE;
+ }
+ if (i > 0) {
+ prev->c_next = ncl;
+ }
+ wcl_length += ncl->c_len;
+ prev = ncl;
+ ncl++;
+ }
+
+ more = FALSE;
+ if (!xdr_bool(xdrs, &more))
+ return (FALSE);
+
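+ /*
+ * Back the whole write list with one contiguous buffer and point
+ * each chunk's source address at its slice of that buffer.
+ */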
+ #ifdef SERVER_REG_CACHE
+ long_reply_buf = RDMA_GET_SERVER_CACHE_BUF(conn, wcl_length * sizeof (char));
+ first->long_reply_buf = (uint64)long_reply_buf;
+ memp = long_reply_buf->lrc_buf;
+ #else
+ memp = (char *)kmem_alloc(wcl_length * sizeof (char), KM_SLEEP);
+ #endif
+ if (!memp) {
+ cmn_err(CE_NOTE, "Not able to allocate memory for chunks");
+ kmem_free((void*) first, num_wclist*sizeof(struct clist));
+ return (FALSE);
+ }
+ ncl = first;
+ for (i = 0; i < num_wclist; i++) {
+ #ifdef SERVER_REG_CACHE
+ ncl->long_reply_buf = (uint64)long_reply_buf;
+ #endif
+ ncl->c_saddr = (uint64_t) memp;
+ memp += ncl->c_len;
+ ncl++;
+ }
+
+ *wclp = first;
+ *total_length = wcl_length;
+ return (TRUE);
+ }
+
+ /*
+ * XDR decode the long reply write chunk.
+ */
+ bool_t
+ xdr_decode_reply_wchunk(XDR *xdrs, struct clist **clist, CONN *conn)
+ {
+ uint32_t mem_handle = 0;
+ uint32_t length = 0;
+ uint64 offset = 0;
+ bool_t have_rchunk = FALSE;
+ uint32_t seg_array_len = 0;
+ struct clist *first = NULL, *prev = NULL, *ncl = NULL;
+ char *memp;
+ uint32_t num_wclist;
+ uint32_t wcl_length = 0;
+ uint32_t i;
+ rdma_buf_t long_rpc = {0};
+
+ if (!xdr_bool(xdrs, &have_rchunk))
+ return (FALSE);
+
+ if (have_rchunk == FALSE)
+ return (TRUE);
+
+ if (! xdr_uint32(xdrs, &num_wclist)) {
+ cmn_err(CE_NOTE, "Error interpretting list length");
+ return (FALSE);
+ }
+ if (num_wclist == 0) {
+ return (FALSE);
+ }
+
+ first = prev = ncl = (struct clist *)
+ kmem_zalloc(num_wclist * sizeof (struct clist), KM_SLEEP);
+ if (!first) {
+ cmn_err(CE_NOTE, "Not able to allocate memory");
+ return (FALSE);
+ }
+
+ for (i = 0; i < num_wclist; i++) {
+ if (! xdr_uint32(xdrs, &ncl->c_dmemhandle.mrc_rmr))
+ return (FALSE);
+ if (! xdr_uint32(xdrs, &ncl->c_len))
+ return (FALSE);
+ if (! xdr_uint64(xdrs, &ncl->c_daddr))
+ return (FALSE);
+
+ if (ncl->c_len > MAX_SVC_XFER_SIZE) {
+ cmn_err(CE_NOTE, "reply chunk length too big");
+ ncl->c_len = MAX_SVC_XFER_SIZE;
+ }
+ if (!(ncl->c_dmemhandle.mrc_rmr && (ncl->c_len > 0) && ncl->c_daddr))
+ cmn_err(CE_WARN, "Client sent invalid segment address\n");
+ if (i > 0) {
+ prev->c_next = ncl;
+ }
+ wcl_length += ncl->c_len;
+ prev = ncl;
+ ncl++;
+ }
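+ /*
+ * Stage the long reply in one contiguous buffer, carving out a
+ * source address for each segment of the reply write chunk.
+ */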
+ if (num_wclist) {
+ long_rpc.type = CHUNK_BUFFER;
+ #ifdef SERVER_REG_CACHE
+ long_rpc.long_reply_buf = RDMA_GET_SERVER_CACHE_BUF(conn, wcl_length);
+ memp = long_rpc.addr = long_rpc.long_reply_buf->lrc_buf;
+ #else
+ memp = long_rpc.addr = kmem_zalloc(wcl_length, KM_SLEEP);
+ #endif
+ ncl = first;
+
+ for (i = 0; i < num_wclist; i++) {
+ #ifdef SERVER_REG_CACHE
+ ncl->long_reply_buf = (uint64)long_rpc.long_reply_buf;
+ #endif
+ ncl->c_saddr = (uint64_t) memp;
+ memp += ncl->c_len;
+ ncl++;
+ }
+ }
+ *clist = first;
+ return (TRUE);
+ }
+
+ bool_t
+ xdr_encode_reply_wchunk(XDR *xdrs, struct clist *lrc_entry, uint32_t seg_array_len)
+ {
+ int i;
+ bool_t long_reply_exists = TRUE;
+ uint32_t length;
+ uint64 offset;
+
+ if (seg_array_len > 0) {
+ if (!xdr_bool(xdrs, &long_reply_exists))
+ return (FALSE);
+ if (!xdr_uint32(xdrs, &seg_array_len))
+ return (FALSE);
+
+ for (i = 0; i < seg_array_len; i++) {
+ if (!lrc_entry)
+ return (FALSE);
+ length = lrc_entry->c_len;
+ offset = (uint64)lrc_entry->c_daddr;
+
+ if (!xdr_uint32(xdrs, &lrc_entry->c_dmemhandle.mrc_rmr))
+ return (FALSE);
+ if (!xdr_uint32(xdrs, &length))
+ return (FALSE);
+ if (!xdr_uint64(xdrs, &offset))
+ return (FALSE);
+ lrc_entry = lrc_entry->c_next;
+ }
+ } else {
+ long_reply_exists = FALSE;
+ if (!xdr_bool(xdrs, &long_reply_exists))
+ return (FALSE);
+ }
+ return (TRUE);
}