Cdiff xdr_rdma.c
*** /webrev/webrev/usr/src/uts/common/rpc/xdr_rdma.c-   Mon Aug 14 13:12:12 2006
--- xdr_rdma.c  Thu Aug 10 14:22:04 2006

*** 22,31 **** --- 22,44 ---- /* * Copyright 2004 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ + /* Copyright (c) 2006, The Ohio State University. All rights reserved. + * + * Portions of this source code were developed by the team members of + * The Ohio State University's Network-Based Computing Laboratory (NBCL), + * headed by Professor Dhabaleswar K. (DK) Panda. + * + * Acknowledgements to contributions from developers: + * Ranjit Noronha: noronha@cse.ohio-state.edu + * Lei Chai : chail@cse.ohio-state.edu + * Weikuan Yu : yuw@cse.ohio-state.edu + * + */ + #pragma ident "@(#)xdr_rdma.c 1.4 05/06/08 SMI" /* * xdr_rdma.c, XDR implementation using RDMA to move large chunks */
*** 41,50 **** --- 54,65 ---- #include <rpc/rpc_sztypes.h> #include <rpc/rpc_rdma.h> static struct xdr_ops *xdrrdma_ops(void); + /*int rdma_xdr_long_reply_debug = 0x0;*/ + /* * A chunk list entry identifies a chunk * of opaque data to be moved separately * from the rest of the RPC message. * xp_min_chunk = 0, is a special case for ENCODING, which means
*** 145,169 **** static bool_t xdrrdma_getbytes(XDR *xdrs, caddr_t addr, int len) { struct private *xdrp = (struct private *)(xdrs->x_private); struct clist *cle = *(xdrp->xp_cl_next); struct clist cl; bool_t retval = TRUE; /* * If there was a chunk at the current offset * first record the destination address and length * in the chunk list that came with the message, then * RDMA READ the chunk data. */ if (cle != NULL && cle->c_xdroff == (xdrp->xp_offp - xdrs->x_base)) { ! cle->c_daddr = (uint64)(uintptr_t)addr; ! cle->c_len = len; xdrp->xp_cl_next = &cle->c_next; /* * RDMA READ the chunk data from the remote end. * First prep the destination buffer by registering * it, then RDMA READ the chunk data. Since we are * doing streaming memory, sync the destination buffer --- 160,220 ---- static bool_t xdrrdma_getbytes(XDR *xdrs, caddr_t addr, int len) { struct private *xdrp = (struct private *)(xdrs->x_private); struct clist *cle = *(xdrp->xp_cl_next); + struct clist *cls = *(xdrp->xp_cl_next); struct clist cl; bool_t retval = TRUE; + uint32_t total_len=len; + uint32_t sum_len=0; + uint32_t total_segments=0; + uint32_t actual_segments=0; + uint32_t status; + uint32_t i; + uint32_t alen; + while(cle) { + total_segments++; + cle=cle->c_next; + } + cle = *(xdrp->xp_cl_next); /* * If there was a chunk at the current offset * first record the destination address and length * in the chunk list that came with the message, then * RDMA READ the chunk data. */ if (cle != NULL && cle->c_xdroff == (xdrp->xp_offp - xdrs->x_base)) { ! for(actual_segments=0; actual_segments < total_segments; actual_segments++) { ! if(total_len <= 0) ! goto mem_sync; ! cle->c_daddr = (uint64)(uintptr_t)addr + sum_len; ! /*cle->c_len = len;*/ ! alen = 0; ! if(cle->c_len > total_len) { ! alen = cle->c_len; ! cle->c_len = total_len; ! } ! 
if(!alen) xdrp->xp_cl_next = &cle->c_next; + + sum_len += cle->c_len; + total_len -= cle->c_len; + + if((total_segments - actual_segments - 1) == 0 && total_len > 0 ){ + cmn_err(CE_WARN,"Provided read chunks are too short\n"); + retval = FALSE; + } + + if((total_segments - actual_segments - 1) > 0 && total_len == 0 ){ + #ifdef DEBUG + cmn_err(CE_NOTE,"Provided read chunks are too long [total=%d, actual=%d]\n",total_segments,actual_segments); + #endif + } /* * RDMA READ the chunk data from the remote end. * First prep the destination buffer by registering * it, then RDMA READ the chunk data. Since we are * doing streaming memory, sync the destination buffer
*** 176,210 **** cl = *cle; cl.c_next = NULL; if (clist_register(xdrp->xp_conn, &cl, 0) != RDMA_SUCCESS) { return (FALSE); } /* * Now read the chunk in */ ! if (RDMA_READ(xdrp->xp_conn, &cl, WAIT) != RDMA_SUCCESS) { #ifdef DEBUG cmn_err(CE_WARN, "xdrrdma_getbytes: RDMA_READ failed\n"); #endif retval = FALSE; goto out; } /* * sync the memory for cpu */ if (clist_syncmem(xdrp->xp_conn, &cl, 0) != RDMA_SUCCESS) { retval = FALSE; goto out; } - out: /* * Deregister the chunks */ (void) clist_deregister(xdrp->xp_conn, &cl, 0); return (retval); } if ((xdrs->x_handy -= len) < 0) return (FALSE); --- 227,282 ---- cl = *cle; cl.c_next = NULL; if (clist_register(xdrp->xp_conn, &cl, 0) != RDMA_SUCCESS) { return (FALSE); } + cle->c_dmemhandle = cl.c_dmemhandle; + cle->c_dsynchandle = cl.c_dsynchandle; /* * Now read the chunk in */ ! if((total_segments - actual_segments - 1) == 0 || total_len == 0){ ! status = RDMA_READ(xdrp->xp_conn, &cl, WAIT); ! } else { ! status = RDMA_READ(xdrp->xp_conn, &cl, NOWAIT); ! } ! if (status != RDMA_SUCCESS) { #ifdef DEBUG cmn_err(CE_WARN, "xdrrdma_getbytes: RDMA_READ failed\n"); #endif retval = FALSE; goto out; } + cle = cle->c_next; + } + mem_sync: /* * sync the memory for cpu */ + cle = cls; + cl = *cle; + cl.c_next = NULL; + cl.c_len = sum_len; if (clist_syncmem(xdrp->xp_conn, &cl, 0) != RDMA_SUCCESS) { retval = FALSE; goto out; } out: /* * Deregister the chunks */ + cle = cls; + cl = *cle; + cl.c_next = NULL; + cl.c_len = sum_len; (void) clist_deregister(xdrp->xp_conn, &cl, 0); + if(alen){ + cle->c_saddr = (uint64)(uintptr_t)cle->c_saddr + cle->c_len; + cle->c_len = alen - cle->c_len; + } return (retval); } if ((xdrs->x_handy -= len) < 0) return (FALSE);
*** 422,435 **** xdr_clist(XDR *xdrs, clist *objp) { if (!xdr_uint32(xdrs, &objp->c_xdroff)) return (FALSE); - if (!xdr_uint32(xdrs, &objp->c_len)) - return (FALSE); if (!xdr_uint32(xdrs, &objp->c_smemhandle.mrc_rmr)) return (FALSE); if (!xdr_uint64(xdrs, &objp->c_saddr)) return (FALSE); if (!xdr_pointer(xdrs, (char **)&objp->c_next, sizeof (clist), (xdrproc_t)xdr_clist)) return (FALSE); --- 494,507 ---- xdr_clist(XDR *xdrs, clist *objp) { if (!xdr_uint32(xdrs, &objp->c_xdroff)) return (FALSE); if (!xdr_uint32(xdrs, &objp->c_smemhandle.mrc_rmr)) return (FALSE); + if (!xdr_uint32(xdrs, &objp->c_len)) + return (FALSE); if (!xdr_uint64(xdrs, &objp->c_saddr)) return (FALSE); if (!xdr_pointer(xdrs, (char **)&objp->c_next, sizeof (clist), (xdrproc_t)xdr_clist)) return (FALSE);
*** 447,452 **** --- 519,818 ---- xdr_getbufsize(XDR *xdrs) { struct private *xdrp = (struct private *)(xdrs->x_private); return ((uint_t)xdrp->xp_buf_size); + } + /* + * xdr_encode_wlist: XDR-encode a write list of num_segment entries. + * When w is NULL only a FALSE boolean is emitted. Otherwise the stream + * gets TRUE, num_segment, then c_dmemhandle.mrc_rmr, c_len and c_daddr + * of each entry reached through c_next, and finally a FALSE boolean. + * Returns FALSE as soon as any xdr_* call fails, TRUE otherwise. + * NOTE(review): the loop dereferences w without a NULL check, so it + * appears to rely on the list holding at least num_segment entries; + * confirm against callers. i is int while num_segment is uint_t, so + * the loop comparison mixes signed and unsigned. + */ + bool_t + xdr_encode_wlist(XDR *xdrs, clist *w, uint_t num_segment) + { + bool_t vfalse = FALSE, vtrue = TRUE; + int i; + + /* does a wlist exist? */ + if (w == NULL) { + return (xdr_bool(xdrs, &vfalse)); + } + + /* Encode N consecutive segments, 1, N, HLOO, ..., HLOO, 0 */ + if (! xdr_bool(xdrs, &vtrue)) + return (FALSE); + + if (! xdr_uint32(xdrs, &num_segment)) + return (FALSE); + for(i=0; i<num_segment; i++){ + if (! xdr_uint32(xdrs, &w->c_dmemhandle.mrc_rmr)) + return (FALSE); + + if (! xdr_uint32(xdrs, &w->c_len)) + return (FALSE); + + if (! xdr_uint64(xdrs, &w->c_daddr)) + return (FALSE); + + w = w->c_next; + } + /* trailing FALSE boolean written after the segment array */ if (!xdr_bool(xdrs, &vfalse)) + return (FALSE); + + return (TRUE); + } + /* + * xdr_decode_wlist: decode a write list from the stream into *w. + * Reads a boolean first; when it is FALSE, *wlist_exists is set to FALSE + * and TRUE is returned without touching *w. Otherwise *wlist_exists is + * set to TRUE, a segment count is read, and for each segment the + * c_dmemhandle.mrc_rmr, c_len and c_daddr fields are read into a clist + * node; a trailing boolean is read last. + * NOTE(review): the first node comes from kmem_zalloc but later nodes + * come from mem_alloc, and only c_next is assigned on them besides the + * three decoded fields; whether mem_alloc zeroes memory is not visible + * here. The FALSE returns do not free nodes already allocated. A count + * of 0 still leaves one allocated node in *w. + */ + bool_t + xdr_decode_wlist(XDR *xdrs, struct clist **w, bool_t *wlist_exists) + { + struct clist *tmp; + bool_t more = FALSE; + uint32_t seg_array_len; + uint32_t i; + + if (! xdr_bool(xdrs, &more)) + return (FALSE); + + /* is there a wlist? */ + if (more == FALSE) { + *wlist_exists = FALSE; + return (TRUE); + } + + *wlist_exists = TRUE; + + if (! xdr_uint32(xdrs, &seg_array_len)) + return (FALSE); + + tmp = *w = (struct clist *)kmem_zalloc(sizeof (struct clist), + KM_SLEEP); + /* *w = empty_cl; */ + for (i = 0; i < seg_array_len; i++) { + if (! xdr_uint32(xdrs, &tmp->c_dmemhandle.mrc_rmr)) + return (FALSE); + if (! xdr_uint32(xdrs, &tmp->c_len)) + return (FALSE); + if (! 
xdr_uint64(xdrs, &tmp->c_daddr)) + return (FALSE); + /* allocate a follow-on node for every segment except the last one */ if (i < seg_array_len - 1) { + tmp->c_next = (struct clist *) + mem_alloc(sizeof(struct clist)); + tmp = tmp->c_next; + } else { + tmp->c_next = NULL; + } + } + + more = FALSE; + /* consume the boolean that follows the segment array; its value is not examined */ if (!xdr_bool(xdrs, &more)) + return (FALSE); + + return (TRUE); + } + /* + * xdr_decode_wlist_new: decode a write list into one array of clist + * entries plus one contiguous data buffer. + * *wclp, *wwl and *total_length are first reset to NULL, FALSE and 0. + * If the leading boolean is FALSE the function returns TRUE with those + * values. Otherwise *wwl becomes TRUE, num_wclist entries are decoded + * (c_len is capped at MAX_SVC_XFER_SIZE) and linked through c_next, a + * trailing boolean is read, and a buffer of the summed length is + * obtained (RDMA_GET_SERVER_CACHE_BUF under SERVER_REG_CACHE, else + * kmem_alloc); each entry's c_saddr is pointed at its slice of it. + * On success *wclp is the first entry and *total_length the summed + * length. + * NOTE(review): num_wclist is not rejected when 0 here, unlike + * xdr_decode_reply_wchunk. The FALSE returns taken on decode failure + * do not free the entry array. num_wclist*sizeof(struct clist) and the + * running wcl_length are built from decoded values with no overflow + * check. + */ + bool_t + xdr_decode_wlist_new(XDR *xdrs, struct clist **wclp, bool_t *wwl, + uint32_t *total_length,CONN *conn) + { + struct clist *first, *prev, *ncl; + char *memp; + #ifdef SERVER_REG_CACHE + /*struct private *xdrp ; = (struct private *)(xdrs->x_private)*/ + rib_lrc_entry_t *long_reply_buf = NULL; + #endif + uint32_t num_wclist; + uint32_t wcl_length = 0; + uint32_t i; + bool_t more = FALSE; + + *wclp = NULL; + *wwl = FALSE; + *total_length=0; + + if (! xdr_bool(xdrs, &more)) { + return (FALSE); + } + + if (more == FALSE) { + return (TRUE); + } + + *wwl = TRUE; + if (! xdr_uint32(xdrs, &num_wclist)) { + cmn_err(CE_NOTE, "Error interpretting list length"); + return (FALSE); + } + + first = prev = ncl = (struct clist *) + kmem_zalloc(num_wclist*sizeof(struct clist), KM_SLEEP); + + if (!first) { + cmn_err(CE_NOTE, "Not able to allocate memory"); + return (FALSE); + } + + more = TRUE; + for (i = 0; i < num_wclist; i++) { + if (! xdr_uint32(xdrs, &ncl->c_dmemhandle.mrc_rmr)) + return (FALSE); + if (! xdr_uint32(xdrs, &ncl->c_len)) + return (FALSE); + if (! 
xdr_uint64(xdrs, &ncl->c_daddr)) + return (FALSE); + + if (ncl->c_len > MAX_SVC_XFER_SIZE) { + cmn_err(CE_NOTE, "write chunk length too big"); + ncl->c_len = MAX_SVC_XFER_SIZE; + } + /* entries are adjacent array slots; link each one to its predecessor */ if (i > 0) { + prev->c_next = ncl; + } + wcl_length += ncl->c_len; + prev = ncl; + ncl ++ ; + } + + more = FALSE; + if (!xdr_bool(xdrs, &more)) + return (FALSE); + /* NOTE(review): under SERVER_REG_CACHE long_reply_buf is dereferenced before the !memp check below, so a NULL result from RDMA_GET_SERVER_CACHE_BUF would not reach that check; confirm whether it can return NULL */ + #ifdef SERVER_REG_CACHE + long_reply_buf = RDMA_GET_SERVER_CACHE_BUF(conn,wcl_length*sizeof(char)); + first->long_reply_buf = (uint64)long_reply_buf; + memp = long_reply_buf->lrc_buf; + #else + memp = (char *) kmem_alloc(wcl_length*sizeof(char), KM_SLEEP); + #endif + if (!memp) { + cmn_err(CE_NOTE, "Not able to allocate memory for chunks"); + kmem_free((void*) first, num_wclist*sizeof(struct clist)); + return (FALSE); + } + ncl = first; + /* hand each entry its own consecutive slice of the single buffer */ for (i = 0; i < num_wclist; i++) { + #ifdef SERVER_REG_CACHE + ncl->long_reply_buf = (uint64)long_reply_buf; + #endif + ncl->c_saddr = (uint64_t) memp; + memp += ncl->c_len; + ncl++; + } + + *wclp = first; + *total_length = wcl_length; + return (TRUE); + } + + /* + * XDR decode the long reply write chunk. + * + * Reads a boolean; when it is FALSE the function returns TRUE and + * leaves *clist untouched. Otherwise a segment count is read (0 is + * rejected with FALSE), that many clist entries are decoded into one + * array (c_len capped at MAX_SVC_XFER_SIZE) and linked through c_next, + * and a buffer of the summed length is obtained whose slices become + * the entries' c_saddr. *clist is set to the first entry. + * NOTE(review): a segment with a zero handle, length or address only + * triggers a warning and is still accepted. The FALSE returns inside + * the decode loop do not free the entry array. mem_handle, length, + * offset and seg_array_len are never used, and the if(num_wclist) + * test is always true at that point because 0 returned earlier. + */ + bool_t + xdr_decode_reply_wchunk(XDR *xdrs, struct clist **clist,CONN *conn) + { + uint32_t mem_handle = 0; + uint32_t length = 0; + uint64 offset = 0; + bool_t have_rchunk = FALSE; + uint32_t seg_array_len = 0; + struct clist *first = NULL, *prev = NULL, *ncl = NULL; + char *memp; + uint32_t num_wclist; + uint32_t wcl_length = 0; + uint32_t i; + rdma_buf_t long_rpc = {0}; + + if (!xdr_bool(xdrs, &have_rchunk)) + return (FALSE); + + if (have_rchunk == FALSE) + return (TRUE); + + if (! xdr_uint32(xdrs, &num_wclist)) { + cmn_err(CE_NOTE, "Error interpretting list length"); + return (FALSE); + } + if (num_wclist == 0) { + return (FALSE); + } + + first = prev = ncl = (struct clist *) + kmem_zalloc(num_wclist*sizeof(struct clist), KM_SLEEP); + if (!first) { + cmn_err(CE_NOTE, "Not able to allocate memory"); + return (FALSE); + } + + for (i = 0; i < num_wclist; i++) { + if (! 
xdr_uint32(xdrs, &ncl->c_dmemhandle.mrc_rmr)) + return (FALSE); + if (! xdr_uint32(xdrs, &ncl->c_len)) + return (FALSE); + if (! xdr_uint64(xdrs, &ncl->c_daddr)) + return (FALSE); + + if (ncl->c_len > MAX_SVC_XFER_SIZE) { + cmn_err(CE_NOTE, "reply chunk length too big"); + ncl->c_len = MAX_SVC_XFER_SIZE; + } + /* warn only; the segment is still linked and counted below */ if(!(ncl->c_dmemhandle.mrc_rmr && (ncl->c_len > 0) && ncl->c_daddr)) + cmn_err(CE_WARN,"Client sent invalid segment address\n"); + if (i > 0) { + prev->c_next = ncl; + } + wcl_length += ncl->c_len; + prev = ncl; + ncl ++ ; + } + if(num_wclist){ + long_rpc.type = CHUNK_BUFFER; + #ifdef SERVER_REG_CACHE + long_rpc.long_reply_buf = RDMA_GET_SERVER_CACHE_BUF(conn,wcl_length); + memp = long_rpc.addr = long_rpc.long_reply_buf->lrc_buf; + #else + memp = long_rpc.addr = kmem_zalloc(wcl_length, KM_SLEEP); + #endif + ncl = first; + + /* hand each entry its own consecutive slice of the single buffer */ for (i = 0; i < num_wclist; i++) { + #ifdef SERVER_REG_CACHE + ncl->long_reply_buf = (uint64)long_rpc.long_reply_buf; + #endif + ncl->c_saddr = (uint64_t) memp; + memp += ncl->c_len; + ncl++; + } + } + *clist=first; + return (TRUE); + } + /* + * xdr_encode_reply_wchunk: XDR-encode a reply write chunk list. + * With seg_array_len > 0 the stream gets TRUE, seg_array_len, then + * c_dmemhandle.mrc_rmr, c_len and c_daddr for each entry reached + * through c_next; FALSE is returned if the list ends before + * seg_array_len entries were written. With seg_array_len == 0 only a + * FALSE boolean is emitted. Returns TRUE on success. + * NOTE(review): unlike xdr_encode_wlist, no boolean is written after + * the segments. i is int while seg_array_len is uint32_t. + */ + bool_t + xdr_encode_reply_wchunk(XDR *xdrs, struct clist *lrc_entry,uint32_t seg_array_len) + { + int i; + bool_t long_reply_exists = TRUE; + uint32_t length ; + uint64 offset ; + if(seg_array_len>0){ + if (!xdr_bool(xdrs, &long_reply_exists)) + return (FALSE); + if (!xdr_uint32(xdrs, &seg_array_len)) + return (FALSE); + + for(i=0;i<seg_array_len;i++){ + /* list shorter than the announced count: give up */ if(!lrc_entry) + return FALSE; + length = lrc_entry->c_len; + offset = (uint64)lrc_entry->c_daddr; + + if (!xdr_uint32(xdrs, &lrc_entry->c_dmemhandle.mrc_rmr)) + return (FALSE); + if (!xdr_uint32(xdrs, &length)) + return (FALSE); + if (!xdr_uint64(xdrs, &offset)) + return (FALSE); + lrc_entry = lrc_entry->c_next; + } + } else { + long_reply_exists = FALSE; + if(!xdr_bool(xdrs, &long_reply_exists)) + return (FALSE); + } + return (TRUE); }