New nfs3_srv.c
  1 /*
  2  * CDDL HEADER START
  3  *
  4  * The contents of this file are subject to the terms of the
  5  * Common Development and Distribution License, Version 1.0 only
  6  * (the "License").  You may not use this file except in compliance
  7  * with the License.
  8  *
  9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 10  * or http://www.opensolaris.org/os/licensing.
 11  * See the License for the specific language governing permissions
 12  * and limitations under the License.
 13  *
 14  * When distributing Covered Code, include this CDDL HEADER in each
 15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 16  * If applicable, add the following below this CDDL HEADER, with the
 17  * fields enclosed by brackets "[]" replaced with your own identifying
 18  * information: Portions Copyright [yyyy] [name of copyright owner]
 19  *
 20  * CDDL HEADER END
 21  */
 22 /*
 23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 24  * Use is subject to license terms.
 25  */
 26 
 27 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
 28 /* All Rights Reserved */
 29 
 30  /* Copyright (c) 2006, The Ohio State University. All rights reserved.
 31   *
 32   * Portions of this source code is developed by the team members of
 33   * The Ohio State University's Network-Based Computing Laboratory (NBCL),
 34   * headed by Professor Dhabaleswar K. (DK) Panda.
 35   *
 36   * Acknowledgements to contributions from developors:
 37   *   Ranjit Noronha: noronha@cse.ohio-state.edu
 38   *   Lei Chai      : chail@cse.ohio-state.edu
 39   *   Weikuan Yu    : yuw@cse.ohio-state.edu
 40   *
 41   */
 42 #pragma ident   "@(#)nfs3_srv.c 1.113   05/07/25 SMI"
 43 
 44 #include <sys/param.h>
 45 #include <sys/types.h>
 46 #include <sys/systm.h>
 47 #include <sys/cred.h>
 48 #include <sys/buf.h>
 49 #include <sys/vfs.h>
 50 #include <sys/vnode.h>
 51 #include <sys/uio.h>
 52 #include <sys/errno.h>
 53 #include <sys/sysmacros.h>
 54 #include <sys/statvfs.h>
 55 #include <sys/kmem.h>
 56 #include <sys/dirent.h>
 57 #include <sys/cmn_err.h>
 58 #include <sys/debug.h>
 59 #include <sys/systeminfo.h>
 60 #include <sys/flock.h>
 61 #include <sys/nbmlock.h>
 62 #include <sys/policy.h>
 63 
 64 #include <rpc/types.h>
 65 #include <rpc/auth.h>
 66 #include <rpc/svc.h>
 67 
 68 #include <nfs/nfs.h>
 69 #include <nfs/export.h>
 70 
 71 #include <sys/strsubr.h>
 72 
 73 /* #define RPC_RDMA_INLINE 1 */
 74 
 75 /*
 76  * These are the interface routines for the server side of the
 77  * Network File System.  See the NFS version 3 protocol specification
 78  * for a description of this interface.
 79  */
 80 
#ifdef DEBUG
/*
 * DEBUG-only switches, all enabled (1) by default.  The handlers in this
 * file test rfs3_do_pre_op_attr before recording pre-operation attributes
 * and rfs3_do_post_op_attr before recording post-operation attributes.
 * NOTE(review): rfs3_do_post_op_fh3 is not referenced in this part of the
 * file; presumably it gates optional post-op file handles -- confirm.
 */
int rfs3_do_pre_op_attr = 1;
int rfs3_do_post_op_attr = 1;
int rfs3_do_post_op_fh3 = 1;
#endif

/*
 * File-private write verifier.
 * NOTE(review): its users are outside this part of the file.
 */
static writeverf3 write3verf;

/*
 * Converters between vnode attributes (struct vattr) and the NFS version 3
 * attribute representations.  The int-returning converters report an error
 * code that callers feed to puterrno3().
 */
static int      sattr3_to_vattr(sattr3 *, struct vattr *);
static int      vattr_to_fattr3(struct vattr *, fattr3 *);
static int      vattr_to_wcc_attr(struct vattr *, wcc_attr *);
static void     vattr_to_pre_op_attr(struct vattr *, pre_op_attr *);
static void     vattr_to_wcc_data(struct vattr *, struct vattr *, wcc_data *);
 94 
 95 /* ARGSUSED */
 96 void
 97 rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
 98         struct svc_req *req, cred_t *cr)
 99 {
100         int error;
101         vnode_t *vp;
102         struct vattr va;
103 
104         vp = nfs3_fhtovp(&args->object, exi);
105         if (vp == NULL) {
106                 error = ESTALE;
107                 goto out;
108         }
109 
110         va.va_mask = AT_ALL;
111         error = rfs4_delegated_getattr(vp, &va, 0, cr);
112 
113         VN_RELE(vp);
114 
115         if (!error) {
116                 /* overflow error if time or size is out of range */
117                 error = vattr_to_fattr3(&va, &resp->resok.obj_attributes);
118                 if (error)
119                         goto out;
120                 resp->status = NFS3_OK;
121                 return;
122         }
123 
124 out:
125         if (curthread->t_flag & T_WOULDBLOCK) {
126                 curthread->t_flag &= ~T_WOULDBLOCK;
127                 resp->status = NFS3ERR_JUKEBOX;
128         } else
129                 resp->status = puterrno3(error);
130 }
131 
132 fhandle_t *
133 rfs3_getattr_getfh(GETATTR3args *args)
134 {
135 
136         return ((fhandle_t *)&args->object.fh3_u.nfs_fh3_i.fh3_i);
137 }
138 
/*
 * NFS version 3 SETATTR.
 *
 * Resolves args->object to a vnode, converts args->new_attributes into a
 * struct vattr and applies it.  A size change requested by the file's
 * owner on a regular file is carried out with VOP_SPACE; everything still
 * left in the mask is applied with VOP_SETATTR.  Attributes fetched before
 * the change (bvap) and after it (avap) are returned as wcc data on both
 * the success and the failure path.
 *
 * Status values set directly here: NFS3ERR_JUKEBOX (file delegated by the
 * v4 server, or the thread was flagged T_WOULDBLOCK), NFS3ERR_ROFS
 * (read-only export or read-only vnode) and NFS3ERR_NOT_SYNC (guard ctime
 * mismatch).  Any other failure is mapped with puterrno3().
 */
void
rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi,
        struct svc_req *req, cred_t *cr)
{
        int error;
        vnode_t *vp;
        struct vattr *bvap;
        struct vattr bva;
        struct vattr *avap;
        struct vattr ava;
        int flag;
        int in_crit = 0;
        struct flock64 bf;

        /* NULL means "attributes not available" to vattr_to_wcc_data(). */
        bvap = NULL;
        avap = NULL;

        vp = nfs3_fhtovp(&args->object, exi);
        if (vp == NULL) {
                error = ESTALE;
                goto out;
        }

        error = sattr3_to_vattr(&args->new_attributes, &ava);
        if (error)
                goto out;

        /*
         * We need to specially handle size changes because of
         * possible conflicting NBMAND locks. Get into critical
         * region before VOP_GETATTR, so the size attribute is
         * valid when checking conflicts.
         *
         * Also, check to see if the v4 side of the server has
         * delegated this file.  If so, then we return JUKEBOX to
         * allow the client to retransmit its request.
         */
        if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
                if (rfs4_check_delegated(FWRITE, vp, TRUE)) {
                        resp->status = NFS3ERR_JUKEBOX;
                        goto out1;
                }
                if (nbl_need_check(vp)) {
                        nbl_start_crit(vp, RW_READER);
                        in_crit = 1;
                }
        }

        bva.va_mask = AT_ALL;
        error = rfs4_delegated_getattr(vp, &bva, 0, cr);

        /*
         * If we can't get the attributes, then we can't do the
         * right access checking.  So, we'll fail the request.
         */
        if (error)
                goto out;

#ifdef DEBUG
        if (rfs3_do_pre_op_attr)
                bvap = &bva;
#else
        bvap = &bva;
#endif

        if (rdonly(exi, req) || vn_is_readonly(vp)) {
                resp->status = NFS3ERR_ROFS;
                goto out1;
        }

        /* Guarded SETATTR: the client's ctime must match the current one. */
        if (args->guard.check &&
            (args->guard.obj_ctime.seconds != bva.va_ctime.tv_sec ||
            args->guard.obj_ctime.nseconds != bva.va_ctime.tv_nsec)) {
                resp->status = NFS3ERR_NOT_SYNC;
                goto out1;
        }

        /* ATTR_UTIME is passed only when the client supplied the mtime. */
        if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME)
                flag = ATTR_UTIME;
        else
                flag = 0;

        /*
         * If the filesystem is exported with nosuid, then mask off
         * the setuid and setgid bits.
         */
        if ((ava.va_mask & AT_MODE) && vp->v_type == VREG &&
            (exi->exi_export.ex_flags & EX_NOSUID))
                ava.va_mode &= ~(VSUID | VSGID);

        /*
         * We need to specially handle size changes because it is
         * possible for the client to create a file with modes
         * which indicate read-only, but with the file opened for
         * writing.  If the client then tries to set the size of
         * the file, then the normal access checking done in
         * VOP_SETATTR would prevent the client from doing so,
         * although it should be legal for it to do so.  To get
         * around this, we do the access checking for ourselves
         * and then use VOP_SPACE which doesn't do the access
         * checking which VOP_SETATTR does. VOP_SPACE can only
         * operate on VREG files, let VOP_SETATTR handle the other
         * extremely rare cases.
         * Also the client should not be allowed to change the
         * size of the file if there is a conflicting non-blocking
         * mandatory lock in the region of the change.
         */
        if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
                if (in_crit) {
                        u_offset_t offset;
                        ssize_t length;

                        /*
                         * The range checked for lock conflicts is the span
                         * between the old and the new end of file.
                         */
                        if (ava.va_size < bva.va_size) {
                                offset = ava.va_size;
                                length = bva.va_size - ava.va_size;
                        } else {
                                offset = bva.va_size;
                                length = ava.va_size - bva.va_size;
                        }
                        if (nbl_conflict(vp, NBL_WRITE, offset, length, 0)) {
                                error = EACCES;
                                goto out;
                        }
                }

                /*
                 * Owner changing the size: do it with VOP_SPACE and drop
                 * AT_SIZE so VOP_SETATTR below does not handle it again.
                 */
                if (crgetuid(cr) == bva.va_uid && ava.va_size != bva.va_size) {
                        ava.va_mask &= ~AT_SIZE;
                        bf.l_type = F_WRLCK;
                        bf.l_whence = 0;
                        bf.l_start = (off64_t)ava.va_size;
                        bf.l_len = 0;
                        bf.l_sysid = 0;
                        bf.l_pid = 0;
                        error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
                            (offset_t)ava.va_size, cr, NULL);
                }
        }

        /* Apply whatever attributes remain in the mask. */
        if (!error && ava.va_mask)
                error = VOP_SETATTR(vp, &ava, flag, cr, NULL);

        /* ava is reused from here on to hold the post-operation attributes. */
#ifdef DEBUG
        if (rfs3_do_post_op_attr) {
                ava.va_mask = AT_ALL;
                avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
        } else
                avap = NULL;
#else
        ava.va_mask = AT_ALL;
        avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
#endif

        /*
         * Force modified metadata out to stable storage.
         */
        (void) VOP_FSYNC(vp, FNODSYNC, cr);

        if (error)
                goto out;

        if (in_crit)
                nbl_end_crit(vp);
        VN_RELE(vp);

        resp->status = NFS3_OK;
        vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc);
        return;

out:
        /* Map the error; a would-block condition is reported as JUKEBOX. */
        if (curthread->t_flag & T_WOULDBLOCK) {
                curthread->t_flag &= ~T_WOULDBLOCK;
                resp->status = NFS3ERR_JUKEBOX;
        } else
                resp->status = puterrno3(error);
out1:
        /* Entered directly when resp->status has already been set. */
        if (vp != NULL) {
                if (in_crit)
                        nbl_end_crit(vp);
                VN_RELE(vp);
        }
        vattr_to_wcc_data(bvap, avap, &resp->resfail.obj_wcc);
}
321 
322 fhandle_t *
323 rfs3_setattr_getfh(SETATTR3args *args)
324 {
325 
326         return ((fhandle_t *)&args->object.fh3_u.nfs_fh3_i.fh3_i);
327 }
328 
329 /* ARGSUSED */
330 void
331 rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
332         struct svc_req *req, cred_t *cr)
333 {
334         int error;
335         vnode_t *vp;
336         vnode_t *dvp;
337         struct vattr *vap;
338         struct vattr va;
339         struct vattr *dvap;
340         struct vattr dva;
341         nfs_fh3 *fhp;
342         struct sec_ol sec = {0, 0};
343         bool_t publicfh_flag = FALSE, auth_weak = FALSE;
344 
345         dvap = NULL;
346 
347         /*
348          * Allow lookups from the root - the default
349          * location of the public filehandle.
350          */
351         if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
352                 dvp = rootdir;
353                 VN_HOLD(dvp);
354         } else {
355                 dvp = nfs3_fhtovp(args->what.dirp, exi);
356                 if (dvp == NULL) {
357                         error = ESTALE;
358                         goto out;
359                 }
360         }
361 
362 #ifdef DEBUG
363         if (rfs3_do_pre_op_attr) {
364                 dva.va_mask = AT_ALL;
365                 dvap = VOP_GETATTR(dvp, &dva, 0, cr) ? NULL : &dva;
366         }
367 #else
368         dva.va_mask = AT_ALL;
369         dvap = VOP_GETATTR(dvp, &dva, 0, cr) ? NULL : &dva;
370 #endif
371 
372         if (args->what.name == nfs3nametoolong) {
373                 resp->status = NFS3ERR_NAMETOOLONG;
374                 goto out1;
375         }
376 
377         if (args->what.name == NULL || *(args->what.name) == '\0') {
378                 resp->status = NFS3ERR_ACCES;
379                 goto out1;
380         }
381 
382         fhp = args->what.dirp;
383         if (strcmp(args->what.name, "..") == 0 &&
384             EQFID(&exi->exi_fid, (fid_t *)&fhp->fh3_len)) {
385                 resp->status = NFS3ERR_NOENT;
386                 goto out1;
387         }
388 
389         /*
390          * If the public filehandle is used then allow
391          * a multi-component lookup
392          */
393         if (PUBLIC_FH3(args->what.dirp)) {
394                 publicfh_flag = TRUE;
395                 error = rfs_publicfh_mclookup(args->what.name, dvp, cr, &vp,
396                                         &exi, &sec);
397                 if (error && exi != NULL)
398                         exi_rele(exi);  /* See the comment below */
399         } else {
400                 error = VOP_LOOKUP(dvp, args->what.name, &vp,
401                                 NULL, 0, NULL, cr);
402         }
403 
404 #ifdef DEBUG
405         if (rfs3_do_post_op_attr) {
406                 dva.va_mask = AT_ALL;
407                 dvap = VOP_GETATTR(dvp, &dva, 0, cr) ? NULL : &dva;
408         } else
409                 dvap = NULL;
410 #else
411         dva.va_mask = AT_ALL;
412         dvap = VOP_GETATTR(dvp, &dva, 0, cr) ? NULL : &dva;
413 #endif
414 
415         if (error)
416                 goto out;
417 
418         if (sec.sec_flags & SEC_QUERY) {
419                 error = makefh3_ol(&resp->resok.object, exi, sec.sec_index);
420         } else {
421                 error = makefh3(&resp->resok.object, vp, exi);
422                 if (!error && publicfh_flag && !chk_clnt_sec(exi, req))
423                         auth_weak = TRUE;
424         }
425 
426         if (error) {
427                 VN_RELE(vp);
428                 goto out;
429         }
430 
431         /*
432          * If publicfh_flag is true then we have called rfs_publicfh_mclookup
433          * and have obtained a new exportinfo in exi which needs to be
434          * released. Note the the original exportinfo pointed to by exi
435          * will be released by the caller, common_dispatch.
436          */
437         if (publicfh_flag)
438                 exi_rele(exi);
439 
440         VN_RELE(dvp);
441 
442 #ifdef DEBUG
443         if (rfs3_do_post_op_attr) {
444                 va.va_mask = AT_ALL;
445                 vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
446         } else
447                 vap = NULL;
448 #else
449         va.va_mask = AT_ALL;
450         vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
451 #endif
452 
453         VN_RELE(vp);
454 
455         resp->status = NFS3_OK;
456         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
457         vattr_to_post_op_attr(dvap, &resp->resok.dir_attributes);
458 
459         /*
460          * If it's public fh, no 0x81, and client's flavor is
461          * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
462          * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
463          */
464         if (auth_weak)
465                 resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR;
466 
467         return;
468 
469 out:
470         if (curthread->t_flag & T_WOULDBLOCK) {
471                 curthread->t_flag &= ~T_WOULDBLOCK;
472                 resp->status = NFS3ERR_JUKEBOX;
473         } else
474                 resp->status = puterrno3(error);
475 out1:
476         if (dvp != NULL)
477                 VN_RELE(dvp);
478         vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes);
479 
480 }
481 
482 fhandle_t *
483 rfs3_lookup_getfh(LOOKUP3args *args)
484 {
485 
486         return ((fhandle_t *)&args->what.dirp->fh3_u.nfs_fh3_i.fh3_i);
487 }
488 
489 /* ARGSUSED */
490 void
491 rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi,
492         struct svc_req *req, cred_t *cr)
493 {
494         int error;
495         vnode_t *vp;
496         struct vattr *vap;
497         struct vattr va;
498         int checkwriteperm;
499 
500         vap = NULL;
501 
502         vp = nfs3_fhtovp(&args->object, exi);
503         if (vp == NULL) {
504                 error = ESTALE;
505                 goto out;
506         }
507 
508         /*
509          * If the file system is exported read only, it is not appropriate
510          * to check write permissions for regular files and directories.
511          * Special files are interpreted by the client, so the underlying
512          * permissions are sent back to the client for interpretation.
513          */
514         if (rdonly(exi, req) && (vp->v_type == VREG || vp->v_type == VDIR))
515                 checkwriteperm = 0;
516         else
517                 checkwriteperm = 1;
518 
519         /*
520          * We need the mode so that we can correctly determine access
521          * permissions relative to a mandatory lock file.  Access to
522          * mandatory lock files is denied on the server, so it might
523          * as well be reflected to the server during the open.
524          */
525         va.va_mask = AT_MODE;
526         error = VOP_GETATTR(vp, &va, 0, cr);
527         if (error)
528                 goto out;
529 
530 #ifdef DEBUG
531         if (rfs3_do_post_op_attr)
532                 vap = &va;
533 #else
534         vap = &va;
535 #endif
536 
537         resp->resok.access = 0;
538 
539         if (args->access & ACCESS3_READ) {
540                 error = VOP_ACCESS(vp, VREAD, 0, cr);
541                 if (error) {
542                         if (curthread->t_flag & T_WOULDBLOCK)
543                                 goto out;
544                 } else if (!MANDLOCK(vp, va.va_mode))
545                         resp->resok.access |= ACCESS3_READ;
546         }
547         if ((args->access & ACCESS3_LOOKUP) && vp->v_type == VDIR) {
548                 error = VOP_ACCESS(vp, VEXEC, 0, cr);
549                 if (error) {
550                         if (curthread->t_flag & T_WOULDBLOCK)
551                                 goto out;
552                 } else
553                         resp->resok.access |= ACCESS3_LOOKUP;
554         }
555         if (checkwriteperm &&
556             (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND))) {
557                 error = VOP_ACCESS(vp, VWRITE, 0, cr);
558                 if (error) {
559                         if (curthread->t_flag & T_WOULDBLOCK)
560                                 goto out;
561                 } else if (!MANDLOCK(vp, va.va_mode)) {
562                         resp->resok.access |=
563                             (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND));
564                 }
565         }
566         if (checkwriteperm &&
567             (args->access & ACCESS3_DELETE) && vp->v_type == VDIR) {
568                 error = VOP_ACCESS(vp, VWRITE, 0, cr);
569                 if (error) {
570                         if (curthread->t_flag & T_WOULDBLOCK)
571                                 goto out;
572                 } else
573                         resp->resok.access |= ACCESS3_DELETE;
574         }
575         if (args->access & ACCESS3_EXECUTE) {
576                 error = VOP_ACCESS(vp, VEXEC, 0, cr);
577                 if (error) {
578                         if (curthread->t_flag & T_WOULDBLOCK)
579                                 goto out;
580                 } else if (!MANDLOCK(vp, va.va_mode))
581                         resp->resok.access |= ACCESS3_EXECUTE;
582         }
583 
584 #ifdef DEBUG
585         if (rfs3_do_post_op_attr) {
586                 va.va_mask = AT_ALL;
587                 vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
588         } else
589                 vap = NULL;
590 #else
591         va.va_mask = AT_ALL;
592         vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
593 #endif
594 
595         VN_RELE(vp);
596 
597         resp->status = NFS3_OK;
598         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
599         return;
600 
601 out:
602         if (curthread->t_flag & T_WOULDBLOCK) {
603                 curthread->t_flag &= ~T_WOULDBLOCK;
604                 resp->status = NFS3ERR_JUKEBOX;
605         } else
606                 resp->status = puterrno3(error);
607         if (vp != NULL)
608                 VN_RELE(vp);
609         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
610 }
611 
612 fhandle_t *
613 rfs3_access_getfh(ACCESS3args *args)
614 {
615 
616         return ((fhandle_t *)&args->object.fh3_u.nfs_fh3_i.fh3_i);
617 }
618 
619 /* ARGSUSED */
620 void
621 rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi,
622         struct svc_req *req, cred_t *cr)
623 {
624         int error;
625         vnode_t *vp;
626         struct vattr *vap;
627         struct vattr va;
628         struct iovec iov;
629         struct uio uio;
630         char *data;
631 
632         vap = NULL;
633 
634         vp = nfs3_fhtovp(&args->symlink, exi);
635         if (vp == NULL) {
636                 error = ESTALE;
637                 goto out;
638         }
639 
640         va.va_mask = AT_ALL;
641         error = VOP_GETATTR(vp, &va, 0, cr);
642         if (error)
643                 goto out;
644 
645 #ifdef DEBUG
646         if (rfs3_do_post_op_attr)
647                 vap = &va;
648 #else
649         vap = &va;
650 #endif
651 
652         if (vp->v_type != VLNK) {
653                 resp->status = NFS3ERR_INVAL;
654                 goto out1;
655         }
656 
657         if (MANDLOCK(vp, va.va_mode)) {
658                 resp->status = NFS3ERR_ACCES;
659                 goto out1;
660         }
661 
662         data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
663 
664         iov.iov_base = data;
665         iov.iov_len = MAXPATHLEN;
666         uio.uio_iov = &iov;
667         uio.uio_iovcnt = 1;
668         uio.uio_segflg = UIO_SYSSPACE;
669         uio.uio_extflg = UIO_COPY_CACHED;
670         uio.uio_loffset = 0;
671         uio.uio_resid = MAXPATHLEN;
672 
673         error = VOP_READLINK(vp, &uio, cr);
674 
675 #ifdef DEBUG
676         if (rfs3_do_post_op_attr) {
677                 va.va_mask = AT_ALL;
678                 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
679         } else
680                 vap = NULL;
681 #else
682         va.va_mask = AT_ALL;
683         vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
684 #endif
685 
686 #if 0 /* notyet */
687         /*
688          * Don't do this.  It causes local disk writes when just
689          * reading the file and the overhead is deemed larger
690          * than the benefit.
691          */
692         /*
693          * Force modified metadata out to stable storage.
694          */
695         (void) VOP_FSYNC(vp, FNODSYNC, cr);
696 #endif
697 
698         if (error) {
699                 kmem_free(data, MAXPATHLEN + 1);
700                 goto out;
701         }
702 
703         VN_RELE(vp);
704 
705         resp->status = NFS3_OK;
706         vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes);
707         resp->resok.data = data;
708         *(data + MAXPATHLEN - uio.uio_resid) = '\0';
709         return;
710 
711 out:
712         if (curthread->t_flag & T_WOULDBLOCK) {
713                 curthread->t_flag &= ~T_WOULDBLOCK;
714                 resp->status = NFS3ERR_JUKEBOX;
715         } else
716                 resp->status = puterrno3(error);
717 out1:
718         if (vp != NULL)
719                 VN_RELE(vp);
720         vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes);
721 }
722 
723 fhandle_t *
724 rfs3_readlink_getfh(READLINK3args *args)
725 {
726 
727         return ((fhandle_t *)&args->symlink.fh3_u.nfs_fh3_i.fh3_i);
728 }
729 
730 void
731 rfs3_readlink_free(READLINK3res *resp)
732 {
733 
734         if (resp->status == NFS3_OK)
735                 kmem_free(resp->resok.data, MAXPATHLEN + 1);
736 }
737 
738 /* ARGSUSED */
739 void
740 rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi,
741         struct svc_req *req, cred_t *cr)
742 {
743         int error;
744         vnode_t *vp;
745         struct vattr *vap;
746         struct vattr va;
747         struct iovec iov;
748         struct uio uio;
749         u_offset_t offset;
750         mblk_t *mp;
751         int alloc_err = 0;
752         int in_crit = 0;
753         int need_rwunlock = 0;
754 
755         vap = NULL;
756 
757         vp = nfs3_fhtovp(&args->file, exi);
758         if (vp == NULL) {
759                 error = ESTALE;
760                 goto out;
761         }
762 
763         /*
764          * Check to see if the v4 side of the server has delegated
765          * this file.  If so, then we return JUKEBOX to allow the
766          * client to retrasmit its request.
767          */
768         if (rfs4_check_delegated(FREAD, vp, FALSE)) {
769                 resp->status = NFS3ERR_JUKEBOX;
770                 goto out1;
771         }
772 
773         /*
774          * Enter the critical region before calling VOP_RWLOCK
775          * to avoid a deadlock with write requests.
776          */
777         if (nbl_need_check(vp)) {
778                 nbl_start_crit(vp, RW_READER);
779                 in_crit = 1;
780                 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0)) {
781                         error = EACCES;
782                         goto out;
783                 }
784         }
785 
786         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
787         need_rwunlock = 1;
788 
789         va.va_mask = AT_ALL;
790         error = VOP_GETATTR(vp, &va, 0, cr);
791 
792         /*
793          * If we can't get the attributes, then we can't do the
794          * right access checking.  So, we'll fail the request.
795          */
796         if (error)
797                 goto out;
798 
799 #ifdef DEBUG
800         if (rfs3_do_post_op_attr)
801                 vap = &va;
802 #else
803         vap = &va;
804 #endif
805 
806         if (vp->v_type != VREG) {
807                 resp->status = NFS3ERR_INVAL;
808                 goto out1;
809         }
810 
811         if (crgetuid(cr) != va.va_uid) {
812                 error = VOP_ACCESS(vp, VREAD, 0, cr);
813                 if (error) {
814                         if (curthread->t_flag & T_WOULDBLOCK)
815                                 goto out;
816                         error = VOP_ACCESS(vp, VEXEC, 0, cr);
817                         if (error)
818                                 goto out;
819                 }
820         }
821 
822         if (MANDLOCK(vp, va.va_mode)) {
823                 resp->status = NFS3ERR_ACCES;
824                 goto out1;
825         }
826 
827         offset = args->offset;
828         if (offset >= va.va_size) {
829                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
830                 if (in_crit)
831                         nbl_end_crit(vp);
832                 VN_RELE(vp);
833                 resp->status = NFS3_OK;
834                 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
835                 resp->resok.count = 0;
836                 resp->resok.eof = TRUE;
837                 resp->resok.data.data_len = 0;
838                 resp->resok.data.data_val = NULL;
839                 resp->resok.data.mp = NULL;
840                 return;
841         }
842 
843         if (args->count == 0) {
844                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
845                 if (in_crit)
846                         nbl_end_crit(vp);
847                 VN_RELE(vp);
848                 resp->status = NFS3_OK;
849                 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
850                 resp->resok.count = 0;
851                 resp->resok.eof = FALSE;
852                 resp->resok.data.data_len = 0;
853                 resp->resok.data.data_val = NULL;
854                 resp->resok.data.mp = NULL;
855                 return;
856         }
857 
858         /*
859          * do not allocate memory more the max. allowed
860          * transfer size
861          */
862         if (args->count > rfs3_tsize(req))
863                 args->count = rfs3_tsize(req);
864 
865         /*
866         * If we aren't returning READ data w/RDMA_WRITE, then grab
867         * a mblk.
868         */
869        if (SVC_GET_WCHUNK(req->rq_xprt, req, &iov) == FALSE) {
870 
871         /*
872          * mp will contain the data to be sent out in the read reply.
873          * This will be freed after the reply has been sent out (by the
874          * driver).
875          * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so
876          * that the call to xdrmblk_putmblk() never fails.
877          */
878         mp = allocb_wait(RNDUP(args->count), BPRI_MED, STR_NOSIG,
879                                &alloc_err);
880         ASSERT(mp != NULL);
881         ASSERT(alloc_err == 0);
882 
883         iov.iov_base = (caddr_t)mp->b_datap->db_base;
884         iov.iov_len = args->count;
885        } else
886                mp = NULL;
887         uio.uio_iov = &iov;
888         uio.uio_iovcnt = 1;
889         uio.uio_segflg = UIO_SYSSPACE;
890         uio.uio_extflg = UIO_COPY_CACHED;
891         uio.uio_loffset = args->offset;
892         uio.uio_resid = args->count;
893 
894         error = VOP_READ(vp, &uio, 0, cr, NULL);
895 
896         if (error) {
897                 freeb(mp);
898                 goto out;
899         }
900 
901         va.va_mask = AT_ALL;
902         error = VOP_GETATTR(vp, &va, 0, cr);
903 
904 #ifdef DEBUG
905         if (rfs3_do_post_op_attr) {
906                 if (error)
907                         vap = NULL;
908                 else
909                         vap = &va;
910         } else
911                 vap = NULL;
912 #else
913         if (error)
914                 vap = NULL;
915         else
916                 vap = &va;
917 #endif
918 
919         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
920 
921 #if 0 /* notyet */
922         /*
923          * Don't do this.  It causes local disk writes when just
924          * reading the file and the overhead is deemed larger
925          * than the benefit.
926          */
927         /*
928          * Force modified metadata out to stable storage.
929          */
930         (void) VOP_FSYNC(vp, FNODSYNC, cr);
931 #endif
932 
933         if (in_crit)
934                 nbl_end_crit(vp);
935         VN_RELE(vp);
936 
937         resp->status = NFS3_OK;
938         vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
939         resp->resok.count = args->count - uio.uio_resid;
940         if (!error && offset + resp->resok.count == va.va_size)
941                 resp->resok.eof = TRUE;
942         else
943                 resp->resok.eof = FALSE;
944         resp->resok.data.data_len = resp->resok.count;
945 #ifndef RPC_RDMA_INLINE
946         resp->resok.data.data_val = (caddr_t)iov.iov_base;
947 #else
948         resp->resok.data.data_val = (caddr_t)mp->b_datap->db_base;
949 #endif
950         resp->resok.data.mp = mp;
951 
952         resp->resok.size = (uint_t)args->count;
953         return;
954 
955 out:
956         if (curthread->t_flag & T_WOULDBLOCK) {
957                 curthread->t_flag &= ~T_WOULDBLOCK;
958                 resp->status = NFS3ERR_JUKEBOX;
959         } else
960                 resp->status = puterrno3(error);
961 out1:
962         if (vp != NULL) {
963                 if (need_rwunlock)
964                         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
965                 if (in_crit)
966                         nbl_end_crit(vp);
967                 VN_RELE(vp);
968         }
969         vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
970 }
971 
/*
 * Free the mblk attached to a READ3 reply.  Only a reply whose status is
 * NFS3_OK has resok.data.mp examined; a non-NULL mblk found there is
 * handed to freeb().  Any other status leaves the reply untouched.
 */
972 void
973 rfs3_read_free(READ3res *resp)
974 {
975         mblk_t *data_mp;
976 
977         if (resp->status != NFS3_OK)
978                 return;
979         data_mp = resp->resok.data.mp;
980         if (data_mp != NULL)
981                 freeb(data_mp);
982 }
983 
/*
 * Return the address of the fh3_i member inside the READ3 request's file
 * handle (args->file), viewed as an fhandle_t.
 */
984 fhandle_t *
985 rfs3_read_getfh(READ3args *args)
986 {
987         fhandle_t *fhp = (fhandle_t *)&args->file.fh3_u.nfs_fh3_i.fh3_i;
988         return (fhp);
989 }
990 
/*
 * Number of iovec entries rfs3_write() keeps on its stack.  A request
 * whose mblk chain is longer than this gets an iovec array from
 * kmem_alloc() instead.
 */
991 #define MAX_IOVECS      12
992 
/*
 * DEBUG-only counters: rfs3_write_hits counts requests whose mblk chain
 * fit in the on-stack iovec array, rfs3_write_misses counts those that
 * needed a kmem_alloc()ed array.
 */
993 #ifdef DEBUG
994 static int rfs3_write_hits = 0;
995 static int rfs3_write_misses = 0;
996 #endif
997 
/*
 * rfs3_write - service an NFS version 3 WRITE request.
 *
 * args  - decoded request: file handle, offset, count, stable-how and the
 *         data, either as an mblk chain (args->mblk) or as a flat buffer
 *         (args->data.data_val).
 * resp  - reply to fill in; resok on success, resfail otherwise.
 * exi   - export the file handle belongs to.
 * req   - RPC request, used only for the read-only export check.
 * cr    - credentials the write is performed with.
 *
 * The handle is converted to a vnode, the NBMAND critical region is
 * entered (when nbl_need_check() says so) before VOP_RWLOCK() is taken,
 * and the request is validated: count must equal data_len, the export
 * must be writable, the vnode must be VREG, a caller who is not the
 * owner must pass VOP_ACCESS(VWRITE), and the file must not have
 * mandatory locking enabled.  The data is then written with VOP_WRITE()
 * using FSYNC, FDSYNC or no sync flag according to args->stable.
 *
 * Every exit path releases what it acquired: the rwlock, the NBMAND
 * critical region and the vnode hold.  On failure resp->status is
 * NFS3ERR_JUKEBOX if T_WOULDBLOCK is set on the current thread, a
 * status chosen at the failing check, or puterrno3(error).
 */
998 void
999 rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
1000         struct svc_req *req, cred_t *cr)
1001 {
1002         int error;
1003         vnode_t *vp;
1004         struct vattr *bvap = NULL;
1005         struct vattr bva;
1006         struct vattr *avap = NULL;
1007         struct vattr ava;
1008         u_offset_t rlimit;
1009         struct uio uio;
1010         struct iovec iov[MAX_IOVECS];
1011         mblk_t *m;
1012         struct iovec *iovp;
1013         int iovcnt;
1014         int ioflag;
1015         cred_t *savecred;
1016         int in_crit = 0;
1017         int rwlock_ret = -1;
1018 
1019         vp = nfs3_fhtovp(&args->file, exi);
1020         if (vp == NULL) {
1021                 error = ESTALE;
1022                 goto out;
1023         }
1024 
1025         /*
1026          * Check to see if the v4 side of the server has delegated
1027          * this file.  If so, then we return JUKEBOX to allow the
1028          * client to retransmit its request.
1029          */
1030         if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
1031                 resp->status = NFS3ERR_JUKEBOX;
1032                 goto out1;
1033         }
1034 
1035         /*
1036          * We have to enter the critical region before calling VOP_RWLOCK
1037          * to avoid a deadlock with ufs.
1038          */
1039         if (nbl_need_check(vp)) {
1040                 nbl_start_crit(vp, RW_READER);
1041                 in_crit = 1;
1042                 if (nbl_conflict(vp, NBL_WRITE, args->offset, args->count, 0)) {
1043                         error = EACCES;
1044                         goto out;
1045                 }
1046         }
1047 
             /* rwlock_ret != -1 from here on tells out1 to drop the rwlock. */
1048         rwlock_ret = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
1049 
1050         bva.va_mask = AT_ALL;
1051         error = VOP_GETATTR(vp, &bva, 0, cr);
1052 
1053         /*
1054          * If we can't get the attributes, then we can't do the
1055          * right access checking.  So, we'll fail the request.
1056          */
1057         if (error)
1058                 goto out;
1059 
1060         bvap = &bva;
1061 #ifdef DEBUG
1062         if (!rfs3_do_pre_op_attr)
1063                 bvap = NULL;
1064 #endif
1065         avap = bvap;
1066 
1067         if (args->count != args->data.data_len) {
1068                 resp->status = NFS3ERR_INVAL;
1069                 goto out1;
1070         }
1071 
1072         if (rdonly(exi, req)) {
1073                 resp->status = NFS3ERR_ROFS;
1074                 goto out1;
1075         }
1076 
1077         if (vp->v_type != VREG) {
1078                 resp->status = NFS3ERR_INVAL;
1079                 goto out1;
1080         }
1081 
1082         if (crgetuid(cr) != bva.va_uid &&
1083             (error = VOP_ACCESS(vp, VWRITE, 0, cr)))
1084                 goto out;
1085 
1086         if (MANDLOCK(vp, bva.va_mode)) {
1087                 resp->status = NFS3ERR_ACCES;
1088                 goto out1;
1089         }
1090 
1091         if (args->count == 0) {
                     /*
                      * Nothing to write.  Release everything acquired
                      * above, in the same order as the normal completion
                      * path: the rwlock, then the NBMAND critical region
                      * (if it was entered), then the vnode hold.
                      */
1092                 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
                     if (in_crit)
                             nbl_end_crit(vp);
1093                 VN_RELE(vp);
1094                 resp->status = NFS3_OK;
1095                 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1096                 resp->resok.count = 0;
1097                 resp->resok.committed = args->stable;
1098                 resp->resok.verf = write3verf;
1099                 return;
1100         }
1101 
             /*
              * Describe the data with an iovec array: one entry per mblk
              * in the chain, or a single entry for the flat buffer.
              */
1102         if (args->mblk != NULL) {
1103                 iovcnt = 0;
1104                 for (m = args->mblk; m != NULL; m = m->b_cont)
1105                         iovcnt++;
1106                 if (iovcnt <= MAX_IOVECS) {
1107 #ifdef DEBUG
1108                         rfs3_write_hits++;
1109 #endif
1110                         iovp = iov;
1111                 } else {
1112 #ifdef DEBUG
1113                         rfs3_write_misses++;
1114 #endif
1115                         iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1116                 }
1117                 mblk_to_iov(args->mblk, iovcnt, iovp);
1118         } else {
1119                 iovcnt = 1;
1120                 iovp = iov;
1121                 iovp->iov_base = args->data.data_val;
1122                 iovp->iov_len = args->count;
1123         }
1124 
1125         uio.uio_iov = iovp;
1126         uio.uio_iovcnt = iovcnt;
1127 
1128         uio.uio_segflg = UIO_SYSSPACE;
1129         uio.uio_extflg = UIO_COPY_DEFAULT;
1130         uio.uio_loffset = args->offset;
1131         uio.uio_resid = args->count;
1132         uio.uio_llimit = curproc->p_fsz_ctl;
             /* Clip the transfer to what remains below the file size limit. */
1133         rlimit = uio.uio_llimit - args->offset;
1134         if (rlimit < (u_offset_t)uio.uio_resid)
1135                 uio.uio_resid = (int)rlimit;
1136 
1137         if (args->stable == UNSTABLE)
1138                 ioflag = 0;
1139         else if (args->stable == FILE_SYNC)
1140                 ioflag = FSYNC;
1141         else if (args->stable == DATA_SYNC)
1142                 ioflag = FDSYNC;
1143         else {
                     /* Unknown stable-how: free a heap iovec array first. */
1144                 if (iovp != iov)
1145                         kmem_free(iovp, sizeof (*iovp) * iovcnt);
1146                 resp->status = NFS3ERR_INVAL;
1147                 goto out1;
1148         }
1149 
1150         /*
1151          * We're changing creds because VM may fault and we need
1152          * the cred of the current thread to be used if quota
1153          * checking is enabled.
1154          */
1155         savecred = curthread->t_cred;
1156         curthread->t_cred = cr;
1157         error = VOP_WRITE(vp, &uio, ioflag, cr, NULL);
1158         curthread->t_cred = savecred;
1159 
1160         if (iovp != iov)
1161                 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1162 
             /* Post-op attributes are fetched whether or not the write failed. */
1163         ava.va_mask = AT_ALL;
1164         avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
1165 
1166 #ifdef DEBUG
1167         if (!rfs3_do_post_op_attr)
1168                 avap = NULL;
1169 #endif
1170 
1171         if (error)
1172                 goto out;
1173 
1174         VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1175         if (in_crit)
1176                 nbl_end_crit(vp);
1177         VN_RELE(vp);
1178 
1179         /*
1180          * If we were unable to get the V_WRITELOCK_TRUE, then we
1181          * may not have accurate after attrs, so check if
1182          * we have both attributes, they have a non-zero va_seq, and
1183          * va_seq has changed by exactly one,
1184          * if not, turn off the before attr.
1185          */
1186         if (rwlock_ret != V_WRITELOCK_TRUE) {
1187                 if (bvap == NULL || avap == NULL ||
1188                                 bvap->va_seq == 0 || avap->va_seq == 0 ||
1189                                 avap->va_seq != (bvap->va_seq + 1)) {
1190                         bvap = NULL;
1191                 }
1192         }
1193 
1194         resp->status = NFS3_OK;
1195         vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1196         resp->resok.count = args->count - uio.uio_resid;
1197         resp->resok.committed = args->stable;
1198         resp->resok.verf = write3verf;
1199         return;
1200 
     /* out: derive the status from error; out1: status is already set. */
1201 out:
1202         if (curthread->t_flag & T_WOULDBLOCK) {
1203                 curthread->t_flag &= ~T_WOULDBLOCK;
1204                 resp->status = NFS3ERR_JUKEBOX;
1205         } else
1206                 resp->status = puterrno3(error);
1207 out1:
1208         if (vp != NULL) {
1209                 if (rwlock_ret != -1)
1210                         VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1211                 if (in_crit)
1212                         nbl_end_crit(vp);
1213                 VN_RELE(vp);
1214         }
1215         vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
1216 }
1217 
/*
 * Return the address of the fh3_i member inside the WRITE3 request's file
 * handle (args->file), viewed as an fhandle_t.
 */
1218 fhandle_t *
1219 rfs3_write_getfh(WRITE3args *args)
1220 {
1221         fhandle_t *fhp = (fhandle_t *)&args->file.fh3_u.nfs_fh3_i.fh3_i;
1222         return (fhp);
1223 }
1224 
/*
 * rfs3_create - service an NFS version 3 CREATE request.
 *
 * The directory named by args->where.dirp is converted to a vnode and the
 * object is created in it with VOP_CREATE(), always as a regular file
 * (va_type is forced to VREG).  args->how.mode selects the behaviour:
 *
 *   EXCLUSIVE  - created with EXCL, mode 0, and the client's verifier
 *                stored as the mtime.  If the create fails with EEXIST,
 *                the existing file is accepted only when its mtime still
 *                matches that verifier; otherwise EEXIST is returned.
 *   GUARDED    - created with EXCL; an EEXIST failure is returned as is.
 *   otherwise  - created with NONEXCL.  If a size was supplied, an
 *                existing file of that name is first checked for a v4
 *                delegation and for conflicting NBMAND locks.
 *
 * The request is rejected up front for an over-long name
 * (NFS3ERR_NAMETOOLONG), a NULL or empty name (NFS3ERR_ACCES), a
 * read-only export (NFS3ERR_ROFS) or a missing mode (NFS3ERR_INVAL).
 *
 * On success resp->resok carries the file handle (when makefh3()
 * succeeds), the object's attributes and the directory's wcc data.  On
 * failure resp->resfail carries the directory's wcc data and
 * resp->status is either the status chosen at the failing check,
 * NFS3ERR_JUKEBOX when T_WOULDBLOCK is set on the current thread, or
 * puterrno3(error).
 */
1225 void
1226 rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi,
1227         struct svc_req *req, cred_t *cr)
1228 {
1229         int error;
1230         int in_crit = 0;
1231         vnode_t *vp;
1232         vnode_t *tvp = NULL;
1233         vnode_t *dvp;
1234         struct vattr *vap;
1235         struct vattr va;
1236         struct vattr *dbvap;
1237         struct vattr dbva;
1238         struct vattr *davap;
1239         struct vattr dava;
1240         enum vcexcl excl;
1241         nfstime3 *mtime;
1242         len_t reqsize;
1243         bool_t trunc;
1244 
1245         dbvap = NULL;
1246         davap = NULL;
1247 
1248         dvp = nfs3_fhtovp(args->where.dirp, exi);
1249         if (dvp == NULL) {
1250                 error = ESTALE;
1251                 goto out;
1252         }
1253 
             /*
              * Directory attributes before the operation; davap starts out
              * equal to dbvap so early failures report the same attributes
              * on both sides of the wcc data.
              */
1254 #ifdef DEBUG
1255         if (rfs3_do_pre_op_attr) {
1256                 dbva.va_mask = AT_ALL;
1257                 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1258         } else
1259                 dbvap = NULL;
1260 #else
1261         dbva.va_mask = AT_ALL;
1262         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1263 #endif
1264         davap = dbvap;
1265 
1266         if (args->where.name == nfs3nametoolong) {
1267                 resp->status = NFS3ERR_NAMETOOLONG;
1268                 goto out1;
1269         }
1270 
1271         if (args->where.name == NULL || *(args->where.name) == '\0') {
1272                 resp->status = NFS3ERR_ACCES;
1273                 goto out1;
1274         }
1275 
1276         if (rdonly(exi, req)) {
1277                 resp->status = NFS3ERR_ROFS;
1278                 goto out1;
1279         }
1280 
1281         if (args->how.mode == EXCLUSIVE) {
1282                 va.va_mask = AT_TYPE | AT_MODE | AT_MTIME;
1283                 va.va_type = VREG;
1284                 va.va_mode = (mode_t)0;
1285                 /*
1286                  * Ensure no time overflows and that types match
1287                  */
1288                 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1289                 va.va_mtime.tv_sec = mtime->seconds % INT32_MAX;
1290                 va.va_mtime.tv_nsec = mtime->nseconds;
1291                 excl = EXCL;
1292         } else {
1293                 error = sattr3_to_vattr(&args->how.createhow3_u.obj_attributes,
1294                     &va);
1295                 if (error)
1296                         goto out;
1297                 va.va_mask |= AT_TYPE;
1298                 va.va_type = VREG;
1299                 if (args->how.mode == GUARDED)
1300                         excl = EXCL;
1301                 else {
1302                         excl = NONEXCL;
1303 
1304                         /*
1305                          * During creation of file in non-exclusive mode
1306                          * if size of file is being set then make sure
1307                          * that if the file already exists that no conflicting
1308                          * non-blocking mandatory locks exists in the region
1309                          * being modified. If there are conflicting locks fail
1310                          * the operation with EACCES.
1311                          */
1312                         if (va.va_mask & AT_SIZE) {
1313                                 struct vattr tva;
1314 
1315                                 /*
1316                                  * Does file already exist?
1317                                  */
1318                                 error = VOP_LOOKUP(dvp, args->where.name, &tvp,
1319                                                 NULL, 0, NULL, cr);
1320 
1321                                 /*
1322                                  * Check to see if the file has been delegated
1323                                  * to a v4 client.  If so, then begin recall of
1324                                  * the delegation and return JUKEBOX to allow
1325                                  * the client to retransmit its request.
1326                                  */
1327 
1328                                 trunc = va.va_size == 0;
1329                                 if (!error &&
1330                                     rfs4_check_delegated(FWRITE, tvp, trunc)) {
1331                                         resp->status = NFS3ERR_JUKEBOX;
1332                                         goto out1;
1333                                 }
1334 
1335                                 /*
1336                                  * Check for NBMAND lock conflicts
1337                                  */
1338                                 if (!error && nbl_need_check(tvp)) {
1339                                         u_offset_t offset;
1340                                         ssize_t len;
1341 
1342                                         nbl_start_crit(tvp, RW_READER);
1343                                         in_crit = 1;
1344 
1345                                         tva.va_mask = AT_SIZE;
1346                                         error = VOP_GETATTR(tvp, &tva, 0, cr);
1347                                         /*
1348                                          * Can't check for conflicts, so return
1349                                          * error.
1350                                          */
1351                                         if (error)
1352                                                 goto out;
1353 
                                             /*
                                              * The region being modified
                                              * lies between the smaller and
                                              * the larger of the current and
                                              * requested sizes.
                                              */
1354                                         offset = tva.va_size < va.va_size ?
1355                                                 tva.va_size : va.va_size;
1356                                         len = tva.va_size < va.va_size ?
1357                                                 va.va_size - tva.va_size :
1358                                                 tva.va_size - va.va_size;
1359                                         if (nbl_conflict(tvp, NBL_WRITE,
1360                                                         offset, len, 0)) {
1361                                                 error = EACCES;
1362                                                 goto out;
1363                                         }
1364                                 } else if (tvp) {
1365                                         VN_RELE(tvp);
1366                                         tvp = NULL;
1367                                 }
1368                         }
1369                 }
                     /*
                      * NOTE(review): reqsize is assigned only here, when
                      * AT_SIZE is set.  Its later readers are guarded by
                      * obj_attributes.size.set_it instead; presumably
                      * sattr3_to_vattr() sets AT_SIZE exactly when set_it
                      * is true -- confirm.
                      */
1370                 if (va.va_mask & AT_SIZE)
1371                         reqsize = va.va_size;
1372         }
1373 
1374         /*
1375          * Must specify the mode.
1376          */
1377         if (!(va.va_mask & AT_MODE)) {
1378                 resp->status = NFS3ERR_INVAL;
1379                 goto out1;
1380         }
1381 
1382         /*
1383          * If the filesystem is exported with nosuid, then mask off
1384          * the setuid and setgid bits.
1385          */
1386         if (va.va_type == VREG && (exi->exi_export.ex_flags & EX_NOSUID))
1387                 va.va_mode &= ~(VSUID | VSGID);
1388 
1389 tryagain:
1390         /*
1391          * The file open mode used is VWRITE.  If the client needs
1392          * some other semantic, then it should do the access checking
1393          * itself.  It would have been nice to have the file open mode
1394          * passed as part of the arguments.
1395          */
1396         error = VOP_CREATE(dvp, args->where.name, &va, excl, VWRITE,
1397             &vp, cr, 0);
1398 
             /* Directory attributes after the create attempt. */
1399 #ifdef DEBUG
1400         if (rfs3_do_post_op_attr) {
1401                 dava.va_mask = AT_ALL;
1402                 davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1403         } else
1404                 davap = NULL;
1405 #else
1406         dava.va_mask = AT_ALL;
1407         davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1408 #endif
1409 
1410         if (error) {
1411                 /*
1412                  * If we got something other than file already exists
1413                  * then just return this error.  Otherwise, we got
1414                  * EEXIST.  If we were doing a GUARDED create, then
1415                  * just return this error.  Otherwise, we need to
1416                  * make sure that this wasn't a duplicate of an
1417                  * exclusive create request.
1418                  *
1419                  * The assumption is made that a non-exclusive create
1420                  * request will never return EEXIST.
1421                  */
1422                 if (error != EEXIST || args->how.mode == GUARDED)
1423                         goto out;
1424                 /*
1425                  * Lookup the file so that we can get a vnode for it.
1426                  */
1427                 error = VOP_LOOKUP(dvp, args->where.name, &vp, NULL, 0,
1428                     NULL, cr);
1429                 if (error) {
1430                         /*
1431                          * We couldn't find the file that we thought that
1432                          * we just created.  So, we'll just try creating
1433                          * it again.
1434                          */
1435                         if (error == ENOENT)
1436                                 goto tryagain;
1437                         goto out;
1438                 }
1439 
1440                 /*
1441                  * If the file is delegated to a v4 client, go ahead
1442                  * and initiate recall, this create is a hint that a
1443                  * conflicting v3 open has occurred.
1444                  */
1445 
1446                 if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
1447                         VN_RELE(vp);
1448                         resp->status = NFS3ERR_JUKEBOX;
1449                         goto out1;
1450                 }
1451 
1452                 va.va_mask = AT_ALL;
1453                 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1454 
1455                 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1456                 /* % with INT32_MAX to prevent overflows */
1457                 if (args->how.mode == EXCLUSIVE && (vap == NULL ||
1458                     vap->va_mtime.tv_sec !=
1459                     (mtime->seconds % INT32_MAX) ||
1460                     vap->va_mtime.tv_nsec != mtime->nseconds)) {
1461                         VN_RELE(vp);
1462                         error = EEXIST;
1463                         goto out;
1464                 }
1465         } else {
1466 
1467                 if ((args->how.mode == UNCHECKED ||
1468                     args->how.mode == GUARDED) &&
1469                     args->how.createhow3_u.obj_attributes.size.set_it &&
1470                     va.va_size == 0)
1471                         trunc = TRUE;
1472                 else
1473                         trunc = FALSE;
1474 
1475                 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1476                         VN_RELE(vp);
1477                         resp->status = NFS3ERR_JUKEBOX;
1478                         goto out1;
1479                 }
1480 
1481                 va.va_mask = AT_ALL;
1482                 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1483 
1484                 /*
1485                  * We need to check to make sure that the file got
1486                  * created to the indicated size.  If not, we do a
1487                  * setattr to try to change the size, but we don't
1488                  * try too hard.  This shouldn't be a problem as most
1489                  * clients will only specify a size of zero which
1490                  * local file systems handle.  However, even if
1491                  * the client does specify a non-zero size, it can
1492                  * still recover by checking the size of the file
1493                  * after it has created it and then issue a setattr
1494                  * request of its own to set the size of the file.
1495                  */
1496                 if (vap != NULL &&
1497                     (args->how.mode == UNCHECKED ||
1498                     args->how.mode == GUARDED) &&
1499                     args->how.createhow3_u.obj_attributes.size.set_it &&
1500                     vap->va_size != reqsize) {
1501                         va.va_mask = AT_SIZE;
1502                         va.va_size = reqsize;
1503                         (void) VOP_SETATTR(vp, &va, 0, cr, NULL);
1504                         va.va_mask = AT_ALL;
1505                         vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1506                 }
1507         }
1508 
1509 #ifdef DEBUG
1510         if (!rfs3_do_post_op_attr)
1511                 vap = NULL;
1512 #endif
1513 
             /*
              * Build the file handle for the reply.  A makefh3() failure is
              * not fatal: the reply simply carries no handle.
              */
1514 #ifdef DEBUG
1515         if (!rfs3_do_post_op_fh3)
1516                 resp->resok.obj.handle_follows = FALSE;
1517         else {
1518 #endif
1519         error = makefh3(&resp->resok.obj.handle, vp, exi);
1520         if (error)
1521                 resp->resok.obj.handle_follows = FALSE;
1522         else
1523                 resp->resok.obj.handle_follows = TRUE;
1524 #ifdef DEBUG
1525         }
1526 #endif
1527 
1528         /*
1529          * Force modified data and metadata out to stable storage.
1530          */
1531         (void) VOP_FSYNC(vp, FNODSYNC, cr);
1532         (void) VOP_FSYNC(dvp, 0, cr);
1533 
1534         VN_RELE(vp);
1535         VN_RELE(dvp);
             /*
              * tvp is still held here only when the NBMAND pre-check above
              * entered the critical region on it.
              */
1536         if (tvp != NULL) {
1537                 if (in_crit)
1538                         nbl_end_crit(tvp);
1539                 VN_RELE(tvp);
1540         }
1541 
1542         resp->status = NFS3_OK;
1543         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1544         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1545         return;
1546 
     /* out: derive the status from error; out1: status is already set. */
1547 out:
1548         if (curthread->t_flag & T_WOULDBLOCK) {
1549                 curthread->t_flag &= ~T_WOULDBLOCK;
1550                 resp->status = NFS3ERR_JUKEBOX;
1551         } else
1552                 resp->status = puterrno3(error);
1553 out1:
1554         if (tvp != NULL) {
1555                 if (in_crit)
1556                         nbl_end_crit(tvp);
1557                 VN_RELE(tvp);
1558         }
1559         if (dvp != NULL)
1560                 VN_RELE(dvp);
1561         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1562 }
1563 
/*
 * Return the address of the fh3_i member inside the CREATE3 request's
 * directory file handle (args->where.dir), viewed as an fhandle_t.
 */
1564 fhandle_t *
1565 rfs3_create_getfh(CREATE3args *args)
1566 {
1567         fhandle_t *fhp = (fhandle_t *)&args->where.dir.fh3_u.nfs_fh3_i.fh3_i;
1568         return (fhp);
1569 }
1570 
/*
 * rfs3_mkdir - service an NFS version 3 MKDIR request.
 *
 * The parent directory named by args->where.dirp is converted to a vnode
 * and args->attributes is converted to a vattr whose type is forced to
 * VDIR; the directory is then created with VOP_MKDIR().
 *
 * The request is rejected up front for an over-long name
 * (NFS3ERR_NAMETOOLONG), a NULL or empty name (NFS3ERR_ACCES), a
 * read-only export (NFS3ERR_ROFS) or a missing mode (NFS3ERR_INVAL).
 *
 * On success resp->resok carries the new directory's file handle (when
 * makefh3() succeeds), its attributes and the parent's wcc data.  On
 * failure resp->resfail carries the parent's wcc data and resp->status
 * is either the status chosen at the failing check, NFS3ERR_JUKEBOX when
 * T_WOULDBLOCK is set on the current thread, or puterrno3(error).
 */
1571 void
1572 rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi,
1573         struct svc_req *req, cred_t *cr)
1574 {
1575         int error;
1576         vnode_t *vp = NULL;
1577         vnode_t *dvp;
1578         struct vattr *vap;
1579         struct vattr va;
1580         struct vattr *dbvap;
1581         struct vattr dbva;
1582         struct vattr *davap;
1583         struct vattr dava;
1584 
1585         dbvap = NULL;
1586         davap = NULL;
1587 
1588         dvp = nfs3_fhtovp(args->where.dirp, exi);
1589         if (dvp == NULL) {
1590                 error = ESTALE;
1591                 goto out;
1592         }
1593 
             /*
              * Parent attributes before the operation; davap starts out
              * equal to dbvap so early failures report the same attributes
              * on both sides of the wcc data.
              */
1594 #ifdef DEBUG
1595         if (rfs3_do_pre_op_attr) {
1596                 dbva.va_mask = AT_ALL;
1597                 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1598         } else
1599                 dbvap = NULL;
1600 #else
1601         dbva.va_mask = AT_ALL;
1602         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1603 #endif
1604         davap = dbvap;
1605 
1606         if (args->where.name == nfs3nametoolong) {
1607                 resp->status = NFS3ERR_NAMETOOLONG;
1608                 goto out1;
1609         }
1610 
1611         if (args->where.name == NULL || *(args->where.name) == '\0') {
1612                 resp->status = NFS3ERR_ACCES;
1613                 goto out1;
1614         }
1615 
1616         if (rdonly(exi, req)) {
1617                 resp->status = NFS3ERR_ROFS;
1618                 goto out1;
1619         }
1620 
1621         error = sattr3_to_vattr(&args->attributes, &va);
1622         if (error)
1623                 goto out;
1624 
             /* As with CREATE, the client must supply a mode. */
1625         if (!(va.va_mask & AT_MODE)) {
1626                 resp->status = NFS3ERR_INVAL;
1627                 goto out1;
1628         }
1629 
1630         va.va_mask |= AT_TYPE;
1631         va.va_type = VDIR;
1632 
1633         error = VOP_MKDIR(dvp, args->where.name, &va, &vp, cr);
1634 
             /* Parent attributes after the mkdir attempt. */
1635 #ifdef DEBUG
1636         if (rfs3_do_post_op_attr) {
1637                 dava.va_mask = AT_ALL;
1638                 davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1639         } else
1640                 davap = NULL;
1641 #else
1642         dava.va_mask = AT_ALL;
1643         davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1644 #endif
1645 
1646         /*
1647          * Force modified data and metadata out to stable storage.
1648          * This runs before the error check, so the parent is synced
1649          * whether or not VOP_MKDIR() succeeded.
1650          */
1651         (void) VOP_FSYNC(dvp, 0, cr);
1652 
1653         if (error)
1654                 goto out;
1655 
1656         VN_RELE(dvp);
1657 
             /*
              * Build the file handle for the reply.  A makefh3() failure is
              * not fatal: the reply simply carries no handle.
              */
1658 #ifdef DEBUG
1659         if (!rfs3_do_post_op_fh3)
1660                 resp->resok.obj.handle_follows = FALSE;
1661         else {
1662 #endif
1663         error = makefh3(&resp->resok.obj.handle, vp, exi);
1664         if (error)
1665                 resp->resok.obj.handle_follows = FALSE;
1666         else
1667                 resp->resok.obj.handle_follows = TRUE;
1668 #ifdef DEBUG
1669         }
1670 #endif
1671 
             /* Attributes of the new directory; va is reused for them. */
1672 #ifdef DEBUG
1673         if (rfs3_do_post_op_attr) {
1674                 va.va_mask = AT_ALL;
1675                 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1676         } else
1677                 vap = NULL;
1678 #else
1679         va.va_mask = AT_ALL;
1680         vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1681 #endif
1682 
1683         /*
1684          * Force modified data and metadata out to stable storage.
1685          */
1686         (void) VOP_FSYNC(vp, 0, cr);
1687 
1688         VN_RELE(vp);
1689 
1690         resp->status = NFS3_OK;
1691         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1692         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1693         return;
1694 
     /* out: derive the status from error; out1: status is already set. */
1695 out:
1696         if (curthread->t_flag & T_WOULDBLOCK) {
1697                 curthread->t_flag &= ~T_WOULDBLOCK;
1698                 resp->status = NFS3ERR_JUKEBOX;
1699         } else
1700                 resp->status = puterrno3(error);
1701 out1:
1702         if (dvp != NULL)
1703                 VN_RELE(dvp);
1704         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1705 }
1704 
/*
 * Return the address of the fh3_i member inside the MKDIR3 request's
 * directory file handle (args->where.dir), viewed as an fhandle_t.
 */
1705 fhandle_t *
1706 rfs3_mkdir_getfh(MKDIR3args *args)
1707 {
1708         fhandle_t *fhp = (fhandle_t *)&args->where.dir.fh3_u.nfs_fh3_i.fh3_i;
1709         return (fhp);
1710 }
1711 
/*
 * rfs3_symlink - service an NFS version 3 SYMLINK request.
 *
 * The parent directory named by args->where.dirp is converted to a vnode
 * and args->symlink.symlink_attributes is converted to a vattr whose type
 * is forced to VLNK; the link is then created with VOP_SYMLINK() and
 * looked up by name to obtain a vnode for the reply.
 *
 * The request is rejected up front for an over-long name or link target
 * (NFS3ERR_NAMETOOLONG), a NULL or empty name (NFS3ERR_ACCES), a
 * read-only export (NFS3ERR_ROFS) or a missing mode (NFS3ERR_INVAL).
 *
 * Once VOP_SYMLINK() has succeeded the reply is always NFS3_OK.  If the
 * follow-up lookup fails, the reply carries no file handle and no object
 * attributes, only the parent's wcc data.  On failure resp->resfail
 * carries the parent's wcc data and resp->status is either the status
 * chosen at the failing check, NFS3ERR_JUKEBOX when T_WOULDBLOCK is set
 * on the current thread, or puterrno3(error).
 */
1712 void
1713 rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi,
1714         struct svc_req *req, cred_t *cr)
1715 {
1716         int error;
1717         vnode_t *vp;
1718         vnode_t *dvp;
1719         struct vattr *vap;
1720         struct vattr va;
1721         struct vattr *dbvap;
1722         struct vattr dbva;
1723         struct vattr *davap;
1724         struct vattr dava;
1725 
1726         dbvap = NULL;
1727         davap = NULL;
1728 
1729         dvp = nfs3_fhtovp(args->where.dirp, exi);
1730         if (dvp == NULL) {
1731                 error = ESTALE;
1732                 goto out;
1733         }
1734 
             /*
              * Parent attributes before the operation; davap starts out
              * equal to dbvap so early failures report the same attributes
              * on both sides of the wcc data.
              */
1735 #ifdef DEBUG
1736         if (rfs3_do_pre_op_attr) {
1737                 dbva.va_mask = AT_ALL;
1738                 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1739         } else
1740                 dbvap = NULL;
1741 #else
1742         dbva.va_mask = AT_ALL;
1743         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1744 #endif
1745         davap = dbvap;
1746 
1747         if (args->where.name == nfs3nametoolong) {
1748                 resp->status = NFS3ERR_NAMETOOLONG;
1749                 goto out1;
1750         }
1751 
1752         if (args->where.name == NULL || *(args->where.name) == '\0') {
1753                 resp->status = NFS3ERR_ACCES;
1754                 goto out1;
1755         }
1756 
1757         if (rdonly(exi, req)) {
1758                 resp->status = NFS3ERR_ROFS;
1759                 goto out1;
1760         }
1761 
1762         error = sattr3_to_vattr(&args->symlink.symlink_attributes, &va);
1763         if (error)
1764                 goto out;
1765 
1766         if (!(va.va_mask & AT_MODE)) {
1767                 resp->status = NFS3ERR_INVAL;
1768                 goto out1;
1769         }
1770 
             /* The link target is checked for length like the name is. */
1771         if (args->symlink.symlink_data == nfs3nametoolong) {
1772                 resp->status = NFS3ERR_NAMETOOLONG;
1773                 goto out1;
1774         }
1775 
1776         va.va_mask |= AT_TYPE;
1777         va.va_type = VLNK;
1778 
1779         error = VOP_SYMLINK(dvp, args->where.name, &va,
1780             args->symlink.symlink_data, cr);
1781 
             /* Parent attributes after the symlink attempt. */
1782 #ifdef DEBUG
1783         if (rfs3_do_post_op_attr) {
1784                 dava.va_mask = AT_ALL;
1785                 davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1786         } else
1787                 davap = NULL;
1788 #else
1789         dava.va_mask = AT_ALL;
1790         davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1791 #endif
1792 
1793         if (error)
1794                 goto out;
1795 
             /*
              * VOP_SYMLINK() as called above hands back no vnode, so look
              * the new link up by name to obtain one for the reply.
              */
1796         error = VOP_LOOKUP(dvp, args->where.name, &vp, NULL, 0, NULL, cr);
1797 
1798         /*
1799          * Force modified data and metadata out to stable storage.
1800          */
1801         (void) VOP_FSYNC(dvp, 0, cr);
1802 
1803         VN_RELE(dvp);
1804 
1805         resp->status = NFS3_OK;
             /*
              * The link was created but could not be looked up: still reply
              * NFS3_OK, just without a handle or object attributes.
              */
1806         if (error) {
1807                 resp->resok.obj.handle_follows = FALSE;
1808                 vattr_to_post_op_attr(NULL, &resp->resok.obj_attributes);
1809                 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1810                 return;
1811         }
1812 
             /*
              * Build the file handle for the reply.  A makefh3() failure is
              * not fatal: the reply simply carries no handle.
              */
1813 #ifdef DEBUG
1814         if (!rfs3_do_post_op_fh3)
1815                 resp->resok.obj.handle_follows = FALSE;
1816         else {
1817 #endif
1818         error = makefh3(&resp->resok.obj.handle, vp, exi);
1819         if (error)
1820                 resp->resok.obj.handle_follows = FALSE;
1821         else
1822                 resp->resok.obj.handle_follows = TRUE;
1823 #ifdef DEBUG
1824         }
1825 #endif
1826 
             /* Attributes of the new link; va is reused for them. */
1827 #ifdef DEBUG
1828         if (rfs3_do_post_op_attr) {
1829                 va.va_mask = AT_ALL;
1830                 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1831         } else
1832                 vap = NULL;
1833 #else
1834         va.va_mask = AT_ALL;
1835         vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1836 #endif
1837 
1838         /*
1839          * Force modified data and metadata out to stable storage.
1840          */
1841         (void) VOP_FSYNC(vp, 0, cr);
1842 
1843         VN_RELE(vp);
1844 
1845         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1846         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1847         return;
1848 
     /* out: derive the status from error; out1: status is already set. */
1849 out:
1850         if (curthread->t_flag & T_WOULDBLOCK) {
1851                 curthread->t_flag &= ~T_WOULDBLOCK;
1852                 resp->status = NFS3ERR_JUKEBOX;
1853         } else
1854                 resp->status = puterrno3(error);
1855 out1:
1856         if (dvp != NULL)
1857                 VN_RELE(dvp);
1858         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1859 }
1860 
1861 fhandle_t *
1862 rfs3_symlink_getfh(SYMLINK3args *args)
1863 {
1864 
1865         return ((fhandle_t *)&args->where.dirp->fh3_u.nfs_fh3_i.fh3_i);
1866 }
1867 
1868 void
1869 rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi,
1870         struct svc_req *req, cred_t *cr)
1871 {
1872         int error;
1873         vnode_t *vp;
1874         vnode_t *dvp;
1875         struct vattr *vap;
1876         struct vattr va;
1877         struct vattr *dbvap;
1878         struct vattr dbva;
1879         struct vattr *davap;
1880         struct vattr dava;
1881         int mode;
1882         enum vcexcl excl;
1883 
1884         dbvap = NULL;
1885         davap = NULL;
1886 
1887         dvp = nfs3_fhtovp(args->where.dirp, exi);
1888         if (dvp == NULL) {
1889                 error = ESTALE;
1890                 goto out;
1891         }
1892 
1893 #ifdef DEBUG
1894         if (rfs3_do_pre_op_attr) {
1895                 dbva.va_mask = AT_ALL;
1896                 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1897         } else
1898                 dbvap = NULL;
1899 #else
1900         dbva.va_mask = AT_ALL;
1901         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1902 #endif
1903         davap = dbvap;
1904 
1905         if (args->where.name == nfs3nametoolong) {
1906                 resp->status = NFS3ERR_NAMETOOLONG;
1907                 goto out1;
1908         }
1909 
1910         if (args->where.name == NULL || *(args->where.name) == '\0') {
1911                 resp->status = NFS3ERR_ACCES;
1912                 goto out1;
1913         }
1914 
1915         if (rdonly(exi, req)) {
1916                 resp->status = NFS3ERR_ROFS;
1917                 goto out1;
1918         }
1919 
1920         switch (args->what.type) {
1921         case NF3CHR:
1922         case NF3BLK:
1923                 error = sattr3_to_vattr(
1924                     &args->what.mknoddata3_u.device.dev_attributes, &va);
1925                 if (error)
1926                         goto out;
1927                 if (secpolicy_sys_devices(cr) != 0) {
1928                         resp->status = NFS3ERR_PERM;
1929                         goto out1;
1930                 }
1931                 if (args->what.type == NF3CHR)
1932                         va.va_type = VCHR;
1933                 else
1934                         va.va_type = VBLK;
1935                 va.va_rdev = makedevice(
1936                     args->what.mknoddata3_u.device.spec.specdata1,
1937                     args->what.mknoddata3_u.device.spec.specdata2);
1938                 va.va_mask |= AT_TYPE | AT_RDEV;
1939                 break;
1940         case NF3SOCK:
1941                 error = sattr3_to_vattr(
1942                     &args->what.mknoddata3_u.pipe_attributes, &va);
1943                 if (error)
1944                         goto out;
1945                 va.va_type = VSOCK;
1946                 va.va_mask |= AT_TYPE;
1947                 break;
1948         case NF3FIFO:
1949                 error = sattr3_to_vattr(
1950                     &args->what.mknoddata3_u.pipe_attributes, &va);
1951                 if (error)
1952                         goto out;
1953                 va.va_type = VFIFO;
1954                 va.va_mask |= AT_TYPE;
1955                 break;
1956         default:
1957                 resp->status = NFS3ERR_BADTYPE;
1958                 goto out1;
1959         }
1960 
1961         /*
1962          * Must specify the mode.
1963          */
1964         if (!(va.va_mask & AT_MODE)) {
1965                 resp->status = NFS3ERR_INVAL;
1966                 goto out1;
1967         }
1968 
1969         excl = EXCL;
1970 
1971         mode = 0;
1972 
1973         error = VOP_CREATE(dvp, args->where.name, &va, excl, mode,
1974             &vp, cr, 0);
1975 
1976 #ifdef DEBUG
1977         if (rfs3_do_post_op_attr) {
1978                 dava.va_mask = AT_ALL;
1979                 davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1980         } else
1981                 davap = NULL;
1982 #else
1983         dava.va_mask = AT_ALL;
1984         davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1985 #endif
1986 
1987         /*
1988          * Force modified data and metadata out to stable storage.
1989          */
1990         (void) VOP_FSYNC(dvp, 0, cr);
1991 
1992         if (error)
1993                 goto out;
1994 
1995         VN_RELE(dvp);
1996 
1997         resp->status = NFS3_OK;
1998 
1999 #ifdef DEBUG
2000         if (!rfs3_do_post_op_fh3)
2001                 resp->resok.obj.handle_follows = FALSE;
2002         else {
2003 #endif
2004         error = makefh3(&resp->resok.obj.handle, vp, exi);
2005         if (error)
2006                 resp->resok.obj.handle_follows = FALSE;
2007         else
2008                 resp->resok.obj.handle_follows = TRUE;
2009 #ifdef DEBUG
2010         }
2011 #endif
2012 
2013 #ifdef DEBUG
2014         if (rfs3_do_post_op_attr) {
2015                 va.va_mask = AT_ALL;
2016                 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2017         } else
2018                 vap = NULL;
2019 #else
2020         va.va_mask = AT_ALL;
2021         vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2022 #endif
2023 
2024         /*
2025          * Force modified metadata out to stable storage.
2026          */
2027         (void) VOP_FSYNC(vp, FNODSYNC, cr);
2028 
2029         VN_RELE(vp);
2030 
2031         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2032         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2033         return;
2034 
2035 out:
2036         if (curthread->t_flag & T_WOULDBLOCK) {
2037                 curthread->t_flag &= ~T_WOULDBLOCK;
2038                 resp->status = NFS3ERR_JUKEBOX;
2039         } else
2040                 resp->status = puterrno3(error);
2041 out1:
2042         if (dvp != NULL)
2043                 VN_RELE(dvp);
2044         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2045 }
2046 
2047 fhandle_t *
2048 rfs3_mknod_getfh(MKNOD3args *args)
2049 {
2050 
2051         return ((fhandle_t *)&args->where.dirp->fh3_u.nfs_fh3_i.fh3_i);
2052 }
2053 
2054 void
2055 rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi,
2056         struct svc_req *req, cred_t *cr)
2057 {
2058         int error = 0;
2059         vnode_t *vp;
2060         struct vattr *bvap;
2061         struct vattr bva;
2062         struct vattr *avap;
2063         struct vattr ava;
2064         vnode_t *targvp = NULL;
2065 
2066         bvap = NULL;
2067         avap = NULL;
2068 
2069         vp = nfs3_fhtovp(args->object.dirp, exi);
2070         if (vp == NULL) {
2071                 error = ESTALE;
2072                 goto out;
2073         }
2074 
2075 #ifdef DEBUG
2076         if (rfs3_do_pre_op_attr) {
2077                 bva.va_mask = AT_ALL;
2078                 bvap = VOP_GETATTR(vp, &bva, 0, cr) ? NULL : &bva;
2079         } else
2080                 bvap = NULL;
2081 #else
2082         bva.va_mask = AT_ALL;
2083         bvap = VOP_GETATTR(vp, &bva, 0, cr) ? NULL : &bva;
2084 #endif
2085         avap = bvap;
2086 
2087         if (vp->v_type != VDIR) {
2088                 resp->status = NFS3ERR_NOTDIR;
2089                 goto out1;
2090         }
2091 
2092         if (args->object.name == nfs3nametoolong) {
2093                 resp->status = NFS3ERR_NAMETOOLONG;
2094                 goto out1;
2095         }
2096 
2097         if (args->object.name == NULL || *(args->object.name) == '\0') {
2098                 resp->status = NFS3ERR_ACCES;
2099                 goto out1;
2100         }
2101 
2102         if (rdonly(exi, req)) {
2103                 resp->status = NFS3ERR_ROFS;
2104                 goto out1;
2105         }
2106 
2107         /*
2108          * Check for a conflict with a non-blocking mandatory share
2109          * reservation and V4 delegations
2110          */
2111         error = VOP_LOOKUP(vp, args->object.name, &targvp, NULL, 0,
2112                         NULL, cr);
2113         if (error != 0)
2114                 goto out;
2115 
2116         if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2117                 resp->status = NFS3ERR_JUKEBOX;
2118                 goto out1;
2119         }
2120 
2121         if (!nbl_need_check(targvp)) {
2122                 error = VOP_REMOVE(vp, args->object.name, cr);
2123         } else {
2124                 nbl_start_crit(targvp, RW_READER);
2125                 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0)) {
2126                         error = EACCES;
2127                 } else {
2128                         error = VOP_REMOVE(vp, args->object.name, cr);
2129                 }
2130                 nbl_end_crit(targvp);
2131         }
2132         VN_RELE(targvp);
2133         targvp = NULL;
2134 
2135 #ifdef DEBUG
2136         if (rfs3_do_post_op_attr) {
2137                 ava.va_mask = AT_ALL;
2138                 avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
2139         } else
2140                 avap = NULL;
2141 #else
2142         ava.va_mask = AT_ALL;
2143         avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
2144 #endif
2145 
2146         /*
2147          * Force modified data and metadata out to stable storage.
2148          */
2149         (void) VOP_FSYNC(vp, 0, cr);
2150 
2151         if (error)
2152                 goto out;
2153 
2154         VN_RELE(vp);
2155 
2156         resp->status = NFS3_OK;
2157         vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2158         return;
2159 
2160 out:
2161         if (curthread->t_flag & T_WOULDBLOCK) {
2162                 curthread->t_flag &= ~T_WOULDBLOCK;
2163                 resp->status = NFS3ERR_JUKEBOX;
2164         } else
2165                 resp->status = puterrno3(error);
2166 out1:
2167         if (vp != NULL)
2168                 VN_RELE(vp);
2169         vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2170 }
2171 
2172 fhandle_t *
2173 rfs3_remove_getfh(REMOVE3args *args)
2174 {
2175 
2176         return ((fhandle_t *)&args->object.dirp->fh3_u.nfs_fh3_i.fh3_i);
2177 }
2178 
2179 void
2180 rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi,
2181         struct svc_req *req, cred_t *cr)
2182 {
2183         int error;
2184         vnode_t *vp;
2185         struct vattr *bvap;
2186         struct vattr bva;
2187         struct vattr *avap;
2188         struct vattr ava;
2189 
2190         bvap = NULL;
2191         avap = NULL;
2192 
2193         vp = nfs3_fhtovp(args->object.dirp, exi);
2194         if (vp == NULL) {
2195                 error = ESTALE;
2196                 goto out;
2197         }
2198 
2199 #ifdef DEBUG
2200         if (rfs3_do_pre_op_attr) {
2201                 bva.va_mask = AT_ALL;
2202                 bvap = VOP_GETATTR(vp, &bva, 0, cr) ? NULL : &bva;
2203         } else
2204                 bvap = NULL;
2205 #else
2206         bva.va_mask = AT_ALL;
2207         bvap = VOP_GETATTR(vp, &bva, 0, cr) ? NULL : &bva;
2208 #endif
2209         avap = bvap;
2210 
2211         if (vp->v_type != VDIR) {
2212                 resp->status = NFS3ERR_NOTDIR;
2213                 goto out1;
2214         }
2215 
2216         if (args->object.name == nfs3nametoolong) {
2217                 resp->status = NFS3ERR_NAMETOOLONG;
2218                 goto out1;
2219         }
2220 
2221         if (args->object.name == NULL || *(args->object.name) == '\0') {
2222                 resp->status = NFS3ERR_ACCES;
2223                 goto out1;
2224         }
2225 
2226         if (rdonly(exi, req)) {
2227                 resp->status = NFS3ERR_ROFS;
2228                 goto out1;
2229         }
2230 
2231         error = VOP_RMDIR(vp, args->object.name, rootdir, cr);
2232 
2233 #ifdef DEBUG
2234         if (rfs3_do_post_op_attr) {
2235                 ava.va_mask = AT_ALL;
2236                 avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
2237         } else
2238                 avap = NULL;
2239 #else
2240         ava.va_mask = AT_ALL;
2241         avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
2242 #endif
2243 
2244         /*
2245          * Force modified data and metadata out to stable storage.
2246          */
2247         (void) VOP_FSYNC(vp, 0, cr);
2248 
2249         if (error) {
2250                 /*
2251                  * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2252                  * if the directory is not empty.  A System V NFS server
2253                  * needs to map NFS3ERR_EXIST to NFS3ERR_NOTEMPTY to transmit
2254                  * over the wire.
2255                  */
2256                 if (error == EEXIST)
2257                         error = ENOTEMPTY;
2258                 goto out;
2259         }
2260 
2261         VN_RELE(vp);
2262 
2263         resp->status = NFS3_OK;
2264         vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2265         return;
2266 
2267 out:
2268         if (curthread->t_flag & T_WOULDBLOCK) {
2269                 curthread->t_flag &= ~T_WOULDBLOCK;
2270                 resp->status = NFS3ERR_JUKEBOX;
2271         } else
2272                 resp->status = puterrno3(error);
2273 out1:
2274         if (vp != NULL)
2275                 VN_RELE(vp);
2276         vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2277 }
2278 
2279 fhandle_t *
2280 rfs3_rmdir_getfh(RMDIR3args *args)
2281 {
2282 
2283         return ((fhandle_t *)&args->object.dirp->fh3_u.nfs_fh3_i.fh3_i);
2284 }
2285 
2286 void
2287 rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi,
2288         struct svc_req *req, cred_t *cr)
2289 {
2290         int error = 0;
2291         vnode_t *fvp;
2292         vnode_t *tvp;
2293         vnode_t *targvp;
2294         struct vattr *fbvap;
2295         struct vattr fbva;
2296         struct vattr *favap;
2297         struct vattr fava;
2298         struct vattr *tbvap;
2299         struct vattr tbva;
2300         struct vattr *tavap;
2301         struct vattr tava;
2302         nfs_fh3 *fh3;
2303         struct exportinfo *to_exi;
2304         vnode_t *srcvp = NULL;
2305 
2306         fbvap = NULL;
2307         favap = NULL;
2308         tbvap = NULL;
2309         tavap = NULL;
2310         tvp = NULL;
2311 
2312         fvp = nfs3_fhtovp(args->from.dirp, exi);
2313         if (fvp == NULL) {
2314                 error = ESTALE;
2315                 goto out;
2316         }
2317 
2318 #ifdef DEBUG
2319         if (rfs3_do_pre_op_attr) {
2320                 fbva.va_mask = AT_ALL;
2321                 fbvap = VOP_GETATTR(fvp, &fbva, 0, cr) ? NULL : &fbva;
2322         } else
2323                 fbvap = NULL;
2324 #else
2325         fbva.va_mask = AT_ALL;
2326         fbvap = VOP_GETATTR(fvp, &fbva, 0, cr) ? NULL : &fbva;
2327 #endif
2328         favap = fbvap;
2329 
2330         fh3 = args->to.dirp;
2331         to_exi = checkexport(&fh3->fh3_fsid, (fid_t *)&fh3->fh3_xlen);
2332         if (to_exi == NULL) {
2333                 resp->status = NFS3ERR_ACCES;
2334                 goto out1;
2335         }
2336         exi_rele(to_exi);
2337 
2338         if (to_exi != exi) {
2339                 resp->status = NFS3ERR_XDEV;
2340                 goto out1;
2341         }
2342 
2343         tvp = nfs3_fhtovp(args->to.dirp, exi);
2344         if (tvp == NULL) {
2345                 error = ESTALE;
2346                 goto out;
2347         }
2348 
2349 #ifdef DEBUG
2350         if (rfs3_do_pre_op_attr) {
2351                 tbva.va_mask = AT_ALL;
2352                 tbvap = VOP_GETATTR(tvp, &tbva, 0, cr) ? NULL : &tbva;
2353         } else
2354                 tbvap = NULL;
2355 #else
2356         tbva.va_mask = AT_ALL;
2357         tbvap = VOP_GETATTR(tvp, &tbva, 0, cr) ? NULL : &tbva;
2358 #endif
2359         tavap = tbvap;
2360 
2361         if (fvp->v_type != VDIR || tvp->v_type != VDIR) {
2362                 resp->status = NFS3ERR_NOTDIR;
2363                 goto out1;
2364         }
2365 
2366         if (args->from.name == nfs3nametoolong ||
2367             args->to.name == nfs3nametoolong) {
2368                 resp->status = NFS3ERR_NAMETOOLONG;
2369                 goto out1;
2370         }
2371         if (args->from.name == NULL || *(args->from.name) == '\0' ||
2372             args->to.name == NULL || *(args->to.name) == '\0') {
2373                 resp->status = NFS3ERR_ACCES;
2374                 goto out1;
2375         }
2376 
2377         if (rdonly(exi, req)) {
2378                 resp->status = NFS3ERR_ROFS;
2379                 goto out1;
2380         }
2381 
2382         /*
2383          * Check for a conflict with a non-blocking mandatory share
2384          * reservation or V4 delegations.
2385          */
2386         error = VOP_LOOKUP(fvp, args->from.name, &srcvp, NULL, 0,
2387                         NULL, cr);
2388         if (error != 0)
2389                 goto out;
2390 
2391         /*
2392          * If we rename a delegated file we should recall the
2393          * delegation, since future opens should fail or would
2394          * refer to a new file.
2395          */
2396         if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2397                 resp->status = NFS3ERR_JUKEBOX;
2398                 goto out1;
2399         }
2400 
2401         /*
2402          * Check for renaming over a delegated file.  Check rfs4_deleg_policy
2403          * first to avoid VOP_LOOKUP if possible.
2404          */
2405         if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
2406             VOP_LOOKUP(tvp, args->to.name, &targvp, NULL, 0, NULL, cr) == 0) {
2407 
2408                 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2409                         VN_RELE(targvp);
2410                         resp->status = NFS3ERR_JUKEBOX;
2411                         goto out1;
2412                 }
2413                 VN_RELE(targvp);
2414         }
2415 
2416         if (!nbl_need_check(srcvp)) {
2417                 error = VOP_RENAME(fvp, args->from.name, tvp,
2418                                     args->to.name, cr);
2419         } else {
2420                 nbl_start_crit(srcvp, RW_READER);
2421                 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0)) {
2422                         error = EACCES;
2423                 } else {
2424                         error = VOP_RENAME(fvp, args->from.name, tvp,
2425                                     args->to.name, cr);
2426                 }
2427                 nbl_end_crit(srcvp);
2428         }
2429         if (error == 0) {
2430                 char *tmp;
2431 
2432                 /* fix the path name for the renamed file */
2433                 mutex_enter(&srcvp->v_lock);
2434                 tmp = srcvp->v_path;
2435                 srcvp->v_path = NULL;
2436                 mutex_exit(&srcvp->v_lock);
2437                 vn_setpath(rootdir, tvp, srcvp, args->to.name,
2438                                 strlen(args->to.name));
2439                 if (tmp != NULL)
2440                         kmem_free(tmp, strlen(tmp) + 1);
2441         }
2442 
2443         VN_RELE(srcvp);
2444         srcvp = NULL;
2445 
2446 #ifdef DEBUG
2447         if (rfs3_do_post_op_attr) {
2448                 fava.va_mask = AT_ALL;
2449                 favap = VOP_GETATTR(fvp, &fava, 0, cr) ? NULL : &fava;
2450                 tava.va_mask = AT_ALL;
2451                 tavap = VOP_GETATTR(tvp, &tava, 0, cr) ? NULL : &tava;
2452         } else {
2453                 favap = NULL;
2454                 tavap = NULL;
2455         }
2456 #else
2457         fava.va_mask = AT_ALL;
2458         favap = VOP_GETATTR(fvp, &fava, 0, cr) ? NULL : &fava;
2459         tava.va_mask = AT_ALL;
2460         tavap = VOP_GETATTR(tvp, &tava, 0, cr) ? NULL : &tava;
2461 #endif
2462 
2463         /*
2464          * Force modified data and metadata out to stable storage.
2465          */
2466         (void) VOP_FSYNC(fvp, 0, cr);
2467         (void) VOP_FSYNC(tvp, 0, cr);
2468 
2469         if (error)
2470                 goto out;
2471 
2472         VN_RELE(tvp);
2473         VN_RELE(fvp);
2474 
2475         resp->status = NFS3_OK;
2476         vattr_to_wcc_data(fbvap, favap, &resp->resok.fromdir_wcc);
2477         vattr_to_wcc_data(tbvap, tavap, &resp->resok.todir_wcc);
2478         return;
2479 
2480 out:
2481         if (curthread->t_flag & T_WOULDBLOCK) {
2482                 curthread->t_flag &= ~T_WOULDBLOCK;
2483                 resp->status = NFS3ERR_JUKEBOX;
2484         } else
2485                 resp->status = puterrno3(error);
2486 out1:
2487         if (fvp != NULL)
2488                 VN_RELE(fvp);
2489         if (tvp != NULL)
2490                 VN_RELE(tvp);
2491         vattr_to_wcc_data(fbvap, favap, &resp->resfail.fromdir_wcc);
2492         vattr_to_wcc_data(tbvap, tavap, &resp->resfail.todir_wcc);
2493 }
2494 
2495 fhandle_t *
2496 rfs3_rename_getfh(RENAME3args *args)
2497 {
2498 
2499         return ((fhandle_t *)&args->from.dirp->fh3_u.nfs_fh3_i.fh3_i);
2500 }
2501 
2502 void
2503 rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi,
2504         struct svc_req *req, cred_t *cr)
2505 {
2506         int error;
2507         vnode_t *vp;
2508         vnode_t *dvp;
2509         struct vattr *vap;
2510         struct vattr va;
2511         struct vattr *bvap;
2512         struct vattr bva;
2513         struct vattr *avap;
2514         struct vattr ava;
2515         nfs_fh3 *fh3;
2516         struct exportinfo *to_exi;
2517 
2518         vap = NULL;
2519         bvap = NULL;
2520         avap = NULL;
2521         dvp = NULL;
2522 
2523         vp = nfs3_fhtovp(&args->file, exi);
2524         if (vp == NULL) {
2525                 error = ESTALE;
2526                 goto out;
2527         }
2528 
2529 #ifdef DEBUG
2530         if (rfs3_do_pre_op_attr) {
2531                 va.va_mask = AT_ALL;
2532                 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2533         } else
2534                 vap = NULL;
2535 #else
2536         va.va_mask = AT_ALL;
2537         vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2538 #endif
2539 
2540         fh3 = args->link.dirp;
2541         to_exi = checkexport(&fh3->fh3_fsid, (fid_t *)&fh3->fh3_xlen);
2542         if (to_exi == NULL) {
2543                 resp->status = NFS3ERR_ACCES;
2544                 goto out1;
2545         }
2546         exi_rele(to_exi);
2547 
2548         if (to_exi != exi) {
2549                 resp->status = NFS3ERR_XDEV;
2550                 goto out1;
2551         }
2552 
2553         dvp = nfs3_fhtovp(args->link.dirp, exi);
2554         if (dvp == NULL) {
2555                 error = ESTALE;
2556                 goto out;
2557         }
2558 
2559 #ifdef DEBUG
2560         if (rfs3_do_pre_op_attr) {
2561                 bva.va_mask = AT_ALL;
2562                 bvap = VOP_GETATTR(dvp, &bva, 0, cr) ? NULL : &bva;
2563         } else
2564                 bvap = NULL;
2565 #else
2566         bva.va_mask = AT_ALL;
2567         bvap = VOP_GETATTR(dvp, &bva, 0, cr) ? NULL : &bva;
2568 #endif
2569 
2570         if (dvp->v_type != VDIR) {
2571                 resp->status = NFS3ERR_NOTDIR;
2572                 goto out1;
2573         }
2574 
2575         if (args->link.name == nfs3nametoolong) {
2576                 resp->status = NFS3ERR_NAMETOOLONG;
2577                 goto out1;
2578         }
2579 
2580         if (args->link.name == NULL || *(args->link.name) == '\0') {
2581                 resp->status = NFS3ERR_ACCES;
2582                 goto out1;
2583         }
2584 
2585         if (rdonly(exi, req)) {
2586                 resp->status = NFS3ERR_ROFS;
2587                 goto out1;
2588         }
2589 
2590         error = VOP_LINK(dvp, vp, args->link.name, cr);
2591 
2592 #ifdef DEBUG
2593         if (rfs3_do_post_op_attr) {
2594                 va.va_mask = AT_ALL;
2595                 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2596                 ava.va_mask = AT_ALL;
2597                 avap = VOP_GETATTR(dvp, &ava, 0, cr) ? NULL : &ava;
2598         } else {
2599                 vap = NULL;
2600                 avap = NULL;
2601         }
2602 #else
2603         va.va_mask = AT_ALL;
2604         vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2605         ava.va_mask = AT_ALL;
2606         avap = VOP_GETATTR(dvp, &ava, 0, cr) ? NULL : &ava;
2607 #endif
2608 
2609         /*
2610          * Force modified data and metadata out to stable storage.
2611          */
2612         (void) VOP_FSYNC(vp, FNODSYNC, cr);
2613         (void) VOP_FSYNC(dvp, 0, cr);
2614 
2615         if (error)
2616                 goto out;
2617 
2618         VN_RELE(dvp);
2619         VN_RELE(vp);
2620 
2621         resp->status = NFS3_OK;
2622         vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
2623         vattr_to_wcc_data(bvap, avap, &resp->resok.linkdir_wcc);
2624         return;
2625 
2626 out:
2627         if (curthread->t_flag & T_WOULDBLOCK) {
2628                 curthread->t_flag &= ~T_WOULDBLOCK;
2629                 resp->status = NFS3ERR_JUKEBOX;
2630         } else
2631                 resp->status = puterrno3(error);
2632 out1:
2633         if (vp != NULL)
2634                 VN_RELE(vp);
2635         if (dvp != NULL)
2636                 VN_RELE(dvp);
2637         vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
2638         vattr_to_wcc_data(bvap, avap, &resp->resfail.linkdir_wcc);
2639 }
2640 
2641 fhandle_t *
2642 rfs3_link_getfh(LINK3args *args)
2643 {
2644 
2645         return ((fhandle_t *)&args->file.fh3_u.nfs_fh3_i.fh3_i);
2646 }
2647 
/*
 * NFS3_READDIR_MIN_COUNT(length) is the size, in bytes, of a response
 * that contains attribute information and a single directory entry
 * whose name is `length' bytes long.  If the count in the incoming
 * request is larger than this, we are guaranteed to be able to return
 * at least one directory entry if one exists, so there is no need to
 * check for NFS3ERR_TOOSMALL.  If the count is not larger than this,
 * a check must be made to see whether that error has to be returned.
 *
 * The value is made up of the following, in XDR units:
 *
 *      status                  1
 *      attributes flag         1
 *      cookie verifier         2
 *      attributes              NFS3_SIZEOF_FATTR3
 *      entry-follows boolean   1
 *      file id                 2
 *      directory name length   1
 *      cookie                  2
 *      end of list             1
 *      end of file             1
 *
 * multiplied by BYTES_PER_XDR_UNIT, plus the length of the directory
 * name rounded up to a multiple of BYTES_PER_XDR_UNIT.
 */

#define NFS3_READDIR_MIN_COUNT(length)  \
        (roundup((length), BYTES_PER_XDR_UNIT) + \
            (1 + 1 + 2 + NFS3_SIZEOF_FATTR3 + 1 + 2 + 1 + 2 + 1 + 1) * \
            BYTES_PER_XDR_UNIT)
2676 
2677 /* ARGSUSED */
2678 void
2679 rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi,
2680         struct svc_req *req, cred_t *cr)
2681 {
2682         int error;
2683         vnode_t *vp;
2684         struct vattr *vap;
2685         struct vattr va;
2686         struct iovec iov;
2687         struct uio uio;
2688         char *data;
2689         int iseof;
2690         int bufsize;
2691         int namlen;
2692         uint_t count;
2693 
2694         vap = NULL;
2695 
2696         vp = nfs3_fhtovp(&args->dir, exi);
2697         if (vp == NULL) {
2698                 error = ESTALE;
2699                 goto out;
2700         }
2701 
2702         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
2703 
2704 #ifdef DEBUG
2705         if (rfs3_do_pre_op_attr) {
2706                 va.va_mask = AT_ALL;
2707                 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2708         } else
2709                 vap = NULL;
2710 #else
2711         va.va_mask = AT_ALL;
2712         vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2713 #endif
2714 
2715         if (vp->v_type != VDIR) {
2716                 resp->status = NFS3ERR_NOTDIR;
2717                 goto out1;
2718         }
2719 
2720         error = VOP_ACCESS(vp, VREAD, 0, cr);
2721         if (error)
2722                 goto out;
2723 
2724         /*
2725          * Now don't allow arbitrary count to alloc;
2726          * allow the maximum not to exceed rfs3_tsize()
2727          */
2728         if (args->count > rfs3_tsize(req))
2729                 args->count = rfs3_tsize(req);
2730 
2731         /*
2732          * Make sure that there is room to read at least one entry
2733          * if any are available.
2734          */
2735         if (args->count < DIRENT64_RECLEN(MAXNAMELEN))
2736                 count = DIRENT64_RECLEN(MAXNAMELEN);
2737         else
2738                 count = args->count;
2739 
2740         data = kmem_alloc(count, KM_SLEEP);
2741 
2742         iov.iov_base = data;
2743         iov.iov_len = count;
2744         uio.uio_iov = &iov;
2745         uio.uio_iovcnt = 1;
2746         uio.uio_segflg = UIO_SYSSPACE;
2747         uio.uio_extflg = UIO_COPY_CACHED;
2748         uio.uio_loffset = (offset_t)args->cookie;
2749         uio.uio_resid = count;
2750 
2751         error = VOP_READDIR(vp, &uio, cr, &iseof);
2752 
2753 #ifdef DEBUG
2754         if (rfs3_do_post_op_attr) {
2755                 va.va_mask = AT_ALL;
2756                 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2757         } else
2758                 vap = NULL;
2759 #else
2760         va.va_mask = AT_ALL;
2761         vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2762 #endif
2763 
2764         if (error) {
2765                 kmem_free(data, count);
2766                 goto out;
2767         }
2768 
2769         /*
2770          * If the count was not large enough to be able to guarantee
2771          * to be able to return at least one entry, then need to
2772          * check to see if NFS3ERR_TOOSMALL should be returned.
2773          */
2774         if (args->count < NFS3_READDIR_MIN_COUNT(MAXNAMELEN)) {
2775                 /*
2776                  * bufsize is used to keep track of the size of the response.
2777                  * It is primed with:
2778                  *      1 for the status +
2779                  *      1 for the dir_attributes.attributes boolean +
2780                  *      2 for the cookie verifier
2781                  * all times BYTES_PER_XDR_UNIT to convert from XDR units
2782                  * to bytes.  If there are directory attributes to be
2783                  * returned, then:
2784                  *      NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
2785                  * time BYTES_PER_XDR_UNIT is added to account for them.
2786                  */
2787                 bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
2788                 if (vap != NULL)
2789                         bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
2790                 /*
2791                  * An entry is composed of:
2792                  *      1 for the true/false list indicator +
2793                  *      2 for the fileid +
2794                  *      1 for the length of the name +
2795                  *      2 for the cookie +
2796                  * all times BYTES_PER_XDR_UNIT to convert from
2797                  * XDR units to bytes, plus the length of the name
2798                  * rounded up to the nearest BYTES_PER_XDR_UNIT.
2799                  */
2800                 if (count != uio.uio_resid) {
2801                         namlen = strlen(((struct dirent64 *)data)->d_name);
2802                         bufsize += (1 + 2 + 1 + 2) * BYTES_PER_XDR_UNIT +
2803                                     roundup(namlen, BYTES_PER_XDR_UNIT);
2804                 }
2805                 /*
2806                  * We need to check to see if the number of bytes left
2807                  * to go into the buffer will actually fit into the
2808                  * buffer.  This is calculated as the size of this
2809                  * entry plus:
2810                  *      1 for the true/false list indicator +
2811                  *      1 for the eof indicator
2812                  * times BYTES_PER_XDR_UNIT to convert from from
2813                  * XDR units to bytes.
2814                  */
2815                 bufsize += (1 + 1) * BYTES_PER_XDR_UNIT;
2816                 if (bufsize > args->count) {
2817                         kmem_free(data, count);
2818                         resp->status = NFS3ERR_TOOSMALL;
2819                         goto out1;
2820                 }
2821         }
2822 
2823         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
2824 
2825 #if 0 /* notyet */
2826         /*
2827          * Don't do this.  It causes local disk writes when just
2828          * reading the file and the overhead is deemed larger
2829          * than the benefit.
2830          */
2831         /*
2832          * Force modified metadata out to stable storage.
2833          */
2834         (void) VOP_FSYNC(vp, FNODSYNC, cr);
2835 #endif
2836 
2837         VN_RELE(vp);
2838 
2839         resp->status = NFS3_OK;
2840         vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
2841         resp->resok.cookieverf = 0;
2842         resp->resok.reply.entries = (entry3 *)data;
2843         resp->resok.reply.eof = iseof;
2844         resp->resok.size = count - uio.uio_resid;
2845         resp->resok.count = args->count;
2846         resp->resok.freecount = count;
2847         return;
2848 
2849 out:
2850         if (curthread->t_flag & T_WOULDBLOCK) {
2851                 curthread->t_flag &= ~T_WOULDBLOCK;
2852                 resp->status = NFS3ERR_JUKEBOX;
2853         } else
2854                 resp->status = puterrno3(error);
2855 out1:
2856         if (vp != NULL) {
2857                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
2858                 VN_RELE(vp);
2859         }
2860         vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
2861 }
2862 
2863 fhandle_t *
2864 rfs3_readdir_getfh(READDIR3args *args)
2865 {
2866 
2867         return ((fhandle_t *)&args->dir.fh3_u.nfs_fh3_i.fh3_i);
2868 }
2869 
2870 void
2871 rfs3_readdir_free(READDIR3res *resp)
2872 {
2873 
2874         if (resp->status == NFS3_OK)
2875                 kmem_free(resp->resok.reply.entries, resp->resok.freecount);
2876 }
2877 
/*
 * nextdp(dp) yields the address d_reclen bytes beyond dp, typed as a
 * struct dirent64 pointer.  Any definition of nextdp that is already
 * in effect is discarded first (#undef of an undefined name is a no-op).
 */
#undef nextdp
#define nextdp(dp)      ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
2882 
2883 /*
2884  * This macro computes the size of a response which contains
2885  * one directory entry including the attributes as well as file handle.
2886  * If the incoming request is larger than this, then we are guaranteed to be
2887  * able to return at least one more directory entry if one exists.
2888  *
2889  * NFS3_READDIRPLUS_ENTRY is made up of the following:
2890  *
2891  * boolean - 1 * BYTES_PER_XDR_UNIT
2892  * file id - 2 * BYTES_PER_XDR_UNIT
2893  * directory name length - 1 * BYTES_PER_XDR_UNIT
2894  * cookie - 2 * BYTES_PER_XDR_UNIT
2895  * attribute flag - 1 * BYTES_PER_XDR_UNIT
2896  * attributes - NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
2897  * status byte for file handle - 1 *  BYTES_PER_XDR_UNIT
2898  * length of a file handle - 1 * BYTES_PER_XDR_UNIT
2899  * Maxmum length of a file handle (NFS3_CURFHSIZE)
2900  * name length of the entry to the nearest bytes
2901  */
2902 #define NFS3_READDIRPLUS_ENTRY(namelen) \
2903         ((1 + 2 + 1 + 2 + 1 + NFS3_SIZEOF_FATTR3 + 1 + 1) * \
2904                 BYTES_PER_XDR_UNIT + \
2905         NFS3_CURFHSIZE + roundup(namelen, BYTES_PER_XDR_UNIT))
2906 
2907 static int rfs3_readdir_unit = MAXBSIZE;
2908 
2909 /* ARGSUSED */
2910 void
2911 rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp,
2912         struct exportinfo *exi, struct svc_req *req, cred_t *cr)
2913 {
2914         int error;
2915         vnode_t *vp;
2916         struct vattr *vap;
2917         struct vattr va;
2918         struct iovec iov;
2919         struct uio uio;
2920         char *data;
2921         int iseof;
2922         struct dirent64 *dp;
2923         vnode_t *nvp;
2924         struct vattr *nvap;
2925         struct vattr nva;
2926         entryplus3_info *infop = NULL;
2927         int size = 0;
2928         int nents = 0;
2929         int bufsize = 0;
2930         int entrysize = 0;
2931         int tofit = 0;
2932         int rd_unit = rfs3_readdir_unit;
2933         int prev_len;
2934         int space_left;
2935         int i;
2936         uint_t *namlen = NULL;
2937 
2938         vap = NULL;
2939 
2940         vp = nfs3_fhtovp(&args->dir, exi);
2941         if (vp == NULL) {
2942                 error = ESTALE;
2943                 goto out;
2944         }
2945 
2946         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
2947 
2948 #ifdef DEBUG
2949         if (rfs3_do_pre_op_attr) {
2950                 va.va_mask = AT_ALL;
2951                 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2952         } else
2953                 vap = NULL;
2954 #else
2955         va.va_mask = AT_ALL;
2956         vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2957 #endif
2958 
2959         if (vp->v_type != VDIR) {
2960                 error = ENOTDIR;
2961                 goto out;
2962         }
2963 
2964         error = VOP_ACCESS(vp, VREAD, 0, cr);
2965         if (error)
2966                 goto out;
2967 
2968         /*
2969          * Don't allow arbitrary counts for allocation
2970          */
2971         if (args->maxcount > rfs3_tsize(req))
2972                 args->maxcount = rfs3_tsize(req);
2973 
2974         /*
2975          * Make sure that there is room to read at least one entry
2976          * if any are available
2977          */
2978         args->dircount = MIN(args->dircount, args->maxcount);
2979 
2980         if (args->dircount < DIRENT64_RECLEN(MAXNAMELEN))
2981                 args->dircount = DIRENT64_RECLEN(MAXNAMELEN);
2982 
2983         /*
2984          * This allocation relies on a minimum directory entry
2985          * being roughly 24 bytes.  Therefore, the namlen array
2986          * will have enough space based on the maximum number of
2987          * entries to read.
2988          */
2989         namlen = kmem_alloc(args->dircount, KM_SLEEP);
2990 
2991         space_left = args->dircount;
2992         data = kmem_alloc(args->dircount, KM_SLEEP);
2993         dp = (struct dirent64 *)data;
2994         uio.uio_iov = &iov;
2995         uio.uio_iovcnt = 1;
2996         uio.uio_segflg = UIO_SYSSPACE;
2997         uio.uio_extflg = UIO_COPY_CACHED;
2998         uio.uio_loffset = (offset_t)args->cookie;
2999 
3000         /*
3001          * bufsize is used to keep track of the size of the response as we
3002          * get post op attributes and filehandles for each entry.  This is
3003          * an optimization as the server may have read more entries than will
3004          * fit in the buffer specified by maxcount.  We stop calculating
3005          * post op attributes and filehandles once we have exceeded maxcount.
3006          * This will minimize the effect of truncation.
3007          *
3008          * It is primed with:
3009          *      1 for the status +
3010          *      1 for the dir_attributes.attributes boolean +
3011          *      2 for the cookie verifier
3012          * all times BYTES_PER_XDR_UNIT to convert from XDR units
3013          * to bytes.  If there are directory attributes to be
3014          * returned, then:
3015          *      NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3016          * time BYTES_PER_XDR_UNIT is added to account for them.
3017          */
3018         bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3019         if (vap != NULL)
3020                 bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3021 
3022 getmoredents:
3023         /*
3024          * Here we make a check so that our read unit is not larger than
3025          * the space left in the buffer.
3026          */
3027         rd_unit = MIN(rd_unit, space_left);
3028         iov.iov_base = (char *)dp;
3029         iov.iov_len = rd_unit;
3030         uio.uio_resid = rd_unit;
3031         prev_len = rd_unit;
3032 
3033         error = VOP_READDIR(vp, &uio, cr, &iseof);
3034 
3035         if (error) {
3036                 kmem_free(data, args->dircount);
3037                 goto out;
3038         }
3039 
3040         if (uio.uio_resid == prev_len && !iseof) {
3041                 if (nents == 0) {
3042                         kmem_free(data, args->dircount);
3043                         resp->status = NFS3ERR_TOOSMALL;
3044                         goto out1;
3045                 }
3046 
3047                 /*
3048                  * We could not get any more entries, so get the attributes
3049                  * and filehandle for the entries already obtained.
3050                  */
3051                 goto good;
3052         }
3053 
3054         /*
3055          * We estimate the size of the response by assuming the
3056          * entry exists and attributes and filehandle are also valid
3057          */
3058         for (size = prev_len - uio.uio_resid;
3059                 size > 0;
3060                 size -= dp->d_reclen, dp = nextdp(dp)) {
3061 
3062                 if (dp->d_ino == 0) {
3063                         nents++;
3064                         continue;
3065                 }
3066 
3067                 namlen[nents] = strlen(dp->d_name);
3068                 entrysize = NFS3_READDIRPLUS_ENTRY(namlen[nents]);
3069 
3070                 /*
3071                  * We need to check to see if the number of bytes left
3072                  * to go into the buffer will actually fit into the
3073                  * buffer.  This is calculated as the size of this
3074                  * entry plus:
3075                  *      1 for the true/false list indicator +
3076                  *      1 for the eof indicator
3077                  * times BYTES_PER_XDR_UNIT to convert from XDR units
3078                  * to bytes.
3079                  *
3080                  * Also check the dircount limit against the first entry read
3081                  *
3082                  */
3083                 tofit = entrysize + (1 + 1) * BYTES_PER_XDR_UNIT;
3084                 if (bufsize + tofit > args->maxcount) {
3085                         /*
3086                          * We make a check here to see if this was the
3087                          * first entry being measured.  If so, then maxcount
3088                          * was too small to begin with and so we need to
3089                          * return with NFS3ERR_TOOSMALL.
3090                          */
3091                         if (nents == 0) {
3092                                 kmem_free(data, args->dircount);
3093                                 resp->status = NFS3ERR_TOOSMALL;
3094                                 goto out1;
3095                         }
3096                         iseof = FALSE;
3097                         goto good;
3098                 }
3099                 bufsize += entrysize;
3100                 nents++;
3101         }
3102 
3103         /*
3104          * If there is enough room to fit at least 1 more entry including
3105          * post op attributes and filehandle in the buffer AND that we haven't
3106          * exceeded dircount then go back and get some more.
3107          */
3108         if (!iseof &&
3109             (args->maxcount - bufsize) >= NFS3_READDIRPLUS_ENTRY(MAXNAMELEN)) {
3110                 space_left -= (prev_len - uio.uio_resid);
3111                 if (space_left >= DIRENT64_RECLEN(MAXNAMELEN))
3112                         goto getmoredents;
3113 
3114                 /* else, fall through */
3115         }
3116 
3117 good:
3118 
3119 #ifdef DEBUG
3120         if (rfs3_do_post_op_attr) {
3121                 va.va_mask = AT_ALL;
3122                 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3123         } else
3124                 vap = NULL;
3125 #else
3126         va.va_mask = AT_ALL;
3127         vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3128 #endif
3129 
3130         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3131 
3132         infop = kmem_alloc(nents * sizeof (struct entryplus3_info), KM_SLEEP);
3133         resp->resok.infop = infop;
3134 
3135         dp = (struct dirent64 *)data;
3136         for (i = 0; i < nents; i++) {
3137 
3138                 if (dp->d_ino == 0) {
3139                         infop[i].attr.attributes = FALSE;
3140                         infop[i].fh.handle_follows = FALSE;
3141                         dp = nextdp(dp);
3142                         continue;
3143                 }
3144 
3145                 infop[i].namelen = namlen[i];
3146 
3147                 error = VOP_LOOKUP(vp, dp->d_name, &nvp, NULL, 0, NULL, cr);
3148                 if (error) {
3149                         infop[i].attr.attributes = FALSE;
3150                         infop[i].fh.handle_follows = FALSE;
3151                         dp = nextdp(dp);
3152                         continue;
3153                 }
3154 
3155 #ifdef DEBUG
3156                 if (rfs3_do_post_op_attr) {
3157                         nva.va_mask = AT_ALL;
3158                         nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ?
3159                                 NULL : &nva;
3160                 } else
3161                         nvap = NULL;
3162 #else
3163                 nva.va_mask = AT_ALL;
3164                 nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? NULL : &nva;
3165 #endif
3166                 vattr_to_post_op_attr(nvap, &infop[i].attr);
3167 
3168 #ifdef DEBUG
3169                 if (!rfs3_do_post_op_fh3)
3170                         infop[i].fh.handle_follows = FALSE;
3171                 else {
3172 #endif
3173                 error = makefh3(&infop[i].fh.handle, nvp, exi);
3174                 if (!error)
3175                         infop[i].fh.handle_follows = TRUE;
3176                 else
3177                         infop[i].fh.handle_follows = FALSE;
3178 #ifdef DEBUG
3179                 }
3180 #endif
3181 
3182                 VN_RELE(nvp);
3183                 dp = nextdp(dp);
3184         }
3185 
3186 #if 0 /* notyet */
3187         /*
3188          * Don't do this.  It causes local disk writes when just
3189          * reading the file and the overhead is deemed larger
3190          * than the benefit.
3191          */
3192         /*
3193          * Force modified metadata out to stable storage.
3194          */
3195         (void) VOP_FSYNC(vp, FNODSYNC, cr);
3196 #endif
3197 
3198         VN_RELE(vp);
3199 
3200         kmem_free(namlen, args->dircount);
3201 
3202         resp->status = NFS3_OK;
3203         vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3204         resp->resok.cookieverf = 0;
3205         resp->resok.reply.entries = (entryplus3 *)data;
3206         resp->resok.reply.eof = iseof;
3207         resp->resok.size = nents;
3208         resp->resok.count = args->dircount;
3209         resp->resok.maxcount = args->maxcount;
3210         return;
3211 
3212 out:
3213         if (curthread->t_flag & T_WOULDBLOCK) {
3214                 curthread->t_flag &= ~T_WOULDBLOCK;
3215                 resp->status = NFS3ERR_JUKEBOX;
3216         } else
3217                 resp->status = puterrno3(error);
3218 out1:
3219         if (vp != NULL) {
3220                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3221                 VN_RELE(vp);
3222         }
3223 
3224         if (namlen != NULL)
3225                 kmem_free(namlen, args->dircount);
3226 
3227         vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3228 }
3229 
3230 fhandle_t *
3231 rfs3_readdirplus_getfh(READDIRPLUS3args *args)
3232 {
3233 
3234         return ((fhandle_t *)&args->dir.fh3_u.nfs_fh3_i.fh3_i);
3235 }
3236 
3237 void
3238 rfs3_readdirplus_free(READDIRPLUS3res *resp)
3239 {
3240 
3241         if (resp->status == NFS3_OK) {
3242                 kmem_free(resp->resok.reply.entries, resp->resok.count);
3243                 kmem_free(resp->resok.infop,
3244                         resp->resok.size * sizeof (struct entryplus3_info));
3245         }
3246 }
3247 
3248 /* ARGSUSED */
3249 void
3250 rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi,
3251         struct svc_req *req, cred_t *cr)
3252 {
3253         int error;
3254         vnode_t *vp;
3255         struct vattr *vap;
3256         struct vattr va;
3257         struct statvfs64 sb;
3258 
3259         vap = NULL;
3260 
3261         vp = nfs3_fhtovp(&args->fsroot, exi);
3262         if (vp == NULL) {
3263                 error = ESTALE;
3264                 goto out;
3265         }
3266 
3267         error = VFS_STATVFS(vp->v_vfsp, &sb);
3268 
3269 #ifdef DEBUG
3270         if (rfs3_do_post_op_attr) {
3271                 va.va_mask = AT_ALL;
3272                 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3273         } else
3274                 vap = NULL;
3275 #else
3276         va.va_mask = AT_ALL;
3277         vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3278 #endif
3279 
3280         VN_RELE(vp);
3281 
3282         if (error)
3283                 goto out;
3284 
3285         resp->status = NFS3_OK;
3286         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3287         if (sb.f_blocks != (fsblkcnt64_t)-1)
3288                 resp->resok.tbytes = (size3)sb.f_frsize * (size3)sb.f_blocks;
3289         else
3290                 resp->resok.tbytes = (size3)sb.f_blocks;
3291         if (sb.f_bfree != (fsblkcnt64_t)-1)
3292                 resp->resok.fbytes = (size3)sb.f_frsize * (size3)sb.f_bfree;
3293         else
3294                 resp->resok.fbytes = (size3)sb.f_bfree;
3295         if (sb.f_bavail != (fsblkcnt64_t)-1)
3296                 resp->resok.abytes = (size3)sb.f_frsize * (size3)sb.f_bavail;
3297         else
3298                 resp->resok.abytes = (size3)sb.f_bavail;
3299         resp->resok.tfiles = (size3)sb.f_files;
3300         resp->resok.ffiles = (size3)sb.f_ffree;
3301         resp->resok.afiles = (size3)sb.f_favail;
3302         resp->resok.invarsec = 0;
3303         return;
3304 
3305 out:
3306         if (curthread->t_flag & T_WOULDBLOCK) {
3307                 curthread->t_flag &= ~T_WOULDBLOCK;
3308                 resp->status = NFS3ERR_JUKEBOX;
3309         } else
3310                 resp->status = puterrno3(error);
3311         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3312 }
3313 
3314 fhandle_t *
3315 rfs3_fsstat_getfh(FSSTAT3args *args)
3316 {
3317 
3318         return ((fhandle_t *)&args->fsroot.fh3_u.nfs_fh3_i.fh3_i);
3319 }
3320 
3321 /* ARGSUSED */
3322 void
3323 rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi,
3324         struct svc_req *req, cred_t *cr)
3325 {
3326         vnode_t *vp;
3327         struct vattr *vap;
3328         struct vattr va;
3329         uint32_t xfer_size;
3330         ulong_t l = 0;
3331         int error;
3332 
3333         vp = nfs3_fhtovp(&args->fsroot, exi);
3334         if (vp == NULL) {
3335                 if (curthread->t_flag & T_WOULDBLOCK) {
3336                         curthread->t_flag &= ~T_WOULDBLOCK;
3337                         resp->status = NFS3ERR_JUKEBOX;
3338                 } else
3339                         resp->status = NFS3ERR_STALE;
3340                 vattr_to_post_op_attr(NULL, &resp->resfail.obj_attributes);
3341                 return;
3342         }
3343 
3344 #ifdef DEBUG
3345         if (rfs3_do_post_op_attr) {
3346                 va.va_mask = AT_ALL;
3347                 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3348         } else
3349                 vap = NULL;
3350 #else
3351         va.va_mask = AT_ALL;
3352         vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3353 #endif
3354 
3355         resp->status = NFS3_OK;
3356         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3357         xfer_size = rfs3_tsize(req);
3358         resp->resok.rtmax = xfer_size;
3359         resp->resok.rtpref = xfer_size;
3360         resp->resok.rtmult = DEV_BSIZE;
3361         resp->resok.wtmax = xfer_size;
3362         resp->resok.wtpref = xfer_size;
3363         resp->resok.wtmult = DEV_BSIZE;
3364         resp->resok.dtpref = MAXBSIZE;
3365 
3366         /*
3367          * Large file spec: want maxfilesize based on limit of
3368          * underlying filesystem.  We can guess 2^31-1 if need be.
3369          */
3370         error = VOP_PATHCONF(vp, _PC_FILESIZEBITS, &l, cr);
3371 
3372         VN_RELE(vp);
3373 
3374         if (!error && l != 0 && l <= 64)
3375                 resp->resok.maxfilesize = (1LL << (l-1)) - 1;
3376         else
3377                 resp->resok.maxfilesize = MAXOFF32_T;
3378 
3379         resp->resok.time_delta.seconds = 0;
3380         resp->resok.time_delta.nseconds = 1000;
3381         resp->resok.properties = FSF3_LINK | FSF3_SYMLINK |
3382             FSF3_HOMOGENEOUS | FSF3_CANSETTIME;
3383 }
3384 
3385 fhandle_t *
3386 rfs3_fsinfo_getfh(FSINFO3args *args)
3387 {
3388 
3389         return ((fhandle_t *)&args->fsroot.fh3_u.nfs_fh3_i.fh3_i);
3390 }
3391 
3392 /* ARGSUSED */
3393 void
3394 rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi,
3395         struct svc_req *req, cred_t *cr)
3396 {
3397         int error;
3398         vnode_t *vp;
3399         struct vattr *vap;
3400         struct vattr va;
3401         ulong_t val;
3402 
3403         vap = NULL;
3404 
3405         vp = nfs3_fhtovp(&args->object, exi);
3406         if (vp == NULL) {
3407                 error = ESTALE;
3408                 goto out;
3409         }
3410 
3411 #ifdef DEBUG
3412         if (rfs3_do_post_op_attr) {
3413                 va.va_mask = AT_ALL;
3414                 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3415         } else
3416                 vap = NULL;
3417 #else
3418         va.va_mask = AT_ALL;
3419         vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3420 #endif
3421 
3422         error = VOP_PATHCONF(vp, _PC_LINK_MAX, &val, cr);
3423         if (error)
3424                 goto out;
3425         resp->resok.info.link_max = (uint32)val;
3426 
3427         error = VOP_PATHCONF(vp, _PC_NAME_MAX, &val, cr);
3428         if (error)
3429                 goto out;
3430         resp->resok.info.name_max = (uint32)val;
3431 
3432         error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &val, cr);
3433         if (error)
3434                 goto out;
3435         if (val == 1)
3436                 resp->resok.info.no_trunc = TRUE;
3437         else
3438                 resp->resok.info.no_trunc = FALSE;
3439 
3440         error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &val, cr);
3441         if (error)
3442                 goto out;
3443         if (val == 1)
3444                 resp->resok.info.chown_restricted = TRUE;
3445         else
3446                 resp->resok.info.chown_restricted = FALSE;
3447 
3448         VN_RELE(vp);
3449 
3450         resp->status = NFS3_OK;
3451         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3452         resp->resok.info.case_insensitive = FALSE;
3453         resp->resok.info.case_preserving = TRUE;
3454         return;
3455 
3456 out:
3457         if (curthread->t_flag & T_WOULDBLOCK) {
3458                 curthread->t_flag &= ~T_WOULDBLOCK;
3459                 resp->status = NFS3ERR_JUKEBOX;
3460         } else
3461                 resp->status = puterrno3(error);
3462         if (vp != NULL)
3463                 VN_RELE(vp);
3464         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3465 }
3466 
3467 fhandle_t *
3468 rfs3_pathconf_getfh(PATHCONF3args *args)
3469 {
3470 
3471         return ((fhandle_t *)&args->object.fh3_u.nfs_fh3_i.fh3_i);
3472 }
3473 
3474 void
3475 rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi,
3476         struct svc_req *req, cred_t *cr)
3477 {
3478         int error;
3479         vnode_t *vp;
3480         struct vattr *bvap;
3481         struct vattr bva;
3482         struct vattr *avap;
3483         struct vattr ava;
3484 
3485         bvap = NULL;
3486         avap = NULL;
3487 
3488         vp = nfs3_fhtovp(&args->file, exi);
3489         if (vp == NULL) {
3490                 error = ESTALE;
3491                 goto out;
3492         }
3493 
3494         bva.va_mask = AT_ALL;
3495         error = VOP_GETATTR(vp, &bva, 0, cr);
3496 
3497         /*
3498          * If we can't get the attributes, then we can't do the
3499          * right access checking.  So, we'll fail the request.
3500          */
3501         if (error)
3502                 goto out;
3503 
3504 #ifdef DEBUG
3505         if (rfs3_do_pre_op_attr)
3506                 bvap = &bva;
3507         else
3508                 bvap = NULL;
3509 #else
3510         bvap = &bva;
3511 #endif
3512 
3513         if (rdonly(exi, req)) {
3514                 resp->status = NFS3ERR_ROFS;
3515                 goto out1;
3516         }
3517 
3518         if (vp->v_type != VREG) {
3519                 resp->status = NFS3ERR_INVAL;
3520                 goto out1;
3521         }
3522 
3523         if (crgetuid(cr) != bva.va_uid &&
3524             (error = VOP_ACCESS(vp, VWRITE, 0, cr)))
3525                 goto out;
3526 
3527         error = VOP_PUTPAGE(vp, args->offset, args->count, 0, cr);
3528         if (!error)
3529                 error = VOP_FSYNC(vp, FNODSYNC, cr);
3530 
3531 #ifdef DEBUG
3532         if (rfs3_do_post_op_attr) {
3533                 ava.va_mask = AT_ALL;
3534                 avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
3535         } else
3536                 avap = NULL;
3537 #else
3538         ava.va_mask = AT_ALL;
3539         avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
3540 #endif
3541 
3542         if (error)
3543                 goto out;
3544 
3545         VN_RELE(vp);
3546 
3547         resp->status = NFS3_OK;
3548         vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
3549         resp->resok.verf = write3verf;
3550         return;
3551 
3552 out:
3553         if (curthread->t_flag & T_WOULDBLOCK) {
3554                 curthread->t_flag &= ~T_WOULDBLOCK;
3555                 resp->status = NFS3ERR_JUKEBOX;
3556         } else
3557                 resp->status = puterrno3(error);
3558 out1:
3559         if (vp != NULL)
3560                 VN_RELE(vp);
3561         vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
3562 }
3563 
3564 fhandle_t *
3565 rfs3_commit_getfh(COMMIT3args *args)
3566 {
3567 
3568         return ((fhandle_t *)&args->file.fh3_u.nfs_fh3_i.fh3_i);
3569 }
3570 
3571 static int
3572 sattr3_to_vattr(sattr3 *sap, struct vattr *vap)
3573 {
3574 
3575         vap->va_mask = 0;
3576 
3577         if (sap->mode.set_it) {
3578                 vap->va_mode = (mode_t)sap->mode.mode;
3579                 vap->va_mask |= AT_MODE;
3580         }
3581         if (sap->uid.set_it) {
3582                 vap->va_uid = (uid_t)sap->uid.uid;
3583                 vap->va_mask |= AT_UID;
3584         }
3585         if (sap->gid.set_it) {
3586                 vap->va_gid = (gid_t)sap->gid.gid;
3587                 vap->va_mask |= AT_GID;
3588         }
3589         if (sap->size.set_it) {
3590                 if (sap->size.size > (size3)((u_longlong_t)-1))
3591                         return (EINVAL);
3592                 vap->va_size = sap->size.size;
3593                 vap->va_mask |= AT_SIZE;
3594         }
3595         if (sap->atime.set_it == SET_TO_CLIENT_TIME) {
3596 #ifndef _LP64
3597                 /* check time validity */
3598                 if (!NFS3_TIME_OK(sap->atime.atime.seconds))
3599                         return (EOVERFLOW);
3600 #endif
3601                 /*
3602                  * nfs protocol defines times as unsigned so don't extend sign,
3603                  * unless sysadmin set nfs_allow_preepoch_time.
3604                  */
3605                 NFS_TIME_T_CONVERT(vap->va_atime.tv_sec,
3606                         sap->atime.atime.seconds);
3607                 vap->va_atime.tv_nsec = (uint32_t)sap->atime.atime.nseconds;
3608                 vap->va_mask |= AT_ATIME;
3609         } else if (sap->atime.set_it == SET_TO_SERVER_TIME) {
3610                 gethrestime(&vap->va_atime);
3611                 vap->va_mask |= AT_ATIME;
3612         }
3613         if (sap->mtime.set_it == SET_TO_CLIENT_TIME) {
3614 #ifndef _LP64
3615                 /* check time validity */
3616                 if (!NFS3_TIME_OK(sap->mtime.mtime.seconds))
3617                         return (EOVERFLOW);
3618 #endif
3619                 /*
3620                  * nfs protocol defines times as unsigned so don't extend sign,
3621                  * unless sysadmin set nfs_allow_preepoch_time.
3622                  */
3623                 NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec,
3624                         sap->mtime.mtime.seconds);
3625                 vap->va_mtime.tv_nsec = (uint32_t)sap->mtime.mtime.nseconds;
3626                 vap->va_mask |= AT_MTIME;
3627         } else if (sap->mtime.set_it == SET_TO_SERVER_TIME) {
3628                 gethrestime(&vap->va_mtime);
3629                 vap->va_mask |= AT_MTIME;
3630         }
3631 
3632         return (0);
3633 }
3634 
3635 static ftype3 vt_to_nf3[] = {
3636         0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0
3637 };
3638 
3639 static int
3640 vattr_to_fattr3(struct vattr *vap, fattr3 *fap)
3641 {
3642 
3643         ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
3644         /* Return error if time or size overflow */
3645         if (! (NFS_VAP_TIME_OK(vap) && NFS3_SIZE_OK(vap->va_size))) {
3646                 return (EOVERFLOW);
3647         }
3648         fap->type = vt_to_nf3[vap->va_type];
3649         fap->mode = (mode3)(vap->va_mode & MODEMASK);
3650         fap->nlink = (uint32)vap->va_nlink;
3651         if (vap->va_uid == UID_NOBODY)
3652                 fap->uid = (uid3)NFS_UID_NOBODY;
3653         else
3654                 fap->uid = (uid3)vap->va_uid;
3655         if (vap->va_gid == GID_NOBODY)
3656                 fap->gid = (gid3)NFS_GID_NOBODY;
3657         else
3658                 fap->gid = (gid3)vap->va_gid;
3659         fap->size = (size3)vap->va_size;
3660         fap->used = (size3)DEV_BSIZE * (size3)vap->va_nblocks;
3661         fap->rdev.specdata1 = (uint32)getmajor(vap->va_rdev);
3662         fap->rdev.specdata2 = (uint32)getminor(vap->va_rdev);
3663         fap->fsid = (uint64)vap->va_fsid;
3664         fap->fileid = (fileid3)vap->va_nodeid;
3665         fap->atime.seconds = vap->va_atime.tv_sec;
3666         fap->atime.nseconds = vap->va_atime.tv_nsec;
3667         fap->mtime.seconds = vap->va_mtime.tv_sec;
3668         fap->mtime.nseconds = vap->va_mtime.tv_nsec;
3669         fap->ctime.seconds = vap->va_ctime.tv_sec;
3670         fap->ctime.nseconds = vap->va_ctime.tv_nsec;
3671         return (0);
3672 }
3673 
3674 static int
3675 vattr_to_wcc_attr(struct vattr *vap, wcc_attr *wccap)
3676 {
3677 
3678         /* Return error if time or size overflow */
3679         if (!  (NFS_TIME_T_OK(vap->va_mtime.tv_sec) &&
3680                 NFS_TIME_T_OK(vap->va_ctime.tv_sec) &&
3681                 NFS3_SIZE_OK(vap->va_size))) {
3682                 return (EOVERFLOW);
3683         }
3684         wccap->size = (size3)vap->va_size;
3685         wccap->mtime.seconds = vap->va_mtime.tv_sec;
3686         wccap->mtime.nseconds = vap->va_mtime.tv_nsec;
3687         wccap->ctime.seconds = vap->va_ctime.tv_sec;
3688         wccap->ctime.nseconds = vap->va_ctime.tv_nsec;
3689         return (0);
3690 }
3691 
3692 static void
3693 vattr_to_pre_op_attr(struct vattr *vap, pre_op_attr *poap)
3694 {
3695 
3696         /* don't return attrs if time overflow */
3697         if ((vap != NULL) && !vattr_to_wcc_attr(vap, &poap->attr)) {
3698                 poap->attributes = TRUE;
3699         } else
3700                 poap->attributes = FALSE;
3701 }
3702 
3703 void
3704 vattr_to_post_op_attr(struct vattr *vap, post_op_attr *poap)
3705 {
3706 
3707         /* don't return attrs if time overflow */
3708         if ((vap != NULL) && !vattr_to_fattr3(vap, &poap->attr)) {
3709                 poap->attributes = TRUE;
3710         } else
3711                 poap->attributes = FALSE;
3712 }
3713 
3714 static void
3715 vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp)
3716 {
3717 
3718         vattr_to_pre_op_attr(bvap, &wccp->before);
3719         vattr_to_post_op_attr(avap, &wccp->after);
3720 }
3721 
3722 void
3723 rfs3_srvrinit(void)
3724 {
3725         struct rfs3_verf_overlay {
3726                 uint_t id; /* a "unique" identifier */
3727                 int ts; /* a unique timestamp */
3728         } *verfp;
3729         timestruc_t now;
3730 
3731         /*
3732          * The following algorithm attempts to find a unique verifier
3733          * to be used as the write verifier returned from the server
3734          * to the client.  It is important that this verifier change
3735          * whenever the server reboots.  Of secondary importance, it
3736          * is important for the verifier to be unique between two
3737          * different servers.
3738          *
3739          * Thus, an attempt is made to use the system hostid and the
3740          * current time in seconds when the nfssrv kernel module is
3741          * loaded.  It is assumed that an NFS server will not be able
3742          * to boot and then to reboot in less than a second.  If the
3743          * hostid has not been set, then the current high resolution
3744          * time is used.  This will ensure different verifiers each
3745          * time the server reboots and minimize the chances that two
3746          * different servers will have the same verifier.
3747          */
3748 
3749 #ifndef lint
3750         /*
3751          * We ASSERT that this constant logic expression is
3752          * always true because in the past, it wasn't.
3753          */
3754         ASSERT(sizeof (*verfp) <= sizeof (write3verf));
3755 #endif
3756 
3757         gethrestime(&now);
3758         verfp = (struct rfs3_verf_overlay *)&write3verf;
3759         verfp->ts = (int)now.tv_sec;
3760         verfp->id = (uint_t)nfs_atoi(hw_serial);
3761 
3762         if (verfp->id == 0)
3763                 verfp->id = (uint_t)now.tv_nsec;
3764 
3765 }
3766 
/*
 * Teardown counterpart of rfs3_srvrinit().  It is intentionally empty:
 * there is nothing to release or undo.
 */
void
rfs3_srvrfini(void)
{
}
3772