Old nfs3_srv.c
  1 /*
  2  * CDDL HEADER START
  3  *
  4  * The contents of this file are subject to the terms of the
  5  * Common Development and Distribution License, Version 1.0 only
  6  * (the "License").  You may not use this file except in compliance
  7  * with the License.
  8  *
  9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 10  * or http://www.opensolaris.org/os/licensing.
 11  * See the License for the specific language governing permissions
 12  * and limitations under the License.
 13  *
 14  * When distributing Covered Code, include this CDDL HEADER in each
 15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 16  * If applicable, add the following below this CDDL HEADER, with the
 17  * fields enclosed by brackets "[]" replaced with your own identifying
 18  * information: Portions Copyright [yyyy] [name of copyright owner]
 19  *
 20  * CDDL HEADER END
 21  */
 22 /*
 23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 24  * Use is subject to license terms.
 25  */
 26 
 27 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
 28 /* All Rights Reserved */
 29 
 30 #pragma ident   "@(#)nfs3_srv.c 1.114   05/12/16 SMI"
 31 
 32 #include <sys/param.h>
 33 #include <sys/types.h>
 34 #include <sys/systm.h>
 35 #include <sys/cred.h>
 36 #include <sys/buf.h>
 37 #include <sys/vfs.h>
 38 #include <sys/vnode.h>
 39 #include <sys/uio.h>
 40 #include <sys/errno.h>
 41 #include <sys/sysmacros.h>
 42 #include <sys/statvfs.h>
 43 #include <sys/kmem.h>
 44 #include <sys/dirent.h>
 45 #include <sys/cmn_err.h>
 46 #include <sys/debug.h>
 47 #include <sys/systeminfo.h>
 48 #include <sys/flock.h>
 49 #include <sys/nbmlock.h>
 50 #include <sys/policy.h>
 51 
 52 #include <rpc/types.h>
 53 #include <rpc/auth.h>
 54 #include <rpc/svc.h>
 55 
 56 #include <nfs/nfs.h>
 57 #include <nfs/export.h>
 58 
 59 #include <sys/strsubr.h>
 60 
 61 /*
 62  * These are the interface routines for the server side of the
 63  * Network File System.  See the NFS version 3 protocol specification
 64  * for a description of this interface.
 65  */
 66 
 67 #ifdef DEBUG
 68 int rfs3_do_pre_op_attr = 1;
 69 int rfs3_do_post_op_attr = 1;
 70 int rfs3_do_post_op_fh3 = 1;
 71 #endif
 72 
 73 static writeverf3 write3verf;
 74 
 75 static int      sattr3_to_vattr(sattr3 *, struct vattr *);
 76 static int      vattr_to_fattr3(struct vattr *, fattr3 *);
 77 static int      vattr_to_wcc_attr(struct vattr *, wcc_attr *);
 78 static void     vattr_to_pre_op_attr(struct vattr *, pre_op_attr *);
 79 static void     vattr_to_wcc_data(struct vattr *, struct vattr *, wcc_data *);
 80 
 81 /* ARGSUSED */
 82 void
 83 rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
 84         struct svc_req *req, cred_t *cr)
 85 {
 86         int error;
 87         vnode_t *vp;
 88         struct vattr va;
 89 
 90         vp = nfs3_fhtovp(&args->object, exi);
 91         if (vp == NULL) {
 92                 error = ESTALE;
 93                 goto out;
 94         }
 95 
 96         va.va_mask = AT_ALL;
 97         error = rfs4_delegated_getattr(vp, &va, 0, cr);
 98 
 99         VN_RELE(vp);
100 
101         if (!error) {
102                 /* overflow error if time or size is out of range */
103                 error = vattr_to_fattr3(&va, &resp->resok.obj_attributes);
104                 if (error)
105                         goto out;
106                 resp->status = NFS3_OK;
107                 return;
108         }
109 
110 out:
111         if (curthread->t_flag & T_WOULDBLOCK) {
112                 curthread->t_flag &= ~T_WOULDBLOCK;
113                 resp->status = NFS3ERR_JUKEBOX;
114         } else
115                 resp->status = puterrno3(error);
116 }
117 
118 fhandle_t *
119 rfs3_getattr_getfh(GETATTR3args *args)
120 {
121 
122         return ((fhandle_t *)&args->object.fh3_u.nfs_fh3_i.fh3_i);
123 }
124 
125 void
126 rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi,
127         struct svc_req *req, cred_t *cr)
128 {
129         int error;
130         vnode_t *vp;
131         struct vattr *bvap;
132         struct vattr bva;
133         struct vattr *avap;
134         struct vattr ava;
135         int flag;
136         int in_crit = 0;
137         struct flock64 bf;
138 
139         bvap = NULL;
140         avap = NULL;
141 
142         vp = nfs3_fhtovp(&args->object, exi);
143         if (vp == NULL) {
144                 error = ESTALE;
145                 goto out;
146         }
147 
148         error = sattr3_to_vattr(&args->new_attributes, &ava);
149         if (error)
150                 goto out;
151 
152         /*
153          * We need to specially handle size changes because of
154          * possible conflicting NBMAND locks. Get into critical
155          * region before VOP_GETATTR, so the size attribute is
156          * valid when checking conflicts.
157          *
158          * Also, check to see if the v4 side of the server has
159          * delegated this file.  If so, then we return JUKEBOX to
160          * allow the client to retrasmit its request.
161          */
162         if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
163                 if (rfs4_check_delegated(FWRITE, vp, TRUE)) {
164                         resp->status = NFS3ERR_JUKEBOX;
165                         goto out1;
166                 }
167                 if (nbl_need_check(vp)) {
168                         nbl_start_crit(vp, RW_READER);
169                         in_crit = 1;
170                 }
171         }
172 
173         bva.va_mask = AT_ALL;
174         error = rfs4_delegated_getattr(vp, &bva, 0, cr);
175 
176         /*
177          * If we can't get the attributes, then we can't do the
178          * right access checking.  So, we'll fail the request.
179          */
180         if (error)
181                 goto out;
182 
183 #ifdef DEBUG
184         if (rfs3_do_pre_op_attr)
185                 bvap = &bva;
186 #else
187         bvap = &bva;
188 #endif
189 
190         if (rdonly(exi, req) || vn_is_readonly(vp)) {
191                 resp->status = NFS3ERR_ROFS;
192                 goto out1;
193         }
194 
195         if (args->guard.check &&
196             (args->guard.obj_ctime.seconds != bva.va_ctime.tv_sec ||
197             args->guard.obj_ctime.nseconds != bva.va_ctime.tv_nsec)) {
198                 resp->status = NFS3ERR_NOT_SYNC;
199                 goto out1;
200         }
201 
202         if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME)
203                 flag = ATTR_UTIME;
204         else
205                 flag = 0;
206 
207         /*
208          * If the filesystem is exported with nosuid, then mask off
209          * the setuid and setgid bits.
210          */
211         if ((ava.va_mask & AT_MODE) && vp->v_type == VREG &&
212             (exi->exi_export.ex_flags & EX_NOSUID))
213                 ava.va_mode &= ~(VSUID | VSGID);
214 
215         /*
216          * We need to specially handle size changes because it is
217          * possible for the client to create a file with modes
218          * which indicate read-only, but with the file opened for
219          * writing.  If the client then tries to set the size of
220          * the file, then the normal access checking done in
221          * VOP_SETATTR would prevent the client from doing so,
222          * although it should be legal for it to do so.  To get
223          * around this, we do the access checking for ourselves
224          * and then use VOP_SPACE which doesn't do the access
225          * checking which VOP_SETATTR does. VOP_SPACE can only
226          * operate on VREG files, let VOP_SETATTR handle the other
227          * extremely rare cases.
228          * Also the client should not be allowed to change the
229          * size of the file if there is a conflicting non-blocking
230          * mandatory lock in the region the change.
231          */
232         if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
233                 if (in_crit) {
234                         u_offset_t offset;
235                         ssize_t length;
236 
237                         if (ava.va_size < bva.va_size) {
238                                 offset = ava.va_size;
239                                 length = bva.va_size - ava.va_size;
240                         } else {
241                                 offset = bva.va_size;
242                                 length = ava.va_size - bva.va_size;
243                         }
244                         if (nbl_conflict(vp, NBL_WRITE, offset, length, 0)) {
245                                 error = EACCES;
246                                 goto out;
247                         }
248                 }
249 
250                 if (crgetuid(cr) == bva.va_uid && ava.va_size != bva.va_size) {
251                         ava.va_mask &= ~AT_SIZE;
252                         bf.l_type = F_WRLCK;
253                         bf.l_whence = 0;
254                         bf.l_start = (off64_t)ava.va_size;
255                         bf.l_len = 0;
256                         bf.l_sysid = 0;
257                         bf.l_pid = 0;
258                         error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
259                             (offset_t)ava.va_size, cr, NULL);
260                 }
261         }
262 
263         if (!error && ava.va_mask)
264                 error = VOP_SETATTR(vp, &ava, flag, cr, NULL);
265 
266 #ifdef DEBUG
267         if (rfs3_do_post_op_attr) {
268                 ava.va_mask = AT_ALL;
269                 avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
270         } else
271                 avap = NULL;
272 #else
273         ava.va_mask = AT_ALL;
274         avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
275 #endif
276 
277         /*
278          * Force modified metadata out to stable storage.
279          */
280         (void) VOP_FSYNC(vp, FNODSYNC, cr);
281 
282         if (error)
283                 goto out;
284 
285         if (in_crit)
286                 nbl_end_crit(vp);
287         VN_RELE(vp);
288 
289         resp->status = NFS3_OK;
290         vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc);
291         return;
292 
293 out:
294         if (curthread->t_flag & T_WOULDBLOCK) {
295                 curthread->t_flag &= ~T_WOULDBLOCK;
296                 resp->status = NFS3ERR_JUKEBOX;
297         } else
298                 resp->status = puterrno3(error);
299 out1:
300         if (vp != NULL) {
301                 if (in_crit)
302                         nbl_end_crit(vp);
303                 VN_RELE(vp);
304         }
305         vattr_to_wcc_data(bvap, avap, &resp->resfail.obj_wcc);
306 }
307 
308 fhandle_t *
309 rfs3_setattr_getfh(SETATTR3args *args)
310 {
311 
312         return ((fhandle_t *)&args->object.fh3_u.nfs_fh3_i.fh3_i);
313 }
314 
315 /* ARGSUSED */
316 void
317 rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
318         struct svc_req *req, cred_t *cr)
319 {
320         int error;
321         vnode_t *vp;
322         vnode_t *dvp;
323         struct vattr *vap;
324         struct vattr va;
325         struct vattr *dvap;
326         struct vattr dva;
327         nfs_fh3 *fhp;
328         struct sec_ol sec = {0, 0};
329         bool_t publicfh_flag = FALSE, auth_weak = FALSE;
330 
331         dvap = NULL;
332 
333         /*
334          * Allow lookups from the root - the default
335          * location of the public filehandle.
336          */
337         if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
338                 dvp = rootdir;
339                 VN_HOLD(dvp);
340         } else {
341                 dvp = nfs3_fhtovp(args->what.dirp, exi);
342                 if (dvp == NULL) {
343                         error = ESTALE;
344                         goto out;
345                 }
346         }
347 
348 #ifdef DEBUG
349         if (rfs3_do_pre_op_attr) {
350                 dva.va_mask = AT_ALL;
351                 dvap = VOP_GETATTR(dvp, &dva, 0, cr) ? NULL : &dva;
352         }
353 #else
354         dva.va_mask = AT_ALL;
355         dvap = VOP_GETATTR(dvp, &dva, 0, cr) ? NULL : &dva;
356 #endif
357 
358         if (args->what.name == nfs3nametoolong) {
359                 resp->status = NFS3ERR_NAMETOOLONG;
360                 goto out1;
361         }
362 
363         if (args->what.name == NULL || *(args->what.name) == '\0') {
364                 resp->status = NFS3ERR_ACCES;
365                 goto out1;
366         }
367 
368         fhp = args->what.dirp;
369         if (strcmp(args->what.name, "..") == 0 &&
370             EQFID(&exi->exi_fid, (fid_t *)&fhp->fh3_len)) {
371                 resp->status = NFS3ERR_NOENT;
372                 goto out1;
373         }
374 
375         /*
376          * If the public filehandle is used then allow
377          * a multi-component lookup
378          */
379         if (PUBLIC_FH3(args->what.dirp)) {
380                 publicfh_flag = TRUE;
381                 error = rfs_publicfh_mclookup(args->what.name, dvp, cr, &vp,
382                                         &exi, &sec);
383                 if (error && exi != NULL)
384                         exi_rele(exi);  /* See the comment below */
385         } else {
386                 error = VOP_LOOKUP(dvp, args->what.name, &vp,
387                                 NULL, 0, NULL, cr);
388         }
389 
390 #ifdef DEBUG
391         if (rfs3_do_post_op_attr) {
392                 dva.va_mask = AT_ALL;
393                 dvap = VOP_GETATTR(dvp, &dva, 0, cr) ? NULL : &dva;
394         } else
395                 dvap = NULL;
396 #else
397         dva.va_mask = AT_ALL;
398         dvap = VOP_GETATTR(dvp, &dva, 0, cr) ? NULL : &dva;
399 #endif
400 
401         if (error)
402                 goto out;
403 
404         if (sec.sec_flags & SEC_QUERY) {
405                 error = makefh3_ol(&resp->resok.object, exi, sec.sec_index);
406         } else {
407                 error = makefh3(&resp->resok.object, vp, exi);
408                 if (!error && publicfh_flag && !chk_clnt_sec(exi, req))
409                         auth_weak = TRUE;
410         }
411 
412         if (error) {
413                 VN_RELE(vp);
414                 goto out;
415         }
416 
417         /*
418          * If publicfh_flag is true then we have called rfs_publicfh_mclookup
419          * and have obtained a new exportinfo in exi which needs to be
420          * released. Note the the original exportinfo pointed to by exi
421          * will be released by the caller, common_dispatch.
422          */
423         if (publicfh_flag)
424                 exi_rele(exi);
425 
426         VN_RELE(dvp);
427 
428 #ifdef DEBUG
429         if (rfs3_do_post_op_attr) {
430                 va.va_mask = AT_ALL;
431                 vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
432         } else
433                 vap = NULL;
434 #else
435         va.va_mask = AT_ALL;
436         vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
437 #endif
438 
439         VN_RELE(vp);
440 
441         resp->status = NFS3_OK;
442         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
443         vattr_to_post_op_attr(dvap, &resp->resok.dir_attributes);
444 
445         /*
446          * If it's public fh, no 0x81, and client's flavor is
447          * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
448          * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
449          */
450         if (auth_weak)
451                 resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR;
452 
453         return;
454 
455 out:
456         if (curthread->t_flag & T_WOULDBLOCK) {
457                 curthread->t_flag &= ~T_WOULDBLOCK;
458                 resp->status = NFS3ERR_JUKEBOX;
459         } else
460                 resp->status = puterrno3(error);
461 out1:
462         if (dvp != NULL)
463                 VN_RELE(dvp);
464         vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes);
465 
466 }
467 
468 fhandle_t *
469 rfs3_lookup_getfh(LOOKUP3args *args)
470 {
471 
472         return ((fhandle_t *)&args->what.dirp->fh3_u.nfs_fh3_i.fh3_i);
473 }
474 
475 /* ARGSUSED */
476 void
477 rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi,
478         struct svc_req *req, cred_t *cr)
479 {
480         int error;
481         vnode_t *vp;
482         struct vattr *vap;
483         struct vattr va;
484         int checkwriteperm;
485 
486         vap = NULL;
487 
488         vp = nfs3_fhtovp(&args->object, exi);
489         if (vp == NULL) {
490                 error = ESTALE;
491                 goto out;
492         }
493 
494         /*
495          * If the file system is exported read only, it is not appropriate
496          * to check write permissions for regular files and directories.
497          * Special files are interpreted by the client, so the underlying
498          * permissions are sent back to the client for interpretation.
499          */
500         if (rdonly(exi, req) && (vp->v_type == VREG || vp->v_type == VDIR))
501                 checkwriteperm = 0;
502         else
503                 checkwriteperm = 1;
504 
505         /*
506          * We need the mode so that we can correctly determine access
507          * permissions relative to a mandatory lock file.  Access to
508          * mandatory lock files is denied on the server, so it might
509          * as well be reflected to the server during the open.
510          */
511         va.va_mask = AT_MODE;
512         error = VOP_GETATTR(vp, &va, 0, cr);
513         if (error)
514                 goto out;
515 
516 #ifdef DEBUG
517         if (rfs3_do_post_op_attr)
518                 vap = &va;
519 #else
520         vap = &va;
521 #endif
522 
523         resp->resok.access = 0;
524 
525         if (args->access & ACCESS3_READ) {
526                 error = VOP_ACCESS(vp, VREAD, 0, cr);
527                 if (error) {
528                         if (curthread->t_flag & T_WOULDBLOCK)
529                                 goto out;
530                 } else if (!MANDLOCK(vp, va.va_mode))
531                         resp->resok.access |= ACCESS3_READ;
532         }
533         if ((args->access & ACCESS3_LOOKUP) && vp->v_type == VDIR) {
534                 error = VOP_ACCESS(vp, VEXEC, 0, cr);
535                 if (error) {
536                         if (curthread->t_flag & T_WOULDBLOCK)
537                                 goto out;
538                 } else
539                         resp->resok.access |= ACCESS3_LOOKUP;
540         }
541         if (checkwriteperm &&
542             (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND))) {
543                 error = VOP_ACCESS(vp, VWRITE, 0, cr);
544                 if (error) {
545                         if (curthread->t_flag & T_WOULDBLOCK)
546                                 goto out;
547                 } else if (!MANDLOCK(vp, va.va_mode)) {
548                         resp->resok.access |=
549                             (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND));
550                 }
551         }
552         if (checkwriteperm &&
553             (args->access & ACCESS3_DELETE) && vp->v_type == VDIR) {
554                 error = VOP_ACCESS(vp, VWRITE, 0, cr);
555                 if (error) {
556                         if (curthread->t_flag & T_WOULDBLOCK)
557                                 goto out;
558                 } else
559                         resp->resok.access |= ACCESS3_DELETE;
560         }
561         if (args->access & ACCESS3_EXECUTE) {
562                 error = VOP_ACCESS(vp, VEXEC, 0, cr);
563                 if (error) {
564                         if (curthread->t_flag & T_WOULDBLOCK)
565                                 goto out;
566                 } else if (!MANDLOCK(vp, va.va_mode))
567                         resp->resok.access |= ACCESS3_EXECUTE;
568         }
569 
570 #ifdef DEBUG
571         if (rfs3_do_post_op_attr) {
572                 va.va_mask = AT_ALL;
573                 vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
574         } else
575                 vap = NULL;
576 #else
577         va.va_mask = AT_ALL;
578         vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
579 #endif
580 
581         VN_RELE(vp);
582 
583         resp->status = NFS3_OK;
584         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
585         return;
586 
587 out:
588         if (curthread->t_flag & T_WOULDBLOCK) {
589                 curthread->t_flag &= ~T_WOULDBLOCK;
590                 resp->status = NFS3ERR_JUKEBOX;
591         } else
592                 resp->status = puterrno3(error);
593         if (vp != NULL)
594                 VN_RELE(vp);
595         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
596 }
597 
598 fhandle_t *
599 rfs3_access_getfh(ACCESS3args *args)
600 {
601 
602         return ((fhandle_t *)&args->object.fh3_u.nfs_fh3_i.fh3_i);
603 }
604 
605 /* ARGSUSED */
606 void
607 rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi,
608         struct svc_req *req, cred_t *cr)
609 {
610         int error;
611         vnode_t *vp;
612         struct vattr *vap;
613         struct vattr va;
614         struct iovec iov;
615         struct uio uio;
616         char *data;
617 
618         vap = NULL;
619 
620         vp = nfs3_fhtovp(&args->symlink, exi);
621         if (vp == NULL) {
622                 error = ESTALE;
623                 goto out;
624         }
625 
626         va.va_mask = AT_ALL;
627         error = VOP_GETATTR(vp, &va, 0, cr);
628         if (error)
629                 goto out;
630 
631 #ifdef DEBUG
632         if (rfs3_do_post_op_attr)
633                 vap = &va;
634 #else
635         vap = &va;
636 #endif
637 
638         if (vp->v_type != VLNK) {
639                 resp->status = NFS3ERR_INVAL;
640                 goto out1;
641         }
642 
643         if (MANDLOCK(vp, va.va_mode)) {
644                 resp->status = NFS3ERR_ACCES;
645                 goto out1;
646         }
647 
648         data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
649 
650         iov.iov_base = data;
651         iov.iov_len = MAXPATHLEN;
652         uio.uio_iov = &iov;
653         uio.uio_iovcnt = 1;
654         uio.uio_segflg = UIO_SYSSPACE;
655         uio.uio_extflg = UIO_COPY_CACHED;
656         uio.uio_loffset = 0;
657         uio.uio_resid = MAXPATHLEN;
658 
659         error = VOP_READLINK(vp, &uio, cr);
660 
661 #ifdef DEBUG
662         if (rfs3_do_post_op_attr) {
663                 va.va_mask = AT_ALL;
664                 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
665         } else
666                 vap = NULL;
667 #else
668         va.va_mask = AT_ALL;
669         vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
670 #endif
671 
672 #if 0 /* notyet */
673         /*
674          * Don't do this.  It causes local disk writes when just
675          * reading the file and the overhead is deemed larger
676          * than the benefit.
677          */
678         /*
679          * Force modified metadata out to stable storage.
680          */
681         (void) VOP_FSYNC(vp, FNODSYNC, cr);
682 #endif
683 
684         if (error) {
685                 kmem_free(data, MAXPATHLEN + 1);
686                 goto out;
687         }
688 
689         VN_RELE(vp);
690 
691         resp->status = NFS3_OK;
692         vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes);
693         resp->resok.data = data;
694         *(data + MAXPATHLEN - uio.uio_resid) = '\0';
695         return;
696 
697 out:
698         if (curthread->t_flag & T_WOULDBLOCK) {
699                 curthread->t_flag &= ~T_WOULDBLOCK;
700                 resp->status = NFS3ERR_JUKEBOX;
701         } else
702                 resp->status = puterrno3(error);
703 out1:
704         if (vp != NULL)
705                 VN_RELE(vp);
706         vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes);
707 }
708 
709 fhandle_t *
710 rfs3_readlink_getfh(READLINK3args *args)
711 {
712 
713         return ((fhandle_t *)&args->symlink.fh3_u.nfs_fh3_i.fh3_i);
714 }
715 
716 void
717 rfs3_readlink_free(READLINK3res *resp)
718 {
719 
720         if (resp->status == NFS3_OK)
721                 kmem_free(resp->resok.data, MAXPATHLEN + 1);
722 }
723 
724 /* ARGSUSED */
725 void
726 rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi,
727         struct svc_req *req, cred_t *cr)
728 {
729         int error;
730         vnode_t *vp;
731         struct vattr *vap;
732         struct vattr va;
733         struct iovec iov;
734         struct uio uio;
735         u_offset_t offset;
736         mblk_t *mp;
737         int alloc_err = 0;
738         int in_crit = 0;
739         int need_rwunlock = 0;
740 
741         vap = NULL;
742 
743         vp = nfs3_fhtovp(&args->file, exi);
744         if (vp == NULL) {
745                 error = ESTALE;
746                 goto out;
747         }
748 
749         /*
750          * Check to see if the v4 side of the server has delegated
751          * this file.  If so, then we return JUKEBOX to allow the
752          * client to retrasmit its request.
753          */
754         if (rfs4_check_delegated(FREAD, vp, FALSE)) {
755                 resp->status = NFS3ERR_JUKEBOX;
756                 goto out1;
757         }
758 
759         /*
760          * Enter the critical region before calling VOP_RWLOCK
761          * to avoid a deadlock with write requests.
762          */
763         if (nbl_need_check(vp)) {
764                 nbl_start_crit(vp, RW_READER);
765                 in_crit = 1;
766                 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0)) {
767                         error = EACCES;
768                         goto out;
769                 }
770         }
771 
772         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
773         need_rwunlock = 1;
774 
775         va.va_mask = AT_ALL;
776         error = VOP_GETATTR(vp, &va, 0, cr);
777 
778         /*
779          * If we can't get the attributes, then we can't do the
780          * right access checking.  So, we'll fail the request.
781          */
782         if (error)
783                 goto out;
784 
785 #ifdef DEBUG
786         if (rfs3_do_post_op_attr)
787                 vap = &va;
788 #else
789         vap = &va;
790 #endif
791 
792         if (vp->v_type != VREG) {
793                 resp->status = NFS3ERR_INVAL;
794                 goto out1;
795         }
796 
797         if (crgetuid(cr) != va.va_uid) {
798                 error = VOP_ACCESS(vp, VREAD, 0, cr);
799                 if (error) {
800                         if (curthread->t_flag & T_WOULDBLOCK)
801                                 goto out;
802                         error = VOP_ACCESS(vp, VEXEC, 0, cr);
803                         if (error)
804                                 goto out;
805                 }
806         }
807 
808         if (MANDLOCK(vp, va.va_mode)) {
809                 resp->status = NFS3ERR_ACCES;
810                 goto out1;
811         }
812 
813         offset = args->offset;
814         if (offset >= va.va_size) {
815                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
816                 if (in_crit)
817                         nbl_end_crit(vp);
818                 VN_RELE(vp);
819                 resp->status = NFS3_OK;
820                 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
821                 resp->resok.count = 0;
822                 resp->resok.eof = TRUE;
823                 resp->resok.data.data_len = 0;
824                 resp->resok.data.data_val = NULL;
825                 resp->resok.data.mp = NULL;
826                 return;
827         }
828 
829         if (args->count == 0) {
830                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
831                 if (in_crit)
832                         nbl_end_crit(vp);
833                 VN_RELE(vp);
834                 resp->status = NFS3_OK;
835                 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
836                 resp->resok.count = 0;
837                 resp->resok.eof = FALSE;
838                 resp->resok.data.data_len = 0;
839                 resp->resok.data.data_val = NULL;
840                 resp->resok.data.mp = NULL;
841                 return;
842         }
843 
844         /*
845          * do not allocate memory more the max. allowed
846          * transfer size
847          */
848         if (args->count > rfs3_tsize(req))
849                 args->count = rfs3_tsize(req);
850 
851         /*
852          * mp will contain the data to be sent out in the read reply.
853          * This will be freed after the reply has been sent out (by the
854          * driver).
855          * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so
856          * that the call to xdrmblk_putmblk() never fails.
857          */
858         mp = allocb_wait(RNDUP(args->count), BPRI_MED, STR_NOSIG, &alloc_err);
859         ASSERT(mp != NULL);
860         ASSERT(alloc_err == 0);
861 
862         iov.iov_base = (caddr_t)mp->b_datap->db_base;
863         iov.iov_len = args->count;
864         uio.uio_iov = &iov;
865         uio.uio_iovcnt = 1;
866         uio.uio_segflg = UIO_SYSSPACE;
867         uio.uio_extflg = UIO_COPY_CACHED;
868         uio.uio_loffset = args->offset;
869         uio.uio_resid = args->count;
870 
871         error = VOP_READ(vp, &uio, 0, cr, NULL);
872 
873         if (error) {
874                 freeb(mp);
875                 goto out;
876         }
877 
878         va.va_mask = AT_ALL;
879         error = VOP_GETATTR(vp, &va, 0, cr);
880 
881 #ifdef DEBUG
882         if (rfs3_do_post_op_attr) {
883                 if (error)
884                         vap = NULL;
885                 else
886                         vap = &va;
887         } else
888                 vap = NULL;
889 #else
890         if (error)
891                 vap = NULL;
892         else
893                 vap = &va;
894 #endif
895 
896         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
897 
898 #if 0 /* notyet */
899         /*
900          * Don't do this.  It causes local disk writes when just
901          * reading the file and the overhead is deemed larger
902          * than the benefit.
903          */
904         /*
905          * Force modified metadata out to stable storage.
906          */
907         (void) VOP_FSYNC(vp, FNODSYNC, cr);
908 #endif
909 
910         if (in_crit)
911                 nbl_end_crit(vp);
912         VN_RELE(vp);
913 
914         resp->status = NFS3_OK;
915         vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
916         resp->resok.count = args->count - uio.uio_resid;
917         if (!error && offset + resp->resok.count == va.va_size)
918                 resp->resok.eof = TRUE;
919         else
920                 resp->resok.eof = FALSE;
921         resp->resok.data.data_len = resp->resok.count;
922         resp->resok.data.data_val = (char *)mp->b_datap->db_base;
923 
924         resp->resok.data.mp = mp;
925 
926         resp->resok.size = (uint_t)args->count;
927         return;
928 
929 out:
930         if (curthread->t_flag & T_WOULDBLOCK) {
931                 curthread->t_flag &= ~T_WOULDBLOCK;
932                 resp->status = NFS3ERR_JUKEBOX;
933         } else
934                 resp->status = puterrno3(error);
935 out1:
936         if (vp != NULL) {
937                 if (need_rwunlock)
938                         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
939                 if (in_crit)
940                         nbl_end_crit(vp);
941                 VN_RELE(vp);
942         }
943         vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
944 }
945 
/*
 * rfs3_read_free: release the data buffer attached to a READ3 reply.
 *
 * Only a reply whose status is NFS3_OK is touched; its resok.data.mp
 * mblk, when non-NULL, is freed with freeb().  Failed replies are left
 * alone.
 * NOTE(review): presumably invoked by the dispatcher once the reply has
 * been sent -- confirm against the dispatch table.
 */
946 void
947 rfs3_read_free(READ3res *resp)
948 {
949         mblk_t *mp;
950 
951         if (resp->status == NFS3_OK) {
952                 mp = resp->resok.data.mp;
953                 if (mp != NULL)
954                         freeb(mp);
955         }
956 }
957 
/*
 * rfs3_read_getfh: return a pointer to the fhandle_t embedded in the
 * file handle (args->file) of a READ3 request.  The pointer refers to
 * storage inside args; nothing is allocated or copied.
 */
958 fhandle_t *
959 rfs3_read_getfh(READ3args *args)
960 {
961 
962         return ((fhandle_t *)&args->file.fh3_u.nfs_fh3_i.fh3_i);
963 }
964 
/*
 * Number of iovec entries rfs3_write() keeps in its on-stack array.  A
 * write whose mblk chain has more blocks than this uses a temporary
 * kmem_alloc()ed array instead.
 */
965 #define MAX_IOVECS      12
966 
/*
 * DEBUG-only counters maintained by rfs3_write(): "hits" counts writes
 * whose mblk chain fit in the on-stack iovec array, "misses" counts
 * writes that needed the allocated array.
 */
967 #ifdef DEBUG
968 static int rfs3_write_hits = 0;
969 static int rfs3_write_misses = 0;
970 #endif
971 
/*
 * rfs3_write: server-side handler for the NFSv3 WRITE procedure.
 *
 * args - decoded request: file handle, offset, count, stability level
 *        and the data, supplied either as an mblk chain (args->mblk)
 *        or as a flat buffer (args->data.data_val).
 * resp - reply to fill in: resok on success, resfail (wcc data only)
 *        on failure.
 * exi  - export used to translate the file handle and for the
 *        rdonly() check.
 * req  - RPC request, passed to rdonly().
 * cr   - credentials used for every VOP call.
 *
 * Failures that carry an errno go through "out", which maps the errno
 * with puterrno3() unless the thread has T_WOULDBLOCK set, in which
 * case the flag is cleared and NFS3ERR_JUKEBOX is returned.  Failures
 * that have already chosen an NFS3 status go straight to "out1".
 *
 * Every exit path drops what was acquired on the way in: the
 * VOP_RWLOCK lock, the nbmand critical region (in_crit) and the hold
 * on vp.
 */
972 void
973 rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
974         struct svc_req *req, cred_t *cr)
975 {
976         int error;
977         vnode_t *vp;
978         struct vattr *bvap = NULL;
979         struct vattr bva;
980         struct vattr *avap = NULL;
981         struct vattr ava;
982         u_offset_t rlimit;
983         struct uio uio;
984         struct iovec iov[MAX_IOVECS];
985         mblk_t *m;
986         struct iovec *iovp;
987         int iovcnt;
988         int ioflag;
989         cred_t *savecred;
990         int in_crit = 0;        /* set once nbl_start_crit() was called */
991         int rwlock_ret = -1;    /* stays -1 until VOP_RWLOCK was called */
992 
993         vp = nfs3_fhtovp(&args->file, exi);
994         if (vp == NULL) {
995                 error = ESTALE;
996                 goto out;
997         }
998 
999         /*
1000          * Check to see if the v4 side of the server has delegated
1001          * this file.  If so, then we return JUKEBOX to allow the
1002          * client to retransmit its request.
1003          */
1004         if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
1005                 resp->status = NFS3ERR_JUKEBOX;
1006                 goto out1;
1007         }
1008 
1009         /*
1010          * We have to enter the critical region before calling VOP_RWLOCK
1011          * to avoid a deadlock with ufs.
1012          */
1013         if (nbl_need_check(vp)) {
1014                 nbl_start_crit(vp, RW_READER);
1015                 in_crit = 1;
1016                 if (nbl_conflict(vp, NBL_WRITE, args->offset, args->count, 0)) {
1017                         error = EACCES;
1018                         goto out;
1019                 }
1020         }
1021 
1022         rwlock_ret = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
1023 
1024         bva.va_mask = AT_ALL;
1025         error = VOP_GETATTR(vp, &bva, 0, cr);
1026 
1027         /*
1028          * If we can't get the attributes, then we can't do the
1029          * right access checking.  So, we'll fail the request.
1030          */
1031         if (error)
1032                 goto out;
1033 
1034         bvap = &bva;
1035 #ifdef DEBUG
1036         if (!rfs3_do_pre_op_attr)
1037                 bvap = NULL;
1038 #endif
1039         avap = bvap;
1040 
1041         if (args->count != args->data.data_len) {
1042                 resp->status = NFS3ERR_INVAL;
1043                 goto out1;
1044         }
1045 
1046         if (rdonly(exi, req)) {
1047                 resp->status = NFS3ERR_ROFS;
1048                 goto out1;
1049         }
1050 
1051         if (vp->v_type != VREG) {
1052                 resp->status = NFS3ERR_INVAL;
1053                 goto out1;
1054         }
1055 
             /* The owner of the file is not subjected to the VWRITE check. */
1056         if (crgetuid(cr) != bva.va_uid &&
1057             (error = VOP_ACCESS(vp, VWRITE, 0, cr)))
1058                 goto out;
1059 
1060         if (MANDLOCK(vp, bva.va_mode)) {
1061                 resp->status = NFS3ERR_ACCES;
1062                 goto out1;
1063         }
1064 
1065         if (args->count == 0) {
1066                 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
                     /*
                      * Leave the nbmand critical region entered above
                      * before dropping the vnode; this early return
                      * does not pass through the common exit code.
                      */
                     if (in_crit)
                             nbl_end_crit(vp);
1067                 VN_RELE(vp);
1068                 resp->status = NFS3_OK;
1069                 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1070                 resp->resok.count = 0;
1071                 resp->resok.committed = args->stable;
1072                 resp->resok.verf = write3verf;
1073                 return;
1074         }
1075 
             /*
              * Describe the data with an iovec array.  For an mblk chain
              * the array gets one slot per mblk: the on-stack array when
              * it is large enough, otherwise a temporary allocation that
              * is freed once the write has been issued.
              */
1076         if (args->mblk != NULL) {
1077                 iovcnt = 0;
1078                 for (m = args->mblk; m != NULL; m = m->b_cont)
1079                         iovcnt++;
1080                 if (iovcnt <= MAX_IOVECS) {
1081 #ifdef DEBUG
1082                         rfs3_write_hits++;
1083 #endif
1084                         iovp = iov;
1085                 } else {
1086 #ifdef DEBUG
1087                         rfs3_write_misses++;
1088 #endif
1089                         iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1090                 }
1091                 mblk_to_iov(args->mblk, iovcnt, iovp);
1092         } else {
1093                 iovcnt = 1;
1094                 iovp = iov;
1095                 iovp->iov_base = args->data.data_val;
1096                 iovp->iov_len = args->count;
1097         }
1098 
1099         uio.uio_iov = iovp;
1100         uio.uio_iovcnt = iovcnt;
1101 
1102         uio.uio_segflg = UIO_SYSSPACE;
1103         uio.uio_extflg = UIO_COPY_DEFAULT;
1104         uio.uio_loffset = args->offset;
1105         uio.uio_resid = args->count;
             /*
              * Clamp the transfer so that it does not extend past the
              * file size limit taken from curproc->p_fsz_ctl.
              */
1106         uio.uio_llimit = curproc->p_fsz_ctl;
1107         rlimit = uio.uio_llimit - args->offset;
1108         if (rlimit < (u_offset_t)uio.uio_resid)
1109                 uio.uio_resid = (int)rlimit;
1110 
1111         if (args->stable == UNSTABLE)
1112                 ioflag = 0;
1113         else if (args->stable == FILE_SYNC)
1114                 ioflag = FSYNC;
1115         else if (args->stable == DATA_SYNC)
1116                 ioflag = FDSYNC;
1117         else {
1118                 if (iovp != iov)
1119                         kmem_free(iovp, sizeof (*iovp) * iovcnt);
1120                 resp->status = NFS3ERR_INVAL;
1121                 goto out1;
1122         }
1123 
1124         /*
1125          * We're changing creds because VM may fault and we need
1126          * the cred of the current thread to be used if quota
1127          * checking is enabled.
1128          */
1129         savecred = curthread->t_cred;
1130         curthread->t_cred = cr;
1131         error = VOP_WRITE(vp, &uio, ioflag, cr, NULL);
1132         curthread->t_cred = savecred;
1133 
1134         if (iovp != iov)
1135                 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1136 
             /* Fetch the post-op attributes whether or not the write failed. */
1137         ava.va_mask = AT_ALL;
1138         avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
1139 
1140 #ifdef DEBUG
1141         if (!rfs3_do_post_op_attr)
1142                 avap = NULL;
1143 #endif
1144 
1145         if (error)
1146                 goto out;
1147 
1148         VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1149         if (in_crit)
1150                 nbl_end_crit(vp);
1151         VN_RELE(vp);
1152 
1153         /*
1154          * If we were unable to get the V_WRITELOCK_TRUE, then we
1155          * may not have accurate after attrs, so check if
1156          * we have both attributes, they have a non-zero va_seq, and
1157          * va_seq has changed by exactly one,
1158          * if not, turn off the before attr.
1159          */
1160         if (rwlock_ret != V_WRITELOCK_TRUE) {
1161                 if (bvap == NULL || avap == NULL ||
1162                                 bvap->va_seq == 0 || avap->va_seq == 0 ||
1163                                 avap->va_seq != (bvap->va_seq + 1)) {
1164                         bvap = NULL;
1165                 }
1166         }
1167 
1168         resp->status = NFS3_OK;
1169         vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1170         resp->resok.count = args->count - uio.uio_resid;
1171         resp->resok.committed = args->stable;
1172         resp->resok.verf = write3verf;
1173         return;
1174 
1175 out:
1176         if (curthread->t_flag & T_WOULDBLOCK) {
1177                 curthread->t_flag &= ~T_WOULDBLOCK;
1178                 resp->status = NFS3ERR_JUKEBOX;
1179         } else
1180                 resp->status = puterrno3(error);
1181 out1:
1182         if (vp != NULL) {
1183                 if (rwlock_ret != -1)
1184                         VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1185                 if (in_crit)
1186                         nbl_end_crit(vp);
1187                 VN_RELE(vp);
1188         }
1189         vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
1190 }
1191 
/*
 * rfs3_write_getfh: return a pointer to the fhandle_t embedded in the
 * file handle (args->file) of a WRITE3 request.  The pointer refers to
 * storage inside args; nothing is allocated or copied.
 */
1192 fhandle_t *
1193 rfs3_write_getfh(WRITE3args *args)
1194 {
1195 
1196         return ((fhandle_t *)&args->file.fh3_u.nfs_fh3_i.fh3_i);
1197 }
1198 
/*
 * rfs3_create: server-side handler for the NFSv3 CREATE procedure.
 *
 * Creates the regular file args->where.name in the directory named by
 * the file handle args->where.dirp and fills in resp.
 *
 * args - decoded request (directory handle, name, create mode and
 *        either initial attributes or an exclusive-create verifier).
 * resp - reply: resok (optional new handle, object attributes,
 *        directory wcc data) or resfail (directory wcc data only).
 * exi  - export used for the handle lookup, the rdonly() check, the
 *        EX_NOSUID check and makefh3().
 * req  - RPC request, passed to rdonly().
 * cr   - credentials used for every VOP call.
 *
 * Handling of args->how.mode:
 *   EXCLUSIVE - the file is created EXCL with mode 0 and the client's
 *               verifier stored as its mtime.  On EEXIST the existing
 *               file's mtime is compared with the verifier; a match is
 *               treated as success, a mismatch returns EEXIST.
 *   GUARDED   - created EXCL from the supplied attributes; EEXIST is
 *               returned to the client.
 *   otherwise - created NONEXCL.  When a size is supplied, an existing
 *               file of that name is first checked for a v4 delegation
 *               and for conflicting nbmand locks.
 *
 * Failures that carry an errno go through "out" (errno mapped with
 * puterrno3(), or NFS3ERR_JUKEBOX when T_WOULDBLOCK is set); failures
 * with a status already chosen go to "out1".  Both release tvp (leaving
 * its critical region first when in_crit is set) and dvp.
 */
1199 void
1200 rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi,
1201         struct svc_req *req, cred_t *cr)
1202 {
1203         int error;
1204         int in_crit = 0;
1205         vnode_t *vp;
1206         vnode_t *tvp = NULL;
1207         vnode_t *dvp;
1208         struct vattr *vap;
1209         struct vattr va;
1210         struct vattr *dbvap;
1211         struct vattr dbva;
1212         struct vattr *davap;
1213         struct vattr dava;
1214         enum vcexcl excl;
1215         nfstime3 *mtime;
1216         len_t reqsize;
1217         bool_t trunc;
1218 
1219         dbvap = NULL;
1220         davap = NULL;
1221 
1222         dvp = nfs3_fhtovp(args->where.dirp, exi);
1223         if (dvp == NULL) {
1224                 error = ESTALE;
1225                 goto out;
1226         }
1227 
1228 #ifdef DEBUG
1229         if (rfs3_do_pre_op_attr) {
1230                 dbva.va_mask = AT_ALL;
1231                 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1232         } else
1233                 dbvap = NULL;
1234 #else
1235         dbva.va_mask = AT_ALL;
1236         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1237 #endif
1238         davap = dbvap;
1239 
1240         if (args->where.name == nfs3nametoolong) {
1241                 resp->status = NFS3ERR_NAMETOOLONG;
1242                 goto out1;
1243         }
1244 
1245         if (args->where.name == NULL || *(args->where.name) == '\0') {
1246                 resp->status = NFS3ERR_ACCES;
1247                 goto out1;
1248         }
1249 
1250         if (rdonly(exi, req)) {
1251                 resp->status = NFS3ERR_ROFS;
1252                 goto out1;
1253         }
1254 
1255         if (args->how.mode == EXCLUSIVE) {
1256                 va.va_mask = AT_TYPE | AT_MODE | AT_MTIME;
1257                 va.va_type = VREG;
1258                 va.va_mode = (mode_t)0;
1259                 /*
1260                  * Ensure no time overflows and that types match
1261                  */
1262                 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1263                 va.va_mtime.tv_sec = mtime->seconds % INT32_MAX;
1264                 va.va_mtime.tv_nsec = mtime->nseconds;
1265                 excl = EXCL;
1266         } else {
1267                 error = sattr3_to_vattr(&args->how.createhow3_u.obj_attributes,
1268                     &va);
1269                 if (error)
1270                         goto out;
1271                 va.va_mask |= AT_TYPE;
1272                 va.va_type = VREG;
1273                 if (args->how.mode == GUARDED)
1274                         excl = EXCL;
1275                 else {
1276                         excl = NONEXCL;
1277 
1278                         /*
1279                          * During creation of file in non-exclusive mode
1280                          * if size of file is being set then make sure
1281                          * that if the file already exists that no conflicting
1282                          * non-blocking mandatory locks exists in the region
1283                          * being modified. If there are conflicting locks fail
1284                          * the operation with EACCES.
1285                          */
1286                         if (va.va_mask & AT_SIZE) {
1287                                 struct vattr tva;
1288 
1289                                 /*
1290                                  * Does file already exist?
1291                                  */
1292                                 error = VOP_LOOKUP(dvp, args->where.name, &tvp,
1293                                                 NULL, 0, NULL, cr);
1294 
1295                                 /*
1296                                  * Check to see if the file has been delegated
1297                                  * to a v4 client.  If so, then begin recall of
1298                                  * the delegation and return JUKEBOX to allow
1299                                  * the client to retransmit its request.
1300                                  */
1301 
1302                                 trunc = va.va_size == 0;
1303                                 if (!error &&
1304                                     rfs4_check_delegated(FWRITE, tvp, trunc)) {
1305                                         resp->status = NFS3ERR_JUKEBOX;
1306                                         goto out1;
1307                                 }
1308 
1309                                 /*
1310                                  * Check for NBMAND lock conflicts
1311                                  */
1312                                 if (!error && nbl_need_check(tvp)) {
1313                                         u_offset_t offset;
1314                                         ssize_t len;
1315 
1316                                         nbl_start_crit(tvp, RW_READER);
1317                                         in_crit = 1;
1318 
1319                                         tva.va_mask = AT_SIZE;
1320                                         error = VOP_GETATTR(tvp, &tva, 0, cr);
1321                                         /*
1322                                          * Can't check for conflicts, so return
1323                                          * error.
1324                                          */
1325                                         if (error)
1326                                                 goto out;
1327 
                                             /*
                                              * The region checked is the span
                                              * between the current size and the
                                              * requested size, i.e. the bytes
                                              * that would be added or removed.
                                              */
1328                                         offset = tva.va_size < va.va_size ?
1329                                                 tva.va_size : va.va_size;
1330                                         len = tva.va_size < va.va_size ?
1331                                                 va.va_size - tva.va_size :
1332                                                 tva.va_size - va.va_size;
1333                                         if (nbl_conflict(tvp, NBL_WRITE,
1334                                                         offset, len, 0)) {
1335                                                 error = EACCES;
1336                                                 goto out;
1337                                         }
1338                                 } else if (tvp) {
1339                                         VN_RELE(tvp);
1340                                         tvp = NULL;
1341                                 }
1342                         }
1343                 }
                     /*
                      * Remember the requested size; va is reused for
                      * VOP_GETATTR further down.
                      */
1344                 if (va.va_mask & AT_SIZE)
1345                         reqsize = va.va_size;
1346         }
1347 
1348         /*
1349          * Must specify the mode.
1350          */
1351         if (!(va.va_mask & AT_MODE)) {
1352                 resp->status = NFS3ERR_INVAL;
1353                 goto out1;
1354         }
1355 
1356         /*
1357          * If the filesystem is exported with nosuid, then mask off
1358          * the setuid and setgid bits.
1359          */
1360         if (va.va_type == VREG && (exi->exi_export.ex_flags & EX_NOSUID))
1361                 va.va_mode &= ~(VSUID | VSGID);
1362 
1363 tryagain:
1364         /*
1365          * The file open mode used is VWRITE.  If the client needs
1366          * some other semantic, then it should do the access checking
1367          * itself.  It would have been nice to have the file open mode
1368          * passed as part of the arguments.
1369          */
1370         error = VOP_CREATE(dvp, args->where.name, &va, excl, VWRITE,
1371             &vp, cr, 0);
1372 
1373 #ifdef DEBUG
1374         if (rfs3_do_post_op_attr) {
1375                 dava.va_mask = AT_ALL;
1376                 davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1377         } else
1378                 davap = NULL;
1379 #else
1380         dava.va_mask = AT_ALL;
1381         davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1382 #endif
1383 
1384         if (error) {
1385                 /*
1386                  * If we got something other than file already exists
1387                  * then just return this error.  Otherwise, we got
1388                  * EEXIST.  If we were doing a GUARDED create, then
1389                  * just return this error.  Otherwise, we need to
1390                  * make sure that this wasn't a duplicate of an
1391                  * exclusive create request.
1392                  *
1393                  * The assumption is made that a non-exclusive create
1394                  * request will never return EEXIST.
1395                  */
1396                 if (error != EEXIST || args->how.mode == GUARDED)
1397                         goto out;
1398                 /*
1399                  * Lookup the file so that we can get a vnode for it.
1400                  */
1401                 error = VOP_LOOKUP(dvp, args->where.name, &vp, NULL, 0,
1402                     NULL, cr);
1403                 if (error) {
1404                         /*
1405                          * We couldn't find the file that we thought that
1406                          * we just created.  So, we'll just try creating
1407                          * it again.
1408                          */
1409                         if (error == ENOENT)
1410                                 goto tryagain;
1411                         goto out;
1412                 }
1413 
1414                 /*
1415                  * If the file is delegated to a v4 client, go ahead
1416                  * and initiate recall, this create is a hint that a
1417                  * conflicting v3 open has occurred.
1418                  */
1419 
1420                 if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
1421                         VN_RELE(vp);
1422                         resp->status = NFS3ERR_JUKEBOX;
1423                         goto out1;
1424                 }
1425 
1426                 va.va_mask = AT_ALL;
1427                 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1428 
1429                 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1430                 /* % with INT32_MAX to prevent overflows */
1431                 if (args->how.mode == EXCLUSIVE && (vap == NULL ||
1432                     vap->va_mtime.tv_sec !=
1433                     (mtime->seconds % INT32_MAX) ||
1434                     vap->va_mtime.tv_nsec != mtime->nseconds)) {
1435                         VN_RELE(vp);
1436                         error = EEXIST;
1437                         goto out;
1438                 }
1439         } else {
1440 
1441                 if ((args->how.mode == UNCHECKED ||
1442                     args->how.mode == GUARDED) &&
1443                     args->how.createhow3_u.obj_attributes.size.set_it &&
1444                     va.va_size == 0)
1445                         trunc = TRUE;
1446                 else
1447                         trunc = FALSE;
1448 
1449                 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1450                         VN_RELE(vp);
1451                         resp->status = NFS3ERR_JUKEBOX;
1452                         goto out1;
1453                 }
1454 
1455                 va.va_mask = AT_ALL;
1456                 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1457 
1458                 /*
1459                  * We need to check to make sure that the file got
1460                  * created to the indicated size.  If not, we do a
1461                  * setattr to try to change the size, but we don't
1462                  * try too hard.  This shouldn't be a problem as most
1463                  * clients will only specify a size of zero which
1464                  * local file systems handle.  However, even if
1465                  * the client does specify a non-zero size, it can
1466                  * still recover by checking the size of the file
1467                  * after it has created it and then issue a setattr
1468                  * request of its own to set the size of the file.
1469                  */
1470                 if (vap != NULL &&
1471                     (args->how.mode == UNCHECKED ||
1472                     args->how.mode == GUARDED) &&
1473                     args->how.createhow3_u.obj_attributes.size.set_it &&
1474                     vap->va_size != reqsize) {
1475                         va.va_mask = AT_SIZE;
1476                         va.va_size = reqsize;
1477                         (void) VOP_SETATTR(vp, &va, 0, cr, NULL);
1478                         va.va_mask = AT_ALL;
1479                         vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1480                 }
1481         }
1482 
1483 #ifdef DEBUG
1484         if (!rfs3_do_post_op_attr)
1485                 vap = NULL;
1486 #endif
1487 
1488 #ifdef DEBUG
1489         if (!rfs3_do_post_op_fh3)
1490                 resp->resok.obj.handle_follows = FALSE;
1491         else {
1492 #endif
             /*
              * A failure to build the file handle does not fail the
              * request; the reply just omits the handle.
              */
1493         error = makefh3(&resp->resok.obj.handle, vp, exi);
1494         if (error)
1495                 resp->resok.obj.handle_follows = FALSE;
1496         else
1497                 resp->resok.obj.handle_follows = TRUE;
1498 #ifdef DEBUG
1499         }
1500 #endif
1501 
1502         /*
1503          * Force modified data and metadata out to stable storage.
1504          */
1505         (void) VOP_FSYNC(vp, FNODSYNC, cr);
1506         (void) VOP_FSYNC(dvp, 0, cr);
1507 
1508         VN_RELE(vp);
1509         VN_RELE(dvp);
1510         if (tvp != NULL) {
1511                 if (in_crit)
1512                         nbl_end_crit(tvp);
1513                 VN_RELE(tvp);
1514         }
1515 
1516         resp->status = NFS3_OK;
1517         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1518         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1519         return;
1520 
1521 out:
1522         if (curthread->t_flag & T_WOULDBLOCK) {
1523                 curthread->t_flag &= ~T_WOULDBLOCK;
1524                 resp->status = NFS3ERR_JUKEBOX;
1525         } else
1526                 resp->status = puterrno3(error);
1527 out1:
1528         if (tvp != NULL) {
1529                 if (in_crit)
1530                         nbl_end_crit(tvp);
1531                 VN_RELE(tvp);
1532         }
1533         if (dvp != NULL)
1534                 VN_RELE(dvp);
1535         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1536 }
1537 
/*
 * rfs3_create_getfh: return a pointer to the fhandle_t embedded in the
 * directory file handle (args->where.dir) of a CREATE3 request.
 * NOTE(review): rfs3_create() itself reads the directory handle through
 * args->where.dirp; presumably both refer to the same handle -- confirm
 * against the diropargs3 definition.
 */
1538 fhandle_t *
1539 rfs3_create_getfh(CREATE3args *args)
1540 {
1541 
1542         return ((fhandle_t *)&args->where.dir.fh3_u.nfs_fh3_i.fh3_i);
1543 }
1544 
/*
 * rfs3_mkdir: server-side handler for the NFSv3 MKDIR procedure.
 *
 * Creates the directory args->where.name in the directory named by the
 * file handle args->where.dirp, using the attributes in
 * args->attributes (which must include a mode).
 *
 * args - decoded request (parent handle, name, initial attributes).
 * resp - reply: resok (optional new handle, object attributes,
 *        directory wcc data) or resfail (directory wcc data only).
 * exi  - export used for the handle lookup, the rdonly() check and
 *        makefh3().
 * req  - RPC request, passed to rdonly().
 * cr   - credentials used for every VOP call.
 *
 * Failures that carry an errno go through "out" (errno mapped with
 * puterrno3(), or NFS3ERR_JUKEBOX when T_WOULDBLOCK is set); failures
 * with a status already chosen go to "out1".  Both release dvp.
 */
1545 void
1546 rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi,
1547         struct svc_req *req, cred_t *cr)
1548 {
1549         int error;
1550         vnode_t *vp = NULL;
1551         vnode_t *dvp;
1552         struct vattr *vap;
1553         struct vattr va;
1554         struct vattr *dbvap;
1555         struct vattr dbva;
1556         struct vattr *davap;
1557         struct vattr dava;
1558 
1559         dbvap = NULL;
1560         davap = NULL;
1561 
1562         dvp = nfs3_fhtovp(args->where.dirp, exi);
1563         if (dvp == NULL) {
1564                 error = ESTALE;
1565                 goto out;
1566         }
1567 
1568 #ifdef DEBUG
1569         if (rfs3_do_pre_op_attr) {
1570                 dbva.va_mask = AT_ALL;
1571                 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1572         } else
1573                 dbvap = NULL;
1574 #else
1575         dbva.va_mask = AT_ALL;
1576         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1577 #endif
1578         davap = dbvap;
1579 
1580         if (args->where.name == nfs3nametoolong) {
1581                 resp->status = NFS3ERR_NAMETOOLONG;
1582                 goto out1;
1583         }
1584 
1585         if (args->where.name == NULL || *(args->where.name) == '\0') {
1586                 resp->status = NFS3ERR_ACCES;
1587                 goto out1;
1588         }
1589 
1590         if (rdonly(exi, req)) {
1591                 resp->status = NFS3ERR_ROFS;
1592                 goto out1;
1593         }
1594 
1595         error = sattr3_to_vattr(&args->attributes, &va);
1596         if (error)
1597                 goto out;
1598 
             /* The client must supply a mode for the new directory. */
1599         if (!(va.va_mask & AT_MODE)) {
1600                 resp->status = NFS3ERR_INVAL;
1601                 goto out1;
1602         }
1603 
1604         va.va_mask |= AT_TYPE;
1605         va.va_type = VDIR;
1606 
1607         error = VOP_MKDIR(dvp, args->where.name, &va, &vp, cr);
1608 
1609 #ifdef DEBUG
1610         if (rfs3_do_post_op_attr) {
1611                 dava.va_mask = AT_ALL;
1612                 davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1613         } else
1614                 davap = NULL;
1615 #else
1616         dava.va_mask = AT_ALL;
1617         davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1618 #endif
1619 
1620         /*
1621          * Force modified data and metadata out to stable storage.
1622          * This is done before the VOP_MKDIR result is examined, so
1623          * the parent is synced even when the mkdir failed.
1624          */
1623         (void) VOP_FSYNC(dvp, 0, cr);
1624 
1625         if (error)
1626                 goto out;
1627 
1628         VN_RELE(dvp);
1629 
1630 #ifdef DEBUG
1631         if (!rfs3_do_post_op_fh3)
1632                 resp->resok.obj.handle_follows = FALSE;
1633         else {
1634 #endif
             /*
              * A failure to build the file handle does not fail the
              * request; the reply just omits the handle.
              */
1635         error = makefh3(&resp->resok.obj.handle, vp, exi);
1636         if (error)
1637                 resp->resok.obj.handle_follows = FALSE;
1638         else
1639                 resp->resok.obj.handle_follows = TRUE;
1640 #ifdef DEBUG
1641         }
1642 #endif
1643 
1644 #ifdef DEBUG
1645         if (rfs3_do_post_op_attr) {
1646                 va.va_mask = AT_ALL;
1647                 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1648         } else
1649                 vap = NULL;
1650 #else
1651         va.va_mask = AT_ALL;
1652         vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1653 #endif
1654 
1655         /*
1656          * Force modified data and metadata out to stable storage.
1657          */
1658         (void) VOP_FSYNC(vp, 0, cr);
1659 
1660         VN_RELE(vp);
1661 
1662         resp->status = NFS3_OK;
1663         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1664         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1665         return;
1666 
1667 out:
1668         if (curthread->t_flag & T_WOULDBLOCK) {
1669                 curthread->t_flag &= ~T_WOULDBLOCK;
1670                 resp->status = NFS3ERR_JUKEBOX;
1671         } else
1672                 resp->status = puterrno3(error);
1673 out1:
1674         if (dvp != NULL)
1675                 VN_RELE(dvp);
1676         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1677 }
1678 
/*
 * rfs3_mkdir_getfh: return a pointer to the fhandle_t embedded in the
 * directory file handle (args->where.dir) of a MKDIR3 request.
 * NOTE(review): rfs3_mkdir() itself reads the directory handle through
 * args->where.dirp; presumably both refer to the same handle -- confirm
 * against the diropargs3 definition.
 */
1679 fhandle_t *
1680 rfs3_mkdir_getfh(MKDIR3args *args)
1681 {
1682 
1683         return ((fhandle_t *)&args->where.dir.fh3_u.nfs_fh3_i.fh3_i);
1684 }
1685 
/*
 * rfs3_symlink: server-side handler for the NFSv3 SYMLINK procedure.
 *
 * Creates the symbolic link args->where.name, with target
 * args->symlink.symlink_data, in the directory named by the file
 * handle args->where.dirp.  The supplied attributes must include a
 * mode.
 *
 * args - decoded request (parent handle, name, attributes, link text).
 * resp - reply: resok (optional new handle, object attributes,
 *        directory wcc data) or resfail (directory wcc data only).
 * exi  - export used for the handle lookup, the rdonly() check and
 *        makefh3().
 * req  - RPC request, passed to rdonly().
 * cr   - credentials used for every VOP call.
 *
 * VOP_SYMLINK does not hand back a vnode here, so the new link is
 * looked up by name afterwards.  If that lookup fails the request still
 * succeeds (NFS3_OK) but the reply carries neither a file handle nor
 * object attributes.
 *
 * Failures that carry an errno go through "out" (errno mapped with
 * puterrno3(), or NFS3ERR_JUKEBOX when T_WOULDBLOCK is set); failures
 * with a status already chosen go to "out1".  Both release dvp.
 */
1686 void
1687 rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi,
1688         struct svc_req *req, cred_t *cr)
1689 {
1690         int error;
1691         vnode_t *vp;
1692         vnode_t *dvp;
1693         struct vattr *vap;
1694         struct vattr va;
1695         struct vattr *dbvap;
1696         struct vattr dbva;
1697         struct vattr *davap;
1698         struct vattr dava;
1699 
1700         dbvap = NULL;
1701         davap = NULL;
1702 
1703         dvp = nfs3_fhtovp(args->where.dirp, exi);
1704         if (dvp == NULL) {
1705                 error = ESTALE;
1706                 goto out;
1707         }
1708 
1709 #ifdef DEBUG
1710         if (rfs3_do_pre_op_attr) {
1711                 dbva.va_mask = AT_ALL;
1712                 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1713         } else
1714                 dbvap = NULL;
1715 #else
1716         dbva.va_mask = AT_ALL;
1717         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1718 #endif
1719         davap = dbvap;
1720 
1721         if (args->where.name == nfs3nametoolong) {
1722                 resp->status = NFS3ERR_NAMETOOLONG;
1723                 goto out1;
1724         }
1725 
1726         if (args->where.name == NULL || *(args->where.name) == '\0') {
1727                 resp->status = NFS3ERR_ACCES;
1728                 goto out1;
1729         }
1730 
1731         if (rdonly(exi, req)) {
1732                 resp->status = NFS3ERR_ROFS;
1733                 goto out1;
1734         }
1735 
1736         error = sattr3_to_vattr(&args->symlink.symlink_attributes, &va);
1737         if (error)
1738                 goto out;
1739 
             /* The client must supply a mode for the new link. */
1740         if (!(va.va_mask & AT_MODE)) {
1741                 resp->status = NFS3ERR_INVAL;
1742                 goto out1;
1743         }
1744 
1745         if (args->symlink.symlink_data == nfs3nametoolong) {
1746                 resp->status = NFS3ERR_NAMETOOLONG;
1747                 goto out1;
1748         }
1749 
1750         va.va_mask |= AT_TYPE;
1751         va.va_type = VLNK;
1752 
1753         error = VOP_SYMLINK(dvp, args->where.name, &va,
1754             args->symlink.symlink_data, cr);
1755 
1756 #ifdef DEBUG
1757         if (rfs3_do_post_op_attr) {
1758                 dava.va_mask = AT_ALL;
1759                 davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1760         } else
1761                 davap = NULL;
1762 #else
1763         dava.va_mask = AT_ALL;
1764         davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1765 #endif
1766 
1767         if (error)
1768                 goto out;
1769 
             /* Find the vnode of the link that was just created. */
1770         error = VOP_LOOKUP(dvp, args->where.name, &vp, NULL, 0, NULL, cr);
1771 
1772         /*
1773          * Force modified data and metadata out to stable storage.
1774          */
1775         (void) VOP_FSYNC(dvp, 0, cr);
1776 
1777         VN_RELE(dvp);
1778 
1779         resp->status = NFS3_OK;
             /*
              * The link exists even if it could not be looked up, so
              * reply with success but without a handle or attributes.
              */
1780         if (error) {
1781                 resp->resok.obj.handle_follows = FALSE;
1782                 vattr_to_post_op_attr(NULL, &resp->resok.obj_attributes);
1783                 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1784                 return;
1785         }
1786 
1787 #ifdef DEBUG
1788         if (!rfs3_do_post_op_fh3)
1789                 resp->resok.obj.handle_follows = FALSE;
1790         else {
1791 #endif
1792         error = makefh3(&resp->resok.obj.handle, vp, exi);
1793         if (error)
1794                 resp->resok.obj.handle_follows = FALSE;
1795         else
1796                 resp->resok.obj.handle_follows = TRUE;
1797 #ifdef DEBUG
1798         }
1799 #endif
1800 
1801 #ifdef DEBUG
1802         if (rfs3_do_post_op_attr) {
1803                 va.va_mask = AT_ALL;
1804                 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1805         } else
1806                 vap = NULL;
1807 #else
1808         va.va_mask = AT_ALL;
1809         vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1810 #endif
1811 
1812         /*
1813          * Force modified data and metadata out to stable storage.
1814          */
1815         (void) VOP_FSYNC(vp, 0, cr);
1816 
1817         VN_RELE(vp);
1818 
1819         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1820         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1821         return;
1822 
1823 out:
1824         if (curthread->t_flag & T_WOULDBLOCK) {
1825                 curthread->t_flag &= ~T_WOULDBLOCK;
1826                 resp->status = NFS3ERR_JUKEBOX;
1827         } else
1828                 resp->status = puterrno3(error);
1829 out1:
1830         if (dvp != NULL)
1831                 VN_RELE(dvp);
1832         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1833 }
1834 
1835 fhandle_t *
1836 rfs3_symlink_getfh(SYMLINK3args *args)
1837 {
1838 
1839         return ((fhandle_t *)&args->where.dirp->fh3_u.nfs_fh3_i.fh3_i);
1840 }
1841 
1842 void
1843 rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi,
1844         struct svc_req *req, cred_t *cr)
1845 {
1846         int error;
1847         vnode_t *vp;
1848         vnode_t *dvp;
1849         struct vattr *vap;
1850         struct vattr va;
1851         struct vattr *dbvap;
1852         struct vattr dbva;
1853         struct vattr *davap;
1854         struct vattr dava;
1855         int mode;
1856         enum vcexcl excl;
1857 
1858         dbvap = NULL;
1859         davap = NULL;
1860 
1861         dvp = nfs3_fhtovp(args->where.dirp, exi);
1862         if (dvp == NULL) {
1863                 error = ESTALE;
1864                 goto out;
1865         }
1866 
1867 #ifdef DEBUG
1868         if (rfs3_do_pre_op_attr) {
1869                 dbva.va_mask = AT_ALL;
1870                 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1871         } else
1872                 dbvap = NULL;
1873 #else
1874         dbva.va_mask = AT_ALL;
1875         dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1876 #endif
1877         davap = dbvap;
1878 
1879         if (args->where.name == nfs3nametoolong) {
1880                 resp->status = NFS3ERR_NAMETOOLONG;
1881                 goto out1;
1882         }
1883 
1884         if (args->where.name == NULL || *(args->where.name) == '\0') {
1885                 resp->status = NFS3ERR_ACCES;
1886                 goto out1;
1887         }
1888 
1889         if (rdonly(exi, req)) {
1890                 resp->status = NFS3ERR_ROFS;
1891                 goto out1;
1892         }
1893 
1894         switch (args->what.type) {
1895         case NF3CHR:
1896         case NF3BLK:
1897                 error = sattr3_to_vattr(
1898                     &args->what.mknoddata3_u.device.dev_attributes, &va);
1899                 if (error)
1900                         goto out;
1901                 if (secpolicy_sys_devices(cr) != 0) {
1902                         resp->status = NFS3ERR_PERM;
1903                         goto out1;
1904                 }
1905                 if (args->what.type == NF3CHR)
1906                         va.va_type = VCHR;
1907                 else
1908                         va.va_type = VBLK;
1909                 va.va_rdev = makedevice(
1910                     args->what.mknoddata3_u.device.spec.specdata1,
1911                     args->what.mknoddata3_u.device.spec.specdata2);
1912                 va.va_mask |= AT_TYPE | AT_RDEV;
1913                 break;
1914         case NF3SOCK:
1915                 error = sattr3_to_vattr(
1916                     &args->what.mknoddata3_u.pipe_attributes, &va);
1917                 if (error)
1918                         goto out;
1919                 va.va_type = VSOCK;
1920                 va.va_mask |= AT_TYPE;
1921                 break;
1922         case NF3FIFO:
1923                 error = sattr3_to_vattr(
1924                     &args->what.mknoddata3_u.pipe_attributes, &va);
1925                 if (error)
1926                         goto out;
1927                 va.va_type = VFIFO;
1928                 va.va_mask |= AT_TYPE;
1929                 break;
1930         default:
1931                 resp->status = NFS3ERR_BADTYPE;
1932                 goto out1;
1933         }
1934 
1935         /*
1936          * Must specify the mode.
1937          */
1938         if (!(va.va_mask & AT_MODE)) {
1939                 resp->status = NFS3ERR_INVAL;
1940                 goto out1;
1941         }
1942 
1943         excl = EXCL;
1944 
1945         mode = 0;
1946 
1947         error = VOP_CREATE(dvp, args->where.name, &va, excl, mode,
1948             &vp, cr, 0);
1949 
1950 #ifdef DEBUG
1951         if (rfs3_do_post_op_attr) {
1952                 dava.va_mask = AT_ALL;
1953                 davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1954         } else
1955                 davap = NULL;
1956 #else
1957         dava.va_mask = AT_ALL;
1958         davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1959 #endif
1960 
1961         /*
1962          * Force modified data and metadata out to stable storage.
1963          */
1964         (void) VOP_FSYNC(dvp, 0, cr);
1965 
1966         if (error)
1967                 goto out;
1968 
1969         VN_RELE(dvp);
1970 
1971         resp->status = NFS3_OK;
1972 
1973 #ifdef DEBUG
1974         if (!rfs3_do_post_op_fh3)
1975                 resp->resok.obj.handle_follows = FALSE;
1976         else {
1977 #endif
1978         error = makefh3(&resp->resok.obj.handle, vp, exi);
1979         if (error)
1980                 resp->resok.obj.handle_follows = FALSE;
1981         else
1982                 resp->resok.obj.handle_follows = TRUE;
1983 #ifdef DEBUG
1984         }
1985 #endif
1986 
1987 #ifdef DEBUG
1988         if (rfs3_do_post_op_attr) {
1989                 va.va_mask = AT_ALL;
1990                 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1991         } else
1992                 vap = NULL;
1993 #else
1994         va.va_mask = AT_ALL;
1995         vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1996 #endif
1997 
1998         /*
1999          * Force modified metadata out to stable storage.
2000          */
2001         (void) VOP_FSYNC(vp, FNODSYNC, cr);
2002 
2003         VN_RELE(vp);
2004 
2005         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2006         vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2007         return;
2008 
2009 out:
2010         if (curthread->t_flag & T_WOULDBLOCK) {
2011                 curthread->t_flag &= ~T_WOULDBLOCK;
2012                 resp->status = NFS3ERR_JUKEBOX;
2013         } else
2014                 resp->status = puterrno3(error);
2015 out1:
2016         if (dvp != NULL)
2017                 VN_RELE(dvp);
2018         vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2019 }
2020 
2021 fhandle_t *
2022 rfs3_mknod_getfh(MKNOD3args *args)
2023 {
2024 
2025         return ((fhandle_t *)&args->where.dirp->fh3_u.nfs_fh3_i.fh3_i);
2026 }
2027 
2028 void
2029 rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi,
2030         struct svc_req *req, cred_t *cr)
2031 {
2032         int error = 0;
2033         vnode_t *vp;
2034         struct vattr *bvap;
2035         struct vattr bva;
2036         struct vattr *avap;
2037         struct vattr ava;
2038         vnode_t *targvp = NULL;
2039 
2040         bvap = NULL;
2041         avap = NULL;
2042 
2043         vp = nfs3_fhtovp(args->object.dirp, exi);
2044         if (vp == NULL) {
2045                 error = ESTALE;
2046                 goto out;
2047         }
2048 
2049 #ifdef DEBUG
2050         if (rfs3_do_pre_op_attr) {
2051                 bva.va_mask = AT_ALL;
2052                 bvap = VOP_GETATTR(vp, &bva, 0, cr) ? NULL : &bva;
2053         } else
2054                 bvap = NULL;
2055 #else
2056         bva.va_mask = AT_ALL;
2057         bvap = VOP_GETATTR(vp, &bva, 0, cr) ? NULL : &bva;
2058 #endif
2059         avap = bvap;
2060 
2061         if (vp->v_type != VDIR) {
2062                 resp->status = NFS3ERR_NOTDIR;
2063                 goto out1;
2064         }
2065 
2066         if (args->object.name == nfs3nametoolong) {
2067                 resp->status = NFS3ERR_NAMETOOLONG;
2068                 goto out1;
2069         }
2070 
2071         if (args->object.name == NULL || *(args->object.name) == '\0') {
2072                 resp->status = NFS3ERR_ACCES;
2073                 goto out1;
2074         }
2075 
2076         if (rdonly(exi, req)) {
2077                 resp->status = NFS3ERR_ROFS;
2078                 goto out1;
2079         }
2080 
2081         /*
2082          * Check for a conflict with a non-blocking mandatory share
2083          * reservation and V4 delegations
2084          */
2085         error = VOP_LOOKUP(vp, args->object.name, &targvp, NULL, 0,
2086                         NULL, cr);
2087         if (error != 0)
2088                 goto out;
2089 
2090         if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2091                 resp->status = NFS3ERR_JUKEBOX;
2092                 goto out1;
2093         }
2094 
2095         if (!nbl_need_check(targvp)) {
2096                 error = VOP_REMOVE(vp, args->object.name, cr);
2097         } else {
2098                 nbl_start_crit(targvp, RW_READER);
2099                 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0)) {
2100                         error = EACCES;
2101                 } else {
2102                         error = VOP_REMOVE(vp, args->object.name, cr);
2103                 }
2104                 nbl_end_crit(targvp);
2105         }
2106         VN_RELE(targvp);
2107         targvp = NULL;
2108 
2109 #ifdef DEBUG
2110         if (rfs3_do_post_op_attr) {
2111                 ava.va_mask = AT_ALL;
2112                 avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
2113         } else
2114                 avap = NULL;
2115 #else
2116         ava.va_mask = AT_ALL;
2117         avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
2118 #endif
2119 
2120         /*
2121          * Force modified data and metadata out to stable storage.
2122          */
2123         (void) VOP_FSYNC(vp, 0, cr);
2124 
2125         if (error)
2126                 goto out;
2127 
2128         VN_RELE(vp);
2129 
2130         resp->status = NFS3_OK;
2131         vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2132         return;
2133 
2134 out:
2135         if (curthread->t_flag & T_WOULDBLOCK) {
2136                 curthread->t_flag &= ~T_WOULDBLOCK;
2137                 resp->status = NFS3ERR_JUKEBOX;
2138         } else
2139                 resp->status = puterrno3(error);
2140 out1:
2141         if (vp != NULL)
2142                 VN_RELE(vp);
2143         vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2144 }
2145 
2146 fhandle_t *
2147 rfs3_remove_getfh(REMOVE3args *args)
2148 {
2149 
2150         return ((fhandle_t *)&args->object.dirp->fh3_u.nfs_fh3_i.fh3_i);
2151 }
2152 
2153 void
2154 rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi,
2155         struct svc_req *req, cred_t *cr)
2156 {
2157         int error;
2158         vnode_t *vp;
2159         struct vattr *bvap;
2160         struct vattr bva;
2161         struct vattr *avap;
2162         struct vattr ava;
2163 
2164         bvap = NULL;
2165         avap = NULL;
2166 
2167         vp = nfs3_fhtovp(args->object.dirp, exi);
2168         if (vp == NULL) {
2169                 error = ESTALE;
2170                 goto out;
2171         }
2172 
2173 #ifdef DEBUG
2174         if (rfs3_do_pre_op_attr) {
2175                 bva.va_mask = AT_ALL;
2176                 bvap = VOP_GETATTR(vp, &bva, 0, cr) ? NULL : &bva;
2177         } else
2178                 bvap = NULL;
2179 #else
2180         bva.va_mask = AT_ALL;
2181         bvap = VOP_GETATTR(vp, &bva, 0, cr) ? NULL : &bva;
2182 #endif
2183         avap = bvap;
2184 
2185         if (vp->v_type != VDIR) {
2186                 resp->status = NFS3ERR_NOTDIR;
2187                 goto out1;
2188         }
2189 
2190         if (args->object.name == nfs3nametoolong) {
2191                 resp->status = NFS3ERR_NAMETOOLONG;
2192                 goto out1;
2193         }
2194 
2195         if (args->object.name == NULL || *(args->object.name) == '\0') {
2196                 resp->status = NFS3ERR_ACCES;
2197                 goto out1;
2198         }
2199 
2200         if (rdonly(exi, req)) {
2201                 resp->status = NFS3ERR_ROFS;
2202                 goto out1;
2203         }
2204 
2205         error = VOP_RMDIR(vp, args->object.name, rootdir, cr);
2206 
2207 #ifdef DEBUG
2208         if (rfs3_do_post_op_attr) {
2209                 ava.va_mask = AT_ALL;
2210                 avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
2211         } else
2212                 avap = NULL;
2213 #else
2214         ava.va_mask = AT_ALL;
2215         avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
2216 #endif
2217 
2218         /*
2219          * Force modified data and metadata out to stable storage.
2220          */
2221         (void) VOP_FSYNC(vp, 0, cr);
2222 
2223         if (error) {
2224                 /*
2225                  * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2226                  * if the directory is not empty.  A System V NFS server
2227                  * needs to map NFS3ERR_EXIST to NFS3ERR_NOTEMPTY to transmit
2228                  * over the wire.
2229                  */
2230                 if (error == EEXIST)
2231                         error = ENOTEMPTY;
2232                 goto out;
2233         }
2234 
2235         VN_RELE(vp);
2236 
2237         resp->status = NFS3_OK;
2238         vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2239         return;
2240 
2241 out:
2242         if (curthread->t_flag & T_WOULDBLOCK) {
2243                 curthread->t_flag &= ~T_WOULDBLOCK;
2244                 resp->status = NFS3ERR_JUKEBOX;
2245         } else
2246                 resp->status = puterrno3(error);
2247 out1:
2248         if (vp != NULL)
2249                 VN_RELE(vp);
2250         vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2251 }
2252 
2253 fhandle_t *
2254 rfs3_rmdir_getfh(RMDIR3args *args)
2255 {
2256 
2257         return ((fhandle_t *)&args->object.dirp->fh3_u.nfs_fh3_i.fh3_i);
2258 }
2259 
2260 void
2261 rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi,
2262         struct svc_req *req, cred_t *cr)
2263 {
2264         int error = 0;
2265         vnode_t *fvp;
2266         vnode_t *tvp;
2267         vnode_t *targvp;
2268         struct vattr *fbvap;
2269         struct vattr fbva;
2270         struct vattr *favap;
2271         struct vattr fava;
2272         struct vattr *tbvap;
2273         struct vattr tbva;
2274         struct vattr *tavap;
2275         struct vattr tava;
2276         nfs_fh3 *fh3;
2277         struct exportinfo *to_exi;
2278         vnode_t *srcvp = NULL;
2279 
2280         fbvap = NULL;
2281         favap = NULL;
2282         tbvap = NULL;
2283         tavap = NULL;
2284         tvp = NULL;
2285 
2286         fvp = nfs3_fhtovp(args->from.dirp, exi);
2287         if (fvp == NULL) {
2288                 error = ESTALE;
2289                 goto out;
2290         }
2291 
2292 #ifdef DEBUG
2293         if (rfs3_do_pre_op_attr) {
2294                 fbva.va_mask = AT_ALL;
2295                 fbvap = VOP_GETATTR(fvp, &fbva, 0, cr) ? NULL : &fbva;
2296         } else
2297                 fbvap = NULL;
2298 #else
2299         fbva.va_mask = AT_ALL;
2300         fbvap = VOP_GETATTR(fvp, &fbva, 0, cr) ? NULL : &fbva;
2301 #endif
2302         favap = fbvap;
2303 
2304         fh3 = args->to.dirp;
2305         to_exi = checkexport(&fh3->fh3_fsid, (fid_t *)&fh3->fh3_xlen);
2306         if (to_exi == NULL) {
2307                 resp->status = NFS3ERR_ACCES;
2308                 goto out1;
2309         }
2310         exi_rele(to_exi);
2311 
2312         if (to_exi != exi) {
2313                 resp->status = NFS3ERR_XDEV;
2314                 goto out1;
2315         }
2316 
2317         tvp = nfs3_fhtovp(args->to.dirp, exi);
2318         if (tvp == NULL) {
2319                 error = ESTALE;
2320                 goto out;
2321         }
2322 
2323 #ifdef DEBUG
2324         if (rfs3_do_pre_op_attr) {
2325                 tbva.va_mask = AT_ALL;
2326                 tbvap = VOP_GETATTR(tvp, &tbva, 0, cr) ? NULL : &tbva;
2327         } else
2328                 tbvap = NULL;
2329 #else
2330         tbva.va_mask = AT_ALL;
2331         tbvap = VOP_GETATTR(tvp, &tbva, 0, cr) ? NULL : &tbva;
2332 #endif
2333         tavap = tbvap;
2334 
2335         if (fvp->v_type != VDIR || tvp->v_type != VDIR) {
2336                 resp->status = NFS3ERR_NOTDIR;
2337                 goto out1;
2338         }
2339 
2340         if (args->from.name == nfs3nametoolong ||
2341             args->to.name == nfs3nametoolong) {
2342                 resp->status = NFS3ERR_NAMETOOLONG;
2343                 goto out1;
2344         }
2345         if (args->from.name == NULL || *(args->from.name) == '\0' ||
2346             args->to.name == NULL || *(args->to.name) == '\0') {
2347                 resp->status = NFS3ERR_ACCES;
2348                 goto out1;
2349         }
2350 
2351         if (rdonly(exi, req)) {
2352                 resp->status = NFS3ERR_ROFS;
2353                 goto out1;
2354         }
2355 
2356         /*
2357          * Check for a conflict with a non-blocking mandatory share
2358          * reservation or V4 delegations.
2359          */
2360         error = VOP_LOOKUP(fvp, args->from.name, &srcvp, NULL, 0,
2361                         NULL, cr);
2362         if (error != 0)
2363                 goto out;
2364 
2365         /*
2366          * If we rename a delegated file we should recall the
2367          * delegation, since future opens should fail or would
2368          * refer to a new file.
2369          */
2370         if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2371                 resp->status = NFS3ERR_JUKEBOX;
2372                 goto out1;
2373         }
2374 
2375         /*
2376          * Check for renaming over a delegated file.  Check rfs4_deleg_policy
2377          * first to avoid VOP_LOOKUP if possible.
2378          */
2379         if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
2380             VOP_LOOKUP(tvp, args->to.name, &targvp, NULL, 0, NULL, cr) == 0) {
2381 
2382                 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2383                         VN_RELE(targvp);
2384                         resp->status = NFS3ERR_JUKEBOX;
2385                         goto out1;
2386                 }
2387                 VN_RELE(targvp);
2388         }
2389 
2390         if (!nbl_need_check(srcvp)) {
2391                 error = VOP_RENAME(fvp, args->from.name, tvp,
2392                                     args->to.name, cr);
2393         } else {
2394                 nbl_start_crit(srcvp, RW_READER);
2395                 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0)) {
2396                         error = EACCES;
2397                 } else {
2398                         error = VOP_RENAME(fvp, args->from.name, tvp,
2399                                     args->to.name, cr);
2400                 }
2401                 nbl_end_crit(srcvp);
2402         }
2403         if (error == 0) {
2404                 char *tmp;
2405 
2406                 /* fix the path name for the renamed file */
2407                 mutex_enter(&srcvp->v_lock);
2408                 tmp = srcvp->v_path;
2409                 srcvp->v_path = NULL;
2410                 mutex_exit(&srcvp->v_lock);
2411                 vn_setpath(rootdir, tvp, srcvp, args->to.name,
2412                                 strlen(args->to.name));
2413                 if (tmp != NULL)
2414                         kmem_free(tmp, strlen(tmp) + 1);
2415         }
2416         VN_RELE(srcvp);
2417         srcvp = NULL;
2418 
2419 #ifdef DEBUG
2420         if (rfs3_do_post_op_attr) {
2421                 fava.va_mask = AT_ALL;
2422                 favap = VOP_GETATTR(fvp, &fava, 0, cr) ? NULL : &fava;
2423                 tava.va_mask = AT_ALL;
2424                 tavap = VOP_GETATTR(tvp, &tava, 0, cr) ? NULL : &tava;
2425         } else {
2426                 favap = NULL;
2427                 tavap = NULL;
2428         }
2429 #else
2430         fava.va_mask = AT_ALL;
2431         favap = VOP_GETATTR(fvp, &fava, 0, cr) ? NULL : &fava;
2432         tava.va_mask = AT_ALL;
2433         tavap = VOP_GETATTR(tvp, &tava, 0, cr) ? NULL : &tava;
2434 #endif
2435 
2436         /*
2437          * Force modified data and metadata out to stable storage.
2438          */
2439         (void) VOP_FSYNC(fvp, 0, cr);
2440         (void) VOP_FSYNC(tvp, 0, cr);
2441 
2442         if (error)
2443                 goto out;
2444 
2445         VN_RELE(tvp);
2446         VN_RELE(fvp);
2447 
2448         resp->status = NFS3_OK;
2449         vattr_to_wcc_data(fbvap, favap, &resp->resok.fromdir_wcc);
2450         vattr_to_wcc_data(tbvap, tavap, &resp->resok.todir_wcc);
2451         return;
2452 
2453 out:
2454         if (curthread->t_flag & T_WOULDBLOCK) {
2455                 curthread->t_flag &= ~T_WOULDBLOCK;
2456                 resp->status = NFS3ERR_JUKEBOX;
2457         } else
2458                 resp->status = puterrno3(error);
2459 out1:
2460         if (fvp != NULL)
2461                 VN_RELE(fvp);
2462         if (tvp != NULL)
2463                 VN_RELE(tvp);
2464         vattr_to_wcc_data(fbvap, favap, &resp->resfail.fromdir_wcc);
2465         vattr_to_wcc_data(tbvap, tavap, &resp->resfail.todir_wcc);
2466 }
2467 
2468 fhandle_t *
2469 rfs3_rename_getfh(RENAME3args *args)
2470 {
2471 
2472         return ((fhandle_t *)&args->from.dirp->fh3_u.nfs_fh3_i.fh3_i);
2473 }
2474 
2475 void
2476 rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi,
2477         struct svc_req *req, cred_t *cr)
2478 {
2479         int error;
2480         vnode_t *vp;
2481         vnode_t *dvp;
2482         struct vattr *vap;
2483         struct vattr va;
2484         struct vattr *bvap;
2485         struct vattr bva;
2486         struct vattr *avap;
2487         struct vattr ava;
2488         nfs_fh3 *fh3;
2489         struct exportinfo *to_exi;
2490 
2491         vap = NULL;
2492         bvap = NULL;
2493         avap = NULL;
2494         dvp = NULL;
2495 
2496         vp = nfs3_fhtovp(&args->file, exi);
2497         if (vp == NULL) {
2498                 error = ESTALE;
2499                 goto out;
2500         }
2501 
2502 #ifdef DEBUG
2503         if (rfs3_do_pre_op_attr) {
2504                 va.va_mask = AT_ALL;
2505                 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2506         } else
2507                 vap = NULL;
2508 #else
2509         va.va_mask = AT_ALL;
2510         vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2511 #endif
2512 
2513         fh3 = args->link.dirp;
2514         to_exi = checkexport(&fh3->fh3_fsid, (fid_t *)&fh3->fh3_xlen);
2515         if (to_exi == NULL) {
2516                 resp->status = NFS3ERR_ACCES;
2517                 goto out1;
2518         }
2519         exi_rele(to_exi);
2520 
2521         if (to_exi != exi) {
2522                 resp->status = NFS3ERR_XDEV;
2523                 goto out1;
2524         }
2525 
2526         dvp = nfs3_fhtovp(args->link.dirp, exi);
2527         if (dvp == NULL) {
2528                 error = ESTALE;
2529                 goto out;
2530         }
2531 
2532 #ifdef DEBUG
2533         if (rfs3_do_pre_op_attr) {
2534                 bva.va_mask = AT_ALL;
2535                 bvap = VOP_GETATTR(dvp, &bva, 0, cr) ? NULL : &bva;
2536         } else
2537                 bvap = NULL;
2538 #else
2539         bva.va_mask = AT_ALL;
2540         bvap = VOP_GETATTR(dvp, &bva, 0, cr) ? NULL : &bva;
2541 #endif
2542 
2543         if (dvp->v_type != VDIR) {
2544                 resp->status = NFS3ERR_NOTDIR;
2545                 goto out1;
2546         }
2547 
2548         if (args->link.name == nfs3nametoolong) {
2549                 resp->status = NFS3ERR_NAMETOOLONG;
2550                 goto out1;
2551         }
2552 
2553         if (args->link.name == NULL || *(args->link.name) == '\0') {
2554                 resp->status = NFS3ERR_ACCES;
2555                 goto out1;
2556         }
2557 
2558         if (rdonly(exi, req)) {
2559                 resp->status = NFS3ERR_ROFS;
2560                 goto out1;
2561         }
2562 
2563         error = VOP_LINK(dvp, vp, args->link.name, cr);
2564 
2565 #ifdef DEBUG
2566         if (rfs3_do_post_op_attr) {
2567                 va.va_mask = AT_ALL;
2568                 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2569                 ava.va_mask = AT_ALL;
2570                 avap = VOP_GETATTR(dvp, &ava, 0, cr) ? NULL : &ava;
2571         } else {
2572                 vap = NULL;
2573                 avap = NULL;
2574         }
2575 #else
2576         va.va_mask = AT_ALL;
2577         vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2578         ava.va_mask = AT_ALL;
2579         avap = VOP_GETATTR(dvp, &ava, 0, cr) ? NULL : &ava;
2580 #endif
2581 
2582         /*
2583          * Force modified data and metadata out to stable storage.
2584          */
2585         (void) VOP_FSYNC(vp, FNODSYNC, cr);
2586         (void) VOP_FSYNC(dvp, 0, cr);
2587 
2588         if (error)
2589                 goto out;
2590 
2591         VN_RELE(dvp);
2592         VN_RELE(vp);
2593 
2594         resp->status = NFS3_OK;
2595         vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
2596         vattr_to_wcc_data(bvap, avap, &resp->resok.linkdir_wcc);
2597         return;
2598 
2599 out:
2600         if (curthread->t_flag & T_WOULDBLOCK) {
2601                 curthread->t_flag &= ~T_WOULDBLOCK;
2602                 resp->status = NFS3ERR_JUKEBOX;
2603         } else
2604                 resp->status = puterrno3(error);
2605 out1:
2606         if (vp != NULL)
2607                 VN_RELE(vp);
2608         if (dvp != NULL)
2609                 VN_RELE(dvp);
2610         vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
2611         vattr_to_wcc_data(bvap, avap, &resp->resfail.linkdir_wcc);
2612 }
2613 
2614 fhandle_t *
2615 rfs3_link_getfh(LINK3args *args)
2616 {
2617 
2618         return ((fhandle_t *)&args->file.fh3_u.nfs_fh3_i.fh3_i);
2619 }
2620 
/*
 * NFS3_READDIR_MIN_COUNT(length) is the size, in bytes, of a response
 * which contains attribute information and one directory entry whose
 * name is `length' bytes long.  If the count in the incoming request
 * is larger than this, then we are guaranteed to be able to return at
 * least one directory entry if one exists, and there is no need to
 * check for NFS3ERR_TOOSMALL.  If it is not, then we need to check
 * whether that error has to be returned.
 *
 * NFS3_READDIR_MIN_COUNT is made up of the following:
 *
 * status - 1 * BYTES_PER_XDR_UNIT
 * attr. flag - 1 * BYTES_PER_XDR_UNIT
 * cookie verifier - 2 * BYTES_PER_XDR_UNIT
 * attributes - NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
 * boolean - 1 * BYTES_PER_XDR_UNIT
 * file id - 2 * BYTES_PER_XDR_UNIT
 * directory name length - 1 * BYTES_PER_XDR_UNIT
 * cookie - 2 * BYTES_PER_XDR_UNIT
 * end of list - 1 * BYTES_PER_XDR_UNIT
 * end of file - 1 * BYTES_PER_XDR_UNIT
 * the name itself, `length' rounded up to a multiple of
 * BYTES_PER_XDR_UNIT
 */

#define	NFS3_READDIR_MIN_COUNT(length)	\
	(roundup((length), BYTES_PER_XDR_UNIT) + \
	    BYTES_PER_XDR_UNIT * \
	    (1 + 1 + 2 + NFS3_SIZEOF_FATTR3 + 1 + 2 + 1 + 2 + 1 + 1))
2649 
2650 /* ARGSUSED */
2651 void
2652 rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi,
2653         struct svc_req *req, cred_t *cr)
2654 {
2655         int error;
2656         vnode_t *vp;
2657         struct vattr *vap;
2658         struct vattr va;
2659         struct iovec iov;
2660         struct uio uio;
2661         char *data;
2662         int iseof;
2663         int bufsize;
2664         int namlen;
2665         uint_t count;
2666 
2667         vap = NULL;
2668 
2669         vp = nfs3_fhtovp(&args->dir, exi);
2670         if (vp == NULL) {
2671                 error = ESTALE;
2672                 goto out;
2673         }
2674 
2675         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
2676 
2677 #ifdef DEBUG
2678         if (rfs3_do_pre_op_attr) {
2679                 va.va_mask = AT_ALL;
2680                 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2681         } else
2682                 vap = NULL;
2683 #else
2684         va.va_mask = AT_ALL;
2685         vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2686 #endif
2687 
2688         if (vp->v_type != VDIR) {
2689                 resp->status = NFS3ERR_NOTDIR;
2690                 goto out1;
2691         }
2692 
2693         error = VOP_ACCESS(vp, VREAD, 0, cr);
2694         if (error)
2695                 goto out;
2696 
2697         /*
2698          * Now don't allow arbitrary count to alloc;
2699          * allow the maximum not to exceed rfs3_tsize()
2700          */
2701         if (args->count > rfs3_tsize(req))
2702                 args->count = rfs3_tsize(req);
2703 
2704         /*
2705          * Make sure that there is room to read at least one entry
2706          * if any are available.
2707          */
2708         if (args->count < DIRENT64_RECLEN(MAXNAMELEN))
2709                 count = DIRENT64_RECLEN(MAXNAMELEN);
2710         else
2711                 count = args->count;
2712 
2713         data = kmem_alloc(count, KM_SLEEP);
2714 
2715         iov.iov_base = data;
2716         iov.iov_len = count;
2717         uio.uio_iov = &iov;
2718         uio.uio_iovcnt = 1;
2719         uio.uio_segflg = UIO_SYSSPACE;
2720         uio.uio_extflg = UIO_COPY_CACHED;
2721         uio.uio_loffset = (offset_t)args->cookie;
2722         uio.uio_resid = count;
2723 
2724         error = VOP_READDIR(vp, &uio, cr, &iseof);
2725 
2726 #ifdef DEBUG
2727         if (rfs3_do_post_op_attr) {
2728                 va.va_mask = AT_ALL;
2729                 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2730         } else
2731                 vap = NULL;
2732 #else
2733         va.va_mask = AT_ALL;
2734         vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2735 #endif
2736 
2737         if (error) {
2738                 kmem_free(data, count);
2739                 goto out;
2740         }
2741 
2742         /*
2743          * If the count was not large enough to be able to guarantee
2744          * to be able to return at least one entry, then need to
2745          * check to see if NFS3ERR_TOOSMALL should be returned.
2746          */
2747         if (args->count < NFS3_READDIR_MIN_COUNT(MAXNAMELEN)) {
2748                 /*
2749                  * bufsize is used to keep track of the size of the response.
2750                  * It is primed with:
2751                  *      1 for the status +
2752                  *      1 for the dir_attributes.attributes boolean +
2753                  *      2 for the cookie verifier
2754                  * all times BYTES_PER_XDR_UNIT to convert from XDR units
2755                  * to bytes.  If there are directory attributes to be
2756                  * returned, then:
2757                  *      NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
2758                  * time BYTES_PER_XDR_UNIT is added to account for them.
2759                  */
2760                 bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
2761                 if (vap != NULL)
2762                         bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
2763                 /*
2764                  * An entry is composed of:
2765                  *      1 for the true/false list indicator +
2766                  *      2 for the fileid +
2767                  *      1 for the length of the name +
2768                  *      2 for the cookie +
2769                  * all times BYTES_PER_XDR_UNIT to convert from
2770                  * XDR units to bytes, plus the length of the name
2771                  * rounded up to the nearest BYTES_PER_XDR_UNIT.
2772                  */
2773                 if (count != uio.uio_resid) {
2774                         namlen = strlen(((struct dirent64 *)data)->d_name);
2775                         bufsize += (1 + 2 + 1 + 2) * BYTES_PER_XDR_UNIT +
2776                                     roundup(namlen, BYTES_PER_XDR_UNIT);
2777                 }
2778                 /*
2779                  * We need to check to see if the number of bytes left
2780                  * to go into the buffer will actually fit into the
2781                  * buffer.  This is calculated as the size of this
2782                  * entry plus:
2783                  *      1 for the true/false list indicator +
2784                  *      1 for the eof indicator
2785                  * times BYTES_PER_XDR_UNIT to convert from
2786                  * XDR units to bytes.
2787                  */
2788                 bufsize += (1 + 1) * BYTES_PER_XDR_UNIT;
2789                 if (bufsize > args->count) {
2790                         kmem_free(data, count);
2791                         resp->status = NFS3ERR_TOOSMALL;
2792                         goto out1;
2793                 }
2794         }
2795 
2796         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
2797 
2798 #if 0 /* notyet */
2799         /*
2800          * Don't do this.  It causes local disk writes when just
2801          * reading the file and the overhead is deemed larger
2802          * than the benefit.
2803          */
2804         /*
2805          * Force modified metadata out to stable storage.
2806          */
2807         (void) VOP_FSYNC(vp, FNODSYNC, cr);
2808 #endif
2809 
2810         VN_RELE(vp);
2811 
2812         resp->status = NFS3_OK;
2813         vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
2814         resp->resok.cookieverf = 0;
2815         resp->resok.reply.entries = (entry3 *)data;
2816         resp->resok.reply.eof = iseof;
2817         resp->resok.size = count - uio.uio_resid;
2818         resp->resok.count = args->count;
2819         resp->resok.freecount = count;
2820         return;
2821 
2822 out:
2823         if (curthread->t_flag & T_WOULDBLOCK) {
2824                 curthread->t_flag &= ~T_WOULDBLOCK;
2825                 resp->status = NFS3ERR_JUKEBOX;
2826         } else
2827                 resp->status = puterrno3(error);
2828 out1:
2829         if (vp != NULL) {
2830                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
2831                 VN_RELE(vp);
2832         }
2833         vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
2834 }
2835 
2836 fhandle_t *
2837 rfs3_readdir_getfh(READDIR3args *args)
2838 {
2839 
2840         return ((fhandle_t *)&args->dir.fh3_u.nfs_fh3_i.fh3_i);
2841 }
2842 
2843 void
2844 rfs3_readdir_free(READDIR3res *resp)
2845 {
2846 
2847         if (resp->status == NFS3_OK)
2848                 kmem_free(resp->resok.reply.entries, resp->resok.freecount);
2849 }
2850 
/*
 * Step from one dirent64 record to the one that follows it in a packed
 * buffer by advancing d_reclen bytes.  Any earlier definition of nextdp
 * is discarded first (#undef of an undefined name is a no-op).
 */
#undef nextdp
#define nextdp(dp) \
        ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
2855 
/*
 * NFS3_READDIRPLUS_ENTRY(namelen) is the number of bytes one directory
 * entry occupies in a READDIRPLUS response when both its attributes and
 * its file handle are present.  If the space remaining in a request is
 * at least this large, one more entry (if one exists) is guaranteed to
 * fit.
 *
 * Fixed part, in XDR units (each BYTES_PER_XDR_UNIT bytes):
 *      1                       entry-follows boolean
 *      2                       file id
 *      1                       length of the name
 *      2                       cookie
 *      1                       attributes-follow flag
 *      NFS3_SIZEOF_FATTR3      the attributes
 *      1                       file-handle-follows flag
 *      1                       length of the file handle
 *
 * Variable part, in bytes:
 *      NFS3_CURFHSIZE          maximum length of a file handle
 *      the name length rounded up to a multiple of BYTES_PER_XDR_UNIT
 */
#define NFS3_READDIRPLUS_ENTRY(namelen) \
        (roundup(namelen, BYTES_PER_XDR_UNIT) + NFS3_CURFHSIZE + \
            BYTES_PER_XDR_UNIT * \
            (1 + 2 + 1 + 2 + 1 + NFS3_SIZEOF_FATTR3 + 1 + 1))
2879 
2880 static int rfs3_readdir_unit = MAXBSIZE;
2881 
2882 /* ARGSUSED */
2883 void
2884 rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp,
2885         struct exportinfo *exi, struct svc_req *req, cred_t *cr)
2886 {
2887         int error;
2888         vnode_t *vp;
2889         struct vattr *vap;
2890         struct vattr va;
2891         struct iovec iov;
2892         struct uio uio;
2893         char *data;
2894         int iseof;
2895         struct dirent64 *dp;
2896         vnode_t *nvp;
2897         struct vattr *nvap;
2898         struct vattr nva;
2899         entryplus3_info *infop = NULL;
2900         int size = 0;
2901         int nents = 0;
2902         int bufsize = 0;
2903         int entrysize = 0;
2904         int tofit = 0;
2905         int rd_unit = rfs3_readdir_unit;
2906         int prev_len;
2907         int space_left;
2908         int i;
2909         uint_t *namlen = NULL;
2910 
2911         vap = NULL;
2912 
2913         vp = nfs3_fhtovp(&args->dir, exi);
2914         if (vp == NULL) {
2915                 error = ESTALE;
2916                 goto out;
2917         }
2918 
2919         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
2920 
2921 #ifdef DEBUG
2922         if (rfs3_do_pre_op_attr) {
2923                 va.va_mask = AT_ALL;
2924                 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2925         } else
2926                 vap = NULL;
2927 #else
2928         va.va_mask = AT_ALL;
2929         vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2930 #endif
2931 
2932         if (vp->v_type != VDIR) {
2933                 error = ENOTDIR;
2934                 goto out;
2935         }
2936 
2937         error = VOP_ACCESS(vp, VREAD, 0, cr);
2938         if (error)
2939                 goto out;
2940 
2941         /*
2942          * Don't allow arbitrary counts for allocation
2943          */
2944         if (args->maxcount > rfs3_tsize(req))
2945                 args->maxcount = rfs3_tsize(req);
2946 
2947         /*
2948          * Make sure that there is room to read at least one entry
2949          * if any are available
2950          */
2951         args->dircount = MIN(args->dircount, args->maxcount);
2952 
2953         if (args->dircount < DIRENT64_RECLEN(MAXNAMELEN))
2954                 args->dircount = DIRENT64_RECLEN(MAXNAMELEN);
2955 
2956         /*
2957          * This allocation relies on a minimum directory entry
2958          * being roughly 24 bytes.  Therefore, the namlen array
2959          * will have enough space based on the maximum number of
2960          * entries to read.
2961          */
2962         namlen = kmem_alloc(args->dircount, KM_SLEEP);
2963 
2964         space_left = args->dircount;
2965         data = kmem_alloc(args->dircount, KM_SLEEP);
2966         dp = (struct dirent64 *)data;
2967         uio.uio_iov = &iov;
2968         uio.uio_iovcnt = 1;
2969         uio.uio_segflg = UIO_SYSSPACE;
2970         uio.uio_extflg = UIO_COPY_CACHED;
2971         uio.uio_loffset = (offset_t)args->cookie;
2972 
2973         /*
2974          * bufsize is used to keep track of the size of the response as we
2975          * get post op attributes and filehandles for each entry.  This is
2976          * an optimization as the server may have read more entries than will
2977          * fit in the buffer specified by maxcount.  We stop calculating
2978          * post op attributes and filehandles once we have exceeded maxcount.
2979          * This will minimize the effect of truncation.
2980          *
2981          * It is primed with:
2982          *      1 for the status +
2983          *      1 for the dir_attributes.attributes boolean +
2984          *      2 for the cookie verifier
2985          * all times BYTES_PER_XDR_UNIT to convert from XDR units
2986          * to bytes.  If there are directory attributes to be
2987          * returned, then:
2988          *      NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
2989          * time BYTES_PER_XDR_UNIT is added to account for them.
2990          */
2991         bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
2992         if (vap != NULL)
2993                 bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
2994 
2995 getmoredents:
2996         /*
2997          * Here we make a check so that our read unit is not larger than
2998          * the space left in the buffer.
2999          */
3000         rd_unit = MIN(rd_unit, space_left);
3001         iov.iov_base = (char *)dp;
3002         iov.iov_len = rd_unit;
3003         uio.uio_resid = rd_unit;
3004         prev_len = rd_unit;
3005 
3006         error = VOP_READDIR(vp, &uio, cr, &iseof);
3007 
3008         if (error) {
3009                 kmem_free(data, args->dircount);
3010                 goto out;
3011         }
3012 
3013         if (uio.uio_resid == prev_len && !iseof) {
3014                 if (nents == 0) {
3015                         kmem_free(data, args->dircount);
3016                         resp->status = NFS3ERR_TOOSMALL;
3017                         goto out1;
3018                 }
3019 
3020                 /*
3021                  * We could not get any more entries, so get the attributes
3022                  * and filehandle for the entries already obtained.
3023                  */
3024                 goto good;
3025         }
3026 
3027         /*
3028          * We estimate the size of the response by assuming the
3029          * entry exists and attributes and filehandle are also valid
3030          */
3031         for (size = prev_len - uio.uio_resid;
3032                 size > 0;
3033                 size -= dp->d_reclen, dp = nextdp(dp)) {
3034 
3035                 if (dp->d_ino == 0) {
3036                         nents++;
3037                         continue;
3038                 }
3039 
3040                 namlen[nents] = strlen(dp->d_name);
3041                 entrysize = NFS3_READDIRPLUS_ENTRY(namlen[nents]);
3042 
3043                 /*
3044                  * We need to check to see if the number of bytes left
3045                  * to go into the buffer will actually fit into the
3046                  * buffer.  This is calculated as the size of this
3047                  * entry plus:
3048                  *      1 for the true/false list indicator +
3049                  *      1 for the eof indicator
3050                  * times BYTES_PER_XDR_UNIT to convert from XDR units
3051                  * to bytes.
3052                  *
3053                  * Also check the dircount limit against the first entry read
3054                  *
3055                  */
3056                 tofit = entrysize + (1 + 1) * BYTES_PER_XDR_UNIT;
3057                 if (bufsize + tofit > args->maxcount) {
3058                         /*
3059                          * We make a check here to see if this was the
3060                          * first entry being measured.  If so, then maxcount
3061                          * was too small to begin with and so we need to
3062                          * return with NFS3ERR_TOOSMALL.
3063                          */
3064                         if (nents == 0) {
3065                                 kmem_free(data, args->dircount);
3066                                 resp->status = NFS3ERR_TOOSMALL;
3067                                 goto out1;
3068                         }
3069                         iseof = FALSE;
3070                         goto good;
3071                 }
3072                 bufsize += entrysize;
3073                 nents++;
3074         }
3075 
3076         /*
3077          * If there is enough room to fit at least 1 more entry including
3078          * post op attributes and filehandle in the buffer AND that we haven't
3079          * exceeded dircount then go back and get some more.
3080          */
3081         if (!iseof &&
3082             (args->maxcount - bufsize) >= NFS3_READDIRPLUS_ENTRY(MAXNAMELEN)) {
3083                 space_left -= (prev_len - uio.uio_resid);
3084                 if (space_left >= DIRENT64_RECLEN(MAXNAMELEN))
3085                         goto getmoredents;
3086 
3087                 /* else, fall through */
3088         }
3089 
3090 good:
3091 
3092 #ifdef DEBUG
3093         if (rfs3_do_post_op_attr) {
3094                 va.va_mask = AT_ALL;
3095                 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3096         } else
3097                 vap = NULL;
3098 #else
3099         va.va_mask = AT_ALL;
3100         vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3101 #endif
3102 
3103         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3104 
3105         infop = kmem_alloc(nents * sizeof (struct entryplus3_info), KM_SLEEP);
3106         resp->resok.infop = infop;
3107 
3108         dp = (struct dirent64 *)data;
3109         for (i = 0; i < nents; i++) {
3110 
3111                 if (dp->d_ino == 0) {
3112                         infop[i].attr.attributes = FALSE;
3113                         infop[i].fh.handle_follows = FALSE;
3114                         dp = nextdp(dp);
3115                         continue;
3116                 }
3117 
3118                 infop[i].namelen = namlen[i];
3119 
3120                 error = VOP_LOOKUP(vp, dp->d_name, &nvp, NULL, 0, NULL, cr);
3121                 if (error) {
3122                         infop[i].attr.attributes = FALSE;
3123                         infop[i].fh.handle_follows = FALSE;
3124                         dp = nextdp(dp);
3125                         continue;
3126                 }
3127 
3128 #ifdef DEBUG
3129                 if (rfs3_do_post_op_attr) {
3130                         nva.va_mask = AT_ALL;
3131                         nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ?
3132                                 NULL : &nva;
3133                 } else
3134                         nvap = NULL;
3135 #else
3136                 nva.va_mask = AT_ALL;
3137                 nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? NULL : &nva;
3138 #endif
3139                 vattr_to_post_op_attr(nvap, &infop[i].attr);
3140 
3141 #ifdef DEBUG
3142                 if (!rfs3_do_post_op_fh3)
3143                         infop[i].fh.handle_follows = FALSE;
3144                 else {
3145 #endif
3146                 error = makefh3(&infop[i].fh.handle, nvp, exi);
3147                 if (!error)
3148                         infop[i].fh.handle_follows = TRUE;
3149                 else
3150                         infop[i].fh.handle_follows = FALSE;
3151 #ifdef DEBUG
3152                 }
3153 #endif
3154 
3155                 VN_RELE(nvp);
3156                 dp = nextdp(dp);
3157         }
3158 
3159 #if 0 /* notyet */
3160         /*
3161          * Don't do this.  It causes local disk writes when just
3162          * reading the file and the overhead is deemed larger
3163          * than the benefit.
3164          */
3165         /*
3166          * Force modified metadata out to stable storage.
3167          */
3168         (void) VOP_FSYNC(vp, FNODSYNC, cr);
3169 #endif
3170 
3171         VN_RELE(vp);
3172 
3173         kmem_free(namlen, args->dircount);
3174 
3175         resp->status = NFS3_OK;
3176         vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3177         resp->resok.cookieverf = 0;
3178         resp->resok.reply.entries = (entryplus3 *)data;
3179         resp->resok.reply.eof = iseof;
3180         resp->resok.size = nents;
3181         resp->resok.count = args->dircount;
3182         resp->resok.maxcount = args->maxcount;
3183         return;
3184 
3185 out:
3186         if (curthread->t_flag & T_WOULDBLOCK) {
3187                 curthread->t_flag &= ~T_WOULDBLOCK;
3188                 resp->status = NFS3ERR_JUKEBOX;
3189         } else
3190                 resp->status = puterrno3(error);
3191 out1:
3192         if (vp != NULL) {
3193                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3194                 VN_RELE(vp);
3195         }
3196 
3197         if (namlen != NULL)
3198                 kmem_free(namlen, args->dircount);
3199 
3200         vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3201 }
3202 
3203 fhandle_t *
3204 rfs3_readdirplus_getfh(READDIRPLUS3args *args)
3205 {
3206 
3207         return ((fhandle_t *)&args->dir.fh3_u.nfs_fh3_i.fh3_i);
3208 }
3209 
3210 void
3211 rfs3_readdirplus_free(READDIRPLUS3res *resp)
3212 {
3213 
3214         if (resp->status == NFS3_OK) {
3215                 kmem_free(resp->resok.reply.entries, resp->resok.count);
3216                 kmem_free(resp->resok.infop,
3217                         resp->resok.size * sizeof (struct entryplus3_info));
3218         }
3219 }
3220 
3221 /* ARGSUSED */
3222 void
3223 rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi,
3224         struct svc_req *req, cred_t *cr)
3225 {
3226         int error;
3227         vnode_t *vp;
3228         struct vattr *vap;
3229         struct vattr va;
3230         struct statvfs64 sb;
3231 
3232         vap = NULL;
3233 
3234         vp = nfs3_fhtovp(&args->fsroot, exi);
3235         if (vp == NULL) {
3236                 error = ESTALE;
3237                 goto out;
3238         }
3239 
3240         error = VFS_STATVFS(vp->v_vfsp, &sb);
3241 
3242 #ifdef DEBUG
3243         if (rfs3_do_post_op_attr) {
3244                 va.va_mask = AT_ALL;
3245                 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3246         } else
3247                 vap = NULL;
3248 #else
3249         va.va_mask = AT_ALL;
3250         vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3251 #endif
3252 
3253         VN_RELE(vp);
3254 
3255         if (error)
3256                 goto out;
3257 
3258         resp->status = NFS3_OK;
3259         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3260         if (sb.f_blocks != (fsblkcnt64_t)-1)
3261                 resp->resok.tbytes = (size3)sb.f_frsize * (size3)sb.f_blocks;
3262         else
3263                 resp->resok.tbytes = (size3)sb.f_blocks;
3264         if (sb.f_bfree != (fsblkcnt64_t)-1)
3265                 resp->resok.fbytes = (size3)sb.f_frsize * (size3)sb.f_bfree;
3266         else
3267                 resp->resok.fbytes = (size3)sb.f_bfree;
3268         if (sb.f_bavail != (fsblkcnt64_t)-1)
3269                 resp->resok.abytes = (size3)sb.f_frsize * (size3)sb.f_bavail;
3270         else
3271                 resp->resok.abytes = (size3)sb.f_bavail;
3272         resp->resok.tfiles = (size3)sb.f_files;
3273         resp->resok.ffiles = (size3)sb.f_ffree;
3274         resp->resok.afiles = (size3)sb.f_favail;
3275         resp->resok.invarsec = 0;
3276         return;
3277 
3278 out:
3279         if (curthread->t_flag & T_WOULDBLOCK) {
3280                 curthread->t_flag &= ~T_WOULDBLOCK;
3281                 resp->status = NFS3ERR_JUKEBOX;
3282         } else
3283                 resp->status = puterrno3(error);
3284         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3285 }
3286 
3287 fhandle_t *
3288 rfs3_fsstat_getfh(FSSTAT3args *args)
3289 {
3290 
3291         return ((fhandle_t *)&args->fsroot.fh3_u.nfs_fh3_i.fh3_i);
3292 }
3293 
3294 /* ARGSUSED */
3295 void
3296 rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi,
3297         struct svc_req *req, cred_t *cr)
3298 {
3299         vnode_t *vp;
3300         struct vattr *vap;
3301         struct vattr va;
3302         uint32_t xfer_size;
3303         ulong_t l = 0;
3304         int error;
3305 
3306         vp = nfs3_fhtovp(&args->fsroot, exi);
3307         if (vp == NULL) {
3308                 if (curthread->t_flag & T_WOULDBLOCK) {
3309                         curthread->t_flag &= ~T_WOULDBLOCK;
3310                         resp->status = NFS3ERR_JUKEBOX;
3311                 } else
3312                         resp->status = NFS3ERR_STALE;
3313                 vattr_to_post_op_attr(NULL, &resp->resfail.obj_attributes);
3314                 return;
3315         }
3316 
3317 #ifdef DEBUG
3318         if (rfs3_do_post_op_attr) {
3319                 va.va_mask = AT_ALL;
3320                 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3321         } else
3322                 vap = NULL;
3323 #else
3324         va.va_mask = AT_ALL;
3325         vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3326 #endif
3327 
3328         resp->status = NFS3_OK;
3329         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3330         xfer_size = rfs3_tsize(req);
3331         resp->resok.rtmax = xfer_size;
3332         resp->resok.rtpref = xfer_size;
3333         resp->resok.rtmult = DEV_BSIZE;
3334         resp->resok.wtmax = xfer_size;
3335         resp->resok.wtpref = xfer_size;
3336         resp->resok.wtmult = DEV_BSIZE;
3337         resp->resok.dtpref = MAXBSIZE;
3338 
3339         /*
3340          * Large file spec: want maxfilesize based on limit of
3341          * underlying filesystem.  We can guess 2^31-1 if need be.
3342          */
3343         error = VOP_PATHCONF(vp, _PC_FILESIZEBITS, &l, cr);
3344 
3345         VN_RELE(vp);
3346 
3347         if (!error && l != 0 && l <= 64)
3348                 resp->resok.maxfilesize = (1LL << (l-1)) - 1;
3349         else
3350                 resp->resok.maxfilesize = MAXOFF32_T;
3351 
3352         resp->resok.time_delta.seconds = 0;
3353         resp->resok.time_delta.nseconds = 1000;
3354         resp->resok.properties = FSF3_LINK | FSF3_SYMLINK |
3355             FSF3_HOMOGENEOUS | FSF3_CANSETTIME;
3356 }
3357 
3358 fhandle_t *
3359 rfs3_fsinfo_getfh(FSINFO3args *args)
3360 {
3361 
3362         return ((fhandle_t *)&args->fsroot.fh3_u.nfs_fh3_i.fh3_i);
3363 }
3364 
3365 /* ARGSUSED */
3366 void
3367 rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi,
3368         struct svc_req *req, cred_t *cr)
3369 {
3370         int error;
3371         vnode_t *vp;
3372         struct vattr *vap;
3373         struct vattr va;
3374         ulong_t val;
3375 
3376         vap = NULL;
3377 
3378         vp = nfs3_fhtovp(&args->object, exi);
3379         if (vp == NULL) {
3380                 error = ESTALE;
3381                 goto out;
3382         }
3383 
3384 #ifdef DEBUG
3385         if (rfs3_do_post_op_attr) {
3386                 va.va_mask = AT_ALL;
3387                 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3388         } else
3389                 vap = NULL;
3390 #else
3391         va.va_mask = AT_ALL;
3392         vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3393 #endif
3394 
3395         error = VOP_PATHCONF(vp, _PC_LINK_MAX, &val, cr);
3396         if (error)
3397                 goto out;
3398         resp->resok.info.link_max = (uint32)val;
3399 
3400         error = VOP_PATHCONF(vp, _PC_NAME_MAX, &val, cr);
3401         if (error)
3402                 goto out;
3403         resp->resok.info.name_max = (uint32)val;
3404 
3405         error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &val, cr);
3406         if (error)
3407                 goto out;
3408         if (val == 1)
3409                 resp->resok.info.no_trunc = TRUE;
3410         else
3411                 resp->resok.info.no_trunc = FALSE;
3412 
3413         error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &val, cr);
3414         if (error)
3415                 goto out;
3416         if (val == 1)
3417                 resp->resok.info.chown_restricted = TRUE;
3418         else
3419                 resp->resok.info.chown_restricted = FALSE;
3420 
3421         VN_RELE(vp);
3422 
3423         resp->status = NFS3_OK;
3424         vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3425         resp->resok.info.case_insensitive = FALSE;
3426         resp->resok.info.case_preserving = TRUE;
3427         return;
3428 
3429 out:
3430         if (curthread->t_flag & T_WOULDBLOCK) {
3431                 curthread->t_flag &= ~T_WOULDBLOCK;
3432                 resp->status = NFS3ERR_JUKEBOX;
3433         } else
3434                 resp->status = puterrno3(error);
3435         if (vp != NULL)
3436                 VN_RELE(vp);
3437         vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3438 }
3439 
3440 fhandle_t *
3441 rfs3_pathconf_getfh(PATHCONF3args *args)
3442 {
3443 
3444         return ((fhandle_t *)&args->object.fh3_u.nfs_fh3_i.fh3_i);
3445 }
3446 
3447 void
3448 rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi,
3449         struct svc_req *req, cred_t *cr)
3450 {
3451         int error;
3452         vnode_t *vp;
3453         struct vattr *bvap;
3454         struct vattr bva;
3455         struct vattr *avap;
3456         struct vattr ava;
3457 
3458         bvap = NULL;
3459         avap = NULL;
3460 
3461         vp = nfs3_fhtovp(&args->file, exi);
3462         if (vp == NULL) {
3463                 error = ESTALE;
3464                 goto out;
3465         }
3466 
3467         bva.va_mask = AT_ALL;
3468         error = VOP_GETATTR(vp, &bva, 0, cr);
3469 
3470         /*
3471          * If we can't get the attributes, then we can't do the
3472          * right access checking.  So, we'll fail the request.
3473          */
3474         if (error)
3475                 goto out;
3476 
3477 #ifdef DEBUG
3478         if (rfs3_do_pre_op_attr)
3479                 bvap = &bva;
3480         else
3481                 bvap = NULL;
3482 #else
3483         bvap = &bva;
3484 #endif
3485 
3486         if (rdonly(exi, req)) {
3487                 resp->status = NFS3ERR_ROFS;
3488                 goto out1;
3489         }
3490 
3491         if (vp->v_type != VREG) {
3492                 resp->status = NFS3ERR_INVAL;
3493                 goto out1;
3494         }
3495 
3496         if (crgetuid(cr) != bva.va_uid &&
3497             (error = VOP_ACCESS(vp, VWRITE, 0, cr)))
3498                 goto out;
3499 
3500         error = VOP_PUTPAGE(vp, args->offset, args->count, 0, cr);
3501         if (!error)
3502                 error = VOP_FSYNC(vp, FNODSYNC, cr);
3503 
3504 #ifdef DEBUG
3505         if (rfs3_do_post_op_attr) {
3506                 ava.va_mask = AT_ALL;
3507                 avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
3508         } else
3509                 avap = NULL;
3510 #else
3511         ava.va_mask = AT_ALL;
3512         avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
3513 #endif
3514 
3515         if (error)
3516                 goto out;
3517 
3518         VN_RELE(vp);
3519 
3520         resp->status = NFS3_OK;
3521         vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
3522         resp->resok.verf = write3verf;
3523         return;
3524 
3525 out:
3526         if (curthread->t_flag & T_WOULDBLOCK) {
3527                 curthread->t_flag &= ~T_WOULDBLOCK;
3528                 resp->status = NFS3ERR_JUKEBOX;
3529         } else
3530                 resp->status = puterrno3(error);
3531 out1:
3532         if (vp != NULL)
3533                 VN_RELE(vp);
3534         vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
3535 }
3536 
3537 fhandle_t *
3538 rfs3_commit_getfh(COMMIT3args *args)
3539 {
3540 
3541         return ((fhandle_t *)&args->file.fh3_u.nfs_fh3_i.fh3_i);
3542 }
3543 
3544 static int
3545 sattr3_to_vattr(sattr3 *sap, struct vattr *vap)
3546 {
3547 
3548         vap->va_mask = 0;
3549 
3550         if (sap->mode.set_it) {
3551                 vap->va_mode = (mode_t)sap->mode.mode;
3552                 vap->va_mask |= AT_MODE;
3553         }
3554         if (sap->uid.set_it) {
3555                 vap->va_uid = (uid_t)sap->uid.uid;
3556                 vap->va_mask |= AT_UID;
3557         }
3558         if (sap->gid.set_it) {
3559                 vap->va_gid = (gid_t)sap->gid.gid;
3560                 vap->va_mask |= AT_GID;
3561         }
3562         if (sap->size.set_it) {
3563                 if (sap->size.size > (size3)((u_longlong_t)-1))
3564                         return (EINVAL);
3565                 vap->va_size = sap->size.size;
3566                 vap->va_mask |= AT_SIZE;
3567         }
3568         if (sap->atime.set_it == SET_TO_CLIENT_TIME) {
3569 #ifndef _LP64
3570                 /* check time validity */
3571                 if (!NFS3_TIME_OK(sap->atime.atime.seconds))
3572                         return (EOVERFLOW);
3573 #endif
3574                 /*
3575                  * nfs protocol defines times as unsigned so don't extend sign,
3576                  * unless sysadmin set nfs_allow_preepoch_time.
3577                  */
3578                 NFS_TIME_T_CONVERT(vap->va_atime.tv_sec,
3579                         sap->atime.atime.seconds);
3580                 vap->va_atime.tv_nsec = (uint32_t)sap->atime.atime.nseconds;
3581                 vap->va_mask |= AT_ATIME;
3582         } else if (sap->atime.set_it == SET_TO_SERVER_TIME) {
3583                 gethrestime(&vap->va_atime);
3584                 vap->va_mask |= AT_ATIME;
3585         }
3586         if (sap->mtime.set_it == SET_TO_CLIENT_TIME) {
3587 #ifndef _LP64
3588                 /* check time validity */
3589                 if (!NFS3_TIME_OK(sap->mtime.mtime.seconds))
3590                         return (EOVERFLOW);
3591 #endif
3592                 /*
3593                  * nfs protocol defines times as unsigned so don't extend sign,
3594                  * unless sysadmin set nfs_allow_preepoch_time.
3595                  */
3596                 NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec,
3597                         sap->mtime.mtime.seconds);
3598                 vap->va_mtime.tv_nsec = (uint32_t)sap->mtime.mtime.nseconds;
3599                 vap->va_mask |= AT_MTIME;
3600         } else if (sap->mtime.set_it == SET_TO_SERVER_TIME) {
3601                 gethrestime(&vap->va_mtime);
3602                 vap->va_mask |= AT_MTIME;
3603         }
3604 
3605         return (0);
3606 }
3607 
3608 static ftype3 vt_to_nf3[] = {
3609         0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0
3610 };
3611 
3612 static int
3613 vattr_to_fattr3(struct vattr *vap, fattr3 *fap)
3614 {
3615 
3616         ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
3617         /* Return error if time or size overflow */
3618         if (! (NFS_VAP_TIME_OK(vap) && NFS3_SIZE_OK(vap->va_size))) {
3619                 return (EOVERFLOW);
3620         }
3621         fap->type = vt_to_nf3[vap->va_type];
3622         fap->mode = (mode3)(vap->va_mode & MODEMASK);
3623         fap->nlink = (uint32)vap->va_nlink;
3624         if (vap->va_uid == UID_NOBODY)
3625                 fap->uid = (uid3)NFS_UID_NOBODY;
3626         else
3627                 fap->uid = (uid3)vap->va_uid;
3628         if (vap->va_gid == GID_NOBODY)
3629                 fap->gid = (gid3)NFS_GID_NOBODY;
3630         else
3631                 fap->gid = (gid3)vap->va_gid;
3632         fap->size = (size3)vap->va_size;
3633         fap->used = (size3)DEV_BSIZE * (size3)vap->va_nblocks;
3634         fap->rdev.specdata1 = (uint32)getmajor(vap->va_rdev);
3635         fap->rdev.specdata2 = (uint32)getminor(vap->va_rdev);
3636         fap->fsid = (uint64)vap->va_fsid;
3637         fap->fileid = (fileid3)vap->va_nodeid;
3638         fap->atime.seconds = vap->va_atime.tv_sec;
3639         fap->atime.nseconds = vap->va_atime.tv_nsec;
3640         fap->mtime.seconds = vap->va_mtime.tv_sec;
3641         fap->mtime.nseconds = vap->va_mtime.tv_nsec;
3642         fap->ctime.seconds = vap->va_ctime.tv_sec;
3643         fap->ctime.nseconds = vap->va_ctime.tv_nsec;
3644         return (0);
3645 }
3646 
3647 static int
3648 vattr_to_wcc_attr(struct vattr *vap, wcc_attr *wccap)
3649 {
3650 
3651         /* Return error if time or size overflow */
3652         if (!  (NFS_TIME_T_OK(vap->va_mtime.tv_sec) &&
3653                 NFS_TIME_T_OK(vap->va_ctime.tv_sec) &&
3654                 NFS3_SIZE_OK(vap->va_size))) {
3655                 return (EOVERFLOW);
3656         }
3657         wccap->size = (size3)vap->va_size;
3658         wccap->mtime.seconds = vap->va_mtime.tv_sec;
3659         wccap->mtime.nseconds = vap->va_mtime.tv_nsec;
3660         wccap->ctime.seconds = vap->va_ctime.tv_sec;
3661         wccap->ctime.nseconds = vap->va_ctime.tv_nsec;
3662         return (0);
3663 }
3664 
3665 static void
3666 vattr_to_pre_op_attr(struct vattr *vap, pre_op_attr *poap)
3667 {
3668 
3669         /* don't return attrs if time overflow */
3670         if ((vap != NULL) && !vattr_to_wcc_attr(vap, &poap->attr)) {
3671                 poap->attributes = TRUE;
3672         } else
3673                 poap->attributes = FALSE;
3674 }
3675 
3676 void
3677 vattr_to_post_op_attr(struct vattr *vap, post_op_attr *poap)
3678 {
3679 
3680         /* don't return attrs if time overflow */
3681         if ((vap != NULL) && !vattr_to_fattr3(vap, &poap->attr)) {
3682                 poap->attributes = TRUE;
3683         } else
3684                 poap->attributes = FALSE;
3685 }
3686 
3687 static void
3688 vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp)
3689 {
3690 
3691         vattr_to_pre_op_attr(bvap, &wccp->before);
3692         vattr_to_post_op_attr(avap, &wccp->after);
3693 }
3694 
3695 void
3696 rfs3_srvrinit(void)
3697 {
3698         struct rfs3_verf_overlay {
3699                 uint_t id; /* a "unique" identifier */
3700                 int ts; /* a unique timestamp */
3701         } *verfp;
3702         timestruc_t now;
3703 
3704         /*
3705          * The following algorithm attempts to find a unique verifier
3706          * to be used as the write verifier returned from the server
3707          * to the client.  It is important that this verifier change
3708          * whenever the server reboots.  Of secondary importance, it
3709          * is important for the verifier to be unique between two
3710          * different servers.
3711          *
3712          * Thus, an attempt is made to use the system hostid and the
3713          * current time in seconds when the nfssrv kernel module is
3714          * loaded.  It is assumed that an NFS server will not be able
3715          * to boot and then to reboot in less than a second.  If the
3716          * hostid has not been set, then the current high resolution
3717          * time is used.  This will ensure different verifiers each
3718          * time the server reboots and minimize the chances that two
3719          * different servers will have the same verifier.
3720          */
3721 
3722 #ifndef lint
3723         /*
3724          * We ASSERT that this constant logic expression is
3725          * always true because in the past, it wasn't.
3726          */
3727         ASSERT(sizeof (*verfp) <= sizeof (write3verf));
3728 #endif
3729 
3730         gethrestime(&now);
3731         verfp = (struct rfs3_verf_overlay *)&write3verf;
3732         verfp->ts = (int)now.tv_sec;
3733         verfp->id = (uint_t)nfs_atoi(hw_serial);
3734 
3735         if (verfp->id == 0)
3736                 verfp->id = (uint_t)now.tv_nsec;
3737 
3738 }
3739 
/*
 * Teardown counterpart of rfs3_srvrinit().  rfs3_srvrinit() only
 * writes into write3verf and acquires nothing, so no cleanup is
 * required here.
 */
void
rfs3_srvrfini(void)
{
	/* Intentionally empty. */
}