New nfs3_srv.c
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
28 /* All Rights Reserved */
29
30 /* Copyright (c) 2006, The Ohio State University. All rights reserved.
31 *
32 * Portions of this source code is developed by the team members of
33 * The Ohio State University's Network-Based Computing Laboratory (NBCL),
34 * headed by Professor Dhabaleswar K. (DK) Panda.
35 *
36 * Acknowledgements to contributions from developors:
37 * Ranjit Noronha: noronha@cse.ohio-state.edu
38 * Lei Chai : chail@cse.ohio-state.edu
39 * Weikuan Yu : yuw@cse.ohio-state.edu
40 *
41 */
42 #pragma ident "@(#)nfs3_srv.c 1.113 05/07/25 SMI"
43
44 #include <sys/param.h>
45 #include <sys/types.h>
46 #include <sys/systm.h>
47 #include <sys/cred.h>
48 #include <sys/buf.h>
49 #include <sys/vfs.h>
50 #include <sys/vnode.h>
51 #include <sys/uio.h>
52 #include <sys/errno.h>
53 #include <sys/sysmacros.h>
54 #include <sys/statvfs.h>
55 #include <sys/kmem.h>
56 #include <sys/dirent.h>
57 #include <sys/cmn_err.h>
58 #include <sys/debug.h>
59 #include <sys/systeminfo.h>
60 #include <sys/flock.h>
61 #include <sys/nbmlock.h>
62 #include <sys/policy.h>
63
64 #include <rpc/types.h>
65 #include <rpc/auth.h>
66 #include <rpc/svc.h>
67
68 #include <nfs/nfs.h>
69 #include <nfs/export.h>
70
71 #include <sys/strsubr.h>
72
73 /* #define RPC_RDMA_INLINE 1 */
74
75 /*
76 * These are the interface routines for the server side of the
77 * Network File System. See the NFS version 3 protocol specification
78 * for a description of this interface.
79 */
80
81 #ifdef DEBUG
82 int rfs3_do_pre_op_attr = 1;
83 int rfs3_do_post_op_attr = 1;
84 int rfs3_do_post_op_fh3 = 1;
85 #endif
86
87 static writeverf3 write3verf;
88
89 static int sattr3_to_vattr(sattr3 *, struct vattr *);
90 static int vattr_to_fattr3(struct vattr *, fattr3 *);
91 static int vattr_to_wcc_attr(struct vattr *, wcc_attr *);
92 static void vattr_to_pre_op_attr(struct vattr *, pre_op_attr *);
93 static void vattr_to_wcc_data(struct vattr *, struct vattr *, wcc_data *);
94
95 /* ARGSUSED */
96 void
97 rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
98 struct svc_req *req, cred_t *cr)
99 {
100 int error;
101 vnode_t *vp;
102 struct vattr va;
103
104 vp = nfs3_fhtovp(&args->object, exi);
105 if (vp == NULL) {
106 error = ESTALE;
107 goto out;
108 }
109
110 va.va_mask = AT_ALL;
111 error = rfs4_delegated_getattr(vp, &va, 0, cr);
112
113 VN_RELE(vp);
114
115 if (!error) {
116 /* overflow error if time or size is out of range */
117 error = vattr_to_fattr3(&va, &resp->resok.obj_attributes);
118 if (error)
119 goto out;
120 resp->status = NFS3_OK;
121 return;
122 }
123
124 out:
125 if (curthread->t_flag & T_WOULDBLOCK) {
126 curthread->t_flag &= ~T_WOULDBLOCK;
127 resp->status = NFS3ERR_JUKEBOX;
128 } else
129 resp->status = puterrno3(error);
130 }
131
132 fhandle_t *
133 rfs3_getattr_getfh(GETATTR3args *args)
134 {
135
136 return ((fhandle_t *)&args->object.fh3_u.nfs_fh3_i.fh3_i);
137 }
138
139 void
140 rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi,
141 struct svc_req *req, cred_t *cr)
142 {
143 int error;
144 vnode_t *vp;
145 struct vattr *bvap;
146 struct vattr bva;
147 struct vattr *avap;
148 struct vattr ava;
149 int flag;
150 int in_crit = 0;
151 struct flock64 bf;
152
153 bvap = NULL;
154 avap = NULL;
155
156 vp = nfs3_fhtovp(&args->object, exi);
157 if (vp == NULL) {
158 error = ESTALE;
159 goto out;
160 }
161
162 error = sattr3_to_vattr(&args->new_attributes, &ava);
163 if (error)
164 goto out;
165
166 /*
167 * We need to specially handle size changes because of
168 * possible conflicting NBMAND locks. Get into critical
169 * region before VOP_GETATTR, so the size attribute is
170 * valid when checking conflicts.
171 *
172 * Also, check to see if the v4 side of the server has
173 * delegated this file. If so, then we return JUKEBOX to
174 * allow the client to retrasmit its request.
175 */
176 if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
177 if (rfs4_check_delegated(FWRITE, vp, TRUE)) {
178 resp->status = NFS3ERR_JUKEBOX;
179 goto out1;
180 }
181 if (nbl_need_check(vp)) {
182 nbl_start_crit(vp, RW_READER);
183 in_crit = 1;
184 }
185 }
186
187 bva.va_mask = AT_ALL;
188 error = rfs4_delegated_getattr(vp, &bva, 0, cr);
189
190 /*
191 * If we can't get the attributes, then we can't do the
192 * right access checking. So, we'll fail the request.
193 */
194 if (error)
195 goto out;
196
197 #ifdef DEBUG
198 if (rfs3_do_pre_op_attr)
199 bvap = &bva;
200 #else
201 bvap = &bva;
202 #endif
203
204 if (rdonly(exi, req) || vn_is_readonly(vp)) {
205 resp->status = NFS3ERR_ROFS;
206 goto out1;
207 }
208
209 if (args->guard.check &&
210 (args->guard.obj_ctime.seconds != bva.va_ctime.tv_sec ||
211 args->guard.obj_ctime.nseconds != bva.va_ctime.tv_nsec)) {
212 resp->status = NFS3ERR_NOT_SYNC;
213 goto out1;
214 }
215
216 if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME)
217 flag = ATTR_UTIME;
218 else
219 flag = 0;
220
221 /*
222 * If the filesystem is exported with nosuid, then mask off
223 * the setuid and setgid bits.
224 */
225 if ((ava.va_mask & AT_MODE) && vp->v_type == VREG &&
226 (exi->exi_export.ex_flags & EX_NOSUID))
227 ava.va_mode &= ~(VSUID | VSGID);
228
229 /*
230 * We need to specially handle size changes because it is
231 * possible for the client to create a file with modes
232 * which indicate read-only, but with the file opened for
233 * writing. If the client then tries to set the size of
234 * the file, then the normal access checking done in
235 * VOP_SETATTR would prevent the client from doing so,
236 * although it should be legal for it to do so. To get
237 * around this, we do the access checking for ourselves
238 * and then use VOP_SPACE which doesn't do the access
239 * checking which VOP_SETATTR does. VOP_SPACE can only
240 * operate on VREG files, let VOP_SETATTR handle the other
241 * extremely rare cases.
242 * Also the client should not be allowed to change the
243 * size of the file if there is a conflicting non-blocking
244 * mandatory lock in the region the change.
245 */
246 if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
247 if (in_crit) {
248 u_offset_t offset;
249 ssize_t length;
250
251 if (ava.va_size < bva.va_size) {
252 offset = ava.va_size;
253 length = bva.va_size - ava.va_size;
254 } else {
255 offset = bva.va_size;
256 length = ava.va_size - bva.va_size;
257 }
258 if (nbl_conflict(vp, NBL_WRITE, offset, length, 0)) {
259 error = EACCES;
260 goto out;
261 }
262 }
263
264 if (crgetuid(cr) == bva.va_uid && ava.va_size != bva.va_size) {
265 ava.va_mask &= ~AT_SIZE;
266 bf.l_type = F_WRLCK;
267 bf.l_whence = 0;
268 bf.l_start = (off64_t)ava.va_size;
269 bf.l_len = 0;
270 bf.l_sysid = 0;
271 bf.l_pid = 0;
272 error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
273 (offset_t)ava.va_size, cr, NULL);
274 }
275 }
276
277 if (!error && ava.va_mask)
278 error = VOP_SETATTR(vp, &ava, flag, cr, NULL);
279
280 #ifdef DEBUG
281 if (rfs3_do_post_op_attr) {
282 ava.va_mask = AT_ALL;
283 avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
284 } else
285 avap = NULL;
286 #else
287 ava.va_mask = AT_ALL;
288 avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
289 #endif
290
291 /*
292 * Force modified metadata out to stable storage.
293 */
294 (void) VOP_FSYNC(vp, FNODSYNC, cr);
295
296 if (error)
297 goto out;
298
299 if (in_crit)
300 nbl_end_crit(vp);
301 VN_RELE(vp);
302
303 resp->status = NFS3_OK;
304 vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc);
305 return;
306
307 out:
308 if (curthread->t_flag & T_WOULDBLOCK) {
309 curthread->t_flag &= ~T_WOULDBLOCK;
310 resp->status = NFS3ERR_JUKEBOX;
311 } else
312 resp->status = puterrno3(error);
313 out1:
314 if (vp != NULL) {
315 if (in_crit)
316 nbl_end_crit(vp);
317 VN_RELE(vp);
318 }
319 vattr_to_wcc_data(bvap, avap, &resp->resfail.obj_wcc);
320 }
321
322 fhandle_t *
323 rfs3_setattr_getfh(SETATTR3args *args)
324 {
325
326 return ((fhandle_t *)&args->object.fh3_u.nfs_fh3_i.fh3_i);
327 }
328
329 /* ARGSUSED */
330 void
331 rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
332 struct svc_req *req, cred_t *cr)
333 {
334 int error;
335 vnode_t *vp;
336 vnode_t *dvp;
337 struct vattr *vap;
338 struct vattr va;
339 struct vattr *dvap;
340 struct vattr dva;
341 nfs_fh3 *fhp;
342 struct sec_ol sec = {0, 0};
343 bool_t publicfh_flag = FALSE, auth_weak = FALSE;
344
345 dvap = NULL;
346
347 /*
348 * Allow lookups from the root - the default
349 * location of the public filehandle.
350 */
351 if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
352 dvp = rootdir;
353 VN_HOLD(dvp);
354 } else {
355 dvp = nfs3_fhtovp(args->what.dirp, exi);
356 if (dvp == NULL) {
357 error = ESTALE;
358 goto out;
359 }
360 }
361
362 #ifdef DEBUG
363 if (rfs3_do_pre_op_attr) {
364 dva.va_mask = AT_ALL;
365 dvap = VOP_GETATTR(dvp, &dva, 0, cr) ? NULL : &dva;
366 }
367 #else
368 dva.va_mask = AT_ALL;
369 dvap = VOP_GETATTR(dvp, &dva, 0, cr) ? NULL : &dva;
370 #endif
371
372 if (args->what.name == nfs3nametoolong) {
373 resp->status = NFS3ERR_NAMETOOLONG;
374 goto out1;
375 }
376
377 if (args->what.name == NULL || *(args->what.name) == '\0') {
378 resp->status = NFS3ERR_ACCES;
379 goto out1;
380 }
381
382 fhp = args->what.dirp;
383 if (strcmp(args->what.name, "..") == 0 &&
384 EQFID(&exi->exi_fid, (fid_t *)&fhp->fh3_len)) {
385 resp->status = NFS3ERR_NOENT;
386 goto out1;
387 }
388
389 /*
390 * If the public filehandle is used then allow
391 * a multi-component lookup
392 */
393 if (PUBLIC_FH3(args->what.dirp)) {
394 publicfh_flag = TRUE;
395 error = rfs_publicfh_mclookup(args->what.name, dvp, cr, &vp,
396 &exi, &sec);
397 if (error && exi != NULL)
398 exi_rele(exi); /* See the comment below */
399 } else {
400 error = VOP_LOOKUP(dvp, args->what.name, &vp,
401 NULL, 0, NULL, cr);
402 }
403
404 #ifdef DEBUG
405 if (rfs3_do_post_op_attr) {
406 dva.va_mask = AT_ALL;
407 dvap = VOP_GETATTR(dvp, &dva, 0, cr) ? NULL : &dva;
408 } else
409 dvap = NULL;
410 #else
411 dva.va_mask = AT_ALL;
412 dvap = VOP_GETATTR(dvp, &dva, 0, cr) ? NULL : &dva;
413 #endif
414
415 if (error)
416 goto out;
417
418 if (sec.sec_flags & SEC_QUERY) {
419 error = makefh3_ol(&resp->resok.object, exi, sec.sec_index);
420 } else {
421 error = makefh3(&resp->resok.object, vp, exi);
422 if (!error && publicfh_flag && !chk_clnt_sec(exi, req))
423 auth_weak = TRUE;
424 }
425
426 if (error) {
427 VN_RELE(vp);
428 goto out;
429 }
430
431 /*
432 * If publicfh_flag is true then we have called rfs_publicfh_mclookup
433 * and have obtained a new exportinfo in exi which needs to be
434 * released. Note the the original exportinfo pointed to by exi
435 * will be released by the caller, common_dispatch.
436 */
437 if (publicfh_flag)
438 exi_rele(exi);
439
440 VN_RELE(dvp);
441
442 #ifdef DEBUG
443 if (rfs3_do_post_op_attr) {
444 va.va_mask = AT_ALL;
445 vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
446 } else
447 vap = NULL;
448 #else
449 va.va_mask = AT_ALL;
450 vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
451 #endif
452
453 VN_RELE(vp);
454
455 resp->status = NFS3_OK;
456 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
457 vattr_to_post_op_attr(dvap, &resp->resok.dir_attributes);
458
459 /*
460 * If it's public fh, no 0x81, and client's flavor is
461 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
462 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
463 */
464 if (auth_weak)
465 resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR;
466
467 return;
468
469 out:
470 if (curthread->t_flag & T_WOULDBLOCK) {
471 curthread->t_flag &= ~T_WOULDBLOCK;
472 resp->status = NFS3ERR_JUKEBOX;
473 } else
474 resp->status = puterrno3(error);
475 out1:
476 if (dvp != NULL)
477 VN_RELE(dvp);
478 vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes);
479
480 }
481
482 fhandle_t *
483 rfs3_lookup_getfh(LOOKUP3args *args)
484 {
485
486 return ((fhandle_t *)&args->what.dirp->fh3_u.nfs_fh3_i.fh3_i);
487 }
488
489 /* ARGSUSED */
490 void
491 rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi,
492 struct svc_req *req, cred_t *cr)
493 {
494 int error;
495 vnode_t *vp;
496 struct vattr *vap;
497 struct vattr va;
498 int checkwriteperm;
499
500 vap = NULL;
501
502 vp = nfs3_fhtovp(&args->object, exi);
503 if (vp == NULL) {
504 error = ESTALE;
505 goto out;
506 }
507
508 /*
509 * If the file system is exported read only, it is not appropriate
510 * to check write permissions for regular files and directories.
511 * Special files are interpreted by the client, so the underlying
512 * permissions are sent back to the client for interpretation.
513 */
514 if (rdonly(exi, req) && (vp->v_type == VREG || vp->v_type == VDIR))
515 checkwriteperm = 0;
516 else
517 checkwriteperm = 1;
518
519 /*
520 * We need the mode so that we can correctly determine access
521 * permissions relative to a mandatory lock file. Access to
522 * mandatory lock files is denied on the server, so it might
523 * as well be reflected to the server during the open.
524 */
525 va.va_mask = AT_MODE;
526 error = VOP_GETATTR(vp, &va, 0, cr);
527 if (error)
528 goto out;
529
530 #ifdef DEBUG
531 if (rfs3_do_post_op_attr)
532 vap = &va;
533 #else
534 vap = &va;
535 #endif
536
537 resp->resok.access = 0;
538
539 if (args->access & ACCESS3_READ) {
540 error = VOP_ACCESS(vp, VREAD, 0, cr);
541 if (error) {
542 if (curthread->t_flag & T_WOULDBLOCK)
543 goto out;
544 } else if (!MANDLOCK(vp, va.va_mode))
545 resp->resok.access |= ACCESS3_READ;
546 }
547 if ((args->access & ACCESS3_LOOKUP) && vp->v_type == VDIR) {
548 error = VOP_ACCESS(vp, VEXEC, 0, cr);
549 if (error) {
550 if (curthread->t_flag & T_WOULDBLOCK)
551 goto out;
552 } else
553 resp->resok.access |= ACCESS3_LOOKUP;
554 }
555 if (checkwriteperm &&
556 (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND))) {
557 error = VOP_ACCESS(vp, VWRITE, 0, cr);
558 if (error) {
559 if (curthread->t_flag & T_WOULDBLOCK)
560 goto out;
561 } else if (!MANDLOCK(vp, va.va_mode)) {
562 resp->resok.access |=
563 (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND));
564 }
565 }
566 if (checkwriteperm &&
567 (args->access & ACCESS3_DELETE) && vp->v_type == VDIR) {
568 error = VOP_ACCESS(vp, VWRITE, 0, cr);
569 if (error) {
570 if (curthread->t_flag & T_WOULDBLOCK)
571 goto out;
572 } else
573 resp->resok.access |= ACCESS3_DELETE;
574 }
575 if (args->access & ACCESS3_EXECUTE) {
576 error = VOP_ACCESS(vp, VEXEC, 0, cr);
577 if (error) {
578 if (curthread->t_flag & T_WOULDBLOCK)
579 goto out;
580 } else if (!MANDLOCK(vp, va.va_mode))
581 resp->resok.access |= ACCESS3_EXECUTE;
582 }
583
584 #ifdef DEBUG
585 if (rfs3_do_post_op_attr) {
586 va.va_mask = AT_ALL;
587 vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
588 } else
589 vap = NULL;
590 #else
591 va.va_mask = AT_ALL;
592 vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
593 #endif
594
595 VN_RELE(vp);
596
597 resp->status = NFS3_OK;
598 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
599 return;
600
601 out:
602 if (curthread->t_flag & T_WOULDBLOCK) {
603 curthread->t_flag &= ~T_WOULDBLOCK;
604 resp->status = NFS3ERR_JUKEBOX;
605 } else
606 resp->status = puterrno3(error);
607 if (vp != NULL)
608 VN_RELE(vp);
609 vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
610 }
611
612 fhandle_t *
613 rfs3_access_getfh(ACCESS3args *args)
614 {
615
616 return ((fhandle_t *)&args->object.fh3_u.nfs_fh3_i.fh3_i);
617 }
618
619 /* ARGSUSED */
620 void
621 rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi,
622 struct svc_req *req, cred_t *cr)
623 {
624 int error;
625 vnode_t *vp;
626 struct vattr *vap;
627 struct vattr va;
628 struct iovec iov;
629 struct uio uio;
630 char *data;
631
632 vap = NULL;
633
634 vp = nfs3_fhtovp(&args->symlink, exi);
635 if (vp == NULL) {
636 error = ESTALE;
637 goto out;
638 }
639
640 va.va_mask = AT_ALL;
641 error = VOP_GETATTR(vp, &va, 0, cr);
642 if (error)
643 goto out;
644
645 #ifdef DEBUG
646 if (rfs3_do_post_op_attr)
647 vap = &va;
648 #else
649 vap = &va;
650 #endif
651
652 if (vp->v_type != VLNK) {
653 resp->status = NFS3ERR_INVAL;
654 goto out1;
655 }
656
657 if (MANDLOCK(vp, va.va_mode)) {
658 resp->status = NFS3ERR_ACCES;
659 goto out1;
660 }
661
662 data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
663
664 iov.iov_base = data;
665 iov.iov_len = MAXPATHLEN;
666 uio.uio_iov = &iov;
667 uio.uio_iovcnt = 1;
668 uio.uio_segflg = UIO_SYSSPACE;
669 uio.uio_extflg = UIO_COPY_CACHED;
670 uio.uio_loffset = 0;
671 uio.uio_resid = MAXPATHLEN;
672
673 error = VOP_READLINK(vp, &uio, cr);
674
675 #ifdef DEBUG
676 if (rfs3_do_post_op_attr) {
677 va.va_mask = AT_ALL;
678 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
679 } else
680 vap = NULL;
681 #else
682 va.va_mask = AT_ALL;
683 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
684 #endif
685
686 #if 0 /* notyet */
687 /*
688 * Don't do this. It causes local disk writes when just
689 * reading the file and the overhead is deemed larger
690 * than the benefit.
691 */
692 /*
693 * Force modified metadata out to stable storage.
694 */
695 (void) VOP_FSYNC(vp, FNODSYNC, cr);
696 #endif
697
698 if (error) {
699 kmem_free(data, MAXPATHLEN + 1);
700 goto out;
701 }
702
703 VN_RELE(vp);
704
705 resp->status = NFS3_OK;
706 vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes);
707 resp->resok.data = data;
708 *(data + MAXPATHLEN - uio.uio_resid) = '\0';
709 return;
710
711 out:
712 if (curthread->t_flag & T_WOULDBLOCK) {
713 curthread->t_flag &= ~T_WOULDBLOCK;
714 resp->status = NFS3ERR_JUKEBOX;
715 } else
716 resp->status = puterrno3(error);
717 out1:
718 if (vp != NULL)
719 VN_RELE(vp);
720 vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes);
721 }
722
723 fhandle_t *
724 rfs3_readlink_getfh(READLINK3args *args)
725 {
726
727 return ((fhandle_t *)&args->symlink.fh3_u.nfs_fh3_i.fh3_i);
728 }
729
730 void
731 rfs3_readlink_free(READLINK3res *resp)
732 {
733
734 if (resp->status == NFS3_OK)
735 kmem_free(resp->resok.data, MAXPATHLEN + 1);
736 }
737
738 /* ARGSUSED */
739 void
740 rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi,
741 struct svc_req *req, cred_t *cr)
742 {
743 int error;
744 vnode_t *vp;
745 struct vattr *vap;
746 struct vattr va;
747 struct iovec iov;
748 struct uio uio;
749 u_offset_t offset;
750 mblk_t *mp;
751 int alloc_err = 0;
752 int in_crit = 0;
753 int need_rwunlock = 0;
754
755 vap = NULL;
756
757 vp = nfs3_fhtovp(&args->file, exi);
758 if (vp == NULL) {
759 error = ESTALE;
760 goto out;
761 }
762
763 /*
764 * Check to see if the v4 side of the server has delegated
765 * this file. If so, then we return JUKEBOX to allow the
766 * client to retrasmit its request.
767 */
768 if (rfs4_check_delegated(FREAD, vp, FALSE)) {
769 resp->status = NFS3ERR_JUKEBOX;
770 goto out1;
771 }
772
773 /*
774 * Enter the critical region before calling VOP_RWLOCK
775 * to avoid a deadlock with write requests.
776 */
777 if (nbl_need_check(vp)) {
778 nbl_start_crit(vp, RW_READER);
779 in_crit = 1;
780 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0)) {
781 error = EACCES;
782 goto out;
783 }
784 }
785
786 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
787 need_rwunlock = 1;
788
789 va.va_mask = AT_ALL;
790 error = VOP_GETATTR(vp, &va, 0, cr);
791
792 /*
793 * If we can't get the attributes, then we can't do the
794 * right access checking. So, we'll fail the request.
795 */
796 if (error)
797 goto out;
798
799 #ifdef DEBUG
800 if (rfs3_do_post_op_attr)
801 vap = &va;
802 #else
803 vap = &va;
804 #endif
805
806 if (vp->v_type != VREG) {
807 resp->status = NFS3ERR_INVAL;
808 goto out1;
809 }
810
811 if (crgetuid(cr) != va.va_uid) {
812 error = VOP_ACCESS(vp, VREAD, 0, cr);
813 if (error) {
814 if (curthread->t_flag & T_WOULDBLOCK)
815 goto out;
816 error = VOP_ACCESS(vp, VEXEC, 0, cr);
817 if (error)
818 goto out;
819 }
820 }
821
822 if (MANDLOCK(vp, va.va_mode)) {
823 resp->status = NFS3ERR_ACCES;
824 goto out1;
825 }
826
827 offset = args->offset;
828 if (offset >= va.va_size) {
829 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
830 if (in_crit)
831 nbl_end_crit(vp);
832 VN_RELE(vp);
833 resp->status = NFS3_OK;
834 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
835 resp->resok.count = 0;
836 resp->resok.eof = TRUE;
837 resp->resok.data.data_len = 0;
838 resp->resok.data.data_val = NULL;
839 resp->resok.data.mp = NULL;
840 return;
841 }
842
843 if (args->count == 0) {
844 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
845 if (in_crit)
846 nbl_end_crit(vp);
847 VN_RELE(vp);
848 resp->status = NFS3_OK;
849 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
850 resp->resok.count = 0;
851 resp->resok.eof = FALSE;
852 resp->resok.data.data_len = 0;
853 resp->resok.data.data_val = NULL;
854 resp->resok.data.mp = NULL;
855 return;
856 }
857
858 /*
859 * do not allocate memory more the max. allowed
860 * transfer size
861 */
862 if (args->count > rfs3_tsize(req))
863 args->count = rfs3_tsize(req);
864
865 /*
866 * If we aren't returning READ data w/RDMA_WRITE, then grab
867 * a mblk.
868 */
869 if (SVC_GET_WCHUNK(req->rq_xprt, req, &iov) == FALSE) {
870
871 /*
872 * mp will contain the data to be sent out in the read reply.
873 * This will be freed after the reply has been sent out (by the
874 * driver).
875 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so
876 * that the call to xdrmblk_putmblk() never fails.
877 */
878 mp = allocb_wait(RNDUP(args->count), BPRI_MED, STR_NOSIG,
879 &alloc_err);
880 ASSERT(mp != NULL);
881 ASSERT(alloc_err == 0);
882
883 iov.iov_base = (caddr_t)mp->b_datap->db_base;
884 iov.iov_len = args->count;
885 } else
886 mp = NULL;
887 uio.uio_iov = &iov;
888 uio.uio_iovcnt = 1;
889 uio.uio_segflg = UIO_SYSSPACE;
890 uio.uio_extflg = UIO_COPY_CACHED;
891 uio.uio_loffset = args->offset;
892 uio.uio_resid = args->count;
893
894 error = VOP_READ(vp, &uio, 0, cr, NULL);
895
896 if (error) {
897 freeb(mp);
898 goto out;
899 }
900
901 va.va_mask = AT_ALL;
902 error = VOP_GETATTR(vp, &va, 0, cr);
903
904 #ifdef DEBUG
905 if (rfs3_do_post_op_attr) {
906 if (error)
907 vap = NULL;
908 else
909 vap = &va;
910 } else
911 vap = NULL;
912 #else
913 if (error)
914 vap = NULL;
915 else
916 vap = &va;
917 #endif
918
919 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
920
921 #if 0 /* notyet */
922 /*
923 * Don't do this. It causes local disk writes when just
924 * reading the file and the overhead is deemed larger
925 * than the benefit.
926 */
927 /*
928 * Force modified metadata out to stable storage.
929 */
930 (void) VOP_FSYNC(vp, FNODSYNC, cr);
931 #endif
932
933 if (in_crit)
934 nbl_end_crit(vp);
935 VN_RELE(vp);
936
937 resp->status = NFS3_OK;
938 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
939 resp->resok.count = args->count - uio.uio_resid;
940 if (!error && offset + resp->resok.count == va.va_size)
941 resp->resok.eof = TRUE;
942 else
943 resp->resok.eof = FALSE;
944 resp->resok.data.data_len = resp->resok.count;
945 #ifndef RPC_RDMA_INLINE
946 resp->resok.data.data_val = (caddr_t)iov.iov_base;
947 #else
948 resp->resok.data.data_val = (caddr_t)mp->b_datap->db_base;
949 #endif
950 resp->resok.data.mp = mp;
951
952 resp->resok.size = (uint_t)args->count;
953 return;
954
955 out:
956 if (curthread->t_flag & T_WOULDBLOCK) {
957 curthread->t_flag &= ~T_WOULDBLOCK;
958 resp->status = NFS3ERR_JUKEBOX;
959 } else
960 resp->status = puterrno3(error);
961 out1:
962 if (vp != NULL) {
963 if (need_rwunlock)
964 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
965 if (in_crit)
966 nbl_end_crit(vp);
967 VN_RELE(vp);
968 }
969 vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
970 }
971
972 void
973 rfs3_read_free(READ3res *resp)
974 {
975 mblk_t *mp;
976
977 if (resp->status == NFS3_OK) {
978 mp = resp->resok.data.mp;
979 if (mp != NULL)
980 freeb(mp);
981 }
982 }
983
984 fhandle_t *
985 rfs3_read_getfh(READ3args *args)
986 {
987
988 return ((fhandle_t *)&args->file.fh3_u.nfs_fh3_i.fh3_i);
989 }
990
991 #define MAX_IOVECS 12
992
993 #ifdef DEBUG
994 static int rfs3_write_hits = 0;
995 static int rfs3_write_misses = 0;
996 #endif
997
998 void
999 rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
1000 struct svc_req *req, cred_t *cr)
1001 {
1002 int error;
1003 vnode_t *vp;
1004 struct vattr *bvap = NULL;
1005 struct vattr bva;
1006 struct vattr *avap = NULL;
1007 struct vattr ava;
1008 u_offset_t rlimit;
1009 struct uio uio;
1010 struct iovec iov[MAX_IOVECS];
1011 mblk_t *m;
1012 struct iovec *iovp;
1013 int iovcnt;
1014 int ioflag;
1015 cred_t *savecred;
1016 int in_crit = 0;
1017 int rwlock_ret = -1;
1018
1019 vp = nfs3_fhtovp(&args->file, exi);
1020 if (vp == NULL) {
1021 error = ESTALE;
1022 goto out;
1023 }
1024
1025 /*
1026 * Check to see if the v4 side of the server has delegated
1027 * this file. If so, then we return JUKEBOX to allow the
1028 * client to retrasmit its request.
1029 */
1030 if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
1031 resp->status = NFS3ERR_JUKEBOX;
1032 goto out1;
1033 }
1034
1035 /*
1036 * We have to enter the critical region before calling VOP_RWLOCK
1037 * to avoid a deadlock with ufs.
1038 */
1039 if (nbl_need_check(vp)) {
1040 nbl_start_crit(vp, RW_READER);
1041 in_crit = 1;
1042 if (nbl_conflict(vp, NBL_WRITE, args->offset, args->count, 0)) {
1043 error = EACCES;
1044 goto out;
1045 }
1046 }
1047
1048 rwlock_ret = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
1049
1050 bva.va_mask = AT_ALL;
1051 error = VOP_GETATTR(vp, &bva, 0, cr);
1052
1053 /*
1054 * If we can't get the attributes, then we can't do the
1055 * right access checking. So, we'll fail the request.
1056 */
1057 if (error)
1058 goto out;
1059
1060 bvap = &bva;
1061 #ifdef DEBUG
1062 if (!rfs3_do_pre_op_attr)
1063 bvap = NULL;
1064 #endif
1065 avap = bvap;
1066
1067 if (args->count != args->data.data_len) {
1068 resp->status = NFS3ERR_INVAL;
1069 goto out1;
1070 }
1071
1072 if (rdonly(exi, req)) {
1073 resp->status = NFS3ERR_ROFS;
1074 goto out1;
1075 }
1076
1077 if (vp->v_type != VREG) {
1078 resp->status = NFS3ERR_INVAL;
1079 goto out1;
1080 }
1081
1082 if (crgetuid(cr) != bva.va_uid &&
1083 (error = VOP_ACCESS(vp, VWRITE, 0, cr)))
1084 goto out;
1085
1086 if (MANDLOCK(vp, bva.va_mode)) {
1087 resp->status = NFS3ERR_ACCES;
1088 goto out1;
1089 }
1090
1091 if (args->count == 0) {
1092 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1093 VN_RELE(vp);
1094 resp->status = NFS3_OK;
1095 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1096 resp->resok.count = 0;
1097 resp->resok.committed = args->stable;
1098 resp->resok.verf = write3verf;
1099 return;
1100 }
1101
1102 if (args->mblk != NULL) {
1103 iovcnt = 0;
1104 for (m = args->mblk; m != NULL; m = m->b_cont)
1105 iovcnt++;
1106 if (iovcnt <= MAX_IOVECS) {
1107 #ifdef DEBUG
1108 rfs3_write_hits++;
1109 #endif
1110 iovp = iov;
1111 } else {
1112 #ifdef DEBUG
1113 rfs3_write_misses++;
1114 #endif
1115 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1116 }
1117 mblk_to_iov(args->mblk, iovcnt, iovp);
1118 } else {
1119 iovcnt = 1;
1120 iovp = iov;
1121 iovp->iov_base = args->data.data_val;
1122 iovp->iov_len = args->count;
1123 }
1124
1125 uio.uio_iov = iovp;
1126 uio.uio_iovcnt = iovcnt;
1127
1128 uio.uio_segflg = UIO_SYSSPACE;
1129 uio.uio_extflg = UIO_COPY_DEFAULT;
1130 uio.uio_loffset = args->offset;
1131 uio.uio_resid = args->count;
1132 uio.uio_llimit = curproc->p_fsz_ctl;
1133 rlimit = uio.uio_llimit - args->offset;
1134 if (rlimit < (u_offset_t)uio.uio_resid)
1135 uio.uio_resid = (int)rlimit;
1136
1137 if (args->stable == UNSTABLE)
1138 ioflag = 0;
1139 else if (args->stable == FILE_SYNC)
1140 ioflag = FSYNC;
1141 else if (args->stable == DATA_SYNC)
1142 ioflag = FDSYNC;
1143 else {
1144 if (iovp != iov)
1145 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1146 resp->status = NFS3ERR_INVAL;
1147 goto out1;
1148 }
1149
1150 /*
1151 * We're changing creds because VM may fault and we need
1152 * the cred of the current thread to be used if quota
1153 * checking is enabled.
1154 */
1155 savecred = curthread->t_cred;
1156 curthread->t_cred = cr;
1157 error = VOP_WRITE(vp, &uio, ioflag, cr, NULL);
1158 curthread->t_cred = savecred;
1159
1160 if (iovp != iov)
1161 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1162
1163 ava.va_mask = AT_ALL;
1164 avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
1165
1166 #ifdef DEBUG
1167 if (!rfs3_do_post_op_attr)
1168 avap = NULL;
1169 #endif
1170
1171 if (error)
1172 goto out;
1173
1174 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1175 if (in_crit)
1176 nbl_end_crit(vp);
1177 VN_RELE(vp);
1178
1179 /*
1180 * If we were unable to get the V_WRITELOCK_TRUE, then we
1181 * may not have accurate after attrs, so check if
1182 * we have both attributes, they have a non-zero va_seq, and
1183 * va_seq has changed by exactly one,
1184 * if not, turn off the before attr.
1185 */
1186 if (rwlock_ret != V_WRITELOCK_TRUE) {
1187 if (bvap == NULL || avap == NULL ||
1188 bvap->va_seq == 0 || avap->va_seq == 0 ||
1189 avap->va_seq != (bvap->va_seq + 1)) {
1190 bvap = NULL;
1191 }
1192 }
1193
1194 resp->status = NFS3_OK;
1195 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1196 resp->resok.count = args->count - uio.uio_resid;
1197 resp->resok.committed = args->stable;
1198 resp->resok.verf = write3verf;
1199 return;
1200
1201 out:
1202 if (curthread->t_flag & T_WOULDBLOCK) {
1203 curthread->t_flag &= ~T_WOULDBLOCK;
1204 resp->status = NFS3ERR_JUKEBOX;
1205 } else
1206 resp->status = puterrno3(error);
1207 out1:
1208 if (vp != NULL) {
1209 if (rwlock_ret != -1)
1210 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1211 if (in_crit)
1212 nbl_end_crit(vp);
1213 VN_RELE(vp);
1214 }
1215 vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
1216 }
1217
1218 fhandle_t *
1219 rfs3_write_getfh(WRITE3args *args)
1220 {
1221
1222 return ((fhandle_t *)&args->file.fh3_u.nfs_fh3_i.fh3_i);
1223 }
1224
/*
 * rfs3_create - service routine for the NFS version 3 CREATE request.
 *
 * args	decoded request: directory handle and name (args->where) and
 *	the creation mode with its attributes or verifier (args->how)
 * resp	reply; resp->status is always set, resok is filled in on
 *	success and resfail.dir_wcc on failure
 * exi	export used to resolve the directory handle and build the
 *	new object's handle
 * req	RPC request, passed to rdonly()
 * cr	credentials used for every VOP_* call
 *
 * Failures that carry an errno go through "out", which reports
 * NFS3ERR_JUKEBOX when T_WOULDBLOCK is set on the current thread and
 * puterrno3(error) otherwise.  Failures that have already stored
 * resp->status jump to "out1".  "out" falls through to "out1", which
 * ends the NBMAND critical region (if entered) and releases tvp and dvp.
 */
1225 void
1226 rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi,
1227 struct svc_req *req, cred_t *cr)
1228 {
1229 int error;
/* nonzero while nbl_start_crit(tvp, ...) is in effect */
1230 int in_crit = 0;
/* vnode produced by VOP_CREATE, or by VOP_LOOKUP after EEXIST */
1231 vnode_t *vp;
/* existing target found by the VOP_LOOKUP done before the create */
1232 vnode_t *tvp = NULL;
/* directory the object is created in */
1233 vnode_t *dvp;
1234 struct vattr *vap;
1235 struct vattr va;
/* directory attributes before (dbvap) and after (davap) the operation */
1236 struct vattr *dbvap;
1237 struct vattr dbva;
1238 struct vattr *davap;
1239 struct vattr dava;
1240 enum vcexcl excl;
1241 nfstime3 *mtime;
/* requested size; only assigned when va.va_mask has AT_SIZE set */
1242 len_t reqsize;
1243 bool_t trunc;
1244
1245 dbvap = NULL;
1246 davap = NULL;
1247
1248 dvp = nfs3_fhtovp(args->where.dirp, exi);
1249 if (dvp == NULL) {
1250 error = ESTALE;
1251 goto out;
1252 }
1253
/* Fetch the directory's pre-operation attributes for the wcc data. */
1254 #ifdef DEBUG
1255 if (rfs3_do_pre_op_attr) {
1256 dbva.va_mask = AT_ALL;
1257 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1258 } else
1259 dbvap = NULL;
1260 #else
1261 dbva.va_mask = AT_ALL;
1262 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1263 #endif
/*
 * Until the post-operation attributes are fetched after VOP_CREATE,
 * an early failure reports the pre-operation attributes as "after" too.
 */
1264 davap = dbvap;
1265
1266 if (args->where.name == nfs3nametoolong) {
1267 resp->status = NFS3ERR_NAMETOOLONG;
1268 goto out1;
1269 }
1270
1271 if (args->where.name == NULL || *(args->where.name) == '\0') {
1272 resp->status = NFS3ERR_ACCES;
1273 goto out1;
1274 }
1275
1276 if (rdonly(exi, req)) {
1277 resp->status = NFS3ERR_ROFS;
1278 goto out1;
1279 }
1280
/*
 * EXCLUSIVE create: the file is created with mode 0 and the client's
 * verifier is stored in its mtime so that a retransmitted request can
 * be recognised in the EEXIST handling below.
 */
1281 if (args->how.mode == EXCLUSIVE) {
1282 va.va_mask = AT_TYPE | AT_MODE | AT_MTIME;
1283 va.va_type = VREG;
1284 va.va_mode = (mode_t)0;
1285 /*
1286 * Ensure no time overflows and that types match
1287 */
1288 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1289 va.va_mtime.tv_sec = mtime->seconds % INT32_MAX;
1290 va.va_mtime.tv_nsec = mtime->nseconds;
1291 excl = EXCL;
1292 } else {
1293 error = sattr3_to_vattr(&args->how.createhow3_u.obj_attributes,
1294 &va);
1295 if (error)
1296 goto out;
1297 va.va_mask |= AT_TYPE;
1298 va.va_type = VREG;
1299 if (args->how.mode == GUARDED)
1300 excl = EXCL;
1301 else {
1302 excl = NONEXCL;
1303
1304 /*
1305 * During creation of file in non-exclusive mode
1306 * if size of file is being set then make sure
1307 * that if the file already exists that no conflicting
1308 * non-blocking mandatory locks exists in the region
1309 * being modified. If there are conflicting locks fail
1310 * the operation with EACCES.
1311 */
1312 if (va.va_mask & AT_SIZE) {
1313 struct vattr tva;
1314
1315 /*
1316 * Does file already exist?
1317 */
1318 error = VOP_LOOKUP(dvp, args->where.name, &tvp,
1319 NULL, 0, NULL, cr);
1320
1321 /*
1322 * Check to see if the file has been delegated
1323 * to a v4 client. If so, then begin recall of
1324 * the delegation and return JUKEBOX to allow
1325 * the client to retransmit its request.
1326 */
1327
1328 trunc = va.va_size == 0;
1329 if (!error &&
1330 rfs4_check_delegated(FWRITE, tvp, trunc)) {
1331 resp->status = NFS3ERR_JUKEBOX;
1332 goto out1;
1333 }
1334
1335 /*
1336 * Check for NBMAND lock conflicts
1337 */
1338 if (!error && nbl_need_check(tvp)) {
1339 u_offset_t offset;
1340 ssize_t len;
1341
/*
 * The critical region entered here is not ended in this block;
 * it is ended, and tvp released, on the success path or at out1.
 */
1342 nbl_start_crit(tvp, RW_READER);
1343 in_crit = 1;
1344
1345 tva.va_mask = AT_SIZE;
1346 error = VOP_GETATTR(tvp, &tva, 0, cr);
1347 /*
1348 * Can't check for conflicts, so return
1349 * error.
1350 */
1351 if (error)
1352 goto out;
1353
/*
 * The range checked runs from the smaller of the current and
 * requested sizes, for the absolute difference between them.
 */
1354 offset = tva.va_size < va.va_size ?
1355 tva.va_size : va.va_size;
1356 len = tva.va_size < va.va_size ?
1357 va.va_size - tva.va_size :
1358 tva.va_size - va.va_size;
1359 if (nbl_conflict(tvp, NBL_WRITE,
1360 offset, len, 0)) {
1361 error = EACCES;
1362 goto out;
1363 }
1364 } else if (tvp) {
1365 VN_RELE(tvp);
1366 tvp = NULL;
1367 }
1368 }
1369 }
/* Save the requested size now; va is reused for VOP_GETATTR below. */
1370 if (va.va_mask & AT_SIZE)
1371 reqsize = va.va_size;
1372 }
1373
1374 /*
1375 * Must specify the mode.
1376 */
1377 if (!(va.va_mask & AT_MODE)) {
1378 resp->status = NFS3ERR_INVAL;
1379 goto out1;
1380 }
1381
1382 /*
1383 * If the filesystem is exported with nosuid, then mask off
1384 * the setuid and setgid bits.
1385 */
1386 if (va.va_type == VREG && (exi->exi_export.ex_flags & EX_NOSUID))
1387 va.va_mode &= ~(VSUID | VSGID);
1388
1389 tryagain:
1390 /*
1391 * The file open mode used is VWRITE. If the client needs
1392 * some other semantic, then it should do the access checking
1393 * itself. It would have been nice to have the file open mode
1394 * passed as part of the arguments.
1395 */
1396 error = VOP_CREATE(dvp, args->where.name, &va, excl, VWRITE,
1397 &vp, cr, 0);
1398
/* Fetch the directory's post-operation attributes for the wcc data. */
1399 #ifdef DEBUG
1400 if (rfs3_do_post_op_attr) {
1401 dava.va_mask = AT_ALL;
1402 davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1403 } else
1404 davap = NULL;
1405 #else
1406 dava.va_mask = AT_ALL;
1407 davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1408 #endif
1409
1410 if (error) {
1411 /*
1412 * If we got something other than file already exists
1413 * then just return this error. Otherwise, we got
1414 * EEXIST. If we were doing a GUARDED create, then
1415 * just return this error. Otherwise, we need to
1416 * make sure that this wasn't a duplicate of an
1417 * exclusive create request.
1418 *
1419 * The assumption is made that a non-exclusive create
1420 * request will never return EEXIST.
1421 */
1422 if (error != EEXIST || args->how.mode == GUARDED)
1423 goto out;
1424 /*
1425 * Lookup the file so that we can get a vnode for it.
1426 */
1427 error = VOP_LOOKUP(dvp, args->where.name, &vp, NULL, 0,
1428 NULL, cr);
1429 if (error) {
1430 /*
1431 * We couldn't find the file that we thought that
1432 * we just created. So, we'll just try creating
1433 * it again.
1434 */
1435 if (error == ENOENT)
1436 goto tryagain;
1437 goto out;
1438 }
1439
1440 /*
1441 * If the file is delegated to a v4 client, go ahead
1442 * and initiate recall, this create is a hint that a
1443 * conflicting v3 open has occurred.
1444 */
1445
1446 if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
1447 VN_RELE(vp);
1448 resp->status = NFS3ERR_JUKEBOX;
1449 goto out1;
1450 }
1451
1452 va.va_mask = AT_ALL;
1453 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1454
/*
 * For an EXCLUSIVE create, the existing file is accepted only when
 * its mtime matches the verifier in the request; otherwise (or when
 * the attributes could not be read) the request fails with EEXIST.
 */
1455 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1456 /* % with INT32_MAX to prevent overflows */
1457 if (args->how.mode == EXCLUSIVE && (vap == NULL ||
1458 vap->va_mtime.tv_sec !=
1459 (mtime->seconds % INT32_MAX) ||
1460 vap->va_mtime.tv_nsec != mtime->nseconds)) {
1461 VN_RELE(vp);
1462 error = EEXIST;
1463 goto out;
1464 }
1465 } else {
1466
1467 if ((args->how.mode == UNCHECKED ||
1468 args->how.mode == GUARDED) &&
1469 args->how.createhow3_u.obj_attributes.size.set_it &&
1470 va.va_size == 0)
1471 trunc = TRUE;
1472 else
1473 trunc = FALSE;
1474
1475 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1476 VN_RELE(vp);
1477 resp->status = NFS3ERR_JUKEBOX;
1478 goto out1;
1479 }
1480
1481 va.va_mask = AT_ALL;
1482 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1483
1484 /*
1485 * We need to check to make sure that the file got
1486 * created to the indicated size. If not, we do a
1487 * setattr to try to change the size, but we don't
1488 * try too hard. This shouldn't be a problem as most
1489 * clients will only specify a size of zero which
1490 * local file systems handle. However, even if
1491 * the client does specify a non-zero size, it can
1492 * still recover by checking the size of the file
1493 * after it has created it and then issue a setattr
1494 * request of its own to set the size of the file.
1495 */
1496 if (vap != NULL &&
1497 (args->how.mode == UNCHECKED ||
1498 args->how.mode == GUARDED) &&
1499 args->how.createhow3_u.obj_attributes.size.set_it &&
1500 vap->va_size != reqsize) {
1501 va.va_mask = AT_SIZE;
1502 va.va_size = reqsize;
1503 (void) VOP_SETATTR(vp, &va, 0, cr, NULL);
1504 va.va_mask = AT_ALL;
1505 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1506 }
1507 }
1508
1509 #ifdef DEBUG
1510 if (!rfs3_do_post_op_attr)
1511 vap = NULL;
1512 #endif
1513
/*
 * Build the file handle for the reply; a makefh3() failure is not
 * fatal, the reply simply carries no handle.
 */
1514 #ifdef DEBUG
1515 if (!rfs3_do_post_op_fh3)
1516 resp->resok.obj.handle_follows = FALSE;
1517 else {
1518 #endif
1519 error = makefh3(&resp->resok.obj.handle, vp, exi);
1520 if (error)
1521 resp->resok.obj.handle_follows = FALSE;
1522 else
1523 resp->resok.obj.handle_follows = TRUE;
1524 #ifdef DEBUG
1525 }
1526 #endif
1527
1528 /*
1529 * Force modified data and metadata out to stable storage.
1530 */
1531 (void) VOP_FSYNC(vp, FNODSYNC, cr);
1532 (void) VOP_FSYNC(dvp, 0, cr);
1533
1534 VN_RELE(vp);
1535 VN_RELE(dvp);
/* End the NBMAND critical region, if entered, and drop the hold on tvp. */
1536 if (tvp != NULL) {
1537 if (in_crit)
1538 nbl_end_crit(tvp);
1539 VN_RELE(tvp);
1540 }
1541
1542 resp->status = NFS3_OK;
1543 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1544 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1545 return;
1546
/* Failure with an errno in "error": translate it into an NFS status. */
1547 out:
1548 if (curthread->t_flag & T_WOULDBLOCK) {
1549 curthread->t_flag &= ~T_WOULDBLOCK;
1550 resp->status = NFS3ERR_JUKEBOX;
1551 } else
1552 resp->status = puterrno3(error);
/* Failure with resp->status already set: release holds and report wcc. */
1553 out1:
1554 if (tvp != NULL) {
1555 if (in_crit)
1556 nbl_end_crit(tvp);
1557 VN_RELE(tvp);
1558 }
1559 if (dvp != NULL)
1560 VN_RELE(dvp);
1561 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1562 }
1563
1564 fhandle_t *
1565 rfs3_create_getfh(CREATE3args *args)
1566 {
1567
1568 return ((fhandle_t *)&args->where.dir.fh3_u.nfs_fh3_i.fh3_i);
1569 }
1570
1571 void
1572 rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi,
1573 struct svc_req *req, cred_t *cr)
1574 {
1575 int error;
1576 vnode_t *vp = NULL;
1577 vnode_t *dvp;
1578 struct vattr *vap;
1579 struct vattr va;
1580 struct vattr *dbvap;
1581 struct vattr dbva;
1582 struct vattr *davap;
1583 struct vattr dava;
1584
1585 dbvap = NULL;
1586 davap = NULL;
1587
1588 dvp = nfs3_fhtovp(args->where.dirp, exi);
1589 if (dvp == NULL) {
1590 error = ESTALE;
1591 goto out;
1592 }
1593
1594 #ifdef DEBUG
1595 if (rfs3_do_pre_op_attr) {
1596 dbva.va_mask = AT_ALL;
1597 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1598 } else
1599 dbvap = NULL;
1600 #else
1601 dbva.va_mask = AT_ALL;
1602 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1603 #endif
1604 davap = dbvap;
1605
1606 if (args->where.name == nfs3nametoolong) {
1607 resp->status = NFS3ERR_NAMETOOLONG;
1608 goto out1;
1609 }
1610
1611 if (args->where.name == NULL || *(args->where.name) == '\0') {
1612 resp->status = NFS3ERR_ACCES;
1613 goto out1;
1614 }
1615
1616 if (rdonly(exi, req)) {
1617 resp->status = NFS3ERR_ROFS;
1618 goto out1;
1619 }
1620
1621 error = sattr3_to_vattr(&args->attributes, &va);
1622 if (error)
1623 goto out;
1624
1625 if (!(va.va_mask & AT_MODE)) {
1626 resp->status = NFS3ERR_INVAL;
1627 goto out1;
1628 }
1629
1630 va.va_mask |= AT_TYPE;
1631 va.va_type = VDIR;
1632
1633 error = VOP_MKDIR(dvp, args->where.name, &va, &vp, cr);
1634
1635 #ifdef DEBUG
1636 if (rfs3_do_post_op_attr) {
1637 dava.va_mask = AT_ALL;
1638 davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1639 } else
1640 davap = NULL;
1641 #else
1642 dava.va_mask = AT_ALL;
1643 davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1644 #endif
1645
1646 /*
1647 * Force modified data and metadata out to stable storage.
1648 */
1649 (void) VOP_FSYNC(dvp, 0, cr);
1650
1651 if (error)
1652 goto out;
1653
1654 VN_RELE(dvp);
1655
1656 #ifdef DEBUG
1657 if (!rfs3_do_post_op_fh3)
1658 resp->resok.obj.handle_follows = FALSE;
1659 else {
1660 #endif
1661 error = makefh3(&resp->resok.obj.handle, vp, exi);
1662 if (error)
1663 resp->resok.obj.handle_follows = FALSE;
1664 else
1665 resp->resok.obj.handle_follows = TRUE;
1666 #ifdef DEBUG
1667 }
1668 #endif
1669
1670 #ifdef DEBUG
1671 if (rfs3_do_post_op_attr) {
1672 va.va_mask = AT_ALL;
1673 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1674 } else
1675 vap = NULL;
1676 #else
1677 va.va_mask = AT_ALL;
1678 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1679 #endif
1680
1681 /*
1682 * Force modified data and metadata out to stable storage.
1683 */
1684 (void) VOP_FSYNC(vp, 0, cr);
1685
1686 VN_RELE(vp);
1687
1688 resp->status = NFS3_OK;
1689 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1690 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1691 return;
1692
1693 out:
1694 if (curthread->t_flag & T_WOULDBLOCK) {
1695 curthread->t_flag &= ~T_WOULDBLOCK;
1696 resp->status = NFS3ERR_JUKEBOX;
1697 } else
1698 resp->status = puterrno3(error);
1699 out1:
1700 if (dvp != NULL)
1701 VN_RELE(dvp);
1702 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1703 }
1704
1705 fhandle_t *
1706 rfs3_mkdir_getfh(MKDIR3args *args)
1707 {
1708
1709 return ((fhandle_t *)&args->where.dir.fh3_u.nfs_fh3_i.fh3_i);
1710 }
1711
1712 void
1713 rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi,
1714 struct svc_req *req, cred_t *cr)
1715 {
1716 int error;
1717 vnode_t *vp;
1718 vnode_t *dvp;
1719 struct vattr *vap;
1720 struct vattr va;
1721 struct vattr *dbvap;
1722 struct vattr dbva;
1723 struct vattr *davap;
1724 struct vattr dava;
1725
1726 dbvap = NULL;
1727 davap = NULL;
1728
1729 dvp = nfs3_fhtovp(args->where.dirp, exi);
1730 if (dvp == NULL) {
1731 error = ESTALE;
1732 goto out;
1733 }
1734
1735 #ifdef DEBUG
1736 if (rfs3_do_pre_op_attr) {
1737 dbva.va_mask = AT_ALL;
1738 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1739 } else
1740 dbvap = NULL;
1741 #else
1742 dbva.va_mask = AT_ALL;
1743 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1744 #endif
1745 davap = dbvap;
1746
1747 if (args->where.name == nfs3nametoolong) {
1748 resp->status = NFS3ERR_NAMETOOLONG;
1749 goto out1;
1750 }
1751
1752 if (args->where.name == NULL || *(args->where.name) == '\0') {
1753 resp->status = NFS3ERR_ACCES;
1754 goto out1;
1755 }
1756
1757 if (rdonly(exi, req)) {
1758 resp->status = NFS3ERR_ROFS;
1759 goto out1;
1760 }
1761
1762 error = sattr3_to_vattr(&args->symlink.symlink_attributes, &va);
1763 if (error)
1764 goto out;
1765
1766 if (!(va.va_mask & AT_MODE)) {
1767 resp->status = NFS3ERR_INVAL;
1768 goto out1;
1769 }
1770
1771 if (args->symlink.symlink_data == nfs3nametoolong) {
1772 resp->status = NFS3ERR_NAMETOOLONG;
1773 goto out1;
1774 }
1775
1776 va.va_mask |= AT_TYPE;
1777 va.va_type = VLNK;
1778
1779 error = VOP_SYMLINK(dvp, args->where.name, &va,
1780 args->symlink.symlink_data, cr);
1781
1782 #ifdef DEBUG
1783 if (rfs3_do_post_op_attr) {
1784 dava.va_mask = AT_ALL;
1785 davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1786 } else
1787 davap = NULL;
1788 #else
1789 dava.va_mask = AT_ALL;
1790 davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1791 #endif
1792
1793 if (error)
1794 goto out;
1795
1796 error = VOP_LOOKUP(dvp, args->where.name, &vp, NULL, 0, NULL, cr);
1797
1798 /*
1799 * Force modified data and metadata out to stable storage.
1800 */
1801 (void) VOP_FSYNC(dvp, 0, cr);
1802
1803 VN_RELE(dvp);
1804
1805 resp->status = NFS3_OK;
1806 if (error) {
1807 resp->resok.obj.handle_follows = FALSE;
1808 vattr_to_post_op_attr(NULL, &resp->resok.obj_attributes);
1809 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1810 return;
1811 }
1812
1813 #ifdef DEBUG
1814 if (!rfs3_do_post_op_fh3)
1815 resp->resok.obj.handle_follows = FALSE;
1816 else {
1817 #endif
1818 error = makefh3(&resp->resok.obj.handle, vp, exi);
1819 if (error)
1820 resp->resok.obj.handle_follows = FALSE;
1821 else
1822 resp->resok.obj.handle_follows = TRUE;
1823 #ifdef DEBUG
1824 }
1825 #endif
1826
1827 #ifdef DEBUG
1828 if (rfs3_do_post_op_attr) {
1829 va.va_mask = AT_ALL;
1830 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1831 } else
1832 vap = NULL;
1833 #else
1834 va.va_mask = AT_ALL;
1835 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1836 #endif
1837
1838 /*
1839 * Force modified data and metadata out to stable storage.
1840 */
1841 (void) VOP_FSYNC(vp, 0, cr);
1842
1843 VN_RELE(vp);
1844
1845 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1846 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1847 return;
1848
1849 out:
1850 if (curthread->t_flag & T_WOULDBLOCK) {
1851 curthread->t_flag &= ~T_WOULDBLOCK;
1852 resp->status = NFS3ERR_JUKEBOX;
1853 } else
1854 resp->status = puterrno3(error);
1855 out1:
1856 if (dvp != NULL)
1857 VN_RELE(dvp);
1858 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1859 }
1860
1861 fhandle_t *
1862 rfs3_symlink_getfh(SYMLINK3args *args)
1863 {
1864
1865 return ((fhandle_t *)&args->where.dirp->fh3_u.nfs_fh3_i.fh3_i);
1866 }
1867
1868 void
1869 rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi,
1870 struct svc_req *req, cred_t *cr)
1871 {
1872 int error;
1873 vnode_t *vp;
1874 vnode_t *dvp;
1875 struct vattr *vap;
1876 struct vattr va;
1877 struct vattr *dbvap;
1878 struct vattr dbva;
1879 struct vattr *davap;
1880 struct vattr dava;
1881 int mode;
1882 enum vcexcl excl;
1883
1884 dbvap = NULL;
1885 davap = NULL;
1886
1887 dvp = nfs3_fhtovp(args->where.dirp, exi);
1888 if (dvp == NULL) {
1889 error = ESTALE;
1890 goto out;
1891 }
1892
1893 #ifdef DEBUG
1894 if (rfs3_do_pre_op_attr) {
1895 dbva.va_mask = AT_ALL;
1896 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1897 } else
1898 dbvap = NULL;
1899 #else
1900 dbva.va_mask = AT_ALL;
1901 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1902 #endif
1903 davap = dbvap;
1904
1905 if (args->where.name == nfs3nametoolong) {
1906 resp->status = NFS3ERR_NAMETOOLONG;
1907 goto out1;
1908 }
1909
1910 if (args->where.name == NULL || *(args->where.name) == '\0') {
1911 resp->status = NFS3ERR_ACCES;
1912 goto out1;
1913 }
1914
1915 if (rdonly(exi, req)) {
1916 resp->status = NFS3ERR_ROFS;
1917 goto out1;
1918 }
1919
1920 switch (args->what.type) {
1921 case NF3CHR:
1922 case NF3BLK:
1923 error = sattr3_to_vattr(
1924 &args->what.mknoddata3_u.device.dev_attributes, &va);
1925 if (error)
1926 goto out;
1927 if (secpolicy_sys_devices(cr) != 0) {
1928 resp->status = NFS3ERR_PERM;
1929 goto out1;
1930 }
1931 if (args->what.type == NF3CHR)
1932 va.va_type = VCHR;
1933 else
1934 va.va_type = VBLK;
1935 va.va_rdev = makedevice(
1936 args->what.mknoddata3_u.device.spec.specdata1,
1937 args->what.mknoddata3_u.device.spec.specdata2);
1938 va.va_mask |= AT_TYPE | AT_RDEV;
1939 break;
1940 case NF3SOCK:
1941 error = sattr3_to_vattr(
1942 &args->what.mknoddata3_u.pipe_attributes, &va);
1943 if (error)
1944 goto out;
1945 va.va_type = VSOCK;
1946 va.va_mask |= AT_TYPE;
1947 break;
1948 case NF3FIFO:
1949 error = sattr3_to_vattr(
1950 &args->what.mknoddata3_u.pipe_attributes, &va);
1951 if (error)
1952 goto out;
1953 va.va_type = VFIFO;
1954 va.va_mask |= AT_TYPE;
1955 break;
1956 default:
1957 resp->status = NFS3ERR_BADTYPE;
1958 goto out1;
1959 }
1960
1961 /*
1962 * Must specify the mode.
1963 */
1964 if (!(va.va_mask & AT_MODE)) {
1965 resp->status = NFS3ERR_INVAL;
1966 goto out1;
1967 }
1968
1969 excl = EXCL;
1970
1971 mode = 0;
1972
1973 error = VOP_CREATE(dvp, args->where.name, &va, excl, mode,
1974 &vp, cr, 0);
1975
1976 #ifdef DEBUG
1977 if (rfs3_do_post_op_attr) {
1978 dava.va_mask = AT_ALL;
1979 davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1980 } else
1981 davap = NULL;
1982 #else
1983 dava.va_mask = AT_ALL;
1984 davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1985 #endif
1986
1987 /*
1988 * Force modified data and metadata out to stable storage.
1989 */
1990 (void) VOP_FSYNC(dvp, 0, cr);
1991
1992 if (error)
1993 goto out;
1994
1995 VN_RELE(dvp);
1996
1997 resp->status = NFS3_OK;
1998
1999 #ifdef DEBUG
2000 if (!rfs3_do_post_op_fh3)
2001 resp->resok.obj.handle_follows = FALSE;
2002 else {
2003 #endif
2004 error = makefh3(&resp->resok.obj.handle, vp, exi);
2005 if (error)
2006 resp->resok.obj.handle_follows = FALSE;
2007 else
2008 resp->resok.obj.handle_follows = TRUE;
2009 #ifdef DEBUG
2010 }
2011 #endif
2012
2013 #ifdef DEBUG
2014 if (rfs3_do_post_op_attr) {
2015 va.va_mask = AT_ALL;
2016 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2017 } else
2018 vap = NULL;
2019 #else
2020 va.va_mask = AT_ALL;
2021 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2022 #endif
2023
2024 /*
2025 * Force modified metadata out to stable storage.
2026 */
2027 (void) VOP_FSYNC(vp, FNODSYNC, cr);
2028
2029 VN_RELE(vp);
2030
2031 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2032 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2033 return;
2034
2035 out:
2036 if (curthread->t_flag & T_WOULDBLOCK) {
2037 curthread->t_flag &= ~T_WOULDBLOCK;
2038 resp->status = NFS3ERR_JUKEBOX;
2039 } else
2040 resp->status = puterrno3(error);
2041 out1:
2042 if (dvp != NULL)
2043 VN_RELE(dvp);
2044 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2045 }
2046
2047 fhandle_t *
2048 rfs3_mknod_getfh(MKNOD3args *args)
2049 {
2050
2051 return ((fhandle_t *)&args->where.dirp->fh3_u.nfs_fh3_i.fh3_i);
2052 }
2053
2054 void
2055 rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi,
2056 struct svc_req *req, cred_t *cr)
2057 {
2058 int error = 0;
2059 vnode_t *vp;
2060 struct vattr *bvap;
2061 struct vattr bva;
2062 struct vattr *avap;
2063 struct vattr ava;
2064 vnode_t *targvp = NULL;
2065
2066 bvap = NULL;
2067 avap = NULL;
2068
2069 vp = nfs3_fhtovp(args->object.dirp, exi);
2070 if (vp == NULL) {
2071 error = ESTALE;
2072 goto out;
2073 }
2074
2075 #ifdef DEBUG
2076 if (rfs3_do_pre_op_attr) {
2077 bva.va_mask = AT_ALL;
2078 bvap = VOP_GETATTR(vp, &bva, 0, cr) ? NULL : &bva;
2079 } else
2080 bvap = NULL;
2081 #else
2082 bva.va_mask = AT_ALL;
2083 bvap = VOP_GETATTR(vp, &bva, 0, cr) ? NULL : &bva;
2084 #endif
2085 avap = bvap;
2086
2087 if (vp->v_type != VDIR) {
2088 resp->status = NFS3ERR_NOTDIR;
2089 goto out1;
2090 }
2091
2092 if (args->object.name == nfs3nametoolong) {
2093 resp->status = NFS3ERR_NAMETOOLONG;
2094 goto out1;
2095 }
2096
2097 if (args->object.name == NULL || *(args->object.name) == '\0') {
2098 resp->status = NFS3ERR_ACCES;
2099 goto out1;
2100 }
2101
2102 if (rdonly(exi, req)) {
2103 resp->status = NFS3ERR_ROFS;
2104 goto out1;
2105 }
2106
2107 /*
2108 * Check for a conflict with a non-blocking mandatory share
2109 * reservation and V4 delegations
2110 */
2111 error = VOP_LOOKUP(vp, args->object.name, &targvp, NULL, 0,
2112 NULL, cr);
2113 if (error != 0)
2114 goto out;
2115
2116 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2117 resp->status = NFS3ERR_JUKEBOX;
2118 goto out1;
2119 }
2120
2121 if (!nbl_need_check(targvp)) {
2122 error = VOP_REMOVE(vp, args->object.name, cr);
2123 } else {
2124 nbl_start_crit(targvp, RW_READER);
2125 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0)) {
2126 error = EACCES;
2127 } else {
2128 error = VOP_REMOVE(vp, args->object.name, cr);
2129 }
2130 nbl_end_crit(targvp);
2131 }
2132 VN_RELE(targvp);
2133 targvp = NULL;
2134
2135 #ifdef DEBUG
2136 if (rfs3_do_post_op_attr) {
2137 ava.va_mask = AT_ALL;
2138 avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
2139 } else
2140 avap = NULL;
2141 #else
2142 ava.va_mask = AT_ALL;
2143 avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
2144 #endif
2145
2146 /*
2147 * Force modified data and metadata out to stable storage.
2148 */
2149 (void) VOP_FSYNC(vp, 0, cr);
2150
2151 if (error)
2152 goto out;
2153
2154 VN_RELE(vp);
2155
2156 resp->status = NFS3_OK;
2157 vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2158 return;
2159
2160 out:
2161 if (curthread->t_flag & T_WOULDBLOCK) {
2162 curthread->t_flag &= ~T_WOULDBLOCK;
2163 resp->status = NFS3ERR_JUKEBOX;
2164 } else
2165 resp->status = puterrno3(error);
2166 out1:
2167 if (vp != NULL)
2168 VN_RELE(vp);
2169 vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2170 }
2171
2172 fhandle_t *
2173 rfs3_remove_getfh(REMOVE3args *args)
2174 {
2175
2176 return ((fhandle_t *)&args->object.dirp->fh3_u.nfs_fh3_i.fh3_i);
2177 }
2178
2179 void
2180 rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi,
2181 struct svc_req *req, cred_t *cr)
2182 {
2183 int error;
2184 vnode_t *vp;
2185 struct vattr *bvap;
2186 struct vattr bva;
2187 struct vattr *avap;
2188 struct vattr ava;
2189
2190 bvap = NULL;
2191 avap = NULL;
2192
2193 vp = nfs3_fhtovp(args->object.dirp, exi);
2194 if (vp == NULL) {
2195 error = ESTALE;
2196 goto out;
2197 }
2198
2199 #ifdef DEBUG
2200 if (rfs3_do_pre_op_attr) {
2201 bva.va_mask = AT_ALL;
2202 bvap = VOP_GETATTR(vp, &bva, 0, cr) ? NULL : &bva;
2203 } else
2204 bvap = NULL;
2205 #else
2206 bva.va_mask = AT_ALL;
2207 bvap = VOP_GETATTR(vp, &bva, 0, cr) ? NULL : &bva;
2208 #endif
2209 avap = bvap;
2210
2211 if (vp->v_type != VDIR) {
2212 resp->status = NFS3ERR_NOTDIR;
2213 goto out1;
2214 }
2215
2216 if (args->object.name == nfs3nametoolong) {
2217 resp->status = NFS3ERR_NAMETOOLONG;
2218 goto out1;
2219 }
2220
2221 if (args->object.name == NULL || *(args->object.name) == '\0') {
2222 resp->status = NFS3ERR_ACCES;
2223 goto out1;
2224 }
2225
2226 if (rdonly(exi, req)) {
2227 resp->status = NFS3ERR_ROFS;
2228 goto out1;
2229 }
2230
2231 error = VOP_RMDIR(vp, args->object.name, rootdir, cr);
2232
2233 #ifdef DEBUG
2234 if (rfs3_do_post_op_attr) {
2235 ava.va_mask = AT_ALL;
2236 avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
2237 } else
2238 avap = NULL;
2239 #else
2240 ava.va_mask = AT_ALL;
2241 avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
2242 #endif
2243
2244 /*
2245 * Force modified data and metadata out to stable storage.
2246 */
2247 (void) VOP_FSYNC(vp, 0, cr);
2248
2249 if (error) {
2250 /*
2251 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2252 * if the directory is not empty. A System V NFS server
2253 * needs to map NFS3ERR_EXIST to NFS3ERR_NOTEMPTY to transmit
2254 * over the wire.
2255 */
2256 if (error == EEXIST)
2257 error = ENOTEMPTY;
2258 goto out;
2259 }
2260
2261 VN_RELE(vp);
2262
2263 resp->status = NFS3_OK;
2264 vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2265 return;
2266
2267 out:
2268 if (curthread->t_flag & T_WOULDBLOCK) {
2269 curthread->t_flag &= ~T_WOULDBLOCK;
2270 resp->status = NFS3ERR_JUKEBOX;
2271 } else
2272 resp->status = puterrno3(error);
2273 out1:
2274 if (vp != NULL)
2275 VN_RELE(vp);
2276 vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2277 }
2278
2279 fhandle_t *
2280 rfs3_rmdir_getfh(RMDIR3args *args)
2281 {
2282
2283 return ((fhandle_t *)&args->object.dirp->fh3_u.nfs_fh3_i.fh3_i);
2284 }
2285
2286 void
2287 rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi,
2288 struct svc_req *req, cred_t *cr)
2289 {
2290 int error = 0;
2291 vnode_t *fvp;
2292 vnode_t *tvp;
2293 vnode_t *targvp;
2294 struct vattr *fbvap;
2295 struct vattr fbva;
2296 struct vattr *favap;
2297 struct vattr fava;
2298 struct vattr *tbvap;
2299 struct vattr tbva;
2300 struct vattr *tavap;
2301 struct vattr tava;
2302 nfs_fh3 *fh3;
2303 struct exportinfo *to_exi;
2304 vnode_t *srcvp = NULL;
2305
2306 fbvap = NULL;
2307 favap = NULL;
2308 tbvap = NULL;
2309 tavap = NULL;
2310 tvp = NULL;
2311
2312 fvp = nfs3_fhtovp(args->from.dirp, exi);
2313 if (fvp == NULL) {
2314 error = ESTALE;
2315 goto out;
2316 }
2317
2318 #ifdef DEBUG
2319 if (rfs3_do_pre_op_attr) {
2320 fbva.va_mask = AT_ALL;
2321 fbvap = VOP_GETATTR(fvp, &fbva, 0, cr) ? NULL : &fbva;
2322 } else
2323 fbvap = NULL;
2324 #else
2325 fbva.va_mask = AT_ALL;
2326 fbvap = VOP_GETATTR(fvp, &fbva, 0, cr) ? NULL : &fbva;
2327 #endif
2328 favap = fbvap;
2329
2330 fh3 = args->to.dirp;
2331 to_exi = checkexport(&fh3->fh3_fsid, (fid_t *)&fh3->fh3_xlen);
2332 if (to_exi == NULL) {
2333 resp->status = NFS3ERR_ACCES;
2334 goto out1;
2335 }
2336 exi_rele(to_exi);
2337
2338 if (to_exi != exi) {
2339 resp->status = NFS3ERR_XDEV;
2340 goto out1;
2341 }
2342
2343 tvp = nfs3_fhtovp(args->to.dirp, exi);
2344 if (tvp == NULL) {
2345 error = ESTALE;
2346 goto out;
2347 }
2348
2349 #ifdef DEBUG
2350 if (rfs3_do_pre_op_attr) {
2351 tbva.va_mask = AT_ALL;
2352 tbvap = VOP_GETATTR(tvp, &tbva, 0, cr) ? NULL : &tbva;
2353 } else
2354 tbvap = NULL;
2355 #else
2356 tbva.va_mask = AT_ALL;
2357 tbvap = VOP_GETATTR(tvp, &tbva, 0, cr) ? NULL : &tbva;
2358 #endif
2359 tavap = tbvap;
2360
2361 if (fvp->v_type != VDIR || tvp->v_type != VDIR) {
2362 resp->status = NFS3ERR_NOTDIR;
2363 goto out1;
2364 }
2365
2366 if (args->from.name == nfs3nametoolong ||
2367 args->to.name == nfs3nametoolong) {
2368 resp->status = NFS3ERR_NAMETOOLONG;
2369 goto out1;
2370 }
2371 if (args->from.name == NULL || *(args->from.name) == '\0' ||
2372 args->to.name == NULL || *(args->to.name) == '\0') {
2373 resp->status = NFS3ERR_ACCES;
2374 goto out1;
2375 }
2376
2377 if (rdonly(exi, req)) {
2378 resp->status = NFS3ERR_ROFS;
2379 goto out1;
2380 }
2381
2382 /*
2383 * Check for a conflict with a non-blocking mandatory share
2384 * reservation or V4 delegations.
2385 */
2386 error = VOP_LOOKUP(fvp, args->from.name, &srcvp, NULL, 0,
2387 NULL, cr);
2388 if (error != 0)
2389 goto out;
2390
2391 /*
2392 * If we rename a delegated file we should recall the
2393 * delegation, since future opens should fail or would
2394 * refer to a new file.
2395 */
2396 if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2397 resp->status = NFS3ERR_JUKEBOX;
2398 goto out1;
2399 }
2400
2401 /*
2402 * Check for renaming over a delegated file. Check rfs4_deleg_policy
2403 * first to avoid VOP_LOOKUP if possible.
2404 */
2405 if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
2406 VOP_LOOKUP(tvp, args->to.name, &targvp, NULL, 0, NULL, cr) == 0) {
2407
2408 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2409 VN_RELE(targvp);
2410 resp->status = NFS3ERR_JUKEBOX;
2411 goto out1;
2412 }
2413 VN_RELE(targvp);
2414 }
2415
2416 if (!nbl_need_check(srcvp)) {
2417 error = VOP_RENAME(fvp, args->from.name, tvp,
2418 args->to.name, cr);
2419 } else {
2420 nbl_start_crit(srcvp, RW_READER);
2421 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0)) {
2422 error = EACCES;
2423 } else {
2424 error = VOP_RENAME(fvp, args->from.name, tvp,
2425 args->to.name, cr);
2426 }
2427 nbl_end_crit(srcvp);
2428 }
2429 if (error == 0) {
2430 char *tmp;
2431
2432 /* fix the path name for the renamed file */
2433 mutex_enter(&srcvp->v_lock);
2434 tmp = srcvp->v_path;
2435 srcvp->v_path = NULL;
2436 mutex_exit(&srcvp->v_lock);
2437 vn_setpath(rootdir, tvp, srcvp, args->to.name,
2438 strlen(args->to.name));
2439 if (tmp != NULL)
2440 kmem_free(tmp, strlen(tmp) + 1);
2441 }
2442
2443 VN_RELE(srcvp);
2444 srcvp = NULL;
2445
2446 #ifdef DEBUG
2447 if (rfs3_do_post_op_attr) {
2448 fava.va_mask = AT_ALL;
2449 favap = VOP_GETATTR(fvp, &fava, 0, cr) ? NULL : &fava;
2450 tava.va_mask = AT_ALL;
2451 tavap = VOP_GETATTR(tvp, &tava, 0, cr) ? NULL : &tava;
2452 } else {
2453 favap = NULL;
2454 tavap = NULL;
2455 }
2456 #else
2457 fava.va_mask = AT_ALL;
2458 favap = VOP_GETATTR(fvp, &fava, 0, cr) ? NULL : &fava;
2459 tava.va_mask = AT_ALL;
2460 tavap = VOP_GETATTR(tvp, &tava, 0, cr) ? NULL : &tava;
2461 #endif
2462
2463 /*
2464 * Force modified data and metadata out to stable storage.
2465 */
2466 (void) VOP_FSYNC(fvp, 0, cr);
2467 (void) VOP_FSYNC(tvp, 0, cr);
2468
2469 if (error)
2470 goto out;
2471
2472 VN_RELE(tvp);
2473 VN_RELE(fvp);
2474
2475 resp->status = NFS3_OK;
2476 vattr_to_wcc_data(fbvap, favap, &resp->resok.fromdir_wcc);
2477 vattr_to_wcc_data(tbvap, tavap, &resp->resok.todir_wcc);
2478 return;
2479
2480 out:
2481 if (curthread->t_flag & T_WOULDBLOCK) {
2482 curthread->t_flag &= ~T_WOULDBLOCK;
2483 resp->status = NFS3ERR_JUKEBOX;
2484 } else
2485 resp->status = puterrno3(error);
2486 out1:
2487 if (fvp != NULL)
2488 VN_RELE(fvp);
2489 if (tvp != NULL)
2490 VN_RELE(tvp);
2491 vattr_to_wcc_data(fbvap, favap, &resp->resfail.fromdir_wcc);
2492 vattr_to_wcc_data(tbvap, tavap, &resp->resfail.todir_wcc);
2493 }
2494
2495 fhandle_t *
2496 rfs3_rename_getfh(RENAME3args *args)
2497 {
2498
2499 return ((fhandle_t *)&args->from.dirp->fh3_u.nfs_fh3_i.fh3_i);
2500 }
2501
2502 void
2503 rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi,
2504 struct svc_req *req, cred_t *cr)
2505 {
2506 int error;
2507 vnode_t *vp;
2508 vnode_t *dvp;
2509 struct vattr *vap;
2510 struct vattr va;
2511 struct vattr *bvap;
2512 struct vattr bva;
2513 struct vattr *avap;
2514 struct vattr ava;
2515 nfs_fh3 *fh3;
2516 struct exportinfo *to_exi;
2517
2518 vap = NULL;
2519 bvap = NULL;
2520 avap = NULL;
2521 dvp = NULL;
2522
2523 vp = nfs3_fhtovp(&args->file, exi);
2524 if (vp == NULL) {
2525 error = ESTALE;
2526 goto out;
2527 }
2528
2529 #ifdef DEBUG
2530 if (rfs3_do_pre_op_attr) {
2531 va.va_mask = AT_ALL;
2532 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2533 } else
2534 vap = NULL;
2535 #else
2536 va.va_mask = AT_ALL;
2537 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2538 #endif
2539
2540 fh3 = args->link.dirp;
2541 to_exi = checkexport(&fh3->fh3_fsid, (fid_t *)&fh3->fh3_xlen);
2542 if (to_exi == NULL) {
2543 resp->status = NFS3ERR_ACCES;
2544 goto out1;
2545 }
2546 exi_rele(to_exi);
2547
2548 if (to_exi != exi) {
2549 resp->status = NFS3ERR_XDEV;
2550 goto out1;
2551 }
2552
2553 dvp = nfs3_fhtovp(args->link.dirp, exi);
2554 if (dvp == NULL) {
2555 error = ESTALE;
2556 goto out;
2557 }
2558
2559 #ifdef DEBUG
2560 if (rfs3_do_pre_op_attr) {
2561 bva.va_mask = AT_ALL;
2562 bvap = VOP_GETATTR(dvp, &bva, 0, cr) ? NULL : &bva;
2563 } else
2564 bvap = NULL;
2565 #else
2566 bva.va_mask = AT_ALL;
2567 bvap = VOP_GETATTR(dvp, &bva, 0, cr) ? NULL : &bva;
2568 #endif
2569
2570 if (dvp->v_type != VDIR) {
2571 resp->status = NFS3ERR_NOTDIR;
2572 goto out1;
2573 }
2574
2575 if (args->link.name == nfs3nametoolong) {
2576 resp->status = NFS3ERR_NAMETOOLONG;
2577 goto out1;
2578 }
2579
2580 if (args->link.name == NULL || *(args->link.name) == '\0') {
2581 resp->status = NFS3ERR_ACCES;
2582 goto out1;
2583 }
2584
2585 if (rdonly(exi, req)) {
2586 resp->status = NFS3ERR_ROFS;
2587 goto out1;
2588 }
2589
2590 error = VOP_LINK(dvp, vp, args->link.name, cr);
2591
2592 #ifdef DEBUG
2593 if (rfs3_do_post_op_attr) {
2594 va.va_mask = AT_ALL;
2595 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2596 ava.va_mask = AT_ALL;
2597 avap = VOP_GETATTR(dvp, &ava, 0, cr) ? NULL : &ava;
2598 } else {
2599 vap = NULL;
2600 avap = NULL;
2601 }
2602 #else
2603 va.va_mask = AT_ALL;
2604 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2605 ava.va_mask = AT_ALL;
2606 avap = VOP_GETATTR(dvp, &ava, 0, cr) ? NULL : &ava;
2607 #endif
2608
2609 /*
2610 * Force modified data and metadata out to stable storage.
2611 */
2612 (void) VOP_FSYNC(vp, FNODSYNC, cr);
2613 (void) VOP_FSYNC(dvp, 0, cr);
2614
2615 if (error)
2616 goto out;
2617
2618 VN_RELE(dvp);
2619 VN_RELE(vp);
2620
2621 resp->status = NFS3_OK;
2622 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
2623 vattr_to_wcc_data(bvap, avap, &resp->resok.linkdir_wcc);
2624 return;
2625
2626 out:
2627 if (curthread->t_flag & T_WOULDBLOCK) {
2628 curthread->t_flag &= ~T_WOULDBLOCK;
2629 resp->status = NFS3ERR_JUKEBOX;
2630 } else
2631 resp->status = puterrno3(error);
2632 out1:
2633 if (vp != NULL)
2634 VN_RELE(vp);
2635 if (dvp != NULL)
2636 VN_RELE(dvp);
2637 vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
2638 vattr_to_wcc_data(bvap, avap, &resp->resfail.linkdir_wcc);
2639 }
2640
2641 fhandle_t *
2642 rfs3_link_getfh(LINK3args *args)
2643 {
2644
2645 return ((fhandle_t *)&args->file.fh3_u.nfs_fh3_i.fh3_i);
2646 }
2647
/*
 * NFS3_READDIR_MIN_COUNT(length) is the size, in bytes, of a READDIR
 * response that contains attribute information and a single directory
 * entry whose name is "length" bytes long.  If the count in the incoming
 * request is at least this large, then we are guaranteed to be able to
 * return at least one directory entry if one exists, and there is no
 * need to check for NFS3ERR_TOOSMALL.  If the count is smaller than
 * this, the caller has to check whether that error must be returned.
 *
 * The value is the sum of the following, in XDR units:
 *
 *	status			- 1
 *	attribute flag		- 1
 *	cookie verifier		- 2
 *	attributes		- NFS3_SIZEOF_FATTR3
 *	boolean			- 1
 *	file id			- 2
 *	directory name length	- 1
 *	cookie			- 2
 *	end of list		- 1
 *	end of file		- 1
 *
 * multiplied by BYTES_PER_XDR_UNIT, plus the length of the name rounded
 * up to a multiple of BYTES_PER_XDR_UNIT.
 */

#define	NFS3_READDIR_MIN_COUNT(length)	\
	(BYTES_PER_XDR_UNIT * \
	(1 + 1 + 2 + NFS3_SIZEOF_FATTR3 + 1 + 2 + 1 + 2 + 1 + 1) + \
	roundup((length), BYTES_PER_XDR_UNIT))
2676
2677 /* ARGSUSED */
2678 void
2679 rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi,
2680 struct svc_req *req, cred_t *cr)
2681 {
2682 int error;
2683 vnode_t *vp;
2684 struct vattr *vap;
2685 struct vattr va;
2686 struct iovec iov;
2687 struct uio uio;
2688 char *data;
2689 int iseof;
2690 int bufsize;
2691 int namlen;
2692 uint_t count;
2693
2694 vap = NULL;
2695
2696 vp = nfs3_fhtovp(&args->dir, exi);
2697 if (vp == NULL) {
2698 error = ESTALE;
2699 goto out;
2700 }
2701
2702 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
2703
2704 #ifdef DEBUG
2705 if (rfs3_do_pre_op_attr) {
2706 va.va_mask = AT_ALL;
2707 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2708 } else
2709 vap = NULL;
2710 #else
2711 va.va_mask = AT_ALL;
2712 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2713 #endif
2714
2715 if (vp->v_type != VDIR) {
2716 resp->status = NFS3ERR_NOTDIR;
2717 goto out1;
2718 }
2719
2720 error = VOP_ACCESS(vp, VREAD, 0, cr);
2721 if (error)
2722 goto out;
2723
2724 /*
2725 * Now don't allow arbitrary count to alloc;
2726 * allow the maximum not to exceed rfs3_tsize()
2727 */
2728 if (args->count > rfs3_tsize(req))
2729 args->count = rfs3_tsize(req);
2730
2731 /*
2732 * Make sure that there is room to read at least one entry
2733 * if any are available.
2734 */
2735 if (args->count < DIRENT64_RECLEN(MAXNAMELEN))
2736 count = DIRENT64_RECLEN(MAXNAMELEN);
2737 else
2738 count = args->count;
2739
2740 data = kmem_alloc(count, KM_SLEEP);
2741
2742 iov.iov_base = data;
2743 iov.iov_len = count;
2744 uio.uio_iov = &iov;
2745 uio.uio_iovcnt = 1;
2746 uio.uio_segflg = UIO_SYSSPACE;
2747 uio.uio_extflg = UIO_COPY_CACHED;
2748 uio.uio_loffset = (offset_t)args->cookie;
2749 uio.uio_resid = count;
2750
2751 error = VOP_READDIR(vp, &uio, cr, &iseof);
2752
2753 #ifdef DEBUG
2754 if (rfs3_do_post_op_attr) {
2755 va.va_mask = AT_ALL;
2756 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2757 } else
2758 vap = NULL;
2759 #else
2760 va.va_mask = AT_ALL;
2761 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2762 #endif
2763
2764 if (error) {
2765 kmem_free(data, count);
2766 goto out;
2767 }
2768
2769 /*
2770 * If the count was not large enough to be able to guarantee
2771 * to be able to return at least one entry, then need to
2772 * check to see if NFS3ERR_TOOSMALL should be returned.
2773 */
2774 if (args->count < NFS3_READDIR_MIN_COUNT(MAXNAMELEN)) {
2775 /*
2776 * bufsize is used to keep track of the size of the response.
2777 * It is primed with:
2778 * 1 for the status +
2779 * 1 for the dir_attributes.attributes boolean +
2780 * 2 for the cookie verifier
2781 * all times BYTES_PER_XDR_UNIT to convert from XDR units
2782 * to bytes. If there are directory attributes to be
2783 * returned, then:
2784 * NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
2785 * time BYTES_PER_XDR_UNIT is added to account for them.
2786 */
2787 bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
2788 if (vap != NULL)
2789 bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
2790 /*
2791 * An entry is composed of:
2792 * 1 for the true/false list indicator +
2793 * 2 for the fileid +
2794 * 1 for the length of the name +
2795 * 2 for the cookie +
2796 * all times BYTES_PER_XDR_UNIT to convert from
2797 * XDR units to bytes, plus the length of the name
2798 * rounded up to the nearest BYTES_PER_XDR_UNIT.
2799 */
2800 if (count != uio.uio_resid) {
2801 namlen = strlen(((struct dirent64 *)data)->d_name);
2802 bufsize += (1 + 2 + 1 + 2) * BYTES_PER_XDR_UNIT +
2803 roundup(namlen, BYTES_PER_XDR_UNIT);
2804 }
2805 /*
2806 * We need to check to see if the number of bytes left
2807 * to go into the buffer will actually fit into the
2808 * buffer. This is calculated as the size of this
2809 * entry plus:
2810 * 1 for the true/false list indicator +
2811 * 1 for the eof indicator
2812 * times BYTES_PER_XDR_UNIT to convert from from
2813 * XDR units to bytes.
2814 */
2815 bufsize += (1 + 1) * BYTES_PER_XDR_UNIT;
2816 if (bufsize > args->count) {
2817 kmem_free(data, count);
2818 resp->status = NFS3ERR_TOOSMALL;
2819 goto out1;
2820 }
2821 }
2822
2823 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
2824
2825 #if 0 /* notyet */
2826 /*
2827 * Don't do this. It causes local disk writes when just
2828 * reading the file and the overhead is deemed larger
2829 * than the benefit.
2830 */
2831 /*
2832 * Force modified metadata out to stable storage.
2833 */
2834 (void) VOP_FSYNC(vp, FNODSYNC, cr);
2835 #endif
2836
2837 VN_RELE(vp);
2838
2839 resp->status = NFS3_OK;
2840 vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
2841 resp->resok.cookieverf = 0;
2842 resp->resok.reply.entries = (entry3 *)data;
2843 resp->resok.reply.eof = iseof;
2844 resp->resok.size = count - uio.uio_resid;
2845 resp->resok.count = args->count;
2846 resp->resok.freecount = count;
2847 return;
2848
2849 out:
2850 if (curthread->t_flag & T_WOULDBLOCK) {
2851 curthread->t_flag &= ~T_WOULDBLOCK;
2852 resp->status = NFS3ERR_JUKEBOX;
2853 } else
2854 resp->status = puterrno3(error);
2855 out1:
2856 if (vp != NULL) {
2857 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
2858 VN_RELE(vp);
2859 }
2860 vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
2861 }
2862
2863 fhandle_t *
2864 rfs3_readdir_getfh(READDIR3args *args)
2865 {
2866
2867 return ((fhandle_t *)&args->dir.fh3_u.nfs_fh3_i.fh3_i);
2868 }
2869
2870 void
2871 rfs3_readdir_free(READDIR3res *resp)
2872 {
2873
2874 if (resp->status == NFS3_OK)
2875 kmem_free(resp->resok.reply.entries, resp->resok.freecount);
2876 }
2877
/*
 * nextdp(dp) yields a pointer to the dirent64 that follows dp, found by
 * stepping dp->d_reclen bytes forward.  Any earlier definition of the
 * macro is discarded first (#undef of an undefined name is harmless).
 */
#undef	nextdp
#define	nextdp(dp)	\
	((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
2882
/*
 * NFS3_READDIRPLUS_ENTRY(namelen) computes the size, in bytes, of one
 * directory entry in a READDIRPLUS response, including its attributes
 * and its file handle.  If the space remaining for the response is at
 * least this large, then we are guaranteed to be able to return at
 * least one more directory entry if one exists.
 *
 * The value is the sum of the following, in XDR units:
 *
 *	boolean				- 1
 *	file id				- 2
 *	directory name length		- 1
 *	cookie				- 2
 *	attribute flag			- 1
 *	attributes			- NFS3_SIZEOF_FATTR3
 *	status byte for file handle	- 1
 *	length of a file handle		- 1
 *
 * multiplied by BYTES_PER_XDR_UNIT, plus the maximum length of a file
 * handle (NFS3_CURFHSIZE), plus the length of the entry name rounded up
 * to a multiple of BYTES_PER_XDR_UNIT.
 */
#define	NFS3_READDIRPLUS_ENTRY(namelen)	\
	(BYTES_PER_XDR_UNIT * \
	(1 + 2 + 1 + 2 + 1 + NFS3_SIZEOF_FATTR3 + 1 + 1) + \
	NFS3_CURFHSIZE + roundup((namelen), BYTES_PER_XDR_UNIT))
2906
2907 static int rfs3_readdir_unit = MAXBSIZE;
2908
2909 /* ARGSUSED */
2910 void
2911 rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp,
2912 struct exportinfo *exi, struct svc_req *req, cred_t *cr)
2913 {
2914 int error;
2915 vnode_t *vp;
2916 struct vattr *vap;
2917 struct vattr va;
2918 struct iovec iov;
2919 struct uio uio;
2920 char *data;
2921 int iseof;
2922 struct dirent64 *dp;
2923 vnode_t *nvp;
2924 struct vattr *nvap;
2925 struct vattr nva;
2926 entryplus3_info *infop = NULL;
2927 int size = 0;
2928 int nents = 0;
2929 int bufsize = 0;
2930 int entrysize = 0;
2931 int tofit = 0;
2932 int rd_unit = rfs3_readdir_unit;
2933 int prev_len;
2934 int space_left;
2935 int i;
2936 uint_t *namlen = NULL;
2937
2938 vap = NULL;
2939
2940 vp = nfs3_fhtovp(&args->dir, exi);
2941 if (vp == NULL) {
2942 error = ESTALE;
2943 goto out;
2944 }
2945
2946 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
2947
2948 #ifdef DEBUG
2949 if (rfs3_do_pre_op_attr) {
2950 va.va_mask = AT_ALL;
2951 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2952 } else
2953 vap = NULL;
2954 #else
2955 va.va_mask = AT_ALL;
2956 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2957 #endif
2958
2959 if (vp->v_type != VDIR) {
2960 error = ENOTDIR;
2961 goto out;
2962 }
2963
2964 error = VOP_ACCESS(vp, VREAD, 0, cr);
2965 if (error)
2966 goto out;
2967
2968 /*
2969 * Don't allow arbitrary counts for allocation
2970 */
2971 if (args->maxcount > rfs3_tsize(req))
2972 args->maxcount = rfs3_tsize(req);
2973
2974 /*
2975 * Make sure that there is room to read at least one entry
2976 * if any are available
2977 */
2978 args->dircount = MIN(args->dircount, args->maxcount);
2979
2980 if (args->dircount < DIRENT64_RECLEN(MAXNAMELEN))
2981 args->dircount = DIRENT64_RECLEN(MAXNAMELEN);
2982
2983 /*
2984 * This allocation relies on a minimum directory entry
2985 * being roughly 24 bytes. Therefore, the namlen array
2986 * will have enough space based on the maximum number of
2987 * entries to read.
2988 */
2989 namlen = kmem_alloc(args->dircount, KM_SLEEP);
2990
2991 space_left = args->dircount;
2992 data = kmem_alloc(args->dircount, KM_SLEEP);
2993 dp = (struct dirent64 *)data;
2994 uio.uio_iov = &iov;
2995 uio.uio_iovcnt = 1;
2996 uio.uio_segflg = UIO_SYSSPACE;
2997 uio.uio_extflg = UIO_COPY_CACHED;
2998 uio.uio_loffset = (offset_t)args->cookie;
2999
3000 /*
3001 * bufsize is used to keep track of the size of the response as we
3002 * get post op attributes and filehandles for each entry. This is
3003 * an optimization as the server may have read more entries than will
3004 * fit in the buffer specified by maxcount. We stop calculating
3005 * post op attributes and filehandles once we have exceeded maxcount.
3006 * This will minimize the effect of truncation.
3007 *
3008 * It is primed with:
3009 * 1 for the status +
3010 * 1 for the dir_attributes.attributes boolean +
3011 * 2 for the cookie verifier
3012 * all times BYTES_PER_XDR_UNIT to convert from XDR units
3013 * to bytes. If there are directory attributes to be
3014 * returned, then:
3015 * NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3016 * time BYTES_PER_XDR_UNIT is added to account for them.
3017 */
3018 bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3019 if (vap != NULL)
3020 bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3021
3022 getmoredents:
3023 /*
3024 * Here we make a check so that our read unit is not larger than
3025 * the space left in the buffer.
3026 */
3027 rd_unit = MIN(rd_unit, space_left);
3028 iov.iov_base = (char *)dp;
3029 iov.iov_len = rd_unit;
3030 uio.uio_resid = rd_unit;
3031 prev_len = rd_unit;
3032
3033 error = VOP_READDIR(vp, &uio, cr, &iseof);
3034
3035 if (error) {
3036 kmem_free(data, args->dircount);
3037 goto out;
3038 }
3039
3040 if (uio.uio_resid == prev_len && !iseof) {
3041 if (nents == 0) {
3042 kmem_free(data, args->dircount);
3043 resp->status = NFS3ERR_TOOSMALL;
3044 goto out1;
3045 }
3046
3047 /*
3048 * We could not get any more entries, so get the attributes
3049 * and filehandle for the entries already obtained.
3050 */
3051 goto good;
3052 }
3053
3054 /*
3055 * We estimate the size of the response by assuming the
3056 * entry exists and attributes and filehandle are also valid
3057 */
3058 for (size = prev_len - uio.uio_resid;
3059 size > 0;
3060 size -= dp->d_reclen, dp = nextdp(dp)) {
3061
3062 if (dp->d_ino == 0) {
3063 nents++;
3064 continue;
3065 }
3066
3067 namlen[nents] = strlen(dp->d_name);
3068 entrysize = NFS3_READDIRPLUS_ENTRY(namlen[nents]);
3069
3070 /*
3071 * We need to check to see if the number of bytes left
3072 * to go into the buffer will actually fit into the
3073 * buffer. This is calculated as the size of this
3074 * entry plus:
3075 * 1 for the true/false list indicator +
3076 * 1 for the eof indicator
3077 * times BYTES_PER_XDR_UNIT to convert from XDR units
3078 * to bytes.
3079 *
3080 * Also check the dircount limit against the first entry read
3081 *
3082 */
3083 tofit = entrysize + (1 + 1) * BYTES_PER_XDR_UNIT;
3084 if (bufsize + tofit > args->maxcount) {
3085 /*
3086 * We make a check here to see if this was the
3087 * first entry being measured. If so, then maxcount
3088 * was too small to begin with and so we need to
3089 * return with NFS3ERR_TOOSMALL.
3090 */
3091 if (nents == 0) {
3092 kmem_free(data, args->dircount);
3093 resp->status = NFS3ERR_TOOSMALL;
3094 goto out1;
3095 }
3096 iseof = FALSE;
3097 goto good;
3098 }
3099 bufsize += entrysize;
3100 nents++;
3101 }
3102
3103 /*
3104 * If there is enough room to fit at least 1 more entry including
3105 * post op attributes and filehandle in the buffer AND that we haven't
3106 * exceeded dircount then go back and get some more.
3107 */
3108 if (!iseof &&
3109 (args->maxcount - bufsize) >= NFS3_READDIRPLUS_ENTRY(MAXNAMELEN)) {
3110 space_left -= (prev_len - uio.uio_resid);
3111 if (space_left >= DIRENT64_RECLEN(MAXNAMELEN))
3112 goto getmoredents;
3113
3114 /* else, fall through */
3115 }
3116
3117 good:
3118
3119 #ifdef DEBUG
3120 if (rfs3_do_post_op_attr) {
3121 va.va_mask = AT_ALL;
3122 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3123 } else
3124 vap = NULL;
3125 #else
3126 va.va_mask = AT_ALL;
3127 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3128 #endif
3129
3130 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3131
3132 infop = kmem_alloc(nents * sizeof (struct entryplus3_info), KM_SLEEP);
3133 resp->resok.infop = infop;
3134
3135 dp = (struct dirent64 *)data;
3136 for (i = 0; i < nents; i++) {
3137
3138 if (dp->d_ino == 0) {
3139 infop[i].attr.attributes = FALSE;
3140 infop[i].fh.handle_follows = FALSE;
3141 dp = nextdp(dp);
3142 continue;
3143 }
3144
3145 infop[i].namelen = namlen[i];
3146
3147 error = VOP_LOOKUP(vp, dp->d_name, &nvp, NULL, 0, NULL, cr);
3148 if (error) {
3149 infop[i].attr.attributes = FALSE;
3150 infop[i].fh.handle_follows = FALSE;
3151 dp = nextdp(dp);
3152 continue;
3153 }
3154
3155 #ifdef DEBUG
3156 if (rfs3_do_post_op_attr) {
3157 nva.va_mask = AT_ALL;
3158 nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ?
3159 NULL : &nva;
3160 } else
3161 nvap = NULL;
3162 #else
3163 nva.va_mask = AT_ALL;
3164 nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? NULL : &nva;
3165 #endif
3166 vattr_to_post_op_attr(nvap, &infop[i].attr);
3167
3168 #ifdef DEBUG
3169 if (!rfs3_do_post_op_fh3)
3170 infop[i].fh.handle_follows = FALSE;
3171 else {
3172 #endif
3173 error = makefh3(&infop[i].fh.handle, nvp, exi);
3174 if (!error)
3175 infop[i].fh.handle_follows = TRUE;
3176 else
3177 infop[i].fh.handle_follows = FALSE;
3178 #ifdef DEBUG
3179 }
3180 #endif
3181
3182 VN_RELE(nvp);
3183 dp = nextdp(dp);
3184 }
3185
3186 #if 0 /* notyet */
3187 /*
3188 * Don't do this. It causes local disk writes when just
3189 * reading the file and the overhead is deemed larger
3190 * than the benefit.
3191 */
3192 /*
3193 * Force modified metadata out to stable storage.
3194 */
3195 (void) VOP_FSYNC(vp, FNODSYNC, cr);
3196 #endif
3197
3198 VN_RELE(vp);
3199
3200 kmem_free(namlen, args->dircount);
3201
3202 resp->status = NFS3_OK;
3203 vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3204 resp->resok.cookieverf = 0;
3205 resp->resok.reply.entries = (entryplus3 *)data;
3206 resp->resok.reply.eof = iseof;
3207 resp->resok.size = nents;
3208 resp->resok.count = args->dircount;
3209 resp->resok.maxcount = args->maxcount;
3210 return;
3211
3212 out:
3213 if (curthread->t_flag & T_WOULDBLOCK) {
3214 curthread->t_flag &= ~T_WOULDBLOCK;
3215 resp->status = NFS3ERR_JUKEBOX;
3216 } else
3217 resp->status = puterrno3(error);
3218 out1:
3219 if (vp != NULL) {
3220 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3221 VN_RELE(vp);
3222 }
3223
3224 if (namlen != NULL)
3225 kmem_free(namlen, args->dircount);
3226
3227 vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3228 }
3229
3230 fhandle_t *
3231 rfs3_readdirplus_getfh(READDIRPLUS3args *args)
3232 {
3233
3234 return ((fhandle_t *)&args->dir.fh3_u.nfs_fh3_i.fh3_i);
3235 }
3236
3237 void
3238 rfs3_readdirplus_free(READDIRPLUS3res *resp)
3239 {
3240
3241 if (resp->status == NFS3_OK) {
3242 kmem_free(resp->resok.reply.entries, resp->resok.count);
3243 kmem_free(resp->resok.infop,
3244 resp->resok.size * sizeof (struct entryplus3_info));
3245 }
3246 }
3247
3248 /* ARGSUSED */
3249 void
3250 rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi,
3251 struct svc_req *req, cred_t *cr)
3252 {
3253 int error;
3254 vnode_t *vp;
3255 struct vattr *vap;
3256 struct vattr va;
3257 struct statvfs64 sb;
3258
3259 vap = NULL;
3260
3261 vp = nfs3_fhtovp(&args->fsroot, exi);
3262 if (vp == NULL) {
3263 error = ESTALE;
3264 goto out;
3265 }
3266
3267 error = VFS_STATVFS(vp->v_vfsp, &sb);
3268
3269 #ifdef DEBUG
3270 if (rfs3_do_post_op_attr) {
3271 va.va_mask = AT_ALL;
3272 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3273 } else
3274 vap = NULL;
3275 #else
3276 va.va_mask = AT_ALL;
3277 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3278 #endif
3279
3280 VN_RELE(vp);
3281
3282 if (error)
3283 goto out;
3284
3285 resp->status = NFS3_OK;
3286 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3287 if (sb.f_blocks != (fsblkcnt64_t)-1)
3288 resp->resok.tbytes = (size3)sb.f_frsize * (size3)sb.f_blocks;
3289 else
3290 resp->resok.tbytes = (size3)sb.f_blocks;
3291 if (sb.f_bfree != (fsblkcnt64_t)-1)
3292 resp->resok.fbytes = (size3)sb.f_frsize * (size3)sb.f_bfree;
3293 else
3294 resp->resok.fbytes = (size3)sb.f_bfree;
3295 if (sb.f_bavail != (fsblkcnt64_t)-1)
3296 resp->resok.abytes = (size3)sb.f_frsize * (size3)sb.f_bavail;
3297 else
3298 resp->resok.abytes = (size3)sb.f_bavail;
3299 resp->resok.tfiles = (size3)sb.f_files;
3300 resp->resok.ffiles = (size3)sb.f_ffree;
3301 resp->resok.afiles = (size3)sb.f_favail;
3302 resp->resok.invarsec = 0;
3303 return;
3304
3305 out:
3306 if (curthread->t_flag & T_WOULDBLOCK) {
3307 curthread->t_flag &= ~T_WOULDBLOCK;
3308 resp->status = NFS3ERR_JUKEBOX;
3309 } else
3310 resp->status = puterrno3(error);
3311 vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3312 }
3313
3314 fhandle_t *
3315 rfs3_fsstat_getfh(FSSTAT3args *args)
3316 {
3317
3318 return ((fhandle_t *)&args->fsroot.fh3_u.nfs_fh3_i.fh3_i);
3319 }
3320
3321 /* ARGSUSED */
3322 void
3323 rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi,
3324 struct svc_req *req, cred_t *cr)
3325 {
3326 vnode_t *vp;
3327 struct vattr *vap;
3328 struct vattr va;
3329 uint32_t xfer_size;
3330 ulong_t l = 0;
3331 int error;
3332
3333 vp = nfs3_fhtovp(&args->fsroot, exi);
3334 if (vp == NULL) {
3335 if (curthread->t_flag & T_WOULDBLOCK) {
3336 curthread->t_flag &= ~T_WOULDBLOCK;
3337 resp->status = NFS3ERR_JUKEBOX;
3338 } else
3339 resp->status = NFS3ERR_STALE;
3340 vattr_to_post_op_attr(NULL, &resp->resfail.obj_attributes);
3341 return;
3342 }
3343
3344 #ifdef DEBUG
3345 if (rfs3_do_post_op_attr) {
3346 va.va_mask = AT_ALL;
3347 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3348 } else
3349 vap = NULL;
3350 #else
3351 va.va_mask = AT_ALL;
3352 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3353 #endif
3354
3355 resp->status = NFS3_OK;
3356 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3357 xfer_size = rfs3_tsize(req);
3358 resp->resok.rtmax = xfer_size;
3359 resp->resok.rtpref = xfer_size;
3360 resp->resok.rtmult = DEV_BSIZE;
3361 resp->resok.wtmax = xfer_size;
3362 resp->resok.wtpref = xfer_size;
3363 resp->resok.wtmult = DEV_BSIZE;
3364 resp->resok.dtpref = MAXBSIZE;
3365
3366 /*
3367 * Large file spec: want maxfilesize based on limit of
3368 * underlying filesystem. We can guess 2^31-1 if need be.
3369 */
3370 error = VOP_PATHCONF(vp, _PC_FILESIZEBITS, &l, cr);
3371
3372 VN_RELE(vp);
3373
3374 if (!error && l != 0 && l <= 64)
3375 resp->resok.maxfilesize = (1LL << (l-1)) - 1;
3376 else
3377 resp->resok.maxfilesize = MAXOFF32_T;
3378
3379 resp->resok.time_delta.seconds = 0;
3380 resp->resok.time_delta.nseconds = 1000;
3381 resp->resok.properties = FSF3_LINK | FSF3_SYMLINK |
3382 FSF3_HOMOGENEOUS | FSF3_CANSETTIME;
3383 }
3384
3385 fhandle_t *
3386 rfs3_fsinfo_getfh(FSINFO3args *args)
3387 {
3388
3389 return ((fhandle_t *)&args->fsroot.fh3_u.nfs_fh3_i.fh3_i);
3390 }
3391
3392 /* ARGSUSED */
3393 void
3394 rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi,
3395 struct svc_req *req, cred_t *cr)
3396 {
3397 int error;
3398 vnode_t *vp;
3399 struct vattr *vap;
3400 struct vattr va;
3401 ulong_t val;
3402
3403 vap = NULL;
3404
3405 vp = nfs3_fhtovp(&args->object, exi);
3406 if (vp == NULL) {
3407 error = ESTALE;
3408 goto out;
3409 }
3410
3411 #ifdef DEBUG
3412 if (rfs3_do_post_op_attr) {
3413 va.va_mask = AT_ALL;
3414 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3415 } else
3416 vap = NULL;
3417 #else
3418 va.va_mask = AT_ALL;
3419 vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3420 #endif
3421
3422 error = VOP_PATHCONF(vp, _PC_LINK_MAX, &val, cr);
3423 if (error)
3424 goto out;
3425 resp->resok.info.link_max = (uint32)val;
3426
3427 error = VOP_PATHCONF(vp, _PC_NAME_MAX, &val, cr);
3428 if (error)
3429 goto out;
3430 resp->resok.info.name_max = (uint32)val;
3431
3432 error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &val, cr);
3433 if (error)
3434 goto out;
3435 if (val == 1)
3436 resp->resok.info.no_trunc = TRUE;
3437 else
3438 resp->resok.info.no_trunc = FALSE;
3439
3440 error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &val, cr);
3441 if (error)
3442 goto out;
3443 if (val == 1)
3444 resp->resok.info.chown_restricted = TRUE;
3445 else
3446 resp->resok.info.chown_restricted = FALSE;
3447
3448 VN_RELE(vp);
3449
3450 resp->status = NFS3_OK;
3451 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3452 resp->resok.info.case_insensitive = FALSE;
3453 resp->resok.info.case_preserving = TRUE;
3454 return;
3455
3456 out:
3457 if (curthread->t_flag & T_WOULDBLOCK) {
3458 curthread->t_flag &= ~T_WOULDBLOCK;
3459 resp->status = NFS3ERR_JUKEBOX;
3460 } else
3461 resp->status = puterrno3(error);
3462 if (vp != NULL)
3463 VN_RELE(vp);
3464 vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3465 }
3466
3467 fhandle_t *
3468 rfs3_pathconf_getfh(PATHCONF3args *args)
3469 {
3470
3471 return ((fhandle_t *)&args->object.fh3_u.nfs_fh3_i.fh3_i);
3472 }
3473
3474 void
3475 rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi,
3476 struct svc_req *req, cred_t *cr)
3477 {
3478 int error;
3479 vnode_t *vp;
3480 struct vattr *bvap;
3481 struct vattr bva;
3482 struct vattr *avap;
3483 struct vattr ava;
3484
3485 bvap = NULL;
3486 avap = NULL;
3487
3488 vp = nfs3_fhtovp(&args->file, exi);
3489 if (vp == NULL) {
3490 error = ESTALE;
3491 goto out;
3492 }
3493
3494 bva.va_mask = AT_ALL;
3495 error = VOP_GETATTR(vp, &bva, 0, cr);
3496
3497 /*
3498 * If we can't get the attributes, then we can't do the
3499 * right access checking. So, we'll fail the request.
3500 */
3501 if (error)
3502 goto out;
3503
3504 #ifdef DEBUG
3505 if (rfs3_do_pre_op_attr)
3506 bvap = &bva;
3507 else
3508 bvap = NULL;
3509 #else
3510 bvap = &bva;
3511 #endif
3512
3513 if (rdonly(exi, req)) {
3514 resp->status = NFS3ERR_ROFS;
3515 goto out1;
3516 }
3517
3518 if (vp->v_type != VREG) {
3519 resp->status = NFS3ERR_INVAL;
3520 goto out1;
3521 }
3522
3523 if (crgetuid(cr) != bva.va_uid &&
3524 (error = VOP_ACCESS(vp, VWRITE, 0, cr)))
3525 goto out;
3526
3527 error = VOP_PUTPAGE(vp, args->offset, args->count, 0, cr);
3528 if (!error)
3529 error = VOP_FSYNC(vp, FNODSYNC, cr);
3530
3531 #ifdef DEBUG
3532 if (rfs3_do_post_op_attr) {
3533 ava.va_mask = AT_ALL;
3534 avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
3535 } else
3536 avap = NULL;
3537 #else
3538 ava.va_mask = AT_ALL;
3539 avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
3540 #endif
3541
3542 if (error)
3543 goto out;
3544
3545 VN_RELE(vp);
3546
3547 resp->status = NFS3_OK;
3548 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
3549 resp->resok.verf = write3verf;
3550 return;
3551
3552 out:
3553 if (curthread->t_flag & T_WOULDBLOCK) {
3554 curthread->t_flag &= ~T_WOULDBLOCK;
3555 resp->status = NFS3ERR_JUKEBOX;
3556 } else
3557 resp->status = puterrno3(error);
3558 out1:
3559 if (vp != NULL)
3560 VN_RELE(vp);
3561 vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
3562 }
3563
3564 fhandle_t *
3565 rfs3_commit_getfh(COMMIT3args *args)
3566 {
3567
3568 return ((fhandle_t *)&args->file.fh3_u.nfs_fh3_i.fh3_i);
3569 }
3570
3571 static int
3572 sattr3_to_vattr(sattr3 *sap, struct vattr *vap)
3573 {
3574
3575 vap->va_mask = 0;
3576
3577 if (sap->mode.set_it) {
3578 vap->va_mode = (mode_t)sap->mode.mode;
3579 vap->va_mask |= AT_MODE;
3580 }
3581 if (sap->uid.set_it) {
3582 vap->va_uid = (uid_t)sap->uid.uid;
3583 vap->va_mask |= AT_UID;
3584 }
3585 if (sap->gid.set_it) {
3586 vap->va_gid = (gid_t)sap->gid.gid;
3587 vap->va_mask |= AT_GID;
3588 }
3589 if (sap->size.set_it) {
3590 if (sap->size.size > (size3)((u_longlong_t)-1))
3591 return (EINVAL);
3592 vap->va_size = sap->size.size;
3593 vap->va_mask |= AT_SIZE;
3594 }
3595 if (sap->atime.set_it == SET_TO_CLIENT_TIME) {
3596 #ifndef _LP64
3597 /* check time validity */
3598 if (!NFS3_TIME_OK(sap->atime.atime.seconds))
3599 return (EOVERFLOW);
3600 #endif
3601 /*
3602 * nfs protocol defines times as unsigned so don't extend sign,
3603 * unless sysadmin set nfs_allow_preepoch_time.
3604 */
3605 NFS_TIME_T_CONVERT(vap->va_atime.tv_sec,
3606 sap->atime.atime.seconds);
3607 vap->va_atime.tv_nsec = (uint32_t)sap->atime.atime.nseconds;
3608 vap->va_mask |= AT_ATIME;
3609 } else if (sap->atime.set_it == SET_TO_SERVER_TIME) {
3610 gethrestime(&vap->va_atime);
3611 vap->va_mask |= AT_ATIME;
3612 }
3613 if (sap->mtime.set_it == SET_TO_CLIENT_TIME) {
3614 #ifndef _LP64
3615 /* check time validity */
3616 if (!NFS3_TIME_OK(sap->mtime.mtime.seconds))
3617 return (EOVERFLOW);
3618 #endif
3619 /*
3620 * nfs protocol defines times as unsigned so don't extend sign,
3621 * unless sysadmin set nfs_allow_preepoch_time.
3622 */
3623 NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec,
3624 sap->mtime.mtime.seconds);
3625 vap->va_mtime.tv_nsec = (uint32_t)sap->mtime.mtime.nseconds;
3626 vap->va_mask |= AT_MTIME;
3627 } else if (sap->mtime.set_it == SET_TO_SERVER_TIME) {
3628 gethrestime(&vap->va_mtime);
3629 vap->va_mask |= AT_MTIME;
3630 }
3631
3632 return (0);
3633 }
3634
3635 static ftype3 vt_to_nf3[] = {
3636 0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0
3637 };
3638
3639 static int
3640 vattr_to_fattr3(struct vattr *vap, fattr3 *fap)
3641 {
3642
3643 ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
3644 /* Return error if time or size overflow */
3645 if (! (NFS_VAP_TIME_OK(vap) && NFS3_SIZE_OK(vap->va_size))) {
3646 return (EOVERFLOW);
3647 }
3648 fap->type = vt_to_nf3[vap->va_type];
3649 fap->mode = (mode3)(vap->va_mode & MODEMASK);
3650 fap->nlink = (uint32)vap->va_nlink;
3651 if (vap->va_uid == UID_NOBODY)
3652 fap->uid = (uid3)NFS_UID_NOBODY;
3653 else
3654 fap->uid = (uid3)vap->va_uid;
3655 if (vap->va_gid == GID_NOBODY)
3656 fap->gid = (gid3)NFS_GID_NOBODY;
3657 else
3658 fap->gid = (gid3)vap->va_gid;
3659 fap->size = (size3)vap->va_size;
3660 fap->used = (size3)DEV_BSIZE * (size3)vap->va_nblocks;
3661 fap->rdev.specdata1 = (uint32)getmajor(vap->va_rdev);
3662 fap->rdev.specdata2 = (uint32)getminor(vap->va_rdev);
3663 fap->fsid = (uint64)vap->va_fsid;
3664 fap->fileid = (fileid3)vap->va_nodeid;
3665 fap->atime.seconds = vap->va_atime.tv_sec;
3666 fap->atime.nseconds = vap->va_atime.tv_nsec;
3667 fap->mtime.seconds = vap->va_mtime.tv_sec;
3668 fap->mtime.nseconds = vap->va_mtime.tv_nsec;
3669 fap->ctime.seconds = vap->va_ctime.tv_sec;
3670 fap->ctime.nseconds = vap->va_ctime.tv_nsec;
3671 return (0);
3672 }
3673
3674 static int
3675 vattr_to_wcc_attr(struct vattr *vap, wcc_attr *wccap)
3676 {
3677
3678 /* Return error if time or size overflow */
3679 if (! (NFS_TIME_T_OK(vap->va_mtime.tv_sec) &&
3680 NFS_TIME_T_OK(vap->va_ctime.tv_sec) &&
3681 NFS3_SIZE_OK(vap->va_size))) {
3682 return (EOVERFLOW);
3683 }
3684 wccap->size = (size3)vap->va_size;
3685 wccap->mtime.seconds = vap->va_mtime.tv_sec;
3686 wccap->mtime.nseconds = vap->va_mtime.tv_nsec;
3687 wccap->ctime.seconds = vap->va_ctime.tv_sec;
3688 wccap->ctime.nseconds = vap->va_ctime.tv_nsec;
3689 return (0);
3690 }
3691
3692 static void
3693 vattr_to_pre_op_attr(struct vattr *vap, pre_op_attr *poap)
3694 {
3695
3696 /* don't return attrs if time overflow */
3697 if ((vap != NULL) && !vattr_to_wcc_attr(vap, &poap->attr)) {
3698 poap->attributes = TRUE;
3699 } else
3700 poap->attributes = FALSE;
3701 }
3702
3703 void
3704 vattr_to_post_op_attr(struct vattr *vap, post_op_attr *poap)
3705 {
3706
3707 /* don't return attrs if time overflow */
3708 if ((vap != NULL) && !vattr_to_fattr3(vap, &poap->attr)) {
3709 poap->attributes = TRUE;
3710 } else
3711 poap->attributes = FALSE;
3712 }
3713
3714 static void
3715 vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp)
3716 {
3717
3718 vattr_to_pre_op_attr(bvap, &wccp->before);
3719 vattr_to_post_op_attr(avap, &wccp->after);
3720 }
3721
3722 void
3723 rfs3_srvrinit(void)
3724 {
3725 struct rfs3_verf_overlay {
3726 uint_t id; /* a "unique" identifier */
3727 int ts; /* a unique timestamp */
3728 } *verfp;
3729 timestruc_t now;
3730
3731 /*
3732 * The following algorithm attempts to find a unique verifier
3733 * to be used as the write verifier returned from the server
3734 * to the client. It is important that this verifier change
3735 * whenever the server reboots. Of secondary importance, it
3736 * is important for the verifier to be unique between two
3737 * different servers.
3738 *
3739 * Thus, an attempt is made to use the system hostid and the
3740 * current time in seconds when the nfssrv kernel module is
3741 * loaded. It is assumed that an NFS server will not be able
3742 * to boot and then to reboot in less than a second. If the
3743 * hostid has not been set, then the current high resolution
3744 * time is used. This will ensure different verifiers each
3745 * time the server reboots and minimize the chances that two
3746 * different servers will have the same verifier.
3747 */
3748
3749 #ifndef lint
3750 /*
3751 * We ASSERT that this constant logic expression is
3752 * always true because in the past, it wasn't.
3753 */
3754 ASSERT(sizeof (*verfp) <= sizeof (write3verf));
3755 #endif
3756
3757 gethrestime(&now);
3758 verfp = (struct rfs3_verf_overlay *)&write3verf;
3759 verfp->ts = (int)now.tv_sec;
3760 verfp->id = (uint_t)nfs_atoi(hw_serial);
3761
3762 if (verfp->id == 0)
3763 verfp->id = (uint_t)now.tv_nsec;
3764
3765 }
3766
/*
 * Counterpart of rfs3_srvrinit().  rfs3_srvrinit() only fills in
 * write3verf, so there is nothing to undo here.
 */
void
rfs3_srvrfini(void)
{
	/* Nothing to do */
}
3772