<carry>: Umount volumes with force

In 4.5, NFS driver pod used 'clientaddr=<pod ip>' as a mount option.
When such pod restarts, it gets a different IP address and volumes mounted
by the old pod are basically broken - all operations (stat, umount) will
just hang forever.

Therefore:

1. Do not use IsLikelyNotMountPoint - it runs stat() on the volume, which
could hang forever. Read /etc/mounts instead.

2. Use "umount -f" to forcefully remove NFS mount.

Carry at least for whole OCP 4.6, where we need to support move from 4.5.
This commit is contained in:
Jan Safranek 2020-08-19 16:56:24 +02:00
parent 2f48abf136
commit 59fe400d43
1 changed files with 77 additions and 12 deletions

View File

@ -19,7 +19,9 @@ package nfs
import (
"fmt"
"os"
"os/exec"
"strings"
"time"
"github.com/golang/glog"
@ -35,6 +37,11 @@ type nodeServer struct {
mounter mount.Interface
}
const (
// Deadline for unmount. After this time, umount -f is performed.
unmountTimeout = time.Minute
)
func (ns *nodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublishVolumeRequest) (*csi.NodePublishVolumeResponse, error) {
targetPath := req.GetTargetPath()
notMnt, err := ns.mounter.IsLikelyNotMountPoint(targetPath)
@ -82,29 +89,87 @@ func (ns *nodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublis
return &csi.NodePublishVolumeResponse{}, nil
}
func (ns *nodeServer) NodeUnpublishVolume(ctx context.Context, req *csi.NodeUnpublishVolumeRequest) (*csi.NodeUnpublishVolumeResponse, error) {
targetPath := req.GetTargetPath()
notMnt, err := ns.mounter.IsLikelyNotMountPoint(targetPath)
func (ns *nodeServer) IsNotMountPoint(path string) (bool, error) {
mtab, err := ns.mounter.List()
if err != nil {
if os.IsNotExist(err) {
return nil, status.Error(codes.NotFound, "Targetpath not found")
} else {
return nil, status.Error(codes.Internal, err.Error())
return false, err
}
for _, mnt := range mtab {
// This is how a directory deleted on the NFS server looks like
deletedDir := fmt.Sprintf("%s\\040(deleted)", mnt.Path)
if mnt.Path == path || mnt.Path == deletedDir {
return false, nil
}
}
if notMnt {
return nil, status.Error(codes.NotFound, "Volume not mounted")
}
return true, nil
}
err = mount.CleanupMountPoint(req.GetTargetPath(), ns.mounter, false)
func (ns *nodeServer) NodeUnpublishVolume(ctx context.Context, req *csi.NodeUnpublishVolumeRequest) (*csi.NodeUnpublishVolumeResponse, error) {
targetPath := req.GetTargetPath()
glog.V(6).Infof("NodeUnpublishVolume started for %s", targetPath)
notMnt, err := ns.IsNotMountPoint(targetPath)
if err != nil {
return nil, status.Error(codes.Internal, err.Error())
}
glog.V(4).Infof("NodeUnpublishVolume: path %s is *not* a mount point: %t", targetPath, notMnt)
if !notMnt {
err := ns.tryUnmount(targetPath)
if err != nil {
if err == context.DeadlineExceeded {
glog.V(2).Infof("Timed out waiting for unmount of %s, trying with -f", targetPath)
err = ns.forceUnmount(targetPath)
}
}
if err != nil {
return nil, status.Error(codes.Internal, err.Error())
}
glog.V(2).Infof("Unmounted %s", targetPath)
}
if err := os.Remove(targetPath); err != nil {
if !os.IsNotExist(err) {
return nil, status.Error(codes.Internal, err.Error())
}
}
glog.V(4).Infof("Cleaned %s", targetPath)
return &csi.NodeUnpublishVolumeResponse{}, nil
}
// tryUnmount calls plain "umount" and waits for unmountTimeout for it to finish.
func (ns *nodeServer) tryUnmount(path string) error {
ctx, cancel := context.WithTimeout(context.Background(), unmountTimeout)
defer cancel()
cmd := exec.CommandContext(ctx, "umount", path)
out, cmderr := cmd.CombinedOutput()
// CombinedOutput() does not return DeadlineExceeded, make sure it's
// propagated on timeout.
if ctx.Err() != nil {
return ctx.Err()
}
if cmderr != nil {
return fmt.Errorf("failed to unmount volume: %s: %s", cmderr, string(out))
}
return nil
}
func (ns *nodeServer) forceUnmount(path string) error {
cmd := exec.Command("umount", "-f", path)
out, err := cmd.CombinedOutput()
if err != nil {
return fmt.Errorf("failed to force-unmount volume: %s: %s", err, string(out))
}
return nil
}
func (ns *nodeServer) NodeGetInfo(ctx context.Context, req *csi.NodeGetInfoRequest) (*csi.NodeGetInfoResponse, error) {
glog.V(5).Infof("Using default NodeGetInfo")