#!/usr/bin/sh
#
# ocf:heartbeat:ganesha-nfs
#
# OCF Resource Agent for a single NFS-Ganesha (ganesha.nfsd) server instance,
# for HA NFS on top of replicated block storage (DRBD + LINSTOR).
#
# The agent realises the export from its own parameters at runtime, the same way
# ocf:heartbeat:exportfs programs the kernel export table: given export_path,
# clients, etc. it renders an EPHEMERAL ganesha.conf under /run and starts
# ganesha against it. Nothing authoritative is persisted; on failover the new
# node re-renders from its parameters.
#
# Modes:
#   - Static    : (also called legacy mode) if `config_file` points at an
#                 existing file and no `export_path` is given, that file is
#                 used verbatim.
#   - Generated : if `export_path` is set, the agent renders the config from its
#                 parameters and starts ganesha with `ganesha.nfsd -f`. The
#                 export_path/export_id/pseudo parameters accept ';'-separated
#                 lists, so one process can serve several exports.
#
# The agent does not manage kernel NFS, /var/lib/nfs, rpcbind, or statd. The
# NFSv4 RecoveryDir must live on the replicated filesystem this resource fails
# over with, so client lock state migrates on failover.
#
# Author:  Yusuf Yildiz <yusuf@upforge.at>
# License: GNU GPL v2 or later

#######################################################################
# Initialization

# Locate and source ocf-shellfuncs. OCF_FUNCTIONS_DIR is honoured when the
# caller already set it; otherwise it is derived from OCF_ROOT. Both expansions
# are quoted so a path containing whitespace or glob characters is used as-is
# instead of being word-split.
: "${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}"
. "${OCF_FUNCTIONS_DIR}/ocf-shellfuncs"

#######################################################################
# Defaults

# Built-in defaults. Each OCF_RESKEY_<name>_default is applied further down
# when the corresponding OCF_RESKEY_<name> is unset, and is also what meta-data
# advertises as the parameter default.
OCF_RESKEY_config_file_default="/etc/ganesha/ganesha.conf"
OCF_RESKEY_runtime_dir_default="/run/ganesha"
OCF_RESKEY_pid_file_default="/run/ganesha.pid"
OCF_RESKEY_log_file_default="/var/log/ganesha/ganesha.log"
OCF_RESKEY_log_level_default="NIV_EVENT"
OCF_RESKEY_nfs_ip_default=""
OCF_RESKEY_nfs_port_default="2049"
OCF_RESKEY_start_timeout_default="30"

# --- generated-mode (export intent) ---
OCF_RESKEY_export_path_default=""
# Empty on purpose: an omitted export_id makes every export fall back to its
# 1-based position. A non-empty default such as "1" would be treated as a
# one-entry list and fail the length check against a multi-entry export_path.
OCF_RESKEY_export_id_default=""
OCF_RESKEY_pseudo_default=""
OCF_RESKEY_clients_default=""
OCF_RESKEY_access_type_default="RW"
OCF_RESKEY_squash_default="Root_Squash"
OCF_RESKEY_anonuid_default=""
OCF_RESKEY_anongid_default=""
OCF_RESKEY_sectype_default="sys"
OCF_RESKEY_protocols_default="3,4"
OCF_RESKEY_recovery_dir_default=""
OCF_RESKEY_server_scope_default=""
OCF_RESKEY_enable_nlm_default="true"
OCF_RESKEY_grace_period_default=""
OCF_RESKEY_lease_lifetime_default=""
OCF_RESKEY_fsal_default="VFS"

# Fill in every parameter the cluster manager left unset. The '=' form (not
# ':=') only assigns when the variable is unset, so an explicitly empty value
# supplied by the caller is kept.

# --- process / runtime parameters ---
: "${OCF_RESKEY_config_file=${OCF_RESKEY_config_file_default}}"
: "${OCF_RESKEY_runtime_dir=${OCF_RESKEY_runtime_dir_default}}"
: "${OCF_RESKEY_pid_file=${OCF_RESKEY_pid_file_default}}"
: "${OCF_RESKEY_log_file=${OCF_RESKEY_log_file_default}}"
: "${OCF_RESKEY_log_level=${OCF_RESKEY_log_level_default}}"
: "${OCF_RESKEY_nfs_ip=${OCF_RESKEY_nfs_ip_default}}"
: "${OCF_RESKEY_nfs_port=${OCF_RESKEY_nfs_port_default}}"
: "${OCF_RESKEY_start_timeout=${OCF_RESKEY_start_timeout_default}}"

# --- generated-mode (export intent) parameters ---
: "${OCF_RESKEY_export_path=${OCF_RESKEY_export_path_default}}"
: "${OCF_RESKEY_export_id=${OCF_RESKEY_export_id_default}}"
: "${OCF_RESKEY_pseudo=${OCF_RESKEY_pseudo_default}}"
: "${OCF_RESKEY_clients=${OCF_RESKEY_clients_default}}"
: "${OCF_RESKEY_access_type=${OCF_RESKEY_access_type_default}}"
: "${OCF_RESKEY_squash=${OCF_RESKEY_squash_default}}"
: "${OCF_RESKEY_anonuid=${OCF_RESKEY_anonuid_default}}"
: "${OCF_RESKEY_anongid=${OCF_RESKEY_anongid_default}}"
: "${OCF_RESKEY_sectype=${OCF_RESKEY_sectype_default}}"
: "${OCF_RESKEY_protocols=${OCF_RESKEY_protocols_default}}"
: "${OCF_RESKEY_recovery_dir=${OCF_RESKEY_recovery_dir_default}}"
: "${OCF_RESKEY_server_scope=${OCF_RESKEY_server_scope_default}}"
: "${OCF_RESKEY_enable_nlm=${OCF_RESKEY_enable_nlm_default}}"
: "${OCF_RESKEY_grace_period=${OCF_RESKEY_grace_period_default}}"
: "${OCF_RESKEY_lease_lifetime=${OCF_RESKEY_lease_lifetime_default}}"
: "${OCF_RESKEY_fsal=${OCF_RESKEY_fsal_default}}"

#######################################################################
# Mode + path helpers

# Succeeds (status 0) exactly when export_path is non-empty, which is what
# selects generated mode; fails (status 1) otherwise.
generated_mode() {
    if [ -z "$OCF_RESKEY_export_path" ]; then
        return 1
    fi
    return 0
}

# Print the path of the config file ganesha is started against:
#   legacy mode    -> config_file, exactly as supplied by the operator
#   generated mode -> <runtime_dir>/<resource instance>.conf, where the
#                     instance name falls back to "ganesha" when
#                     OCF_RESOURCE_INSTANCE is unset or empty
active_config_path() {
    if ! generated_mode; then
        echo "$OCF_RESKEY_config_file"
        return
    fi
    echo "${OCF_RESKEY_runtime_dir}/${OCF_RESOURCE_INSTANCE:-ganesha}.conf"
}

# export_path / export_id / pseudo may each hold a ';'-separated list; entries
# at the same position describe the same EXPORT block, so a single ganesha
# process can serve several exports. The two helpers below pick one entry of
# such a list and count its entries.
#
# list_nth LIST N
#   Print entry N (1-based) of LIST with leading and trailing blanks/tabs
#   removed. An N beyond the last entry prints an empty line; an empty LIST
#   prints nothing.
list_nth() {
    printf '%s' "$1" | awk -F';' -v pos="$2" '
        {
            entry = $pos
            sub(/^[ \t]+/, "", entry)
            sub(/[ \t]+$/, "", entry)
            print entry
        }'
}
# list_len LIST
#   Print the number of ';'-separated entries in LIST. An empty LIST counts as
#   0 entries; empty entries (e.g. after a trailing ';') are counted.
list_len() {
    if [ -z "$1" ]; then
        echo 0
        return
    fi
    printf '%s' "$1" | awk -F';' '{ print NF }'
}

# Print the NFSv4 recovery directory. There is one per server, and it has to
# sit on the replicated filesystem so lock state follows the resource on
# failover. An explicit recovery_dir parameter wins; otherwise the directory
# ".nfs-ganesha-recovery" below the first export_path entry is used (one
# trailing '/' of that entry is dropped first).
recovery_dir() {
    local base
    if [ -z "$OCF_RESKEY_recovery_dir" ]; then
        base=$(list_nth "$OCF_RESKEY_export_path" 1)
        echo "${base%/}/.nfs-ganesha-recovery"
        return
    fi
    echo "$OCF_RESKEY_recovery_dir"
}

#######################################################################
# Config rendering (generated mode)

# Print the server-wide part of the generated config to stdout: the listener
# (NFS_Core_Param: bind address, port, NLM switch) and the NFSv4 block
# (recovery root plus the optional lease, grace and server-scope settings).
# Logging is not rendered here; it is passed on the command line (-L/-N).
#
# Bind_Addr falls back to "::" when nfs_ip is empty. Lease_Lifetime,
# Grace_Period and Server_Scope lines are only emitted when the matching
# parameter is non-empty.
render_base_config() {
    local listen_addr nlm_flag recovery_root

    listen_addr="${OCF_RESKEY_nfs_ip:-::}"

    # With NLM enabled, ganesha always sits out the full Grace_Period after a
    # start, since NLM reclaims carry no completion signal. With NLM disabled,
    # grace ends once every v4.1+ client has sent RECLAIM_COMPLETE, cutting the
    # post-failover I/O stall from Grace_Period (90s default) to a few seconds.
    # The cost: NFSv3 clients get no file locking (their I/O is unaffected).
    if ocf_is_true "$OCF_RESKEY_enable_nlm"; then
        nlm_flag=true
    else
        nlm_flag=false
    fi

    recovery_root=$(recovery_dir)

    cat <<EOF
# Generated by ocf:heartbeat:ganesha-nfs for ${OCF_RESOURCE_INSTANCE:-ganesha}.
# EPHEMERAL — regenerated on every start, not authoritative. Do not edit.
NFS_Core_Param {
    Bind_Addr = ${listen_addr};
    NFS_Port = ${OCF_RESKEY_nfs_port};
    Enable_NLM = ${nlm_flag};
}

NFSv4 {
    RecoveryRoot = ${recovery_root};
    Graceless = false;
EOF
    if [ -n "$OCF_RESKEY_lease_lifetime" ]; then
        echo "    Lease_Lifetime = ${OCF_RESKEY_lease_lifetime};"
    fi
    if [ -n "$OCF_RESKEY_grace_period" ]; then
        echo "    Grace_Period = ${OCF_RESKEY_grace_period};"
    fi
    # Server_Scope has to be the same on every node (RFC 8881): after a
    # failover the client compares the scope returned by EXCHANGE_ID, and a
    # different value means "different server" — open/lock state is then never
    # reclaimed and I/O on open files hangs. Ganesha's own default is the
    # local hostname, which is different on each node.
    if [ -n "$OCF_RESKEY_server_scope" ]; then
        echo "    Server_Scope = ${OCF_RESKEY_server_scope};"
    fi
    echo "}"
}

# render_export_block INDEX
#   Print the EXPORT {} block for the export at 1-based list position INDEX.
#   Path, Export_Id and Pseudo are taken from the ';'-separated parameter
#   lists; an empty export_id entry falls back to INDEX and an empty pseudo
#   entry falls back to the path. All remaining settings are shared by every
#   export. Clients accepts a bare IPv6 address, but neither a /128 prefix nor
#   a quoted form.
#
# Access policy is DENY-DEFAULT (a deliberate choice for HA storage): the
# EXPORT-level Access_Type is None, so a client that matches no CLIENT entry
# is refused (EACCES). Access is only ever granted through the CLIENT
# whitelist, which rules out an accidentally world-accessible export.
render_export_block() {
    local pos="$1" exp_path exp_id exp_pseudo

    exp_path=$(list_nth "$OCF_RESKEY_export_path" "$pos")
    exp_id=$(list_nth "$OCF_RESKEY_export_id" "$pos")
    exp_id="${exp_id:-$pos}"
    exp_pseudo=$(list_nth "$OCF_RESKEY_pseudo" "$pos")
    exp_pseudo="${exp_pseudo:-$exp_path}"

    cat <<EOF
EXPORT {
    Export_Id = ${exp_id};
    Path = ${exp_path};
    Pseudo = ${exp_pseudo};
    Access_Type = None;
    Squash = ${OCF_RESKEY_squash};
EOF
    # The CLIENT block inherits Anonymous_Uid/Gid from the EXPORT level. When
    # they are not set, ganesha squashes to uid/gid -2 (4294967294) rather than
    # the customary 'nobody' (65534).
    if [ -n "$OCF_RESKEY_anonuid" ]; then
        echo "    Anonymous_Uid = ${OCF_RESKEY_anonuid};"
    fi
    if [ -n "$OCF_RESKEY_anongid" ]; then
        echo "    Anonymous_Gid = ${OCF_RESKEY_anongid};"
    fi
    cat <<EOF
    SecType = ${OCF_RESKEY_sectype};
    Protocols = ${OCF_RESKEY_protocols};
    Transports = TCP;

    FSAL {
        Name = ${OCF_RESKEY_fsal};
    }

    CLIENT {
        Clients = ${OCF_RESKEY_clients};
        Access_Type = ${OCF_RESKEY_access_type};
        Squash = ${OCF_RESKEY_squash};
    }
}
EOF
}

# Print the complete generated config to stdout: the server-wide base section
# followed by one EXPORT block per export_path entry, each preceded by a blank
# line.
render_config() {
    local total idx

    render_base_config

    total=$(list_len "$OCF_RESKEY_export_path")
    idx=1
    while [ "$idx" -le "$total" ]
    do
        echo
        render_export_block "$idx"
        # The increment stays last so the loop's own status is always 0.
        idx=$((idx + 1))
    done
}

# Render the config into the active config path, creating the parent directory
# first. Returns 1 when rendering or the redirection fails, 0 otherwise (after
# logging where the file was written).
write_runtime_config() {
    local target target_dir

    target=$(active_config_path)
    target_dir=$(dirname "$target")
    mkdir -p "$target_dir"

    if ! render_config > "$target"; then
        return 1
    fi

    ocf_log info "Rendered ephemeral Ganesha config at $target"
    return 0
}

# Delete the ephemeral config file when running in generated mode; do nothing
# in legacy mode. Always returns 0.
cleanup_generated() {
    if generated_mode; then
        rm -f "$(active_config_path)"
    fi
    return 0
}

#######################################################################
# meta-data

# Print the OCF resource-agent meta-data XML on stdout.
#
# The heredoc delimiter is intentionally unquoted: the ${..._default}
# references are expanded, so the advertised defaults always match the default
# variables defined near the top of this file.
#
# export_id is advertised with content type "string" because it accepts a
# ';'-separated list (e.g. "10;20"), which is not a valid integer.
meta_data() {
cat <<EOM
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="ganesha-nfs" version="0.2">
<version>1.0</version>

<longdesc lang="en">
Manages a single NFS-Ganesha (ganesha.nfsd) server instance as a cluster
resource, for HA NFS on DRBD/LINSTOR-backed storage.

Two modes. If 'export_path' is set, the agent renders an ephemeral
ganesha.conf under 'runtime_dir' from the OCF parameters and starts ganesha
against it (the exportfs(8) model: runtime realisation, nothing persisted).
If 'export_path' is empty and 'config_file' points at an existing file, that
file is used verbatim (legacy/static mode).

A single ganesha process can serve several exports: 'export_path', 'export_id'
and 'pseudo' accept ';'-separated lists, zipped positionally (one EXPORT block
per entry). All exports share one service IP, one clients whitelist and the
other scalar parameters.

The agent does not manage kernel NFS, /var/lib/nfs, rpcbind, or statd.

Critical: the NFSv4 RecoveryDir must live on the replicated filesystem this
resource fails over with, or clients lose their locks on failover.
</longdesc>
<shortdesc lang="en">Manages an NFS-Ganesha server instance</shortdesc>

<parameters>

<parameter name="export_path" unique="0" required="0">
<longdesc lang="en">Server-side directory to export, on the replicated FS. Setting this switches the agent into generated mode (it renders ganesha.conf from its parameters). May be a ';'-separated list to export several directories from one ganesha process.</longdesc>
<shortdesc lang="en">Exported directory, or ';'-list (enables generated mode)</shortdesc>
<content type="string" default="${OCF_RESKEY_export_path_default}" />
</parameter>

<parameter name="export_id" unique="0" required="0">
<longdesc lang="en">NFS Export_Id (uint16, 1-65535), or a ';'-separated list parallel to export_path. Must be unique and stable per server; collision-free assignment is the caller's responsibility. Defaults to the export's 1-based position if omitted.</longdesc>
<shortdesc lang="en">Export_Id, or ';'-list parallel to export_path</shortdesc>
<content type="string" default="${OCF_RESKEY_export_id_default}" />
</parameter>

<parameter name="pseudo" unique="0" required="0">
<longdesc lang="en">NFSv4 pseudo path, or a ';'-separated list parallel to export_path. Each entry defaults to its export_path if unset; must be unique per export.</longdesc>
<shortdesc lang="en">NFSv4 pseudo path, or ';'-list parallel to export_path</shortdesc>
<content type="string" default="${OCF_RESKEY_pseudo_default}" />
</parameter>

<parameter name="clients" unique="0" required="0">
<longdesc lang="en">Client spec for the CLIENT whitelist (e.g. "10.1.10.0/24", a bare IPv6 like fd00:10:1:10::3, or "*"). REQUIRED in generated mode (deny-default: no clients means nobody can mount). Use "*" explicitly to allow all reachable clients.</longdesc>
<shortdesc lang="en">Allowed clients (required in generated mode)</shortdesc>
<content type="string" default="${OCF_RESKEY_clients_default}" />
</parameter>

<parameter name="access_type" unique="0" required="0">
<longdesc lang="en">Access type granted to whitelisted clients: RW, RO, or None.</longdesc>
<shortdesc lang="en">Access type</shortdesc>
<content type="string" default="${OCF_RESKEY_access_type_default}" />
</parameter>

<parameter name="squash" unique="0" required="0">
<longdesc lang="en">UID/GID squashing: Root_Squash, No_Root_Squash, or All_Squash.</longdesc>
<shortdesc lang="en">Squash policy</shortdesc>
<content type="string" default="${OCF_RESKEY_squash_default}" />
</parameter>

<parameter name="anonuid" unique="0" required="0">
<longdesc lang="en">Numeric uid that squashed requests are mapped to (Anonymous_Uid). When unset, ganesha's built-in default of -2 (uid 4294967294) applies — note that this is NOT the common 'nobody' uid 65534, so squashed clients can usually not write anywhere. Set to 65534 (or the uid owning the export directory) to allow squashed clients to write. Shared by all exports of this instance.</longdesc>
<shortdesc lang="en">Anonymous (squash target) uid</shortdesc>
<content type="string" default="${OCF_RESKEY_anonuid_default}" />
</parameter>

<parameter name="anongid" unique="0" required="0">
<longdesc lang="en">Numeric gid that squashed requests are mapped to (Anonymous_Gid). When unset, ganesha's built-in default of -2 (gid 4294967294) applies. Shared by all exports of this instance.</longdesc>
<shortdesc lang="en">Anonymous (squash target) gid</shortdesc>
<content type="string" default="${OCF_RESKEY_anongid_default}" />
</parameter>

<parameter name="sectype" unique="0" required="0">
<longdesc lang="en">Security flavour: sys, krb5, krb5i, krb5p.</longdesc>
<shortdesc lang="en">SecType</shortdesc>
<content type="string" default="${OCF_RESKEY_sectype_default}" />
</parameter>

<parameter name="protocols" unique="0" required="0">
<longdesc lang="en">NFS protocol versions to offer, e.g. "3,4" or "4".</longdesc>
<shortdesc lang="en">Protocols</shortdesc>
<content type="string" default="${OCF_RESKEY_protocols_default}" />
</parameter>

<parameter name="fsal" unique="0" required="0">
<longdesc lang="en">Ganesha FSAL name (VFS, GLUSTER, CEPH, ...).</longdesc>
<shortdesc lang="en">FSAL</shortdesc>
<content type="string" default="${OCF_RESKEY_fsal_default}" />
</parameter>

<parameter name="server_scope" unique="0" required="0">
<longdesc lang="en">NFSv4 Server_Scope (RFC 8881 server scope). MUST be set to the same value on all nodes that can run this resource, e.g. the service IP or a cluster name. Without it, ganesha derives the scope from the local hostname, so after a failover NFSv4.1+ clients see a different scope, refuse to reclaim their open/lock state, and I/O on files that were open hangs indefinitely. Note that setting or changing this value is itself a scope change: existing client state cannot be reclaimed across the reconfiguration.</longdesc>
<shortdesc lang="en">NFSv4 server scope (must match on all nodes)</shortdesc>
<content type="string" default="${OCF_RESKEY_server_scope_default}" />
</parameter>

<parameter name="enable_nlm" unique="0" required="0">
<longdesc lang="en">Whether to enable NLM (file locking for NFSv3 clients). With NLM enabled, ganesha always waits out the full grace period on every start/failover, because NLM lock reclaims have no completion signal; NFSv4 I/O on open files stalls for that long. With NLM disabled, grace ends as soon as all NFSv4.1+ clients have reclaimed (typically seconds), NFSv3 reads/writes are unaffected, but NFSv3 lock requests fail. Note that NFSv3 lock recovery additionally requires an rpc.statd/SM_NOTIFY setup that this agent does not manage; without one, NFSv3 locks are silently lost on failover even with NLM enabled.</longdesc>
<shortdesc lang="en">Enable NLM (NFSv3 locking; forces full grace period)</shortdesc>
<content type="boolean" default="${OCF_RESKEY_enable_nlm_default}" />
</parameter>

<parameter name="grace_period" unique="0" required="0">
<longdesc lang="en">NFSv4 Grace_Period in seconds (0-180; ganesha default 90). Upper bound on how long clients have to reclaim state after a failover; stateful NFSv4 operations from clients that have finished reclaiming may still be blocked until grace ends (see enable_nlm). Must be at least lease_lifetime, or clients that were briefly unreachable lose their state silently.</longdesc>
<shortdesc lang="en">NFSv4 grace period (seconds)</shortdesc>
<content type="integer" default="${OCF_RESKEY_grace_period_default}" />
</parameter>

<parameter name="lease_lifetime" unique="0" required="0">
<longdesc lang="en">NFSv4 Lease_Lifetime in seconds (1-120; ganesha default 60). How long the server holds a client's state without renewal. Lower values shorten failover recovery but increase renewal traffic and the risk of spurious state loss for slow clients.</longdesc>
<shortdesc lang="en">NFSv4 lease lifetime (seconds)</shortdesc>
<content type="integer" default="${OCF_RESKEY_lease_lifetime_default}" />
</parameter>

<parameter name="recovery_dir" unique="0" required="0">
<longdesc lang="en">NFSv4 RecoveryDir (server-wide singleton). MUST be on the replicated FS. Defaults to a hidden dir under the first export_path.</longdesc>
<shortdesc lang="en">NFSv4 RecoveryDir</shortdesc>
<content type="string" default="${OCF_RESKEY_recovery_dir_default}" />
</parameter>

<parameter name="runtime_dir" unique="0" required="0">
<longdesc lang="en">Directory for the ephemeral generated config (tmpfs recommended).</longdesc>
<shortdesc lang="en">Runtime config directory</shortdesc>
<content type="string" default="${OCF_RESKEY_runtime_dir_default}" />
</parameter>

<parameter name="config_file" unique="0" required="0">
<longdesc lang="en">Legacy mode only: path to a pre-existing ganesha.conf. Ignored when export_path is set.</longdesc>
<shortdesc lang="en">Static configuration file (legacy mode)</shortdesc>
<content type="string" default="${OCF_RESKEY_config_file_default}" />
</parameter>

<parameter name="pid_file" unique="1" required="0">
<longdesc lang="en">PID file. Must be unique across co-located instances.</longdesc>
<shortdesc lang="en">PID file</shortdesc>
<content type="string" default="${OCF_RESKEY_pid_file_default}" />
</parameter>

<parameter name="log_file" unique="0" required="0">
<longdesc lang="en">Ganesha log file path.</longdesc>
<shortdesc lang="en">Log file</shortdesc>
<content type="string" default="${OCF_RESKEY_log_file_default}" />
</parameter>

<parameter name="log_level" unique="0" required="0">
<longdesc lang="en">Ganesha log level (NIV_EVENT, NIV_INFO, NIV_DEBUG, ...).</longdesc>
<shortdesc lang="en">Log level</shortdesc>
<content type="string" default="${OCF_RESKEY_log_level_default}" />
</parameter>

<parameter name="nfs_ip" unique="0" required="0">
<longdesc lang="en">Service IP (v4 or v6) to bind and probe. Each co-located instance should use a distinct IP.</longdesc>
<shortdesc lang="en">NFS service IP</shortdesc>
<content type="string" default="${OCF_RESKEY_nfs_ip_default}" />
</parameter>

<parameter name="nfs_port" unique="0" required="0">
<longdesc lang="en">TCP port to bind/probe.</longdesc>
<shortdesc lang="en">NFS port</shortdesc>
<content type="integer" default="${OCF_RESKEY_nfs_port_default}" />
</parameter>

<parameter name="start_timeout" unique="0" required="0">
<longdesc lang="en">Seconds to wait for readiness (PID + listening port).</longdesc>
<shortdesc lang="en">Start readiness timeout</shortdesc>
<content type="integer" default="${OCF_RESKEY_start_timeout_default}" />
</parameter>

</parameters>

<actions>
<action name="start"        timeout="60s" />
<action name="stop"         timeout="60s" />
<action name="monitor"      timeout="20s" interval="30s" />
<action name="meta-data"    timeout="5s" />
<action name="validate-all" timeout="20s" />
</actions>

</resource-agent>
EOM
}

#######################################################################
# Helpers (process lifecycle)

# Print the ganesha.nfsd executable to use: the first executable found among
# /usr/bin and /usr/sbin, else whatever `command -v` resolves, else the bare
# name "ganesha.nfsd".
ganesha_binary() {
    local candidate
    for candidate in /usr/bin/ganesha.nfsd /usr/sbin/ganesha.nfsd; do
        if [ -x "$candidate" ]; then
            echo "$candidate"
            return
        fi
    done
    command -v ganesha.nfsd 2>/dev/null || echo ganesha.nfsd
}

# Print the PID stored in pid_file. Fails (status 1, no output) when the file
# is missing or its content is empty or contains anything but decimal digits.
read_pid() {
    local pidfile="$OCF_RESKEY_pid_file" content
    if [ ! -f "$pidfile" ]; then
        return 1
    fi
    content=$(cat "$pidfile" 2>/dev/null)
    case "$content" in
        *[!0-9]*|'')
            return 1
            ;;
        *)
            echo "$content"
            ;;
    esac
}

# Succeeds only when pid_file holds a PID, that PID can be signalled, and
# /proc/<pid>/comm reads "ganesha.nfsd". The comm comparison keeps a recycled
# PID that now belongs to some other program from counting as ganesha.
is_running() {
    local candidate name
    if ! candidate=$(read_pid); then
        return 1
    fi
    if ! kill -0 "$candidate" 2>/dev/null; then
        return 1
    fi
    name=$(cat "/proc/${candidate}/comm" 2>/dev/null)
    [ "$name" = "ganesha.nfsd" ]
}

# Succeeds when something is listening on TCP port nfs_port.
#
# With `ss` available: if nfs_ip is set, a listener bound to that address or to
# a wildcard address (0.0.0.0, *, ::, or empty) counts; if nfs_ip is empty, any
# listener on the port counts.
# Without `ss`: falls back to scanning /proc/net/tcp and /proc/net/tcp6 for a
# socket in state 0A on the port; nfs_ip is not considered in that case.
port_listening() {
    local want_ip want_port port_hex
    want_ip="$OCF_RESKEY_nfs_ip"
    want_port="$OCF_RESKEY_nfs_port"

    if have_binary ss; then
        if [ -z "$want_ip" ]; then
            ss -Hln -tn "sport = :${want_port}" 2>/dev/null | grep -q .
            return $?
        fi
        ss -Hln -tnp "sport = :${want_port}" 2>/dev/null | \
            awk -v ip="$want_ip" '
                { addr=$4; sub(/:[^:]+$/,"",addr); gsub(/[\[\]]/,"",addr);
                  sub(/^::ffff:/,"",addr);   # normalise IPv4-mapped IPv6 (ganesha binds v4 this way)
                  if (addr==ip || addr=="0.0.0.0" || addr=="*" || addr=="::" || addr=="") { found=1 } }
                END { exit(found?0:1) }'
        return $?
    fi

    # /proc/net/tcp* list ports as 4 upper-case hex digits.
    port_hex=$(printf '%04X' "$want_port")
    if grep -qE ":${port_hex} [0-9A-F]{8}:[0-9A-F]{4} 0A" /proc/net/tcp  2>/dev/null; then
        return 0
    fi
    if grep -qE ":${port_hex} [0-9A-F]{32}:[0-9A-F]{4} 0A" /proc/net/tcp6 2>/dev/null; then
        return 0
    fi
    return 1
}

# Create the directories ganesha needs before it can start: the parent
# directories of pid_file and log_file, plus (generated mode only) the NFSv4
# recovery directory. Existing directories are left untouched.
ensure_dirs() {
    local dir recov
    for dir in "$(dirname "$OCF_RESKEY_pid_file")" "$(dirname "$OCF_RESKEY_log_file")"; do
        if [ ! -d "$dir" ]; then
            mkdir -p "$dir"
        fi
    done
    if generated_mode; then
        recov=$(recovery_dir)
        [ -d "$recov" ] || mkdir -p "$recov"
    fi
}

#######################################################################
# OCF actions

# validate-all: check the installation and the resource parameters.
#
# Returns:
#   OCF_SUCCESS         all checks passed
#   OCF_ERR_INSTALLED   the ganesha.nfsd binary check failed, or (legacy mode)
#                       config_file does not exist
#   OCF_ERR_CONFIGURED  a parameter is missing, malformed or out of range
#
# Generated mode checks the export_id/pseudo list lengths against export_path,
# the export_id range, access_type, anonuid/anongid, grace_period,
# lease_lifetime and the mandatory clients whitelist. Both modes check
# nfs_port (1..65535) and start_timeout (> 0). Whether the export_path
# directories exist is deliberately left to start, because the replicated
# filesystem may not be mounted yet when a probe runs.
ganesha_validate() {
    local bin
    bin=$(ganesha_binary)
    if ! check_binary "$bin"; then
        ocf_exit_reason "ganesha.nfsd binary not found"
        return $OCF_ERR_INSTALLED
    fi

    if generated_mode; then
        local np ni npp i id
        np=$(list_len "$OCF_RESKEY_export_path")

        # export_id: optional ';'-list parallel to export_path; each a uint16.
        if [ -n "$OCF_RESKEY_export_id" ]; then
            ni=$(list_len "$OCF_RESKEY_export_id")
            if [ "$ni" -ne "$np" ]; then
                ocf_exit_reason "export_id list has $ni entries but export_path has $np"
                return $OCF_ERR_CONFIGURED
            fi
            i=1
            while [ "$i" -le "$ni" ]; do
                id=$(list_nth "$OCF_RESKEY_export_id" "$i")
                case "$id" in
                    ''|*[!0-9]*) ocf_exit_reason "export_id entries must be integers"; return $OCF_ERR_CONFIGURED ;;
                esac
                if [ "$id" -lt 1 ] || [ "$id" -gt 65535 ]; then
                    ocf_exit_reason "export_id entries must be in 1..65535"; return $OCF_ERR_CONFIGURED
                fi
                i=$((i + 1))
            done
        fi

        # pseudo: optional ';'-list, must match export_path length when set.
        if [ -n "$OCF_RESKEY_pseudo" ]; then
            npp=$(list_len "$OCF_RESKEY_pseudo")
            if [ "$npp" -ne "$np" ]; then
                ocf_exit_reason "pseudo list has $npp entries but export_path has $np"
                return $OCF_ERR_CONFIGURED
            fi
        fi

        case "$OCF_RESKEY_access_type" in
            RW|RO|None) ;;
            *) ocf_exit_reason "access_type must be RW, RO, or None"; return $OCF_ERR_CONFIGURED ;;
        esac

        # anonuid/anongid: optional integers (negative values like -2 allowed).
        local aid
        for aid in "$OCF_RESKEY_anonuid" "$OCF_RESKEY_anongid"; do
            [ -n "$aid" ] || continue
            case "${aid#-}" in
                ''|*[!0-9]*)
                    ocf_exit_reason "anonuid/anongid must be integers"
                    return $OCF_ERR_CONFIGURED ;;
            esac
        done

        # grace_period / lease_lifetime: optional, ganesha-enforced ranges.
        if [ -n "$OCF_RESKEY_grace_period" ]; then
            case "$OCF_RESKEY_grace_period" in
                *[!0-9]*) ocf_exit_reason "grace_period must be an integer"; return $OCF_ERR_CONFIGURED ;;
            esac
            if [ "$OCF_RESKEY_grace_period" -gt 180 ]; then
                ocf_exit_reason "grace_period must be in 0..180"; return $OCF_ERR_CONFIGURED
            fi
        fi
        if [ -n "$OCF_RESKEY_lease_lifetime" ]; then
            case "$OCF_RESKEY_lease_lifetime" in
                *[!0-9]*) ocf_exit_reason "lease_lifetime must be an integer"; return $OCF_ERR_CONFIGURED ;;
            esac
            if [ "$OCF_RESKEY_lease_lifetime" -lt 1 ] || [ "$OCF_RESKEY_lease_lifetime" -gt 120 ]; then
                ocf_exit_reason "lease_lifetime must be in 1..120"; return $OCF_ERR_CONFIGURED
            fi
        fi
        if [ -z "$OCF_RESKEY_clients" ]; then
            ocf_exit_reason "clients is required in generated mode (deny-default); set it explicitly, use \"*\" to allow all"
            return $OCF_ERR_CONFIGURED
        fi
        # export_path existence is checked at start (FS may not be mounted yet at probe time).
    else
        if [ ! -f "$OCF_RESKEY_config_file" ]; then
            ocf_exit_reason "Config file not found: $OCF_RESKEY_config_file (and no export_path set)"
            return $OCF_ERR_INSTALLED
        fi
    fi

    # nfs_port is formatted with printf %04X, handed to ss and written into the
    # generated config, so it has to be a plain TCP port number.
    case "$OCF_RESKEY_nfs_port" in
        ''|*[!0-9]*) ocf_exit_reason "nfs_port must be an integer in 1..65535"; return $OCF_ERR_CONFIGURED ;;
    esac
    if [ "$OCF_RESKEY_nfs_port" -lt 1 ] || [ "$OCF_RESKEY_nfs_port" -gt 65535 ]; then
        ocf_exit_reason "nfs_port must be an integer in 1..65535"; return $OCF_ERR_CONFIGURED
    fi

    # start_timeout bounds the readiness loop in start; 0 would skip the loop
    # entirely and make every start fail, so it is rejected here.
    case "$OCF_RESKEY_start_timeout" in
        ''|*[!0-9]*) ocf_exit_reason "start_timeout must be a positive integer"; return $OCF_ERR_CONFIGURED ;;
    esac
    if [ "$OCF_RESKEY_start_timeout" -lt 1 ]; then
        ocf_exit_reason "start_timeout must be a positive integer"; return $OCF_ERR_CONFIGURED
    fi
    return $OCF_SUCCESS
}

# monitor: report the state of the ganesha instance.
#
# Returns:
#   OCF_SUCCESS      the process is running and the port is listening
#   OCF_ERR_GENERIC  the process is running but the port is not listening
#   OCF_NOT_RUNNING  no running process; a leftover PID file is removed
ganesha_monitor() {
    if ! is_running; then
        if [ -f "$OCF_RESKEY_pid_file" ]; then
            ocf_log info "Removing stale PID file $OCF_RESKEY_pid_file"
            rm -f "$OCF_RESKEY_pid_file"
        fi
        return $OCF_NOT_RUNNING
    fi

    if ! port_listening; then
        ocf_log warn "Ganesha PID alive but not listening on ${OCF_RESKEY_nfs_ip:-*}:${OCF_RESKEY_nfs_port}"
        return $OCF_ERR_GENERIC
    fi

    return $OCF_SUCCESS
}

# start: bring up ganesha.nfsd and wait until it is ready.
#
# Sequence:
#   1. Validate the parameters; a validation failure is returned unchanged.
#   2. If an instance is already running, adopt it once it listens (waiting up
#      to 10s). If it never listens it is stopped first; when that stop fails
#      the start is aborted rather than launching a second daemon next to it.
#   3. Generated mode: every export_path entry must be an existing directory;
#      then the needed directories are created and the ephemeral config is
#      rendered. Legacy mode only creates the directories.
#   4. Launch the daemon and poll once per second, up to start_timeout
#      seconds, until the process is running and the port is listening.
#
# Returns OCF_SUCCESS when ganesha is ready, the validation result when
# validation fails, and OCF_ERR_GENERIC for every other failure.
ganesha_start() {
    local rc bin cfg np idx p waited ready

    ganesha_validate
    rc=$?
    if [ "$rc" -ne "$OCF_SUCCESS" ]; then
        return "$rc"
    fi

    if is_running; then
        if port_listening; then
            ocf_log info "Ganesha already running and listening (PID $(read_pid))"
            return $OCF_SUCCESS
        fi
        ocf_log info "Ganesha PID alive but not yet listening; waiting up to 10s"
        waited=0
        while [ "$waited" -lt 10 ]; do
            sleep 1
            if port_listening; then
                ocf_log info "Ganesha became ready"
                return $OCF_SUCCESS
            fi
            waited=$((waited + 1))
        done
        ocf_log warn "Existing Ganesha PID $(read_pid) not listening after 10s; stopping before retry"
        if ! ganesha_stop; then
            # Launching now would put a second daemon next to one we could
            # not get rid of.
            ocf_log err "Could not stop the unresponsive Ganesha instance; aborting start"
            return $OCF_ERR_GENERIC
        fi
    fi

    if generated_mode; then
        np=$(list_len "$OCF_RESKEY_export_path")
        idx=1
        while [ "$idx" -le "$np" ]; do
            p=$(list_nth "$OCF_RESKEY_export_path" "$idx")
            if [ ! -d "$p" ]; then
                ocf_exit_reason "export_path does not exist (is the replicated FS mounted?): $p"
                return $OCF_ERR_GENERIC
            fi
            idx=$((idx + 1))
        done
        ensure_dirs
        if ! write_runtime_config; then
            ocf_exit_reason "Failed to render runtime config at $(active_config_path)"
            return $OCF_ERR_GENERIC
        fi
    else
        ensure_dirs
    fi

    bin=$(ganesha_binary)
    cfg=$(active_config_path)
    ocf_log info "Starting Ganesha: $bin -f $cfg -p $OCF_RESKEY_pid_file"

    "$bin" \
        -f "$cfg" \
        -p "$OCF_RESKEY_pid_file" \
        -L "$OCF_RESKEY_log_file" \
        -N "$OCF_RESKEY_log_level"
    rc=$?
    if [ "$rc" -ne 0 ]; then
        ocf_exit_reason "ganesha.nfsd exited with rc=$rc on start; see $OCF_RESKEY_log_file"
        return $OCF_ERR_GENERIC
    fi

    # Poll the two readiness conditions directly instead of going through
    # ganesha_monitor: monitor deletes a PID file it considers stale, which
    # during startup could remove a PID file the daemon is still writing.
    waited=0
    ready=0
    while [ "$waited" -lt "$OCF_RESKEY_start_timeout" ]; do
        if is_running && port_listening; then
            ready=1
            break
        fi
        sleep 1
        waited=$((waited + 1))
    done
    if [ "$ready" -ne 1 ]; then
        ocf_exit_reason "Ganesha did not become ready within ${OCF_RESKEY_start_timeout}s; see $OCF_RESKEY_log_file"
        return $OCF_ERR_GENERIC
    fi

    ocf_log info "Ganesha ready (PID $(read_pid))"
    return $OCF_SUCCESS
}

# stop: terminate ganesha and remove its runtime artifacts.
#
# When no instance is running, only the PID file and (generated mode) the
# ephemeral config are cleaned up. Otherwise SIGTERM is sent and the process is
# polled once per second. The wait budget is the operation timeout
# (OCF_RESKEY_CRM_meta_timeout in ms, 60000 when unset) minus 5s, but never
# less than 10s. If the process is still alive after that, SIGKILL is sent and
# checked once after 2s.
#
# Every successful path removes the PID file and calls cleanup_generated, so
# the cleanup is the same whether the process exited on SIGTERM or SIGKILL.
#
# Returns OCF_SUCCESS when the process is gone, OCF_ERR_GENERIC when it
# survived SIGKILL.
ganesha_stop() {
    local pid tmo waited

    if ! is_running; then
        [ -f "$OCF_RESKEY_pid_file" ] && rm -f "$OCF_RESKEY_pid_file"
        cleanup_generated
        return $OCF_SUCCESS
    fi

    pid=$(read_pid)
    ocf_log info "Stopping Ganesha (PID $pid)"

    tmo=${OCF_RESKEY_CRM_meta_timeout:-60000}
    tmo=$(( (tmo / 1000) - 5 ))
    [ "$tmo" -lt 10 ] && tmo=10

    kill -TERM "$pid" 2>/dev/null

    waited=0
    while [ "$waited" -lt "$tmo" ]; do
        if ! kill -0 "$pid" 2>/dev/null; then
            rm -f "$OCF_RESKEY_pid_file"
            cleanup_generated
            ocf_log info "Ganesha stopped cleanly"
            return $OCF_SUCCESS
        fi
        sleep 1
        waited=$((waited + 1))
    done

    ocf_log warn "Ganesha did not exit after ${tmo}s of SIGTERM; escalating to SIGKILL"
    kill -KILL "$pid" 2>/dev/null
    sleep 2
    if kill -0 "$pid" 2>/dev/null; then
        ocf_exit_reason "Failed to kill Ganesha PID $pid"
        return $OCF_ERR_GENERIC
    fi
    rm -f "$OCF_RESKEY_pid_file"
    cleanup_generated
    ocf_log info "Ganesha stopped after SIGKILL"
    return $OCF_SUCCESS
}

#######################################################################
# Dispatch

# Run the requested action and exit with its status. meta-data and usage/help
# always exit with OCF_SUCCESS; an unknown action is reported on stderr and
# exits with OCF_ERR_UNIMPLEMENTED. The four lifecycle actions fall through to
# the final `exit $?`, which forwards the handler's return code.
case "$__OCF_ACTION" in
    meta-data)
        meta_data
        exit $OCF_SUCCESS
        ;;
    usage|help)
        echo "usage: $0 {start|stop|monitor|validate-all|meta-data}"
        exit $OCF_SUCCESS
        ;;
    start)          ganesha_start ;;
    stop)           ganesha_stop ;;
    monitor|status) ganesha_monitor ;;
    validate-all)   ganesha_validate ;;
    *)
        echo "Unknown action: $__OCF_ACTION" >&2
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac
exit $?
