1    	/*
2    	 * Copyright 2015-2026 the Pacemaker project contributors
3    	 *
4    	 * The version control history for this file may have further details.
5    	 *
6    	 * This source code is licensed under the GNU Lesser General Public License
7    	 * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
8    	 */
9    	
10   	#include <crm_internal.h>
11   	
12   	#include <stdbool.h>
13   	#include <stdio.h>
14   	#include <stdlib.h>
15   	#include <string.h>
16   	#include <sys/stat.h>
17   	#include <sys/types.h>
18   	#include <dirent.h>
19   	#include <ctype.h>
20   	
21   	#include <glib.h>		// g_str_has_prefix()
22   	
23   	#if HAVE_LINUX_PROCFS
24   	/*!
25   	 * \internal
26   	 * \brief Return name of /proc file containing the CIB daemon's load statistics
27   	 *
28   	 * \return Newly allocated memory with file name on success, NULL otherwise
29   	 *
30   	 * \note It is the caller's responsibility to free the return value.
31   	 *       This will return NULL if the daemon is being run via valgrind.
32   	 *       This should be called only on Linux systems.
33   	 */
34   	static char *
35   	find_cib_loadfile(const char *server)
36   	{
37   	    pid_t pid = pcmk__procfs_pid_of(server);
38   	
39   	    if (pid == 0) {
40   	        return NULL;
41   	    }
42   	    return pcmk__assert_asprintf("/proc/%lld/stat", (long long) pid);
43   	}
44   	
/*!
 * \internal
 * \brief Get process ID and name associated with a /proc directory entry
 *
 * \param[in]  entry    Directory entry (must be result of readdir() on /proc)
 * \param[out] name     If not NULL, a char[16] to hold the process name
 * \param[out] pid      If not NULL, will be set to process ID of entry
 *
 * \return 0 on success, or -1 if \p entry is not named with a positive decimal
 *         process ID, is not a directory that can be opened, or (when \p name
 *         is requested) its status file cannot be read and parsed
 *
 * \note \p name and \p pid are written only when 0 is returned.
 * \note This should be called only on Linux systems, as not all systems that
 *       support /proc store process names and IDs in the same way. The kernel
 *       limits the process name to the first 15 characters (plus terminator).
 *       It would be nice if there were a public kernel API constant for that
 *       limit, but there isn't.
 */
static int
pcmk__procfs_process_info(const struct dirent *entry, char *name, pid_t *pid)
{
    static const char status_suffix[] = "/status";

    char procpath[128] = { 0 };
    char comm[16] = { 0 };  // 15 characters of process name plus terminator
    char *end = NULL;
    long parsed = 0;
    pid_t local_pid = 0;
    struct stat statbuf;
    int len = 0;
    int fd = -1;
    int rc = 0;

    if (entry == NULL) {
        return -1;
    }

    /* We're only interested in entries whose name is a PID, so skip anything
     * that is not entirely a positive decimal number. Checking the first
     * character rejects the leading whitespace and sign that strtol() would
     * otherwise accept.
     */
    if (!isdigit((unsigned char) entry->d_name[0])) {
        return -1;
    }
    errno = 0;
    parsed = strtol(entry->d_name, &end, 10);
    if ((errno != 0) || (*end != '\0') || (parsed <= 0)) {
        return -1;
    }
    local_pid = (pid_t) parsed;
    if ((long) local_pid != parsed) {
        return -1;  // Value does not fit in pid_t
    }

    /* Build "/proc/<pid>", making sure there is still room to append the
     * status suffix (whose sizeof includes the terminator) later
     */
    len = snprintf(procpath, sizeof(procpath), "/proc/%s", entry->d_name);
    if ((len < 0)
        || (((size_t) len + sizeof(status_suffix)) > sizeof(procpath))) {
        return -1;
    }

    /* Get this entry's file information */
    fd = open(procpath, O_RDONLY);
    if (fd < 0) {
        return -1;
    }
    rc = fstat(fd, &statbuf);
    close(fd);

    /* We're only interested in subdirectories */
    if ((rc < 0) || !S_ISDIR(statbuf.st_mode)) {
        return -1;
    }

    /* Read the first entry ("Name:") from the process's status file.
     * We could handle the valgrind case if we parsed the cmdline file
     * instead, but that's more of a pain than it's worth.
     */
    if (name != NULL) {
        FILE *file = NULL;

        memcpy(procpath + len, status_suffix, sizeof(status_suffix));
        file = fopen(procpath, "r");
        if (file == NULL) {
            return -1;
        }
        rc = fscanf(file, "Name:\t%15[^\n]", comm);
        fclose(file);
        if (rc != 1) {
            return -1;
        }

        // comm was zero-initialized, so all 16 bytes copied are terminated
        memcpy(name, comm, sizeof(comm));
    }

    if (pid != NULL) {
        *pid = local_pid;
    }
    return 0;
}
119  	#endif // HAVE_LINUX_PROCFS
120  	
/*!
 * \internal
 * \brief Look up the process ID of a running process by name
 *
 * Walk the entries of /proc and return the first one whose process name
 * matches \p name and which pcmk__pid_active() reports as active.
 *
 * \param[in] name  Process name (as used in /proc/.../status)
 *
 * \return Process ID of the matching process, or 0 if /proc cannot be opened,
 *         no entry matches, or procfs support is not compiled in
 *
 * \note This will return 0 if the process is being run via valgrind.
 *       This should be called only on Linux systems.
 */
pid_t
pcmk__procfs_pid_of(const char *name)
{
#if HAVE_LINUX_PROCFS
    char proc_name[64] = { 0 };
    pid_t found = 0;
    struct dirent *dent = NULL;
    DIR *procdir = opendir("/proc");

    if (procdir == NULL) {
        pcmk__notice("Can not read /proc directory to track existing "
                     "components");
        return 0;
    }

    for (dent = readdir(procdir); dent != NULL; dent = readdir(procdir)) {
        pid_t candidate = 0;

        // Skip anything that is not a process directory with a readable name
        if (pcmk__procfs_process_info(dent, proc_name,
                                      &candidate) != pcmk_rc_ok) {
            continue;
        }
        if (!pcmk__str_eq(proc_name, name, pcmk__str_casei)) {
            continue;
        }
        if (pcmk__pid_active(candidate, NULL) != pcmk_rc_ok) {
            continue;
        }

        pcmk__info("Found %s active as process %lld", name,
                   (long long) candidate);
        found = candidate;
        break;
    }

    closedir(procdir);
    return found;
#else
    return 0;
#endif // HAVE_LINUX_PROCFS
}
165  	
/*!
 * \internal
 * \brief Calculate number of logical CPU cores from procfs
 *
 * Count the lines of /proc/stat that begin with "cpu" immediately followed by
 * a digit (the aggregate "cpu " line is therefore not counted).
 *
 * \return Number of cores (or 1 if unable to determine, including when procfs
 *         support is not compiled in)
 */
unsigned int
pcmk__procfs_num_cores(void)
{
#if HAVE_LINUX_PROCFS
    unsigned int cores = 0;
    FILE *stream = NULL;

    /* Parse /proc/stat instead of /proc/cpuinfo because it's smaller */
    stream = fopen("/proc/stat", "r");
    if (stream == NULL) {
        pcmk__info("Could not open /proc/stat: %s", strerror(errno));

    } else {
        char buffer[2048];

        while (fgets(buffer, sizeof(buffer), stream) != NULL) {
            /* g_str_has_prefix() guarantees at least 3 characters before the
             * terminator, so buffer[3] is always within the string. The cast
             * is required because passing a negative char to isdigit() is
             * undefined behavior.
             */
            if (g_str_has_prefix(buffer, "cpu")
                && isdigit((unsigned char) buffer[3])) {
                ++cores;
            }
        }
        fclose(stream);
    }
    return (cores > 0)? cores : 1;
#else
    return 1;
#endif // HAVE_LINUX_PROCFS
}
199  	
/*!
 * \internal
 * \brief Get the executable path corresponding to a process ID
 *
 * Resolve the /proc/<pid>/exe symbolic link.
 *
 * \param[in]  pid   Process ID to check
 * \param[out] path  Where to store executable path (can be \c NULL, in which
 *                   case only the return code is of interest); if not
 *                   \c NULL, \p *path must be \c NULL on entry
 *
 * \return Standard Pacemaker error code (as possible errno values from
 *         readlink()), \c ENAMETOOLONG if the link target does not fit in
 *         \c PATH_MAX bytes including the terminator, or \c EOPNOTSUPP if
 *         procfs support is not compiled in
 *
 * \note On success with a non-NULL \p path, \p *path is set to the result of
 *       pcmk__str_copy() and the caller is responsible for freeing it.
 */
int
pcmk__procfs_pid2path(pid_t pid, char **path)
{
#if HAVE_LINUX_PROCFS
    /* The readlink(2) man page recommends calling lstat() to get the required
     * buffer size, and then dynamically allocate the buffer. However,
     * st_size == 0 for symlinks under /proc. So we use PATH_MAX.
     */
    char real_path[PATH_MAX] = { '\0', };
    char *procfs_path = NULL;
    ssize_t link_rc = 0;
    int link_errno = 0;

    pcmk__assert((path == NULL) || (*path == NULL));

    procfs_path = pcmk__assert_asprintf("/proc/%lld/exe", (long long) pid);

    link_rc = readlink(procfs_path, real_path, sizeof(real_path));

    // Save errno before free(), which is allowed to change it on older libcs
    link_errno = errno;
    free(procfs_path);

    if (link_rc < 0) {
        return link_errno;
    }

    /* readlink() does not null-terminate, so a result that fills the whole
     * buffer leaves no room for the terminator (and may have been truncated)
     */
    if ((size_t) link_rc >= sizeof(real_path)) {
        return ENAMETOOLONG;
    }

    if (path != NULL) {
        /* Make Coverity happy; we already zero-initialized real_path, and we
         * returned ENAMETOOLONG if it's no longer null-terminated
         */
        real_path[link_rc] = '\0';

        *path = pcmk__str_copy(real_path);
    }
    return pcmk_rc_ok;
#else
    return EOPNOTSUPP;
#endif // HAVE_LINUX_PROCFS
}
249  	
/*!
 * \internal
 * \brief Check whether process ID information is available from procfs
 *
 * The check is performed once, by resolving the executable path of the
 * current process with pcmk__procfs_pid2path(); the answer is remembered in
 * static storage and reused by every later call.
 *
 * \return true if process ID information is available, otherwise false
 *         (always false when procfs support is not compiled in)
 */
bool
pcmk__procfs_has_pids(void)
{
#if HAVE_LINUX_PROCFS
    static bool probed = false;
    static bool available = false;

    if (probed) {
        return available;
    }

    available = (pcmk__procfs_pid2path(getpid(), NULL) == pcmk_rc_ok);
    probed = true;
    return available;
#else
    return false;
#endif // HAVE_LINUX_PROCFS
}
272  	
/*!
 * \internal
 * \brief Open the directory that lists the current process's open file
 *        descriptors
 *
 * \return Open handle on /proc/self/fd, or NULL if it could not be opened or
 *         procfs support is not compiled in
 *
 * \note A non-NULL result comes from opendir(), so the caller should release
 *       it with closedir().
 */
DIR *
pcmk__procfs_fd_dir(void)
{
    /* /proc/self/fd (on Linux) or /dev/fd (on most OSes) contains symlinks to
     * all open files for the current process, named as the file descriptor.
     * Use this if available, because it's more efficient than a shotgun
     * approach to closing descriptors.
     */
#if HAVE_LINUX_PROCFS
    return opendir("/proc/self/fd");
#else
    return NULL;
#endif // HAVE_LINUX_PROCFS
}
294  	
/*!
 * \internal
 * \brief Trigger a sysrq command if supported on current platform
 *
 * Append the command character and a newline to /proc/sysrq-trigger. A
 * warning is logged if the file cannot be opened; nothing is done at all when
 * procfs support is not compiled in.
 *
 * \param[in] t  Sysrq command to trigger
 */
void
pcmk__sysrq_trigger(char t)
{
#if HAVE_LINUX_PROCFS
    // Root can always write here, regardless of kernel.sysrq value
    FILE *trigger = fopen("/proc/sysrq-trigger", "a");

    if (trigger != NULL) {
        fprintf(trigger, "%c\n", t);
        fclose(trigger);
        return;
    }
    pcmk__warn("Could not open sysrq-trigger: %s", strerror(errno));
#endif // HAVE_LINUX_PROCFS
}
316  	
/*!
 * \internal
 * \brief Estimate the recent CPU load of a named daemon from its /proc stat
 *
 * The name of the stat file and the previous sample (time, utime, stime) are
 * kept in static storage between calls. When a previous sample exists, the
 * clock has advanced, and neither counter has gone backward, the load is the
 * number of ticks consumed since the previous call, divided by the ticks per
 * second and by the elapsed seconds. Otherwise the call only records a new
 * baseline and reports a load of 0.
 *
 * \param[in]  server  Process name of the daemon to sample
 * \param[out] load    Where to store the load (set to 0 before sampling)
 *
 * \return true if the stat file was read and parsed, otherwise false (also
 *         false if \p load is NULL or procfs support is not compiled in)
 */
bool
pcmk__throttle_cib_load(const char *server, float *load)
{
/* /proc/[pid]/stat
 *
 * Status information about the process.  This is used by ps(1).  It is defined
 * in /usr/src/linux/fs/proc/array.c.
 *
 * The fields, in order, with their proper scanf(3) format specifiers, are:
 *
 * pid %d      (1)  The process ID.
 * comm %s     (2)  The filename of the executable, in parentheses.  This is
 *                  visible whether or not the executable is swapped out.
 * state %c    (3)  One character from the string "RSDZTW" where R is running,
 *                  S is sleeping in an interruptible wait, D is waiting in
 *                  uninterruptible disk sleep, Z is zombie, T is traced or
 *                  stopped (on a signal), and W is paging.
 * ppid %d     (4)  The PID of the parent.
 * pgrp %d     (5)  The process group ID of the process.
 * session %d  (6)  The session ID of the process.
 * tty_nr %d   (7)  The controlling terminal of the process.  (The minor device
 *                  number is contained in the combination of bits 31 to 20 and
 *                  7 to 0; the major device number is in bits 15 to 8.)
 * tpgid %d    (8)  The ID of the foreground process group of the controlling
 *                  terminal of the process.
 * flags %u    (9)  The kernel flags word of the process.  For bit meanings, see
 *                  the PF_* defines in the Linux kernel source file
 *                  include/linux/sched.h.  Details depend on the kernel
 *                  version.
 * minflt %lu  (10) The number of minor faults the process has made which have
 *                  not required loading a memory page from disk.
 * cminflt %lu (11) The number of minor faults that the process's waited-for
 *                  children have made.
 * majflt %lu  (12) The number of major faults the process has made which have
 *                  required loading a memory page from disk.
 * cmajflt %lu (13) The number of major faults that the process's waited-for
 *                  children have made.
 * utime %lu   (14) Amount of time that this process has been scheduled in user
 *                  mode, measured in clock ticks (divide by
 *                  sysconf(_SC_CLK_TCK)).  This includes guest time,
 *                  guest_time (time spent running a virtual CPU, see below),
 *                  so that applications that are not aware of the guest time
 *                  field do not lose that time from their calculations.
 * stime %lu   (15) Amount of time that this process has been scheduled in
 *                  kernel mode, measured in clock ticks (divide by
 *                  sysconf(_SC_CLK_TCK)).
 */

#if HAVE_LINUX_PROCFS
    static char *loadfile = NULL;
    static time_t last_call = 0;
    static long ticks_per_s = 0;
    static unsigned long last_utime, last_stime;

    char buffer[64*1024];
    char comm[256] = { '\0', };
    char state = 0;
    int rc = 0, pid = 0, ppid = 0, pgrp = 0, session = 0, tty_nr = 0;
    int tpgid = 0;
    unsigned long flags = 0, minflt = 0, cminflt = 0, majflt = 0, cmajflt = 0;
    unsigned long utime = 0, stime = 0;
    FILE *stream = NULL;
    time_t now = time(NULL);

    if (load == NULL) {
        return false;
    }
    *load = 0.0;

    if (loadfile == NULL) {
        // (Re)discover the daemon, discarding any sample of a previous one
        last_call = 0;
        last_utime = 0;
        last_stime = 0;

        loadfile = find_cib_loadfile(server);
        if (loadfile == NULL) {
            pcmk__warn("Couldn't find CIB load file");
            return false;
        }

        ticks_per_s = sysconf(_SC_CLK_TCK);
        pcmk__trace("Found %s", loadfile);
    }

    stream = fopen(loadfile, "r");
    if (stream == NULL) {
        int open_rc = errno;

        pcmk__warn("Couldn't read %s: %s (%d)", loadfile,
                   pcmk_rc_str(open_rc), open_rc);

        /* Forget the file so that the next call looks the daemon up again.
         * This is spelled out rather than using g_clear_pointer(), whose
         * macro expansion static analysis flags as inconsistent union access.
         */
        free(loadfile);
        loadfile = NULL;
        return false;
    }

    // Only the first line is of interest, so the file can be closed right away
    if (fgets(buffer, sizeof(buffer), stream) == NULL) {
        fclose(stream);
        return false;
    }
    fclose(stream);

    // The field width keeps comm from overflowing its 256-byte buffer
    rc = sscanf(buffer,
                "%d %255[^ ] %c %d %d %d %d %d %lu %lu %lu %lu %lu %lu %lu",
                &pid, comm, &state, &ppid, &pgrp, &session, &tty_nr, &tpgid,
                &flags, &minflt, &cminflt, &majflt, &cmajflt, &utime, &stime);

    if (rc != 15) {
        pcmk__err("Only %d of 15 fields found in %s", rc, loadfile);
        return false;
    }

    if ((last_call > 0) && (last_call < now) && (last_utime <= utime)
        && (last_stime <= stime)) {
        time_t elapsed = now - last_call;
        unsigned long delta_utime = utime - last_utime;
        unsigned long delta_stime = stime - last_stime;

        *load = delta_utime + delta_stime; /* Cast to a float before division */
        *load /= ticks_per_s;
        *load /= elapsed;
        pcmk__debug("cib load: %f (%lu ticks in %llds)", *load,
                    (delta_utime + delta_stime), (long long) elapsed);

    } else {
        pcmk__debug("Init %lu + %lu ticks at %lld (%lu tps)", utime, stime,
                    (long long) now, ticks_per_s);
    }

    last_call = now;
    last_utime = utime;
    last_stime = stime;
    return true;
#else
    return false;
#endif // HAVE_LINUX_PROCFS
}
448  	
/*!
 * \internal
 * \brief Read the 1-minute system load average from /proc/loadavg
 *
 * \param[out] load  Where to store the value parsed from the start of the
 *                   first line of /proc/loadavg
 *
 * \return true if a line was read from /proc/loadavg, otherwise false (also
 *         false if \p load is NULL or procfs support is not compiled in)
 *
 * \note The result of strtof() is stored without checking whether any digits
 *       were actually converted.
 */
bool
pcmk__throttle_load_avg(float *load)
{
#if HAVE_LINUX_PROCFS
    const char *loadfile = "/proc/loadavg";
    char line[256];
    bool have_line = false;
    FILE *stream = NULL;

    if (load == NULL) {
        return false;
    }

    stream = fopen(loadfile, "r");
    if (stream == NULL) {
        int rc = errno;

        pcmk__warn("Couldn't read %s: %s (%d)", loadfile, pcmk_rc_str(rc), rc);
        return false;
    }

    have_line = (fgets(line, sizeof(line), stream) != NULL);
    if (have_line) {
        /* Grab the 1-minute average, ignore the rest */
        *load = strtof(line, NULL);
    }

    fclose(stream);
    return have_line;
#else
    return false;
#endif // HAVE_LINUX_PROCFS
}
479