1 /*
2 * Copyright 2015-2026 the Pacemaker project contributors
3 *
4 * The version control history for this file may have further details.
5 *
6 * This source code is licensed under the GNU Lesser General Public License
7 * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
8 */
9
10 #include <crm_internal.h>
11
12 #include <stdbool.h>
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <string.h>
16 #include <sys/stat.h>
17 #include <sys/types.h>
18 #include <dirent.h>
19 #include <ctype.h>
20
21 #include <glib.h> // g_str_has_prefix()
22
23 #if HAVE_LINUX_PROCFS
24 /*!
25 * \internal
26 * \brief Return name of /proc file containing the CIB daemon's load statistics
27 *
28 * \return Newly allocated memory with file name on success, NULL otherwise
29 *
30 * \note It is the caller's responsibility to free the return value.
31 * This will return NULL if the daemon is being run via valgrind.
32 * This should be called only on Linux systems.
33 */
34 static char *
35 find_cib_loadfile(const char *server)
36 {
37 pid_t pid = pcmk__procfs_pid_of(server);
38
39 if (pid == 0) {
40 return NULL;
41 }
42 return pcmk__assert_asprintf("/proc/%lld/stat", (long long) pid);
43 }
44
45 /*!
46 * \internal
47 * \brief Get process ID and name associated with a /proc directory entry
48 *
49 * \param[in] entry Directory entry (must be result of readdir() on /proc)
50 * \param[out] name If not NULL, a char[16] to hold the process name
51 * \param[out] pid If not NULL, will be set to process ID of entry
52 *
53 * \return Standard Pacemaker return code
54 * \note This should be called only on Linux systems, as not all systems that
55 * support /proc store process names and IDs in the same way. The kernel
56 * limits the process name to the first 15 characters (plus terminator).
57 * It would be nice if there were a public kernel API constant for that
58 * limit, but there isn't.
59 */
60 static int
61 pcmk__procfs_process_info(const struct dirent *entry, char *name, pid_t *pid)
62 {
63 int fd, local_pid;
64 FILE *file;
65 struct stat statbuf;
66 char procpath[128] = { 0 };
67
68 /* We're only interested in entries whose name is a PID,
69 * so skip anything non-numeric or that is too long.
70 *
71 * 114 = 128 - strlen("/proc/") - strlen("/status") - 1
72 */
73 local_pid = atoi(entry->d_name);
74 if ((local_pid <= 0) || (strlen(entry->d_name) > 114)) {
75 return -1;
76 }
77 if (pid) {
78 *pid = (pid_t) local_pid;
79 }
80
81 /* Get this entry's file information */
82 strcpy(procpath, "/proc/");
83 strcat(procpath, entry->d_name);
84 fd = open(procpath, O_RDONLY);
85 if (fd < 0 ) {
86 return -1;
87 }
88 if (fstat(fd, &statbuf) < 0) {
89 close(fd);
90 return -1;
91 }
92 close(fd);
93
94 /* We're only interested in subdirectories */
95 if (!S_ISDIR(statbuf.st_mode)) {
96 return -1;
97 }
98
99 /* Read the first entry ("Name:") from the process's status file.
100 * We could handle the valgrind case if we parsed the cmdline file
101 * instead, but that's more of a pain than it's worth.
102 */
103 if (name != NULL) {
104 strcat(procpath, "/status");
105 file = fopen(procpath, "r");
106 if (!file) {
107 return -1;
108 }
109 if (fscanf(file, "Name:\t%15[^\n]", name) != 1) {
110 fclose(file);
111 return -1;
112 }
113 name[15] = 0;
114 fclose(file);
115 }
116
117 return 0;
118 }
119 #endif // HAVE_LINUX_PROCFS
120
121 /*!
122 * \internal
123 * \brief Return process ID of a named process
124 *
125 * \param[in] name Process name (as used in /proc/.../status)
126 *
127 * \return Process ID of named process if running, 0 otherwise
128 *
129 * \note This will return 0 if the process is being run via valgrind.
130 * This should be called only on Linux systems.
131 */
132 pid_t
133 pcmk__procfs_pid_of(const char *name)
134 {
135 #if HAVE_LINUX_PROCFS
136 DIR *dp;
137 struct dirent *entry;
138 pid_t pid = 0;
139 char entry_name[64] = { 0 };
140
141 dp = opendir("/proc");
142 if (dp == NULL) {
143 pcmk__notice("Can not read /proc directory to track existing "
144 "components");
145 return 0;
146 }
147
148 while ((entry = readdir(dp)) != NULL) {
149 if ((pcmk__procfs_process_info(entry, entry_name, &pid) == pcmk_rc_ok)
150 && pcmk__str_eq(entry_name, name, pcmk__str_casei)
151 && (pcmk__pid_active(pid, NULL) == pcmk_rc_ok)) {
152
153 pcmk__info("Found %s active as process %lld", name,
154 (long long) pid);
155 break;
156 }
157 pid = 0;
158 }
159 closedir(dp);
160 return pid;
161 #else
162 return 0;
163 #endif // HAVE_LINUX_PROCFS
164 }
165
166 /*!
167 * \internal
168 * \brief Calculate number of logical CPU cores from procfs
169 *
170 * \return Number of cores (or 1 if unable to determine)
171 */
172 unsigned int
173 pcmk__procfs_num_cores(void)
174 {
175 #if HAVE_LINUX_PROCFS
176 int cores = 0;
177 FILE *stream = NULL;
178
179 /* Parse /proc/stat instead of /proc/cpuinfo because it's smaller */
180 stream = fopen("/proc/stat", "r");
181 if (stream == NULL) {
182 pcmk__info("Could not open /proc/stat: %s", strerror(errno));
183
184 } else {
185 char buffer[2048];
186
187 while (fgets(buffer, sizeof(buffer), stream)) {
188 if (g_str_has_prefix(buffer, "cpu") && isdigit(buffer[3])) {
189 ++cores;
190 }
191 }
192 fclose(stream);
193 }
194 return cores? cores : 1;
195 #else
196 return 1;
197 #endif // HAVE_LINUX_PROCFS
198 }
199
200 /*!
201 * \internal
202 * \brief Get the executable path corresponding to a process ID
203 *
204 * \param[in] pid Process ID to check
205 * \param[out] path Where to store executable path (can be \c NULL)
206 *
207 * \return Standard Pacemaker error code (as possible errno values from
208 * readlink())
209 */
210 int
211 pcmk__procfs_pid2path(pid_t pid, char **path)
212 {
213 #if HAVE_LINUX_PROCFS
214 char *procfs_path = NULL;
215 ssize_t link_rc;
216
217 /* The readlink(2) man page recommends calling lstat() to get the required
218 * buffer size, and then dynamically allocate the buffer. However,
219 * st_size == 0 for symlinks under /proc. So we use PATH_MAX.
220 */
221 char real_path[PATH_MAX] = { '\0', };
222
223 pcmk__assert((path == NULL) || (*path == NULL));
224
225 procfs_path = pcmk__assert_asprintf("/proc/%lld/exe", (long long) pid);
226
227 link_rc = readlink(procfs_path, real_path, sizeof(real_path));
228 free(procfs_path);
229
230 if (link_rc < 0) {
231 return errno;
232 } else if (link_rc >= sizeof(real_path)) {
233 return ENAMETOOLONG;
234 }
235
236 if (path != NULL) {
237 /* Make Coverity happy; we already zero-initialized real_path, and we
238 * returned ENAMETOOLONG if it's no longer null-terminated
239 */
240 real_path[link_rc] = '\0';
241
242 *path = pcmk__str_copy(real_path);
243 }
244 return pcmk_rc_ok;
245 #else
246 return EOPNOTSUPP;
247 #endif // HAVE_LINUX_PROCFS
248 }
249
250 /*!
251 * \internal
252 * \brief Check whether process ID information is available from procfs
253 *
254 * \return true if process ID information is available, otherwise false
255 */
256 bool
257 pcmk__procfs_has_pids(void)
258 {
259 #if HAVE_LINUX_PROCFS
260 static bool have_pids = false;
261 static bool checked = false;
262
263 if (!checked) {
264 have_pids = pcmk__procfs_pid2path(getpid(), NULL) == pcmk_rc_ok;
265 checked = true;
266 }
267 return have_pids;
268 #else
269 return false;
270 #endif // HAVE_LINUX_PROCFS
271 }
272
273 /*!
274 * \internal
275 * \brief Return an open handle on the directory containing links to open file
276 * descriptors, or NULL on error
277 */
278 DIR *
279 pcmk__procfs_fd_dir(void)
280 {
281 DIR *dir = NULL;
282
283 /* /proc/self/fd (on Linux) or /dev/fd (on most OSes) contains symlinks to
284 * all open files for the current process, named as the file descriptor.
285 * Use this if available, because it's more efficient than a shotgun
286 * approach to closing descriptors.
287 */
288 #if HAVE_LINUX_PROCFS
289 dir = opendir("/proc/self/fd");
290 #endif // HAVE_LINUX_PROCFS
291
292 return dir;
293 }
294
295 /*!
296 * \internal
297 * \brief Trigger a sysrq command if supported on current platform
298 *
299 * \param[in] t Sysrq command to trigger
300 */
301 void
302 pcmk__sysrq_trigger(char t)
303 {
304 #if HAVE_LINUX_PROCFS
305 // Root can always write here, regardless of kernel.sysrq value
306 FILE *procf = fopen("/proc/sysrq-trigger", "a");
307
308 if (procf == NULL) {
309 pcmk__warn("Could not open sysrq-trigger: %s", strerror(errno));
310 } else {
311 fprintf(procf, "%c\n", t);
312 fclose(procf);
313 }
314 #endif // HAVE_LINUX_PROCFS
315 }
316
317 bool
318 pcmk__throttle_cib_load(const char *server, float *load)
319 {
320 /* /proc/[pid]/stat
321 *
322 * Status information about the process. This is used by ps(1). It is defined
323 * in /usr/src/linux/fs/proc/array.c.
324 *
325 * The fields, in order, with their proper scanf(3) format specifiers, are:
326 *
327 * pid %d (1) The process ID.
328 * comm %s (2) The filename of the executable, in parentheses. This is
329 * visible whether or not the executable is swapped out.
330 * state %c (3) One character from the string "RSDZTW" where R is running,
331 * S is sleeping in an interruptible wait, D is waiting in
332 * uninterruptible disk sleep, Z is zombie, T is traced or
333 * stopped (on a signal), and W is paging.
334 * ppid %d (4) The PID of the parent.
335 * pgrp %d (5) The process group ID of the process.
336 * session %d (6) The session ID of the process.
337 * tty_nr %d (7) The controlling terminal of the process. (The minor device
338 * number is contained in the combination of bits 31 to 20 and
339 * 7 to 0; the major device number is in bits 15 to 8.)
340 * tpgid %d (8) The ID of the foreground process group of the controlling
341 * terminal of the process.
342 * flags %u (9) The kernel flags word of the process. For bit meanings, see
343 * the PF_* defines in the Linux kernel source file include/linux/sched.h.
344 * Details depend on the kernel version.
345 * minflt %lu (10) The number of minor faults the process has made which have
346 * not required loading a memory page from disk.
347 * cminflt %lu (11) The number of minor faults that the process's waited-for
348 * children have made.
349 * majflt %lu (12) The number of major faults the process has made which have
350 * required loading a memory page from disk.
351 * cmajflt %lu (13) The number of major faults that the process's waited-for
352 * children have made.
353 * utime %lu (14) Amount of time that this process has been scheduled in user
354 * mode, measured in clock ticks (divide by sysconf(_SC_CLK_TCK)).
355 * This includes guest time, guest_time (time spent running a
356 * virtual CPU, see below), so that applications that are not
357 * aware of the guest time field do not lose that time from
358 * their calculations.
359 * stime %lu (15) Amount of time that this process has been scheduled in
360 * kernel mode, measured in clock ticks (divide by sysconf(_SC_CLK_TCK)).
361 */
362
363 #if HAVE_LINUX_PROCFS
364 static char *loadfile = NULL;
365 static time_t last_call = 0;
366 static long ticks_per_s = 0;
367 static unsigned long last_utime, last_stime;
368
369 char buffer[64*1024];
370 FILE *stream = NULL;
371 time_t now = time(NULL);
372
|
(1) Event path: |
Condition "load == NULL", taking false branch. |
373 if (load == NULL) {
374 return false;
375 } else {
376 *load = 0.0;
377 }
378
|
(2) Event path: |
Condition "loadfile == NULL", taking true branch. |
379 if (loadfile == NULL) {
380 last_call = 0;
381 last_utime = 0;
382 last_stime = 0;
383
384 loadfile = find_cib_loadfile(server);
|
(3) Event path: |
Condition "loadfile == NULL", taking false branch. |
385 if (loadfile == NULL) {
386 pcmk__warn("Couldn't find CIB load file");
387 return false;
388 }
389
390 ticks_per_s = sysconf(_SC_CLK_TCK);
|
(4) Event path: |
Switch case default. |
|
(5) Event path: |
Condition "trace_cs == NULL", taking true branch. |
|
(6) Event path: |
Condition "crm_is_callsite_active(trace_cs, _level, 0)", taking false branch. |
|
(7) Event path: |
Breaking from switch. |
391 pcmk__trace("Found %s", loadfile);
392 }
393
394 stream = fopen(loadfile, "r");
|
(8) Event path: |
Condition "stream == NULL", taking true branch. |
395 if (stream == NULL) {
396 int rc = errno;
397
398 pcmk__warn("Couldn't read %s: %s (%d)", loadfile, pcmk_rc_str(rc), rc);
|
CID (unavailable; MK=26f9d6e6dcf06821789bc1dfe568662d) (#1 of 1): Inconsistent C union access (INCONSISTENT_UNION_ACCESS): |
|
(9) Event assign_union_field: |
The union field "in" of "_pp" is written. |
|
(10) Event inconsistent_union_field_access: |
In "_pp.out", the union field used: "out" is inconsistent with the field most recently stored: "in". |
399 g_clear_pointer(&loadfile, free);
400 return false;
401 }
402
403 if (fgets(buffer, sizeof(buffer), stream) != NULL) {
404 char *comm = pcmk__assert_alloc(256, sizeof(char));
405 char state = 0;
406 int rc = 0, pid = 0, ppid = 0, pgrp = 0, session = 0, tty_nr = 0, tpgid = 0;
407 unsigned long flags = 0, minflt = 0, cminflt = 0, majflt = 0, cmajflt = 0, utime = 0, stime = 0;
408
409 rc = sscanf(buffer, "%d %[^ ] %c %d %d %d %d %d %lu %lu %lu %lu %lu %lu %lu",
410 &pid, comm, &state, &ppid, &pgrp, &session, &tty_nr, &tpgid,
411 &flags, &minflt, &cminflt, &majflt, &cmajflt, &utime, &stime);
412 free(comm);
413
414 if (rc != 15) {
415 pcmk__err("Only %d of 15 fields found in %s", rc, loadfile);
416 fclose(stream);
417 return false;
418
419 } else if ((last_call > 0) && (last_call < now) && (last_utime <= utime) &&
420 (last_stime <= stime)) {
421 time_t elapsed = now - last_call;
422 unsigned long delta_utime = utime - last_utime;
423 unsigned long delta_stime = stime - last_stime;
424
425 *load = delta_utime + delta_stime; /* Cast to a float before division */
426 *load /= ticks_per_s;
427 *load /= elapsed;
428 pcmk__debug("cib load: %f (%lu ticks in %llds)", *load,
429 (delta_utime + delta_stime), (long long) elapsed);
430
431 } else {
432 pcmk__debug("Init %lu + %lu ticks at %lld (%lu tps)", utime, stime,
433 (long long) now, ticks_per_s);
434 }
435
436 last_call = now;
437 last_utime = utime;
438 last_stime = stime;
439
440 fclose(stream);
441 return true;
442 }
443
444 fclose(stream);
445 #endif // HAVE_LINUX_PROCFS
446 return false;
447 }
448
449 bool
450 pcmk__throttle_load_avg(float *load)
451 {
452 #if HAVE_LINUX_PROCFS
453 char buffer[256];
454 FILE *stream = NULL;
455 const char *loadfile = "/proc/loadavg";
456
457 if (load == NULL) {
458 return false;
459 }
460
461 stream = fopen(loadfile, "r");
462 if (stream == NULL) {
463 int rc = errno;
464 pcmk__warn("Couldn't read %s: %s (%d)", loadfile, pcmk_rc_str(rc), rc);
465 return false;
466 }
467
468 if (fgets(buffer, sizeof(buffer), stream) != NULL) {
469 /* Grab the 1-minute average, ignore the rest */
470 *load = strtof(buffer, NULL);
471 fclose(stream);
472 return true;
473 }
474
475 fclose(stream);
476 #endif // HAVE_LINUX_PROCFS
477 return false;
478 }
479