1 #include "clusterautoconfig.h"
2
3 #include <stdio.h>
4 #include <stdint.h>
5 #include <inttypes.h>
6 #include <stdlib.h>
7 #include <sys/types.h>
8 #include <sys/stat.h>
9 #include <fcntl.h>
10 #include <string.h>
11 #include <unistd.h>
12 #include <libintl.h>
13 #include <errno.h>
14
15 #define _(String) gettext(String)
16
17 #include <logging.h>
18 #include "libgfs2.h"
19 #include "fsck.h"
20 #include "util.h"
21 #include "fs_recovery.h"
22 #include "metawalk.h"
23 #include "inode_hash.h"
24
/*
 * CLEAR_POINTER - free the allocation behind x, if any, and reset x to NULL.
 * NOTE(review): this expands to a bare if-block rather than do { } while (0),
 * so be careful when using it as the body of an unbraced if/else.
 */
#define CLEAR_POINTER(x) \
	if (x) { \
		free(x); \
		x = NULL; \
	}
/* All-ones 64-bit block number; the starting value of possible_root. */
#define HIGHEST_BLOCK 0xffffffffffffffff

static int was_mounted_ro = 0;
/* Lowest ".." target recorded by peruse_user_dinode() for a directory chain
   whose parent could not be looked up; HIGHEST_BLOCK until one is seen. */
static uint64_t possible_root = HIGHEST_BLOCK;
/* System inodes recognised by peruse_system_dinode(); rebuild_sysdir() links
   the ones that were found into the new master directory. */
static struct lgfs2_meta_dir fix_md;
/* Running total of extra blocks held by the unlinked dinodes reclaimed in
   check_rgrp_integrity(). */
static uint64_t blks_2free = 0;
36
37 /**
38 * block_mounters
39 *
40 * Change the lock protocol so nobody can mount the fs
41 *
42 */
43 static int block_mounters(struct lgfs2_sbd *sdp, int block_em)
44 {
45 if (block_em) {
46 /* verify it starts with lock_ */
47 if (!strncmp(sdp->sd_lockproto, "lock_", 5)) {
48 /* Change lock_ to fsck_ */
49 memcpy(sdp->sd_lockproto, "fsck_", 5);
50 }
51 /* FIXME: Need to do other verification in the else
52 * case */
53 } else {
54 /* verify it starts with fsck_ */
55 /* verify it starts with lock_ */
56 if (!strncmp(sdp->sd_lockproto, "fsck_", 5)) {
57 /* Change fsck_ to lock_ */
58 memcpy(sdp->sd_lockproto, "lock_", 5);
59 }
60 }
61
62 if (lgfs2_sb_write(sdp, sdp->device_fd)) {
63 stack;
64 return -1;
65 }
66 return 0;
67 }
68
69 static void dup_free(struct fsck_cx *cx)
70 {
71 struct osi_node *n;
72 struct duptree *dt;
73
74 while ((n = osi_first(&cx->dup_blocks))) {
75 dt = (struct duptree *)n;
76 dup_delete(cx, dt);
77 }
78 }
79
80 static void dirtree_free(struct fsck_cx *cx)
81 {
82 struct osi_node *n;
83 struct dir_info *dt;
84
85 while ((n = osi_first(&cx->dirtree))) {
86 dt = (struct dir_info *)n;
87 dirtree_delete(cx, dt);
88 }
89 }
90
91 static void inodetree_free(struct fsck_cx *cx)
92 {
93 struct osi_node *n;
94 struct inode_info *dt;
95
96 while ((n = osi_first(&cx->inodetree))) {
97 dt = (struct inode_info *)n;
98 inodetree_delete(cx, dt);
99 }
100 }
101
102 /*
103 * empty_super_block - free all structures in the super block
104 * sdp: the in-core super block
105 *
106 * This function frees all allocated structures within the
107 * super block. It does not free the super block itself.
108 *
109 * Returns: Nothing
110 */
111 static void empty_super_block(struct fsck_cx *cx)
112 {
113 log_info( _("Freeing buffers.\n"));
114 lgfs2_rgrp_free(cx->sdp, &cx->sdp->rgtree);
115
116 inodetree_free(cx);
117 dirtree_free(cx);
118 dup_free(cx);
119 }
120
121
122 /**
123 * set_block_ranges
124 * @sdp: superblock
125 *
126 * Uses info in rgrps and jindex to determine boundaries of the
127 * file system.
128 *
129 * Returns: 0 on success, -1 on failure
130 */
131 static int set_block_ranges(struct lgfs2_sbd *sdp)
132 {
133 struct osi_node *n, *next = NULL;
134 struct lgfs2_rgrp_tree *rgd;
135 uint64_t rmax = 0;
136 uint64_t rmin = 0;
137 ssize_t count;
138 char *buf;
139
140 log_info( _("Setting block ranges..."));
141
142 for (n = osi_first(&sdp->rgtree); n; n = next) {
143 next = osi_next(n);
144 rgd = (struct lgfs2_rgrp_tree *)n;
145 if (rgd->rt_data0 + rgd->rt_data &&
146 rgd->rt_data0 + rgd->rt_data - 1 > rmax)
147 rmax = rgd->rt_data0 + rgd->rt_data - 1;
148 if (!rmin || rgd->rt_data0 < rmin)
149 rmin = rgd->rt_data0;
150 }
151
152 last_fs_block = rmax;
153 if (last_fs_block > 0xffffffff && sizeof(unsigned long) <= 4) {
154 log_crit(_("This file system is too big for this computer to handle.\n"));
155 log_crit(_("Last fs block = 0x%"PRIx64", but sizeof(unsigned long) is %zu bytes.\n"),
156 last_fs_block, sizeof(unsigned long));
157 goto fail;
158 }
159
160 last_data_block = rmax;
161 first_data_block = rmin;
162
163 buf = calloc(1, sdp->sd_bsize);
164 if (buf == NULL) {
165 log_crit(_("Failed to determine file system boundaries: %s\n"), strerror(errno));
166 return -1;
167 }
168 count = pread(sdp->device_fd, buf, sdp->sd_bsize, (last_fs_block * sdp->sd_bsize));
169 free(buf);
170 if (count != sdp->sd_bsize) {
171 log_crit(_("Failed to read highest block number (%"PRIx64"): %s\n"),
172 last_fs_block, strerror(errno));
173 goto fail;
174 }
175
176 log_info(_("0x%"PRIx64" to 0x%"PRIx64"\n"), first_data_block, last_data_block);
177 return 0;
178
179 fail:
180 log_info( _("Error\n"));
181 return -1;
182 }
183
184 /**
185 * check_rgrp_integrity - verify a rgrp free block count against the bitmap
186 */
187 static void check_rgrp_integrity(struct fsck_cx *cx, struct lgfs2_rgrp_tree *rgd,
188 int *fixit, int *this_rg_fixed,
189 int *this_rg_bad, int *this_rg_cleaned)
190 {
191 uint32_t rg_free, rg_reclaimed, rg_unlinked, rg_usedmeta, rg_useddi;
192 int rgb, x, y, off, bytes_to_check, total_bytes_to_check, asked = 0;
193 struct lgfs2_sbd *sdp = cx->sdp;
194 unsigned int state;
195 uint64_t diblock;
196 struct lgfs2_buffer_head *bh;
197
198 rg_free = rg_reclaimed = rg_unlinked = rg_usedmeta = rg_useddi = 0;
199 total_bytes_to_check = rgd->rt_bitbytes;
200
201 *this_rg_fixed = *this_rg_bad = *this_rg_cleaned = 0;
202
203 diblock = rgd->rt_data0;
204 for (rgb = 0; rgb < rgd->rt_length; rgb++){
205 /* Count up the free blocks in the bitmap */
206 off = (rgb) ? sizeof(struct gfs2_meta_header) :
207 sizeof(struct gfs2_rgrp);
208 if (total_bytes_to_check <= sdp->sd_bsize - off)
209 bytes_to_check = total_bytes_to_check;
210 else
211 bytes_to_check = sdp->sd_bsize - off;
212 total_bytes_to_check -= bytes_to_check;
213 for (x = 0; x < bytes_to_check; x++) {
214 unsigned char *byte;
215
216 byte = (unsigned char *)&rgd->rt_bits[rgb].bi_data[off + x];
217 if (*byte == 0x55) {
218 diblock += GFS2_NBBY;
219 continue;
220 }
221 if (*byte == 0x00) {
222 diblock += GFS2_NBBY;
223 rg_free += GFS2_NBBY;
224 continue;
225 }
226 for (y = 0; y < GFS2_NBBY; y++) {
227 state = (*byte >>
228 (GFS2_BIT_SIZE * y)) & GFS2_BIT_MASK;
229 if (state == GFS2_BLKST_USED) {
230 diblock++;
231 continue;
232 }
233 if (state == GFS2_BLKST_DINODE) {
234 diblock++;
235 continue;
236 }
237 if (state == GFS2_BLKST_FREE) {
238 diblock++;
239 rg_free++;
240 continue;
241 }
242 /* GFS2_BLKST_UNLINKED */
243 log_info(_("Unlinked dinode 0x%"PRIx64" found.\n"), diblock);
244 if (!asked) {
245 asked = 1;
246 if (query(cx, _("Okay to reclaim free "
247 "metadata in resource group "
248 "%"PRIu64" (0x%"PRIx64")? (y/n)"),
249 rgd->rt_addr, rgd->rt_addr))
250 *fixit = 1;
251 }
252 if (!(*fixit)) {
253 rg_unlinked++;
254 diblock++;
255 continue;
256 }
257 *byte &= ~(GFS2_BIT_MASK <<
258 (GFS2_BIT_SIZE * y));
259 rgd->rt_bits[rgb].bi_modified = 1;
260 rg_reclaimed++;
261 rg_free++;
262 rgd->rt_free++;
263 log_info(_("Free metadata block %"PRIu64" (0x%"PRIx64") reclaimed.\n"),
264 diblock, diblock);
265 bh = lgfs2_bread(sdp, diblock);
266 if (!lgfs2_check_meta(bh->b_data, GFS2_METATYPE_DI)) {
267 struct lgfs2_inode *ip =
268 fsck_inode_get(sdp, rgd, bh);
269 if (ip->i_blocks > 1) {
270 blks_2free += ip->i_blocks - 1;
271 log_info(_("%"PRIu64" blocks "
272 "(total) may need "
273 "to be freed in "
274 "pass 5.\n"),
275 blks_2free);
276 }
277 fsck_inode_put(&ip);
278 }
279 lgfs2_brelse(bh);
280 diblock++;
281 }
282 }
283 }
284 /* The unlinked blocks we reclaim shouldn't be considered errors,
285 since we're just reclaiming them as a courtesy. If we already
286 got permission to reclaim them, we adjust the rgrp counts
287 accordingly. That way, only "real" rgrp count inconsistencies
288 will be reported. */
289 if (rg_reclaimed && *fixit) {
290 lgfs2_rgrp_out(rgd, rgd->rt_bits[0].bi_data);
291 rgd->rt_bits[0].bi_modified = 1;
292 *this_rg_cleaned = 1;
293 log_info(_("The rgrp at %"PRIu64" (0x%"PRIx64") was cleaned of %d "
294 "free metadata blocks.\n"),
295 rgd->rt_addr, rgd->rt_addr, rg_reclaimed);
296 }
297 if (rgd->rt_free != rg_free) {
298 *this_rg_bad = 1;
299 *this_rg_cleaned = 0;
300 log_err( _("Error: resource group %"PRIu64" (0x%"PRIx64"): "
301 "free space (%d) does not match bitmap (%d)\n"),
302 rgd->rt_addr, rgd->rt_addr, rgd->rt_free, rg_free);
303 if (query(cx, _("Fix the rgrp free blocks count? (y/n)"))) {
304 rgd->rt_free = rg_free;
305 lgfs2_rgrp_out(rgd, rgd->rt_bits[0].bi_data);
306 rgd->rt_bits[0].bi_modified = 1;
307 *this_rg_fixed = 1;
308 log_err( _("The rgrp was fixed.\n"));
309 } else
310 log_err( _("The rgrp was not fixed.\n"));
311 }
312 }
313
314 /**
315 * check_rgrps_integrity - verify rgrp consistency
316 * Note: We consider an rgrp "cleaned" if the unlinked meta blocks are
317 * cleaned, so not quite "bad" and not quite "good" but rewritten anyway.
318 *
319 * Returns: 0 on success, 1 if errors were detected
320 */
321 static void check_rgrps_integrity(struct fsck_cx *cx)
322 {
323 struct osi_node *n, *next = NULL;
324 int rgs_good = 0, rgs_bad = 0, rgs_fixed = 0, rgs_cleaned = 0;
325 int was_bad = 0, was_fixed = 0, was_cleaned = 0;
326 struct lgfs2_rgrp_tree *rgd;
327 int reclaim_unlinked = 0;
328
329 log_info( _("Checking the integrity of all resource groups.\n"));
330 for (n = osi_first(&cx->sdp->rgtree); n; n = next) {
331 next = osi_next(n);
332 rgd = (struct lgfs2_rgrp_tree *)n;
333 if (fsck_abort)
334 return;
335 check_rgrp_integrity(cx, rgd, &reclaim_unlinked,
336 &was_fixed, &was_bad, &was_cleaned);
337 if (was_fixed)
338 rgs_fixed++;
339 if (was_cleaned)
340 rgs_cleaned++;
341 else if (was_bad)
342 rgs_bad++;
343 else
344 rgs_good++;
345 }
346 if (rgs_bad || rgs_cleaned) {
347 log_err( _("RGs: Consistent: %d Cleaned: %d Inconsistent: "
348 "%d Fixed: %d Total: %d\n"),
349 rgs_good, rgs_cleaned, rgs_bad, rgs_fixed,
350 rgs_good + rgs_bad + rgs_cleaned);
351 if (rgs_cleaned && blks_2free)
352 log_err(_("%"PRIu64" blocks may need to be freed in pass 5 "
353 "due to the cleaned resource groups.\n"),
354 blks_2free);
355 }
356 }
357
358 static int rebuild_sysdir(struct fsck_cx *cx)
359 {
360 struct lgfs2_sbd *sdp = cx->sdp;
361 struct lgfs2_inum inum;
362 struct lgfs2_buffer_head *bh = NULL;
363 int err = 0;
364
365 log_err(_("The system directory seems to be destroyed.\n"));
366 if (!query(cx, _("Okay to rebuild it? (y/n)"))) {
367 log_err(_("System directory not rebuilt; aborting.\n"));
368 return -1;
369 }
370 log_err(_("Trying to rebuild the master directory.\n"));
371 inum.in_formal_ino = sdp->md.next_inum++;
372 inum.in_addr = sdp->sd_meta_dir.in_addr;
373 err = lgfs2_init_dinode(sdp, &bh, &inum, S_IFDIR | 0755, GFS2_DIF_SYSTEM, &inum);
374 if (err != 0)
375 return -1;
376 sdp->master_dir = lgfs2_inode_get(sdp, bh);
377 if (sdp->master_dir == NULL) {
378 log_crit(_("Error reading master: %s\n"), strerror(errno));
379 return -1;
380 }
381 sdp->master_dir->bh_owned = 1;
382
383 if (fix_md.jiinode) {
384 inum.in_formal_ino = sdp->md.next_inum++;
385 inum.in_addr = fix_md.jiinode->i_num.in_addr;
386 err = lgfs2_dir_add(sdp->master_dir, "jindex", 6, &inum,
387 IF2DT(S_IFDIR | 0700));
388 if (err) {
389 log_crit(_("Error %d adding jindex directory\n"), errno);
390 exit(FSCK_ERROR);
391 }
392 sdp->master_dir->i_nlink++;
393 } else {
394 err = build_jindex(cx);
395 if (err) {
396 log_crit(_("Error %d building jindex\n"), err);
397 exit(FSCK_ERROR);
398 }
399 }
400
401 if (fix_md.pinode) {
402 inum.in_formal_ino = sdp->md.next_inum++;
403 inum.in_addr = fix_md.pinode->i_num.in_addr;
404 /* coverity[deref_arg:SUPPRESS] */
405 err = lgfs2_dir_add(sdp->master_dir, "per_node", 8, &inum,
406 IF2DT(S_IFDIR | 0700));
407 if (err) {
408 log_crit(_("Error %d adding per_node directory\n"),
409 errno);
410 exit(FSCK_ERROR);
411 }
412 sdp->master_dir->i_nlink++;
413 } else {
414 /* coverity[double_free:SUPPRESS] */
415 err = build_per_node(cx);
416 if (err) {
417 log_crit(_("Error %d building per_node directory\n"),
418 err);
419 exit(FSCK_ERROR);
420 }
421 }
422
423 if (fix_md.inum) {
424 inum.in_formal_ino = sdp->md.next_inum++;
425 inum.in_addr = fix_md.inum->i_num.in_addr;
426 /* coverity[deref_arg:SUPPRESS] */
427 err = lgfs2_dir_add(sdp->master_dir, "inum", 4, &inum,
428 IF2DT(S_IFREG | 0600));
429 if (err) {
430 log_crit(_("Error %d adding inum inode\n"), errno);
431 exit(FSCK_ERROR);
432 }
433 } else {
434 sdp->md.inum = lgfs2_build_inum(sdp);
435 if (sdp->md.inum == NULL) {
436 log_crit(_("Error building inum inode: %s\n"), strerror(errno));
437 exit(FSCK_ERROR);
438 }
439 /* Write the inode but don't free it, to avoid doing an extra lookup */
440 /* coverity[deref_after_free:SUPPRESS] */
441 lgfs2_dinode_out(sdp->md.inum, sdp->md.inum->i_bh->b_data);
442 lgfs2_bwrite(sdp->md.inum->i_bh);
443 }
444
445 if (fix_md.statfs) {
446 inum.in_formal_ino = sdp->md.next_inum++;
447 inum.in_addr = fix_md.statfs->i_num.in_addr;
448 /* coverity[deref_arg:SUPPRESS] */
449 err = lgfs2_dir_add(sdp->master_dir, "statfs", 6, &inum,
450 IF2DT(S_IFREG | 0600));
451 if (err) {
452 log_crit(_("Error %d adding statfs inode\n"), errno);
453 exit(FSCK_ERROR);
454 }
455 } else {
456 sdp->md.statfs = lgfs2_build_statfs(sdp);
457 if (sdp->md.statfs == NULL) {
458 log_crit(_("Error %d building statfs inode\n"), err);
459 exit(FSCK_ERROR);
460 }
461 /* Write the inode but don't free it, to avoid doing an extra lookup */
462 /* coverity[deref_after_free:SUPPRESS] */
463 lgfs2_dinode_out(sdp->md.statfs, sdp->md.statfs->i_bh->b_data);
464 lgfs2_bwrite(sdp->md.statfs->i_bh);
465 }
466
467 if (fix_md.riinode) {
468 inum.in_formal_ino = sdp->md.next_inum++;
469 inum.in_addr = fix_md.riinode->i_num.in_addr;
470 /* coverity[deref_arg:SUPPRESS] */
471 err = lgfs2_dir_add(sdp->master_dir, "rindex", 6, &inum,
472 IF2DT(S_IFREG | 0600));
473 if (err) {
474 log_crit(_("Error %d adding rindex inode\n"), errno);
475 exit(FSCK_ERROR);
476 }
477 } else {
478 /* coverity[double_free:SUPPRESS] */
479 struct lgfs2_inode *rip = lgfs2_build_rindex(sdp);
480 if (rip == NULL) {
481 log_crit(_("Error building rindex inode: %s\n"), strerror(errno));
482 exit(FSCK_ERROR);
483 }
484 lgfs2_inode_put(&rip);
485 }
486
487 if (fix_md.qinode) {
488 inum.in_formal_ino = sdp->md.next_inum++;
489 inum.in_addr = fix_md.qinode->i_num.in_addr;
490 err = lgfs2_dir_add(sdp->master_dir, "quota", 5, &inum,
491 IF2DT(S_IFREG | 0600));
492 if (err) {
493 log_crit(_("Error %d adding quota inode\n"), errno);
494 exit(FSCK_ERROR);
495 }
496 } else {
497 struct lgfs2_inode *qip = lgfs2_build_quota(sdp);
498 if (qip == NULL) {
499 log_crit(_("Error building quota inode: %s\n"), strerror(errno));
500 exit(FSCK_ERROR);
501 }
502 lgfs2_inode_put(&qip);
503 }
504
505 log_err(_("Master directory rebuilt.\n"));
506 lgfs2_inode_put(&sdp->md.inum);
507 lgfs2_inode_put(&sdp->md.statfs);
508 lgfs2_inode_put(&sdp->master_dir);
509 return 0;
510 }
511
512 /**
513 * lookup_per_node - Make sure the per_node directory is read in
514 *
515 * This function is used to read in the per_node directory. It is called
516 * twice. The first call tries to read in the dinode early on. That ensures
517 * that if any journals are missing, we can figure out the number of journals
518 * from per_node. However, we unfortunately can't rebuild per_node at that
519 * point in time because our resource groups aren't read in yet.
520 * The second time it's called is much later when we can rebuild it.
521 *
522 * allow_rebuild: 0 if rebuilds are not allowed
523 * 1 if rebuilds are allowed
524 */
525 static void lookup_per_node(struct fsck_cx *cx, int allow_rebuild)
526 {
527 struct lgfs2_sbd *sdp = cx->sdp;
528
529 if (sdp->md.pinode)
530 return;
531
532 sdp->md.pinode = lgfs2_lookupi(sdp->master_dir, "per_node", 8);
533 if (sdp->md.pinode)
534 return;
535 if (!allow_rebuild) {
536 log_err( _("The gfs2 system per_node directory "
537 "inode is missing, so we might not be \nable to "
538 "rebuild missing journals this run.\n"));
539 return;
540 }
541
542 if (query(cx, _("The gfs2 system per_node directory "
543 "inode is missing. Okay to rebuild it? (y/n) "))) {
544 int err;
545
546 /* coverity[freed_arg:SUPPRESS] False positive */
547 err = build_per_node(cx);
548 if (err) {
549 log_crit(_("Error %d rebuilding per_node directory\n"),
550 err);
551 exit(FSCK_ERROR);
552 }
553 }
554 /* coverity[identity_transfer:SUPPRESS] False positive */
555 sdp->md.pinode = lgfs2_lookupi(sdp->master_dir, "per_node", 8);
556 if (!sdp->md.pinode) {
557 log_err( _("Unable to rebuild per_node; aborting.\n"));
558 exit(FSCK_ERROR);
559 }
560 }
561
562 #define RA_WINDOW 32
563
564 static unsigned rgrp_reada(struct lgfs2_sbd *sdp, unsigned cur_window,
565 struct osi_node *n)
566 {
567 struct lgfs2_rgrp_tree *rgd;
568 unsigned i;
569 off_t start, len;
570
571 for (i = 0; i < RA_WINDOW; i++, n = osi_next(n)) {
572 if (n == NULL)
573 return i;
574 if (i < cur_window)
575 continue;
576 rgd = (struct lgfs2_rgrp_tree *)n;
577 start = rgd->rt_addr * sdp->sd_bsize;
578 len = rgd->rt_length * sdp->sd_bsize;
579 (void)posix_fadvise(sdp->device_fd, start, len, POSIX_FADV_WILLNEED);
580 }
581
582 return i;
583 }
584
585 /**
586 * read_rgrps - attach rgrps to the super block
587 * @sdp: incore superblock data
588 * @expected: number of resource groups expected (rindex entries)
589 *
590 * Given the rgrp index inode, link in all rgrps into the super block
591 * and be sure that they can be read.
592 *
593 * Returns: 0 on success, -1 on failure.
594 */
595 static int read_rgrps(struct lgfs2_sbd *sdp, uint64_t expected)
596 {
597 struct lgfs2_rgrp_tree *rgd;
598 uint64_t count = 0;
599 uint64_t errblock = 0;
600 uint64_t rmax = 0;
601 struct osi_node *n, *next = NULL;
602 unsigned ra_window = 0;
603
604 /* Turn off generic readhead */
605 (void)posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_RANDOM);
606
607 for (n = osi_first(&sdp->rgtree); n; n = next) {
608 next = osi_next(n);
609 rgd = (struct lgfs2_rgrp_tree *)n;
610 /* Readahead resource group headers */
611 if (ra_window < RA_WINDOW/2)
612 ra_window = rgrp_reada(sdp, ra_window, n);
613 /* Read resource group header */
614 errblock = lgfs2_rgrp_read(sdp, rgd);
615 if (errblock)
616 return errblock;
617 ra_window--;
618 count++;
619 if (rgd->rt_data0 + rgd->rt_data - 1 > rmax)
620 rmax = rgd->rt_data0 + rgd->rt_data - 1;
621 }
622
623 sdp->fssize = rmax;
624 if (count != expected)
625 goto fail;
626
627 (void)posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_NORMAL);
628 return 0;
629
630 fail:
631 (void)posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_NORMAL);
632 lgfs2_rgrp_free(sdp, &sdp->rgtree);
633 return -1;
634 }
635
636 static int fetch_rgrps_level(struct fsck_cx *cx, enum rgindex_trust_level lvl, uint64_t *count, int *ok)
637 {
638 int ret = 1;
639
640 const char *level_desc[] = {
641 _("Checking if all rgrp and rindex values are good"),
642 _("Checking if rindex values may be easily repaired"),
643 _("Calculating where the rgrps should be if evenly spaced"),
644 _("Trying to rebuild rindex assuming evenly spaced rgrps"),
645 _("Trying to rebuild rindex assuming unevenly spaced rgrps"),
646 };
647 const char *fail_desc[] = {
648 _("Some damage was found; we need to take remedial measures"),
649 _("rindex is unevenly spaced: either gfs1-style or corrupt"),
650 _("rindex calculations don't match: uneven rgrp boundaries"),
651 _("Too many rgrp misses: rgrps must be unevenly spaced"),
652 _("Too much damage found: we cannot rebuild this rindex"),
653 };
654
655 log_notice(_("Level %d resource group check: %s.\n"), lvl + 1, level_desc[lvl]);
656
657 if (rindex_repair(cx, lvl, ok) != 0)
658 goto fail;
659
660 if (lgfs2_rindex_read(cx->sdp, count, ok) != 0 || !*ok)
661 goto fail;
662
663 ret = read_rgrps(cx->sdp, *count);
664 if (ret != 0)
665 goto fail;
666
667 log_notice(_("(level %d passed)\n"), lvl + 1);
668 return 0;
669 fail:
670 if (ret == -1)
671 log_err(_("(level %d failed: %s)\n"), lvl + 1, fail_desc[lvl]);
672 else
673 log_err(_("(level %d failed at block %d (0x%x): %s)\n"), lvl + 1,
674 ret, ret, fail_desc[lvl]);
675 return ret;
676 }
677
678 /**
679 * fetch_rgrps - fetch the resource groups from disk, and check their integrity
680 */
681 static int fetch_rgrps(struct fsck_cx *cx)
682 {
683 enum rgindex_trust_level trust_lvl;
684 uint64_t rgcount;
685 int ok = 1;
686
687 log_notice(_("Validating resource group index.\n"));
688 for (trust_lvl = BLIND_FAITH; trust_lvl <= INDIGNATION; trust_lvl++) {
689 int ret = 0;
690
691 ret = fetch_rgrps_level(cx, trust_lvl, &rgcount, &ok);
692 if (ret == 0)
693 break;
694 if (fsck_abort)
695 break;
696 }
697 if (trust_lvl > INDIGNATION) {
698 log_err( _("Resource group recovery impossible; I can't fix "
699 "this file system.\n"));
700 return -1;
701 }
702 log_info( _("%"PRIu64" resource groups found.\n"), rgcount);
703
704 check_rgrps_integrity(cx);
705 return 0;
706 }
707
708 /**
709 * init_system_inodes
710 *
711 * Returns: 0 on success, -1 on failure
712 */
713 static int init_system_inodes(struct fsck_cx *cx)
714 {
715 struct lgfs2_sbd *sdp = cx->sdp;
716 __be64 inumbuf = 0;
717 char *buf;
718 int err;
719
720 log_info( _("Initializing special inodes...\n"));
721
722 /* Get root dinode */
723 sdp->md.rooti = lgfs2_inode_read(sdp, sdp->sd_root_dir.in_addr);
724 if (sdp->md.rooti == NULL)
725 return -1;
726
727 /* Look for "inum" entry in master dinode */
728 sdp->md.inum = lgfs2_lookupi(sdp->master_dir, "inum", 4);
729 if (!sdp->md.inum) {
730 if (!query(cx, _("The gfs2 system inum inode is missing. "
731 "Okay to rebuild it? (y/n) "))) {
732 log_err( _("fsck.gfs2 cannot continue without "
733 "a valid inum file; aborting.\n"));
734 goto fail;
735 }
736 sdp->md.inum = lgfs2_build_inum(sdp);
737 if (sdp->md.inum == NULL) {
738 log_crit(_("Error rebuilding inum inode: %s\n"), strerror(errno));
739 exit(FSCK_ERROR);
740 }
741 lgfs2_dinode_out(sdp->md.inum, sdp->md.inum->i_bh->b_data);
742 if (lgfs2_bwrite(sdp->md.inum->i_bh) != 0) {
743 log_crit(_("System inum inode was not rebuilt. Aborting.\n"));
744 goto fail;
745 }
746 }
747 /* Read inum entry into buffer */
748 err = lgfs2_readi(sdp->md.inum, &inumbuf, 0,
749 sdp->md.inum->i_size);
750 if (err != sdp->md.inum->i_size) {
751 log_crit(_("Error %d reading system inum inode. "
752 "Aborting.\n"), err);
753 goto fail;
754 }
755 /* call gfs2_inum_range_in() to retrieve range */
756 sdp->md.next_inum = be64_to_cpu(inumbuf);
757
758 sdp->md.statfs = lgfs2_lookupi(sdp->master_dir, "statfs", 6);
759 if (!sdp->md.statfs) {
760 if (!query(cx, _("The gfs2 system statfs inode is missing. "
761 "Okay to rebuild it? (y/n) "))) {
762 log_err( _("fsck.gfs2 cannot continue without a valid "
763 "statfs file; aborting.\n"));
764 goto fail;
765 }
766 sdp->md.statfs = lgfs2_build_statfs(sdp);
767 if (sdp->md.statfs == NULL) {
768 log_crit(_("Error %d rebuilding statfs inode\n"), err);
769 exit(FSCK_ERROR);
770 }
771 lgfs2_dinode_out(sdp->md.statfs, sdp->md.statfs->i_bh->b_data);
772 if (lgfs2_bwrite(sdp->md.statfs->i_bh) != 0) {
773 log_err( _("Rebuild of statfs system file failed."));
774 log_err( _("fsck.gfs2 cannot continue without "
775 "a valid statfs file; aborting.\n"));
776 goto fail;
777 }
778 lgfs2_init_statfs(sdp, NULL);
779 }
780 if (sdp->md.statfs->i_size) {
781 buf = malloc(sdp->md.statfs->i_size);
782 if (buf) {
783 err = lgfs2_readi(sdp->md.statfs, buf, 0,
784 sdp->md.statfs->i_size);
785 if (err != sdp->md.statfs->i_size) {
786 log_crit(_("Error %d reading statfs file. "
787 "Aborting.\n"), err);
788 free(buf);
789 goto fail;
790 }
791 free(buf);
792 }
793 }
794
795 sdp->md.qinode = lgfs2_lookupi(sdp->master_dir, "quota", 5);
796 if (!sdp->md.qinode) {
797 if (!query(cx, _("The gfs2 system quota inode is missing. "
798 "Okay to rebuild it? (y/n) "))) {
799 log_crit(_("System quota inode was not "
800 "rebuilt. Aborting.\n"));
801 goto fail;
802 }
803 sdp->md.qinode = lgfs2_build_quota(sdp);
804 if (sdp->md.qinode == NULL) {
805 log_crit(_("Error rebuilding quota inode: %s\n"), strerror(errno));
806 exit(FSCK_ERROR);
807 }
808 lgfs2_dinode_out(sdp->md.qinode, sdp->md.qinode->i_bh->b_data);
809 if (lgfs2_bwrite(sdp->md.qinode->i_bh) != 0) {
810 log_crit(_("Unable to rebuild system quota file "
811 "inode. Aborting.\n"));
812 goto fail;
813 }
814 }
815
816 /* Try to lookup the per_node inode. If it was missing, it is now
817 safe to rebuild it. */
818 lookup_per_node(cx, 1);
819
820 /*******************************************************************
821 ******* Now, set boundary fields in the super block *************
822 *******************************************************************/
823 if (set_block_ranges(sdp)){
824 log_err( _("Unable to determine the boundaries of the"
825 " file system.\n"));
826 goto fail;
827 }
828
829 return 0;
830 fail:
831 empty_super_block(cx);
832
833 return -1;
834 }
835
836 /**
837 * is_journal_copy - Is this a "real" dinode or a copy inside a journal?
838 * A real dinode will be located at the block number in its no_addr.
839 * A journal-copy will be at a different block (inside the journal).
840 */
841 static int is_journal_copy(struct lgfs2_inode *ip)
842 {
843 if (ip->i_num.in_addr == ip->i_bh->b_blocknr)
844 return 0;
845 return 1; /* journal copy */
846 }
847
848 /**
849 * peruse_system_dinode - process a system dinode
850 *
851 * This function looks at a system dinode and tries to figure out which
852 * dinode it is: statfs, inum, per_node, master, etc. Some of them we
853 * can deduce from the contents. For example, di_size will be a multiple
854 * of 96 for the rindex. di_size will be 8 for inum, 24 for statfs, etc.
855 * the per_node directory will have a ".." entry that will lead us to
856 * the master dinode if it's been destroyed.
857 */
858 static void peruse_system_dinode(struct fsck_cx *cx, struct lgfs2_inode *ip)
859 {
860 struct lgfs2_sbd *sdp = cx->sdp;
861 struct lgfs2_inode *child_ip;
862 struct lgfs2_inum inum;
863 int error;
864
865 if (ip->i_num.in_formal_ino == 2) {
866 if (sdp->sd_meta_dir.in_addr)
867 return;
868 log_warn(_("Found system master directory at: 0x%"PRIx64".\n"),
869 ip->i_num.in_addr);
870 sdp->sd_meta_dir.in_addr = ip->i_num.in_addr;
871 return;
872 }
873 if (ip->i_num.in_formal_ino == 3) {
874 if (fix_md.jiinode || is_journal_copy(ip))
875 goto out_discard_ip;
876 log_warn(_("Found system jindex file at: 0x%"PRIx64"\n"), ip->i_num.in_addr);
877 fix_md.jiinode = ip;
878 } else if (is_dir(ip)) {
879 /* Check for a jindex dir entry. Only one system dir has a
880 jindex: master */
881 /* coverity[identity_transfer:SUPPRESS] */
882 child_ip = lgfs2_lookupi(ip, "jindex", 6);
883 if (child_ip) {
884 if (fix_md.jiinode || is_journal_copy(ip)) {
885 lgfs2_inode_put(&child_ip);
886 goto out_discard_ip;
887 }
888 fix_md.jiinode = child_ip;
889 sdp->sd_meta_dir.in_addr = ip->i_num.in_addr;
890 log_warn(_("Found system master directory at: 0x%"PRIx64"\n"),
891 ip->i_num.in_addr);
892 return;
893 }
894
895 /* Check for a statfs_change0 dir entry. Only one system dir
896 has a statfs_change: per_node, and its .. will be master. */
897 /* coverity[identity_transfer:SUPPRESS] */
898 child_ip = lgfs2_lookupi(ip, "statfs_change0", 14);
899 if (child_ip) {
900 lgfs2_inode_put(&child_ip);
901 if (fix_md.pinode || is_journal_copy(ip))
902 goto out_discard_ip;
903 log_warn(_("Found system per_node directory at: 0x%"PRIx64"\n"),
904 ip->i_num.in_addr);
905 fix_md.pinode = ip;
906 error = lgfs2_dir_search(ip, "..", 2, NULL, &inum);
907 if (!error && inum.in_addr) {
908 sdp->sd_meta_dir.in_addr = inum.in_addr;
909 log_warn(_("From per_node's '..' master directory backtracked to: "
910 "0x%"PRIx64"\n"), inum.in_addr);
911 }
912 return;
913 }
914 log_debug(_("Unknown system directory at block 0x%"PRIx64"\n"), ip->i_num.in_addr);
915 goto out_discard_ip;
916 } else if (ip->i_size == 8) {
917 if (fix_md.inum || is_journal_copy(ip))
918 goto out_discard_ip;
919 fix_md.inum = ip;
920 log_warn(_("Found system inum file at: 0x%"PRIx64"\n"), ip->i_num.in_addr);
921 } else if (ip->i_size == 24) {
922 if (fix_md.statfs || is_journal_copy(ip))
923 goto out_discard_ip;
924 fix_md.statfs = ip;
925 log_warn(_("Found system statfs file at: 0x%"PRIx64"\n"), ip->i_num.in_addr);
926 } else if ((ip->i_size % 96) == 0) {
927 if (fix_md.riinode || is_journal_copy(ip))
928 goto out_discard_ip;
929 fix_md.riinode = ip;
930 log_warn(_("Found system rindex file at: 0x%"PRIx64"\n"), ip->i_num.in_addr);
931 } else if (!fix_md.qinode && ip->i_size >= 176 &&
932 ip->i_num.in_formal_ino >= 12 &&
933 ip->i_num.in_formal_ino <= 100) {
934 if (is_journal_copy(ip))
935 goto out_discard_ip;
936 fix_md.qinode = ip;
937 log_warn(_("Found system quota file at: 0x%"PRIx64"\n"), ip->i_num.in_addr);
938 } else {
939 out_discard_ip:
940 lgfs2_inode_put(&ip);
941 }
942 }
943
944 /**
945 * peruse_user_dinode - process a user dinode trying to find the root directory
946 *
947 */
948 static void peruse_user_dinode(struct fsck_cx *cx, struct lgfs2_inode *ip)
949 {
950 struct lgfs2_sbd *sdp = cx->sdp;
951 struct lgfs2_inode *parent_ip;
952 struct lgfs2_inum inum;
953 int error;
954
955 if (sdp->sd_root_dir.in_addr) /* if we know the root dinode */
956 return; /* we don't need to find the root */
957 if (!is_dir(ip)) /* if this isn't a directory */
958 return; /* it can't lead us to the root anyway */
959
960 if (ip->i_num.in_formal_ino == 1) {
961 struct lgfs2_buffer_head *root_bh;
962
963 if (ip->i_num.in_addr == ip->i_bh->b_blocknr) {
964 log_warn(_("Found the root directory at: 0x%"PRIx64".\n"),
965 ip->i_num.in_addr);
966 sdp->sd_root_dir.in_addr = ip->i_num.in_addr;
967 return;
968 }
969 log_warn(_("The root dinode should be at block 0x%"PRIx64" but it "
970 "seems to be destroyed.\n"),
971 ip->i_num.in_addr);
972 log_warn(_("Found a copy of the root directory in a journal "
973 "at block: 0x%"PRIx64".\n"),
974 ip->i_bh->b_blocknr);
975 if (!query(cx, _("Do you want to replace the root dinode from the copy? (y/n)"))) {
976 log_err(_("Damaged root dinode not fixed.\n"));
977 return;
978 }
979 root_bh = lgfs2_bread(sdp, ip->i_num.in_addr);
980 memcpy(root_bh->b_data, ip->i_bh->b_data, sdp->sd_bsize);
981 lgfs2_bmodified(root_bh);
982 lgfs2_brelse(root_bh);
983 log_warn(_("Root directory copied from the journal.\n"));
984 return;
985 }
986 /* coverity[check_after_deref:SUPPRESS] */
987 while (ip) {
988 /* coverity[identity_transfer:SUPPRESS] */
989 parent_ip = lgfs2_lookupi(ip, "..", 2);
990 if (parent_ip && parent_ip->i_num.in_addr == ip->i_num.in_addr) {
991 log_warn(_("Found the root directory at: 0x%"PRIx64"\n"),
992 ip->i_num.in_addr);
993 sdp->sd_root_dir.in_addr = ip->i_num.in_addr;
994 lgfs2_inode_put(&parent_ip);
995 lgfs2_inode_put(&ip);
996 return;
997 }
998 if (!parent_ip)
999 break;
1000 lgfs2_inode_put(&ip);
1001 ip = parent_ip;
1002 }
1003 error = lgfs2_dir_search(ip, "..", 2, NULL, &inum);
1004 if (!error && inum.in_addr && inum.in_addr < possible_root) {
1005 possible_root = inum.in_addr;
1006 log_debug(_("Found a possible root at: 0x%"PRIx64"\n"),
1007 possible_root);
1008 }
1009 lgfs2_inode_put(&ip);
1010 }
1011
1012 /**
1013 * find_rgs_for_bsize - check a range of blocks for rgrps to determine bsize.
1014 * Assumes: device is open.
1015 */
1016 static int find_rgs_for_bsize(struct lgfs2_sbd *sdp, uint64_t startblock,
1017 uint32_t *known_bsize)
1018 {
1019 uint64_t blk, max_rg_size, rb_addr;
1020 uint32_t bsize, bsize2;
1021 int found_rg;
1022
1023 sdp->sd_bsize = LGFS2_DEFAULT_BSIZE;
1024 max_rg_size = 524288;
1025 /* Max RG size is 2GB. Max block size is 4K. 2G / 4K blks = 524288,
1026 So this is traversing 2GB in 4K block increments. */
1027 for (blk = startblock; blk < startblock + max_rg_size; blk++) {
1028 struct lgfs2_buffer_head *bh = lgfs2_bread(sdp, blk);
1029
1030 found_rg = 0;
1031 for (bsize = 0; bsize < LGFS2_DEFAULT_BSIZE; bsize += GFS2_BASIC_BLOCK) {
1032 struct gfs2_meta_header mhp;
1033
1034 memcpy(&mhp, bh->b_data + bsize, sizeof(mhp));
1035 if (be32_to_cpu(mhp.mh_magic) != GFS2_MAGIC)
1036 continue;
1037 if (be32_to_cpu(mhp.mh_type) == GFS2_METATYPE_RG) {
1038 found_rg = 1;
1039 break;
1040 }
1041 }
1042 lgfs2_bfree(&bh);
1043 if (!found_rg)
1044 continue;
1045 /* Try all the block sizes in 512 byte multiples */
1046 for (bsize2 = GFS2_BASIC_BLOCK; bsize2 <= LGFS2_DEFAULT_BSIZE;
1047 bsize2 += GFS2_BASIC_BLOCK) {
1048 struct lgfs2_buffer_head *rb_bh;
1049 struct gfs2_meta_header *mh;
1050 int is_rb;
1051
1052 rb_addr = (blk * (LGFS2_DEFAULT_BSIZE / bsize2)) +
1053 (bsize / bsize2) + 1;
1054 sdp->sd_bsize = bsize2; /* temporarily */
1055 rb_bh = lgfs2_bread(sdp, rb_addr);
1056 mh = (struct gfs2_meta_header *)rb_bh->b_data;
1057 is_rb = (be32_to_cpu(mh->mh_magic) == GFS2_MAGIC &&
1058 be32_to_cpu(mh->mh_type) == GFS2_METATYPE_RB);
1059 lgfs2_brelse(rb_bh);
1060 if (is_rb) {
1061 log_debug(_("boff:%d bsize2:%d rg:0x%"PRIx64", "
1062 "rb:0x%"PRIx64"\n"), bsize, bsize2,
1063 blk, rb_addr);
1064 *known_bsize = bsize2;
1065 break;
1066 }
1067 }
1068 if (!(*known_bsize)) {
1069 sdp->sd_bsize = LGFS2_DEFAULT_BSIZE;
1070 continue;
1071 }
1072
1073 sdp->sd_bsize = *known_bsize;
1074 log_warn(_("Block size determined to be: %d\n"), *known_bsize);
1075 return 0;
1076 }
1077 return 0;
1078 }
1079
1080 /**
1081 * peruse_metadata - check a range of blocks for metadata
1082 * Assumes: device is open.
1083 */
1084 static int peruse_metadata(struct fsck_cx *cx, uint64_t startblock)
1085 {
1086 struct lgfs2_sbd *sdp = cx->sdp;
1087 uint64_t blk, max_rg_size;
1088 struct lgfs2_buffer_head *bh;
1089 struct lgfs2_inode *ip;
1090
1091 max_rg_size = 2147483648ull / sdp->sd_bsize;
1092 /* Max RG size is 2GB. 2G / bsize. */
1093 for (blk = startblock; blk < startblock + max_rg_size; blk++) {
1094 bh = lgfs2_bread(sdp, blk);
1095 if (lgfs2_check_meta(bh->b_data, GFS2_METATYPE_DI)) {
1096 lgfs2_brelse(bh);
1097 continue;
1098 }
1099 ip = lgfs2_inode_get(sdp, bh);
1100 if (ip == NULL)
1101 return -1;
1102 ip->bh_owned = 1; /* lgfs2_inode_put() will free the bh */
1103 if (ip->i_flags & GFS2_DIF_SYSTEM)
1104 peruse_system_dinode(cx, ip);
1105 else
1106 peruse_user_dinode(cx, ip);
1107 }
1108 return 0;
1109 }
1110
1111 /**
1112 * sb_repair - repair a damaged superblock
1113 * Assumes: device is open.
1114 * The biggest RG size is 2GB
1115 */
1116 static int sb_repair(struct fsck_cx *cx)
1117 {
1118 struct lgfs2_sbd *sdp = cx->sdp;
1119 uint64_t half;
1120 uint32_t known_bsize = 0;
1121 int error = 0;
1122
1123 memset(&fix_md, 0, sizeof(fix_md));
1124 /* Step 1 - First we need to determine the correct block size. */
1125 sdp->sd_bsize = LGFS2_DEFAULT_BSIZE;
1126 log_warn(_("Gathering information to repair the gfs2 superblock. "
1127 "This may take some time.\n"));
1128 error = find_rgs_for_bsize(sdp, (GFS2_SB_ADDR * GFS2_BASIC_BLOCK) /
1129 LGFS2_DEFAULT_BSIZE, &known_bsize);
1130 if (error)
1131 return error;
1132 if (!known_bsize) {
1133 log_warn(_("Block size not apparent; checking elsewhere.\n"));
1134 /* First, figure out the device size. We need that so we can
1135 find a suitable start point to determine what's what. */
1136 half = sdp->dinfo.size / 2; /* in bytes */
1137 half /= sdp->sd_bsize;
1138 /* Start looking halfway through the device for gfs2
1139 structures. If there aren't any at all, forget it. */
1140 error = find_rgs_for_bsize(sdp, half, &known_bsize);
1141 if (error)
1142 return error;
1143 }
1144 if (!known_bsize) {
1145 log_err(_("Unable to determine the block size; this "
1146 "does not look like a gfs2 file system.\n"));
1147 return -1;
1148 }
1149 /* Step 2 - look for the sytem dinodes */
1150 error = peruse_metadata(cx, (GFS2_SB_ADDR * GFS2_BASIC_BLOCK) /
1151 LGFS2_DEFAULT_BSIZE);
1152 if (error)
1153 return error;
1154 if (!sdp->sd_meta_dir.in_addr) {
1155 log_err(_("Unable to locate the system master directory.\n"));
1156 return -1;
1157 }
1158 if (!sdp->sd_root_dir.in_addr) {
1159 log_err(_("Unable to locate the root directory.\n"));
1160 if (possible_root == HIGHEST_BLOCK) {
1161 /* Take advantage of the fact that mkfs.gfs2
1162 creates master immediately after root. */
1163 log_err(_("Can't find any dinodes that might "
1164 "be the root; using master - 1.\n"));
1165 possible_root = sdp->sd_meta_dir.in_addr - 1;
1166 }
1167 log_err(_("Found a possible root at: 0x%"PRIx64"\n"), possible_root);
1168 sdp->sd_root_dir.in_addr = possible_root;
1169 sdp->md.rooti = lgfs2_inode_read(sdp, possible_root);
1170 if (!sdp->md.rooti || sdp->md.rooti->i_magic != GFS2_MAGIC) {
1171 struct lgfs2_buffer_head *bh = NULL;
1172 struct lgfs2_inum inum;
1173
1174 log_err(_("The root dinode block is destroyed.\n"));
1175 log_err(_("At this point I recommend "
1176 "reinitializing it.\n"
1177 "Hopefully everything will later "
1178 "be put into lost+found.\n"));
1179 if (!query(cx, _("Okay to reinitialize the root "
1180 "dinode? (y/n)"))) {
1181 log_err(_("The root dinode was not "
1182 "reinitialized; aborting.\n"));
1183 return -1;
1184 }
1185 inum.in_formal_ino = 1;
1186 inum.in_addr = possible_root;
1187 error = lgfs2_init_dinode(sdp, &bh, &inum, S_IFDIR | 0755, 0, &inum);
1188 if (error != 0)
1189 return -1;
1190 lgfs2_brelse(bh);
1191 }
1192 }
1193 /* Step 3 - Rebuild the lock protocol and file system table name */
1194 if (query(cx, _("Okay to fix the GFS2 superblock? (y/n)"))) {
1195 log_info(_("Found system master directory at: 0x%"PRIx64"\n"),
1196 sdp->sd_meta_dir.in_addr);
1197 sdp->master_dir = lgfs2_inode_read(sdp, sdp->sd_meta_dir.in_addr);
1198 if (sdp->master_dir == NULL) {
1199 log_crit(_("Error reading master inode: %s\n"), strerror(errno));
1200 return -1;
1201 }
1202 sdp->master_dir->i_num.in_addr = sdp->sd_meta_dir.in_addr;
1203 log_info(_("Found the root directory at: 0x%"PRIx64"\n"),
1204 sdp->sd_root_dir.in_addr);
1205 sdp->md.rooti = lgfs2_inode_read(sdp, sdp->sd_root_dir.in_addr);
1206 if (sdp->md.rooti == NULL) {
1207 log_crit(_("Error reading root inode: %s\n"), strerror(errno));
1208 return -1;
1209 }
1210 sdp->sd_fs_format = GFS2_FORMAT_FS;
1211 lgfs2_sb_write(sdp, sdp->device_fd);
1212 lgfs2_inode_put(&sdp->md.rooti);
1213 lgfs2_inode_put(&sdp->master_dir);
1214 sb_fixed = 1;
1215 } else {
1216 log_crit(_("GFS2 superblock not fixed; fsck cannot proceed "
1217 "without a valid superblock.\n"));
1218 return -1;
1219 }
1220 return 0;
1221 }
1222
1223 /**
1224 * fill_super_block
1225 * @sdp:
1226 *
1227 * Returns: 0 on success, -1 on failure
1228 */
1229 static int fill_super_block(struct fsck_cx *cx)
1230 {
1231 struct lgfs2_sbd *sdp = cx->sdp;
1232 int ret;
1233
1234 sync();
1235
1236 log_info( _("Initializing lists...\n"));
1237 sdp->rgtree.osi_node = NULL;
1238
1239 sdp->sd_bsize = LGFS2_DEFAULT_BSIZE;
1240 if (lgfs2_compute_constants(sdp)) {
1241 log_crit("%s\n", _("Failed to compute file system constants"));
1242 return FSCK_ERROR;
1243 }
1244 ret = lgfs2_read_sb(sdp);
1245 if (ret < 0) {
1246 if (sb_repair(cx) != 0)
1247 return -1; /* unrepairable, so exit */
1248 /* Now that we've tried to repair it, re-read it. */
1249 ret = lgfs2_read_sb(sdp);
1250 if (ret < 0)
1251 return FSCK_ERROR;
1252 }
1253 if (sdp->sd_fs_format > FSCK_MAX_FORMAT) {
1254 log_crit(_("Unsupported gfs2 format found: %"PRIu32"\n"), sdp->sd_fs_format);
1255 log_crit(_("A newer fsck.gfs2 is required to check this file system.\n"));
1256 return FSCK_USAGE;
1257 }
1258 return 0;
1259 }
1260
1261 /**
1262 * init_rindex - read in the rindex file
1263 */
1264 static int init_rindex(struct fsck_cx *cx)
1265 {
1266 struct lgfs2_sbd *sdp = cx->sdp;
1267 struct lgfs2_inode *ip;
1268
1269 sdp->md.riinode = lgfs2_lookupi(sdp->master_dir, "rindex", 6);
1270 if (sdp->md.riinode)
1271 return 0;
1272
1273 if (!query(cx, _("The gfs2 system rindex inode is missing. "
1274 "Okay to rebuild it? (y/n) "))) {
1275 log_crit(_("Error: Cannot proceed without a valid rindex.\n"));
1276 return -1;
1277 }
1278 ip = lgfs2_build_rindex(sdp);
1279 if (ip == NULL) {
1280 log_crit(_("Error rebuilding rindex: %s\n"), strerror(errno));
1281 return -1;
1282 }
1283 lgfs2_inode_put(&ip);
1284 return 0;
1285 }
1286
1287 /**
1288 * initialize - initialize superblock pointer
1289 *
1290 */
1291 int initialize(struct fsck_cx *cx, int *all_clean)
1292 {
1293 struct lgfs2_sbd *sdp = cx->sdp;
1294 int clean_journals = 0, open_flag;
1295 int err;
1296
1297 *all_clean = 0;
1298
1299 if (cx->opts->no)
1300 open_flag = O_RDONLY;
1301 else
1302 open_flag = O_RDWR | O_EXCL;
1303
1304 sdp->device_fd = open(cx->opts->device, open_flag);
1305 if (sdp->device_fd < 0) {
1306 struct mntent *mnt;
1307 if (open_flag == O_RDONLY || errno != EBUSY) {
1308 log_crit( _("Unable to open device: %s\n"),
1309 cx->opts->device);
1310 return FSCK_USAGE;
1311 }
1312 /* We can't open it EXCL. It may be already open rw (in which
1313 case we want to deny them access) or it may be mounted as
1314 the root file system at boot time (in which case we need to
1315 allow it.)
1316 If the device is busy, but not because it's mounted, fail.
1317 This protects against cases where the file system is LVM
1318 and perhaps mounted on a different node.
1319 Try opening without O_EXCL. */
1320 sdp->device_fd = lgfs2_open_mnt_dev(cx->opts->device, O_RDWR, &mnt);
1321 if (sdp->device_fd < 0)
1322 goto mount_fail;
1323 /* If the device is mounted, but not mounted RO, fail. This
1324 protects them against cases where the file system is
1325 mounted RW, but still allows us to check our own root
1326 file system. */
1327 if (!hasmntopt(mnt, MNTOPT_RO))
1328 goto close_fail;
1329 /* The device is mounted RO, so it's likely our own root
1330 file system. We can only do so much to protect the users
1331 from themselves. */
1332 was_mounted_ro = 1;
1333 }
1334
1335 if (lgfs2_get_dev_info(sdp->device_fd, &sdp->dinfo)) {
1336 perror(cx->opts->device);
1337 return FSCK_ERROR;
1338 }
1339
1340 /* read in sb from disk */
1341 err = fill_super_block(cx);
1342 if (err != FSCK_OK)
1343 return err;
1344
1345 /* Change lock protocol to be fsck_* instead of lock_* */
1346 if (!cx->opts->no && preen_is_safe(sdp, cx->opts)) {
1347 if (block_mounters(sdp, 1)) {
1348 log_err( _("Unable to block other mounters\n"));
1349 return FSCK_USAGE;
1350 }
1351 }
1352
1353 sdp->master_dir = lgfs2_inode_read(sdp, sdp->sd_meta_dir.in_addr);
1354 if (sdp->master_dir->i_magic != GFS2_MAGIC ||
1355 sdp->master_dir->i_mh_type != GFS2_METATYPE_DI ||
1356 !sdp->master_dir->i_size) {
1357 lgfs2_inode_put(&sdp->master_dir);
1358 rebuild_sysdir(cx);
1359 sdp->master_dir = lgfs2_inode_read(sdp, sdp->sd_meta_dir.in_addr);
1360 if (sdp->master_dir == NULL) {
1361 log_crit(_("Error reading master directory: %s\n"), strerror(errno));
1362 return FSCK_ERROR;
1363 }
1364 }
1365
1366 /* Look up the "per_node" inode. If there are journals missing, we
1367 need to figure out what's missing from per_node. And we need all
1368 our journals to be there before we can replay them. */
1369 lookup_per_node(cx, 0);
1370
1371 /* We need rindex first in case jindex is missing and needs to read
1372 in the rgrps before rebuilding it. However, note that if the rindex
1373 is damaged, we need the journals to repair it. That's because the
1374 journals likely contain rgrps and bitmaps, which we need to ignore
1375 when we're trying to find the rgrps. */
1376 if (init_rindex(cx))
1377 return FSCK_ERROR;
1378
1379 if (fetch_rgrps(cx))
1380 return FSCK_ERROR;
1381
1382 /* We need to read in jindex in order to replay the journals. If
1383 there's an error, we may proceed and let init_system_inodes
1384 try to rebuild it. */
1385 if (init_jindex(cx, 1) == 0) {
1386 if (replay_journals(cx, &clean_journals)) {
1387 if (!cx->opts->no && preen_is_safe(sdp, cx->opts))
1388 block_mounters(sdp, 0);
1389 stack;
1390 return FSCK_ERROR;
1391 }
1392 if (sdp->md.journals == clean_journals)
1393 *all_clean = 1;
1394 else if (cx->opts->force || !cx->opts->preen)
1395 log_notice( _("\nJournal recovery complete.\n"));
1396
1397 if (!cx->opts->force && *all_clean && cx->opts->preen)
1398 return FSCK_OK;
1399 }
1400
1401 if (init_system_inodes(cx))
1402 return FSCK_ERROR;
1403
1404 return FSCK_OK;
1405
1406 close_fail:
1407 close(sdp->device_fd);
1408 mount_fail:
1409 log_crit( _("Device %s is busy.\n"), cx->opts->device);
1410 return FSCK_USAGE;
1411 }
1412
1413 void destroy(struct fsck_cx *cx)
1414 {
1415 struct lgfs2_sbd *sdp = cx->sdp;
1416
1417 if (!cx->opts->no) {
1418 if (block_mounters(sdp, 0)) {
1419 log_warn( _("Unable to unblock other mounters - manual intervention required\n"));
1420 log_warn( _("Use 'gfs2_tool sb <device> proto' to fix\n"));
1421 }
1422 log_info( _("Syncing the device.\n"));
1423 fsync(sdp->device_fd);
1424 }
1425 empty_super_block(cx);
1426 close(sdp->device_fd);
1427 if (was_mounted_ro && errors_corrected) {
1428 sdp->device_fd = open("/proc/sys/vm/drop_caches", O_WRONLY);
1429 if (sdp->device_fd >= 0) {
1430 if (write(sdp->device_fd, "2", 1) == 2) {
1431 close(sdp->device_fd);
1432 return;
1433 }
1434 close(sdp->device_fd);
1435 }
1436 log_warn(_("fsck.gfs2: Could not flush caches (non-fatal).\n"));
1437 }
1438 }
1439