1

How to dump the lost+found content from an ext4 img file that cannot be mounted using debugfs in linux and keep the folder hierarchy?

My ext4 disk was failing. I made a disk image. I tried e2fsck and other tools to fix it. It refuses to mount. e2fsck can't repair the image.

However, I can see, using debugfs, that there is a lot of content inside the lost+found folder. Almost all of the data is there. However, debugfs does not provide a tool to extract all this data automatically while keeping any directory hierarchy that may exist. Yes, there is a folder hierarchy in lost+found, e.g. /lost+found/#1233131/folderA/folderB/file.mp4.

So I am looking for a solution to automatically restore all this data.

1 Answer 1

0

I made two python scripts

The first one extracts metadata only (full path, mtime, size, mode, etc.) and saves it in an SQLite database. The second Python script then retrieves the files at these paths into a recovery folder, keeping the full path (hierarchy) found under lost+found. It takes weird characters, spaces, quotes and other oddities into consideration. However, be cautious, and always verify the result with a few of the weirdest file names.

1st pyscript

import sqlite3
import stat
import subprocess
from collections import deque

# Path of the ext4 image file that debugfs is pointed at.
IMG = "/run/media/mydisk/ext4.img"
# SQLite database file that receives the scan results.
DB = "ext4lostfound_db.db"

# SQLite DB setup: every run starts from an empty `paths` table, so results
# of an earlier scan stored in the same DB file are discarded.
conn = sqlite3.connect(DB)
c = conn.cursor()
c.execute("""
DROP TABLE IF EXISTS paths;
""")
# One row per entry discovered under /lost+found:
#   path             full path as printed by debugfs
#   type_0file_1dir  0 for a file, 1 for a directory
#   F_*              per-entry metadata columns
#   f_checked        0 = directory queued but not listed yet, 1 = done,
#                    -1 = debugfs failed on that directory
#   deep_dir         depth of the entry, with /lost+found itself at depth 1
c.execute("""
CREATE TABLE paths(
    path TEXT,
    type_0file_1dir INTEGER,
    F_INODE INTEGER,
    F_MODE TEXT,
    F_LINKS INTEGER,
    F_UID INTEGER,
    F_GID INTEGER,
    F_SIZE INTEGER,
    F_MTIME TEXT,
    f_checked INTEGER DEFAULT 0,
    deep_dir INTEGER DEFAULT 0
)
""")
conn.commit()

# Breadth-first walk of everything reachable from /lost+found.
#
# Each queue entry is (display path, debugfs filespec, depth).  The root is
# addressed by its quoted path; every sub-directory is addressed by inode
# number (`<N>`), so names containing quotes, repeated spaces or escaped
# bytes cannot break the `ls` request sent to debugfs.
queue = deque([("/lost+found", '"/lost+found"', 1)])

while queue:
    curr_path, filespec, depth = queue.popleft()  # O(1), unlike list.pop(0)
    print(f"Scanning: {curr_path}, depth={depth}")

    # Run debugfs and capture stdout
    try:
        out = subprocess.check_output(
            ['debugfs', '-R', f'ls -l {filespec}', IMG],
            stderr=subprocess.DEVNULL,
            text=True,
            encoding='utf-8'
        )
    except subprocess.CalledProcessError:
        print(f"Failed to read {curr_path}, marking as error")
        c.execute("INSERT INTO paths(path,type_0file_1dir,f_checked,deep_dir) VALUES(?,?,?,?)",
                  (curr_path, 1, -1, depth))
        conn.commit()
        continue

    # Remove debugfs banner if present
    lines = out.splitlines()
    if lines and lines[0].startswith("debugfs"):
        lines = lines[1:]

    for line in lines:
        # Expected layout of one `ls -l` row:
        #   inode mode (n) uid gid size date time name
        # maxsplit=8 keeps the name as one untouched field, so consecutive
        # spaces inside a file name are preserved instead of being collapsed.
        parts = line.split(None, 8)
        if len(parts) < 9:
            # Blank line or a row that is not a directory entry.
            continue

        inode, mode, links, uid, gid, size, day, clock, name = parts
        # NOTE(review): in e2fsprogs this parenthesised field appears to be
        # the directory-entry file type rather than a link count -- confirm.
        links = links.strip("()")

        # Skip '.' and '..' to avoid cycles
        if name in ('.', '..'):
            continue

        # Classify by the file-type bits of the octal mode.  Looking only at
        # the first printed digit would mistake FIFOs (10644) and symlinks
        # (120777) for regular files (100644).
        try:
            mode_bits = int(mode, 8)
        except ValueError:
            continue
        is_dir = stat.S_ISDIR(mode_bits)
        is_file = stat.S_ISREG(mode_bits)

        full_path = f"{curr_path}/{name}"

        # Debug info
        type_str = "DIR" if is_dir else "FILE" if is_file else "OTHER"
        print(f"  -> {type_str}: {full_path}")

        if is_file:
            c.execute(
                "INSERT INTO paths(path,type_0file_1dir,F_INODE,F_MODE,F_LINKS,"
                "F_UID,F_GID,F_SIZE,F_MTIME,f_checked,deep_dir) "
                "VALUES(?,?,?,?,?,?,?,?,?,?,?)",
                (full_path, 0, inode, mode, links,
                 uid, gid, size, f"{day} {clock}", 1, depth+1))
        elif is_dir:
            c.execute("INSERT INTO paths(path,type_0file_1dir,f_checked,deep_dir) VALUES(?,?,?,?)",
                      (full_path, 1, 0, depth+1))
            # Prefer the inode filespec; fall back to the quoted path only
            # when the inode column is not a usable number.
            if inode.isdigit() and int(inode) > 0:
                child_spec = f"<{inode}>"
            else:
                child_spec = f'"{full_path}"'
            queue.append((full_path, child_spec, depth+1))

    # Mark current dir as checked
    c.execute("UPDATE paths SET f_checked=1 WHERE path=?", (curr_path,))
    conn.commit()

conn.close()

2nd pyscript to retrieve

#!/usr/bin/env python3
import sqlite3
import subprocess
import os
import shlex
import unicodedata
import re

# Locations used by the recovery run; adjust them to the local setup.
IMG = "/run/media/mydisk/ext4.img"       # EXT4 image
DB = "ext4lostfound_db.db"                        # SQLite DB with paths and inodes
OUTDIR = "/mnt/tmp_drive/recover"       # destination folder
LOGFILE = "/mnt/tmp_drive/skipped_files.log"  # destinations skipped because they already existed
FAILED_LOG = "/mnt/tmp_drive/failed_files.log"  # destinations whose dump failed or came out empty

# Create the destination folder if needed, then open the database that
# holds the (path, inode) rows to recover.
os.makedirs(OUTDIR, exist_ok=True)
conn = sqlite3.connect(DB)
c = conn.cursor()


def normalize_fullwidth(s):
    """Return *s* in Unicode NFKC form.

    NFKC replaces compatibility characters with their plain equivalents,
    which among other things maps fullwidth letters, digits and punctuation
    onto their ASCII counterparts.
    """
    nfkc_form = unicodedata.normalize('NFKC', s)
    return nfkc_form

def sanitize_path(path):
    """Normalize fullwidth characters in each component of *path*.

    The path is split on ``os.sep`` and every component is passed through
    ``normalize_fullwidth``.  A component is left untouched when
    normalization would give it path meaning it did not have before:

    * the result contains ``os.sep`` (for example U+FF0F FULLWIDTH SOLIDUS
      becomes "/"), which would silently add a directory level;
    * the result becomes "." or ".." (for example two U+FF0E FULLWIDTH FULL
      STOP characters), which would redirect the file to another directory.

    Returns the components re-joined with ``os.sep``.
    """
    cleaned = []
    for component in path.split(os.sep):
        normalized = normalize_fullwidth(component)
        gained_separator = os.sep in normalized
        became_dot_entry = normalized in (".", "..") and normalized != component
        if gained_separator or became_dot_entry:
            # Keep the original spelling rather than change the hierarchy.
            normalized = component
        cleaned.append(normalized)
    return os.sep.join(cleaned)

def decode_escaped_path(file_path):
    r"""Turn literal backslash escapes in *file_path* into real characters.

    A path stored as ``\xef\xbc\x82`` (the UTF-8 bytes of one character,
    written out as text) is converted into that character.  Text that is
    already proper Unicode passes through unchanged.

    The conversion is best-effort: if the path holds a malformed escape
    (a trailing backslash, a truncated ``\x``) or an escape that does not
    denote a single byte (``\u0141``), the path is returned exactly as
    given instead of raising, so one odd name cannot abort a whole run.
    """
    try:
        # Step 1: interpret backslash escapes; each resulting code point
        # stands for one raw byte of the file name.
        decoded = file_path.encode('utf-8').decode('unicode_escape')

        # Step 2: turn those code points back into bytes and decode them as
        # UTF-8; invalid byte sequences become U+FFFD.
        return decoded.encode('latin1').decode('utf-8', errors='replace')
    except UnicodeError:
        # Covers UnicodeDecodeError from step 1 and UnicodeEncodeError from
        # the latin1 round-trip in step 2.
        return file_path

# Query all files
c.execute("SELECT path, F_INODE FROM paths WHERE type_0file_1dir=0")
files = c.fetchall()

# Stored paths begin with this prefix; it is dropped so that the recovered
# tree is rooted directly in OUTDIR.
LOST_FOUND_PREFIX = "/lost+found/"

# Open both logs in append mode so repeated runs keep their history.
with open(LOGFILE, "a", encoding="utf-8") as log_skip, \
     open(FAILED_LOG, "a", encoding="utf-8") as log_fail:

    for file_path, inode in files:
        s = decode_escaped_path(file_path)

        # Remove the prefix by slicing.  str.lstrip() takes a *set of
        # characters*, so lstrip("/lost+found/") would also eat the leading
        # letters of names such as "folder", "sound" or "data".
        if s.startswith(LOST_FOUND_PREFIX):
            s = s[len(LOST_FOUND_PREFIX):]

        # Leading separators are stripped because os.path.join() throws
        # OUTDIR away when its second argument is an absolute path.
        rel_path = sanitize_path(s).lstrip(os.sep)
        local_path = os.path.join(OUTDIR, rel_path)
        os.makedirs(os.path.dirname(local_path), exist_ok=True)

        # Skip if already exists
        if os.path.exists(local_path):
            print(f"Skipping already existing file: {local_path}")
            log_skip.write(f"{local_path}\n")
            continue

        # Double quotes for debugfs
        safe_local_path = local_path.replace('"', '""')

        # Build and run debugfs command; the source is addressed by inode,
        # so the original file name never has to be quoted.
        cmd = ['debugfs', '-R', f'dump <{inode}> "{safe_local_path}"', IMG]
        print('Recovering inode', inode, '->', local_path)
        print('Command:', ' '.join(cmd))
        try:
            subprocess.run(cmd, check=True)
        except subprocess.CalledProcessError:
            print(f"Failed to dump inode {inode} -> {local_path}")
            log_fail.write(f"{local_path}\n")
        else:
            # --- Post-write check ---
            # A zero exit status alone is not trusted: the output file must
            # also exist and be non-empty.
            if not os.path.exists(local_path) or os.path.getsize(local_path) == 0:
                print(f"Failed: file not created or empty -> {local_path}")
                log_fail.write(f"{local_path}\n")

conn.close()
print("Recovery finished.")

You must log in to answer this question.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.