How to dump the lost+found content from an ext4 img file that cannot be mounted using debugfs in linux and keep the folder hierarchy?

Question

My ext4 disk was failing. I made a disk image. I tried e2fsck and other tools to fix it. It refuses to mount. e2fsck can't repair the image.

However, using debugfs I can see that there is a lot of content inside the lost+found folder; almost all of the data is there. The problem is that debugfs does not provide a way to extract all of this data automatically while keeping whatever directory hierarchy exists. And yes, there is a folder hierarchy in lost+found, e.g. /lost+found/#1233131/folderA/folderB/file.mp4.

So I am looking for a solution to automatically restore all this data.

Estatistics · Accepted Answer · 2025-11-18 22:41:28Z

I made two python scripts

The first one extracts metadata only (full path, mtime, size, mode, etc.) and saves it in an SQLite database. The second Python script then retrieves the files at these paths into a recovery folder, keeping the full path (hierarchy) found under lost+found. It takes weird characters, spaces, quotes and similar things into account. However, be cautious, and always check the result with a few of the files that have the weirdest names.

1st pyscript

import subprocess
import sqlite3
from collections import deque

IMG = "/run/media/mydisk/ext4.img"
DB = "ext4lostfound_db.db"

# File-type bits of the octal mode column. The original check looked only at
# the first digit, which cannot tell a regular file (100644) from a FIFO
# (10644), a symlink (120777) or a socket (140755).
_S_IFMT = 0o170000
_S_IFDIR = 0o040000
_S_IFREG = 0o100000

# SQLite DB setup: every run starts from an empty `paths` table.
conn = sqlite3.connect(DB)
c = conn.cursor()
c.execute("""
DROP TABLE IF EXISTS paths;
""")
c.execute("""
CREATE TABLE paths(
    path TEXT,
    type_0file_1dir INTEGER,
    F_INODE INTEGER,
    F_MODE TEXT,
    F_LINKS INTEGER,
    F_UID INTEGER,
    F_GID INTEGER,
    F_SIZE INTEGER,
    F_MTIME TEXT,
    f_checked INTEGER DEFAULT 0,
    deep_dir INTEGER DEFAULT 0
)
""")
conn.commit()

# BFS queue of (path, depth) tuples; a deque pops from the left in O(1).
queue = deque([("/lost+found", 1)])

while queue:
    curr_path, depth = queue.popleft()
    print(f"Scanning: {curr_path}, depth={depth}")

    # NOTE(review): a literal double quote inside the quoted debugfs argument
    # is written as two quotes here -- confirm against your e2fsprogs version.
    quoted_path = curr_path.replace('"', '""')

    # Run debugfs and capture stdout
    try:
        out = subprocess.check_output(
            ['debugfs', '-R', f'ls -l "{quoted_path}"', IMG],
            stderr=subprocess.DEVNULL,
            text=True,
            encoding='utf-8'
        )
    except subprocess.CalledProcessError:
        print(f"Failed to read {curr_path}, marking as error")
        c.execute("INSERT INTO paths(path,type_0file_1dir,f_checked,deep_dir) VALUES(?,?,?,?)",
                  (curr_path, 1, -1, depth))
        conn.commit()
        continue

    # Remove debugfs banner if present
    lines = out.splitlines()
    if lines and lines[0].startswith("debugfs"):
        lines = lines[1:]

    for line in lines:
        if not line.strip():
            continue

        # Split off only the eight leading columns; the remainder is the file
        # name with its internal whitespace intact (re-joining tokens with a
        # single space would corrupt names containing runs of spaces).
        parts = line.split(None, 8)
        if len(parts) < 9:
            # Not an "inode mode (n) uid gid size date time name" row.
            print(f"  -> skipping unparsable line: {line!r}")
            continue

        inode = parts[0]
        mode = parts[1]
        links = parts[2].strip("()")
        name = parts[8]

        # Skip '.' and '..' to avoid cycles
        if name in ('.', '..'):
            continue

        try:
            file_type = int(mode, 8) & _S_IFMT
        except ValueError:
            print(f"  -> skipping line with non-octal mode: {line!r}")
            continue

        full_path = f"{curr_path}/{name}"

        # Debug info
        if file_type == _S_IFDIR:
            type_str = "DIR"
        elif file_type == _S_IFREG:
            type_str = "FILE"
        else:
            type_str = "OTHER"
        print(f"  -> {type_str}: {full_path}")

        if file_type == _S_IFREG:  # regular file
            c.execute("INSERT INTO paths(path,type_0file_1dir,F_INODE,F_MODE,F_LINKS,f_checked,deep_dir) VALUES(?,?,?,?,?,?,?)",
                      (full_path, 0, inode, mode, links, 1, depth+1))
        elif file_type == _S_IFDIR:  # directory: record it and scan it later
            c.execute("INSERT INTO paths(path,type_0file_1dir,f_checked,deep_dir) VALUES(?,?,?,?)",
                      (full_path, 1, 0, depth+1))
            queue.append((full_path, depth+1))

    # Mark current dir as checked
    c.execute("UPDATE paths SET f_checked=1 WHERE path=?", (curr_path,))
    conn.commit()

conn.close()

2nd pyscript to retrieve

#!/usr/bin/env python3
import sqlite3
import subprocess
import os
import shlex
import unicodedata
import re

# --- Configuration: adjust these paths before running ---
IMG = "/run/media/mydisk/ext4.img"       # EXT4 image passed to debugfs
DB = "ext4lostfound_db.db"                        # SQLite DB with paths and inodes
OUTDIR = "/mnt/tmp_drive/recover"       # destination folder for recovered files
LOGFILE = "/mnt/tmp_drive/skipped_files.log"  # targets skipped because they already existed
FAILED_LOG = "/mnt/tmp_drive/failed_files.log"  # targets whose dump failed or came out empty

# Create the destination root up front; per-file subdirectories are created
# later, just before each file is dumped.
os.makedirs(OUTDIR, exist_ok=True)
# Open the database; the `paths` table is queried further down.
conn = sqlite3.connect(DB)
c = conn.cursor()


def normalize_fullwidth(s):
    """Return *s* in Unicode NFKC form.

    NFKC compatibility normalization folds fullwidth characters into their
    ASCII counterparts.
    """
    normalized = unicodedata.normalize('NFKC', s)
    return normalized

def sanitize_path(path):
    """Apply normalize_fullwidth() to every os.sep-separated component of *path*.

    The components are re-joined with os.sep, so the number and order of
    path separators already present in *path* is kept.
    """
    return os.sep.join(map(normalize_fullwidth, path.split(os.sep)))

def decode_escaped_path(file_path):
    r"""Turn literal backslash escapes in *file_path* into real characters.

    A stored path may contain the text of escape sequences such as
    \xef\xbc\x82 instead of the character they encode. The escapes are
    interpreted first, which yields one character per byte; those bytes are
    then re-read as UTF-8. Bytes that do not form valid UTF-8 become U+FFFD.
    """
    # Interpret the backslash escapes: every byte becomes one code point.
    raw = bytes(file_path, 'utf-8')
    one_char_per_byte = str(raw, 'unicode_escape')

    # Map the code points back to bytes and decode them as UTF-8.
    utf8_bytes = bytes(one_char_per_byte, 'latin1')
    return str(utf8_bytes, 'utf-8', 'replace')

# Query all regular files recorded in the database
c.execute("SELECT path, F_INODE FROM paths WHERE type_0file_1dir=0")
files = c.fetchall()

# Leading directory removed from each stored path so that the recovered tree
# is rooted directly at OUTDIR.
LOST_FOUND_PREFIX = "/lost+found/"

# Open both log files in append mode so repeated runs accumulate entries
with open(LOGFILE, "a", encoding="utf-8") as log_skip, \
     open(FAILED_LOG, "a", encoding="utf-8") as log_fail:

    for file_path, inode in files:
        # One undecodable name must not abort the whole recovery run:
        # log it and move on to the next file.
        try:
            s = decode_escaped_path(file_path)
        except UnicodeError as exc:
            print(f"Failed to decode path {file_path!r}: {exc}")
            log_fail.write(f"{file_path}\n")
            continue

        # Remove the prefix explicitly. str.lstrip() takes a *set of
        # characters*, so lstrip("/lost+found/") would also eat the start of
        # the name itself ("/lost+found/song.mp3" -> "g.mp3").
        if s.startswith(LOST_FOUND_PREFIX):
            s = s[len(LOST_FOUND_PREFIX):]
        # Keep the path relative; os.path.join() would discard OUTDIR if the
        # second argument were absolute.
        rel_path = s.lstrip("/")
        rel_path = sanitize_path(rel_path)
        local_path = os.path.join(OUTDIR, rel_path)
        os.makedirs(os.path.dirname(local_path), exist_ok=True)

        # Skip if already exists
        if os.path.exists(local_path):
            print(f"Skipping already existing file: {local_path}")
            log_skip.write(f"{local_path}\n")
            continue

        # Double quotes for debugfs
        safe_local_path = local_path.replace('"', '""')

        # Build and run debugfs command; the file is addressed by inode
        # number, so the name stored inside the image is never passed back.
        cmd = ['debugfs', '-R', f'dump <{inode}> "{safe_local_path}"', IMG]
        print('Recovering inode', inode, '->', local_path)
        print('Command:', ' '.join(cmd))
        try:
            subprocess.run(cmd, check=True)

            # --- Post-write check ---
            # An empty result is also reported as a failure.
            if not os.path.exists(local_path) or os.path.getsize(local_path) == 0:
                print(f"Failed: file not created or empty -> {local_path}")
                log_fail.write(f"{local_path}\n")

        except subprocess.CalledProcessError:
            print(f"Failed to dump inode {inode} -> {local_path}")
            log_fail.write(f"{local_path}\n")

conn.close()
print("Recovery finished.")

Stack Exchange Network

How to dump the lost+found content from an ext4 img file that cannot be mounted using debugfs in linux and keep the folder hierarchy?

1 Answer 1

You must log in to answer this question.

Hot Network Questions

How to dump the lost+found content from an ext4 img file that cannot be mounted using debugfs in linux and keep the folder hierarchy?

1 Answer 1

You must log in to answer this question.

Related

Hot Network Questions