I made two Python scripts.
The 1st one extracts metadata only (full path, mtime, size, mode, etc.) and saves it in an SQLite database. The 2nd Python script then retrieves the files at these paths into a recovery folder, keeping the full path (hierarchy) they had under lost+found.
The scripts take weird characters, spaces, quotes and other oddities in file names into consideration.
However, be cautious, and always check the result first with a few of the files that have the weirdest names.
1st pyscript
import collections
import sqlite3
import stat
import subprocess

IMG = "/run/media/mydisk/ext4.img"
DB = "ext4lostfound_db.db"

# Number of whitespace-separated columns that precede the file name in a
# `debugfs ls -l` line: inode, mode, (n), uid, gid, size, date, time.
_LS_FIXED_COLUMNS = 8

LsEntry = collections.namedtuple(
    "LsEntry", "inode mode links uid gid size mtime name kind"
)


def parse_ls_line(line):
    """Parse one line of ``debugfs ls -l`` output.

    Args:
        line: A single output line, without the trailing newline.

    Returns:
        An ``LsEntry`` whose fields are the raw column strings, plus
        ``mtime`` (date and time columns joined by a space) and ``kind``
        (``"FILE"``, ``"DIR"`` or ``"OTHER"``); ``None`` when the line does
        not have the expected columns or its mode is not an octal number.
    """
    # Split only the fixed columns so that runs of spaces inside the file
    # name survive instead of being collapsed to a single space.
    parts = line.split(None, _LS_FIXED_COLUMNS)
    if len(parts) <= _LS_FIXED_COLUMNS:
        return None
    inode, mode, links, uid, gid, size, date, clock, name = parts
    try:
        mode_bits = int(mode, 8)
    except ValueError:
        return None
    # Classify from the real file-type bits.  Looking only at the first
    # octal digit would treat symlinks (120777), FIFOs (10644) and sockets
    # (140755) as regular files.
    if stat.S_ISREG(mode_bits):
        kind = "FILE"
    elif stat.S_ISDIR(mode_bits):
        kind = "DIR"
    else:
        kind = "OTHER"
    return LsEntry(
        inode, mode, links.strip("()"), uid, gid, size,
        f"{date} {clock}", name, kind,
    )


def main():
    """Walk /lost+found breadth-first with debugfs and record it in SQLite.

    Recreates the ``paths`` table in DB, then lists every directory reachable
    from ``/lost+found`` inside IMG and stores one row per regular file and
    per directory.  Directories that debugfs fails to list get an extra row
    with ``f_checked = -1``.
    """
    conn = sqlite3.connect(DB)
    try:
        c = conn.cursor()
        c.execute("DROP TABLE IF EXISTS paths")
        c.execute("""
            CREATE TABLE paths(
                path TEXT,
                type_0file_1dir INTEGER,
                F_INODE INTEGER,
                F_MODE TEXT,
                F_LINKS INTEGER,
                F_UID INTEGER,
                F_GID INTEGER,
                F_SIZE INTEGER,
                F_MTIME TEXT,
                f_checked INTEGER DEFAULT 0,
                deep_dir INTEGER DEFAULT 0
            )
        """)
        conn.commit()

        # BFS queue of (path, depth); deque gives O(1) pops from the left.
        queue = collections.deque([("/lost+found", 1)])
        while queue:
            curr_path, depth = queue.popleft()
            print(f"Scanning: {curr_path}, depth={depth}")

            # Inside a double-quoted debugfs argument a literal quote is
            # written as two quotes (same convention the recovery script
            # uses for its destination path).
            quoted_path = curr_path.replace('"', '""')
            try:
                out = subprocess.check_output(
                    ['debugfs', '-R', f'ls -l "{quoted_path}"', IMG],
                    stderr=subprocess.DEVNULL,
                    text=True,
                    encoding='utf-8'
                )
            except subprocess.CalledProcessError:
                print(f"Failed to read {curr_path}, marking as error")
                c.execute(
                    "INSERT INTO paths(path,type_0file_1dir,f_checked,deep_dir)"
                    " VALUES(?,?,?,?)",
                    (curr_path, 1, -1, depth))
                conn.commit()
                continue

            # Remove debugfs banner if present
            lines = out.splitlines()
            if lines and lines[0].startswith("debugfs"):
                lines = lines[1:]

            for line in lines:
                if not line.strip():
                    continue
                entry = parse_ls_line(line)
                if entry is None:
                    print(f"  -> skipping unparseable line: {line!r}")
                    continue
                # Skip '.' and '..' to avoid cycles
                if entry.name in ('.', '..'):
                    continue

                full_path = f"{curr_path}/{entry.name}"
                print(f"  -> {entry.kind}: {full_path}")

                if entry.kind == "FILE":
                    # NOTE(review): F_LINKS receives the parenthesised
                    # column of `ls -l`; in current e2fsprogs that looks
                    # like the directory-entry file type rather than a link
                    # count -- confirm before relying on it.
                    c.execute(
                        "INSERT INTO paths(path,type_0file_1dir,F_INODE,"
                        "F_MODE,F_LINKS,F_UID,F_GID,F_SIZE,F_MTIME,"
                        "f_checked,deep_dir) VALUES(?,?,?,?,?,?,?,?,?,?,?)",
                        (full_path, 0, entry.inode, entry.mode, entry.links,
                         entry.uid, entry.gid, entry.size, entry.mtime,
                         1, depth + 1))
                elif entry.kind == "DIR":
                    c.execute(
                        "INSERT INTO paths(path,type_0file_1dir,f_checked,"
                        "deep_dir) VALUES(?,?,?,?)",
                        (full_path, 1, 0, depth + 1))
                    queue.append((full_path, depth + 1))

            # Mark current dir as checked (no row exists for the root
            # "/lost+found" itself, so this is a no-op for it).
            c.execute("UPDATE paths SET f_checked=1 WHERE path=?", (curr_path,))
            conn.commit()
    finally:
        conn.close()


if __name__ == "__main__":
    main()
2nd pyscript to retrieve
#!/usr/bin/env python3
import sqlite3
import subprocess
import os
import shlex
import unicodedata
import re
# Module-level configuration and shared handles for the recovery script.
IMG = "/run/media/mydisk/ext4.img" # EXT4 image passed to debugfs
DB = "ext4lostfound_db.db" # SQLite DB with paths and inodes
OUTDIR = "/mnt/tmp_drive/recover" # destination folder
# Receives the destination path of every file skipped because it already exists.
LOGFILE = "/mnt/tmp_drive/skipped_files.log"
# Receives the destination path of every file whose dump failed or came out empty.
FAILED_LOG = "/mnt/tmp_drive/failed_files.log"
# Create the destination root up front; exist_ok makes re-runs harmless.
os.makedirs(OUTDIR, exist_ok=True)
# Connection and cursor shared by the rest of the script.
conn = sqlite3.connect(DB)
c = conn.cursor()
def normalize_fullwidth(s):
    """Return *s* in Unicode NFKC form.

    NFKC maps fullwidth characters to their ASCII equivalents; it also
    applies every other compatibility mapping (ligatures, superscripts,
    dot leaders, ...), so the result can differ from *s* in more than
    fullwidth characters.
    """
    return unicodedata.normalize('NFKC', s)


def sanitize_path(path):
    """Normalize each component of *path* without changing its structure.

    Every ``os.sep``-separated component is passed through
    ``normalize_fullwidth``.  A component is kept verbatim when normalizing
    it would introduce a path separator (e.g. FULLWIDTH SOLIDUS becomes "/")
    or turn it into "." or ".." (e.g. two FULLWIDTH FULL STOPs), because
    either would alter the directory hierarchy or let the result climb out
    of the destination folder.

    Args:
        path: Path string whose components are separated by ``os.sep``.

    Returns:
        The path with the same number of components, each normalized where
        that is safe.
    """
    cleaned = []
    for part in path.split(os.sep):
        folded = normalize_fullwidth(part)
        unsafe = os.sep in folded or folded in (os.curdir, os.pardir)
        if folded != part and unsafe:
            # Normalization created a separator or a dot entry: keep the
            # original spelling of this component.
            folded = part
        cleaned.append(folded)
    return os.sep.join(cleaned)
def decode_escaped_path(file_path):
    r"""Turn literal ``\xNN`` escape sequences in *file_path* into text.

    Each backslash-x-two-hex-digits sequence (for example the twelve
    characters ``\xef\xbc\x82``) is replaced by the byte it names, and the
    resulting byte string is decoded as UTF-8.  Only the ``\xNN`` form is
    interpreted: any other backslash sequence (``\n``, ``\u12``, a trailing
    backslash, ...) is left untouched instead of being evaluated as a Python
    escape or raising an error.

    Args:
        file_path: Path string, possibly containing ``\xNN`` escapes.

    Returns:
        The decoded string.  Byte sequences that are not valid UTF-8 are
        replaced with U+FFFD.
    """
    raw = file_path.encode('utf-8')
    # Single left-to-right pass, so an escaped backslash (\x5c) followed by
    # "x41" yields a literal "\x41" rather than being decoded twice.
    raw = re.sub(
        rb'\\x([0-9A-Fa-f]{2})',
        lambda match: bytes([int(match.group(1), 16)]),
        raw,
    )
    return raw.decode('utf-8', errors='replace')
# Query every row recorded as a file (type_0file_1dir = 0).
c.execute("SELECT path, F_INODE FROM paths WHERE type_0file_1dir=0")
files = c.fetchall()

# Only the part of each stored path below this directory is recreated
# under OUTDIR.
LOST_FOUND_PREFIX = "/lost+found/"
out_root = os.path.abspath(OUTDIR)

# Open both log files in append mode so repeated runs accumulate entries.
with open(LOGFILE, "a", encoding="utf-8") as log_skip, \
        open(FAILED_LOG, "a", encoding="utf-8") as log_fail:
    for file_path, inode in files:
        # Decode escapes first; a single undecodable entry must not abort
        # the whole recovery run.
        try:
            s = decode_escaped_path(file_path)
        except UnicodeError:
            print(f"Failed to decode path for inode {inode}: {file_path!r}")
            log_fail.write(f"{file_path}\n")
            continue

        # Remove the prefix as a prefix.  str.lstrip() takes a *set of
        # characters*, so lstrip("/lost+found/") would also eat the start of
        # names such as "found.txt".
        if s.startswith(LOST_FOUND_PREFIX):
            rel_path = s[len(LOST_FOUND_PREFIX):]
        else:
            # Keep the path relative so os.path.join() cannot discard OUTDIR.
            rel_path = s.lstrip("/")
        rel_path = sanitize_path(rel_path)
        local_path = os.path.join(OUTDIR, rel_path)

        # File names come from a damaged filesystem: refuse anything that
        # would resolve to OUTDIR itself or to a location outside it.
        resolved = os.path.abspath(local_path)
        if (resolved == out_root
                or os.path.commonpath([out_root, resolved]) != out_root):
            print(f"Failed: destination outside {OUTDIR} -> {local_path}")
            log_fail.write(f"{local_path}\n")
            continue

        try:
            os.makedirs(os.path.dirname(local_path), exist_ok=True)
        except OSError as err:
            print(f"Failed to create directory for {local_path}: {err}")
            log_fail.write(f"{local_path}\n")
            continue

        # Skip if already exists
        if os.path.exists(local_path):
            print(f"Skipping already existing file: {local_path}")
            log_skip.write(f"{local_path}\n")
            continue

        # Inside a double-quoted debugfs argument a quote is written twice.
        safe_local_path = local_path.replace('"', '""')

        # Build and run debugfs command
        cmd = ['debugfs', '-R', f'dump <{inode}> "{safe_local_path}"', IMG]
        print('Recovering inode', inode, '->', local_path)
        print('Command:', ' '.join(cmd))
        try:
            subprocess.run(cmd, check=True)
        except subprocess.CalledProcessError:
            print(f"Failed to dump inode {inode} -> {local_path}")
            log_fail.write(f"{local_path}\n")
        else:
            # Post-write check.  Files that are genuinely empty on the image
            # are reported here as well.
            if not os.path.exists(local_path) or os.path.getsize(local_path) == 0:
                print(f"Failed: file not created or empty -> {local_path}")
                log_fail.write(f"{local_path}\n")

conn.close()
print("Recovery finished.")