I am trying to recover the imported functions from a PE file. The code looks for call instructions in the disassembled .text section and then checks the call destination address against the function addresses in the IAT. However, none of the call destination addresses match up with the function addresses in the IAT. What am I doing wrong?
import sys
import pefile
from capstone import *
def valid_pefile(file_path):
"""
Validates the input file as a PE file.
Args: the path to the PE file
Returns: pe file or None if the file is not a PE file """
try:
pe = pefile.PE(file_path)
return pe
except pefile.PEFormatError as pe_err:
print("[-] error while parsing file {}:\n\t{}".format(file_path,pe_err))
return None
def get_imported_functions(pe,ins):
"""
Extracts the imported functions from a PE file.
Args: pe file and disassembled.
Returns: A list of imported functions.
"""
imported_calls = []
for (address, size, mnemonic, op_str) in ins:
if mnemonic == 'call' and op_str.startswith('0x'):
# Get the virtual address of the call destination
va = pe.OPTIONAL_HEADER.ImageBase + int(op_str, 16)
# debug: print("Call destination address: " + str(hex(va)))
# Check if the destination address matches an IAT entry
for entry in pe.DIRECTORY_ENTRY_IMPORT:
for imp in entry.imports:
# debug: print("IAT func address: " + str(hex(imp.address)))
if imp.address == va:
imported_calls.append({
'function_name': imp.name.decode(),
'call_address': hex(address),
'rva': hex(address - pe.OPTIONAL_HEADER.ImageBase)
})
break
return imported_calls
def disassemble_code(code, offset=1000):
"""
Disassembles x86-64 binary code section.
Args: binary_data (bytes): The raw binary data to be disassembled.
Returns: A list of disassembled instructions in tuple form.
"""
try:
# Initialize the Capstone disassembler
md = Cs(CS_ARCH_X86, CS_MODE_64)
# Disassemble the binary data
instructions = md.disasm_lite(code, offset)
return instructions
except CsError as e:
print(f"Disassembly error: {e}")
return None
def get_text_section(pe):
"""
Extracts the .text section from a PE file and prints its starting point and size.
Args: file_path (str): The path to the PE file.
Returns: contents of the .text section.
"""
if pe is not None:
text_section = next((section for section in pe.sections if section.Name.decode().strip('\x00') == '.text'), None)
if text_section:
text_data = text_section.get_data()
text_start = text_section.VirtualAddress
text_size = text_section.Misc_VirtualSize
# debugging
# print(f"The .text section starts at virtual address 0x{text_start:08X} and has a size of {text_size} bytes.")
return text_data, text_start
else:
return None
else:
print("Error while parsing PE file: {}".format(pe))
if __name__ == '__main__':
pe = valid_pefile(sys.argv[1])
code, address = get_text_section(pe)
ins = disassemble_code(code, address)
calls = get_imported_functions(pe,ins)
for call in calls:
print(f"Function name: {call['function_name']}, Call address: {call['call_address']}, RVA: {call['rva']}")
The IAT addresses seem to be higher than the call function destination addresses, e.g., Call dest add: 0x140005de8, IAT func add: 0x140007300.
Edit:
I I have tried several (benign) executables. The Call det addresses seem to be always larger.
Example Output:
Call dest addresses IAT func addresses
0x140005de8 0x140007300
0x140005de8 0x140007308
0x140005de8 0x140007310
0x140005de8 0x140007318
0x140005de8 0x140007320
0x140005d9c 0x140007328
0x140005de8 0x140007330
0x14000107c 0x140007338
0x140005de8 0x140007340
0x140005d9c 0x140007348
0x140005de8 0x140007350
0x14000107c 0x140007358
0x14000107c 0x140007360
0x140005d9c 0x140007368