1

I have x86_64 assembly code that finds the kernel32.dll base address by walking the PEB, resolves the address of GetProcAddress from the export table, uses it to obtain the LoadLibraryA function, and then calls LoadLibraryA to load the user32.dll module.

This is the relevant portion of code:

section .text
global _start

; ----------------------------------------------------------------------
; _start - resolve kernel32!GetProcAddress by hand, use it to obtain
;          LoadLibraryA, then load user32.dll.
;
; Syntax : NASM (Intel operand order), x86-64.
; ABI    : Microsoft x64 calling convention for the two API calls
;          (args in rcx, rdx; 32 bytes of shadow space; rsp 16-byte
;          aligned at each call).
;
; Resolved-address table: 0x70 bytes reserved on the stack, base in rdi.
; rdi is callee-saved in the Microsoft x64 ABI, so it survives the calls.
;   [rdi]        = kernel32.dll
;   [rdi + 0x8]  = GetProcAddress
;   [rdi + 0x10] = LoadLibraryA
;   [rdi + 0x18] = ws2_32.dll
;   [rdi + 0x20] = user32.dll
;   [rdi + 0x28] = GetConsoleWindow
;   [rdi + 0x30] = ShowWindow
;   [rdi + 0x38] = WSAStartup
;   [rdi + 0x40] = WSASocketA
;   [rdi + 0x48] = connect
;   [rdi + 0x50] = CreateProcessA
;   [rdi + 0x58] = ExitProcess
;   [rdi + 0x60] = socket fd
; This fragment only fills slots 0x00, 0x08, 0x10 and 0x20.
;
; Register roles in this fragment:
;   rbx = kernel32.dll base (callee-saved, live for the whole fragment)
;   rdx = export directory, later the GetProcAddress address
;   rcx = index into the export name table, later the function index
;   rdi = table base
;
; Stack alignment: both call sites are reached with rsp lowered by
; 0x70 + 0x10 + 0x28 = 0xA8 from its value at _start. 0xA8 = 8 (mod 16),
; so rsp is 16-byte aligned at the calls only if rsp = 8 (mod 16) on
; entry, i.e. _start was itself reached through a call from an aligned
; frame.  NOTE(review): confirm this holds for the way _start is entered.
;
; NOTE(review): neither the GetProcAddress nor the LoadLibraryA return
; value is checked for NULL before being stored in the table.
; ----------------------------------------------------------------------
_start:
        ; --- find the kernel32.dll base address --------------------------
        ; lodsq/lodsd below step forward only when DF = 0 (the Microsoft
        ; x64 ABI keeps DF clear).
        xor     edx, edx                        ; rdx = 0 (32-bit write zero-extends)
        mov     rax, [gs:rdx + 0x60]            ; rax = PEB
        mov     rax, [rax + 0x18]               ; rax = PEB->Ldr
        mov     rsi, [rax + 0x20]               ; rsi = Ldr->InMemoryOrderModuleList.Flink (1st entry)
        lodsq                                   ; rax = 1st entry's Flink (2nd entry)
        xchg    rax, rsi
        lodsq                                   ; rax = 2nd entry's Flink (3rd entry)
        ; rax points at the InMemoryOrderLinks field, which sits 0x10 into
        ; _LDR_DATA_TABLE_ENTRY; DllBase is at 0x30, hence the +0x20 here.
        ; The 3rd module in this list is assumed to be kernel32.dll.
        mov     rbx, [rax + 0x20]               ; rbx = kernel32 DllBase

        ; --- find the export table of kernel32.dll -----------------------
        mov     edx, dword [rbx + 0x3c]         ; IMAGE_DOS_HEADER->e_lfanew (LONG)
        add     rdx, rbx                        ; rdx = IMAGE_NT_HEADERS
        mov     edx, dword [rdx + 0x88]         ; export data directory RVA (DWORD)
        add     rdx, rbx                        ; rdx = export table address
        mov     esi, dword [rdx + 0x20]         ; RVA of AddressOfNames
        add     rsi, rbx                        ; rsi = AddressOfNames array
        mov     rcx, -1                         ; index; pre-incremented in the loop

        ; --- scan the name table for "GetProcAddre..." -------------------
        ; Only the first 12 bytes of each name are compared.
        ; NOTE(review): the loop has no upper bound; if no name matches it
        ; keeps reading past the end of the name table.
Get_FunctionName:
        inc     rcx                             ; rcx = index of the name being tested
        lodsd                                   ; eax = name RVA (rax zero-extended), rsi += 4
        add     rax, rbx                        ; rax = address of the name string
        cmp     dword [rax], 0x50746547         ; "GetP"
        jnz     Get_FunctionName
        cmp     dword [rax + 0x4], 0x41636f72   ; "rocA"
        jnz     Get_FunctionName
        cmp     dword [rax + 0x8], 0x65726464   ; "ddre"
        jnz     Get_FunctionName

        ; rax now points at the matching name string, rcx is its index.

        ; --- translate name index -> ordinal -> function address ---------
        mov     esi, dword [rdx + 0x24]         ; RVA of AddressOfNameOrdinals
        add     rsi, rbx                        ; rsi = AddressOfNameOrdinals array (WORDs)
        ; movzx instead of "mov cx, ..." so the upper bits of rcx are
        ; guaranteed zero before rcx is used as an index again.
        movzx   ecx, word [rsi + rcx * 2]       ; rcx = ordinal index
        mov     esi, dword [rdx + 0x1c]         ; RVA of AddressOfFunctions
        add     rsi, rbx                        ; rsi = AddressOfFunctions array (DWORDs)
        mov     edx, dword [rsi + rcx * 4]      ; RVA of GetProcAddress
        add     rdx, rbx                        ; rdx = address of GetProcAddress

        ; --- allocate the resolved-address table -------------------------
        sub     rsp, 0x70                       ; table space (keeps rsp mod 16 unchanged)
        mov     rdi, rsp                        ; rdi = table base
        mov     [rdi], rbx                      ; kernel32.dll
        mov     [rdi + 0x8], rdx                ; GetProcAddress

        ; --- rax = GetProcAddress(kernel32, "LoadLibraryA") --------------
        sub     rsp, 0x10                       ; 16 bytes for the string
        mov     r10, 0x7262694c64616f4c         ; "LoadLibr"
        mov     qword [rsp], r10
        mov     r10, 0x0000000041797261         ; "aryA" followed by zero bytes (terminator)
        mov     qword [rsp + 8], r10
        sub     rsp, 0x28                       ; 0x20 shadow space + 8 bytes alignment padding
        mov     rax, rdx                        ; rax = GetProcAddress (rdx is needed for arg 2)
        lea     rdx, [rsp + 0x28]               ; arg 2: "LoadLibraryA"
        mov     rcx, [rdi]                      ; arg 1: kernel32 base
        call    rax
        add     rsp, 0x38                       ; drop shadow/padding (0x28) + string (0x10)

        mov     [rdi + 0x10], rax               ; LoadLibraryA

        ; --- rax = LoadLibraryA("user32.dll") ----------------------------
        sub     rsp, 0x10                       ; 16 bytes for the string
        mov     r10, 0x642E323372657375         ; "user32.d"
        mov     qword [rsp], r10
        mov     r10, 0x0000000000006C6C         ; "ll" followed by zero bytes (terminator)
        mov     qword [rsp + 0x8], r10
        sub     rsp, 0x28                       ; 0x20 shadow space + 8 bytes alignment padding
        lea     rcx, [rsp + 0x28]               ; arg 1: "user32.dll"
        mov     rax, [rdi + 0x10]               ; LoadLibraryA
        call    rax
        add     rsp, 0x38                       ; drop shadow/padding (0x28) + string (0x10)

        mov     [rdi + 0x20], rax               ; user32.dll module handle

I'm not an expert, but I paid close attention to following the ABI and the Windows x64 calling convention.

An interesting thing I noticed is that in this post the person asking the question seems to have had the same error as me, but he answered his own question: his problem was that the stack was not 16-byte aligned before calling LoadLibraryA. That is not my case (I think).

I linked the object file with MinGW like this: x86_64-w64-mingw32-gcc customshell_x64.obj -o shell_x86_64.exe -nostdlib -Wl,--entry=_start

Using x64dbg on the binary, I see that once I call the LoadLibraryA function I hit an x64dbg breakpoint in the TLS callback for gdi32full.dll; if I continue execution, I get an EXCEPTION_ACCESS_VIOLATION somewhere.

I'm not used to using a debugger and, honestly, I don't know exactly what I need to look for in order to understand what is going on here.

Also, I used LoadLibraryA to load "ws2_32.dll" in the same way, and that works with no problem.

3
  • Works fine as is for me. I wonder if your issue might be linked to x64dbg itself. See github.com/x64dbg/x64dbg/issues/3523 for a fairly convoluted scenario, but the resulting issue looks very similar to yours. Commented May 18 at 7:04
  • Thank you for the reference @AndreyTurkin, I will consider that for sure. I was experiencing crashes even when I wasn't debugging, so I assumed it was a problem in the program and not in x64dbg. I'll try to fix everything else and see if it works without debugging. Commented May 18 at 8:49
  • The code is formally correct and works (it loads user32.dll fine), although it is not optimal for x64: you do not need sub rsp,* / add rsp,* around every function call; it is better to do this once in the prologue and epilogue. Also, there is no need to use asm here at all: it is possible to write the shellcode entirely in C++, which would be easier. Commented May 18 at 10:18

1 Answer 1

1

I used to use LoadLibrary, and here’s how I did it:

; Build the NUL-terminated string "psapi.dll" on the stack and pass it to
; LoadLibrary (Microsoft x64 ABI: first argument in rcx, 32 bytes of shadow space).
; In : r14 = address of LoadLibrary
; Out: rax = value returned by the call; rsp is restored to its value on entry
; Clobbers r12 (callee-saved in the Microsoft x64 ABI: save it first if this
; runs inside a function that must return to its caller).
; Stack use is 0x10 + 0x30 = 0x40 bytes, a multiple of 16, so whatever
; alignment rsp had before this snippet is what the call sees.
push 0x6C                   ; "l" plus seven zero bytes (imm8 is sign-extended to 64 bits): tail + terminator
mov r12, 0x6C642E6970617370 ; "psapi.dl" as a little-endian qword
push r12                    ; [rsp] = "psapi.dl", [rsp+8] = "l\0"
mov rcx, rsp                ; RCX points to the DLL string
sub rsp, 0x30               ; 0x20 bytes of shadow space for the call, plus 0x10 spare
call r14                    ; Call LoadLibrary (pointer in r14)
add rsp, 0x40               ; Release the shadow space (0x30) and the pushed string (0x10)

In other words, I first pushed the trailing "l" of the name (the zero bytes above it in the same 8-byte slot terminate the string), then pushed the first eight bytes, "psapi.dl", and then set rcx to point to the complete DLL name on the stack. That way, I could call LoadLibrary easily.

Sign up to request clarification or add additional context in comments.

3 Comments

Thank you for your answer @Teoman. Your solution looks more or less like mine: you are pushing the values instead of moving them, but in the end both strings end up on the stack. I could try it, though. Also, InMemoryOrderModuleList does not contain those DLLs by default; it contains ntdll, kernel32, and kernelbase (as a dependency of kernel32).
User32 and ws2_32 are not already loaded in the general case.
You can push 'l' without moving to a register first, using the push sign_extended_imm8 encoding. In fact, mov r12, 0x6c will contain zero bytes so it's not usable in normal shellcode. It's only for values wider than 32-bit where you have to mov r64, imm64 or split it up somehow, since only mov allows a 64-bit immediate. (And if this isn't shellcode, just put the string in section .rdata and use a RIP-relative lea).

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.