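Fix the x86-64 dlsym entry stub: the return address was being loaded from the stack into %edx, truncating it to 32 bits and preventing the dlsym code (__dlsym) from determining which module contains the calling code. Load it into the full 64-bit %rdx instead.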
@@ -2,5 +2,5 @@
.global dlsym
.type dlsym,@function
dlsym:
- mov (%rsp),%edx
+ mov (%rsp),%rdx
jmp __dlsym