Find pathname from dlopen handle on OSX

3.7k views Asked by At

I have dlopen()'ed a library, and I want to invert back from the handle it passes to me to the full pathname of shared library. On Linux and friends, I know that I can use dlinfo() to get the linkmap and iterate through those structures, but I can't seem to find an analogue on OSX. The closest thing I can do is to either:

  • Use dyld_image_count() and dyld_get_image_name(), iterate over all the currently opened libraries and hope I can guess which one corresponds to my handle

  • Somehow find a symbol that lives inside of the handle I have, and pass that to dladdr().

If I have apriori knowledge as to a symbol name inside of the library I just opened, I can dlsym() that and then use dladdr(). That works fine. But in the general case where I have no idea what is inside this shared library, I would need to be able to enumerate symbols to do that, which I don't know how to do either.

So any tips on how to lookup the pathname of a library from its dlopen handle would be very much appreciated. Thanks!

2

There are 2 answers

0
staticfloat On BEST ANSWER

After about a year of using the solution provided by 0xced, we discovered an alternative method that is simpler and avoids one (rather rare) failure mode; specifically, because 0xced's code snippet iterates through each dylib currently loaded, finds the first exported symbol, attempts to resolve it in the dylib currently being sought, and returns positive if that symbol is found in that particular dylib, you can have false positives if the first exported symbol from an arbitrary library happens to be present inside of the dylib you're currently searching for.

My solution was to use _dyld_get_image_name(i) to get the absolute path of each image loaded, dlopen() that image, and compare the handle (after masking out any mode bits set by dlopen() due to usage of things like RTLD_FIRST) to ensure that this dylib is actually the same file as the handle passed into my function.

The complete function can be seen here, as a part of the Julia Language, with the relevant portion copied below:

// Iterate through all images currently in memory
for (int32_t i = _dyld_image_count(); i >= 0 ; i--) {
    // dlopen() each image, check handle
    const char *image_name = _dyld_get_image_name(i);
    uv_lib_t *probe_lib = jl_load_dynamic_library(image_name, JL_RTLD_DEFAULT);
    void *probe_handle = probe_lib->handle;
    uv_dlclose(probe_lib);

    // If the handle is the same as what was passed in (modulo mode bits), return this image name
    if (((intptr_t)handle & (-4)) == ((intptr_t)probe_handle & (-4)))
        return image_name;
}

Note that functions such as jl_load_dynamic_library() are wrappers around dlopen() that return libuv types, but the spirit of the code remains the same.

2
0xced On

Here is how you can get the absolute path of a handle returned by dlopen.

  1. In order to get the absolute path, you need to call the dladdr function and retrieve the Dl_info.dli_fname field.
  2. In order to call the dladdr function, you need to give it an address.
  3. In order to get an address given a handle, you have to call the dlsym function with a symbol.
  4. In order to get a symbol out of a loaded library, you have to parse the library to find its symbol table and iterate over the symbols. You need to find an external symbol because dlsym only searches for external symbols.

Put it all together and you get this:

#import <dlfcn.h>
#import <mach-o/dyld.h>
#import <mach-o/nlist.h>
#import <stdio.h>
#import <string.h>

#ifdef __LP64__
typedef struct mach_header_64 mach_header_t;
typedef struct segment_command_64 segment_command_t;
typedef struct nlist_64 nlist_t;
#else
typedef struct mach_header mach_header_t;
typedef struct segment_command segment_command_t;
typedef struct nlist nlist_t;
#endif

static const char * first_external_symbol_for_image(const mach_header_t *header)
{
    Dl_info info;
    if (dladdr(header, &info) == 0)
        return NULL;

    segment_command_t *seg_linkedit = NULL;
    segment_command_t *seg_text = NULL;
    struct symtab_command *symtab = NULL;

    struct load_command *cmd = (struct load_command *)((intptr_t)header + sizeof(mach_header_t));
    for (uint32_t i = 0; i < header->ncmds; i++, cmd = (struct load_command *)((intptr_t)cmd + cmd->cmdsize))
    {
        switch(cmd->cmd)
        {
            case LC_SEGMENT:
            case LC_SEGMENT_64:
                if (!strcmp(((segment_command_t *)cmd)->segname, SEG_TEXT))
                    seg_text = (segment_command_t *)cmd;
                else if (!strcmp(((segment_command_t *)cmd)->segname, SEG_LINKEDIT))
                    seg_linkedit = (segment_command_t *)cmd;
                break;

            case LC_SYMTAB:
                symtab = (struct symtab_command *)cmd;
                break;
        }
    }

    if ((seg_text == NULL) || (seg_linkedit == NULL) || (symtab == NULL))
        return NULL;

    intptr_t file_slide = ((intptr_t)seg_linkedit->vmaddr - (intptr_t)seg_text->vmaddr) - seg_linkedit->fileoff;
    intptr_t strings = (intptr_t)header + (symtab->stroff + file_slide);
    nlist_t *sym = (nlist_t *)((intptr_t)header + (symtab->symoff + file_slide));

    for (uint32_t i = 0; i < symtab->nsyms; i++, sym++)
    {
        if ((sym->n_type & N_EXT) != N_EXT || !sym->n_value)
            continue;

        return (const char *)strings + sym->n_un.n_strx;
    }

    return NULL;
}

const char * pathname_for_handle(void *handle)
{
    for (int32_t i = _dyld_image_count(); i >= 0 ; i--)
    {
        const char *first_symbol = first_external_symbol_for_image((const mach_header_t *)_dyld_get_image_header(i));
        if (first_symbol && strlen(first_symbol) > 1)
        {
            handle = (void *)((intptr_t)handle | 1); // in order to trigger findExportedSymbol instead of findExportedSymbolInImageOrDependentImages. See `dlsym` implementation at http://opensource.apple.com/source/dyld/dyld-239.3/src/dyldAPIs.cpp
            first_symbol++; // in order to remove the leading underscore
            void *address = dlsym(handle, first_symbol);
            Dl_info info;
            if (dladdr(address, &info))
                return info.dli_fname;
        }
    }
    return NULL;
}

int main(int argc, const char * argv[])
{
    void *libxml2 = dlopen("libxml2.dylib", RTLD_LAZY);
    printf("libxml2 path: %s\n", pathname_for_handle(libxml2));
    dlclose(libxml2);
    return 0;
}

If you run this code, it will yield the expected result: libxml2 path: /usr/lib/libxml2.2.dylib