How to find the function DIE (Debugging Information Entry) using libdw, given an eip (instruction pointer)?

195 views Asked by At

Libdw, part of elfutils, is more recent than libdwarf. Unfortunately, there is virtually no documentation available for it, nor are there any examples.

I just finished writing a program that, given an instruction pointer of a running program, finds and prints the name of the function that it is in (which is not always available by calling dladdr, which can only show dynamic (aka public/visible) symbols). I will post this here as an answer to the following question:

How can one use libdw to extract the name of a function (with hidden visibility) from the DWARF info in an ELF executable, when only having an instruction pointer?

1

There is 1 answer

3
Carlo Wood On

The following code snippet does so. The main function calls get_test_address() to obtain an address (as a Dwarf_Addr) that lies inside get_test_address() itself. I used libunwind functions for that, but how the address is obtained is not relevant to the libdw part.

I used dladdr to find and get the ELF object file and its load address.

#define _GNU_SOURCE
#include <dlfcn.h>
#define UNW_LOCAL_ONLY
#include <dwarf.h>
#include <elfutils/libdw.h>

#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <fcntl.h>
#include <assert.h>
#include <string.h>
#include <libunwind.h>

void find_function_name_for_ip(char const* elf_path, Dwarf_Addr rel_ip)
{
  // Get file descriptor to ELF object.
  int fd = open(elf_path, O_RDONLY);
  if (fd < 0)
  {
    perror("open");
    return;
  }

  // Initialize libdw.
  Dwarf *dw = dwarf_begin(fd, DWARF_C_READ);
  if (dw == NULL)
  {
    fprintf(stderr, "dwarf_begin failed\n");
    close(fd);
    return;
  }

  // Get address ranges (of compilation units in this object).
  Dwarf_Aranges *aranges;
  size_t cnt;
  if (dwarf_getaranges(dw, &aranges, &cnt) != 0)
  {
    fprintf(stderr, "dwarf_getaranges failed\n");
    dwarf_end(dw);
    close(fd);
    return;
  }

  printf("There are %lu address ranges.\n", cnt);

  // Get the (address range of the) compilation unit containing rel_ip.
  Dwarf_Arange *arange = dwarf_getarange_addr(aranges, rel_ip);
  if (arange)
  {
    // Extract the offset into .debug_info.
    Dwarf_Addr start;
    Dwarf_Word length;
    Dwarf_Off offset;
    if (dwarf_getarangeinfo(arange, &start, &length, &offset) == 0)
    {
      printf("start = %lu, length = %u, offset = %ld\n", start, length, offset);

      // Obtain the DIE of the compilation unit.
      Dwarf_Die cu_die;
      Dwarf_Die* cu_die_ptr = dwarf_offdie(dw, offset, &cu_die);
      if (cu_die_ptr)
      {
        assert(cu_die_ptr == &cu_die);
        char const* name = dwarf_diename(cu_die_ptr);
        if (name)
          printf("Found compilation unit: %s\n", name);

        // Get the first child of this DIE (some type; it was 'size_t' for me).
        Dwarf_Die child_die;
        if (dwarf_child(cu_die_ptr, &child_die) != 0)
        {
          fprintf(stderr, "dwarf_child failed\n");
          return;
        }

        // Iterate over all children of the compilation unit.
        do
        {
          // We are only interested in DIE that represents a function.
          if (dwarf_tag(&child_die) == DW_TAG_subprogram)
          {
            Dwarf_Attribute decl_attr;
            bool is_declaration = 0;

            // Declarations are DW_TAG_subprogram too; skip all declarations.
            // We are only interested in definitions.
            if (dwarf_attr(&child_die, DW_AT_declaration, &decl_attr) &&
                dwarf_formflag(&decl_attr, &is_declaration) == 0 &&
                is_declaration)
              continue;

            // Check if this is the function that contains rel_ip (pc = program counter - same thing as ip).
            if (dwarf_haspc(&child_die, rel_ip))
            {
              // Get the name of the function.
              char const* func_name = dwarf_diename(&child_die);
              if (func_name)
              {
                printf("Found DIE for function: %s\n", func_name);
                // You can do more with the DIE here.
                break;
              }
            }
          }
        }
        while (dwarf_siblingof(&child_die, &child_die) == 0);
      }
    }
  }
  else
  {
    printf("DWARF arange not found\n");
  }

  dwarf_end(dw);
  close(fd);
}

// Return an instruction pointer obtained from libunwind for the frame of this
// function, or 0 when any libunwind call fails (a message is then written to
// stderr).
//
// Original author's note: the returned ip is the return address of the
// unw_getcontext call, i.e. it points at the unw_init_local call below.
Dwarf_Addr get_test_address(void)
{
  unw_context_t context;
  unw_cursor_t cursor;
  // Initialized so that a failing libunwind call can never make us return an
  // indeterminate value.
  unw_word_t ip = 0;

  // Capture the current context, create a cursor on it and read the
  // instruction pointer register; each call returns 0 on success.
  if (unw_getcontext(&context) != 0 ||
      unw_init_local(&cursor, &context) != 0 ||
      unw_get_reg(&cursor, UNW_REG_IP, &ip) != 0)
  {
    fprintf(stderr, "libunwind could not determine the instruction pointer\n");
    return 0;
  }

  return ip;
}

// Test driver: obtain an address inside get_test_address(), use dladdr to
// find the ELF object containing it and that object's load address, then
// look the load-relative address up in the object's DWARF info.
//
// Returns 0 on success and EXIT_FAILURE when dladdr cannot resolve the
// address.
int main(void)
{
  Dwarf_Addr ip = get_test_address();

  Dl_info info;
  if (!dladdr((void*)ip, &info))
  {
    fprintf(stderr, "dladdr failed for ip = 0x%llx\n", (unsigned long long)ip);
    return EXIT_FAILURE;
  }

  char const* elf_path = info.dli_fname;
  // Subtract the load address of the object that contains ip.
  Dwarf_Addr rel_ip = ip - (Dwarf_Addr)info.dli_fbase;

  // Cast to unsigned long long so that the conversion specifiers match the
  // arguments no matter how Dwarf_Addr is defined.
  printf("ip = 0x%llx --> %llu\n", (unsigned long long)ip, (unsigned long long)rel_ip);

  // Compile with -rdynamic (for example) to allow dladdr to print something too, here.
  printf("Function name returned by dladdr: %s\n", (info.dli_sname ? info.dli_sname : "<NULL>"));

  find_function_name_for_ip(elf_path, rel_ip);

  return 0;
}

Compile, link and run this test program as follows:

daniel:~/projects/libcwd/libcwd>gcc -g -o find_function find_function.c -lunwind -ldw -lelf
daniel:~/projects/libcwd/libcwd>./find_function
ip = 0x55b24edbd690 --> 5776
Function name returned by dladdr: <NULL>
There are 1 address ranges.
start = 4793, length = 1272, offset = 12
Found compilation unit: find_function.c
Found DIE for function: get_test_address