#!/usr/bin/python
# Copyright 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Utilities to get and manipulate symbols from a binary."""

import collections
import logging
import os
import re
import subprocess
import sys

import cygprofile_utils

sys.path.insert(
    0, os.path.join(os.path.dirname(__file__), os.pardir, os.pardir,
                    'third_party', 'android_platform', 'development',
                    'scripts'))
import symbol

_MAX_WARNINGS_TO_PRINT = 200

SymbolInfo = collections.namedtuple('SymbolInfo', ('name', 'offset', 'size',
                                                   'section'))

def SetArchitecture(arch):
  """Set the architecture for binaries to be symbolized."""
  symbol.ARCH = arch


def _FromObjdumpLine(line):
  """Create a SymbolInfo by parsing a properly formatted objdump output line.

  Args:
    line: line from objdump

  Returns:
    An instance of SymbolInfo if the line represents a symbol, None otherwise.
  """
  # All of the symbol lines we care about are in the form
  # 0000000000  g    F   .text.foo     000000000 [.hidden] foo
  # where g (global) might also be l (local) or w (weak).
  parts = line.split()
  if len(parts) < 6 or parts[2] != 'F':
    return None

  assert len(parts) == 6 or (len(parts) == 7 and parts[5] == '.hidden')
  accepted_scopes = set(['g', 'l', 'w'])
  assert parts[1] in accepted_scopes

  offset = int(parts[0], 16)
  section = parts[3]
  size = int(parts[4], 16)
  name = parts[-1].rstrip('\n')
  # Forbid ARM mapping symbols and other unexpected symbol names, but allow $
  # characters in a non-initial position, which can appear as a component of a
  # mangled name, e.g. Clang can mangle a lambda function to:
  # 02cd61e0 l     F .text  000000c0 _ZZL11get_globalsvENK3$_1clEv
  # The equivalent objdump line from GCC is:
  # 0325c58c l     F .text  000000d0 _ZZL11get_globalsvENKUlvE_clEv
  assert re.match('^[a-zA-Z0-9_.][a-zA-Z0-9_.$]*$', name)
  return SymbolInfo(name=name, offset=offset, section=section, size=size)


def _SymbolInfosFromStream(objdump_lines):
  """Parses the output of objdump, and get all the symbols from a binary.

  Args:
    objdump_lines: An iterable of lines

  Returns:
    A list of SymbolInfo.
  """
  symbol_infos = []
  for line in objdump_lines:
    symbol_info = _FromObjdumpLine(line)
    if symbol_info is not None:
      symbol_infos.append(symbol_info)
  return symbol_infos


def SymbolInfosFromBinary(binary_filename):
  """Runs objdump to get all the symbols from a binary.

  Args:
    binary_filename: path to the binary.

  Returns:
    A list of SymbolInfo from the binary.
  """
  command = (symbol.ToolPath('objdump'), '-t', '-w', binary_filename)
  p = subprocess.Popen(command, shell=False, stdout=subprocess.PIPE)
  try:
    result = _SymbolInfosFromStream(p.stdout)
    return result
  finally:
    p.stdout.close()
    p.wait()


def GroupSymbolInfosByOffset(symbol_infos):
  """Create a dict {offset: [symbol_info1, ...], ...}.

  As several symbols can be at the same offset, this is a 1-to-many
  relationship.

  Args:
    symbol_infos: iterable of SymbolInfo instances

  Returns:
    a dict {offset: [symbol_info1, ...], ...}
  """
  offset_to_symbol_infos = collections.defaultdict(list)
  for symbol_info in symbol_infos:
    offset_to_symbol_infos[symbol_info.offset].append(symbol_info)
  return dict(offset_to_symbol_infos)

def GroupSymbolInfosByName(symbol_infos):
  """Create a dict {name: [symbol_info1, ...], ...}.

  A symbol can have several offsets, this is a 1-to-many relationship.

  Args:
    symbol_infos: iterable of SymbolInfo instances

  Returns:
    a dict {name: [symbol_info1, ...], ...}
  """
  name_to_symbol_infos = collections.defaultdict(list)
  for symbol_info in symbol_infos:
    name_to_symbol_infos[symbol_info.name].append(symbol_info)
  return dict(name_to_symbol_infos)

def CreateNameToSymbolInfo(symbol_infos):
  """Create a dict {name: symbol_info, ...}.

  Args:
    symbol_infos: iterable of SymbolInfo instances

  Returns:
    a dict {name: symbol_info, ...}
    If a symbol name corresponds to more than one symbol_info, the symbol_info
    with the lowest offset is chosen.
  """
  # TODO(lizeb,pasko): move the functionality in this method into
  # check_orderfile.
  symbol_infos_by_name = {}
  warnings = cygprofile_utils.WarningCollector(_MAX_WARNINGS_TO_PRINT)
  for infos in GroupSymbolInfosByName(symbol_infos).itervalues():
    first_symbol_info = min(infos, key=lambda x:x.offset)
    symbol_infos_by_name[first_symbol_info.name] = first_symbol_info
    if len(infos) > 1:
      warnings.Write('Symbol %s appears at %d offsets: %s' %
                     (first_symbol_info.name,
                      len(infos),
                      ','.join([hex(x.offset) for x in infos])))
  warnings.WriteEnd('symbols at multiple offsets.')
  return symbol_infos_by_name


def DemangleSymbol(mangled_symbol):
  """Return the demangled form of mangled_symbol."""
  return symbol.CallCppFilt(mangled_symbol)
