initial commit

This commit is contained in:
Rairosu
2024-01-24 13:07:27 +01:00
commit 06c785c352
115 changed files with 33162 additions and 0 deletions

View File

@@ -0,0 +1,93 @@
# Builds the dwarf_import Rust plugin with cargo (run through rustup) and copies
# the resulting shared library into the Binary Ninja plugin directory.
#
# Inputs expected from the including project (none are defined in this file):
#   BN_CORE_OUTPUT_DIR    - exported to cargo as BINARYNINJADIR
#   BN_CORE_PLUGIN_DIR    - destination of the built plugin
#   CARGO_API_VERSION / CARGO_STABLE_VERSION - toolchain passed to `rustup run`
#   UNIVERSAL             - (Apple only) build and lipo arm64 + x86_64 slices
#   binaryninjaapi        - target this plugin is ordered after
cmake_minimum_required(VERSION 3.9 FATAL_ERROR)

project(dwarf_import)

# Sources of this plugin and of the shared crate; any change re-runs cargo.
file(GLOB PLUGIN_SOURCES CONFIGURE_DEPENDS
    ${PROJECT_SOURCE_DIR}/Cargo.toml
    ${PROJECT_SOURCE_DIR}/src/*.rs
    ${PROJECT_SOURCE_DIR}/../shared/Cargo.toml
    ${PROJECT_SOURCE_DIR}/../shared/src/*.rs)

# Sources of the Rust API bindings the plugin is built against.
file(GLOB_RECURSE API_SOURCES CONFIGURE_DEPENDS
    ${PROJECT_SOURCE_DIR}/../../../../binaryninjacore.h
    ${PROJECT_SOURCE_DIR}/../../../binaryninjacore-sys/build.rs
    ${PROJECT_SOURCE_DIR}/../../../binaryninjacore-sys/Cargo.toml
    ${PROJECT_SOURCE_DIR}/../../../binaryninjacore-sys/src/*
    ${PROJECT_SOURCE_DIR}/../../../Cargo.toml
    ${PROJECT_SOURCE_DIR}/../../../src/*.rs)

# Cargo places artifacts in <target-dir>/[<triple>/]<profile>/.
if(CMAKE_BUILD_TYPE MATCHES Debug)
    set(CARGO_PROFILE_DIR debug)
    set(CARGO_OPTS --target-dir=${PROJECT_BINARY_DIR}/target)
else()
    set(CARGO_PROFILE_DIR release)
    set(CARGO_OPTS --target-dir=${PROJECT_BINARY_DIR}/target --release)
endif()
set(TARGET_DIR ${PROJECT_BINARY_DIR}/target/${CARGO_PROFILE_DIR})

# The crate is a cdylib, so the artifact carries the *shared* library prefix and
# suffix. The PDB name is needed for every build type: the Windows rule below
# copies it unconditionally.
set(OUTPUT_FILE ${CMAKE_SHARED_LIBRARY_PREFIX}dwarf_import${CMAKE_SHARED_LIBRARY_SUFFIX})
set(OUTPUT_PDB_NAME ${CMAKE_SHARED_LIBRARY_PREFIX}dwarf_import.pdb)
set(PLUGIN_PATH ${TARGET_DIR}/${OUTPUT_FILE})

add_custom_target(dwarf_import ALL DEPENDS ${PLUGIN_PATH})
add_dependencies(dwarf_import binaryninjaapi)

find_program(RUSTUP_PATH rustup REQUIRED HINTS ~/.cargo/bin)
if(CARGO_API_VERSION)
    set(RUSTUP_COMMAND ${RUSTUP_PATH} run ${CARGO_API_VERSION} cargo build)
else()
    set(RUSTUP_COMMAND ${RUSTUP_PATH} run ${CARGO_STABLE_VERSION} cargo build)
endif()

if(APPLE)
    if(UNIVERSAL)
        # Build one slice per architecture, then merge them into PLUGIN_PATH.
        set(AARCH64_LIB_PATH ${PROJECT_BINARY_DIR}/target/aarch64-apple-darwin/${CARGO_PROFILE_DIR}/${OUTPUT_FILE})
        set(X86_64_LIB_PATH ${PROJECT_BINARY_DIR}/target/x86_64-apple-darwin/${CARGO_PROFILE_DIR}/${OUTPUT_FILE})

        add_custom_command(
            OUTPUT ${PLUGIN_PATH}
            COMMAND ${CMAKE_COMMAND} -E env
                MACOSX_DEPLOYMENT_TARGET=10.14 BINARYNINJADIR=${BN_CORE_OUTPUT_DIR}
                ${RUSTUP_COMMAND} --target=aarch64-apple-darwin ${CARGO_OPTS}
            COMMAND ${CMAKE_COMMAND} -E env
                MACOSX_DEPLOYMENT_TARGET=10.14 BINARYNINJADIR=${BN_CORE_OUTPUT_DIR}
                ${RUSTUP_COMMAND} --target=x86_64-apple-darwin ${CARGO_OPTS}
            COMMAND ${CMAKE_COMMAND} -E make_directory ${TARGET_DIR}
            COMMAND lipo -create ${AARCH64_LIB_PATH} ${X86_64_LIB_PATH} -output ${PLUGIN_PATH}
            COMMAND ${CMAKE_COMMAND} -E copy ${PLUGIN_PATH} ${BN_CORE_PLUGIN_DIR}
            WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
            DEPENDS ${PLUGIN_SOURCES} ${API_SOURCES}
            VERBATIM)
    else()
        add_custom_command(
            OUTPUT ${PLUGIN_PATH}
            COMMAND ${CMAKE_COMMAND} -E env
                MACOSX_DEPLOYMENT_TARGET=10.14 BINARYNINJADIR=${BN_CORE_OUTPUT_DIR}
                ${RUSTUP_COMMAND} ${CARGO_OPTS}
            COMMAND ${CMAKE_COMMAND} -E copy ${PLUGIN_PATH} ${BN_CORE_PLUGIN_DIR}
            WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
            DEPENDS ${PLUGIN_SOURCES} ${API_SOURCES}
            VERBATIM)
    endif()
elseif(WIN32)
    # The debug symbols (PDB) are shipped next to the plugin.
    add_custom_command(
        OUTPUT ${PLUGIN_PATH}
        COMMAND ${CMAKE_COMMAND} -E env BINARYNINJADIR=${BN_CORE_OUTPUT_DIR} ${RUSTUP_COMMAND} ${CARGO_OPTS}
        COMMAND ${CMAKE_COMMAND} -E copy ${PLUGIN_PATH} ${BN_CORE_PLUGIN_DIR}
        COMMAND ${CMAKE_COMMAND} -E copy ${TARGET_DIR}/${OUTPUT_PDB_NAME} ${BN_CORE_PLUGIN_DIR}
        WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
        DEPENDS ${PLUGIN_SOURCES} ${API_SOURCES}
        VERBATIM)
else()
    add_custom_command(
        OUTPUT ${PLUGIN_PATH}
        COMMAND ${CMAKE_COMMAND} -E env BINARYNINJADIR=${BN_CORE_OUTPUT_DIR} ${RUSTUP_COMMAND} ${CARGO_OPTS}
        COMMAND ${CMAKE_COMMAND} -E copy ${PLUGIN_PATH} ${BN_CORE_PLUGIN_DIR}
        WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
        DEPENDS ${PLUGIN_SOURCES} ${API_SOURCES}
        VERBATIM)
endif()

View File

@@ -0,0 +1,14 @@
# Cargo manifest for the dwarf_import plugin crate.
[package]
name = "dwarf_import"
version = "0.1.0"
authors = ["KyleMiles <kyle@vector35.com>"]
edition = "2021"
[lib]
# cdylib: the crate is compiled to a C-compatible dynamic library.
crate-type = ["cdylib"]
[dependencies]
# Path dependencies are resolved relative to this manifest's directory.
dwarfreader = { path = "../shared/" }
binaryninja = { path = "../../../" }
# DWARF parsing and logging facade.
gimli = "0.27"
log = "0.4.17"

View File

@@ -0,0 +1,390 @@
// Copyright 2021-2024 Vector 35 Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::dwarfdebuginfo::{DebugInfoBuilder, DebugInfoBuilderContext, TypeUID};
use crate::helpers::*;
use crate::types::get_type;
use binaryninja::{
rc::*,
types::{EnumerationBuilder, FunctionParameter, ReferenceType, Type, TypeBuilder},
};
use gimli::{constants, AttributeValue::Encoding, DebuggingInformationEntry, Reader, Unit};
pub(crate) fn handle_base_type<R: Reader<Offset = usize>>(
unit: &Unit<R>,
entry: &DebuggingInformationEntry<R>,
debug_info_builder_context: &DebugInfoBuilderContext<R>,
) -> Option<Ref<Type>> {
// All base types have:
// DW_AT_encoding (our concept of type_class)
// DW_AT_byte_size and/or DW_AT_bit_size
// *DW_AT_name
// *DW_AT_endianity (assumed default for arch)
// *DW_AT_data_bit_offset (assumed 0)
// *Some indication of signedness?
// * = Optional
let name = debug_info_builder_context.get_name(unit, entry)?;
let size = get_size_as_usize(entry)?;
match entry.attr_value(constants::DW_AT_encoding) {
Ok(Some(Encoding(encoding))) => {
match encoding {
constants::DW_ATE_address => None,
constants::DW_ATE_boolean => Some(Type::bool()),
constants::DW_ATE_complex_float => None,
constants::DW_ATE_float => Some(Type::named_float(size, name)),
constants::DW_ATE_signed => Some(Type::named_int(size, true, name)),
constants::DW_ATE_signed_char => Some(Type::named_int(size, true, name)),
constants::DW_ATE_unsigned => Some(Type::named_int(size, false, name)),
constants::DW_ATE_unsigned_char => Some(Type::named_int(size, false, name)),
constants::DW_ATE_imaginary_float => None,
constants::DW_ATE_packed_decimal => None,
constants::DW_ATE_numeric_string => None,
constants::DW_ATE_edited => None,
constants::DW_ATE_signed_fixed => None,
constants::DW_ATE_unsigned_fixed => None,
constants::DW_ATE_decimal_float => Some(Type::named_float(size, name)),
constants::DW_ATE_UTF => Some(Type::named_int(size, false, name)), // TODO : Verify
constants::DW_ATE_UCS => None,
constants::DW_ATE_ASCII => None, // Some sort of array?
constants::DW_ATE_lo_user => None,
constants::DW_ATE_hi_user => None,
_ => None, // Anything else is invalid at time of writing (gimli v0.23.0)
}
}
_ => None,
}
}
/// Builds an enumeration type from an enumeration DIE and its
/// `DW_TAG_enumerator` children.
///
/// Returns `None` when the DIE subtree cannot be read, or when any enumerator
/// lacks a resolvable name or a `DW_AT_const_value` that converts to `u64`.
/// The enumeration width falls back to 8 bytes when the DIE carries no size.
pub(crate) fn handle_enum<R: Reader<Offset = usize>>(
    unit: &Unit<R>,
    entry: &DebuggingInformationEntry<R>,
    debug_info_builder_context: &DebugInfoBuilderContext<R>,
) -> Option<Ref<Type>> {
    // All enumeration types have:
    //   DW_AT_byte_size
    //   *DW_AT_name
    //   *DW_AT_enum_class
    //   *DW_AT_type
    //   ?DW_AT_abstract_origin
    //   ?DW_AT_accessibility
    //   ?DW_AT_allocated
    //   ?DW_AT_associated
    //   ?DW_AT_bit_size
    //   ?DW_AT_bit_stride
    //   ?DW_AT_byte_stride
    //   ?DW_AT_data_location
    //   ?DW_AT_declaration
    //   ?DW_AT_description
    //   ?DW_AT_sibling
    //   ?DW_AT_signature
    //   ?DW_AT_specification
    //   ?DW_AT_start_scope
    //   ?DW_AT_visibility
    //   * = Optional
    // Children of enumeration_types are enumerators which contain:
    //   DW_AT_name
    //   DW_AT_const_value
    //   *DW_AT_description
    let enumeration_builder = EnumerationBuilder::new();

    // Malformed debug info must not panic the importer: every read failure
    // below results in `None` instead.
    let mut tree = unit.entries_tree(Some(entry.offset())).ok()?;
    let mut children = tree.root().ok()?.children();
    while let Ok(Some(child)) = children.next() {
        if child.entry().tag() == constants::DW_TAG_enumerator {
            let name = debug_info_builder_context.get_name(unit, child.entry())?;
            // `.ok()??`: first `?` rejects a read error, second a missing attribute.
            let value_attr = child.entry().attr(constants::DW_AT_const_value).ok()??;
            let value = get_attr_as_u64(&value_attr)?;
            enumeration_builder.insert(name, value);
        }
    }

    Some(Type::enumeration(
        &enumeration_builder.finalize(),
        get_size_as_usize(entry).unwrap_or(8),
        false,
    ))
}
/// Resolves the type a typedef DIE refers to.
///
/// Returns `(Some(target_type), names_differ)` when `entry_type` is present and
/// already registered in `debug_info_builder`; `names_differ` is `true` when
/// `typedef_name` is not equal to the name stored with the target type.
/// Returns `(None, false)` when there is no target type or it is not registered
/// (failed to parse, incomplete, etc).
pub(crate) fn handle_typedef(
    debug_info_builder: &mut DebugInfoBuilder,
    entry_type: Option<TypeUID>,
    typedef_name: String,
) -> (Option<Ref<Type>>, bool) {
    // All typedef types have:
    //   DW_AT_name
    //   *DW_AT_type
    //   * = Optional
    let target = entry_type.and_then(|target_uid| debug_info_builder.get_type(target_uid));

    match target {
        Some((target_name, target_type)) => (Some(target_type), typedef_name != target_name),
        // 5.3: "typedef represents a declaration of the type that is not also a definition"
        None => (None, false),
    }
}
/// Builds a pointer (or reference, per `reference_type`) type for a DIE.
///
/// The width comes from the DIE's own size attribute when present, otherwise
/// from the context's default address size. The pointee is the type registered
/// under `entry_type`, or `void` when the DIE has no `DW_AT_type`.
///
/// Panics if `entry_type` is `Some` but that type is not registered in
/// `debug_info_builder`.
pub(crate) fn handle_pointer<R: Reader<Offset = usize>>(
    entry: &DebuggingInformationEntry<R>,
    debug_info_builder_context: &DebugInfoBuilderContext<R>,
    debug_info_builder: &mut DebugInfoBuilder,
    entry_type: Option<TypeUID>,
    reference_type: ReferenceType,
) -> Option<Ref<Type>> {
    // All pointer types have:
    //   DW_AT_type
    //   *DW_AT_byte_size
    //   ?DW_AT_name
    //   ?DW_AT_address
    //   ?DW_AT_allocated
    //   ?DW_AT_associated
    //   ?DW_AT_data_location
    //   * = Optional
    let pointer_width = get_size_as_usize(entry)
        .unwrap_or_else(|| debug_info_builder_context.default_address_size());

    let pointee = match entry_type {
        Some(pointee_uid) => debug_info_builder.get_type(pointee_uid).unwrap().1,
        None => Type::void(),
    };

    Some(Type::pointer_of_width(
        pointee.as_ref(),
        pointer_width,
        false,
        false,
        Some(reference_type),
    ))
}
/// Builds an array type for an array DIE.
///
/// Each child DIE contributes one dimension, sized by `get_subrange_size`.
/// Children are listed outermost dimension first, so the element type is
/// wrapped starting from the *last* child: children `[2]`, `[3]` over `int`
/// produce an array of 2 elements, each an array of 3 `int`.
///
/// Returns `None` when the DIE has no element type. A DIE without children
/// yields a zero-length array of the element type.
///
/// Panics if the element type is not registered in `debug_info_builder` or the
/// DIE subtree cannot be read.
pub(crate) fn handle_array<R: Reader<Offset = usize>>(
    unit: &Unit<R>,
    entry: &DebuggingInformationEntry<R>,
    debug_info_builder: &mut DebugInfoBuilder,
    entry_type: Option<TypeUID>,
) -> Option<Ref<Type>> {
    // All array types have:
    //   DW_AT_type
    //   *DW_AT_name
    //   *DW_AT_ordering
    //   *DW_AT_byte_stride or DW_AT_bit_stride
    //   *DW_AT_byte_size or DW_AT_bit_size
    //   *DW_AT_allocated
    //   *DW_AT_associated and
    //   *DW_AT_data_location
    //   * = Optional
    // For multidimensional arrays, DW_TAG_subrange_type or DW_TAG_enumeration_type
    let element_type = debug_info_builder.get_type(entry_type?).unwrap().1;

    let mut tree = unit.entries_tree(Some(entry.offset())).unwrap();
    let mut children = tree.root().unwrap().children();

    // Collect the dimension sizes in declaration order first; they can only be
    // applied once the innermost (last) one is known.
    let mut dimension_sizes = vec![];
    while let Ok(Some(child)) = children.next() {
        dimension_sizes.push(get_subrange_size(child.entry()));
    }

    if dimension_sizes.is_empty() {
        return Some(Type::array(element_type.as_ref(), 0));
    }

    // Wrap from the innermost dimension outwards.
    let mut array_type = element_type;
    for dimension_size in dimension_sizes.into_iter().rev() {
        array_type = Type::array(array_type.as_ref(), dimension_size);
    }
    Some(array_type)
}
/// Builds a function type for a subroutine-type DIE.
///
/// The return type is the type registered under `entry_type`, or `void` when
/// absent. Parameters are taken from `DW_TAG_formal_parameter` children that
/// have both a resolvable type and a name; a `DW_TAG_unspecified_parameters`
/// child marks the function as variadic.
///
/// Panics if the return type or a parameter type is not registered in
/// `debug_info_builder`, or if the DIE subtree cannot be read.
pub(crate) fn handle_function<R: Reader<Offset = usize>>(
    unit: &Unit<R>,
    entry: &DebuggingInformationEntry<R>,
    debug_info_builder_context: &DebugInfoBuilderContext<R>,
    debug_info_builder: &mut DebugInfoBuilder,
    entry_type: Option<TypeUID>,
) -> Option<Ref<Type>> {
    // All subroutine types have:
    //   *DW_AT_name
    //   *DW_AT_type (if not provided, void)
    //   *DW_AT_prototyped
    //   ?DW_AT_abstract_origin
    //   ?DW_AT_accessibility
    //   ?DW_AT_address_class
    //   ?DW_AT_allocated
    //   ?DW_AT_associated
    //   ?DW_AT_data_location
    //   ?DW_AT_declaration
    //   ?DW_AT_description
    //   ?DW_AT_sibling
    //   ?DW_AT_start_scope
    //   ?DW_AT_visibility
    //   * = Optional
    // May have children, including DW_TAG_formal_parameters, which all have:
    //   *DW_AT_type
    //   * = Optional
    // or is otherwise DW_TAG_unspecified_parameters
    let return_type = match entry_type {
        None => Type::void(),
        Some(return_type_uid) => {
            debug_info_builder
                .get_type(return_type_uid)
                .expect("Subroutine return type was not processed")
                .1
        }
    };

    // A named function type may refer to itself through its parameters, so a
    // parameterless placeholder alias is registered under this DIE's UID while
    // the parameters are resolved, and removed again afterwards.
    let function_name = debug_info_builder_context.get_name(unit, entry);
    let placeholder_registered = function_name.is_some();
    if let Some(name) = function_name {
        let placeholder = Type::named_type_from_type(
            name.clone(),
            &Type::function::<String, &binaryninja::types::Type>(
                return_type.as_ref(),
                &[],
                false,
            ),
        );
        debug_info_builder.add_type(get_uid(unit, entry), name, placeholder, false);
    }

    let mut parameters: Vec<FunctionParameter<String>> = vec![];
    let mut variable_arguments = false;

    // Walk the direct children of the subroutine DIE.
    let mut tree = unit.entries_tree(Some(entry.offset())).unwrap();
    let mut children = tree.root().unwrap().children();
    while let Ok(Some(child)) = children.next() {
        let child_entry = child.entry();
        match child_entry.tag() {
            constants::DW_TAG_formal_parameter => {
                // The type is resolved before the name, and always, because
                // resolving it may register new types in the builder.
                let parameter_uid = get_type(
                    unit,
                    child_entry,
                    debug_info_builder_context,
                    debug_info_builder,
                );
                let parameter_name = debug_info_builder_context.get_name(unit, child_entry);
                if let (Some(uid), Some(name)) = (parameter_uid, parameter_name) {
                    let parameter_type = debug_info_builder.get_type(uid).unwrap().1;
                    parameters.push(FunctionParameter::new(parameter_type, name, None));
                }
            }
            constants::DW_TAG_unspecified_parameters => variable_arguments = true,
            _ => {}
        }
    }

    if placeholder_registered {
        debug_info_builder.remove_type(get_uid(unit, entry));
    }

    Some(Type::function(
        return_type.as_ref(),
        &parameters,
        variable_arguments,
    ))
}
/// Builds the `const`-qualified version of the type registered under
/// `entry_type`, or `const void` when the DIE has no `DW_AT_type`.
///
/// Panics if `entry_type` is `Some` but not registered in `debug_info_builder`.
pub(crate) fn handle_const(
    debug_info_builder: &mut DebugInfoBuilder,
    entry_type: Option<TypeUID>,
) -> Option<Ref<Type>> {
    // All const types have:
    //   ?DW_AT_allocated
    //   ?DW_AT_associated
    //   ?DW_AT_data_location
    //   ?DW_AT_name
    //   ?DW_AT_sibling
    //   ?DW_AT_type
    let const_type = match entry_type {
        Some(target_uid) => {
            let target_type = debug_info_builder.get_type(target_uid).unwrap().1;
            (*target_type).to_builder().set_const(true).finalize()
        }
        None => TypeBuilder::void().set_const(true).finalize(),
    };
    Some(const_type)
}
/// Builds the `volatile`-qualified version of the type registered under
/// `entry_type`, or `volatile void` when the DIE has no `DW_AT_type`.
///
/// Panics if `entry_type` is `Some` but not registered in `debug_info_builder`.
pub(crate) fn handle_volatile(
    debug_info_builder: &mut DebugInfoBuilder,
    entry_type: Option<TypeUID>,
) -> Option<Ref<Type>> {
    // All volatile types have:
    //   ?DW_AT_allocated
    //   ?DW_AT_associated
    //   ?DW_AT_data_location
    //   ?DW_AT_name
    //   ?DW_AT_sibling
    //   ?DW_AT_type
    let volatile_type = match entry_type {
        Some(target_uid) => {
            let target_type = debug_info_builder.get_type(target_uid).unwrap().1;
            (*target_type).to_builder().set_volatile(true).finalize()
        }
        None => TypeBuilder::void().set_volatile(true).finalize(),
    };
    Some(volatile_type)
}

View File

@@ -0,0 +1,407 @@
// Copyright 2021-2024 Vector 35 Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::helpers::{get_uid, resolve_specification, DieReference};
use binaryninja::{
binaryview::{BinaryView, BinaryViewBase, BinaryViewExt},
debuginfo::{DebugFunctionInfo, DebugInfo},
platform::Platform,
rc::*,
symbol::SymbolType,
templatesimplifier::simplify_str_to_fqn,
types::{Conf, FunctionParameter, Type},
};
use gimli::{DebuggingInformationEntry, Dwarf, Reader, Unit};
use log::{error, warn};
use std::{
collections::{hash_map::Values, HashMap},
hash::Hash,
};
// Identifier under which a type is stored in `DebugInfoBuilder`; a value of 0
// is treated as `void` when function parameters are committed.
pub(crate) type TypeUID = usize;
/////////////////////////
// FunctionInfoBuilder
// Accumulates what is known about one function; `update` merges in
// information found in further DIEs describing the same function.
// TODO : Function local variables
#[derive(PartialEq, Eq, Hash)]
pub(crate) struct FunctionInfoBuilder {
// Name used as fallback key when no raw name is available.
pub(crate) full_name: Option<String>,
// Raw (linkage) name; primary key when merging functions.
pub(crate) raw_name: Option<String>,
// UID of the return type; `None` is committed as `void`.
pub(crate) return_type: Option<TypeUID>,
// Start address, when known.
pub(crate) address: Option<u64>,
// One slot per parameter as (name, type UID); `None` marks a parameter whose
// information is not (yet) known.
pub(crate) parameters: Vec<Option<(String, TypeUID)>>,
// Platform of the existing function at `address`; filled in by
// `DebugInfoBuilder::post_process`.
pub(crate) platform: Option<Ref<Platform>>,
}
impl FunctionInfoBuilder {
    /// Merges newly discovered information into this function.
    ///
    /// Scalar properties are overwritten only when the new value is `Some`.
    /// Parameters are merged by position: an existing `None` slot is replaced
    /// by the incoming entry, an existing `Some` slot is kept, and entries
    /// beyond the current length are appended.
    pub(crate) fn update(
        &mut self,
        full_name: Option<String>,
        raw_name: Option<String>,
        return_type: Option<TypeUID>,
        address: Option<u64>,
        parameters: Vec<Option<(String, TypeUID)>>,
    ) {
        if let Some(new_full_name) = full_name {
            self.full_name = Some(new_full_name);
        }
        if let Some(new_raw_name) = raw_name {
            self.raw_name = Some(new_raw_name);
        }
        if let Some(new_return_type) = return_type {
            self.return_type = Some(new_return_type);
        }
        if let Some(new_address) = address {
            self.address = Some(new_address);
        }

        for (index, incoming) in parameters.into_iter().enumerate() {
            if index >= self.parameters.len() {
                // No slot at this position yet.
                self.parameters.push(incoming);
            } else if self.parameters[index].is_none() {
                // Fill a slot that was previously unknown.
                self.parameters[index] = incoming;
            }
            // Otherwise the slot already holds a parameter and is left alone.
            // TODO : Also replace parameters with empty names or void types
            //        (void types aren't actually UID 0, so that check is a placebo)
        }
    }
}
//////////////////////
// DebugInfoBuilder
// TODO : Don't make this pub...fix the value thing
// A type collected from the debug info, stored in `DebugInfoBuilder` by UID.
pub(crate) struct DebugType {
// Name the type was registered under.
name: String,
// The constructed type.
t: Ref<Type>,
// Only types with `commit == true` are added to the `DebugInfo` by `commit_types`.
commit: bool,
}
// Read-mostly state shared by the DIE handlers while importing one file.
pub(crate) struct DebugInfoBuilderContext<R: Reader<Offset = usize>> {
// The DWARF sections being imported.
dwarf: Dwarf<R>,
// Every unit of `dwarf`, parsed once in `new`.
units: Vec<Unit<R>>,
// Names recorded per DIE UID through `set_name`, looked up by `get_name`.
names: HashMap<TypeUID, String>,
// Address size reported by the binary view; used when a DIE gives no size.
default_address_size: usize,
// Initialised to 0 in `new`; maintained by code outside this type.
pub(crate) total_die_count: usize,
}
impl<R: Reader<Offset = usize>> DebugInfoBuilderContext<R> {
    /// Parses every unit of `dwarf` up front.
    ///
    /// Returns `None` (after logging an error) as soon as a unit header cannot
    /// be turned into a unit. Iteration simply stops at the first header that
    /// cannot be read.
    pub(crate) fn new(view: &BinaryView, dwarf: Dwarf<R>) -> Option<Self> {
        let mut units = vec![];
        let mut headers = dwarf.units();
        while let Ok(Some(header)) = headers.next() {
            match dwarf.unit(header) {
                Ok(unit) => units.push(unit),
                Err(_) => {
                    error!("Unable to read DWARF information. File may be malformed or corrupted. Not applying debug info.");
                    return None;
                }
            }
        }

        Some(Self {
            dwarf,
            units,
            names: HashMap::new(),
            default_address_size: view.address_size(),
            total_die_count: 0,
        })
    }

    /// The DWARF sections this context was created from.
    pub(crate) fn dwarf(&self) -> &Dwarf<R> {
        &self.dwarf
    }

    /// All units parsed by `new`.
    pub(crate) fn units(&self) -> &[Unit<R>] {
        &self.units
    }

    /// Address size of the binary view, used when a DIE carries no size.
    pub(crate) fn default_address_size(&self) -> usize {
        self.default_address_size
    }

    /// Records the name of a DIE. Panics if a name was already recorded for
    /// `die_uid`.
    pub(crate) fn set_name(&mut self, die_uid: TypeUID, name: String) {
        assert!(self.names.insert(die_uid, name).is_none());
    }

    /// Looks up the recorded name of `entry`, following its specification /
    /// abstract-origin references first. Returns `None` when the reference
    /// chain cannot be resolved or no name was recorded for the resolved DIE.
    pub(crate) fn get_name(
        &self,
        unit: &Unit<R>,
        entry: &DebuggingInformationEntry<R>,
    ) -> Option<String> {
        match resolve_specification(unit, entry, self) {
            DieReference::Err => None,
            DieReference::UnitAndOffset((resolved_unit, resolved_offset)) => {
                let resolved_entry = resolved_unit.entry(resolved_offset).unwrap();
                let resolved_uid = get_uid(resolved_unit, &resolved_entry);
                self.names.get(&resolved_uid).cloned()
            }
        }
    }
}
// DWARF info is stored and displayed in a tree, but is really a graph.
// The purpose of this builder is to help resolve those graph edges by mapping partial function
// info and types to one DIE's UID (T) before adding the completed info to BN's debug info.
pub(crate) struct DebugInfoBuilder {
// Functions collected so far; entries describing the same function are merged.
functions: Vec<FunctionInfoBuilder>,
// Collected types, keyed by UID.
types: HashMap<TypeUID, DebugType>,
// Data variables keyed by address, stored as (optional name, type UID).
data_variables: HashMap<u64, (Option<String>, TypeUID)>,
}
impl DebugInfoBuilder {
    /// Creates an empty builder.
    pub(crate) fn new() -> Self {
        Self {
            functions: Vec::new(),
            types: HashMap::new(),
            data_variables: HashMap::new(),
        }
    }

    /// Records a function, merging it into an already known one when possible.
    ///
    /// Raw names are the primary key; the full name is used as key only when
    /// either side lacks a raw name.
    // TODO : Consider further falling back on address/architecture
    #[allow(clippy::too_many_arguments)]
    pub(crate) fn insert_function(
        &mut self,
        full_name: Option<String>,
        raw_name: Option<String>,
        return_type: Option<TypeUID>,
        address: Option<u64>,
        parameters: Vec<Option<(String, TypeUID)>>,
    ) {
        let matched_by_raw_name = self
            .functions
            .iter()
            .position(|func| func.raw_name.is_some() && func.raw_name == raw_name);

        let matched_index = matched_by_raw_name.or_else(|| {
            self.functions.iter().position(|func| {
                (func.raw_name.is_none() || raw_name.is_none())
                    && func.full_name.is_some()
                    && func.full_name == full_name
            })
        });

        match matched_index {
            Some(index) => {
                self.functions[index].update(full_name, raw_name, return_type, address, parameters)
            }
            None => self.functions.push(FunctionInfoBuilder {
                full_name,
                raw_name,
                return_type,
                address,
                parameters,
                platform: None,
            }),
        }
    }

    /// All functions collected so far.
    pub(crate) fn functions(&self) -> &[FunctionInfoBuilder] {
        &self.functions
    }

    /// Iterator over all collected types.
    pub(crate) fn types(&self) -> Values<'_, TypeUID, DebugType> {
        self.types.values()
    }

    /// Registers `t` under `type_uid`, replacing any previous entry. An error
    /// is logged when a different type is replaced and `commit` is set.
    pub(crate) fn add_type(
        &mut self,
        type_uid: TypeUID,
        name: String,
        t: Ref<Type>,
        commit: bool,
    ) {
        let replaced = self.types.insert(
            type_uid,
            DebugType {
                name: name.clone(),
                t: t.clone(),
                commit,
            },
        );

        if let Some(previous) = replaced {
            if previous.t != t && commit {
                error!("DWARF info contains duplicate type definition. Overwriting type `{}` (named `{:?}`) with `{}` (named `{:?}`)",
                    previous.t,
                    previous.name,
                    t,
                    name
                );
            }
        }
    }

    /// Forgets the type registered under `type_uid`, if any.
    pub(crate) fn remove_type(&mut self, type_uid: TypeUID) {
        self.types.remove(&type_uid);
    }

    /// Returns a copy of the name and type registered under `type_uid`.
    // TODO : Non-copy?
    pub(crate) fn get_type(&self, type_uid: TypeUID) -> Option<(String, Ref<Type>)> {
        let debug_type = self.types.get(&type_uid)?;
        Some((debug_type.name.clone(), debug_type.t.clone()))
    }

    /// Whether a type is registered under `type_uid`.
    pub(crate) fn contains_type(&self, type_uid: TypeUID) -> bool {
        self.types.contains_key(&type_uid)
    }

    /// Records a data variable at `address`, replacing any previous one. An
    /// error is logged when the replaced variable had a different type.
    ///
    /// Panics if a variable is replaced and either type UID is not registered.
    pub(crate) fn add_data_variable(
        &mut self,
        address: u64,
        name: Option<String>,
        type_uid: TypeUID,
    ) {
        let replaced = self.data_variables.insert(address, (name, type_uid));

        if let Some((_previous_name, previous_type_uid)) = replaced {
            let previous_type = self.get_type(previous_type_uid).unwrap().1;
            let new_type = self.get_type(type_uid).unwrap().1;
            if previous_type_uid != type_uid || previous_type != new_type {
                error!("DWARF info contains duplicate data variable definition. Overwriting data variable at 0x{:08x} (`{}`) with `{}`",
                    address,
                    previous_type,
                    new_type
                );
            }
        }
    }

    /// Adds every type flagged for commit to `debug_info`.
    fn commit_types(&self, debug_info: &mut DebugInfo) {
        for debug_type in self.types().filter(|debug_type| debug_type.commit) {
            // TODO : Components
            debug_info.add_type(debug_type.name.clone(), debug_type.t.as_ref(), &[]);
        }
    }

    /// Adds every data variable to `debug_info`; panics if one is rejected or
    /// its type is not registered.
    // TODO : Consume data?
    fn commit_data_variables(&self, debug_info: &mut DebugInfo) {
        for (&address, (name, type_uid)) in &self.data_variables {
            // The last argument is the (currently empty) component list. TODO : Components
            assert!(debug_info.add_data_variable(
                address,
                &self.get_type(*type_uid).unwrap().1,
                name.clone(),
                &[]
            ));
        }
    }

    /// Assembles the function type of `function` from its return type and
    /// known parameters. Unknown (`None`) parameter slots are skipped and a
    /// parameter type UID of 0 stands for `void`.
    fn get_function_type(&self, function: &FunctionInfoBuilder) -> Ref<Type> {
        let return_type_ref = match function.return_type {
            Some(return_type_id) => self.get_type(return_type_id).unwrap().1,
            None => binaryninja::types::Type::void(),
        };
        let return_type = Conf::new(return_type_ref, 0);

        let parameters: Vec<FunctionParameter<String>> = function
            .parameters
            .iter()
            .flatten()
            .map(|(name, uid)| {
                let parameter_type = if *uid == 0 {
                    Type::void()
                } else {
                    self.get_type(*uid).unwrap().1
                };
                FunctionParameter::new(parameter_type, name.clone(), None)
            })
            .collect();

        // TODO : Handle
        let variable_parameters = false;

        binaryninja::types::Type::function(&return_type, &parameters, variable_parameters)
    }

    /// Adds every collected function to `debug_info`.
    fn commit_functions(&self, debug_info: &mut DebugInfo) {
        for function in self.functions() {
            // TODO : The second argument (short name) should eventually be the simplified name
            //        and the first (full name) the unsimplified one; currently the symbols view
            //        shows the full version, so changing it here too makes it look bad in the UI
            let function_info = DebugFunctionInfo::new(
                function.full_name.clone(),
                function.full_name.clone(),
                function.raw_name.clone(),
                Some(self.get_function_type(function)),
                function.address,
                function.platform.clone(),
                vec![], // TODO : Components
            );
            debug_info.add_function(function_info);
        }
    }

    /// Reconciles the collected functions with what the binary view already
    /// knows: fills in missing addresses and richer names from existing
    /// symbols, and takes the platform from an existing function at the same
    /// address.
    // TODO : We don't need post-processing if we process correctly the first time....
    //   When originally resolving names, we need to check:
    //     If there's already a name from binja that's "more correct" than what we found (has more namespaces)
    //     If there's no name for the DIE, but there's a linkage name that's resolved in binja to a usable name
    //   This is no longer true, because DWARF doesn't provide platform information for functions,
    //   so we at least need to post-process thumb functions
    pub(crate) fn post_process(&mut self, bv: &BinaryView, _debug_info: &mut DebugInfo) -> &Self {
        for func in &mut self.functions {
            // If the function's raw name already exists in the binary...
            if let Some(raw_name) = &func.raw_name {
                if let Ok(symbol) = bv.symbol_by_raw_name(raw_name) {
                    // Link mangled names without addresses to existing symbols in the binary.
                    // DWARF doesn't contain GOT info, so import-address symbols are skipped;
                    // they would be wrong (relying on Binja's mechanisms for the GOT is good).
                    if func.address.is_none()
                        && symbol.sym_type() != SymbolType::ImportAddress
                    {
                        func.address = Some(symbol.address());
                    }

                    if let Some(full_name) = &func.full_name {
                        let symbol_full_name = symbol.full_name();
                        // If our name has fewer namespaces than the existing name,
                        // assume we lost the namespace info.
                        if simplify_str_to_fqn(full_name, true).len()
                            < simplify_str_to_fqn(symbol_full_name.clone(), true).len()
                        {
                            func.full_name = Some(symbol_full_name.to_string());
                        }
                    }
                }
            }

            if let Some(address) = func.address {
                let existing_functions = bv.functions_at(address);
                match existing_functions.len() {
                    0 => {}
                    1 => func.platform = Some(existing_functions.get(0).platform()),
                    _ => {
                        warn!("Multiple existing functions at address {address:08x}. One or more functions at this address may have the wrong platform information. Please report this binary.");
                    }
                }
            }
        }
        self
    }

    /// Commits types, then data variables, then functions to `debug_info`.
    pub(crate) fn commit_info(&self, debug_info: &mut DebugInfo) {
        self.commit_types(debug_info);
        self.commit_data_variables(debug_info);
        self.commit_functions(debug_info);
    }
}

View File

@@ -0,0 +1,78 @@
// Copyright 2021-2024 Vector 35 Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::dwarfdebuginfo::{DebugInfoBuilder, DebugInfoBuilderContext, TypeUID};
use crate::helpers::*;
use crate::types::get_type;
use gimli::{constants, DebuggingInformationEntry, Reader, Unit};
/// Collects one slot per `DW_TAG_formal_parameter` child of `entry`.
///
/// A slot is `Some((name, type_uid))` when the parameter has a name; a type
/// that cannot be resolved is recorded as UID 0. A parameter without a name
/// yields `None`. All other children, including
/// `DW_TAG_unspecified_parameters`, are ignored.
///
/// Read errors do not panic: an unreadable subtree yields an empty list, and
/// iteration stops at the first child that cannot be read, matching the other
/// child loops in this plugin.
fn get_parameters<R: Reader<Offset = usize>>(
    unit: &Unit<R>,
    entry: &DebuggingInformationEntry<R>,
    debug_info_builder_context: &DebugInfoBuilderContext<R>,
    debug_info_builder: &mut DebugInfoBuilder,
) -> Vec<Option<(String, TypeUID)>> {
    if !entry.has_children() {
        return vec![];
    }

    // We make a new tree from the current entry to iterate over its children
    let mut sub_die_tree = match unit.entries_tree(Some(entry.offset())) {
        Ok(tree) => tree,
        Err(_) => return vec![],
    };
    let root = match sub_die_tree.root() {
        Ok(root) => root,
        Err(_) => return vec![],
    };

    let mut result = vec![];
    let mut children = root.children();
    while let Ok(Some(child)) = children.next() {
        if child.entry().tag() != constants::DW_TAG_formal_parameter {
            continue;
        }

        let name = debug_info_builder_context.get_name(unit, child.entry());
        // Resolved even for unnamed parameters: resolving may register types.
        let type_ = get_type(
            unit,
            child.entry(),
            debug_info_builder_context,
            debug_info_builder,
        );

        result.push(name.map(|parameter_name| (parameter_name, type_.unwrap_or(0))));
    }
    result
}
/// Gathers whatever this DIE states about a function (names, return type,
/// start address, parameters) and hands it to the builder, which merges it
/// with information from other DIEs describing the same function.
pub(crate) fn parse_function_entry<R: Reader<Offset = usize>>(
    unit: &Unit<R>,
    entry: &DebuggingInformationEntry<R>,
    debug_info_builder_context: &DebugInfoBuilderContext<R>,
    debug_info_builder: &mut DebugInfoBuilder,
) {
    // Names first; neither lookup touches the builder.
    let function_full_name = debug_info_builder_context.get_name(unit, entry);
    let function_raw_name = get_raw_name(unit, entry, debug_info_builder_context);

    // Resolving the return type and the parameters needs the builder mutably,
    // so they are evaluated before the final insert.
    let function_return_type =
        get_type(unit, entry, debug_info_builder_context, debug_info_builder);
    let function_address = get_start_address(unit, entry, debug_info_builder_context);
    let function_parameters =
        get_parameters(unit, entry, debug_info_builder_context, debug_info_builder);

    debug_info_builder.insert_function(
        function_full_name,
        function_raw_name,
        function_return_type,
        function_address,
        function_parameters,
    );
}

View File

@@ -0,0 +1,293 @@
// Copyright 2021-2024 Vector 35 Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::DebugInfoBuilderContext;
use gimli::{
constants, Attribute, AttributeValue,
AttributeValue::{DebugInfoRef, UnitRef},
DebuggingInformationEntry, Operation, Reader, Unit, UnitOffset, UnitSectionOffset,
};
use log::warn;
/// Returns the identifier used for a DIE throughout the importer: the raw
/// value of its section-relative offset, regardless of which section variant
/// the offset belongs to.
pub(crate) fn get_uid<R: Reader<Offset = usize>>(
    unit: &Unit<R>,
    entry: &DebuggingInformationEntry<R>,
) -> usize {
    let section_offset = entry.offset().to_unit_section_offset(unit);
    match section_offset {
        UnitSectionOffset::DebugInfoOffset(info_offset) => info_offset.0,
        UnitSectionOffset::DebugTypesOffset(types_offset) => types_offset.0,
    }
}
////////////////////////////////////
// DIE attr convenience functions
// Result of following a DIE reference attribute.
pub(crate) enum DieReference<'a, R: Reader<Offset = usize>> {
// The referenced DIE: the unit that contains it and its offset within that unit.
UnitAndOffset((&'a Unit<R>, UnitOffset)),
// The reference could not be resolved.
Err,
}
/// Follows the DIE reference stored in attribute `attr` of `entry`.
///
/// A unit-relative reference resolves within `unit`. A `.debug_info`-relative
/// reference is resolved by searching the context's units for the one that
/// contains the offset; a warning is logged when none does. Returns `None`
/// when the attribute is absent, unreadable, or of any other form.
pub(crate) fn get_attr_die<'a, R: Reader<Offset = usize>>(
    unit: &'a Unit<R>,
    entry: &DebuggingInformationEntry<R>,
    debug_info_builder_context: &'a DebugInfoBuilderContext<R>,
    attr: constants::DwAt,
) -> Option<DieReference<'a, R>> {
    match entry.attr_value(attr) {
        Ok(Some(UnitRef(offset))) => Some(DieReference::UnitAndOffset((unit, offset))),
        Ok(Some(DebugInfoRef(offset))) => {
            let located = debug_info_builder_context
                .units()
                .iter()
                .find_map(|source_unit| {
                    offset
                        .to_unit_offset(&source_unit.header)
                        .map(|unit_offset| DieReference::UnitAndOffset((source_unit, unit_offset)))
                });
            if located.is_none() {
                warn!("Failed to fetch DIE. Debug information may be incomplete.");
            }
            located
        }
        // Ok(Some(DebugInfoRefSup(offset))) TODO - dwarf 5 stuff
        _ => None,
    }
}
/// Follows `DW_AT_specification` and, failing that, `DW_AT_abstract_origin`
/// references recursively until a DIE with neither attribute is reached, and
/// returns that DIE's unit and offset.
///
/// Returns `DieReference::Err` (after logging a warning) when a referenced DIE
/// cannot be read or when a DIE refers to itself. The self-reference check
/// compares section-relative offsets, so a reference into another unit that
/// happens to share the same unit-relative offset is not mistaken for a cycle.
/// NOTE(review): cycles spanning more than one DIE are still not detected.
pub(crate) fn resolve_specification<'a, R: Reader<Offset = usize>>(
    unit: &'a Unit<R>,
    entry: &DebuggingInformationEntry<R>,
    debug_info_builder_context: &'a DebugInfoBuilderContext<R>,
) -> DieReference<'a, R> {
    // Specification takes precedence over abstract origin.
    for reference_attr in [
        constants::DW_AT_specification,
        constants::DW_AT_abstract_origin,
    ] {
        let die_reference =
            match get_attr_die(unit, entry, debug_info_builder_context, reference_attr) {
                Some(die_reference) => die_reference,
                None => continue,
            };

        return match die_reference {
            DieReference::UnitAndOffset((entry_unit, entry_offset)) => {
                let refers_to_itself = entry_offset.to_unit_section_offset(entry_unit)
                    == entry.offset().to_unit_section_offset(unit);
                if refers_to_itself {
                    if reference_attr == constants::DW_AT_abstract_origin {
                        warn!("DWARF information is invalid (infinite abstract origin reference cycle). Debug information may be incomplete.");
                    } else {
                        warn!("DWARF information is invalid (infinite specification reference cycle). Debug information may be incomplete.");
                    }
                    DieReference::Err
                } else if let Ok(new_entry) = entry_unit.entry(entry_offset) {
                    resolve_specification(entry_unit, &new_entry, debug_info_builder_context)
                } else {
                    warn!("Failed to fetch DIE. Debug information may be incomplete.");
                    DieReference::Err
                }
            }
            DieReference::Err => DieReference::Err,
        };
    }

    // No reference attribute: this DIE is the end of the chain.
    DieReference::UnitAndOffset((unit, entry.offset()))
}
/// Returns the `DW_AT_name` of `entry`, looked up on the DIE that `resolve_specification`
/// resolves `entry` to.
///
/// Returns `None` when the reference chain cannot be resolved, the resolved DIE cannot be
/// read, it has no `DW_AT_name`, or the name cannot be converted to a string.
pub(crate) fn get_name<R: Reader<Offset = usize>>(
    unit: &Unit<R>,
    entry: &DebuggingInformationEntry<R>,
    debug_info_builder_context: &DebugInfoBuilderContext<R>,
) -> Option<String> {
    let (entry_unit, entry_offset) =
        match resolve_specification(unit, entry, debug_info_builder_context) {
            DieReference::UnitAndOffset(resolved) => resolved,
            DieReference::Err => return None,
        };

    // A DIE that cannot be read yields no name instead of a panic.
    let resolved_entry = entry_unit.entry(entry_offset).ok()?;
    let attr_val = resolved_entry.attr_value(constants::DW_AT_name).ok()??;
    let raw_name = debug_info_builder_context
        .dwarf()
        .attr_string(entry_unit, attr_val)
        .ok()?;
    let name = raw_name.to_string().ok()?;

    // TODO : When there is no DW_AT_name, fall back to demangling the linkage name
    // (get_raw_name + demangle_gnu3 with the default architecture, joined with "::").
    Some(name.to_string())
}
/// Returns the `DW_AT_linkage_name` of `entry` as an owned string.
///
/// Only `entry` itself is inspected. Returns `None` when the attribute is absent or its value
/// cannot be read or converted to a string.
pub(crate) fn get_raw_name<R: Reader<Offset = usize>>(
    unit: &Unit<R>,
    entry: &DebuggingInformationEntry<R>,
    debug_info_builder_context: &DebugInfoBuilderContext<R>,
) -> Option<String> {
    let linkage_attr = entry.attr_value(constants::DW_AT_linkage_name).ok()??;
    let raw_name = debug_info_builder_context
        .dwarf()
        .attr_string(unit, linkage_attr)
        .ok()?;
    let linkage_name = raw_name.to_string().ok()?;
    Some(linkage_name.to_string())
}
/// Returns the size of the object described by `entry`, in bytes, as a `usize`.
///
/// `DW_AT_byte_size` is preferred. Otherwise `DW_AT_bit_size` is converted to bytes, rounding
/// up so that a size that is not a multiple of eight bits (including sub-byte sizes) is not
/// truncated. Returns `None` when neither attribute is present or the value cannot be coerced.
pub(crate) fn get_size_as_usize<R: Reader<Offset = usize>>(
    entry: &DebuggingInformationEntry<R>,
) -> Option<usize> {
    if let Ok(Some(attr)) = entry.attr(constants::DW_AT_byte_size) {
        get_attr_as_usize(attr)
    } else if let Ok(Some(attr)) = entry.attr(constants::DW_AT_bit_size) {
        // Written as quotient + remainder flag so that huge bit counts cannot overflow.
        get_attr_as_usize(attr).map(|bits| bits / 8 + usize::from(bits % 8 != 0))
    } else {
        None
    }
}
/// Returns the size of the object described by `entry`, in bytes, as a `u64`.
///
/// `DW_AT_byte_size` is preferred. Otherwise `DW_AT_bit_size` is converted to bytes, rounding
/// up so that a size that is not a multiple of eight bits (including sub-byte sizes) is not
/// truncated. Returns `None` when neither attribute is present or the value cannot be coerced.
pub(crate) fn get_size_as_u64<R: Reader<Offset = usize>>(
    entry: &DebuggingInformationEntry<R>,
) -> Option<u64> {
    if let Ok(Some(attr)) = entry.attr(constants::DW_AT_byte_size) {
        get_attr_as_u64(&attr)
    } else if let Ok(Some(attr)) = entry.attr(constants::DW_AT_bit_size) {
        // Written as quotient + remainder flag so that huge bit counts cannot overflow.
        get_attr_as_u64(&attr).map(|bits| bits / 8 + u64::from(bits % 8 != 0))
    } else {
        None
    }
}
/// Returns the element count described by a subrange DIE, or 0 when it cannot be determined.
///
/// The attributes are consulted in this order:
/// * `DW_AT_upper_bound`: the count is `upper_bound + 1`
/// * `DW_AT_count`: the count is the attribute value
/// * `DW_AT_lower_bound`: the count is `lower_bound + 1`
///
/// The increment wraps instead of overflowing: a bound of -1 is coerced to `u64::MAX` by
/// `get_attr_as_u64`, and `u64::MAX + 1` would otherwise panic in builds with overflow checks.
/// With wrapping, such a bound yields a count of 0.
pub(crate) fn get_subrange_size<R: Reader<Offset = usize>>(
    entry: &DebuggingInformationEntry<R>,
) -> u64 {
    if let Ok(Some(attr)) = entry.attr(constants::DW_AT_upper_bound) {
        get_attr_as_u64(&attr).map_or(0, |v| v.wrapping_add(1))
    } else if let Ok(Some(attr)) = entry.attr(constants::DW_AT_count) {
        get_attr_as_u64(&attr).unwrap_or(0)
    } else if let Ok(Some(attr)) = entry.attr(constants::DW_AT_lower_bound) {
        // NOTE(review): deriving a count from the lower bound alone looks suspicious; the
        // existing behavior is kept — confirm the intent.
        get_attr_as_u64(&attr).map_or(0, |v| v.wrapping_add(1))
    } else {
        0
    }
}
/// Determines the start address of the function described by `entry`.
///
/// The first attribute that is present decides the result; there is no fall-through when its
/// value turns out to be unusable:
/// * `DW_AT_low_pc`: the address it resolves to
/// * `DW_AT_entry_pc`: the address it resolves to
/// * `DW_AT_ranges`: the `begin` of the first range in the referenced range list
///
/// Returns `None` when none of the attributes is present or the selected one cannot be
/// resolved.
pub(crate) fn get_start_address<R: Reader<Offset = usize>>(
    unit: &Unit<R>,
    entry: &DebuggingInformationEntry<R>,
    debug_info_builder_context: &DebugInfoBuilderContext<R>,
) -> Option<u64> {
    let dwarf = debug_info_builder_context.dwarf();

    if let Ok(Some(low_pc)) = entry.attr_value(constants::DW_AT_low_pc) {
        return dwarf.attr_address(unit, low_pc).unwrap_or(None);
    }

    if let Ok(Some(entry_pc)) = entry.attr_value(constants::DW_AT_entry_pc) {
        return dwarf.attr_address(unit, entry_pc).unwrap_or(None);
    }

    let ranges_attr = entry.attr_value(constants::DW_AT_ranges).ok()??;
    let ranges_offset = dwarf.attr_ranges_offset(unit, ranges_attr).ok()??;
    let mut ranges = dwarf.ranges(unit, ranges_offset).ok()?;
    ranges.next().ok()?.map(|first_range| first_range.begin)
}
/// Coerces an attribute value to a `u64`.
///
/// The accessors are tried in order: `u8_value`, `u16_value`, `udata_value`, `sdata_value`.
/// A signed value is reinterpreted with an `as` cast, so negative values become large
/// unsigned ones. Returns `None` when none of the accessors yields a value.
pub(crate) fn get_attr_as_u64<R: Reader<Offset = usize>>(attr: &Attribute<R>) -> Option<u64> {
    attr.u8_value()
        .map(u64::from)
        .or_else(|| attr.u16_value().map(u64::from))
        .or_else(|| attr.udata_value())
        .or_else(|| attr.sdata_value().map(|signed| signed as u64))
}
/// Coerces an attribute value to a `usize`.
///
/// The accessors are tried in order: `u8_value`, `u16_value`, `udata_value`, `sdata_value`.
/// The 64-bit values are converted with an `as` cast, so negative values become large
/// unsigned ones. Returns `None` when none of the accessors yields a value.
pub(crate) fn get_attr_as_usize<R: Reader<Offset = usize>>(attr: Attribute<R>) -> Option<usize> {
    attr.u8_value()
        .map(usize::from)
        .or_else(|| attr.u16_value().map(usize::from))
        .or_else(|| attr.udata_value().map(|unsigned| unsigned as usize))
        .or_else(|| attr.sdata_value().map(|signed| signed as usize))
}
/// Extracts a `u64` from the first operation of an expression-location attribute.
///
/// Only the first operation of the expression is parsed:
/// * `PlusConstant` and `UnsignedConstant` yield their constant
/// * `Address` yields the address, unless the address is 0
///
/// Returns `None` for any other operation, when parsing fails, or when the attribute value is
/// not an `Exprloc`.
pub(crate) fn get_expr_value<R: Reader<Offset = usize>>(
    unit: &Unit<R>,
    attr: Attribute<R>,
) -> Option<u64> {
    let mut expression = match attr.value() {
        AttributeValue::Exprloc(expression) => expression,
        _ => return None,
    };

    match Operation::parse(&mut expression.0, unit.encoding()).ok()? {
        Operation::PlusConstant { value } | Operation::UnsignedConstant { value } => Some(value),
        Operation::Address { address } if address != 0 => Some(address),
        _ => None,
    }
}

View File

@@ -0,0 +1,295 @@
// Copyright 2021-2024 Vector 35 Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
mod die_handlers;
mod dwarfdebuginfo;
mod functions;
mod helpers;
mod types;
use crate::dwarfdebuginfo::{DebugInfoBuilder, DebugInfoBuilderContext};
use crate::functions::parse_function_entry;
use crate::helpers::{get_attr_die, get_name, get_uid, DieReference};
use crate::types::parse_data_variable;
use binaryninja::{
binaryview::{BinaryView, BinaryViewExt},
debuginfo::{CustomDebugInfoParser, DebugInfo, DebugInfoParser},
logger,
templatesimplifier::simplify_str_to_str,
};
use dwarfreader::{
create_section_reader, get_endian, is_dwo_dwarf, is_non_dwo_dwarf, is_raw_dwo_dwarf,
};
use gimli::{constants, DebuggingInformationEntry, Dwarf, DwarfFileType, Reader, SectionId, Unit};
use log::{error, warn, LevelFilter};
/// Joins the qualifiers on the stack (outermost first) with `::`, optionally followed by a
/// trailing `leaf` component.
fn join_qualified_name<'a>(
    namespace_qualifiers: &'a [(isize, String)],
    leaf: Option<&'a str>,
) -> String {
    namespace_qualifiers
        .iter()
        .map(|(_, qualifier)| qualifier.as_str())
        .chain(leaf)
        .collect::<Vec<&str>>()
        .join("::")
}

/// Pushes the qualifier for a `DW_TAG_namespace` DIE onto the qualifier stack at `depth`.
///
/// The DIE's own name is used when it has one. Otherwise a `DW_AT_extension` reference is
/// followed, and if there is none the namespace is recorded as `anonymous_namespace`. When
/// the referenced DIE cannot be fetched, a warning is logged and nothing is pushed.
fn resolve_namespace_name<R: Reader<Offset = usize>>(
    unit: &Unit<R>,
    entry: &DebuggingInformationEntry<R>,
    debug_info_builder_context: &DebugInfoBuilderContext<R>,
    namespace_qualifiers: &mut Vec<(isize, String)>,
    depth: isize,
) {
    if let Some(namespace_qualifier) = get_name(unit, entry, debug_info_builder_context) {
        namespace_qualifiers.push((depth, namespace_qualifier));
    } else if let Some(die_reference) = get_attr_die(
        unit,
        entry,
        debug_info_builder_context,
        constants::DW_AT_extension,
    ) {
        match die_reference {
            DieReference::UnitAndOffset((entry_unit, entry_offset)) => {
                // A dangling extension reference is reported instead of panicking.
                if let Ok(extension_entry) = entry_unit.entry(entry_offset) {
                    resolve_namespace_name(
                        entry_unit,
                        &extension_entry,
                        debug_info_builder_context,
                        namespace_qualifiers,
                        depth,
                    );
                } else {
                    warn!("Failed to fetch DIE. Debug information may be incomplete.");
                }
            }
            DieReference::Err => {
                warn!("Failed to fetch DIE. Debug information may be incomplete.");
            }
        }
    } else {
        namespace_qualifiers.push((depth, "anonymous_namespace".to_string()));
    }
}

/// Walks every DIE of every unit once, recording a (namespace-qualified) name for each named
/// DIE in `debug_info_builder_context` and counting the DIEs in `total_die_count`.
///
/// `progress` is invoked for each DIE after the unit's first one, as `(0, DIEs counted so far)`.
///
/// Returns `false` when `progress` reports cancellation or when the DIE tree depth becomes
/// negative (malformed DWARF); returns `true` otherwise. A unit that cannot be parsed is
/// skipped with a warning.
fn recover_names<R: Reader<Offset = usize>>(
    debug_info_builder_context: &mut DebugInfoBuilderContext<R>,
    progress: &dyn Fn(usize, usize) -> Result<(), ()>,
) -> bool {
    let mut iter = debug_info_builder_context.dwarf().units();
    while let Ok(Some(header)) = iter.next() {
        let unit = match debug_info_builder_context.dwarf().unit(header) {
            Ok(unit) => unit,
            Err(_) => {
                warn!("Failed to parse DWARF unit. Debug information may be incomplete.");
                continue;
            }
        };

        // Stack of (depth, qualifier) pairs for the scopes enclosing the current DIE.
        let mut namespace_qualifiers: Vec<(isize, String)> = vec![];
        let mut entries = unit.entries();
        let mut depth = 0;

        // The first entry in the unit is the header for the unit
        if let Ok(Some((delta_depth, _))) = entries.next_dfs() {
            depth += delta_depth;
            debug_info_builder_context.total_die_count += 1;
        }

        while let Ok(Some((delta_depth, entry))) = entries.next_dfs() {
            debug_info_builder_context.total_die_count += 1;

            if (*progress)(0, debug_info_builder_context.total_die_count).is_err() {
                return false; // Parsing canceled
            };

            depth += delta_depth;
            if depth < 0 {
                error!("DWARF information is seriously malformed. Aborting parsing.");
                return false;
            }

            // TODO : Better module/component support
            // Drop the qualifiers of scopes that the traversal has left.
            namespace_qualifiers.retain(|&(entry_depth, _)| entry_depth < depth);

            match entry.tag() {
                constants::DW_TAG_namespace => {
                    resolve_namespace_name(
                        &unit,
                        entry,
                        debug_info_builder_context,
                        &mut namespace_qualifiers,
                        depth,
                    );
                }
                constants::DW_TAG_class_type
                | constants::DW_TAG_structure_type
                | constants::DW_TAG_union_type => {
                    // Aggregates qualify their children, so they go on the stack as well.
                    let qualifier = get_name(&unit, entry, debug_info_builder_context)
                        .unwrap_or_else(|| {
                            let placeholder = match entry.tag() {
                                constants::DW_TAG_class_type => "anonymous_class",
                                constants::DW_TAG_structure_type => "anonymous_structure",
                                constants::DW_TAG_union_type => "anonymous_union",
                                _ => unreachable!(),
                            };
                            placeholder.to_string()
                        });
                    namespace_qualifiers.push((depth, qualifier));

                    debug_info_builder_context.set_name(
                        get_uid(&unit, entry),
                        simplify_str_to_str(join_qualified_name(&namespace_qualifiers, None))
                            .to_string(),
                    );
                }
                constants::DW_TAG_typedef
                | constants::DW_TAG_subprogram
                | constants::DW_TAG_enumeration_type => {
                    // These are qualified by the enclosing scopes but do not open a scope.
                    if let Some(name) = get_name(&unit, entry, debug_info_builder_context) {
                        debug_info_builder_context.set_name(
                            get_uid(&unit, entry),
                            simplify_str_to_str(join_qualified_name(
                                &namespace_qualifiers,
                                Some(name.as_str()),
                            ))
                            .to_string(),
                        );
                    }
                }
                _ => {
                    // Everything else keeps its unqualified name.
                    if let Some(name) = get_name(&unit, entry, debug_info_builder_context) {
                        debug_info_builder_context.set_name(get_uid(&unit, entry), name);
                    }
                }
            }
        }
    }

    true
}
/// Walks the DIEs of `unit` depth-first and hands the ones of interest to their parsers:
/// `DW_TAG_subprogram` goes to `parse_function_entry` and `DW_TAG_variable` goes to
/// `parse_data_variable`. Every other tag is ignored.
///
/// `current_die_number` is incremented for each visited DIE and reported through `progress`
/// together with the context's `total_die_count`. The walk stops as soon as `progress`
/// returns an error (parsing canceled).
fn parse_unit<R: Reader<Offset = usize>>(
    unit: &Unit<R>,
    debug_info_builder_context: &DebugInfoBuilderContext<R>,
    debug_info_builder: &mut DebugInfoBuilder,
    progress: &dyn Fn(usize, usize) -> Result<(), ()>,
    current_die_number: &mut usize,
) {
    let mut cursor = unit.entries();

    while let Ok(Some((_, die))) = cursor.next_dfs() {
        *current_die_number += 1;

        let canceled = progress(
            *current_die_number,
            debug_info_builder_context.total_die_count,
        )
        .is_err();
        if canceled {
            return;
        }

        let tag = die.tag();
        if tag == constants::DW_TAG_subprogram {
            parse_function_entry(unit, die, debug_info_builder_context, debug_info_builder);
        } else if tag == constants::DW_TAG_variable {
            parse_data_variable(unit, die, debug_info_builder_context, debug_info_builder);
        }
    }
}
fn parse_dwarf(
view: &BinaryView,
progress: Box<dyn Fn(usize, usize) -> Result<(), ()>>,
) -> DebugInfoBuilder {
// Determine if this is a DWO
// TODO : Make this more robust...some DWOs follow non-DWO conventions
let dwo_file = is_dwo_dwarf(view) || is_raw_dwo_dwarf(view);
// Figure out if it's the given view or the raw view that has the dwarf info in it
let raw_view = &view.raw_view().unwrap();
let view = if is_dwo_dwarf(view) || is_non_dwo_dwarf(view) {
view
} else {
raw_view
};
// gimli setup
let endian = get_endian(view);
let mut section_reader =
|section_id: SectionId| -> _ { create_section_reader(section_id, view, endian, dwo_file) };
let mut dwarf = Dwarf::load(&mut section_reader).unwrap();
if dwo_file {
dwarf.file_type = DwarfFileType::Dwo;
}
// Create debug info builder and recover name mapping first
// Since DWARF is stored as a tree with arbitrary implicit edges among leaves,
// it is not possible to correctly track namespaces while you're parsing "in order" without backtracking,
// so we just do it up front
let mut debug_info_builder = DebugInfoBuilder::new();
if let Some(mut debug_info_builder_context) = DebugInfoBuilderContext::new(view, dwarf) {
if !recover_names(&mut debug_info_builder_context, &progress)
|| debug_info_builder_context.total_die_count == 0
{
return debug_info_builder;
}
// Parse all the compilation units
let mut current_die_number = 0;
for unit in debug_info_builder_context.units() {
parse_unit(
unit,
&debug_info_builder_context,
&mut debug_info_builder,
&progress,
&mut current_die_number,
);
}
}
debug_info_builder
}
// Debug info parser that is registered under the name "DWARF" in `CorePluginInit`.
struct DWARFParser;
impl CustomDebugInfoParser for DWARFParser {
// Delegates the validity check for `view` to `dwarfreader::is_valid`.
fn is_valid(&self, view: &BinaryView) -> bool {
dwarfreader::is_valid(view)
}
// Parses the DWARF information of `debug_file`, post-processes the result against `bv`,
// and commits it into `debug_info`. `progress` is forwarded to `parse_dwarf`.
// Always returns `true`, including when `parse_dwarf` produced an empty builder.
fn parse_info(
&self,
debug_info: &mut DebugInfo,
bv: &BinaryView,
debug_file: &BinaryView,
progress: Box<dyn Fn(usize, usize) -> Result<(), ()>>,
) -> bool {
parse_dwarf(debug_file, progress)
.post_process(bv, debug_info)
.commit_info(debug_info);
true
}
}
/// Plugin entry point: sets up logging at `Debug` level and registers the DWARF debug info
/// parser under the name "DWARF". Always returns `true`.
///
/// A failure to install the logger is not fatal. It is reported on stderr and the parser is
/// still registered, because panicking inside an `extern "C"` function would take down the
/// host process.
#[no_mangle]
pub extern "C" fn CorePluginInit() -> bool {
    if logger::init(LevelFilter::Debug).is_err() {
        eprintln!("dwarf_import: failed to initialize logging; continuing without it");
    }

    DebugInfoParser::register("DWARF", DWARFParser {});
    true
}

View File

@@ -0,0 +1,393 @@
// Copyright 2021-2024 Vector 35 Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::die_handlers::*;
use crate::dwarfdebuginfo::{DebugInfoBuilder, DebugInfoBuilderContext, TypeUID};
use crate::helpers::*;
use binaryninja::{
rc::*,
types::{
MemberAccess, MemberScope, ReferenceType, StructureBuilder, StructureType, Type, TypeClass,
},
};
use gimli::{constants, DebuggingInformationEntry, Reader, Unit};
use log::warn;
/// Adds a data variable to `debug_info_builder` for a variable DIE.
///
/// The variable is only added when both its type (via `get_type`) and its address (the value
/// of its `DW_AT_location` expression, via `get_expr_value`) can be determined. The type is
/// resolved even when no address is found.
pub(crate) fn parse_data_variable<R: Reader<Offset = usize>>(
    unit: &Unit<R>,
    entry: &DebuggingInformationEntry<R>,
    debug_info_builder_context: &DebugInfoBuilderContext<R>,
    debug_info_builder: &mut DebugInfoBuilder,
) {
    let variable_name = debug_info_builder_context.get_name(unit, entry);
    let variable_type = get_type(unit, entry, debug_info_builder_context, debug_info_builder);

    let location = match entry.attr(constants::DW_AT_location) {
        Ok(Some(location_attr)) => get_expr_value(unit, location_attr),
        _ => None,
    };

    match (location, variable_type) {
        (Some(address), Some(type_uid)) => {
            debug_info_builder.add_data_variable(address, variable_name, type_uid);
        }
        _ => (),
    }
}
fn do_structure_parse<R: Reader<Offset = usize>>(
structure_type: StructureType,
unit: &Unit<R>,
entry: &DebuggingInformationEntry<R>,
debug_info_builder_context: &DebugInfoBuilderContext<R>,
debug_info_builder: &mut DebugInfoBuilder,
) -> Option<usize> {
// All struct, union, and class types will have:
// *DW_AT_name
// *DW_AT_byte_size or *DW_AT_bit_size
// *DW_AT_declaration
// *DW_AT_signature
// *DW_AT_specification
// ?DW_AT_abstract_origin
// ?DW_AT_accessibility
// ?DW_AT_allocated
// ?DW_AT_associated
// ?DW_AT_data_location
// ?DW_AT_description
// ?DW_AT_start_scope
// ?DW_AT_visibility
// * = Optional
// Structure/Class/Union _Children_ consist of:
// Data members:
// DW_AT_type
// *DW_AT_name
// *DW_AT_accessibility (default private for classes, public for everything else)
// *DW_AT_mutable
// *DW_AT_data_member_location xor *DW_AT_data_bit_offset (otherwise assume zero) <- there are some deprecations for DWARF 4
// *DW_AT_byte_size xor DW_AT_bit_size, iff the storage size is different than it usually would be for the given member type
// Function members:
// *DW_AT_accessibility (default private for classes, public for everything else)
// *DW_AT_virtuality (assume false)
// If true: DW_AT_vtable_elem_location
// *DW_AT_explicit (assume false)
// *DW_AT_object_pointer (assume false; for non-static member function; references the formal parameter that has "DW_AT_artificial = true" and represents "self" or "this" (language specified))
// *DW_AT_specification
// * = Optional
if let Ok(Some(_)) = entry.attr(constants::DW_AT_declaration) {
return None;
}
let full_name = if get_name(unit, entry, debug_info_builder_context).is_some() {
debug_info_builder_context.get_name(unit, entry)
} else {
None
};
// Create structure with proper size
let size = get_size_as_u64(entry).unwrap_or(0);
let structure_builder: StructureBuilder = StructureBuilder::new();
structure_builder
.set_packed(true)
.set_width(size)
.set_structure_type(structure_type);
// This reference type will be used by any children to grab while we're still building this type
// it will also be how any other types refer to this struct
if let Some(full_name) = &full_name {
debug_info_builder.add_type(
get_uid(unit, entry),
full_name.clone(),
Type::named_type_from_type(
full_name.clone(),
&Type::structure(&structure_builder.finalize()),
),
false,
);
} else {
// We _need_ to have initial typedefs or else we can enter infinite parsing loops
// These get overwritten in the last step with the actual type, however, so this
// is either perfectly fine or breaking a bunch of NTRs
let full_name = format!("anonymous_structure_{:x}", get_uid(unit, entry));
debug_info_builder.add_type(
get_uid(unit, entry),
full_name.clone(),
Type::named_type_from_type(full_name, &Type::structure(&structure_builder.finalize())),
false,
);
}
// Get all the children and populate
let mut tree = unit.entries_tree(Some(entry.offset())).unwrap();
let mut children = tree.root().unwrap().children();
while let Ok(Some(child)) = children.next() {
if child.entry().tag() == constants::DW_TAG_member {
if let Some(child_type_id) = get_type(
unit,
child.entry(),
debug_info_builder_context,
debug_info_builder,
) {
if let Some((_, child_type)) = debug_info_builder.get_type(child_type_id) {
if let Some(child_name) = debug_info_builder_context
.get_name(unit, child.entry())
.map_or(
if child_type.type_class() == TypeClass::StructureTypeClass {
Some("".to_string())
} else {
None
},
Some,
)
{
// TODO : support DW_AT_data_bit_offset for offset as well
if let Ok(Some(raw_struct_offset)) =
child.entry().attr(constants::DW_AT_data_member_location)
{
// TODO : Let this fail; don't unwrap_or_default get_expr_value
let struct_offset =
get_attr_as_u64(&raw_struct_offset).unwrap_or_else(|| {
get_expr_value(unit, raw_struct_offset).unwrap_or_default()
});
structure_builder.insert(
child_type.as_ref(),
child_name,
struct_offset,
false,
MemberAccess::NoAccess, // TODO : Resolve actual scopes, if possible
MemberScope::NoScope,
);
} else {
structure_builder.append(
child_type.as_ref(),
child_name,
MemberAccess::NoAccess,
MemberScope::NoScope,
);
}
}
}
}
}
}
let finalized_structure = Type::structure(&structure_builder.finalize());
if let Some(full_name) = full_name {
debug_info_builder.add_type(
get_uid(unit, entry) + 1, // TODO : This is super broke (uid + 1 is not guaranteed to be unique)
full_name,
finalized_structure,
true,
);
} else {
debug_info_builder.add_type(
get_uid(unit, entry),
format!("{}", finalized_structure),
finalized_structure,
false, // Don't commit anonymous unions (because I think it'll break things)
);
}
Some(get_uid(unit, entry))
}
/// Resolves the type described by `entry`, adding it and the types it depends on to
/// `debug_info_builder` along the way, and returns the UID under which it is tracked.
///
/// The referenced type (`DW_AT_type`, or otherwise the DIE that `resolve_specification`
/// resolves to) is processed first. The type for `entry` itself is then created according to
/// its tag. Tags that are not handled explicitly pass through the UID of the referenced type.
///
/// Returns `None` when `entry` is only a declaration, when a referenced DIE cannot be
/// fetched, or when no type can be created for the tag.
pub(crate) fn get_type<R: Reader<Offset = usize>>(
    unit: &Unit<R>,
    entry: &DebuggingInformationEntry<R>,
    debug_info_builder_context: &DebugInfoBuilderContext<R>,
    debug_info_builder: &mut DebugInfoBuilder,
) -> Option<TypeUID> {
    // If this node (and thus all its referenced nodes) has already been processed, just return the offset
    if debug_info_builder.contains_type(get_uid(unit, entry)) {
        return Some(get_uid(unit, entry));
    }

    // Don't parse types that are just declarations and not definitions
    if let Ok(Some(_)) = entry.attr(constants::DW_AT_declaration) {
        return None;
    }

    let entry_type = if let Some(die_reference) = get_attr_die(
        unit,
        entry,
        debug_info_builder_context,
        constants::DW_AT_type,
    ) {
        // This needs to recurse first (before the early return below) to ensure all sub-types have been parsed
        match die_reference {
            DieReference::UnitAndOffset((entry_unit, entry_offset)) => {
                // A dangling type reference is reported instead of panicking.
                match entry_unit.entry(entry_offset) {
                    Ok(referenced_entry) => get_type(
                        entry_unit,
                        &referenced_entry,
                        debug_info_builder_context,
                        debug_info_builder,
                    ),
                    Err(_) => {
                        warn!("Failed to fetch DIE. Debug information may be incomplete.");
                        None
                    }
                }
            }
            DieReference::Err => {
                warn!("Failed to fetch DIE. Debug information may be incomplete.");
                None
            }
        }
    } else {
        // This needs to recurse first (before the early return below) to ensure all sub-types have been parsed
        match resolve_specification(unit, entry, debug_info_builder_context) {
            // NOTE(review): this guard only follows references that differ in BOTH unit and
            // offset, so a reference to another DIE in the same unit is not followed.
            // Confirm whether `||` was intended.
            DieReference::UnitAndOffset((entry_unit, entry_offset))
                if entry_unit.header.offset() != unit.header.offset()
                    && entry_offset != entry.offset() =>
            {
                match entry_unit.entry(entry_offset) {
                    Ok(referenced_entry) => get_type(
                        entry_unit,
                        &referenced_entry,
                        debug_info_builder_context,
                        debug_info_builder,
                    ),
                    Err(_) => {
                        warn!("Failed to fetch DIE. Debug information may be incomplete.");
                        None
                    }
                }
            }
            DieReference::UnitAndOffset(_) => None,
            DieReference::Err => {
                warn!("Failed to fetch DIE. Debug information may be incomplete.");
                None
            }
        }
    };

    // If this node (and thus all its referenced nodes) has already been processed, just return the offset
    // This check is not redundant because this type might have been processed in the recursive calls above
    if debug_info_builder.contains_type(get_uid(unit, entry)) {
        return Some(get_uid(unit, entry));
    }

    // Collect the required information to create a type and add it to the type map. Also, add the dependencies of this type to the type's typeinfo
    // Create the type, make a TypeInfo for it, and add it to the debug info
    let (type_def, mut commit): (Option<Ref<Type>>, bool) = match entry.tag() {
        constants::DW_TAG_base_type => (
            handle_base_type(unit, entry, debug_info_builder_context),
            false,
        ),

        constants::DW_TAG_structure_type => {
            return do_structure_parse(
                StructureType::StructStructureType,
                unit,
                entry,
                debug_info_builder_context,
                debug_info_builder,
            )
        }
        constants::DW_TAG_class_type => {
            return do_structure_parse(
                StructureType::ClassStructureType,
                unit,
                entry,
                debug_info_builder_context,
                debug_info_builder,
            )
        }
        constants::DW_TAG_union_type => {
            return do_structure_parse(
                StructureType::UnionStructureType,
                unit,
                entry,
                debug_info_builder_context,
                debug_info_builder,
            )
        }

        // Enum
        constants::DW_TAG_enumeration_type => {
            (handle_enum(unit, entry, debug_info_builder_context), true)
        }

        // Basic types
        constants::DW_TAG_typedef => {
            if let Some(name) = debug_info_builder_context.get_name(unit, entry) {
                handle_typedef(debug_info_builder, entry_type, name)
            } else {
                (None, false)
            }
        }
        constants::DW_TAG_pointer_type => (
            handle_pointer(
                entry,
                debug_info_builder_context,
                debug_info_builder,
                entry_type,
                ReferenceType::PointerReferenceType,
            ),
            false,
        ),
        constants::DW_TAG_reference_type => (
            handle_pointer(
                entry,
                debug_info_builder_context,
                debug_info_builder,
                entry_type,
                ReferenceType::ReferenceReferenceType,
            ),
            false,
        ),
        constants::DW_TAG_rvalue_reference_type => (
            handle_pointer(
                entry,
                debug_info_builder_context,
                debug_info_builder,
                entry_type,
                ReferenceType::RValueReferenceType,
            ),
            false,
        ),
        constants::DW_TAG_array_type => (
            handle_array(unit, entry, debug_info_builder, entry_type),
            false,
        ),

        // Strange Types
        constants::DW_TAG_unspecified_type => (Some(Type::void()), false),
        constants::DW_TAG_subroutine_type => (
            handle_function(
                unit,
                entry,
                debug_info_builder_context,
                debug_info_builder,
                entry_type,
            ),
            false,
        ),

        // Weird types
        constants::DW_TAG_const_type => (handle_const(debug_info_builder, entry_type), false),
        constants::DW_TAG_volatile_type => (handle_volatile(debug_info_builder, entry_type), true), // TODO : Maybe false here

        // Pass-through everything else!
        _ => return entry_type,
    };

    // Wrap our resultant type in a TypeInfo so that the internal DebugInfo class can manage it
    if let Some(type_def) = type_def {
        let resolved_name = if get_name(unit, entry, debug_info_builder_context).is_some() {
            debug_info_builder_context.get_name(unit, entry)
        } else {
            None
        };
        // Unnamed types are tracked under their printed form and are never committed.
        let name = resolved_name.unwrap_or_else(|| {
            commit = false;
            format!("{}", type_def)
        });

        debug_info_builder.add_type(get_uid(unit, entry), name, type_def, commit);
        Some(get_uid(unit, entry))
    } else {
        None
    }
}