update rust crate

This commit is contained in:
2024-08-17 16:20:28 +02:00
parent 670fa334db
commit 2167e0512a
88 changed files with 20508 additions and 1741 deletions

View File

@@ -10,5 +10,8 @@ crate-type = ["cdylib"]
[dependencies]
dwarfreader = { path = "../shared/" }
binaryninja = { path = "../../../" }
gimli = "0.28"
gimli = "0.31"
log = "0.4.20"
iset = "0.2.2"
cpp_demangle = "0.4.3"
regex = "1"

View File

@@ -13,7 +13,7 @@
// limitations under the License.
use crate::dwarfdebuginfo::{DebugInfoBuilder, DebugInfoBuilderContext, TypeUID};
use crate::helpers::*;
use crate::{helpers::*, ReaderType};
use crate::types::get_type;
use binaryninja::{
@@ -21,9 +21,11 @@ use binaryninja::{
types::{EnumerationBuilder, FunctionParameter, ReferenceType, Type, TypeBuilder},
};
use gimli::{constants, AttributeValue::Encoding, DebuggingInformationEntry, Reader, Unit};
use gimli::Dwarf;
use gimli::{constants, AttributeValue::Encoding, DebuggingInformationEntry, Unit};
pub(crate) fn handle_base_type<R: Reader<Offset = usize>>(
pub(crate) fn handle_base_type<R: ReaderType>(
dwarf: &Dwarf<R>,
unit: &Unit<R>,
entry: &DebuggingInformationEntry<R>,
debug_info_builder_context: &DebugInfoBuilderContext<R>,
@@ -37,7 +39,7 @@ pub(crate) fn handle_base_type<R: Reader<Offset = usize>>(
// *Some indication of signedness?
// * = Optional
let name = debug_info_builder_context.get_name(unit, entry)?;
let name = debug_info_builder_context.get_name(dwarf, unit, entry)?;
let size = get_size_as_usize(entry)?;
match entry.attr_value(constants::DW_AT_encoding) {
Ok(Some(Encoding(encoding))) => {
@@ -69,7 +71,8 @@ pub(crate) fn handle_base_type<R: Reader<Offset = usize>>(
}
}
pub(crate) fn handle_enum<R: Reader<Offset = usize>>(
pub(crate) fn handle_enum<R: ReaderType>(
dwarf: &Dwarf<R>,
unit: &Unit<R>,
entry: &DebuggingInformationEntry<R>,
debug_info_builder_context: &DebugInfoBuilderContext<R>,
@@ -107,17 +110,18 @@ pub(crate) fn handle_enum<R: Reader<Offset = usize>>(
let mut children = tree.root().unwrap().children();
while let Ok(Some(child)) = children.next() {
if child.entry().tag() == constants::DW_TAG_enumerator {
let name = debug_info_builder_context.get_name(unit, child.entry())?;
let value = get_attr_as_u64(
&child
.entry()
.attr(constants::DW_AT_const_value)
.unwrap()
.unwrap(),
)
.unwrap();
enumeration_builder.insert(name, value);
let name = debug_info_builder_context.get_name(dwarf, unit, child.entry())?;
let attr = &child
.entry()
.attr(constants::DW_AT_const_value)
.unwrap()
.unwrap();
if let Some(value) = get_attr_as_u64(attr) {
enumeration_builder.insert(name, value);
} else {
log::error!("Unhandled enum member value type - please report this");
return None;
}
}
}
@@ -131,7 +135,7 @@ pub(crate) fn handle_enum<R: Reader<Offset = usize>>(
pub(crate) fn handle_typedef(
debug_info_builder: &mut DebugInfoBuilder,
entry_type: Option<TypeUID>,
typedef_name: String,
typedef_name: &String,
) -> (Option<Ref<Type>>, bool) {
// All base types have:
// DW_AT_name
@@ -140,12 +144,8 @@ pub(crate) fn handle_typedef(
// This will fail in the case where we have a typedef to a type that doesn't exist (failed to parse, incomplete, etc)
if let Some(entry_type_offset) = entry_type {
if let Some((name, t)) = debug_info_builder.get_type(entry_type_offset) {
if typedef_name == name {
return (Some(t), false);
} else if typedef_name != name {
return (Some(t), true);
}
if let Some(t) = debug_info_builder.get_type(entry_type_offset) {
return (Some(t.get_type()), typedef_name != t.get_name());
}
}
@@ -153,7 +153,7 @@ pub(crate) fn handle_typedef(
(None, false)
}
pub(crate) fn handle_pointer<R: Reader<Offset = usize>>(
pub(crate) fn handle_pointer<R: ReaderType>(
entry: &DebuggingInformationEntry<R>,
debug_info_builder_context: &DebugInfoBuilderContext<R>,
debug_info_builder: &mut DebugInfoBuilder,
@@ -172,7 +172,7 @@ pub(crate) fn handle_pointer<R: Reader<Offset = usize>>(
if let Some(pointer_size) = get_size_as_usize(entry) {
if let Some(entry_type_offset) = entry_type {
let parent_type = debug_info_builder.get_type(entry_type_offset).unwrap().1;
let parent_type = debug_info_builder.get_type(entry_type_offset).unwrap().get_type();
Some(Type::pointer_of_width(
parent_type.as_ref(),
pointer_size,
@@ -190,7 +190,7 @@ pub(crate) fn handle_pointer<R: Reader<Offset = usize>>(
))
}
} else if let Some(entry_type_offset) = entry_type {
let parent_type = debug_info_builder.get_type(entry_type_offset).unwrap().1;
let parent_type = debug_info_builder.get_type(entry_type_offset).unwrap().get_type();
Some(Type::pointer_of_width(
parent_type.as_ref(),
debug_info_builder_context.default_address_size(),
@@ -209,7 +209,7 @@ pub(crate) fn handle_pointer<R: Reader<Offset = usize>>(
}
}
pub(crate) fn handle_array<R: Reader<Offset = usize>>(
pub(crate) fn handle_array<R: ReaderType>(
unit: &Unit<R>,
entry: &DebuggingInformationEntry<R>,
debug_info_builder: &mut DebugInfoBuilder,
@@ -228,7 +228,7 @@ pub(crate) fn handle_array<R: Reader<Offset = usize>>(
// For multidimensional arrays, DW_TAG_subrange_type or DW_TAG_enumeration_type
if let Some(entry_type_offset) = entry_type {
let parent_type = debug_info_builder.get_type(entry_type_offset).unwrap().1;
let parent_type = debug_info_builder.get_type(entry_type_offset).unwrap().get_type();
let mut tree = unit.entries_tree(Some(entry.offset())).unwrap();
let mut children = tree.root().unwrap().children();
@@ -255,7 +255,8 @@ pub(crate) fn handle_array<R: Reader<Offset = usize>>(
}
}
pub(crate) fn handle_function<R: Reader<Offset = usize>>(
pub(crate) fn handle_function<R: ReaderType>(
dwarf: &Dwarf<R>,
unit: &Unit<R>,
entry: &DebuggingInformationEntry<R>,
debug_info_builder_context: &DebugInfoBuilderContext<R>,
@@ -289,29 +290,25 @@ pub(crate) fn handle_function<R: Reader<Offset = usize>>(
debug_info_builder
.get_type(entry_type_offset)
.expect("Subroutine return type was not processed")
.1
.get_type()
}
None => Type::void(),
};
// Alias function type in the case that it contains itself
if let Some(name) = debug_info_builder_context.get_name(unit, entry) {
if let Some(name) = debug_info_builder_context.get_name(dwarf, unit, entry) {
debug_info_builder.add_type(
get_uid(unit, entry),
name.clone(),
get_uid(dwarf, unit, entry),
&name,
Type::named_type_from_type(
name,
&Type::function::<String, &binaryninja::types::Type>(
return_type.as_ref(),
&[],
false,
),
&name,
&Type::function::<&binaryninja::types::Type>(return_type.as_ref(), &[], false),
),
false,
);
}
let mut parameters: Vec<FunctionParameter<String>> = vec![];
let mut parameters: Vec<FunctionParameter> = vec![];
let mut variable_arguments = false;
// Get all the children and populate
@@ -322,15 +319,16 @@ pub(crate) fn handle_function<R: Reader<Offset = usize>>(
if let (Some(child_uid), Some(name)) = {
(
get_type(
dwarf,
unit,
child.entry(),
debug_info_builder_context,
debug_info_builder,
),
debug_info_builder_context.get_name(unit, child.entry()),
debug_info_builder_context.get_name(dwarf, unit, child.entry()),
)
} {
let child_type = debug_info_builder.get_type(child_uid).unwrap().1;
let child_type = debug_info_builder.get_type(child_uid).unwrap().get_type();
parameters.push(FunctionParameter::new(child_type, name, None));
}
} else if child.entry().tag() == constants::DW_TAG_unspecified_parameters {
@@ -338,8 +336,8 @@ pub(crate) fn handle_function<R: Reader<Offset = usize>>(
}
}
if debug_info_builder_context.get_name(unit, entry).is_some() {
debug_info_builder.remove_type(get_uid(unit, entry));
if debug_info_builder_context.get_name(dwarf, unit, entry).is_some() {
debug_info_builder.remove_type(get_uid(dwarf, unit, entry));
}
Some(Type::function(
@@ -362,7 +360,7 @@ pub(crate) fn handle_const(
// ?DW_AT_type
if let Some(entry_type_offset) = entry_type {
let parent_type = debug_info_builder.get_type(entry_type_offset).unwrap().1;
let parent_type = debug_info_builder.get_type(entry_type_offset).unwrap().get_type();
Some((*parent_type).to_builder().set_const(true).finalize())
} else {
Some(TypeBuilder::void().set_const(true).finalize())
@@ -382,7 +380,7 @@ pub(crate) fn handle_volatile(
// ?DW_AT_type
if let Some(entry_type_offset) = entry_type {
let parent_type = debug_info_builder.get_type(entry_type_offset).unwrap().1;
let parent_type = debug_info_builder.get_type(entry_type_offset).unwrap().get_type();
Some((*parent_type).to_builder().set_volatile(true).finalize())
} else {
Some(TypeBuilder::void().set_volatile(true).finalize())

View File

@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::helpers::{get_uid, resolve_specification, DieReference};
use crate::{helpers::{get_uid, resolve_specification, DieReference}, ReaderType};
use binaryninja::{
binaryview::{BinaryView, BinaryViewBase, BinaryViewExt},
@@ -21,13 +21,14 @@ use binaryninja::{
rc::*,
symbol::SymbolType,
templatesimplifier::simplify_str_to_fqn,
types::{Conf, FunctionParameter, Type},
types::{Conf, FunctionParameter, NamedTypedVariable, Type, Variable, VariableSourceType},
};
use gimli::{DebuggingInformationEntry, Dwarf, Reader, Unit};
use gimli::{DebuggingInformationEntry, Dwarf, Unit};
use log::{error, warn};
use log::{debug, error, warn};
use std::{
cmp::Ordering,
collections::{hash_map::Values, HashMap},
hash::Hash,
};
@@ -46,6 +47,8 @@ pub(crate) struct FunctionInfoBuilder {
pub(crate) address: Option<u64>,
pub(crate) parameters: Vec<Option<(String, TypeUID)>>,
pub(crate) platform: Option<Ref<Platform>>,
pub(crate) variable_arguments: bool,
pub(crate) stack_variables: Vec<NamedTypedVariable>,
}
impl FunctionInfoBuilder {
@@ -55,7 +58,7 @@ impl FunctionInfoBuilder {
raw_name: Option<String>,
return_type: Option<TypeUID>,
address: Option<u64>,
parameters: Vec<Option<(String, TypeUID)>>,
parameters: &Vec<Option<(String, TypeUID)>>,
) {
if full_name.is_some() {
self.full_name = full_name;
@@ -75,13 +78,13 @@ impl FunctionInfoBuilder {
for (i, new_parameter) in parameters.into_iter().enumerate() {
match self.parameters.get(i) {
Some(None) => self.parameters[i] = new_parameter,
Some(None) => self.parameters[i] = new_parameter.clone(),
Some(Some(_)) => (),
// Some(Some((name, _))) if name.as_bytes().is_empty() => {
// self.parameters[i] = new_parameter
// }
// Some(Some((_, uid))) if *uid == 0 => self.parameters[i] = new_parameter, // TODO : This is a placebo....void types aren't actually UID 0
_ => self.parameters.push(new_parameter),
_ => self.parameters.push(new_parameter.clone()),
}
}
}
@@ -97,16 +100,27 @@ pub(crate) struct DebugType {
commit: bool,
}
pub(crate) struct DebugInfoBuilderContext<R: Reader<Offset = usize>> {
dwarf: Dwarf<R>,
impl DebugType {
pub fn get_name(&self) -> &String {
&self.name
}
pub fn get_type(&self) -> Ref<Type> {
self.t.clone()
}
}
pub(crate) struct DebugInfoBuilderContext<R: ReaderType> {
units: Vec<Unit<R>>,
sup_units: Vec<Unit<R>>,
names: HashMap<TypeUID, String>,
default_address_size: usize,
pub(crate) total_die_count: usize,
}
impl<R: Reader<Offset = usize>> DebugInfoBuilderContext<R> {
pub(crate) fn new(view: &BinaryView, dwarf: Dwarf<R>) -> Option<Self> {
impl<R: ReaderType> DebugInfoBuilderContext<R> {
pub(crate) fn new(view: &BinaryView, dwarf: &Dwarf<R>) -> Option<Self> {
let mut units = vec![];
let mut iter = dwarf.units();
while let Ok(Some(header)) = iter.next() {
@@ -118,40 +132,56 @@ impl<R: Reader<Offset = usize>> DebugInfoBuilderContext<R> {
}
}
let mut sup_units = vec![];
if let Some(sup_dwarf) = dwarf.sup() {
let mut sup_iter = sup_dwarf.units();
while let Ok(Some(header)) = sup_iter.next() {
if let Ok(unit) = sup_dwarf.unit(header) {
sup_units.push(unit);
} else {
error!("Unable to read supplementary DWARF information. File may be malformed or corrupted. Not applying debug info.");
return None;
}
}
}
Some(Self {
dwarf,
units,
sup_units,
names: HashMap::new(),
default_address_size: view.address_size(),
total_die_count: 0,
})
}
pub(crate) fn dwarf(&self) -> &Dwarf<R> {
&self.dwarf
}
/// Borrows the units collected from the main DWARF data when this context was built.
pub(crate) fn units(&self) -> &[Unit<R>] {
    self.units.as_slice()
}
/// Borrows the units collected from the supplementary DWARF data (`dwarf.sup()`), if any
/// was present when this context was built; empty otherwise.
pub(crate) fn sup_units(&self) -> &[Unit<R>] {
    self.sup_units.as_slice()
}
/// Returns the address size captured from the `BinaryView` (`view.address_size()`) when
/// this context was constructed.
pub(crate) fn default_address_size(&self) -> usize {
self.default_address_size
}
/// Records `name` as the name of the DIE identified by `die_uid`.
///
/// # Panics
/// Panics if a name had already been recorded for `die_uid`.
pub(crate) fn set_name(&mut self, die_uid: TypeUID, name: String) {
// die_uids need to be unique here
// `HashMap::insert` returns the previous value, so `Some(_)` means a duplicate UID.
assert!(self.names.insert(die_uid, name).is_none());
}
pub(crate) fn get_name(
&self,
dwarf: &Dwarf<R>,
unit: &Unit<R>,
entry: &DebuggingInformationEntry<R>,
) -> Option<String> {
match resolve_specification(unit, entry, self) {
DieReference::UnitAndOffset((entry_unit, entry_offset)) => self
match resolve_specification(dwarf, unit, entry, self) {
DieReference::UnitAndOffset((dwarf, entry_unit, entry_offset)) => self
.names
.get(&get_uid(
dwarf,
entry_unit,
&entry_unit.entry(entry_offset).unwrap(),
))
@@ -166,19 +196,29 @@ impl<R: Reader<Offset = usize>> DebugInfoBuilderContext<R> {
// info and types to one DIE's UID (T) before adding the completed info to BN's debug info
pub(crate) struct DebugInfoBuilder {
functions: Vec<FunctionInfoBuilder>,
raw_function_name_indices: HashMap<String, usize>,
full_function_name_indices: HashMap<String, usize>,
types: HashMap<TypeUID, DebugType>,
data_variables: HashMap<u64, (Option<String>, TypeUID)>,
range_data_offsets: iset::IntervalMap<u64, i64>
}
impl DebugInfoBuilder {
/// Creates an empty builder: no functions, no name-to-index entries, no types, no data
/// variables and an empty range-data-offset map.
pub(crate) fn new() -> Self {
    let functions = Vec::new();
    let types = HashMap::new();
    let data_variables = HashMap::new();
    Self {
        functions,
        raw_function_name_indices: HashMap::new(),
        full_function_name_indices: HashMap::new(),
        types,
        data_variables,
        range_data_offsets: iset::IntervalMap::new(),
    }
}
/// Replaces the stored range-data-offset map with `offsets`.
///
/// `add_stack_variable` looks up a function's start address in this map to obtain the
/// adjustment it subtracts from a variable's offset.
pub(crate) fn set_range_data_offsets(&mut self, offsets: iset::IntervalMap<u64, i64>) {
    self.range_data_offsets = offsets;
}
#[allow(clippy::too_many_arguments)]
pub(crate) fn insert_function(
&mut self,
@@ -186,32 +226,87 @@ impl DebugInfoBuilder {
raw_name: Option<String>,
return_type: Option<TypeUID>,
address: Option<u64>,
parameters: Vec<Option<(String, TypeUID)>>,
) {
parameters: &Vec<Option<(String, TypeUID)>>,
variable_arguments: bool,
) -> Option<usize> {
// Returns the index of the function
// Raw names should be the primary key, but if they don't exist, use the full name
// TODO : Consider further falling back on address/architecture
if let Some(function) = self
.functions
.iter_mut()
.find(|func| func.raw_name.is_some() && func.raw_name == raw_name)
{
function.update(full_name, raw_name, return_type, address, parameters);
} else if let Some(function) = self.functions.iter_mut().find(|func| {
(func.raw_name.is_none() || raw_name.is_none())
&& func.full_name.is_some()
&& func.full_name == full_name
}) {
function.update(full_name, raw_name, return_type, address, parameters);
} else {
self.functions.push(FunctionInfoBuilder {
full_name,
raw_name,
return_type,
address,
parameters,
platform: None,
});
/*
If it has a raw_name and we know it, update it and return
Else if it has a full_name and we know it, update it and return
Else Add a new entry if we don't know the full_name or raw_name
*/
if let Some(ident) = &raw_name {
// check if we already know about this raw name's index
// if we do, and the full name will change, remove the known full index if it exists
// update the function
// if the full name exists, update the stored index for the full name
if let Some(idx) = self.raw_function_name_indices.get(ident) {
let function = self.functions.get_mut(*idx).unwrap();
if function.full_name.is_some() && function.full_name != full_name {
self.full_function_name_indices.remove(function.full_name.as_ref().unwrap());
}
function.update(full_name, raw_name, return_type, address, parameters);
if function.full_name.is_some() {
self.full_function_name_indices.insert(function.full_name.clone().unwrap(), *idx);
}
return Some(*idx);
}
}
else if let Some(ident) = &full_name {
// check if we already know about this full name's index
// if we do, and the raw name will change, remove the known raw index if it exists
// update the function
// if the raw name exists, update the stored index for the raw name
if let Some(idx) = self.full_function_name_indices.get(ident) {
let function = self.functions.get_mut(*idx).unwrap();
if function.raw_name.is_some() && function.raw_name != raw_name {
self.raw_function_name_indices.remove(function.raw_name.as_ref().unwrap());
}
function.update(full_name, raw_name, return_type, address, parameters);
if function.raw_name.is_some() {
self.raw_function_name_indices.insert(function.raw_name.clone().unwrap(), *idx);
}
return Some(*idx);
}
}
else {
debug!("Function entry in DWARF without full or raw name.");
return None;
}
let function = FunctionInfoBuilder {
full_name,
raw_name,
return_type,
address,
parameters: parameters.clone(),
platform: None,
variable_arguments,
stack_variables: vec![],
};
if let Some(n) = &function.full_name {
self.full_function_name_indices.insert(n.clone(), self.functions.len());
}
if let Some(n) = &function.raw_name {
self.raw_function_name_indices.insert(n.clone(), self.functions.len());
}
self.functions.push(function);
Some(self.functions.len()-1)
}
pub(crate) fn functions(&self) -> &[FunctionInfoBuilder] {
@@ -222,13 +317,7 @@ impl DebugInfoBuilder {
self.types.values()
}
pub(crate) fn add_type(
&mut self,
type_uid: TypeUID,
name: String,
t: Ref<Type>,
commit: bool,
) {
pub(crate) fn add_type(&mut self, type_uid: TypeUID, name: &String, t: Ref<Type>, commit: bool) {
if let Some(DebugType {
name: existing_name,
t: existing_type,
@@ -242,7 +331,7 @@ impl DebugInfoBuilder {
},
) {
if existing_type != t && commit {
error!("DWARF info contains duplicate type definition. Overwriting type `{}` (named `{:?}`) with `{}` (named `{:?}`)",
warn!("DWARF info contains duplicate type definition. Overwriting type `{}` (named `{:?}`) with `{}` (named `{:?}`)",
existing_type,
existing_name,
t,
@@ -256,15 +345,76 @@ impl DebugInfoBuilder {
self.types.remove(&type_uid);
}
// TODO : Non-copy?
pub(crate) fn get_type(&self, type_uid: TypeUID) -> Option<(String, Ref<Type>)> {
self.types
.get(&type_uid)
.map(|type_ref_ref| (type_ref_ref.name.clone(), type_ref_ref.t.clone()))
pub(crate) fn get_type(&self, type_uid: TypeUID) -> Option<&DebugType> {
self.types.get(&type_uid)
}
pub(crate) fn contains_type(&self, type_uid: TypeUID) -> bool {
self.types.get(&type_uid).is_some()
self.types.contains_key(&type_uid)
}
/// Attaches a named, typed stack variable to the function at index `fn_idx`.
///
/// * `fn_idx` - index into the builder's function list; `None` is logged as an error and ignored.
/// * `offset` - the variable's offset before adjustment; the value stored for the function's
///   start address in `range_data_offsets` is subtracted from it.
/// * `name` - variable name; `None` or a string consisting of a single NUL character is
///   replaced by `debug_var_<offset>`.
/// * `type_uid` - UID of an already-registered type; `None` yields a zero-confidence `void`.
///
/// The variable is dropped (with a log message) when the function has no known address, when
/// no offset adjustment overlaps that address, or when the adjusted offset is positive.
///
/// # Panics
/// Panics if `type_uid` is `Some` but no type is registered under it, or if `fn_idx` is out
/// of bounds for the function list.
pub(crate) fn add_stack_variable(
    &mut self,
    fn_idx: Option<usize>,
    offset: i64,
    name: Option<String>,
    type_uid: Option<TypeUID>,
) {
    // Anonymous variable (no name, or a lone NUL): generate a name from the offset
    let name = name
        .filter(|candidate| candidate.as_str() != "\x00")
        .unwrap_or_else(|| format!("debug_var_{}", offset));

    let function_index = match fn_idx {
        Some(index) => index,
        None => {
            // If we somehow lost track of what subprogram we're in or we're not actually in a subprogram
            error!("Trying to add a local variable outside of a subprogram. Please report this issue.");
            return;
        }
    };

    // Either get the known type or use a 0 confidence void type so we at least get the name applied
    let t = if let Some(uid) = type_uid {
        Conf::new(self.get_type(uid).unwrap().get_type(), 128)
    } else {
        Conf::new(Type::void(), 0)
    };

    // TODO: If we can't find a known offset can we try to guess somehow?
    let func_addr = match self.functions[function_index].address {
        Some(addr) => addr,
        None => {
            // If we somehow are processing a function's variables before the function is created
            error!("Trying to add a local variable without a known function start. Please report this issue.");
            return;
        }
    };

    let offset_adjustment = match self.range_data_offsets.values_overlap(func_addr).next() {
        Some(adjustment) => adjustment,
        None => {
            // Unknown why, but this is happening with MachO + external dSYM
            debug!("Refusing to add a local variable ({}@{}) to function at {} without a known CIE offset.", name, offset, func_addr);
            return;
        }
    };

    let adjusted_offset = offset - offset_adjustment;
    if adjusted_offset > 0 {
        // If we somehow end up with a positive sp offset
        error!("Trying to add a local variable at positive storage offset {}. Please report this issue.", adjusted_offset);
        return;
    }

    let var = Variable::new(VariableSourceType::StackVariableSourceType, 0, adjusted_offset);
    let stack_variable = NamedTypedVariable::new(var, name, t, false);
    self.functions[function_index].stack_variables.push(stack_variable);
}
pub(crate) fn add_data_variable(
@@ -276,14 +426,14 @@ impl DebugInfoBuilder {
if let Some((_existing_name, existing_type_uid)) =
self.data_variables.insert(address, (name, type_uid))
{
let existing_type = self.get_type(existing_type_uid).unwrap().1;
let new_type = self.get_type(type_uid).unwrap().1;
let existing_type = self.get_type(existing_type_uid).unwrap().get_type();
let new_type = self.get_type(type_uid).unwrap().get_type();
if existing_type_uid != type_uid || existing_type != new_type {
error!("DWARF info contains duplicate data variable definition. Overwriting data variable at 0x{:08x} (`{}`) with `{}`",
warn!("DWARF info contains duplicate data variable definition. Overwriting data variable at 0x{:08x} (`{}`) with `{}`",
address,
self.get_type(existing_type_uid).unwrap().1,
self.get_type(type_uid).unwrap().1
existing_type,
new_type
);
}
}
@@ -303,7 +453,7 @@ impl DebugInfoBuilder {
for (&address, (name, type_uid)) in &self.data_variables {
assert!(debug_info.add_data_variable(
address,
&self.get_type(*type_uid).unwrap().1,
&self.get_type(*type_uid).unwrap().t,
name.clone(),
&[] // TODO : Components
));
@@ -312,17 +462,17 @@ impl DebugInfoBuilder {
fn get_function_type(&self, function: &FunctionInfoBuilder) -> Ref<Type> {
let return_type = match function.return_type {
Some(return_type_id) => Conf::new(self.get_type(return_type_id).unwrap().1.clone(), 0),
Some(return_type_id) => Conf::new(self.get_type(return_type_id).unwrap().get_type(), 128),
_ => Conf::new(binaryninja::types::Type::void(), 0),
};
let parameters: Vec<FunctionParameter<String>> = function
let parameters: Vec<FunctionParameter> = function
.parameters
.iter()
.filter_map(|parameter| match parameter {
Some((name, 0)) => Some(FunctionParameter::new(Type::void(), name.clone(), None)),
Some((name, uid)) => Some(FunctionParameter::new(
self.get_type(*uid).unwrap().1,
self.get_type(*uid).unwrap().get_type(),
name.clone(),
None,
)),
@@ -330,10 +480,7 @@ impl DebugInfoBuilder {
})
.collect();
// TODO : Handle
let variable_parameters = false;
binaryninja::types::Type::function(&return_type, &parameters, variable_parameters)
binaryninja::types::Type::function(&return_type, &parameters, function.variable_arguments)
}
fn commit_functions(&self, debug_info: &mut DebugInfo) {
@@ -348,12 +495,12 @@ impl DebugInfoBuilder {
function.address,
function.platform.clone(),
vec![], // TODO : Components
function.stack_variables.clone(), // TODO: local non-stack variables
));
}
}
pub(crate) fn post_process(&mut self, bv: &BinaryView, _debug_info: &mut DebugInfo) -> &Self {
// TODO : We don't need post-processing if we process correctly the first time....
// When originally resolving names, we need to check:
// If there's already a name from binja that's "more correct" than what we found (has more namespaces)
// If there's no name for the DIE, but there's a linkage name that's resolved in binja to a usable name
@@ -379,19 +526,22 @@ impl DebugInfoBuilder {
if simplify_str_to_fqn(func_full_name, true).len()
< simplify_str_to_fqn(symbol_full_name.clone(), true).len()
{
func.full_name =
Some(symbol_full_name.to_string());
func.full_name = Some(symbol_full_name.to_string());
}
}
}
}
if let Some(address) = func.address {
let existing_functions = bv.functions_at(address);
if existing_functions.len() > 1 {
warn!("Multiple existing functions at address {address:08x}. One or more functions at this address may have the wrong platform information. Please report this binary.");
} else if existing_functions.len() == 1 {
func.platform = Some(existing_functions.get(0).platform());
if let Some(address) = func.address.as_mut() {
let diff = bv.start() - bv.original_image_base();
*address += diff; // rebase the address
let existing_functions = bv.functions_at(*address);
match existing_functions.len().cmp(&1) {
Ordering::Greater => {
warn!("Multiple existing functions at address {address:08x}. One or more functions at this address may have the wrong platform information. Please report this binary.");
}
Ordering::Equal => func.platform = Some(existing_functions.get(0).platform()),
Ordering::Less => {}
}
}
}

View File

@@ -12,67 +12,119 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::OnceLock;
use crate::dwarfdebuginfo::{DebugInfoBuilder, DebugInfoBuilderContext, TypeUID};
use crate::helpers::*;
use crate::{helpers::*, ReaderType};
use crate::types::get_type;
use gimli::{constants, DebuggingInformationEntry, Reader, Unit};
use binaryninja::templatesimplifier::simplify_str_to_str;
use cpp_demangle::DemangleOptions;
use gimli::{constants, DebuggingInformationEntry, Dwarf, Unit};
use log::debug;
use regex::Regex;
fn get_parameters<R: Reader<Offset = usize>>(
fn get_parameters<R: ReaderType>(
dwarf: &Dwarf<R>,
unit: &Unit<R>,
entry: &DebuggingInformationEntry<R>,
debug_info_builder_context: &DebugInfoBuilderContext<R>,
debug_info_builder: &mut DebugInfoBuilder,
) -> Vec<Option<(String, TypeUID)>> {
) -> (Vec<Option<(String, TypeUID)>>, bool) {
if !entry.has_children() {
vec![]
} else {
// We make a new tree from the current entry to iterate over its children
let mut sub_die_tree = unit.entries_tree(Some(entry.offset())).unwrap();
let root = sub_die_tree.root().unwrap();
return (vec![], false);
}
let mut result = vec![];
let mut children = root.children();
while let Some(child) = children.next().unwrap() {
match child.entry().tag() {
constants::DW_TAG_formal_parameter => {
let name = debug_info_builder_context.get_name(unit, child.entry());
let type_ = get_type(
unit,
child.entry(),
debug_info_builder_context,
debug_info_builder,
);
if let Some(parameter_name) = name {
if let Some(parameter_type) = type_ {
result.push(Some((parameter_name, parameter_type)));
} else {
result.push(Some((parameter_name, 0)))
}
// We make a new tree from the current entry to iterate over its children
let mut sub_die_tree = unit.entries_tree(Some(entry.offset())).unwrap();
let root = sub_die_tree.root().unwrap();
let mut variable_arguments = false;
let mut result = vec![];
let mut children = root.children();
while let Some(child) = children.next().unwrap() {
match child.entry().tag() {
constants::DW_TAG_formal_parameter => {
//TODO: if the param type is a typedef to an anonymous struct (typedef struct {...} foo) then this is resolved to an anonymous struct instead of foo
// We should still recurse to make sure we load all types this param type depends on, but
let name = debug_info_builder_context.get_name(dwarf, unit, child.entry());
let type_ = get_type(
dwarf,
unit,
child.entry(),
debug_info_builder_context,
debug_info_builder,
);
if let Some(parameter_name) = name {
if let Some(parameter_type) = type_ {
result.push(Some((parameter_name, parameter_type)));
} else {
result.push(None)
result.push(Some((parameter_name, 0)))
}
} else {
result.push(None)
}
}
constants::DW_TAG_unspecified_parameters => variable_arguments = true,
_ => (),
}
}
(result, variable_arguments)
}
pub(crate) fn parse_function_entry<R: ReaderType>(
dwarf: &Dwarf<R>,
unit: &Unit<R>,
entry: &DebuggingInformationEntry<R>,
debug_info_builder_context: &DebugInfoBuilderContext<R>,
debug_info_builder: &mut DebugInfoBuilder,
) -> Option<usize> {
// Collect function properties (if they exist in this DIE)
let raw_name = get_raw_name(dwarf, unit, entry);
let return_type = get_type(dwarf, unit, entry, debug_info_builder_context, debug_info_builder);
let address = get_start_address(dwarf, unit, entry);
let (parameters, variable_arguments) = get_parameters(dwarf, unit, entry, debug_info_builder_context, debug_info_builder);
// If we have a raw name, it might be mangled, see if we can demangle it into full_name
// raw_name should contain a superset of the info we have in full_name
let mut full_name = None;
if let Some(possibly_mangled_name) = &raw_name {
if possibly_mangled_name.starts_with('_') {
static OPTIONS_MEM: OnceLock<DemangleOptions> = OnceLock::new();
let demangle_options = OPTIONS_MEM.get_or_init(|| {
DemangleOptions::new()
.no_return_type()
.hide_expression_literal_types()
.no_params()
});
static ABI_REGEX_MEM: OnceLock<Regex> = OnceLock::new();
let abi_regex = ABI_REGEX_MEM.get_or_init(|| {
Regex::new(r"\[abi:v\d+\]").unwrap()
});
if let Ok(sym) = cpp_demangle::Symbol::new(possibly_mangled_name) {
if let Ok(demangled) = sym.demangle(demangle_options) {
let cleaned = abi_regex.replace_all(&demangled, "");
let simplified = simplify_str_to_str(&cleaned);
full_name = Some(simplified.to_string());
}
constants::DW_TAG_unspecified_parameters => (),
_ => (),
}
}
result
}
}
pub(crate) fn parse_function_entry<R: Reader<Offset = usize>>(
unit: &Unit<R>,
entry: &DebuggingInformationEntry<R>,
debug_info_builder_context: &DebugInfoBuilderContext<R>,
debug_info_builder: &mut DebugInfoBuilder,
) {
// Collect function properties (if they exist in this DIE)
let full_name = debug_info_builder_context.get_name(unit, entry);
let raw_name = get_raw_name(unit, entry, debug_info_builder_context);
let return_type = get_type(unit, entry, debug_info_builder_context, debug_info_builder);
let address = get_start_address(unit, entry, debug_info_builder_context);
let parameters = get_parameters(unit, entry, debug_info_builder_context, debug_info_builder);
// If we didn't demangle the raw name, fetch the name given
if full_name.is_none() {
full_name = debug_info_builder_context.get_name(dwarf, unit, entry)
}
debug_info_builder.insert_function(full_name, raw_name, return_type, address, parameters);
if raw_name.is_none() && full_name.is_none() {
debug!(
"Function entry in DWARF without full or raw name: .debug_info offset {:?}",
entry.offset().to_debug_info_offset(&unit.header)
);
return None;
}
debug_info_builder.insert_function(full_name, raw_name, return_type, address, &parameters, variable_arguments)
}

View File

@@ -12,124 +12,179 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::DebugInfoBuilderContext;
use std::path::PathBuf;
use std::{
collections::HashMap,
ops::Deref,
sync::mpsc,
str::FromStr
};
use crate::{DebugInfoBuilderContext, ReaderType};
use binaryninja::binaryview::BinaryViewBase;
use binaryninja::filemetadata::FileMetadata;
use binaryninja::Endianness;
use binaryninja::{binaryview::{BinaryView, BinaryViewExt}, downloadprovider::{DownloadInstanceInputOutputCallbacks, DownloadProvider}, rc::Ref, settings::Settings};
use gimli::Dwarf;
use gimli::{
constants, Attribute, AttributeValue,
AttributeValue::{DebugInfoRef, UnitRef},
DebuggingInformationEntry, Operation, Reader, Unit, UnitOffset, UnitSectionOffset,
AttributeValue::{DebugInfoRef, DebugInfoRefSup, UnitRef},
DebuggingInformationEntry, Operation, Unit, UnitOffset, UnitSectionOffset,
};
use log::warn;
pub(crate) fn get_uid<R: Reader<Offset = usize>>(
pub(crate) fn get_uid<R: ReaderType>(
dwarf: &Dwarf<R>,
unit: &Unit<R>,
entry: &DebuggingInformationEntry<R>,
) -> usize {
match entry.offset().to_unit_section_offset(unit) {
// We set a large gap between supplementary and main entries
let adj = dwarf.sup().map_or(0, |_| 0x1000000000000000);
let entry_offset = match entry.offset().to_unit_section_offset(unit) {
UnitSectionOffset::DebugInfoOffset(o) => o.0,
UnitSectionOffset::DebugTypesOffset(o) => o.0,
}
};
entry_offset + adj
}
////////////////////////////////////
// DIE attr convenience functions
pub(crate) enum DieReference<'a, R: Reader<Offset = usize>> {
UnitAndOffset((&'a Unit<R>, UnitOffset)),
pub(crate) enum DieReference<'a, R: ReaderType> {
UnitAndOffset((&'a Dwarf<R>, &'a Unit<R>, UnitOffset)),
Err,
}
pub(crate) fn get_attr_die<'a, R: Reader<Offset = usize>>(
pub(crate) fn get_attr_die<'a, R: ReaderType>(
dwarf: &'a Dwarf<R>,
unit: &'a Unit<R>,
entry: &DebuggingInformationEntry<R>,
debug_info_builder_context: &'a DebugInfoBuilderContext<R>,
attr: constants::DwAt,
) -> Option<DieReference<'a, R>> {
match entry.attr_value(attr) {
Ok(Some(UnitRef(offset))) => Some(DieReference::UnitAndOffset((unit, offset))),
Ok(Some(UnitRef(offset))) => Some(DieReference::UnitAndOffset((dwarf, unit, offset))),
Ok(Some(DebugInfoRef(offset))) => {
for source_unit in debug_info_builder_context.units() {
if let Some(new_offset) = offset.to_unit_offset(&source_unit.header) {
return Some(DieReference::UnitAndOffset((source_unit, new_offset)));
if dwarf.sup().is_some() {
for source_unit in debug_info_builder_context.units() {
if let Some(new_offset) = offset.to_unit_offset(&source_unit.header) {
return Some(DieReference::UnitAndOffset((dwarf, source_unit, new_offset)));
}
}
}
warn!("Failed to fetch DIE. Debug information may be incomplete.");
else {
// `dwarf` has no supplementary file attached: either it is itself the supplementary file,
// or this binary simply does not have one.
// If it is the supplementary file, the reference may point into its own units, so search those first.
for source_unit in debug_info_builder_context.sup_units() {
if let Some(new_offset) = offset.to_unit_offset(&source_unit.header) {
return Some(DieReference::UnitAndOffset((dwarf, source_unit, new_offset)));
}
}
// ... or it just doesn't have a supplementary file
for source_unit in debug_info_builder_context.units() {
if let Some(new_offset) = offset.to_unit_offset(&source_unit.header) {
return Some(DieReference::UnitAndOffset((dwarf, source_unit, new_offset)));
}
}
}
None
}
// Ok(Some(DebugInfoRefSup(offset))) TODO - dwarf 5 stuff
},
Ok(Some(DebugInfoRefSup(offset))) => {
for source_unit in debug_info_builder_context.sup_units() {
if let Some(new_offset) = offset.to_unit_offset(&source_unit.header) {
return Some(DieReference::UnitAndOffset((dwarf.sup().unwrap(), source_unit, new_offset)));
}
}
warn!("Failed to fetch DIE. Supplementary debug information may be incomplete.");
None
},
_ => None,
}
}
pub(crate) fn resolve_specification<'a, R: Reader<Offset = usize>>(
pub(crate) fn resolve_specification<'a, R: ReaderType>(
dwarf: &'a Dwarf<R>,
unit: &'a Unit<R>,
entry: &DebuggingInformationEntry<R>,
debug_info_builder_context: &'a DebugInfoBuilderContext<R>,
) -> DieReference<'a, R> {
if let Some(die_reference) = get_attr_die(
dwarf,
unit,
entry,
debug_info_builder_context,
constants::DW_AT_specification,
) {
match die_reference {
DieReference::UnitAndOffset((entry_unit, entry_offset)) => {
DieReference::UnitAndOffset((dwarf, entry_unit, entry_offset)) => {
if let Ok(entry) = entry_unit.entry(entry_offset) {
resolve_specification(entry_unit, &entry, debug_info_builder_context)
resolve_specification(dwarf, entry_unit, &entry, debug_info_builder_context)
} else {
warn!("Failed to fetch DIE. Debug information may be incomplete.");
warn!("Failed to fetch DIE for attr DW_AT_specification. Debug information may be incomplete.");
DieReference::Err
}
}
DieReference::Err => DieReference::Err,
}
} else if let Some(die_reference) = get_attr_die(
dwarf,
unit,
entry,
debug_info_builder_context,
constants::DW_AT_abstract_origin,
) {
match die_reference {
DieReference::UnitAndOffset((entry_unit, entry_offset)) => {
if entry_offset == entry.offset() {
DieReference::UnitAndOffset((dwarf, entry_unit, entry_offset)) => {
if entry_offset == entry.offset() && unit.header.offset() == entry_unit.header.offset() {
warn!("DWARF information is invalid (infinite abstract origin reference cycle). Debug information may be incomplete.");
DieReference::Err
} else if let Ok(new_entry) = entry_unit.entry(entry_offset) {
resolve_specification(entry_unit, &new_entry, debug_info_builder_context)
resolve_specification(dwarf, entry_unit, &new_entry, debug_info_builder_context)
} else {
warn!("Failed to fetch DIE. Debug information may be incomplete.");
warn!("Failed to fetch DIE for attr DW_AT_abstract_origin. Debug information may be incomplete.");
DieReference::Err
}
}
DieReference::Err => DieReference::Err,
}
} else {
DieReference::UnitAndOffset((unit, entry.offset()))
DieReference::UnitAndOffset((dwarf, unit, entry.offset()))
}
}
// Get name from DIE, or referenced dependencies
pub(crate) fn get_name<R: Reader<Offset = usize>>(
pub(crate) fn get_name<R: ReaderType>(
dwarf: &Dwarf<R>,
unit: &Unit<R>,
entry: &DebuggingInformationEntry<R>,
debug_info_builder_context: &DebugInfoBuilderContext<R>,
) -> Option<String> {
match resolve_specification(unit, entry, debug_info_builder_context) {
DieReference::UnitAndOffset((entry_unit, entry_offset)) => {
match resolve_specification(dwarf, unit, entry, debug_info_builder_context) {
DieReference::UnitAndOffset((dwarf, entry_unit, entry_offset)) => {
if let Ok(Some(attr_val)) = entry_unit
.entry(entry_offset)
.unwrap()
.attr_value(constants::DW_AT_name)
{
if let Ok(attr_string) = debug_info_builder_context
.dwarf()
.attr_string(entry_unit, attr_val)
if let Ok(attr_string) = dwarf.attr_string(entry_unit, attr_val.clone())
{
if let Ok(attr_string) = attr_string.to_string() {
return Some(attr_string.to_string());
}
}
else if let Some(dwarf) = &dwarf.sup {
if let Ok(attr_string) = dwarf.attr_string(entry_unit, attr_val)
{
if let Ok(attr_string) = attr_string.to_string() {
return Some(attr_string.to_string());
}
}
}
}
// if let Some(raw_name) = get_raw_name(unit, entry, debug_info_builder_context) {
@@ -146,26 +201,32 @@ pub(crate) fn get_name<R: Reader<Offset = usize>>(
}
// Get raw name from DIE, or referenced dependencies
pub(crate) fn get_raw_name<R: Reader<Offset = usize>>(
pub(crate) fn get_raw_name<R: ReaderType>(
dwarf: &Dwarf<R>,
unit: &Unit<R>,
entry: &DebuggingInformationEntry<R>,
debug_info_builder_context: &DebugInfoBuilderContext<R>,
) -> Option<String> {
if let Ok(Some(attr_val)) = entry.attr_value(constants::DW_AT_linkage_name) {
if let Ok(attr_string) = debug_info_builder_context
.dwarf()
.attr_string(unit, attr_val)
if let Ok(attr_string) = dwarf.attr_string(unit, attr_val.clone())
{
if let Ok(attr_string) = attr_string.to_string() {
return Some(attr_string.to_string());
}
}
else if let Some(dwarf) = dwarf.sup() {
if let Ok(attr_string) = dwarf.attr_string(unit, attr_val)
{
if let Ok(attr_string) = attr_string.to_string() {
return Some(attr_string.to_string());
}
}
}
}
None
}
// Get the size of an object as a usize
pub(crate) fn get_size_as_usize<R: Reader<Offset = usize>>(
pub(crate) fn get_size_as_usize<R: ReaderType>(
entry: &DebuggingInformationEntry<R>,
) -> Option<usize> {
if let Ok(Some(attr)) = entry.attr(constants::DW_AT_byte_size) {
@@ -178,7 +239,7 @@ pub(crate) fn get_size_as_usize<R: Reader<Offset = usize>>(
}
// Get the size of an object as a u64
pub(crate) fn get_size_as_u64<R: Reader<Offset = usize>>(
pub(crate) fn get_size_as_u64<R: ReaderType>(
entry: &DebuggingInformationEntry<R>,
) -> Option<u64> {
if let Ok(Some(attr)) = entry.attr(constants::DW_AT_byte_size) {
@@ -191,7 +252,7 @@ pub(crate) fn get_size_as_u64<R: Reader<Offset = usize>>(
}
// Get the size of a subrange as a u64
pub(crate) fn get_subrange_size<R: Reader<Offset = usize>>(
pub(crate) fn get_subrange_size<R: ReaderType>(
entry: &DebuggingInformationEntry<R>,
) -> u64 {
if let Ok(Some(attr)) = entry.attr(constants::DW_AT_upper_bound) {
@@ -206,35 +267,27 @@ pub(crate) fn get_subrange_size<R: Reader<Offset = usize>>(
}
// Get the start address of a function
pub(crate) fn get_start_address<R: Reader<Offset = usize>>(
pub(crate) fn get_start_address<R: ReaderType>(
dwarf: &Dwarf<R>,
unit: &Unit<R>,
entry: &DebuggingInformationEntry<R>,
debug_info_builder_context: &DebugInfoBuilderContext<R>,
) -> Option<u64> {
if let Ok(Some(attr_val)) = entry.attr_value(constants::DW_AT_low_pc) {
match debug_info_builder_context
.dwarf()
.attr_address(unit, attr_val)
match dwarf.attr_address(unit, attr_val)
{
Ok(Some(val)) => Some(val),
_ => None,
}
} else if let Ok(Some(attr_val)) = entry.attr_value(constants::DW_AT_entry_pc) {
match debug_info_builder_context
.dwarf()
.attr_address(unit, attr_val)
match dwarf.attr_address(unit, attr_val)
{
Ok(Some(val)) => Some(val),
_ => None,
}
} else if let Ok(Some(attr_value)) = entry.attr_value(constants::DW_AT_ranges) {
if let Ok(Some(ranges_offset)) = debug_info_builder_context
.dwarf()
.attr_ranges_offset(unit, attr_value)
if let Ok(Some(ranges_offset)) = dwarf.attr_ranges_offset(unit, attr_value)
{
if let Ok(mut ranges) = debug_info_builder_context
.dwarf()
.ranges(unit, ranges_offset)
if let Ok(mut ranges) = dwarf.ranges(unit, ranges_offset)
{
if let Ok(Some(range)) = ranges.next() {
return Some(range.begin);
@@ -248,20 +301,26 @@ pub(crate) fn get_start_address<R: Reader<Offset = usize>>(
}
// Get an attribute value as a u64 if it can be coerced
pub(crate) fn get_attr_as_u64<R: Reader<Offset = usize>>(attr: &Attribute<R>) -> Option<u64> {
if let Some(value) = attr.u8_value() {
Some(value.into())
} else if let Some(value) = attr.u16_value() {
Some(value.into())
} else if let Some(value) = attr.udata_value() {
pub(crate) fn get_attr_as_u64<R: ReaderType>(attr: &Attribute<R>) -> Option<u64> {
if let Some(value) = attr.udata_value() {
Some(value)
} else if let Some(value) = attr.sdata_value() {
Some(value as u64)
} else if let AttributeValue::Block(mut data) = attr.value() {
match data.len() {
1 => data.read_u8().map(u64::from).ok(),
2 => data.read_u16().map(u64::from).ok(),
4 => data.read_u32().map(u64::from).ok(),
8 => data.read_u64().ok(),
_ => None
}
} else {
attr.sdata_value().map(|value| value as u64)
None
}
}
// Get an attribute value as a usize if it can be coerced
pub(crate) fn get_attr_as_usize<R: Reader<Offset = usize>>(attr: Attribute<R>) -> Option<usize> {
pub(crate) fn get_attr_as_usize<R: ReaderType>(attr: Attribute<R>) -> Option<usize> {
if let Some(value) = attr.u8_value() {
Some(value.into())
} else if let Some(value) = attr.u16_value() {
@@ -275,7 +334,7 @@ pub(crate) fn get_attr_as_usize<R: Reader<Offset = usize>>(attr: Attribute<R>) -
// Get an attribute value as a usize if it can be coerced
// Parses DW_OP_address, DW_OP_const
pub(crate) fn get_expr_value<R: Reader<Offset = usize>>(
pub(crate) fn get_expr_value<R: ReaderType>(
unit: &Unit<R>,
attr: Attribute<R>,
) -> Option<u64> {
@@ -285,9 +344,252 @@ pub(crate) fn get_expr_value<R: Reader<Offset = usize>>(
Ok(Operation::UnsignedConstant { value }) => Some(value),
Ok(Operation::Address { address: 0 }) => None,
Ok(Operation::Address { address }) => Some(address),
_ => None,
_ => None
}
} else {
None
}
}
// Get the GNU build id of `view` as a lowercase hex string.
//
// Reads the `.note.gnu.build-id` section of the raw view and parses it as a single note:
//   Name size - 4 bytes
//   Desc size - 4 bytes
//   Type      - 4 bytes
//   Name      - n bytes (padded to a 4-byte boundary)
//   Desc      - n bytes (the build id itself)
// The three header words are decoded using the raw view's default endianness.
//
// Returns `Err` with a human-readable message when the raw view or section is missing,
// the section is shorter than its header claims, the note type is not 3, the declared
// sizes overflow, or the descriptor is empty.
pub(crate) fn get_build_id(view: &BinaryView) -> Result<String, String> {
    const NOTE_HEADER_LEN: usize = 12;
    const NT_GNU_BUILD_ID: u32 = 3;

    let Ok(raw_view) = view.raw_view() else {
        return Err("Failed to get build id".to_string());
    };
    let Ok(build_id_section) = raw_view.section_by_name(".note.gnu.build-id") else {
        return Err("Failed to get build id".to_string());
    };

    let build_id_bytes = raw_view.read_vec(build_id_section.start(), build_id_section.len());
    if build_id_bytes.len() < NOTE_HEADER_LEN {
        return Err("Build id section must be at least 12 bytes".to_string());
    }

    let decode: fn([u8; 4]) -> u32 = match raw_view.default_endianness() {
        Endianness::LittleEndian => u32::from_le_bytes,
        Endianness::BigEndian => u32::from_be_bytes,
    };
    // The length check above guarantees all three header words are present.
    let header_word = |index: usize| -> u32 {
        let mut raw = [0u8; 4];
        raw.copy_from_slice(&build_id_bytes[index * 4..index * 4 + 4]);
        decode(raw)
    };
    let name_len = header_word(0) as usize;
    let desc_len = header_word(1) as usize;
    let note_type = header_word(2);

    if note_type != NT_GNU_BUILD_ID {
        return Err(format!("Build id section has wrong type: {}", note_type));
    }

    // The sizes come straight from the file, so every addition is checked: a wrapped sum
    // would pass the length test below and then panic when slicing.
    let overflow = || "Build id section sizes overflow".to_string();
    let desc_start = name_len
        .checked_add(3)
        .map(|padded| padded & !3) // the name is padded to a 4-byte boundary
        .and_then(|padded| padded.checked_add(NOTE_HEADER_LEN))
        .ok_or_else(overflow)?;
    let expected_len = desc_start.checked_add(desc_len).ok_or_else(overflow)?;

    if build_id_bytes.len() < expected_len {
        return Err(format!(
            "Build id section not expected length: expected {}, got {}",
            expected_len,
            build_id_bytes.len()
        ));
    }

    let desc: &[u8] = &build_id_bytes[desc_start..expected_len];
    if desc.is_empty() {
        // An empty id cannot be used for any lookup; report it instead of returning "".
        return Err("Build id section has an empty descriptor".to_string());
    }

    Ok(desc.iter().map(|b| format!("{:02x}", b)).collect())
}
pub(crate) fn download_debug_info(build_id: &String, view: &BinaryView) -> Result<Ref<BinaryView>, String> {
let settings = Settings::new("");
let debug_server_urls = settings.get_string_list("network.debuginfodServers", Some(view), None);
for debug_server_url in debug_server_urls.iter() {
let artifact_url = format!("{}/buildid/{}/debuginfo", debug_server_url, build_id);
// Download from remote
let (tx, rx) = mpsc::channel();
let write = move |data: &[u8]| -> usize {
if let Ok(_) = tx.send(Vec::from(data)) {
data.len()
} else {
0
}
};
let dp = DownloadProvider::try_default().map_err(|_| "No default download provider")?;
let mut inst = dp
.create_instance()
.map_err(|_| "Couldn't create download instance")?;
let result = inst
.perform_custom_request(
"GET",
artifact_url,
HashMap::<String, String>::new(),
DownloadInstanceInputOutputCallbacks {
read: None,
write: Some(Box::new(write)),
progress: None,
},
)
.map_err(|e| e.to_string())?;
if result.status_code != 200 {
continue;
}
let mut expected_length = None;
for (k, v) in result.headers.iter() {
if k.to_lowercase() == "content-length" {
expected_length = Some(usize::from_str(v).map_err(|e| e.to_string())?);
}
}
let mut data = vec![];
while let Ok(packet) = rx.try_recv() {
data.extend(packet.into_iter());
}
if let Some(length) = expected_length {
if data.len() != length {
return Err(format!(
"Bad length: expected {} got {}",
length,
data.len()
));
}
}
let options = "{\"analysis.debugInfo.internal\": false}";
let bv = BinaryView::from_data(FileMetadata::new().deref(), &data)
.map_err(|_| "Unable to create binary view from downloaded data".to_string())?;
return binaryninja::load_view(bv.deref(), false, Some(options))
.ok_or("Unable to load binary view from downloaded data".to_string());
}
return Err("Could not find a server with debug info for this file".to_string());
}
// Look for a local debug file matching `build_id` in the configured debug directories.
//
// Honors the `analysis.debugInfo.enableDebugDirectories` switch and searches every
// directory from `analysis.debugInfo.debugDirectories` in order. Inside a directory the
// id is split after its first two characters and two layouts are probed:
//   <dir>/<first two>/<rest>.debug   (preferred)
//   <dir>/<first two>/<rest>/elf
//
// Returns the first existing candidate as a string, or `None` when the feature is
// disabled, no directory is configured, the id is too short to split, nothing matches,
// or the matching path is not valid UTF-8.
pub(crate) fn find_local_debug_file_for_build_id(build_id: &String, view: &BinaryView) -> Option<String> {
    let settings = Settings::new("");
    let debug_dirs_enabled = settings.get_bool("analysis.debugInfo.enableDebugDirectories", Some(view), None);
    if !debug_dirs_enabled {
        return None;
    }

    let debug_info_paths = settings.get_string_list("analysis.debugInfo.debugDirectories", Some(view), None);
    if debug_info_paths.is_empty() {
        return None;
    }

    // `get` instead of indexing: a short (or empty) id must not panic the plugin.
    let prefix = build_id.get(..2)?;
    let remainder = build_id.get(2..)?;
    let debug_file_name = format!("{}.debug", remainder);

    for debug_info_path in debug_info_paths.into_iter() {
        let bucket = PathBuf::from(debug_info_path.to_string()).join(prefix);
        let debug_ext_path = bucket.join(&debug_file_name);
        let elf_path = bucket.join(remainder).join("elf");

        let final_path = if debug_ext_path.exists() {
            debug_ext_path
        } else if elf_path.exists() {
            elf_path
        } else {
            // No paths exist in this dir, try the next one
            continue;
        };

        return final_path.to_str().map(str::to_string);
    }

    None
}
// Load the debug-info view for `build_id`, preferring a local file over the network.
//
// Returns `(view, downloaded)`:
//   - a local debug file was found: the result of loading it, `false`
//   - otherwise, if `network.enableDebuginfod` is on: the download result, `true`
//   - otherwise: `(None, false)`
pub(crate) fn load_debug_info_for_build_id(build_id: &String, view: &BinaryView) -> (Option<Ref<BinaryView>>, bool) {
    match find_local_debug_file_for_build_id(build_id, view) {
        Some(debug_file_path) => {
            let local_view = binaryninja::load_with_options(
                debug_file_path,
                false,
                Some("{\"analysis.debugInfo.internal\": false}"),
            );
            (local_view, false)
        }
        None if Settings::new("").get_bool("network.enableDebuginfod", Some(view), None) => {
            let downloaded_view = download_debug_info(build_id, view).ok();
            (downloaded_view, true)
        }
        None => (None, false),
    }
}
// Find a debug file sitting next to the file backing `view`.
//
// Controlled by the `analysis.debugInfo.loadSiblingDebugFiles` setting. For a file `X`
// two candidates are probed, in order:
//   X.debug                                   (must be a regular file)
//   X.dSYM/Contents/Resources/DWARF/<name>    (where <name> is the last component of X)
//
// Returns the first candidate that exists, or `None`.
pub(crate) fn find_sibling_debug_file(view: &BinaryView) -> Option<String> {
    let settings = Settings::new("");
    let load_sibling_debug = settings.get_bool("analysis.debugInfo.loadSiblingDebugFiles", Some(view), None);
    if !load_sibling_debug {
        return None;
    }

    let filename = view.file().filename().to_string();

    let debug_file = PathBuf::from(format!("{}.debug", filename));
    if debug_file.is_file() {
        return Some(debug_file.to_string_lossy().to_string());
    }

    let dsym_folder = PathBuf::from(format!("{}.dSYM/", filename));
    if dsym_folder.is_dir() {
        // Join only the final path component. Joining the whole filename would discard the
        // dSYM folder when the filename is absolute, and would nest the directory part a
        // second time when it is relative.
        let binary_path = PathBuf::from(&filename);
        if let Some(binary_name) = binary_path.file_name() {
            let dsym_file = dsym_folder
                .join("Contents/Resources/DWARF/")
                .join(binary_name); // TODO: should this just pull any file out? Can there be multiple files?
            if dsym_file.exists() {
                return Some(dsym_file.to_string_lossy().to_string());
            }
        }
    }

    None
}
// Load the sibling debug file of `view`, if one is found.
//
// The file is loaded with `analysis.debugInfo.internal` disabled; when `view` has a
// default platform, that platform's name is also passed as `loader.platform`.
// The boolean in the returned pair is always `false`.
pub(crate) fn load_sibling_debug_file(view: &BinaryView) -> (Option<Ref<BinaryView>>, bool) {
    match find_sibling_debug_file(view) {
        None => (None, false),
        Some(debug_file) => {
            let load_settings = if let Some(plat) = view.default_platform() {
                format!("{{\"analysis.debugInfo.internal\": false, \"loader.platform\": \"{}\"}}", plat.name())
            } else {
                "{\"analysis.debugInfo.internal\": false}".to_string()
            };

            let sibling_view = binaryninja::load_with_options(debug_file, false, Some(load_settings));
            (sibling_view, false)
        }
    }
}

View File

@@ -18,32 +18,60 @@ mod functions;
mod helpers;
mod types;
use std::collections::HashMap;
use crate::dwarfdebuginfo::{DebugInfoBuilder, DebugInfoBuilderContext};
use crate::functions::parse_function_entry;
use crate::helpers::{get_attr_die, get_name, get_uid, DieReference};
use crate::types::parse_data_variable;
use crate::types::parse_variable;
use binaryninja::binaryview::BinaryViewBase;
use binaryninja::{
binaryview::{BinaryView, BinaryViewExt},
debuginfo::{CustomDebugInfoParser, DebugInfo, DebugInfoParser},
logger,
settings::Settings,
templatesimplifier::simplify_str_to_str,
};
use dwarfreader::{
create_section_reader, get_endian, is_dwo_dwarf, is_non_dwo_dwarf, is_raw_dwo_dwarf,
};
use gimli::{constants, DebuggingInformationEntry, Dwarf, DwarfFileType, Reader, SectionId, Unit};
use gimli::{constants, DebuggingInformationEntry, Dwarf, DwarfFileType, Reader, Section, SectionId, Unit, UnwindSection};
use helpers::{get_build_id, load_debug_info_for_build_id};
use log::{error, warn, LevelFilter};
fn recover_names<R: Reader<Offset = usize>>(
// Shorthand bound used throughout the crate: a gimli `Reader` whose offsets are `usize`.
trait ReaderType: Reader<Offset = usize> {}
// Blanket impl: every `Reader<Offset = usize>` is a `ReaderType`, so no type needs to opt in.
impl<T: Reader<Offset = usize>> ReaderType for T {}
fn recover_names<R: ReaderType>(
dwarf: &Dwarf<R>,
debug_info_builder_context: &mut DebugInfoBuilderContext<R>,
progress: &dyn Fn(usize, usize) -> Result<(), ()>,
) -> bool {
let mut iter = debug_info_builder_context.dwarf().units();
let mut res = true;
if let Some(sup_dwarf) = dwarf.sup() {
res = recover_names_internal(sup_dwarf, debug_info_builder_context, progress);
}
if res {
res = recover_names_internal(dwarf, debug_info_builder_context, progress);
}
res
}
fn recover_names_internal<R: ReaderType>(
dwarf: &Dwarf<R>,
debug_info_builder_context: &mut DebugInfoBuilderContext<R>,
progress: &dyn Fn(usize, usize) -> Result<(), ()>,
) -> bool {
let mut iter = dwarf.units();
while let Ok(Some(header)) = iter.next() {
let unit = debug_info_builder_context.dwarf().unit(header).unwrap();
let unit = dwarf.unit(header).unwrap();
let mut namespace_qualifiers: Vec<(isize, String)> = vec![];
let mut entries = unit.entries();
let mut depth = 0;
@@ -72,7 +100,8 @@ fn recover_names<R: Reader<Offset = usize>>(
match entry.tag() {
constants::DW_TAG_namespace => {
fn resolve_namespace_name<R: Reader<Offset = usize>>(
fn resolve_namespace_name<R: ReaderType>(
dwarf: &Dwarf<R>,
unit: &Unit<R>,
entry: &DebuggingInformationEntry<R>,
debug_info_builder_context: &DebugInfoBuilderContext<R>,
@@ -80,18 +109,20 @@ fn recover_names<R: Reader<Offset = usize>>(
depth: isize,
) {
if let Some(namespace_qualifier) =
get_name(unit, entry, debug_info_builder_context)
get_name(dwarf, unit, entry, debug_info_builder_context)
{
namespace_qualifiers.push((depth, namespace_qualifier));
} else if let Some(die_reference) = get_attr_die(
dwarf,
unit,
entry,
debug_info_builder_context,
constants::DW_AT_extension,
) {
match die_reference {
DieReference::UnitAndOffset((entry_unit, entry_offset)) => {
DieReference::UnitAndOffset((dwarf, entry_unit, entry_offset)) => {
resolve_namespace_name(
dwarf,
entry_unit,
&entry_unit.entry(entry_offset).unwrap(),
debug_info_builder_context,
@@ -101,17 +132,17 @@ fn recover_names<R: Reader<Offset = usize>>(
}
DieReference::Err => {
warn!(
"Failed to fetch DIE. Debug information may be incomplete."
"Failed to fetch DIE when resolving namespace. Debug information may be incomplete."
);
}
}
} else {
namespace_qualifiers
.push((depth, "anonymous_namespace".to_string()));
namespace_qualifiers.push((depth, "anonymous_namespace".to_string()));
}
}
resolve_namespace_name(
dwarf,
&unit,
entry,
debug_info_builder_context,
@@ -122,54 +153,54 @@ fn recover_names<R: Reader<Offset = usize>>(
constants::DW_TAG_class_type
| constants::DW_TAG_structure_type
| constants::DW_TAG_union_type => {
if let Some(name) = get_name(&unit, entry, debug_info_builder_context) {
if let Some(name) = get_name(dwarf, &unit, entry, debug_info_builder_context) {
namespace_qualifiers.push((depth, name))
} else {
namespace_qualifiers.push((
depth,
match entry.tag() {
constants::DW_TAG_class_type => "anonymous_class".to_string(),
constants::DW_TAG_structure_type => "anonymous_structure".to_string(),
constants::DW_TAG_structure_type => {
"anonymous_structure".to_string()
}
constants::DW_TAG_union_type => "anonymous_union".to_string(),
_ => unreachable!(),
}
},
))
}
debug_info_builder_context.set_name(
get_uid(&unit, entry),
simplify_str_to_str(
namespace_qualifiers
.iter()
.map(|(_, namespace)| namespace.to_owned())
.collect::<Vec<String>>()
.join("::"),
)
.to_string(),
get_uid(dwarf, &unit, entry),
simplify_str_to_str(
namespace_qualifiers
.iter()
.map(|(_, namespace)| namespace.to_owned())
.collect::<Vec<String>>()
.join("::"),
)
.to_string(),
);
}
constants::DW_TAG_typedef
| constants::DW_TAG_subprogram
| constants::DW_TAG_enumeration_type => {
if let Some(name) = get_name(&unit, entry, debug_info_builder_context) {
if let Some(name) = get_name(dwarf, &unit, entry, debug_info_builder_context) {
debug_info_builder_context.set_name(
get_uid(&unit, entry),
simplify_str_to_str(
namespace_qualifiers
.iter()
.chain(vec![&(-1, name)].into_iter())
.map(|(_, namespace)| {
namespace.to_owned()
})
.collect::<Vec<String>>()
.join("::"),
)
.to_string(),
get_uid(dwarf, &unit, entry),
simplify_str_to_str(
namespace_qualifiers
.iter()
.chain(vec![&(-1, name)].into_iter())
.map(|(_, namespace)| namespace.to_owned())
.collect::<Vec<String>>()
.join("::"),
)
.to_string(),
);
}
}
_ => {
if let Some(name) = get_name(&unit, entry, debug_info_builder_context) {
debug_info_builder_context.set_name(get_uid(&unit, entry), name);
if let Some(name) = get_name(dwarf, &unit, entry, debug_info_builder_context) {
debug_info_builder_context.set_name(get_uid(dwarf, &unit, entry), name);
}
}
}
@@ -179,7 +210,8 @@ fn recover_names<R: Reader<Offset = usize>>(
true
}
fn parse_unit<R: Reader<Offset = usize>>(
fn parse_unit<R: ReaderType>(
dwarf: &Dwarf<R>,
unit: &Unit<R>,
debug_info_builder_context: &DebugInfoBuilderContext<R>,
debug_info_builder: &mut DebugInfoBuilder,
@@ -188,9 +220,12 @@ fn parse_unit<R: Reader<Offset = usize>>(
) {
let mut entries = unit.entries();
let mut current_depth: isize = 0;
let mut functions_by_depth: Vec<(Option<usize>, isize)> = vec![];
// Really all we care about as we iterate the entries in a given unit is how they modify state (our perception of the file)
// There's a lot of junk we don't care about in DWARF info, so we choose a couple DIEs and mutate state (add functions (which adds the types it uses) and keep track of what namespace we're in)
while let Ok(Some((_, entry))) = entries.next_dfs() {
while let Ok(Some((depth_delta, entry))) = entries.next_dfs() {
*current_die_number += 1;
if (*progress)(
*current_die_number,
@@ -201,34 +236,147 @@ fn parse_unit<R: Reader<Offset = usize>>(
return; // Parsing canceled
}
current_depth = current_depth.saturating_add(depth_delta);
loop {
if let Some((_fn_idx, depth)) = functions_by_depth.last() {
if current_depth <= *depth {
functions_by_depth.pop();
}
else {
break
}
}
else {
break;
}
}
match entry.tag() {
constants::DW_TAG_subprogram => {
parse_function_entry(unit, entry, debug_info_builder_context, debug_info_builder)
}
let fn_idx = parse_function_entry(dwarf, unit, entry, debug_info_builder_context, debug_info_builder);
functions_by_depth.push((fn_idx, current_depth));
},
constants::DW_TAG_variable => {
parse_data_variable(unit, entry, debug_info_builder_context, debug_info_builder)
}
let current_fn_idx = functions_by_depth.last().and_then(|x| x.0);
parse_variable(dwarf, unit, entry, debug_info_builder_context, debug_info_builder, current_fn_idx)
},
constants::DW_TAG_class_type |
constants::DW_TAG_enumeration_type |
constants::DW_TAG_structure_type |
constants::DW_TAG_union_type |
constants::DW_TAG_typedef => {
// Ensure types are loaded even if they're unused
types::get_type(dwarf, unit, entry, debug_info_builder_context, debug_info_builder);
},
_ => (),
}
}
}
fn parse_dwarf(
fn parse_eh_frame<R: Reader>(
view: &BinaryView,
mut eh_frame: gimli::EhFrame<R>,
) -> gimli::Result<iset::IntervalMap<u64, i64>> {
eh_frame.set_address_size(view.address_size() as u8);
let mut bases = gimli::BaseAddresses::default();
if let Ok(section) = view.section_by_name(".eh_frame_hdr").or(view.section_by_name("__eh_frame_hdr")) {
bases = bases.set_eh_frame_hdr(section.start());
}
if let Ok(section) = view.section_by_name(".eh_frame").or(view.section_by_name("__eh_frame")) {
bases = bases.set_eh_frame(section.start());
}
if let Ok(section) = view.section_by_name(".text").or(view.section_by_name("__text")) {
bases = bases.set_text(section.start());
}
if let Ok(section) = view.section_by_name(".got").or(view.section_by_name("__got")) {
bases = bases.set_got(section.start());
}
let mut cies = HashMap::new();
let mut cie_data_offsets = iset::IntervalMap::new();
let mut entries = eh_frame.entries(&bases);
loop {
match entries.next()? {
None => return Ok(cie_data_offsets),
Some(gimli::CieOrFde::Cie(_cie)) => {
// TODO: do we want to do anything with standalone CIEs?
}
Some(gimli::CieOrFde::Fde(partial)) => {
let fde = match partial.parse(|_, bases, o| {
cies.entry(o)
.or_insert_with(|| eh_frame.cie_from_offset(bases, o))
.clone()
}) {
Ok(fde) => fde,
Err(e) => {
error!("Failed to parse FDE: {}", e);
continue;
}
};
if fde.len() == 0 {
// This FDE is a terminator
return Ok(cie_data_offsets);
}
// Store CIE offset for FDE range
cie_data_offsets.insert(
fde.initial_address()..fde.initial_address()+fde.len(),
fde.cie().data_alignment_factor()
);
}
}
}
}
// Get the build id of the supplementary debug file referenced by `bv`, as lowercase hex.
//
// Reads the `.gnu_debugaltlink` section of the raw view. Everything after the first NUL
// byte in that section is treated as the build id; the bytes before it are skipped.
// NOTE(review): the bytes before the NUL are presumably the supplementary file's name — confirm.
//
// Returns `None` when the raw view or section is missing, the section is shorter than
// 20 bytes, it contains no NUL byte, or nothing follows the NUL.
fn get_supplementary_build_id(bv: &BinaryView) -> Option<String> {
    let raw_view = bv.raw_view().ok()?;
    let section = raw_view.section_by_name(".gnu_debugaltlink").ok()?;

    let len = section.len();
    if len < 20 {
        // Not large enough to hold a build id
        return None;
    }

    let contents = raw_view.read_vec(section.start(), len);

    // Without a NUL there is nothing to separate the leading bytes from the id, so bail
    // out instead of hex-encoding the entire section.
    let separator = contents.iter().position(|byte| *byte == 0)?;
    let build_id = &contents[separator + 1..];
    if build_id.is_empty() {
        return None;
    }

    Some(build_id.iter().map(|b| format!("{:02x}", b)).collect())
}
fn parse_dwarf(
bv: &BinaryView,
debug_bv: &BinaryView,
supplementary_bv: Option<&BinaryView>,
progress: Box<dyn Fn(usize, usize) -> Result<(), ()>>,
) -> DebugInfoBuilder {
) -> Result<DebugInfoBuilder, ()> {
// TODO: warn if no supplementary file and .gnu_debugaltlink section present
// Determine if this is a DWO
// TODO : Make this more robust...some DWOs follow non-DWO conventions
let dwo_file = is_dwo_dwarf(view) || is_raw_dwo_dwarf(view);
// Figure out if it's the given view or the raw view that has the dwarf info in it
let raw_view = &view.raw_view().unwrap();
let view = if is_dwo_dwarf(view) || is_non_dwo_dwarf(view) {
view
let raw_view = &debug_bv.raw_view().unwrap();
let view = if is_dwo_dwarf(debug_bv) || is_non_dwo_dwarf(debug_bv) {
debug_bv
} else {
raw_view
};
let dwo_file = is_dwo_dwarf(view) || is_raw_dwo_dwarf(view);
// gimli setup
let endian = get_endian(view);
let mut section_reader =
@@ -237,24 +385,60 @@ fn parse_dwarf(
if dwo_file {
dwarf.file_type = DwarfFileType::Dwo;
}
else {
dwarf.file_type = DwarfFileType::Main;
}
if let Some(sup_bv) = supplementary_bv {
let sup_endian = get_endian(sup_bv);
let sup_dwo_file = is_dwo_dwarf(sup_bv) || is_raw_dwo_dwarf(sup_bv);
let sup_section_reader =
|section_id: SectionId| -> _ { create_section_reader(section_id, sup_bv, sup_endian, sup_dwo_file) };
if let Err(e) = dwarf.load_sup(sup_section_reader) {
error!("Failed to load supplementary file: {}", e);
}
}
let eh_frame_endian = get_endian(bv);
let mut eh_frame_section_reader =
|section_id: SectionId| -> _ { create_section_reader(section_id, bv, eh_frame_endian, dwo_file) };
let eh_frame = gimli::EhFrame::load(&mut eh_frame_section_reader).unwrap();
let range_data_offsets = parse_eh_frame(bv, eh_frame)
.map_err(|e| error!("Error parsing .eh_frame: {}", e))?;
// Create debug info builder and recover name mapping first
// Since DWARF is stored as a tree with arbitrary implicit edges among leaves,
// it is not possible to correctly track namespaces while you're parsing "in order" without backtracking,
// so we just do it up front
let mut debug_info_builder = DebugInfoBuilder::new();
if let Some(mut debug_info_builder_context) = DebugInfoBuilderContext::new(view, dwarf) {
if !recover_names(&mut debug_info_builder_context, &progress)
debug_info_builder.set_range_data_offsets(range_data_offsets);
if let Some(mut debug_info_builder_context) = DebugInfoBuilderContext::new(view, &dwarf) {
if !recover_names(&dwarf, &mut debug_info_builder_context, &progress)
|| debug_info_builder_context.total_die_count == 0
{
return debug_info_builder;
return Ok(debug_info_builder);
}
// Parse all the compilation units
let mut current_die_number = 0;
for unit in debug_info_builder_context.sup_units() {
parse_unit(
dwarf.sup().unwrap(),
&unit,
&debug_info_builder_context,
&mut debug_info_builder,
&progress,
&mut current_die_number,
);
}
for unit in debug_info_builder_context.units() {
parse_unit(
unit,
&dwarf,
&unit,
&debug_info_builder_context,
&mut debug_info_builder,
&progress,
@@ -262,14 +446,28 @@ fn parse_dwarf(
);
}
}
debug_info_builder
Ok(debug_info_builder)
}
struct DWARFParser;
impl CustomDebugInfoParser for DWARFParser {
fn is_valid(&self, view: &BinaryView) -> bool {
dwarfreader::is_valid(view)
if dwarfreader::is_valid(view) || dwarfreader::can_use_debuginfod(view) {
return true;
}
if dwarfreader::has_build_id_section(view) {
if let Ok(build_id) = get_build_id(view) {
if helpers::find_local_debug_file_for_build_id(&build_id, view).is_some() {
return true;
}
}
}
if helpers::find_sibling_debug_file(view).is_some() {
return true;
}
false
}
fn parse_info(
@@ -279,10 +477,51 @@ impl CustomDebugInfoParser for DWARFParser {
debug_file: &BinaryView,
progress: Box<dyn Fn(usize, usize) -> Result<(), ()>>,
) -> bool {
parse_dwarf(debug_file, progress)
.post_process(bv, debug_info)
.commit_info(debug_info);
true
let (external_file, close_external) = if !dwarfreader::is_valid(bv) {
if let (Some(debug_view), x) = helpers::load_sibling_debug_file(bv) {
(Some(debug_view), x)
}
else if let Ok(build_id) = get_build_id(bv) {
helpers::load_debug_info_for_build_id(&build_id, bv)
}
else {
(None, false)
}
}
else {
(None, false)
};
let sup_bv = get_supplementary_build_id(
external_file
.as_deref()
.unwrap_or(debug_file)
)
.and_then(|build_id| {
load_debug_info_for_build_id(&build_id, bv)
.0
.map(|x| x.raw_view().unwrap())
});
let result = match parse_dwarf(
bv,
external_file.as_deref().unwrap_or(debug_file),
sup_bv.as_deref(),
progress
)
{
Ok(mut builder) => {
builder.post_process(bv, debug_info).commit_info(debug_info);
true
}
Err(_) => false,
};
if let (Some(ext), true) = (external_file, close_external) {
ext.file().close();
}
result
}
}
@@ -290,6 +529,65 @@ impl CustomDebugInfoParser for DWARFParser {
// Plugin entry point: sets up logging at debug level, registers the plugin's settings,
// and registers the "DWARF" debug info parser. Always returns `true`; panics if the
// logger cannot be initialized.
pub extern "C" fn CorePluginInit() -> bool {
    // (setting key, JSON schema) pairs, registered below in this order.
    let setting_schemas = [
        (
            "network.enableDebuginfod",
            r#"{
"title" : "Enable Debuginfod Support",
"type" : "boolean",
"default" : false,
"description" : "Enable using Debuginfod servers to fetch DWARF debug info for files with a .note.gnu.build-id section.",
"ignore" : []
}"#,
        ),
        (
            "network.debuginfodServers",
            r#"{
"title" : "Debuginfod Server URLs",
"type" : "array",
"elementType" : "string",
"default" : [],
"description" : "Servers to use for fetching DWARF debug info for files with a .note.gnu.build-id section.",
"ignore" : []
}"#,
        ),
        (
            "analysis.debugInfo.enableDebugDirectories",
            r#"{
"title" : "Enable Debug File Directories",
"type" : "boolean",
"default" : true,
"description" : "Enable searching local debug directories for DWARF debug info.",
"ignore" : []
}"#,
        ),
        (
            "analysis.debugInfo.debugDirectories",
            r#"{
"title" : "Debug File Directories",
"type" : "array",
"elementType" : "string",
"default" : [],
"description" : "Paths to folder containing DWARF debug info stored by build id.",
"ignore" : []
}"#,
        ),
        (
            "analysis.debugInfo.loadSiblingDebugFiles",
            r#"{
"title" : "Enable Loading of Sibling Debug Files",
"type" : "boolean",
"default" : true,
"description" : "Enable automatic loading of X.debug and X.dSYM files next to a file named X.",
"ignore" : []
}"#,
        ),
    ];

    logger::init(LevelFilter::Debug).unwrap();

    let settings = Settings::new("");
    for (key, schema) in setting_schemas {
        settings.register_setting_json(key, schema);
    }

    DebugInfoParser::register("DWARF", DWARFParser {});
    true
}

View File

@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::die_handlers::*;
use crate::{die_handlers::*, ReaderType};
use crate::dwarfdebuginfo::{DebugInfoBuilder, DebugInfoBuilderContext, TypeUID};
use crate::helpers::*;
@@ -23,31 +23,52 @@ use binaryninja::{
},
};
use gimli::{constants, DebuggingInformationEntry, Reader, Unit};
use gimli::{constants, AttributeValue, DebuggingInformationEntry, Dwarf, Operation, Unit};
use log::warn;
use log::{debug, error, warn};
/// Records a variable DIE in `debug_info_builder`, based on the first operation of the DIE's
/// `DW_AT_location` expression.
///
/// Returns without recording anything when the entry has no `DW_AT_location` attribute or when
/// the attribute value is not an `Exprloc`. Otherwise the first operation decides what happens:
/// - `Operation::FrameOffset`: `add_stack_variable(function_index, offset, full_name, type_uid)`;
/// - `Operation::Address`: `add_data_variable(address, full_name, uid)`, only if a type UID was
///   resolved by `get_type`;
/// - any other operation is logged at debug level; a parse error is logged at error level.
///
/// NOTE(review): this listing is a diff with its +/- markers stripped, so the removed
/// `parse_data_variable` lines are interleaved with the new `parse_variable` lines below.
pub(crate) fn parse_data_variable<R: Reader<Offset = usize>>(
pub(crate) fn parse_variable<R: ReaderType>(
dwarf: &Dwarf<R>,
unit: &Unit<R>,
entry: &DebuggingInformationEntry<R>,
debug_info_builder_context: &DebugInfoBuilderContext<R>,
debug_info_builder: &mut DebugInfoBuilder,
function_index: Option<usize>,
) {
let full_name = debug_info_builder_context.get_name(unit, entry);
let type_uid = get_type(unit, entry, debug_info_builder_context, debug_info_builder);
let full_name = debug_info_builder_context.get_name(dwarf, unit, entry);
let type_uid = get_type(dwarf, unit, entry, debug_info_builder_context, debug_info_builder);
let address = if let Ok(Some(attr)) = entry.attr(constants::DW_AT_location) {
get_expr_value(unit, attr)
} else {
None
// Guard: nothing to do without a location attribute.
let Ok(Some(attr)) = entry.attr(constants::DW_AT_location) else {
return
};
if let (Some(address), Some(type_uid)) = (address, type_uid) {
debug_info_builder.add_data_variable(address, full_name, type_uid);
// Guard: only expression-valued locations are handled.
let AttributeValue::Exprloc(mut expression) = attr.value() else {
return
};
// A single `Operation::parse` call: only the first operation of the expression is examined.
match Operation::parse(&mut expression.0, unit.encoding()) {
Ok(Operation::FrameOffset { offset }) => {
debug_info_builder.add_stack_variable(function_index, offset, full_name, type_uid);
},
//Ok(Operation::RegisterOffset { register: _, offset: _, base_type: _ }) => {
// //TODO: look up register by index (binja register indexes don't match processor indexes?)
// //TODO: calculate absolute stack offset
// //TODO: add by absolute offset
//},
Ok(Operation::Address { address }) => {
// Data variables are only added when the variable's type was resolved.
if let Some(uid) = type_uid {
debug_info_builder.add_data_variable(address, full_name, uid)
}
},
Ok(op) => {
debug!("Unhandled operation type for variable: {:?}", op);
},
Err(e) => error!("Error parsing operation type for variable {:?}: {}", full_name, e)
}
}
fn do_structure_parse<R: Reader<Offset = usize>>(
fn do_structure_parse<R: ReaderType>(
dwarf: &Dwarf<R>,
structure_type: StructureType,
unit: &Unit<R>,
entry: &DebuggingInformationEntry<R>,
@@ -91,8 +112,8 @@ fn do_structure_parse<R: Reader<Offset = usize>>(
return None;
}
let full_name = if get_name(unit, entry, debug_info_builder_context).is_some() {
debug_info_builder_context.get_name(unit, entry)
let full_name = if get_name(dwarf, unit, entry, debug_info_builder_context).is_some() {
debug_info_builder_context.get_name(dwarf, unit, entry)
} else {
None
};
@@ -109,8 +130,8 @@ fn do_structure_parse<R: Reader<Offset = usize>>(
// it will also be how any other types refer to this struct
if let Some(full_name) = &full_name {
debug_info_builder.add_type(
get_uid(unit, entry),
full_name.clone(),
get_uid(dwarf, unit, entry),
&full_name,
Type::named_type_from_type(
full_name.clone(),
&Type::structure(&structure_builder.finalize()),
@@ -121,11 +142,11 @@ fn do_structure_parse<R: Reader<Offset = usize>>(
// We _need_ to have initial typedefs or else we can enter infinite parsing loops
// These get overwritten in the last step with the actual type, however, so this
// is either perfectly fine or breaking a bunch of NTRs
let full_name = format!("anonymous_structure_{:x}", get_uid(unit, entry));
let full_name = format!("anonymous_structure_{:x}", get_uid(dwarf, unit, entry));
debug_info_builder.add_type(
get_uid(unit, entry),
full_name.clone(),
Type::named_type_from_type(full_name, &Type::structure(&structure_builder.finalize())),
get_uid(dwarf, unit, entry),
&full_name,
Type::named_type_from_type(&full_name, &Type::structure(&structure_builder.finalize())),
false,
);
}
@@ -136,14 +157,16 @@ fn do_structure_parse<R: Reader<Offset = usize>>(
while let Ok(Some(child)) = children.next() {
if child.entry().tag() == constants::DW_TAG_member {
if let Some(child_type_id) = get_type(
dwarf,
unit,
child.entry(),
debug_info_builder_context,
debug_info_builder,
) {
if let Some((_, child_type)) = debug_info_builder.get_type(child_type_id) {
if let Some(t) = debug_info_builder.get_type(child_type_id) {
let child_type = t.get_type();
if let Some(child_name) = debug_info_builder_context
.get_name(unit, child.entry())
.get_name(dwarf, unit, child.entry())
.map_or(
if child_type.type_class() == TypeClass::StructureTypeClass {
Some("".to_string())
@@ -188,32 +211,34 @@ fn do_structure_parse<R: Reader<Offset = usize>>(
let finalized_structure = Type::structure(&structure_builder.finalize());
if let Some(full_name) = full_name {
debug_info_builder.add_type(
get_uid(unit, entry) + 1, // TODO : This is super broke (uid + 1 is not guaranteed to be unique)
full_name,
get_uid(dwarf, unit, entry) + 1, // TODO : This is super broke (uid + 1 is not guaranteed to be unique)
&full_name,
finalized_structure,
true,
);
} else {
debug_info_builder.add_type(
get_uid(unit, entry),
format!("{}", finalized_structure),
get_uid(dwarf, unit, entry),
&format!("{}", finalized_structure),
finalized_structure,
false, // Don't commit anonymous unions (because I think it'll break things)
);
}
Some(get_uid(unit, entry))
Some(get_uid(dwarf, unit, entry))
}
// This function iterates up through the dependency references, adding all the types along the way until there are no more or stopping at the first one already tracked, then returns the UID of the type of the given DIE
pub(crate) fn get_type<R: Reader<Offset = usize>>(
pub(crate) fn get_type<R: ReaderType>(
dwarf: &Dwarf<R>,
unit: &Unit<R>,
entry: &DebuggingInformationEntry<R>,
debug_info_builder_context: &DebugInfoBuilderContext<R>,
debug_info_builder: &mut DebugInfoBuilder,
) -> Option<TypeUID> {
// If this node (and thus all its referenced nodes) has already been processed, just return the offset
if debug_info_builder.contains_type(get_uid(unit, entry)) {
return Some(get_uid(unit, entry));
let entry_uid = get_uid(dwarf, unit, entry);
if debug_info_builder.contains_type(entry_uid) {
return Some(entry_uid);
}
// Don't parse types that are just declarations and not definitions
@@ -222,6 +247,7 @@ pub(crate) fn get_type<R: Reader<Offset = usize>>(
}
let entry_type = if let Some(die_reference) = get_attr_die(
dwarf,
unit,
entry,
debug_info_builder_context,
@@ -229,25 +255,29 @@ pub(crate) fn get_type<R: Reader<Offset = usize>>(
) {
// This needs to recurse first (before the early return below) to ensure all sub-types have been parsed
match die_reference {
DieReference::UnitAndOffset((entry_unit, entry_offset)) => get_type(
entry_unit,
&entry_unit.entry(entry_offset).unwrap(),
debug_info_builder_context,
debug_info_builder,
),
DieReference::UnitAndOffset((dwarf, entry_unit, entry_offset)) => {
get_type(
dwarf,
entry_unit,
&entry_unit.entry(entry_offset).unwrap(),
debug_info_builder_context,
debug_info_builder,
)
}
DieReference::Err => {
warn!("Failed to fetch DIE. Debug information may be incomplete.");
warn!("Failed to fetch DIE when getting type through DW_AT_type. Debug information may be incomplete.");
None
}
}
} else {
// This needs to recurse first (before the early return below) to ensure all sub-types have been parsed
match resolve_specification(unit, entry, debug_info_builder_context) {
DieReference::UnitAndOffset((entry_unit, entry_offset))
match resolve_specification(dwarf, unit, entry, debug_info_builder_context) {
DieReference::UnitAndOffset((dwarf, entry_unit, entry_offset))
if entry_unit.header.offset() != unit.header.offset()
&& entry_offset != entry.offset() =>
{
get_type(
dwarf,
entry_unit,
&entry_unit.entry(entry_offset).unwrap(),
debug_info_builder_context,
@@ -256,7 +286,7 @@ pub(crate) fn get_type<R: Reader<Offset = usize>>(
}
DieReference::UnitAndOffset(_) => None,
DieReference::Err => {
warn!("Failed to fetch DIE. Debug information may be incomplete.");
warn!("Failed to fetch DIE when getting type. Debug information may be incomplete.");
None
}
}
@@ -264,20 +294,21 @@ pub(crate) fn get_type<R: Reader<Offset = usize>>(
// If this node (and thus all its referenced nodes) has already been processed, just return the offset
// This check is not redundant because this type might have been processed in the recursive calls above
if debug_info_builder.contains_type(get_uid(unit, entry)) {
return Some(get_uid(unit, entry));
if debug_info_builder.contains_type(entry_uid) {
return Some(entry_uid);
}
// Collect the required information to create a type and add it to the type map. Also, add the dependencies of this type to the type's typeinfo
// Create the type, make a TypeInfo for it, and add it to the debug info
let (type_def, mut commit): (Option<Ref<Type>>, bool) = match entry.tag() {
constants::DW_TAG_base_type => (
handle_base_type(unit, entry, debug_info_builder_context),
handle_base_type(dwarf, unit, entry, debug_info_builder_context),
false,
),
constants::DW_TAG_structure_type => {
return do_structure_parse(
dwarf,
StructureType::StructStructureType,
unit,
entry,
@@ -287,6 +318,7 @@ pub(crate) fn get_type<R: Reader<Offset = usize>>(
}
constants::DW_TAG_class_type => {
return do_structure_parse(
dwarf,
StructureType::ClassStructureType,
unit,
entry,
@@ -296,6 +328,7 @@ pub(crate) fn get_type<R: Reader<Offset = usize>>(
}
constants::DW_TAG_union_type => {
return do_structure_parse(
dwarf,
StructureType::UnionStructureType,
unit,
entry,
@@ -306,13 +339,13 @@ pub(crate) fn get_type<R: Reader<Offset = usize>>(
// Enum
constants::DW_TAG_enumeration_type => {
(handle_enum(unit, entry, debug_info_builder_context), true)
(handle_enum(dwarf, unit, entry, debug_info_builder_context), true)
}
// Basic types
constants::DW_TAG_typedef => {
if let Some(name) = debug_info_builder_context.get_name(unit, entry) {
handle_typedef(debug_info_builder, entry_type, name)
if let Some(name) = debug_info_builder_context.get_name(dwarf, unit, entry) {
handle_typedef(debug_info_builder, entry_type, &name)
} else {
(None, false)
}
@@ -356,6 +389,7 @@ pub(crate) fn get_type<R: Reader<Offset = usize>>(
constants::DW_TAG_unspecified_type => (Some(Type::void()), false),
constants::DW_TAG_subroutine_type => (
handle_function(
dwarf,
unit,
entry,
debug_info_builder_context,
@@ -375,8 +409,8 @@ pub(crate) fn get_type<R: Reader<Offset = usize>>(
// Wrap our resultant type in a TypeInfo so that the internal DebugInfo class can manage it
if let Some(type_def) = type_def {
let name = if get_name(unit, entry, debug_info_builder_context).is_some() {
debug_info_builder_context.get_name(unit, entry)
let name = if get_name(dwarf, unit, entry, debug_info_builder_context).is_some() {
debug_info_builder_context.get_name(dwarf, unit, entry)
} else {
None
}
@@ -385,8 +419,8 @@ pub(crate) fn get_type<R: Reader<Offset = usize>>(
format!("{}", type_def)
});
debug_info_builder.add_type(get_uid(unit, entry), name, type_def, commit);
Some(get_uid(unit, entry))
debug_info_builder.add_type(entry_uid, &name, type_def, commit);
Some(entry_uid)
} else {
None
}