update rust crate

This commit is contained in:
2024-08-17 16:20:28 +02:00
parent 670fa334db
commit 2167e0512a
88 changed files with 20508 additions and 1741 deletions

View File

@@ -0,0 +1,508 @@
// Copyright 2022-2024 Vector 35 Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::{BTreeMap, HashMap, HashSet};
use std::env;
use std::fmt::Display;
use std::sync::OnceLock;
use anyhow::{anyhow, Result};
use log::{debug, info};
use pdb::*;
use binaryninja::architecture::{Architecture, CoreArchitecture};
use binaryninja::binaryview::{BinaryView, BinaryViewExt};
use binaryninja::callingconvention::CallingConvention;
use binaryninja::debuginfo::{DebugFunctionInfo, DebugInfo};
use binaryninja::platform::Platform;
use binaryninja::rc::Ref;
use binaryninja::settings::Settings;
use binaryninja::types::{
min_confidence, Conf, DataVariableAndName, EnumerationBuilder, NamedTypeReference,
NamedTypeReferenceClass, StructureBuilder, StructureType, Type, TypeClass,
};
use crate::symbol_parser::{ParsedDataSymbol, ParsedProcedure, ParsedSymbol};
use crate::type_parser::ParsedType;
/// Megastruct holding all of the state shared by the PDB parsing passes.
///
/// Certain fields are only used by specific files, as marked below.
/// They are kept in one struct instead of one struct per file because each of those
/// structs would have to repeat the lifetime / `Source` generics declared right below.
pub struct PDBParserInstance<'a, S: Source<'a> + 'a> {
/// DebugInfo where types/functions will be stored eventually
pub(crate) debug_info: &'a mut DebugInfo,
/// Parent binary view (usually during BinaryView::Finalize)
pub(crate) bv: &'a BinaryView,
/// Default arch of self.bv
pub(crate) arch: CoreArchitecture,
/// Default calling convention of self.platform
pub(crate) default_cc: Ref<CallingConvention<CoreArchitecture>>,
/// Thiscall calling convention of self.platform, or default_cc if we can't find one
pub(crate) thiscall_cc: Ref<CallingConvention<CoreArchitecture>>,
/// Cdecl calling convention of self.platform, or default_cc if we can't find one
pub(crate) cdecl_cc: Ref<CallingConvention<CoreArchitecture>>,
/// Default platform of self.bv
pub(crate) platform: Ref<Platform>,
/// pdb-rs handle to the PDB being parsed; its `'a` parameter is the reason this struct
/// carries a lifetime
pub(crate) pdb: PDB<'a, S>,
/// pdb-rs Mapping of modules to addresses for resolving RVAs
pub(crate) address_map: AddressMap<'a>,
/// Binja Settings instance, created once here and reused for every settings lookup
pub(crate) settings: Ref<Settings>,
// ---- Fields used by type_parser.rs ----
/// TypeIndex -> ParsedType enum used during parsing
pub(crate) indexed_types: BTreeMap<TypeIndex, ParsedType>,
/// QName -> Binja Type for finished types
pub(crate) named_types: BTreeMap<String, Ref<Type>>,
/// Raw (mangled) name -> TypeIndex for resolving forward references
pub(crate) full_type_indices: BTreeMap<String, TypeIndex>,
/// Stack of types we're currently parsing
pub(crate) type_stack: Vec<TypeIndex>,
/// Stack of parent types we're parsing nested types inside of
pub(crate) namespace_stack: Vec<String>,
/// Type Index -> Does it return on the stack
pub(crate) type_default_returnable: BTreeMap<TypeIndex, bool>,
// ---- Fields used by symbol_parser.rs ----
/// List of fully parsed symbols from all modules
pub(crate) parsed_symbols: Vec<ParsedSymbol>,
/// Raw name -> index in parsed_symbols
pub(crate) parsed_symbols_by_name: BTreeMap<String, usize>,
/// Raw name -> Symbol index for looking up symbols for the currently parsing module (mostly for thunks)
pub(crate) named_symbols: BTreeMap<String, SymbolIndex>,
/// Parent -> Children symbol index tree for the currently parsing module
pub(crate) symbol_tree: BTreeMap<SymbolIndex, Vec<SymbolIndex>>,
/// Child -> Parent symbol index mapping, inverse of symbol_tree
pub(crate) symbol_parents: BTreeMap<SymbolIndex, SymbolIndex>,
/// Stack of (start, end) indices for the current symbols being parsed while constructing the tree
pub(crate) symbol_stack: Vec<(SymbolIndex, SymbolIndex)>,
/// Index -> parsed symbol for the currently parsing module
pub(crate) indexed_symbols: BTreeMap<SymbolIndex, ParsedSymbol>,
/// Symbol address -> all symbols parsed at that address, for looking up by address
pub(crate) addressed_symbols: BTreeMap<u64, Vec<ParsedSymbol>>,
/// CPU type of the currently parsing module (None until one is recorded)
pub(crate) module_cpu_type: Option<CPUType>,
}
impl<'a, S: Source<'a> + 'a> PDBParserInstance<'a, S> {
/// Try to create a new parser instance from a given bv/pdb
///
/// Looks up the default architecture, platform and calling conventions of `bv`, reads the
/// address map from `pdb`, and starts with every parsing table empty.
///
/// # Errors
/// Returns an error (instead of panicking) when `bv` has no default architecture or no default
/// platform, when the platform has no default calling convention, or when the address map
/// cannot be read from `pdb`.
pub fn new(
    debug_info: &'a mut DebugInfo,
    bv: &'a BinaryView,
    mut pdb: PDB<'a, S>,
) -> Result<Self> {
    let arch = bv
        .default_arch()
        .ok_or_else(|| anyhow!("Cannot parse to view with no architecture"))?;
    let platform = bv
        .default_platform()
        .ok_or_else(|| anyhow!("Cannot parse to view with no platform"))?;
    let address_map = pdb.address_map()?;
    let default_cc = platform
        .get_default_calling_convention()
        .ok_or_else(|| anyhow!("Cannot parse to view with no default calling convention"))?;
    // Missing thiscall/cdecl conventions are not fatal: fall back to the default one.
    let thiscall_cc = Self::find_calling_convention(platform.as_ref(), "thiscall")
        .unwrap_or(default_cc.clone());
    let cdecl_cc = platform
        .get_cdecl_calling_convention()
        .unwrap_or(default_cc.clone());
    Ok(Self {
        debug_info,
        bv,
        arch,
        default_cc,
        thiscall_cc,
        cdecl_cc,
        platform,
        pdb,
        address_map,
        settings: Settings::new(""),
        indexed_types: Default::default(),
        named_types: Default::default(),
        full_type_indices: Default::default(),
        type_stack: Default::default(),
        namespace_stack: Default::default(),
        type_default_returnable: Default::default(),
        parsed_symbols: Default::default(),
        parsed_symbols_by_name: Default::default(),
        named_symbols: Default::default(),
        symbol_tree: Default::default(),
        symbol_parents: Default::default(),
        symbol_stack: Default::default(),
        indexed_symbols: Default::default(),
        addressed_symbols: Default::default(),
        module_cpu_type: None,
    })
}
/// Try to parse the pdb into the DebugInfo
///
/// Stages, in order:
/// 1. parse all types and register every named type in `debug_info`;
/// 2. if the `pdb.features.parseSymbols` setting is on, parse symbols;
/// 3. if `pdb.features.createMissingNamedTypes` is on, create stub types for names referenced
///    by data symbols and then by functions;
/// 4. register data variables (void-typed ones only when `pdb.features.allowVoidGlobals` is
///    on) and functions.
///
/// `progress` is shared by all stages; each stage reports into its own weighted slice of it.
pub fn try_parse_info(
    &mut self,
    progress: Box<dyn Fn(usize, usize) -> Result<()> + 'a>,
) -> Result<()> {
    // Relative weight of: type parsing, symbol parsing, NTRs of data symbols, NTRs of functions
    const STAGE_WEIGHTS: [f64; 4] = [1.0, 3.0, 0.5, 0.5];

    self.parse_types(Self::split_progress(&progress, 0, &STAGE_WEIGHTS))?;
    for (name, ty) in &self.named_types {
        self.debug_info.add_type(name, ty.as_ref(), &[]); // TODO : Components
    }
    info!(
        "PDB found {} types (before resolving NTRs)",
        self.named_types.len()
    );

    let symbols_enabled =
        self.settings
            .get_bool("pdb.features.parseSymbols", Some(self.bv), None);
    if !symbols_enabled {
        return Ok(());
    }

    let (symbols, functions) =
        self.parse_symbols(Self::split_progress(&progress, 1, &STAGE_WEIGHTS))?;

    let create_missing =
        self.settings
            .get_bool("pdb.features.createMissingNamedTypes", Some(self.bv), None);
    if create_missing {
        self.resolve_missing_ntrs(
            &symbols,
            Self::split_progress(&progress, 2, &STAGE_WEIGHTS),
        )?;
        self.resolve_missing_ntrs(
            &functions,
            Self::split_progress(&progress, 3, &STAGE_WEIGHTS),
        )?;
    }

    info!("PDB found {} types", self.named_types.len());
    info!("PDB found {} data variables", symbols.len());
    info!("PDB found {} functions", functions.len());

    let allow_void =
        self.settings
            .get_bool("pdb.features.allowVoidGlobals", Some(self.bv), None);
    // Stand-in type for data symbols that came without one
    let min_confidence_type = Conf::new(Type::void(), min_confidence());

    for sym in &symbols {
        let ParsedSymbol::Data(ParsedDataSymbol {
            address,
            name,
            type_,
            ..
        }) = sym
        else {
            self.log(|| format!("Not adding non-data symbol {:?}", sym));
            continue;
        };

        let real_type = type_.as_ref().unwrap_or(&min_confidence_type);
        let is_void = real_type.contents.type_class() == TypeClass::VoidTypeClass;
        if is_void && !allow_void {
            self.log(|| format!("Not adding void-typed symbol {:?}@{:x}", name, address));
            continue;
        }

        self.log(|| {
            format!(
                "Adding data variable: 0x{:x}: {} {:?}",
                address, &name.raw_name, real_type
            )
        });
        let variable = DataVariableAndName::new(
            *address,
            real_type.clone(),
            true,
            name.full_name.as_ref().unwrap_or(&name.raw_name),
        );
        self.debug_info.add_data_variable_info(variable);
    }

    for sym in functions {
        let ParsedSymbol::Procedure(ParsedProcedure {
            address,
            name,
            type_,
            ..
        }) = sym
        else {
            continue;
        };

        self.log(|| {
            format!(
                "Adding function: 0x{:x}: {} {:?}",
                address, &name.raw_name, type_
            )
        });

        // The raw name doubles as short/full name when demangled ones are missing
        let short_name = name.short_name.unwrap_or(name.raw_name.clone());
        let full_name = name.full_name.unwrap_or(name.raw_name.clone());
        // TODO: When DebugInfo support confidence on function types, remove this
        let function_type = type_
            .clone()
            .filter(|conf| conf.confidence != 0)
            .map(|conf| conf.contents);

        self.debug_info.add_function(DebugFunctionInfo::new(
            Some(short_name),
            Some(full_name),
            Some(name.raw_name),
            function_type,
            Some(address),
            Some(self.platform.clone()),
            vec![], // TODO : Components
            vec![], //TODO: local variables
        ));
    }

    Ok(())
}
/// Record the name and class of one named type reference in `unknown_names`
///
/// The first class recorded for a name is kept; a later reference to the same name with a
/// different class is only logged.
fn collect_name(
    &self,
    name: &NamedTypeReference,
    unknown_names: &mut HashMap<String, NamedTypeReferenceClass>,
) {
    let used_name = name.name().to_string();
    let recorded_class = unknown_names.get(&used_name).copied();
    match recorded_class {
        Some(recorded) if recorded != name.class() => {
            // Interesting case, not sure we care
            self.log(|| {
                format!(
                    "Mismatch unknown NTR class for {}: {} ?",
                    &used_name,
                    name.class() as u32
                )
            });
        }
        // Already recorded with the same class: nothing to do
        Some(_) => {}
        None => {
            self.log(|| format!("Found new unused name: {}", &used_name));
            unknown_names.insert(used_name, name.class());
        }
    }
}
/// Recursively gather every named type reference reachable from `ty` into `unknown_names`
///
/// Walks structure members and base structures, pointer targets, array element types, and
/// function return values and parameters. Named type references are handed to
/// `collect_name`; every other type class is ignored, as is any part of a type whose
/// accessor returns an error.
fn collect_names(
    &self,
    ty: &Type,
    unknown_names: &mut HashMap<String, NamedTypeReferenceClass>,
) {
    match ty.type_class() {
        TypeClass::StructureTypeClass => {
            let Ok(structure) = ty.get_structure() else {
                return;
            };
            for member in structure.members().into_iter().flatten() {
                self.collect_names(member.ty.contents.as_ref(), unknown_names);
            }
            // Base structures are already named type references
            for base in structure.base_structures().into_iter().flatten() {
                self.collect_name(base.ty.as_ref(), unknown_names);
            }
        }
        TypeClass::PointerTypeClass => {
            if let Ok(pointee) = ty.target() {
                self.collect_names(pointee.contents.as_ref(), unknown_names);
            }
        }
        TypeClass::ArrayTypeClass => {
            if let Ok(element) = ty.element_type() {
                self.collect_names(element.contents.as_ref(), unknown_names);
            }
        }
        TypeClass::FunctionTypeClass => {
            if let Ok(returned) = ty.return_value() {
                self.collect_names(returned.contents.as_ref(), unknown_names);
            }
            for param in ty.parameters().into_iter().flatten() {
                self.collect_names(param.t.contents.as_ref(), unknown_names);
            }
        }
        TypeClass::NamedTypeReferenceClass => {
            if let Ok(reference) = ty.get_named_type_reference() {
                self.collect_name(reference.as_ref(), unknown_names);
            }
        }
        _ => {}
    }
}
fn resolve_missing_ntrs(
&mut self,
symbols: &Vec<ParsedSymbol>,
progress: Box<dyn Fn(usize, usize) -> Result<()> + '_>,
) -> Result<()> {
let mut unknown_names = HashMap::new();
let mut known_names = self
.bv
.types()
.iter()
.map(|qnat| qnat.name().string())
.collect::<HashSet<_>>();
for ty in &self.named_types {
known_names.insert(ty.0.clone());
}
let count = symbols.len();
for (i, sym) in symbols.into_iter().enumerate() {
match sym {
ParsedSymbol::Data(ParsedDataSymbol {
type_: Some(type_), ..
}) => {
self.collect_names(type_.contents.as_ref(), &mut unknown_names);
}
ParsedSymbol::Procedure(ParsedProcedure {
type_: Some(type_),
locals,
..
}) => {
self.collect_names(type_.contents.as_ref(), &mut unknown_names);
for l in locals {
if let Some(ltype) = &l.type_ {
self.collect_names(ltype.contents.as_ref(), &mut unknown_names);
}
}
}
_ => {}
}
(progress)(i, count)?;
}
for (name, class) in unknown_names.into_iter() {
if known_names.contains(&name) {
self.log(|| format!("Found referenced name and ignoring: {}", &name));
continue;
}
self.log(|| format!("Adding referenced but unknown type {} (likely due to demangled name and stripped type)", &name));
match class {
NamedTypeReferenceClass::UnknownNamedTypeClass
| NamedTypeReferenceClass::TypedefNamedTypeClass => {
self.debug_info.add_type(name, Type::void().as_ref(), &[]); // TODO : Components
}
NamedTypeReferenceClass::ClassNamedTypeClass
| NamedTypeReferenceClass::StructNamedTypeClass
| NamedTypeReferenceClass::UnionNamedTypeClass => {
let structure = StructureBuilder::new();
match class {
NamedTypeReferenceClass::ClassNamedTypeClass => {
structure.set_structure_type(StructureType::ClassStructureType);
}
NamedTypeReferenceClass::StructNamedTypeClass => {
structure.set_structure_type(StructureType::StructStructureType);
}
NamedTypeReferenceClass::UnionNamedTypeClass => {
structure.set_structure_type(StructureType::UnionStructureType);
}
_ => {}
}
structure.set_width(1);
structure.set_alignment(1);
self.debug_info.add_type(
name,
Type::structure(structure.finalize().as_ref()).as_ref(),
&[], // TODO : Components
);
}
NamedTypeReferenceClass::EnumNamedTypeClass => {
let enumeration = EnumerationBuilder::new();
self.debug_info.add_type(
name,
Type::enumeration(
enumeration.finalize().as_ref(),
self.arch.default_integer_size(),
false,
)
.as_ref(),
&[], // TODO : Components
);
}
}
}
Ok(())
}
/// Lazy logging function that prints like 20MB of messages
///
/// Does nothing unless the `BN_DEBUG_PDB` environment variable can be read; the variable is
/// checked once and the answer is cached, so `msg` is never evaluated when logging is off.
/// Every line of the message is indented by one tab per entry on the type and symbol stacks
/// and emitted at debug level.
pub(crate) fn log<F: FnOnce() -> D, D: Display>(&self, msg: F) {
    static DEBUG_PDB_ENABLED: OnceLock<bool> = OnceLock::new();
    let enabled = *DEBUG_PDB_ENABLED.get_or_init(|| env::var("BN_DEBUG_PDB").is_ok());
    if !enabled {
        return;
    }

    let depth = self.type_stack.len() + self.symbol_stack.len();
    let indent = "\t".repeat(depth);
    let text = msg().to_string();
    // Continuation lines get the same indent as the first one
    let line_break = format!("\n{}", indent);
    debug!("{}{}", indent, text.replace("\n", &line_break));
}
/// Build a progress callback that maps one weighted sub-task onto its slice of `original_fn`
///
/// `subpart_weights` holds the relative cost of every sub-task and `subpart` selects the one
/// the returned callback reports for. The callback turns `(cur, max)` into a position on a
/// fixed `0..=1_000_000` scale, offset by the combined weight of the preceding sub-tasks, and
/// forwards it to `original_fn`, returning whatever `original_fn` returns.
///
/// Edge cases:
/// * weights summing to (almost) zero, or a `subpart` that is not a valid index, produce a
///   callback that ignores its input and returns `Ok(())`;
/// * `max == 0` reports the start of the sub-task's slice instead of dividing by zero;
/// * `cur > max` is clamped to the end of the sub-task's slice.
pub(crate) fn split_progress<'b, F: Fn(usize, usize) -> Result<()> + 'b>(
    original_fn: F,
    subpart: usize,
    subpart_weights: &[f64],
) -> Box<dyn Fn(usize, usize) -> Result<()> + 'b> {
    // Just use a large number for easy divisibility
    const STEPS: f64 = 1_000_000.0;

    // Normalize weights
    let weight_sum: f64 = subpart_weights.iter().sum();
    if weight_sum < 0.0001 {
        return Box::new(|_, _| Ok(()));
    }
    let Some(&own_weight) = subpart_weights.get(subpart) else {
        return Box::new(|_, _| Ok(()));
    };

    // Everything before this sub-task determines where its slice starts
    let preceding_weight: f64 = subpart_weights[..subpart].iter().sum();
    let subpart_start = preceding_weight / weight_sum;
    let weight = own_weight / weight_sum;

    Box::new(move |cur: usize, max: usize| {
        // 0/0 would be NaN and n/0 infinity; both turn into nonsense when cast to usize
        let fraction = if max == 0 {
            0.0
        } else {
            ((cur as f64) / (max as f64)).min(1.0)
        };
        let subpart_size = STEPS * weight;
        let subpart_progress = fraction * subpart_size;
        original_fn(
            (subpart_start * STEPS + subpart_progress) as usize,
            STEPS as usize,
        )
    })
}
}