diff --git a/fxprof-processed-profile/src/lib_mappings.rs b/fxprof-processed-profile/src/lib_mappings.rs index da6b4be00..ae4dd02b0 100644 --- a/fxprof-processed-profile/src/lib_mappings.rs +++ b/fxprof-processed-profile/src/lib_mappings.rs @@ -13,7 +13,8 @@ use std::collections::BTreeMap; pub struct LibMappings { /// A BTreeMap of non-overlapping Mappings. The key is the start_avma of the mapping. /// - /// When a new mapping is added, overlapping mappings are removed. + /// When a new mapping is added, the overlapping part of any existing mapping + /// is carved away, but its non-overlapping remainder is kept. map: BTreeMap>, } @@ -31,8 +32,27 @@ impl LibMappings { } } - /// Add a mapping to this address space. Any existing mappings which overlap with the - /// new mapping are removed. + /// Add a mapping to this address space. + /// + /// The new mapping reflects the current state of the address range it + /// covers, so it takes precedence over whatever was mapped there before. But + /// rather than dropping every existing mapping it touches, we only carve out + /// the overlapping part and keep each existing mapping's non-overlapping + /// remainder: + /// + /// - a mapping reaching into the new range from the left keeps its head + /// `[old_start, start_avma)`; + /// - a mapping reaching out of the new range to the right keeps its tail + /// `[end_avma, old_end)` (its relative address is shifted accordingly); + /// - a mapping that fully contains the new range keeps both a head and a + /// tail piece on either side of it; + /// - a mapping fully inside the new range is dropped. + /// + /// This matters when a small mapping lands inside a larger one — e.g. the + /// kernel placing `[vdso]`/`[vvar]` in the gap of an interpreter's + /// over-long executable mapping. Dropping the larger mapping wholesale would + /// lose symbolication for everything else it covered; keeping the remainder + /// preserves it. /// /// `start_avma` and `end_avma` describe the address range that this mapping /// occupies. @@ -58,21 +78,67 @@ impl LibMappings { end_avma: u64, relative_address_at_start: u32, value: T, - ) { - let removal_avma_range_start = - if let Some(mapping_overlapping_with_start_avma) = self.lookup_impl(start_avma) { - mapping_overlapping_with_start_avma.start_avma - } else { - start_avma - }; - // self.map.drain(removal_avma_range_start..end_avma); - let overlapping_keys: Vec = self + ) where + T: Clone, + { + if start_avma >= end_avma { + return; + } + + // Collect the start keys of every existing mapping that intersects + // `[start_avma, end_avma)`. A mapping `[m_start, m_end)` intersects iff + // `m_start < end_avma && m_end > start_avma`. Every such mapping starts + // within the range, except possibly one that starts before `start_avma` + // and reaches into it — `lookup_impl(start_avma)` finds that one. + let mut intersecting: Vec = self .map - .range(removal_avma_range_start..end_avma) + .range(start_avma..end_avma) .map(|(start_avma, _)| *start_avma) .collect(); - for key in overlapping_keys { - self.map.remove(&key); + if let Some(straddler) = self.lookup_impl(start_avma) { + if straddler.start_avma < start_avma { + intersecting.push(straddler.start_avma); + } + } + + // Replace each intersecting mapping with its non-overlapping remainder. + for key in intersecting { + let Mapping { + start_avma: m_start, + end_avma: m_end, + relative_address_at_start: m_rel, + value: m_value, + } = self.map.remove(&key).unwrap(); + + // The head `[m_start, start_avma)` keeps the original relative + // address; the tail `[end_avma, m_end)` starts `end_avma - m_start` + // bytes further into the library, so its relative address shifts by + // that amount. + let head = Mapping { + start_avma: m_start, + end_avma: start_avma, + relative_address_at_start: m_rel, + value: m_value.clone(), + }; + let tail = Mapping { + start_avma: end_avma, + end_avma: m_end, + relative_address_at_start: m_rel.wrapping_add((end_avma - m_start) as u32), + value: m_value, + }; + match (m_start < start_avma, m_end > end_avma) { + (true, true) => { + self.map.insert(head.start_avma, head); + self.map.insert(tail.start_avma, tail); + } + (true, false) => { + self.map.insert(head.start_avma, head); + } + (false, true) => { + self.map.insert(tail.start_avma, tail); + } + (false, false) => {} // fully covered: drop it + } } self.map.insert( @@ -140,22 +206,112 @@ mod test { use super::*; #[test] - fn test_lib_mappings() { + fn lookup_covers_ranges_and_respects_gaps() { + let mut m = LibMappings::new(); + m.add_mapping(0x1000, 0x2000, 0, "a"); + m.add_mapping(0x3000, 0x4000, 0, "b"); + assert_eq!(m.lookup(0x0fff), None); + assert_eq!(m.lookup(0x1500), Some(&"a")); + assert_eq!(m.lookup(0x2000), None); // end is exclusive + assert_eq!(m.lookup(0x2500), None); // gap between mappings + assert_eq!(m.lookup(0x3500), Some(&"b")); + } + + #[test] + fn non_overlapping_neighbours_are_left_untouched() { + let mut m = LibMappings::new(); + m.add_mapping(0x1000, 0x2000, 0, "a"); + m.add_mapping(0x3000, 0x4000, 0, "b"); + // Fill the gap exactly; neither neighbour should be disturbed. + m.add_mapping(0x2000, 0x3000, 0, "c"); + assert_eq!(m.lookup(0x1800), Some(&"a")); + assert_eq!(m.lookup(0x2800), Some(&"c")); + assert_eq!(m.lookup(0x3800), Some(&"b")); + } + + #[test] + fn exact_overlap_replaces_the_value() { + let mut m = LibMappings::new(); + m.add_mapping(0x1000, 0x2000, 0, "old"); + m.add_mapping(0x1000, 0x2000, 0, "new"); + assert_eq!(m.lookup(0x1500), Some(&"new")); + } + + #[test] + fn new_mapping_clips_the_tail_of_an_existing_one() { + let mut m = LibMappings::new(); + m.add_mapping(0x1000, 0x3000, 0, "old"); + m.add_mapping(0x2000, 0x4000, 0, "new"); // overlaps old's tail + assert_eq!(m.lookup(0x1500), Some(&"old")); // old's head survives + assert_eq!(m.lookup(0x2000), Some(&"new")); + assert_eq!(m.lookup(0x2fff), Some(&"new")); + assert_eq!(m.lookup(0x3500), Some(&"new")); + } + + #[test] + fn new_mapping_clips_the_head_of_an_existing_one_and_fixes_relative_address() { + let mut m = LibMappings::new(); + // "lib" is based at 0x2000 (relative address 0 there). + m.add_mapping(0x2000, 0x4000, 0, "lib"); + m.add_mapping(0x1000, 0x3000, 0, "new"); // overlaps lib's head + assert_eq!(m.lookup(0x2500), Some(&"new")); + assert_eq!(m.lookup(0x3500), Some(&"lib")); // lib's tail survives + // The surviving tail starts at 0x3000, which is 0x1000 into "lib", so an + // address there must still resolve to the correct relative address. + assert_eq!(m.convert_address(0x3800), Some((0x1800, &"lib"))); + } + + #[test] + fn mapping_fully_inside_the_new_one_is_dropped() { + let mut m = LibMappings::new(); + m.add_mapping(0x2000, 0x3000, 0, "inner"); + m.add_mapping(0x1000, 0x4000, 0, "outer"); + assert_eq!(m.lookup(0x1500), Some(&"outer")); + assert_eq!(m.lookup(0x2500), Some(&"outer")); + } + + /// The motivating real-world case: `ld.so` is mapped with an over-long + /// executable mapping that spans the gap where the kernel later drops + /// `[vdso]`/`[vvar]`. Adding `[vdso]` inside it must not evict the whole + /// interpreter mapping; the executable code before the gap, and the + /// remainder after it, must stay resolvable with correct relative addresses. + #[test] + fn vdso_inside_ld_so_mapping_keeps_the_interpreter_resolvable() { + let mut m = LibMappings::new(); + let ld_so_base = 0x7fff_0000_0000u64; + // ld.so executable mapping, base-relative (relative address 0 at start). + m.add_mapping(ld_so_base, ld_so_base + 0x40000, 0, "ld.so"); + // [vdso] lands in the inter-segment gap, fully inside the ld.so mapping. + let vdso_start = ld_so_base + 0x2b000; + m.add_mapping(vdso_start, vdso_start + 0x2000, 0, "[vdso]"); + + // Executable code before the gap still resolves to ld.so... + assert_eq!( + m.convert_address(ld_so_base + 0x2000), + Some((0x2000, &"ld.so")) + ); + // ...the gap itself now belongs to [vdso]... + assert_eq!(m.lookup(vdso_start + 0x1000), Some(&"[vdso]")); + // ...and the remainder after [vdso] is still ld.so, with the relative + // address preserved across the split (this is what regressed before). + assert_eq!( + m.convert_address(ld_so_base + 0x30000), + Some((0x30000, &"ld.so")) + ); + } + + #[test] + fn one_new_mapping_carves_several_existing_ones() { let mut m = LibMappings::new(); - m.add_mapping(100, 200, 100, "100..200"); - m.add_mapping(200, 250, 200, "200..250"); - assert_eq!(m.lookup(200), Some(&"200..250")); - m.add_mapping(180, 220, 180, "180..220"); - assert_eq!(m.lookup(200), Some(&"180..220")); - assert_eq!(m.lookup(170), None); - assert_eq!(m.lookup(220), None); - m.add_mapping(225, 250, 225, "225..250"); - m.add_mapping(255, 270, 255, "255..270"); - m.add_mapping(100, 150, 100, "100..150"); - assert_eq!(m.lookup(90), None); - assert_eq!(m.lookup(150), None); - assert_eq!(m.lookup(149), Some(&"100..150")); - assert_eq!(m.lookup(200), Some(&"180..220")); - assert_eq!(m.lookup(260), Some(&"255..270")); + m.add_mapping(0x1000, 0x2000, 0, "a"); // clipped on its tail + m.add_mapping(0x2000, 0x3000, 0, "b"); // fully covered + m.add_mapping(0x3000, 0x5000, 0, "c"); // clipped on its head + m.add_mapping(0x1800, 0x4000, 0, "new"); + assert_eq!(m.lookup(0x1400), Some(&"a")); // a's head [0x1000,0x1800) + assert_eq!(m.lookup(0x1800), Some(&"new")); + assert_eq!(m.lookup(0x2800), Some(&"new")); // b is gone + assert_eq!(m.lookup(0x3fff), Some(&"new")); + assert_eq!(m.lookup(0x4000), Some(&"c")); // c's tail [0x4000,0x5000) + assert_eq!(m.lookup(0x4800), Some(&"c")); } } diff --git a/samply-symbols/src/jitdump.rs b/samply-symbols/src/jitdump.rs index 76413bf6f..a6327f50e 100644 --- a/samply-symbols/src/jitdump.rs +++ b/samply-symbols/src/jitdump.rs @@ -20,9 +20,21 @@ use crate::shared::{ LookupAddress, SymbolInfo, }; use crate::symbol_map::{GetInnerSymbolMap, SymbolMap, SymbolMapTrait}; +use crate::v8_inline::V8InlineMap; use crate::{FileAndPathHelper, SourceFilePath, SourceFilePathHandle, SyncAddressInfo}; use crate::{FunctionNameHandle, SymbolMapStringInterner, SymbolNameHandle}; +/// Resolve the V8 code log for the process that produced a jitdump. +/// +/// `CODSPEED_V8_LOG` names a directory holding one `codspeed-v8-.log` per +/// profiled process. Joining the jitdump's own pid picks the matching log, so a +/// multi-process capture keeps each process's inline data separate. Returns +/// `None` when the env var is unset. +fn v8_log_path_for_pid(pid: u32) -> Option { + let dir = std::env::var_os("CODSPEED_V8_LOG")?; + Some(std::path::Path::new(&dir).join(format!("codspeed-v8-{pid}.log"))) +} + pub fn is_jitdump_file(file_contents: &FileContentsWrapper) -> bool { const MAGIC_BYTES_BE: &[u8] = b"JiTD"; const MAGIC_BYTES_LE: &[u8] = b"DTiJ"; @@ -189,19 +201,30 @@ impl GetInnerSymbolMap for JitDumpSymbolMap { pub struct JitDumpSymbolMapOuter { data: FileContentsWrapper, index: JitDumpIndex, + /// Inline map from the V8 code log of the process that produced this + /// jitdump, used to expand TurboFan/Maglev inlined frames that the jitdump + /// itself collapses (COD-2821). `None` when no log was found. + v8_inline: Option, } impl JitDumpSymbolMapOuter { pub fn new(data: FileContentsWrapper) -> Result { let cursor = FileContentsCursor::new(&data); let reader = JitDumpReader::new(cursor)?; + let pid = reader.header().pid; let index = JitDumpIndex::from_reader(reader).map_err(Error::JitDumpFileReading)?; - Ok(Self { data, index }) + let v8_inline = v8_log_path_for_pid(pid).and_then(|p| V8InlineMap::from_log_file(&p)); + Ok(Self { + data, + index, + v8_inline, + }) } pub fn make_symbol_map(&self) -> JitDumpSymbolMapInnerWrapper<'_> { let inner = JitDumpSymbolMapInner { index: &self.index, + v8_inline: self.v8_inline.as_ref(), cache: Mutex::new(JitDumpSymbolMapCache::new( &self.data, &self.index, @@ -214,6 +237,7 @@ impl JitDumpSymbolMapOuter { struct JitDumpSymbolMapInner<'a, T: FileContents> { index: &'a JitDumpIndex, + v8_inline: Option<&'a V8InlineMap>, cache: Mutex>, } @@ -296,6 +320,39 @@ impl<'a, T: FileContents> JitDumpSymbolMapInner<'a, T> { size: Some(self.index.entries[index].code_bytes_len as u32), name: name.into(), }; + + // The jitdump records only the innermost inlined source line per PC, so + // a TurboFan/Maglev-inlined chain collapses to a single frame (COD-2821). + // Recover the missing callers from the V8 log, joining on (address, size) + // since V8 reuses freed code addresses over the run. These frames sit + // above the code object's own jitdump frame, which is appended last so a + // non-inlined hit resolves exactly as before. + let inlined_frames: Vec = if let Some(v8) = self.v8_inline { + let code_size = self.index.entries[index].code_bytes_len as u32; + let code_addr = cache.get_debug_info(index).map(|di| di.code_addr); + code_addr + .and_then(|addr| v8.lookup(addr, code_size, offset_relative_to_symbol)) + .map(|frames| { + frames + .iter() + .map(|f| { + let function = cache.string_interner.intern_owned(&f.name); + let file_path = cache.string_interner.intern_owned(&f.file); + FrameDebugInfo { + function: Some(function.into()), + file_path: Some(file_path.into()), + line_number: Some(f.line), + column_number: Some(f.col), + ..Default::default() + } + }) + .collect() + }) + .unwrap_or_default() + } else { + Vec::new() + }; + let Some(debug_info) = cache.get_debug_info(index) else { return Some(SyncAddressInfo { symbol, @@ -312,7 +369,7 @@ impl<'a, T: FileContents> JitDumpSymbolMapInner<'a, T> { .string_interner .intern_owned(&String::from_utf8_lossy(&s)), }; - let frame = FrameDebugInfo { + let outer_frame = FrameDebugInfo { function: Some(name.into()), file_path: Some(file_path.into()), line_number: Some(line), @@ -320,7 +377,10 @@ impl<'a, T: FileContents> JitDumpSymbolMapInner<'a, T> { ..Default::default() }; - let frames = Some(FramesLookupResult::Available(vec![frame])); + // Innermost inlined frame first, the code object's own frame last. + let mut frames = inlined_frames; + frames.push(outer_frame); + let frames = Some(FramesLookupResult::Available(frames)); Some(SyncAddressInfo { symbol, frames }) } } diff --git a/samply-symbols/src/lib.rs b/samply-symbols/src/lib.rs index c2a85bc8e..490dc6d02 100644 --- a/samply-symbols/src/lib.rs +++ b/samply-symbols/src/lib.rs @@ -239,6 +239,7 @@ mod source_file_path; mod symbol_map; mod symbol_map_object; mod symbol_map_string_interner; +mod v8_inline; mod windows; pub use crate::binary_image::{BinaryImage, CodeByteReadingError}; diff --git a/samply-symbols/src/v8_inline.rs b/samply-symbols/src/v8_inline.rs new file mode 100644 index 000000000..979f6306c --- /dev/null +++ b/samply-symbols/src/v8_inline.rs @@ -0,0 +1,403 @@ +//! Reconstruct V8 TurboFan/Maglev inline frames from the V8 code log, keyed by +//! JIT code address + size. +//! +//! V8's jitdump records only the *innermost* inlined source position per PC, so +//! a sampling profiler collapses an inlined chain `root -> a -> b -> c` down to +//! just `root` (COD-2821). The full inline tree is instead present in the code +//! log's `code-source-info` records (`inlining_positions` + `inlined_functions`). +//! Both use the same runtime code addresses, so joining by address expands a +//! single jitdump frame into the real inline stack. +//! +//! A code address is not unique over a process's lifetime: V8 frees code and +//! reuses the address for another object. We disambiguate by also keying on the +//! code object's byte size, which both the jitdump (`CODE_LOAD`) and the log +//! (`code-creation`) report identically and which is available at lookup time. + +use std::collections::HashMap; +use std::path::Path; + +/// One reconstructed inlined frame. +#[derive(Debug, Clone, Default, PartialEq, Eq)] +pub struct V8Frame { + /// Display name in V8's jitdump style: `JS: ::` + /// (anonymous functions have an empty ``, e.g. `JS:* /path:17:32`). This + /// matches how the jitdump names the standalone (non-inlined) code object + /// for the same function, so inlined and non-inlined occurrences coalesce. + pub name: String, + pub file: String, + pub line: u32, + pub col: u32, +} + +#[derive(Debug, Default)] +struct CodeInline { + /// `(code_offset, script_offset, inlining_id)`, sorted ascending by code_offset. + positions: Vec<(u32, u32, Option)>, + /// `code-source-info` inlining tree: `(inlined_function_id, call_site_script_offset, parent_inlining_id)`. + inl_pos: Vec<(i32, u32, Option)>, + /// Resolved inlined functions, indexed by `inlined_function_id`, already + /// formatted with this code object's tier marker. + inl_fns: Vec, +} + +/// Inline maps for every optimized JS code object that has inlining, keyed by +/// the code's runtime start address (== jitdump `CODE_LOAD` address) and byte +/// size (== jitdump `CODE_LOAD` size), so addresses reused over the run don't +/// collide. +#[derive(Debug, Default)] +pub struct V8InlineMap { + by_key: HashMap<(u64, u32), CodeInline>, +} + +impl V8InlineMap { + pub fn from_log_file(path: &Path) -> Option { + let text = std::fs::read_to_string(path).ok()?; + let map = Self::from_log_str(&text); + if map.by_key.is_empty() { + None + } else { + Some(map) + } + } + + pub fn from_log_str(text: &str) -> Self { + // Pass 1: build sfi_addr -> (function, location) from `code-creation` + // records (one is emitted per tier; any resolves the name). A function + // can be inlined into code that is logged before the function's own + // standalone code-creation, so we need the full map before pass 2. + let mut sfi_fn: HashMap = HashMap::new(); + for line in text.lines() { + let Some(rest) = line.strip_prefix("code-creation,") else { + continue; + }; + // ,,,,,,, + let p: Vec<&str> = rest.splitn(8, ',').collect(); + if p.len() < 7 { + continue; + } + let Some(sfi) = parse_hex(p[6]) else { + continue; + }; + sfi_fn.entry(sfi).or_insert_with(|| parse_name(p[5])); + } + + // Pass 2 (in order): parse `code-source-info` records that carry an + // inlining tree, pairing each with its code object's size + tier marker. + // V8 emits `code-creation` immediately followed by the matching + // `code-source-info` for the same address, so the last code-creation + // seen at an address identifies the object the code-source-info belongs + // to (and supplies its size, which keys the map, and its tier marker, + // which formats the frame names). + let mut by_key = HashMap::new(); + let mut live_at_addr: HashMap = HashMap::new(); + for line in text.lines() { + if let Some(rest) = line.strip_prefix("code-creation,") { + let p: Vec<&str> = rest.splitn(8, ',').collect(); + if p.len() < 7 { + continue; + } + let (Some(addr), Ok(size)) = (parse_hex(p[3]), p[4].parse::()) else { + continue; + }; + let marker = p.get(7).copied().unwrap_or("").to_string(); + live_at_addr.insert(addr, (size, marker)); + } else if let Some(rest) = line.strip_prefix("code-source-info,") { + // ,