From 15cb8817c1a5f17b46659fa263ea538a65e38dc4 Mon Sep 17 00:00:00 2001 From: Per Larsen Date: Wed, 10 Jun 2026 02:22:38 -0700 Subject: [PATCH 1/3] postprocess: tests: update qsort.rs after recent transpiler changes The transpiler now emits an allow for clippy::missing_safety_doc, which shifts the expected function spans by one line. --- c2rust-postprocess/tests/examples/qsort.rs | 1 + c2rust-postprocess/tests/test_definitions.py | 24 ++++++++++---------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/c2rust-postprocess/tests/examples/qsort.rs b/c2rust-postprocess/tests/examples/qsort.rs index 20423152f8..2eb0b7b877 100644 --- a/c2rust-postprocess/tests/examples/qsort.rs +++ b/c2rust-postprocess/tests/examples/qsort.rs @@ -1,4 +1,5 @@ #![allow( + clippy::missing_safety_doc, dead_code, non_camel_case_types, non_snake_case, diff --git a/c2rust-postprocess/tests/test_definitions.py b/c2rust-postprocess/tests/test_definitions.py index ddc91fc744..590b490bc1 100644 --- a/c2rust-postprocess/tests/test_definitions.py +++ b/c2rust-postprocess/tests/test_definitions.py @@ -35,24 +35,24 @@ def test_get_rust_function_spans(transpile_qsort, pytestconfig): expected_fn_spans = [ { "name": "swap", - "start_line": 10, - "end_line": 14, - "start_byte": 154, - "end_byte": 316, + "start_line": 11, + "end_line": 15, + "start_byte": 186, + "end_byte": 348, }, { "name": "partition", - "start_line": 16, - "end_line": 39, - "start_byte": 330, - "end_byte": 1193, + "start_line": 17, + "end_line": 40, + "start_byte": 362, + "end_byte": 1225, }, { "name": "quickSort", - "start_line": 41, - "end_line": 51, - "start_byte": 1207, - "end_byte": 1566, + "start_line": 42, + "end_line": 52, + "start_byte": 1239, + "end_byte": 1598, }, ] From 30e2da6dac24989e913b6b585a3f33096ba11c60 Mon Sep 17 00:00:00 2001 From: Per Larsen Date: Wed, 10 Jun 2026 02:22:48 -0700 Subject: [PATCH 2/3] transpile: emit preprocessed text for each definition in the C decl map Restructure *.c_decls.json from a flat identifier-to-snippet map into {definitions: {ident: {definition, preprocessed_definition}}}. The AST exporter runs an in-process preprocessor-only pass per translation unit (clang -E -fdirectives-only -C equivalent): conditional directives are resolved, dropping inactive regions and their comments, while macro invocations and comments are preserved. Line markers map the output back to each function definition's source range; the main file is recognized by the spelling in the leading line marker, since marker paths are relative to the compilation directory. Functions whose mapping fails get a null preprocessed_definition. --- c2rust-ast-exporter/src/AstExporter.cpp | 88 +++++++++- c2rust-ast-exporter/src/AstExporter.hpp | 4 +- c2rust-ast-exporter/src/ExportResult.cpp | 7 +- c2rust-ast-exporter/src/ExportResult.hpp | 3 + c2rust-ast-exporter/src/lib.rs | 42 +++-- c2rust-transpile/src/lib.rs | 13 +- c2rust-transpile/src/translator/mod.rs | 153 ++++++++++++++++-- .../tests/c_decls_snapshots/directives.c | 23 +++ c2rust-transpile/tests/snapshots.rs | 6 + .../snapshots__c_decls@directives.c.snap | 16 ++ .../snapshots/snapshots__c_decls@nh.c.snap | 92 ++++++++--- 11 files changed, 397 insertions(+), 50 deletions(-) create mode 100644 c2rust-transpile/tests/c_decls_snapshots/directives.c create mode 100644 c2rust-transpile/tests/snapshots/snapshots__c_decls@directives.c.snap diff --git a/c2rust-ast-exporter/src/AstExporter.cpp b/c2rust-ast-exporter/src/AstExporter.cpp index 87e56b481e..af49fb9041 100644 --- a/c2rust-ast-exporter/src/AstExporter.cpp +++ b/c2rust-ast-exporter/src/AstExporter.cpp @@ -15,6 +15,8 @@ #include "llvm/Support/Path.h" // Declares clang::SyntaxOnlyAction. #include "clang/Frontend/FrontendActions.h" +// Declares clang::DoPrintPreprocessedInput. +#include "clang/Frontend/Utils.h" #include "clang/Tooling/CommonOptionsParser.h" #include "clang/AST/DeclVisitor.h" @@ -3030,6 +3032,41 @@ class TranslateAction : public clang::ASTFrontendAction { } }; +// Prints preprocessed source to a string, like `clang -E -fdirectives-only -C`: +// conditional directives are resolved (dropping inactive regions and their +// comments) but macro invocations are not expanded, comments are kept, and +// line markers are kept so output lines can be mapped back to source lines. +class PrintPreprocessedToStringAction + : public clang::PreprocessorFrontendAction { + PreprocessedOutputs *outputs; + + public: + explicit PrintPreprocessedToStringAction(PreprocessedOutputs *outputs) + : outputs(outputs) {} + + protected: + void ExecuteAction() override { + auto &CI = getCompilerInstance(); + + clang::PreprocessorOutputOptions Opts; + Opts.ShowCPP = 1; + Opts.ShowComments = 1; + Opts.ShowLineMarkers = 1; +#if CLANG_VERSION_MAJOR >= 11 + // Without directives-only support, fall back to full macro expansion; + // the output is still usable, just further from the original source. + Opts.DirectivesOnly = 1; + CI.getPreprocessor().SetMacroExpansionOnlyInDirectives(); +#endif // CLANG_VERSION_MAJOR >= 11 + + std::string out; + llvm::raw_string_ostream OS(out); + clang::DoPrintPreprocessedInput(CI.getPreprocessor(), &OS, Opts); + OS.flush(); + (*outputs)[make_realpath(getCurrentFile().str())] = std::move(out); + } +}; + // Apply a custom category to all command-line options so that they are the // only ones displayed. static llvm::cl::OptionCategory MyToolCategory("my-tool options"); @@ -3098,8 +3135,26 @@ class MyFrontendActionFactory : public FrontendActionFactory { #endif // CLANG_VERSION_MAJOR }; +class PreprocessActionFactory : public FrontendActionFactory { + PreprocessedOutputs *outputs; + + public: + PreprocessActionFactory(PreprocessedOutputs *outputs) : outputs(outputs) {} + +#if CLANG_VERSION_MAJOR < 10 + clang::FrontendAction *create() override { + return new PrintPreprocessedToStringAction(outputs); + } +#else + std::unique_ptr create() override { + return std::make_unique(outputs); + } +#endif // CLANG_VERSION_MAJOR +}; + // Marshal the output map into something easy to manipulate in Rust -ExportResult *make_export_result(const Outputs &outputs) { +ExportResult *make_export_result(const Outputs &outputs, + const PreprocessedOutputs &preprocessed) { auto result = new ExportResult; auto n = outputs.size(); result->resize(n); @@ -3117,6 +3172,14 @@ ExportResult *make_export_result(const Outputs &outputs) { std::copy(std::begin(bytes), std::end(bytes), byte_array); result->bytes[i] = byte_array; result->sizes[i] = bytes.size(); + + auto pp = preprocessed.find(name); + if (pp != preprocessed.end()) { + auto const &text = pp->second; + auto pp_array = new char[text.size() + 1]; + memcpy(pp_array, text.c_str(), text.size() + 1); + result->preprocessed[i] = pp_array; + } i++; } @@ -3125,7 +3188,8 @@ ExportResult *make_export_result(const Outputs &outputs) { // Extract clang AST for the source file specified in the argument vector. // Note: The arguments should only reference one source file at a time. -Outputs process(int argc, const char *argv[], int *result) { +Outputs process(int argc, const char *argv[], int *result, + PreprocessedOutputs *preprocessed) { auto argv_ = augment_argv(argc, argv); int argc_ = argv_.size() - 1; // ignore the extra nullptr @@ -3163,12 +3227,24 @@ Outputs process(int argc, const char *argv[], int *result) { *result = Tool.run(&myFrontendActionFactory); assert(outputs.size() == 1 && "Expected exactly one output."); + + if (preprocessed) { + // The AST run above already reported diagnostics; don't repeat them. + IgnoringDiagConsumer diagConsumer; + Tool.setDiagnosticConsumer(&diagConsumer); + PreprocessActionFactory preprocessActionFactory(preprocessed); + // Failure here is not fatal: the preprocessed text is optional + // metadata, and its absence shows up as a missing map entry. + Tool.run(&preprocessActionFactory); + } + return outputs; } // AST exporter library interface. extern "C" { -ExportResult *ast_exporter(int argc, const char *argv[], int debug) { +ExportResult *ast_exporter(int argc, const char *argv[], int debug, + int emit_preprocessed) { #ifndef NDEBUG if (debug) { llvm::DebugFlag = true; @@ -3177,8 +3253,10 @@ ExportResult *ast_exporter(int argc, const char *argv[], int debug) { #endif // NDEBUG int result; - auto outputs = process(argc, argv, &result); - return make_export_result(outputs); + PreprocessedOutputs preprocessed; + auto outputs = process(argc, argv, &result, + emit_preprocessed ? &preprocessed : nullptr); + return make_export_result(outputs, preprocessed); } void drop_export_result(ExportResult *result) { delete result; } diff --git a/c2rust-ast-exporter/src/AstExporter.hpp b/c2rust-ast-exporter/src/AstExporter.hpp index f5f309dbc1..015a7e8448 100644 --- a/c2rust-ast-exporter/src/AstExporter.hpp +++ b/c2rust-ast-exporter/src/AstExporter.hpp @@ -12,7 +12,9 @@ #include using Outputs = std::unordered_map>; +using PreprocessedOutputs = std::unordered_map; -Outputs process(int argc, const char *argv[], int *result); +Outputs process(int argc, const char *argv[], int *result, + PreprocessedOutputs *preprocessed = nullptr); #endif /* AstExporter_hpp */ diff --git a/c2rust-ast-exporter/src/ExportResult.cpp b/c2rust-ast-exporter/src/ExportResult.cpp index 143185b040..9cb721ac5f 100644 --- a/c2rust-ast-exporter/src/ExportResult.cpp +++ b/c2rust-ast-exporter/src/ExportResult.cpp @@ -7,7 +7,8 @@ #include "ExportResult.hpp" -ExportResult::ExportResult() : entries(0), names(), bytes(), sizes() {} +ExportResult::ExportResult() + : entries(0), names(), bytes(), sizes(), preprocessed() {} ExportResult::~ExportResult() { deallocate(); } @@ -16,9 +17,11 @@ void ExportResult::resize(std::size_t n) { names = new char *[n]; bytes = new std::uint8_t *[n]; sizes = new std::size_t[n]; + preprocessed = new char *[n]; std::fill_n(names, n, nullptr); std::fill_n(bytes, n, nullptr); std::fill_n(sizes, n, 0); + std::fill_n(preprocessed, n, nullptr); entries = n; } @@ -26,10 +29,12 @@ void ExportResult::deallocate() { for (std::size_t i = 0; i < entries; i++) { delete[] names[i]; delete[] bytes[i]; + delete[] preprocessed[i]; } delete[] names; delete[] bytes; delete[] sizes; + delete[] preprocessed; entries = 0; } diff --git a/c2rust-ast-exporter/src/ExportResult.hpp b/c2rust-ast-exporter/src/ExportResult.hpp index 9b1eae8f2f..2a8a1916c1 100644 --- a/c2rust-ast-exporter/src/ExportResult.hpp +++ b/c2rust-ast-exporter/src/ExportResult.hpp @@ -17,6 +17,9 @@ struct ExportResult { char **names; std::uint8_t **bytes; std::size_t *sizes; + // Preprocessed source text per entry; entries may be null when + // preprocessed output was not requested or could not be produced. + char **preprocessed; ExportResult(); ExportResult(ExportResult const &) = delete; diff --git a/c2rust-ast-exporter/src/lib.rs b/c2rust-ast-exporter/src/lib.rs index 46b9dfa287..7cb6ab85dd 100644 --- a/c2rust-ast-exporter/src/lib.rs +++ b/c2rust-ast-exporter/src/lib.rs @@ -27,15 +27,19 @@ pub fn get_clang_major_version() -> Option { .ok() } +/// Returns the untyped AST and, if `emit_preprocessed` was set and +/// preprocessing succeeded, the preprocessed source text of the translation +/// unit (directives-only, with comments and line markers preserved). pub fn get_untyped_ast( file_path: &Path, cc_db: &Path, extra_args: &[&str], debug: bool, -) -> Result { - let cbors = get_ast_cbors(file_path, cc_db, extra_args, debug); - let buffer = cbors - .values() + emit_preprocessed: bool, +) -> Result<(clang_ast::AstContext, Option), Error> { + let cbors = get_ast_cbors(file_path, cc_db, extra_args, debug, emit_preprocessed); + let (buffer, preprocessed) = cbors + .into_values() .next() .ok_or_else(|| Error::new(ErrorKind::InvalidData, "Could not parse input file"))?; @@ -46,7 +50,9 @@ pub fn get_untyped_ast( let items: Value = from_slice(&buffer[..]).unwrap(); - clang_ast::process(items).map_err(|e| Error::new(ErrorKind::InvalidData, format!("{}", e))) + let ast = clang_ast::process(items) + .map_err(|e| Error::new(ErrorKind::InvalidData, format!("{}", e)))?; + Ok((ast, preprocessed)) } /// libClangTooling is not thread-safe, so we must not allow concurrent calls to `ast_exporter`. @@ -57,9 +63,8 @@ fn get_ast_cbors( cc_db: &Path, extra_args: &[&str], debug: bool, -) -> HashMap> { - let mut res = 0; - + emit_preprocessed: bool, +) -> HashMap, Option)> { let mut args_owned = vec![CString::new("ast_exporter").unwrap()]; args_owned.push(CString::new(file_path.to_str().unwrap()).unwrap()); args_owned.push(CString::new("-p").unwrap()); @@ -78,7 +83,7 @@ fn get_ast_cbors( args_ptrs.len() as c_int, args_ptrs.as_ptr(), debug.into(), - &mut res, + emit_preprocessed.into(), ); drop(lock); hashmap = marshal_result(ptr); @@ -98,12 +103,13 @@ extern "C" { /// # Safety /// /// Not thread-safe; must not be called multiple times concurrently. - // ExportResult *ast_exporter(int argc, char *argv[]); + // ExportResult *ast_exporter(int argc, char *argv[], + // int debug, int emit_preprocessed); fn ast_exporter( argc: c_int, argv: *const *const c_char, debug: c_int, - res: *mut c_int, + emit_preprocessed: c_int, ) -> *mut ffi::ExportResult; // void drop_export_result(ExportResult *result); @@ -112,7 +118,9 @@ extern "C" { fn clang_version() -> *const c_char; } -unsafe fn marshal_result(result: *const ffi::ExportResult) -> HashMap> { +unsafe fn marshal_result( + result: *const ffi::ExportResult, +) -> HashMap, Option)> { let mut output = HashMap::new(); let n = (*result).entries as isize; @@ -131,7 +139,15 @@ unsafe fn marshal_result(result: *const ffi::ExportResult) -> HashMap { warn!( @@ -720,9 +721,17 @@ fn transpile_single( println!("{:#?}", Printer::new(io::stdout()).print(&typed_context)); } + // Extract preprocessed text for each function definition so the C decl + // map can carry it alongside the original source snippet. + let preprocessed_definitions = preprocessed_source + .map(|source| { + translator::collect_preprocessed_definitions(&typed_context, input_path, &source) + }) + .unwrap_or_default(); + // Perform the translation let (translated_string, maybe_decl_map, pragmas, crates) = - translator::translate(typed_context, tcfg, input_path); + translator::translate(typed_context, tcfg, input_path, &preprocessed_definitions); if let Some(decl_map) = maybe_decl_map { let decl_map_path = output_path.with_extension("c_decls.json"); diff --git a/c2rust-transpile/src/translator/mod.rs b/c2rust-transpile/src/translator/mod.rs index 3998138b8e..10ee8be698 100644 --- a/c2rust-transpile/src/translator/mod.rs +++ b/c2rust-transpile/src/translator/mod.rs @@ -15,6 +15,7 @@ use indexmap::{IndexMap, IndexSet}; use itertools::Itertools; use log::{error, trace, warn}; use proc_macro2::{Punct, Spacing::*, Span, TokenStream, TokenTree}; +use serde_derive::Serialize; use syn::spanned::Spanned as _; use syn::{ AttrStyle, BareVariadic, BinOp, Block, Expr, ExprBinary, ExprBlock, ExprBreak, ExprCast, @@ -557,20 +558,38 @@ pub fn emit_c_decl_map( t: &Translation, converted_decls: &HashMap, decl_source_ranges: IndexMap, + preprocessed_definitions: &IndexMap, ) -> DeclMap { let mut path_to_c_source_range: IndexMap<&Ident, _> = Default::default(); + let mut path_to_c_name: IndexMap<&Ident, String> = Default::default(); for (decl, source_range) in decl_source_ranges { + let c_name = t + .ast_context + .get_decl(&decl) + .and_then(|decl| decl.kind.get_name()) + .cloned(); match converted_decls.get(&decl) { Some(ConvertedDecl::ForeignItem(item)) => { - path_to_c_source_range - .insert(foreign_item_ident_vis(item).unwrap().0, source_range); + let ident = foreign_item_ident_vis(item).unwrap().0; + path_to_c_source_range.insert(ident, source_range); + if let Some(c_name) = c_name.clone() { + path_to_c_name.insert(ident, c_name); + } } Some(ConvertedDecl::Item(item)) => { - path_to_c_source_range.insert(item_ident(item).unwrap(), source_range); + let ident = item_ident(item).unwrap(); + path_to_c_source_range.insert(ident, source_range); + if let Some(c_name) = c_name.clone() { + path_to_c_name.insert(ident, c_name); + } } Some(ConvertedDecl::Items(items)) => { for item in items { - path_to_c_source_range.insert(item_ident(item).unwrap(), source_range); + let ident = item_ident(item).unwrap(); + path_to_c_source_range.insert(ident, source_range); + if let Some(c_name) = c_name.clone() { + path_to_c_name.insert(ident, c_name); + } } } Some(ConvertedDecl::NoItem) => {} @@ -655,7 +674,7 @@ pub fn emit_c_decl_map( &file_content[begin_offset..end_offset] }; - let item_path_to_c_source: IndexMap<_, _> = path_to_c_source_range + let definitions = path_to_c_source_range .into_iter() .map(|(ident, range)| { let path = ident.to_string(); @@ -665,10 +684,19 @@ pub fn emit_c_decl_map( range.end, )) .unwrap(); - (path, c_src.to_owned()) + ( + path, + CDeclMapDefinition { + definition: c_src.to_owned(), + preprocessed_definition: path_to_c_name + .get(ident) + .and_then(|c_name| preprocessed_definitions.get(c_name)) + .cloned(), + }, + ) }) .collect(); - item_path_to_c_source + DeclMap { definitions } } pub fn translate_failure(tcfg: &TranspilerConfig, msg: &str) { @@ -679,12 +707,117 @@ pub fn translate_failure(tcfg: &TranspilerConfig, msg: &str) { } } -type DeclMap = IndexMap; +/// Extract per-function preprocessed text from preprocessor output with line +/// markers (`clang -E -fdirectives-only -C` equivalent), as produced by the +/// AST exporter. Line markers map output lines back to source lines, which +/// are then matched against the source ranges of top-level function +/// definitions. Returns a map from C function name to preprocessed text. +pub fn collect_preprocessed_definitions( + ast_context: &TypedAstContext, + main_file: &Path, + preprocessed: &str, +) -> IndexMap { + let main_file_canonical = main_file + .canonicalize() + .unwrap_or_else(|_| main_file.to_owned()); + + // Markers spell the file the way the compile command did, which may be + // relative to a compilation directory we don't know. The output reliably + // begins with a marker for the main file, so learn its spelling there. + let main_file_spelling = preprocessed.lines().next().and_then(parse_line_marker); + + // Associate each non-marker output line with its original source line, + // keeping only lines that come from the main file. + let mut main_file_lines: Vec<(u64, &str)> = Vec::new(); + let mut is_main_file_cache: HashMap<&str, bool> = HashMap::new(); + let mut in_main_file = false; + let mut current_line: u64 = 0; + for line in preprocessed.lines() { + if let Some((line_no, file)) = parse_line_marker(line) { + in_main_file = *is_main_file_cache.entry(file).or_insert_with(|| { + Some(file) == main_file_spelling.map(|(_, file)| file) + || Path::new(file) + .canonicalize() + .map(|path| path == main_file_canonical) + .unwrap_or(false) + }); + current_line = line_no; + } else { + if in_main_file { + main_file_lines.push((current_line, line)); + } + current_line += 1; + } + } + + ast_context + .top_decl_locs() + .into_iter() + .filter_map(|(decl_id, range)| { + let decl = ast_context.get_decl(&decl_id)?; + let CDeclKind::Function { + name, + body: Some(_), + .. + } = &decl.kind + else { + return None; + }; + + // `earliest_begin` includes the gap holding leading comments, but + // points into the line on which the previous decl ended; skip + // that partial line unless this decl also starts on it. + let mut begin_line = range.earliest_begin.line; + if range.earliest_begin.column > 1 && range.strict_begin.line > begin_line { + begin_line += 1; + } + let end_line = range.end.line; + + let definition = main_file_lines + .iter() + .filter(|(line_no, _)| (begin_line..=end_line).contains(line_no)) + .map(|(_, text)| *text) + .join("\n"); + let definition = definition.trim_matches('\n'); + if definition.trim().is_empty() { + return None; + } + Some((name.clone(), definition.to_owned())) + }) + .collect() +} + +/// Parse a GNU-style line marker (`# "" [flags...]`) emitted by +/// the preprocessor. The returned line number and file apply to the next +/// output line. Other `#` lines (e.g. `#define` passed through in +/// directives-only mode) are not markers and yield `None`. +fn parse_line_marker(line: &str) -> Option<(u64, &str)> { + let rest = line.strip_prefix('#')?; + let rest = rest.trim_start_matches([' ', '\t']); + let digits_len = rest.bytes().take_while(|b| b.is_ascii_digit()).count(); + let line_no: u64 = rest.get(..digits_len)?.parse().ok()?; + let rest = rest[digits_len..].trim_start_matches([' ', '\t']); + let path = rest.strip_prefix('"')?; + let path = &path[..path.find('"')?]; + Some((line_no, path)) +} + +#[derive(Serialize)] +pub struct DeclMap { + definitions: IndexMap, +} + +#[derive(Serialize)] +struct CDeclMapDefinition { + definition: String, + preprocessed_definition: Option, +} pub fn translate( ast_context: TypedAstContext, tcfg: &TranspilerConfig, main_file: &Path, + preprocessed_definitions: &IndexMap, ) -> (String, Option, PragmaVec, CrateSet) { let mut t = Translation::new(ast_context, tcfg, main_file); let ctx = ExprContext { @@ -918,8 +1051,8 @@ pub fn translate( .collect::>(); // Generate a map from Rust items to the source code of their C declarations. - let decl_map = - decl_source_ranges.map(|ranges| emit_c_decl_map(&t, &converted_decls, ranges)); + let decl_map = decl_source_ranges + .map(|ranges| emit_c_decl_map(&t, &converted_decls, ranges, preprocessed_definitions)); t.ast_context.sort_top_decls_for_emitting(); diff --git a/c2rust-transpile/tests/c_decls_snapshots/directives.c b/c2rust-transpile/tests/c_decls_snapshots/directives.c new file mode 100644 index 0000000000..7cde7199f5 --- /dev/null +++ b/c2rust-transpile/tests/c_decls_snapshots/directives.c @@ -0,0 +1,23 @@ +// comment for cond_fn +int cond_fn(void) { +#ifdef NOT_DEFINED + /* comment in inactive region; must not appear in preprocessed text */ + return 1; +#else + // comment in active region + return 2; +#endif +} + +#if 0 +/* entirely disabled function */ +int disabled_fn(void) { return 3; } +#endif + +#define DOUBLE(x) ((x) + (x)) + +/* comment for uses_macro */ +int uses_macro(int n) { + /* macro invocation should stay unexpanded */ + return DOUBLE(n); +} diff --git a/c2rust-transpile/tests/snapshots.rs b/c2rust-transpile/tests/snapshots.rs index 4f10e330ea..1333b925bb 100644 --- a/c2rust-transpile/tests/snapshots.rs +++ b/c2rust-transpile/tests/snapshots.rs @@ -596,3 +596,9 @@ fn test_c_decls_nh() { let c_path = Path::new("tests/c_decls_snapshots/nh.c"); transpile_with_c_decl_map_snapshot(c_path); } + +#[test] +fn test_c_decls_directives() { + let c_path = Path::new("tests/c_decls_snapshots/directives.c"); + transpile_with_c_decl_map_snapshot(c_path); +} diff --git a/c2rust-transpile/tests/snapshots/snapshots__c_decls@directives.c.snap b/c2rust-transpile/tests/snapshots/snapshots__c_decls@directives.c.snap new file mode 100644 index 0000000000..9a42c1b96b --- /dev/null +++ b/c2rust-transpile/tests/snapshots/snapshots__c_decls@directives.c.snap @@ -0,0 +1,16 @@ +--- +source: c2rust-transpile/tests/snapshots.rs +expression: cat tests/c_decls_snapshots/directives.c_decls.json +--- +{ + "definitions": { + "cond_fn": { + "definition": "// comment for cond_fn\nint cond_fn(void) {\n#ifdef NOT_DEFINED\n /* comment in inactive region; must not appear in preprocessed text */\n return 1;\n#else\n // comment in active region\n return 2;\n#endif\n}", + "preprocessed_definition": "// comment for cond_fn\nint cond_fn(void) {\n\n\n\n\n // comment in active region\n return 2;\n\n}" + }, + "uses_macro": { + "definition": "/* comment for uses_macro */\nint uses_macro(int n) {\n /* macro invocation should stay unexpanded */\n return DOUBLE(n);\n}", + "preprocessed_definition": "/* comment for uses_macro */\nint uses_macro(int n) {\n /* macro invocation should stay unexpanded */\n return DOUBLE(n);\n}" + } + } +} diff --git a/c2rust-transpile/tests/snapshots/snapshots__c_decls@nh.c.snap b/c2rust-transpile/tests/snapshots/snapshots__c_decls@nh.c.snap index c0639595d4..1d2ae33395 100644 --- a/c2rust-transpile/tests/snapshots/snapshots__c_decls@nh.c.snap +++ b/c2rust-transpile/tests/snapshots/snapshots__c_decls@nh.c.snap @@ -3,22 +3,78 @@ source: c2rust-transpile/tests/snapshots.rs expression: cat tests/c_decls_snapshots/nh.c_decls.json --- { - "FOO": "FOO 4000+50", - "a": "#include \n#include \nint a;", - "a1": "// comment before multiple decl\nint a1,", - "a2": ", a2,", - "a3": ", a3;", - "another": "int another;", - "another_func": "int\nanother_func(void)\n{\n int var = 0;\n return 4 + var;\n}", - "bb": "int bb;", - "ccc": "/*comment for cc*/int ccc;", - "d": "int d;", - "e": "int e;", - "f": "void f(void){}", - "forward_decl_before_other_def": "/*real defn*/\nvoid forward_decl_before_other_def(int x) { }", - "func_after_decl": "void func_after_decl(int n) {}", - "func_holding_define": "int\nfunc_holding_define(void)\n{\n#define FOO 4000+50\n return FOO;\n}", - "g": "void g(void){}", - "h": "/*comment for h*/void h(void){}", - "ngx_hash_init": "//comment for ngx_hash_init\nvoid\nngx_hash_init(void)\n{\n size_t len;\n ngx_uint_t align = len & ~sizeof(void *);\n}" + "definitions": { + "FOO": { + "definition": "FOO 4000+50", + "preprocessed_definition": null + }, + "a": { + "definition": "#include \n#include \nint a;", + "preprocessed_definition": null + }, + "a1": { + "definition": "// comment before multiple decl\nint a1,", + "preprocessed_definition": null + }, + "a2": { + "definition": ", a2,", + "preprocessed_definition": null + }, + "a3": { + "definition": ", a3;", + "preprocessed_definition": null + }, + "another": { + "definition": "int another;", + "preprocessed_definition": null + }, + "another_func": { + "definition": "int\nanother_func(void)\n{\n int var = 0;\n return 4 + var;\n}", + "preprocessed_definition": "int\nanother_func(void)\n{\n int var = 0;\n return 4 + var;\n}" + }, + "bb": { + "definition": "int bb;", + "preprocessed_definition": null + }, + "ccc": { + "definition": "/*comment for cc*/int ccc;", + "preprocessed_definition": null + }, + "d": { + "definition": "int d;", + "preprocessed_definition": null + }, + "e": { + "definition": "int e;", + "preprocessed_definition": null + }, + "f": { + "definition": "void f(void){}", + "preprocessed_definition": "void f(void){}void g(void){}/*comment for h*/void h(void){}int d;int e;;;;;;;;int another;" + }, + "forward_decl_before_other_def": { + "definition": "/*real defn*/\nvoid forward_decl_before_other_def(int x) { }", + "preprocessed_definition": "/*real defn*/\nvoid forward_decl_before_other_def(int x) { }" + }, + "func_after_decl": { + "definition": "void func_after_decl(int n) {}", + "preprocessed_definition": "void func_after_decl(int n) {}" + }, + "func_holding_define": { + "definition": "int\nfunc_holding_define(void)\n{\n#define FOO 4000+50\n return FOO;\n}", + "preprocessed_definition": "int\nfunc_holding_define(void)\n{\n#define FOO 4000+50\n return FOO;\n}" + }, + "g": { + "definition": "void g(void){}", + "preprocessed_definition": "void f(void){}void g(void){}/*comment for h*/void h(void){}int d;int e;;;;;;;;int another;" + }, + "h": { + "definition": "/*comment for h*/void h(void){}", + "preprocessed_definition": "void f(void){}void g(void){}/*comment for h*/void h(void){}int d;int e;;;;;;;;int another;" + }, + "ngx_hash_init": { + "definition": "//comment for ngx_hash_init\nvoid\nngx_hash_init(void)\n{\n size_t len;\n ngx_uint_t align = len & ~sizeof(void *);\n}", + "preprocessed_definition": "//comment for ngx_hash_init\nvoid\nngx_hash_init(void)\n{\n size_t len;\n ngx_uint_t align = len & ~sizeof(void *);\n}" + } + } } From 9a5f17f4b866264d9cbb4eb996a17dd99413d535 Mon Sep 17 00:00:00 2001 From: Per Larsen Date: Wed, 10 Jun 2026 02:22:55 -0700 Subject: [PATCH 3/3] postprocess: include C definitions before and after preprocessing in prompts Read both forms from the structured *.c_decls.json instead of running a clang subprocess per snippet. The CommentsTransform prompt includes the preprocessed text only when it differs from the original, so prompts (and llm-cache keys) are unchanged for directive-free functions; comment gating and response validation use the preprocessed text so comments in inactive preprocessor regions are not transferred. --- .../postprocess/definitions/__init__.py | 30 ++++++++++- .../postprocess/transforms/base.py | 5 +- .../postprocess/transforms/comments.py | 51 +++++++++++++++---- .../tests/examples/qsort.c_decls.json | 6 +-- 4 files changed, 72 insertions(+), 20 deletions(-) diff --git a/c2rust-postprocess/postprocess/definitions/__init__.py b/c2rust-postprocess/postprocess/definitions/__init__.py index 77dc9f8afa..9c22a7f063 100644 --- a/c2rust-postprocess/postprocess/definitions/__init__.py +++ b/c2rust-postprocess/postprocess/definitions/__init__.py @@ -2,6 +2,7 @@ import logging import subprocess from collections.abc import Generator, Iterable +from dataclasses import dataclass from pathlib import Path from tempfile import NamedTemporaryFile from typing import Any @@ -233,12 +234,37 @@ def get_rust_definitions(root_rust_source_file: Path) -> dict[str, str]: return json.loads(result.stdout) -def get_c_definitions(root_rust_source_file: Path) -> dict[str, str]: +@dataclass(frozen=True) +class CDefinition: + definition: str + preprocessed_definition: str | None + + @property + def effective(self) -> str: + """Preprocessed text when available, original otherwise.""" + return self.preprocessed_definition or self.definition + + @property + def was_changed_by_preprocessing(self) -> bool: + return ( + self.preprocessed_definition is not None + and self.preprocessed_definition != self.definition + ) + + +def get_c_definitions(root_rust_source_file: Path) -> dict[str, CDefinition]: c_defs_json = root_rust_source_file.with_suffix(".c_decls.json") logging.debug(f"Loading C definitions from {c_defs_json}") try: - return json.loads(c_defs_json.read_text()) + c_decls = json.loads(c_defs_json.read_text()) + return { + identifier: CDefinition( + definition=decl["definition"], + preprocessed_definition=decl.get("preprocessed_definition"), + ) + for identifier, decl in c_decls["definitions"].items() + } except OSError as e: e.add_note(f"C definitions JSON file not found: {c_defs_json}") raise e diff --git a/c2rust-postprocess/postprocess/transforms/base.py b/c2rust-postprocess/postprocess/transforms/base.py index 16a144a4f4..5ce27c6781 100644 --- a/c2rust-postprocess/postprocess/transforms/base.py +++ b/c2rust-postprocess/postprocess/transforms/base.py @@ -4,6 +4,7 @@ from pathlib import Path from postprocess.definitions import ( + CDefinition, get_c_definitions, get_rust_definitions, ) @@ -32,7 +33,7 @@ def apply_ident( self, rust_source_file: Path, rust_definition: str, - c_definition: str, + c_definition: CDefinition, identifier: str, update_rust: bool = True, ) -> None: @@ -114,7 +115,7 @@ def apply_file( c_definition = c_definitions[identifier] - highlighted_c_definition = get_highlighted_c(c_definition) + highlighted_c_definition = get_highlighted_c(c_definition.effective) logging.debug( f"C function {identifier} definition:\n{highlighted_c_definition}\n" ) diff --git a/c2rust-postprocess/postprocess/transforms/comments.py b/c2rust-postprocess/postprocess/transforms/comments.py index d750d03f98..4bf5c644a2 100644 --- a/c2rust-postprocess/postprocess/transforms/comments.py +++ b/c2rust-postprocess/postprocess/transforms/comments.py @@ -4,6 +4,7 @@ from postprocess.cache import AbstractCache from postprocess.definitions import ( + CDefinition, get_c_comments, get_rust_comments, update_rust_definition, @@ -20,31 +21,51 @@ class CommentsTransformPrompt: c_function: str + c_function_preprocessed: str | None rust_function: str prompt_text: str identifier: str - __slots__ = ("c_function", "rust_function", "prompt_text", "identifier") + __slots__ = ( + "c_function", + "c_function_preprocessed", + "rust_function", + "prompt_text", + "identifier", + ) def __init__( - self, c_function: str, rust_function: str, prompt_text: str, identifier: str + self, + c_function: str, + c_function_preprocessed: str | None, + rust_function: str, + prompt_text: str, + identifier: str, ): self.c_function = c_function + self.c_function_preprocessed = c_function_preprocessed self.rust_function = rust_function self.prompt_text = prompt_text self.identifier = identifier def __str__(self) -> str: - return ( + prompt = ( self.prompt_text + "\n\n" + "C function:\n```c\n" + self.c_function + "```\n\n" - + "Rust function:\n```rust\n" - + self.rust_function - + "```\n" ) + if self.c_function_preprocessed is not None: + prompt += ( + "The same C function after preprocessing; comments that do not" + " appear here were in inactive preprocessor regions and must" + " not be transferred:\n```c\n" + + self.c_function_preprocessed + + "```\n\n" + ) + prompt += "Rust function:\n```rust\n" + self.rust_function + "```\n" + return prompt class CommentsTransform(AbstractTransform): @@ -57,7 +78,7 @@ def apply_ident( self, rust_source_file: Path, rust_definition: str, - c_definition: str, + c_definition: CDefinition, identifier: str, update_rust: bool = True, ) -> None: @@ -70,8 +91,9 @@ def apply_ident( ) return - # Skip functions without comments in C definition - c_comments = get_c_comments(c_definition) + # Skip functions without comments that survive preprocessing; comments + # only present in inactive preprocessor regions must not be transferred. + c_comments = get_c_comments(c_definition.effective) if not c_comments: logging.info(f"Skipping C function without comments: {identifier}") return @@ -86,7 +108,14 @@ def apply_ident( prompt_text = dedent(prompt_text).strip() prompt = CommentsTransformPrompt( - c_function=c_definition, + c_function=c_definition.definition, + # Only include the preprocessed text when it differs, so prompts + # (and thus cache keys) are unchanged for directive-free functions. + c_function_preprocessed=( + c_definition.preprocessed_definition + if c_definition.was_changed_by_preprocessing + else None + ), rust_function=rust_definition, prompt_text=prompt_text, identifier=identifier, @@ -120,7 +149,7 @@ def apply_ident( rust_fn = remove_backticks(response) - c_comments = get_c_comments(prompt.c_function) + c_comments = get_c_comments(c_definition.effective) logging.debug(f"{c_comments=}") rust_comments = get_rust_comments(rust_fn) diff --git a/c2rust-postprocess/tests/examples/qsort.c_decls.json b/c2rust-postprocess/tests/examples/qsort.c_decls.json index 288124d2c1..db9e99ae7d 100644 --- a/c2rust-postprocess/tests/examples/qsort.c_decls.json +++ b/c2rust-postprocess/tests/examples/qsort.c_decls.json @@ -1,5 +1 @@ -{ - "swap": "void swap(int* a, int* b)\n{\n int t = *a;\n *a = *b;\n *b = t;\n}", - "quickSort": "void quickSort(int arr[], int low, int high)\n{\n if (low < high) {\n /* pi is the partitioning index; arr[pi] is now at the right place */\n int pi = partition(arr, low, high);\n\n /* Recursively sort elements before and after partition */\n quickSort(arr, low, pi - 1);\n quickSort(arr, pi + 1, high);\n }\n}", - "partition": "/* \n * Lomuto Partition Scheme:\n * Partitions the array so that elements < pivot are on the left, \n * and elements >= pivot are on the right.\n */\nint partition (int arr[], int low, int high)\n{\n // Partition the subarray around the last element as pivot and return pivot's final index.\n int pivot = arr[high];\n int i = low - 1;\n\n for (int j = low; j <= high - 1; j++) {\n if (arr[j] <= pivot) {\n i++;\n // Move elements <= pivot into the left partition.\n swap(&arr[i], &arr[j]);\n }\n }\n // Place pivot just after the final element of the left partition.\n swap(&arr[i + 1], &arr[high]);\n return i + 1;\n}" -} \ No newline at end of file +{"definitions":{"swap":{"definition":"void swap(int* a, int* b)\n{\n int t = *a;\n *a = *b;\n *b = t;\n}","preprocessed_definition":"void swap(int* a, int* b)\n{\n int t = *a;\n *a = *b;\n *b = t;\n}"},"partition":{"definition":"/* \n * Lomuto Partition Scheme:\n * Partitions the array so that elements < pivot are on the left, \n * and elements >= pivot are on the right.\n */\nint partition (int arr[], int low, int high)\n{\n // Partition the subarray around the last element as pivot and return pivot's final index.\n int pivot = arr[high];\n int i = low - 1;\n\n for (int j = low; j <= high - 1; j++) {\n if (arr[j] <= pivot) {\n i++;\n // Move elements <= pivot into the left partition.\n swap(&arr[i], &arr[j]);\n }\n }\n // Place pivot just after the final element of the left partition.\n swap(&arr[i + 1], &arr[high]);\n return i + 1;\n}","preprocessed_definition":"/* \n * Lomuto Partition Scheme:\n * Partitions the array so that elements < pivot are on the left, \n * and elements >= pivot are on the right.\n */\nint partition (int arr[], int low, int high)\n{\n // Partition the subarray around the last element as pivot and return pivot's final index.\n int pivot = arr[high];\n int i = low - 1;\n\n for (int j = low; j <= high - 1; j++) {\n if (arr[j] <= pivot) {\n i++;\n // Move elements <= pivot into the left partition.\n swap(&arr[i], &arr[j]);\n }\n }\n // Place pivot just after the final element of the left partition.\n swap(&arr[i + 1], &arr[high]);\n return i + 1;\n}"},"quickSort":{"definition":"void quickSort(int arr[], int low, int high)\n{\n if (low < high) {\n /* pi is the partitioning index; arr[pi] is now at the right place */\n int pi = partition(arr, low, high);\n\n /* Recursively sort elements before and after partition */\n quickSort(arr, low, pi - 1);\n quickSort(arr, pi + 1, high);\n }\n}","preprocessed_definition":"void quickSort(int arr[], int low, int high)\n{\n if (low < high) {\n /* pi is the partitioning index; arr[pi] is now at the right place */\n int pi = partition(arr, low, high);\n\n /* Recursively sort elements before and after partition */\n quickSort(arr, low, pi - 1);\n quickSort(arr, pi + 1, high);\n }\n}"}}} \ No newline at end of file