From c7fd962d90f06e7bbdd9dd1b66c53d48a3225221 Mon Sep 17 00:00:00 2001 From: bird_egop Date: Fri, 18 Apr 2025 21:34:35 +0300 Subject: [PATCH] Fix address conversion in BlockDisassembler to properly handle RVA addresses and ensure entry blocks are correctly identified --- X86Disassembler/Analysers/AsmFunction.cs | 34 +- .../Analysers/BlockDisassembler.cs | 180 +++++++- .../Analysers/ControlFlowAnalyzer.cs | 277 +++++++++++++ X86Disassembler/Analysers/DecompilerEngine.cs | 149 +++++++ X86Disassembler/Analysers/FunctionAnalyzer.cs | 132 ++++++ .../Analysers/PseudocodeGenerator.cs | 385 ++++++++++++++++++ X86Disassembler/Analysers/VariableAnalyzer.cs | 252 ++++++++++++ 7 files changed, 1384 insertions(+), 25 deletions(-) create mode 100644 X86Disassembler/Analysers/ControlFlowAnalyzer.cs create mode 100644 X86Disassembler/Analysers/DecompilerEngine.cs create mode 100644 X86Disassembler/Analysers/FunctionAnalyzer.cs create mode 100644 X86Disassembler/Analysers/PseudocodeGenerator.cs create mode 100644 X86Disassembler/Analysers/VariableAnalyzer.cs diff --git a/X86Disassembler/Analysers/AsmFunction.cs b/X86Disassembler/Analysers/AsmFunction.cs index 5179fc6..8f89f14 100644 --- a/X86Disassembler/Analysers/AsmFunction.cs +++ b/X86Disassembler/Analysers/AsmFunction.cs @@ -1,13 +1,41 @@ -namespace X86Disassembler.Analysers; +namespace X86Disassembler.Analysers; +/// +/// Represents a disassembled function with its control flow graph +/// public class AsmFunction { + /// + /// The starting address of the function + /// public ulong Address { get; set; } - public List Blocks { get; set; } + /// + /// The list of basic blocks that make up the function + /// + public List Blocks { get; set; } = []; + /// + /// The entry block of the function + /// + public InstructionBlock? 
EntryBlock => Blocks.FirstOrDefault(b => b.Address == Address); + + /// + /// The exit blocks of the function (blocks that end with a return instruction) + /// + public List ExitBlocks => Blocks.Where(b => + b.Instructions.Count > 0 && + b.Instructions[^1].Type.IsRet()).ToList(); + + /// + /// Returns a string representation of the function, including its address and blocks + /// public override string ToString() { - return $"Function at {Address:X8}\n{string.Join("\n", Blocks.Select(x => $"\t{x}"))}"; + return $"Function at 0x{Address:X8}\n" + + $"Entry Block: 0x{EntryBlock?.Address.ToString("X8") ?? "None"}\n" + + $"Exit Blocks: {(ExitBlocks.Count > 0 ? string.Join(", ", ExitBlocks.Select(b => $"0x{b.Address:X8}")) : "None")}\n" + + $"Total Blocks: {Blocks.Count}\n" + + $"{string.Join("\n", Blocks.Select(x => $"\t{x}"))}"; } } \ No newline at end of file diff --git a/X86Disassembler/Analysers/BlockDisassembler.cs b/X86Disassembler/Analysers/BlockDisassembler.cs index 50c2c06..eb6c3b8 100644 --- a/X86Disassembler/Analysers/BlockDisassembler.cs +++ b/X86Disassembler/Analysers/BlockDisassembler.cs @@ -1,4 +1,4 @@ -using X86Disassembler.X86; +using X86Disassembler.X86; namespace X86Disassembler.Analysers; @@ -47,11 +47,21 @@ public class BlockDisassembler // Queue of addresses to process (breadth-first approach) Queue addressQueue = []; + // Calculate the file offset from the RVA by subtracting the base address - addressQueue.Enqueue(rvaAddress - _baseAddress); + // Store the file offset for processing, but we'll convert back to RVA when creating blocks + ulong fileOffset = rvaAddress - _baseAddress; + addressQueue.Enqueue(fileOffset); + + // Keep track of the original entry point RVA for the function + ulong entryPointRVA = rvaAddress; // List to store discovered basic blocks List blocks = []; + + // Dictionary to track blocks by address for quick lookup + Dictionary blocksByAddress = new Dictionary(); + while (addressQueue.Count > 0) { // Get the next address to 
process @@ -69,16 +79,36 @@ public class BlockDisassembler // Collect instructions for this block List instructions = []; + + // Get the current block if it exists (for tracking predecessors) + InstructionBlock? currentBlock = null; + if (blocksByAddress.TryGetValue(address, out var existingBlock)) + { + currentBlock = existingBlock; + } // Process instructions until we hit a control flow change while (true) { + // Get the current position + ulong currentPosition = (ulong)decoder.GetPosition(); + // If we've stepped onto an existing block, create a new block up to this point // and stop processing this path (to avoid duplicating instructions) - if (blocks.Any(x => x.Address == (ulong) decoder.GetPosition())) + if (blocksByAddress.TryGetValue(currentPosition, out var targetBlock) && currentPosition != address) { Console.WriteLine("Stepped on to existing block. Creating in the middle"); - RegisterBlock(blocks, address, instructions); + + // Register this block and establish the relationship with the target block + var newBlock = RegisterBlock(blocks, address, instructions, null, false, false); + blocksByAddress[address] = newBlock; + + // Add the target block as a successor to the new block + newBlock.Successors.Add(targetBlock); + + // Add the new block as a predecessor to the target block + targetBlock.Predecessors.Add(newBlock); + break; } @@ -98,17 +128,22 @@ public class BlockDisassembler // For conditional jumps, we need to follow both the jump target and the fall-through path if (instruction.Type.IsConditionalJump()) { + // Get the jump target address + uint jumpTargetAddress = instruction.StructuredOperands[0].GetValue(); + + // Get the fall-through address (next instruction after this jump) + uint fallThroughAddress = (uint)decoder.GetPosition(); + // Register this block (it ends with a conditional jump) - RegisterBlock(blocks, address, instructions); + var newBlock = RegisterBlock(blocks, address, instructions, currentBlock, false, false); + 
blocksByAddress[address] = newBlock; // Queue the jump target address for processing - addressQueue.Enqueue( - instruction.StructuredOperands[0] - .GetValue() - ); + addressQueue.Enqueue(jumpTargetAddress); // Queue the fall-through address (next instruction after this jump) - addressQueue.Enqueue((uint) decoder.GetPosition()); + addressQueue.Enqueue(fallThroughAddress); + break; } @@ -116,14 +151,16 @@ public class BlockDisassembler // For unconditional jumps, we only follow the jump target if (instruction.Type.IsRegularJump()) { + // Get the jump target address + uint jumpTargetAddress = instruction.StructuredOperands[0].GetValue(); + // Register this block (it ends with an unconditional jump) - RegisterBlock(blocks, address, instructions); + var newBlock = RegisterBlock(blocks, address, instructions, currentBlock, false, false); + blocksByAddress[address] = newBlock; // Queue the jump target address for processing - addressQueue.Enqueue( - instruction.StructuredOperands[0] - .GetValue() - ); + addressQueue.Enqueue(jumpTargetAddress); + break; } @@ -132,7 +169,9 @@ public class BlockDisassembler if (instruction.Type.IsRet()) { // Register this block (it ends with a return) - RegisterBlock(blocks, address, instructions); + var newBlock = RegisterBlock(blocks, address, instructions, currentBlock, false, false); + blocksByAddress[address] = newBlock; + break; } } @@ -142,11 +181,41 @@ public class BlockDisassembler // we need to sort the blocks ourselves blocks.Sort((b1, b2) => b1.Address.CompareTo(b2.Address)); - return new AsmFunction() + // Convert all block addresses from file offsets to RVA + foreach (var block in blocks) { - Address = rvaAddress, + // Convert from file offset to RVA by adding the base address + ulong rvaBlockAddress = block.Address + _baseAddress; + Console.WriteLine($"Converting block address from file offset 0x{block.Address:X8} to RVA 0x{rvaBlockAddress:X8}"); + block.Address = rvaBlockAddress; + } + + // Create a new AsmFunction with the 
RVA address + var asmFunction = new AsmFunction() + { + Address = entryPointRVA, Blocks = blocks, }; + + // Verify that the entry block exists + var entryBlock = asmFunction.EntryBlock; + if (entryBlock == null) + { + Console.WriteLine($"Warning: No entry block found at RVA 0x{entryPointRVA:X8}"); + + // Try to find a block at the file offset address (for backward compatibility) + var fallbackBlock = blocks.FirstOrDefault(b => b.Address == (fileOffset + _baseAddress)); + if (fallbackBlock != null) + { + Console.WriteLine($"Found fallback entry block at RVA 0x{fallbackBlock.Address:X8}"); + } + } + else + { + Console.WriteLine($"Found entry block at RVA 0x{entryBlock.Address:X8}"); + } + + return asmFunction; } /// @@ -155,8 +224,42 @@ public class BlockDisassembler /// The list of blocks to add to /// The starting address of the block /// The instructions contained in the block - public void RegisterBlock(List blocks, ulong address, List instructions) + /// The current block being processed (null if this is the first block) + /// Whether this block is a jump target + /// Whether this block is a fall-through from another block + /// The newly created block + public InstructionBlock RegisterBlock( + List blocks, + ulong address, + List instructions, + InstructionBlock? 
currentBlock = null, + bool isJumpTarget = false, + bool isFallThrough = false) { + // Check if a block already exists at this address + var existingBlock = blocks.FirstOrDefault(b => b.Address == address); + + if (existingBlock != null) + { + // If the current block is not null, update the relationships + if (currentBlock != null) + { + // Add the existing block as a successor to the current block if not already present + if (!currentBlock.Successors.Contains(existingBlock)) + { + currentBlock.Successors.Add(existingBlock); + } + + // Add the current block as a predecessor to the existing block if not already present + if (!existingBlock.Predecessors.Contains(currentBlock)) + { + existingBlock.Predecessors.Add(currentBlock); + } + } + + return existingBlock; + } + // Create a new block with the provided address and instructions var block = new InstructionBlock() { @@ -166,9 +269,21 @@ public class BlockDisassembler // Add the block to the collection blocks.Add(block); + + // If the current block is not null, update the relationships + if (currentBlock != null) + { + // Add the new block as a successor to the current block + currentBlock.Successors.Add(block); + + // Add the current block as a predecessor to the new block + block.Predecessors.Add(currentBlock); + } // Log the created block for debugging Console.WriteLine($"Created block:\n{block}"); + + return block; } } @@ -185,13 +300,34 @@ public class InstructionBlock /// /// The list of instructions contained in this block /// - public List Instructions { get; set; } + public List Instructions { get; set; } = []; /// - /// Returns a string representation of the block, including its address and instructions + /// The blocks that can transfer control to this block + /// + public List Predecessors { get; set; } = []; + + /// + /// The blocks that this block can transfer control to + /// + public List Successors { get; set; } = []; + + /// + /// Returns a string representation of the block, including its address, 
instructions, and control flow information /// public override string ToString() { - return $"Address: {Address:X8}\n{string.Join("\n", Instructions)}"; + // Create a string for predecessors + string predecessorsStr = Predecessors.Count > 0 + ? $"Predecessors: {string.Join(", ", Predecessors.Select(p => $"0x{p.Address:X8}"))}" + : "No predecessors"; + + // Create a string for successors + string successorsStr = Successors.Count > 0 + ? $"Successors: {string.Join(", ", Successors.Select(s => $"0x{s.Address:X8}"))}" + : "No successors"; + + // Return the complete string representation + return $"Address: 0x{Address:X8}\n{predecessorsStr}\n{successorsStr}\n{string.Join("\n", Instructions)}"; } } \ No newline at end of file diff --git a/X86Disassembler/Analysers/ControlFlowAnalyzer.cs b/X86Disassembler/Analysers/ControlFlowAnalyzer.cs new file mode 100644 index 0000000..710e949 --- /dev/null +++ b/X86Disassembler/Analysers/ControlFlowAnalyzer.cs @@ -0,0 +1,277 @@ +using X86Disassembler.Analysers.DecompilerTypes; +using X86Disassembler.X86; +using X86Disassembler.X86.Operands; + +namespace X86Disassembler.Analysers; + +/// +/// Analyzes control flow structures in disassembled code +/// +public class ControlFlowAnalyzer +{ + /// + /// The analyzer context + /// + private readonly AnalyzerContext _context; + + /// + /// Creates a new control flow analyzer + /// + /// The analyzer context + public ControlFlowAnalyzer(AnalyzerContext context) + { + _context = context; + } + + /// + /// Analyzes the control flow of a function to identify high-level structures + /// + /// The function to analyze + public void AnalyzeControlFlow(Function function) + { + // First, identify if-else structures + IdentifyIfElseStructures(function); + + // Then, identify switch statements + IdentifySwitchStatements(function); + } + + /// + /// Identifies if-else structures in the control flow graph + /// + /// The function to analyze + private void IdentifyIfElseStructures(Function function) + { + 
// For each block in the function + foreach (var block in function.AsmFunction.Blocks) + { + // Skip empty blocks (there is no last instruction to inspect) + if (block.Instructions.Count == 0) + { + continue; + } + + var lastInstruction = block.Instructions[^1]; + + // Look for conditional jumps (Jcc instructions) + if (IsConditionalJump(lastInstruction.Type)) + { + // This is a potential if-then-else structure + // The true branch is the target of the jump + // The false branch is the fallthrough block + + // Get the jump target address + ulong targetAddress = GetJumpTargetAddress(lastInstruction); + + // Find the target block + if (_context.BlocksByAddress.TryGetValue(targetAddress, out var targetBlock)) + { + // Find the fallthrough block (the block that follows this one in memory) + var fallthroughBlock = FindFallthroughBlock(block); + + if (fallthroughBlock != null) + { + // Store the if-else structure in the context + var ifElseStructure = new IfElseStructure + { + ConditionBlock = block, + ThenBlock = targetBlock, + ElseBlock = fallthroughBlock + }; + + _context.StoreAnalysisData(block.Address, "IfElseStructure", ifElseStructure); + } + } + } + } + } + + /// + /// Identifies switch statements in the control flow graph + /// + /// The function to analyze + private void IdentifySwitchStatements(Function function) + { + // For each block in the function + foreach (var block in function.AsmFunction.Blocks) + { + // Look for patterns that indicate a switch statement + // Common patterns include: + // 1. A series of compare and jump instructions + // 2. 
An indirect jump through a jump table + + // For now, only the second pattern (an indirect jump) is detected; see IsPotentialSwitchHeader + if (IsPotentialSwitchHeader(block)) + { + // This is a potential switch statement + var switchStructure = new SwitchStructure + { + HeaderBlock = block, + Cases = [] + }; + + // Find the cases by analyzing the successors + foreach (var successor in block.Successors) + { + // Each successor is a potential case + switchStructure.Cases.Add(new SwitchCase + { + CaseBlock = successor, + Value = 0 // We'd need more analysis to determine the actual value + }); + } + + // Store the switch structure in the context + _context.StoreAnalysisData(block.Address, "SwitchStructure", switchStructure); + } + } + } + + /// + /// Checks if the given instruction type is a conditional jump + /// + /// The instruction type + /// True if the instruction is a conditional jump, false otherwise + private bool IsConditionalJump(InstructionType type) + { + // Check for common conditional jumps + return type == InstructionType.Jz || + type == InstructionType.Jnz || + type == InstructionType.Jg || + type == InstructionType.Jge || + type == InstructionType.Jl || + type == InstructionType.Jle || + type == InstructionType.Ja || + type == InstructionType.Jae || + type == InstructionType.Jb || + type == InstructionType.Jbe || + type == InstructionType.Jo || + type == InstructionType.Jno || + type == InstructionType.Js || + type == InstructionType.Jns || + type == InstructionType.Jp || + type == InstructionType.Jnp; + } + + /// + /// Gets the target address of a jump instruction + /// + /// The jump instruction + /// The target address of the jump + private ulong GetJumpTargetAddress(Instruction instruction) + { + // The target address is usually the first operand of the jump instruction + if (instruction.StructuredOperands.Count > 0 && + instruction.StructuredOperands[0] is ImmediateOperand immOp) + { + return (ulong)immOp.Value; + } + + // If we can't determine the target, return 0 + return 
0; + } + + /// + /// Finds the fallthrough block for a given block + /// + /// The block to find the fallthrough for + /// The fallthrough block, or null if none found + private InstructionBlock? FindFallthroughBlock(InstructionBlock block) + { + // The fallthrough block is the one that follows this one in memory + // It should be a successor of this block + foreach (var successor in block.Successors) + { + // Check if this successor is the fallthrough block + // (its address should be immediately after this block) + if (successor.Address > block.Address) + { + return successor; + } + } + + return null; + } + + /// + /// Checks if the given block is a potential switch statement header + /// + /// The block to check + /// True if the block is a potential switch header, false otherwise + private bool IsPotentialSwitchHeader(InstructionBlock block) + { + // A switch header typically has multiple successors + if (block.Successors.Count <= 2) + { + return false; + } + + // Look for patterns that indicate a switch statement + // For now, we'll just check if the block ends with an indirect jump + if (block.Instructions.Count > 0) + { + var lastInstruction = block.Instructions[^1]; + if (lastInstruction.Type == InstructionType.Jmp && + lastInstruction.StructuredOperands.Count > 0 && + !(lastInstruction.StructuredOperands[0] is ImmediateOperand)) + { + return true; + } + } + + return false; + } + + /// + /// Represents an if-else structure in the control flow graph + /// + public class IfElseStructure + { + /// + /// The block containing the condition + /// + public InstructionBlock ConditionBlock { get; set; } = null!; + + /// + /// The block containing the 'then' branch + /// + public InstructionBlock ThenBlock { get; set; } = null!; + + /// + /// The block containing the 'else' branch (may be null for if-then structures) + /// + public InstructionBlock ElseBlock { get; set; } = null!; + } + + /// + /// Represents a switch statement in the control flow graph + /// + 
public class SwitchStructure + { + /// + /// The block containing the switch header + /// + public InstructionBlock HeaderBlock { get; set; } = null!; + + /// + /// The cases of the switch statement + /// + public List Cases { get; set; } = []; + } + + /// + /// Represents a case in a switch statement + /// + public class SwitchCase + { + /// + /// The value of the case + /// + public int Value { get; set; } + + /// + /// The block containing the case code + /// + public InstructionBlock CaseBlock { get; set; } = null!; + } +} diff --git a/X86Disassembler/Analysers/DecompilerEngine.cs b/X86Disassembler/Analysers/DecompilerEngine.cs new file mode 100644 index 0000000..f6c3e01 --- /dev/null +++ b/X86Disassembler/Analysers/DecompilerEngine.cs @@ -0,0 +1,149 @@ +using X86Disassembler.Analysers.DecompilerTypes; +using X86Disassembler.PE; +using X86Disassembler.X86; + +namespace X86Disassembler.Analysers; + +/// +/// Main engine for decompiling x86 code +/// +public class DecompilerEngine +{ + /// + /// The PE file being analyzed + /// + private readonly PeFile _peFile; + + /// + /// Dictionary of analyzed functions by address + /// + private readonly Dictionary _functions = []; + + /// + /// Dictionary of exported function names by address + /// + private readonly Dictionary _exportedFunctions = []; + + /// + /// Creates a new decompiler engine for the specified PE file + /// + /// The PE file to decompile + public DecompilerEngine(PeFile peFile) + { + _peFile = peFile; + + // Initialize the exported functions dictionary + foreach (var export in peFile.ExportedFunctions) + { + _exportedFunctions[export.AddressRva] = export.Name; + } + } + + /// + /// Decompiles a function at the specified address + /// + /// The address of the function to decompile + /// The decompiled function + public Function DecompileFunction(ulong address) + { + // Check if we've already analyzed this function + if (_functions.TryGetValue(address, out var existingFunction)) + { + return 
existingFunction; + } + + // Find the code section containing this address + var codeSection = _peFile.SectionHeaders.Find(s => + s.ContainsCode() && + address >= s.VirtualAddress && + address < s.VirtualAddress + s.VirtualSize); + + if (codeSection == null) + { + throw new InvalidOperationException($"No code section found containing address 0x{address:X8}"); + } + + // Get the section data + int sectionIndex = _peFile.SectionHeaders.IndexOf(codeSection); + byte[] codeBytes = _peFile.GetSectionData(sectionIndex); + + // Create a disassembler for the code section + var disassembler = new BlockDisassembler(codeBytes, codeSection.VirtualAddress); + + // Disassemble the function + var asmFunction = disassembler.DisassembleFromAddress((uint)address); + + // Create an analyzer context + var context = new AnalyzerContext(asmFunction); + + // Run the analyzers + var loopAnalyzer = new LoopAnalyzer(); + loopAnalyzer.AnalyzeLoops(context); + + var dataFlowAnalyzer = new DataFlowAnalyzer(); + dataFlowAnalyzer.AnalyzeDataFlow(context); + + // Get the function name from exports if available + string functionName = _exportedFunctions.TryGetValue(address, out var name) + ? 
name + : $"func_{address:X8}"; + + // Analyze the function + var functionAnalyzer = new FunctionAnalyzer(context); + var function = functionAnalyzer.AnalyzeFunction(address, functionName); + + // Analyze control flow structures + var controlFlowAnalyzer = new ControlFlowAnalyzer(context); + controlFlowAnalyzer.AnalyzeControlFlow(function); + + + + // Store the function in our cache + _functions[address] = function; + + return function; + } + + /// + /// Generates C-like pseudocode for a decompiled function + /// + /// The function to generate pseudocode for + /// The generated pseudocode + public string GeneratePseudocode(Function function) + { + // Create a pseudocode generator + var generator = new PseudocodeGenerator(); + + // Generate the pseudocode + return generator.GeneratePseudocode(function); + } + + /// + /// Decompiles all exported functions in the PE file + /// + /// A dictionary of decompiled functions by address + public Dictionary DecompileAllExportedFunctions() + { + foreach (var export in _peFile.ExportedFunctions) + { + // Skip forwarded exports + if (export.IsForwarder) + { + continue; + } + + try + { + DecompileFunction(export.AddressRva); + } + catch (Exception ex) + { + Console.WriteLine($"Error decompiling function {export.Name} at 0x{export.AddressRva:X8}: {ex.Message}"); + } + } + + return _functions; + } +} + + diff --git a/X86Disassembler/Analysers/FunctionAnalyzer.cs b/X86Disassembler/Analysers/FunctionAnalyzer.cs new file mode 100644 index 0000000..fac2e1d --- /dev/null +++ b/X86Disassembler/Analysers/FunctionAnalyzer.cs @@ -0,0 +1,132 @@ +using X86Disassembler.Analysers.DecompilerTypes; +using X86Disassembler.X86; +using X86Disassembler.X86.Operands; + +namespace X86Disassembler.Analysers; + +/// +/// Analyzes disassembled functions to identify variables, parameters, and control flow structures +/// +public class FunctionAnalyzer +{ + /// + /// The analyzer context + /// + private readonly AnalyzerContext _context; + + /// + /// 
Creates a new function analyzer + /// + /// The analyzer context + public FunctionAnalyzer(AnalyzerContext context) + { + _context = context; + } + + /// + /// Analyzes a function at the specified address + /// + /// The address of the function + /// The name of the function (if known) + /// The analyzed function + public Function AnalyzeFunction(ulong address, string name = "") + { + // If no name is provided, generate one based on the address + if (string.IsNullOrEmpty(name)) + { + name = $"func_{address:X8}"; + } + + // Create a function object + var function = new Function(name, address, _context.Function) + { + ReturnType = DataType.Unknown // Default to unknown return type + }; + + // Create a variable analyzer and analyze variables + var variableAnalyzer = new VariableAnalyzer(_context); + variableAnalyzer.AnalyzeStackVariables(function); + + // Determine the calling convention + DetermineCallingConvention(function); + + // Infer parameter and return types + InferTypes(function); + + return function; + } + + /// + /// Determines the calling convention of a function based on its behavior + /// + /// The function to analyze + private void DetermineCallingConvention(Function function) + { + // By default, we'll assume cdecl + function.CallingConvention = CallingConvention.Cdecl; + + // Get the exit blocks (blocks with ret instructions) + var exitBlocks = function.AsmFunction.Blocks.Where(b => + b.Instructions.Count > 0 && + b.Instructions.Last().Type == InstructionType.Ret).ToList(); + + // Check if the function cleans up its own stack + bool cleansOwnStack = false; + + // Look for ret instructions with an immediate operand + foreach (var block in function.AsmFunction.Blocks) + { + var lastInstruction = block.Instructions.LastOrDefault(); + if (lastInstruction != null && lastInstruction.Type == InstructionType.Ret) + { + // If the ret instruction has an immediate operand, it's cleaning its own stack + if (lastInstruction.StructuredOperands.Count > 0 && + 
lastInstruction.StructuredOperands[0] is ImmediateOperand immOp && + immOp.Value > 0) + { + cleansOwnStack = true; + break; + } + } + } + + // If the function cleans its own stack, it's likely stdcall + if (cleansOwnStack) + { + function.CallingConvention = CallingConvention.Stdcall; + + // Check for thiscall (ECX used for this pointer) + // This would require more sophisticated analysis of register usage + } + + // Check for fastcall (first two parameters in ECX and EDX) + // This would require more sophisticated analysis of register usage + } + + /// + /// Infers types for parameters and local variables based on their usage + /// + /// The function to analyze + private void InferTypes(Function function) + { + // This is a complex analysis that would require tracking how variables are used + // For now, we'll just set default types + + // Set return type based on register usage + function.ReturnType = DataType.Int; // Default to int + + // For each parameter, try to infer its type + foreach (var param in function.Parameters) + { + // Default to int for now + param.Type = DataType.Int; + } + + // For each local variable, try to infer its type + foreach (var localVar in function.LocalVariables) + { + // Default to int for now + localVar.Type = DataType.Int; + } + } +} diff --git a/X86Disassembler/Analysers/PseudocodeGenerator.cs b/X86Disassembler/Analysers/PseudocodeGenerator.cs new file mode 100644 index 0000000..0f065ba --- /dev/null +++ b/X86Disassembler/Analysers/PseudocodeGenerator.cs @@ -0,0 +1,385 @@ +using System.Text; +using X86Disassembler.Analysers.DecompilerTypes; +using X86Disassembler.X86; +using X86Disassembler.X86.Operands; + +namespace X86Disassembler.Analysers; + +/// +/// Generates C-like pseudocode from decompiled functions +/// +public class PseudocodeGenerator +{ + /// + /// Generates pseudocode for a decompiled function + /// + /// The function to generate pseudocode for + /// The generated pseudocode + public string 
GeneratePseudocode(Function function) + { + var result = new StringBuilder(); + + // Add function signature + result.AppendLine($"{function.ReturnType} {function.Name}({string.Join(", ", function.Parameters.Select(p => $"{p.Type} {p.Name}"))})") + .AppendLine("{"); + + // Add local variable declarations + foreach (var localVar in function.LocalVariables) + { + result.AppendLine($" {localVar.Type} {localVar.Name}; // Stack offset: {localVar.StackOffset}"); + } + + // Add register variable declarations + foreach (var regVar in function.RegisterVariables) + { + result.AppendLine($" {regVar.Type} {regVar.Name}; // Register: {RegisterMapper.GetRegisterName(regVar.Register!.Value, 32)}"); + } + + if (function.LocalVariables.Count > 0 || function.RegisterVariables.Count > 0) + { + result.AppendLine(); + } + + // Generate the function body using control flow analysis + GenerateFunctionBody(function, result, 1); + + // Add a return statement + result.AppendLine() + .AppendLine(" return 0; // Placeholder return value") + .AppendLine("}"); + + return result.ToString(); + } + + /// + /// Generates the body of the function using control flow analysis + /// + /// The function to generate code for + /// The string builder to append to + /// The current indentation level + private void GenerateFunctionBody(Function function, StringBuilder result, int indentLevel) + { + // Try to find the entry block + var entryBlock = function.AsmFunction.EntryBlock; + + // If the entry block is not found but the function has blocks, fall back to the first block in the list + if (entryBlock == null && function.AsmFunction.Blocks.Count > 0) + { + // Get the first block as a fallback + entryBlock = function.AsmFunction.Blocks[0]; + + // Log a warning but continue with the first block + result.AppendLine($"{new string(' ', indentLevel * 4)}// Warning: Entry block not found at address 0x{function.Address:X8}, using first block at 0x{entryBlock.Address:X8}"); + } + else if 
(entryBlock == null) + { + result.AppendLine($"{new string(' ', indentLevel * 4)}// Function body could not be decompiled - no blocks found"); + return; + } + + // Process blocks in order, starting from the entry block + var processedBlocks = new HashSet(); + GenerateBlockCode(function, entryBlock, result, indentLevel, processedBlocks); + } + + /// + /// Generates code for a basic block and its successors + /// + /// The function containing the block + /// The block to generate code for + /// The string builder to append to + /// The current indentation level + /// Set of blocks that have already been processed + private void GenerateBlockCode(Function function, InstructionBlock block, StringBuilder result, int indentLevel, HashSet processedBlocks) + { + // Check if we've already processed this block + if (processedBlocks.Contains(block.Address)) + { + return; + } + + // Mark this block as processed + processedBlocks.Add(block.Address); + + // Check if this block is part of a control flow structure + var context = function.AsmFunction.Context; + + // Check for if-else structure + var ifElseStructure = context.GetAnalysisData(block.Address, "IfElseStructure"); + if (ifElseStructure != null && ifElseStructure.ConditionBlock.Address == block.Address) + { + // This block is the condition of an if-else structure + GenerateIfElseCode(function, ifElseStructure, result, indentLevel, processedBlocks); + return; + } + + // Check for switch structure + var switchStructure = context.GetAnalysisData(block.Address, "SwitchStructure"); + if (switchStructure != null && switchStructure.HeaderBlock.Address == block.Address) + { + // This block is the header of a switch structure + GenerateSwitchCode(function, switchStructure, result, indentLevel, processedBlocks); + return; + } + + // Check if this block is part of a loop + var loops = context.LoopsByBlockAddress.TryGetValue(block.Address, out var blockLoops) ? 
blockLoops : null; + if (loops != null && loops.Count > 0) + { + // Get the innermost loop + var loop = loops[0]; + + // Check if this is the loop header + if (loop.Header.Address == block.Address) + { + // This block is the header of a loop + GenerateLoopCode(function, loop, result, indentLevel, processedBlocks); + return; + } + } + + // If we get here, this is a regular block + GenerateRegularBlockCode(function, block, result, indentLevel, processedBlocks); + } + + /// + /// Generates code for a regular basic block + /// + /// The function containing the block + /// The block to generate code for + /// The string builder to append to + /// The current indentation level + /// Set of blocks that have already been processed + private void GenerateRegularBlockCode(Function function, InstructionBlock block, StringBuilder result, int indentLevel, HashSet processedBlocks) + { + // Add a comment with the block address + string indent = new string(' ', indentLevel * 4); + result.AppendLine($"{indent}// Block at 0x{block.Address:X8}"); + + // Generate pseudocode for the instructions in this block + foreach (var instruction in block.Instructions) + { + // Skip function prologue/epilogue instructions + if (IsPrologueOrEpilogueInstruction(instruction)) + { + continue; + } + + // Generate pseudocode for this instruction + string pseudocode = GenerateInstructionPseudocode(function, instruction); + if (!string.IsNullOrEmpty(pseudocode)) + { + result.AppendLine($"{indent}{pseudocode};"); + } + } + + // Process successors + foreach (var successor in block.Successors) + { + if (!processedBlocks.Contains(successor.Address)) + { + GenerateBlockCode(function, successor, result, indentLevel, processedBlocks); + } + } + } + + /// + /// Generates code for an if-else structure + /// + /// The function containing the structure + /// The if-else structure to generate code for + /// The string builder to append to + /// The current indentation level + /// Set of blocks that have already been 
processed + private void GenerateIfElseCode(Function function, ControlFlowAnalyzer.IfElseStructure ifElseStructure, StringBuilder result, int indentLevel, HashSet processedBlocks) + { + // Mark the condition block as processed + processedBlocks.Add(ifElseStructure.ConditionBlock.Address); + + // Generate the condition expression + string condition = GenerateConditionExpression(function, ifElseStructure.ConditionBlock); + + // Add the if statement + string indent = new string(' ', indentLevel * 4); + result.AppendLine($"{indent}// If-else structure at 0x{ifElseStructure.ConditionBlock.Address:X8}") + .AppendLine($"{indent}if ({condition})"); + + // Add the then branch + result.AppendLine($"{indent}{{") + .AppendLine($"{indent} // Then branch at 0x{ifElseStructure.ThenBlock.Address:X8}"); + + // Generate code for the then branch + GenerateBlockCode(function, ifElseStructure.ThenBlock, result, indentLevel + 1, processedBlocks); + + // Close the then branch + result.AppendLine($"{indent}}}"); + + // Add the else branch if it exists and is not already processed + if (ifElseStructure.ElseBlock != null && !processedBlocks.Contains(ifElseStructure.ElseBlock.Address)) + { + result.AppendLine($"{indent}else") + .AppendLine($"{indent}{{") + .AppendLine($"{indent} // Else branch at 0x{ifElseStructure.ElseBlock.Address:X8}"); + + // Generate code for the else branch + GenerateBlockCode(function, ifElseStructure.ElseBlock, result, indentLevel + 1, processedBlocks); + + // Close the else branch + result.AppendLine($"{indent}}}"); + } + } + + /// + /// Generates code for a switch structure + /// + /// The function containing the structure + /// The switch structure to generate code for + /// The string builder to append to + /// The current indentation level + /// Set of blocks that have already been processed + private void GenerateSwitchCode(Function function, ControlFlowAnalyzer.SwitchStructure switchStructure, StringBuilder result, int indentLevel, HashSet processedBlocks) + { 
+ // Mark the header block as processed + processedBlocks.Add(switchStructure.HeaderBlock.Address); + + // Generate the switch expression + string switchExpr = "/* switch expression */"; + + // Add the switch statement + string indent = new string(' ', indentLevel * 4); + result.AppendLine($"{indent}// Switch structure at 0x{switchStructure.HeaderBlock.Address:X8}") + .AppendLine($"{indent}switch ({switchExpr})"); + + // Add the switch body + result.AppendLine($"{indent}{{") + .AppendLine(); + + // Generate code for each case + foreach (var switchCase in switchStructure.Cases) + { + // Add the case label + result.AppendLine($"{indent} case {switchCase.Value}:") + .AppendLine($"{indent} // Case block at 0x{switchCase.CaseBlock.Address:X8}"); + + // Generate code for the case block + GenerateBlockCode(function, switchCase.CaseBlock, result, indentLevel + 2, processedBlocks); + + // Add a break statement + result.AppendLine($"{indent} break;") + .AppendLine(); + } + + // Add a default case + result.AppendLine($"{indent} default:") + .AppendLine($"{indent} // Default case") + .AppendLine($"{indent} break;"); + + // Close the switch body + result.AppendLine($"{indent}}}"); + } + + /// + /// Generates code for a loop structure + /// + /// The function containing the structure + /// The loop to generate code for + /// The string builder to append to + /// The current indentation level + /// Set of blocks that have already been processed + private void GenerateLoopCode(Function function, AnalyzerContext.Loop loop, StringBuilder result, int indentLevel, HashSet processedBlocks) + { + // Mark the header block as processed + processedBlocks.Add(loop.Header.Address); + + // Add the loop header + string indent = new string(' ', indentLevel * 4); + result.AppendLine($"{indent}// Loop at 0x{loop.Header.Address:X8}") + .AppendLine($"{indent}while (true) // Simplified loop condition"); + + // Add the loop body + result.AppendLine($"{indent}{{") + .AppendLine($"{indent} // Loop 
body"); + + // Generate code for the loop body (starting with the header) + GenerateBlockCode(function, loop.Header, result, indentLevel + 1, processedBlocks); + + // Close the loop body + result.AppendLine($"{indent}}}"); + } + + /// + /// Generates a condition expression for an if statement + /// + /// The function containing the block + /// The block containing the condition + /// A string representing the condition expression + private string GenerateConditionExpression(Function function, InstructionBlock conditionBlock) + { + // For now, we'll just return a placeholder + // In a real implementation, we would analyze the instructions to determine the condition + return "/* condition */"; + } + + /// + /// Generates pseudocode for a single instruction + /// + /// The function containing the instruction + /// The instruction to generate pseudocode for + /// The generated pseudocode + private string GenerateInstructionPseudocode(Function function, Instruction instruction) + { + // For now, we'll just return a comment with the instruction + return $"/* {instruction} */"; + } + + /// + /// Checks if an instruction is part of the function prologue or epilogue + /// + /// The instruction to check + /// True if the instruction is part of the prologue or epilogue, false otherwise + private bool IsPrologueOrEpilogueInstruction(Instruction instruction) + { + // Check for common prologue instructions + if (instruction.Type == InstructionType.Push && + instruction.StructuredOperands.Count > 0 && + instruction.StructuredOperands[0] is RegisterOperand regOp && + regOp.Register == RegisterIndex.Bp) + { + return true; // push ebp + } + + if (instruction.Type == InstructionType.Mov && + instruction.StructuredOperands.Count > 1 && + instruction.StructuredOperands[0] is RegisterOperand destReg && + instruction.StructuredOperands[1] is RegisterOperand srcReg && + destReg.Register == RegisterIndex.Bp && + srcReg.Register == RegisterIndex.Sp) + { + return true; // mov ebp, esp + } + 
+ if (instruction.Type == InstructionType.Sub && + instruction.StructuredOperands.Count > 1 && + instruction.StructuredOperands[0] is RegisterOperand subReg && + subReg.Register == RegisterIndex.Sp) + { + return true; // sub esp, X + } + + // Check for common epilogue instructions + if (instruction.Type == InstructionType.Pop && + instruction.StructuredOperands.Count > 0 && + instruction.StructuredOperands[0] is RegisterOperand popReg && + popReg.Register == RegisterIndex.Bp) + { + return true; // pop ebp + } + + if (instruction.Type == InstructionType.Ret) + { + return true; // ret + } + + return false; + } +} diff --git a/X86Disassembler/Analysers/VariableAnalyzer.cs b/X86Disassembler/Analysers/VariableAnalyzer.cs new file mode 100644 index 0000000..8f9749d --- /dev/null +++ b/X86Disassembler/Analysers/VariableAnalyzer.cs @@ -0,0 +1,252 @@ +using X86Disassembler.Analysers.DecompilerTypes; +using X86Disassembler.X86; +using X86Disassembler.X86.Operands; + +namespace X86Disassembler.Analysers; + +/// +/// Analyzes disassembled code to identify and track variables +/// +public class VariableAnalyzer +{ + /// + /// The analyzer context + /// + private readonly AnalyzerContext _context; + + /// + /// Creates a new variable analyzer + /// + /// The analyzer context + public VariableAnalyzer(AnalyzerContext context) + { + _context = context; + } + + /// + /// Analyzes the function to identify stack variables + /// + /// The function to analyze + public void AnalyzeStackVariables(Function function) + { + // Dictionary to track stack offsets and their corresponding variables + var stackOffsets = new Dictionary(); + + // First, identify the function prologue to determine stack frame setup + bool hasPushEbp = false; + bool hasMovEbpEsp = false; + int localSize = 0; + + // Look for the function prologue pattern: push ebp; mov ebp, esp; sub esp, X + foreach (var block in function.AsmFunction.Blocks) + { + foreach (var instruction in block.Instructions) + { + // Look for push 
ebp + if (instruction.Type == InstructionType.Push && + instruction.StructuredOperands.Count > 0 && + instruction.StructuredOperands[0] is RegisterOperand regOp && + regOp.Register == RegisterIndex.Bp) + { + hasPushEbp = true; + continue; + } + + // Look for mov ebp, esp + if (instruction.Type == InstructionType.Mov && + instruction.StructuredOperands.Count > 1 && + instruction.StructuredOperands[0] is RegisterOperand destReg && + instruction.StructuredOperands[1] is RegisterOperand srcReg && + destReg.Register == RegisterIndex.Bp && + srcReg.Register == RegisterIndex.Sp) + { + hasMovEbpEsp = true; + continue; + } + + // Look for sub esp, X to determine local variable space + if (instruction.Type == InstructionType.Sub && + instruction.StructuredOperands.Count > 1 && + instruction.StructuredOperands[0] is RegisterOperand subReg && + instruction.StructuredOperands[1] is ImmediateOperand immOp && + subReg.Register == RegisterIndex.Sp) + { + localSize = (int)immOp.Value; + break; + } + } + + // If we found the complete prologue, no need to check more blocks + if (hasPushEbp && hasMovEbpEsp && localSize > 0) + { + break; + } + } + + // If we didn't find a standard prologue, we can't reliably analyze stack variables + if (!hasPushEbp || !hasMovEbpEsp) + { + return; + } + + // Now scan for memory accesses relative to EBP + foreach (var block in function.AsmFunction.Blocks) + { + foreach (var instruction in block.Instructions) + { + // Look for memory operands that reference [ebp+X] or [ebp-X] + foreach (var operand in instruction.StructuredOperands) + { + if (operand is DisplacementMemoryOperand memOp && + memOp.BaseRegister == RegisterIndex.Bp) + { + // This is accessing memory relative to EBP + int offset = (int)memOp.Displacement; + + // Determine if this is a parameter or local variable + if (offset > 0 && offset < 1000) // Positive offset = parameter (with reasonable limit) + { + // Parameters start at [ebp+8] (return address at [ebp+4], saved ebp at [ebp+0]) + int 
paramIndex = (offset - 8) / 4; // Assuming 4-byte parameters + + // Make sure we have enough parameters in the function + while (function.Parameters.Count <= paramIndex) + { + var param = new Variable($"param_{function.Parameters.Count + 1}", DataType.Unknown) + { + Storage = Variable.StorageType.Parameter, + StackOffset = 8 + (function.Parameters.Count * 4), + IsParameter = true, + ParameterIndex = function.Parameters.Count, + Size = 4 // Assume 4 bytes (32-bit) + }; + function.Parameters.Add(param); + } + } + else if (offset < 0 && offset > -1000) // Negative offset = local variable (with reasonable limit) + { + // Check if we've already seen this offset + if (!stackOffsets.TryGetValue(offset, out var variable)) + { + // Create a new local variable + variable = new Variable($"local_{Math.Abs(offset)}", DataType.Unknown) + { + Storage = Variable.StorageType.Stack, + StackOffset = offset, + Size = 4 // Assume 4 bytes (32-bit) + }; + + // Add to our tracking dictionaries + stackOffsets[offset] = variable; + function.LocalVariables.Add(variable); + } + + // Track the usage of this variable + TrackVariableUsage(variable, instruction); + } + } + } + } + } + + // Analyze register-based variables + AnalyzeRegisterVariables(function); + } + + /// + /// Analyzes register usage to identify variables stored in registers + /// + /// The function to analyze + private void AnalyzeRegisterVariables(Function function) + { + // This is a more complex analysis that would track register values across blocks + // For now, we'll focus on identifying registers that hold consistent values + + // Dictionary to track register variables + var registerVariables = new Dictionary(); + + // For each block, analyze register usage + foreach (var block in function.AsmFunction.Blocks) + { + // Check if we have register values for this block from data flow analysis + var registerValuesKey = "RegisterValues"; + if (_context.GetAnalysisData>(block.Address, registerValuesKey) is Dictionary 
registerValues) + { + foreach (var kvp in registerValues) + { + var register = kvp.Key; + var valueInfo = kvp.Value; + + // Skip special registers like ESP and EBP + if (register == RegisterIndex.Sp || register == RegisterIndex.Bp) + { + continue; + } + + // If the register holds a constant value, it might be a variable + if (valueInfo.Type == DataFlowAnalyzer.ValueInfo.ValueType.Constant) + { + // Check if we already have a variable for this register + if (!registerVariables.TryGetValue(register, out var variable)) + { + // Create a new register variable + variable = new Variable($"reg_{RegisterMapper.GetRegisterName(register, 32)}", DataType.Unknown) + { + Storage = Variable.StorageType.Register, + Register = register, + Size = 4 // Assume 4 bytes (32-bit) + }; + + // Add to our tracking dictionary + registerVariables[register] = variable; + function.RegisterVariables.Add(variable); + } + } + } + } + } + } + + /// + /// Tracks how a variable is used in an instruction + /// + /// The variable to track + /// The instruction using the variable + private void TrackVariableUsage(Variable variable, Instruction instruction) + { + // For now, we'll just try to infer the variable type based on its usage + + // If the variable is used in a comparison with 0, it might be a boolean + if (instruction.Type == InstructionType.Cmp || instruction.Type == InstructionType.Test) + { + if (instruction.StructuredOperands.Count > 1 && + instruction.StructuredOperands[1] is ImmediateOperand immOp && + immOp.Value == 0) + { + // This might be a boolean check + if (variable.Type == DataType.Unknown) + { + // Set to int for now as we don't have a bool type + variable.Type = DataType.Int; + } + } + } + + // If the variable is used with string instructions, it might be a string + // Check for string operations - we don't have specific string instruction types yet + // Skip string detection for now as we don't have the specific instruction types + // We'll detect strings through other means 
later + + // If the variable is used with floating-point instructions, it might be a float + // Check for floating-point operations + if (instruction.Type == InstructionType.Fld || + instruction.Type == InstructionType.Fst || + instruction.Type == InstructionType.Fstp) + { + if (variable.Type == DataType.Unknown) + { + variable.Type = DataType.Float; + } + } + } +}