From c516e063e7fbffdb51e92d08b8145f54f547d4e2 Mon Sep 17 00:00:00 2001 From: bird_egop Date: Mon, 14 Apr 2025 02:07:17 +0300 Subject: [PATCH] basic decompiler and fixes --- .../Decompiler/ControlFlowGraph.cs | 246 +++++++++ .../Decompiler/DataFlowAnalysis.cs | 516 +++++++++++++++++ X86Disassembler/Decompiler/Decompiler.cs | 522 ++++++++++++++++++ X86Disassembler/Program.cs | 23 +- X86Disassembler/X86/Disassembler.cs | 4 +- .../X86/Handlers/Jump/JgeRel8Handler.cs | 6 +- .../X86/Handlers/Jump/JmpRel32Handler.cs | 3 +- .../X86/Handlers/Jump/JmpRel8Handler.cs | 6 +- .../Jump/TwoByteConditionalJumpHandler.cs | 2 - X86Disassembler/X86/Instruction.cs | 2 +- 10 files changed, 1314 insertions(+), 16 deletions(-) create mode 100644 X86Disassembler/Decompiler/ControlFlowGraph.cs create mode 100644 X86Disassembler/Decompiler/DataFlowAnalysis.cs create mode 100644 X86Disassembler/Decompiler/Decompiler.cs diff --git a/X86Disassembler/Decompiler/ControlFlowGraph.cs b/X86Disassembler/Decompiler/ControlFlowGraph.cs new file mode 100644 index 0000000..e5e4d5e --- /dev/null +++ b/X86Disassembler/Decompiler/ControlFlowGraph.cs @@ -0,0 +1,246 @@ +namespace X86Disassembler.Decompiler; + +using System.Collections.Generic; +using X86Disassembler.X86; + +/// +/// Represents a control flow graph for decompilation +/// + public class ControlFlowGraph +{ + /// + /// Represents a basic block in the control flow graph + /// + public class BasicBlock + { + /// + /// Gets or sets the starting address of the basic block + /// + public ulong StartAddress { get; set; } + + /// + /// Gets or sets the ending address of the basic block + /// + public ulong EndAddress { get; set; } + + /// + /// Gets the list of instructions in this basic block + /// + public List Instructions { get; } = []; + + /// + /// Gets the list of successor blocks (blocks that can be executed after this one) + /// + public List Successors { get; } = []; + + /// + /// Gets the list of predecessor blocks (blocks that can execute 
before this one) + /// + public List Predecessors { get; } = []; + + /// + /// Returns a string representation of the basic block + /// + /// A string representation of the basic block + public override string ToString() + { + return $"Block {StartAddress:X8}-{EndAddress:X8} with {Instructions.Count} instructions"; + } + } + + // Dictionary mapping addresses to basic blocks + private readonly Dictionary _blocks = []; + + // Entry point of the control flow graph + private BasicBlock? _entryBlock; + + /// + /// Gets the entry block of the control flow graph + /// + public BasicBlock? EntryBlock => _entryBlock; + + /// + /// Gets all basic blocks in the control flow graph + /// + public IReadOnlyDictionary Blocks => _blocks; + + /// + /// Builds a control flow graph from a list of instructions + /// + /// The list of instructions + /// The entry point address + /// A control flow graph + public static ControlFlowGraph Build(List instructions, ulong entryPoint) + { + ControlFlowGraph cfg = new ControlFlowGraph(); + + // First pass: identify basic block boundaries + HashSet leaders = new HashSet(); + + // The entry point is always a leader + leaders.Add(entryPoint); + + // Identify other leaders + for (int i = 0; i < instructions.Count; i++) + { + Instruction inst = instructions[i]; + + // Check if this instruction is a branch or jump + if (IsControlTransfer(inst)) + { + // The target of a jump/branch is a leader + ulong? targetAddress = GetTargetAddress(inst); + if (targetAddress.HasValue) + { + leaders.Add(targetAddress.Value); + } + + // The instruction following a jump/branch is also a leader (if it exists) + if (i + 1 < instructions.Count) + { + leaders.Add(instructions[i + 1].Address); + } + } + } + + // Second pass: create basic blocks + BasicBlock? 
currentBlock = null; + + foreach (Instruction inst in instructions) + { + // If this instruction is a leader, start a new basic block + if (leaders.Contains(inst.Address)) + { + // Finalize the previous block if it exists + if (currentBlock != null) + { + currentBlock.EndAddress = inst.Address - 1; + cfg._blocks[currentBlock.StartAddress] = currentBlock; + } + + // Create a new block + currentBlock = new BasicBlock + { + StartAddress = inst.Address + }; + + // If this is the entry point, set it as the entry block + if (inst.Address == entryPoint) + { + cfg._entryBlock = currentBlock; + } + } + + // Add the instruction to the current block + if (currentBlock != null) + { + currentBlock.Instructions.Add(inst); + } + + // If this instruction is a control transfer, finalize the current block + if (IsControlTransfer(inst) && currentBlock != null) + { + currentBlock.EndAddress = inst.Address; + cfg._blocks[currentBlock.StartAddress] = currentBlock; + currentBlock = null; + } + } + + // Finalize the last block if it exists + if (currentBlock != null) + { + currentBlock.EndAddress = instructions[^1].Address; + cfg._blocks[currentBlock.StartAddress] = currentBlock; + } + + // Third pass: connect basic blocks + foreach (var block in cfg._blocks.Values) + { + // Get the last instruction in the block + Instruction lastInst = block.Instructions[^1]; + + // If the last instruction is a jump, add the target as a successor + if (IsControlTransfer(lastInst)) + { + ulong? targetAddress = GetTargetAddress(lastInst); + if (targetAddress.HasValue && cfg._blocks.TryGetValue(targetAddress.Value, out BasicBlock? targetBlock)) + { + block.Successors.Add(targetBlock); + targetBlock.Predecessors.Add(block); + } + + // If the instruction is a conditional jump, the next block is also a successor + if (IsConditionalJump(lastInst)) + { + ulong nextAddress = lastInst.Address + (ulong)lastInst.RawBytes.Length; + if (cfg._blocks.TryGetValue(nextAddress, out BasicBlock? 
nextBlock)) + { + block.Successors.Add(nextBlock); + nextBlock.Predecessors.Add(block); + } + } + } + // If the last instruction is not a jump, the next block is the successor + else + { + ulong nextAddress = lastInst.Address + (ulong)lastInst.RawBytes.Length; + if (cfg._blocks.TryGetValue(nextAddress, out BasicBlock? nextBlock)) + { + block.Successors.Add(nextBlock); + nextBlock.Predecessors.Add(block); + } + } + } + + return cfg; + } + + /// + /// Checks if an instruction is a control transfer instruction (jump, call, ret) + /// + /// The instruction to check + /// True if the instruction is a control transfer + private static bool IsControlTransfer(Instruction instruction) + { + string mnemonic = instruction.Mnemonic.ToLower(); + return mnemonic.StartsWith("j") || // All jumps (jmp, je, jne, etc.) + mnemonic == "call" || + mnemonic == "ret"; + } + + /// + /// Checks if an instruction is a conditional jump + /// + /// The instruction to check + /// True if the instruction is a conditional jump + private static bool IsConditionalJump(Instruction instruction) + { + string mnemonic = instruction.Mnemonic.ToLower(); + return mnemonic.StartsWith("j") && mnemonic != "jmp"; // All jumps except jmp + } + + /// + /// Gets the target address of a control transfer instruction + /// + /// The instruction + /// The target address, or null if it cannot be determined + private static ulong? 
GetTargetAddress(Instruction instruction) + { + string operands = instruction.Operands; + + // Check if the operand is a direct address (e.g., "0x12345678") + if (operands.StartsWith("0x") && ulong.TryParse(operands.Substring(2), System.Globalization.NumberStyles.HexNumber, null, out ulong address)) + { + return address; + } + + // For relative jumps, calculate the target address + if (instruction.Mnemonic.ToLower().StartsWith("j") && int.TryParse(operands, out int offset)) + { + return instruction.Address + (ulong)instruction.RawBytes.Length + (ulong)offset; + } + + // For now, we cannot determine the target for indirect jumps + return null; + } +} diff --git a/X86Disassembler/Decompiler/DataFlowAnalysis.cs b/X86Disassembler/Decompiler/DataFlowAnalysis.cs new file mode 100644 index 0000000..ae71c0f --- /dev/null +++ b/X86Disassembler/Decompiler/DataFlowAnalysis.cs @@ -0,0 +1,516 @@ +namespace X86Disassembler.Decompiler; + +using System.Collections.Generic; +using X86Disassembler.X86; + +/// +/// Performs data flow analysis on x86 instructions +/// +public class DataFlowAnalysis +{ + /// + /// Represents a variable in the decompiled code + /// + public class Variable + { + /// + /// Gets or sets the name of the variable + /// + public string Name { get; set; } = string.Empty; + + /// + /// Gets or sets the type of the variable (if known) + /// + public string Type { get; set; } = "int"; // Default to int + + /// + /// Gets or sets the storage location (register, memory, etc.) 
+ /// + public string Location { get; set; } = string.Empty; + + /// + /// Gets or sets whether this variable is a parameter + /// + public bool IsParameter { get; set; } + + /// + /// Gets or sets whether this variable is a return value + /// + public bool IsReturnValue { get; set; } + } + + /// + /// Represents an operation in the decompiled code + /// + public class Operation + { + /// + /// Gets or sets the operation type + /// + public string Type { get; set; } = string.Empty; + + /// + /// Gets or sets the destination variable + /// + public Variable? Destination { get; set; } + + /// + /// Gets or sets the source variables or constants + /// + public List Sources { get; } = []; // Can be Variable or constant value + + /// + /// Gets or sets the original instruction + /// + public Instruction OriginalInstruction { get; set; } = null!; + } + + // Map of register names to variables + private readonly Dictionary _registerVariables = []; + + // Map of memory locations to variables + private readonly Dictionary _memoryVariables = []; + + // List of operations + private readonly List _operations = []; + + // Counter for generating variable names + private int _variableCounter = 0; + + /// + /// Gets the list of operations + /// + public IReadOnlyList Operations => _operations; + + /// + /// Gets the list of variables + /// + public IEnumerable Variables + { + get + { + HashSet uniqueVariables = []; + foreach (var variable in _registerVariables.Values) + { + uniqueVariables.Add(variable); + } + foreach (var variable in _memoryVariables.Values) + { + uniqueVariables.Add(variable); + } + return uniqueVariables; + } + } + + /// + /// Analyzes a list of instructions to identify variables and operations + /// + /// The list of instructions to analyze + public void Analyze(List instructions) + { + // Initialize common register variables + InitializeRegisterVariables(); + + // Process each instruction + foreach (var instruction in instructions) + { + 
AnalyzeInstruction(instruction); + } + } + + /// + /// Initializes common register variables + /// + private void InitializeRegisterVariables() + { + // 32-bit general purpose registers + _registerVariables["eax"] = new Variable { Name = "eax", Location = "eax" }; + _registerVariables["ebx"] = new Variable { Name = "ebx", Location = "ebx" }; + _registerVariables["ecx"] = new Variable { Name = "ecx", Location = "ecx" }; + _registerVariables["edx"] = new Variable { Name = "edx", Location = "edx" }; + _registerVariables["esi"] = new Variable { Name = "esi", Location = "esi" }; + _registerVariables["edi"] = new Variable { Name = "edi", Location = "edi" }; + _registerVariables["ebp"] = new Variable { Name = "ebp", Location = "ebp" }; + _registerVariables["esp"] = new Variable { Name = "esp", Location = "esp" }; + + // Mark EAX as the return value register + _registerVariables["eax"].IsReturnValue = true; + + // 16-bit registers + _registerVariables["ax"] = new Variable { Name = "ax", Location = "ax" }; + _registerVariables["bx"] = new Variable { Name = "bx", Location = "bx" }; + _registerVariables["cx"] = new Variable { Name = "cx", Location = "cx" }; + _registerVariables["dx"] = new Variable { Name = "dx", Location = "dx" }; + _registerVariables["si"] = new Variable { Name = "si", Location = "si" }; + _registerVariables["di"] = new Variable { Name = "di", Location = "di" }; + _registerVariables["bp"] = new Variable { Name = "bp", Location = "bp" }; + _registerVariables["sp"] = new Variable { Name = "sp", Location = "sp" }; + + // 8-bit registers + _registerVariables["al"] = new Variable { Name = "al", Location = "al" }; + _registerVariables["ah"] = new Variable { Name = "ah", Location = "ah" }; + _registerVariables["bl"] = new Variable { Name = "bl", Location = "bl" }; + _registerVariables["bh"] = new Variable { Name = "bh", Location = "bh" }; + _registerVariables["cl"] = new Variable { Name = "cl", Location = "cl" }; + _registerVariables["ch"] = new Variable { Name = 
"ch", Location = "ch" }; + _registerVariables["dl"] = new Variable { Name = "dl", Location = "dl" }; + _registerVariables["dh"] = new Variable { Name = "dh", Location = "dh" }; + } + + /// + /// Analyzes a single instruction to identify variables and operations + /// + /// The instruction to analyze + private void AnalyzeInstruction(Instruction instruction) + { + string mnemonic = instruction.Mnemonic.ToLower(); + string operands = instruction.Operands; + + // Skip instructions without operands + if (string.IsNullOrEmpty(operands)) + { + return; + } + + // Split operands + string[] operandParts = operands.Split(','); + for (int i = 0; i < operandParts.Length; i++) + { + operandParts[i] = operandParts[i].Trim(); + } + + // Create an operation based on the instruction type + Operation operation = new Operation + { + OriginalInstruction = instruction + }; + + switch (mnemonic) + { + case "mov": + HandleMovInstruction(operation, operandParts); + break; + + case "add": + case "sub": + case "mul": + case "div": + case "and": + case "or": + case "xor": + HandleArithmeticInstruction(operation, mnemonic, operandParts); + break; + + case "push": + case "pop": + HandleStackInstruction(operation, mnemonic, operandParts); + break; + + case "call": + HandleCallInstruction(operation, operandParts); + break; + + case "ret": + HandleReturnInstruction(operation); + break; + + case "cmp": + case "test": + HandleComparisonInstruction(operation, mnemonic, operandParts); + break; + + case "jmp": + case "je": + case "jne": + case "jg": + case "jge": + case "jl": + case "jle": + HandleJumpInstruction(operation, mnemonic, operandParts); + break; + + default: + // For other instructions, just record the operation type + operation.Type = mnemonic; + break; + } + + // Add the operation to the list + _operations.Add(operation); + } + + /// + /// Handles a MOV instruction + /// + /// The operation to populate + /// The operand parts + private void HandleMovInstruction(Operation operation, 
string[] operandParts) + { + if (operandParts.Length != 2) + { + return; + } + + operation.Type = "assignment"; + + // Get or create the destination variable + Variable destination = GetOrCreateVariable(operandParts[0]); + operation.Destination = destination; + + // Get the source (variable or constant) + object source = GetOperandValue(operandParts[1]); + operation.Sources.Add(source); + } + + /// + /// Handles an arithmetic instruction (ADD, SUB, MUL, DIV, AND, OR, XOR) + /// + /// The operation to populate + /// The instruction mnemonic + /// The operand parts + private void HandleArithmeticInstruction(Operation operation, string mnemonic, string[] operandParts) + { + if (operandParts.Length != 2) + { + return; + } + + operation.Type = mnemonic; + + // Get or create the destination variable + Variable destination = GetOrCreateVariable(operandParts[0]); + operation.Destination = destination; + + // Get the source (variable or constant) + object source = GetOperandValue(operandParts[1]); + operation.Sources.Add(source); + operation.Sources.Add(destination); // The destination is also a source in arithmetic operations + } + + /// + /// Handles a stack instruction (PUSH, POP) + /// + /// The operation to populate + /// The instruction mnemonic + /// The operand parts + private void HandleStackInstruction(Operation operation, string mnemonic, string[] operandParts) + { + if (operandParts.Length != 1) + { + return; + } + + operation.Type = mnemonic; + + if (mnemonic == "push") + { + // For PUSH, the operand is the source + object source = GetOperandValue(operandParts[0]); + operation.Sources.Add(source); + } + else if (mnemonic == "pop") + { + // For POP, the operand is the destination + Variable destination = GetOrCreateVariable(operandParts[0]); + operation.Destination = destination; + } + } + + /// + /// Handles a CALL instruction + /// + /// The operation to populate + /// The operand parts + private void HandleCallInstruction(Operation operation, string[] 
operandParts) + { + if (operandParts.Length != 1) + { + return; + } + + operation.Type = "call"; + + // The operand is the function name or address + operation.Sources.Add(operandParts[0]); + } + + /// + /// Handles a RET instruction + /// + /// The operation to populate + private void HandleReturnInstruction(Operation operation) + { + operation.Type = "return"; + + // The return value is in EAX + if (_registerVariables.TryGetValue("eax", out Variable? eax)) + { + operation.Sources.Add(eax); + } + } + + /// + /// Handles a comparison instruction (CMP, TEST) + /// + /// The operation to populate + /// The instruction mnemonic + /// The operand parts + private void HandleComparisonInstruction(Operation operation, string mnemonic, string[] operandParts) + { + if (operandParts.Length != 2) + { + return; + } + + operation.Type = mnemonic; + + // Get the operands + object left = GetOperandValue(operandParts[0]); + object right = GetOperandValue(operandParts[1]); + + operation.Sources.Add(left); + operation.Sources.Add(right); + } + + /// + /// Handles a jump instruction (JMP, JE, JNE, etc.) + /// + /// The operation to populate + /// The instruction mnemonic + /// The operand parts + private void HandleJumpInstruction(Operation operation, string mnemonic, string[] operandParts) + { + if (operandParts.Length != 1) + { + return; + } + + operation.Type = mnemonic; + + // The operand is the jump target + operation.Sources.Add(operandParts[0]); + } + + /// + /// Gets or creates a variable for an operand + /// + /// The operand string + /// The variable + private Variable GetOrCreateVariable(string operand) + { + // Check if it's a register + if (IsRegister(operand)) + { + string register = operand.ToLower(); + if (_registerVariables.TryGetValue(register, out Variable? 
variable)) + { + return variable; + } + } + + // Check if it's a memory location + if (IsMemoryLocation(operand)) + { + string normalizedLocation = NormalizeMemoryLocation(operand); + if (_memoryVariables.TryGetValue(normalizedLocation, out Variable? variable)) + { + return variable; + } + + // Create a new variable for this memory location + variable = new Variable + { + Name = $"var_{_variableCounter++}", + Location = normalizedLocation + }; + + _memoryVariables[normalizedLocation] = variable; + return variable; + } + + // If it's neither a register nor a memory location, create a temporary variable + Variable tempVariable = new Variable + { + Name = $"temp_{_variableCounter++}", + Location = operand + }; + + return tempVariable; + } + + /// + /// Gets the value of an operand (variable or constant) + /// + /// The operand string + /// The operand value (Variable or constant) + private object GetOperandValue(string operand) + { + // Check if it's a register or memory location + if (IsRegister(operand) || IsMemoryLocation(operand)) + { + return GetOrCreateVariable(operand); + } + + // Check if it's a hexadecimal constant + if (operand.StartsWith("0x") && operand.Length > 2) + { + if (int.TryParse(operand.Substring(2), System.Globalization.NumberStyles.HexNumber, null, out int value)) + { + return value; + } + } + + // Check if it's a decimal constant + if (int.TryParse(operand, out int decimalValue)) + { + return decimalValue; + } + + // Otherwise, return the operand as a string + return operand; + } + + /// + /// Checks if an operand is a register + /// + /// The operand to check + /// True if the operand is a register + private bool IsRegister(string operand) + { + string[] registers = { "eax", "ebx", "ecx", "edx", "esi", "edi", "ebp", "esp", + "ax", "bx", "cx", "dx", "si", "di", "bp", "sp", + "al", "ah", "bl", "bh", "cl", "ch", "dl", "dh" }; + + return Array.IndexOf(registers, operand.ToLower()) >= 0; + } + + /// + /// Checks if an operand is a memory location + 
/// + /// The operand to check + /// True if the operand is a memory location + private bool IsMemoryLocation(string operand) + { + return operand.Contains('[') && operand.Contains(']'); + } + + /// + /// Normalizes a memory location operand + /// + /// The operand to normalize + /// The normalized memory location + private string NormalizeMemoryLocation(string operand) + { + // Extract the part inside the brackets + int startIndex = operand.IndexOf('['); + int endIndex = operand.IndexOf(']'); + + if (startIndex >= 0 && endIndex > startIndex) + { + string memoryReference = operand.Substring(startIndex + 1, endIndex - startIndex - 1).Trim(); + return memoryReference; + } + + return operand; + } +} diff --git a/X86Disassembler/Decompiler/Decompiler.cs b/X86Disassembler/Decompiler/Decompiler.cs new file mode 100644 index 0000000..4d806fc --- /dev/null +++ b/X86Disassembler/Decompiler/Decompiler.cs @@ -0,0 +1,522 @@ +namespace X86Disassembler.Decompiler; + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using X86Disassembler.X86; + +/// +/// Main decompiler class that translates assembly code into higher-level code +/// +public class Decompiler +{ + // The list of disassembled instructions + private readonly List _instructions; + + // The control flow graph + private ControlFlowGraph? _controlFlowGraph; + + // The data flow analysis + private DataFlowAnalysis? 
_dataFlowAnalysis; + + // The entry point address + private readonly ulong _entryPoint; + + /// + /// Initializes a new instance of the Decompiler class + /// + /// The list of disassembled instructions + /// The entry point address + public Decompiler(List instructions, ulong entryPoint) + { + _instructions = instructions; + _entryPoint = entryPoint; + } + + /// + /// Decompiles the instructions and returns the decompiled code + /// + /// The decompiled code + public string Decompile() + { + // Build the control flow graph + _controlFlowGraph = ControlFlowGraph.Build(_instructions, _entryPoint); + + // Perform data flow analysis + _dataFlowAnalysis = new DataFlowAnalysis(); + _dataFlowAnalysis.Analyze(_instructions); + + // Generate pseudocode from the control flow graph and data flow analysis + return GeneratePseudocode(); + } + + /// + /// Generates pseudocode from the control flow graph and data flow analysis + /// + /// The generated pseudocode + private string GeneratePseudocode() + { + if (_controlFlowGraph == null || _controlFlowGraph.EntryBlock == null) + { + return "// Could not build control flow graph"; + } + + StringBuilder code = new StringBuilder(); + + // Add a function header + code.AppendLine("// Decompiled function"); + code.AppendLine("int DecompiledFunction() {") + .AppendLine(); + + // Generate variable declarations + if (_dataFlowAnalysis != null) + { + foreach (var variable in _dataFlowAnalysis.Variables) + { + // Skip register variables + if (IsRegister(variable.Location)) + { + continue; + } + + // Generate a variable declaration + code.AppendLine($" {variable.Type} {variable.Name}; // {variable.Location}"); + } + + if (_dataFlowAnalysis.Variables.Any(v => !IsRegister(v.Location))) + { + code.AppendLine(); + } + } + + // Process the blocks in a depth-first order + HashSet visitedBlocks = new HashSet(); + GenerateCodeForBlock(_controlFlowGraph.EntryBlock, code, visitedBlocks, 1); + + // Add a return statement if not already present + if 
(!code.ToString().Contains("return")) + { + code.AppendLine(" return 0;"); + } + + // Close the function + code.AppendLine("}"); + + return code.ToString(); + } + + /// + /// Generates code for a basic block and its successors + /// + /// The basic block + /// The code builder + /// The set of visited blocks + /// The indentation level + private void GenerateCodeForBlock(ControlFlowGraph.BasicBlock block, StringBuilder code, HashSet visitedBlocks, int indentLevel) + { + // If we've already visited this block, add a goto statement + if (visitedBlocks.Contains(block.StartAddress)) + { + string indent = new string(' ', indentLevel * 4); + code.AppendLine($"{indent}goto block_{block.StartAddress:X8};"); + return; + } + + // Mark this block as visited + visitedBlocks.Add(block.StartAddress); + + // Add a label for this block + string blockIndent = new string(' ', (indentLevel - 1) * 4); + code.AppendLine($"{blockIndent}block_{block.StartAddress:X8}:") + .AppendLine(); + + // Generate code for the instructions in this block + foreach (var instruction in block.Instructions) + { + string instructionCode = TranslateInstruction(instruction, indentLevel); + if (!string.IsNullOrEmpty(instructionCode)) + { + code.AppendLine(instructionCode); + } + } + + // Handle successors based on the control flow + if (block.Successors.Count == 1) + { + // Unconditional branch to the next block + GenerateCodeForBlock(block.Successors[0], code, visitedBlocks, indentLevel); + } + else if (block.Successors.Count == 2) + { + // Conditional branch + string indent = new string(' ', indentLevel * 4); + + // Get the last instruction in the block + Instruction lastInstruction = block.Instructions[^1]; + string condition = GetConditionFromJump(lastInstruction); + + // Find the fall-through block and the jump target block + ControlFlowGraph.BasicBlock? fallthroughBlock = null; + ControlFlowGraph.BasicBlock? 
jumpTargetBlock = null; + + ulong nextAddress = lastInstruction.Address + (ulong)lastInstruction.RawBytes.Length; + foreach (var successor in block.Successors) + { + if (successor.StartAddress == nextAddress) + { + fallthroughBlock = successor; + } + else + { + jumpTargetBlock = successor; + } + } + + if (fallthroughBlock != null && jumpTargetBlock != null) + { + // Generate an if statement + code.AppendLine($"{indent}if ({condition}) {{") + .AppendLine(); + + // Generate code for the jump target block + GenerateCodeForBlock(jumpTargetBlock, code, visitedBlocks, indentLevel + 1); + + // Close the if statement + code.AppendLine($"{indent}}}") + .AppendLine(); + + // Generate code for the fall-through block + GenerateCodeForBlock(fallthroughBlock, code, visitedBlocks, indentLevel); + } + else + { + // If we couldn't determine the fall-through and jump target blocks, + // just generate code for both successors + foreach (var successor in block.Successors) + { + GenerateCodeForBlock(successor, code, visitedBlocks, indentLevel); + } + } + } + } + + /// + /// Translates an instruction into a higher-level code statement + /// + /// The instruction to translate + /// The indentation level + /// The translated code statement + private string TranslateInstruction(Instruction instruction, int indentLevel) + { + string indent = new string(' ', indentLevel * 4); + string mnemonic = instruction.Mnemonic.ToLower(); + string operands = instruction.Operands; + + // Skip jumps (handled by control flow) + if (mnemonic.StartsWith("j")) + { + return $"{indent}// {instruction}"; + } + + // Handle different instruction types + switch (mnemonic) + { + case "mov": + return TranslateMovInstruction(instruction, indent); + + case "add": + case "sub": + case "mul": + case "div": + case "and": + case "or": + case "xor": + return TranslateArithmeticInstruction(instruction, indent); + + case "push": + case "pop": + return $"{indent}// {instruction}"; + + case "call": + return 
TranslateCallInstruction(instruction, indent); + + case "ret": + return TranslateReturnInstruction(instruction, indent); + + case "cmp": + case "test": + return $"{indent}// {instruction}"; + + default: + // For other instructions, just add a comment + return $"{indent}// {instruction}"; + } + } + + /// + /// Translates a MOV instruction + /// + /// The instruction to translate + /// The indentation string + /// The translated code statement + private string TranslateMovInstruction(Instruction instruction, string indent) + { + string[] operandParts = instruction.Operands.Split(','); + if (operandParts.Length != 2) + { + return $"{indent}// {instruction}"; + } + + string destination = operandParts[0].Trim(); + string source = operandParts[1].Trim(); + + // Skip register-to-register moves for registers we don't track + if (IsRegister(destination) && IsRegister(source)) + { + return $"{indent}// {instruction}"; + } + + // Translate memory access + if (IsMemoryLocation(destination)) + { + string variableName = GetVariableNameForMemory(destination); + return $"{indent}{variableName} = {GetReadableOperand(source)}; // {instruction}"; + } + else if (IsMemoryLocation(source)) + { + string variableName = GetVariableNameForMemory(source); + return $"{indent}{GetReadableOperand(destination)} = {variableName}; // {instruction}"; + } + + // Default case + return $"{indent}{GetReadableOperand(destination)} = {GetReadableOperand(source)}; // {instruction}"; + } + + /// + /// Translates an arithmetic instruction + /// + /// The instruction to translate + /// The indentation string + /// The translated code statement + private string TranslateArithmeticInstruction(Instruction instruction, string indent) + { + string[] operandParts = instruction.Operands.Split(','); + if (operandParts.Length != 2) + { + return $"{indent}// {instruction}"; + } + + string destination = operandParts[0].Trim(); + string source = operandParts[1].Trim(); + string operatorSymbol = 
GetOperatorForMnemonic(instruction.Mnemonic.ToLower()); + + // Skip register-to-register operations for registers we don't track + if (IsRegister(destination) && IsRegister(source)) + { + return $"{indent}// {instruction}"; + } + + // Translate the operation + return $"{indent}{GetReadableOperand(destination)} {operatorSymbol}= {GetReadableOperand(source)}; // {instruction}"; + } + + /// + /// Translates a CALL instruction + /// + /// The instruction to translate + /// The indentation string + /// The translated code statement + private string TranslateCallInstruction(Instruction instruction, string indent) + { + string target = instruction.Operands.Trim(); + + // Try to get a function name from the target + string functionName = GetFunctionNameFromTarget(target); + + return $"{indent}{functionName}(); // {instruction}"; + } + + /// + /// Translates a RET instruction + /// + /// The instruction to translate + /// The indentation string + /// The translated code statement + private string TranslateReturnInstruction(Instruction instruction, string indent) + { + // Check if EAX is used as a return value + if (_dataFlowAnalysis != null) + { + var eaxVariable = _dataFlowAnalysis.Variables.FirstOrDefault(v => v.Location == "eax" && v.IsReturnValue); + if (eaxVariable != null) + { + return $"{indent}return {eaxVariable.Name}; // {instruction}"; + } + } + + return $"{indent}return; // {instruction}"; + } + + /// + /// Gets the condition from a conditional jump instruction + /// + /// The jump instruction + /// The condition expression + private string GetConditionFromJump(Instruction instruction) + { + string mnemonic = instruction.Mnemonic.ToLower(); + + // Map jump mnemonics to conditions + return mnemonic switch + { + "je" => "a == b", + "jne" => "a != b", + "jz" => "a == 0", + "jnz" => "a != 0", + "jg" => "a > b", + "jge" => "a >= b", + "jl" => "a < b", + "jle" => "a <= b", + "ja" => "a > b (unsigned)", + "jae" => "a >= b (unsigned)", + "jb" => "a < b (unsigned)", + 
"jbe" => "a <= b (unsigned)", + _ => "condition" + }; + } + + /// + /// Gets the operator for an arithmetic mnemonic + /// + /// The instruction mnemonic + /// The operator + private string GetOperatorForMnemonic(string mnemonic) + { + return mnemonic switch + { + "add" => "+", + "sub" => "-", + "mul" => "*", + "div" => "/", + "and" => "&", + "or" => "|", + "xor" => "^", + _ => mnemonic + }; + } + + /// + /// Gets a readable representation of an operand + /// + /// The operand + /// A readable representation + private string GetReadableOperand(string operand) + { + // If it's a register, return it as is + if (IsRegister(operand)) + { + return operand; + } + + // If it's a memory location, get a variable name + if (IsMemoryLocation(operand)) + { + return GetVariableNameForMemory(operand); + } + + // If it's a hexadecimal constant, format it + if (operand.StartsWith("0x") && operand.Length > 2) + { + return operand; + } + + // Otherwise, return it as is + return operand; + } + + /// + /// Gets a variable name for a memory location + /// + /// The memory location + /// A variable name + private string GetVariableNameForMemory(string memoryLocation) + { + if (_dataFlowAnalysis == null) + { + return "memory"; + } + + // Extract the part inside the brackets + int startIndex = memoryLocation.IndexOf('['); + int endIndex = memoryLocation.IndexOf(']'); + + if (startIndex >= 0 && endIndex > startIndex) + { + string memoryReference = memoryLocation.Substring(startIndex + 1, endIndex - startIndex - 1).Trim(); + + // Try to find a variable for this memory location + var variable = _dataFlowAnalysis.Variables.FirstOrDefault(v => v.Location == memoryReference); + if (variable != null) + { + return variable.Name; + } + + // If it's a stack variable (relative to EBP), give it a meaningful name + if (memoryReference.StartsWith("ebp+") || memoryReference.StartsWith("ebp-")) + { + string offset = memoryReference.Substring(4); + return $"local_{offset.Replace("+", "plus_").Replace("-", 
"minus_")}"; + } + } + + return "memory"; + } + + /// + /// Gets a function name from a call target + /// + /// The call target + /// A function name + private string GetFunctionNameFromTarget(string target) + { + // If it's a direct address, format it + if (target.StartsWith("0x") && target.Length > 2) + { + return $"function_{target.Substring(2)}"; + } + + // If it's a memory location, extract the address + if (IsMemoryLocation(target)) + { + return $"function_ptr_{GetVariableNameForMemory(target)}"; + } + + // Otherwise, use the target as is + return target; + } + + /// + /// Checks if an operand is a register + /// + /// The operand to check + /// True if the operand is a register + private bool IsRegister(string operand) + { + string[] registers = { "eax", "ebx", "ecx", "edx", "esi", "edi", "ebp", "esp", + "ax", "bx", "cx", "dx", "si", "di", "bp", "sp", + "al", "ah", "bl", "bh", "cl", "ch", "dl", "dh" }; + + return Array.IndexOf(registers, operand.ToLower()) >= 0; + } + + /// + /// Checks if an operand is a memory location + /// + /// The operand to check + /// True if the operand is a memory location + private bool IsMemoryLocation(string operand) + { + return operand.Contains('[') && operand.Contains(']'); + } +} diff --git a/X86Disassembler/Program.cs b/X86Disassembler/Program.cs index c19ded2..0dbc5ba 100644 --- a/X86Disassembler/Program.cs +++ b/X86Disassembler/Program.cs @@ -4,6 +4,7 @@ using System.Text; using System.Collections.Generic; using X86Disassembler.PE; using X86Disassembler.X86; +using X86Disassembler.Decompiler; namespace X86Disassembler; @@ -70,7 +71,7 @@ public class Program Console.WriteLine($"Disassembling section {section.Name} at RVA 0x{section.VirtualAddress:X8}:"); // Create a disassembler for the code section - Disassembler disassembler = new Disassembler(codeBytes, section.VirtualAddress); + Disassembler disassembler = new Disassembler(codeBytes, peFile.OptionalHeader.ImageBase + section.VirtualAddress); // Disassemble all 
instructions
             var instructions = disassembler.Disassemble();
@@ -97,6 +98,26 @@ public class Program
             {
                 Console.WriteLine($"... ({instructions.Count - count} more instructions not shown)");
             }
+
+            // Decompile the instructions. For demonstration only the first 50 are treated as
+            // one function; in a real scenario you'd identify function boundaries first.
+            // An empty listing is skipped because functionInstructions[0] below would throw.
+            int functionSize = Math.Min(50, instructions.Count);
+            if (functionSize > 0)
+            {
+                Console.WriteLine("\nDecompiling the first function:\n");
+                List<Instruction> functionInstructions = instructions.GetRange(0, functionSize);
+
+                // Create a decompiler for the function, entered at its first instruction
+                Decompiler.Decompiler decompiler = new Decompiler.Decompiler(
+                    functionInstructions,
+                    functionInstructions[0].Address
+                );
+
+                // Decompile the function and print the result
+                string decompiledCode = decompiler.Decompile();
+                Console.WriteLine(decompiledCode);
+            }
         }
 
         // Console.WriteLine("\nPress Enter to exit...");
diff --git a/X86Disassembler/X86/Disassembler.cs b/X86Disassembler/X86/Disassembler.cs
index bd7ad56..e784cca 100644
--- a/X86Disassembler/X86/Disassembler.cs
+++ b/X86Disassembler/X86/Disassembler.cs
@@ -15,7 +15,7 @@ public class Disassembler
     private readonly int _length;
 
     // The base address of the code
-    private readonly uint _baseAddress;
+    private readonly ulong _baseAddress;
 
     // Segment override prefixes
     private static readonly byte[] SegmentOverridePrefixes = { 0x26, 0x2E, 0x36, 0x3E, 0x64, 0x65 };
@@ -25,7 +25,7 @@ public class Disassembler
     /// </summary>
     /// <param name="codeBuffer">The buffer containing the code to disassemble</param>
     /// <param name="baseAddress">The base address of the code</param>
-    public Disassembler(byte[] codeBuffer, uint baseAddress)
+    public Disassembler(byte[] codeBuffer, ulong baseAddress)
     {
         _codeBuffer = codeBuffer;
         _length = codeBuffer.Length;
diff --git a/X86Disassembler/X86/Handlers/Jump/JgeRel8Handler.cs b/X86Disassembler/X86/Handlers/Jump/JgeRel8Handler.cs
index 550156c..c3e7082 100644
--- a/X86Disassembler/X86/Handlers/Jump/JgeRel8Handler.cs
+++
b/X86Disassembler/X86/Handlers/Jump/JgeRel8Handler.cs
@@ -43,13 +43,11 @@ public class JgeRel8Handler : InstructionHandler
             instruction.Operands = "??";
             return true;
         }
-        
-        // Read the offset and calculate target address
-        int position = Decoder.GetPosition();
+
         sbyte offset = (sbyte)Decoder.ReadByte();
 
         // Calculate target address (instruction address + instruction length + offset)
-        uint targetAddress = (uint)(instruction.Address + 2 + offset);
+        ulong targetAddress = (ulong)((long)instruction.Address + 2 + offset); // signed math: rel8 must sign-extend so backward jumps subtract
 
         // Format the target address
         instruction.Operands = $"0x{targetAddress:X8}";
diff --git a/X86Disassembler/X86/Handlers/Jump/JmpRel32Handler.cs b/X86Disassembler/X86/Handlers/Jump/JmpRel32Handler.cs
index 0b7aa8e..8883334 100644
--- a/X86Disassembler/X86/Handlers/Jump/JmpRel32Handler.cs
+++ b/X86Disassembler/X86/Handlers/Jump/JmpRel32Handler.cs
@@ -38,8 +38,7 @@ public class JmpRel32Handler : InstructionHandler
         instruction.Mnemonic = "jmp";
 
         // Check if we have enough bytes for the offset (4 bytes)
-        int position = Decoder.GetPosition();
-        if (position + 4 > Length)
+        if (!Decoder.CanReadUInt())
         {
             return false;
         }
diff --git a/X86Disassembler/X86/Handlers/Jump/JmpRel8Handler.cs b/X86Disassembler/X86/Handlers/Jump/JmpRel8Handler.cs
index bfeb0a5..187c137 100644
--- a/X86Disassembler/X86/Handlers/Jump/JmpRel8Handler.cs
+++ b/X86Disassembler/X86/Handlers/Jump/JmpRel8Handler.cs
@@ -42,13 +42,11 @@ public class JmpRel8Handler : InstructionHandler
         {
             return true;
         }
-        
-        // Read the offset and calculate target address
-        int position = Decoder.GetPosition();
+
         sbyte offset = (sbyte)Decoder.ReadByte();
 
         // Calculate target address (instruction address + instruction length + offset)
-        uint targetAddress = (uint)(instruction.Address + 2 + offset);
+        ulong targetAddress = (ulong)((long)instruction.Address + 2 + offset); // signed math: rel8 must sign-extend so backward jumps subtract
 
         // Format the target address
         instruction.Operands = $"0x{targetAddress:X8}";
diff --git a/X86Disassembler/X86/Handlers/Jump/TwoByteConditionalJumpHandler.cs
b/X86Disassembler/X86/Handlers/Jump/TwoByteConditionalJumpHandler.cs
index 8759b88..8f96bcd 100644
--- a/X86Disassembler/X86/Handlers/Jump/TwoByteConditionalJumpHandler.cs
+++ b/X86Disassembler/X86/Handlers/Jump/TwoByteConditionalJumpHandler.cs
@@ -55,8 +55,6 @@ public class TwoByteConditionalJumpHandler : InstructionHandler
     /// <returns>True if the instruction was successfully decoded</returns>
     public override bool Decode(byte opcode, Instruction instruction)
     {
-        int position = Decoder.GetPosition();
-        
         // Check if we have enough bytes for the second byte
         if (!Decoder.CanReadByte())
         {
diff --git a/X86Disassembler/X86/Instruction.cs b/X86Disassembler/X86/Instruction.cs
index 12274aa..5e7ad5c 100644
--- a/X86Disassembler/X86/Instruction.cs
+++ b/X86Disassembler/X86/Instruction.cs
@@ -8,7 +8,7 @@ public class Instruction
     /// <summary>
     /// Gets or sets the address of the instruction
     /// </summary>
-    public uint Address { get; set; }
+    public ulong Address { get; set; }
 
     /// <summary>
     /// Gets or sets the mnemonic of the instruction