namespace X86Disassembler.Decompiler; using System; using System.Collections.Generic; using System.Linq; using System.Text; using X86Disassembler.X86; using X86Disassembler.X86.Operands; /// /// Main decompiler class that translates assembly code into higher-level code /// public class Decompiler { // The list of disassembled instructions private readonly List _instructions; // The control flow graph private ControlFlowGraph? _controlFlowGraph; // The data flow analysis private DataFlowAnalysis? _dataFlowAnalysis; // The entry point address private readonly ulong _entryPoint; /// /// Initializes a new instance of the Decompiler class /// /// The list of disassembled instructions /// The entry point address public Decompiler(List instructions, ulong entryPoint) { _instructions = instructions; _entryPoint = entryPoint; } /// /// Decompiles the instructions and returns the decompiled code /// /// The decompiled code public string Decompile() { // Build the control flow graph _controlFlowGraph = ControlFlowGraph.Build(_instructions, _entryPoint); // Perform data flow analysis _dataFlowAnalysis = new DataFlowAnalysis(); _dataFlowAnalysis.Analyze(_instructions); // Generate pseudocode from the control flow graph and data flow analysis return GeneratePseudocode(); } /// /// Generates pseudocode from the control flow graph and data flow analysis /// /// The generated pseudocode private string GeneratePseudocode() { if (_controlFlowGraph == null || _controlFlowGraph.EntryBlock == null) { return "// Could not build control flow graph"; } StringBuilder code = new StringBuilder(); // Add a function header code.AppendLine("// Decompiled function"); code.AppendLine("int DecompiledFunction() {") .AppendLine(); // Generate variable declarations if (_dataFlowAnalysis != null) { foreach (var variable in _dataFlowAnalysis.Variables) { // Skip register variables if (IsRegister(variable.Location)) { continue; } // Generate a variable declaration code.AppendLine($" {variable.Type} {variable.Name}; // {variable.Location}"); } if (_dataFlowAnalysis.Variables.Any(v => !IsRegister(v.Location))) { code.AppendLine(); } } // Process the blocks in a depth-first order HashSet visitedBlocks = new HashSet(); GenerateCodeForBlock(_controlFlowGraph.EntryBlock, code, visitedBlocks, 1); // Add a return statement if not already present if (!code.ToString().Contains("return")) { code.AppendLine(" return 0;"); } // Close the function code.AppendLine("}"); return code.ToString(); } /// /// Generates code for a basic block and its successors /// /// The basic block /// The code builder /// The set of visited blocks /// The indentation level private void GenerateCodeForBlock(ControlFlowGraph.BasicBlock block, StringBuilder code, HashSet visitedBlocks, int indentLevel) { // If we've already visited this block, add a goto statement if (visitedBlocks.Contains(block.StartAddress)) { string indent = new string(' ', indentLevel * 4); code.AppendLine($"{indent}goto block_{block.StartAddress:X8};"); return; } // Mark this block as visited visitedBlocks.Add(block.StartAddress); // Add a label for this block string blockIndent = new string(' ', (indentLevel - 1) * 4); code.AppendLine($"{blockIndent}block_{block.StartAddress:X8}:") .AppendLine(); // Generate code for the instructions in this block foreach (var instruction in block.Instructions) { string instructionCode = TranslateInstruction(instruction, indentLevel); if (!string.IsNullOrEmpty(instructionCode)) { code.AppendLine(instructionCode); } } // Handle successors based on the control flow if (block.Successors.Count == 1) { // Unconditional branch to the next block GenerateCodeForBlock(block.Successors[0], code, visitedBlocks, indentLevel); } else if (block.Successors.Count == 2) { // Conditional branch string indent = new string(' ', indentLevel * 4); // Get the last instruction in the block Instruction lastInstruction = block.Instructions[^1]; string condition = GetConditionFromJump(lastInstruction); // Find the fall-through block and the jump target block ControlFlowGraph.BasicBlock? fallthroughBlock = null; ControlFlowGraph.BasicBlock? jumpTargetBlock = null; // Use a constant estimated instruction length since RawBytes is not available const int estimatedInstructionLength = 4; // Typical x86 instruction length ulong nextAddress = lastInstruction.Address + (ulong)estimatedInstructionLength; foreach (var successor in block.Successors) { if (successor.StartAddress == nextAddress) { fallthroughBlock = successor; } else { jumpTargetBlock = successor; } } if (fallthroughBlock != null && jumpTargetBlock != null) { // Generate an if statement code.AppendLine($"{indent}if ({condition}) {{") .AppendLine(); // Generate code for the jump target block GenerateCodeForBlock(jumpTargetBlock, code, visitedBlocks, indentLevel + 1); // Close the if statement code.AppendLine($"{indent}}}") .AppendLine(); // Generate code for the fall-through block GenerateCodeForBlock(fallthroughBlock, code, visitedBlocks, indentLevel); } else { // If we couldn't determine the fall-through and jump target blocks, // just generate code for both successors foreach (var successor in block.Successors) { GenerateCodeForBlock(successor, code, visitedBlocks, indentLevel); } } } } /// /// Translates an instruction into a higher-level code statement /// /// The instruction to translate /// The indentation level /// The translated code statement private string TranslateInstruction(Instruction instruction, int indentLevel) { string indent = new string(' ', indentLevel * 4); string mnemonic = instruction.Type.ToString().ToLower(); string operands = ""; // Format operands if available if (instruction.StructuredOperands != null && instruction.StructuredOperands.Count > 0) { operands = string.Join(", ", instruction.StructuredOperands.Select(op => op.ToString())); } // Skip jumps (handled by control flow) if (mnemonic.StartsWith("j")) { return $"{indent}// {instruction}"; } // Handle different instruction types switch (mnemonic) { case "mov": return TranslateMovInstruction(instruction, indent); case "add": case "sub": case "mul": case "div": case "and": case "or": case "xor": return TranslateArithmeticInstruction(instruction, indent); case "push": case "pop": return $"{indent}// {instruction}"; case "call": return TranslateCallInstruction(instruction, indent); case "ret": return TranslateReturnInstruction(instruction, indent); case "cmp": case "test": return $"{indent}// {instruction}"; default: // For other instructions, just add a comment return $"{indent}// {instruction}"; } } /// /// Translates a MOV instruction /// /// The instruction to translate /// The indentation string /// The translated code statement private string TranslateMovInstruction(Instruction instruction, string indent) { string[] operandParts = instruction.StructuredOperands.Select(op => op.ToString()).ToArray(); if (operandParts.Length != 2) { return $"{indent}// {instruction}"; } string destination = operandParts[0].Trim(); string source = operandParts[1].Trim(); // Skip register-to-register moves for registers we don't track if (IsRegister(destination) && IsRegister(source)) { return $"{indent}// {instruction}"; } // Translate memory access if (IsMemoryLocation(destination)) { string variableName = GetVariableNameForMemory(destination); return $"{indent}{variableName} = {GetReadableOperand(source)}; // {instruction}"; } else if (IsMemoryLocation(source)) { string variableName = GetVariableNameForMemory(source); return $"{indent}{GetReadableOperand(destination)} = {variableName}; // {instruction}"; } // Default case return $"{indent}{GetReadableOperand(destination)} = {GetReadableOperand(source)}; // {instruction}"; } /// /// Translates an arithmetic instruction /// /// The instruction to translate /// The indentation string /// The translated code statement private string TranslateArithmeticInstruction(Instruction instruction, string indent) { string[] operandParts = instruction.StructuredOperands.Select(op => op.ToString()).ToArray(); if (operandParts.Length != 2) { return $"{indent}// {instruction}"; } string destination = operandParts[0].Trim(); string source = operandParts[1].Trim(); string operatorSymbol = GetOperatorForMnemonic(instruction.Type.ToString().ToLower()); // Skip register-to-register operations for registers we don't track if (IsRegister(destination) && IsRegister(source)) { return $"{indent}// {instruction}"; } // Translate the operation return $"{indent}{GetReadableOperand(destination)} {operatorSymbol}= {GetReadableOperand(source)}; // {instruction}"; } /// /// Translates a CALL instruction /// /// The instruction to translate /// The indentation string /// The translated code statement private string TranslateCallInstruction(Instruction instruction, string indent) { string target = instruction.StructuredOperands.FirstOrDefault()?.ToString() ?? ""; // Try to get a function name from the target string functionName = GetFunctionNameFromTarget(target); return $"{indent}{functionName}(); // {instruction}"; } /// /// Translates a RET instruction /// /// The instruction to translate /// The indentation string /// The translated code statement private string TranslateReturnInstruction(Instruction instruction, string indent) { // Check if EAX is used as a return value if (_dataFlowAnalysis != null) { var eaxVariable = _dataFlowAnalysis.Variables.FirstOrDefault(v => v.Location == "eax" && v.IsReturnValue); if (eaxVariable != null) { return $"{indent}return {eaxVariable.Name}; // {instruction}"; } } return $"{indent}return; // {instruction}"; } /// /// Gets the condition from a conditional jump instruction /// /// The jump instruction /// The condition expression private string GetConditionFromJump(Instruction instruction) { string mnemonic = instruction.Type.ToString().ToLower(); // Map jump mnemonics to conditions return mnemonic switch { "je" => "a == b", "jne" => "a != b", "jz" => "a == 0", "jnz" => "a != 0", "jg" => "a > b", "jge" => "a >= b", "jl" => "a < b", "jle" => "a <= b", "ja" => "a > b (unsigned)", "jae" => "a >= b (unsigned)", "jb" => "a < b (unsigned)", "jbe" => "a <= b (unsigned)", _ => "condition" }; } /// /// Gets the operator for an arithmetic mnemonic /// /// The instruction mnemonic /// The operator private string GetOperatorForMnemonic(string mnemonic) { return mnemonic switch { "add" => "+", "sub" => "-", "mul" => "*", "div" => "/", "and" => "&", "or" => "|", "xor" => "^", _ => mnemonic }; } /// /// Gets a readable representation of an operand /// /// The operand /// A readable representation private string GetReadableOperand(string operand) { // If it's a register, return it as is if (IsRegister(operand)) { return operand; } // If it's a memory location, get a variable name if (IsMemoryLocation(operand)) { return GetVariableNameForMemory(operand); } // If it's a hexadecimal constant, format it if (operand.StartsWith("0x") && operand.Length > 2) { return operand; } // Otherwise, return it as is return operand; } /// /// Gets a variable name for a memory location /// /// The memory location /// A variable name private string GetVariableNameForMemory(string memoryLocation) { if (_dataFlowAnalysis == null) { return "memory"; } // Extract the part inside the brackets int startIndex = memoryLocation.IndexOf('['); int endIndex = memoryLocation.IndexOf(']'); if (startIndex >= 0 && endIndex > startIndex) { string memoryReference = memoryLocation.Substring(startIndex + 1, endIndex - startIndex - 1).Trim(); // Try to find a variable for this memory location var variable = _dataFlowAnalysis.Variables.FirstOrDefault(v => v.Location == memoryReference); if (variable != null) { return variable.Name; } // If it's a stack variable (relative to EBP), give it a meaningful name if (memoryReference.StartsWith("ebp+") || memoryReference.StartsWith("ebp-")) { string offset = memoryReference.Substring(4); return $"local_{offset.Replace("+", "plus_").Replace("-", "minus_")}"; } } return "memory"; } /// /// Gets a function name from a call target /// /// The call target /// A function name private string GetFunctionNameFromTarget(string target) { // If it's a direct address, format it if (target.StartsWith("0x") && target.Length > 2) { return $"function_{target.Substring(2)}"; } // If it's a memory location, extract the address if (IsMemoryLocation(target)) { return $"function_ptr_{GetVariableNameForMemory(target)}"; } // Otherwise, use the target as is return target; } /// /// Checks if an operand is a register /// /// The operand to check /// True if the operand is a register private bool IsRegister(string operand) { string[] registers = { "eax", "ebx", "ecx", "edx", "esi", "edi", "ebp", "esp", "ax", "bx", "cx", "dx", "si", "di", "bp", "sp", "al", "ah", "bl", "bh", "cl", "ch", "dl", "dh" }; return Array.IndexOf(registers, operand.ToLower()) >= 0; } /// /// Checks if an operand is a memory location /// /// The operand to check /// True if the operand is a memory location private bool IsMemoryLocation(string operand) { return operand.Contains('[') && operand.Contains(']'); } }