diff --git a/X86Disassembler/Decompiler/ControlFlowGraph.cs b/X86Disassembler/Decompiler/ControlFlowGraph.cs
new file mode 100644
index 0000000..e5e4d5e
--- /dev/null
+++ b/X86Disassembler/Decompiler/ControlFlowGraph.cs
@@ -0,0 +1,246 @@
+namespace X86Disassembler.Decompiler;
+
+using System.Collections.Generic;
+using X86Disassembler.X86;
+
+/// <summary>
+/// Represents a control flow graph for decompilation
+/// </summary>
+ public class ControlFlowGraph
+{
+    /// <summary>
+    /// Represents a basic block in the control flow graph
+    /// </summary>
+    public class BasicBlock
+    {
+        /// <summary>
+        /// Gets or sets the starting address of the basic block
+        /// </summary>
+        public ulong StartAddress { get; set; }
+
+        /// <summary>
+        /// Gets or sets the ending address of the basic block
+        /// </summary>
+        public ulong EndAddress { get; set; }
+
+        /// <summary>
+        /// Gets the list of instructions in this basic block
+        /// </summary>
+        public List<Instruction> Instructions { get; } = [];
+
+        /// <summary>
+        /// Gets the list of successor blocks (blocks that can be executed after this one)
+        /// </summary>
+        public List<BasicBlock> Successors { get; } = [];
+
+        /// <summary>
+        /// Gets the list of predecessor blocks (blocks that can execute before this one)
+        /// </summary>
+        public List<BasicBlock> Predecessors { get; } = [];
+
+        /// <summary>
+        /// Returns a string representation of the basic block
+        /// </summary>
+        /// <returns>A string representation of the basic block</returns>
+        public override string ToString()
+        {
+            return $"Block {StartAddress:X8}-{EndAddress:X8} with {Instructions.Count} instructions";
+        }
+    }
+
+    // Maps each block's start address to the block
+    private readonly Dictionary<ulong, BasicBlock> _blocks = [];
+
+    // Entry point of the control flow graph
+    private BasicBlock? _entryBlock;
+
+    /// <summary>
+    /// Gets the entry block of the control flow graph
+    /// </summary>
+    public BasicBlock? EntryBlock => _entryBlock;
+
+    /// <summary>
+    /// Gets all basic blocks in the control flow graph, keyed by start address
+    /// </summary>
+    public IReadOnlyDictionary<ulong, BasicBlock> Blocks => _blocks;
+
+    /// <summary>
+    /// Builds a control flow graph from a list of instructions
+    /// </summary>
+    /// <param name="instructions">The list of instructions</param>
+    /// <param name="entryPoint">The entry point address</param>
+    /// <returns>A control flow graph</returns>
+    public static ControlFlowGraph Build(List<Instruction> instructions, ulong entryPoint)
+    {
+        ControlFlowGraph cfg = new ControlFlowGraph();
+
+        // First pass: identify basic block boundaries. The entry point is always a leader.
+        HashSet<ulong> leaders = new HashSet<ulong> { entryPoint };
+
+        for (int i = 0; i < instructions.Count; i++)
+        {
+            Instruction inst = instructions[i];
+            if (!IsControlTransfer(inst))
+            {
+                continue;
+            }
+
+            // The target of a jump/call is a leader. RET is excluded because its
+            // optional operand is a stack adjustment, not a code address.
+            ulong? targetAddress = IsReturn(inst) ? null : GetTargetAddress(inst);
+            if (targetAddress.HasValue)
+            {
+                leaders.Add(targetAddress.Value);
+            }
+
+            // The instruction following a control transfer is also a leader (if it exists)
+            if (i + 1 < instructions.Count)
+            {
+                leaders.Add(instructions[i + 1].Address);
+            }
+        }
+
+        // Second pass: create basic blocks
+        BasicBlock? currentBlock = null;
+
+        foreach (Instruction inst in instructions)
+        {
+            // A leader closes the block in progress and opens a new one
+            if (leaders.Contains(inst.Address))
+            {
+                Commit(currentBlock);
+                currentBlock = new BasicBlock { StartAddress = inst.Address };
+                if (inst.Address == entryPoint)
+                {
+                    cfg._entryBlock = currentBlock;
+                }
+            }
+
+            // Instructions that precede the first leader belong to no block
+            if (currentBlock == null)
+            {
+                continue;
+            }
+
+            currentBlock.Instructions.Add(inst);
+
+            // A control transfer always ends its block
+            if (IsControlTransfer(inst))
+            {
+                Commit(currentBlock);
+                currentBlock = null;
+            }
+        }
+
+        // Finalize the last block if it exists
+        Commit(currentBlock);
+
+        // Third pass: connect basic blocks
+        foreach (BasicBlock block in cfg._blocks.Values)
+        {
+            Instruction lastInst = block.Instructions[^1];
+            bool isTransfer = IsControlTransfer(lastInst);
+            // RET leaves the function, so the block has no successors in this graph
+            if (isTransfer && IsReturn(lastInst))
+            {
+                continue;
+            }
+
+            // A jump branches to its target. A CALL target is a different function,
+            // so it is not recorded as a successor.
+            if (isTransfer && !IsCall(lastInst))
+            {
+                ulong? targetAddress = GetTargetAddress(lastInst);
+                if (targetAddress.HasValue && cfg._blocks.TryGetValue(targetAddress.Value, out BasicBlock? targetBlock))
+                {
+                    Link(block, targetBlock);
+                }
+            }
+
+            // Execution reaches the next instruction after an ordinary instruction,
+            // a conditional jump that is not taken, or a CALL that returns.
+            if (!isTransfer || IsConditionalJump(lastInst) || IsCall(lastInst))
+            {
+                ulong nextAddress = lastInst.Address + (ulong)lastInst.RawBytes.Length;
+                if (cfg._blocks.TryGetValue(nextAddress, out BasicBlock? nextBlock))
+                {
+                    Link(block, nextBlock);
+                }
+            }
+        }
+
+        return cfg;
+
+        // Stores a finished block; EndAddress becomes the last byte of its final instruction
+        void Commit(BasicBlock? finished)
+        {
+            if (finished is { Instructions: [.., var tail] })
+            {
+                finished.EndAddress = tail.Address + (ulong)tail.RawBytes.Length - 1;
+                cfg._blocks[finished.StartAddress] = finished;
+            }
+        }
+
+        // Adds the edge once, even when the jump target and the fall-through block coincide
+        static void Link(BasicBlock source, BasicBlock target)
+        {
+            if (!source.Successors.Contains(target))
+            {
+                source.Successors.Add(target);
+                target.Predecessors.Add(source);
+            }
+        }
+
+        static bool IsCall(Instruction candidate) => string.Equals(candidate.Mnemonic, "call", System.StringComparison.OrdinalIgnoreCase);
+        static bool IsReturn(Instruction candidate) => string.Equals(candidate.Mnemonic, "ret", System.StringComparison.OrdinalIgnoreCase);
+    }
+
+    /// <summary>
+    /// Checks if an instruction is a control transfer instruction (jump, call, ret)
+    /// </summary>
+    /// <param name="instruction">The instruction to check</param>
+    /// <returns>True if the instruction is a control transfer</returns>
+    private static bool IsControlTransfer(Instruction instruction)
+    {
+        // Ordinal, case-insensitive matching: no lowered copy and no culture-dependent rules
+        return instruction.Mnemonic.StartsWith("j", System.StringComparison.OrdinalIgnoreCase) // All jumps (jmp, je, jne, etc.)
+            || string.Equals(instruction.Mnemonic, "call", System.StringComparison.OrdinalIgnoreCase)
+            || string.Equals(instruction.Mnemonic, "ret", System.StringComparison.OrdinalIgnoreCase);
+    }
+
+    /// <summary>
+    /// Checks if an instruction is a conditional jump
+    /// </summary>
+    /// <param name="instruction">The instruction to check</param>
+    /// <returns>True if the instruction is a conditional jump</returns>
+    private static bool IsConditionalJump(Instruction instruction)
+    {
+        return instruction.Mnemonic.StartsWith("j", System.StringComparison.OrdinalIgnoreCase) // All jumps except jmp
+            && !string.Equals(instruction.Mnemonic, "jmp", System.StringComparison.OrdinalIgnoreCase);
+    }
+
+    /// <summary>
+    /// Gets the target address of a control transfer instruction
+    /// </summary>
+    /// <param name="instruction">The instruction</param>
+    /// <returns>The target address, or null if it cannot be determined</returns>
+    private static ulong? GetTargetAddress(Instruction instruction)
+    {
+        string operands = instruction.Operands;
+        var invariant = System.Globalization.CultureInfo.InvariantCulture;
+        // RET's optional immediate (e.g. "ret 0x8") is a stack adjustment, not a code address
+        bool isReturn = string.Equals(instruction.Mnemonic, "ret", System.StringComparison.OrdinalIgnoreCase);
+        // Check if the operand is a direct address (e.g., "0x12345678")
+        if (!isReturn && operands.StartsWith("0x", System.StringComparison.Ordinal) && ulong.TryParse(operands.Substring(2), System.Globalization.NumberStyles.HexNumber, invariant, out ulong address))
+        {
+            return address;
+        }
+
+        // Relative jump: the signed offset counts from the next instruction; unchecked lets a negative offset wrap instead of throwing
+        if (instruction.Mnemonic.StartsWith("j", System.StringComparison.OrdinalIgnoreCase) && int.TryParse(operands, System.Globalization.NumberStyles.Integer, invariant, out int offset))
+        {
+            return unchecked(instruction.Address + (ulong)instruction.RawBytes.Length + (ulong)(long)offset);
+        }
+        return null; // For now, we cannot determine the target for indirect jumps
+    }
+}
diff --git a/X86Disassembler/Decompiler/DataFlowAnalysis.cs b/X86Disassembler/Decompiler/DataFlowAnalysis.cs
new file mode 100644
index 0000000..ae71c0f
--- /dev/null
+++ b/X86Disassembler/Decompiler/DataFlowAnalysis.cs
@@ -0,0 +1,516 @@
+namespace X86Disassembler.Decompiler;
+
+using System.Collections.Generic;
+using X86Disassembler.X86;
+
+/// <summary>
+/// Performs data flow analysis on x86 instructions
+/// </summary>
+public class DataFlowAnalysis
+{
+ /// <summary>
+ /// Represents a variable in the decompiled code
+ /// </summary>
+ public class Variable
+ {
+ /// <summary>
+ /// Gets or sets the name of the variable
+ /// </summary>
+ public string Name { get; set; } = string.Empty;
+
+ /// <summary>
+ /// Gets or sets the type of the variable (if known); defaults to "int"
+ /// </summary>
+ public string Type { get; set; } = "int"; // Default to int
+
+ /// <summary>
+ /// Gets or sets the storage location (register, memory, etc.)
+ /// </summary>
+ public string Location { get; set; } = string.Empty;
+
+ /// <summary>
+ /// Gets or sets whether this variable is a parameter
+ /// </summary>
+ public bool IsParameter { get; set; }
+
+ /// <summary>
+ /// Gets or sets whether this variable is a return value
+ /// </summary>
+ public bool IsReturnValue { get; set; }
+ }
+
+    /// <summary>
+    /// Represents an operation in the decompiled code
+    /// </summary>
+    public class Operation
+    {
+        /// <summary>
+        /// Gets or sets the operation type
+        /// </summary>
+        public string Type { get; set; } = string.Empty;
+
+        /// <summary>
+        /// Gets or sets the destination variable
+        /// </summary>
+        public Variable? Destination { get; set; }
+
+        /// <summary>
+        /// Gets the sources: each entry is a Variable, an int constant, or raw operand text
+        /// </summary>
+        public List<object> Sources { get; } = []; // Can be Variable or constant value
+
+        /// <summary>
+        /// Gets or sets the original instruction
+        /// </summary>
+        public Instruction OriginalInstruction { get; set; } = null!;
+    }
+
+    // Map of register names to variables
+    private readonly Dictionary<string, Variable> _registerVariables = [];
+
+    // Map of memory locations to variables
+    private readonly Dictionary<string, Variable> _memoryVariables = [];
+
+    // List of operations
+    private readonly List<Operation> _operations = [];
+
+    // Counter for generating variable names
+    private int _variableCounter = 0;
+
+    /// <summary>
+    /// Gets the list of operations
+    /// </summary>
+    public IReadOnlyList<Operation> Operations => _operations;
+
+    /// <summary>
+    /// Gets the list of variables
+    /// </summary>
+    /// <remarks>
+    /// A new set is built on every access and holds each register and memory
+    /// variable once. Temporary variables created for operands that are neither
+    /// a register nor a memory location are stored in neither map, so they are
+    /// not part of the result.
+    /// </remarks>
+    public IEnumerable<Variable> Variables
+    {
+        get
+        {
+            // The set removes duplicates in case one Variable is stored under several keys
+            HashSet<Variable> uniqueVariables = new HashSet<Variable>(_registerVariables.Values);
+            uniqueVariables.UnionWith(_memoryVariables.Values);
+            return uniqueVariables;
+        }
+    }
+
+    /// <summary>
+    /// Analyzes a list of instructions to identify variables and operations
+    /// </summary>
+    /// <param name="instructions">The list of instructions to analyze</param>
+    public void Analyze(List<Instruction> instructions)
+    {
+        // Initialize common register variables
+        InitializeRegisterVariables();
+
+        // Process each instruction, in list order
+        foreach (Instruction instruction in instructions)
+        {
+            AnalyzeInstruction(instruction);
+        }
+    }
+
+    /// <summary>
+    /// Initializes common register variables
+    /// </summary>
+    /// <remarks>
+    /// Any variable already stored under one of these register names is replaced
+    /// by a fresh instance.
+    /// </remarks>
+    private void InitializeRegisterVariables()
+    {
+        // Every register that gets its own variable, grouped by width
+        string[] registerNames =
+        {
+            // 32-bit general purpose registers
+            "eax", "ebx", "ecx", "edx",
+            "esi", "edi", "ebp", "esp",
+
+            // 16-bit registers
+            "ax", "bx", "cx", "dx",
+            "si", "di", "bp", "sp",
+
+            // 8-bit registers
+            "al", "ah", "bl", "bh",
+            "cl", "ch", "dl", "dh",
+        };
+
+        // Name and location of a register variable are both the register name
+        foreach (string registerName in registerNames)
+        {
+            _registerVariables[registerName] = new Variable
+            {
+                Name = registerName,
+                Location = registerName
+            };
+        }
+
+        // Mark EAX as the return value register
+        _registerVariables["eax"].IsReturnValue = true;
+    }
+
+    /// <summary>
+    /// Analyzes a single instruction to identify variables and operations
+    /// </summary>
+    /// <remarks>
+    /// An instruction that is skipped adds no entry to the operation list.
+    /// </remarks>
+    /// <param name="instruction">The instruction to analyze</param>
+    private void AnalyzeInstruction(Instruction instruction)
+    {
+        // Invariant lowering keeps the switch below independent of the current culture
+        string mnemonic = instruction.Mnemonic.ToLowerInvariant();
+        string operands = instruction.Operands ?? string.Empty;
+
+        // Skip instructions without operands. RET is the exception: a bare "ret" has
+        // none, yet it must still be recorded as a return operation.
+        if (operands.Length == 0 && mnemonic != "ret")
+        {
+            return;
+        }
+
+        // Split operands
+        string[] operandParts = operands.Split(',');
+        for (int i = 0; i < operandParts.Length; i++)
+        {
+            operandParts[i] = operandParts[i].Trim();
+        }
+
+        // Create an operation based on the instruction type
+        Operation operation = new Operation
+        {
+            OriginalInstruction = instruction
+        };
+
+        switch (mnemonic)
+        {
+            case "mov":
+                HandleMovInstruction(operation, operandParts);
+                break;
+
+            case "add":
+            case "sub":
+            case "mul":
+            case "div":
+            case "and":
+            case "or":
+            case "xor":
+                HandleArithmeticInstruction(operation, mnemonic, operandParts);
+                break;
+
+            case "push":
+            case "pop":
+                HandleStackInstruction(operation, mnemonic, operandParts);
+                break;
+
+            case "call":
+                HandleCallInstruction(operation, operandParts);
+                break;
+
+            case "ret":
+                HandleReturnInstruction(operation);
+                break;
+
+            case "cmp":
+            case "test":
+                HandleComparisonInstruction(operation, mnemonic, operandParts);
+                break;
+
+            // Every jump mnemonic (jmp, je, jz, ja, ...) records its target
+            case var jump when jump.StartsWith('j'):
+                HandleJumpInstruction(operation, mnemonic, operandParts);
+                break;
+
+            default:
+                // For other instructions, just record the operation type
+                operation.Type = mnemonic;
+                break;
+        }
+
+        // Add the operation to the list
+        _operations.Add(operation);
+    }
+
+    /// <summary>
+    /// Handles a MOV instruction
+    /// </summary>
+    /// <param name="operation">The operation to populate</param>
+    /// <param name="operandParts">The operand parts</param>
+    private void HandleMovInstruction(Operation operation, string[] operandParts)
+    {
+        // MOV is modeled only as "destination, source"; with any other operand
+        // count the operation is left as it was passed in
+        if (operandParts is not [var target, var origin])
+        {
+            return;
+        }
+
+        operation.Type = "assignment";
+
+        // The first operand receives the value
+        operation.Destination = GetOrCreateVariable(target);
+
+        // The second operand supplies it (variable or constant)
+        operation.Sources.Add(GetOperandValue(origin));
+    }
+
+    /// <summary>
+    /// Handles an arithmetic instruction (ADD, SUB, MUL, DIV, AND, OR, XOR)
+    /// </summary>
+    /// <param name="operation">The operation to populate</param>
+    /// <param name="mnemonic">The instruction mnemonic</param>
+    /// <param name="operandParts">The operand parts</param>
+    private void HandleArithmeticInstruction(Operation operation, string mnemonic, string[] operandParts)
+    {
+        // Only the two-operand form is modeled; other operand counts leave the operation untouched
+        if (operandParts is not [var target, var operand])
+        {
+            return;
+        }
+
+        operation.Type = mnemonic;
+
+        // The first operand is the variable that receives the result
+        Variable destination = GetOrCreateVariable(target);
+        operation.Destination = destination;
+
+        // Sources: the second operand first, then the destination, which is read as well as written
+        operation.Sources.Add(GetOperandValue(operand));
+        operation.Sources.Add(destination);
+    }
+
+    /// <summary>
+    /// Handles a stack instruction (PUSH, POP)
+    /// </summary>
+    /// <param name="operation">The operation to populate</param>
+    /// <param name="mnemonic">The instruction mnemonic</param>
+    /// <param name="operandParts">The operand parts</param>
+    private void HandleStackInstruction(Operation operation, string mnemonic, string[] operandParts)
+    {
+        if (operandParts is not [var operand])
+        {
+            return;
+        }
+
+        operation.Type = mnemonic;
+
+        switch (mnemonic)
+        {
+            case "push":
+                // PUSH reads its operand, so the operand is a source
+                operation.Sources.Add(GetOperandValue(operand));
+                break;
+
+            case "pop":
+                // POP writes its operand, so the operand is the destination
+                operation.Destination = GetOrCreateVariable(operand);
+                break;
+        }
+    }
+
+    /// <summary>
+    /// Handles a CALL instruction
+    /// </summary>
+    /// <param name="operation">The operation to populate</param>
+    /// <param name="operandParts">The operand parts</param>
+    private void HandleCallInstruction(Operation operation, string[] operandParts)
+    {
+        // A call is modeled only when it has exactly one operand; otherwise the
+        // operation is left as it was passed in
+        if (operandParts is [var callee])
+        {
+            operation.Type = "call";
+
+            // Keep the callee (function name or address) as raw operand text
+            operation.Sources.Add(callee);
+        }
+    }
+
+    /// <summary>
+    /// Handles a RET instruction
+    /// </summary>
+    /// <param name="operation">The operation to populate</param>
+    private void HandleReturnInstruction(Operation operation)
+    {
+        operation.Type = "return";
+        // EAX carries the return value; it is attached only when the register map contains it
+        if (!_registerVariables.TryGetValue("eax", out Variable? returnRegister))
+        {
+            return;
+        }
+        operation.Sources.Add(returnRegister);
+    }
+
+    /// <summary>
+    /// Handles a comparison instruction (CMP, TEST)
+    /// </summary>
+    /// <param name="operation">The operation to populate</param>
+    /// <param name="mnemonic">The instruction mnemonic</param>
+    /// <param name="operandParts">The operand parts</param>
+    private void HandleComparisonInstruction(Operation operation, string mnemonic, string[] operandParts)
+    {
+        // CMP/TEST are modeled only in their two-operand form
+        if (operandParts is not [var first, var second])
+        {
+            return;
+        }
+
+        operation.Type = mnemonic;
+
+        // Resolve both operands, left before right, then record them in that order
+        object left = GetOperandValue(first);
+        object right = GetOperandValue(second);
+
+        operation.Sources.AddRange(new[] { left, right });
+    }
+
+    /// <summary>
+    /// Handles a jump instruction (JMP, JE, JNE, etc.)
+    /// </summary>
+    /// <param name="operation">The operation to populate</param>
+    /// <param name="mnemonic">The instruction mnemonic</param>
+    /// <param name="operandParts">The operand parts</param>
+    private void HandleJumpInstruction(Operation operation, string mnemonic, string[] operandParts)
+    {
+        // Only single-operand jumps are recorded
+        if (operandParts.Length == 1)
+        {
+            operation.Type = mnemonic;
+
+            // The jump target is kept as raw operand text
+            string target = operandParts[0];
+            operation.Sources.Add(target);
+        }
+    }
+
+    /// <summary>
+    /// Gets or creates a variable for an operand
+    /// </summary>
+    /// <remarks>
+    /// Register and memory variables are stored and reused across calls; any other
+    /// operand yields a new temporary variable every time.
+    /// </remarks>
+    /// <param name="operand">The operand string</param>
+    /// <returns>The variable</returns>
+    private Variable GetOrCreateVariable(string operand)
+    {
+        // Registers resolve to their stored variable. The key is lowered with the
+        // invariant culture: culture-aware lowering turns "EDI" into "edı" under tr-TR
+        // and the lookup would miss.
+        if (IsRegister(operand)
+            && _registerVariables.TryGetValue(operand.ToLowerInvariant(), out Variable? registerVariable))
+        {
+            return registerVariable;
+        }
+
+        // Memory operands share one variable per normalized location
+        if (IsMemoryLocation(operand))
+        {
+            string normalizedLocation = NormalizeMemoryLocation(operand);
+            if (!_memoryVariables.TryGetValue(normalizedLocation, out Variable? memoryVariable))
+            {
+                // First sighting of this location: create and remember a new variable
+                memoryVariable = new Variable
+                {
+                    Name = $"var_{_variableCounter++}",
+                    Location = normalizedLocation
+                };
+                _memoryVariables[normalizedLocation] = memoryVariable;
+            }
+
+            return memoryVariable;
+        }
+
+        // Neither a register nor a memory location: hand back a temporary variable.
+        // It is not stored in either map, so each call creates a new one.
+        return new Variable
+        {
+            Name = $"temp_{_variableCounter++}",
+            Location = operand
+        };
+    }
+
+    /// <summary>
+    /// Gets the value of an operand (variable or constant)
+    /// </summary>
+    /// <param name="operand">The operand string</param>
+    /// <returns>The operand value (Variable or constant)</returns>
+    private object GetOperandValue(string operand)
+    {
+        // Registers and memory references are represented by variables,
+        // looked up or created by GetOrCreateVariable
+        if (IsRegister(operand) || IsMemoryLocation(operand))
+        {
+            return GetOrCreateVariable(operand);
+        }
+
+        // Hexadecimal constant: "0x" followed by at least one character. Text that does
+        // not parse as an int falls through to the checks below.
+        bool hasHexPrefix = operand.StartsWith("0x") && operand.Length > 2;
+        if (hasHexPrefix && int.TryParse(operand.Substring(2), System.Globalization.NumberStyles.HexNumber, null, out int hexValue))
+        {
+            return hexValue;
+        }
+
+        // Decimal constant
+        if (int.TryParse(operand, out int decimalValue))
+        {
+            return decimalValue;
+        }
+
+        // Anything else is passed through as text
+        return operand;
+    }
+
+    /// <summary>
+    /// Checks if an operand is a register
+    /// </summary>
+    /// <param name="operand">The operand to check</param>
+    /// <returns>True if the operand is a register</returns>
+    private bool IsRegister(string operand)
+    {
+        // Invariant lowering: culture-aware lowering maps "EDI" to "edı" under tr-TR and would miss it
+        return operand.ToLowerInvariant() is
+            "eax" or "ebx" or "ecx" or "edx" or "esi" or "edi" or "ebp" or "esp" or
+            "ax" or "bx" or "cx" or "dx" or "si" or "di" or "bp" or "sp" or
+            "al" or "ah" or "bl" or "bh" or "cl" or "ch" or "dl" or "dh";
+    }
+
+    /// <summary>
+    /// Checks if an operand is a memory location
+    /// </summary>
+    /// <param name="operand">The operand to check</param>
+    /// <returns>True if the operand contains both '[' and ']'</returns>
+    private bool IsMemoryLocation(string operand)
+    {
+        return operand.IndexOf('[') >= 0 && operand.IndexOf(']') >= 0;
+    }
+
+    /// <summary>
+    /// Normalizes a memory location operand
+    /// </summary>
+    /// <param name="operand">The operand to normalize</param>
+    /// <returns>The normalized memory location</returns>
+    private string NormalizeMemoryLocation(string operand)
+    {
+        // Locate the first '[' and the first ']'
+        int open = operand.IndexOf('[');
+        int close = operand.IndexOf(']');
+        // Without a well-formed pair, hand the operand back unchanged
+        if (open < 0 || close <= open)
+        {
+            return operand;
+        }
+
+        // Keep only the trimmed text between the brackets
+        return operand[(open + 1)..close].Trim();
+    }
+}
diff --git a/X86Disassembler/Decompiler/Decompiler.cs b/X86Disassembler/Decompiler/Decompiler.cs
new file mode 100644
index 0000000..4d806fc
--- /dev/null
+++ b/X86Disassembler/Decompiler/Decompiler.cs
@@ -0,0 +1,522 @@
+namespace X86Disassembler.Decompiler;
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using X86Disassembler.X86;
+
+/// <summary>
+/// Main decompiler class that translates assembly code into higher-level code
+/// </summary>
+public class Decompiler
+{
+    // The list of disassembled instructions
+    private readonly List<Instruction> _instructions;
+
+    // The control flow graph (null until Decompile has run)
+    private ControlFlowGraph? _controlFlowGraph;
+
+    // The data flow analysis (null until Decompile has run)
+    private DataFlowAnalysis? _dataFlowAnalysis;
+
+    // The entry point address
+    private readonly ulong _entryPoint;
+
+    /// <summary>
+    /// Initializes a new instance of the Decompiler class
+    /// </summary>
+    /// <param name="instructions">The list of disassembled instructions</param>
+    /// <param name="entryPoint">The entry point address</param>
+    public Decompiler(List<Instruction> instructions, ulong entryPoint)
+    {
+        _instructions = instructions;
+        _entryPoint = entryPoint;
+    }
+
+    /// <summary>
+    /// Decompiles the instructions and returns the decompiled code
+    /// </summary>
+    /// <returns>The decompiled code</returns>
+    public string Decompile()
+    {
+        // Control flow first: build the graph from the instructions and the entry point
+        _controlFlowGraph = ControlFlowGraph.Build(_instructions, _entryPoint);
+
+        // Then data flow, run over the same instruction list
+        DataFlowAnalysis analysis = new DataFlowAnalysis();
+        _dataFlowAnalysis = analysis;
+        analysis.Analyze(_instructions);
+
+        return GeneratePseudocode();
+    }
+
+    /// <summary>
+    /// Generates pseudocode from the control flow graph and data flow analysis
+    /// </summary>
+    /// <returns>The generated pseudocode</returns>
+    private string GeneratePseudocode()
+    {
+        if (_controlFlowGraph?.EntryBlock is not { } entryBlock)
+        {
+            return "// Could not build control flow graph";
+        }
+
+        StringBuilder code = new StringBuilder();
+
+        // Add a function header
+        code.AppendLine("// Decompiled function");
+        code.AppendLine("int DecompiledFunction() {")
+            .AppendLine();
+
+        // Generate variable declarations
+        if (_dataFlowAnalysis != null)
+        {
+            // Variables builds a new set on every access, so take one snapshot and reuse it.
+            // Register variables are skipped: only the remaining variables are declared.
+            List<DataFlowAnalysis.Variable> declared = _dataFlowAnalysis.Variables
+                .Where(variable => !IsRegister(variable.Location))
+                .ToList();
+
+            foreach (DataFlowAnalysis.Variable variable in declared)
+            {
+                code.AppendLine($"    {variable.Type} {variable.Name}; // {variable.Location}");
+            }
+
+            // Separate the declarations from the body when there are any
+            if (declared.Count > 0)
+            {
+                code.AppendLine();
+            }
+        }
+
+        // Process the blocks in a depth-first order
+        HashSet<ulong> visitedBlocks = new HashSet<ulong>();
+        GenerateCodeForBlock(entryBlock, code, visitedBlocks, 1);
+
+        // Add a return statement if not already present
+        if (!code.ToString().Contains("return"))
+        {
+            code.AppendLine("    return 0;");
+        }
+
+        // Close the function
+        code.AppendLine("}");
+
+        return code.ToString();
+    }
+
+    /// <summary>
+    /// Generates code for a basic block and its successors
+    /// </summary>
+    /// <param name="block">The basic block</param>
+    /// <param name="code">The code builder</param>
+    /// <param name="visitedBlocks">The set of visited blocks</param>
+    /// <param name="indentLevel">The indentation level</param>
+    private void GenerateCodeForBlock(ControlFlowGraph.BasicBlock block, StringBuilder code, HashSet<ulong> visitedBlocks, int indentLevel)
+    {
+        // If we've already visited this block, add a goto statement instead of emitting it
+        // again. Add returns false when the address is already in the set.
+        if (!visitedBlocks.Add(block.StartAddress))
+        {
+            string gotoIndent = new string(' ', indentLevel * 4);
+            code.AppendLine($"{gotoIndent}goto block_{block.StartAddress:X8};");
+            return;
+        }
+
+        // Add a label for this block
+        string blockIndent = new string(' ', (indentLevel - 1) * 4);
+        code.AppendLine($"{blockIndent}block_{block.StartAddress:X8}:")
+            .AppendLine();
+
+        // Generate code for the instructions in this block
+        foreach (Instruction instruction in block.Instructions)
+        {
+            string instructionCode = TranslateInstruction(instruction, indentLevel);
+            if (!string.IsNullOrEmpty(instructionCode))
+            {
+                code.AppendLine(instructionCode);
+            }
+        }
+
+        // Handle successors based on the control flow
+        if (block.Successors.Count == 1)
+        {
+            // Unconditional branch to the next block
+            GenerateCodeForBlock(block.Successors[0], code, visitedBlocks, indentLevel);
+        }
+        else if (block.Successors.Count == 2)
+        {
+            // Conditional branch: the successor that starts right after the last
+            // instruction is the fall-through path, the other one is the path taken
+            // when the jump condition holds
+            string indent = new string(' ', indentLevel * 4);
+
+            // Get the last instruction in the block
+            Instruction lastInstruction = block.Instructions[^1];
+            string condition = GetConditionFromJump(lastInstruction);
+
+            // Find the fall-through block and the jump target block
+            ControlFlowGraph.BasicBlock? fallthroughBlock = null;
+            ControlFlowGraph.BasicBlock? jumpTargetBlock = null;
+
+            ulong nextAddress = lastInstruction.Address + (ulong)lastInstruction.RawBytes.Length;
+            foreach (ControlFlowGraph.BasicBlock successor in block.Successors)
+            {
+                if (successor.StartAddress == nextAddress)
+                {
+                    fallthroughBlock = successor;
+                }
+                else
+                {
+                    jumpTargetBlock = successor;
+                }
+            }
+
+            if (fallthroughBlock != null && jumpTargetBlock != null)
+            {
+                // Generate an if statement
+                code.AppendLine($"{indent}if ({condition}) {{")
+                    .AppendLine();
+
+                // Generate code for the jump target block
+                GenerateCodeForBlock(jumpTargetBlock, code, visitedBlocks, indentLevel + 1);
+
+                // Close the if statement
+                code.AppendLine($"{indent}}}")
+                    .AppendLine();
+
+                // Generate code for the fall-through block
+                GenerateCodeForBlock(fallthroughBlock, code, visitedBlocks, indentLevel);
+            }
+            else
+            {
+                // If we couldn't determine the fall-through and jump target blocks,
+                // just generate code for both successors
+                foreach (ControlFlowGraph.BasicBlock successor in block.Successors)
+                {
+                    GenerateCodeForBlock(successor, code, visitedBlocks, indentLevel);
+                }
+            }
+        }
+    }
+
+    /// <summary>
+    /// Translates an instruction into a higher-level code statement
+    /// </summary>
+    /// <param name="instruction">The instruction to translate</param>
+    /// <param name="indentLevel">The indentation level</param>
+    /// <returns>The translated code statement</returns>
+    private string TranslateInstruction(Instruction instruction, int indentLevel)
+    {
+        string indent = new string(' ', indentLevel * 4);
+        // Lower with the invariant culture: culture-aware lowering maps "DIV" to "dıv"
+        // under tr-TR, and the instruction would then fall into the default branch
+        string mnemonic = instruction.Mnemonic.ToLowerInvariant();
+
+        // Skip jumps (handled by control flow)
+        if (mnemonic.StartsWith('j'))
+        {
+            return $"{indent}// {instruction}";
+        }
+
+        // Handle different instruction types
+        switch (mnemonic)
+        {
+            case "mov":
+                return TranslateMovInstruction(instruction, indent);
+
+            case "add":
+            case "sub":
+            case "mul":
+            case "div":
+            case "and":
+            case "or":
+            case "xor":
+                return TranslateArithmeticInstruction(instruction, indent);
+
+            case "call":
+                return TranslateCallInstruction(instruction, indent);
+
+            case "ret":
+                return TranslateReturnInstruction(instruction, indent);
+
+            case "push":
+            case "pop":
+            case "cmp":
+            case "test":
+                // Stack and comparison instructions are only echoed as comments
+                return $"{indent}// {instruction}";
+
+            default:
+                // For other instructions, just add a comment
+                return $"{indent}// {instruction}";
+        }
+    }
+
+    /// <summary>
+    /// Translates a MOV instruction
+    /// </summary>
+    /// <param name="instruction">The instruction to translate</param>
+    /// <param name="indent">The indentation string</param>
+    /// <returns>The translated code statement</returns>
+    private string TranslateMovInstruction(Instruction instruction, string indent)
+    {
+        // MOV is translated only in its "destination, source" form
+        string[] operandParts = instruction.Operands.Split(',');
+        if (operandParts is not [var rawDestination, var rawSource])
+        {
+            return $"{indent}// {instruction}";
+        }
+
+        string destination = rawDestination.Trim();
+        string source = rawSource.Trim();
+
+        // Register-to-register moves are only echoed as a comment
+        if (IsRegister(destination) && IsRegister(source))
+        {
+            return $"{indent}// {instruction}";
+        }
+
+        // A memory destination is shown under its variable name
+        bool destinationIsMemory = IsMemoryLocation(destination);
+        string left = destinationIsMemory
+            ? GetVariableNameForMemory(destination)
+            : GetReadableOperand(destination);
+
+        // A memory source gets the same treatment only when the destination is not memory
+        string right = !destinationIsMemory && IsMemoryLocation(source)
+            ? GetVariableNameForMemory(source)
+            : GetReadableOperand(source);
+
+        // Either way the statement reads "destination = source"
+        return $"{indent}{left} = {right}; // {instruction}";
+    }
+
+    /// <summary>
+    /// Translates an arithmetic instruction
+    /// </summary>
+    /// <param name="instruction">The instruction to translate</param>
+    /// <param name="indent">The indentation string</param>
+    /// <returns>The translated code statement</returns>
+    private string TranslateArithmeticInstruction(Instruction instruction, string indent)
+    {
+        string[] operandParts = instruction.Operands.Split(',');
+        if (operandParts.Length != 2)
+        {
+            return $"{indent}// {instruction}";
+        }
+
+        string destination = operandParts[0].Trim();
+        string source = operandParts[1].Trim();
+
+        // Skip register-to-register operations for registers we don't track
+        if (IsRegister(destination) && IsRegister(source))
+        {
+            return $"{indent}// {instruction}";
+        }
+
+        // Invariant lowering so that e.g. "DIV" maps to "/" under every culture
+        string operatorSymbol = GetOperatorForMnemonic(instruction.Mnemonic.ToLowerInvariant());
+        return $"{indent}{GetReadableOperand(destination)} {operatorSymbol}= {GetReadableOperand(source)}; // {instruction}";
+    }
+
+    /// <summary>
+    /// Translates a CALL instruction
+    /// </summary>
+    /// <param name="instruction">The instruction to translate</param>
+    /// <param name="indent">The indentation string</param>
+    /// <returns>The translated code statement</returns>
+    private string TranslateCallInstruction(Instruction instruction, string indent)
+    {
+        // The trimmed operand text is the call target
+        string callee = instruction.Operands.Trim();
+
+        // Turn the target into something that reads like a function name
+        string functionName = GetFunctionNameFromTarget(callee);
+        return $"{indent}{functionName}(); // {instruction}";
+    }
+
+    /// <summary>
+    /// Translates a RET instruction
+    /// </summary>
+    /// <param name="instruction">The instruction to translate</param>
+    /// <param name="indent">The indentation string</param>
+    /// <returns>The translated code statement</returns>
+    private string TranslateReturnInstruction(Instruction instruction, string indent)
+    {
+        // EAX, when tracked and flagged as the return value, is what gets returned
+        DataFlowAnalysis.Variable? returned = _dataFlowAnalysis?.Variables
+            .FirstOrDefault(candidate => candidate.Location == "eax" && candidate.IsReturnValue);
+
+        if (returned is null)
+        {
+            // No data flow results, or no such variable: emit a bare return
+            return $"{indent}return; // {instruction}";
+        }
+
+        return $"{indent}return {returned.Name}; // {instruction}";
+    }
+
+    /// <summary>
+    /// Gets the condition from a conditional jump instruction
+    /// </summary>
+    /// <param name="instruction">The jump instruction</param>
+    /// <returns>The condition expression</returns>
+    private string GetConditionFromJump(Instruction instruction)
+    {
+        // Map jump mnemonics to conditions; unlisted mnemonics get a generic placeholder
+        switch (instruction.Mnemonic.ToLower())
+        {
+            // Equality, zero and signed comparisons
+            case "je": return "a == b";
+            case "jne": return "a != b";
+            case "jz": return "a == 0";
+            case "jnz": return "a != 0";
+            case "jg": return "a > b";
+            case "jge": return "a >= b";
+            case "jl": return "a < b";
+            case "jle": return "a <= b";
+            // Unsigned comparisons
+            case "ja": return "a > b (unsigned)";
+            case "jae": return "a >= b (unsigned)";
+            case "jb": return "a < b (unsigned)";
+            case "jbe": return "a <= b (unsigned)";
+            default: return "condition";
+        }
+    }
+
+    /// <summary>
+    /// Gets the operator for an arithmetic mnemonic
+    /// </summary>
+    /// <param name="mnemonic">The instruction mnemonic</param>
+    /// <returns>The operator, or the mnemonic itself when it is not a known one</returns>
+    private string GetOperatorForMnemonic(string mnemonic)
+    {
+        switch (mnemonic)
+        {
+            case "add": return "+";
+            case "sub": return "-";
+            case "mul": return "*";
+            case "div": return "/";
+            case "and": return "&";
+            case "or": return "|";
+            case "xor": return "^";
+            default: return mnemonic;
+        }
+    }
+
+    /// <summary>
+    /// Gets a readable representation of an operand
+    /// </summary>
+    /// <remarks>
+    /// Only memory operands are rewritten; registers, hexadecimal constants and any
+    /// other operand text are returned exactly as given.
+    /// </remarks>
+    /// <param name="operand">The operand</param>
+    /// <returns>A readable representation</returns>
+    private string GetReadableOperand(string operand)
+    {
+        // Registers are shown by name. They are tested first, although a register
+        // name never contains brackets and so never looks like a memory operand.
+        if (IsRegister(operand))
+        {
+            return operand;
+        }
+
+        // Memory operands are shown under the variable name chosen for them
+        if (IsMemoryLocation(operand))
+        {
+            return GetVariableNameForMemory(operand);
+        }
+
+        // Hexadecimal constants need no reformatting, and neither does any other
+        // operand text, so both are shown verbatim
+        return operand;
+    }
+
+    /// <summary>
+    /// Gets a variable name for a memory location
+    /// </summary>
+    /// <param name="memoryLocation">The memory location</param>
+    /// <returns>A variable name</returns>
+    private string GetVariableNameForMemory(string memoryLocation)
+    {
+        if (_dataFlowAnalysis == null)
+        {
+            return "memory";
+        }
+
+        // Extract the part inside the brackets
+        int startIndex = memoryLocation.IndexOf('[');
+        int endIndex = memoryLocation.IndexOf(']');
+        if (startIndex < 0 || endIndex <= startIndex)
+        {
+            return "memory";
+        }
+        string memoryReference = memoryLocation.Substring(startIndex + 1, endIndex - startIndex - 1).Trim();
+
+        // Try to find a variable for this memory location
+        var variable = _dataFlowAnalysis.Variables.FirstOrDefault(v => v.Location == memoryReference);
+        if (variable != null)
+        {
+            return variable.Name;
+        }
+
+        // Stack slot relative to EBP: skip only "ebp" and keep the sign, so that [ebp-4]
+        // and [ebp+4] get distinct names (local_minus_4 and local_plus_4)
+        if (memoryReference.StartsWith("ebp+", StringComparison.Ordinal) || memoryReference.StartsWith("ebp-", StringComparison.Ordinal))
+        {
+            string offset = memoryReference.Substring(3);
+            return $"local_{offset.Replace("+", "plus_").Replace("-", "minus_")}";
+        }
+        return "memory";
+    }
+
+    /// <summary>
+    /// Gets a function name from a call target
+    /// </summary>
+    /// <param name="target">The call target</param>
+    /// <returns>A function name</returns>
+    private string GetFunctionNameFromTarget(string target)
+    {
+        // Direct address: "0x" followed by at least one more character
+        bool isDirectAddress = target.StartsWith("0x") && target.Length > 2;
+        if (isDirectAddress)
+        {
+            return "function_" + target.Substring(2);
+        }
+
+        // Call through memory: name the pointer after the variable behind the operand
+        if (IsMemoryLocation(target))
+        {
+            return "function_ptr_" + GetVariableNameForMemory(target);
+        }
+
+        return target; // Otherwise, use the target as is
+    }
+
+    /// <summary>
+    /// Checks if an operand is a register
+    /// </summary>
+    /// <param name="operand">The operand to check</param>
+    /// <returns>True if the operand is a register</returns>
+    private bool IsRegister(string operand)
+    {
+        // Invariant lowering: culture-aware lowering maps "EDI" to "edı" under tr-TR and would miss it
+        return operand.ToLowerInvariant() is
+            "eax" or "ebx" or "ecx" or "edx" or "esi" or "edi" or "ebp" or "esp" or
+            "ax" or "bx" or "cx" or "dx" or "si" or "di" or "bp" or "sp" or
+            "al" or "ah" or "bl" or "bh" or "cl" or "ch" or "dl" or "dh";
+    }
+
+    /// <summary>
+    /// Checks if an operand is a memory location
+    /// </summary>
+    /// <param name="operand">The operand to check</param>
+    /// <returns>True if the operand contains both '[' and ']'</returns>
+    private bool IsMemoryLocation(string operand)
+    {
+        return operand.IndexOf('[') >= 0 && operand.IndexOf(']') >= 0;
+    }
+}
diff --git a/X86Disassembler/Program.cs b/X86Disassembler/Program.cs
index c19ded2..0dbc5ba 100644
--- a/X86Disassembler/Program.cs
+++ b/X86Disassembler/Program.cs
@@ -4,6 +4,7 @@ using System.Text;
using System.Collections.Generic;
using X86Disassembler.PE;
using X86Disassembler.X86;
+using X86Disassembler.Decompiler;
namespace X86Disassembler;
@@ -70,7 +71,7 @@ public class Program
Console.WriteLine($"Disassembling section {section.Name} at RVA 0x{section.VirtualAddress:X8}:");
// Create a disassembler for the code section
- Disassembler disassembler = new Disassembler(codeBytes, section.VirtualAddress);
+ Disassembler disassembler = new Disassembler(codeBytes, peFile.OptionalHeader.ImageBase + section.VirtualAddress);
// Disassemble all instructions
var instructions = disassembler.Disassemble();
@@ -97,6 +98,26 @@ public class Program
{
Console.WriteLine($"... ({instructions.Count - count} more instructions not shown)");
}
+
+            // Decompile the instructions. For demonstration we decompile only a small
+            // leading subset; in a real scenario you'd identify function boundaries first.
+            int functionSize = Math.Min(50, instructions.Count);
+
+            // Guard: an empty instruction list has no element [0] to take the start address
+            // from, and indexing it would throw ArgumentOutOfRangeException.
+            if (functionSize > 0)
+            {
+                Console.WriteLine("\nDecompiling the first function:\n");
+
+                var functionInstructions = instructions.GetRange(0, functionSize);
+
+                // Second argument is the first instruction's address (presumably the entry point; confirm against Decompiler's constructor)
+                var decompiler = new Decompiler.Decompiler(functionInstructions, functionInstructions[0].Address);
+
+                // Decompile the function and print the decompiled code
+                string decompiledCode = decompiler.Decompile();
+                Console.WriteLine(decompiledCode);
+            }
}
// Console.WriteLine("\nPress Enter to exit...");
diff --git a/X86Disassembler/X86/Disassembler.cs b/X86Disassembler/X86/Disassembler.cs
index bd7ad56..e784cca 100644
--- a/X86Disassembler/X86/Disassembler.cs
+++ b/X86Disassembler/X86/Disassembler.cs
@@ -15,7 +15,7 @@ public class Disassembler
private readonly int _length;
// The base address of the code
- private readonly uint _baseAddress;
+ private readonly ulong _baseAddress;
// Segment override prefixes
private static readonly byte[] SegmentOverridePrefixes = { 0x26, 0x2E, 0x36, 0x3E, 0x64, 0x65 };
@@ -25,7 +25,7 @@ public class Disassembler
///
/// The buffer containing the code to disassemble
/// The base address of the code
- public Disassembler(byte[] codeBuffer, uint baseAddress)
+ public Disassembler(byte[] codeBuffer, ulong baseAddress)
{
_codeBuffer = codeBuffer;
_length = codeBuffer.Length;
diff --git a/X86Disassembler/X86/Handlers/Jump/JgeRel8Handler.cs b/X86Disassembler/X86/Handlers/Jump/JgeRel8Handler.cs
index 550156c..c3e7082 100644
--- a/X86Disassembler/X86/Handlers/Jump/JgeRel8Handler.cs
+++ b/X86Disassembler/X86/Handlers/Jump/JgeRel8Handler.cs
@@ -43,13 +43,11 @@ public class JgeRel8Handler : InstructionHandler
instruction.Operands = "??";
return true;
}
-
- // Read the offset and calculate target address
- int position = Decoder.GetPosition();
+
sbyte offset = (sbyte)Decoder.ReadByte();
// Calculate target address (instruction address + instruction length + offset)
- uint targetAddress = (uint)(instruction.Address + 2 + offset);
+        ulong targetAddress = unchecked(instruction.Address + 2UL + (ulong)(long)offset); // sign-extend rel8 to 64 bits; (uint)offset made -2 into 0xFFFFFFFE, sending backward jumps 4 GiB forward
// Format the target address
instruction.Operands = $"0x{targetAddress:X8}";
diff --git a/X86Disassembler/X86/Handlers/Jump/JmpRel32Handler.cs b/X86Disassembler/X86/Handlers/Jump/JmpRel32Handler.cs
index 0b7aa8e..8883334 100644
--- a/X86Disassembler/X86/Handlers/Jump/JmpRel32Handler.cs
+++ b/X86Disassembler/X86/Handlers/Jump/JmpRel32Handler.cs
@@ -38,8 +38,7 @@ public class JmpRel32Handler : InstructionHandler
instruction.Mnemonic = "jmp";
// Check if we have enough bytes for the offset (4 bytes)
- int position = Decoder.GetPosition();
- if (position + 4 > Length)
+ if (!Decoder.CanReadUInt())
{
return false;
}
diff --git a/X86Disassembler/X86/Handlers/Jump/JmpRel8Handler.cs b/X86Disassembler/X86/Handlers/Jump/JmpRel8Handler.cs
index bfeb0a5..187c137 100644
--- a/X86Disassembler/X86/Handlers/Jump/JmpRel8Handler.cs
+++ b/X86Disassembler/X86/Handlers/Jump/JmpRel8Handler.cs
@@ -42,13 +42,11 @@ public class JmpRel8Handler : InstructionHandler
{
return true;
}
-
- // Read the offset and calculate target address
- int position = Decoder.GetPosition();
+
sbyte offset = (sbyte)Decoder.ReadByte();
// Calculate target address (instruction address + instruction length + offset)
- uint targetAddress = (uint)(instruction.Address + 2 + offset);
+        ulong targetAddress = unchecked(instruction.Address + 2UL + (ulong)(long)offset); // sign-extend rel8 to 64 bits; (uint)offset made -2 into 0xFFFFFFFE, sending backward jumps 4 GiB forward
// Format the target address
instruction.Operands = $"0x{targetAddress:X8}";
diff --git a/X86Disassembler/X86/Handlers/Jump/TwoByteConditionalJumpHandler.cs b/X86Disassembler/X86/Handlers/Jump/TwoByteConditionalJumpHandler.cs
index 8759b88..8f96bcd 100644
--- a/X86Disassembler/X86/Handlers/Jump/TwoByteConditionalJumpHandler.cs
+++ b/X86Disassembler/X86/Handlers/Jump/TwoByteConditionalJumpHandler.cs
@@ -55,8 +55,6 @@ public class TwoByteConditionalJumpHandler : InstructionHandler
/// True if the instruction was successfully decoded
public override bool Decode(byte opcode, Instruction instruction)
{
- int position = Decoder.GetPosition();
-
// Check if we have enough bytes for the second byte
if (!Decoder.CanReadByte())
{
diff --git a/X86Disassembler/X86/Instruction.cs b/X86Disassembler/X86/Instruction.cs
index 12274aa..5e7ad5c 100644
--- a/X86Disassembler/X86/Instruction.cs
+++ b/X86Disassembler/X86/Instruction.cs
@@ -8,7 +8,7 @@ public class Instruction
///
/// Gets or sets the address of the instruction
///
- public uint Address { get; set; }
+ public ulong Address { get; set; }
///
/// Gets or sets the mnemonic of the instruction