namespace X86Disassembler.Decompiler;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using X86Disassembler.X86;
/// <summary>
/// Main decompiler class that translates assembly code into higher-level code
/// </summary>
public class Decompiler
{
    // Number of spaces emitted per indentation level in the generated pseudocode
    private const int IndentWidth = 4;

    // Register names recognised by IsRegister. The ordinal, case-insensitive comparer keeps
    // matching independent of the current culture (a culture-sensitive ToLower() turns
    // "EDI" into "edı" under tr-TR, which would no longer match "edi").
    private static readonly HashSet<string> RegisterNames = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
    {
        "eax", "ebx", "ecx", "edx", "esi", "edi", "ebp", "esp",
        "ax", "bx", "cx", "dx", "si", "di", "bp", "sp",
        "al", "ah", "bl", "bh", "cl", "ch", "dl", "dh"
    };

    // The list of disassembled instructions
    private readonly List<Instruction> _instructions;

    // The control flow graph (built by Decompile)
    private ControlFlowGraph? _controlFlowGraph;

    // The data flow analysis (created by Decompile)
    private DataFlowAnalysis? _dataFlowAnalysis;

    // The entry point address
    private readonly ulong _entryPoint;

    /// <summary>
    /// Initializes a new instance of the Decompiler class
    /// </summary>
    /// <param name="instructions">The list of disassembled instructions</param>
    /// <param name="entryPoint">The entry point address</param>
    /// <exception cref="ArgumentNullException"><paramref name="instructions"/> is null</exception>
    public Decompiler(List<Instruction> instructions, ulong entryPoint)
    {
        _instructions = instructions ?? throw new ArgumentNullException(nameof(instructions));
        _entryPoint = entryPoint;
    }

    /// <summary>
    /// Decompiles the instructions and returns the decompiled code
    /// </summary>
    /// <returns>The decompiled code</returns>
    public string Decompile()
    {
        // Build the control flow graph
        _controlFlowGraph = ControlFlowGraph.Build(_instructions, _entryPoint);

        // Perform data flow analysis
        _dataFlowAnalysis = new DataFlowAnalysis();
        _dataFlowAnalysis.Analyze(_instructions);

        // Generate pseudocode from the control flow graph and data flow analysis
        return GeneratePseudocode();
    }

    /// <summary>
    /// Generates pseudocode from the control flow graph and data flow analysis
    /// </summary>
    /// <returns>
    /// The generated pseudocode, or a single explanatory comment line when no control flow
    /// graph or entry block is available
    /// </returns>
    private string GeneratePseudocode()
    {
        var entryBlock = _controlFlowGraph?.EntryBlock;
        if (entryBlock is null)
        {
            return "// Could not build control flow graph";
        }

        StringBuilder code = new StringBuilder();

        // Function header
        code.AppendLine("// Decompiled function");
        code.AppendLine("int DecompiledFunction() {")
            .AppendLine();

        // Variable declarations: one line per non-register variable, followed by a blank
        // separator line if at least one declaration was written
        if (_dataFlowAnalysis != null)
        {
            bool declaredAny = false;
            foreach (var variable in _dataFlowAnalysis.Variables)
            {
                if (IsRegister(variable.Location))
                {
                    continue;
                }

                // Declarations sit at the same indentation as the body statements
                code.AppendLine($"{Indent(1)}{variable.Type} {variable.Name}; // {variable.Location}");
                declaredAny = true;
            }

            if (declaredAny)
            {
                code.AppendLine();
            }
        }

        // Process the blocks in depth-first order; the set holds the start addresses of
        // blocks that have already been emitted
        HashSet<ulong> visitedBlocks = new HashSet<ulong>();
        GenerateCodeForBlock(entryBlock, code, visitedBlocks, 1);

        // Add a fallback return statement when the text generated so far contains none.
        // This is a plain textual search over everything emitted above.
        if (!code.ToString().Contains("return"))
        {
            code.AppendLine($"{Indent(1)}return 0;");
        }

        // Close the function
        code.AppendLine("}");
        return code.ToString();
    }

    /// <summary>
    /// Generates code for a basic block and, recursively, its successors
    /// </summary>
    /// <param name="block">The basic block</param>
    /// <param name="code">The code builder</param>
    /// <param name="visitedBlocks">Start addresses of the blocks that were already emitted</param>
    /// <param name="indentLevel">The indentation level</param>
    private void GenerateCodeForBlock(ControlFlowGraph.BasicBlock block, StringBuilder code, HashSet<ulong> visitedBlocks, int indentLevel)
    {
        // Add returns false when the address is already in the set: the block was emitted
        // earlier, so refer to its label instead of emitting it a second time
        if (!visitedBlocks.Add(block.StartAddress))
        {
            code.AppendLine($"{Indent(indentLevel)}goto block_{block.StartAddress:X8};");
            return;
        }

        // The label is written one level shallower than the statements it introduces
        code.AppendLine($"{Indent(indentLevel - 1)}block_{block.StartAddress:X8}:")
            .AppendLine();

        // Generate code for the instructions in this block
        foreach (var instruction in block.Instructions)
        {
            string instructionCode = TranslateInstruction(instruction, indentLevel);
            if (!string.IsNullOrEmpty(instructionCode))
            {
                code.AppendLine(instructionCode);
            }
        }

        // Handle successors based on the control flow; blocks with any other number of
        // successors end the recursion here
        switch (block.Successors.Count)
        {
            case 1:
                // Unconditional transfer to the next block
                GenerateCodeForBlock(block.Successors[0], code, visitedBlocks, indentLevel);
                break;

            case 2:
                // Conditional branch
                GenerateConditionalBranch(block, code, visitedBlocks, indentLevel);
                break;
        }
    }

    /// <summary>
    /// Generates an if statement for a block that has exactly two successors
    /// </summary>
    /// <param name="block">The basic block whose last instruction is the branch</param>
    /// <param name="code">The code builder</param>
    /// <param name="visitedBlocks">Start addresses of the blocks that were already emitted</param>
    /// <param name="indentLevel">The indentation level</param>
    private void GenerateConditionalBranch(ControlFlowGraph.BasicBlock block, StringBuilder code, HashSet<ulong> visitedBlocks, int indentLevel)
    {
        string indent = Indent(indentLevel);

        // The last instruction in the block decides the condition
        Instruction lastInstruction = block.Instructions[^1];
        string condition = GetConditionFromJump(lastInstruction);

        // The fall-through block starts right after the last instruction; any successor
        // that starts elsewhere is treated as the jump target
        ulong nextAddress = lastInstruction.Address + (ulong)lastInstruction.RawBytes.Length;
        ControlFlowGraph.BasicBlock? fallthroughBlock = null;
        ControlFlowGraph.BasicBlock? jumpTargetBlock = null;
        foreach (var successor in block.Successors)
        {
            if (successor.StartAddress == nextAddress)
            {
                fallthroughBlock = successor;
            }
            else
            {
                jumpTargetBlock = successor;
            }
        }

        if (fallthroughBlock is null || jumpTargetBlock is null)
        {
            // The two roles could not be told apart: just generate code for both successors
            foreach (var successor in block.Successors)
            {
                GenerateCodeForBlock(successor, code, visitedBlocks, indentLevel);
            }

            return;
        }

        // Open the if statement
        code.AppendLine($"{indent}if ({condition}) {{")
            .AppendLine();

        // The jump target forms the body of the if statement
        GenerateCodeForBlock(jumpTargetBlock, code, visitedBlocks, indentLevel + 1);

        // Close the if statement
        code.AppendLine($"{indent}}}")
            .AppendLine();

        // The fall-through block continues after the if statement
        GenerateCodeForBlock(fallthroughBlock, code, visitedBlocks, indentLevel);
    }

    /// <summary>
    /// Translates an instruction into a higher-level code statement
    /// </summary>
    /// <param name="instruction">The instruction to translate</param>
    /// <param name="indentLevel">The indentation level</param>
    /// <returns>The translated code statement</returns>
    private string TranslateInstruction(Instruction instruction, int indentLevel)
    {
        string indent = Indent(indentLevel);
        string mnemonic = instruction.Mnemonic.ToLowerInvariant();

        // Jumps are handled by the control flow, so they only appear as comments
        if (mnemonic.StartsWith("j", StringComparison.Ordinal))
        {
            return Comment(indent, instruction);
        }

        return mnemonic switch
        {
            "mov" => TranslateMovInstruction(instruction, indent),
            "add" or "sub" or "mul" or "div" or "and" or "or" or "xor"
                => TranslateArithmeticInstruction(instruction, indent),
            "call" => TranslateCallInstruction(instruction, indent),
            "ret" => TranslateReturnInstruction(instruction, indent),

            // push, pop, cmp, test and every other instruction are kept as a comment
            _ => Comment(indent, instruction)
        };
    }

    /// <summary>
    /// Translates a MOV instruction
    /// </summary>
    /// <param name="instruction">The instruction to translate</param>
    /// <param name="indent">The indentation string</param>
    /// <returns>The translated code statement</returns>
    private string TranslateMovInstruction(Instruction instruction, string indent)
    {
        if (!TrySplitOperands(instruction.Operands, out string destination, out string source))
        {
            return Comment(indent, instruction);
        }

        // Register-to-register moves are not translated
        if (IsRegister(destination) && IsRegister(source))
        {
            return Comment(indent, instruction);
        }

        // GetReadableOperand already maps memory operands to variable names, so a single
        // assignment covers memory destinations, memory sources and everything else
        return $"{indent}{GetReadableOperand(destination)} = {GetReadableOperand(source)}; // {instruction}";
    }

    /// <summary>
    /// Translates an arithmetic instruction
    /// </summary>
    /// <param name="instruction">The instruction to translate</param>
    /// <param name="indent">The indentation string</param>
    /// <returns>The translated code statement</returns>
    private string TranslateArithmeticInstruction(Instruction instruction, string indent)
    {
        if (!TrySplitOperands(instruction.Operands, out string destination, out string source))
        {
            return Comment(indent, instruction);
        }

        // Register-to-register operations are not translated
        if (IsRegister(destination) && IsRegister(source))
        {
            return Comment(indent, instruction);
        }

        // Emit a compound assignment such as "x += y"
        string operatorSymbol = GetOperatorForMnemonic(instruction.Mnemonic.ToLowerInvariant());
        return $"{indent}{GetReadableOperand(destination)} {operatorSymbol}= {GetReadableOperand(source)}; // {instruction}";
    }

    /// <summary>
    /// Translates a CALL instruction
    /// </summary>
    /// <param name="instruction">The instruction to translate</param>
    /// <param name="indent">The indentation string</param>
    /// <returns>The translated code statement</returns>
    private string TranslateCallInstruction(Instruction instruction, string indent)
    {
        // Try to get a function name from the call target
        string functionName = GetFunctionNameFromTarget(instruction.Operands.Trim());
        return $"{indent}{functionName}(); // {instruction}";
    }

    /// <summary>
    /// Translates a RET instruction
    /// </summary>
    /// <param name="instruction">The instruction to translate</param>
    /// <param name="indent">The indentation string</param>
    /// <returns>The translated code statement</returns>
    private string TranslateReturnInstruction(Instruction instruction, string indent)
    {
        // Return the variable located in EAX when the analysis marks it as a return value
        var eaxVariable = _dataFlowAnalysis?.Variables.FirstOrDefault(v => v.Location == "eax" && v.IsReturnValue);
        if (eaxVariable != null)
        {
            return $"{indent}return {eaxVariable.Name}; // {instruction}";
        }

        return $"{indent}return; // {instruction}";
    }

    /// <summary>
    /// Gets the condition from a conditional jump instruction
    /// </summary>
    /// <param name="instruction">The jump instruction</param>
    /// <returns>
    /// The condition expression, written over the placeholder operands "a" and "b";
    /// "condition" for any mnemonic that is not in the table
    /// </returns>
    private static string GetConditionFromJump(Instruction instruction)
    {
        // Map jump mnemonics to conditions
        return instruction.Mnemonic.ToLowerInvariant() switch
        {
            "je" => "a == b",
            "jne" => "a != b",
            "jz" => "a == 0",
            "jnz" => "a != 0",
            "jg" => "a > b",
            "jge" => "a >= b",
            "jl" => "a < b",
            "jle" => "a <= b",
            "ja" => "a > b (unsigned)",
            "jae" => "a >= b (unsigned)",
            "jb" => "a < b (unsigned)",
            "jbe" => "a <= b (unsigned)",
            _ => "condition"
        };
    }

    /// <summary>
    /// Gets the operator for an arithmetic mnemonic
    /// </summary>
    /// <param name="mnemonic">The instruction mnemonic (lower case)</param>
    /// <returns>The operator, or the mnemonic itself when no operator is known for it</returns>
    private static string GetOperatorForMnemonic(string mnemonic)
    {
        return mnemonic switch
        {
            "add" => "+",
            "sub" => "-",
            "mul" => "*",
            "div" => "/",
            "and" => "&",
            "or" => "|",
            "xor" => "^",
            _ => mnemonic
        };
    }

    /// <summary>
    /// Gets a readable representation of an operand
    /// </summary>
    /// <param name="operand">The operand</param>
    /// <returns>A variable name for memory operands; the operand itself otherwise</returns>
    private string GetReadableOperand(string operand)
    {
        // Registers, constants and anything unrecognised are emitted as they are;
        // only memory operands are replaced by a variable name
        if (!IsRegister(operand) && IsMemoryLocation(operand))
        {
            return GetVariableNameForMemory(operand);
        }

        return operand;
    }

    /// <summary>
    /// Gets a variable name for a memory location
    /// </summary>
    /// <param name="memoryLocation">The memory location, e.g. "[ebp-4]"</param>
    /// <returns>
    /// The name of the analysed variable stored at that location, a generated
    /// "local_..." name for EBP-relative references, or "memory" when nothing better is known
    /// </returns>
    private string GetVariableNameForMemory(string memoryLocation)
    {
        if (_dataFlowAnalysis == null)
        {
            return "memory";
        }

        // Extract the part inside the brackets
        int startIndex = memoryLocation.IndexOf('[');
        int endIndex = memoryLocation.IndexOf(']');
        if (startIndex < 0 || endIndex <= startIndex)
        {
            return "memory";
        }

        string memoryReference = memoryLocation.Substring(startIndex + 1, endIndex - startIndex - 1).Trim();

        // Try to find a variable for this memory location
        var variable = _dataFlowAnalysis.Variables.FirstOrDefault(v => v.Location == memoryReference);
        if (variable != null)
        {
            return variable.Name;
        }

        // If it's a stack variable (relative to EBP), give it a meaningful name
        if (memoryReference.StartsWith("ebp+", StringComparison.OrdinalIgnoreCase)
            || memoryReference.StartsWith("ebp-", StringComparison.OrdinalIgnoreCase))
        {
            // Skip only the three characters of "ebp" so the sign stays part of the offset;
            // otherwise [ebp+8] and [ebp-8] would both be named "local_8"
            string signedOffset = memoryReference.Substring(3);
            string suffix = signedOffset.Replace("+", "plus_").Replace("-", "minus_");
            return $"local_{suffix}";
        }

        return "memory";
    }

    /// <summary>
    /// Gets a function name from a call target
    /// </summary>
    /// <param name="target">The call target</param>
    /// <returns>A function name</returns>
    private string GetFunctionNameFromTarget(string target)
    {
        // If it's a direct address, name the function after the hexadecimal digits
        if (target.StartsWith("0x", StringComparison.Ordinal) && target.Length > 2)
        {
            return $"function_{target.Substring(2)}";
        }

        // If it's a memory location, name it after the variable holding the pointer
        if (IsMemoryLocation(target))
        {
            return $"function_ptr_{GetVariableNameForMemory(target)}";
        }

        // Otherwise, use the target as is
        return target;
    }

    /// <summary>
    /// Checks if an operand is a register
    /// </summary>
    /// <param name="operand">The operand to check</param>
    /// <returns>True if the operand is a register</returns>
    private static bool IsRegister(string operand)
    {
        return RegisterNames.Contains(operand);
    }

    /// <summary>
    /// Checks if an operand is a memory location
    /// </summary>
    /// <param name="operand">The operand to check</param>
    /// <returns>True if the operand is a memory location</returns>
    private static bool IsMemoryLocation(string operand)
    {
        // Any operand that contains both bracket characters counts as a memory reference
        return operand.Contains('[') && operand.Contains(']');
    }

    /// <summary>
    /// Splits a two-operand operand string at the comma and trims both halves
    /// </summary>
    /// <param name="operands">The operand text of an instruction</param>
    /// <param name="destination">The first operand, or an empty string on failure</param>
    /// <param name="source">The second operand, or an empty string on failure</param>
    /// <returns>True if the text consists of exactly two comma-separated parts</returns>
    private static bool TrySplitOperands(string operands, out string destination, out string source)
    {
        string[] operandParts = operands.Split(',');
        if (operandParts.Length != 2)
        {
            destination = string.Empty;
            source = string.Empty;
            return false;
        }

        destination = operandParts[0].Trim();
        source = operandParts[1].Trim();
        return true;
    }

    /// <summary>
    /// Builds the indentation string for an indentation level
    /// </summary>
    /// <param name="level">The indentation level</param>
    /// <returns>A string of spaces, IndentWidth per level</returns>
    private static string Indent(int level)
    {
        return new string(' ', level * IndentWidth);
    }

    /// <summary>
    /// Renders an instruction as an indented line comment
    /// </summary>
    /// <param name="indent">The indentation string</param>
    /// <param name="instruction">The instruction to show</param>
    /// <returns>The comment line</returns>
    private static string Comment(string indent, Instruction instruction)
    {
        return $"{indent}// {instruction}";
    }
}