0
mirror of https://github.com/sampletext32/ParkanPlayground.git synced 2025-05-19 03:41:18 +03:00

basic decompiler and fixes

This commit is contained in:
bird_egop 2025-04-14 02:07:17 +03:00
parent 157171fa90
commit c516e063e7
10 changed files with 1314 additions and 16 deletions

View File

@ -0,0 +1,246 @@
namespace X86Disassembler.Decompiler;
using System.Collections.Generic;
using X86Disassembler.X86;
/// <summary>
/// Represents a control flow graph for decompilation
/// </summary>
public class ControlFlowGraph
{
/// <summary>
/// A node of the control flow graph: a run of instructions plus its incoming and outgoing edges
/// </summary>
public class BasicBlock
{
    /// <summary>
    /// Gets or sets the address at which the block starts
    /// </summary>
    public ulong StartAddress { get; set; }

    /// <summary>
    /// Gets or sets the address at which the block ends
    /// </summary>
    public ulong EndAddress { get; set; }

    /// <summary>
    /// Gets the instructions that belong to this block
    /// </summary>
    public List<Instruction> Instructions { get; } = new();

    /// <summary>
    /// Gets the blocks that control can reach directly from this one
    /// </summary>
    public List<BasicBlock> Successors { get; } = new();

    /// <summary>
    /// Gets the blocks from which control can reach this one directly
    /// </summary>
    public List<BasicBlock> Predecessors { get; } = new();

    /// <summary>
    /// Formats the block as its address range (8-digit hex) followed by its instruction count
    /// </summary>
    /// <returns>A one-line description of the block</returns>
    public override string ToString() =>
        $"Block {StartAddress:X8}-{EndAddress:X8} with {Instructions.Count} instructions";
}
// All basic blocks of the graph, keyed by each block's StartAddress (populated by Build)
private readonly Dictionary<ulong, BasicBlock> _blocks = [];
// Block whose first instruction sits at the entry point address passed to Build;
// stays null when no instruction at that address was seen
private BasicBlock? _entryBlock;
/// <summary>
/// Gets the entry block of the control flow graph, or null if Build did not find one
/// </summary>
public BasicBlock? EntryBlock => _entryBlock;
/// <summary>
/// Gets all basic blocks in the control flow graph, keyed by block start address
/// </summary>
public IReadOnlyDictionary<ulong, BasicBlock> Blocks => _blocks;
/// <summary>
/// Builds a control flow graph from a list of instructions
/// </summary>
/// <remarks>
/// Works in three passes: collect leader addresses, cut the instruction list into
/// basic blocks at those leaders, then connect the blocks with successor/predecessor edges.
/// A block that ends in <c>call</c> is linked to the block that follows it (the call returns
/// there); the callee is not modelled as a successor. Each edge is recorded at most once.
/// </remarks>
/// <param name="instructions">The list of instructions</param>
/// <param name="entryPoint">The entry point address</param>
/// <returns>A control flow graph</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="instructions"/> is null</exception>
public static ControlFlowGraph Build(List<Instruction> instructions, ulong entryPoint)
{
    System.ArgumentNullException.ThrowIfNull(instructions);

    ControlFlowGraph cfg = new ControlFlowGraph();

    // First pass: identify basic block boundaries. The entry point is always a leader.
    HashSet<ulong> leaders = new HashSet<ulong> { entryPoint };
    for (int i = 0; i < instructions.Count; i++)
    {
        Instruction inst = instructions[i];
        if (!IsControlTransfer(inst))
        {
            continue;
        }

        // The target of a jump/branch/call is a leader
        ulong? targetAddress = GetTargetAddress(inst);
        if (targetAddress.HasValue)
        {
            leaders.Add(targetAddress.Value);
        }

        // The instruction following a control transfer is also a leader (if it exists)
        if (i + 1 < instructions.Count)
        {
            leaders.Add(instructions[i + 1].Address);
        }
    }

    // Second pass: create basic blocks
    BasicBlock? currentBlock = null;
    foreach (Instruction inst in instructions)
    {
        if (leaders.Contains(inst.Address))
        {
            // Close the block that was still open. EndAddress is always the address of the
            // block's last instruction, the same convention used by the other two close sites.
            if (currentBlock != null)
            {
                currentBlock.EndAddress = currentBlock.Instructions[^1].Address;
                cfg._blocks[currentBlock.StartAddress] = currentBlock;
            }

            currentBlock = new BasicBlock { StartAddress = inst.Address };
            if (inst.Address == entryPoint)
            {
                cfg._entryBlock = currentBlock;
            }
        }

        // Instructions seen before the first leader belong to no block
        if (currentBlock == null)
        {
            continue;
        }

        currentBlock.Instructions.Add(inst);

        // A control transfer terminates the block it is in
        if (IsControlTransfer(inst))
        {
            currentBlock.EndAddress = inst.Address;
            cfg._blocks[currentBlock.StartAddress] = currentBlock;
            currentBlock = null;
        }
    }

    // Close the trailing block, if one is still open
    if (currentBlock != null)
    {
        currentBlock.EndAddress = currentBlock.Instructions[^1].Address;
        cfg._blocks[currentBlock.StartAddress] = currentBlock;
    }

    // Third pass: connect basic blocks
    foreach (BasicBlock block in cfg._blocks.Values)
    {
        // Every stored block received its leader instruction, so it is never empty
        Instruction lastInst = block.Instructions[^1];
        ulong nextAddress = lastInst.Address + (ulong)lastInst.RawBytes.Length;

        bool isCall = string.Equals(lastInst.Mnemonic, "call", System.StringComparison.OrdinalIgnoreCase);
        if (!IsControlTransfer(lastInst) || isCall)
        {
            // Plain fall-through, or a call that returns to the next instruction
            if (cfg._blocks.TryGetValue(nextAddress, out BasicBlock? followingBlock))
            {
                Link(block, followingBlock);
            }
            continue;
        }

        // Jump (or ret, which has no resolvable target): the target is a successor
        ulong? targetAddress = GetTargetAddress(lastInst);
        if (targetAddress.HasValue && cfg._blocks.TryGetValue(targetAddress.Value, out BasicBlock? targetBlock))
        {
            Link(block, targetBlock);
        }

        // A conditional jump may also fall through to the next block
        if (IsConditionalJump(lastInst) && cfg._blocks.TryGetValue(nextAddress, out BasicBlock? nextBlock))
        {
            Link(block, nextBlock);
        }
    }

    return cfg;

    // Records the edge from -> to once, on both endpoints
    static void Link(BasicBlock from, BasicBlock to)
    {
        if (from.Successors.Contains(to))
        {
            return;
        }

        from.Successors.Add(to);
        to.Predecessors.Add(from);
    }
}
/// <summary>
/// Tells whether an instruction transfers control: any mnemonic starting with "j", "call" or "ret"
/// </summary>
/// <param name="instruction">The instruction to check</param>
/// <returns>True if the instruction is a control transfer</returns>
private static bool IsControlTransfer(Instruction instruction)
{
    string lowered = instruction.Mnemonic.ToLower();
    return lowered switch
    {
        "call" or "ret" => true,
        _ => lowered.StartsWith("j") // jmp, je, jne, ...
    };
}
/// <summary>
/// Tells whether an instruction is a conditional jump, i.e. a "j*" mnemonic other than "jmp"
/// </summary>
/// <param name="instruction">The instruction to check</param>
/// <returns>True if the instruction is a conditional jump</returns>
private static bool IsConditionalJump(Instruction instruction)
{
    string lowered = instruction.Mnemonic.ToLower();
    if (lowered == "jmp")
    {
        // The unconditional jump is the one "j*" mnemonic that is excluded
        return false;
    }

    return lowered.StartsWith("j");
}
/// <summary>
/// Gets the target address of a control transfer instruction
/// </summary>
/// <remarks>
/// Two operand shapes are understood: an absolute "0x..." hexadecimal address, and, for
/// "j*" mnemonics only, a signed decimal offset relative to the end of the instruction.
/// Parsing is culture-invariant so that "-5" is read the same way on every machine.
/// </remarks>
/// <param name="instruction">The instruction</param>
/// <returns>The target address, or null if it cannot be determined</returns>
private static ulong? GetTargetAddress(Instruction instruction)
{
    string operands = instruction.Operands;

    // Instructions such as a bare "ret" carry no operand text at all
    if (string.IsNullOrEmpty(operands))
    {
        return null;
    }

    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    // Direct address (e.g., "0x12345678")
    if (operands.StartsWith("0x", System.StringComparison.Ordinal)
        && ulong.TryParse(operands.Substring(2), System.Globalization.NumberStyles.HexNumber, invariant, out ulong address))
    {
        return address;
    }

    // Relative jump: target = address of the next instruction + signed offset
    if (instruction.Mnemonic.StartsWith("j", System.StringComparison.OrdinalIgnoreCase)
        && int.TryParse(operands, System.Globalization.NumberStyles.Integer, invariant, out int offset))
    {
        // A negative offset is sign-extended and the addition wraps modulo 2^64;
        // unchecked makes that intentional even when the project is compiled with overflow checks.
        return unchecked(instruction.Address + (ulong)instruction.RawBytes.Length + (ulong)(long)offset);
    }

    // For now, we cannot determine the target for indirect jumps
    return null;
}
}

View File

@ -0,0 +1,516 @@
namespace X86Disassembler.Decompiler;
using System.Collections.Generic;
using X86Disassembler.X86;
/// <summary>
/// Performs data flow analysis on x86 instructions
/// </summary>
public class DataFlowAnalysis
{
/// <summary>
/// A named value in the decompiled code together with where it is stored
/// </summary>
public class Variable
{
    /// <summary>
    /// Gets or sets the variable's name (empty until assigned)
    /// </summary>
    public string Name { get; set; } = "";

    /// <summary>
    /// Gets or sets the variable's type name; "int" unless changed
    /// </summary>
    public string Type { get; set; } = "int";

    /// <summary>
    /// Gets or sets the storage location (register, memory, etc.)
    /// </summary>
    public string Location { get; set; } = "";

    /// <summary>
    /// Gets or sets a flag marking the variable as a parameter
    /// </summary>
    public bool IsParameter { get; set; }

    /// <summary>
    /// Gets or sets a flag marking the variable as a return value
    /// </summary>
    public bool IsReturnValue { get; set; }
}
/// <summary>
/// One analysed instruction: what kind of operation it is, what it writes and what it reads
/// </summary>
public class Operation
{
    /// <summary>
    /// Gets or sets the operation type (empty until assigned)
    /// </summary>
    public string Type { get; set; } = "";

    /// <summary>
    /// Gets or sets the variable written by the operation, if any
    /// </summary>
    public Variable? Destination { get; set; }

    /// <summary>
    /// Gets the inputs of the operation; each entry is a Variable or a constant value
    /// </summary>
    public List<object> Sources { get; } = new();

    /// <summary>
    /// Gets or sets the instruction this operation was derived from
    /// </summary>
    public Instruction OriginalInstruction { get; set; } = null!;
}
// Map of lower-case register names to variables (filled by InitializeRegisterVariables)
private readonly Dictionary<string, Variable> _registerVariables = [];
// Map of memory locations to variables; the key is the text between the brackets
// of the operand, as produced by NormalizeMemoryLocation
private readonly Dictionary<string, Variable> _memoryVariables = [];
// Operations recorded so far, in the order AnalyzeInstruction appended them
private readonly List<Operation> _operations = [];
// Counter for generating variable names; shared by the "var_N" and "temp_N" names
private int _variableCounter = 0;
/// <summary>
/// Gets the list of operations recorded by the analysis
/// </summary>
public IReadOnlyList<Operation> Operations => _operations;
/// <summary>
/// Gets the distinct variables known to the analysis: register variables first, then memory variables
/// </summary>
public IEnumerable<Variable> Variables
{
    get
    {
        // A fresh set is built on every access
        HashSet<Variable> distinct = new HashSet<Variable>(_registerVariables.Values);
        distinct.UnionWith(_memoryVariables.Values);
        return distinct;
    }
}
/// <summary>
/// Runs the analysis over a list of instructions, recording variables and operations
/// </summary>
/// <param name="instructions">The list of instructions to analyze</param>
public void Analyze(List<Instruction> instructions)
{
    // Register variables are (re)created before any instruction is looked at
    InitializeRegisterVariables();

    // Feed every instruction, in list order, to the per-instruction analysis
    instructions.ForEach(AnalyzeInstruction);
}
/// <summary>
/// Creates one variable per known register, named after and located in that register
/// </summary>
private void InitializeRegisterVariables()
{
    string[] registerNames =
    {
        // 32-bit general purpose registers
        "eax", "ebx", "ecx", "edx", "esi", "edi", "ebp", "esp",
        // 16-bit registers
        "ax", "bx", "cx", "dx", "si", "di", "bp", "sp",
        // 8-bit registers
        "al", "ah", "bl", "bh", "cl", "ch", "dl", "dh"
    };

    foreach (string registerName in registerNames)
    {
        _registerVariables[registerName] = new Variable { Name = registerName, Location = registerName };
    }

    // EAX is the return value register
    _registerVariables["eax"].IsReturnValue = true;
}
/// <summary>
/// Analyzes a single instruction to identify variables and operations
/// </summary>
/// <remarks>
/// Instructions without operands are skipped, except "ret": a bare return has no operand
/// text but must still be recorded as a return operation. Any "j*" mnemonic that is not
/// listed explicitly is recorded like the listed jumps, with its target as the source.
/// </remarks>
/// <param name="instruction">The instruction to analyze</param>
private void AnalyzeInstruction(Instruction instruction)
{
    // Invariant lower-casing: culture-sensitive ToLower() maps "DIV" to "dıv" under tr-TR
    string mnemonic = instruction.Mnemonic.ToLowerInvariant();
    string operands = instruction.Operands;
    bool hasOperands = !string.IsNullOrEmpty(operands);

    // Skip instructions without operands, but never drop a return
    if (!hasOperands && mnemonic != "ret")
    {
        return;
    }

    // Split operands and strip surrounding whitespace from each part
    string[] operandParts = hasOperands ? operands.Split(',') : System.Array.Empty<string>();
    for (int i = 0; i < operandParts.Length; i++)
    {
        operandParts[i] = operandParts[i].Trim();
    }

    // Create an operation based on the instruction type
    Operation operation = new Operation
    {
        OriginalInstruction = instruction
    };

    switch (mnemonic)
    {
        case "mov":
            HandleMovInstruction(operation, operandParts);
            break;
        case "add":
        case "sub":
        case "mul":
        case "div":
        case "and":
        case "or":
        case "xor":
            HandleArithmeticInstruction(operation, mnemonic, operandParts);
            break;
        case "push":
        case "pop":
            HandleStackInstruction(operation, mnemonic, operandParts);
            break;
        case "call":
            HandleCallInstruction(operation, operandParts);
            break;
        case "ret":
            HandleReturnInstruction(operation);
            break;
        case "cmp":
        case "test":
            HandleComparisonInstruction(operation, mnemonic, operandParts);
            break;
        case "jmp":
        case "je":
        case "jne":
        case "jg":
        case "jge":
        case "jl":
        case "jle":
            HandleJumpInstruction(operation, mnemonic, operandParts);
            break;
        default:
            // For other instructions, record the operation type...
            operation.Type = mnemonic;
            // ...and, for the remaining jumps (jz, jnz, ja, jb, ...), the target as well
            if (mnemonic.StartsWith('j'))
            {
                HandleJumpInstruction(operation, mnemonic, operandParts);
            }
            break;
    }

    // Add the operation to the list
    _operations.Add(operation);
}
/// <summary>
/// Fills in an operation for a MOV instruction as an "assignment"
/// </summary>
/// <param name="operation">The operation to populate</param>
/// <param name="operandParts">The operand parts; exactly two are expected, otherwise nothing is done</param>
private void HandleMovInstruction(Operation operation, string[] operandParts)
{
    if (operandParts.Length == 2)
    {
        operation.Type = "assignment";

        // First operand is written to, second operand is read from
        operation.Destination = GetOrCreateVariable(operandParts[0]);
        operation.Sources.Add(GetOperandValue(operandParts[1]));
    }
}
/// <summary>
/// Fills in an operation for a two-operand arithmetic/logic instruction (ADD, SUB, MUL, DIV, AND, OR, XOR)
/// </summary>
/// <param name="operation">The operation to populate</param>
/// <param name="mnemonic">The instruction mnemonic; becomes the operation type</param>
/// <param name="operandParts">The operand parts; exactly two are expected, otherwise nothing is done</param>
private void HandleArithmeticInstruction(Operation operation, string mnemonic, string[] operandParts)
{
    if (operandParts.Length == 2)
    {
        operation.Type = mnemonic;

        Variable target = GetOrCreateVariable(operandParts[0]);
        operation.Destination = target;

        // Sources are recorded as: second operand first, then the destination itself,
        // because the destination is read as well as written
        operation.Sources.Add(GetOperandValue(operandParts[1]));
        operation.Sources.Add(target);
    }
}
/// <summary>
/// Fills in an operation for a stack instruction (PUSH, POP)
/// </summary>
/// <param name="operation">The operation to populate</param>
/// <param name="mnemonic">The instruction mnemonic; becomes the operation type</param>
/// <param name="operandParts">The operand parts; exactly one is expected, otherwise nothing is done</param>
private void HandleStackInstruction(Operation operation, string mnemonic, string[] operandParts)
{
    if (operandParts.Length != 1)
    {
        return;
    }

    operation.Type = mnemonic;

    switch (mnemonic)
    {
        case "push":
            // PUSH reads its operand
            operation.Sources.Add(GetOperandValue(operandParts[0]));
            break;
        case "pop":
            // POP writes its operand
            operation.Destination = GetOrCreateVariable(operandParts[0]);
            break;
    }
}
/// <summary>
/// Fills in an operation for a CALL instruction
/// </summary>
/// <param name="operation">The operation to populate</param>
/// <param name="operandParts">The operand parts; exactly one is expected, otherwise nothing is done</param>
private void HandleCallInstruction(Operation operation, string[] operandParts)
{
    if (operandParts.Length == 1)
    {
        operation.Type = "call";

        // The raw operand text (function name or address) is stored as the only source
        operation.Sources.Add(operandParts[0]);
    }
}
/// <summary>
/// Fills in an operation for a RET instruction as a "return"
/// </summary>
/// <param name="operation">The operation to populate</param>
private void HandleReturnInstruction(Operation operation)
{
    operation.Type = "return";

    // The EAX variable, when registered, is recorded as the returned value
    if (!_registerVariables.TryGetValue("eax", out Variable? returnRegister))
    {
        return;
    }

    operation.Sources.Add(returnRegister);
}
/// <summary>
/// Fills in an operation for a comparison instruction (CMP, TEST)
/// </summary>
/// <param name="operation">The operation to populate</param>
/// <param name="mnemonic">The instruction mnemonic; becomes the operation type</param>
/// <param name="operandParts">The operand parts; exactly two are expected, otherwise nothing is done</param>
private void HandleComparisonInstruction(Operation operation, string mnemonic, string[] operandParts)
{
    if (operandParts.Length == 2)
    {
        operation.Type = mnemonic;

        // Both operands are only read; they are stored left first, right second
        foreach (string operandText in operandParts)
        {
            operation.Sources.Add(GetOperandValue(operandText));
        }
    }
}
/// <summary>
/// Fills in an operation for a jump instruction (JMP, JE, JNE, etc.)
/// </summary>
/// <param name="operation">The operation to populate</param>
/// <param name="mnemonic">The instruction mnemonic; becomes the operation type</param>
/// <param name="operandParts">The operand parts; exactly one is expected, otherwise nothing is done</param>
private void HandleJumpInstruction(Operation operation, string mnemonic, string[] operandParts)
{
    if (operandParts.Length == 1)
    {
        operation.Type = mnemonic;

        // The raw jump-target text is stored as the only source
        operation.Sources.Add(operandParts[0]);
    }
}
/// <summary>
/// Resolves an operand to a variable, creating one when none exists yet
/// </summary>
/// <param name="operand">The operand string</param>
/// <returns>
/// The registered register variable, the (possibly new) variable for a memory location,
/// or a fresh, unregistered temporary for anything else
/// </returns>
private Variable GetOrCreateVariable(string operand)
{
    // Registers map onto the pre-created register variables
    if (IsRegister(operand) && _registerVariables.TryGetValue(operand.ToLower(), out Variable? registerVariable))
    {
        return registerVariable;
    }

    // Neither a register nor a memory location: hand out a temporary that is not remembered
    if (!IsMemoryLocation(operand))
    {
        return new Variable
        {
            Name = $"temp_{_variableCounter++}",
            Location = operand
        };
    }

    // Memory locations are looked up by their normalized form and created on first use
    string locationKey = NormalizeMemoryLocation(operand);
    if (!_memoryVariables.TryGetValue(locationKey, out Variable? memoryVariable))
    {
        memoryVariable = new Variable
        {
            Name = $"var_{_variableCounter++}",
            Location = locationKey
        };
        _memoryVariables[locationKey] = memoryVariable;
    }

    return memoryVariable;
}
/// <summary>
/// Gets the value of an operand (variable or constant)
/// </summary>
/// <remarks>
/// Numeric constants are parsed with the invariant culture, so an operand such as "-8"
/// is recognised regardless of the machine's regional settings.
/// </remarks>
/// <param name="operand">The operand string</param>
/// <returns>
/// A Variable for registers and memory locations, an int for hexadecimal ("0x...") or
/// decimal constants that fit in 32 bits, otherwise the operand text itself
/// </returns>
private object GetOperandValue(string operand)
{
    // Registers and memory locations resolve to variables
    if (IsRegister(operand) || IsMemoryLocation(operand))
    {
        return GetOrCreateVariable(operand);
    }

    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    // Hexadecimal constant
    if (operand.StartsWith("0x", System.StringComparison.Ordinal) && operand.Length > 2
        && int.TryParse(operand.Substring(2), System.Globalization.NumberStyles.HexNumber, invariant, out int hexValue))
    {
        return hexValue;
    }

    // Decimal constant
    if (int.TryParse(operand, System.Globalization.NumberStyles.Integer, invariant, out int decimalValue))
    {
        return decimalValue;
    }

    // Otherwise, return the operand as a string
    return operand;
}
// Names of the registers the analysis knows about. Built once instead of on every call;
// the ordinal, case-insensitive comparer keeps "EDI"/"ESI"/"SI"/"DI" matching under
// cultures (e.g. Turkish) whose lower-casing of 'I' is not 'i'.
private static readonly HashSet<string> KnownRegisterNames = new(System.StringComparer.OrdinalIgnoreCase)
{
    "eax", "ebx", "ecx", "edx", "esi", "edi", "ebp", "esp",
    "ax", "bx", "cx", "dx", "si", "di", "bp", "sp",
    "al", "ah", "bl", "bh", "cl", "ch", "dl", "dh"
};

/// <summary>
/// Checks if an operand is a register
/// </summary>
/// <param name="operand">The operand to check; compared case-insensitively</param>
/// <returns>True if the operand is exactly one of the known 32-, 16- or 8-bit register names</returns>
private bool IsRegister(string operand)
{
    return KnownRegisterNames.Contains(operand);
}
/// <summary>
/// Checks if an operand is a memory location, i.e. contains both '[' and ']'
/// </summary>
/// <param name="operand">The operand to check</param>
/// <returns>True if the operand is a memory location</returns>
private bool IsMemoryLocation(string operand)
{
    if (operand.IndexOf('[') < 0)
    {
        return false;
    }

    return operand.IndexOf(']') >= 0;
}
/// <summary>
/// Reduces a memory operand to the trimmed text between its first '[' and first ']'
/// </summary>
/// <param name="operand">The operand to normalize</param>
/// <returns>The bracket contents, or the operand unchanged when no '[' ... ']' pair in that order is found</returns>
private string NormalizeMemoryLocation(string operand)
{
    int open = operand.IndexOf('[');
    if (open < 0)
    {
        return operand;
    }

    int close = operand.IndexOf(']');
    if (close <= open)
    {
        // No closing bracket, or the first ']' comes before the first '['
        return operand;
    }

    return operand[(open + 1)..close].Trim();
}
}

View File

@ -0,0 +1,522 @@
namespace X86Disassembler.Decompiler;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using X86Disassembler.X86;
/// <summary>
/// Main decompiler class that translates assembly code into higher-level code
/// </summary>
public class Decompiler
{
// Inputs, fixed at construction: the disassembled instructions and the entry point address
private readonly List<Instruction> _instructions;
private readonly ulong _entryPoint;

// Analysis results, produced by Decompile(): control flow graph and data flow analysis
private ControlFlowGraph? _controlFlowGraph;
private DataFlowAnalysis? _dataFlowAnalysis;

/// <summary>
/// Creates a decompiler for the given instructions
/// </summary>
/// <param name="instructions">The list of disassembled instructions</param>
/// <param name="entryPoint">The entry point address</param>
public Decompiler(List<Instruction> instructions, ulong entryPoint)
{
    (_instructions, _entryPoint) = (instructions, entryPoint);
}
/// <summary>
/// Runs the whole pipeline: control flow graph, data flow analysis, then pseudocode generation
/// </summary>
/// <returns>The decompiled code</returns>
public string Decompile()
{
    // Control flow first, starting from the configured entry point
    _controlFlowGraph = ControlFlowGraph.Build(_instructions, _entryPoint);

    // Then data flow over the same instruction list
    (_dataFlowAnalysis = new DataFlowAnalysis()).Analyze(_instructions);

    // Both results feed the pseudocode generator
    return GeneratePseudocode();
}
/// <summary>
/// Renders the analysis results as a single pseudocode function
/// </summary>
/// <returns>The generated pseudocode, or a one-line comment when no entry block is available</returns>
private string GeneratePseudocode()
{
    ControlFlowGraph.BasicBlock? entry = _controlFlowGraph?.EntryBlock;
    if (entry == null)
    {
        return "// Could not build control flow graph";
    }

    StringBuilder output = new StringBuilder();

    // Function header followed by a blank line
    output.AppendLine("// Decompiled function");
    output.AppendLine("int DecompiledFunction() {");
    output.AppendLine();

    // Declarations for every variable that does not live in a register
    if (_dataFlowAnalysis != null)
    {
        bool declaredAny = false;
        foreach (var variable in _dataFlowAnalysis.Variables)
        {
            if (IsRegister(variable.Location))
            {
                continue;
            }

            output.AppendLine($" {variable.Type} {variable.Name}; // {variable.Location}");
            declaredAny = true;
        }

        // Separate the declarations from the body only when there are any
        if (declaredAny)
        {
            output.AppendLine();
        }
    }

    // Body: depth-first walk starting at the entry block
    GenerateCodeForBlock(entry, output, new HashSet<ulong>(), 1);

    // Fall back to a default return when nothing emitted so far contains one
    if (!output.ToString().Contains("return"))
    {
        output.AppendLine(" return 0;");
    }

    output.AppendLine("}");
    return output.ToString();
}
/// <summary>
/// Emits a label and the translated instructions for a block, then recurses into its successors
/// </summary>
/// <param name="block">The basic block</param>
/// <param name="code">The code builder</param>
/// <param name="visitedBlocks">Start addresses of blocks already emitted; updated by this call</param>
/// <param name="indentLevel">The indentation level</param>
private void GenerateCodeForBlock(ControlFlowGraph.BasicBlock block, StringBuilder code, HashSet<ulong> visitedBlocks, int indentLevel)
{
    string bodyIndent = new string(' ', indentLevel * 4);

    // Add() reports false for a block that was emitted before: refer to it with a goto instead
    if (!visitedBlocks.Add(block.StartAddress))
    {
        code.AppendLine($"{bodyIndent}goto block_{block.StartAddress:X8};");
        return;
    }

    // Label, one level shallower than the block body, followed by a blank line
    string labelIndent = new string(' ', (indentLevel - 1) * 4);
    code.AppendLine($"{labelIndent}block_{block.StartAddress:X8}:");
    code.AppendLine();

    // Block body
    foreach (Instruction instruction in block.Instructions)
    {
        string statement = TranslateInstruction(instruction, indentLevel);
        if (!string.IsNullOrEmpty(statement))
        {
            code.AppendLine(statement);
        }
    }

    List<ControlFlowGraph.BasicBlock> successors = block.Successors;

    // One successor: straight continuation
    if (successors.Count == 1)
    {
        GenerateCodeForBlock(successors[0], code, visitedBlocks, indentLevel);
        return;
    }

    // Anything other than two successors ends the walk here
    if (successors.Count != 2)
    {
        return;
    }

    // Two successors: conditional branch decided by the block's last instruction
    Instruction branch = block.Instructions[^1];
    string condition = GetConditionFromJump(branch);
    ulong fallthroughAddress = branch.Address + (ulong)branch.RawBytes.Length;

    // The successor starting right after the branch is the fall-through; the other is the jump target
    ControlFlowGraph.BasicBlock? fallthrough = null;
    ControlFlowGraph.BasicBlock? taken = null;
    foreach (ControlFlowGraph.BasicBlock candidate in successors)
    {
        if (candidate.StartAddress == fallthroughAddress)
        {
            fallthrough = candidate;
        }
        else
        {
            taken = candidate;
        }
    }

    if (fallthrough == null || taken == null)
    {
        // The two roles could not be told apart: emit both successors in order
        foreach (ControlFlowGraph.BasicBlock candidate in successors)
        {
            GenerateCodeForBlock(candidate, code, visitedBlocks, indentLevel);
        }
        return;
    }

    // if (condition) { <jump target> } <fall-through>
    code.AppendLine($"{bodyIndent}if ({condition}) {{");
    code.AppendLine();
    GenerateCodeForBlock(taken, code, visitedBlocks, indentLevel + 1);
    code.AppendLine($"{bodyIndent}}}");
    code.AppendLine();
    GenerateCodeForBlock(fallthrough, code, visitedBlocks, indentLevel);
}
/// <summary>
/// Turns one instruction into a line of pseudocode, or into a comment when no translation exists
/// </summary>
/// <param name="instruction">The instruction to translate</param>
/// <param name="indentLevel">The indentation level</param>
/// <returns>The translated code statement</returns>
private string TranslateInstruction(Instruction instruction, int indentLevel)
{
    string indent = new string(' ', indentLevel * 4);
    string mnemonic = instruction.Mnemonic.ToLower();

    // Jumps are expressed through the control flow structure, so they only leave a comment
    if (mnemonic.StartsWith("j"))
    {
        return AsComment();
    }

    return mnemonic switch
    {
        "mov" => TranslateMovInstruction(instruction, indent),
        "add" or "sub" or "mul" or "div" or "and" or "or" or "xor"
            => TranslateArithmeticInstruction(instruction, indent),
        "call" => TranslateCallInstruction(instruction, indent),
        "ret" => TranslateReturnInstruction(instruction, indent),
        // push/pop, cmp/test and everything else are kept as a comment
        _ => AsComment()
    };

    // The instruction's own text behind a comment marker
    string AsComment() => $"{indent}// {instruction}";
}
/// <summary>
/// Translates a MOV instruction into an assignment statement
/// </summary>
/// <param name="instruction">The instruction to translate</param>
/// <param name="indent">The indentation string</param>
/// <returns>
/// The assignment followed by the instruction as a trailing comment; only a comment when the
/// operand list does not have exactly two parts or both operands are registers
/// </returns>
private string TranslateMovInstruction(Instruction instruction, string indent)
{
    string[] parts = instruction.Operands.Split(',');
    if (parts.Length != 2)
    {
        return $"{indent}// {instruction}";
    }

    string destination = parts[0].Trim();
    string source = parts[1].Trim();

    // Register-to-register moves are not translated
    if (IsRegister(destination) && IsRegister(source))
    {
        return $"{indent}// {instruction}";
    }

    // A memory destination is named directly; a memory source is named directly only
    // when the destination is not memory. Everything else goes through GetReadableOperand.
    bool destinationIsMemory = IsMemoryLocation(destination);
    bool sourceIsMemory = !destinationIsMemory && IsMemoryLocation(source);

    string left = destinationIsMemory ? GetVariableNameForMemory(destination) : GetReadableOperand(destination);
    string right = sourceIsMemory ? GetVariableNameForMemory(source) : GetReadableOperand(source);

    return $"{indent}{left} = {right}; // {instruction}";
}
/// <summary>
/// Translates a two-operand arithmetic instruction into a compound assignment
/// </summary>
/// <param name="instruction">The instruction to translate</param>
/// <param name="indent">The indentation string</param>
/// <returns>
/// The compound assignment followed by the instruction as a trailing comment; only a comment
/// when the operand list does not have exactly two parts or both operands are registers
/// </returns>
private string TranslateArithmeticInstruction(Instruction instruction, string indent)
{
    string[] parts = instruction.Operands.Split(',');
    if (parts.Length == 2)
    {
        string destination = parts[0].Trim();
        string source = parts[1].Trim();

        // Register-to-register operations are not translated
        bool registersOnly = IsRegister(destination) && IsRegister(source);
        if (!registersOnly)
        {
            string symbol = GetOperatorForMnemonic(instruction.Mnemonic.ToLower());
            return $"{indent}{GetReadableOperand(destination)} {symbol}= {GetReadableOperand(source)}; // {instruction}";
        }
    }

    return $"{indent}// {instruction}";
}
/// <summary>
/// Translates a CALL instruction into an argument-less function call
/// </summary>
/// <param name="instruction">The instruction to translate</param>
/// <param name="indent">The indentation string</param>
/// <returns>The translated code statement</returns>
private string TranslateCallInstruction(Instruction instruction, string indent)
{
    // The callee's name is derived from the trimmed operand text
    string callee = GetFunctionNameFromTarget(instruction.Operands.Trim());
    return $"{indent}{callee}(); // {instruction}";
}
/// <summary>
/// Translates a RET instruction into a return statement
/// </summary>
/// <param name="instruction">The instruction to translate</param>
/// <param name="indent">The indentation string</param>
/// <returns>The translated code statement</returns>
private string TranslateReturnInstruction(Instruction instruction, string indent)
{
    // Look for the variable located in EAX that is flagged as the return value
    DataFlowAnalysis.Variable? returned = _dataFlowAnalysis?.Variables
        .FirstOrDefault(v => v is { Location: "eax", IsReturnValue: true });

    return returned != null
        ? $"{indent}return {returned.Name}; // {instruction}"
        : $"{indent}return; // {instruction}";
}
/// <summary>
/// Maps a conditional jump mnemonic to a placeholder condition over the operands "a" and "b"
/// </summary>
/// <param name="instruction">The jump instruction</param>
/// <returns>The condition expression, or "condition" for mnemonics without a mapping</returns>
private string GetConditionFromJump(Instruction instruction)
{
    switch (instruction.Mnemonic.ToLower())
    {
        // Equality
        case "je": return "a == b";
        case "jne": return "a != b";
        // Zero tests
        case "jz": return "a == 0";
        case "jnz": return "a != 0";
        // Signed comparisons
        case "jg": return "a > b";
        case "jge": return "a >= b";
        case "jl": return "a < b";
        case "jle": return "a <= b";
        // Unsigned comparisons
        case "ja": return "a > b (unsigned)";
        case "jae": return "a >= b (unsigned)";
        case "jb": return "a < b (unsigned)";
        case "jbe": return "a <= b (unsigned)";
        default: return "condition";
    }
}
/// <summary>
/// Maps an arithmetic mnemonic to its operator symbol
/// </summary>
/// <param name="mnemonic">The instruction mnemonic</param>
/// <returns>The operator, or the mnemonic itself when it has no operator</returns>
private string GetOperatorForMnemonic(string mnemonic)
{
    switch (mnemonic)
    {
        case "add": return "+";
        case "sub": return "-";
        case "mul": return "*";
        case "div": return "/";
        case "and": return "&";
        case "or": return "|";
        case "xor": return "^";
        default: return mnemonic;
    }
}
/// <summary>
/// Gets the text used for an operand in the generated pseudocode
/// </summary>
/// <param name="operand">The operand</param>
/// <returns>
/// A variable name for a memory location that is not a register; the operand unchanged
/// for registers, hexadecimal constants and everything else
/// </returns>
private string GetReadableOperand(string operand)
{
    // Only memory locations are rewritten; the register check takes precedence
    bool needsVariableName = !IsRegister(operand) && IsMemoryLocation(operand);

    return needsVariableName ? GetVariableNameForMemory(operand) : operand;
}
/// <summary>
/// Gets a variable name for a memory location
/// </summary>
/// <param name="memoryLocation">
/// The memory operand text; the reference is taken from between the first '[' and the first ']'
/// </param>
/// <returns>
/// The name of the data flow variable whose location equals the bracketed reference;
/// otherwise, for references starting with "ebp+" or "ebp-", a synthesized name of the form
/// "local_plus_N" or "local_minus_N"; otherwise the placeholder "memory" (which is also
/// returned when no data flow analysis is available)
/// </returns>
private string GetVariableNameForMemory(string memoryLocation)
{
    if (_dataFlowAnalysis == null)
    {
        return "memory";
    }

    // Extract the part inside the brackets
    int startIndex = memoryLocation.IndexOf('[');
    int endIndex = memoryLocation.IndexOf(']');
    if (startIndex < 0 || endIndex <= startIndex)
    {
        return "memory";
    }

    string memoryReference = memoryLocation.Substring(startIndex + 1, endIndex - startIndex - 1).Trim();

    // Prefer a variable that data flow analysis already associated with this location
    var variable = _dataFlowAnalysis.Variables.FirstOrDefault(v => v.Location == memoryReference);
    if (variable != null)
    {
        return variable.Name;
    }

    // If it's a stack variable (relative to EBP), give it a meaningful name
    if (memoryReference.StartsWith("ebp+", StringComparison.Ordinal) ||
        memoryReference.StartsWith("ebp-", StringComparison.Ordinal))
    {
        // Skip only "ebp" (3 characters) so the sign stays part of the offset. Skipping the
        // sign as well made [ebp+8] and [ebp-8] collapse into the same name.
        string signedOffset = memoryReference.Substring(3);
        string suffix = signedOffset.Replace("+", "plus_").Replace("-", "minus_");
        return $"local_{suffix}";
    }

    return "memory";
}
/// <summary>
/// Gets a function name from a call target
/// </summary>
/// <param name="target">The call target operand text</param>
/// <returns>
/// "function_" followed by the hex digits for a direct "0x" address, "function_ptr_"
/// followed by a variable name for a memory operand, or the target unchanged otherwise
/// </returns>
private string GetFunctionNameFromTarget(string target)
{
    // If it's a direct address, format it. The prefix is a machine-formatted token,
    // so compare ordinally rather than with the current culture's rules.
    if (target.Length > 2 && target.StartsWith("0x", StringComparison.Ordinal))
    {
        return $"function_{target.Substring(2)}";
    }

    // If it's a memory location, name the pointer after the variable it refers to
    if (IsMemoryLocation(target))
    {
        return $"function_ptr_{GetVariableNameForMemory(target)}";
    }

    // Otherwise, use the target as is
    return target;
}
/// <summary>
/// Checks if an operand is a register
/// </summary>
/// <param name="operand">The operand to check; compared case-insensitively</param>
/// <returns>
/// True if the operand is exactly one of the recognized 32-bit (eax..esp),
/// 16-bit (ax..sp) or 8-bit (al..dh) register names
/// </returns>
private bool IsRegister(string operand)
{
    // ToLowerInvariant instead of ToLower: under Turkish cultures 'I' lowers to a dotless
    // 'ı', so "EDI", "ESI", "DI" and "SI" would fail to match. The constant pattern also
    // avoids allocating a lookup array on every call.
    return operand.ToLowerInvariant() is
        "eax" or "ebx" or "ecx" or "edx" or "esi" or "edi" or "ebp" or "esp" or
        "ax" or "bx" or "cx" or "dx" or "si" or "di" or "bp" or "sp" or
        "al" or "ah" or "bl" or "bh" or "cl" or "ch" or "dl" or "dh";
}
/// <summary>
/// Checks if an operand is a memory location
/// </summary>
/// <param name="operand">The operand to check</param>
/// <returns>True if the operand contains both an opening and a closing square bracket</returns>
private bool IsMemoryLocation(string operand)
{
    // Only the presence of both brackets is tested, not their order
    if (operand.IndexOf('[') < 0)
    {
        return false;
    }

    return operand.IndexOf(']') >= 0;
}
}

View File

@ -4,6 +4,7 @@ using System.Text;
using System.Collections.Generic; using System.Collections.Generic;
using X86Disassembler.PE; using X86Disassembler.PE;
using X86Disassembler.X86; using X86Disassembler.X86;
using X86Disassembler.Decompiler;
namespace X86Disassembler; namespace X86Disassembler;
@ -70,7 +71,7 @@ public class Program
Console.WriteLine($"Disassembling section {section.Name} at RVA 0x{section.VirtualAddress:X8}:"); Console.WriteLine($"Disassembling section {section.Name} at RVA 0x{section.VirtualAddress:X8}:");
// Create a disassembler for the code section // Create a disassembler for the code section
Disassembler disassembler = new Disassembler(codeBytes, section.VirtualAddress); Disassembler disassembler = new Disassembler(codeBytes, peFile.OptionalHeader.ImageBase + section.VirtualAddress);
// Disassemble all instructions // Disassemble all instructions
var instructions = disassembler.Disassemble(); var instructions = disassembler.Disassemble();
@ -97,6 +98,26 @@ public class Program
{ {
Console.WriteLine($"... ({instructions.Count - count} more instructions not shown)"); Console.WriteLine($"... ({instructions.Count - count} more instructions not shown)");
} }
// Decompile the instructions
Console.WriteLine("\nDecompiling the first function:\n");
// For demonstration, we'll decompile a small subset of instructions
// In a real scenario, you'd identify function boundaries first
int functionSize = Math.Min(50, instructions.Count);
List<Instruction> functionInstructions = instructions.GetRange(0, functionSize);
// Create a decompiler for the function
Decompiler.Decompiler decompiler = new Decompiler.Decompiler(
functionInstructions,
functionInstructions[0].Address
);
// Decompile the function
string decompiledCode = decompiler.Decompile();
// Print the decompiled code
Console.WriteLine(decompiledCode);
} }
// Console.WriteLine("\nPress Enter to exit..."); // Console.WriteLine("\nPress Enter to exit...");

View File

@ -15,7 +15,7 @@ public class Disassembler
private readonly int _length; private readonly int _length;
// The base address of the code // The base address of the code
private readonly uint _baseAddress; private readonly ulong _baseAddress;
// Segment override prefixes // Segment override prefixes
private static readonly byte[] SegmentOverridePrefixes = { 0x26, 0x2E, 0x36, 0x3E, 0x64, 0x65 }; private static readonly byte[] SegmentOverridePrefixes = { 0x26, 0x2E, 0x36, 0x3E, 0x64, 0x65 };
@ -25,7 +25,7 @@ public class Disassembler
/// </summary> /// </summary>
/// <param name="codeBuffer">The buffer containing the code to disassemble</param> /// <param name="codeBuffer">The buffer containing the code to disassemble</param>
/// <param name="baseAddress">The base address of the code</param> /// <param name="baseAddress">The base address of the code</param>
public Disassembler(byte[] codeBuffer, uint baseAddress) public Disassembler(byte[] codeBuffer, ulong baseAddress)
{ {
_codeBuffer = codeBuffer; _codeBuffer = codeBuffer;
_length = codeBuffer.Length; _length = codeBuffer.Length;

View File

@ -44,12 +44,10 @@ public class JgeRel8Handler : InstructionHandler
return true; return true;
} }
// Read the offset and calculate target address
int position = Decoder.GetPosition();
sbyte offset = (sbyte)Decoder.ReadByte(); sbyte offset = (sbyte)Decoder.ReadByte();
// Calculate target address (instruction address + instruction length + offset) // Calculate target address (instruction address + instruction length + offset)
uint targetAddress = (uint)(instruction.Address + 2 + offset); ulong targetAddress = instruction.Address + 2UL + (uint)offset;
// Format the target address // Format the target address
instruction.Operands = $"0x{targetAddress:X8}"; instruction.Operands = $"0x{targetAddress:X8}";

View File

@ -38,8 +38,7 @@ public class JmpRel32Handler : InstructionHandler
instruction.Mnemonic = "jmp"; instruction.Mnemonic = "jmp";
// Check if we have enough bytes for the offset (4 bytes) // Check if we have enough bytes for the offset (4 bytes)
int position = Decoder.GetPosition(); if (!Decoder.CanReadUInt())
if (position + 4 > Length)
{ {
return false; return false;
} }

View File

@ -43,12 +43,10 @@ public class JmpRel8Handler : InstructionHandler
return true; return true;
} }
// Read the offset and calculate target address
int position = Decoder.GetPosition();
sbyte offset = (sbyte)Decoder.ReadByte(); sbyte offset = (sbyte)Decoder.ReadByte();
// Calculate target address (instruction address + instruction length + offset) // Calculate target address (instruction address + instruction length + offset)
uint targetAddress = (uint)(instruction.Address + 2 + offset); ulong targetAddress = instruction.Address + 2UL + (uint)offset;
// Format the target address // Format the target address
instruction.Operands = $"0x{targetAddress:X8}"; instruction.Operands = $"0x{targetAddress:X8}";

View File

@ -55,8 +55,6 @@ public class TwoByteConditionalJumpHandler : InstructionHandler
/// <returns>True if the instruction was successfully decoded</returns> /// <returns>True if the instruction was successfully decoded</returns>
public override bool Decode(byte opcode, Instruction instruction) public override bool Decode(byte opcode, Instruction instruction)
{ {
int position = Decoder.GetPosition();
// Check if we have enough bytes for the second byte // Check if we have enough bytes for the second byte
if (!Decoder.CanReadByte()) if (!Decoder.CanReadByte())
{ {

View File

@ -8,7 +8,7 @@ public class Instruction
/// <summary> /// <summary>
/// Gets or sets the address of the instruction /// Gets or sets the address of the instruction
/// </summary> /// </summary>
public uint Address { get; set; } public ulong Address { get; set; }
/// <summary> /// <summary>
/// Gets or sets the mnemonic of the instruction /// Gets or sets the mnemonic of the instruction